In [1]:
%load_ext rich
%load_ext autoreload
%autoreload 2

In [2]:
import polars as pl
import pointblank as pb
import odyssey.core as od

from utils import validate_ipaq
from config import INTERIM_DATA



In [4]:
prefix = "G126"
g126 = od.Dataset("G126_Q.sav", INTERIM_DATA)
lf, _meta = g126.load_data()
df = lf.select("ID", pl.col("^.*IPAQ.*$")).collect()

# validation = validate_ipaq(prefix, df)

In [5]:
from typing import Callable

def check_total_mins(
    hpd_column: str,
    mpd_column: str
    ) -> Callable:
    """
    Returns a preprocessing function to verify the minutes for a given category have been correctly calculated.
    Cap the total at 180 minutes, and preserve null values.
    """
    def preprocessor(df: pl.DataFrame) -> pl.DataFrame:
        return df.with_columns(
            (pl.col(hpd_column).fill_null(0) * 60 + pl.col(mpd_column).fill_null(0))
            .pipe(lambda expr: pl.when(expr > 180).then(180).otherwise(expr))
            .alias("check")
        )
    return preprocessor

def check_met(
    mins_column: str, 
    n_days_column: str, 
    met_column: str,
    factor: int|float # the corresponding factor for the activity (Vig: 8, Mod: 4, Walk: 3.3)
    ) -> Callable:
    """Returns a preprocessing function to verify the calculated MET value for a given category."""
    def preprocessor(df: pl.DataFrame) -> pl.DataFrame:
        return df.with_columns(
            (pl.col(mins_column).fill_null(0) * pl.col(n_days_column).fill_null(0) * factor).alias("check"),
            pl.col(met_column).fill_null(0)
        )
    return preprocessor

def check_tot_met(
    met_columns: list[str],
    tot_met_column: str
    ) -> Callable:
    """Returns a preprocessing function to verify the calculated total MET value."""
    def preprocessor(df: pl.DataFrame) -> pl.DataFrame:
        expr = sum(pl.col(col).fill_null(0) for col in met_columns)
        
        return df.with_columns(
            expr.alias("check"),
            pl.col(tot_met_column).fill_null(0) # Fill nulls with 0; otherwise the validation skips if one value in a comparison is null
        )
    return preprocessor

In [6]:
#TODO: flesh this out and run validations after initial harmonisation

def check_ipaq_cat(
    vig_d: str, # days of vigorous exercise per week
    mod_d: str,
    walk_d: str,
    vig_m: str, # mins of vigorous exercise per day
    mod_m: str,
    walk_m: str,
    tot_met: str, # total MET minutes per week
    cat: str # IPAQ category (low: 0, moderate: 1, high: 2)
    ) -> Callable:
    """
    Returns a preprocessing function to verify the IPAQ category.

    HIGH: 2
    Vigorous exercise on 3+ days for 20+ mins AND >= 1500 MET mins per week
    OR combination of any exercise on 7+ days AND >= 3000 MET mins per week

    MODERATE: 1
    Vig exercise 3+ days for 20+ mins
    OR mod exercise AND/OR walking 5+ days for 30 mins
    OR any exercise on 5+ days AND >= 600 MET mins per week

    LOW: 0
    None of the above criteria
        """
    def preprocessor(df: pl.DataFrame) -> pl.DataFrame:
        return df.with_columns(
            # We don't have the data to clearly determine "exercise of any kind on 7 days"
            # Based on my assumption that means the participant exercised every day in the week
            # I've accounted it such that if the participant did two forms of exercise in a single day
            # That would be counted as if they exercised on two days
            (pl.when(
                (pl.col(vig_d).ge(3) & pl.col(vig_m).ge(20) & pl.col(tot_met).ge(1500)) | 
                (sum(pl.col(col).fill_null(0) for col in [vig_d, mod_d, walk_d]).ge(7) & pl.col(tot_met).ge(3000))
            ).then(2)
            .when(
                (pl.col(vig_d).ge(3) & pl.col(vig_m).ge(20)) |
                (sum(pl.col(col).fill_null(0) for col in [mod_d, walk_d]).ge(5) &
                 sum(pl.col(col).fill_null(0) for col in [mod_m, walk_m]).ge(30)) |
                (sum(pl.col(col).fill_null(0) for col in [vig_d, mod_d, walk_d]).ge(5) & pl.col(tot_met).ge(600))
            ).then(1)
            .when(pl.col(tot_met).is_null()).then(None) # Don't impute 0 if data is missing
            .otherwise(0)
            ).alias("check"),
            pl.col(cat).fill_null(0) # Fill nulls with 0; otherwise the validation skips if one value in a comparison is null
        )
    return preprocessor

In [7]:
validation = (
    pb.Validate(
        data=df,
    )
    .col_vals_eq(
        columns=f"{prefix}_IPAQ_VIG_MINS",
        value=pb.col("check"),
        pre=check_total_mins(f"{prefix}_IPAQ_VIG_HPD", f"{prefix}_IPAQ_VIG_MPD"),
        brief="Check total mins/day equals `HPD*60 + MPD`",
        na_pass=True,
    )
    .col_vals_eq(
        columns=f"{prefix}_IPAQ_MOD_MINS",
        value=pb.col("check"),
        pre=check_total_mins(f"{prefix}_IPAQ_MOD_HPD", f"{prefix}_IPAQ_MOD_MPD"),
        brief="Check total mins/day equals `HPD*60 + MPD`",
        na_pass=True,
    )
    .col_vals_eq(
        columns=f"{prefix}_IPAQ_WALK_MINS",
        value=pb.col("check"),
        pre=check_total_mins(f"{prefix}_IPAQ_WALK_HPD", f"{prefix}_IPAQ_WALK_MPD"),
        brief="Check total mins/day equals `HPD*60 + MPD`",
        na_pass=True,
    )
    .col_vals_eq(
        columns=f"{prefix}_IPAQ_VIG_MET",
        value=pb.col("check"),
        pre=check_met(f"{prefix}_IPAQ_VIG_MINS", f"{prefix}_IPAQ_VIG_D", f"{prefix}_IPAQ_VIG_MET", factor=8),
    )
    .col_vals_eq(
        columns=f"{prefix}_IPAQ_MOD_MET",
        value=pb.col("check"),
        pre=check_met(f"{prefix}_IPAQ_MOD_MINS", f"{prefix}_IPAQ_MOD_D", f"{prefix}_IPAQ_MOD_MET", factor=4),
    )
    .col_vals_eq(
        columns=f"{prefix}_IPAQ_WALK_MET",
        value=pb.col("check"),
        pre=check_met(f"{prefix}_IPAQ_WALK_MINS", f"{prefix}_IPAQ_WALK_D", f"{prefix}_IPAQ_WALK_MET", factor=3.3),
    )
    .col_vals_eq(
        columns=f"{prefix}_IPAQ_TOT_MET",
        value=pb.col("check"),
        pre=check_tot_met([f"{prefix}_IPAQ_VIG_MET", f"{prefix}_IPAQ_MOD_MET", f"{prefix}_IPAQ_WALK_MET"], f"{prefix}_IPAQ_TOT_MET"),
        brief="Check `TOT_MET` equals the sum of `VIG_MET`, `MOD_MET`, and `WALK_MET`"
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_VIG_D",
        left=1,
        right=7,
        segments=(f"{prefix}_IPAQ_VIG_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_VIG_HPD", 
        left=0,
        right=18, # unrealistic to do more than 18 hours of exercise per day (even that is a stretch!!)
        segments=(f"{prefix}_IPAQ_VIG_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_VIG_MPD", 
        left=0,
        right=59,
        segments=(f"{prefix}_IPAQ_VIG_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_VIG_MINS", 
        left=0,
        right=180, # total mins per category is capped at 180 mins
        segments=(f"{prefix}_IPAQ_VIG_W", 1),
    )
    .col_vals_eq(
        columns=[f"{prefix}_IPAQ_VIG_D", f"{prefix}_IPAQ_VIG_HPD", f"{prefix}_IPAQ_VIG_MPD", f"{prefix}_IPAQ_VIG_MINS", f"{prefix}_IPAQ_VIG_MET"], 
        value=0,
        segments=(f"{prefix}_IPAQ_VIG_W", 0)
    )
    # .col_vals_null(
    #     columns=[f"{prefix}_IPAQ_VIG_D", f"{prefix}_IPAQ_VIG_HPD", f"{prefix}_IPAQ_VIG_MPD", f"{prefix}_IPAQ_VIG_MINS", f"{prefix}_IPAQ_VIG_MET"],
    #     pre=lambda df: df.with_columns(pl.col(f"{prefix}_IPAQ_VIG_W").fill_null(-1)), # Pointblank doesn't seem to like segmenting values with null, so transform null to -1 and segment y that
    #     segments=(f"{prefix}_IPAQ_VIG_W", -1)
    # )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_MOD_D",
        left=1,
        right=7,
        segments=(f"{prefix}_IPAQ_MOD_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_MOD_HPD", 
        left=0,
        right=18, # unrealistic to do more than 18 hours of exercise per day (even that is a stretch!!)
        segments=(f"{prefix}_IPAQ_MOD_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_MOD_MPD", 
        left=0,
        right=59,
        segments=(f"{prefix}_IPAQ_MOD_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_MOD_MINS", 
        left=0,
        right=180, # total mins per category is capped at 180 mins
        segments=(f"{prefix}_IPAQ_MOD_W", 1),
    )
    .col_vals_eq(
        columns=[f"{prefix}_IPAQ_MOD_D", f"{prefix}_IPAQ_MOD_HPD", f"{prefix}_IPAQ_MOD_MPD", f"{prefix}_IPAQ_MOD_MINS", f"{prefix}_IPAQ_MOD_MET"], 
        value=0,
        segments=(f"{prefix}_IPAQ_MOD_W", 0)
    )
    # .col_vals_null(
    #     columns=[f"{prefix}_IPAQ_MOD_D", f"{prefix}_IPAQ_MOD_HPD", f"{prefix}_IPAQ_MOD_MPD", f"{prefix}_IPAQ_MOD_MINS", f"{prefix}_IPAQ_MOD_MET"],
    #     pre=lambda df: df.with_columns(pl.col(f"{prefix}_IPAQ_MOD_W").fill_null(-1)), # Pointblank doesn't seem to like segmenting values with null, so transform null to -1 and segment y that
    #     segments=(f"{prefix}_IPAQ_MOD_W", -1)
    # )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_WALK_D",
        left=1,
        right=7,
        segments=(f"{prefix}_IPAQ_WALK_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_WALK_HPD", 
        left=0,
        right=18, # unrealistic to do more than 18 hours of exercise per day (even that is a stretch!!)
        segments=(f"{prefix}_IPAQ_WALK_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_WALK_MPD", 
        left=0,
        right=59,
        segments=(f"{prefix}_IPAQ_WALK_W", 1),
    )
    .col_vals_between(
        columns=f"{prefix}_IPAQ_WALK_MINS", 
        left=0,
        right=180, # total mins per category is capped at 180 mins
        segments=(f"{prefix}_IPAQ_WALK_W", 1),
    )
    .col_vals_eq(
        columns=[f"{prefix}_IPAQ_WALK_D", f"{prefix}_IPAQ_WALK_HPD", f"{prefix}_IPAQ_WALK_MPD", f"{prefix}_IPAQ_WALK_MINS", f"{prefix}_IPAQ_WALK_MET"], 
        value=0,
        segments=(f"{prefix}_IPAQ_WALK_W", 0)
    )
    # .col_vals_null(
    #     columns=[f"{prefix}_IPAQ_WALK_D", f"{prefix}_IPAQ_WALK_HPD", f"{prefix}_IPAQ_WALK_MPD", f"{prefix}_IPAQ_WALK_MINS", f"{prefix}_IPAQ_WALK_MET"],
    #     pre=lambda df: df.with_columns(pl.col(f"{prefix}_IPAQ_WALK_W").fill_null(-1)), # Pointblank doesn't seem to like segmenting values with null, so transform null to -1 and segment y that
    #     segments=(f"{prefix}_IPAQ_WALK_W", -1)
    # )
    .col_vals_eq(
        columns=f"{prefix}_IPAQ_CAT",
        value=pb.col("check"),
        pre=check_ipaq_cat(
            f"{prefix}_IPAQ_VIG_D", f"{prefix}_IPAQ_MOD_D", f"{prefix}_IPAQ_WALK_D",
            f"{prefix}_IPAQ_VIG_MINS", f"{prefix}_IPAQ_MOD_MINS", f"{prefix}_IPAQ_WALK_MINS",
            f"{prefix}_IPAQ_TOT_MET", f"{prefix}_IPAQ_CAT"
        ),
        brief="Check `IPAQ_CAT` is correctly calculated."
    )
).interrogate()

In [8]:
# Filter validation output for only failed validations
all_extracts = validation.get_data_extracts()

all_failure_ids = set()
for step, extract in all_extracts.items():
    if len(extract) > 0:
        all_failure_ids.update(extract["ID"])

In [9]:
len(all_failure_ids)

[1;36m755[0m

In [10]:
validation

Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation,Pointblank Validation
2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars,2025-06-19|04:37:22Polars
Unnamed: 0_level_2,Unnamed: 1_level_2,STEP,COLUMNS,VALUES,TBL,EVAL,UNITS,PASS,FAIL,W,E,C,EXT
#4CA64C66,1,col_vals_eq  col_vals_eq()  Check total mins/day equals HPD*60 + MPD,G126_IPAQ_VIG_MINS,check,,✓,1098,526 0.48,572 0.52,—,—,—,CSV
#4CA64C66,2,col_vals_eq  col_vals_eq()  Check total mins/day equals HPD*60 + MPD,G126_IPAQ_MOD_MINS,check,,✓,1098,623 0.57,475 0.43,—,—,—,CSV
#4CA64C66,3,col_vals_eq  col_vals_eq()  Check total mins/day equals HPD*60 + MPD,G126_IPAQ_WALK_MINS,check,,✓,1098,924 0.84,174 0.16,—,—,—,CSV
#4CA64C,4,col_vals_eq  col_vals_eq(),G126_IPAQ_VIG_MET,check,,✓,1098,1098 1.00,0 0.00,—,—,—,—
#4CA64C,5,col_vals_eq  col_vals_eq(),G126_IPAQ_MOD_MET,check,,✓,1098,1098 1.00,0 0.00,—,—,—,—
#4CA64C66,6,col_vals_eq  col_vals_eq(),G126_IPAQ_WALK_MET,check,,✓,1098,1096 1.00,2 0.00,—,—,—,CSV
#4CA64C66,7,"col_vals_eq  col_vals_eq()  Check TOT_MET equals the sum of VIG_MET, MOD_MET, and WALK_MET",G126_IPAQ_TOT_MET,check,,✓,1098,1096 1.00,2 0.00,—,—,—,CSV
#4CA64C,8,SEGMENT G126_IPAQ_VIG_W / 1 col_vals_between  col_vals_between(),G126_IPAQ_VIG_D,"[1, 7]",,✓,515,515 1.00,0 0.00,—,—,—,—
#4CA64C,9,SEGMENT G126_IPAQ_VIG_W / 1 col_vals_between  col_vals_between(),G126_IPAQ_VIG_HPD,"[0, 18]",,✓,515,515 1.00,0 0.00,—,—,—,—
#4CA64C66,10,SEGMENT G126_IPAQ_VIG_W / 1 col_vals_between  col_vals_between(),G126_IPAQ_VIG_MPD,"[0, 59]",,✓,515,509 0.99,6 0.01,—,—,—,CSV


In [15]:
validation.get_step_report(i=3, columns_subset=pb.matches(r"WALK"))

Report for Validation Step 3ASSERTION G126_IPAQ_WALK_MINS = check174 / 1098 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 3ASSERTION G126_IPAQ_WALK_MINS = check174 / 1098 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 3ASSERTION G126_IPAQ_WALK_MINS = check174 / 1098 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 3ASSERTION G126_IPAQ_WALK_MINS = check174 / 1098 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 3ASSERTION G126_IPAQ_WALK_MINS = check174 / 1098 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 3ASSERTION G126_IPAQ_WALK_MINS = check174 / 1098 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 3ASSERTION G126_IPAQ_WALK_MINS = check174 / 1098 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):
Unnamed: 0_level_1,G126_IPAQ_WALK_WFloat64,G126_IPAQ_WALK_HPDFloat64,G126_IPAQ_WALK_MPDFloat64,G126_IPAQ_WALK_MINSFloat64,G126_IPAQ_WALK_DFloat64,G126_IPAQ_WALK_METFloat64
9,1.0,2.0,0.0,180.0,7.0,4158.0
24,0.0,888.0,888.0,0.0,0.0,0.0
25,1.0,2.0,0.0,180.0,7.0,4158.0
31,0.0,888.0,888.0,0.0,0.0,0.0
35,0.0,888.0,888.0,0.0,0.0,0.0
36,1.0,2.0,0.0,180.0,5.0,2970.0
38,1.0,1.0,0.0,120.0,7.0,2772.0
39,0.0,888.0,888.0,0.0,0.0,0.0
40,0.0,888.0,888.0,0.0,0.0,0.0
41,0.0,888.0,888.0,0.0,0.0,0.0


In [13]:
validation.get_step_report(i=6, columns_subset=pb.matches(r"WALK"))

Report for Validation Step 6ASSERTION G126_IPAQ_WALK_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 19 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 6ASSERTION G126_IPAQ_WALK_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 19 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 6ASSERTION G126_IPAQ_WALK_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 19 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 6ASSERTION G126_IPAQ_WALK_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 19 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 6ASSERTION G126_IPAQ_WALK_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 19 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 6ASSERTION G126_IPAQ_WALK_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 19 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 6ASSERTION G126_IPAQ_WALK_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 19 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):
Unnamed: 0_level_1,G126_IPAQ_WALK_WFloat64,G126_IPAQ_WALK_HPDFloat64,G126_IPAQ_WALK_MPDFloat64,G126_IPAQ_WALK_MINSFloat64,G126_IPAQ_WALK_DFloat64,G126_IPAQ_WALK_METFloat64
532,1.0,1.0,0.0,61.0,5.0,1006.5
862,1.0,1.0,0.0,61.0,5.0,1006.5


In [16]:
validation.get_step_report(i=7, columns_subset=pb.matches(r"_MET"))

Report for Validation Step 7ASSERTION G126_IPAQ_TOT_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 20 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 7ASSERTION G126_IPAQ_TOT_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 20 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 7ASSERTION G126_IPAQ_TOT_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 20 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 7ASSERTION G126_IPAQ_TOT_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 20 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 7ASSERTION G126_IPAQ_TOT_MET = check2 / 1098 TEST UNIT FAILURES IN COLUMN 20 EXTRACT OF ALL 2 ROWS (WITH TEST UNIT FAILURES IN RED):
Unnamed: 0_level_1,G126_IPAQ_VIG_METFloat64,G126_IPAQ_MOD_METFloat64,G126_IPAQ_WALK_METFloat64,G126_IPAQ_TOT_METFloat64
82,160.0,,,0.0
638,5760.0,0.0,,0.0


In [17]:
validation.get_step_report(i=26, columns_subset=pb.matches(r"WALK"))

Report for Validation Step 26ASSERTION 1 ≤ G126_IPAQ_WALK_D ≤ 713 / 923 TEST UNIT FAILURES IN COLUMN 16 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 26ASSERTION 1 ≤ G126_IPAQ_WALK_D ≤ 713 / 923 TEST UNIT FAILURES IN COLUMN 16 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 26ASSERTION 1 ≤ G126_IPAQ_WALK_D ≤ 713 / 923 TEST UNIT FAILURES IN COLUMN 16 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 26ASSERTION 1 ≤ G126_IPAQ_WALK_D ≤ 713 / 923 TEST UNIT FAILURES IN COLUMN 16 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 26ASSERTION 1 ≤ G126_IPAQ_WALK_D ≤ 713 / 923 TEST UNIT FAILURES IN COLUMN 16 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 26ASSERTION 1 ≤ G126_IPAQ_WALK_D ≤ 713 / 923 TEST UNIT FAILURES IN COLUMN 16 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 26ASSERTION 1 ≤ G126_IPAQ_WALK_D ≤ 713 / 923 TEST UNIT FAILURES IN COLUMN 16 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):
Unnamed: 0_level_1,G126_IPAQ_WALK_WFloat64,G126_IPAQ_WALK_HPDFloat64,G126_IPAQ_WALK_MPDFloat64,G126_IPAQ_WALK_MINSFloat64,G126_IPAQ_WALK_DFloat64,G126_IPAQ_WALK_METFloat64
30,1.0,1.0,10.0,70.0,0.0,0.0
46,1.0,2.0,0.0,120.0,0.0,0.0
142,1.0,1.0,0.0,60.0,0.0,0.0
152,1.0,1.0,2.0,62.0,0.0,0.0
242,1.0,3.0,0.0,180.0,0.0,0.0
285,1.0,0.0,20.0,20.0,20.0,1320.0
372,1.0,0.0,30.0,30.0,0.0,0.0
571,1.0,1.0,0.0,60.0,0.0,0.0
589,1.0,6.0,30.0,180.0,0.0,0.0
787,1.0,3.0,45.0,180.0,0.0,0.0


In [20]:
validation.get_step_report(i=31, columns_subset=pb.matches(r"WALK"))

Report for Validation Step 31ASSERTION G126_IPAQ_WALK_HPD = 0119 / 119 TEST UNIT FAILURES IN COLUMN 9 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 31ASSERTION G126_IPAQ_WALK_HPD = 0119 / 119 TEST UNIT FAILURES IN COLUMN 9 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 31ASSERTION G126_IPAQ_WALK_HPD = 0119 / 119 TEST UNIT FAILURES IN COLUMN 9 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 31ASSERTION G126_IPAQ_WALK_HPD = 0119 / 119 TEST UNIT FAILURES IN COLUMN 9 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 31ASSERTION G126_IPAQ_WALK_HPD = 0119 / 119 TEST UNIT FAILURES IN COLUMN 9 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 31ASSERTION G126_IPAQ_WALK_HPD = 0119 / 119 TEST UNIT FAILURES IN COLUMN 9 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 31ASSERTION G126_IPAQ_WALK_HPD = 0119 / 119 TEST UNIT FAILURES IN COLUMN 9 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):
Unnamed: 0_level_1,G126_IPAQ_WALK_WFloat64,G126_IPAQ_WALK_HPDFloat64,G126_IPAQ_WALK_MPDFloat64,G126_IPAQ_WALK_MINSFloat64,G126_IPAQ_WALK_DFloat64,G126_IPAQ_WALK_METFloat64
1,0.0,888.0,888.0,0.0,0.0,0.0
2,0.0,888.0,888.0,0.0,0.0,0.0
3,0.0,888.0,888.0,0.0,0.0,0.0
4,0.0,888.0,888.0,0.0,0.0,0.0
5,0.0,888.0,888.0,0.0,0.0,0.0
6,0.0,888.0,888.0,0.0,0.0,0.0
7,0.0,888.0,888.0,0.0,0.0,0.0
8,0.0,888.0,888.0,0.0,0.0,0.0
9,0.0,888.0,888.0,0.0,0.0,0.0
10,0.0,888.0,888.0,0.0,0.0,0.0


In [21]:
validation.get_step_report(i=33, columns_subset=pb.matches(r"WALK"), limit=None)

Report for Validation Step 33ASSERTION G126_IPAQ_WALK_MINS = 01 / 119 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF ALL 1 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 33ASSERTION G126_IPAQ_WALK_MINS = 01 / 119 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF ALL 1 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 33ASSERTION G126_IPAQ_WALK_MINS = 01 / 119 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF ALL 1 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 33ASSERTION G126_IPAQ_WALK_MINS = 01 / 119 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF ALL 1 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 33ASSERTION G126_IPAQ_WALK_MINS = 01 / 119 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF ALL 1 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 33ASSERTION G126_IPAQ_WALK_MINS = 01 / 119 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF ALL 1 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 33ASSERTION G126_IPAQ_WALK_MINS = 01 / 119 TEST UNIT FAILURES IN COLUMN 13 EXTRACT OF ALL 1 ROWS (WITH TEST UNIT FAILURES IN RED):
Unnamed: 0_level_1,G126_IPAQ_WALK_WFloat64,G126_IPAQ_WALK_HPDFloat64,G126_IPAQ_WALK_MPDFloat64,G126_IPAQ_WALK_MINSFloat64,G126_IPAQ_WALK_DFloat64,G126_IPAQ_WALK_METFloat64
77,0.0,888.0,888.0,60.0,0.0,0.0


In [22]:
validation.get_step_report(i=35, columns_subset=pb.matches(r"ID|_D|MINS|MET|CAT"))

Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):,Report for Validation Step 35ASSERTION G126_IPAQ_CAT = check68 / 1098 TEST UNIT FAILURES IN COLUMN 21 EXTRACT OF FIRST 10 ROWS (WITH TEST UNIT FAILURES IN RED):
Unnamed: 0_level_1,IDFloat64,G126_IPAQ_VIG_MINSFloat64,G126_IPAQ_MOD_MINSFloat64,G126_IPAQ_WALK_MINSFloat64,G126_IPAQ_VIG_DFloat64,G126_IPAQ_MOD_DFloat64,G126_IPAQ_WALK_DFloat64,G126_IPAQ_VIG_METFloat64,G126_IPAQ_MOD_METFloat64,G126_IPAQ_WALK_METFloat64,G126_IPAQ_TOT_METFloat64,G126_IPAQ_CATFloat64
8,1401.0,30.0,0.0,180.0,4.0,0.0,2.0,960.0,0.0,1188.0,2148.0,1.0
73,13101.0,60.0,0.0,120.0,3.0,0.0,2.0,1440.0,0.0,792.0,2232.0,1.0
74,13102.0,60.0,0.0,15.0,3.0,0.0,5.0,1440.0,0.0,247.5,1687.5,1.0
80,14202.0,20.0,30.0,30.0,4.0,4.0,4.0,640.0,480.0,396.0,1516.0,1.0
103,19302.0,30.0,60.0,30.0,3.0,3.0,4.0,720.0,720.0,396.0,1836.0,1.0
109,21802.0,60.0,60.0,20.0,3.0,4.0,4.0,1440.0,960.0,264.0,2664.0,1.0
113,24001.0,60.0,0.0,60.0,3.0,0.0,3.0,1440.0,0.0,594.0,2034.0,1.0
154,32601.0,60.0,120.0,45.0,3.0,2.0,2.0,1440.0,960.0,297.0,2697.0,1.0
162,35201.0,20.0,10.0,15.0,6.0,7.0,7.0,960.0,280.0,346.5,1586.5,1.0
166,36002.0,40.0,30.0,120.0,3.0,2.0,3.0,960.0,240.0,1188.0,2388.0,1.0


In [23]:
df.null_count()

ID,G126_IPAQ_VIG_W,G126_IPAQ_VIG_HPD,G126_IPAQ_VIG_MPD,G126_IPAQ_MOD_W,G126_IPAQ_MOD_HPD,G126_IPAQ_MOD_MPD,G126_IPAQ_WALK_W,G126_IPAQ_WALK_HPD,G126_IPAQ_WALK_MPD,G126_IPAQ_VIG_MINS,G126_IPAQ_MOD_MINS,G126_IPAQ_WALK_MINS,G126_IPAQ_VIG_D,G126_IPAQ_MOD_D,G126_IPAQ_WALK_D,G126_IPAQ_VIG_MET,G126_IPAQ_MOD_MET,G126_IPAQ_WALK_MET,G126_IPAQ_TOT_MET,G126_IPAQ_CAT,G126_IPAQ_SIT_WD_HPD,G126_IPAQ_SIT_WD_MPD,G126_IPAQ_SIT_WD_TRUNC,G126_IPAQ_SIT_WE_HPD,G126_IPAQ_SIT_WE_MPD,G126_IPAQ_SIT_WE_TRUNC
u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32,u32
0,0,43,43,0,43,43,0,43,43,54,55,56,54,55,56,54,55,56,56,55,43,43,58,43,43,58
