### Notebook to compute statistics

#### Create a data set for each pre-assigned group

In [4]:
import helpers as h
import pandas as pd
import numpy as np

In [3]:
# Load the two input tables through the project helper module (imported as `h`).
# Later cells index `profiles` by its 'group' and 'patient_number' columns and
# `metrics` by its 'patient_number' column, so both files must provide them.
profiles = h.read_csv_to_data_frame("patient_profiles1.csv")
metrics = h.read_csv_to_data_frame("patient_metrics.csv")

In [19]:
def patient_numbers_in_group(profiles_df, group):
    """Return the patient numbers assigned to one group.

    Parameters
    ----------
    profiles_df : pandas.DataFrame
        Table with a 'group' column and a 'patient_number' column.
    group : int
        Group code to select (rows where ``profiles_df['group'] == group``).

    Returns
    -------
    numpy.ndarray
        The 'patient_number' values of the matching rows, in table order.
    """
    in_group = profiles_df['group'] == group
    return profiles_df.loc[in_group, 'patient_number'].to_numpy()


# group 0: pre op
patients_in_group_0_pre_op = patient_numbers_in_group(profiles, 0)
print(f"pre op: {patients_in_group_0_pre_op}")

# group 1: early post op
patients_in_group_1_early_post_op = patient_numbers_in_group(profiles, 1)
print(f"early post op: {patients_in_group_1_early_post_op}")

# group 2: mid post op
patients_in_group_2_mid_post_op = patient_numbers_in_group(profiles, 2)
print(f"mid post op: {patients_in_group_2_mid_post_op}")

# group 3: 6 months
patients_in_group_3_6_months = patient_numbers_in_group(profiles, 3)
print(f"6 months: {patients_in_group_3_6_months}")
print()

# Total number of patients across the four groups listed above.
num_patients = sum(
    len(group_patients)
    for group_patients in (
        patients_in_group_0_pre_op,
        patients_in_group_1_early_post_op,
        patients_in_group_2_mid_post_op,
        patients_in_group_3_6_months,
    )
)
print(f"number of patients: {num_patients}")

pre op: [ 2  3  7 11 12 16]
early post op: [ 1  5  6 13 14 15]
mid post op: [ 9 10 18 19]
6 months: [ 8 17]

number of patients: 18


In [28]:
# Split the metrics table by group: each frame keeps only the rows whose
# patient_number appears in that group's array of patient numbers.
metrics_pre_op, metrics_early_post_op, metrics_mid_post_op, metrics_6_months = (
    metrics.loc[metrics['patient_number'].isin(group_patients)]
    for group_patients in (
        patients_in_group_0_pre_op,
        patients_in_group_1_early_post_op,
        patients_in_group_2_mid_post_op,
        patients_in_group_3_6_months,
    )
)

In [33]:
# statistics
def calculate_statistics_for_data_frame(df, title):
    """Summarise every numeric metric column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per observation. A 'patient_number' column, if present, is
        treated as an identifier and excluded from the statistics. The input
        frame is not modified.
    title : str
        Label stored as the name of the result's index (shown as the table
        caption when the frame is displayed).

    Returns
    -------
    pandas.DataFrame
        Indexed by metric (column) name, with columns 'Mean', 'Std'
        (pandas' default sample standard deviation) and 'Range' (max - min).
    """
    # errors="ignore": a frame without the id column can still be summarised.
    # select_dtypes: text/categorical columns have no mean/std and would make
    # the reductions below raise on current pandas versions.
    numeric = (
        df.drop(columns=["patient_number"], errors="ignore")
        .select_dtypes(include="number")
    )

    # Combine them into a DataFrame for better visibility
    summary_stats = pd.DataFrame({
        'Mean': numeric.mean(),
        'Std': numeric.std(),
        'Range': numeric.max() - numeric.min(),
    })
    summary_stats.index.name = title

    return summary_stats

In [34]:
# Mean / Std / Range per metric column for the pre-op group; the bare name on
# the last line makes the notebook render the resulting table.
pre_op_stats = calculate_statistics_for_data_frame(metrics_pre_op, "Pre op")
pre_op_stats

Unnamed: 0_level_0,Mean,Std,Range
Pre op,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
left_avg_load,3.783333,0.636448,1.79
right_avg_load,3.838333,0.606083,1.75
overall_load,3.811667,0.616325,1.76
load_in_left,49.591667,0.906011,2.32
load_in_right,50.408333,0.906011,2.32
imbalance,1.543333,1.107026,2.92
left_x_avg_load,3.906667,0.667253,1.64
left_y_avg_load,2.548333,1.138427,3.06
left_z_avg_load,4.363333,1.369769,4.18
left_x_contribution,37.0,5.51362,15.0


In [35]:
# Mean / Std / Range per metric column for the early post-op group; the bare
# name on the last line makes the notebook render the resulting table.
early_post_op_stats = calculate_statistics_for_data_frame(metrics_early_post_op, "Early post op")
early_post_op_stats

Unnamed: 0_level_0,Mean,Std,Range
Early post op,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
left_avg_load,3.61,0.712067,2.0
right_avg_load,3.69,0.691173,1.98
overall_load,3.651667,0.676769,1.84
load_in_left,49.471667,2.5209,6.36
load_in_right,50.528333,2.5209,6.36
imbalance,4.496667,1.579743,3.9
left_x_avg_load,3.655,0.659052,1.61
left_y_avg_load,2.516667,0.676599,1.59
left_z_avg_load,4.426667,0.769147,2.04
left_x_contribution,34.166667,3.868678,10.0


In [39]:
# Mean / Std / Range per metric column for the mid post-op group; the bare
# name on the last line makes the notebook render the resulting table.
mid_post_op_stats = calculate_statistics_for_data_frame(metrics_mid_post_op, "Mid post op")
mid_post_op_stats

Unnamed: 0_level_0,Mean,Std,Range
Mid post op,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
left_avg_load,3.5675,0.306309,0.63
right_avg_load,3.525,0.53326,1.12
overall_load,3.545,0.377492,0.84
load_in_left,50.44,2.984538,7.16
load_in_right,49.56,2.984538,7.16
imbalance,4.22,3.594143,7.64
left_x_avg_load,3.6275,0.435689,1.02
left_y_avg_load,2.045,0.232737,0.49
left_z_avg_load,4.1825,0.804752,1.86
left_x_contribution,37.0,5.354126,11.0


In [46]:
# Mean / Std / Range per metric column for the 6-month group; the bare name on
# the last line makes the notebook render the resulting table.
six_month_stats = calculate_statistics_for_data_frame(metrics_6_months, "6 Months")
six_month_stats

Unnamed: 0_level_0,Mean,Std,Range
6 Months,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
left_avg_load,3.95,1.725341,2.44
right_avg_load,4.07,1.895046,2.68
overall_load,4.01,1.810193,2.56
load_in_left,49.435,0.799031,1.13
load_in_right,50.565,0.799031,1.13
imbalance,1.13,1.598061,2.26
left_x_avg_load,3.765,1.336432,1.89
left_y_avg_load,1.94,0.806102,1.14
left_z_avg_load,4.315,3.24562,4.59
left_x_contribution,39.5,7.778175,11.0


In [57]:
# view key metrics

# Row labels (metric names) to pull out of every per-group summary table.
metrics_to_view = ["overall_load", "ml_imbalance"]

key_pre_op, key_early_post_op, key_mid_post_op, key_6_months = (
    group_stats.loc[metrics_to_view]
    for group_stats in (
        pre_op_stats,
        early_post_op_stats,
        mid_post_op_stats,
        six_month_stats,
    )
)

# One plain-text table per group, separated by a blank line.
print(key_pre_op, key_early_post_op, key_mid_post_op, key_6_months, sep="\n\n")

                   Mean        Std  Range
Pre op                                   
overall_load   3.811667   0.616325   1.76
ml_imbalance  14.823333  12.420959  31.58

                    Mean       Std  Range
Early post op                            
overall_load    3.651667  0.676769   1.84
ml_imbalance   12.783333  8.255659  19.16

                Mean       Std  Range
Mid post op                          
overall_load   3.545  0.377492   0.84
ml_imbalance  13.205  9.884997  22.46

               Mean        Std  Range
6 Months                             
overall_load   4.01   1.810193   2.56
ml_imbalance  14.28  13.321892  18.84
