### Notebook to compute statistics

#### Create a data set for each pre-assigned group

In [2]:
import helpers as h
import pandas as pd
import numpy as np

In [3]:
# Load the clustered patient metrics from CSV via the project helper module.
# NOTE(review): presumably returns a pandas DataFrame (later cells filter it with a
# boolean mask on its 'Group' column) — confirm in helpers.
metrics = h.read_csv_to_data_frame("clustered_patient_metrics.csv")

In [7]:
# Split the metrics into one data set per pre-assigned group (0, 1 and 2)
grp_0, grp_1, grp_2 = (
    metrics[metrics['Group'] == group_id] for group_id in range(3)
)

In [29]:
# statistics
def calculate_statistics_for_data_frame(df, title, drop_columns=("Group", "patient_number")):
    """Summarise each remaining column of ``df`` with basic descriptive statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; the columns listed in
        ``drop_columns`` are removed from a copy before the statistics
        are computed.
    title : str
        Label stored as the name of the result's index.
    drop_columns : str or sequence of str, optional
        Column(s) to exclude from the summary. Defaults to the ``"Group"``
        and ``"patient_number"`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per remaining column of ``df``, with the columns ``Mean``,
        ``Std`` (pandas' default sample standard deviation), ``max``,
        ``min`` and ``Range`` (``max - min``).

    Raises
    ------
    KeyError
        If any column named in ``drop_columns`` is missing from ``df``.
    """
    # A bare string would otherwise be split into single characters by list().
    if isinstance(drop_columns, str):
        drop_columns = [drop_columns]
    values = df.drop(columns=list(drop_columns))

    # Compute the extremes once and reuse them for the range.
    col_max = values.max()
    col_min = values.min()

    # Combine everything into one DataFrame for better visibility
    summary_stats = pd.DataFrame({
        'Mean': values.mean(),
        'Std': values.std(),
        'max': col_max,
        'min': col_min,
        'Range': col_max - col_min,
    })
    summary_stats.index.name = title

    return summary_stats

In [30]:
# Descriptive statistics for group 0; the bare name on the last line renders the table
grp_0_stats = calculate_statistics_for_data_frame(df=grp_0, title="Group 0")
grp_0_stats

left_avg_load            4.18
right_avg_load           4.19
overall_load             4.19
load_in_left            50.29
load_in_right           52.88
imbalance                5.76
left_x_avg_load          4.62
left_y_avg_load          3.38
left_z_avg_load          6.65
left_x_contribution     48.00
left_y_contribution     31.00
left_z_contribution     48.00
right_x_avg_load         4.61
right_y_avg_load         3.36
right_z_avg_load         4.98
right_x_contribution    45.00
right_y_contribution    26.00
right_z_contribution    45.00
v_imbalance             12.86
ml_imbalance             6.64
ap_imbalance            25.60
actual_group             3.00
dtype: float64


Unnamed: 0_level_0,Mean,Std,max,min,Range
Group 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
left_avg_load,3.57,0.4312,4.18,2.73,1.45
right_avg_load,3.735714,0.501958,4.19,2.73,1.46
overall_load,3.651429,0.458855,4.19,2.73,1.46
load_in_left,48.917143,1.204156,50.29,47.12,3.17
load_in_right,51.082857,1.204156,52.88,49.71,3.17
imbalance,2.331429,2.220476,5.76,0.0,5.76
left_x_avg_load,3.647143,0.571743,4.62,2.82,1.8
left_y_avg_load,2.145714,0.706018,3.38,1.3,2.08
left_z_avg_load,3.81,1.481643,6.65,2.02,4.63
left_x_contribution,38.857143,5.72796,48.0,33.0,15.0


In [24]:
# Descriptive statistics for group 1; the bare name on the last line renders the table
# NOTE(review): the saved output of this cell shows a 'max-min' column that the
# function's summary frame does not contain — re-run the notebook top to bottom to refresh it.
grp_1_stats = calculate_statistics_for_data_frame(df=grp_1, title="Group 1")
grp_1_stats

Unnamed: 0_level_0,Mean,Std,max-min,Range
Group 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
left_avg_load,3.508571,0.910008,(left_avg_load 5.17\nright_avg_load...,2.57
right_avg_load,3.577143,0.98334,(left_avg_load 5.17\nright_avg_load...,3.02
overall_load,3.545714,0.939323,(left_avg_load 5.17\nright_avg_load...,2.79
load_in_left,49.615714,1.797107,(left_avg_load 5.17\nright_avg_load...,5.4
load_in_right,50.384286,1.797107,(left_avg_load 5.17\nright_avg_load...,5.4
imbalance,2.894286,1.958187,(left_avg_load 5.17\nright_avg_load...,5.68
left_x_avg_load,3.515714,0.617248,(left_avg_load 5.17\nright_avg_load...,1.83
left_y_avg_load,2.062857,0.271767,(left_avg_load 5.17\nright_avg_load...,0.79
left_z_avg_load,4.99,0.962046,(left_avg_load 5.17\nright_avg_load...,2.6
left_x_contribution,33.428571,4.237025,(left_avg_load 5.17\nright_avg_load...,11.0


In [14]:
# Descriptive statistics for group 2; the bare name on the last line renders the table
# NOTE(review): the saved output of this cell lacks the 'max' and 'min' columns that the
# function's summary frame contains — re-run the notebook top to bottom to refresh it.
grp_2_stats = calculate_statistics_for_data_frame(df=grp_2, title="Group 2")
grp_2_stats

Unnamed: 0_level_0,Mean,Std,Range
Group 2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
left_avg_load,4.245,0.390342,0.86
right_avg_load,4.055,0.645265,1.48
overall_load,4.15,0.490034,1.12
load_in_left,51.32,2.771221,5.63
load_in_right,48.68,2.771221,5.63
imbalance,4.7,3.249328,7.14
left_x_avg_load,4.3175,0.583517,1.37
left_y_avg_load,3.2475,1.027469,2.45
left_z_avg_load,4.125,0.614139,1.18
left_x_contribution,37.0,2.309401,4.0


In [20]:
# View key metrics for each group.
# Other row labels of the statistics tables (e.g. "left_y_avg_load",
# "right_y_avg_load") can be added to this list.
metrics_to_view = ["overall_load", "ml_imbalance"]

key_grp_0 = grp_0_stats.loc[metrics_to_view]
key_grp_1 = grp_1_stats.loc[metrics_to_view]
key_grp_2 = grp_2_stats.loc[metrics_to_view]

# Print each table with its patient count on a separate line: putting the table
# and the count in the same f-string line glued the count onto the table's last row.
key_reports = [
    f"{key_stats}\nnum of patients: {group.shape[0]}"
    for key_stats, group in (
        (key_grp_0, grp_0),
        (key_grp_1, grp_1),
        (key_grp_2, grp_2),
    )
]
print("\n\n".join(key_reports))

                  Mean       Std  Range
Group 0                                
overall_load  3.651429  0.458855   1.46
ml_imbalance  4.462857  2.424800   6.34, num of patients: 7

                   Mean       Std  Range
Group 1                                 
overall_load   3.545714  0.939323   2.79
ml_imbalance  18.962857  9.590700  27.12, num of patients: 7

               Mean       Std  Range
Group 2                             
overall_load   4.15  0.490034   1.12
ml_imbalance  20.76  2.540341   6.08, num of patients: 4
