# Introduction
The purpose of this file is to load all pre-processed data in "data/", analyse, visualisation and analysing test results

# Load library and path

In [2]:
import json
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import random 

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from scipy import stats
from tqdm import tqdm

import os
import plotly.io as pio

## Participants descriptive analysis

In [6]:
df_info = pd.read_parquet('data/patients_info.parquet')
df_info.head()

Unnamed: 0,userId,gender,ageRange,cueingMethod1,cueingMethod2,cueingMethod1Start,cueingMethod1End,cueingMethod2Start,cueingMethod2End,ethnicGroup,platform
0,54hFUuEnJA,Male,65+ years,cueband,phone,2022-10-10 11:09:46.931000+01:00,2022-10-31 16:21:35.457000+00:00,2022-11-18 07:10:46.007000+00:00,2022-12-09 07:13:48.303000+00:00,,Android
1,KpZ0DlA0Qa,Male,65+ years,phone,cueband,2022-10-10 20:33:12.394000+01:00,2022-10-31 08:15:23.953000+00:00,2022-12-01 18:53:48.907000+00:00,2022-12-22 08:38:43.432000+00:00,"English, Welsh, Scottish, Northern Irish or Br...",iOS
2,gymn4uQetH,Female,45-54 years,cueband,phone,2022-10-10 17:09:37.394000+01:00,2022-10-31 06:40:01.004000+00:00,2022-11-19 19:14:05.478000+00:00,2022-12-10 06:41:12.195000+00:00,,iOS
3,JqZPr3q1LD,Male,55-64 years,cueband,phone,2022-10-12 13:44:51.067000+01:00,2022-11-02 07:43:55.523000+00:00,2022-11-18 08:49:14.698000+00:00,2022-12-09 06:45:46.859000+00:00,"English, Welsh, Scottish, Northern Irish or Br...",Android
4,CCGlM7KPWU,Female,45-54 years,phone,cueband,2022-10-25 14:42:29.405000+01:00,2022-11-15 22:21:31.352000+00:00,2022-11-29 12:57:32.672000+00:00,2022-12-20 00:05:46.527000+00:00,"English, Welsh, Scottish, Northern Irish or Br...",Android


In [None]:
import plotly.io as pio
pio.renderers.default = "notebook"
df = df_info
# fig = make_subplots(rows=2, cols=2, specs=[[{"type": "pie"}, {"type": "pie"} ], [{"type": "pie"}, {"type": "pie"} ]])
fig1 = px.histogram(df, x='ethnicGroup',barmode='group',title= "Ethnic Group")
fig2 = px.histogram(df, x='gender', barmode= 'group')
# fig3 = px.pie(df['ageRange'])


# fig.add_trace(fig1['data'][0], row=1, col=1)
# fig.add_trace(fig2['data'][0], row=1, col=2)
# fig.add_trace(fig3['data'][0], row=2, col=1)

fig = go.Figure()
fig.add_trace(go.Pie(labels=["Male", "Female"], values=[46, 23], hole=0.5))
# fig.add_trace(go.Pie(labels=["65+ years", "55-64 years", "45-54 years", "35-44 years"], values=[37, 24, 6, 2], hole=0.5), row=1, col= 2)
fig.update_layout(title='Gender distribution in Study population')
# fig1.write_image("report_plot/ethnic.png")
fig.show()

In [None]:
fig = px.pie(df, names='platform', height=300, width=600, hole=0.7, 
                   color_discrete_sequence=['#4c78a8', '#72b7b2', '#6b92bc'])
fig.update_traces(hovertemplate=None, textposition='outside',
 textinfo='percent+label', rotation=50)
fig.update_layout(margin=dict(t=50, b=35, l=0, r=0), showlegend=False,
                        plot_bgcolor='#fafafa', paper_bgcolor='#fafafa',
                        font=dict(size=17, color='#8a8d93'),
                        hoverlabel=dict(bgcolor="#444", font_size=13, font_family="Lato, sans-serif"))
fig.add_annotation(dict(x=0.5, y=0.5,  align='center',
                        xref = "paper", yref = "paper",
                        showarrow = False, font_size=22,
                        text="Platform"))
fig.write_image("report_plot/platform.png")

In [24]:
col = "ethnicGroup"
display(df_info.query("cueingMethod1 == 'cueband'")[col].value_counts(normalize= False))
display(df_info.query("cueingMethod1 == 'cueband'")[col].value_counts(normalize= True))

display(df_info.query("cueingMethod1 == 'phone'")[col].value_counts(normalize= False))
display(df_info.query("cueingMethod1 == 'phone'")[col].value_counts(normalize= True))

English, Welsh, Scottish, Northern Irish or British    19
Any other White background                              1
Caribbean                                               1
Name: ethnicGroup, dtype: int64

English, Welsh, Scottish, Northern Irish or British    0.904762
Any other White background                             0.047619
Caribbean                                              0.047619
Name: ethnicGroup, dtype: float64

English, Welsh, Scottish, Northern Irish or British    22
Any other ethnic group                                  1
Irish                                                   1
Name: ethnicGroup, dtype: int64

English, Welsh, Scottish, Northern Irish or British    0.916667
Any other ethnic group                                 0.041667
Irish                                                  0.041667
Name: ethnicGroup, dtype: float64

# Effectiveness of cueing
Compare the score of participants before trial and after trial

significance code         p-value
   ***                 [0, 0.001]
    **              (0.001, 0.01]
     *               (0.01, 0.05]
     .                (0.05, 0.1]
                         (0.1, 1] 

## Before trial and after trial
Compare assessment score of patients before and after trial

All participants test results in assessment 1 and 4

Because the same population so we will use dependent ttest

In [3]:
cols = ['userId', 'nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'gender', 'ageRange', \
    'freq_mean', 'duration_mean', 'severity_mean', 'record_count', \
    'cueingMethod1', 'cueingMethod2', 'assessmentNumber', 'phase']
df = pd.read_parquet('data/score_pannel.parquet', columns= cols)

In [5]:
numeric_var = ['nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'freq_mean', 'duration_mean', 'severity_mean']

before = df.query("assessmentNumber ==1 or assessmentNumber ==3").assign(phase = 'pre_treatment')
after = df.query("assessmentNumber ==2 or assessmentNumber == 4").assign(phase = 'post_treatment')
h0 = pd.concat([before, after])

before.shape, after.shape
descriptive = h0.groupby(['phase']).describe().T.reset_index()
descriptive.columns = ['var', 'des', 'post_treatment', 'pre_treatment']
descriptive.query("des == 'mean'").to_clipboard()
descriptive.query("des == 'mean'")


Unnamed: 0,var,des,post_treatment,pre_treatment
1,nmsq_score,mean,10.913043,11.869565
9,pdq8_score,mean,8.137681,8.702899
17,updrs22_score,mean,3.492754,3.702899
25,romps_score,mean,19.528986,20.826087
33,freq_mean,mean,26.288299,31.15763
41,duration_mean,mean,25.425079,29.849097
49,severity_mean,mean,26.487691,30.646437
57,record_count,mean,16.574627,9.977099
65,assessmentNumber,mean,3.0,2.0


In [6]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
def ttest_results(x):
    var = x['var']
    score1 = before[var]
    score2 = after[var]
    D = np.array(score1) - np.array(score2)
    
    alternative = 'two-sided'
    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)
    
    x['pvalue'] = ttest.pvalue
    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,two-sided,4.163765,0.132299,5.505281e-05
1,pdq8_score,two-sided,2.447957,0.077388,0.01563162
2,updrs22_score,two-sided,2.148288,0.160304,0.0334496
3,romps_score,two-sided,5.203413,0.152364,6.986953e-07
4,freq_mean,two-sided,4.590183,,1.037227e-05
5,duration_mean,two-sided,4.120406,,6.717764e-05
6,severity_mean,two-sided,4.179631,,5.347507e-05


In [38]:
descriptive = df.groupby(['assessmentNumber']).describe().T.reset_index()
descriptive.columns = ['var', 'des', '1', '2', '3', '4']
descriptive.query("des == 'mean'").to_clipboard()
descriptive.query("des == 'mean'")
# descriptive

Unnamed: 0,var,des,1,2,3,4
1,nmsq_score,mean,12.333333,11.043478,11.405797,10.782609
9,pdq8_score,mean,9.173913,8.434783,8.231884,7.84058
17,updrs22_score,mean,3.855072,3.507246,3.550725,3.478261
25,romps_score,mean,21.869565,20.028986,19.782609,19.028986
33,freq_mean,mean,35.912787,28.839968,26.329316,23.659306
41,duration_mean,mean,34.188107,27.837333,25.443333,22.939726
49,severity_mean,mean,35.067821,29.081012,26.157032,23.815783
57,record_count,mean,8.409091,17.147059,11.569231,15.984848
65,phase,mean,0.0,1.0,2.0,3.0


## Effectiveness of cueing
- Create cueband group: A1, B2, compare pre_treatment and post_treatment
- Create mobile group: A2, B1, compare pre_treatment and post_treatment


In [6]:
cols = ['userId', 'nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'gender', 'ageRange', \
    'freq_mean', 'duration_mean', 'severity_mean', 'record_count', \
    'cueingMethod1', 'cueingMethod2', 'assessmentNumber', 'phase']
df = pd.read_parquet('data/score_pannel.parquet', columns= cols)

In [7]:
cuebandA = df.query("cueingMethod1 == 'cueband' and (assessmentNumber == 1 or assessmentNumber == 2)").assign(cueingMethod = 'cueband')
cuebandB = df.query("cueingMethod2 == 'cueband' and (assessmentNumber == 3 or assessmentNumber == 4)").assign(cueingMethod = 'cueband')
cueband = pd.concat([cuebandA, cuebandB])
pre_cueband = cueband.query("assessmentNumber == 1 or assessmentNumber == 3").assign(phase = 'pre_treatment')
post_cueband = cueband.query("assessmentNumber == 2 or assessmentNumber == 4").assign(phase = 'post_treatment')

phoneA = df.query("cueingMethod2 == 'phone' and (assessmentNumber == 3 or assessmentNumber == 4)").assign(cueingMethod = 'phone')
phoneB = df.query("cueingMethod1 == 'phone' and (assessmentNumber == 1 or assessmentNumber == 2)").assign(cueingMethod = 'phone')
phone = pd.concat([phoneA, phoneB])
pre_phone = phone.query("assessmentNumber == 1 or assessmentNumber == 3").assign(phase = 'pre_treatment')
post_phone = phone.query("assessmentNumber == 2 or assessmentNumber == 4").assign(phase = 'post_treatment')

In [8]:
numeric_var = ['nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'freq_mean', 'duration_mean', 'severity_mean']

descriptive = pd.concat([pre_cueband, post_cueband, pre_phone, post_phone])[numeric_var + ['cueingMethod', 'phase']].groupby(['cueingMethod','phase']).describe().T.reset_index()
descriptive.columns = ['var', 'des', 'post_cueband', 'pre_cueband', 'post_phone', 'pre_phone']
descriptive.query("des == 'mean'")

Unnamed: 0,var,des,post_cueband,pre_cueband,post_phone,pre_phone
1,nmsq_score,mean,10.869565,11.797101,10.956522,11.942029
9,pdq8_score,mean,8.086957,8.594203,8.188406,8.811594
17,updrs22_score,mean,3.376812,3.84058,3.608696,3.565217
25,romps_score,mean,19.376812,20.797101,19.681159,20.855072
33,freq_mean,mean,24.684474,31.188397,27.892123,31.127328
41,duration_mean,mean,23.85879,30.388349,26.991368,29.318015
49,severity_mean,mean,24.960956,30.920187,28.014425,30.376835


### Cueband effect

In [10]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
def ttest_results(x):
    var = x['var']
    score1 = pre_cueband[var]
    score2 = post_cueband[var]
    alternative = 'greater'
    D = np.array(score1) - np.array(score2)


    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)

    x['pvalue'] = ttest.pvalue
    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,greater,3.04053,0.146575,0.001676
1,pdq8_score,greater,1.692894,0.083087,0.047526
2,updrs22_score,greater,3.291293,0.343497,0.000792
3,romps_score,greater,4.222657,0.184623,3.7e-05
4,freq_mean,greater,4.095167,,6.1e-05
5,duration_mean,greater,4.021715,,7.9e-05
6,severity_mean,greater,4.029751,,7.6e-05


### Mobile phone cueing effect
Compare score of participants before using cueing as mobile phone

In [12]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
def ttest_results(x):
    var = x['var']
    score1 = pre_phone[var]
    score2 = post_phone[var]
    alternative = 'greater'
    D = np.array(score1) - np.array(score2)

    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)

    x['pvalue'] = ttest.pvalue

    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,greater,2.850119,0.121215,0.00289
1,pdq8_score,greater,1.76309,0.073353,0.041189
2,updrs22_score,greater,-0.335349,-0.038037,0.630803
3,romps_score,greater,3.17378,0.126185,0.00113
4,freq_mean,greater,2.329068,,0.011487
5,duration_mean,greater,1.736975,,0.043564
6,severity_mean,greater,1.831567,,0.035799


## Cueing effectiveness after 2 weeks cool off

In [13]:
cols = ['userId', 'nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'gender', 'ageRange', \
    'freq_mean', 'duration_mean', 'severity_mean', 'record_count', \
    'cueingMethod1', 'cueingMethod2', 'assessmentNumber', 'phase']
df = pd.read_parquet('data/score_pannel.parquet', columns= cols)

In [14]:
a1_cooldown = df.query("cueingMethod1 == 'cueband' and (assessmentNumber ==1 or assessmentNumber == 2 or assessmentNumber == 3 or assessmentNumber == 4)")
b1_cooldown = df.query("cueingMethod1 == 'phone' and (assessmentNumber ==1 or assessmentNumber == 2 or assessmentNumber == 3 or assessmentNumber == 4)")

In [15]:
descriptive = a1_cooldown[numeric_var + ['phase']].groupby('phase').describe().T.reset_index()
descriptive.columns = ['var', 'des', 'assessmentNumber 1', 'assessmentNumber 2', 'assessmentNumber 3', 'assessmentNumber 4']
descriptive.query("des == 'mean'")

Unnamed: 0,var,des,assessmentNumber 1,assessmentNumber 2,assessmentNumber 3,assessmentNumber 4
1,nmsq_score,mean,11.513514,10.513514,10.783784,10.351351
9,pdq8_score,mean,7.648649,7.0,6.972973,6.540541
17,updrs22_score,mean,4.081081,3.486486,3.540541,3.675676
25,romps_score,mean,21.891892,19.756757,20.0,19.108108
33,freq_mean,mean,37.106506,29.368118,28.082497,27.618744
41,duration_mean,mean,36.303865,28.135606,27.120263,26.569457
49,severity_mean,mean,36.19738,29.272467,27.351521,27.292747


In [16]:
descriptive = b1_cooldown[numeric_var + ['phase']].groupby('phase').describe().T.reset_index()
descriptive.columns = ['var', 'des', 'assessmentNumber 1', 'assessmentNumber 2', 'assessmentNumber 3', 'assessmentNumber 4']
descriptive.query("des == 'mean'")

Unnamed: 0,var,des,assessmentNumber 1,assessmentNumber 2,assessmentNumber 3,assessmentNumber 4
1,nmsq_score,mean,13.28125,11.65625,12.125,11.28125
9,pdq8_score,mean,10.9375,10.09375,9.6875,9.34375
17,updrs22_score,mean,3.59375,3.53125,3.5625,3.25
25,romps_score,mean,21.84375,20.34375,19.53125,18.9375
33,freq_mean,mean,34.565041,28.209595,24.283938,18.907979
41,duration_mean,mean,31.799347,27.481329,23.486914,18.584049
49,severity_mean,mean,33.792512,28.852502,24.763461,19.643427


In [17]:
numeric_var = ['nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'freq_mean', 'duration_mean', 'severity_mean']

before = df.query("assessmentNumber ==2")
after = df.query("assessmentNumber == 3")
before.shape, after.shape

((69, 15), (69, 15))

In [19]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
def ttest_results(x):
    var = x['var']
    score1 = before[var]
    score2 = after[var]

    alternative = 'less'
    D = np.array(score1) - np.array(score2)
    
    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)
    
    x['pvalue'] = ttest.pvalue
    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,less,-1.391861,-0.078631,0.08425
1,pdq8_score,less,0.591997,0.025401,0.722092
2,updrs22_score,less,-0.299516,-0.030343,0.38273
3,romps_score,less,0.69906,0.029169,0.756551
4,freq_mean,less,0.665118,,0.745817
5,duration_mean,less,0.290659,,0.613874
6,severity_mean,less,0.834222,,0.796371


### Cueband effect after cool off

In [None]:
cueband = a1_cooldown.query("assessmentNumber == 2")
cooldown = a1_cooldown.query("assessmentNumber == 3")

numeric_var = ['nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'freq_mean', \
    'duration_mean', 'severity_mean']

fig = make_subplots(rows=5, cols=2, subplot_titles=numeric_var)

for i, var in enumerate(numeric_var):
    row = (i // 2) + 1
    col = (i % 2) + 1
    
    fig.add_trace(
        go.Histogram(x=cueband[var], name=f'Cueband trial {var}'),
        row=row,
        col=col
    )
    fig.add_trace(
        go.Histogram(x=cooldown[var], name=f'Cooldown {var}'),
        row=row,
        col=col
    )

fig.update_layout(height=1000, width=800, title_text="Histograms of Numeric Variables")

fig.show()

In [None]:
fig = make_subplots(rows=5, cols=2, subplot_titles=numeric_var)

for i, var in enumerate(numeric_var):
    row = (i // 2) + 1
    col = (i % 2) + 1
    fig.add_trace(
        go.Box(x=cueband[var], name=f'Cueband'),
        row=row,
        col=col
    )
    fig.add_trace(
        go.Box(x=cooldown[var], name=f'Cooldown'),
        row=row,
        col=col
    )

fig.update_layout(height=1000, width=800, title_text="Box Plots of Numeric Variables")

fig.show()

In [23]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
cueband = a1_cooldown.query("assessmentNumber == 2")
cooldown = a1_cooldown.query("assessmentNumber == 3")
def ttest_results(x):
    var = x['var']
    score1 = cueband[var]
    score2 = cooldown[var]
    alternative = 'less'
    D = np.array(score1) - np.array(score2)
    
    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)
    
    x['pvalue'] = ttest.pvalue
    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,less,-0.818317,-0.068824,0.20928
1,pdq8_score,less,0.058638,0.003534,0.523218
2,updrs22_score,less,-0.254916,-0.033394,0.400119
3,romps_score,less,-0.43072,-0.021186,0.33462
4,freq_mean,less,-0.155718,,0.438588
5,duration_mean,less,-0.442456,,0.33048
6,severity_mean,less,0.002368,,0.500938


In [33]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
begin = a1_cooldown.query("assessmentNumber == 1")
cooldown = a1_cooldown.query("assessmentNumber == 3")
def ttest_results(x):
    var = x['var']
    score1 = begin[var]
    score2 = cooldown[var]
    alternative = 'greater'
    D = np.array(score1) - np.array(score2)
    
    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)
    
    x['pvalue'] = ttest.pvalue
    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,greater,1.74365,0.115732,0.044878
1,pdq8_score,greater,1.452823,0.086773,0.077467
2,updrs22_score,greater,2.406748,0.297667,0.010675
3,romps_score,greater,2.734547,0.109792,0.004815
4,freq_mean,greater,3.10634,,0.001976
5,duration_mean,greater,3.435598,,0.000828
6,severity_mean,greater,3.691283,,0.000413


### Mobile phone cool off

In [65]:
descriptive = b1_cooldown[numeric_var + ['phase']].groupby('phase').describe().T.reset_index()
descriptive.columns = ['var', 'des', 'assessmentNumber 1', 'assessmentNumber 2', 'assessmentNumber 3', 'assessmentNumber 4']
descriptive.query("des == 'mean'")

Unnamed: 0,var,des,assessmentNumber 1,assessmentNumber 2,assessmentNumber 3,assessmentNumber 4
1,nmsq_score,mean,13.28125,11.65625,12.125,11.28125
9,pdq8_score,mean,10.9375,10.09375,9.6875,9.34375
17,updrs22_score,mean,3.59375,3.53125,3.5625,3.25
25,romps_score,mean,21.84375,20.34375,19.53125,18.9375
33,freq_mean,mean,34.565041,28.209595,24.283938,18.907979
41,duration_mean,mean,31.799347,27.481329,23.486914,18.584049
49,severity_mean,mean,33.792512,28.852502,24.763461,19.643427


In [25]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
mobile = b1_cooldown.query("assessmentNumber == 2")
cooldown = b1_cooldown.query("assessmentNumber == 3")
def ttest_results(x):
    var = x['var']
    score1 = mobile[var]
    score2 = cooldown[var]
    alternative = 'less'
    D = np.array(score1) - np.array(score2)
    
    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)
    
    x['pvalue'] = ttest.pvalue
    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,less,-1.125829,-0.087225,0.134439
1,pdq8_score,less,0.784943,0.048924,0.780778
2,updrs22_score,less,-0.157669,-0.025662,0.43787
3,romps_score,less,2.171537,0.187219,0.981169
4,freq_mean,less,2.051627,,0.975333
5,duration_mean,less,1.284709,,0.895469
6,severity_mean,less,1.578069,,0.937301


In [35]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
mobile = b1_cooldown.query("assessmentNumber == 1")
cooldown = b1_cooldown.query("assessmentNumber == 3")
def ttest_results(x):
    var = x['var']
    score1 = mobile[var]
    score2 = cooldown[var]
    alternative = 'two-sided'
    D = np.array(score1) - np.array(score2)

    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)
    
    x['pvalue'] = ttest.pvalue
    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,two-sided,2.257555,0.142188,0.031162
1,pdq8_score,two-sided,2.147812,0.119048,0.039657
2,updrs22_score,two-sided,0.157669,0.025662,0.87574
3,romps_score,two-sided,4.458447,0.277283,0.000101
4,freq_mean,two-sided,3.654868,,0.001012
5,duration_mean,two-sided,1.955912,,0.06017
6,severity_mean,two-sided,2.415099,,0.022263


# Compare effectiveness between cueband and phone

In [26]:
cols = ['userId', 'nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'gender', 'ageRange', \
    'freq_mean', 'freq_median', 'duration_mean', 'duration_median', 'severity_mean', 'severity_median', 'record_count', \
    'cueingMethod1', 'cueingMethod2', 'assessmentNumber', 'phase']
df = pd.read_parquet('data/score_pannel.parquet', columns= cols)

In [27]:
cuebandA = df.query("cueingMethod1 == 'cueband' and assessmentNumber == 2").assign(cueingMethod = 'cueband')
cuebandB = df.query("cueingMethod2 == 'cueband' and assessmentNumber == 4").assign(cueingMethod = 'cueband')
cueband = pd.concat([cuebandA, cuebandB])

phoneA = df.query("cueingMethod2 == 'phone' and assessmentNumber == 4").assign(cueingMethod = 'phone')
phoneB = df.query("cueingMethod1 == 'phone' and assessmentNumber == 2").assign(cueingMethod = 'phone')
phone = pd.concat([phoneA, phoneB])

In [28]:
numeric_var = ['nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'freq_mean', 'duration_mean', 'severity_mean']

descriptive = pd.concat([cueband, phone])[numeric_var + ['cueingMethod']].groupby('cueingMethod').describe().T.reset_index()
descriptive.columns = ['var', 'des', 'cueband', 'phone']
descriptive.query("des == 'mean'")

Unnamed: 0,var,des,cueband,phone
1,nmsq_score,mean,10.869565,10.956522
9,pdq8_score,mean,8.086957,8.188406
17,updrs22_score,mean,3.376812,3.608696
25,romps_score,mean,19.376812,19.681159
33,freq_mean,mean,24.684474,27.892123
41,duration_mean,mean,23.85879,26.991368
49,severity_mean,mean,24.960956,28.014425


### Visualise answers and scores

In [None]:
df1, df2 = before, after
fig = make_subplots(rows=4, cols=2, subplot_titles=numeric_var)

for i, var in enumerate(numeric_var):
    row = (i // 2) + 1
    col = (i % 2) + 1
    
    fig.add_trace(
        go.Histogram(x=df1[var], name=f'Cueband trial {var}', ),
        row=row,
        col=col
    )
    fig.add_trace(
        go.Histogram(x=df2[var], name=f'Phone trial {var}'),
        row=row,
        col=col
    )
    

fig.update_layout(height=1000, width=800, title_text="Histograms of Numeric Variables")
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)

fig.show()

In [None]:
numeric_var = ['nmsq_score', 'pdq8_score', 'updrs22_score', 'romps_score', 'freq_mean', 'duration_mean', 'severity_mean']

df1 = df1.assign(source='before')
df2 = df2.assign(source='after')
vis = pd.concat([df1, df2])
fig = px.histogram(vis[["nmsq_score", "source"]], color="source", marginal= "box")
fig.update_traces(opacity=0.75)
fig.show()

In [None]:
fig = make_subplots(rows=5, cols=2, subplot_titles=numeric_var)

for i, var in enumerate(numeric_var):
    row = (i // 2) + 1
    col = (i % 2) + 1
    fig.add_trace(
        go.Box(x=df1[var], name=f'Cueband'),
        row=row,
        col=col
    )
    fig.add_trace(
        go.Box(x=df2[var], name=f'Phone'),
        row=row,
        col=col
    )

fig.update_layout(height=1000, width=800, title_text="Box Plots of Numeric Variables")

fig.show()

### Dependent T-test
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html

In [31]:
test_results = pd.DataFrame({'var': numeric_var})
# x = test_results.iloc[0]
def ttest_results(x):
    var = x['var']
    score1 = cueband[var]
    score2 = phone[var]
    alternative = 'less'
    D = np.array(score1) - np.array(score2)
    
    ttest = stats.ttest_rel(score1, score2, nan_policy= 'omit', alternative=alternative)
    x['alternative'] = alternative
    x['tstatistic'] = ttest.statistic
    x['CohenD'] = np.mean(D)/np.var(D)
    
    x['pvalue'] = ttest.pvalue
    # x['df'] = ttest.df
    
    return x

test_results = test_results.apply(ttest_results, axis=1)
test_results.to_clipboard()
test_results

Unnamed: 0,var,alternative,tstatistic,CohenD,pvalue
0,nmsq_score,less,-0.308211,-0.016065,0.379432
1,pdq8_score,less,-0.29463,-0.012583,0.384587
2,updrs22_score,less,-1.618675,-0.166165,0.055073
3,romps_score,less,-0.794443,-0.030496,0.214851
4,freq_mean,less,-2.144043,,0.017917
5,duration_mean,less,-2.018143,,0.023886
6,severity_mean,less,-2.006977,,0.02449
