### Code for analysing improved patients i.e., patients considered at high risk in the factual world (no intervention) and considered at low risk in the counterfactual world (lifestyle intervention)
(Counterfactual inference is performed separately on 4 different groups of subjects: females < 65 years, males < 65 years, females > 65 years and males > 65 years)
<li> run CountDiscreteCombinations.ipynb to retrieve the number of patients for each unique combination of features </li>
<li> run ReadSimulationsBatch.ipynb to retrieve the subset of improved patients in each batch of simulations</li>
<li> total number of improved patients (nI=491) </li>

In [1]:
import pandas as pd
import numpy as np

#### Subgroup 1

In [None]:
# Load the improved-patient table for subgroup 1.
# Each row describes a unique combination of input features.
# The relative path uses forward slashes so it resolves on Windows, Linux and
# macOS alike (a backslash separator is only understood on Windows).
i1 = pd.read_csv('improved/df_improved_g1.csv', delimiter=',')
# Keep only the columns used further down; "Unnamed: 0" is presumably the index
# column written when the CSV was saved -- TODO confirm.
i1 = i1.drop(
    columns=[
        "Unnamed: 0",
        'Pdiabetes_min', 'Pdiabetes_max',
        'Pdiabetes_star_min', 'Pdiabetes_star_max',
        'Pdelta_min', 'Pdelta_max',
        'out_R', 'out_C',
    ]
)
i1.head()

In [None]:
# Expand the combination table i1 into one row per patient: every row is
# repeated 'count' times, then the 'count' column is removed and the index reset.
repeated_labels = i1.index.repeat(i1['count'])
i1_patients = i1.loc[repeated_labels].drop('count', axis=1).reset_index(drop=True)
print(i1_patients)
# Distribution of each feature across its categories
header = "Value counts for column \033[1m'{}'\033[0m:"
for name, values in i1_patients.items():
    print(header.format(name))
    print(values.value_counts().sort_index())

In [4]:
#subgroup 1
# Row-wise flags: True where the final category differs from the initial one.
bmi_changed = i1_patients['BMI'] != i1_patients['BMI_final']
fbs_changed = i1_patients['FBS'] != i1_patients['FBS_final']

# Each line prints the (rows, columns) shape of the matching subset.
print('\033[1mTotal # of improved patients in subgroup 1 \033[0m', i1_patients.shape)
print('# improved patients with a change in \033[1mboth BMI and FBS \033[0m', i1_patients.loc[bmi_changed & fbs_changed, :].shape)
print('# improved patients with a change in \033[1mBMI category only \033[0m', i1_patients.loc[bmi_changed & ~fbs_changed, :].shape)
print('# improved patients with a change in \033[1mFBS category only\033[0m', i1_patients.loc[~bmi_changed & fbs_changed, :].shape)
print('# improved patients with no change \033[1mneither in BMI nor in FBS\033[0m', i1_patients.loc[~bmi_changed & ~fbs_changed, :].shape)

[1mTotal # of improved patients in subgroup 1 [0m (119, 11)
# improved patients with a change in [1mboth BMI and FBS [0m (33, 11)
# improved patients with a change in [1mBMI category only [0m (49, 11)
# improved patients with a change in [1mFBS category only[0m (37, 11)
# improved patients with no change [1mneither in BMI nor in FBS[0m (0, 11)


#### Subgroup 2

In [None]:
# Load the improved-patient table for subgroup 2.
# Each row describes a unique combination of input features.
# The relative path uses forward slashes so it resolves on Windows, Linux and
# macOS alike (a backslash separator is only understood on Windows).
i2 = pd.read_csv('improved/df_improved_g2.csv', delimiter=',')
# Keep only the columns used further down; "Unnamed: 0" is presumably the index
# column written when the CSV was saved -- TODO confirm.
i2 = i2.drop(
    columns=[
        "Unnamed: 0",
        'Pdiabetes_min', 'Pdiabetes_max',
        'Pdiabetes_star_min', 'Pdiabetes_star_max',
        'Pdelta_min', 'Pdelta_max',
        'out_R', 'out_C',
    ]
)
i2

In [6]:
# Expand the combination table i2 into one row per patient: every row is
# repeated 'count' times, then the 'count' column is removed and the index reset.
repeated_labels = i2.index.repeat(i2['count'])
i2_patients = i2.loc[repeated_labels].drop('count', axis=1).reset_index(drop=True)
print(i2_patients)
# Distribution of each feature across its categories
header = "Value counts for column \033[1m'{}'\033[0m:"
for name, values in i2_patients.items():
    print(header.format(name))
    print(values.value_counts().sort_index())

     LDL  BMI  Pressure  Diabetes  TG  HDL  FBS  BMI_final  FBS_final  age  \
0      0    0         0         0   0    1    1          0          0  0.0   
1      0    0         0         0   0    1    1          0          0  0.0   
2      0    0         0         0   0    1    1          0          0  0.0   
3      0    0         0         0   0    1    1          0          0  0.0   
4      0    0         1         1   0    0    1          0          0  0.0   
..   ...  ...       ...       ...  ..  ...  ...        ...        ...  ...   
224    1    2         1         0   1    1    1          2          0  0.0   
225    1    2         1         1   1    0    0          1          0  0.0   
226    1    3         0         0   0    1    1          2          1  0.0   
227    1    3         0         1   0    1    1          2          1  0.0   
228    1    4         0         1   1    1    1          4          0  0.0   

     sex  
0    1.0  
1    1.0  
2    1.0  
3    1.0  
4    1.0

In [7]:
#subgroup 2
# Row-wise flags: True where the final category differs from the initial one.
bmi_changed = i2_patients['BMI'] != i2_patients['BMI_final']
fbs_changed = i2_patients['FBS'] != i2_patients['FBS_final']

# Each line prints the (rows, columns) shape of the matching subset.
print('\033[1mTotal # of improved patients in subgroup 2 \033[0m', i2_patients.shape)
print('# improved patients with a change in \033[1mboth BMI and FBS \033[0m', i2_patients.loc[bmi_changed & fbs_changed, :].shape)
print('# improved patients with a change in \033[1mBMI category only \033[0m', i2_patients.loc[bmi_changed & ~fbs_changed, :].shape)
print('# improved patients with a change in \033[1mFBS category only\033[0m', i2_patients.loc[~bmi_changed & fbs_changed, :].shape)
print('# improved patients with no change \033[1mneither in BMI nor in FBS\033[0m', i2_patients.loc[~bmi_changed & ~fbs_changed, :].shape)

[1mTotal # of improved patients in subgroup 2 [0m (229, 11)
# improved patients with a change in [1mboth BMI and FBS [0m (40, 11)
# improved patients with a change in [1mBMI category only [0m (89, 11)
# improved patients with a change in [1mFBS category only[0m (100, 11)
# improved patients with no change [1mneither in BMI nor in FBS[0m (0, 11)


#### Subgroup 3

In [None]:
# Load the improved-patient table for subgroup 3.
# Each row describes a unique combination of input features.
# The relative path uses forward slashes so it resolves on Windows, Linux and
# macOS alike (a backslash separator is only understood on Windows).
i3 = pd.read_csv('improved/df_improved_g3.csv', delimiter=',')
# Keep only the columns used further down; "Unnamed: 0" is presumably the index
# column written when the CSV was saved -- TODO confirm.
i3 = i3.drop(
    columns=[
        "Unnamed: 0",
        'Pdiabetes_min', 'Pdiabetes_max',
        'Pdiabetes_star_min', 'Pdiabetes_star_max',
        'Pdelta_min', 'Pdelta_max',
        'out_R', 'out_C',
    ]
)
i3

In [9]:
# Expand the combination table i3 into one row per patient: every row is
# repeated 'count' times, then the 'count' column is removed and the index reset.
repeated_labels = i3.index.repeat(i3['count'])
i3_patients = i3.loc[repeated_labels].drop('count', axis=1).reset_index(drop=True)
# Distribution of each feature across its categories
header = "Value counts for column \033[1m'{}'\033[0m:"
for name, values in i3_patients.items():
    print(header.format(name))
    print(values.value_counts().sort_index())

Value counts for column [1m'LDL'[0m:
0    68
1    13
Name: LDL, dtype: int64
Value counts for column [1m'BMI'[0m:
1    43
2    16
3    17
4     5
Name: BMI, dtype: int64
Value counts for column [1m'Pressure'[0m:
0    18
1    63
Name: Pressure, dtype: int64
Value counts for column [1m'Diabetes'[0m:
0    75
1     6
Name: Diabetes, dtype: int64
Value counts for column [1m'TG'[0m:
0    68
1    13
Name: TG, dtype: int64
Value counts for column [1m'HDL'[0m:
0    63
1    18
Name: HDL, dtype: int64
Value counts for column [1m'FBS'[0m:
0     4
1    77
Name: FBS, dtype: int64
Value counts for column [1m'BMI_final'[0m:
0    32
1    19
2    21
3     8
4     1
Name: BMI_final, dtype: int64
Value counts for column [1m'FBS_final'[0m:
0    43
1    38
Name: FBS_final, dtype: int64
Value counts for column [1m'age'[0m:
1.0    81
Name: age, dtype: int64
Value counts for column [1m'sex'[0m:
0.0    81
Name: sex, dtype: int64


In [10]:
#subgroup 3
print('\033[1mTotal # of improved patients in subgroup 3 \033[0m',i3_patients.shape)
print('# improved patients with a change in \033[1mboth BMI and FBS \033[0m', i3_patients.loc[(i3_patients['BMI']!=i3_patients['BMI_final'])&(i3_patients['FBS']!=i3_patients['FBS_final']),:].shape)
print('# improved patients with a change in \033[1mBMI category only \033[0m',i3_patients.loc[(i3_patients['BMI']!=i3_patients['BMI_final'])&(i3_patients['FBS']==i3_patients['FBS_final']),:].shape)
print('# improved patients with a change in \033[1mFBS category only\033[0m', i3_patients.loc[(i3_patients['BMI']==i3_patients['BMI_final'])&(i3_patients['FBS']!=i3_patients['FBS_final']),:].shape)
print('# improved patients with no change \033[1mneither in BMI nor in FBS\033[0m',i3_patients.loc[(i3_patients['BMI']==i3_patients['BMI_final'])&(i3_patients['FBS']==i3_patients['FBS_final']),:].shape)

[1mTotal # of improved patients in subgroup 3 [0m (81, 11)
# improved patients with a change in [1mboth BMI and FBS [0m (15, 11)
# improved patients with a change in [1mBMI category only [0m (42, 11)
# improved patients with a change in [1mFBS category only[0m (24, 11)
# improved patients with no change [1mneither in BMI nor in FBS[0m (0, 11)


#### Subgroup 4

In [None]:
# Load the improved-patient table for subgroup 4.
# Each row describes a unique combination of input features.
# The relative path uses forward slashes so it resolves on Windows, Linux and
# macOS alike (a backslash separator is only understood on Windows).
i4 = pd.read_csv('improved/df_improved_g4.csv', delimiter=',')
# Keep only the columns used further down; "Unnamed: 0" is presumably the index
# column written when the CSV was saved -- TODO confirm.
i4 = i4.drop(
    columns=[
        "Unnamed: 0",
        'Pdiabetes_min', 'Pdiabetes_max',
        'Pdiabetes_star_min', 'Pdiabetes_star_max',
        'Pdelta_min', 'Pdelta_max',
        'out_R', 'out_C',
    ]
)
i4

In [12]:
# Expand the combination table i4 into one row per patient: every row is
# repeated 'count' times, then the 'count' column is removed and the index reset.
repeated_labels = i4.index.repeat(i4['count'])
i4_patients = i4.loc[repeated_labels].drop('count', axis=1).reset_index(drop=True)
# Distribution of each feature across its categories
header = "Value counts for column \033[1m'{}'\033[0m:"
for name, values in i4_patients.items():
    print(header.format(name))
    print(values.value_counts().sort_index())

Value counts for column [1m'LDL'[0m:
0    55
1     7
Name: LDL, dtype: int64
Value counts for column [1m'BMI'[0m:
1    35
2    22
3     3
4     2
Name: BMI, dtype: int64
Value counts for column [1m'Pressure'[0m:
0    31
1    31
Name: Pressure, dtype: int64
Value counts for column [1m'Diabetes'[0m:
0    56
1     6
Name: Diabetes, dtype: int64
Value counts for column [1m'TG'[0m:
0    51
1    11
Name: TG, dtype: int64
Value counts for column [1m'HDL'[0m:
0    30
1    32
Name: HDL, dtype: int64
Value counts for column [1m'FBS'[0m:
0     3
1    59
Name: FBS, dtype: int64
Value counts for column [1m'BMI_final'[0m:
0    24
1    21
2    13
3     3
4     1
Name: BMI_final, dtype: int64
Value counts for column [1m'FBS_final'[0m:
0    41
1    21
Name: FBS_final, dtype: int64
Value counts for column [1m'age'[0m:
1.0    62
Name: age, dtype: int64
Value counts for column [1m'sex'[0m:
1.0    62
Name: sex, dtype: int64


In [13]:
#subgroup 4
print('\033[1mTotal # of improved patients in subgroup 4 \033[0m',i4_patients.shape)
print('# improved patients with a change in \033[1mboth BMI and FBS \033[0m', i4_patients.loc[(i4_patients['BMI']!=i4_patients['BMI_final'])&(i4_patients['FBS']!=i4_patients['FBS_final']),:].shape)
print('# improved patients with a change in \033[1mBMI category only \033[0m',i4_patients.loc[(i4_patients['BMI']!=i4_patients['BMI_final'])&(i4_patients['FBS']==i4_patients['FBS_final']),:].shape)
print('# improved patients with a change in \033[1mFBS category only\033[0m', i4_patients.loc[(i4_patients['BMI']==i4_patients['BMI_final'])&(i4_patients['FBS']!=i4_patients['FBS_final']),:].shape)
print('# improved patients with no change \033[1mneither in BMI nor in FBS\033[0m',i4_patients.loc[(i4_patients['BMI']==i4_patients['BMI_final'])&(i4_patients['FBS']==i4_patients['FBS_final']),:].shape)

[1mTotal # of improved patients in subgroup 4 [0m (62, 11)
# improved patients with a change in [1mboth BMI and FBS [0m (12, 11)
# improved patients with a change in [1mBMI category only [0m (24, 11)
# improved patients with a change in [1mFBS category only[0m (26, 11)
# improved patients with no change [1mneither in BMI nor in FBS[0m (0, 11)


### Whole set of improved patients

In [None]:
# Show the (rows, columns) shape of each subgroup before stacking them.
for subgroup_patients in (i1_patients, i2_patients, i3_patients, i4_patients):
    print(subgroup_patients.shape)

# Stack the four per-patient tables into one frame.
# ignore_index=True assigns a fresh 0..n-1 index; without it every subgroup
# restarts at 0, leaving duplicate row labels that make label-based lookups
# on df_tot ambiguous.
df_tot = pd.concat(
    [i1_patients, i2_patients, i3_patients, i4_patients],
    ignore_index=True,
)
df_tot

In [15]:
# Row-wise flags over the whole set: True where the final category differs from the initial one.
bmi_changed = df_tot['BMI'] != df_tot['BMI_final']
fbs_changed = df_tot['FBS'] != df_tot['FBS_final']

# Each line prints the (rows, columns) shape of the matching subset.
print('\033[1mTotal # of improved patients\033[0m', df_tot.shape)
print('# improved patients with a change in \033[1mboth BMI and FBS \033[0m', df_tot.loc[bmi_changed & fbs_changed, :].shape)
print('# improved patients with a change in \033[1mBMI category only \033[0m', df_tot.loc[bmi_changed & ~fbs_changed, :].shape)
print('# improved patients with a change in \033[1mFBS category only\033[0m', df_tot.loc[~bmi_changed & fbs_changed, :].shape)
print('# improved patients with no change \033[1mneither in BMI nor in FBS\033[0m', df_tot.loc[~bmi_changed & ~fbs_changed, :].shape)

[1mTotal # of improved patients[0m (491, 11)
# improved patients with a change in [1mboth BMI and FBS [0m (100, 11)
# improved patients with a change in [1mBMI category only [0m (204, 11)
# improved patients with a change in [1mFBS category only[0m (187, 11)
# improved patients with no change [1mneither in BMI nor in FBS[0m (0, 11)


In [16]:
# FBS: rows whose FBS category is 1 initially and 0 in the final column.
fbs_one_to_zero = (df_tot['FBS'] == 1) & (df_tot['FBS_final'] == 0)
print('# improved patients with a change \033[1mat least in FBS\033[0m', df_tot.loc[fbs_one_to_zero, :].shape, '\n')

# BMI: rows whose BMI category differs between the initial and final columns.
bmi_changed = df_tot['BMI'] != df_tot['BMI_final']
print('# improved patients with a change \033[1mat least in BMI\033[0m', df_tot.loc[bmi_changed, :].shape)

# Break the BMI changes down by (initial category, final category) pair.
bmi_steps = [
    ('- # improved patients with transition 4->3', 4, 3),
    ('- # improved patients with transition 3->2', 3, 2),
    ('- # improved patients with transition 2->1', 2, 1),
    ('- # improved patients with transition 1->0', 1, 0),
]
for label, bmi_start, bmi_end in bmi_steps:
    step_mask = (df_tot['BMI'] == bmi_start) & (df_tot['BMI_final'] == bmi_end)
    print(label, df_tot.loc[step_mask, :].shape)

# improved patients with a change [1mat least in FBS[0m (287, 11) 

# improved patients with a change [1mat least in BMI[0m (304, 11)
- # improved patients with transition 4->3 (17, 11)
- # improved patients with transition 3->2 (25, 11)
- # improved patients with transition 2->1 (82, 11)
- # improved patients with transition 1->0 (180, 11)


In [17]:
# Rows where both the BMI and the FBS category differ between initial and final columns.
both_changed = (df_tot['BMI'] != df_tot['BMI_final']) & (df_tot['FBS'] != df_tot['FBS_final'])
print('# improved patients with a change in \033[1mboth BMI and FBS \033[0m', df_tot.loc[both_changed, :].shape)

# Same BMI breakdown as a (initial, final) pair, restricted to rows whose FBS goes from 1 to 0.
fbs_one_to_zero = (df_tot['FBS'] == 1) & (df_tot['FBS_final'] == 0)
bmi_steps = [
    ('- # improved patients with transition 4->3', 4, 3),
    ('- # improved patients with transition 3->2', 3, 2),
    ('- # improved patients with transition 2->1', 2, 1),
    ('- # improved patients with transition 1->0', 1, 0),
]
for label, bmi_start, bmi_end in bmi_steps:
    step_mask = (df_tot['BMI'] == bmi_start) & (df_tot['BMI_final'] == bmi_end) & fbs_one_to_zero
    print(label, df_tot.loc[step_mask, :].shape)

# improved patients with a change in [1mboth BMI and FBS [0m (100, 11)
- # improved patients with transition 4->3 (3, 11)
- # improved patients with transition 3->2 (7, 11)
- # improved patients with transition 2->1 (51, 11)
- # improved patients with transition 1->0 (39, 11)


In [18]:
# Distribution of each feature across its categories, over the whole set
header = "Value counts for column \033[1m'{}'\033[0m:"
for name, values in df_tot.items():
    print(header.format(name))
    print(values.value_counts().sort_index())

Value counts for column [1m'LDL'[0m:
0    428
1     63
Name: LDL, dtype: int64
Value counts for column [1m'BMI'[0m:
0      6
1    260
2    148
3     50
4     27
Name: BMI, dtype: int64
Value counts for column [1m'Pressure'[0m:
0    127
1    364
Name: Pressure, dtype: int64
Value counts for column [1m'Diabetes'[0m:
0    454
1     37
Name: Diabetes, dtype: int64
Value counts for column [1m'TG'[0m:
0    371
1    120
Name: TG, dtype: int64
Value counts for column [1m'HDL'[0m:
0    194
1    297
Name: HDL, dtype: int64
Value counts for column [1m'FBS'[0m:
0     44
1    447
Name: FBS, dtype: int64
Value counts for column [1m'BMI_final'[0m:
0    186
1    162
2     91
3     42
4     10
Name: BMI_final, dtype: int64
Value counts for column [1m'FBS_final'[0m:
0    331
1    160
Name: FBS_final, dtype: int64
Value counts for column [1m'age'[0m:
0.0    348
1.0    143
Name: age, dtype: int64
Value counts for column [1m'sex'[0m:
0.0    200
1.0    291
Name: sex, dtype: int64
