In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import QuantileTransformer
from sklearn.metrics import confusion_matrix, roc_auc_score

In [2]:
# Read the three HDF5 inputs in one pass: the feature table (`df`), the
# 4-class label vector (`y`) and the `subject` data stored next to the labels.
# NOTE(review): the three objects are presumably row-aligned -- confirm upstream.
df, y, subject = (
    pd.read_hdf(source)
    for source in (
        '../data/features/ach-at-hex_6000_eff_combined.h5',
        '../data/processed/y_4_class_6000.h5',
        '../data/processed/subject_6000.h5',
    )
)

In [9]:
# Give both objects a fresh 0..n-1 index (the old index is discarded).
# Later cells feed the index labels of filtered `y` subsets to `df.iloc`,
# which is positional, so the labels must coincide with row positions.
y, subject = (obj.reset_index(drop=True) for obj in (y, subject))

In [4]:
# baseline vs. rest
# Binary target built from the class codes in `y`:
#   code 0    -> 'baseline'
#   code >= 1 -> 'drug'
# Cast to object dtype *before* writing the string labels. Assigning strings
# into a numeric Series relies on silent upcasting, which pandas deprecated in
# 2.1 (PDEP-6) and intends to turn into an error.
base_v_drug = y.astype(object)
# Both masks are evaluated on the untouched numeric `y`, so the second
# comparison never runs against a Series that already contains strings.
base_v_drug[y >= 1] = 'drug'
base_v_drug[y == 0] = 'baseline'
base_v_drug.name = 'drug_applied'
# Quick sanity display: size plus the first and last few labels.
base_v_drug.shape, base_v_drug.head(), base_v_drug.tail()

((990,),
 0    baseline
 1    baseline
 2    baseline
 3    baseline
 4    baseline
 Name: drug_applied, dtype: object,
 985    drug
 986    drug
 987    drug
 988    drug
 989    drug
 Name: drug_applied, dtype: object)

In [10]:
# ach vs. at or hex
# Binary target restricted to the non-baseline rows of `y`:
#   code 1   -> 'first'
#   code > 1 -> 'second'
# Keep the numeric codes of the selected rows so that every mask below is
# computed on numbers, not on a partially relabelled Series.
first_v_second_codes = y[y != 0]
# Cast to object dtype before writing strings: setting strings into a numeric
# Series relies on silent upcasting, deprecated since pandas 2.1 (PDEP-6).
first_v_second = first_v_second_codes.astype(object)
first_v_second[first_v_second_codes > 1] = 'second'
first_v_second[first_v_second_codes == 1] = 'first'
first_v_second.name = 'drug_applied'
# `iloc` is positional: this relies on `y` carrying a 0..n-1 index (see the
# reset_index cell) so that its labels double as row positions in `df`.
df_first_v_second = df.iloc[first_v_second.index, :]
# Explicit label-based lookup of the same rows in `subject`.
subject_first_v_second = subject.loc[first_v_second.index]
first_v_second.shape, first_v_second.head(), first_v_second.tail()

((660,),
 30    first
 31    first
 32    first
 33    first
 34    first
 Name: drug_applied, dtype: object,
 985    second
 986    second
 987    second
 988    second
 989    second
 Name: drug_applied, dtype: object)

In [16]:
# at vs. hex
# Binary target restricted to rows of `y` whose code is greater than 1:
#   code 2 -> 'at'
#   code 3 -> 'hex'
# Keep the numeric codes of the selected rows so the masks are computed on
# numbers rather than on a partially relabelled Series.
at_v_hex_codes = y[y > 1]
# Cast to object dtype before writing strings: setting strings into a numeric
# Series relies on silent upcasting, deprecated since pandas 2.1 (PDEP-6).
at_v_hex = at_v_hex_codes.astype(object)
at_v_hex[at_v_hex_codes == 2] = 'at'
at_v_hex[at_v_hex_codes == 3] = 'hex'
at_v_hex.name = 'drug_applied'
# `iloc` is positional: this relies on `y` carrying a 0..n-1 index (see the
# reset_index cell) so that its labels double as row positions in `df`.
df_at_v_hex = df.iloc[at_v_hex.index, :]
# Explicit label-based lookup of the same rows in `subject`.
subject_at_v_hex = subject.loc[at_v_hex.index]
at_v_hex.shape, at_v_hex.head(), at_v_hex.tail()



((330,),
 60    at
 61    at
 62    at
 63    at
 64    at
 Name: drug_applied, dtype: object,
 985    hex
 986    hex
 987    hex
 988    hex
 989    hex
 Name: drug_applied, dtype: object)

In [12]:
# Persist the baseline-vs-drug split. This split keeps every row, so the
# unfiltered `subject` and `df` objects are written alongside the labels.
for payload, destination in (
    (base_v_drug, '../data/FINAL/y_base_v_drug.h5'),
    (subject, '../data/FINAL/subject_base_v_drug.h5'),
    (df, '../data/FINAL/X_base_v_drug.h5'),
):
    payload.to_hdf(destination, key='data', complevel=9)

In [14]:
# Persist the first-vs-second split: labels, matching `subject` rows and
# matching feature rows, each under key 'data' with maximum compression.
for payload, destination in (
    (first_v_second, '../data/FINAL/y_first_v_second.h5'),
    (subject_first_v_second, '../data/FINAL/subject_first_v_second.h5'),
    (df_first_v_second, '../data/FINAL/X_first_v_second.h5'),
):
    payload.to_hdf(destination, key='data', complevel=9)

In [17]:
# Persist the at-vs-hex split: labels, matching `subject` rows and matching
# feature rows, each under key 'data' with maximum compression.
for payload, destination in (
    (at_v_hex, '../data/FINAL/y_at_v_hex.h5'),
    (subject_at_v_hex, '../data/FINAL/subject_at_v_hex.h5'),
    (df_at_v_hex, '../data/FINAL/X_at_v_hex.h5'),
):
    payload.to_hdf(destination, key='data', complevel=9)


In [18]:
# baseline vs. at
# Binary target restricted to rows of `y` whose code is 0 or 2:
#   code 0 -> 'baseline'
#   code 2 -> 'at'
# Keep the numeric codes of the selected rows so the masks are computed on
# numbers rather than on a partially relabelled Series.
base_v_at_codes = y[(y == 0) | (y == 2)]
# Cast to object dtype before writing strings: setting strings into a numeric
# Series relies on silent upcasting, deprecated since pandas 2.1 (PDEP-6).
base_v_at = base_v_at_codes.astype(object)
base_v_at[base_v_at_codes == 0] = 'baseline'
base_v_at[base_v_at_codes == 2] = 'at'
# Name the target like every other split in this notebook; without this the
# Series (and the file saved from it) keeps the source name 'y'.
base_v_at.name = 'drug_applied'
# `iloc` is positional: this relies on `y` carrying a 0..n-1 index (see the
# reset_index cell) so that its labels double as row positions in `df`.
df_base_v_at = df.iloc[base_v_at.index, :]
# Explicit label-based lookup of the same rows in `subject`.
subject_base_v_at = subject.loc[base_v_at.index]
base_v_at.shape, base_v_at.head(), base_v_at.tail()


((480,),
 0    baseline
 1    baseline
 2    baseline
 3    baseline
 4    baseline
 Name: y, dtype: object,
 925    baseline
 926    baseline
 927    baseline
 928    baseline
 929    baseline
 Name: y, dtype: object)

In [23]:
# baseline vs. hex
# Binary target restricted to rows of `y` whose code is 0 or 3:
#   code 0 -> 'baseline'
#   code 3 -> 'hex'
# Keep the numeric codes of the selected rows so the masks are computed on
# numbers rather than on a partially relabelled Series.
base_v_hex_codes = y[(y == 0) | (y == 3)]
# Cast to object dtype before writing strings: setting strings into a numeric
# Series relies on silent upcasting, deprecated since pandas 2.1 (PDEP-6).
base_v_hex = base_v_hex_codes.astype(object)
base_v_hex[base_v_hex_codes == 0] = 'baseline'
base_v_hex[base_v_hex_codes == 3] = 'hex'
base_v_hex.name = 'drug_applied'
# `iloc` is positional: this relies on `y` carrying a 0..n-1 index (see the
# reset_index cell) so that its labels double as row positions in `df`.
df_base_v_hex = df.iloc[base_v_hex.index, :]
# Explicit label-based lookup of the same rows in `subject`.
subject_base_v_hex = subject.loc[base_v_hex.index]
base_v_hex.shape, base_v_hex.head(), base_v_hex.tail()

((510,),
 0    baseline
 1    baseline
 2    baseline
 3    baseline
 4    baseline
 Name: drug_applied, dtype: object,
 985    hex
 986    hex
 987    hex
 988    hex
 989    hex
 Name: drug_applied, dtype: object)

In [24]:
# ach vs. hex
# Binary target restricted to rows of `y` whose code is 1 or 3:
#   code 1 -> 'ach'
#   code 3 -> 'hex'
# Keep the numeric codes of the selected rows so the masks are computed on
# numbers rather than on a partially relabelled Series.
ach_v_hex_codes = y[(y == 1) | (y == 3)]
# Cast to object dtype before writing strings: setting strings into a numeric
# Series relies on silent upcasting, deprecated since pandas 2.1 (PDEP-6).
ach_v_hex = ach_v_hex_codes.astype(object)
ach_v_hex[ach_v_hex_codes == 1] = 'ach'
ach_v_hex[ach_v_hex_codes == 3] = 'hex'
ach_v_hex.name = 'drug_applied'
# `iloc` is positional: this relies on `y` carrying a 0..n-1 index (see the
# reset_index cell) so that its labels double as row positions in `df`.
df_ach_v_hex = df.iloc[ach_v_hex.index, :]
# Explicit label-based lookup of the same rows in `subject`.
subject_ach_v_hex = subject.loc[ach_v_hex.index]
ach_v_hex.shape, ach_v_hex.head(), ach_v_hex.tail()

((510,),
 30    ach
 31    ach
 32    ach
 33    ach
 34    ach
 Name: drug_applied, dtype: object,
 985    hex
 986    hex
 987    hex
 988    hex
 989    hex
 Name: drug_applied, dtype: object)

In [25]:
# Persist the ach-vs-hex split: labels, matching `subject` rows and matching
# feature rows, each under key 'data' with maximum compression.
for payload, destination in (
    (ach_v_hex, '../data/FINAL/y_ach_v_hex.h5'),
    (subject_ach_v_hex, '../data/FINAL/subject_ach_v_hex.h5'),
    (df_ach_v_hex, '../data/FINAL/X_ach_v_hex.h5'),
):
    payload.to_hdf(destination, key='data', complevel=9)

In [26]:
# Persist the baseline-vs-hex split: labels, matching `subject` rows and
# matching feature rows, each under key 'data' with maximum compression.
for payload, destination in (
    (base_v_hex, '../data/FINAL/y_base_v_hex.h5'),
    (subject_base_v_hex, '../data/FINAL/subject_base_v_hex.h5'),
    (df_base_v_hex, '../data/FINAL/X_base_v_hex.h5'),
):
    payload.to_hdf(destination, key='data', complevel=9)

In [27]:
# Persist the baseline-vs-at split: labels, matching `subject` rows and
# matching feature rows, each under key 'data' with maximum compression.
for payload, destination in (
    (base_v_at, '../data/FINAL/y_base_v_at.h5'),
    (subject_base_v_at, '../data/FINAL/subject_base_v_at.h5'),
    (df_base_v_at, '../data/FINAL/X_base_v_at.h5'),
):
    payload.to_hdf(destination, key='data', complevel=9)