In [None]:
import pandas as pd
import sklearn
import scipy
import matplotlib.pyplot as plt

In [None]:
# Dataset columns fed to every model below. The order of this list is the
# column order of the feature matrices built with `df[feature_columns].values`.
feature_columns = [
    "SvO2",
    "Heart_Rate",
    "Ejection_Fraction",
    "Radial_Artery_Mean_Pressure",
    "Radial_Artery_Systolic_Pressure",
    "Radial_Artery_Diastolic_Pressure",
    "Radial_Artery_DpDt",
    "Pulmonary_Artery_Mean_Pressure",
    "Central_Venous_Pressure",
    "gender",
    "age_years",
    "bsa",
    "height",
    "weight",
]

In [None]:
import os

In [None]:
# Directory holding the CSV inputs, given relative to the notebook's working
# directory and normalised for the host OS path conventions.
data_location = os.path.normpath(os.path.join(os.pardir, "data"))

In [None]:
# CSV analysed by the rest of the notebook. A second file,
# "hackathon_low_mixed_venous_oximetry.csv", lives in the same directory;
# presumably it can be swapped in here to study that task instead (TODO confirm
# it has the same columns).
dataset_name = "hackathon_low_cardiac_output.csv"
file = os.path.join(data_location, dataset_name)

# Rows with any missing value are discarded up front so the models never see NaN.
df = (
    pd.read_csv(file)
    .dropna()
)

In [None]:
import random

# Split by subject rather than by row, so that no subject contributes rows to
# both the training and the test set.
# Iteration order of a set is not guaranteed to be the same between runs, so the
# ids are sorted first and then shuffled with a fixed seed: the split is
# pseudo-random but identical on every Restart & Run All.
subjects = sorted(set(df.subject_id))
random.Random(0).shuffle(subjects)
N = (2 * len(subjects)) // 3  # two thirds of the subjects go to training
train_subjects = subjects[:N]
test_subjects = subjects[N:]

# Encode the two string columns as booleans: label is True for "Positive",
# gender is True for "F".
# NOTE: this overwrites `df`. Re-running the cell without reloading the CSV
# compares the already-boolean columns against strings and turns every value
# False, so reload the data before re-executing.
df = (
    df
    .assign(ClassificationLabel = df.ClassificationLabel=="Positive")
    .assign(gender=df.gender=="F")
)

df_train=df.loc[df.subject_id.isin(train_subjects)]
df_test=df.loc[df.subject_id.isin(test_subjects)]

print(len(df_test))
print(len(df_train))

# Classification

In [None]:
from sklearn.model_selection import train_test_split
# Training design matrix (plain ndarray, columns ordered as `feature_columns`)
# and the boolean classification target.
X = df_train.loc[:, feature_columns].values
y = df_train["ClassificationLabel"]

# Same layout for the held-out subjects.
X_test = df_test.loc[:, feature_columns].values
y_test = df_test["ClassificationLabel"]

In [None]:
from sklearn import tree
# Depth-2 tree: small enough to be read off the plot in the next cell.
# random_state is fixed because scikit-learn permutes the features at every
# split, so ties between equally good splits could otherwise be broken
# differently from run to run.
clf = tree.DecisionTreeClassifier(max_depth=2, random_state=0)
clf = clf.fit(X, y)

# Predictions for the held-out subjects, used by the confusion matrix below.
y_test_predict = clf.predict(X_test)


In [None]:
fig = plt.figure(figsize=(25,20))
# The classifier was fitted on a bare ndarray, so the column names have to be
# supplied here; otherwise the nodes are labelled x[0], x[1], ...
# class_names follow the ascending class order of the boolean target
# (False first, True second); True means the original label was "Positive".
_ = tree.plot_tree(
    clf,
    feature_names=feature_columns,
    class_names=["not Positive", "Positive"],
    filled=True,
)

In [None]:
import sklearn.metrics as metrics
# Fix the label set explicitly. Deriving the matrix size from the values that
# happen to occur in y_test breaks (shape mismatch or mislabelled rows) when
# one class is missing from the test labels or the predictions.
class_labels = [False, True]
conf_matrix = metrics.confusion_matrix(y_test, y_test_predict, labels=class_labels)

# Rows are the true labels, columns the predicted ones; index 0 = False, 1 = True.
pd.DataFrame(conf_matrix,
            index=[f"label_{i}" for i in range(len(class_labels))],
            columns=[f"pred_{i}" for i in range(len(class_labels))])

# Regression

In [None]:
from sklearn.model_selection import train_test_split
# Regression target: the continuous "RegressionLabel-CardiacIndex" column.
# Train and test targets must come from the same column, otherwise the error
# computed against y_test compares predictions of a different quantity.
X = df_train[feature_columns].values
y = df_train["RegressionLabel-CardiacIndex"]

X_test = df_test[feature_columns].values
y_test = df_test["RegressionLabel-CardiacIndex"]

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Regressor used for the rest of this section. `clf` is kept as the variable
# name because the following cells call clf.fit / clf.predict.
# random_state is fixed so repeated runs give the same model.
# A linear model (e.g. sklearn.linear_model.LinearRegression) can be assigned
# to `clf` here instead to compare against a linear baseline.
clf = GradientBoostingRegressor(random_state=0)

In [None]:
# Fit on the training subjects and keep the in-sample predictions under their
# own name, so they cannot be compared against y_test by mistake.
clf.fit(X, y)
y_pred_train = clf.predict(X)

In [None]:
# (Re)fit on the training data and predict for the held-out subjects;
# fit() returns the estimator itself, so the calls can be chained.
y_pred = clf.fit(X, y).predict(X_test)

In [None]:
# Mean squared error of the held-out predictions (displayed as the cell output).
metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Observed vs. predicted target for the held-out subjects.
plt.scatter(y_test, y_pred)
plt.xlabel("RegressionLabel-CardiacIndex (test set)")
plt.ylabel("Predicted value")
plt.title("Predicted vs. observed on held-out subjects");

In [None]:
# Inspection only: evaluates to the estimator class, so the notebook shows its
# repr. Nothing is assigned or fitted here.
GradientBoostingRegressor

# Dimensionality Reduction

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Rescale every feature to [0, 1] first so that no column dominates the
# decomposition merely because of its scale.
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)

# Three-component PCA fitted on the rescaled training features.
pca = PCA(n_components=3)
pca.fit(X_normalized)

In [None]:
import numpy as np
# Index (into feature_columns) of the feature with the largest loading on the
# first principal component. The sign of a component is arbitrary, so the
# comparison is made on absolute loadings rather than signed values.
np.argmax(np.abs(pca.components_[0]))

In [None]:
X_pca = pca.fit_transform(X_normalized)

# Columns 1 and 2 of the projection are the second and third principal
# components; the first component (column 0) is examined separately below.
xx = X_pca[:,1]
yy = X_pca[:,2]
plt.scatter(xx,yy)
plt.xlabel("PC2")
plt.ylabel("PC3");

In [None]:
# Count of training rows with gender == True (i.e. "F") among those whose
# first principal component is negative.
negative_pc1 = X_pca[:, 0] < 0
df_train.loc[negative_pc1, "gender"].sum()