In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# Columns used as model inputs. Despite the `rm_` prefix, these are the
# columns that are *selected* from the frame further down in this cell.
rm_features = ['IOP', 'SBP', 'DBP', 'HR', 'P1', 'P4', 'P5', 'R4', 'R5']

data_dir = '../data/seed/'
plot_dir = data_dir + 'SSL_results/'

data = pd.read_csv(data_dir + 'SEED_progression_enhanced.csv')

# Rows whose `func_progression` equals -1 form the unlabeled pool; every
# other row is treated as labeled.
is_unlabeled = data['func_progression'] == -1
labeled_rows = data[~is_unlabeled].reset_index(drop=True)
unlabeled_X = data[is_unlabeled].reset_index(drop=True)[rm_features]

# Ordered (unshuffled) 80/20 split of the labeled rows into train and test.
split = int(0.8 * len(labeled_rows))
labeled_X = labeled_rows[rm_features].iloc[:split]
test_X = labeled_rows[rm_features].iloc[split:]
labels = labeled_rows['func_progression'].iloc[:split]
test_labels = labeled_rows['func_progression'].iloc[split:]

labeled_X

  data = pd.read_csv(data_dir + 'SEED_progression_enhanced.csv')


Unnamed: 0,IOP,SBP,DBP,HR,P1,P4,P5,R4,R5
0,14.0,160.5,89.000000,82.000000,54.353404,24.467735,20.920736,3958.933240,2230.158494
1,17.0,150.5,71.666667,82.666667,50.612120,23.680001,20.146534,4867.423344,2236.001740
2,17.0,132.5,78.500000,65.000000,46.617885,22.671671,19.437524,5146.392218,2237.120771
3,21.0,138.0,71.000000,95.500000,48.432416,24.941110,19.296306,5678.779132,4768.998472
4,13.0,143.0,67.500000,73.333333,48.062857,22.853702,19.825284,4093.769151,2230.304842
...,...,...,...,...,...,...,...,...,...
2143,15.0,160.0,97.500000,52.000000,55.001836,24.668886,21.008155,4148.395844,2231.561631
2144,24.0,141.0,81.500000,67.500000,52.034682,29.763503,19.068848,5162.070910,10132.463522
2145,15.0,108.5,67.500000,76.000000,39.435975,20.759126,18.238056,5103.683479,2235.849699
2146,20.0,123.0,82.500000,75.000000,45.229977,23.468874,18.887381,5821.254934,3910.935462


In [3]:
# Show the training labels: the `func_progression` values for the first 80%
# of the labeled rows produced by the split in the data-loading cell.
labels

0       1
1       1
2       0
3       1
4       1
       ..
2143    0
2144    1
2145    0
2146    0
2147    1
Name: func_progression, Length: 2148, dtype: int64

In [4]:
# Show the unlabeled pool: rows with `func_progression == -1`, restricted to
# the feature columns listed in `rm_features`.
unlabeled_X

Unnamed: 0,IOP,SBP,DBP,HR,P1,P4,P5,R4,R5
0,16.0,144.000000,85.5,98.000000,50.055924,23.496370,20.077229,4635.607939,2234.535487
1,13.0,162.000000,76.5,46.000000,53.586192,24.214022,20.828681,3842.424242,2228.803494
2,20.0,136.000000,93.0,51.333333,49.318582,24.270283,19.652881,5568.706499,3453.537418
3,12.0,139.500000,87.0,90.500000,48.757540,22.993921,19.971621,3839.077995,2228.561475
4,18.0,112.000000,63.0,70.000000,40.240945,21.292278,18.250711,5887.861383,2681.545684
...,...,...,...,...,...,...,...,...,...
10723,15.0,132.500000,69.0,81.500000,45.671763,22.341760,19.335764,4681.690635,2234.134459
10724,15.0,155.500000,89.5,88.500000,53.214678,24.234647,20.679634,4223.281692,2232.054591
10725,16.0,122.500000,80.0,101.500000,44.137203,22.003486,19.027839,5029.395916,2236.179508
10726,17.0,138.666667,84.0,87.000000,48.656426,23.192795,19.793699,4986.792931,2236.553095


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [6]:
class MLP(nn.Module):
    """Feed-forward network with two ReLU hidden layers and a sigmoid output.

    Args:
        n_in: Number of input features.
        n_h1: Width of the first hidden layer.
        n_h2: Width of the second hidden layer.
        n_out: Number of output units.
    """

    def __init__(self, n_in, n_h1, n_h2, n_out):
        super().__init__()
        # Layers are registered in forward order; attribute names are part of
        # the state_dict keys and are therefore kept as-is.
        self.h1 = nn.Linear(n_in, n_h1)
        self.relu1 = nn.ReLU()
        self.h2 = nn.Linear(n_h1, n_h2)
        self.relu2 = nn.ReLU()
        self.output = nn.Linear(n_h2, n_out)

    def forward(self, x):
        """Return the sigmoid of the output layer applied to `x`."""
        hidden = self.relu1(self.h1(x))
        hidden = self.relu2(self.h2(hidden))
        return torch.sigmoid(self.output(hidden))

In [7]:
# First training is fully supervised

# Seed before the model is built so weight initialisation, and therefore the
# metrics reported below, are reproducible after a kernel restart.
torch.manual_seed(42)

n_in = labeled_X.shape[1]
n_h1 = 20
n_h2 = int(n_h1/2)

mlp = MLP(n_in, n_h1, n_h2, 1)
criterion = nn.BCELoss()
optimizer = optim.Adam(mlp.parameters(), lr=0.001)

# The training data does not change between epochs, so convert it to tensors
# once instead of rebuilding both tensors on every iteration.
# Targets get a trailing dimension to match the (N, 1) model output.
train_inputs = torch.tensor(labeled_X.values, dtype=torch.float32)
train_targets = torch.tensor(labels.values, dtype=torch.float32).unsqueeze(1)

# NOTE(review): features are fed unscaled although their magnitudes differ by
# orders of magnitude; the recorded evaluation of this model shows precision
# and recall of 0 — consider standardising inputs (and doing the same at
# evaluation time).
# Full-batch gradient descent: one optimizer step per epoch.
for epoch in tqdm(range(100)):
    out = mlp(train_inputs)
    loss = criterion(out, train_targets)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

100%|██████████| 100/100 [00:00<00:00, 509.43it/s]


In [8]:
def mlp_evaluation(model, X, y):
    """Score a binary classifier that outputs probabilities.

    The model output is thresholded at 0.5 to obtain hard predictions, which
    are compared against `y`.

    Args:
        model: A `torch.nn.Module` called as `model(X)`.
        X: Input tensor passed to the model unchanged.
        y: Tensor of true labels; flattened before scoring.

    Returns:
        Tuple `(accuracy, precision, recall, f1)`.
    """
    # Remember the caller's mode so evaluation does not leave the model
    # permanently in eval mode if training resumes afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            probs = model(X)
    finally:
        model.train(was_training)

    # Move to CPU (and detach) before converting so the function also works
    # for tensors living on another device or tracking gradients.
    predicted = (probs > 0.5).int().cpu().numpy().flatten()
    true = y.detach().cpu().numpy().flatten()

    acc = accuracy_score(true, predicted)
    prec = precision_score(true, predicted)
    rec = recall_score(true, predicted)
    f1 = f1_score(true, predicted)

    return acc, prec, rec, f1

# Convert the held-out split to float32 tensors; the targets get a trailing
# dimension, mirroring how the training targets were shaped.
test_inputs = torch.tensor(test_X.values, dtype=torch.float32)
test_targets = torch.tensor(test_labels.values, dtype=torch.float32).unsqueeze(1)

acc, prec, rec, f1 = mlp_evaluation(mlp, test_inputs, test_targets)
print(f"Accuracy: {acc}")
print(f"Precision: {prec}")
print(f"Recall: {rec}")
print(f"F1: {f1}")

Accuracy: 0.5418994413407822
Precision: 0.0
Recall: 0.0
F1: 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
# Hyper-parameter values explored for the SVC.
param_grid = {
    'C': [0.1, 1.0, 10.0],
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'],
}

best_params, best_score, svm = None, -np.inf, None

# Enumerate every (C, kernel, gamma) triple up front, in the same order a
# C -> kernel -> gamma nested loop would visit them.
combinations = [
    (C, kernel, gamma)
    for C in param_grid['C']
    for kernel in param_grid['kernel']
    for gamma in param_grid['gamma']
]
total_combinations = len(combinations)

with tqdm(total=total_combinations, desc="Grid Search") as pbar:
    for C, kernel, gamma in combinations:
        model = SVC(C=C, kernel=kernel, gamma=gamma)
        # Mean 5-fold cross-validation score on the training split.
        scores = cross_val_score(model, labeled_X, labels, cv=5, n_jobs=-1)
        mean_score = scores.mean()

        pbar.update(1)

        # Strict comparison: on ties the earliest combination is kept.
        if mean_score > best_score:
            best_score = mean_score
            best_params = dict(C=C, kernel=kernel, gamma=gamma)
            # The estimator kept here is the unfitted instance that was
            # handed to cross_val_score.
            svm = model

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

Grid Search:   0%|          | 0/18 [00:00<?, ?it/s]

Grid Search: 100%|██████████| 18/18 [08:10<00:00, 27.26s/it]

Best Parameters: {'C': 10.0, 'kernel': 'rbf', 'gamma': 'auto'}
Best Cross-Validation Score: 0.5651845828590015





In [12]:
def svm_evaluation(model, X, y):
    """Score a fitted estimator on `(X, y)`.

    Args:
        model: Object exposing `predict(X)`.
        X: Feature matrix passed to `model.predict`.
        y: True labels.

    Returns:
        Tuple `(accuracy, precision, recall, f1)`.
    """
    predictions = model.predict(X)
    # Metrics are evaluated in the order they appear in the returned tuple.
    metric_functions = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(metric(y, predictions) for metric in metric_functions)

# The estimator selected by the grid search has not been fitted on the full
# training split yet; fit it here, then score it on the held-out rows.
svm.fit(labeled_X, labels)
svm_metrics = svm_evaluation(svm, test_X, test_labels)

acc, prec, rec, f1 = svm_metrics
print(f"Accuracy: {acc}")
print(f"Precision: {prec}")
print(f"Recall: {rec}")
print(f"F1: {f1}")

Accuracy: 0.6182495344506518
Precision: 0.6564885496183206
Recall: 0.34959349593495936
F1: 0.4562334217506631


In [13]:
import joblib
import json

# Persist the selected SVC to the current working directory.
joblib.dump(svm, 'svm.joblib')

# Store the winning hyper-parameters next to it as JSON.
with open('svm_params.json', 'w') as params_file:
    params_file.write(json.dumps(best_params))

In [None]:
# Experiment: degree-4 polynomial kernel, using the C and gamma settings
# spelled out in the constructor call below.
model = SVC(C=10.0, kernel='poly', degree=4, gamma='auto')
model.fit(labeled_X, labels)

# Evaluate the polynomial model that was just fitted. Passing `svm` here
# would silently re-report the metrics of the earlier grid-search model.
acc, prec, rec, f1 = svm_evaluation(model, test_X, test_labels)
print(f"Accuracy: {acc}")
print(f"Precision: {prec}")
print(f"Recall: {rec}")
print(f"F1: {f1}")