In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys; sys.path.append('../')
%matplotlib inline
import numpy as np
import sys,os
import pickle, gzip
import pandas as pd
import matplotlib,matplotlib.pyplot as plt
import utils
import encoders
import pytorch_models
import pandas as pd
import sklearn, sklearn.model_selection, sklearn.neighbors
import sklearn.linear_model, sklearn.ensemble
import gzip
import utils
import encoders
import collections
from mlp import MLP_train, MLP

In [3]:
import torch
import torch.optim
import torch.nn as nn
import torch.utils.data
import torch.nn.functional as F
import sklearn, sklearn.metrics

In [4]:
# Device used for the PyTorch models: prefer the GPU, but fall back to the CPU
# so the notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
# Display the id -> name mapping for the "btype" labels.
utils.btype_names

{0: 'Normal', 1: 'ESSV (PAC)', 2: 'ESV (PVC)'}

In [6]:
# Display the id -> name mapping for the "rtype" labels.
utils.rtype_names

{0: 'NSR', 1: 'AFib', 2: 'AFlutter'}

In [7]:
import argparse

# Notebook configuration expressed as command-line style options, so the same
# defaults apply whether this runs as a notebook or as an exported script.
parser = argparse.ArgumentParser()
parser.add_argument('-embeddings_file', default="../test_emb_v2.csv.gz", help='File with embeddings')
parser.add_argument('-labels_file', nargs='?', default="../test_labels_v2.csv.gz", help='')
# Jupyter launches the kernel with `-f <connection file>`; accept and ignore it
# so parse_args() does not reject the kernel's own argv.
parser.add_argument('-f', help='')
parser.add_argument('-num_examples', nargs='?', type=int, default=20000, help='')
parser.add_argument('-num_trials', nargs='?', type=int, default=4, help='')
# "conv-basic" is included because evaluate() implements it.
parser.add_argument('-model', type=str, default="mlp",
                    choices=["knn", "mlp", "lr", "adaboost", "conv-basic", "conv-resnet", "conv-transformer"],
                    help='Model to evaluate embeddings with.')
# Only offer public attributes of `encoders` that can actually be instantiated:
# evaluate() calls getattr(encoders, name)(), so re-exported modules and plain
# constants living in that namespace are not valid encoder names.
parser.add_argument('-encode_method', type=str, default=None,
                    choices=[o for o in dir(encoders)
                             if not o.startswith("_") and callable(getattr(encoders, o))],
                    help='to encode the signals on the fly')
args = parser.parse_args()

In [8]:
# Display the parsed configuration namespace.
args

Namespace(embeddings_file='../test_emb_v2.csv.gz', encode_method=None, f='/tmp/jupyterkjahsdaaaaaafkajsd/kernel-e37701c4-eb97-4e38-b504-7110d048b410.json', labels_file='../test_labels_v2.csv.gz', model='mlp', num_examples=20000, num_trials=4)

In [9]:
# List the public (non-underscore) attribute names exposed by the `encoders` module.
[name for name in dir(encoders) if name[:1] != "_"]

['F',
 'biosppy_mean_beat',
 'convautoencoder',
 'convautoencoder_random',
 'dir_path',
 'fft',
 'none',
 'np',
 'os',
 'pca',
 'pca_10',
 'pca_100',
 'pca_50',
 'periodogram',
 'pickle',
 'rand',
 'torch']

In [10]:
def evaluate(model_name, num_examples, label_type, seed, encode_method=None):
    """Train one classifier on a sampled subset and score it on a held-out half.

    Reads the notebook-level ``args`` (embeddings/labels file paths) and
    ``device`` (target device for the PyTorch-backed models).

    Parameters
    ----------
    model_name : str
        One of "knn", "lr", "adaboost", "mlp", "conv-basic", "conv-resnet".
    num_examples : int
        Number of examples requested from ``utils.getSubset``.
    label_type : str
        Column of the labels frame to predict; also forwarded to
        ``utils.getSubset`` as its ``balanced`` argument.
    seed : int
        Seed used for the subset, the train/test split and the PyTorch models.
    encode_method : str or None
        Name of an attribute of ``encoders``; when given, it is instantiated
        and its ``encode`` method is applied to every row before training.

    Returns
    -------
    The balanced accuracy (``sklearn.metrics.balanced_accuracy_score``) of the
    fitted model on the test half.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported models.
    """
    print("Generating subset", seed)

    data, labels = utils.getSubset(num_examples,
                                   embeddings_file=args.embeddings_file,
                                   labels_file=args.labels_file,
                                   seed=seed,
                                   balanced=label_type)

    if encode_method is not None:
        enc = getattr(encoders, encode_method)()
        print("Encoder:", enc)
        # After encoding, `data` is a NumPy array rather than a DataFrame.
        data = np.asarray([enc.encode(emb) for emb in data.values])

    print(collections.Counter(labels[label_type]))

    # Stratified split: half of the rows for training, half for testing.
    half = len(labels) // 2
    X, X_test, y, y_test = \
        sklearn.model_selection.train_test_split(data, labels[label_type],
                                                 train_size=half,
                                                 test_size=half,
                                                 stratify=labels[label_type],
                                                 random_state=seed)
    print("X", X.shape, "X_test", X_test.shape)

    n_classes = len(set(y))
    network = None  # set only by the PyTorch branches, wrapped once below
    if model_name == "knn":
        model = sklearn.neighbors.KNeighborsClassifier(n_neighbors=3)
    elif model_name == "lr":
        model = sklearn.linear_model.LogisticRegression(multi_class="auto")
    elif model_name == "adaboost":
        model = sklearn.ensemble.AdaBoostClassifier()
    elif model_name == "mlp":
        network = pytorch_models.MLP(
            # np.asarray handles both the DataFrame case and the ndarray that
            # results from on-the-fly encoding (which has no `.values`).
            in_channels=len(np.asarray(X)[0]),
            out_channels=1000,
            n_classes=n_classes,
            seed=seed)
    elif model_name == "conv-basic":
        network = pytorch_models.CNN(
            in_channels=1,
            out_channels=10,
            n_layers=5,
            stride=2,
            kernel=50,
            final_layer=120,
            n_classes=n_classes,
            seed=seed)
    elif model_name == "conv-resnet":
        network = pytorch_models.ResNet1D(
            in_channels=1,
            base_filters=128, # 64 for ResNet1D, 352 for ResNeXt1D
            kernel_size=16,
            stride=2,
            groups=32,
            n_block=48,
            n_classes=n_classes,
            downsample_gap=6,
            increasefilter_gap=12,
            use_do=True,
            seed=seed)
    else:
        # Raise instead of sys.exit(): exiting from inside a notebook function
        # hides the real cause from the caller.
        raise ValueError("Unknown model: {!r}".format(model_name))

    if network is not None:
        # All PyTorch networks share the same training wrapper and settings;
        # use the notebook-level `device` rather than a hard-coded "cuda".
        model = pytorch_models.PyTorchModel(network, device=device, batch_size=32, n_epoch=40, seed=seed)

    print(model)
    model.fit(X, y.values.flatten())
    y_pred = model.predict(X_test)
    bacc = sklearn.metrics.balanced_accuracy_score(y_test.values.flatten(), y_pred)
    print("   Run {} ".format(seed) + model_name + ", label_type: {}".format(label_type) + ", Balanced Accuracy Test: {}".format(bacc))

    return bacc


In [15]:
# Resume from previously saved results when the file exists; otherwise start
# with an empty frame so a fresh checkout can run the sweep from scratch.
results = pd.read_csv("results-bal2.csv") if os.path.exists("results-bal2.csv") else pd.DataFrame()

In [22]:
#results = pd.DataFrame()

In [32]:
# Benchmark sweep over label type, subset size, seed and model(+encoder).
# Configurations already present in `results` are skipped, and the CSV is
# rewritten after every run so an interrupted sweep can be resumed.
for label_type in ["rtype", "btype"]:
    for num_examples in [48, 120, 1200, 12000]:
        for seed in range(0, 6):
            for model_desc in ["knn+pca_50", "knn+fft", "knn", "mlp", "conv-resnet", "conv-basic"]:

                # "model+encoder" -> (model, encoder); encoder is None when absent.
                model, _, encode_method = model_desc.partition("+")
                encode_method = encode_method or None

                res = {"model": model,
                       "num_examples": int(num_examples),
                       "label_type": label_type,
                       "seed": int(seed),
                       "encode_method": encode_method}

                # An inner merge on the shared columns is non-empty exactly when
                # this configuration has already been recorded.
                if (len(results) > 0) and (len(pd.merge(pd.DataFrame(res, index=[0]), results)) > 0):
                    print("already done: ", res)
                    continue

                print("running: ", res)
                bacc = evaluate(model, num_examples=num_examples, label_type=label_type, seed=seed, encode_method=encode_method)
                res["bacc"] = bacc
                # DataFrame.append was removed in pandas 2.0; pd.concat is the
                # supported way to add a row.
                results = pd.concat([results, pd.DataFrame([res])], ignore_index=True)
                results.to_csv("results-bal2.csv", index=False)


already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'rtype', 'seed': 0, 'encode_method': 'pca_50'}
already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'rtype', 'seed': 0, 'encode_method': 'fft'}
already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'rtype', 'seed': 0, 'encode_method': None}
already done:  {'model': 'mlp', 'num_examples': 48, 'label_type': 'rtype', 'seed': 0, 'encode_method': None}
already done:  {'model': 'conv-resnet', 'num_examples': 48, 'label_type': 'rtype', 'seed': 0, 'encode_method': None}
already done:  {'model': 'conv-basic', 'num_examples': 48, 'label_type': 'rtype', 'seed': 0, 'encode_method': None}
already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'rtype', 'seed': 1, 'encode_method': 'pca_50'}
already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'rtype', 'seed': 1, 'encode_method': 'fft'}
already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'rtype', 'seed': 1, 'encode_method': 

already done:  {'model': 'knn', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 1, 'encode_method': 'pca_50'}
already done:  {'model': 'knn', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 1, 'encode_method': 'fft'}
already done:  {'model': 'knn', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 1, 'encode_method': None}
already done:  {'model': 'mlp', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 1, 'encode_method': None}
already done:  {'model': 'conv-resnet', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 1, 'encode_method': None}
already done:  {'model': 'conv-basic', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 1, 'encode_method': None}
already done:  {'model': 'knn', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 2, 'encode_method': 'pca_50'}
already done:  {'model': 'knn', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 2, 'encode_method': 'fft'}
already done:  {'model': 'knn', 'num_examples': 1200, 'label_type': 'rtype', 'seed': 2,

already done:  {'model': 'conv-resnet', 'num_examples': 48, 'label_type': 'btype', 'seed': 4, 'encode_method': None}
already done:  {'model': 'conv-basic', 'num_examples': 48, 'label_type': 'btype', 'seed': 4, 'encode_method': None}
already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'btype', 'seed': 5, 'encode_method': 'pca_50'}
already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'btype', 'seed': 5, 'encode_method': 'fft'}
already done:  {'model': 'knn', 'num_examples': 48, 'label_type': 'btype', 'seed': 5, 'encode_method': None}
already done:  {'model': 'mlp', 'num_examples': 48, 'label_type': 'btype', 'seed': 5, 'encode_method': None}
already done:  {'model': 'conv-resnet', 'num_examples': 48, 'label_type': 'btype', 'seed': 5, 'encode_method': None}
already done:  {'model': 'conv-basic', 'num_examples': 48, 'label_type': 'btype', 'seed': 5, 'encode_method': None}
already done:  {'model': 'knn', 'num_examples': 120, 'label_type': 'btype', 'seed': 0, 'encod

already done:  {'model': 'conv-basic', 'num_examples': 12000, 'label_type': 'btype', 'seed': 2, 'encode_method': None}
already done:  {'model': 'knn', 'num_examples': 12000, 'label_type': 'btype', 'seed': 3, 'encode_method': 'pca_50'}
already done:  {'model': 'knn', 'num_examples': 12000, 'label_type': 'btype', 'seed': 3, 'encode_method': 'fft'}
already done:  {'model': 'knn', 'num_examples': 12000, 'label_type': 'btype', 'seed': 3, 'encode_method': None}
already done:  {'model': 'mlp', 'num_examples': 12000, 'label_type': 'btype', 'seed': 3, 'encode_method': None}
already done:  {'model': 'conv-resnet', 'num_examples': 12000, 'label_type': 'btype', 'seed': 3, 'encode_method': None}
already done:  {'model': 'conv-basic', 'num_examples': 12000, 'label_type': 'btype', 'seed': 3, 'encode_method': None}
already done:  {'model': 'knn', 'num_examples': 12000, 'label_type': 'btype', 'seed': 4, 'encode_method': 'pca_50'}
already done:  {'model': 'knn', 'num_examples': 12000, 'label_type': 'bty

In [None]:
# Mean balanced accuracy over seeds for every (label, size, model, encoder)
# configuration.
results2 = (
    results
    .drop(columns="seed")
    # groupby drops NaN keys by default, so give "no encoder" an explicit label.
    # Only this column is filled, which keeps `bacc` numeric.
    .assign(encode_method=lambda df: df["encode_method"].fillna("None"))
)
# Select `bacc` explicitly: mean()'s first positional parameter is
# `numeric_only`, not a column list.
results2.groupby(["label_type", "num_examples", "model", "encode_method"])[["bacc"]].mean()

In [None]:
# print(results.groupby(["label_type", "num_examples", "model"]).mean(["bacc"]).to_latex())