In [34]:
import sys
sys.path.insert(0, '../')

import os
import pandas as pd
import numpy as np

import torch
import torch.nn as nn

# Dataset loading
from typing import Tuple
from pickle import load, dump
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
from src.vg2signal import read_raw_vg_as_df, make_smoother, make_shoulder_getter, make_detilter

In [51]:
class convVolta(nn.Module):
    def __init__(self, input_dim,
                       input_channel,
                       kernel_size):
        super(convVolta, self).__init__()
        self.conv1   = nn.Conv1d(1,  input_channel, kernel_size,     stride=2)
        self.conv2   = nn.Conv1d(input_channel, input_channel//2, kernel_size, stride=2)
        self.conv3   = nn.Conv1d(input_channel//2, input_channel//4, kernel_size, stride=2)
        self.conv4   = nn.Conv1d(input_channel//4, 1, kernel_size, stride=2)
        
        # self.final   = nn.Linear(input_dim, 1)
        self.pool    = nn.AdaptiveAvgPool2d((1,1))
        self.relu    = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.relu(x)
        x = self.conv4(x)
        x = self.pool(x)
        x = self.sigmoid(x)

        return x    

In [52]:
def v2signal_extra_features(vg_filename: str,
             do_log: bool,
             smoothing_bw: float,
             vcenter: float,
             vwidth: float,
             stiffness: float):

    vg_df = read_raw_vg_as_df(vg_filename)

    if (vg_df['I'].to_numpy() < 0).any():
        temp = [None] * 11
        return [None, None, vg_df] + temp

    if do_log:
        cur_var_name = "logI"
        #vg_df[cur_var_name] = np.emath.logn(logbase, vg_df["I"])
        vg_df[cur_var_name] = np.log2(vg_df["I"])
    else:
        cur_var_name = "I"

    smoother = make_smoother(smoothing_bw)

    vg_df["smoothed"] = smoother(vg_df["V"], vg_df[cur_var_name].to_numpy())

    shoulder_getter = make_shoulder_getter(1, 1.1)
    (peak_signal, peak_v_shoulder) = shoulder_getter(vg_df["V"],
                                                     vg_df["smoothed"])

    vcenter = peak_v_shoulder
    vstart = vcenter - 0.5*vwidth
    vend = vcenter + 0.5*vwidth

    detilter = make_detilter(vstart, vend, stiffness)
    vg_df["detilted"] = detilter(vg_df["V"].to_numpy(),
                                 vg_df["smoothed"].to_numpy())

   
    return  vg_df

def run_vg2_raw(folderpath: str, 
            do_log:bool, 
            recenter:bool, 
            smoothing_bw:float, 
            stiffness:float, 
            vcenter:float, 
            vwidth1:float, 
            vwidth2:float) -> Tuple[dict, str]:

    os.chdir(folderpath)  # change to desired folderpath
    dfxl     = pd.DataFrame(columns=['labels', 'VI'])
    
    for filename in os.listdir():
        if filename[-3:] == 'txt':
            print("Analyzing:", filename)
            df = v2signal_extra_features(filename,
                                        do_log,
                                        smoothing_bw,
                                        vcenter,
                                        vwidth1,
                                        stiffness)

            idx1 = filename.rfind("cbz")
            idx2 = filename[idx1:].find("_")
            conc = filename[idx1 + 3:idx1 + idx2]
            replicate = filename[idx1 + idx2 + 1:filename.rfind(".")]

            # Crop only the signal
            try:
                crop = (df['V']<1.15) & (df['V']>0.93)
                VI   =  df['detilted'][crop].to_numpy() 
            except:
                continue

            if 'p' in conc:  
                pi = conc.find('p')
                conctemp = conc[:pi] + '.' + conc[pi + 1:]
                conc = conctemp
            
            concstrxl   = str(float(conc))
            dfxl = pd.concat([dfxl, pd.DataFrame({'labels':concstrxl, 'VI':[VI]})])

    with open(f"{folderpath}/raw_data.pickle", 'wb') as f:
        dump(dfxl, f)
   
    return    dfxl  

def load_dataset(dataset_path=None) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
    if dataset_path==None: dataset_path = DATASET_PATH

    if ('ML1_ML2'in os.path.basename(dataset_path)) or ('test' in os.path.basename(dataset_path)):
        datasets = sorted([f"{i}/raw_data.pickle" for i in glob(f'{dataset_path}/*')])
        
        df = []
        for dataset in datasets:

            with open(dataset, 'rb') as f:
                data = load(f)
                df.append(data) 
        df = pd.concat(df)

    else:
        dataset_path = f"{dataset_path}/raw_data.pickle"
        with open(dataset_path, 'rb') as f:
            df = load(f)

    X = [x.iloc[0] for (_,x) in df[['VI']].iterrows()] 
    X = np.array(X)
    y = df['labels'].apply(lambda x: float(x))

    # Split the total dataset into training (60%) and testing (40%) dataset
    X_train, X_test, y_train, y_test  = train_test_split(X, y, test_size=0.4, shuffle=True, random_state=20, stratify=y)

    return (X_train, X_test, y_train, y_test), (X, y)

In [59]:
# Count number of parameters:
input_dim     = 55
input_channel = 64
kernel_size   = 3

model       = convVolta(input_dim, input_channel, kernel_size)

In [60]:
num_params = np.sum([np.prod(parameter.detach().numpy().shape) for parameter in model.parameters() if parameter.requires_grad])
num_params

8033

In [61]:
(X_train, X_test, y_train, y_test), _ =  load_dataset('/Users/sangam/Desktop/Epilepsey/Code/vgramreg/dataset/ML1_ML2')

In [62]:
(ML1_X_train, ML1_X_test, ML1_y_train, ML1_y_test), _  = load_dataset('/Users/sangam/Desktop/Epilepsey/Code/vgramreg/dataset/ML1_ML2/2024_02_19_ML1')
(ML2_X_train, ML2_X_test, ML2_y_train, ML2_y_test), _  = load_dataset('/Users/sangam/Desktop/Epilepsey/Code/vgramreg/dataset/ML1_ML2/2024_02_22_ML2')
(ML4_X_train, ML4_X_test, ML4_y_train, ML4_y_test), _  = load_dataset('/Users/sangam/Desktop/Epilepsey/Code/vgramreg/dataset/ML4')


In [63]:
X_train = np.concatenate([ML1_X_train, ML2_X_train, ML4_X_train], axis=0)
X_test  = np.concatenate([ML1_X_test, ML2_X_test, ML4_X_test], axis=0)
y_train = np.concatenate([ML1_y_train, ML2_y_train, ML4_y_train], axis=0)
y_test  = np.concatenate([ML1_y_test, ML2_y_test, ML4_y_test], axis=0)

In [64]:
# Train the model
iteration    = 100000
lr           = 1e-5
batch_size   = 32
num_datasize = len(X_train)

# Define loss function
criteron  = nn.MSELoss()

# Define optimizer
optim     = torch.optim.Adam(model.parameters(),lr=lr)
num_step  = num_datasize // batch_size

for epoch in range(iteration):
    model.train()
    ind_shuffle = np.random.permutation(num_datasize)
    X_train        = X_train[ind_shuffle]
    y_train        = y_train[ind_shuffle]

    for i in range(num_step):
        start = i*batch_size
        end   = start + batch_size

        X_    = torch.tensor(np.transpose(X_train[start:end][np.newaxis,...], (1,0,2)), dtype=torch.float32)
        y_    = torch.tensor(y_train[start:end]/16.0, dtype=torch.float32)
        
        # Train Deep Learning Model
        output = model(X_)
        loss   = criteron(output, y_)

        optim.zero_grad()
        loss.backward()
        optim.step()

    if (epoch%500 == 0):
        print(f"Epoch:{epoch} | Loss:{loss.item():.2f}")

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch:0 | Loss:0.19
Epoch:500 | Loss:0.14
Epoch:1000 | Loss:0.18
Epoch:1500 | Loss:0.16
Epoch:2000 | Loss:0.17
Epoch:2500 | Loss:0.16
Epoch:3000 | Loss:0.20
Epoch:3500 | Loss:0.17
Epoch:4000 | Loss:0.16
Epoch:4500 | Loss:0.17
Epoch:5000 | Loss:0.15
Epoch:5500 | Loss:0.14
Epoch:6000 | Loss:0.16
Epoch:6500 | Loss:0.17
Epoch:7000 | Loss:0.15
Epoch:7500 | Loss:0.16
Epoch:8000 | Loss:0.19
Epoch:8500 | Loss:0.17
Epoch:9000 | Loss:0.18
Epoch:9500 | Loss:0.18
Epoch:10000 | Loss:0.18
Epoch:10500 | Loss:0.18
Epoch:11000 | Loss:0.16
Epoch:11500 | Loss:0.16
Epoch:12000 | Loss:0.19
Epoch:12500 | Loss:0.16
Epoch:13000 | Loss:0.14
Epoch:13500 | Loss:0.19
Epoch:14000 | Loss:0.18
Epoch:14500 | Loss:0.16
Epoch:15000 | Loss:0.16
Epoch:15500 | Loss:0.16
Epoch:16000 | Loss:0.15
Epoch:16500 | Loss:0.19
Epoch:17000 | Loss:0.13
Epoch:17500 | Loss:0.20
Epoch:18000 | Loss:0.19
Epoch:18500 | Loss:0.16
Epoch:19000 | Loss:0.19
Epoch:19500 | Loss:0.19
Epoch:20000 | Loss:0.14
Epoch:20500 | Loss:0.18
Epoch:21000 | Lo

KeyboardInterrupt: 

In [49]:
X_t       = torch.tensor(np.transpose(X_test[np.newaxis,...], (1,0,2)), dtype=torch.float32)
test_pred = model(X_t)

In [50]:
r2_score(y_test, test_pred.squeeze().detach().numpy())

0.02945621556021727

array([8.090213 , 7.721645 , 8.065046 , 8.034036 , 7.9796767, 7.71783  ,
       7.78349  , 8.050802 , 7.7186203, 7.7165174, 7.8573875, 7.936646 ,
       8.125941 , 7.9429097, 8.038576 , 7.728791 , 7.989955 , 8.0466795,
       7.9552026, 7.7020636, 7.9745417, 8.067125 , 7.7954593, 7.7102504,
       7.716678 , 7.738204 , 8.108349 , 8.061393 , 7.887176 , 8.079269 ,
       8.05351  , 7.724674 , 7.7506037, 8.13549  , 7.715234 , 7.9610424,
       8.068936 , 7.717053 , 7.710494 , 7.738425 , 8.000651 , 8.063717 ,
       7.7187304, 7.985941 , 8.155564 , 8.053347 , 7.7255297, 8.021147 ,
       8.143539 , 7.7249837, 8.101517 , 8.023556 , 8.11741  , 7.730401 ,
       8.133028 , 7.8790226, 8.098868 , 7.7217765, 7.7202053, 8.086627 ,
       7.894541 , 8.076915 , 7.8489404, 7.9665785, 7.7393403, 7.913589 ,
       7.7058344, 7.9713373, 7.7234445, 7.963597 , 8.13247  , 8.056703 ,
       8.134447 , 7.942874 , 8.009087 , 7.800006 , 7.745957 , 7.705672 ,
       7.8689437, 7.9988728, 7.71984  , 7.722468 , 