In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn.functional as F
from IPython import display
from tqdm.notebook import tqdm
import random
import math, time, os
from matplotlib import pyplot as plt
import pickle as pkl

from AutoEncoder import Autoencoder as AE
from copy import deepcopy
from sklearn.model_selection import train_test_split

# Preferred GPU index for this project. 'cuda:<n>' is only a valid device when more
# than <n> GPUs are visible, so fall back to the first GPU (or the CPU) otherwise
# instead of failing later when tensors are moved to a non-existent device.
_PREFERRED_CUDA_INDEX = 2
if torch.cuda.is_available():
    _cuda_index = _PREFERRED_CUDA_INDEX if torch.cuda.device_count() > _PREFERRED_CUDA_INDEX else 0
    device = torch.device(f'cuda:{_cuda_index}')
else:
    device = torch.device('cpu')
# Directory that model checkpoints and plots are written to.
prefix = './data_dump'


In [2]:
# day checker: for every dataset, print the distinct values of "rows per day_no".
# A single printed value means each day_no has the same number of rows.
file_paths = ['../Datasets/cf_train_no_noise.csv', 
              '../Datasets/cf_test_no_noise.csv', 
              '../Datasets/cf_train.csv', 
              '../Datasets/cf_test.csv']

for csv_path in file_paths:
    print("NEW FILE")
    rows_per_day = pd.read_csv(csv_path)['day_no'].value_counts()
    distinct_day_sizes = np.unique(rows_per_day.to_numpy())
    print(distinct_day_sizes)

NEW FILE
[65]
NEW FILE
[65]
NEW FILE
[65]
NEW FILE
[65]


In [3]:
class SinusodialDataset(Dataset):
    """Map-style dataset of one-hot encoded categorical features and integer targets.

    The first ``num_features`` columns of ``df`` are each expanded into one 0/1
    indicator column per entry of ``categories``; the target is read from
    ``target_col``. Both are converted to tensors once, at construction, so that
    ``__getitem__`` is a plain tensor index instead of a per-sample pandas lookup.

    Attributes:
        NUM_FEATURES: number of leading columns of ``df`` used as features.
        categories: the category values each feature column is compared against.
        X: DataFrame of indicator columns named ``"<col>_<cat>"``.
        y: the target column of ``df`` (pandas Series).
    """

    # Default category values; used when no ``categories`` argument is given.
    categories = (0, 0.25, 0.5, 0.75, 1)

    def __init__(self, df, num_features=24, target_col='target_10_val', categories=None):
        """Build the one-hot features and cache feature/target tensors.

        Args:
            df: source DataFrame; features are taken positionally from its first
                ``num_features`` columns.
            num_features: how many leading columns are features (default 24).
            target_col: name of the target column (default ``'target_10_val'``).
            categories: iterable of category values; ``None`` keeps the class default.
        """
        self.NUM_FEATURES = num_features
        if categories is not None:
            self.categories = tuple(categories)
        self.X = self.create_categorical_one_hot(df.iloc[:, :self.NUM_FEATURES])
        self.y = df[target_col]

        # Materialise once; targets are cast to integers like the per-item int() used before.
        self._X_tensor = torch.tensor(self.X.to_numpy(), dtype=torch.float32)
        self._y_tensor = torch.tensor(self.y.to_numpy().astype('int64'), dtype=torch.long)

    def create_categorical_one_hot(self, df):
        """Return a DataFrame with one 0/1 column per (column of ``df``, category) pair.

        Columns are ordered feature-major: all categories of the first column,
        then all categories of the second column, and so on. A value that matches
        none of the categories yields an all-zero group.
        """
        one_hot_encoded_columns = []
        for col in df.columns:
            for cat in self.categories:
                indicator = (df[col] == cat).astype(int)
                indicator.name = f"{col}_{cat}"
                one_hot_encoded_columns.append(indicator)

        # Concatenate the one-hot encoded columns along axis 1
        return pd.concat(one_hot_encoded_columns, axis=1)

    def __len__(self):
        """Number of samples (rows)."""
        return self._X_tensor.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, target)`` for integer position ``idx``.

        ``features`` is a 1-D float32 tensor and ``target`` a 0-dim int64 tensor.
        Both are views into the cached tensors, so do not modify them in place.
        """
        return self._X_tensor[idx], self._y_tensor[idx]

In [4]:
def make_data_splits(train_df, test_df, batch_size=32, train_perc=0.9):
    """Encode targets, build datasets and return train/val/test DataLoaders.

    The raw ``target_10_val`` values are replaced by class indices 0, 1, ... using a
    single lookup table shared by both frames, so a given target value gets the same
    index in train and test. The table lists train values in order of first
    appearance (so train indices are what a train-only encoding would give),
    followed by any values that occur only in the test frame.

    The input frames are not modified; encoded copies are used internally.

    Args:
        train_df: training DataFrame containing a ``target_10_val`` column.
        test_df: test DataFrame containing a ``target_10_val`` column.
        batch_size: batch size for all three loaders.
        train_perc: fraction of ``train_df`` kept for training; the remaining
            tail of the frame becomes the validation split (no shuffling).

    Returns:
        ``(loader_train, loader_val, loader_test, data)`` where ``data`` is the
        ``SinusodialDataset`` built from the full (encoded) training frame.
    """
    target_col = 'target_10_val'

    # One shared value -> class-index table; separate per-frame tables would let the
    # same target value map to different indices in train and test.
    class_values = pd.concat(
        [train_df[target_col], test_df[target_col]], ignore_index=True
    ).unique().tolist()
    class_index = {value: idx for idx, value in enumerate(class_values)}

    # assign() keeps the column order, which matters because the dataset selects
    # its feature columns by position.
    train_df = train_df.assign(
        **{target_col: train_df[target_col].map(class_index)}
    ).reset_index(drop=True)
    test_df = test_df.assign(
        **{target_col: test_df[target_col].map(class_index)}
    ).reset_index(drop=True)

    data = SinusodialDataset(train_df)
    data_test = SinusodialDataset(test_df)

    # shuffle=False keeps row order: the leading part is train, the trailing part validation.
    data_train, data_val = train_test_split(data, test_size=(1-train_perc), shuffle=False)

    print("Train-val-test lengths: ", len(data_train), len(data_val), len(data_test))

    # Loaders are deliberately unshuffled so batches follow the original row order.
    loader_train = DataLoader(data_train, batch_size=batch_size, shuffle=False)
    loader_val = DataLoader(data_val, batch_size=batch_size, shuffle=False)
    loader_test = DataLoader(data_test, batch_size=batch_size, shuffle=False)

    return loader_train, loader_val, loader_test, data

In [5]:
class TestTimeAdapter(nn.Module):
    """Autoencoder plus a classifier head that reads the autoencoder's encoding.

    ``forward(x)`` returns the autoencoder output for ``x``;
    ``forward(x, classifier=True)`` returns per-class log-probabilities computed
    from ``self.ae.encode(x)``. A single Adam optimizer over all parameters is
    stored on the module as ``self.optimizer``.
    """

    def __init__(self, ae_dims, cl_dims, lr=1e-3, weight_decay=1e-3):
        """Build the autoencoder, the classifier head and the optimizer.

        Args:
            ae_dims: layer sizes forwarded unchanged to the autoencoder constructor.
            cl_dims: classifier layer sizes ``[in, hidden..., out]``; needs at
                least two entries. ``in`` should equal the size of the encoding.
            lr: Adam learning rate.
            weight_decay: Adam weight decay.

        Raises:
            ValueError: if ``cl_dims`` has fewer than two entries.
        """
        super(TestTimeAdapter, self).__init__()
        if len(cl_dims) < 2:
            raise ValueError("cl_dims must contain at least an input and an output size")
        self.ae_dims = ae_dims
        self.cl_dims = cl_dims

        self.ae = AE(ae_dims)
        self.classifier = nn.ModuleList()

        # Hidden layers: Linear + ReLU for every consecutive pair except the last one.
        for in_dim, out_dim in zip(cl_dims[:-2], cl_dims[1:-1]):
            self.classifier.append(nn.Linear(in_dim, out_dim))
            self.classifier.append(nn.ReLU())
        # Output layer is addressed from the end of cl_dims, so a head without
        # hidden layers (len(cl_dims) == 2) works too.
        self.classifier.append(nn.Linear(cl_dims[-2], cl_dims[-1]))
        self.classifier.append(nn.LogSoftmax(dim=1))
        self.optimizer = optim.Adam(self.parameters(), lr=lr, weight_decay=weight_decay)

    def forward(self, x, classifier=False):
        """Run the autoencoder, or the encoder followed by the classifier head.

        Args:
            x: input batch; cast to float before use.
            classifier: when True return class log-probabilities, otherwise the
                autoencoder output.
        """
        x = x.float()
        if not classifier:
            return self.ae(x)

        x = self.ae.encode(x)
        for layer in self.classifier:
            x = layer(x)
        return x

In [6]:
def accuracy(y_pred, y_test, verbose=True):
    """Return ``(fraction_correct, number_correct)`` for a batch of predictions.

    ``y_pred`` holds one row of per-class scores per sample; the predicted class
    is the position of the row maximum. ``y_test`` holds the true class indices.
    With ``verbose`` the correct count and the batch size are printed.
    """
    n_samples = y_test.shape[0]
    predicted_classes = y_pred.max(dim=1).indices
    n_correct = predicted_classes.eq(y_test).float().sum().item()
    if verbose:
        print(n_correct, n_samples)
    return n_correct / n_samples, n_correct

In [17]:
def Test(net, loader_test,
         device='cpu', Loss=nn.NLLLoss(reduction='sum'),
         test_time_epochs = 3):
    """Evaluate ``net`` on a stream of batches while adapting it at test time.

    For every batch, ``test_time_epochs`` adaptation rounds are run. Each round
    (1) takes one optimizer step on the reconstruction loss of the current batch
    and (2) takes one optimizer step on the classification loss of the batch seen
    two iterations earlier, once such a batch exists. Only after adapting is the
    current batch classified and scored; its own labels are not used for an
    update until two batches later.
    NOTE(review): this looks like it models labels arriving with a two-batch
    delay - confirm the intended protocol.

    ``net`` is updated in place (and left in train mode); pass a copy to keep the
    original weights.

    Args:
        net: model exposing ``net(x, classifier=bool)`` and ``net.optimizer``;
            expected to already live on ``device``.
        loader_test: iterable of ``(X, y)`` batches.
        device: device the batches are moved to.
        Loss: summed negative log-likelihood criterion applied to log-probabilities.
        test_time_epochs: adaptation rounds per batch; 0 disables adaptation.

    Returns:
        ``(loss, acc)``: summed reconstruction + classification loss divided by
        the number of samples, and the fraction of correctly classified samples.

    Raises:
        ValueError: if ``loader_test`` yields no samples.
    """
    group_size = 5  # columns per one-hot feature group in X

    def reconstruction_loss(batch):
        # Each group of `group_size` columns is scored as its own classification
        # problem: log-softmax over the reconstructed group vs. the hot index in X.
        recon = net(batch, classifier=False)
        total = 0.0
        for start in range(0, batch.shape[-1], group_size):
            cols = slice(start, start + group_size)
            total = total + Loss(F.log_softmax(recon[:, cols], dim=1),
                                 batch[:, cols].argmax(dim=1))
        return total

    net.train()
    total_samples = 0
    correct_samples = 0
    loss = 0.0
    X_prev, y_prev = None, None
    X_prev_prev, y_prev_prev = None, None
    for X, y in loader_test:
        X = X.to(device)
        y = y.to(device)
        total_samples += y.shape[0]

        ae_loss = None
        for _ in range(test_time_epochs):
            ae_loss = reconstruction_loss(X)
            net.optimizer.zero_grad()
            ae_loss.backward()
            net.optimizer.step()

            if X_prev_prev is not None:
                y_pred = net(X_prev_prev, classifier=True)
                cl_loss = Loss(y_pred, y_prev_prev)
                net.optimizer.zero_grad()
                cl_loss.backward()
                net.optimizer.step()

        # Scoring only: no gradients are needed for the reported numbers.
        with torch.no_grad():
            if ae_loss is None:
                # No adaptation round ran, so there is no reconstruction loss yet.
                ae_loss = reconstruction_loss(X)
            y_pred = net(X, classifier=True)
            cl_loss = Loss(y_pred, y)

        loss += (ae_loss + cl_loss).item()
        _, i_cor_sam = accuracy(y_pred, y, verbose=False)
        correct_samples += i_cor_sam

        # Slide the two-batch history window.
        X_prev_prev, y_prev_prev = X_prev, y_prev
        X_prev, y_prev = X, y

    if total_samples == 0:
        raise ValueError("loader_test yielded no samples")

    acc = correct_samples / total_samples
    loss /= total_samples
    print('Test/Val loss:', loss, 'Test/Val acc:', acc)
    return loss, acc

In [24]:
def Train(Net, train_loader, noise_level, epochs=20, Loss=nn.NLLLoss(reduction='sum'), 
          verbose=False, device='cpu',
          val_ds=None, loader_test=None):
    """Train ``Net`` with alternating reconstruction and classification steps.

    For every batch one optimizer step is taken on the autoencoder reconstruction
    loss, followed by one optimizer step on the classification loss. After each
    epoch the model is validated on a deep copy (validation adapts the model it is
    given) and the weights are checkpointed under ``prefix``.

    Args:
        Net: model exposing ``Net(x, classifier=bool)`` and ``Net.optimizer``.
        train_loader: iterable of ``(X, y)`` training batches.
        noise_level: label used in the checkpoint file name.
        epochs: number of passes over ``train_loader``.
        Loss: summed negative log-likelihood criterion; also used for validation.
        verbose: print a per-epoch summary line.
        device: device the model and batches are moved to.
        val_ds: iterable of validation batches; validation is skipped when None.
        loader_test: unused; kept so existing callers keep working.

    Returns:
        ``(Net, losses, accs, val_losses, val_accL)`` with one entry per epoch in
        each list (the validation lists stay empty when ``val_ds`` is None).

    Raises:
        ValueError: if ``train_loader`` yields no samples.
    """
    model_save_time = time.time()
    os.makedirs(prefix, exist_ok=True)  # torch.save does not create missing directories
    checkpoint_path = f'{prefix}/net_{noise_level}_{str(model_save_time)}.pth'

    losses = []
    accs = []
    val_losses = []
    val_accL = []
    Net.to(device)
    for e in range(epochs):
        Net.train()
        tot_loss = 0.0
        start_time = time.time()
        correct_samples = 0
        total_samples = 0
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)
            total_samples += y.shape[0]

            # Reconstruction is sliced in the same 5-column groups as X below.
            x_reconst = Net(X, classifier=False)
            ae_loss = 0.0
            for feat in range(0, X.shape[-1], 5):
                # Each 5-column one-hot group is its own classification target.
                ae_loss += Loss(F.log_softmax(x_reconst[:, feat:feat+5], dim=1),
                                X[:, feat:feat+5].argmax(dim=1))
            Net.optimizer.zero_grad()
            ae_loss.backward()
            Net.optimizer.step()

            y_pred = Net(X, classifier=True)
            cl_loss = Loss(y_pred, y)
            Net.optimizer.zero_grad()
            cl_loss.backward()
            Net.optimizer.step()

            # Accumulate plain floats so no autograd history is kept across batches.
            tot_loss += ae_loss.item() + cl_loss.item()
            # Always tracked: `accs` is returned (and plotted) regardless of `verbose`.
            _, i_cor_sam = accuracy(y_pred, y, verbose=False)
            correct_samples += i_cor_sam

        if total_samples == 0:
            raise ValueError("train_loader yielded no samples")

        end_time = time.time()
        t = end_time-start_time
        l = tot_loss/total_samples
        losses += [l]
        a = correct_samples/total_samples
        accs += [a]

        if verbose:
            print('Epoch %2d Loss: %2.5e Accuracy: %2.5f Epoch Time: %2.5f' %(e,l,a,t))

        if val_ds is not None:
            # Validate on a copy so the training weights are left untouched.
            val_loss, val_acc = Test(deepcopy(Net), val_ds, device=device, Loss=Loss)
            val_losses.append(val_loss)
            val_accL.append(val_acc)

        # Same path every epoch: the file always holds the latest weights.
        torch.save(Net.state_dict(), checkpoint_path)

    return Net, losses, accs, val_losses, val_accL

In [25]:
def plot_loss_acc(losses, accs, val_losses, val_accs, noise_level):
    """Save train-vs-validation accuracy and loss curves as PNG files.

    Writes ``{prefix}/acc_{noise_level}.png`` and ``{prefix}/loss_{noise_level}.png``.
    Each curve is drawn on its own figure, which is closed after saving so that no
    empty figure is left open for the notebook to render.

    Args:
        losses: per-epoch training losses.
        accs: per-epoch training accuracies.
        val_losses: per-epoch validation losses.
        val_accs: per-epoch validation accuracies.
        noise_level: label used in the file names and titles.
    """
    os.makedirs(prefix, exist_ok=True)  # savefig does not create missing directories

    curves = (
        ('acc', 'Accuracy', accs, val_accs, 'Train accuracy', 'Val accuracy'),
        ('loss', 'Loss', losses, val_losses, 'Train loss', 'Val loss'),
    )
    for stem, ylabel, train_vals, val_vals, train_label, val_label in curves:
        fig, ax = plt.subplots()
        ax.plot(np.array(train_vals), color='red', label=train_label)
        ax.plot(np.array(val_vals), color='blue', label=val_label)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.set_title(f'{ylabel} (noise level: {noise_level})')
        ax.legend()
        fig.savefig(f'{prefix}/{stem}_{noise_level}.png')
        plt.close(fig)
    return

In [26]:
# CSV files listed as consecutive (train, test) pairs, one pair per noise level.
df_paths = ['../Datasets/cf_train_no_noise.csv', 
              '../Datasets/cf_test_no_noise.csv', 
              '../Datasets/cf_train.csv', 
              '../Datasets/cf_test.csv']

noise_levels = ['none', 'high']
batch_sizes = [65, 65,]

losses_arr = []
accs_arr = []

# One full train / plot / test run per noise level.
for run_idx, noise_level in enumerate(noise_levels):
    train_path, test_path = df_paths[2 * run_idx], df_paths[2 * run_idx + 1]
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    print("Noise Level:", noise_level)
    loader_train, loader_val, loader_test, data = make_data_splits(
        train_df, test_df, batch_size=batch_sizes[run_idx])

    net = TestTimeAdapter(ae_dims=[120, 64, 32, 16, 32, 64, 120],
                          cl_dims=[16, 16, 5])
    net = net.to(device)

    net, losses, accs, val_losses, val_accL = Train(
        net, loader_train, noise_level,
        epochs=20, verbose=True, device=device,
        val_ds=loader_val, loader_test=loader_test)
    plot_loss_acc(losses, accs, val_losses, val_accL, noise_level)

    # Final evaluation on a copy, so the trained weights stay as they are.
    test_loss, test_acc = Test(deepcopy(net), loader_test, device=device)
    print(test_loss, test_acc)

Noise Level: none
Train-val-test lengths:  56160 6240 62400
Epoch  0 Loss: 1.35562e+01 Accuracy: 0.66451 Epoch Time: 13.73608
Test/Val loss: 6.441279413761237 Test/Val acc: 0.7636217948717948
Epoch  1 Loss: 5.64665e+00 Accuracy: 0.81033 Epoch Time: 13.19080
Test/Val loss: 4.4791025479634605 Test/Val acc: 0.8016025641025641
Epoch  2 Loss: 4.06327e+00 Accuracy: 0.84687 Epoch Time: 13.49744
Test/Val loss: 3.3675367538745586 Test/Val acc: 0.8283653846153847
Epoch  3 Loss: 3.18884e+00 Accuracy: 0.86519 Epoch Time: 14.06193
Test/Val loss: 2.6006258988991764 Test/Val acc: 0.8421474358974359
Epoch  4 Loss: 2.62698e+00 Accuracy: 0.87815 Epoch Time: 12.89028
Test/Val loss: 2.056350353436592 Test/Val acc: 0.8649038461538462
Epoch  5 Loss: 2.26596e+00 Accuracy: 0.89154 Epoch Time: 13.38373
Test/Val loss: 1.5909811148276696 Test/Val acc: 0.875
Epoch  6 Loss: 1.90132e+00 Accuracy: 0.90132 Epoch Time: 13.34750
Test/Val loss: 1.2922603154793764 Test/Val acc: 0.8849358974358974
Epoch  7 Loss: 1.78554e+

<Figure size 640x480 with 0 Axes>

In [None]:
# Evaluate on a copy: Test() takes optimizer steps on the model it is given, so
# passing `net` directly would leave it modified for every later cell.
# `net` and `loader_test` are the ones left over from the last training run above.
test_loss, test_acc = Test(deepcopy(net), loader_test, device=device)
print(test_loss, test_acc)

In [None]:
# Evaluate on a copy: Test() takes optimizer steps on the model it is given, so
# passing `net` directly would score (and further modify) a model that earlier
# evaluation cells had already adapted.
# Note: the names test_loss / test_acc hold validation results here.
test_loss, test_acc = Test(deepcopy(net), loader_val, device=device)
print(test_loss, test_acc)