# CICY4: LSTM-448 [5-fold-CV] - Inference

In [1]:
import numpy as np
import random
import pandas as pd
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler

In [2]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [3]:
seed = 42

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and asks cuDNN for deterministic
    kernels.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects hash randomisation of interpreters started after this
    # point (e.g. worker processes); the running interpreter already read it.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may select a different (possibly non-deterministic) kernel on
    # each run, so it has to be off for the deterministic flag to be effective.
    torch.backends.cudnn.benchmark = False

seed_everything(seed)

# LSTM-based network architecture

In [4]:
class LSTM_block(nn.Module):
    """Stacked LSTM followed by a two-layer linear head with 4 outputs.

    Args:
        n_inputs: Size D of the feature vector at each time step.
        n_hidden: Hidden size M of every LSTM layer.
        n_rnnlayers: Number L of stacked LSTM layers.
        n_outputs: Stored as ``self.K`` only; the head always emits 4 values.
    """
    def __init__(self, n_inputs, n_hidden, n_rnnlayers, n_outputs):
        super(LSTM_block,self).__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.L = n_rnnlayers
        self.lstm = nn.LSTM(input_size = self.D,
                           hidden_size = self.M,
                           num_layers = self.L,
                           batch_first = True)

        self.feat_vec_size = self.M
        self.fc1 = nn.Linear(self.feat_vec_size, 1024)
        self.fc2 = nn.Linear(1024, 4)
    def forward(self, X):
        """Map a batch of sequences (N, T, D) to predictions of shape (N, 4)."""
        # Zero initial hidden/cell states, allocated on the same device as the
        # input so the module does not depend on a notebook-level `device`.
        h0 = torch.zeros(self.L, X.size(0), self.M, device=X.device)
        c0 = torch.zeros(self.L, X.size(0), self.M, device=X.device)
        # LSTM output has shape (N, T, M)
        out, _ = self.lstm(X, (h0,c0))
        # Keep only the last time step -> (N, M)
        xx = out[:, -1, :]
        # No non-linearity is applied between the two linear layers.
        xx = self.fc1(xx)
        # final output is 4
        xx = self.fc2(xx)
        return xx

# Load data & define dataset class

In [5]:
# Only the test split is loaded for inference.
# It is the original test set from the 72% dataset.
path2 = '/kaggle/input/calabi-yau-cicy-4-folds/'
X_test, y_test = (np.load(path2 + fname) for fname in ('conf_Xtest.npy', 'hodge_ytest.npy'))

print(X_test.shape, y_test.shape)

(181137, 16, 20) (181137, 4)


In [6]:
# Convert the data to float32 torch tensors
# (needed to match the floating-point precision of the network parameters).
# np.asarray accepts both a NumPy array and an already-converted CPU tensor,
# so re-running this cell does not fail on the rebound names.
X_test = torch.from_numpy(np.asarray(X_test, dtype=np.float32))
y_test = torch.from_numpy(np.asarray(y_test, dtype=np.float32))

In [7]:
class CICY4Dataset(torch.utils.data.Dataset):
    """Pairs an indexable collection of inputs with an indexable collection of targets.

    Args:
        X: Inputs; ``len(X)`` defines the dataset length.
        y: Targets, indexed with the same indices as ``X``.
    """
    def __init__(self, X, y):
        self.X, self.y = X, y
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        # One (input, target) pair
        return self.X[idx], self.y[idx]

# Only the test split is wrapped here; the train/validation datasets
# (train_set, val_set) are not built in this inference notebook.
test_set = CICY4Dataset(X=X_test, y=y_test)

In [8]:
from torch.utils.data import DataLoader

# Only a test loader is needed for inference (no train/validation loaders).
# shuffle=False keeps the batches in dataset order.
test_loader = DataLoader(dataset=test_set, shuffle=False, batch_size=128)

# Utility functions

In [9]:
def load_model(load_model_weight):
    """Load a checkpoint with ``torch.load``, remapping it to the CPU when no GPU exists.

    Args:
        load_model_weight: Path of the checkpoint file.

    Returns:
        Whatever object ``torch.load`` reconstructs from the file.
    """
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # may reject whole-model pickles -- confirm against the installed version.
    # map_location=None keeps the storage locations recorded in the file.
    target = None if torch.cuda.is_available() else torch.device('cpu')
    return torch.load(load_model_weight, map_location=target)

In [10]:
################## GET PREDICTIONS + ACCURACY #####################
def get_pred_n_acc(models, device, dataloader, num_iter):
    """Run an ensemble over ``num_iter`` batches and score the first 4 output columns.

    For each batch every model is evaluated, the predictions are averaged
    across models and rounded with ``np.round``; a prediction counts as
    correct when the rounded value equals the target exactly.

    Args:
        models: Iterable of ``nn.Module`` objects forming the ensemble.
        device: Device the input batches are moved to before the forward pass.
        dataloader: Iterable yielding ``(data, target)`` batches. It is
            iterated again from the start if ``num_iter`` exceeds its length.
        num_iter: Number of batches to process (must be positive).

    Returns:
        ``(yp, tgs, acc)``: the rounded ensemble predictions, the targets
        (both concatenated over batches as NumPy arrays) and a list of four
        accuracies in percent, one per output column.

    Raises:
        ValueError: If ``models`` is empty, ``num_iter`` is not positive, or
            the dataloader yields no batches.
    """
    models = list(models)
    if not models:
        raise ValueError("models must contain at least one model")
    if num_iter <= 0:
        raise ValueError("num_iter must be a positive integer")

    # Switch the ensemble members to evaluation mode once, not once per batch.
    for model in models:
        model.eval()

    ypreds = []
    targets = []
    i = 0
    # Inference only: disabling autograd avoids building graphs for each batch.
    with torch.no_grad():
        while i < num_iter:
            batches_this_pass = 0
            for data, target in dataloader:
                batches_this_pass += 1
                data = data.to(device).to(torch.float32)
                # store the i-th target as a CPU NumPy array
                targets.append(target.to(torch.float32).detach().cpu().numpy())
                # one prediction array per ensemble member
                ypred = [model(data).detach().cpu().numpy() for model in models]
                # mean over the models, then round to the nearest integer
                ypreds.append(np.round(np.array(ypred).mean(axis = 0)))
                i += 1
                if i == num_iter:
                    break
            # An empty dataloader would otherwise keep this loop running forever.
            if batches_this_pass == 0:
                raise ValueError("dataloader yielded no batches")

    # Batches are concatenated rather than stacked because the last batch
    # may be smaller than the others.
    yp = np.concatenate(ypreds, axis = 0)
    tgs = np.concatenate(targets, axis = 0)
    # CALCULATING ACCURACY: per-column share of exact matches, in percent
    # (columns are named h11, h21, h31, h22 in this notebook).
    acc = [((yp[:, k] == tgs[:, k]).sum() / len(yp)) * 100 for k in range(4)]
    return  yp, tgs, acc

# Load 5 models from the 5-fold CV training

In [11]:
model_path = '/kaggle/input/calabi-yau-cicy-4-folds/trained_models-[5-fold-CV]/'
# One checkpoint per cross-validation fold, kept in fold order 0..4.
model_list = [load_model(model_path + f'LSTM-448-d80-fold{i}.pt') for i in range(5)]

In [12]:
# Display the loaded fold models (the cell output is their repr).
model_list

[LSTM_block(
   (lstm): LSTM(20, 448, num_layers=2, batch_first=True)
   (fc1): Linear(in_features=448, out_features=1024, bias=True)
   (fc2): Linear(in_features=1024, out_features=4, bias=True)
 ),
 LSTM_block(
   (lstm): LSTM(20, 448, num_layers=2, batch_first=True)
   (fc1): Linear(in_features=448, out_features=1024, bias=True)
   (fc2): Linear(in_features=1024, out_features=4, bias=True)
 ),
 LSTM_block(
   (lstm): LSTM(20, 448, num_layers=2, batch_first=True)
   (fc1): Linear(in_features=448, out_features=1024, bias=True)
   (fc2): Linear(in_features=1024, out_features=4, bias=True)
 ),
 LSTM_block(
   (lstm): LSTM(20, 448, num_layers=2, batch_first=True)
   (fc1): Linear(in_features=448, out_features=1024, bias=True)
   (fc2): Linear(in_features=1024, out_features=4, bias=True)
 ),
 LSTM_block(
   (lstm): LSTM(20, 448, num_layers=2, batch_first=True)
   (fc1): Linear(in_features=448, out_features=1024, bias=True)
   (fc2): Linear(in_features=1024, out_features=4, bias=True)
 )]

In [13]:
# Score every fold model on its own (a one-member "ensemble") over the whole test loader.
for model in model_list:
    yts, tgts, accts = get_pred_n_acc(
        models=[model], device=device, dataloader=test_loader, num_iter=len(test_loader)
    )
    print(f'Test accuracies: {accts}')

Test accuracies: [99.69967483175718, 96.78364994451714, 92.26607484942336, 75.60686110513038]
Test accuracies: [99.8084322915804, 98.29024440064703, 95.34385575558831, 81.98325024705058]
Test accuracies: [99.04381766287396, 91.4241706553603, 82.03900914777212, 58.75166310582598]
Test accuracies: [99.58815703031408, 95.28809685486675, 90.19747483948613, 68.58289581918659]
Test accuracies: [99.70795585661682, 97.5455042316037, 93.88197883370046, 78.75254641514434]


# Ensembles consisting only of LSTM-448 models from 5 fold CV

In [14]:
# Ensemble of all 5 fold models
yts5, tgts5, accts5 = get_pred_n_acc(
    models=model_list, device=device, dataloader=test_loader, num_iter=len(test_loader)
)
print(f'Test accuracies: {accts5}')

Test accuracies: [99.74494443432319, 97.74424882823499, 94.13924267267318, 76.81202625636949]


In [15]:
# Ensemble of only the best 3 models (folds 0, 1 and 4)
yts3e, tgts3e, accts3e = get_pred_n_acc(
    models=[model_list[k] for k in (0, 1, 4)],
    device=device,
    dataloader=test_loader,
    num_iter=len(test_loader),
)
print(f'Test accuracies: {accts3e}')

Test accuracies: [99.8018074716927, 98.31011886031014, 95.29692994805036, 82.43925868265457]


In [16]:
# Ensemble of only the best 2 models (folds 1 and 4)
yts2e, tgts2e, accts2e = get_pred_n_acc(
    models=[model_list[k] for k in (1, 4)],
    device=device,
    dataloader=test_loader,
    num_iter=len(test_loader),
)
print(f'Test accuracies: {accts2e}')

Test accuracies: [99.79904713007282, 98.4641459226994, 95.58234927154584, 83.0354924725484]


# Ensembles involving other models (trained on 72% dataset)

In [17]:
# LSTM-424 trained on the 72% dataset
# NOTE(review): lstm_424 is not referenced by any later cell shown in this notebook.
lstm_424 = load_model('/kaggle/input/calabi-yau-cicy-4-folds/trained_models/LSTM-424.pt')
# LSTM-448 trained on the 72% dataset
lstm_448 = load_model('/kaggle/input/calabi-yau-cicy-4-folds/trained_models/LSTM-448.pt')

In [18]:
##################### CNN ###############################
class CNN_block(nn.Module):
    """Two conv + max-pool stages followed by a single ReLU and a flatten.

    The ReLU is applied once, after the second pooling step; there is no
    activation between the two convolutions. For a (N, 1, 16, 20) input the
    flattened output has shape (N, 384).
    """
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=128, kernel_size=4, stride=1)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1)
        # The same pooling module is reused after each convolution.
        self.mxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flat = nn.Flatten()
        self.conv_total = nn.Sequential(self.conv1, self.mxpool, self.conv2, self.mxpool)
    def forward(self, x):
        features = self.conv_total(x)
        # Flatten is equivalent to reshape(N, -1) here.
        return self.flat(F.relu(features))

In [19]:
class LSTM_block(nn.Module):
    """Stacked LSTM feature extractor returning the last time step's output.

    This definition shadows the earlier ``LSTM_block`` of this notebook and,
    unlike it, has no linear head.
    NOTE(review): presumably redefined under the same name so the pickled
    CNN-LSTM checkpoint can resolve its class -- confirm before renaming.

    Args:
        n_inputs: Size D of the feature vector at each time step.
        n_hidden: Hidden size M of every LSTM layer.
        n_rnnlayers: Number L of stacked LSTM layers.
        n_outputs: Stored as ``self.K`` only; not used by any layer.
    """
    def __init__(self, n_inputs, n_hidden, n_rnnlayers, n_outputs):
        super(LSTM_block,self).__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.L = n_rnnlayers
        self.lstm = nn.LSTM(input_size = self.D,
                           hidden_size = self.M,
                           num_layers = self.L,
                           batch_first = True)
    def forward(self, X):
        """Map a batch of sequences (N, T, D) to features of shape (N, M)."""
        # Zero initial hidden/cell states, allocated on the same device as the
        # input so the module does not depend on a notebook-level `device`.
        h0 = torch.zeros(self.L, X.size(0), self.M, device=X.device)
        c0 = torch.zeros(self.L, X.size(0), self.M, device=X.device)
        # LSTM output has shape (N, T, M)
        out, _ = self.lstm(X, (h0,c0))
        # Keep only h(T), the output at the final time step -> (N, M)
        out = out[:, -1, :]
        return out

In [20]:
class CNN_LSTM_hybrid(nn.Module):
    """Concatenates CNN and LSTM features and maps them to 4 outputs.

    Args:
        cnn_block: Module applied to the input viewed as (N, 1, 16, 20).
        lstm_block: Module applied to the input as given.
        feat_vec_size: Width of the concatenated feature vector fed to ``fc1``.
    """
    def __init__(self, cnn_block, lstm_block, feat_vec_size):
        super().__init__()
        self.cnn_block = cnn_block
        self.lstm_block = lstm_block
        self.feat_vec_size = feat_vec_size
        self.fc1 = nn.Linear(self.feat_vec_size, 1024)
        self.fc2 = nn.Linear(1024, 4)
    def forward(self, x):
        # CNN branch sees the input as a one-channel 16x20 image; (N, 384) with CNN_block
        cnn_feats = self.cnn_block(x.view(-1, 1, 16, 20))
        # LSTM branch sees the input as a sequence; output is (N, M)
        lstm_feats = self.lstm_block(x)
        # Join both feature vectors side by side -> (N, M + 384)
        merged = torch.cat((cnn_feats, lstm_feats), dim=1)
        # Two linear layers; the final output has 4 values per sample
        return self.fc2(self.fc1(merged))

In [21]:
# Load the CNN-LSTM-400 checkpoint.
# NOTE(review): CNN_block, LSTM_block and CNN_LSTM_hybrid are presumably defined
# above so that the pickled model's classes can be resolved -- confirm.
cnn_lstm_400 = load_model('/kaggle/input/calabi-yau-cicy-4-folds/trained_models/CNN-LSTM-400.pt')

In [22]:
# Ensemble of the best 2 LSTM-448 fold models (folds 1 and 4) + CNN_LSTM_400
yts3m, tgts3m, accts3m = get_pred_n_acc(
    models=[model_list[1], model_list[4], cnn_lstm_400],
    device=device,
    dataloader=test_loader,
    num_iter=len(test_loader),
)
print(f'Test accuracies: {accts3m}')

Test accuracies: [99.82609847794764, 98.6805567056979, 96.10791831597079, 84.6674064382208]


In [23]:
# Ensemble of the best 2 LSTM-448 models from 5-fold CV + CNN_LSTM_400 + LSTM-448
yts4m, tgts4m, accts4m = get_pred_n_acc(
    models=[model_list[1], model_list[4], cnn_lstm_400, lstm_448],
    device=device,
    dataloader=test_loader,
    num_iter=len(test_loader),
)
print(f'Test accuracies: {accts4m}')

Test accuracies: [99.840452254371, 98.70595184860078, 96.25918503674015, 85.03177153204481]


In [24]:
# For verification only -- this ensemble isn't included in the paper.
# Ensemble of the best 2 LSTM-448 models from 5-fold CV + LSTM-448
yts5m, tgts5m, accts5m = get_pred_n_acc(
    models=[model_list[1], model_list[4], lstm_448],
    device=device,
    dataloader=test_loader,
    num_iter=len(test_loader),
)
print(f'Test accuracies: {accts5m}')

Test accuracies: [99.8200257263839, 98.53315446319637, 95.8020724644882, 83.92045799588158]


# Save the accuracies to a dataframe

In [25]:
# Each row label is paired with the accuracy list of the ensemble it describes.
labelled = [
    ('LSTM-448-5f', accts5),
    ('LSTM-448-f0f1f4', accts3e),
    ('LSTM-448-f1f4', accts2e),
    ('Ens-f1f4-CL400', accts3m),
    ('Ens-0f1f4-CL400', accts4m),
    ('LSTM-448-0f1f4', accts5m),
]
mlist = [label for label, _ in labelled]

# One row per ensemble, one column per output (h11, h21, h31, h22)
acc_list = np.array([accs for _, accs in labelled])

In [27]:
# Column k of acc_list holds the test accuracy of the k-th output.
column_names = ['Test_h11', 'Test_h21', 'Test_h31', 'Test_h22']
acc_ens5f_dict = {name: acc_list[:, k] for k, name in enumerate(column_names)}
df = pd.DataFrame(acc_ens5f_dict, index = mlist)
# Write the table next to the notebook, then display it.
df.to_csv('acc_ens5f.csv')
df

Unnamed: 0,Test_h11,Test_h21,Test_h31,Test_h22
LSTM-448-5f,99.744944,97.744249,94.139243,76.812026
LSTM-448-f0f1f4,99.801807,98.310119,95.29693,82.439259
LSTM-448-f1f4,99.799047,98.464146,95.582349,83.035492
Ens-f1f4-CL400,99.826098,98.680557,96.107918,84.667406
Ens-0f1f4-CL400,99.840452,98.705952,96.259185,85.031772
LSTM-448-0f1f4,99.820026,98.533154,95.802072,83.920458
