In [1]:
import numpy as np
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
sns.set_style("darkgrid")

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler

# Load data and define utility functions

In [2]:
# load data
path = '/kaggle/input/calabi-yau-cicy-4-folds/'
conf = np.load('/kaggle/input/calabi-yau-cicy-4-folds/conf.npy')
hodge = np.load(os.path.join(path, 'hodge.npy'))
direct = np.load(os.path.join(path, 'direct.npy'))

X = conf
y = hodge
h11 = hodge[:,0]
h21 = hodge[:,1]
h22 = hodge[:,3]
h31 = hodge[:,2]

In [3]:
import sys
sys.path.append('/kaggle/input/calabi-yau-cicy-4-folds')
from CICY4_functions import data_generator

if not os.path.exists('saved_models'):
    os.makedirs('saved_models')
    
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [4]:
from sklearn.model_selection import train_test_split

def train_test(X, y, test_size):
    X_train,  X_test, y_train, y_test = train_test_split(X, y, test_size = test_size, random_state = 101, shuffle = True)
    
    X_train = torch.from_numpy(X_train.astype(np.float32))
    #only need reshape if the y dimension is 1
    #y_train = torch.from_numpy(y_train.astype(np.float32).reshape(-1, 1))
    y_train = torch.from_numpy(y_train.astype(np.float32))

    X_test = torch.from_numpy(X_test.astype(np.float32))
    #y_test = torch.from_numpy(y_test.astype(np.float32).reshape(-1, 1))
    y_test = torch.from_numpy(y_test.astype(np.float32))                         
    
    print(f'X_train shape: {X_train.shape}, \n y_train shape:{y_train.shape},\
                 \n X_test shape: {X_test.shape}, \n y_test shape:{y_test.shape}')
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = train_test(X, y, 0.2)

train_gen = lambda: data_generator(X_train, y_train)
test_gen = lambda: data_generator(X_test, y_test)

X_train shape: torch.Size([737197, 16, 20]), 
 y_train shape:torch.Size([737197, 4]),                 
 X_test shape: torch.Size([184300, 16, 20]), 
 y_test shape:torch.Size([184300, 4])


In [5]:
################## TRAINING LOOP #####################
def batch_gd_scheduler(model, criterion, optimizer, train_gen, test_gen, scheduler, 
             epochs, device, batch_size=128,  cnn = False):
  train_losses = np.zeros(epochs)
  test_losses = np.zeros(epochs)
  model.train()

  for i in range(epochs):
    t0 = datetime.now()
    train_loss = []
    for inputs, target in train_gen():
        if cnn:
            inputs = inputs.view(batch_size,1,16,20)
        inputs, target = inputs.to(device), target.to(device)

        optimizer.zero_grad()
        out = model(inputs)
        loss = criterion(out, target)
        loss.backward()
        optimizer.step() 
        train_loss.append(loss.item())

    train_loss = np.mean(train_loss)

    test_loss = []
    for inputs, target in test_gen():
        if cnn:
            inputs = inputs.view(batch_size,1,16,20)
        inputs, target = inputs.to(device), target.to(device)
        out = model(inputs)
        loss = criterion(out, target)
        test_loss.append(loss.item())
        
    test_loss = np.mean(test_loss) 
    train_losses[i] = train_loss
    test_losses[i] = test_loss
    
    #Apply scheduler after the train+validate parts
    before_lr = optimizer.param_groups[0]["lr"]
    scheduler.step(test_loss)
    after_lr = optimizer.param_groups[0]["lr"]

    dt = datetime.now()-t0
    print(f'Epoch: {i+1}/{epochs}, train loss: {train_loss: .4f}\
          test_loss: {test_loss: .4f}, duration: {dt}, \
          learning rate: {before_lr, after_lr}')
  return train_losses, test_losses

In [6]:
def train_from_scratch_or_load(load_model_weight, cnn = False):
    
    if load_model_weight==None:
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters())
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
        train_losses, test_losses=batch_gd_scheduler(model, criterion, 
                                                     optimizer,train_gen, test_gen,
                                                      scheduler, epochs, 
                                                       device = device, cnn=cnn)
        return train_losses, test_losses
    else:
        if torch.cuda.is_available():
            trained_model = torch.load(load_model_weight)     
        else:
            trained_model = torch.load(load_model_weight, map_location=torch.device('cpu'))       
        return trained_model

In [7]:
def plot_losses(train_losses, test_losses):
    # Plot the train loss and test loss per iteration
    plt.plot(train_losses, label='train loss')
    plt.plot(test_losses, label='test loss')
    plt.legend()
    plt.show()

In [8]:
################## ACCURACY CALCULATION #####################
def calc_accuracy_mr(model, train_gen, test_gen, device, cnn = False):
    model.eval()
    n_correct_h11 = 0.
    n_total_h11 = 0.
    n_correct_h21 = 0.
    n_total_h21 = 0.
    n_correct_h31 = 0.
    n_total_h31 = 0.
    n_correct_h22 = 0.
    n_total_h22 = 0.
    
    batch_size =128
    
    for inputs, target in train_gen():
        if cnn:
            inputs = inputs.view(batch_size,1,16,20)
        inputs, target = inputs.to(device), target.to(device)
        #Perform the prediction
        #round up the prediction to the nearest integer
        ypred = torch.round(model(inputs))
        yp_h11 = ypred[:,0]
        yp_h21 = ypred[:,1]
        yp_h31 = ypred[:,2]
        yp_h22 = ypred[:,3]
        tg_h11 = target[:,0]
        tg_h21 = target[:,1]
        tg_h31 = target[:,2]
        tg_h22 = target[:,3]
        # update counts
        n_correct_h11 += (yp_h11 == tg_h11).sum().item()
        n_total_h11 += tg_h11.shape[0]
        n_correct_h21 += (yp_h21 == tg_h21).sum().item()
        n_total_h21 += tg_h21.shape[0]
        n_correct_h31 += (yp_h31 == tg_h31).sum().item()
        n_total_h31 += tg_h31.shape[0]
        n_correct_h22 += (yp_h22 == tg_h22).sum().item()
        n_total_h22 += tg_h22.shape[0]
    train_acc_h11 = n_correct_h11 / n_total_h11
    train_acc_h21 = n_correct_h21 / n_total_h21
    train_acc_h31 = n_correct_h31 / n_total_h31
    train_acc_h22 = n_correct_h22 / n_total_h22
    
    #TEST SET
    n_correct_h11 = 0.
    n_total_h11 = 0.
    n_correct_h21 = 0.
    n_total_h21 = 0.
    n_correct_h31 = 0.
    n_total_h31 = 0.
    n_correct_h22 = 0.
    n_total_h22 = 0.
    for inputs, target in test_gen():
        if cnn:
            inputs = inputs.view(batch_size,1,16,20)
        inputs, target = inputs.to(device), target.to(device)
        
        # Forward pass
        ypred = torch.round(model(inputs))
        yp_h11 = ypred[:,0]
        yp_h21 = ypred[:,1]
        yp_h31 = ypred[:,2]
        yp_h22 = ypred[:,3]
        tg_h11 = target[:,0]
        tg_h21 = target[:,1]
        tg_h31 = target[:,2]
        tg_h22 = target[:,3]
        # update counts
        n_correct_h11 += (yp_h11 == tg_h11).sum().item()
        n_total_h11 += tg_h11.shape[0]
        n_correct_h21 += (yp_h21 == tg_h21).sum().item()
        n_total_h21 += tg_h21.shape[0]
        n_correct_h31 += (yp_h31 == tg_h31).sum().item()
        n_total_h31 += tg_h31.shape[0]
        n_correct_h22 += (yp_h22 == tg_h22).sum().item()
        n_total_h22 += tg_h22.shape[0]
    test_acc_h11 = n_correct_h11 / n_total_h11
    test_acc_h21 = n_correct_h21 / n_total_h21
    test_acc_h31 = n_correct_h31 / n_total_h31
    test_acc_h22 = n_correct_h22 / n_total_h22
    
    train_acc = [train_acc_h11,train_acc_h21,train_acc_h31,train_acc_h22]
    test_acc = [test_acc_h11,test_acc_h21,test_acc_h31,test_acc_h22]

    return train_acc, test_acc

# Model 1: GRU-based

In [9]:
class RNN_cicy4(nn.Module):
    def __init__(self, n_inputs, n_hidden, n_rnnlayers, n_outputs):
        super(RNN_cicy4,self).__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.L = n_rnnlayers        
        #self.lstm = nn.LSTM(input_size = self.D,
        #                   hidden_size = self.M,
        #                   num_layers = self.L,
        #                   batch_first = True)    
        self.gru = nn.GRU(input_size = self.D,
                           hidden_size = self.M,
                           num_layers = self.L,
                           batch_first = True)
        self.fc1 = nn.Linear(self.M, 128)
        self.fc2 = nn.Linear(128, self.K)
       
    def forward(self, X):
        #input X is NxTxD
        #initial hidden states
        h0 = torch.zeros(self.L, X.size(0), self.M).to(device)
        #c0 = torch.zeros(self.L, X.size(0), self.M).to(device)
        #get LSTM unit output:
        #output is NxTxM
        #out, _ = self.lstm(X, (h0,c0))
        out, _ = self.gru(X, h0)   
        #we only want h(T) at the final time step
        out = out[:, -1, :]
        out = F.relu(self.fc1(out))
        out = self.fc2(out)

        return out

In [10]:
gru_model_weight = '/kaggle/input/calabi-yau-cicy-4-folds/GRU_cicy4_Hodge_v5.pt'
trained_gru = train_from_scratch_or_load(gru_model_weight, cnn = False)

In [11]:
prev_train_acc, prev_test_acc = calc_accuracy_mr(trained_gru, train_gen , test_gen, device = device, cnn= False)
print(f'Train accuracy for h11:{prev_train_acc[0]:.4f}, Test accuracy for h11: {prev_test_acc[0]:.4f}')
print(f'Train accuracy for h21:{prev_train_acc[1]:.4f}, Test accuracy for h21: {prev_test_acc[1]:.4f}')
print(f'Train accuracy for h31:{prev_train_acc[2]:.4f}, Test accuracy for h31: {prev_test_acc[2]:.4f}')
print(f'Train accuracy for h22:{prev_train_acc[3]:.4f}, Test accuracy for h22: {prev_test_acc[3]:.4f}')

Train accuracy for h11:0.7202, Test accuracy for h11: 0.7195
Train accuracy for h21:0.6606, Test accuracy for h21: 0.6564
Train accuracy for h31:0.3886, Test accuracy for h31: 0.3659
Train accuracy for h22:0.1638, Test accuracy for h22: 0.1545


# Model 2: CNN-GRU hybrid

In [12]:
##################### CNN ###############################
class CNN_block(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,128, 4, 1)
        #self.bn1 = nn.BatchNorm2d(128)
        self.conv2 = nn.Conv2d(128,64, 3, 1)
        #self.bn2 = nn.BatchNorm2d(64)
        self.mxpool = nn.MaxPool2d(2,2)
        self.flat = nn.Flatten()
        self.conv_total = nn.Sequential(
            self.conv1,
            #self.bn1,
            self.mxpool,
            #self.bn2,
            self.conv2,
            self.mxpool,
        )

    def forward(self,x):
        x = F.relu(self.conv_total(x))
        #reshape is the same as flat(x)
        #x = x.reshape(x.shape[0], -1)
        x = self.flat(x)
        
        return x

In [13]:
class RNN_block(nn.Module):
    def __init__(self, n_inputs, n_hidden, n_rnnlayers, n_outputs):
        super(RNN_block,self).__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.L = n_rnnlayers        
        #self.lstm = nn.LSTM(input_size = self.D,
        #                   hidden_size = self.M,
        #                   num_layers = self.L,
        #                   batch_first = True)    
        self.gru = nn.GRU(input_size = self.D,
                           hidden_size = self.M,
                           num_layers = self.L,
                           batch_first = True)
        #self.fc1 = nn.Linear(self.M, 128)
        #self.fc2 = nn.Linear(128, self.K)
    def forward(self, X):
        #input X is NxTxD
        #initial hidden states
        h0 = torch.zeros(self.L, X.size(0), self.M).to(device)
        #c0 = torch.zeros(self.L, X.size(0), self.M).to(device)
        #get LSTM unit output:
        #output is NxTxM
        #out, _ = self.lstm(X, (h0,c0))
        out, _ = self.gru(X, h0)   
        #we only want h(T) at the final time step
        # output is now of shape (N, M)
        out = out[:, -1, :]

        return out

In [14]:
class CNN_RNN_hybrid(nn.Module):
    def __init__(self, cnn_block, rnn_block, feat_vec_size):
        super(CNN_RNN_hybrid, self).__init__()
        self.cnn_block = cnn_block
        self.rnn_block = rnn_block
        self.feat_vec_size = feat_vec_size
        self.fc1 = nn.Linear(self.feat_vec_size, 1024)
        self.fc2 = nn.Linear(1024, 4)
        
    def forward(self, x):
        #output of cnn block is (N,384)
        x1 = x.view(-1,1, 16,20)
        x1 = self.cnn_block(x1)
        #output of rnn block is (N,M = 64)
        x2 = self.rnn_block(x)
        #concatenate the 2 outputs to produce a feat vec (N, M+384)
        xx = torch.cat([x1, x2], dim = 1)
        # pass through linear layers
        xx = self.fc1(xx)
        #final output is 4
        xx = self.fc2(xx)
        
        return xx

In [15]:
#gru_block = RNN_block(20, 256, 2, 4)
#cnn_block = CNN_block()
cnn_gru_model_weight = '/kaggle/input/calabi-yau-cicy-4-folds/CNN_GRU_hybrid_cicy4_Hodge_v2.pt'
trained_cnn_gru = train_from_scratch_or_load(cnn_gru_model_weight, cnn = False)
trained_cnn_gru 

CNN_RNN_hybrid(
  (cnn_block): CNN_block(
    (conv1): Conv2d(1, 128, kernel_size=(4, 4), stride=(1, 1))
    (conv2): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
    (mxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (flat): Flatten(start_dim=1, end_dim=-1)
    (conv_total): Sequential(
      (0): Conv2d(1, 128, kernel_size=(4, 4), stride=(1, 1))
      (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (2): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (rnn_block): RNN_block(
    (gru): GRU(20, 256, num_layers=2, batch_first=True)
  )
  (fc1): Linear(in_features=640, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=4, bias=True)
)

In [38]:
train_acc_hyd, test_acc_hyd = calc_accuracy_mr(trained_cnn_gru, train_gen , test_gen, device = device, cnn= False)
print(f'Train accuracy for h11:{train_acc_hyd[0]:.4f}, Test accuracy for h11: {test_acc_hyd[0]:.4f}')
print(f'Train accuracy for h21:{train_acc_hyd[1]:.4f}, Test accuracy for h21: {test_acc_hyd[1]:.4f}')
print(f'Train accuracy for h31:{train_acc_hyd[2]:.4f}, Test accuracy for h31: {test_acc_hyd[2]:.4f}')
print(f'Train accuracy for h22:{train_acc_hyd[3]:.4f}, Test accuracy for h22: {test_acc_hyd[3]:.4f}')

Train accuracy for h11:0.8479, Test accuracy for h11: 0.8434
Train accuracy for h21:0.7524, Test accuracy for h21: 0.7426
Train accuracy for h31:0.5719, Test accuracy for h31: 0.5254
Train accuracy for h22:0.2230, Test accuracy for h22: 0.1979


# Ensemble model

In [34]:
################## ACCURACY CALCULATION #####################
def calc_ensemble_acc(model1, model2, weighted_average, train_gen, test_gen, device):
    model1.eval()
    model2.eval()

    n_correct_h11 = 0.
    n_total_h11 = 0.
    n_correct_h21 = 0.
    n_total_h21 = 0.
    n_correct_h31 = 0.
    n_total_h31 = 0.
    n_correct_h22 = 0.
    n_total_h22 = 0.
    w1,w2 = weighted_average
    batch_size =128
    
    for inputs, target in train_gen():
        inputs, target = inputs.to(device), target.to(device)
        #Perform the prediction
        #round up the prediction to the nearest integer
        ypred1 = model1(inputs)*w1
        ypred2 = model2(inputs)*w2
        ypred = torch.round((ypred1 + ypred2))
        yp_h11 = ypred[:,0]
        yp_h21 = ypred[:,1]
        yp_h31 = ypred[:,2]
        yp_h22 = ypred[:,3]
        tg_h11 = target[:,0]
        tg_h21 = target[:,1]
        tg_h31 = target[:,2]
        tg_h22 = target[:,3]
        # update counts
        n_correct_h11 += (yp_h11 == tg_h11).sum().item()
        n_total_h11 += tg_h11.shape[0]
        n_correct_h21 += (yp_h21 == tg_h21).sum().item()
        n_total_h21 += tg_h21.shape[0]
        n_correct_h31 += (yp_h31 == tg_h31).sum().item()
        n_total_h31 += tg_h31.shape[0]
        n_correct_h22 += (yp_h22 == tg_h22).sum().item()
        n_total_h22 += tg_h22.shape[0]
    train_acc_h11 = n_correct_h11 / n_total_h11
    train_acc_h21 = n_correct_h21 / n_total_h21
    train_acc_h31 = n_correct_h31 / n_total_h31
    train_acc_h22 = n_correct_h22 / n_total_h22
    
    train_acc = [train_acc_h11, train_acc_h21,train_acc_h31, train_acc_h22]
    
    #TEST SET
    n_correct_h11 = 0.
    n_total_h11 = 0.
    n_correct_h21 = 0.
    n_total_h21 = 0.
    n_correct_h31 = 0.
    n_total_h31 = 0.
    n_correct_h22 = 0.
    n_total_h22 = 0.
    for inputs, target in test_gen():
        inputs, target = inputs.to(device), target.to(device)
        
        # Forward pass
        ypred1 = model1(inputs)*w1
        ypred2 = model2(inputs)*w2
        ypred = torch.round((ypred1 + ypred2))
        yp_h11 = ypred[:,0]
        yp_h21 = ypred[:,1]
        yp_h31 = ypred[:,2]
        yp_h22 = ypred[:,3]
        tg_h11 = target[:,0]
        tg_h21 = target[:,1]
        tg_h31 = target[:,2]
        tg_h22 = target[:,3]
        # update counts
        n_correct_h11 += (yp_h11 == tg_h11).sum().item()
        n_total_h11 += tg_h11.shape[0]
        n_correct_h21 += (yp_h21 == tg_h21).sum().item()
        n_total_h21 += tg_h21.shape[0]
        n_correct_h31 += (yp_h31 == tg_h31).sum().item()
        n_total_h31 += tg_h31.shape[0]
        n_correct_h22 += (yp_h22 == tg_h22).sum().item()
        n_total_h22 += tg_h22.shape[0]
    test_acc_h11 = n_correct_h11 / n_total_h11
    test_acc_h21 = n_correct_h21 / n_total_h21
    test_acc_h31 = n_correct_h31 / n_total_h31
    test_acc_h22 = n_correct_h22 / n_total_h22
    test_acc = [test_acc_h11, test_acc_h21, test_acc_h31, test_acc_h22]
    return  train_acc, test_acc

# Accuracy comparison

In [41]:
#CNN-GRU-GRU: 50-50 ensemble
print('CNN-GRU-GRU: 50-50 ensemble')
train_acc, test_acc = calc_ensemble_acc(trained_gru,  trained_cnn_gru, [0.5,0.5], train_gen, test_gen, device)
print(f'Train accuracy for h11:{train_acc[0]:.4f}, Test accuracy for h11: {test_acc[0]:.4f}')
print(f'Train accuracy for h21:{train_acc[1]:.4f}, Test accuracy for h21: {test_acc[1]:.4f}')
print(f'Train accuracy for h31:{train_acc[2]:.4f}, Test accuracy for h31: {test_acc[2]:.4f}')
print(f'Train accuracy for h22:{train_acc[3]:.4f}, Test accuracy for h22: {test_acc[3]:.4f}')

CNN-GRU-GRU: 50-50 ensemble
Train accuracy for h11:0.8121, Test accuracy for h11: 0.8090
Train accuracy for h21:0.7297, Test accuracy for h21: 0.7203
Train accuracy for h31:0.5207, Test accuracy for h31: 0.4871
Train accuracy for h22:0.2130, Test accuracy for h22: 0.1941


In [42]:
#CNN-GRU-GRU: 70-30 ensemble
print('CNN-GRU-GRU: 70-30 ensemble')
train_acc, test_acc = calc_ensemble_acc(trained_gru,  trained_cnn_gru, [0.3,0.7], train_gen, test_gen, device)
print(f'Train accuracy for h11:{train_acc[0]:.4f}, Test accuracy for h11: {test_acc[0]:.4f}')
print(f'Train accuracy for h21:{train_acc[1]:.4f}, Test accuracy for h21: {test_acc[1]:.4f}')
print(f'Train accuracy for h31:{train_acc[2]:.4f}, Test accuracy for h31: {test_acc[2]:.4f}')
print(f'Train accuracy for h22:{train_acc[3]:.4f}, Test accuracy for h22: {test_acc[3]:.4f}')

CNN-GRU-GRU: 70-30 ensemble
Train accuracy for h11:0.8406, Test accuracy for h11: 0.8375
Train accuracy for h21:0.7481, Test accuracy for h21: 0.7381
Train accuracy for h31:0.5659, Test accuracy for h31: 0.5252
Train accuracy for h22:0.2273, Test accuracy for h22: 0.2050


In [43]:
#CNN-GRU-GRU: 70-30 ensemble
print('CNN-GRU-GRU: 60-40 ensemble')
train_acc, test_acc = calc_ensemble_acc(trained_gru,  trained_cnn_gru, [0.4,0.6], train_gen, test_gen, device)
print(f'Train accuracy for h11:{train_acc[0]:.4f}, Test accuracy for h11: {test_acc[0]:.4f}')
print(f'Train accuracy for h21:{train_acc[1]:.4f}, Test accuracy for h21: {test_acc[1]:.4f}')
print(f'Train accuracy for h31:{train_acc[2]:.4f}, Test accuracy for h31: {test_acc[2]:.4f}')
print(f'Train accuracy for h22:{train_acc[3]:.4f}, Test accuracy for h22: {test_acc[3]:.4f}')

CNN-GRU-GRU: 60-40 ensemble
Train accuracy for h11:0.8272, Test accuracy for h11: 0.8237
Train accuracy for h21:0.7399, Test accuracy for h21: 0.7303
Train accuracy for h31:0.5459, Test accuracy for h31: 0.5080
Train accuracy for h22:0.2212, Test accuracy for h22: 0.1994


In [39]:
print('JUST CNN-GRU MODEL')
print(f'Train accuracy for h11:{train_acc_hyd[0]:.4f}, Test accuracy for h11: {test_acc_hyd[0]:.4f}')
print(f'Train accuracy for h21:{train_acc_hyd[1]:.4f}, Test accuracy for h21: {test_acc_hyd[1]:.4f}')
print(f'Train accuracy for h31:{train_acc_hyd[2]:.4f}, Test accuracy for h31: {test_acc_hyd[2]:.4f}')
print(f'Train accuracy for h22:{train_acc_hyd[3]:.4f}, Test accuracy for h22: {test_acc_hyd[3]:.4f}')

JUST CNN-GRU MODEL
Train accuracy for h11:0.8479, Test accuracy for h11: 0.8434
Train accuracy for h21:0.7524, Test accuracy for h21: 0.7426
Train accuracy for h31:0.5719, Test accuracy for h31: 0.5254
Train accuracy for h22:0.2230, Test accuracy for h22: 0.1979


In [40]:
print('JUST GRU MODEL')
print(f'Train accuracy for h11:{prev_train_acc[0]:.4f}, Test accuracy for h11: {prev_test_acc[0]:.4f}')
print(f'Train accuracy for h21:{prev_train_acc[1]:.4f}, Test accuracy for h21: {prev_test_acc[1]:.4f}')
print(f'Train accuracy for h31:{prev_train_acc[2]:.4f}, Test accuracy for h31: {prev_test_acc[2]:.4f}')
print(f'Train accuracy for h22:{prev_train_acc[3]:.4f}, Test accuracy for h22: {prev_test_acc[3]:.4f}')

JUST GRU MODEL
Train accuracy for h11:0.7202, Test accuracy for h11: 0.7195
Train accuracy for h21:0.6606, Test accuracy for h21: 0.6564
Train accuracy for h31:0.3886, Test accuracy for h31: 0.3659
Train accuracy for h22:0.1638, Test accuracy for h22: 0.1545


# Get predictions

In [86]:
X_train.shape[0]//128

5759

In [63]:
################## ACCURACY CALCULATION #####################
def get_pred(model1, model2, weighted_average, train_gen, test_gen, device):
    model1.eval()
    model2.eval()

    ypred_train_list = []
    train_target = []
    w1,w2 = weighted_average
    batch_size =128
    
    for inputs, target in train_gen():
        ypred_batch = []
        target_batch = []
        inputs, target = inputs.to(device), target.to(device)
        #Perform the prediction
        #round up the prediction to the nearest integer
        ypred1 = model1(inputs)*w1
        ypred2 = model2(inputs)*w2
        ypred = torch.round((ypred1 + ypred2))
        ypred = ypred.cpu().detach().numpy()
        target = target.cpu().detach().numpy()
        ypred_batch.append(ypred)
        target_batch.append(target)
     
    ypred_train_list.append(ypred_batch)
    train_target.append(target_batch)
    ypred_train_list= np.array(ypred_train_list)
    train_target = np.array(train_target)
    print(f'Train target shape: {train_target.shape}, prediction shape: {ypred_train_list.shape}')
    #TEST SET
    ypred_test_list = []
    test_target = []
    for inputs, target in test_gen():
        ypred_batch = []
        target_batch = []
        inputs, target = inputs.to(device), target.to(device)  
        # Forward pass
        ypred1 = model1(inputs)*w1
        ypred2 = model2(inputs)*w2
        ypred = torch.round((ypred1 + ypred2))
        ypred = ypred.cpu().detach().numpy()
        target = target.cpu().detach().numpy()
        ypred_batch.append(ypred)
        target_batch.append(target)
     
    ypred_test_list.append(ypred_batch)
    test_target.append(target_batch)
    ypred_test_list= np.array(ypred_test_list)
    test_target = np.array(test_target)
    print(f'Test target shape: {test_target.shape}, prediction shape: {ypred_test_list.shape}')
    
    return  ypred_train_list, train_target,  ypred_test_list,  test_target

In [64]:
#CNN-GRU-GRU: 70-30 ensemble
print('CNN-GRU-GRU: 70-30 ensemble')
ypred_train_list, train_target,  ypred_test_list,  test_target = get_pred(trained_gru, trained_cnn_gru, [0.3,0.7], train_gen, test_gen, device)

CNN-GRU-GRU: 70-30 ensemble
Train target shape: (1, 1, 128, 4), prediction shape: (1, 1, 128, 4)
Test target shape: (1, 1, 128, 4), prediction shape: (1, 1, 128, 4)


In [67]:
ypred_train_list[0,0,0], train_target[0,0,0]

(array([ 10.,   0.,  33., 217.], dtype=float32),
 array([ 10.,   0.,  33., 216.], dtype=float32))

In [68]:
ypred_test_list[0,0,0], test_target[0,0,0]

(array([  6.,   1.,  54., 280.], dtype=float32),
 array([  6.,   0.,  53., 280.], dtype=float32))

In [69]:
ypred_test_list[0,0,127], test_target[0,0,127]

(array([  6.,   1.,  52., 273.], dtype=float32),
 array([  6.,   3.,  53., 274.], dtype=float32))