# CICY4: (ANN, CNN, RNN) ensemble for predicting h22

In [1]:
import numpy as np
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
sns.set_style("darkgrid")

import torch
import torch.nn as nn
import torch.nn.functional as F

In this notebook we combine three previously trained models for $h^{2,2}$ (an ANN, a CNN and an LSTM-based RNN) into an ensemble and evaluate its predictions. Of the four Hodge numbers, $h^{2,2}$ is the hardest to predict: the best accuracy obtained so far is only around 10%.

# Load data

In [2]:
# load data
# Read the three NumPy arrays shipped in the Kaggle dataset directory.
path = '/kaggle/input/calabi-yau-cicy-4-folds'
conf = np.load(os.path.join(path, 'conf.npy'))
hodge = np.load(os.path.join(path, 'hodge.npy'))
direct = np.load(os.path.join(path, 'direct.npy'))
# Last expression of the cell: display the three array shapes as a sanity check.
conf.shape, hodge.shape, direct.shape

((921497, 16, 20), (921497, 4), (921497,))

In [3]:
# Model inputs are the raw `conf` arrays.
X = conf
# Pick individual target columns out of `hodge` (4 columns per sample).
# NOTE(review): the index -> Hodge-number mapping used here (1 -> h21, 3 -> h22,
# 2 -> h31) is taken on trust; column 0 is unused in this notebook (presumably
# h11) -- confirm against the dataset description.
h21 = hodge[:,1]
h22 = hodge[:,3]
h31 = hodge[:,2]

# Utility functions: Model, Train Loop, Accuracy

In [4]:
# Directory for checkpoints written by this notebook. `exist_ok=True` makes the
# cell safe to re-run and avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs('saved_models', exist_ok=True)

In [5]:
# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


In [6]:
import sys
sys.path.append('/kaggle/input/calabi-yau-cicy-4-folds')
from CICY4_functions import data_generator, batch_gd,  calc_accuracy

In [7]:
from sklearn.model_selection import train_test_split
def train_test(X, y):
    """Split (X, y) 80/20 into train/test sets and return them as float32 tensors.

    The split is shuffled with a fixed seed (random_state=101), so repeated
    calls on the same arrays give the same partition. Targets are reshaped to
    column vectors of shape (N, 1). The four resulting shapes are printed.

    Returns:
        X_train, X_test, y_train, y_test (torch tensors, in that order).
    """
    def _features(array):
        # NumPy array -> float32 tensor, shape unchanged.
        return torch.from_numpy(array.astype(np.float32))

    def _targets(array):
        # NumPy array -> float32 column-vector tensor.
        return torch.from_numpy(array.astype(np.float32).reshape(-1, 1))

    parts = train_test_split(X, y, test_size = 0.2, random_state = 101, shuffle = True)
    X_train, X_test, y_train, y_test = parts

    X_train, X_test = _features(X_train), _features(X_test)
    y_train, y_test = _targets(y_train), _targets(y_test)

    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    return X_train, X_test, y_train, y_test

In [8]:
def plot_losses(train_losses, test_losses):
    """Draw the train and test loss histories on one figure and show it."""
    curves = (
        (train_losses, 'train loss'),
        (test_losses, 'test loss'),
    )
    for history, label in curves:
        plt.plot(history, label=label)
    plt.legend()
    plt.show()

In [9]:
# Split the configuration arrays against the h22 targets.
X_train, X_test, y_train, y_test = train_test(X, h22)


def train_gen():
    """Return a fresh batch generator over the training split."""
    return data_generator(X_train, y_train)


def test_gen():
    """Return a fresh batch generator over the test split."""
    return data_generator(X_test, y_test)

torch.Size([737197, 16, 20]) torch.Size([737197, 1]) torch.Size([184300, 16, 20]) torch.Size([184300, 1])


In [10]:
#https://discuss.pytorch.org/t/loading-a-saved-model-for-continue-training/17244/3
def load_checkpoint(model, optimizer, losslogger, filename='checkpoint.pth.tar'):
    """Restore model and optimizer state from a checkpoint file, if it exists.

    The checkpoint is expected to be a dict with the keys 'epoch',
    'state_dict' and 'optimizer'. `model` and `optimizer` must already be
    constructed; this routine only updates their states in place.

    Args:
        model: module whose `load_state_dict` receives checkpoint['state_dict'].
        optimizer: optimizer whose `load_state_dict` receives checkpoint['optimizer'].
        losslogger: accepted for interface compatibility; currently unused.
        filename: path of the checkpoint file.

    Returns:
        (model, optimizer, start_epoch); start_epoch is 0 when no checkpoint
        file is found, otherwise the stored 'epoch' value.
    """
    start_epoch = 0
    if not os.path.isfile(filename):
        print("=> no checkpoint found at '{}'".format(filename))
        return model, optimizer, start_epoch

    print("=> loading checkpoint '{}'".format(filename))
    # Deserialize onto the CPU so that a checkpoint written on a GPU machine can
    # still be opened on a CPU-only one; load_state_dict then copies the values
    # into the parameters wherever they currently live.
    checkpoint = torch.load(filename, map_location='cpu')
    start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})"
              .format(filename, checkpoint['epoch']))

    return model, optimizer, start_epoch

#TO LOAD:
#model, optimizer, start_epoch = load_checkpoint(model, optimizer, losslogger)
#model = model.to(device)
# now individually transfer the optimizer parts...
#for state in optimizer.state.values():
#    for k, v in state.items():
#        if isinstance(v, torch.Tensor):
#            state[k] = v.to(device)

In [25]:
def train_or_load(load_model_weight,  cnn = False):
    """Either train the current global model or load a previously saved object.

    Args:
        load_model_weight: path of a file to pass to torch.load, or None to
            train instead.
        cnn: forwarded to `batch_gd` when training; ignored when loading.

    Returns:
        When `load_model_weight` is None: the tuple (train_losses, test_losses)
        returned by `batch_gd`. Otherwise: whatever object torch.load returns
        for the given file. Note the two branches return different kinds of
        values.

    The training branch reads `model`, `epochs`, `train_gen`, `test_gen`,
    `device` and `batch_gd` from module scope; they must be defined before the
    call.
    """
    if load_model_weight is None:
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters())
        train_losses, test_losses = batch_gd(model, criterion, optimizer, train_gen, test_gen, epochs,
                                             device = device, cnn=cnn)
        return train_losses, test_losses

    # NOTE(review): torch.load unpickles the file, so only load files from a
    # trusted source. Recent PyTorch releases also restrict what torch.load
    # accepts by default (weights_only) -- confirm against the runtime version
    # if the file holds a whole pickled model.
    # Keep the stored device placement when CUDA is available; otherwise remap
    # every tensor to the CPU so GPU-saved files can still be opened.
    map_location = None if torch.cuda.is_available() else torch.device('cpu')
    trained_model = torch.load(load_model_weight, map_location=map_location)
    return trained_model

# ANN Model for h22

In [None]:
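# First model: same ANN architecture as the one used for the h11 prediction.
# The class is imported although it is not referenced by name below -- presumably
# so that torch.load can unpickle the saved ANN model object.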
from CICY4_functions import ANN_cicy4_h11

In [28]:
# Passing a path (rather than None) makes train_or_load load the saved object
# instead of training.
ann_model_weight = '/kaggle/input/calabi-yau-cicy-4-folds/ANN_cicy4_h22.pt'
trained_ann = train_or_load(ann_model_weight, cnn = False)

In [29]:
# Evaluate the loaded ANN on both splits. `train_acc` / `test_acc` are reused
# (overwritten) by the later evaluation cells.
train_acc, test_acc = calc_accuracy(trained_ann, train_gen, test_gen, device = device, cnn= False)
print(f"Train acc: {train_acc:.4f}, Test acc: {test_acc:.4f}")

Train acc: 0.0951, Test acc: 0.0761


# RNN Model for h22 with LSTM layers

In [12]:
class RNN_cicy4(nn.Module):
    """Recurrent regressor: stacked LSTM (or GRU) followed by a two-layer head.

    The input is treated as a batch-first sequence of shape N x T x D. The
    recurrent output at the final time step is passed through
    Linear(M, 128) -> ReLU -> Linear(128, K).

    Args:
        n_inputs: feature size D of each time step.
        n_hidden: hidden size M of the recurrent layers.
        n_rnnlayers: number L of stacked recurrent layers.
        n_outputs: output size K.
        is_lstm: use the LSTM stack when True, the GRU stack otherwise.
    """

    def __init__(self, n_inputs, n_hidden, n_rnnlayers, n_outputs, is_lstm=True):
        super(RNN_cicy4,self).__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.K = n_outputs
        self.L = n_rnnlayers
        self.is_lstm = is_lstm

        # Both recurrent stacks are always registered (only one is used in
        # forward); this keeps the module's parameter set independent of
        # `is_lstm`.
        self.lstm = nn.LSTM(input_size = self.D,
                           hidden_size = self.M,
                           num_layers = self.L,
                           batch_first = True)

        self.gru = nn.GRU(input_size = self.D,
                           hidden_size = self.M,
                           num_layers = self.L,
                           batch_first = True)
        self.fc1 = nn.Linear(self.M, 128)
        self.fc2 = nn.Linear(128, self.K)

    def forward(self, X):
        """Map a batch X of shape N x T x D to predictions of shape N x K."""
        # Zero initial states, created on the input's own device/dtype so the
        # module does not depend on a global `device` variable.
        h0 = torch.zeros(self.L, X.size(0), self.M, device=X.device, dtype=X.dtype)

        # Recurrent output is N x T x M.
        if self.is_lstm:
            c0 = torch.zeros_like(h0)
            out, _ = self.lstm(X, (h0, c0))
        else:
            out, _ = self.gru(X, h0)

        # Only the output at the final time step feeds the head.
        out = out[:, -1, :]
        out = F.relu(self.fc1(out))
        out = self.fc2(out)

        return out

In [26]:
# Toggle: uncomment the next line (and drop the path assignment) to take the
# training branch of train_or_load, which additionally needs global `model`
# and `epochs`.
#lstm_model_weight = None
lstm_model_weight = '/kaggle/input/calabi-yau-cicy-4-folds/LSTM_cicy4_h22.pt'
trained_lstm = train_or_load(lstm_model_weight, cnn = False)

In [27]:
# Evaluate the loaded LSTM model on both splits (overwrites train_acc / test_acc).
train_acc, test_acc = calc_accuracy(trained_lstm, train_gen, test_gen, device = device, cnn= False)
print(f"Train acc: {train_acc:.4f}, Test acc: {test_acc:.4f}")

Train acc: 0.1009, Test acc: 0.0996


In [None]:
#state = {'epoch': epoch + 1, 'state_dict': model1.state_dict(), 'optimizer': optimizer.state_dict(),}
#torch.save(state, '/kaggle/working/saved_models/LSTM_cicy4_h22S.pt')

# CNN Model for h22 

In [31]:
#same as CNN for h11
class CNN_cicy4(nn.Module):
    """Two-convolution CNN regressor producing one value per sample.

    Pipeline: conv1 -> max-pool -> conv2 -> max-pool -> ReLU -> flatten ->
    Linear(384, 512) -> ReLU -> Linear(512, 128) -> ReLU -> Linear(128, 1).
    The 384 input features of the first linear layer correspond to
    64 channels x 2 x 3, which is what a single-channel 16 x 20 input yields
    after the convolution/pooling stack.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (the same pooling layer is reused twice).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=128, kernel_size=4, stride=1)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1)
        self.mxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flat = nn.Flatten()
        self.conv_total = nn.Sequential(self.conv1, self.mxpool, self.conv2, self.mxpool)
        # Fully connected regression head.
        self.linear1 = nn.Linear(in_features=384, out_features=512)
        self.linear2 = nn.Linear(in_features=512, out_features=128)
        self.linear3 = nn.Linear(in_features=128, out_features=1)

    def forward(self, x):
        """Return the prediction for a batch `x` of shape (N, 1, H, W)."""
        # A single ReLU is applied after the whole convolutional stack.
        hidden = self.flat(F.relu(self.conv_total(x)))
        for dense in (self.linear1, self.linear2):
            hidden = F.relu(dense(hidden))
        return self.linear3(hidden)


In [32]:
# Toggle: uncomment the next line (and drop the path assignment) to take the
# training branch of train_or_load, which additionally needs global `model`
# and `epochs`.
#cnn_model_weight = None
cnn_model_weight = '/kaggle/input/calabi-yau-cicy-4-folds/CNN_cicy4_h22.pt'
trained_cnn = train_or_load(cnn_model_weight, cnn = True)

In [33]:
# Evaluate the loaded CNN on both splits (overwrites train_acc / test_acc);
# cnn=True is passed through to calc_accuracy for this model.
train_acc, test_acc = calc_accuracy(trained_cnn, train_gen, test_gen, device = device, cnn= True)
print(f"Train acc: {train_acc:.4f}, Test acc: {test_acc:.4f}")

Train acc: 0.0583, Test acc: 0.0551


In [None]:
#state = {'epoch': epoch + 1, 'state_dict': model2.state_dict(),  'optimizer': optimizer.state_dict(), }
#torch.save(state, '/kaggle/working/saved_models/CNN_cicy4_h22S.pt')

# Ensemble prediction

## Unweighted ensemble: plain mean of the 3 model predictions

In [58]:
################## ACCURACY CALCULATION #####################
def calc_ensemble_acc(model1, model2, model3, train_gen, test_gen, device):
    """Exact-match accuracy of the unweighted mean of three models.

    For every batch the three predictions are averaged, rounded to the nearest
    integer and compared element-wise with the target.

    Args:
        model1: model fed the batch reshaped to (N, 1, 16, 20) (the CNN).
        model2, model3: models fed the batch exactly as yielded.
        train_gen, test_gen: zero-argument callables returning an iterable of
            (inputs, target) batches.
        device: device the batches are moved to before the forward passes.

    Returns:
        (train_acc, test_acc) as fractions in [0, 1]; a value is NaN when the
        corresponding generator yields no samples.

    All three models are switched to eval mode and left in that mode.
    """
    for member in (model1, model2, model3):
        member.eval()

    def _accuracy(batch_gen):
        # Fraction of samples whose rounded ensemble prediction equals the target.
        n_correct = 0
        n_total = 0
        # Pure evaluation: no autograd graph is needed.
        with torch.no_grad():
            for inputs, target in batch_gen():
                inputs, target = inputs.to(device), target.to(device)
                # Add the channel axis for the CNN after the device move, and
                # infer the batch size so a batch of any size is accepted.
                inputs2 = inputs.view(-1, 1, 16, 20)
                #model 1 must be a cnn
                yp1 = model1(inputs2)
                yp2 = model2(inputs)
                yp3 = model3(inputs)
                ypred = torch.round((yp1+yp2+yp3)/3)

                n_correct += (ypred == target).sum().item()
                n_total += target.shape[0]
        return n_correct / n_total if n_total else float('nan')

    train_acc = _accuracy(train_gen)
    test_acc = _accuracy(test_gen)

    return train_acc, test_acc

In [59]:
# Argument order matters: calc_ensemble_acc feeds its first model the
# (N, 1, 16, 20) view of each batch, so the CNN must come first.
train_acc, test_acc = calc_ensemble_acc(trained_cnn, trained_lstm, trained_ann, train_gen, test_gen, device)
print(f'Train acc: {train_acc}, test acc: {test_acc}')

Train acc: 0.10066444912311165, test acc: 0.08885293606671299


In [65]:
# Same two numbers as above, reported as percentages.
print(f'Train acc: {train_acc*100:.4f}, test acc: {test_acc*100:.4f}')

Train acc: 10.0664, test acc: 8.8853


## Use weighted average for the 3 models

In [63]:
################## ACCURACY CALCULATION #####################
def calc_ensemble_acc_w(model1, model2, model3, weighted_average, train_gen, test_gen, device):
    """Exact-match accuracy of a weighted sum of three models.

    For every batch the prediction is round(w1*model1 + w2*model2 + w3*model3)
    and is compared element-wise with the target. The weights are used as
    given; they are not normalised.

    Args:
        model1: model fed the batch reshaped to (N, 1, 16, 20) (the CNN).
        model2, model3: models fed the batch exactly as yielded.
        weighted_average: sequence of exactly three weights (w1, w2, w3), in
            the same order as the models.
        train_gen, test_gen: zero-argument callables returning an iterable of
            (inputs, target) batches.
        device: device the batches are moved to before the forward passes.

    Returns:
        (train_acc, test_acc) as fractions in [0, 1]; a value is NaN when the
        corresponding generator yields no samples.

    Raises:
        ValueError: if `weighted_average` does not unpack into three values.

    All three models are switched to eval mode and left in that mode.
    """
    for member in (model1, model2, model3):
        member.eval()
    w1, w2, w3 = weighted_average

    def _weighted_accuracy(batch_gen):
        # Fraction of samples whose rounded weighted prediction equals the target.
        n_correct = 0
        n_total = 0
        # Pure evaluation: no autograd graph is needed.
        with torch.no_grad():
            for inputs, target in batch_gen():
                inputs, target = inputs.to(device), target.to(device)
                # Add the channel axis for the CNN after the device move, and
                # infer the batch size so a batch of any size is accepted.
                inputs2 = inputs.view(-1, 1, 16, 20)
                #model 1 must be a cnn
                yp1 = model1(inputs2)*w1
                yp2 = model2(inputs)*w2
                yp3 = model3(inputs)*w3
                ypred = torch.round(yp1+yp2+yp3)

                n_correct += (ypred == target).sum().item()
                n_total += target.shape[0]
        return n_correct / n_total if n_total else float('nan')

    train_acc = _weighted_accuracy(train_gen)
    test_acc = _weighted_accuracy(test_gen)

    return train_acc, test_acc

In [66]:
# The CNN must be the first model; the weights follow the model order:
# 0.2 for the CNN, 0.4 for the LSTM, 0.4 for the ANN.
train_accw, test_accw = calc_ensemble_acc_w(trained_cnn, trained_lstm, trained_ann, [0.2, 0.4, 0.4], train_gen, test_gen, device)
print(f'Train acc: {train_accw*100:.4f}, test acc: {test_accw*100:.4f}')

Train acc: 11.0756, test acc: 9.6247


In [67]:
# Same evaluation with a larger CNN share: 0.3 for the CNN, 0.35 each for the
# LSTM and the ANN (the CNN must still be the first model).
train_accw, test_accw = calc_ensemble_acc_w(trained_cnn, trained_lstm, trained_ann, [0.3, 0.35, 0.35], train_gen, test_gen, device)
print(f'Train acc: {train_accw*100:.4f}, test acc: {test_accw*100:.4f}')

Train acc: 10.3194, test acc: 9.0949
