In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score
from skorch import NeuralNetRegressor
from sklearn.model_selection import GridSearchCV

In [2]:
# load training data
Xtr_loadpath = 'Xtr.csv'
Xts_loadpath = 'Xts.csv'
ytr_loadpath = 'ytr.csv'

Xtr = np.loadtxt(Xtr_loadpath, delimiter=",")
Xts = np.loadtxt(Xts_loadpath, delimiter=",")
ytr = np.loadtxt(ytr_loadpath, delimiter=",")

In [3]:
# standardize the training data
Xtr_mean = np.mean(Xtr,axis=0)
Xtr_std = np.std(Xtr,axis=0)
ytr_mean = np.mean(ytr)
ytr_std = np.std(ytr)

Xtr_standardized = ((Xtr-Xtr_mean[None,:])/Xtr_std[None,:]) # revise this line as needed
Xts_standardized = ((Xts-Xtr_mean[None,:])/Xtr_std[None,:]) # revise this line as needed
ytr_standardized = ((ytr-ytr_mean)/ytr_std)

# save the standardized training data
Xtr_savepath = 'Xtr_pytorch.csv'
Xts_savepath = 'Xts_pytorch.csv'
ytr_savepath = 'ytr_pytorch.csv'
yts_hat_savepath = 'yts_hat_pytorch.csv'

np.savetxt(Xtr_savepath, Xtr_standardized, delimiter=",")
np.savetxt(Xts_savepath, Xts_standardized, delimiter=",")
np.savetxt(ytr_savepath, ytr_standardized, delimiter=",")

In [4]:
# # feature selection

# Xtr_feat_sel = Xtr_standardized
# Xtr_feat_sel[:,1] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,3] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,4] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,7] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,10] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,14] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,15] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,16] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,17] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,18] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,19] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,22] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,23] = np.ones(Xtr_feat_sel.shape[0])*1e-5
# Xtr_feat_sel[:,24] = np.ones(Xtr_feat_sel.shape[0])*1e-5

In [5]:
# Convert the numpy arrays to PyTorch tensors
Xtr_torch = torch.Tensor(Xtr_standardized)
ytr_torch = torch.Tensor(ytr)

batch_size = 100  # size of each batch

# Create a training Dataset
train_ds = torch.utils.data.TensorDataset(Xtr_torch, ytr_torch)
# Creates a training DataLoader from this Dataset
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True) 


In [6]:
# # create a model
# d_in = Xtr.shape[1]
# d_h = 100
# d_out = 1

# class NeuralNet(nn.Module):
#     def __init__(self,din,dh1,dh2,dh3,dout):
#         super(NeuralNet, self).__init__()
#         self.Dense1 = nn.Linear(din,dh1)
#         self.Dense2 = nn.Linear(dh1,dh2)
#         self.Dense3 = nn.Linear(dh2,dh3)
#         self.Dense4 = nn.Linear(dh3,dout)
#         self.ReLU = nn.ReLU()
        
#     def forward(self,x):
#         x = self.ReLU(self.Dense1(x))
#         x = self.ReLU(self.Dense2(x))        
#         x = self.ReLU(self.Dense3(x))        
#         out = self.Dense4(x)
#         return out

# model = NeuralNet(din=d_in, dh1=d_h, dh2=64*2, dh3=30*2, dout=d_out)

# print(str(model))

In [7]:
nin = Xtr.shape[1]
nout = 1
#nh = 256

# model = nn.Sequential(
#     nn.Linear(nin, 128*5),
#     nn.ReLU(),
#     nn.Dropout(p=0.5),  # Add dropout layer with probability 0.2
#     nn.Linear(128*5, 64*5),
#     nn.ReLU(),
#     nn.Dropout(p=0.5),  # Add dropout layer with probability 0.2
#     nn.Linear(64*5, 32*5),
#     nn.ReLU(),
#     nn.Dropout(p=0.5),  # Add dropout layer with probability 0.2
#     nn.Linear(32*5, 16*5),
#     nn.ReLU(),
#     nn.Dropout(p=0.5),  # Add dropout layer with probability 0.2
#     nn.Linear(16*5, 8*5),
#     nn.ReLU(),
#     nn.Linear(8*5, 4*5),
#     nn.ReLU(),
#     nn.Linear(4*5, 2*5),
#     nn.ReLU(),
#     nn.Linear(2*5, nout)
# )

model = nn.Sequential(
    nn.Linear(nin, 2187),
    nn.ReLU(),
    nn.Dropout(p=0.5),  # Add dropout layer with probability 0.2
    nn.Linear(2187, 729),
    nn.ReLU(),
    nn.Dropout(p=0.5),  # Add dropout layer with probability 0.2
    nn.Linear(729, 243),
    nn.ReLU(),
    nn.Dropout(p=0.5),  # Add dropout layer with probability 0.2
    nn.Linear(243, 81),
    nn.ReLU(),
    nn.Linear(81, 27),
    nn.ReLU(),
    nn.Linear(27, 9),
    nn.ReLU(),
    nn.Linear(9, nout)
)

print(str(model))

Sequential(
  (0): Linear(in_features=26, out_features=2187, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=2187, out_features=729, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=729, out_features=243, bias=True)
  (7): ReLU()
  (8): Dropout(p=0.5, inplace=False)
  (9): Linear(in_features=243, out_features=81, bias=True)
  (10): ReLU()
  (11): Linear(in_features=81, out_features=27, bias=True)
  (12): ReLU()
  (13): Linear(in_features=27, out_features=9, bias=True)
  (14): ReLU()
  (15): Linear(in_features=9, out_features=1, bias=True)
)


In [8]:
# Choosing the optimizer and loss function

epochs = 350
lrate = 2.5e-6
decay = lrate/epochs
lambda1 = lambda epoch: (1-decay)*epoch

opt = optim.Adam(model.parameters(), lr=lrate)
scheduler = optim.lr_scheduler.LambdaLR(opt, lr_lambda=lambda1)
criterion = nn.MSELoss()
#criterion = nn.HuberLoss(reduction='mean', delta=0.6)
#criterion = nn.L1Loss()

In [9]:
# training the model
num_epoch = epochs

a_tr_loss = np.zeros([num_epoch])
a_tr_Rsq = np.zeros([num_epoch])

for epoch in range(num_epoch):
    #if epoch == 100:
        #opt = optim.Adam(model.parameters(), lr=1e-6)
    #if epoch == 100:
        #opt = optim.Adam(model.parameters(), lr=8e-7)
    #if epoch == 150:
        #opt = optim.Adam(model.parameters(), lr=4e-7)
    #if epoch == 200:
        #opt = optim.Adam(model.parameters(), lr=2e-7)
    #if epoch == 250:
        #opt = optim.Adam(model.parameters(), lr=1e-7)
    model.train() # put model in training mode
    batch_loss_tr = []
    batch_Rsq_tr = []
    # iterate over training set
    for train_iter, data in enumerate(train_loader):
        x_batch,y_batch = data
        
        y_batch = y_batch.view(-1,1)
        #y_batch = y_batch.type(torch.long)
        
        out = model(x_batch)
        # Compute Loss
        loss = criterion(out,y_batch.type(torch.float))
        batch_loss_tr.append(loss.item())
        # Compute R-square
        Rsq = r2_score(y_batch.type(torch.float).detach().numpy(), out.detach().numpy())
        batch_Rsq_tr.append(Rsq.item())
        # Compute gradients using back propagation
        opt.zero_grad()
        loss.backward()
        # Take an optimization 'step'
        opt.step()
        
    # Take scheduler step
    scheduler.step()
        
    a_tr_loss[epoch] = np.mean(batch_loss_tr) # Compute average loss over epoch
    a_tr_Rsq[epoch] = np.mean(batch_Rsq_tr)
    print('Epoch: {0:2d}   Train Loss: {1:.3f}   '.format(epoch+1, a_tr_loss[epoch])
         + 'R^2: {0:.3f}   '.format(a_tr_Rsq[epoch])
         )
    
with torch.no_grad():
    predict = model(torch.Tensor(Xtr_standardized)).detach().numpy().ravel()

r2 = r2_score(ytr,predict)
print('training R2: ',r2)
        

Epoch:  1   Train Loss: 101.007   R^2: -0.015   
Epoch:  2   Train Loss: 101.007   R^2: -0.011   
Epoch:  3   Train Loss: 101.001   R^2: -0.012   
Epoch:  4   Train Loss: 100.992   R^2: -0.017   
Epoch:  5   Train Loss: 100.965   R^2: -0.014   
Epoch:  6   Train Loss: 100.884   R^2: -0.018   
Epoch:  7   Train Loss: 100.617   R^2: -0.008   
Epoch:  8   Train Loss: 99.512   R^2: 0.002   
Epoch:  9   Train Loss: 96.717   R^2: 0.039   
Epoch: 10   Train Loss: 94.701   R^2: 0.061   
Epoch: 11   Train Loss: 93.469   R^2: 0.075   
Epoch: 12   Train Loss: 91.929   R^2: 0.088   
Epoch: 13   Train Loss: 90.254   R^2: 0.103   
Epoch: 14   Train Loss: 88.693   R^2: 0.119   
Epoch: 15   Train Loss: 87.273   R^2: 0.124   
Epoch: 16   Train Loss: 86.560   R^2: 0.134   
Epoch: 17   Train Loss: 85.690   R^2: 0.142   
Epoch: 18   Train Loss: 84.291   R^2: 0.151   
Epoch: 19   Train Loss: 83.318   R^2: 0.168   
Epoch: 20   Train Loss: 82.272   R^2: 0.177   
Epoch: 21   Train Loss: 81.663   R^2: 0.177   

In [12]:
# save the model: you must use the .pth format for pytorch models!
model_savepath = 'model.pth'

# To save a PyTorch model, we first pass an input through the model, 
# and then save the "trace". 
# For this purpose, we can use any input. 
# We will create a random input with the proper dimension.
x = torch.randn(26) # random input
x = x[None,:] # add singleton batch index
with torch.no_grad():
    traced_cell = torch.jit.trace(model, (x))

# Now we save the trace
torch.jit.save(traced_cell, model_savepath)

Tensor-likes are not close!

Mismatched elements: 1 / 1 (100.0%)
Greatest absolute difference: 1.7879984378814697 at index (0, 0) (up to 1e-05 allowed)
Greatest relative difference: 1.2870993137160507 at index (0, 0) (up to 1e-05 allowed)
  _check_trace(


In [13]:
# generate kaggle submission file using the validation script
!python {"validation.py " + model_savepath + " --Xts_path " + Xts_savepath + " --Xtr_path " + Xtr_savepath + " --yts_hat_path " + yts_hat_savepath } 

training R2 =  0.718322265149214
test target predictions saved in yts_hat_pytorch.csv
