In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import numpy as np
import matplotlib.pyplot as plt

In [8]:
import os
kaggle_data={"username":"kleber0","key":"31c7efab560bf0ae89adce7fa28372ed"}
os.environ['KAGGLE_USERNAME']=kaggle_data["username"]
os.environ['KAGGLE_KEY']=kaggle_data["key"]
import kaggle
!kaggle competitions download -c idl-fall2021-hw1p2
!unzip /content/dev.npy.zip
!unzip /content/dev_labels.npy.zip
!unzip /content/train.npy.zip
!unzip /content/train_labels.npy.zip
!unzip /content/test.npy.zip

Downloading dev.npy.zip to /content
 97% 239M/246M [00:02<00:00, 136MB/s]
100% 246M/246M [00:02<00:00, 126MB/s]
Downloading dev_labels.npy.zip to /content
  0% 0.00/617k [00:00<?, ?B/s]
100% 617k/617k [00:00<00:00, 197MB/s]
Downloading train.npy.zip to /content
100% 1.92G/1.92G [00:14<00:00, 158MB/s]
100% 1.92G/1.92G [00:14<00:00, 144MB/s]
Downloading sample.csv.zip to /content
100% 4.03M/4.03M [00:00<00:00, 27.0MB/s]

Downloading test.npy.zip to /content
 99% 239M/241M [00:02<00:00, 79.6MB/s]
100% 241M/241M [00:02<00:00, 111MB/s] 
Downloading train_labels.npy.zip to /content
  0% 0.00/5.16M [00:00<?, ?B/s]
100% 5.16M/5.16M [00:00<00:00, 171MB/s]
Archive:  /content/dev.npy.zip
  inflating: dev.npy                 
Archive:  /content/dev_labels.npy.zip
  inflating: dev_labels.npy          
Archive:  /content/train.npy.zip
  inflating: train.npy               
Archive:  /content/train_labels.npy.zip
  inflating: train_labels.npy        
Archive:  /content/test.npy.zip
  inflating: test.n

In [9]:
from psutil import virtual_memory

# Total memory reported by psutil, expressed in decimal gigabytes (1e9 bytes).
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to call a runtime "high-RAM".
print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')

Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime


In [10]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [11]:
# Load the five competition arrays in one pass.
# NOTE(review): allow_pickle=True makes NumPy unpickle object arrays, which can
# execute arbitrary code - only load files from a trusted source. Presumably it
# is needed because the arrays hold variable-length utterances; confirm.
train_data, train_labels, val_data, val_labels, test_data = [
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        '/content/train.npy',
        '/content/train_labels.npy',
        '/content/dev.npy',
        '/content/dev_labels.npy',
        '/content/test.npy',
    )
]

In [12]:
# --- Optimisation settings ---
epochs, batch_size, learning_rate = 20, 32, 0.001

# --- Context window ---
# `context` frames are taken on each side of the centre frame; `pad_val` is the
# number of zero frames added at both ends of every utterance.
context = pad_val = 20

# --- Network dimensions ---
hidden_size, output_size = 2048, 71   # output_size = number of predicted classes
# One input vector is the flattened window of (2*context + 1) frames.
# Presumably 40 is the number of features per frame - TODO confirm against the data.
input_size = 40 * (2 * context + 1)

In [13]:
class myDataset(Dataset):
    """Frame-level dataset that serves every frame with a window of neighbours.

    Each item is the slice of ``2 * context + 1`` consecutive frames centred on
    one frame of one utterance, plus (outside test mode) that frame's label.

    Args:
        X: indexable collection of utterances. Each ``X[i]`` is padded with a
            two-axis pad spec, so it must be 2-D (frames along the first axis).
            ``X`` must support item assignment because the padded utterances
            are written back into it.
        Y: per-utterance label sequences aligned with ``X``; required unless
            ``is_test`` is true.
        pad_val: number of zero frames added before and after every utterance.
        context: number of neighbouring frames taken on each side.
        is_test: when true no labels are used and items are windows only.

    Raises:
        ValueError: if ``pad_val < context`` (windows near the edges would fall
            outside the padded utterance) or if labels are missing outside
            test mode.
        AssertionError: if ``X`` and ``Y`` do not have matching lengths.

    Note:
        ``X`` is padded IN PLACE so that two copies of the data are never held
        at once. Build only one dataset per loaded array; constructing a second
        one from the same ``X`` would pad it again.
    """

    def __init__(self, X, Y=None, pad_val=5, context=5, is_test = False):
        if pad_val < context:
            raise ValueError(
                f"pad_val ({pad_val}) must be >= context ({context}); otherwise "
                "windows near utterance boundaries are truncated or empty"
            )
        if not is_test and Y is None:
            raise ValueError("Y is required unless is_test=True")

        self.X = X
        self.Y = Y
        self.is_test = is_test

        # Flat index -> (utterance index, frame index), computed BEFORE padding
        # so that only real frames are served as window centres.
        index_map_x = [(i, j) for i, x in enumerate(X) for j in range(len(x))]

        if not is_test:
            # Comparing lengths is equivalent to comparing the full
            # (utterance, frame) index maps of data and labels, without
            # materialising two sets holding one tuple per frame.
            assert len(X) == len(Y), "data and labels differ in number of utterances"
            for i, (x, y) in enumerate(zip(X, Y)):
                assert len(x) == len(y), f"utterance {i}: data and labels differ in length"

        self.index_map_X = index_map_x
        self.length = len(index_map_x)

        self.pad_val = pad_val
        self.context = context

        # Zero-pad every utterance along the frame axis (in place, see class Note).
        for i, x in enumerate(self.X):
            self.X[i] = np.pad(x, ((pad_val, pad_val), (0, 0)), 'constant', constant_values=0)

    def __len__(self):
        """Return the total number of (unpadded) frames over all utterances."""
        return self.length

    def __getitem__(self, index):
        """Return the context window for flat frame ``index`` (and its label)."""
        i, j = self.index_map_X[index]

        # Frame j of the original utterance sits at j + pad_val after padding.
        centre = j + self.pad_val
        x = self.X[i][centre - self.context:centre + self.context + 1, :]

        if self.is_test:
            return x
        return x, self.Y[i][j]

    def collate_fn(self, batch):
        """Stack a list of items into batch tensors.

        Intended to be passed to a DataLoader as ``collate_fn=dataset.collate_fn``.

        Args:
            batch: list of items as returned by ``__getitem__`` - windows in
                test mode, ``(window, label)`` pairs otherwise.

        Returns:
            A tensor of stacked windows in test mode, otherwise a tuple
            ``(windows, labels)`` of tensors.
        """
        if self.is_test:
            return torch.as_tensor(np.stack(batch))

        windows, targets = zip(*batch)
        # Stack in NumPy first: building a tensor from a list of arrays is slow.
        return torch.as_tensor(np.stack(windows)), torch.as_tensor(np.asarray(targets))

In [14]:
class Model(nn.Module):
    """Fully connected classifier with three ReLU hidden layers.

    Layer widths are ``input_size -> hidden_size -> hidden_size/2 ->
    hidden_size/4 -> output_size``. All layers are created directly on the
    notebook-level ``device``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        half_width = int(hidden_size/2)
        quarter_width = int(hidden_size/4)
        # Attribute names l1..l4 define the state_dict keys; keep them stable.
        self.l1 = nn.Linear(input_size, hidden_size, device=device)
        self.l2 = nn.Linear(hidden_size, half_width, device=device)
        self.l3 = nn.Linear(half_width, quarter_width, device=device)
        self.l4 = nn.Linear(quarter_width, output_size, device=device)

    def forward(self, x):
        """Return the raw (pre-softmax) class scores for ``x``."""
        hidden = x
        for layer in (self.l1, self.l2, self.l3):
            hidden = F.relu(layer(hidden))
        # No activation on the last layer: the output is unnormalised scores.
        return self.l4(hidden)

In [None]:
# torch.save({
#     'epoch':epoch,
#     'model_state_dict': model.state_dict(),
#     'optimizer_state_dict':optimizer.state_dict(),
#     'loss':loss
# },PATH)

In [None]:
# checkpoint = {
#     "epoch":20,
#     "model_state":model.state_dict(),
#     "optim_state":optimizer.state_dict(),
#     "loss":loss
# }
# torch.save(checkpoint,"checkpoint.pth")

In [15]:
def init_weights(model):
    """Initialise one module; intended for use with ``nn.Module.apply``.

    ``nn.Linear`` modules get Xavier-uniform weights and a constant bias of
    0.01. Every other module type is left untouched.

    Args:
        model: the module handed in by ``apply`` (one sub-module per call).
    """
    if not isinstance(model, nn.Linear):
        return

    # In-place variant; the un-suffixed `xavier_uniform` is deprecated.
    nn.init.xavier_uniform_(model.weight)
    # Linear layers built with bias=False have `bias is None`.
    if model.bias is not None:
        nn.init.constant_(model.bias, 0.01)

In [16]:
# Build the network, initialise its linear layers, and place it on `device`.
model = Model(input_size, hidden_size, output_size).apply(init_weights).to(device)

# Loss and optimiser used by the training loop.
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Optional: resume from a saved checkpoint (disabled; PATH must be defined first).
# checkpoint = torch.load(PATH)
# model.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# epoch = checkpoint['epoch']
# loss = checkpoint['loss']


  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
# Wrap the training and validation arrays in frame-level datasets.
# Note: myDataset zero-pads the arrays it receives in place, so re-running this
# cell without reloading the data pads them a second time.
train_dataset = myDataset(X=train_data, Y=train_labels, pad_val=pad_val, context=context)
val_dataset = myDataset(X=val_data, Y=val_labels, pad_val=pad_val, context=context)

# Only the training frames are shuffled; validation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Destination of the best checkpoint. An already-defined PATH is respected;
# otherwise fall back to a file in the working directory.
if "PATH" not in globals():
    PATH = "checkpoint.pth"

n_total_steps = len(train_loader)
val_loss_min = float("inf")

for epoch in range(epochs):
    # ---------------- training pass ----------------
    model.train()
    train_loss_sum = 0.0
    train_samples = 0
    for i, (data, labels) in enumerate(train_loader):
        # Flatten each context window into one feature vector. The real batch
        # size is used because the last batch of an epoch may be smaller than
        # `batch_size`.
        n_in_batch = data.size(0)
        data = data.reshape(n_in_batch, -1).float().to(device)
        labels = labels.long().to(device)

        output = model(data)
        loss = criterion(output, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a true per-frame mean.
        train_loss_sum += loss.item() * n_in_batch
        train_samples += n_in_batch
    running_loss = train_loss_sum / max(train_samples, 1)

    # ---------------- validation pass ----------------
    model.eval()
    val_loss_sum = 0.0
    val_samples = 0
    with torch.no_grad():  # no graph needed for evaluation
        # Distinct loop names: do not overwrite the global val_data / val_labels.
        for val_batch, val_targets in val_loader:
            n_in_batch = val_batch.size(0)
            val_batch = val_batch.reshape(n_in_batch, -1).float().to(device)
            val_targets = val_targets.long().to(device)

            val_output = model(val_batch)
            val_loss = criterion(val_output, val_targets)

            val_loss_sum += val_loss.item() * n_in_batch
            val_samples += n_in_batch
    running_val_loss = val_loss_sum / max(val_samples, 1)

    print(f'epoch {epoch+1}/{epochs}, steps {n_total_steps}, train_loss = {running_loss:.4f}, val_loss = {running_val_loss:.4f}')

    # Keep only the checkpoint with the lowest mean validation loss so far.
    if running_val_loss < val_loss_min:
        val_loss_min = running_val_loss
        checkpoint = {
            "epoch": epoch + 1,
            "model_state": model.state_dict(),
            "optim_state": optimizer.state_dict(),
            "val_loss": running_val_loss,
        }
        torch.save(checkpoint, PATH)
        

In [None]:
# Label-free dataset over the test utterances; order is preserved (no shuffling)
# so predictions line up with the input frames.
test_dataset = myDataset(X=test_data, Y=None, pad_val=pad_val, context=context, is_test=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Predict a class index for every test frame.
# `pred` is a list with one 1-D tensor of class indices per batch, in loader order.
pred = []
model.eval()
with torch.no_grad():
    # A distinct loop name avoids overwriting the global `test_data` array.
    for k, test_batch in enumerate(test_loader):
        # Flatten each context window; use the real batch size because the
        # last batch may be smaller than `batch_size`.
        test_batch = test_batch.reshape(test_batch.size(0), -1).float().to(device)
        test_output = model(test_batch)
        # Move results to the CPU so predictions do not accumulate in GPU memory.
        pred.append(torch.argmax(test_output, dim=1).cpu())