In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable

In [2]:
SEQ_LEN = 4
AMMINO_LEN = 21

In [3]:
training_data = pd.read_csv("task3_ks39mcp5/train.csv")
test_data = pd.read_csv("task3_ks39mcp5/test.csv")

In [4]:
def sequence(seq):
    vector = [ord(letter) for letter in seq]
    
    return vector

In [5]:
def bitmask(seq, alphabet=None):
    if alphabet==None:
        alphabet = ['R', 'H', 'K', 'D', 'E', 'S', 'T', 'N', 'Q', 'C', 'U', 'G', 'P', 'A', 'I', 'L', 'M', 'F', 'W', 'Y', 'V']
    
    vector = [[0 if char != letter else 1 for char in alphabet] for letter in seq]
    return vector

In [6]:
y_train = training_data['Active']

In [7]:
def process_data(data):
    X = np.zeros((data.shape[0], SEQ_LEN, AMMINO_LEN))
    for i in range(data.shape[0]):
        X[i,:,:] = bitmask(data.iloc[i, 0])
    return X

In [8]:
X_train = process_data(training_data)
X_test = process_data(test_data)

In [9]:
X_train = Variable(torch.Tensor(X_train))
X_test = Variable(torch.Tensor(X_test))

y_train = Variable(torch.Tensor(y_train))
y_train = torch.reshape(y_train, (y_train.shape[0], 1))

In [10]:
X_train.shape

torch.Size([112000, 4, 21])

In [11]:
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU not available, CPU used


In [105]:
class Model(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(Model, self).__init__()

        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        #Defining the layers
        # RNN Layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, x):
        
        batch_size = x.shape[0]

        #Initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)

        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
         # We'll send the tensor holding the hidden state to the device we specified earlier as well
        return hidden

In [106]:
model = Model(input_size=AMMINO_LEN, output_size=1, hidden_dim=21, n_layers=1)
model = model.to(device)

In [107]:
#Params
lr = 0.01
n_epochs = 100

In [108]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [109]:
X_train = X_train.to(device)
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad()
    output, hidden = model(X_train)
    output = output.to(device)
    output = output[:,2:]
    y_train = y_train.to(device)
    loss = criterion(output, y_train.long())
    loss.backward() # Does backpropagation and calculates gradients
    optimizer.step() # Updates the weights accordingly
    
    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/100............. Loss: 0.3474
Epoch: 20/100............. Loss: 0.1603
Epoch: 30/100............. Loss: 0.1276
Epoch: 40/100............. Loss: 0.1090
Epoch: 50/100............. Loss: 0.0911
Epoch: 60/100............. Loss: 0.0801
Epoch: 70/100............. Loss: 0.0740
Epoch: 80/100............. Loss: 0.0701
Epoch: 90/100............. Loss: 0.0672
Epoch: 100/100............. Loss: 0.0650


In [118]:
preds = model(X_test)[0]

In [119]:
preds = preds[:,3,0]

In [114]:
preds.shape

torch.Size([48000])

In [120]:
preds

tensor([-2.7529, -5.5822, -5.3998,  ..., -4.1693, -3.4740,  1.9178],
       grad_fn=<SelectBackward>)

In [121]:
sub = np.zeros(len(preds))
for i in range(len(preds)):
    if preds[i]>0.5:
        sub[i] = 1
    else:
        sub[i] = 0

In [123]:
np.savetxt("submission.csv", sub, fmt="%s")