In [105]:
import preprocessing as pp
import random
import copy
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchtext import data
import pandas as pd

In [106]:
df=pd.read_csv('projtest.csv')

In [108]:
df['decision_binary'].value_counts()

0.0    100
1.0     17
Name: decision_binary, dtype: int64

In [109]:
100/117

0.8547008547008547

In [76]:
train, test, val, TEXT, LABEL = pp.get_data('projtrain.csv', 'projval.csv', 'projtest.csv', None)

Connected!


In [79]:
TEXT.build_vocab(train)
print(TEXT.vocab.itos[10]) 

evidence


In [80]:
len(train)

714

In [81]:
len(test)

117

In [82]:
len(val)

59

In [83]:
LABEL.build_vocab(train)

In [84]:
print(TEXT.vocab.itos[:10])

['<unk>', '<pad>', 'petitioner', 'resident', 'cms', 'ex', 'facility', 'c.f.r', 'care', 'i.g']


In [86]:
len(TEXT.vocab)

32168

In [87]:
BATCH_SIZE = 10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train, val, test), 
    batch_size = BATCH_SIZE,
    sort_key = lambda x: len(x.alj_text),
    sort_within_batch = True, 
    device = device)

In [88]:
def binary_accuracy(preds, y):
    """
    Return accuracy per batch
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

In [89]:


class WordEmbAvg(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, pad_idx, two_layers=True, dropout_p=0.0):
        
        super().__init__()
        
        # Define an embedding layer, a couple of linear layers, and 
        # the ReLU non-linearity.

        ##YOUR CODE HERE##
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        if two_layers == True:
            self.linear1 = nn.Linear(embedding_dim, hidden_dim)
            self.linear2 = nn.Linear(hidden_dim, output_dim) 
        else:
            self.linear1 = nn.Linear(embedding_dim, output_dim)
            self.linear2 = None
        self.relu = nn.ReLU()
        self.drop_layer = nn.Dropout(p=dropout_p)

        
        
    def forward(self, text):

        ##YOUR CODE HERE##
        embedded = self.embedding(text)
        embedded = embedded.mean(0)
        if not self.linear2:
            linear1_output = self.linear1(embedded)
            output = self.relu(linear1_output)
            output = self.drop_layer(output)
            return output
        else:
            linear1_output = self.linear1(embedded)
            linear2_input = self.relu(linear1_output)
            output = self.linear2(linear2_input)
            output = self.drop_layer(output)
            return output

In [110]:
class Training_module( ):

    def __init__(self, model):
        self.model = model
        self.loss_fn = nn.BCEWithLogitsLoss()
        ##YOUR CODE HERE##
        # Choose an optimizer. optim.Adam is a popular choice
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)
    
    def train_epoch(self, iterator):
        '''
        Train the model for one epoch. For this repeat the following, 
        going through all training examples.
        1. Get the next batch of inputs from the iterator.
        2. Determine the predictions using a forward pass.
        3. Compute the loss.
        4. Compute gradients using a backward pass.
        5. Execute one step of the optimizer to update the model paramters.
        '''
        epoch_loss = 0
        epoch_acc = 0
    
        for batch in iterator:
          # batch.alj_text has the texts and batch.decision_binary has the labels.
        
            self.optimizer.zero_grad()
                
            ##YOUR CODE HERE##
            
            predictions = self.model(batch.alj_text).squeeze(1)
            loss = self.loss_fn(predictions, batch.decision_binary)
            accuracy = binary_accuracy(predictions, batch.decision_binary)
        
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.item()
            epoch_acc += accuracy.item()
        
        return epoch_loss / len(iterator), epoch_acc / len(iterator)
    
    def train_model(self, train_iterator, dev_iterator, num_epochs=5):
        """
        Train the model for multiple epochs, and after each evaluate on the
        development set.  Return the best performing model.
        """  
        dev_accs = [0.]
        for epoch in range(num_epochs):
            self.train_epoch(train_iterator)
            dev_acc = self.evaluate(dev_iterator)
            print(f"Epoch {epoch}: Dev Accuracy: {dev_acc[1]} Dev Loss:{dev_acc[0]}")
            if dev_acc[1] > max(dev_accs):
                best_model = copy.deepcopy(self)
            dev_accs.append(dev_acc[1])
        return best_model.model
                
    def evaluate(self, iterator):
        '''
        Evaluate the performance of the model on the given examples.
        '''
        epoch_loss = 0
        epoch_acc = 0
        all_predicts = []
    
        with torch.no_grad():
    
            for batch in iterator:
                
                predictions = self.model(batch.alj_text).squeeze(1)
                all_predicts.append(predictions)
            
                loss = self.loss_fn(predictions, batch.decision_binary)
            
                acc = binary_accuracy(predictions, batch.decision_binary)
        
                epoch_loss += loss.item()
                epoch_acc += acc.item()
        print(all_predicts)
        return epoch_loss / len(iterator), epoch_acc / len(iterator)
    
    
        

In [111]:
INPUT_DIM = len(TEXT.vocab)
#You can try many different embedding dimensions. Common values are 20, 32, 64, 100, 128, 512
EMBEDDING_DIM = 100
HIDDEN_DIM = 50
OUTPUT_DIM = 1
#Get the index of the pad token using the stoi function
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

In [112]:
INPUT_DIM

32168

In [113]:
EMBEDDING_DIM

100

In [114]:
HIDDEN_DIM

50

In [115]:
OUTPUT_DIM

1

In [116]:
PAD_IDX

1

In [122]:
for batch in train_iterator:
    print(batch)
    print(batch.alj_text, batch.decision_binary)


[torchtext.data.batch.Batch of size 10]
	[.alj_text]:[torch.LongTensor of size 1676x10]
	[.decision_binary]:[torch.FloatTensor of size 10]
tensor([[ 262,  262,  262,  ...,  262,  279,  279],
        [  35,   35,   35,  ...,   35,  580, 1761],
        [ 339,  339,  339,  ...,  339,  363, 3725],
        ...,
        [   4,  154,    1,  ...,    1,    1,    1],
        [  65,   48,    1,  ...,    1,    1,    1],
        [  38,  176,    1,  ...,    1,    1,    1]]) tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 1.])

[torchtext.data.batch.Batch of size 4]
	[.alj_text]:[torch.LongTensor of size 78276x4]
	[.decision_binary]:[torch.FloatTensor of size 4]
tensor([[ 9752,   262,   279,   279],
        [15113,    35,  2221,   417],
        [   35,   339,  3937,   298],
        ...,
        [    9,     1,     1,     1],
        [   80,     1,     1,     1],
        [  464,     1,     1,     1]]) tensor([1., 1., 0., 0.])

[torchtext.data.batch.Batch of size 10]
	[.alj_text]:[torch.LongTensor of size 


[torchtext.data.batch.Batch of size 10]
	[.alj_text]:[torch.LongTensor of size 1342x10]
	[.decision_binary]:[torch.FloatTensor of size 10]
tensor([[262, 262, 262,  ..., 262, 262, 262],
        [ 35,  35,  35,  ...,  35,  35,  35],
        [339, 339, 339,  ..., 339, 339, 339],
        ...,
        [154,   1,   1,  ...,   1,   1,   1],
        [ 48,   1,   1,  ...,   1,   1,   1],
        [176,   1,   1,  ...,   1,   1,   1]]) tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])

[torchtext.data.batch.Batch of size 10]
	[.alj_text]:[torch.LongTensor of size 1783x10]
	[.decision_binary]:[torch.FloatTensor of size 10]
tensor([[ 262,  262,  262,  ...,  279,  262,  262],
        [  35,   35,   35,  ..., 1408,   35,   35],
        [ 339,  339,  339,  ..., 1674,  339,  339],
        ...,
        [ 154,    1,    1,  ...,    1,    1,    1],
        [  48,    1,    1,  ...,    1,    1,    1],
        [ 176,    1,    1,  ...,    1,    1,    1]]) tensor([0., 1., 0., 1., 1., 0., 0., 0., 0., 1.])

[torc


[torchtext.data.batch.Batch of size 10]
	[.alj_text]:[torch.LongTensor of size 3095x10]
	[.decision_binary]:[torch.FloatTensor of size 10]
tensor([[ 279,  279,  262,  ...,  279,  262,  262],
        [4579, 4406,   35,  ..., 4388,   35,   35],
        [  76, 1459,  339,  ..., 5567,  339,  339],
        ...,
        [   2,    1,    1,  ...,    1,    1,    1],
        [ 179,    1,    1,  ...,    1,    1,    1],
        [  93,    1,    1,  ...,    1,    1,    1]]) tensor([1., 1., 0., 0., 0., 0., 0., 1., 1., 0.])

[torchtext.data.batch.Batch of size 10]
	[.alj_text]:[torch.LongTensor of size 2364x10]
	[.decision_binary]:[torch.FloatTensor of size 10]
tensor([[262, 262, 262,  ..., 262, 262, 262],
        [ 35,  35,  35,  ...,  35,  35,  35],
        [339, 339, 339,  ..., 339, 339, 339],
        ...,
        [154, 176,   1,  ...,   1,   1,   1],
        [ 48,   1,   1,  ...,   1,   1,   1],
        [176,   1,   1,  ...,   1,   1,   1]]) tensor([1., 0., 0., 1., 0., 0., 0., 0., 1., 1.])

[torc

In [119]:
model = WordEmbAvg(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, PAD_IDX)

In [120]:
model = model.to(device)
tm = Training_module(model)

#Training the model
best_model = tm.train_model(train_iterator, valid_iterator)

[tensor([-0.5495, -0.5964, -0.7197, -0.7359, -0.9269, -0.9269, -1.0208, -1.0832,
        -1.0832, -1.3517]), tensor([-0.5415, -0.5320, -0.5887, -0.5656, -0.5546, -0.4816, -0.8406, -0.9077,
        -0.9369, -0.9015]), tensor([-0.1758, -0.5584, -0.5205, -0.5519, -0.5828, -0.6856, -0.6279, -0.3824,
        -0.7381, -0.6415]), tensor([-0.6342, -0.3346, -0.5979, -0.6969, -0.6969, -0.6981, -0.7053, -0.7138,
        -0.6693, -0.7195]), tensor([-0.4902, -0.6538, -0.3138, -0.5569, -0.5209, -0.4877, -0.7876, -0.8005,
        -0.8055, -0.8403]), tensor([-0.3947, -1.2038, -1.3308, -1.3592, -1.4351, -1.4239, -1.4961, -1.5949,
        -1.5965])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.5222808669010798
[tensor([-0.6728, -0.7456, -0.8581, -0.8452, -1.0162, -1.0162, -1.1093, -1.1575,
        -1.1575, -1.4256]), tensor([-0.6532, -0.6446, -0.7081, -0.6095, -0.6259, -0.4420, -0.9130, -0.9808,
        -1.0270, -0.9606]), tensor([-0.1089, -0.6036, -0.6258, -0.6741, -0.6861, -0.7820, -0.7198, -0

In [121]:
tm.model = best_model
test_loss, test_acc = tm.evaluate(test_iterator)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

[tensor([-0.6196, -0.2543, -0.6251, -0.7020, -0.6722, -0.8301, -0.7737, -0.9123,
        -0.8419, -0.7635]), tensor([-0.2020, -0.3430, -0.2978, -0.7176, -0.6170, -0.4696, -0.7163, -0.7104,
        -0.9722, -0.9500]), tensor([-0.5514, -0.5677, -0.6386, -0.5826, -0.4027, -0.6413, -0.4362, -0.6155,
        -0.4556, -0.4441]), tensor([-0.5081, -0.4282, -0.6221, -0.6558, -0.5977, -0.6236, -0.7335, -0.7436,
        -0.6348, -0.7112]), tensor([-0.5318, -0.2414, -0.5734, -0.2648, -0.6642, -0.2754, -0.6566, -0.7430,
        -0.7222, -0.5763]), tensor([-0.5469, -0.5975, -0.6439, -0.5210, -0.6553, -0.7595, -0.7904, -0.8504,
        -0.7644, -0.5067]), tensor([-0.5577, -0.5843, -0.3638, -0.5586, -0.2437, -0.5049, -0.4320, -0.5736,
        -0.6433, -0.6490]), tensor([-0.6076, -0.6223, -0.5521, -0.5770, -0.6226, -0.5751, -0.6106, -0.6659,
        -0.5960, -0.5968]), tensor([-0.5772, -0.4670, -0.5457, -0.6335, -0.6093, -0.3388, -0.6051, -0.6118,
        -0.5424, -0.5785]), tensor([-0.2163, -0.6303, -

In [101]:
norms = torch.norm(best_model.embedding.weight, p=2, dim=1, keepdim=True).data.squeeze()
norms

tensor([ 8.1913, 10.2324, 10.6609,  ..., 10.3941, 10.1796, 10.8370])

In [102]:
highest_10 = norms.argsort()[-10:]

In [103]:
for i in highest_10:
    print(TEXT.vocab.itos[i])

haggle
sargent
1395nn(b)(2
preadmission
issue.1
weakly
whomever
3­
traveling
10/2/08


In [123]:
model = WordEmbAvg(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, PAD_IDX)
model = model.to(device)
tm = Training_module(model)
tm.optimizer = optim.Adagrad(tm.model.parameters(), lr=1e-3)
optim_model = tm.train_model(train_iterator, valid_iterator)
tm.model = optim_model
test_loss, test_acc = tm.evaluate(test_iterator)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

[tensor([-0.1006, -0.1322, -0.1603, -0.1753, -0.2401, -0.2401, -0.2262, -0.2591,
        -0.2591, -0.2767]), tensor([-0.1045, -0.1157, -0.1446, -0.1397, -0.1815, -0.2004, -0.2331, -0.2328,
        -0.2207, -0.2173]), tensor([-0.0713, -0.1793, -0.1401, -0.1125, -0.1820, -0.1776, -0.1972, -0.1794,
        -0.2285, -0.2097]), tensor([-0.1213, -0.1317, -0.1598, -0.2350, -0.2350, -0.2228, -0.2048, -0.1999,
        -0.1666, -0.2189]), tensor([-0.1321, -0.1317, -0.1383, -0.1258, -0.1385, -0.1930, -0.1897, -0.2057,
        -0.1942, -0.1847]), tensor([-0.1102, -0.2512, -0.2455, -0.2547, -0.2775, -0.2827, -0.2916, -0.2874,
        -0.3010])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.6306751867135366
[tensor([-0.1373, -0.1884, -0.2297, -0.2583, -0.3651, -0.3651, -0.3612, -0.4053,
        -0.4053, -0.4798]), tensor([-0.1444, -0.1618, -0.2016, -0.1969, -0.2471, -0.2660, -0.3399, -0.3587,
        -0.3509, -0.3428]), tensor([-0.1019, -0.2374, -0.1952, -0.1744, -0.2414, -0.2427, -0.2702, -0

In [124]:
model = WordEmbAvg(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, PAD_IDX, two_layers=False)
model = model.to(device)
tm = Training_module(model)
onelayer_model = tm.train_model(train_iterator, valid_iterator)
tm.model = onelayer_model
test_loss, test_acc = tm.evaluate(test_iterator)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

[tensor([0.0142, 0.0000, 0.0000, 0.0000, 0.0039, 0.0039, 0.0000, 0.0000, 0.0000,
        0.0331]), tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0201, 0.0272, 0.0065, 0.0000,
        0.0603]), tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0493, 0.0000,
        0.0132]), tensor([0.0000, 0.0097, 0.0000, 0.0000, 0.0000, 0.0031, 0.0000, 0.0000, 0.0000,
        0.0126]), tensor([0.0000, 0.0000, 0.0434, 0.0000, 0.0000, 0.0398, 0.0000, 0.0000, 0.0000,
        0.0000]), tensor([0.0000, 0.0359, 0.0169, 0.0303, 0.0684, 0.0722, 0.0720, 0.0613, 0.0659])]
Epoch 0: Dev Accuracy: 0.6407407472531 Dev Loss:0.6956133246421814
[tensor([0.0472, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000]), tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), tensor([0.0081, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0158, 0.0000,
        0.0000]), tensor([0.0000, 0.0086, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000]), tensor([0.0000,

In [125]:
for dropout_rate in [0, 0.1, 0.25, 0.5, 0.75]:
    print(f'Dropout rate: {dropout_rate}')
    model = WordEmbAvg(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, PAD_IDX, dropout_p=dropout_rate)
    model = model.to(device)
    tm = Training_module(model)
    dropout_model = tm.train_model(train_iterator, valid_iterator)
    tm.model = dropout_model
    test_loss, test_acc = tm.evaluate(test_iterator)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Dropout rate: 0
[tensor([-0.6277, -0.7806, -0.8412, -0.8342, -1.1416, -1.1416, -1.0421, -1.1712,
        -1.1712, -1.3914]), tensor([-0.7609, -0.6085, -0.6505, -0.6940, -0.7861, -0.5057, -0.9856, -1.0631,
        -1.1114, -0.9087]), tensor([-0.3164, -0.6587, -0.8133, -0.7380, -0.7821, -0.6895, -0.8458, -0.4230,
        -0.8630, -0.6802]), tensor([-0.6356, -0.3467, -0.7113, -0.8079, -0.8079, -0.8973, -0.9032, -0.8715,
        -0.8310, -0.8573]), tensor([-0.5831, -0.7263, -0.2980, -0.6266, -0.6354, -0.4693, -0.8133, -0.9249,
        -0.8799, -0.8655]), tensor([-0.4251, -1.1211, -1.2091, -1.2275, -1.2897, -1.2432, -1.3020, -1.4339,
        -1.4267])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.4965086628993352
[tensor([-0.7263, -0.9957, -1.0059, -0.9961, -1.3637, -1.3637, -1.2472, -1.3552,
        -1.3552, -1.7082]), tensor([-0.9152, -0.7113, -0.8069, -0.8250, -0.9806, -0.5004, -1.1814, -1.2648,
        -1.3417, -1.0357]), tensor([-0.2975, -0.8025, -0.9819, -0.9119, -0.9447, -0.8

[tensor([-0.6149, -0.0000, -0.0000, -0.9099, -0.0000, -1.2040, -1.3445, -1.3210,
        -0.0000, -0.0000]), tensor([-0.0000, -0.6259, -0.7333, -0.6722, -0.6737, -0.6353, -1.0403, -1.1928,
        -1.2203, -1.2594]), tensor([-0.0000, -0.5576, -0.7504, -0.7387, -0.0000, -0.8444, -0.8336, -0.5873,
        -0.9026, -0.9333]), tensor([-0.0000, -0.4130, -0.0000, -0.0000, -0.7896, -0.0000, -0.8131, -0.0000,
        -0.9693, -0.0000]), tensor([-0.6418, -0.7596, -0.4257, -0.7308, -0.0000, -0.6956, -1.0630, -0.0000,
        -1.1405, -1.1752]), tensor([-0.0000, -1.7666, -2.0028, -0.0000, -2.1081, -2.1369, -0.0000, -2.3573,
        -2.3366])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.5778748293717703
[tensor([-0.0000, -0.0000, -1.0093, -1.1208, -1.4609, -1.4609, -1.6405, -1.5907,
        -1.5907, -0.0000]), tensor([-0.0000, -0.7595, -0.9418, -0.7775, -0.8732, -0.6792, -1.2763, -1.4673,
        -0.0000, -1.4843]), tensor([-0.3736, -0.0000, -0.9071, -0.9065, -0.8778, -1.0143, -1.0013, -0

[tensor([-0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.5811, -0.0000,
        -0.0000, -0.0000]), tensor([-0.6538, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000]), tensor([-0., -0., -0., -0., -0., -0., -0., -0., -0., -0.]), tensor([-0.0000, -0.0000, -0.0000, -0.0000, -0.4745, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.5733]), tensor([-0.5579, -0.0000, -0.4295, -0.5345, -0.0000, -0.0000, -0.0000, -0.3361,
        -0.0000, -0.0000]), tensor([-0.4260, -0.0000, -0.0000, -0.2835, -0.0000, -0.2565, -0.2932, -0.0000,
        -0.0000])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.6668262978394827
[tensor([-0., -0., -0., -0., -0., -0., -0., -0., -0., 0.]), tensor([-0.0000, -0.0000, -0.0000, -0.0000, -0.6715, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000]), tensor([-0.0000e+00, -8.4456e-01, -0.0000e+00, -0.0000e+00, -0.0000e+00,
        -7.6450e-01, -0.0000e+00, -0.0000e+00, -0.0000e+00, -6.2615e-04]), tensor([-0.0000, -0.25

In [126]:
df = pd.DataFrame(columns=['embedding', 'hidden', 'accuracy'])
for embed_dimension in [20, 32, 64, 100]:
    for hidden_dimension in [10, 25, 40, 50]:
        print(f'Embedding dimensions: {embed_dimension}, Hidden dimensions: {hidden_dimension}')
        model = WordEmbAvg(INPUT_DIM, embed_dimension, hidden_dimension, OUTPUT_DIM, PAD_IDX)
        model = model.to(device)
        tm = Training_module(model)
        dim_model = tm.train_model(train_iterator, valid_iterator)
        tm.model = dim_model
        test_loss, test_acc = tm.evaluate(test_iterator)
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
        row = [embed_dimension, hidden_dimension, test_acc*100]
        df.loc[len(df)] = row

Embedding dimensions: 20, Hidden dimensions: 10
[tensor([-0.2346, -0.1349, -0.1892, -0.1687, -0.1621, -0.1621, -0.1499, -0.1535,
        -0.1535, -0.1318]), tensor([-0.1630, -0.1627, -0.1642, -0.1765, -0.1550, -0.1506, -0.1705, -0.1356,
        -0.1320, -0.1476]), tensor([-0.1663, -0.2145, -0.1544, -0.1550, -0.1586, -0.1826, -0.1553, -0.1525,
        -0.1661, -0.1605]), tensor([-0.1303, -0.1735, -0.1488, -0.1920, -0.1920, -0.1994, -0.1542, -0.1711,
        -0.1477, -0.1698]), tensor([-0.1424, -0.1956, -0.1415, -0.1453, -0.1440, -0.1555, -0.1450, -0.1216,
        -0.1182, -0.1466]), tensor([-0.1364, -0.1367, -0.1275, -0.1271, -0.1310, -0.1361, -0.1276, -0.1299,
        -0.1299])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.638836661974589
[tensor([-0.4632, -0.3731, -0.4527, -0.4096, -0.4555, -0.4555, -0.4272, -0.4600,
        -0.4600, -0.4305]), tensor([-0.3947, -0.3851, -0.3948, -0.3894, -0.3891, -0.3366, -0.4517, -0.4074,
        -0.4038, -0.4201]), tensor([-0.2794, -0.4561, 

[tensor([-0.3635, -0.3856, -0.3906, -0.3672, -0.4213, -0.4213, -0.4452, -0.4296,
        -0.4296, -0.5135]), tensor([-0.3592, -0.3408, -0.3416, -0.3688, -0.3248, -0.3106, -0.3486, -0.3701,
        -0.4197, -0.3886]), tensor([-0.2880, -0.3607, -0.3639, -0.3509, -0.3582, -0.3773, -0.3618, -0.2884,
        -0.3735, -0.3405]), tensor([-0.2889, -0.2798, -0.3240, -0.3484, -0.3484, -0.3556, -0.3736, -0.3642,
        -0.3781, -0.3338]), tensor([-0.3106, -0.3115, -0.2839, -0.3536, -0.3105, -0.2964, -0.3827, -0.3764,
        -0.4022, -0.3712]), tensor([-0.2855, -0.4842, -0.5198, -0.5245, -0.5575, -0.5478, -0.5626, -0.5993,
        -0.5967])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.5790853997071584
[tensor([-0.6433, -0.7050, -0.7333, -0.7038, -0.8158, -0.8158, -0.8419, -0.8282,
        -0.8282, -0.9913]), tensor([-0.6297, -0.6246, -0.6277, -0.6509, -0.6007, -0.5438, -0.6731, -0.7190,
        -0.8103, -0.7361]), tensor([-0.4326, -0.6656, -0.6792, -0.6480, -0.6778, -0.6848, -0.6939, -0

[tensor([-0.3674, -0.3687, -0.3891, -0.4156, -0.4321, -0.4321, -0.4240, -0.3832,
        -0.3832, -0.3952]), tensor([-0.3791, -0.3915, -0.3765, -0.4033, -0.3691, -0.3713, -0.4096, -0.3743,
        -0.3765, -0.3868]), tensor([-0.3633, -0.3622, -0.3623, -0.3839, -0.3575, -0.3809, -0.3660, -0.3643,
        -0.3695, -0.3791]), tensor([-0.4220, -0.3642, -0.3595, -0.3580, -0.3580, -0.3697, -0.3743, -0.3771,
        -0.3990, -0.3868]), tensor([-0.3843, -0.3865, -0.3568, -0.3817, -0.3569, -0.3758, -0.3894, -0.3823,
        -0.3901, -0.3704]), tensor([-0.3488, -0.4061, -0.4039, -0.3930, -0.4036, -0.4055, -0.4147, -0.4176,
        -0.4095])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.5742957989374796
[tensor([-0.4765, -0.4867, -0.5357, -0.5618, -0.6151, -0.6151, -0.6085, -0.5599,
        -0.5599, -0.6098]), tensor([-0.5009, -0.5084, -0.4984, -0.5309, -0.4889, -0.4689, -0.5667, -0.5331,
        -0.5413, -0.5519]), tensor([-0.4364, -0.4778, -0.4804, -0.5160, -0.4761, -0.5125, -0.4911, -0

[tensor([-0.3621, -0.3803, -0.3886, -0.3864, -0.4326, -0.4326, -0.4529, -0.4454,
        -0.4454, -0.5230]), tensor([-0.3806, -0.3776, -0.3992, -0.3688, -0.3838, -0.3167, -0.4133, -0.4404,
        -0.4531, -0.4440]), tensor([-0.2944, -0.3883, -0.3471, -0.3669, -0.3629, -0.4211, -0.3793, -0.3206,
        -0.4057, -0.3649]), tensor([-0.3495, -0.3203, -0.3775, -0.3956, -0.3956, -0.4055, -0.4092, -0.4060,
        -0.3836, -0.4216]), tensor([-0.3601, -0.3843, -0.2811, -0.3911, -0.3658, -0.3295, -0.3968, -0.4053,
        -0.4090, -0.4175]), tensor([-0.3362, -0.4830, -0.4983, -0.5088, -0.5115, -0.5101, -0.5222, -0.5460,
        -0.5455])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.5719412565231323
[tensor([-0.6310, -0.6960, -0.7161, -0.6917, -0.8036, -0.8036, -0.8878, -0.8557,
        -0.8557, -1.0471]), tensor([-0.6917, -0.6618, -0.7490, -0.6475, -0.6745, -0.4864, -0.7552, -0.8370,
        -0.8734, -0.8420]), tensor([-0.3870, -0.6746, -0.6016, -0.6487, -0.6501, -0.7742, -0.6896, -0

[tensor([-0.0291, -0.0619, -0.1333, -0.0720, -0.1595, -0.1595, -0.1571, -0.2135,
        -0.2135, -0.2765]), tensor([-0.0166, -0.0762, -0.0478, -0.0714, -0.0794, -0.0659, -0.1509, -0.1628,
        -0.1870, -0.1346]), tensor([ 0.0253, -0.0865, -0.0153, -0.0634, -0.0249, -0.1169, -0.0643, -0.0466,
        -0.1104, -0.1133]), tensor([-0.0320, -0.0029, -0.0502, -0.1019, -0.1019, -0.1041, -0.1060, -0.1158,
        -0.1025, -0.1351]), tensor([-0.0333, -0.0482, -0.0070, -0.0778, -0.0143, -0.0646, -0.1248, -0.1212,
        -0.1163, -0.1158]), tensor([ 0.0054, -0.2212, -0.2506, -0.2435, -0.2813, -0.2791, -0.2945, -0.3161,
        -0.3213])]
Epoch 0: Dev Accuracy: 0.8259259263674418 Dev Loss:0.6594084401925405
[tensor([-0.3336, -0.4676, -0.6402, -0.5651, -0.8199, -0.8199, -0.8311, -0.9531,
        -0.9531, -1.2217]), tensor([-0.3571, -0.4333, -0.4318, -0.4766, -0.4975, -0.4618, -0.7243, -0.8219,
        -0.8691, -0.7419]), tensor([-0.1992, -0.4903, -0.3782, -0.4989, -0.3976, -0.5723, -0.4967, -0

[tensor([-0.3774, -0.4627, -0.5160, -0.5698, -0.9275, -0.9275, -0.9329, -1.0751,
        -1.0751, -1.4514]), tensor([-0.4000, -0.3854, -0.4301, -0.4557, -0.5964, -0.5416, -0.7973, -0.9563,
        -0.9749, -0.9238]), tensor([-0.1590, -0.4412, -0.4446, -0.5481, -0.4696, -0.5009, -0.5063, -0.3982,
        -0.6833, -0.6874]), tensor([-0.3440, -0.3229, -0.6341, -0.6488, -0.6488, -0.7203, -0.7194, -0.7186,
        -0.7536, -0.7785]), tensor([-0.3967, -0.3952, -0.2839, -0.5123, -0.5752, -0.5067, -0.7367, -0.8997,
        -0.8827, -0.9191]), tensor([-0.3636, -1.3275, -1.5039, -1.5652, -1.6243, -1.6367, -1.6910, -1.7772,
        -1.7919])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.539091631770134
[tensor([-0.6213, -0.8404, -0.9014, -0.9688, -1.5122, -1.5122, -1.5050, -1.7262,
        -1.7262, -2.2850]), tensor([-0.6850, -0.6711, -0.7782, -0.7347, -1.0054, -0.7876, -1.3090, -1.5363,
        -1.5646, -1.4471]), tensor([-0.1836, -0.7569, -0.7591, -0.9650, -0.7905, -0.8378, -0.8458, -0.

[tensor([-0.2310, -0.2284, -0.2455, -0.2595, -0.2900, -0.2900, -0.2876, -0.3038,
        -0.3038, -0.3307]), tensor([-0.2422, -0.2275, -0.2386, -0.2461, -0.2500, -0.2471, -0.2678, -0.2863,
        -0.2821, -0.2880]), tensor([-0.2250, -0.2502, -0.2432, -0.2500, -0.2469, -0.2411, -0.2486, -0.2426,
        -0.2603, -0.2681]), tensor([-0.2311, -0.2305, -0.2515, -0.2601, -0.2601, -0.2617, -0.2608, -0.2607,
        -0.2722, -0.2673]), tensor([-0.2336, -0.2468, -0.2275, -0.2438, -0.2472, -0.2531, -0.2719, -0.2698,
        -0.2774, -0.2801]), tensor([-0.2304, -0.3222, -0.3388, -0.3398, -0.3464, -0.3503, -0.3536, -0.3635,
        -0.3642])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.607671856880188
[tensor([-0.3906, -0.3993, -0.4514, -0.4787, -0.5880, -0.5880, -0.5959, -0.6412,
        -0.6412, -0.7377]), tensor([-0.4188, -0.3838, -0.4118, -0.4207, -0.4446, -0.4114, -0.5082, -0.5817,
        -0.5682, -0.5706]), tensor([-0.3193, -0.4242, -0.4180, -0.4452, -0.4435, -0.4351, -0.4590, -0.

[tensor([-0.4715, -0.5501, -0.5865, -0.7920, -1.1209, -1.1209, -1.2301, -1.2365,
        -1.2365, -1.8212]), tensor([-0.5191, -0.4432, -0.5593, -0.5505, -0.6004, -0.6354, -1.0122, -1.1399,
        -1.1404, -1.1713]), tensor([-0.2291, -0.4718, -0.5512, -0.5456, -0.6189, -0.6243, -0.7029, -0.5380,
        -0.7992, -0.8793]), tensor([-0.4051, -0.3288, -0.7224, -0.7191, -0.7191, -0.7521, -0.7689, -0.8351,
        -0.8102, -0.8924]), tensor([-0.4623, -0.5506, -0.3687, -0.5310, -0.6732, -0.6478, -0.9520, -0.9758,
        -1.0398, -1.1388]), tensor([-0.3700, -1.6404, -1.9077, -1.9642, -2.0500, -2.0740, -2.1355, -2.2632,
        -2.2657])]
Epoch 0: Dev Accuracy: 0.8611111144224802 Dev Loss:0.5333086301883062
[tensor([-0.8150, -0.9603, -0.9446, -1.1844, -1.5835, -1.5835, -1.6893, -1.6986,
        -1.6986, -2.3956]), tensor([-0.8589, -0.7725, -0.9292, -0.8510, -0.9784, -0.8255, -1.5000, -1.6001,
        -1.5921, -1.5915]), tensor([-0.3404, -0.7973, -0.8958, -0.8899, -0.9782, -0.9765, -1.0712, -0

In [128]:
df

Unnamed: 0,embedding,hidden,accuracy
0,20.0,10.0,84.404761
1,20.0,25.0,84.404761
2,20.0,40.0,84.404761
3,20.0,50.0,84.404761
4,32.0,10.0,84.404761
5,32.0,25.0,84.404761
6,32.0,40.0,84.404761
7,32.0,50.0,84.404761
8,64.0,10.0,84.404761
9,64.0,25.0,84.404761
