# Simple NNs with Law2Vec embeddings

## Importing data, pre-trained embeddings

In [1]:
import copy
from itertools import product
from numpy import isnan
import pandas as pd
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext.data as data
import torchtext.vocab as vocab
import sys

sys.path.append('../data_pipeline/')
import preprocessing as pre
from training import TrainingModule

In [2]:
train_data, test_data, val_data, TEXT, LABEL = pre.get_data(
    'train_small.csv', 'val_small.csv', 'test_small.csv', 50)

Connected!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sans_nulls_df['alj_text'] = sans_nulls_df['alj_text'].str.slice(0, 1000000)


In [4]:
USE_CUDA = torch.cuda.is_available()

vectors = vocab.Vectors('../embeds/Law2Vec.100d.txt') # Law2Vec available from https://archive.org/details/Law2Vec

TEXT.build_vocab(train_data, vectors=vectors,
                 unk_init = torch.Tensor.normal_)
LABEL.build_vocab(train_data)

BATCH_SIZE = 5

train_it, test_it, val_it = data.BucketIterator.splits(
    (train_data, test_data, val_data), 
    batch_size = BATCH_SIZE,
    sort_key=lambda x: len(x.alj_text),
    sort_within_batch=True,
    device = torch.device('cuda' if USE_CUDA else 'cpu'))

  0%|          | 0/169439 [00:00<?, ?it/s]Skipping token b'169439' with 1-dimensional vector [b'100']; likely a header
100%|██████████| 169439/169439 [00:11<00:00, 14910.71it/s]


## Checking pretrained vectors have been applied

In [6]:
vectors['medicare']

tensor([ 0.6415, -0.5367, -0.3537, -0.0634, -0.1798,  0.0626, -0.1836, -0.2705,
         0.2504,  0.5061,  0.4746, -0.2351, -0.0465,  0.3184,  0.8974,  0.0470,
        -0.2594,  0.3485, -0.3356,  0.1163,  0.2207,  0.2707,  0.4748,  0.1122,
        -0.1188, -0.0790,  0.4377, -0.4711,  0.1401, -0.0234, -0.2009, -0.2143,
         0.1335, -0.4407,  0.4077, -0.0634,  0.5104,  0.1820, -0.4729, -0.1758,
         0.6194,  0.5708, -0.3034, -0.3658,  0.1609,  0.0753, -0.2024, -0.1472,
         0.0665,  0.1823,  0.3091, -0.0913,  0.2495,  0.0777, -0.1873, -0.5850,
        -0.3243,  0.1540, -0.5094,  0.6227,  0.1163, -0.6202, -0.4416, -0.3509,
        -0.5760, -0.4837, -0.6283,  0.0938,  0.3528, -0.0674, -0.7097, -0.2053,
        -0.6007, -0.1306,  0.0146, -0.0830,  0.5486, -0.2328, -0.3193,  0.1496,
        -0.1635,  0.0755, -0.2594, -0.0317,  0.1249, -0.5599,  0.0722, -0.0369,
         0.3139,  0.0102, -0.3353,  0.1142, -0.1163,  0.1505,  0.0952,  0.0206,
        -0.0733, -0.4851,  0.4995,  0.04

In [7]:
TEXT.vocab.vectors[TEXT.vocab.stoi['medicare']]

tensor([ 0.6415, -0.5367, -0.3537, -0.0634, -0.1798,  0.0626, -0.1836, -0.2705,
         0.2504,  0.5061,  0.4746, -0.2351, -0.0465,  0.3184,  0.8974,  0.0470,
        -0.2594,  0.3485, -0.3356,  0.1163,  0.2207,  0.2707,  0.4748,  0.1122,
        -0.1188, -0.0790,  0.4377, -0.4711,  0.1401, -0.0234, -0.2009, -0.2143,
         0.1335, -0.4407,  0.4077, -0.0634,  0.5104,  0.1820, -0.4729, -0.1758,
         0.6194,  0.5708, -0.3034, -0.3658,  0.1609,  0.0753, -0.2024, -0.1472,
         0.0665,  0.1823,  0.3091, -0.0913,  0.2495,  0.0777, -0.1873, -0.5850,
        -0.3243,  0.1540, -0.5094,  0.6227,  0.1163, -0.6202, -0.4416, -0.3509,
        -0.5760, -0.4837, -0.6283,  0.0938,  0.3528, -0.0674, -0.7097, -0.2053,
        -0.6007, -0.1306,  0.0146, -0.0830,  0.5486, -0.2328, -0.3193,  0.1496,
        -0.1635,  0.0755, -0.2594, -0.0317,  0.1249, -0.5599,  0.0722, -0.0369,
         0.3139,  0.0102, -0.3353,  0.1142, -0.1163,  0.1505,  0.0952,  0.0206,
        -0.0733, -0.4851,  0.4995,  0.04

## Simple NN Model

In [9]:
class WordEmbAvgPtEmbeds(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, pad_idx, two_layers=True, dropout_p=0.0):
        
        super().__init__()
        
        # Define an embedding layer, a couple of linear layers, and 
        # the ReLU non-linearity.

        ##YOUR CODE HERE##
        self.embedding = nn.Embedding.from_pretrained(TEXT.vocab.vectors)
        if two_layers == True:
            self.linear1 = nn.Linear(embedding_dim, hidden_dim)
            self.linear2 = nn.Linear(hidden_dim, output_dim) 
        else:
            self.linear1 = nn.Linear(embedding_dim, output_dim)
            self.linear2 = None
        self.relu = nn.ReLU()
        self.drop_layer = nn.Dropout(p=dropout_p)

        
        
    def forward(self, text):

        ##YOUR CODE HERE##
        embedded = self.embedding(text)
        embedded = embedded.mean(0)
        if not self.linear2:
            linear1_output = self.linear1(embedded)
            output = self.relu(linear1_output)
            output = self.drop_layer(output)
            return output
        else:
            linear1_output = self.linear1(embedded)
            linear2_input = self.relu(linear1_output)
            output = self.linear2(linear2_input)
            output = self.drop_layer(output)
            return output

## Training models

In [10]:
# Store training results

df = pd.DataFrame(columns=['architecture', 'embeddings',
                           'hidden', 'dropouts',
                           'learning_rate', 'epochs',
                           'dev_acc', 'dev_prec', 'dev_recall',
                           'metric'])

# Model architecture parameters
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_SIZE = TEXT.vocab.vectors.size(1)
HIDDEN_SIZES = [10, 25, 40, 50]
OUTPUT_SIZE = 1
DROPOUTS = [0, 0.1, 0.25, 0.5, 0.75]
PADDING_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Model training hyperparameters
LEARNING_RATE = [0.01, 0.001, 0.0001]
train_len = 0
train_pos = 0
for batch in train_it:
    train_len += len(batch.decision_binary)
    train_pos += batch.decision_binary.sum().item()
POS_WEIGHT = torch.tensor([(train_len - train_pos) / train_pos])
if USE_CUDA:
    POS_WEIGHT = POS_WEIGHT.cuda()
EPOCHS = 10

# Iterator over various model parameters
param_iter = product (HIDDEN_SIZES, DROPOUTS, LEARNING_RATE)

# Magic loop
best_acc = (None, None)
best_rec = (None, None)
best_prec = (None, None)
for i, (hidden_size, dropout, lr) in enumerate(param_iter):
    print(f'Architecture #{i}\n' + '-' * 20)
    model = WordEmbAvgPtEmbeds(INPUT_DIM, EMBEDDING_SIZE, hidden_size,
                OUTPUT_SIZE, PADDING_IDX, dropout_p=dropout)
    
    tm = TrainingModule(model, lr, POS_WEIGHT, USE_CUDA, EPOCHS)
    
    best_models = tm.train_model(train_it, val_it)
    
    for metric, best_model in best_models.items():
        row = [i, 'Law2Vec', hidden_size, dropout,
               lr, EPOCHS, best_model.accuracy,
               best_model.precision, best_model.recall, metric]
        df.loc[len(df)] = row
        if best_acc[0] is None or best_model.accuracy > best_acc[1]:
            best_acc = (copy.deepcopy(best_model.model), best_model.accuracy)
        if best_rec[0] is None or best_model.recall > best_rec[1]:
            best_rec = (copy.deepcopy(best_model.model), best_model.recall)
        if best_prec[0] is None or isnan(best_prec[1]) or\
          best_model.precision > best_prec[1]:
            best_prec = (copy.deepcopy(best_model.model), best_model.precision)
    
    print('-' * 20 + '\n')


Architecture #0
--------------------
Epoch 0: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9198
Epoch 1: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9014
Epoch 2: Dev Accuracy: 0.5000; Dev Precision: 0.3333; Dev Recall: 1.0000; Dev Loss:0.8936
Epoch 3: Dev Accuracy: 0.7500; Dev Precision: 0.5000; Dev Recall: 1.0000; Dev Loss:0.8847
Epoch 4: Dev Accuracy: 0.7500; Dev Precision: 0.5000; Dev Recall: 1.0000; Dev Loss:0.8878
Epoch 5: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8836
Epoch 6: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8818
Epoch 7: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8790
Epoch 8: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8837
Epoch 9: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8857
--------------------

Architecture #1
--------------------
Epoc

Epoch 9: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8790
--------------------

Architecture #9
--------------------
Epoch 0: Dev Accuracy: 0.2500; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8987
Epoch 1: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8920
Epoch 2: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8966
Epoch 3: Dev Accuracy: 0.5000; Dev Precision: 0.3333; Dev Recall: 1.0000; Dev Loss:0.9031
Epoch 4: Dev Accuracy: 0.2500; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.9027
Epoch 5: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8985
Epoch 6: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.9028
Epoch 7: Dev Accuracy: 0.2500; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.9070
Epoch 8: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.9071
Epoch 9: Dev Accuracy: 0.7500; Dev Precision: nan; De

Epoch 6: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.8947
Epoch 7: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.8944
Epoch 8: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.8940
Epoch 9: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.8935
--------------------

Architecture #18
--------------------
Epoch 0: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8904
Epoch 1: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8896
Epoch 2: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8974
Epoch 3: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8988
Epoch 4: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.9013
Epoch 5: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.9066
Epoch 6: Dev Accuracy: 0.2500; Dev Precision: 0.2500; D

Epoch 5: Dev Accuracy: 0.7500; Dev Precision: 0.5000; Dev Recall: 1.0000; Dev Loss:0.8905
Epoch 6: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8904
Epoch 7: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8898
Epoch 8: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8896
Epoch 9: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8891
--------------------

Architecture #27
--------------------
Epoch 0: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9068
Epoch 1: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9011
Epoch 2: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8808
Epoch 3: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8793
Epoch 4: Dev Accuracy: 0.5000; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.8910
Epoch 5: Dev Accuracy: 0.2500; Dev Precision: 0.2

Epoch 1: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9081
Epoch 2: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9077
Epoch 3: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9067
Epoch 4: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9060
Epoch 5: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9054
Epoch 6: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9049
Epoch 7: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9039
Epoch 8: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9035
Epoch 9: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9029
--------------------

Architecture #36
--------------------
Epoch 0: Dev Accuracy: 0.2500; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.9039
Epoch 1: Dev Accuracy: 0.7500; Dev Preci

Epoch 9: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.8989
--------------------

Architecture #44
--------------------
Epoch 0: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9075
Epoch 1: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9068
Epoch 2: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9073
Epoch 3: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9069
Epoch 4: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9062
Epoch 5: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9055
Epoch 6: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9056
Epoch 7: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9052
Epoch 8: Dev Accuracy: 0.2500; Dev Precision: 0.2500; Dev Recall: 1.0000; Dev Loss:0.9052
Epoch 9: Dev Accuracy: 0.2500; Dev Preci

Epoch 7: Dev Accuracy: 0.7500; Dev Precision: 0.5000; Dev Recall: 1.0000; Dev Loss:0.8909
Epoch 8: Dev Accuracy: 0.7500; Dev Precision: 0.5000; Dev Recall: 1.0000; Dev Loss:0.8914
Epoch 9: Dev Accuracy: 0.7500; Dev Precision: 0.5000; Dev Recall: 1.0000; Dev Loss:0.8914
--------------------

Architecture #53
--------------------
Epoch 0: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8881
Epoch 1: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8882
Epoch 2: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8881
Epoch 3: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8882
Epoch 4: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8886
Epoch 5: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8892
Epoch 6: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.8893
Epoch 7: Dev Accuracy: 0.7500; Dev Precision: nan; Dev Recall

## Save results of model training

In [18]:
SAVE_PREFIX = '../results/SimpleNNLaw2Vec_'
df.to_csv(f'{SAVE_PREFIX}models.csv')
torch.save(best_acc[0], f'{SAVE_PREFIX}best_acc.pt')
torch.save(best_rec[0], f'{SAVE_PREFIX}best_rec.pt')
torch.save(best_prec[0], f'{SAVE_PREFIX}best_prec.pt')

  "type " + obj.__name__ + ". It won't be checked "


## Confirming embeddings have not been trained

In [19]:
medicare_tensor = torch.LongTensor([TEXT.vocab.stoi['medicare']])
if USE_CUDA:
    medicare_tensor = medicare_tensor.cuda()
model.embedding(medicare_tensor)

tensor([[ 0.6415, -0.5367, -0.3537, -0.0634, -0.1798,  0.0626, -0.1836, -0.2705,
          0.2504,  0.5061,  0.4746, -0.2351, -0.0465,  0.3184,  0.8974,  0.0470,
         -0.2594,  0.3485, -0.3356,  0.1163,  0.2207,  0.2707,  0.4748,  0.1122,
         -0.1188, -0.0790,  0.4377, -0.4711,  0.1401, -0.0234, -0.2009, -0.2143,
          0.1335, -0.4407,  0.4077, -0.0634,  0.5104,  0.1820, -0.4729, -0.1758,
          0.6194,  0.5708, -0.3034, -0.3658,  0.1609,  0.0753, -0.2024, -0.1472,
          0.0665,  0.1823,  0.3091, -0.0913,  0.2495,  0.0777, -0.1873, -0.5850,
         -0.3243,  0.1540, -0.5094,  0.6227,  0.1163, -0.6202, -0.4416, -0.3509,
         -0.5760, -0.4837, -0.6283,  0.0938,  0.3528, -0.0674, -0.7097, -0.2053,
         -0.6007, -0.1306,  0.0146, -0.0830,  0.5486, -0.2328, -0.3193,  0.1496,
         -0.1635,  0.0755, -0.2594, -0.0317,  0.1249, -0.5599,  0.0722, -0.0369,
          0.3139,  0.0102, -0.3353,  0.1142, -0.1163,  0.1505,  0.0952,  0.0206,
         -0.0733, -0.4851,  