# Simple NNs with Law2Vec embeddings

## Importing data, pre-trained embeddings

In [1]:
import copy
from itertools import product
from numpy import isnan
import pandas as pd
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext.data as data
import torchtext.vocab as vocab
import sys
import warnings
warnings.filterwarnings('ignore')

sys.path.append('../data_pipeline/')
import preprocessing as pre
from training import TrainingModule

SEED = 1312
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [2]:
train_data, test_data, val_data, TEXT, LABEL = pre.get_data(
    'train_small.csv', 'val_small.csv', 'test_small.csv')

Connected!


In [3]:
USE_CUDA = torch.cuda.is_available()

vectors = vocab.Vectors('../embeds/Law2Vec.100d.txt') # Law2Vec available from https://archive.org/details/Law2Vec

TEXT.build_vocab(train_data, vectors=vectors,
                 unk_init = torch.Tensor.normal_)
LABEL.build_vocab(train_data)

BATCH_SIZE = 5

train_it, test_it, val_it = data.BucketIterator.splits(
    (train_data, test_data, val_data), 
    batch_size = BATCH_SIZE,
    sort_key=lambda x: len(x.alj_text),
    sort_within_batch=True,
    device = torch.device('cuda' if USE_CUDA else 'cpu'))

## Checking pretrained vectors have been applied

In [4]:
vectors['medicare']

tensor([ 0.6415, -0.5367, -0.3537, -0.0634, -0.1798,  0.0626, -0.1836, -0.2705,
         0.2504,  0.5061,  0.4746, -0.2351, -0.0465,  0.3184,  0.8974,  0.0470,
        -0.2594,  0.3485, -0.3356,  0.1163,  0.2207,  0.2707,  0.4748,  0.1122,
        -0.1188, -0.0790,  0.4377, -0.4711,  0.1401, -0.0234, -0.2009, -0.2143,
         0.1335, -0.4407,  0.4077, -0.0634,  0.5104,  0.1820, -0.4729, -0.1758,
         0.6194,  0.5708, -0.3034, -0.3658,  0.1609,  0.0753, -0.2024, -0.1472,
         0.0665,  0.1823,  0.3091, -0.0913,  0.2495,  0.0777, -0.1873, -0.5850,
        -0.3243,  0.1540, -0.5094,  0.6227,  0.1163, -0.6202, -0.4416, -0.3509,
        -0.5760, -0.4837, -0.6283,  0.0938,  0.3528, -0.0674, -0.7097, -0.2053,
        -0.6007, -0.1306,  0.0146, -0.0830,  0.5486, -0.2328, -0.3193,  0.1496,
        -0.1635,  0.0755, -0.2594, -0.0317,  0.1249, -0.5599,  0.0722, -0.0369,
         0.3139,  0.0102, -0.3353,  0.1142, -0.1163,  0.1505,  0.0952,  0.0206,
        -0.0733, -0.4851,  0.4995,  0.04

In [5]:
TEXT.vocab.vectors[TEXT.vocab.stoi['medicare']]

tensor([ 0.6415, -0.5367, -0.3537, -0.0634, -0.1798,  0.0626, -0.1836, -0.2705,
         0.2504,  0.5061,  0.4746, -0.2351, -0.0465,  0.3184,  0.8974,  0.0470,
        -0.2594,  0.3485, -0.3356,  0.1163,  0.2207,  0.2707,  0.4748,  0.1122,
        -0.1188, -0.0790,  0.4377, -0.4711,  0.1401, -0.0234, -0.2009, -0.2143,
         0.1335, -0.4407,  0.4077, -0.0634,  0.5104,  0.1820, -0.4729, -0.1758,
         0.6194,  0.5708, -0.3034, -0.3658,  0.1609,  0.0753, -0.2024, -0.1472,
         0.0665,  0.1823,  0.3091, -0.0913,  0.2495,  0.0777, -0.1873, -0.5850,
        -0.3243,  0.1540, -0.5094,  0.6227,  0.1163, -0.6202, -0.4416, -0.3509,
        -0.5760, -0.4837, -0.6283,  0.0938,  0.3528, -0.0674, -0.7097, -0.2053,
        -0.6007, -0.1306,  0.0146, -0.0830,  0.5486, -0.2328, -0.3193,  0.1496,
        -0.1635,  0.0755, -0.2594, -0.0317,  0.1249, -0.5599,  0.0722, -0.0369,
         0.3139,  0.0102, -0.3353,  0.1142, -0.1163,  0.1505,  0.0952,  0.0206,
        -0.0733, -0.4851,  0.4995,  0.04

## Simple NN Model

In [6]:
class WordEmbAvgPtEmbeds(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, pad_idx, two_layers=True, dropout_p=0.0):
        
        super().__init__()
        
        self.embedding = nn.Embedding.from_pretrained(TEXT.vocab.vectors)
        if two_layers == True:
            self.linear1 = nn.Linear(embedding_dim, hidden_dim)
            self.linear2 = nn.Linear(hidden_dim, output_dim) 
        else:
            self.linear1 = nn.Linear(embedding_dim, output_dim)
            self.linear2 = None
        self.relu = nn.ReLU()
        self.drop_layer = nn.Dropout(p=dropout_p)

        
        
    def forward(self, text):
        embedded = self.embedding(text)
        embedded = embedded.mean(0)
        if not self.linear2:
            linear1_output = self.linear1(embedded)
            output = self.relu(linear1_output)
            output = self.drop_layer(output)
            return output
        else:
            linear1_output = self.linear1(embedded)
            linear2_input = self.relu(linear1_output)
            output = self.linear2(linear2_input)
            output = self.drop_layer(output)
            return output

## Training models

In [7]:
# Store training results
df = pd.DataFrame(columns=['architecture', 'embeddings',
                           'hidden', 'dropouts',
                           'learning_rate', 'epochs',
                           'dev_acc', 'dev_prec', 'dev_recall',
                           'metric'])

# Model architecture parameters
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_SIZE = TEXT.vocab.vectors.size(1)
HIDDEN_SIZES = [10, 25, 40, 50]
OUTPUT_SIZE = 1
DROPOUTS = [0, 0.1, 0.25, 0.5, 0.75]
PADDING_IDX = TEXT.vocab.stoi[TEXT.pad_token]

# Model training hyperparameters
LEARNING_RATE = [0.01, 0.001, 0.0001]
train_len = 0
train_pos = 0
for batch in train_it:
    train_len += len(batch.decision_binary)
    train_pos += batch.decision_binary.sum().item()
POS_WEIGHT = torch.tensor([(train_len - train_pos) / train_pos])
if USE_CUDA:
    POS_WEIGHT = POS_WEIGHT.cuda()
EPOCHS = 10

# Iterator over various model parameters
param_iter = product (HIDDEN_SIZES, DROPOUTS, LEARNING_RATE)

# Magic loop
best_acc = (None, None)
best_rec = (None, None)
best_prec = (None, None)
for i, (hidden_size, dropout, lr) in enumerate(param_iter):
    print(f'Architecture #{i}\n' + '-' * 20)
    model = WordEmbAvgPtEmbeds(INPUT_DIM, EMBEDDING_SIZE, hidden_size,
                OUTPUT_SIZE, PADDING_IDX, dropout_p=dropout)
    
    tm = TrainingModule(model, lr, POS_WEIGHT, USE_CUDA, EPOCHS)
    
    best_models = tm.train_model(train_it, val_it)
    
    for metric, best_model in best_models.items():
        row = [i, 'Law2Vec', hidden_size, dropout,
               lr, EPOCHS, best_model.accuracy,
               best_model.precision, best_model.recall, metric]
        df.loc[len(df)] = row
        if best_acc[0] is None or isnan(best_acc[1]) or\
           best_model.accuracy > best_acc[1]:
            best_acc = (copy.deepcopy(best_model.model), best_model.accuracy)
        if best_rec[0] is None or isnan(best_rec[1]) or\
           best_model.recall > best_rec[1]:
            best_rec = (copy.deepcopy(best_model.model), best_model.recall)
        if best_prec[0] is None or isnan(best_prec[1]) or\
           best_model.precision > best_prec[1]:
            best_prec = (copy.deepcopy(best_model.model), best_model.precision)
    
    print('-' * 20 + '\n')


Architecture #0
--------------------
Epoch 0: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1584
Epoch 1: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1603
Epoch 2: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1625
Epoch 3: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1615
Epoch 4: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1589
Epoch 5: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1607
Epoch 6: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1619
Epoch 7: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1609
Epoch 8: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1608
Epoch 9: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1616
--------------------

Architecture #1
--------------------
Epoch 0: Dev Accuracy: 0.135

Epoch 6: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1632
Epoch 7: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1627
Epoch 8: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1631
Epoch 9: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1629
--------------------

Architecture #9
--------------------
Epoch 0: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1634
Epoch 1: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1581
Epoch 2: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1598
Epoch 3: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1618
Epoch 4: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1633
Epoch 5: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1649
Epoch 6: Dev Accuracy: 0.8644; Dev Precision: nan;

Epoch 3: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1629
Epoch 4: Dev Accuracy: 0.1695; Dev Precision: 0.1273; Dev Recall: 0.8750; Dev Loss:0.1625
Epoch 5: Dev Accuracy: 0.1525; Dev Precision: 0.1250; Dev Recall: 0.8750; Dev Loss:0.1625
Epoch 6: Dev Accuracy: 0.2203; Dev Precision: 0.1346; Dev Recall: 0.8750; Dev Loss:0.1622
Epoch 7: Dev Accuracy: 0.5593; Dev Precision: 0.1538; Dev Recall: 0.5000; Dev Loss:0.1618
Epoch 8: Dev Accuracy: 0.2203; Dev Precision: 0.1346; Dev Recall: 0.8750; Dev Loss:0.1623
Epoch 9: Dev Accuracy: 0.2542; Dev Precision: 0.1400; Dev Recall: 0.8750; Dev Loss:0.1622
--------------------

Architecture #18
--------------------
Epoch 0: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1656
Epoch 1: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1616
Epoch 2: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1621
Epoch 3: Dev Accuracy: 0.8644; Dev Precisio

Epoch 0: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1628
Epoch 1: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1628
Epoch 2: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1634
Epoch 3: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1627
Epoch 4: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1626
Epoch 5: Dev Accuracy: 0.3729; Dev Precision: 0.1463; Dev Recall: 0.7500; Dev Loss:0.1621
Epoch 6: Dev Accuracy: 0.7966; Dev Precision: 0.1667; Dev Recall: 0.1250; Dev Loss:0.1615
Epoch 7: Dev Accuracy: 0.4915; Dev Precision: 0.1071; Dev Recall: 0.3750; Dev Loss:0.1618
Epoch 8: Dev Accuracy: 0.4915; Dev Precision: 0.1333; Dev Recall: 0.5000; Dev Loss:0.1619
Epoch 9: Dev Accuracy: 0.5424; Dev Precision: 0.1200; Dev Recall: 0.3750; Dev Loss:0.1617
--------------------

Architecture #27
--------------------
Epoch 0: Dev Accuracy: 0.8644; Dev Preci

Epoch 7: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1727
Epoch 8: Dev Accuracy: 0.7458; Dev Precision: 0.1111; Dev Recall: 0.1250; Dev Loss:0.1580
Epoch 9: Dev Accuracy: 0.5085; Dev Precision: 0.1613; Dev Recall: 0.6250; Dev Loss:0.1625
--------------------

Architecture #35
--------------------
Epoch 0: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1648
Epoch 1: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1645
Epoch 2: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1635
Epoch 3: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1638
Epoch 4: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1630
Epoch 5: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1632
Epoch 6: Dev Accuracy: 0.5254; Dev Precision: 0.0833; Dev Recall: 0.2500; Dev Loss:0.1618
Epoch 7: Dev Accuracy: 0.4237; Dev Preci

Epoch 3: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1632
Epoch 4: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1599
Epoch 5: Dev Accuracy: 0.5932; Dev Precision: 0.1000; Dev Recall: 0.2500; Dev Loss:0.1617
Epoch 6: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1631
Epoch 7: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1608
Epoch 8: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1641
Epoch 9: Dev Accuracy: 0.1864; Dev Precision: 0.1429; Dev Recall: 1.0000; Dev Loss:0.1626
--------------------

Architecture #44
--------------------
Epoch 0: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1644
Epoch 1: Dev Accuracy: 0.2034; Dev Precision: 0.1321; Dev Recall: 0.8750; Dev Loss:0.1624
Epoch 2: Dev Accuracy: 0.7627; Dev Precision: 0.1250; Dev Recall: 0.1250; Dev Loss:0.1613
Epoch 3: Dev Accuracy: 0.4746; Dev Precision: 

Epoch 0: Dev Accuracy: 0.1525; Dev Precision: 0.1250; Dev Recall: 0.8750; Dev Loss:0.1641
Epoch 1: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1596
Epoch 2: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1604
Epoch 3: Dev Accuracy: 0.1356; Dev Precision: 0.1356; Dev Recall: 1.0000; Dev Loss:0.1672
Epoch 4: Dev Accuracy: 0.8305; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.1598
Epoch 5: Dev Accuracy: 0.7288; Dev Precision: 0.1000; Dev Recall: 0.1250; Dev Loss:0.1608
Epoch 6: Dev Accuracy: 0.8644; Dev Precision: nan; Dev Recall: 0.0000; Dev Loss:0.1577
Epoch 7: Dev Accuracy: 0.8305; Dev Precision: 0.0000; Dev Recall: 0.0000; Dev Loss:0.1578
Epoch 8: Dev Accuracy: 0.6610; Dev Precision: 0.2000; Dev Recall: 0.5000; Dev Loss:0.1614
Epoch 9: Dev Accuracy: 0.7288; Dev Precision: 0.1000; Dev Recall: 0.1250; Dev Loss:0.1594
--------------------

Architecture #53
--------------------
Epoch 0: Dev Accuracy: 0.1356; Dev Precision: 0.1

## Save results of model training

In [8]:
SAVE_PREFIX = '../results/SimpleNNLaw2Vec_'
df.to_csv(f'{SAVE_PREFIX}models.csv')
torch.save(best_acc[0], f'{SAVE_PREFIX}best_acc.pt')
torch.save(best_rec[0], f'{SAVE_PREFIX}best_rec.pt')
torch.save(best_prec[0], f'{SAVE_PREFIX}best_prec.pt')

## Confirming embeddings have not been trained

In [9]:
medicare_tensor = torch.LongTensor([TEXT.vocab.stoi['medicare']])
if USE_CUDA:
    medicare_tensor = medicare_tensor.cuda()
model.embedding(medicare_tensor)

tensor([[ 0.6415, -0.5367, -0.3537, -0.0634, -0.1798,  0.0626, -0.1836, -0.2705,
          0.2504,  0.5061,  0.4746, -0.2351, -0.0465,  0.3184,  0.8974,  0.0470,
         -0.2594,  0.3485, -0.3356,  0.1163,  0.2207,  0.2707,  0.4748,  0.1122,
         -0.1188, -0.0790,  0.4377, -0.4711,  0.1401, -0.0234, -0.2009, -0.2143,
          0.1335, -0.4407,  0.4077, -0.0634,  0.5104,  0.1820, -0.4729, -0.1758,
          0.6194,  0.5708, -0.3034, -0.3658,  0.1609,  0.0753, -0.2024, -0.1472,
          0.0665,  0.1823,  0.3091, -0.0913,  0.2495,  0.0777, -0.1873, -0.5850,
         -0.3243,  0.1540, -0.5094,  0.6227,  0.1163, -0.6202, -0.4416, -0.3509,
         -0.5760, -0.4837, -0.6283,  0.0938,  0.3528, -0.0674, -0.7097, -0.2053,
         -0.6007, -0.1306,  0.0146, -0.0830,  0.5486, -0.2328, -0.3193,  0.1496,
         -0.1635,  0.0755, -0.2594, -0.0317,  0.1249, -0.5599,  0.0722, -0.0369,
          0.3139,  0.0102, -0.3353,  0.1142, -0.1163,  0.1505,  0.0952,  0.0206,
         -0.0733, -0.4851,  