In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import sys
dir= "/content/drive/MyDrive/Github/DeepLearning2022/ContractReviewer"
os.chdir(dir)


## Load Libraries

In [3]:
#!pip install bert-pytorch
!pip install sentencepiece
!pip install transformers
!pip install onnx
!pip install fasttext
!pip install torchtext
!pip install scikit-learn
!pip install d2l==1.0.0-alpha1.post0
!pip install sklearn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 12.8 MB/s 
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.97
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 14.5 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 63.1 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 69.6 MB/s 
Insta

In [4]:
import yaml
import argparse
import time
import copy

import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from utility import load
from modules import DecomposableAttention
from data_.my_dataset import MyDataset, coll
from losses import FocalLoss, reweight
from sklearn.metrics import classification_report

In [5]:
parser = argparse.ArgumentParser(description='CS7643 Contract Reviewer')
parser.add_argument('--config', default='configs/config.yaml') 
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [6]:
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [7]:
#This method is used to load configuration
def load_config():
#Load configuration    
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)
    
    return args

## Set up Hyperparameters

In [8]:
# Run this when setting up configurations via notebook
def load_config_notebook():
  class Args:
    batch_size= 300
    learning_rate= 0.00001
    reg= 0.0001
    epochs= 5
    steps= [6, 8]
    warmup= 0
    momentum= 0.9
    gamma= 1
    beta= .9999
    max_netural= 10
    save_best= True
    model= DecomposableAttention

  return Args()


In [9]:
def load_train_test(batch_size):

    data_train, ref_train, data_valid, ref_valid, data_test, ref_test = load()
     
    # https://stackoverflow.com/questions/65279115/how-to-use-collate-fn-with-dataloaders 
    
    train_data=MyDataset(data_train, ref_train)
    train_loader=DataLoader(train_data,batch_size=batch_size, collate_fn=coll, shuffle=True)

    valid_data=MyDataset(data_valid, ref_valid)
    valid_loader=DataLoader(valid_data,batch_size=batch_size, collate_fn=coll, shuffle=False)

    test_data=MyDataset(data_test, ref_test, use_faiss=False)
    test_loader=DataLoader(test_data,batch_size=batch_size, collate_fn=coll, shuffle=False)

    return train_loader, valid_loader, test_loader

In [10]:
def get_counts_training_data(train_loader):
    entailment = 0
    contradiction = 0
    neutral = 0

    for i, x in enumerate(train_loader):
        a = x['Label'].bincount().cpu().numpy()
        entailment += a[0]
        contradiction += a[1]
        neutral += a[2]

    # We'll feed this list to the focal loss implementation.
    cls_num_list = list([entailment,contradiction,neutral])
    return cls_num_list


In [11]:
def accuracy(output, target):
    """Computes the precision@k for the specified values of k"""
    batch_size = target.shape[0]

    _, pred = torch.max(output, dim=-1)

    correct = pred.eq(target).sum() * 1.0

    acc = correct / batch_size

    return acc

In [12]:
def train(epoch, data_loader, model, optimizer, criterion):
    iter_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
   
    for idx, x in enumerate(data_loader): 
        start = time.time()
        optimizer.zero_grad()
        outputs = model.forward(x)
       
        loss = criterion(outputs, x["Label"])
        loss.backward()
        optimizer.step()        
        batch_acc = accuracy(outputs, x["Label"])
        losses.update(loss.item(), outputs.shape[0])
        acc.update(batch_acc, outputs.shape[0])
        
        iter_time.update(time.time() - start)
        if idx % 10 == 0:
            print(('Epoch: [{0}][{1}/{2}]\t'
                   'Time {iter_time.val:.3f} ({iter_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec @1 {top1.val:.4f} ({top1.avg:.4f})\t')
                  .format(epoch, idx, len(data_loader), iter_time=iter_time, loss=losses, top1=acc))

In [13]:
def validate(epoch, val_loader, model, criterion):
    iter_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    num_class = 3
    cm = torch.zeros(num_class, num_class)
    # evaluation loop
    for idx, x in enumerate(val_loader):
        start = time.time()   

        torch.no_grad()
        out = model(x)
        loss = criterion(out, x["Label"])     
        batch_acc = accuracy(out, x["Label"])

        # update confusion matrix
        _, preds = torch.max(out, 1)   
        for t, p in zip(x["Label"], preds.view(-1)):
            cm[t.long(), p.long()] += 1       

        losses.update(loss.item())
        acc.update(batch_acc)

        iter_time.update(time.time() - start)
        if idx % 10 == 0:
            print(('Epoch: [{0}][{1}/{2}]\t'
                   'Time {iter_time.val:.3f} ({iter_time.avg:.3f})\t')
                  .format(epoch, idx, len(val_loader), iter_time=iter_time, loss=losses, top1=acc))
    cm = cm / cm.sum(1)
    per_cls_acc = cm.diag().detach().numpy().tolist()
    
    print("* Prec @1: {top1.avg:.4f}".format(top1=acc))
    return acc.avg, cm

In [14]:
def adjust_learning_rate(optimizer, epoch, args):
    epoch += 1
    if epoch <= args.warmup:
        lr = args.learning_rate * epoch / args.warmup
    elif epoch > args.steps[1]:
        lr = args.learning_rate * 0.01
    elif epoch > args.steps[0]:
        lr = args.learning_rate * 0.1
    else:
        lr = args.learning_rate
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

## Train and save model

In [15]:
def main():
  #Load config for command line
  #args = load_config()
  print("\r Loading config")
  #Load config for jupyter notebook
  args = load_config_notebook()

  # Load Data
  print("\r Loading data")
  train_loader, _ , test_loader = load_train_test(args.batch_size)

  #Reweight training    
  print("\r reweighting data")
  #cls_num_list= list(get_counts_training_data(train_loader))
  cls_num_list= list([0.00020353383324465444, 0.0006849963955165558, 0.00010011808071699749])
  per_cls_weights = reweight(cls_num_list, beta=args.beta)

  print("\r Setting up Module")
  #https://github.gatech.edu/Sgudiduri3/DeepLearning2022/blob/main/dataM_focal_loss_20221121.ipynb
  net = DecomposableAttention(100, 200).to(device)

  #Optimization using focal loss
  criterion = FocalLoss(weight=per_cls_weights, gamma=args.gamma).to(device)
  optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=args.momentum)

  # Loop through epoch
  # Loop through dataset
  print("\r Training and Testing")
  best = 0.0
  best_cm = None
  best_model = None
  for epoch in range(args.epochs):  # loop over the dataset multiple times \
      adjust_learning_rate(optimizer, epoch, args)

      # train loop
      train(epoch, train_loader, net, optimizer, criterion)

      # validation loop
      acc, cm = validate(epoch, test_loader, net, criterion)

      if acc > best:
          best = acc
          best_cm = cm
          best_model = copy.deepcopy(net)
    
  print('\r Best Prec @1 Acccuracy: {:.4f}'.format(best))
   
  if args.save_best:
      torch.save(best_model.state_dict(), './checkpoints/' + 'decomposable_attention.pth')    
  print('\r Finished Training')

In [16]:
main()

 Loading config
 Loading data
 reweighting data
 Setting up Module
 Training and Testing
Epoch: [0][0/360]	Time 2.590 (2.590)	Loss 2.2742 (2.2742)	Prec @1 0.0317 (0.0317)	
Epoch: [0][10/360]	Time 0.017 (0.251)	Loss 0.1873 (0.3579)	Prec @1 0.9050 (0.8199)	
Epoch: [0][20/360]	Time 0.017 (0.139)	Loss 0.0506 (0.2396)	Prec @1 0.8929 (0.8575)	
Epoch: [0][30/360]	Time 0.017 (0.100)	Loss 0.0590 (0.1830)	Prec @1 0.8889 (0.8681)	
Epoch: [0][40/360]	Time 0.017 (0.080)	Loss 0.0604 (0.1505)	Prec @1 0.8850 (0.8750)	
Epoch: [0][50/360]	Time 0.017 (0.067)	Loss 0.0467 (0.1316)	Prec @1 0.9009 (0.8776)	
Epoch: [0][60/360]	Time 0.017 (0.059)	Loss 0.0416 (0.1180)	Prec @1 0.9050 (0.8800)	
Epoch: [0][70/360]	Time 0.016 (0.053)	Loss 0.0461 (0.1069)	Prec @1 0.8929 (0.8835)	
Epoch: [0][80/360]	Time 0.016 (0.049)	Loss 0.0238 (0.0980)	Prec @1 0.9302 (0.8871)	
Epoch: [0][90/360]	Time 0.016 (0.045)	Loss 0.0285 (0.0916)	Prec @1 0.9174 (0.8886)	
Epoch: [0][100/360]	Time 0.017 (0.042)	Loss 0.0507 (0.0868)	Prec @1 0.8

  df = df[df.span_nbr !=-1 ][df.premise != ''] [df.hypotheis != '']


Epoch: [0][160/360]	Time 0.017 (0.033)	Loss 0.0340 (0.0708)	Prec @1 0.9050 (0.8898)	
Epoch: [0][170/360]	Time 0.016 (0.032)	Loss 0.0406 (0.0691)	Prec @1 0.8889 (0.8900)	
Epoch: [0][180/360]	Time 0.017 (0.031)	Loss 0.0597 (0.0674)	Prec @1 0.8734 (0.8904)	
Epoch: [0][190/360]	Time 0.017 (0.030)	Loss 0.0473 (0.0662)	Prec @1 0.8889 (0.8905)	
Epoch: [0][200/360]	Time 0.016 (0.030)	Loss 0.0352 (0.0649)	Prec @1 0.9091 (0.8905)	
Epoch: [0][210/360]	Time 0.018 (0.029)	Loss 0.0680 (0.0639)	Prec @1 0.8584 (0.8906)	
Epoch: [0][220/360]	Time 0.017 (0.029)	Loss 0.0302 (0.0630)	Prec @1 0.9009 (0.8907)	
Epoch: [0][230/360]	Time 0.017 (0.028)	Loss 0.0395 (0.0619)	Prec @1 0.9009 (0.8910)	
Epoch: [0][240/360]	Time 0.017 (0.028)	Loss 0.0429 (0.0609)	Prec @1 0.8889 (0.8915)	
Epoch: [0][250/360]	Time 0.017 (0.027)	Loss 0.0356 (0.0600)	Prec @1 0.9009 (0.8916)	
Epoch: [0][260/360]	Time 0.017 (0.027)	Loss 0.0354 (0.0590)	Prec @1 0.9050 (0.8922)	
Epoch: [0][270/360]	Time 0.017 (0.026)	Loss 0.0430 (0.0582)	Prec 

  df = df[df.span_nbr !=-1 ][df.premise != ''] [df.hypotheis != '']


Epoch: [0][0/105]	Time 0.026 (0.026)	
Epoch: [0][10/105]	Time 0.034 (0.028)	
Epoch: [0][20/105]	Time 0.027 (0.028)	
Epoch: [0][30/105]	Time 0.027 (0.027)	
Epoch: [0][40/105]	Time 0.027 (0.027)	
Epoch: [0][50/105]	Time 0.028 (0.027)	
Epoch: [0][60/105]	Time 0.027 (0.027)	
Epoch: [0][70/105]	Time 0.026 (0.027)	
Epoch: [0][80/105]	Time 0.026 (0.027)	
Epoch: [0][90/105]	Time 0.027 (0.027)	
Epoch: [0][100/105]	Time 0.028 (0.027)	
* Prec @1: 0.8927
Epoch: [1][0/360]	Time 0.017 (0.017)	Loss 0.0354 (0.0354)	Prec @1 0.9050 (0.9050)	
Epoch: [1][10/360]	Time 0.017 (0.017)	Loss 0.0337 (0.0349)	Prec @1 0.9050 (0.9005)	
Epoch: [1][20/360]	Time 0.018 (0.017)	Loss 0.0450 (0.0367)	Prec @1 0.8929 (0.8997)	
Epoch: [1][30/360]	Time 0.017 (0.017)	Loss 0.0459 (0.0397)	Prec @1 0.8889 (0.8967)	
Epoch: [1][40/360]	Time 0.017 (0.017)	Loss 0.0480 (0.0400)	Prec @1 0.8850 (0.8967)	
Epoch: [1][50/360]	Time 0.017 (0.017)	Loss 0.0411 (0.0411)	Prec @1 0.9009 (0.8950)	
Epoch: [1][60/360]	Time 0.017 (0.017)	Loss 0.0318 

#Load saved model

In [5]:
device = torch.device("cuda")
best_model = DecomposableAttention(100, 200).to(device)
best_model.load_state_dict(torch.load('./checkpoints/decomposable_attention.pth'))
best_model.to(device)

DecomposableAttention(
  (attend): Attend(
    (f): Sequential(
      (0): Dropout(p=0.2, inplace=False)
      (1): Linear(in_features=100, out_features=200, bias=True)
      (2): ReLU()
      (3): Dropout(p=0.2, inplace=False)
      (4): Linear(in_features=200, out_features=200, bias=True)
      (5): ReLU()
    )
  )
  (compare): Compare(
    (g): Sequential(
      (0): Dropout(p=0.2, inplace=False)
      (1): Linear(in_features=200, out_features=200, bias=True)
      (2): ReLU()
      (3): Dropout(p=0.2, inplace=False)
      (4): Linear(in_features=200, out_features=200, bias=True)
      (5): ReLU()
    )
  )
  (aggregate): Aggregate(
    (h): Sequential(
      (0): Dropout(p=0.2, inplace=False)
      (1): Linear(in_features=400, out_features=200, bias=True)
      (2): ReLU()
      (3): Flatten(start_dim=1, end_dim=-1)
      (4): Dropout(p=0.2, inplace=False)
      (5): Linear(in_features=200, out_features=200, bias=True)
      (6): ReLU()
      (7): Flatten(start_dim=1, end_dim=-1)


## Predict on test dataset

max_neutral = 3

In [14]:
from sklearn.metrics import classification_report
data_train, ref_train, data_valid, ref_valid, data_test, ref_test = load()
test_data=MyDataset(data_test, ref_test, use_faiss=False, max_neutral=3)
test_loader=DataLoader(test_data,batch_size=len(test_data.df),\
                       collate_fn=coll, shuffle=False)
best_model.eval()

y_hat = None
labels = None
for idx, x in enumerate(test_loader):
  with torch.no_grad():
    out = best_model(x)
    _, preds = torch.max(out, 1)   
    #Get prediction and labels to get metrix
    y_hat = preds.cpu().numpy()
    labels = x["Label"].cpu().numpy()
    print(f"\n {classification_report(labels, y_hat, labels=[0,1,2])}")

  df = df[df.span_nbr !=-1 ][df.premise != ''] [df.hypotheis != '']



               precision    recall  f1-score   support

           0       0.00      0.00      0.00      1971
           1       0.00      0.00      0.00       422
           2       0.89      1.00      0.94     19690

    accuracy                           0.89     22083
   macro avg       0.30      0.33      0.31     22083
weighted avg       0.80      0.89      0.84     22083



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


max_neutral = 1

In [15]:
from sklearn.metrics import classification_report
data_train, ref_train, data_valid, ref_valid, data_test, ref_test = load()
test_data=MyDataset(data_test, ref_test, use_faiss=False, max_neutral=1)
test_loader=DataLoader(test_data,batch_size=len(test_data.df),\
                       collate_fn=coll, shuffle=False)
best_model.eval()

y_hat = None
labels = None
for idx, x in enumerate(test_loader):
  with torch.no_grad():
    out = best_model(x)
    _, preds = torch.max(out, 1)   
    #Get prediction and labels to get metrix
    y_hat = preds.cpu().numpy()
    labels = x["Label"].cpu().numpy()
    print(f"\n {classification_report(labels, y_hat, labels=[0,1,2])}")

  df = df[df.span_nbr !=-1 ][df.premise != ''] [df.hypotheis != '']



               precision    recall  f1-score   support

           0       0.00      0.00      0.00      1971
           1       0.00      0.00      0.00       422
           2       0.89      1.00      0.94     19675

    accuracy                           0.89     22068
   macro avg       0.30      0.33      0.31     22068
weighted avg       0.79      0.89      0.84     22068



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


max neutral = 0

In [17]:
from sklearn.metrics import classification_report
data_train, ref_train, data_valid, ref_valid, data_test, ref_test = load()
test_data=MyDataset(data_test, ref_test, use_faiss=False, max_neutral=0)
test_loader=DataLoader(test_data,batch_size=len(test_data.df),\
                       collate_fn=coll, shuffle=False)
best_model.eval()

y_hat = None
labels = None
for idx, x in enumerate(test_loader):
  with torch.no_grad():
    out = best_model(x)
    _, preds = torch.max(out, 1)   
    #Get prediction and labels to get metrix
    y_hat = preds.cpu().numpy()
    labels = x["Label"].cpu().numpy()
    print(f"\n {classification_report(labels, y_hat, labels=[0,1,2])}")

  df = df[df.span_nbr !=-1 ][df.premise != ''] [df.hypotheis != '']



               precision    recall  f1-score   support

           0       0.00      0.00      0.00      1971
           1       0.00      0.00      0.00       422
           2       0.89      1.00      0.94     19751

    accuracy                           0.89     22144
   macro avg       0.30      0.33      0.31     22144
weighted avg       0.80      0.89      0.84     22144



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
