<a href="https://colab.research.google.com/github/sumanthd17/aspect-based-sentiment/blob/master/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize

import psutil
import humanize
import os
import GPUtil as GPU

# Query the GPUs visible to this runtime. On a CPU-only runtime the list is
# empty, so fall back to None instead of raising IndexError on GPUs[0].
GPUs = GPU.getGPUs()
gpu = GPUs[0] if GPUs else None

def printm():
 """
 Print a short memory report.

 First line: available host RAM and the resident size of this process.
 Second line: free/used/total memory and utilisation of the first GPU, or a
 notice when no GPU was detected.
 """
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 if gpu is None:
  # nothing to report on CPU-only runtimes
  print("GPU: none detected")
  return
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))

printm()

Gen RAM Free: 12.7 GB  | Proc size: 160.9 MB
GPU RAM Free: 15079MB | Used: 0MB | Util   0% | Total 15079MB


In [2]:
## Install transformers library
!pip install transformers



In [3]:
# Import dependencies
import time
import datetime
import random
from tqdm import tqdm

import numpy as np
import pandas as pd

import nltk
nltk.download('punkt')

import torch
import torch.nn.functional as F
import transformers as optimus

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
!git clone https://github.com/sumanthd17/aspect-based-sentiment.git

fatal: destination path 'aspect-based-sentiment' already exists and is not an empty directory.


In [5]:
cd aspect-based-sentiment

/content/aspect-based-sentiment


In [6]:
def load_train_data(input_dir):
  """
  Load input train data

  Reads the tab-separated file "train-QA.csv" found in input_dir and names its
  columns id, ques, ans and sentiment.

  Arguments:
  input_dir {str} - path to data dir (with or without a trailing separator)

  Returns:
  df {DataFrame} - loaded data in data frame
  """
  # os.path.join accepts both 'QA_pairs/' and 'QA_pairs'; plain string
  # concatenation only worked when the caller added the trailing slash
  train_path = os.path.join(input_dir, "train-QA.csv")
  df = pd.read_csv(train_path, sep="\t", names=['id', 'ques', 'ans', 'sentiment'])
  return df

In [7]:
def load_val_data(input_dir):
  """
  Load input validation data

  Reads the tab-separated file "val-QA.csv" found in input_dir and names its
  columns id, ques, ans and sentiment.

  Arguments:
  input_dir {str} - path to data dir (with or without a trailing separator)

  Returns:
  df {DataFrame} - loaded data in data frame
  """
  # os.path.join accepts both 'QA_pairs/' and 'QA_pairs'; plain string
  # concatenation only worked when the caller added the trailing slash
  val_path = os.path.join(input_dir, "val-QA.csv")
  df = pd.read_csv(val_path, sep="\t", names=['id', 'ques', 'ans', 'sentiment'])
  return df

In [8]:
def hyper_params():
  """
  Provide the hyper-parameters used for fine-tuning

  Returns (as a tuple, in this order):
  BATCH_SIZE {int} - batchsize of dataloader
  MAX_SEQ_LENGTH {int} - maximum length of input text sequence (<512 as bert is maxed at 512)
  LEARNING_RATE {float} - learning rate for the optimizer
  WARMUP {float} - portion of warmup steps for scheduler
  EPOCHS {int} - total training epochs
  """
  # data-loading settings
  BATCH_SIZE, MAX_SEQ_LENGTH = 32, 256
  # optimisation settings
  LEARNING_RATE, WARMUP = 2e-5, 0.1
  EPOCHS = 5
  return (BATCH_SIZE, MAX_SEQ_LENGTH, LEARNING_RATE, WARMUP, EPOCHS)

In [9]:
# pick the compute device: CUDA when it is usable, otherwise the CPU
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

In [10]:
# script for generating auxiliary QA pairs
!python create_data.py

2977
747
1491


In [11]:
# load train and validation data
train_data = load_train_data('QA_pairs/')
val_data = load_val_data('QA_pairs/')

# load hyper-parameters
batch_size, max_seq_len, lr, warmup, epochs = hyper_params()

# initialize training steps and warmup steps
# One optimizer/scheduler step is taken per batch. A DataLoader that keeps the
# final partial batch yields ceil(len(train_data) / batch_size) batches per
# epoch, so floor division would under-count whenever the dataset size is not
# a multiple of batch_size.
steps_per_epoch = (len(train_data) + batch_size - 1) // batch_size
num_training_steps = steps_per_epoch * epochs
# keep the warmup length an integer number of steps
num_warmup_steps = int(warmup * num_training_steps)

print(len(train_data))
print(len(val_data))

15008
3750


In [12]:
# BERT tokenizer for the bert-base-uncased checkpoint
pretrained_weights = "bert-base-uncased"
tokenizer_class = optimus.BertTokenizer

# instantiate the tokenizer from the pre-trained checkpoint (lower-casing enabled)
tokenizer = tokenizer_class.from_pretrained(pretrained_weights, do_lower_case=True)

In [13]:
# sentiment label -> class index; the position in the tuple is the index
sent2idx = {
    label: index
    for index, label in enumerate(('None', 'Positive', 'Negative'))
}

In [14]:
# visualize tokenizer output for a sentence pair: the result holds
# input_ids, token_type_ids and attention_mask for the joined pair
tokenizer('hi, my name is sumanth', 'I am an engineer')

{'input_ids': [101, 7632, 1010, 2026, 2171, 2003, 7680, 4630, 2232, 102, 1045, 2572, 2019, 3992, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [15]:
# Encode every (question, answer) pair of train_data with the BERT tokenizer.
# tokenizer(sent_a, sent_b) returns a dictionary of encoded values namely input_ids, attention_mask, token_type_ids
# input_ids - IDs returned by bert-tokenizer
# attention_mask - 1's for all the ids in the sentence and 0's for all padded tokens
# token_type_ids - 0's for all tokens in sent_a, 1's for all tokens in sent_b
#
# The tokenizer itself pads to MAX_SEQ_LENGTH and truncates longer pairs, so
# every row has exactly max_seq_len entries; manual padding left over-long
# rows ragged. Rows are collected in a list and converted to a DataFrame once
# instead of calling DataFrame.append per row (quadratic, removed in pandas 2.0).
train_records = []
for ques, ans, sentiment in zip(train_data['ques'], train_data['ans'], train_data['sentiment']):
  encoded = tokenizer(ques, ans, padding='max_length', truncation=True, max_length=max_seq_len)
  train_records.append({
      'attention_mask': encoded['attention_mask'],
      'input_ids': encoded['input_ids'],
      'label': sent2idx[sentiment],
      'token_type_ids': encoded['token_type_ids'],
  })

# explicit column list so the frame has these columns even when there are no rows
train = pd.DataFrame(train_records, columns=['attention_mask', 'input_ids', 'label', 'token_type_ids'])

In [16]:
# visualize data: preview of the encoded training frame (one row per question/answer pair)
train

Unnamed: 0,attention_mask,input_ids,label,token_type_ids
0,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3976...",2.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
1,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 6671...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 2236...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
3,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3808...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
4,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3976...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
...,...,...,...,...
15003,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3808...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
15004,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3976...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
15005,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 6671...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
15006,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 2236...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."


In [17]:
# Encode every (question, answer) pair of val_data with the BERT tokenizer.
# tokenizer(sent_a, sent_b) returns a dictionary of encoded values namely input_ids, attention_mask, token_type_ids
# input_ids - IDs returned by bert-tokenizer
# attention_mask - 1's for all the ids in the sentence and 0's for all padded tokens
# token_type_ids - 0's for all tokens in sent_a, 1's for all tokens in sent_b
#
# The tokenizer itself pads to MAX_SEQ_LENGTH and truncates longer pairs, so
# every row has exactly max_seq_len entries; manual padding left over-long
# rows ragged. Rows are collected in a list and converted to a DataFrame once
# instead of calling DataFrame.append per row (quadratic, removed in pandas 2.0).
val_records = []
for ques, ans, sentiment in zip(val_data['ques'], val_data['ans'], val_data['sentiment']):
  encoded = tokenizer(ques, ans, padding='max_length', truncation=True, max_length=max_seq_len)
  val_records.append({
      'attention_mask': encoded['attention_mask'],
      'input_ids': encoded['input_ids'],
      'label': sent2idx[sentiment],
      'token_type_ids': encoded['token_type_ids'],
  })

# explicit column list so the frame has these columns even when there are no rows
val = pd.DataFrame(val_records, columns=['attention_mask', 'input_ids', 'label', 'token_type_ids'])

In [18]:
# visualize data: preview of the encoded validation frame (one row per question/answer pair)
val

Unnamed: 0,attention_mask,input_ids,label,token_type_ids
0,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 6671...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3808...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
2,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 2236...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
3,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3976...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
4,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 6671...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...
3745,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3976...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
3746,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 2236...",1.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."
3747,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 6671...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3748,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[101, 2054, 2079, 2017, 2228, 2055, 1996, 3808...",0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, ..."


In [19]:
# Import torch utilities for loading data
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import RandomSampler

In [20]:
# turn each column of the encoded training frame into one long-typed tensor
input_ids, attention_mask, token_type_ids, label_ids = (
    torch.tensor(train[column].tolist(), dtype=torch.long)
    for column in ('input_ids', 'attention_mask', 'token_type_ids', 'label')
)

In [21]:
# Bundle the four training tensors into a TensorDataset, draw examples through
# a RandomSampler, and serve them in batches of batch_size via a DataLoader
train_dataset = TensorDataset(
    input_ids,
    attention_mask,
    token_type_ids,
    label_ids,
)
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=train_sampler,
)

In [22]:
# turn each column of the encoded validation frame into one long-typed tensor
input_ids, attention_mask, token_type_ids, label_ids = (
    torch.tensor(val[column].tolist(), dtype=torch.long)
    for column in ('input_ids', 'attention_mask', 'token_type_ids', 'label')
)

In [23]:
# Bundle the four validation tensors into a TensorDataset and serve them in
# batches of batch_size via a DataLoader (no sampler is passed here)
val_dataset = TensorDataset(
    input_ids,
    attention_mask,
    token_type_ids,
    label_ids,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
)

In [24]:
# sequence-classification BERT built on the bert-base-uncased checkpoint
pretrained_weights = "bert-base-uncased"
model_class = optimus.BertForSequenceClassification

# load the pre-trained weights; num_labels=3 matches the three sentiment
# classes (None, Positive, Negative)
model = model_class.from_pretrained(pretrained_weights, num_labels=3)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [25]:
# load model to device (the torch.device chosen earlier: "cuda" when available, else "cpu")
model = model.to(device)

In [26]:
# Name fragments of parameters that are excluded from weight decay: biases and
# LayerNorm weights. This checkpoint names LayerNorm parameters
# 'LayerNorm.weight' / 'LayerNorm.bias'; 'gamma' / 'beta' are kept in case a
# checkpoint uses those names instead.
no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
# AdamW reads the per-group option 'weight_decay'. The former key
# 'weight_decay_rate' is not an option the optimizer looks at, so both groups
# silently fell back to the optimizer's default decay.
optimizer_parameters = [
      {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
      {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
      ]

In [27]:
# Initializing optimizer for weight updates: AdamW over the two parameter
# groups, with the learning rate returned by hyper_params
# NOTE(review): correct_bias=False presumably disables Adam's bias-correction
# term - confirm against the installed transformers version
# Initializing scheduler for updating learning rate; it is stepped once per
# batch in the training loop and is given the warmup and total step counts
optimizer = optimus.AdamW(optimizer_parameters, lr=lr, correct_bias=False)
scheduler = optimus.get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps)

In [28]:
# one seed value shared by every random number generator, for reproducibility
seed_val = 42

# seed Python's random, NumPy, and PyTorch (CPU generator and all CUDA devices)
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed_val)

In [None]:
# placeholder for train-val stats (one dict per epoch)
training_stats = []

total_t0 = time.time()

# Iterate through all epochs
for epoch_i in range(0, epochs):
    ## TRAINING
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    t0 = time.time()
    # re-initialize train loss for each epoch
    total_train_loss = 0

    # convert model to train mode
    model.train()

    # Iterate for all batches in the dataloader
    for step, batch in enumerate(tqdm(train_dataloader)):

        # print stats after every 100 steps
        if step % 100 == 0 and not step == 0:
            elapsed = str(datetime.timedelta(seconds=int(round(time.time() - t0))))
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # un-pack values in the batch and push them to device
        input_ids, input_mask, segment_ids, label_ids = (t.to(device) for t in batch)

        # remove any previously computed gradients
        model.zero_grad()

        # forward pass of the network
        # Index the result instead of tuple-unpacking it: integer indexing
        # works for plain tuples and for the ModelOutput objects returned by
        # newer transformers releases (unpacking those yields their keys).
        model_out = model(input_ids=input_ids,
                          attention_mask=input_mask,
                          token_type_ids=segment_ids,
                          labels=label_ids)
        loss = model_out[0]

        # update loss
        total_train_loss += loss.item()

        # back-propagation, then advance optimizer and learning-rate schedule
        loss.backward()
        optimizer.step()
        scheduler.step()

    # calculate stats for each epoch
    # each accumulated value is one per-batch loss, so average over the number
    # of batches (dividing by batches * batch_size under-reported the loss)
    avg_train_loss = total_train_loss / max(len(train_dataloader), 1)

    training_time = str(datetime.timedelta(seconds=int(round(time.time() - t0))))

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(training_time))

    ## VALIDATION

    print("")
    print("Running Validation...")

    t0 = time.time()

    # convert model to eval mode
    model.eval()

    # initialize placeholders
    total_eval_correct = 0
    total_eval_examples = 0
    total_eval_loss = 0

    # iterate for all batches in the dataloader
    for step, batch in enumerate(tqdm(val_dataloader)):
        # unpack values in the batch and load them to device
        input_ids, input_mask, segment_ids, label_ids = (t.to(device) for t in batch)

        # perform forward pass without tracking gradients
        with torch.no_grad():
            model_out = model(input_ids=input_ids,
                              attention_mask=input_mask,
                              token_type_ids=segment_ids,
                              labels=label_ids)
        loss, logits = model_out[0], model_out[1]

        # update loss
        total_eval_loss += loss.item()

        # Output of the model is the scores for 3 classes namely None, Positive, and Negative
        # softmax is monotonic, so argmax over the raw scores selects the same class
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        predictions = np.argmax(logits, axis=1)

        # compare ground truths and predictions
        total_eval_correct += np.sum(predictions == label_ids)
        # count the examples actually seen; the last batch may be smaller
        total_eval_examples += len(label_ids)

    # compute validation stats
    avg_val_accuracy = total_eval_correct / max(total_eval_examples, 1)
    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

    # per-batch losses are averaged over the number of batches
    avg_val_loss = total_eval_loss / max(len(val_dataloader), 1)

    validation_time = str(datetime.timedelta(seconds=int(round(time.time() - t0))))

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # add stats to placeholder
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")

print("Total training took {:} (h:mm:ss)".format(str(datetime.timedelta(seconds=int(round(time.time() - total_t0))))))

  0%|          | 0/469 [00:00<?, ?it/s]

Training...


 21%|██▏       | 100/469 [02:17<08:40,  1.41s/it]

  Batch   100  of    469.    Elapsed: 0:02:17.


 27%|██▋       | 126/469 [02:53<08:06,  1.42s/it]

In [None]:
## INFERENCE

def load_test_data(input_dir):
  """
  Load input test data

  Reads the tab-separated file "test-QA.csv" found in input_dir and names its
  columns id, ques, ans and sentiment.

  Arguments:
  input_dir {str} - path to data dir (with or without a trailing separator)

  Returns:
  df {DataFrame} - loaded data in data frame
  """
  # os.path.join accepts both 'QA_pairs/' and 'QA_pairs'; plain string
  # concatenation only worked when the caller added the trailing slash
  test_path = os.path.join(input_dir, "test-QA.csv")
  df = pd.read_csv(test_path, sep="\t", names=['id', 'ques', 'ans', 'sentiment'])
  return df

In [None]:
# load test data (test-QA.csv) from the QA_pairs directory
test_data = load_test_data('QA_pairs/')

In [None]:
# class index -> sentiment label; the position in the tuple is the index
idx2sentiment = dict(enumerate(("None", "Positive", "Negative")))

In [None]:
# the twelve aspect names, in their original order
all_aspects = [
    'price', 'shopping', 'transit-location', 'safety',
    'nightlife', 'live', 'multiculture', 'green-nature',
    'touristy', 'quiet', 'dining', 'general',
]

In [None]:
# grouping all the text on 'id'
# A scalar key (rather than a one-element list) makes iteration yield the
# plain id value; with a list key newer pandas versions yield 1-tuples.
test_grouped_by_id = test_data.groupby('id')

In [None]:
# convert model to eval mode
model.eval()

# Predictions are collected as plain dicts and turned into a DataFrame once at
# the end; DataFrame.append per row is quadratic and was removed in pandas 2.0.
pred_records = []
test_accuracy = 0

# running count of scored rows (read by the accuracy cell below)
l = 0
# Iterate all groups in the dataframe groupby
# (loop names avoid shadowing the builtin `id` and the validation frame `val`)
for group_id, group in tqdm(test_grouped_by_id):
  # a list-valued groupby key arrives as a 1-tuple on newer pandas; unwrap it
  if isinstance(group_id, tuple) and len(group_id) == 1:
    group_id = group_id[0]

  # encode all rows of the group with the BERT tokenizer in one call;
  # the tokenizer pads to MAX_SEQ_LENGTH and truncates longer pairs
  encoded = tokenizer(group['ques'].tolist(), group['ans'].tolist(),
                      padding='max_length', truncation=True,
                      max_length=max_seq_len, return_tensors='pt')

  # push tensors to device
  input_ids = encoded['input_ids'].to(device)
  attention_mask = encoded['attention_mask'].to(device)
  token_type_ids = encoded['token_type_ids'].to(device)
  label_ids = torch.tensor([sent2idx[s] for s in group['sentiment']], dtype=torch.long).to(device)

  # compute forward pass without tracking gradients
  with torch.no_grad():
    model_out = model(input_ids=input_ids,
                      attention_mask=attention_mask,
                      token_type_ids=token_type_ids,
                      labels=label_ids)
  # integer indexing works for tuple outputs and for ModelOutput objects;
  # with labels supplied, index 0 is the loss and index 1 the class scores
  logits = model_out[1]

  # softmax is monotonic, so argmax over the raw scores selects the same class
  logits = logits.detach().cpu().numpy()
  label_ids = label_ids.to('cpu').numpy()
  outputs = np.argmax(logits, axis=1)

  # evaluate predictions
  test_accuracy += np.sum(outputs == label_ids)
  l += len(group)

  # record every row whose predicted class is not 0 ("None")
  for row_idx in np.flatnonzero(outputs):
    row = group.iloc[row_idx]
    pred_records.append({
        'aspect': row['ques'].split(' ')[6],
        'id': group_id,
        'sentiment': idx2sentiment[int(outputs[row_idx])],
        'target': 'LOCATION1' if 'LOCATION1' in row['ans'] else 'LOCATION2',
        'text': row['ans'],
    })

# explicit (alphabetical) column order so the layout is fixed even when there
# are no predictions
final_preds = pd.DataFrame(pred_records, columns=['aspect', 'id', 'sentiment', 'target', 'text'])

In [None]:
# print test accuracy: correctly classified rows divided by the number of rows scored
test_accuracy / l

In [None]:
# write predictions to csv file (prediction.csv, without the index column)
final_preds.to_csv('prediction.csv', index=False)

In [None]:
# check device and usage
!nvidia-smi