<a href="https://colab.research.google.com/github/myidispg/Kaggle-competitions/blob/master/kaggle_jigsaw_unintended_bias_toxicity_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A notebook for the Jigsaw Unintended Bias in Toxicity Classification on Kaggle.
https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification/overview

##All the necessary imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import zipfile
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

PAD_token = 0
SOS_token = 1
EOS_token = 2
UNK_token = 4

MAX_LENGTH = 220

dataset_directory = 'kaggle_jigsaw_unintended_bias/'

print(pd.__version__)
print(device)

0.23.4
cuda


## Mount Google Drive. It will be used for saving models later
The drive will be mounted at the path: '/content/drive/My Drive/

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Download the Kaggle dataset.

In [4]:
os.environ['KAGGLE_USERNAME'] = "myidispg"
os.environ['KAGGLE_KEY'] = "c991620902499acb95ee0c402d169f34"
!kaggle competitions download -c jigsaw-unintended-bias-in-toxicity-classification


Downloading sample_submission.csv.zip to /content
  0% 0.00/221k [00:00<?, ?B/s]
100% 221k/221k [00:00<00:00, 30.4MB/s]
Downloading test.csv.zip to /content
 41% 5.00M/12.1M [00:00<00:00, 17.6MB/s]
100% 12.1M/12.1M [00:00<00:00, 34.7MB/s]
Downloading train.csv.zip to /content
 96% 262M/273M [00:02<00:00, 122MB/s]
100% 273M/273M [00:02<00:00, 97.8MB/s]


## Check whether the test, train and sample_submission zips have been downloaded.

In [3]:
print(os.listdir())

['.config', 'glove.6B.zip', 'glove_vectors', 'gdrive', 'kaggle_jigsaw_unintended_bias', 'sample_data']


## Download and extract the glove vector files. 
Create a list of words, word2idx, and id2emb for the glove vectors.

In [6]:
glove_url = 'http://nlp.stanford.edu/data/glove.6B.zip'

from urllib.request import urlretrieve
urlretrieve(glove_url, 'glove.6B.zip')

print('Glove embeddings downloaded.\nExtracting the files now...')
with zipfile.ZipFile('glove.6B.zip', 'r') as zip_ref:
  zip_ref.extractall('glove_vectors')

print(os.listdir('glove_vectors/')

Glove embeddings downloaded.
Extracting the files now...
['glove.6B.200d.txt', 'glove.6B.100d.txt', 'glove.6B.50d.txt', 'glove.6B.300d.txt']


In [4]:
words = []
idx = 0
word2idx = {}
embeddings = []

with open('glove_vectors/glove.6B.50d.txt', 'rb') as f:
  for l in f:
    line = l.decode().split()
    word = line[0]
    words.append(word)
    word2idx[word] = idx
    idx += 1
    embeddings.append(line[1: ])
  f.close()

print('Created the required lists and dictionaries of the glove files!!!')

Created the required lists and dictionaries of the glove files!!!


In [5]:
print(len(word2idx))
print(len(words))
print(len(embeddings))

400000
400000
400000


##Since the glove vectors won't have SOS, EOS and PAD tokens, append them to the last of the words list initialize random vectors for them.


In [6]:
num_known_words = len(words)

PAD_token = len(words) # Since length will always be one index greater than the last index.
SOS_token = len(words) + 1
EOS_token = len(words) + 2
UNK_token = len(words) + 3

word2idx['SOS'] = SOS_token
word2idx['EOS'] = EOS_token
word2idx['PAD'] = PAD_token
word2idx['UNK'] = UNK_token

words.append('SOS')
words.append('EOS')
words.append('PAD')
words.append('UNK')

# Append 3 random vectors to serve as embeddings for SOS, PAD and EOS.
for i in range(4):
  embeddings.append([random.randrange(-1, 1) for _ in range(50)])
  
print('Embedded random vectors for SOS, PAD and EOS.')

Embedded random vectors for SOS, PAD and EOS.


In [10]:
print(len(word2idx))
print(len(words))
print(len(embeddings))

400004
400004
400004


## Unzip the files and check.

In [9]:
# unzip the train.csv file
train_zip_filename = 'train.csv.zip'
with zipfile.ZipFile(train_zip_filename, 'r') as zip: 
    # printing all the contents of the zip file 
    zip.printdir() 
  
    # extracting all the files 
    print('Extracting all the files now...') 
    zip.extractall(dataset_directory) 
    print('Done!')
    
# unzip the test.csv file
train_zip_filename = 'test.csv.zip'
with zipfile.ZipFile(train_zip_filename, 'r') as zip: 
    # printing all the contents of the zip file 
    zip.printdir() 
  
    # extracting all the files 
    print('Extracting all the files now...') 
    zip.extractall(dataset_directory) 
    print('Done!') 
    
# unzip the sample_submission.csv file
train_zip_filename = 'sample_submission.csv.zip'
with zipfile.ZipFile(train_zip_filename, 'r') as zip: 
    # printing all the contents of the zip file 
    zip.printdir() 
  
    # extracting all the files 
    print('Extracting all the files now...') 
    zip.extractall(dataset_directory) 
    print('Done!')
    
!rm sample_submission.csv.zip
!rm test.csv.zip
!rm train.csv.zip

File Name                                             Modified             Size
train.csv                                      2019-03-28 21:17:38    816211476
Extracting all the files now...
Done!
File Name                                             Modified             Size
test.csv                                       2019-03-28 15:08:42     30179878
Extracting all the files now...
Done!
File Name                                             Modified             Size
sample_submission.csv                          2019-03-28 15:08:42      1167854
Extracting all the files now...
Done!


## Check if the files have been extracted properly

In [7]:
if set(['train.csv', 'test.csv', 'sample_submission.csv']).issubset(os.listdir(dataset_directory)):
  print('The dataset has been extracted properly.')
else:
  print('There was some issue in dataset download or extraction.')
 
print(f'The contents of the dataset directory are- {os.listdir(dataset_directory)}')

The dataset has been extracted properly.
The contents of the dataset directory are- ['train.csv', 'sample_submission.csv', 'test.csv']


## Load the training data in a pandas dataframe

In [8]:
df_train = pd.read_csv(dataset_directory + 'train.csv')
# print(f'The first entry in train dataframe is: \n{df_train[:1]}\n')
print(f'The column names in train dataframe are- {list(df_train.columns.values)}\n')

df_test = pd.read_csv(dataset_directory + 'test.csv')
# print(f'The first entry in test dataframe is: \n{df_train[:1]}\n')
print(f'The column names in test dataframe are- {list(df_test.columns.values)}')


The column names in train dataframe are- ['id', 'target', 'comment_text', 'severe_toxicity', 'obscene', 'identity_attack', 'insult', 'threat', 'asian', 'atheist', 'bisexual', 'black', 'buddhist', 'christian', 'female', 'heterosexual', 'hindu', 'homosexual_gay_or_lesbian', 'intellectual_or_learning_disability', 'jewish', 'latino', 'male', 'muslim', 'other_disability', 'other_gender', 'other_race_or_ethnicity', 'other_religion', 'other_sexual_orientation', 'physical_disability', 'psychiatric_or_mental_illness', 'transgender', 'white', 'created_date', 'publication_id', 'parent_id', 'article_id', 'rating', 'funny', 'wow', 'sad', 'likes', 'disagree', 'sexual_explicit', 'identity_annotator_count', 'toxicity_annotator_count']

The column names in test dataframe are- ['id', 'comment_text']


## Remove the special characters from the dataset.
Credit goes to https://www.kaggle.com/gpreda/jigsaw-fast-compact-solution

In [0]:
def preprocess(data):
    '''
    Credit goes to https://www.kaggle.com/gpreda/jigsaw-fast-compact-solution
    '''
    punct = "/-'?!.,#$%\'()*+-/:;<=>@[\\]^_`{|}~`" + '""“”’' + '∞θ÷α•à−β∅³π‘₹´°£€\×™√²—–&'
    def clean_special_chars(text, punct):
        for p in punct:
            text = text.replace(p, ' ')
        return text

    data = data.astype(str).apply(lambda x: clean_special_chars(x, punct))
    return data
  
df_train['comment_text'] = preprocess(df_train['comment_text'])
df_test['comment_text'] = preprocess(df_test['comment_text'])

##Convert the dataframes to numpy arrays
Convert the dataframes to numpy arrays. Also, the id, target and comment_text columns of the train dataset are preserved. The others are discarded as of now.

In [10]:
df_train = df_train.iloc[:, :3]
np_train = df_train.iloc[:, :].values
print(np_train[100])
np_test = df_test.iloc[:, :].values
print(np_test[100])

del df_train, df_test

[239722 0.0
 'Loving this collection  Cant wait till Season 2 is released  Should be any day now according to http   yeezy season2 com ']
[7000100
 'Did you even read the editorial \n\n The best course is for the democratic world to continue to demand the return of the country s legislature  and the end of the sham constituent assembly that usurped it  Impartial outsiders  working with both the opposition and Mr  Maduro  could help negotiate a schedule for the return of legitimate elections  \n\nHow exactly do you interpret that as a call for a military invasion ']


## Filter out sentences that are more than 220 words in length


In [11]:
np_train_filtered = []
np_test_filtered = []

print(f'train_set before filtering: {np_train.shape[0]}')

print('filtering sentences of train set')
for i in range(np_train.shape[0]):
  if len(np_train[i][2].split()) <= MAX_LENGTH:
    np_train_filtered.append(np_train[i])

print('filtering sentences of test set')
for i in range(np_test.shape[0]):
  if len(np_test[i][1].split()) <= MAX_LENGTH:
    np_test_filtered.append(np_test[i])

np_train = np.asarray(np_train_filtered)
np_test = np.asarray(np_test_filtered)
print('Filtered the sentences')
print(f'train_set after filtering: {np_train.shape[0]}')

del np_train_filtered, np_test_filtered

train_set before filtering: 1804874
filtering sentences of train set
filtering sentences of test set
Filtered the sentences
train_set after filtering: 1804864


## Normalize the strings, stem, convert to lower case and remove all punctutations.

Will try stemming later. Stemming might result in information loss while the model will try to understand the context of the sentence.

Note: The symbol to remove apostrophes was found on- https://stackoverflow.com/questions/44296593/how-to-remove-apostrophe-marks-from-a-string-in-python?rq=1

#------Code for this block(REMOVED AS OF NOW. FOUND A FASTER VERSION)--------
import re
import unicodedata

# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizetring(s):
    s = unicodeToAscii(s.lower().strip())
#     s = [word.replace("'", "") for word in s.split()]
#     for i in range(len(s)):
#       s[i] = ps.stem(s[i])
    s = re.sub(r"http[s]?:\/\/\S+", '', s)
    s = s.replace(u"\u2019", "") # remove apostrophes
    s = re.sub(r"([.!?])", r" \1", s) # remove punctuations
    s = re.sub(r"[^a-zA-Z0-9.!?]+", r" ", s) # remove all characters except alphabets and .!?
    return s

print('Normalizing dataset....')
for i in range(len(np_test)):
  np_test[i][1] = normalizetring(str(np_test[i][1]))
# print(np_test[100])
print('Normalized Test dataset')
for i in range(len(np_train)):
  np_train[i][2] = normalizetring(str(np_train[i][2]))
# print(np_train[100])
print('Normalized Train dataset')

## Get the amount of toxic and non-toxic samples in x_train.

In [12]:
toxic_count = 0
non_toxic_count = 0

np_train_toxic = []
np_train_non_toxic = []

for i in range(np_train.shape[0]):
  if np_train[i][1] >= 0.5:
    toxic_count += 1
    np_train_toxic.append(np_train[i])
  else:
    non_toxic_count += 1
    np_train_non_toxic.append(np_train[i])
    
np_train_toxic = np.asarray(np_train_toxic)
np_train_non_toxic = np.asarray(np_train_non_toxic)

print(f'The number of toxic samples are: {toxic_count} and non toxic sample count is: {non_toxic_count}')
print(f'The first 2 non toxic comments are: {np_train_non_toxic[:2]}\nThe first 2 toxic are: {np_train_toxic[:2]}')

The number of toxic samples are: 144333 and non toxic sample count is: 1660531
The first 2 non toxic comments are: [[59848 0.0
  'This is so cool  It s like   would you want your mother to read this    Really great idea  well done ']
 [59849 0.0
  'Thank you   This would make my life a lot less anxiety inducing  Keep it up  and don t let anyone get in your way ']]
The first 2 toxic are: [[59856 0.8936170212765957 'haha you guys are a bunch of losers ']
 [59859 0.6666666666666666 'ur a sh tty comment ']]


##According to some data analysis earlier, the dataset was heavily biased to non-toxic comments. 
So, while training, I will generate x_train as many as times as the number of epochs.
Each time, the generated x_train will have balanced amount of toxic and non -toxic samples. The non-toxic samples will be picked randomly.

In [0]:
def generateTrainSet(np_train_toxic, np_train_non_toxic, toxic_count):
  x_train_toxic = np.asarray(np_train_toxic[:, 2])
  y_train_toxic = np.asarray(np_train_toxic[:, 1])
  
  x_train_non_toxic = []
  y_train_non_toxic = []
  
  for i in range(toxic_count):
    choice = random.choice(np_train_non_toxic)
    x_train_non_toxic.append(choice[2])
    y_train_non_toxic.append(choice[1])
  
  x_train_non_toxic = np.asarray(x_train_non_toxic)
  y_train_non_toxic = np.asarray(y_train_non_toxic)
  
  x_train = np.concatenate((x_train_toxic, x_train_non_toxic)).reshape((toxic_count * 2, 1))
  del x_train_toxic, x_train_non_toxic
  x_train = np.asarray(x_train)
  
  y_train = np.concatenate((y_train_toxic, y_train_non_toxic)).reshape((toxic_count * 2, 1))
  del y_train_toxic, y_train_non_toxic
  y_train = np.where(np.asarray(y_train) >= 0.5, 1, 0)
  
  data = np.concatenate((x_train, y_train), axis=1)
  
  np.random.shuffle(data)
  return data[:, 0], data[:, 1]

## Extract x_train, y_train, x_valid, y_valid and x_test from the numpy arrays.
The train dataset will be split into train and validation set to measure the performance of the model.

In [18]:
x_train = np_train[:, 2]
y_train = np_train[:, 1]

from sklearn.model_selection import train_test_split
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.40, shuffle=True)

print(f'The len of the train set is: {x_train.shape[0]}')
print(f'The len of the validation set is: {x_valid.shape[0]}')

y_train = np.where(y_train >= 0.5, 1, 0)
y_valid = np.where(y_valid >= 0.5, 1, 0)

x_test = np_test[:, 1]
x_test_ids = np_test[:, 0]

The len of the train set is: 907876
The len of the validation set is: 605251


## Now we have the train set, test set and validation set. We have also created word embeddings for the known words by GloVe embeddings and added random embeddings for unknown words including SOS, EOS, PAD.

## Creating sequences from words.
Some functions to create sequences of indexes for a given sentence. The output will be a Pytorch tensor.

In [0]:
def indexesFromSentence(sentence):
  words = []
  for word in sentence.split():
    try:
      words.append(word2idx[word])
    except KeyError:
      words.append(word2idx['UNK'])
  return words  
    
#     return [word2idx[word] for word in sentence.split(' ')]
    
def tensorFromSentence(sentence):
    indexes = indexesFromSentence(sentence)
    indexes.append(EOS_token)
    if len(indexes) < MAX_LENGTH:
      for i in range(len(indexes), MAX_LENGTH):
        indexes.append(PAD_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

In [19]:
print(f'Vocabulary size according to word list: {len(words)}')
print(f'Vocabulary size of embeddings vector: {len(embeddings)}')
print(f'Number of dimensions of embeddings: {len(embeddings[400003])}')

Vocabulary size according to word list: 400004
Vocabulary size of embeddings vector: 400004
Number of dimensions of embeddings: 50


##Define the GRU model.
The model will take an input word's index, fetch up the corresponding embedding and then run through a Gated Recurrent Unit to return an Output and a Hidden State. The output will further be passed on to an ANN.

In [0]:
class GRU(nn.Module):
  def __init__(self, vocab_size, embedding_dims, hidden_size, num_layers):
    super(GRU, self).__init__()
    self.vocab_size = vocab_size
    self.embedding_dims = embedding_dims
    self.hidden_size = hidden_size
    self.num_layers = num_layers
#     embedding_weights = torch.tensor(embeddings, dtype=torch.float)
#     self.embedding = nn.Embedding.from_pretrained(embedding_weights)
#     self.embedding = nn.Embedding(self.vocab_size, self.embedding_dims)
    self.gru = nn.GRU(self.embedding_dims, self.hidden_size, num_layers=self.num_layers)
  
  def forward(self, input, hidden_state):
#     embedded = self.embedding(input).view(1, 1, -1) # Output of size -> 1, 1, embedding_dims
#     output = embedded
    input = input.view(1, 1, -1)
    # dims of hidden_state = 2, 1, hidden_size (2 because 2 layers)
    output, hidden_state = self.gru(input, hidden_state)
    return output, hidden_state
  
  def initHidden(self):
    return torch.randn(self.num_layers, 1, self.hidden_size, device=device)
    
    
class ANN(nn.Module):
  def __init__(self, input_size):
    super(ANN, self).__init__()
    self.fc1 = nn.Linear(input_size, 128)
    self.fc2 = nn.Linear(128, 64)
    self.fc3 = nn.Linear(64, 2)
    self.softmax = nn.LogSoftmax(dim=1)
    
  def forward(self, input):
    output = F.dropout(F.relu(self.fc1(input)), p=0.2)
    output = F.dropout(F.relu(self.fc2(output)), p=0.2)
    output = self.fc3(output)
#     print(f'Before softmax: {output}')
    output = self.softmax(output)
    return output

# class ANN(nn.Module):
#   def __init__(self, input_size):
#     super(ANN, self).__init__()
#     self.fc1 = nn.Linear(input_size, 128)
#     self.fc2 = nn.Linear(128, 2)
#     self.softmax = nn.LogSoftmax(dim=1)
    
#   def forward(self, input):
#     output = F.dropout(F.relu(self.fc1(input)), p=0.2)
#     output = self.fc2(output)
# #     print(f'Before softmax: {output}')
# #     output = self.softmax(output)
#     return output
  
# for i in range(10):
#   input_tensor = tensorFromSentence(x_train[i])
#   hidden = gru.initHidden()
#   loss = 0
#   for index in range(input_tensor.shape[0]):
#     output, hidden = gru(input_tensor[index], hidden)
    
#   y_pred = ann(output)
# #     y_pred = nn.LogSoftmax(y_pred)
# #   print(f'after softmax: {y_pred}')
# #     topv, topi = y_pred.topk(1)
# #     print(f'topv: {topv[0][0][0]}, topi: {topi}')
# #     print(y_pred, torch.tensor(y_train[2], device=device))
# #     loss += criterion(topv.view(1, -1), torch.tensor(y_train[index], device=device).view(1, ))
#   loss += criterion(y_pred.view(1, -1), torch.tensor(y_train[index], device=device).view(1, ))
#   loss.backward()
#   gru_optimizer.step()
#   ann_optimizer.step()
#   print(f'loss: {loss}')
#   loss = 0
  

## Some functions to find time elapsed
These functions help to calculate the elapsed time and the remaining time.

In [0]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

## Training a single sentence.

In [0]:
def trainSentence(input_tensor, target_label, gru, ann, gru_optimizer, ann_optimizer, criterion):
  
  
  hidden_state = gru.initHidden()
  
  gru_optimizer.zero_grad()
  ann_optimizer.zero_grad()
  
  loss = 0
  # pass the whole sentence through the rnn. 
  for index in range(input_tensor.shape[0]):
    word_embedding = []
    try:
#       print(f'here: {embeddings[input_tensor[index]]}')
      word_embedding = torch.FloatTensor(embeddings[index]).to(device)
#       print('here too')
    except KeyError:
      word_embedding = torch.FloatTensor(embeddings[word2idx['UNK']]).to(device)
#     output, hidden_state = gru(input_tensor[index], hidden_state)
    output, hidden_state = gru(word_embedding, hidden_state)
#     print(output[0])
  
  # feed the output tensor to the ann.
  y_pred = ann(output)
  print(f'y_pred: {y_pred}')
  loss += criterion(y_pred.view(1, -1), torch.tensor(target_label, device=device).view(1, ))
  
  loss.backward()
  gru_optimizer.step()
  ann_optimizer.step()
  
  return loss

## Save model checkpoints after an interval.
Save the model checkpoints to the drive and load from there whenever resuming training.

In [0]:
def save_model(model, optimizer, model_name, iter, print_loss_total, plot_loss_total, plot_losses):
  path = f'/content/gdrive/My Drive/Machine Learning Models/new_kaggle_jigsaw_{model_name}_max_len_220.pth'
  
  # Check if a previous model exists and then delete it.
  saved_models = os.listdir('gdrive/My Drive/Machine Learning Models/')
  if os.path.exists(path):
    print('Deleting existing model')
    os.remove(path)
  
  print(f'Saving {model_name} model...')
  torch.save({'iteration': iter,
             'model_state_dictionary': model.state_dict(),
             'optimizer_state_dictionary': optimizer.state_dict(),
             'print_loss_total': print_loss_total,
             'plot_loss_total': plot_loss_total,
             'plot_losses': plot_losses}, path)
  print(f'{model_name} saved successfully.')
  
def load_model(model_name):
  path = f'/content/gdrive/My Drive/Machine Learning Models/new_kaggle_jigsaw_{model_name}_max_len_220.pth'
  if os.path.exists(path):
    checkpoint = torch.load(path)
    return checkpoint
  else:
    return None
 
# save_model(gru, gru_optimizer, 'gru', 1000, 100, 1000, 100)

# checkpoint = load_model('gru')
# print(type(checkpoint['print_loss_total']))

In [0]:
def trainIters(gru, ann, epochs = 3, print_every=5000, save_every = 6000, plot_every=5000, learning_rate=0.01, resume=False):
  print('Training model...')
  
  for j in range(epochs):
    print(f'EPOCH {j+1}')
    start = time.time()
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0
    
    x_train, y_train = generateTrainSet(np_train_toxic, np_train_non_toxic, toxic_count)

#     n_iters = x_train.shape[0]
    n_iters = 1
    print('Total number of iteration: ' + str(n_iters))
    current_iteration = 1

    if resume == True:
      gru_checkpoint = load_model('gru')
      ann_checkpoint = load_model('ann')

      current_iteration = gru_checkpoint['iteration'] if gru_checkpoint != None else 1
      gru.load_state_dictionary = gru_checkpoint['model_state_dictionary']
      gru.train()
      gru.to(device)
      ann.load_state_dictionary = ann_checkpoint['model_state_dictionary']
      ann.train()
      ann.to(device)
      gru_optimizer.load_state_dictionary = gru_checkpoint['optimizer_state_dictionary']
      ann_optimizer.load_state_dictionary = ann_checkpoint['optimizer_state_dictionary']

      plot_losses = gru_checkpoint['plot_losses']
      print_loss_total = gru_checkpoint['print_loss_total']
      plot_loss_total = gru_checkpoint['plot_loss_total']
  
    for iter in range(current_iteration, n_iters+1):
      input_tensor = tensorFromSentence(x_train[iter-1])

      loss = trainSentence(input_tensor, y_train[iter-1], gru, ann, gru_optimizer, ann_optimizer, criterion)

      print_loss_total = print_loss_total + loss
      plot_loss_total = plot_loss_total + loss

      if iter % save_every == 0:
        save_model(gru, gru_optimizer, 'gru', iter, print_loss_total, plot_loss_total, plot_losses)
        save_model(ann, ann_optimizer, 'ann', iter, print_loss_total, plot_loss_total, plot_losses)
        print(f'models saved after iteration: {iter}')

      if iter % print_every == 0:
        print_loss_avg = print_loss_total / print_every
        print_loss_total = 0
        print(f'Epoch: {j+1} Time since: {timeSince(start, iter/n_iters)}; Iteration: {iter}; Percentage elapsed: {(iter/n_iters)*100}; Loss: {print_loss_avg}')

      if iter % plot_every == 0:
        plot_loss_avg = plot_loss_total / plot_every
        plot_losses.append(plot_loss_avg)
        plot_loss_total = 0
      
  showPlot()
    

In [52]:
EMBEDDING_DIMS = 50
HIDDEN_SIZE = 64
    
gru = GRU(len(words), EMBEDDING_DIMS, HIDDEN_SIZE, num_layers=1).to(device)
ann = ANN(HIDDEN_SIZE).to(device)

criterion = nn.CrossEntropyLoss()

gru_optimizer = optim.SGD(gru.parameters(), lr=0.01)
ann_optimizer = optim.SGD(ann.parameters(), lr=0.01)

trainIters(gru, ann, print_every=500, resume=False)

Training model...
EPOCH 1
Total number of iteration: 1


ValueError: ignored