In [99]:
#Install the transformers for using pre-trained Models
!pip install transformers



In [100]:
#Importing all the necessary packages
import torch
import pickle
import math
import torch.nn as nn
import torch.optim as optim
from transformers import AutoTokenizer, AutoModel
import warnings
import time
import copy
import numpy as np
from earlystopping import EarlyStopping
warnings.filterwarnings("ignore")

In [101]:
#Defining the tokenizer and pre_trained model 
#Incase of ERNIE Large Model
tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-2.0-large-en")
pre_trained_model = AutoModel.from_pretrained('nghuyong/ernie-2.0-large-en')

#Incase of ERNIE Normal Model
#tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-2.0-en")
#pre_trained_model = AutoModel.from_pretrained('nghuyong/ernie-2.0-en')

In [102]:
# Load the train, test and dev dataset
# Done by Phani
def load_dataset(filename):
    with open(filename,'r') as fp:
        lines = [line.strip() for line in fp]
    return lines

In [103]:
# Getting the words, pos tags, probablities in a single list from both the Train and Dev dataset
# Done by Phani
def word_traindev_Data(data):
    wordLines = data
    words = []
    probabilities = []
    wordList = []
    pos = []
    empty = []
    for line in wordLines:
        lineSplit = line.strip().split('\t')
        if line:
            word = lineSplit[1]
            prob = lineSplit[4]
            temp = lineSplit[5]
            words.append(word)
            probabilities.append(float(prob))
            pos.append(temp)
        elif not (len(empty) and []):
            wordList.append((words, pos, probabilities))
            words = []
            probabilities = []
            pos = []
    return wordList

In [104]:
# Getting the words in a single list from the Test dataset
# Done by Phani
def word_test_Data(data):
    wordLines = data
    words = []
    testWord = []
    empty = []
    for line in wordLines:
        lineSplit = line.strip().split('\t')
        if line:
            word = lineSplit[1]            
            words.append(word)
        elif not len(empty):
            testWord.append(words)
            words = []       
    return testWord

In [105]:
# Generate separate list of words, pos and probablities for Train and Dev data
# Done by Phani
def data_preprocess_train_dev(data):
    text = []
    pos = []
    probs = []
    for i,j,k in data:
            text.append(i)
            pos.append(j)
            probs.append(k)
    return text,pos, probs

In [106]:
# Generate separate list of words for Test data
# Done by Phani
def data_preprocess_test(data):
    text = []
    for i in data:
            text.append(i)
    return text

In [107]:
# Replicating probablities for matching length incase of sub tokenized words
#Done by Mithilaesh
def prob_list(batch_data,batch_probs):
    pb = []
    for i,j in zip(batch_data,batch_probs):
        tp = []
        for k,l in zip(i,j):
            temp = tokenizer.tokenize(k)
            if len(temp) == 1:
                tp.append(float(l))
            if len(temp) > 1:
                for i in range(len(temp)):
                    tp.append(float(l))
        pb.append(tp)
    return pb

In [108]:
# Replicating feature vectors for matching length incase of sub tokenized words
#Done by Mithilaesh
def feature_list(batch_data,feature):
    fv = []
    for i,j in zip(batch_data,feature):
        tp = []
        for k,l in zip(i,j):
            temp = tokenizer.tokenize(k)
            if len(temp) == 1:
                tp.append(l)
            if len(temp) > 1:
                for i in range(len(temp)):
                    tp.append(l)
        fv.append(tp)
    return fv

In [109]:
# Generate sentence from words in dataset
#Done by Mithilaesh
def get_sentence(words):    
    tokenized_text = []
    for i in words:
        sent = ''
        for h in i:
            if sent == '':
                sent = sent + h
            else:
                sent = sent+ " " +h
        tokens = tokenizer.tokenize(sent)
        tid = tokenizer.encode(tokens, add_special_tokens=False)
        tokenized_text.append(tid)
    return tokenized_text

In [110]:
# function to pad data for equal length
#Done by Mithilaesh
def pad_func(data):
    max_len = 0
    for i in data:
        if len(i) > max_len:
            max_len = len(i)
    if type(i[0]) is list:
        padded = [i + [[0, 0, 0, 0, 0]]*(max_len-len(i)) for i in data]
    else:
        padded = [i + [0]*(max_len-len(i)) for i in data]
    return padded

In [111]:
#data augmentation function to randomly reverse a sentence, capitalize a word and remove a word from a sentence
#Done by Mithilaesh
def data_augment(words, probs):
    aug_word_list = []
    aug_prob_list = []
    for i in range(len(words)):
        aug_word_list.append(words[i])
        aug_prob_list.append(probs[i])
        
        if (i%2) == 0:
            temp_word = copy.copy(words[i])
            temp_word.reverse()
            aug_word_list.append(temp_word)
            
            temp_prb = copy.copy(probs[i])
            temp_prb.reverse()
            aug_prob_list.append(temp_prb)
            
        if (i%3) == 0:
            temp_word = copy.copy(words[i])
            temp_word[0] = temp_word[0].upper()
            aug_word_list.append(temp_word)
            
            temp_prb = copy.copy(probs[i])
            aug_prob_list.append(temp_prb)
            
        if (i%5) == 0:
            temp_word = copy.copy(words[i])
            temp_word.remove(temp_word[0])
            aug_word_list.append(temp_word)
            
            
            temp_prb = copy.copy(probs[i])
            temp_prb.remove(temp_prb[0])
            aug_prob_list.append(temp_prb)
                
    return aug_word_list, aug_prob_list

In [112]:
#create feature vector for the words based on starts with capital, full word is capital, has hashtags, 
#word can be tokenized and word that is a connector word
#Done Srihasa
def feature_add(trainWords, conn_words):
    feature = []
    for i in trainWords:
        temp1 = []
        for j in i:
            temp2 =[0] * 5
            if (j[0].isupper()) and (j not in conn_words):
                temp2[0] = 1
            else:
                temp2[0] = 0
            if '#' in j:
                temp2[1] = 1
            else:
                temp2[1] = 0
            if j.isupper() and (j not in conn_words):
                temp2[2] = 1
            else:
                temp2[2] = 0
            if (len(tokenizer.tokenize(j))>1) and (j not in conn_words):
                temp2[3] = 1
            else:
                temp2[3] = 0
            if j not in conn_words:
                temp2[4] = 1
            else:
                temp2[4] = 0
            temp1.append(temp2)
        feature.append(temp1)
    return feature

In [113]:
#get a list of connector words from the dataset
#Done Srihasa
def connectors(trainWords, trainLabels):
    words = []
    for i,j in zip(trainWords, trainLabels):
        for k,l in zip(i,j):
            if l == 0.0:
                words.append(k)
    mylist = list(dict.fromkeys(words))
    mylist.sort()
    return mylist

In [114]:
#function to shuffle the dataset 
#Done Srihasa
def func_shuffle(tokens, probablities, feature):
    mapIndexPosition = list(zip(tokens, probablities, feature))
    np.random.shuffle(mapIndexPosition)
    tokens, probablities, feature = zip(*mapIndexPosition)
    return tokens, probablities, feature

In [115]:
# function to get attention mask
#Done Srihasa
def gen_attention(data):
    attention_mask = []
    for i in data:
        tmp = list([1] * (np.count_nonzero(i))) + list([0] * (len(i) - (np.count_nonzero(i))))
        attention_mask.append(tmp)
    return attention_mask


In [118]:
# Specifying dataset file names
#Done by Phani
TRAINING_FILE = "train.txt"
DEV_FILE = "dev.txt"
TEST_FILE = "test.txt"


In [119]:
# Preprocessing work on the dataset 
#Done by Phani
trainText = word_traindev_Data(load_dataset(TRAINING_FILE))
testEval = word_test_Data(load_dataset(TEST_FILE))
devText = word_traindev_Data(load_dataset(DEV_FILE))

trainWords,trainTags, trainLabels = data_preprocess_train_dev(trainText)
devWords, devTags, devLabels = data_preprocess_train_dev(devText)
testWords = data_preprocess_test(testEval)

#augmenting the dataset size
#trainWords, trainLabels = data_augment(trainWords, trainLabels)
#devWords, devLabels = data_augment(devWords, devLabels)

#getting connectors from dataset
conn_words = connectors(trainWords, trainLabels)

In [120]:
#Training data
#Done by Phani
train_tokens = get_sentence(trainWords)
train_probablities = prob_list(trainWords,trainLabels)
train_features = feature_add(trainWords, conn_words)
train_feature = feature_list(trainWords,train_features)

train_tokens, train_probablities, train_feature = func_shuffle(train_tokens, train_probablities, train_feature)

In [121]:
#Dev data
#Done by Phani
dev_tokens = get_sentence(devWords)
dev_probablities = prob_list(devWords,devLabels)
dev_features = feature_add(devWords,conn_words)
dev_feature = feature_list(devWords,dev_features)

dev_tokens, dev_probablities, dev_feature = func_shuffle(dev_tokens, dev_probablities, dev_feature)

In [122]:
#defining the model class
#Done by Mithilaesh
class ErnieModel(nn.Module):
    def __init__(self):
        super(ErnieModel, self).__init__()
        self.ernie = pre_trained_model
        #self.linear = nn.Linear(773, 1) Incase of ERNIE Normal 
        self.linear = nn.Linear(1029, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, tokens, attention, feature_vect):
        pooled_output,_ = self.ernie(tokens, attention)
        final_op = torch.cat((pooled_output, feature_vect), dim=-1)
        linear_output = self.linear(final_op)
        proba = self.sigmoid(linear_output)
        return proba

In [123]:
#setting model parameters
#Done by Mithilaesh
model = ErnieModel()

model_path = 'ernie_1024_layer_lr0001.pth'
#model_path = 'ernie_768_layer_lr0001.pth'
#model_path = 'ernie_768_layer_lr01.pth'

early_stopping = EarlyStopping(model_path,4,True)
optimizer = optim.Adamax(model.parameters(), lr=0.0001)
loss_func = nn.MSELoss(reduction = 'mean')
batch = 32

In [24]:
# Training the model
#Done by Mithilaesh
for epoch_num in range(10):
    model.train()
    start_time = time.time()
    print("Running epoch number ---->{}".format(epoch_num))
    training_loss = []
    validation_loss = []
    for i in range(0, len(train_tokens), batch):
        model.zero_grad()
        train_tokens_pad, train_probablities_pad, train_feature_pad = func_shuffle(train_tokens[i:i+batch], train_probablities[i:i+batch], train_feature[i:i+batch])
        train_tokens_pad = pad_func(train_tokens_pad)
        train_probablities_pad = pad_func(train_probablities_pad)
        train_feature_pad = pad_func(train_feature_pad)
        train_attention_pad = gen_attention(train_tokens_pad)
        train_probas = model(torch.tensor(train_tokens_pad), torch.tensor(train_attention_pad), torch.tensor(train_feature_pad))
        train_grd_truth = []
        for i in train_probablities_pad:
            p = []
            for j in i:
                q=[]
                q.append(j)
                p.append(q)
            train_grd_truth.append(p)
        train_batch_loss = loss_func(train_probas, torch.tensor(train_grd_truth))
        training_loss.append(train_batch_loss.item())
        train_batch_loss.backward()
        optimizer.zero_grad()
        optimizer.step()
        
    print("Training loss ---->{}".format((np.average(training_loss))))
    print("Total runtime ----> %s seconds\n" % (time.time() - start_time))
    
    #Validation Run
    with torch.no_grad():
        start_time = time.time()
        for i in range(0, len(dev_tokens), batch):
            dev_tokens_pad, dev_probablities_pad, dev_feature_pad = func_shuffle(dev_tokens[i:i+batch], dev_probablities[i:i+batch], dev_feature[i:i+batch])
            dev_tokens_pad = pad_func(dev_tokens_pad)
            dev_probablities_pad = pad_func(dev_probablities_pad)
            dev_feature_pad = pad_func(dev_feature_pad)
            dev_attention_pad = gen_attention(dev_tokens_pad)
            dev_probas = model(torch.tensor(dev_tokens_pad), torch.tensor(dev_attention_pad), torch.tensor(dev_feature_pad))
            dev_grd_truth = []
            for i in dev_probablities_pad:
                p = []
                for j in i:
                    q=[]
                    q.append(j)
                    p.append(q)
                dev_grd_truth.append(p)
            dev_batch_loss = loss_func(dev_probas, torch.tensor(dev_grd_truth))
            validation_loss.append(dev_batch_loss.item())
        
    print("Validation loss ---->{}".format((np.average(validation_loss))))
    print("Total runtime ----> %s seconds\n" % (time.time() - start_time))
    early_stopping(np.average(validation_loss), model)

    if early_stopping.early_stop is True:
        print("Early stopping")
        break
    

Running epoch number ---->0
Training loss ---->0.1910816727365766
Total runtime ----> 1718.4136307239532 seconds

Validation loss ---->0.19444727659225464
Total runtime ----> 65.90989923477173 seconds

Validation loss is (inf --> 0.19445).  Saving model ...
Running epoch number ---->1
Training loss ---->0.19243688140596663
Total runtime ----> 1703.3324127197266 seconds

Validation loss ---->0.19446736991405486
Total runtime ----> 65.60792970657349 seconds

Early Stopping = 1 of 4
Running epoch number ---->2
Training loss ---->0.19100528700011118
Total runtime ----> 1688.3708982467651 seconds

Validation loss ---->0.19418314337730408
Total runtime ----> 65.87995219230652 seconds

Validation loss is (0.19445 --> 0.19418).  Saving model ...
Running epoch number ---->3
Training loss ---->0.1919568237236568
Total runtime ----> 1704.4216659069061 seconds

Validation loss ---->0.1966002243757248
Total runtime ----> 67.31215262413025 seconds

Early Stopping = 1 of 4
Running epoch number ---->4

In [124]:
#loading the trained model
model = torch.load(model_path)

In [125]:
#Test Data
#Done by Srihasa
tokenized_test_text = []
for i in testWords:
    sent = ""
    for j in i:
        if sent == "":
            sent += j
        else:
            sent = sent + " " + j
    tokenized_test_text.append(sent)
test_features = feature_add(testWords, conn_words)
test_feature = feature_list(testWords,test_features)

In [126]:
# Testing the model on Test Dataset
#Done by Srihasa
test_prob=[]
test_tokens = []
test_res = []

with torch.no_grad():
    for batch_data, feat in zip(tokenized_test_text, test_feature):
        cp = []
        tokens = tokenizer.tokenize(batch_data)
        test_tokens.append(tokens)
        tid = tokenizer.encode_plus(tokens, add_special_tokens=False, return_attention_mask=True, return_tensors='pt')
        cp.append(feat)
        test_probas = model(tid['input_ids'], tid['attention_mask'], torch.tensor(cp))
        test_res.append(test_probas.data.tolist())
        out = batch_data.split(" ")
        temp_ans = []
        index = 0
        for i in out:
            if (len(tokenizer.tokenize(i))) == 1:
                temp_ans.append(test_probas[0][index].item())
                index = index + 1
            else:
                holder = []
                for j in range(len(tokenizer.tokenize(i))):
                    holder.append(test_probas[0][index].item())
                    index = index + 1
                prb = np.average(holder)
                temp_ans.append(prb) 
        test_prob.append(temp_ans)
    

In [127]:
#Predicted Test Probablities
for i in range(10):
    print("Sentence = {}".format(tokenized_test_text[i]))
    print("Emphasis Values = {}\n".format(test_prob[i]))

Sentence = We 'll be closed from 12/24 to 1/1 . See you in the New Year !
Emphasis Values = [0.43861860036849976, 0.44173575937747955, 0.4431616961956024, 0.43971166014671326, 0.4531821012496948, 0.4483940601348877, 0.41523823142051697, 0.4402421514193217, 0.4406854510307312, 0.46668708324432373, 0.42807328701019287, 0.4387187659740448, 0.4422082006931305, 0.44605904817581177, 0.44864422082901, 0.48147207498550415]

Sentence = No matter how hard you work , someone else is working harder .
Emphasis Values = [0.4177796244621277, 0.454275518655777, 0.4609808623790741, 0.4430331587791443, 0.3878982365131378, 0.4430483281612396, 0.3943283259868622, 0.39992910623550415, 0.39056774973869324, 0.4118628203868866, 0.43296104669570923, 0.44407224655151367, 0.466374009847641]

Sentence = The less I needed , the better I felt .
Emphasis Values = [0.4869804382324219, 0.4960853159427643, 0.4346615970134735, 0.41988831758499146, 0.43193504214286804, 0.4629516899585724, 0.49009206891059875, 0.410679489

In [128]:
#Dev Data
#Done by Mithilaesh and Srihasa
tokenized_dev_text = []
for i in devWords:
    sent = ""
    for j in i:
        if sent == "":
            sent += j
        else:
            sent = sent + " " + j
    tokenized_dev_text.append(sent)
dev_features = feature_add(devWords, conn_words)
dev_feature = feature_list(devWords,dev_features)

In [129]:
# Validating the model on Dev Dataset
#Done by Mithilaesh and Srihasa
dev_prob=[]

with torch.no_grad():
      for batch_data, feat in zip(tokenized_dev_text, dev_feature):
        cp = []
        tokens = tokenizer.tokenize(batch_data)
        tid = tokenizer.encode_plus(tokens, add_special_tokens=False, return_attention_mask=True, return_tensors='pt')
        cp.append(feat)
        dev_probas = model(tid['input_ids'], tid['attention_mask'], torch.tensor(cp))
        dev_probas=dev_probas.data
        out = batch_data.split(" ")
        temp_ans = []
        index = 0
        for i in out:
            if (len(tokenizer.tokenize(i))) == 1:
                temp_ans.append(dev_probas[0][index].item())
                index = index + 1
            else:
                holder = []
                for j in range(len(tokenizer.tokenize(i))):
                    holder.append(dev_probas[0][index].item())
                    index = index + 1
                prb = np.average(holder)
                temp_ans.append(prb) 
        dev_prob.append(temp_ans)
    

In [130]:
#Predicted Dev Probablities
for i in range(10):
    print("Sentence = {}".format(tokenized_dev_text[i]))
    print("Emphasis Values = {}".format(dev_prob[i]))
    print("Ground Truth = {}\n".format(devLabels[i]))


Sentence = Life is defined more by its risks than by its samenesses .
Emphasis Values = [0.4668006896972656, 0.40153881907463074, 0.4603216052055359, 0.4468671381473541, 0.436434268951416, 0.42450734972953796, 0.448442280292511, 0.4330403506755829, 0.4563688039779663, 0.4017581045627594, 0.4246480464935303, 0.449434757232666]
Ground Truth = [0.4444444444444444, 0.1111111111111111, 0.2222222222222222, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 1.0, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.7777777777777778, 0.1111111111111111]

Sentence = There is magic in the night when pumpkins glow by moonlight .
Emphasis Values = [0.4742286205291748, 0.39304935932159424, 0.49770572781562805, 0.4703209102153778, 0.47929349541664124, 0.4939355254173279, 0.501082718372345, 0.360744908452034, 0.471821665763855, 0.4787140190601349, 0.43171289563179016, 0.4839663505554199]
Ground Truth = [0.2222222222222222, 0.2222222222222222, 0.8888888888888888, 0.3333333333333333, 0

In [132]:
#Function for getting first 4 emphasized words
#Done by Phani
def finalProbs(data,values):        
    temp_list = [list(x) for x in zip(data,values)]
    sentence_list = []
    probas_list = []
    for sentences,probas in temp_list:
        sentence_list.append([[list] for list in sentences])
        probas_list.append([prob for prob in probas])

    wordsFinal = []
    probFinal = []
    temp2 = []
    for word, prob in zip(sentence_list,probas_list):
        wordList = []
        probList = []
        for i,j in zip(word,prob):
            if not(i[0].startswith("##")):
                wordList.append(i)
                probList.append(j)
            else:
                wordTemp = wordList[-1]+[i[0]]
                probTemp = probList[-1]+[j[0]]
                wordTemp = [''.join(wordTemp)]
                wordList.append(wordTemp)
                probList.append(probTemp)
                del(wordList[-2])
                del(probList[-2])
      
        for k in probList:
            if len(k) == 1:
                temp2.append(k)
            else:
                average = [np.average(k)]
                temp2.append(average)
        wordsFinal.append(wordList)
        probFinal.append(temp2)
        wordList = []
        probList = []
        temp2 = []
    return wordsFinal,probFinal

def compute_loss(i):
    wlist = []
    plist = []
    for j in i:
        wlist.append(j[0])
        plist.append(j[1])
        wtemp = []
        ptemp = []
    for i,j in sorted(zip(plist,wlist),reverse = True):
        wtemp.append(j)
        ptemp.append(i)
        
    wfinal = []
    loss = []
    finalList = []
    for i,j in zip(wtemp,ptemp):
        for k,l in zip(wtemp[1:],ptemp[1:]):
            currentWord = i[0]
            currentProb = float(j[0])
            nextprob = float(l[0])
            temp = currentProb - nextprob
            lossTemp = -max((temp),0) * math.log1p(temp)
            loss.append(lossTemp)
        wfinal.append([[currentWord],[currentProb],[np.average(loss)]])
    finalList.append(wfinal)
    
    return finalList

def final_rank(words,probs):
    loss_test = [] 
    for i,j in zip(words,probs):
        loss_temp = []
        for k,l in zip(i,j):
            if '##' in k[0]:
                loss_temp.append([k,l])
        if loss_temp is []:
            loss_temp.append('[]')
        loss_test.append(loss_temp)
    
    
    subword_dict = []
    subword_list = []
    for i in loss_test:
        empty_dict = dict.fromkeys(['Rank1','Rank2','Rank3','Rank4'])
        if (i == []):
            subword_list.append([["No subwords"]])
            subword_dict.append(empty_dict)
            continue
        else:
            if (len(i) == 1):
                subword_list.append(i)
                for a in i:
                    empty_dict['Rank1'] = a[0]
                subword_dict.append(empty_dict)
            else:
                j = compute_loss(i)
                subword_list.append(j)
                for c in j:
                    for d in c:
                        if empty_dict['Rank1'] is None:
                            empty_dict['Rank1'] = d[0]
                        elif empty_dict['Rank2'] is None:
                             empty_dict['Rank2'] = d[0]
                        elif empty_dict['Rank3'] is None:
                            empty_dict['Rank3'] = d[0]
                        else:
                            empty_dict['Rank4'] = d[0]
                subword_dict.append(empty_dict)
                
    wd = []
    for i,j in zip(testWords,testProbs):
        dic = sorted(zip(j,i),reverse=True)
        wd.append(dic)
        word_dict = []    
    for i,k in zip(wd,subword_dict):
        empty_word_dict = dict.fromkeys(['Rank1','Rank2','Rank3','Rank4'])
        for j in i:
            if '##' not in j[1][0]:
                if empty_word_dict['Rank1'] is None:
                    empty_word_dict['Rank1'] = j[1]
                elif empty_word_dict['Rank2'] is None:
                    empty_word_dict['Rank2'] = j[1]
                elif empty_word_dict['Rank3'] is None:
                    empty_word_dict['Rank3'] = j[1]
                elif empty_word_dict['Rank4'] is None:
                    empty_word_dict['Rank4'] = j[1]
        word_dict.append(empty_word_dict)
    
    
    final_word_dict = []
    for i,j in zip(subword_dict,word_dict):
        final_word_dict.append((i,j))
    return final_word_dict

In [133]:
#Running pairwise ranking for getting top 4 words
wordList = []
probList = []
test_res = [item for sublist in test_res for item in sublist]
testWords,testProbs = finalProbs(test_tokens,test_res)
finalDict = final_rank(testWords,testProbs)

In [147]:
#printing the values got through pairwise ranking
#Done by Mithilaesh
for i in finalDict[:20]:
    if (i[0]).get('Rank1') is None:
        print(i[1])
    else:
        print(i[0])

{'Rank1': ['/'], 'Rank2': ['!'], 'Rank3': ['see'], 'Rank4': ['/']}
{'Rank1': ['.'], 'Rank2': ['how'], 'Rank3': ['matter'], 'Rank4': ['harder']}
{'Rank1': ['less'], 'Rank2': ['felt'], 'Rank3': ['better'], 'Rank4': ['the']}
{'Rank1': ['believe'], 'Rank2': ['who'], 'Rank3': ['.'], 'Rank4': ['time']}
{'Rank1': ['you'], 'Rank2': ['a'], 'Rank3': ['the'], 'Rank4': ['not']}
{'Rank1': ['-'], 'Rank2': ['.'], 'Rank3': ['.'], 'Rank4': ['all']}
{'Rank1': ['un##lea##rn##ing'], 'Rank2': None, 'Rank3': None, 'Rank4': None}
{'Rank1': ['count'], 'Rank2': ['my'], 'Rank3': ['count'], 'Rank4': [',']}
{'Rank1': ['fear'], 'Rank2': ['.'], 'Rank3': ['feel'], 'Rank4': ['and']}
{'Rank1': ['language'], 'Rank2': ['a'], 'Rank3': ['.'], 'Rank4': ['warm']}
{'Rank1': ['er##r'], 'Rank2': None, 'Rank3': None, 'Rank4': None}
{'Rank1': ['.'], 'Rank2': ['making'], 'Rank3': [','], 'Rank4': ['not']}
{'Rank1': ['matt##ie'], 'Rank2': None, 'Rank3': None, 'Rank4': None}
{'Rank1': ['year'], 'Rank2': ['lunar'], 'Rank3': ['happy']