In [1]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import spacy
import re
import pickle
from inltk.inltk import tokenize
from time import time
from tqdm import tqdm

# from utils import translate_sentence, bleu, save_checkpoint, load_checkpoint
from torchtext import data
from torchtext import datasets

from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard
from torchtext.datasets import TranslationDataset
from torchtext.data import Field, BucketIterator

import torch
import spacy
from torchtext.data.metrics import bleu_score
import sys
import time
from inltk.inltk import tokenize

In [2]:
spacy_eng = spacy.load("en_core_web_sm")
str_punct = '''[!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~।]'''

def tokenize_hi(text):
    text = re.sub(str_punct,'',text).lower()
    return tokenize(text, "hi")

def tokenize_eng(text):
    text = re.sub(str_punct,'',text).lower()
    return [tok for tok in spacy_eng.tokenizer(text)]


In [3]:
hindi = Field(tokenize=tokenize_hi, lower=True, init_token="<sos>", eos_token="<eos>")
english = Field(
    tokenize=tokenize_eng, lower=True, init_token="<sos>", eos_token="<eos>"
)

In [4]:
hindi = data.Field(tokenize=tokenize_hi)
english = data.Field(tokenize=tokenize_eng)

In [5]:
st = time.time()
mt_train = datasets.TranslationDataset(
     path='./data_torch/data_sm', exts=('.hi', '.en'),
     fields=(hindi, english))
hindi.build_vocab(mt_train, max_size=15000, min_freq=2)
english.build_vocab(mt_train, max_size=15000, min_freq=2)

print(f'Total time taken for hindi.build_vocab was {time.time() - st}')

Total time taken for hindi.build_vocab was 2.9937100410461426


### Total time taken for hindi.build_vocab was 2783.8125097751617


In [6]:
# with open('hindi_vocab_1.pickle', 'wb') as handle:
#     pickle.dump(hindi, handle, protocol=pickle.HIGHEST_PROTOCOL)

# with open('english_vocab_1.pickle', 'wb') as handle:
#     pickle.dump(english, handle, protocol=pickle.HIGHEST_PROTOCOL)

# with open('hindi_vocab.pickle', 'rb') as handle:
#     hindi = pickle.load(handle)

# with open('english_vocab.pickle', 'rb') as handle:
#     english = pickle.load(handle)

In [7]:
class Encoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, bidirectional=True)

        self.fc_hidden = nn.Linear(hidden_size * 2, hidden_size)
        self.fc_cell = nn.Linear(hidden_size * 2, hidden_size)
        self.dropout = nn.Dropout(p)

    def forward(self, x):
        # x: (seq_length, N) where N is batch size

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (seq_length, N, embedding_size)

        encoder_states, (hidden, cell) = self.rnn(embedding)
        # outputs shape: (seq_length, N, hidden_size)

        # Use forward, backward cells and hidden through a linear layer
        # so that it can be input to the decoder which is not bidirectional
        # Also using index slicing ([idx:idx+1]) to keep the dimension
        hidden = self.fc_hidden(torch.cat((hidden[0:1], hidden[1:2]), dim=2))
        cell = self.fc_cell(torch.cat((cell[0:1], cell[1:2]), dim=2))

        return encoder_states, hidden, cell


class Decoder(nn.Module):
    def __init__(
        self, input_size, embedding_size, hidden_size, output_size, num_layers, p
    ):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(hidden_size * 2 + embedding_size, hidden_size, num_layers)

        self.energy = nn.Linear(hidden_size * 3, 1)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(p)
        self.softmax = nn.Softmax(dim=0)
        self.relu = nn.ReLU()

    def forward(self, x, encoder_states, hidden, cell):
        x = x.unsqueeze(0)
        # x: (1, N) where N is the batch size

        embedding = self.dropout(self.embedding(x))
        # embedding shape: (1, N, embedding_size)

        sequence_length = encoder_states.shape[0]
        h_reshaped = hidden.repeat(sequence_length, 1, 1)
        # h_reshaped: (seq_length, N, hidden_size*2)

        energy = self.relu(self.energy(torch.cat((h_reshaped, encoder_states), dim=2)))
        # energy: (seq_length, N, 1)

        attention = self.softmax(energy)
        # attention: (seq_length, N, 1)

        # attention: (seq_length, N, 1), snk
        # encoder_states: (seq_length, N, hidden_size*2), snl
        # we want context_vector: (1, N, hidden_size*2), i.e knl
        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)

        rnn_input = torch.cat((context_vector, embedding), dim=2)
        # rnn_input: (1, N, hidden_size*2 + embedding_size)

        outputs, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        # outputs shape: (1, N, hidden_size)

        predictions = self.fc(outputs).squeeze(0)
        # predictions: (N, hidden_size)

        return predictions, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_force_ratio=0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = len(english.vocab)

        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)
        encoder_states, hidden, cell = self.encoder(source)

        # First input will be <SOS> token
        x = target[0]

        for t in range(1, target_len):
            # At every time step use encoder_states and update hidden, cell
            output, hidden, cell = self.decoder(x, encoder_states, hidden, cell)

            # Store prediction for current time step
            outputs[t] = output

            # Get the best word the Decoder predicted (index in the vocabulary)
            best_guess = output.argmax(1)
            x = target[t] if random.random() < teacher_force_ratio else best_guess

        return outputs

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Training hyperparameters
# num_epochs = 100
learning_rate = 3e-4
# batch_size = 32

# Model hyperparameters
input_size_encoder = len(hindi.vocab)
input_size_decoder = len(english.vocab)
output_size = len(english.vocab)
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024
num_layers = 1
enc_dropout = 0.0
dec_dropout = 0.0

print(f'length of input_size_encoder is {input_size_encoder}')

print(f'length of input_size_decoder is {input_size_decoder}')

# Tensorboard to get nice loss plot
# writer = SummaryWriter(f"runs/loss_plot")
step = 0
model_file_name = "checkpoint_attn_v2.pth.tar"

# train_iterator = data.BucketIterator(
#      dataset=mt_train, batch_size=batch_size,
#      sort_key=lambda x: data.interleave_keys(len(x.src), len(x.trg)), device=device)

length of input_size_encoder is 120
length of input_size_decoder is 2


In [9]:
!pwd

/disk_ext/nlp/seq2seq_attention


In [23]:
encoder_net = Encoder(
    input_size_encoder, encoder_embedding_size, hidden_size, num_layers, enc_dropout
).to(device)


decoder_net = Decoder(
    input_size_decoder,
    decoder_embedding_size,
    hidden_size,
    output_size,
    num_layers,
    dec_dropout,
).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

pad_idx = 1
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

# if load_model:
load_model = True
save_model = True

load_checkpoint(torch.load(model_file_name), model, optimizer)

=> Loading checkpoint


RuntimeError: Error(s) in loading state_dict for Seq2Seq:
	size mismatch for encoder.embedding.weight: copying a param with shape torch.Size([12689, 300]) from checkpoint, the shape in current model is torch.Size([12668, 300]).
	size mismatch for decoder.embedding.weight: copying a param with shape torch.Size([13005, 300]) from checkpoint, the shape in current model is torch.Size([15002, 300]).
	size mismatch for decoder.fc.weight: copying a param with shape torch.Size([13005, 1024]) from checkpoint, the shape in current model is torch.Size([15002, 1024]).
	size mismatch for decoder.fc.bias: copying a param with shape torch.Size([13005]) from checkpoint, the shape in current model is torch.Size([15002]).

In [None]:
def translate_sentence(model, sentence, german, english, device, max_length=50):
    # Load german tokenizer
#     spacy_ger = spacy.load("de")

    # Create tokens using spacy and everything in lower case (which is what our vocab is)
    if type(sentence) == str:
#         tokens = [token.text.lower() for token in spacy_ger(sentence)]
        tokens = [i.lower() for i in tokenize(sentence, "hi")]
    else:
        tokens = [token.lower() for token in sentence]

    # Add <SOS> and <EOS> in beginning and end respectively
    tokens.insert(0, german.init_token)
    tokens.append(german.eos_token)

    # Go through each german token and convert to an index
    text_to_indices = [german.vocab.stoi[token] for token in tokens]

    # Convert to Tensor
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    # Build encoder hidden, cell state
    with torch.no_grad():
        outputs_encoder, hiddens, cells = model.encoder(sentence_tensor)

    outputs = [english.vocab.stoi["<sos>"]]

    for _ in range(max_length):
        previous_word = torch.LongTensor([outputs[-1]]).to(device)

        with torch.no_grad():
            output, hiddens, cells = model.decoder(
                previous_word, outputs_encoder, hiddens, cells
            )
            best_guess = output.argmax(1).item()

        outputs.append(best_guess)

        # Model predicts it's the end of the sentence
        if output.argmax(1).item() == english.vocab.stoi["<eos>"]:
            break

    translated_sentence = [english.vocab.itos[idx] for idx in outputs]

    # remove start token
    return translated_sentence[1:]


def bleu(data, model, german, english, device):
    targets = []
    outputs = []

    for example in tqdm(data):
        src = vars(example)["src"]
        trg = vars(example)["trg"]

        prediction = translate_sentence(model, src, german, english, device)
        prediction = prediction[:-1]  # remove <eos> token

        targets.append([trg])
        outputs.append(prediction)

    return bleu_score(outputs, targets)


def save_checkpoint(state, filename=model_file_name):
    print("=> Saving checkpoint")
    torch.save(state, filename)


def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])

In [13]:
import pandas as pd

In [14]:
df = pd.read_csv('./data_torch/pmindia.v1.hi-en.csv')

In [15]:
df.columns

Index(['english_sentence', 'hindi_sentence'], dtype='object')

In [26]:
sent = df['hindi_sentence'][0]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 अग्रिम धन राशि इन अस्पतालों को चिकित्सा निरीक्षकों को दी जाएगी, जो हर मामले को देखते हुए सहायता प्रदान करेंगे। 
 and translated_sent is 
resources is spreading to <unk>


In [27]:
sent = df['hindi_sentence'][2]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 इस तरह आरएएन का कामकाज स्वास्थ्य एवं परिवार कल्याण विभाग के अधीन लाया जाएगा। 
 and translated_sent is 
to provide the up of this health and family welfare with <unk>


In [28]:
sent = df['hindi_sentence'][10]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 इस प्रकार एक स्वायशासी निकाय के रूप में जेएसके को बंद किया जा सकता है क्योंकि निधि के तौर पर उसका कामकाज विभाग द्वारा संभव है। 
 and translated_sent is 
as a result is to be utilized as it can be administered by the department as a fund possible can be <unk>


In [29]:
sent = df['hindi_sentence'][16]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 महारानी मैक्सिमा ने इन कदमों के जरिए हुई प्रगति की सराहना की। 
 and translated_sent is 
ceos appreciated the progress made by these initiatives through these progress achieved made in these initiatives these initiatives made by these initiatives from ‘ <unk>


In [30]:
sent = df['hindi_sentence'][20]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 प्रधान मंत्री ने कहा कि 16वीं लोकसभा में करीब 315 पहली बार सांसद चुनकर आए हैं, जो पहली लोकसभा के समान है । 
 and translated_sent is 
on the same the prime minister said that the sixteenth lok sabha was comparable to the <unk>


In [31]:
sent = df['hindi_sentence'][40]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 उन्होंने कहा कि इस कार्य को 2022 तक पूरा कर लेने का लक्ष्य है। 
 and translated_sent is 
that the aim is to achieving this task by 2022 out this target to by 2022 out this target is by 2022 out this target is by 2022 out this target is by 2022 objectives of this purpose by 2022 this is by 2022 objectives of this purpose by 2022


In [32]:
sent = df['hindi_sentence'][80]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 सेना ने हमेशा देश को प्राथमिकता दी है। 
 and translated_sent is 
nations the nation the country given the country to accorded to the nation and the nation is affordable youth country country has accorded the nation ’s priority ” the nation nation priority ” the country ’s priority on priority ” the nation nation its priority on priority ” the nation


In [37]:
sent = df['hindi_sentence'][1100]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 भारत की उत्सव परम्परा, प्रकृति-प्रेम को बलवान बनाने वाली, बालक से लेकर के हर व्यक्ति को संस्कारित करने वाली रही है। 
 and translated_sent is 
if india ’s land is a <unk>


In [39]:
sent = df['hindi_sentence'][10000]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 परस्पर सहयोग के लिए दिमाग में कुछ विचार आए हैं, जिसे मैं यहां साझा कर रहा हूं। 
 and translated_sent is 
possible here with a <unk>


In [40]:
sent = df['hindi_sentence'][3000]
translated_sent = ' '.join(translate_sentence(model, sent, hindi, english, device))
print(f'org sentenece is \n {sent} \n and translated_sent is \n{translated_sent}')

org sentenece is 
 और अगर वो नहीं हुआ तो कहते हैं कि रिफॉर्म नहीं हुआ। 
 and translated_sent is 
if you n’t n’t have enough then do n’t have <unk>


In [11]:
mt_test = datasets.TranslationDataset(
     path='./data_torch/data_val', exts=('.hi', '.en'),
     fields=(hindi, english))

In [None]:
def bleu(data, model, german, english, device):
#     targets = []
#     outputs = []
    all_scores = []
    for example in tqdm(data):
        src = vars(example)["src"]
        trg = vars(example)["trg"]

        prediction = translate_sentence(model, src, german, english, device)
        prediction = prediction[:-1]  # remove <eos> token

        
#         targets.append([trg])
#         outputs.append(prediction)
        
        try:
#             score = bleu_score([prediction], [trg], max_n=2)
#             score = sentence_bleu(prediction, trg)
#             all_scores.append(score)
#             if score>0:
            print(f'trg is {trg}')
            print(f'prediction is {prediction}')
            print(f'score is {score}')

            
        except Exception as e:
#             print(f'Exception is {e}')
#             print(f'trg is {trg}')
#             print(f'prediction is {prediction}')
            pass

    return (all_scores, sum(all_scores)/len(all_scores))

In [None]:
bleu(mt_test, model, hindi, english, device)

In [86]:
sent1 = ['prime', 'minister', 'said', 'the', 'union', 'government', 'is', 'working', 'focusing', 'focusing', 'on', 'infrastructure', 'augmentation', 'and', 'it', 'requires', 'is', 'focus', 'infrastructure', 'development', 'deficit', '<unk>']
# sent1 =  ' '.join(sent1)
sent1

['prime',
 'minister',
 'said',
 'the',
 'union',
 'government',
 'is',
 'working',
 'focusing',
 'focusing',
 'on',
 'infrastructure',
 'augmentation',
 'and',
 'it',
 'requires',
 'is',
 'focus',
 'infrastructure',
 'development',
 'deficit',
 '<unk>']

In [87]:
sent2 = ['the', 'union', 'government', 'of', 'india', 'infrastructure', 'is', 'focusing', 'focused', 'on', 'focusing', 'on', 'infrastructure', 'technology', 'and', 'real', 'structure', 'with', 'speed', 'targets', 'being', 'infrastructure', 'is', 'focusing', 'on', '<unk>']
# sent2 =  ' '.join(sent2)
# sent2

In [98]:
sentence_bleu(['my','name','is','mohit' ], ['i', 'am','mohit'])

1.384292958842266e-231

In [95]:
from nltk.translate.bleu_score import sentence_bleu
reference = [['this', 'is' 'test']]
candidate = ['this', 'is', 'a', 'test']
score = sentence_bleu(reference, candidate)
print(score)

1.2882297539194154e-231


In [67]:
candidate_corpus = [['My', 'full', 'pytorch', 'test'], ['Another', 'Sentence']]
references_corpus = [[['My', 'full', 'pytorch', 'test'], ['Completely', 'Different']], [['No', 'Match']]]
bleu_score(candidate_corpus, references_corpus)

0.8408964276313782

In [73]:
tokenize_hi('प्रधानमंत्री ने कहा कि भारत में केंद्र सरकार बुनियादी ढांचे पर ध्यान केंद्रित कर रही है।')

['▁प्रधानमंत्री',
 '▁ने',
 '▁कहा',
 '▁कि',
 '▁भारत',
 '▁में',
 '▁केंद्र',
 '▁सरकार',
 '▁बुनियादी',
 '▁ढांचे',
 '▁पर',
 '▁ध्यान',
 '▁केंद्रित',
 '▁कर',
 '▁रही',
 '▁है']

In [65]:
all_scores, score = bleu(mt_test, model, hindi, english, device)
all_scores, score


  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<01:46,  9.41it/s][A
  0%|          | 4/1000 [00:00<01:29, 11.13it/s][A
  0%|          | 5/1000 [00:00<02:09,  7.67it/s][A
  1%|          | 6/1000 [00:00<02:11,  7.59it/s][A
  1%|          | 8/1000 [00:00<02:07,  7.77it/s][A
  1%|          | 10/1000 [00:01<01:45,  9.38it/s][A
  1%|▏         | 14/1000 [00:01<01:22, 11.96it/s][A
  2%|▏         | 16/1000 [00:01<01:39,  9.92it/s][A
  2%|▏         | 18/1000 [00:01<01:29, 11.01it/s][A
  2%|▏         | 20/1000 [00:01<01:26, 11.31it/s][A
  2%|▏         | 22/1000 [00:01<01:21, 11.96it/s][A
  2%|▏         | 24/1000 [00:02<01:38,  9.90it/s][A
  3%|▎         | 26/1000 [00:02<01:48,  8.98it/s][A
  3%|▎         | 28/1000 [00:02<01:57,  8.25it/s][A
  3%|▎         | 30/1000 [00:02<01:38,  9.88it/s][A
  3%|▎         | 32/1000 [00:02<01:23, 11.57it/s][A
  4%|▎         | 35/1000 [00:03<01:11, 13.45it/s][A
  4%|▎         | 37/1000 [00:03<01:17, 12.39it/s][A
  4%|▍

 64%|██████▎   | 636/1000 [01:06<01:01,  5.97it/s][A
 64%|██████▎   | 637/1000 [01:07<01:05,  5.53it/s][A
 64%|██████▍   | 638/1000 [01:07<01:01,  5.93it/s][A
 64%|██████▍   | 640/1000 [01:07<00:52,  6.88it/s][A
 64%|██████▍   | 641/1000 [01:07<01:03,  5.64it/s][A
 65%|██████▍   | 647/1000 [01:07<00:45,  7.72it/s][A
 65%|██████▌   | 651/1000 [01:08<00:39,  8.78it/s][A
 65%|██████▌   | 653/1000 [01:08<00:40,  8.61it/s][A
 66%|██████▌   | 655/1000 [01:08<00:42,  8.18it/s][A
 66%|██████▌   | 657/1000 [01:09<00:44,  7.64it/s][A
 66%|██████▌   | 660/1000 [01:09<00:34,  9.75it/s][A
 66%|██████▌   | 662/1000 [01:09<00:29, 11.45it/s][A
 67%|██████▋   | 667/1000 [01:09<00:23, 14.38it/s][A
 67%|██████▋   | 670/1000 [01:09<00:27, 11.97it/s][A
 67%|██████▋   | 673/1000 [01:09<00:22, 14.40it/s][A
 68%|██████▊   | 676/1000 [01:10<00:27, 11.77it/s][A
 68%|██████▊   | 678/1000 [01:10<00:26, 11.95it/s][A
 68%|██████▊   | 680/1000 [01:10<00:25, 12.36it/s][A
 68%|██████▊   | 682/1000 [0

([0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,

In [66]:
all_scores

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0

In [41]:
print(f"Bleu score {score * 100:.2f}")

IndexError: index 4 is out of bounds for dimension 0 with size 4

In [8]:
from nltk import translate

In [5]:
hypothesis1 = 'It is a guide to action which ensures that the military always obeys the commands of the party'
hypothesis2 = 'It is to insure the troops forever hearing the activity guidebook that party direct'

reference1 = 'It is a guide to action that ensures that the military will forever heed Party commands'
reference2 = 'It is the guiding principle which guarantees the military forces always being under the command of the Party'
reference3 = 'It is the practical guide for the army always to heed the directions of the party'

In [None]:
meteor_score()

In [None]:
translate.meteor_score()

In [9]:
round(translate.meteor_score([reference1, reference2, reference3], hypothesis1),4)


TypeError: 'module' object is not callable