<a href="https://colab.research.google.com/github/rajashekar/colab/blob/main/Password_GPU_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Data, model, and training

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/Colab/password/

/content/drive/MyDrive/Colab/password


In [None]:
%ls data/passwords_db.txt

data/passwords_db.txt


In [None]:
!wc -l data/passwords_db.txt

18308616 data/passwords_db.txt


In [None]:
!shuf data/passwords_db.txt | split -a1 -d -l $(( $(wc -l < data/passwords_db.txt) * 90 / 100 )) - data/passwords_db_split

In [None]:
!ls -ltr data/passwords_db_split*

-rw------- 1 root root 160482666 Jun 24 18:55 data/passwords_db_split0
-rw------- 1 root root  17830887 Jun 24 18:55 data/passwords_db_split1


In [None]:
!wc -l data/passwords_db_split0

16477754 data/passwords_db_split0


In [None]:
!wc -l data/passwords_db_split1

1830863 data/passwords_db_split1


In [None]:
!shuf data/passwords_db_split1 | split -a1 -d -l $(( $(wc -l < data/passwords_db_split1) * 50 / 100 )) - data/passwords_db_split_0

In [None]:
!ls -ltr data/passwords_db_split*

-rw------- 1 root root 160482666 Jun 24 18:55 data/passwords_db_split0
-rw------- 1 root root  17830887 Jun 24 18:55 data/passwords_db_split1
-rw------- 1 root root        11 Jun 24 19:36 data/passwords_db_split_02
-rw------- 1 root root   8915131 Jun 24 19:36 data/passwords_db_split_01
-rw------- 1 root root   8915745 Jun 24 19:36 data/passwords_db_split_00


In [None]:
!wc -l data/passwords_db_split_00

915431 data/passwords_db_split_00


In [None]:
!wc -l data/passwords_db_split_01

915431 data/passwords_db_split_01


In [None]:
!mv data/passwords_db_split0 data/passwords_db_train

In [None]:
!mv data/passwords_db_split_00 data/passwords_db_val

In [None]:
!mv data/passwords_db_split_01 data/passwords_db_test

In [None]:
import numpy as np
import tensorflow as tf
import os

import distutils

In [None]:
!head -10 data/passwords_db_train > data/passwords_min

# Build vocab

In [None]:
# reading from test to build vocab, to reduce loading memory
data = open('data/passwords_db_test').read()
passwds = data.split("\n")

In [None]:
vocab = sorted(list(set(''.join(passwds))))

In [None]:
len(vocab)

95

In [None]:
max_len = max(passwds, key=len)

In [None]:
max_len

'quentinloveshisfriendsandfamilyevenwhiskeybeer2007'

In [None]:
len(max_len)

50

In [None]:
tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True, lower=False)
tokenizer.fit_on_texts(passwds)

In [None]:
vocab_size = len(tokenizer.index_word) + 1
seq_len = len(max_len) - 1

In [None]:
print(f"Vocab size: {vocab_size}")
print(f"Seq len: {seq_len}")

Vocab size: 96
Seq len: 49


In [None]:
in_ten = tokenizer.texts_to_sequences(["pass", "testadf"])
tf.keras.preprocessing.sequence.pad_sequences(in_ten, padding='post')

array([[29,  1, 11, 11,  0,  0,  0],
       [15,  2, 11, 15,  1, 22, 31]], dtype=int32)

In [None]:
if distutils.version.LooseVersion(tf.__version__) < '2.0':
    raise Exception('This notebook is compatible with TensorFlow 2.0 or higher.')

TRAIN_TXT = '/content/drive/MyDrive/Colab/password/data/passwords_db_train'
VAL_TXT = '/content/drive/MyDrive/Colab/password/data/passwords_db_val'
TEST_TXT = '/content/drive/MyDrive/Colab/password/data/passwords_db_test'

def transform(lines):
  vectors = tokenizer.texts_to_sequences(lines)
  return tf.keras.preprocessing.sequence.pad_sequences(vectors, padding='post')

def input_fn(file_name, seq_len=3, batch_size=32):
  """Return a dataset of source and target sequences for training."""
  with tf.io.gfile.GFile(file_name, 'r') as f:
    lines = f.readlines()

  source = tf.constant(transform(lines), dtype=tf.int32)
  ds = tf.data.Dataset.from_tensor_slices(source)

  def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

  BUFFER_SIZE = 10000
  ds = ds.map(split_input_target).shuffle(BUFFER_SIZE).batch(batch_size, drop_remainder=True)

  # return ds.repeat() - When providing an infinite dataset, you must specify the number of steps to run (if you did not intend to create an infinite dataset, make sure to not call `repeat()` on the dataset).
  return ds

In [None]:
test_s = input_fn('data/passwords_min', batch_size=2)

In [None]:
for x,y  in test_s:
  print(x, y)

tf.Tensor(
[[32  2  9  8  7  9  0  0  0  0  0]
 [30 26 21 20 21  9  5 11  6 19  6]], shape=(2, 11), dtype=int32) tf.Tensor(
[[ 2  9  8  7  9  0  0  0  0  0  0]
 [26 21 20 21  9  5 11  6 19  6 18]], shape=(2, 11), dtype=int32)
tf.Tensor(
[[ 1  9 20  1  8 22  3  6  0  0  0]
 [32  1 15  1  8  5  3  6 13  0  0]], shape=(2, 11), dtype=int32) tf.Tensor(
[[ 9 20  1  8 22  3  6  0  0  0  0]
 [ 1 15  1  8  5  3  6 13  0  0  0]], shape=(2, 11), dtype=int32)
tf.Tensor(
[[30 23 10  5  2 15 21  7  0  0  0]
 [45 38 53 36 52 47  3  6 13  0  0]], shape=(2, 11), dtype=int32) tf.Tensor(
[[23 10  5  2 15 21  7  0  0  0  0]
 [38 53 36 52 47  3  6 13  0  0  0]], shape=(2, 11), dtype=int32)
tf.Tensor(
[[ 1  8  8 23  9 13  3  0  0  0  0]
 [10  1 26  7 23 10  2  4  4 18  0]], shape=(2, 11), dtype=int32) tf.Tensor(
[[ 8  8 23  9 13  3  0  0  0  0  0]
 [ 1 26  7 23 10  2  4  4 18  0  0]], shape=(2, 11), dtype=int32)
tf.Tensor(
[[ 1 10  5 30 21  6  0  0  0  0  0]
 [37 10 25 22  5  1 37  0  0  0  0]], shape=(2, 1

In [None]:
embedding_dim = vocab_size
rnn_units = 256 # was 1024

In [None]:
class MyModel(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    super().__init__(self)
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [None]:
from nltk.translate.bleu_score import corpus_bleu

def bleu_score(y_true, y_pred):
  batch_score = []
  for ref, hyp in zip(y_true, y_pred):
    batch_score.append(corpus_bleu([chr(c) for c in ref], [chr(c) for c in hyp], weights=[0.25]))
  return np.average(batch_score)

In [None]:
test_dataset = input_fn(TEST_TXT)

In [None]:
sample_test_input, sample_test_target = next(iter(test_dataset))

In [None]:
sample_test_input

<tf.Tensor: shape=(32, 49), dtype=int32, numpy=
array([[ 6, 16,  6, ...,  0,  0,  0],
       [42, 42, 44, ...,  0,  0,  0],
       [ 3, 19, 14, ...,  0,  0,  0],
       ...,
       [33,  1,  8, ...,  0,  0,  0],
       [33,  1, 15, ...,  0,  0,  0],
       [ 9,  7, 37, ...,  0,  0,  0]], dtype=int32)>

In [None]:
sample_test_target.shape

TensorShape([32, 49])

In [None]:
tf.data.experimental.cardinality(test_dataset).numpy()

28607

In [173]:
# try model without training
def calculate_bleu_score(model, dataset, till_batch):
  scores = []

  for batch_i, (input_test_batch, target_test_batch) in enumerate(dataset, 1):
      test_batch_predictions = model(input_test_batch)
      test_batch_predictions = np.array([tf.argmax(i, axis=1).numpy() for i in test_batch_predictions], dtype=np.int32)
      scores.append(bleu_score(target_test_batch.numpy(), test_batch_predictions))
      if batch_i % 100 == 0:
        print(f"Till {batch_i} the avg bleu score is {np.average(scores)}")
      if batch_i > till_batch:
        break
  
  print(f"Final avg bleu score is {np.average(scores)}")
  return np.average(scores)

In [None]:
training_model =  MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

training_model.compile(
    optimizer='adam',
    loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True))

In [None]:
sample_test_preds = training_model(sample_test_input)

In [None]:
sample_test_preds.shape

TensorShape([32, 49, 96])

In [None]:
tf.random.categorical(sample_test_preds[0], num_samples=1)

<tf.Tensor: shape=(49, 1), dtype=int64, numpy=
array([[41],
       [18],
       [43],
       [20],
       [12],
       [69],
       [38],
       [18],
       [91],
       [49],
       [58],
       [41],
       [51],
       [29],
       [26],
       [39],
       [84],
       [71],
       [40],
       [ 0],
       [85],
       [24],
       [70],
       [86],
       [ 2],
       [16],
       [39],
       [68],
       [92],
       [88],
       [48],
       [26],
       [62],
       [64],
       [54],
       [48],
       [48],
       [62],
       [20],
       [ 1],
       [77],
       [95],
       [27],
       [81],
       [83],
       [46],
       [34],
       [94],
       [75]])>

In [None]:
tf.argmax(sample_test_preds[0], axis=1)

<tf.Tensor: shape=(49,), dtype=int64, numpy=
array([ 6,  6,  6,  6,  6,  6, 65, 49, 92, 92, 92, 92, 92, 92, 92, 92, 92,
       92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
       92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92])>

In [None]:
np.array([tf.argmax(i, axis=1).numpy() for i in sample_test_preds], dtype=np.int32)

array([[ 6,  6,  6, ..., 92, 92, 92],
       [18, 18, 26, ..., 92, 92, 92],
       [58, 92, 54, ..., 92, 92, 92],
       ...,
       [88, 57,  5, ..., 92, 92, 92],
       [88, 57, 58, ..., 92, 92, 92],
       [ 2, 46, 74, ..., 92, 92, 92]], dtype=int32)

In [None]:
np.array([tf.squeeze(tf.random.categorical(i, num_samples=1), axis=-1).numpy() for i in sample_test_preds], dtype=np.int32)

array([[18, 48, 52, ..., 36, 64, 95],
       [84, 37, 48, ..., 19, 62, 89],
       [75, 61, 22, ..., 71, 62, 75],
       ...,
       [76, 20,  1, ..., 15, 35, 73],
       [95, 17, 69, ..., 38, 50, 44],
       [37, 89, 46, ...,  1, 40, 86]], dtype=int32)

In [None]:
sample_test_target.numpy()

array([[16,  6, 16, ...,  0,  0,  0],
       [42, 44,  9, ...,  0,  0,  0],
       [19, 14,  3, ...,  0,  0,  0],
       ...,
       [ 1,  8,  8, ...,  0,  0,  0],
       [ 1, 15, 34, ...,  0,  0,  0],
       [ 7, 37, 25, ...,  0,  0,  0]], dtype=int32)

In [None]:
bleu_score(sample_test_target.numpy(), np.array([tf.argmax(i, axis=1).numpy() for i in sample_test_preds], dtype=np.int32))

0.08491452745550594

In [None]:
bleu_score(sample_test_target.numpy(), np.array([tf.squeeze(tf.random.categorical(i, num_samples=1), axis=-1).numpy() for i in sample_test_preds], dtype=np.int32))

0.15440914668420708

In [None]:
earlystopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    verbose = 1,
    patience = 3,
    restore_best_weights = True
)

In [114]:
training_model.fit(
    input_fn(TRAIN_TXT),
    epochs=100,
    validation_data=input_fn(VAL_TXT),
    callbacks=[earlystopping_cb]
)
training_model.save_weights('gpu_password_gru', overwrite=True)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 16: early stopping


In [115]:
sample_test_preds = training_model(sample_test_input)

In [116]:
bleu_score(sample_test_target.numpy(), np.array([tf.argmax(i, axis=1).numpy() for i in sample_test_preds], dtype=np.int32))

0.9634732692712848

In [117]:
bleu_score(sample_test_target.numpy(), np.array([tf.squeeze(tf.random.categorical(i, num_samples=1), axis=-1).numpy() for i in sample_test_preds], dtype=np.int32))

0.956432663858295

In [118]:
prediction_model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [119]:
prediction_model.load_weights('gpu_password_gru')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f8549f25dd0>

In [142]:
class OneStep(tf.keras.Model):
  def __init__(self, model, tokenizer, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.tokenizer = tokenizer

  #@tf.function
  def generate_one_step(self, input_chars, states=None):
    #import pdb; pdb.set_trace()
    # Convert strings to token IDs.
    # input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    if input_chars[0] == '':
      input_chars = ' '
    input_ids = self.tokenizer.texts_to_sequences(input_chars)
    #print(input_chars)
    input_ids = tf.keras.preprocessing.sequence.pad_sequences(input_ids, padding='post')

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.tokenizer.sequences_to_texts([predicted_ids.numpy()])

    # Return the characters and model state.
    return predicted_chars, states

In [143]:
one_step_model = OneStep(prediction_model, tokenizer)

In [144]:
import time

start = time.time()
states = None
next_char = ['w']
result = [next_char]

for n in range(2):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

wal 

________________________________________________________________________________

Run time: 0.01281881332397461


In [155]:
def pwds_from_chars(model, start_char, len):
  states = None
  next_char = [start_char]
  result = [next_char]

  for n in range(len-1):
    next_char, states = model.generate_one_step(next_char, states=states)
    result.append(next_char)

  result = tf.strings.join(result)
  return result[0].numpy().decode('utf-8')

In [156]:
all_pos = []
while True:
  pred = pwds_from_chars(one_step_model,'w', 3)
  if pred not in all_pos:
    all_pos.append(pred)
  else:
    break

print(all_pos)

['wod', 'wzw', 'war', 'wal', 'wan', 'woo', 'way', 'wbz', 'was', 'wha', 'win']


In [148]:
given_pass = 'pass'
attempts = 1

while True:
  pred = pwds_from_chars(given_pass[0], len(given_pass))
  print(f"{attempts} - {pred}")
  if pred == given_pass:
    break
  attempts += 1

print(f"Model took {attempts} attempts to find password - {given_pass}")

1 - psyk
2 - pass
Model took 2 attempts to find password - pass


In [151]:
def gen_passwords_random(num, pass_len=5, start_char=None):
  passwords = []
  gen_char = False
  if start_char is None:
    gen_char = True
  for i in range(num):
    # if start char is none, then randomly pick start_char
    if gen_char:
      start_char = tokenizer.index_word[random.randint(1, len(tokenizer.index_word) - 1)]

    password = start_char
    for j in range(pass_len - 1):
      password += tokenizer.index_word[random.randint(1, len(tokenizer.index_word) - 1)]
    passwords.append(password)
  return passwords

In [152]:
gen_passwords_random(4, pass_len=5)

['?M\\QX', 'h.R=3', 'j_3%Z', '>r`,c']

In [157]:
import random

def gen_passwords(model, num, pass_len=5, seq_len=3, start_char=None):
  passwords = []
  gen_char = False
  if start_char is None:
    gen_char = True
  for i in range(num):
    # if start char is none, then randomly pick start_char
    if gen_char:
      start_char = tokenizer.index_word[random.randint(1, len(tokenizer.index_word) - 1)]
    passwords.append(pwds_from_chars(model, start_char, pass_len))
  return passwords

In [158]:
gen_passwords(one_step_model, 4, pass_len=5, start_char='m')

['mi3ke', 'meian', 'mybit', 'mx101']

In [150]:
import hashlib
import requests

def check_pwned(passes):
  headers = {
    "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 GTB7.1 (.NET CLR 3.5.30729)", 
    "Referer": "https://haveibeenpwned.com/"
  }
  found_passwds = {}
  pwned_api = 'https://api.pwnedpasswords.com/range/'
  for p in passes:
    hash_object = hashlib.sha1(p.encode())
    pbHash = hash_object.hexdigest().upper()
    try:
      res = requests.get(pwned_api + pbHash[:5],  headers=headers, timeout=10)
      range_hashes = res.text.split('\r\n')
      for h in range_hashes:
        h_c = h.split(':')
        if h_c[0] == pbHash[5:]:
          found_passwds[p] = h_c[1]
    except Exception as e:
      print(f'request timed out for pass {p}')
  return found_passwds

In [159]:
random_preds = {}
model_preds = {}

total_num_pass = 100
pass_length = 5

for i in vocab:
  gen_rand_pass = gen_passwords_random(total_num_pass, pass_len=pass_length, start_char=i)
  random_preds[i] = len(check_pwned(gen_rand_pass))/total_num_pass
  gen_pass = gen_passwords(one_step_model, total_num_pass, pass_len=pass_length, start_char=i)
  model_preds[i] = len(check_pwned(gen_pass))/total_num_pass
  print(f"char '{i}' : random password prob {random_preds[i]}, model password prob: {model_preds[i]}")

char ' ' : random password prob 0.0, model password prob: 0.07
char '!' : random password prob 0.0, model password prob: 0.07
char '"' : random password prob 0.0, model password prob: 0.01
char '#' : random password prob 0.0, model password prob: 0.09
char '$' : random password prob 0.0, model password prob: 0.03
char '%' : random password prob 0.0, model password prob: 0.03
char '&' : random password prob 0.0, model password prob: 0.02
char ''' : random password prob 0.0, model password prob: 0.0
char '(' : random password prob 0.0, model password prob: 0.09
char ')' : random password prob 0.0, model password prob: 0.01
char '*' : random password prob 0.0, model password prob: 0.1
char '+' : random password prob 0.0, model password prob: 0.02
char ',' : random password prob 0.0, model password prob: 0.0
char '-' : random password prob 0.0, model password prob: 0.05
char '.' : random password prob 0.0, model password prob: 0.08
char '/' : random password prob 0.0, model password prob: 

In [161]:
data = open('data/passwords_db.txt').read()
passwds = data.split("\n")

In [162]:
num_pass_w_char = {}

for p in passwds:
  if p[0] not in num_pass_w_char:
    num_pass_w_char[p[0]] = 1
  else:
    num_pass_w_char[p[0]] += 1

In [163]:
len(num_pass_w_char)

95

In [164]:
prob_pass_start_chars = dict([(k, num_pass_w_char[k]/len(passwds)) for k in num_pass_w_char])

In [165]:
sorted_num_pass_w_char = sorted(prob_pass_start_chars.items(), key=lambda x:x[1])
sorted_num_pass_w_char[-10:]

[('d', 0.03461719691880605),
 ('j', 0.03637090666105474),
 ('l', 0.037107718185376865),
 ('b', 0.04236693574397236),
 ('c', 0.04299341670646122),
 ('1', 0.04496276261609492),
 ('a', 0.04987919076574708),
 ('0', 0.05549201231310918),
 ('s', 0.05710338470677496),
 ('m', 0.057332020217583886)]

In [166]:
char_dist = dict(sorted_num_pass_w_char)

In [167]:
rand_we_avg = 0
for x in vocab:
  rand_we_avg += (char_dist[x] * random_preds[x])

In [168]:
rand_we_avg

0.0016382739340715905

In [169]:
model_we_avg = 0
for x in vocab:
  model_we_avg += (char_dist[x] * model_preds[x])

In [170]:
model_we_avg

0.6875325897089879

In [171]:
un_training_model =  MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [175]:
calculate_bleu_score(un_training_model, test_dataset, 2000)

Till 100 the avg bleu score is 0.018183166314990548
Till 200 the avg bleu score is 0.01529686819042395
Till 300 the avg bleu score is 0.016322811503924825
Till 400 the avg bleu score is 0.016333799094959876
Till 500 the avg bleu score is 0.016331452453641964
Till 600 the avg bleu score is 0.01648631089842078
Till 700 the avg bleu score is 0.016482913309508247
Till 800 the avg bleu score is 0.01653024482696646
Till 900 the avg bleu score is 0.016532653150128248
Till 1000 the avg bleu score is 0.016551498191266546
Till 1100 the avg bleu score is 0.016683781923230962
Till 1200 the avg bleu score is 0.016653107757944364
Till 1300 the avg bleu score is 0.016605543173830662
Till 1400 the avg bleu score is 0.016603309213219182
Till 1500 the avg bleu score is 0.016644149075179242
Till 1600 the avg bleu score is 0.01680033114806393
Till 1700 the avg bleu score is 0.016735394051841875
Till 1800 the avg bleu score is 0.016781774873187674
Till 1900 the avg bleu score is 0.016789838478140667
Till 2

0.016832254258689144

In [176]:
calculate_bleu_score(prediction_model, test_dataset, 2000)

Till 100 the avg bleu score is 0.9666294146422548
Till 200 the avg bleu score is 0.966820789340762
Till 300 the avg bleu score is 0.9666761858438804
Till 400 the avg bleu score is 0.9667023019369888
Till 500 the avg bleu score is 0.9667130121659668
Till 600 the avg bleu score is 0.9666516252683379
Till 700 the avg bleu score is 0.9666666314629235
Till 800 the avg bleu score is 0.9666705388908906
Till 900 the avg bleu score is 0.9667085815441442
Till 1000 the avg bleu score is 0.9666578121104012
Till 1100 the avg bleu score is 0.9666725714312067
Till 1200 the avg bleu score is 0.9666455825630912
Till 1300 the avg bleu score is 0.96662355100773
Till 1400 the avg bleu score is 0.9666270606692746
Till 1500 the avg bleu score is 0.9666466590417622
Till 1600 the avg bleu score is 0.9666300633024727
Till 1700 the avg bleu score is 0.9666374314055945
Till 1800 the avg bleu score is 0.9666533660427095
Till 1900 the avg bleu score is 0.9666667801753819
Till 2000 the avg bleu score is 0.966656757

0.9666582611198066