In [1]:
import numpy as np
import string
from nltk.corpus import stopwords
from nltk.corpus import gutenberg
import nltk
import pickle
from numpy.linalg import norm
import random
from tqdm import tqdm
import os

# Number of training pairs accumulated by train_testmodel() before each weight update.
batch_size = 50
# Number of full passes over the sentence list; main() passes this to train_testmodel().
num_epochs = 20

def softmax(z):
    """Return the softmax of ``z`` taken along axis 0.

    The maximum along axis 0 is subtracted before exponentiating so that
    large scores do not overflow; the result is unchanged by that shift.
    For a 2-D input every column of the result sums to 1.
    """
    shifted = z - np.max(z, axis=0)
    exponentials = np.exp(shifted)
    return exponentials / np.sum(exponentials, axis=0)

class word2vec(object):
    """Two-matrix word embedding model trained with a full softmax.

    ``W1`` has shape (V, N) and ``W2`` has shape (N, V), where ``V`` is the
    vocabulary size and ``N`` the embedding size. Rows of ``W1`` are what the
    evaluation helpers in this file use as word vectors.
    """

    def __init__(self):
        self.N = 300                  # embedding dimension
        self.X_train = []
        self.y_train = []
        self.window_size = 4          # context words taken on each side of the center word
        self.alpha = 0.01             # learning rate
        self.words = []
        self.word_index = {}
        self.index_word = {}
        self.validation_mappings = {}
        self.analogy_mappings = {}
        self.loss = 0

    def initialize(self,V, data, word_index, validation_mappings, analogy_mappings, index_word):
        """Allocate the weight matrices and attach vocabulary / evaluation data.

        Args:
            V: vocabulary size.
            data: list of corpus words, stored as ``self.words``.
            word_index: mapping word -> integer index.
            validation_mappings: mapping "w1:w2" -> "w3:w4" used for validation.
            analogy_mappings: mapping "w1:w2" -> "w3:w4" used for analogy scoring.
            index_word: mapping integer index -> word.

        Note: seeds NumPy's global RNG with 1 so the initial weights are reproducible.
        """
        self.V = V
        np.random.seed(1)
        self.W1 = np.random.rand(self.V, self.N)
        self.W2 = np.random.rand(self.N, self.V)

        self.words = data
        # The caller-supplied mapping is authoritative. A previous per-word loop
        # that rebuilt a mapping from ``data`` was discarded immediately, so it
        # has been removed.
        self.word_index = word_index
        self.index_word = index_word
        self.validation_mappings = validation_mappings
        self.analogy_mappings = analogy_mappings

    def feed_forward(self, X):
        """Run the forward pass for a batch.

        Args:
            X: sequence of ``len(X)`` input vectors, each of length V.

        Returns:
            Array of shape (V, len(X)); each column is a softmax distribution.
            The hidden activations and raw scores are kept in ``self.h`` and
            ``self.u`` for use by ``backpropagate`` and ``train``.
        """
        # transposing X to shape V, batch_size
        X_ = np.array(X).T

        assert X_.shape == (self.V, len(X))

        self.h = np.matmul(self.W1.T, X_)
        self.u = np.matmul(self.W2.T, self.h)
        self.y_hat = softmax(self.u)

        assert self.y_hat.shape == (self.V, len(X))

        return self.y_hat

    def backpropagate(self, x, t):
        """Apply one gradient-descent update using the last forward pass.

        Args:
            x: the batch of input vectors previously given to ``feed_forward``.
            t: the batch of target vectors, each of length V.

        Gradients are summed (not averaged) over the batch.
        """
        t_ = np.array(t).T
        x_ = np.array(x).T

        assert t_.shape == (self.V, len(x))
        assert x_.shape == (self.V, len(x))

        # Derivative of the loss reported by ``train`` with respect to the
        # scores u: -t + (2 * window_size) * softmax(u).
        e = -t_ + np.multiply(self.y_hat, self.window_size*2)
        self.grad_W2 = np.dot(self.h, e.T)
        self.grad_W1 = np.dot(x_, np.dot(self.W2, e).T)
        self.W1 = self.W1 - self.alpha * self.grad_W1
        self.W2 = self.W2 - self.alpha * self.grad_W2

    def train(self, step, batch_x, batch_y):
        """Run one forward/backward pass and optionally report the loss.

        Args:
            step: training step number; the loss is only computed when it is a
                multiple of 20.
            batch_x: batch of input vectors, each of length V.
            batch_y: batch of target vectors, each of length V.

        Returns:
            The mean per-example loss (computed from the scores of the forward
            pass, i.e. before this step's weight update) when
            ``step % 20 == 0``, otherwise ``None``.
        """
        targets = np.asarray(batch_y)
        assert targets.shape == (len(batch_y), self.V)

        self.y_hat = self.feed_forward(batch_x)
        self.backpropagate(batch_x, batch_y)

        # Skip the loss arithmetic entirely on steps where it is not reported.
        if step % 20 != 0:
            return None

        u = self.u  # shape (V, batch)

        # Weight every score by its target count so a context word that occurs
        # more than once in the window contributes accordingly, matching the
        # gradient used in ``backpropagate``.
        target_score = np.sum(targets.T * u)

        # log(sum(exp(u))) per column, shifted by the column maximum so the
        # exponentials cannot overflow.
        u_max = np.max(u, axis=0)
        log_norm = u_max + np.log(np.sum(np.exp(u - u_max), axis=0))

        return (-1*target_score + self.window_size*2*log_norm.sum())/len(batch_x)


def isEnglish(word):
    """Return True when every character of ``word`` is an ASCII letter.

    Digits, punctuation, whitespace and non-ASCII letters all make the
    result False. An empty string yields True because there is no offending
    character.
    """
    return all(
        ('a' <= symbol <= 'z') or ('A' <= symbol <= 'Z')
        for symbol in word
    )


# Cache for the stop-word set so the NLTK corpus is read once, not once per sentence.
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return the NLTK English stop words as a frozenset, loading them on first use."""
    if "english" not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE["english"] = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE["english"]


def preprocessing(sentence):
    """Normalise a tokenised sentence for training.

    Each token has leading/trailing punctuation stripped and is lower-cased.
    A token is then dropped if it is an English stop word, contains anything
    other than ASCII letters, or is shorter than three characters.

    Args:
        sentence: iterable of string tokens.

    Returns:
        A new list with the surviving, normalised tokens in their original order.
    """
    stop_words = _english_stop_words()

    cleaned = []
    for token in sentence:
        word = token.strip(string.punctuation).lower()
        if word in stop_words:
            continue
        # remove words containing numbers and non-english characters
        if not isEnglish(word):
            continue
        if len(word) > 2:
            cleaned.append(word)

    return cleaned


def index_word_maps(data: list) -> tuple:
    """Build the vocabulary lookup tables for a list of words.

    The distinct words of ``data`` are sorted and numbered from 0.

    Returns:
        ``(word_to_index, index_to_word)``: two dicts that are inverses of
        each other.
    """
    vocabulary = sorted(set(data))

    index_to_word = dict(enumerate(vocabulary))
    word_to_index = {word: index for index, word in index_to_word.items()}
    return word_to_index, index_to_word


def word_to_one_hot_vector(word: str, word_to_index: dict, vocabulary_size: int) -> np.ndarray:
    """Encode ``word`` as a float vector of length ``vocabulary_size``.

    The entry at ``word_to_index[word]`` is 1 and every other entry is 0.
    Raises ``KeyError`` when ``word`` is not in ``word_to_index``.
    """
    encoded = np.zeros(vocabulary_size)
    position = word_to_index[word]
    encoded[position] = 1
    return encoded


def context_words_to_vector(context_words: list,
                            word_to_index: dict) -> np.ndarray:
    """Return the count vector of ``context_words`` over the vocabulary.

    Entry ``i`` of the result is the number of times the word with index
    ``i`` appears in ``context_words``. The counts are accumulated directly
    into one vector rather than summing a separate one-hot vector per word.

    Args:
        context_words: words surrounding the center word.
        word_to_index: mapping word -> integer index; its size is the
            vocabulary size.

    Returns:
        Float array of length ``len(word_to_index)``. An empty
        ``context_words`` yields an all-zero vector of that length.

    Raises:
        KeyError: if a context word is not in ``word_to_index``.
    """
    counts = np.zeros(len(word_to_index))
    for word in context_words:
        counts[word_to_index[word]] += 1
    return counts



def search(w2v, vec, w1, w2, w3):
    """Return the vocabulary index whose embedding is most cosine-similar to ``vec``.

    Args:
        w2v: object exposing ``word_index`` (word -> index) and ``W1``
            (embedding matrix indexed by word index).
        vec: query vector.
        w1, w2, w3: word indices to exclude from the candidates.

    Returns:
        The index of the best candidate. Candidates are compared over the full
        cosine range, so the best one is returned even when every similarity
        is zero or negative; ties go to the candidate that comes first in
        ``word_index``. Falls back to index 0 when there is no candidate with a
        defined similarity (e.g. all words omitted, or a zero-length vector).
    """
    omitted = {w1, w2, w3}
    candidates = [idx for idx in w2v.word_index.values() if idx not in omitted]
    if not candidates:
        return 0

    query = np.asarray(vec, dtype=float)
    vectors = np.asarray(w2v.W1)[candidates]

    # Zero-length vectors produce 0/0; silence the warning and rank them last.
    with np.errstate(divide='ignore', invalid='ignore'):
        scores = np.matmul(vectors, query) / (norm(vectors, axis=1) * norm(query))
    scores = np.where(np.isnan(scores), -np.inf, scores)

    best = int(np.argmax(scores))
    if not np.isfinite(scores[best]):
        return 0
    return candidates[best]

def topSimilar(w2v, word):
    """Print the words most cosine-similar to ``word``, best first.

    Up to ten lines of the form ``<similarity> <word>`` are printed; fewer
    when the vocabulary has fewer than ten entries. ``word`` itself is part
    of the ranking.

    Args:
        w2v: object exposing ``word_index``, ``index_word`` and ``W1``.
        word: the query word.

    Raises:
        KeyError: if ``word`` is not in ``w2v.word_index``.
    """
    v1 = np.array(w2v.W1[w2v.word_index[word]])

    similarities = []
    for index in w2v.word_index.values():
        vec = np.array(w2v.W1[index])
        similarities.append((index, np.matmul(vec, v1)/(norm(vec)*norm(v1))))

    similarities.sort(reverse=True, key=lambda pair: pair[1])

    # Slicing caps the listing at ten without failing on a small vocabulary.
    for index, score in similarities[:10]:
        print(score, w2v.index_word[index])




def _cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``."""
    a = np.array(a)
    b = np.array(b)
    return np.matmul(a, b)/(norm(a)*norm(b))


def _score_analogies(w2v, mappings):
    """Print a prediction for every analogy in ``mappings``; return (correct, attempted).

    ``mappings`` maps "w1:w2" to "w3:w4". Entries with a word missing from the
    vocabulary are reported as "word not present" and not counted.
    """
    correct = 0
    attempted = 0

    for key, value in mappings.items():
        left, right = key.split(":"), value.split(":")
        w1, w2 = left[0], left[1]
        w3, w4 = right[0], right[1]

        if any(word not in w2v.word_index for word in (w1, w2, w3, w4)):
            print("word not present")
            continue

        i1, i2, i3 = w2v.word_index[w1], w2v.word_index[w2], w2v.word_index[w3]

        # w1 is to w2 as w3 is to ?  ->  nearest neighbour of w2 - w1 + w3
        pred = w2v.W1[i2] - w2v.W1[i1] + w2v.W1[i3]
        index = search(w2v, pred, i1, i2, i3)

        if index == w2v.word_index[w4]:
            correct += 1
        attempted += 1

        print("Actual: {}:{}::{}:{}, pred: {}".format(w1, w2, w3, w4, w2v.index_word[index]))

    return correct, attempted


def evaluate(w2v):
    """Print embedding quality diagnostics for ``w2v``.

    First prints a man/woman/king/queen similarity check (skipped with
    "word not present" if any of the four words is missing from the
    vocabulary). Then scores ``w2v.analogy_mappings`` and
    ``w2v.validation_mappings`` and prints one accuracy per set. Each
    accuracy counts only its own set, and is reported as 0.0 when no entry
    of the set could be scored.
    """
    sanity_words = ("man", "woman", "king", "queen")
    if all(word in w2v.word_index for word in sanity_words):
        man, woman, king, queen = (w2v.W1[w2v.word_index[word]] for word in sanity_words)
        sim1 = _cosine_similarity(man, king)
        sim2 = _cosine_similarity(woman, queen)
        print("sim1 and sim2 are {}, {}".format(sim1, sim2))

        pred = woman-man+king
        print("cosine of pred and queen: {}".format(_cosine_similarity(pred, queen)))
    else:
        print("word not present")

    for mappings in (w2v.analogy_mappings, w2v.validation_mappings):
        correct, attempted = _score_analogies(w2v, mappings)
        acc = correct/attempted if attempted else 0.0
        print("Accuracy is {}".format(acc))

def train_testmodel(w2v, sents, word_to_index, num_epochs):
    """Train ``w2v`` on ``sents`` in mini-batches of the module-level ``batch_size``.

    For every position that has a full window of ``w2v.window_size`` words on
    both sides, the one-hot center word is queued as an input and the count
    vector of its context words as the target. Each time ``batch_size`` pairs
    are queued, ``w2v.train`` is called. Pairs left over at the end of an
    epoch carry over into the next one; whatever remains after the last epoch
    is trained as one final, smaller batch.

    Side effects: prints the loss every 20 steps, calls ``evaluate`` every 400
    steps and pickles the model to ``w2vecbow_v4.pkl`` every 1000 steps.

    Args:
        w2v: model exposing ``window_size`` and ``train(step, batch_x, batch_y)``.
        sents: list of sentences, each a list of in-vocabulary words.
        word_to_index: mapping word -> integer index.
        num_epochs: number of passes over ``sents``.
    """
    loss = 0.0
    step = 0
    vocabulary_size = len(word_to_index)   # constant for the whole run
    window = w2v.window_size

    batch_x = []
    batch_y = []
    for epoch in tqdm(range(num_epochs)):

        for sent in tqdm(sents):
            # Too short to contain even one position with a full window.
            if len(sent) < (window*2 + 1):
                continue

            for i in range(window, len(sent) - window):
                context_words = sent[i-window:i] + sent[i+1:i+window+1]

                if len(context_words) > 0:
                    batch_x.append(word_to_one_hot_vector(sent[i], word_to_index, vocabulary_size))
                    batch_y.append(context_words_to_vector(context_words, word_to_index))

                if len(batch_x) == batch_size:
                    step += 1
                    loss = w2v.train(step, batch_x, batch_y)
                    batch_x = []
                    batch_y = []
                    if step % 20 == 0: print("step: {}, loss: {}, epoch: {}".format(step, loss, epoch))
                    if step % 400 == 0:
                        evaluate(w2v)

                    if step%1000 == 0:
                        print("saving weights")
                        with open('w2vecbow_v4.pkl', 'wb') as f:
                            pickle.dump(w2v, f)

    if len(batch_x) > 0:
        # Give the trailing batch its own step number so the number passed to
        # train() is the one that gets reported.
        step += 1
        loss = w2v.train(step, batch_x, batch_y)
        print("step: {}, loss: {}".format(step, loss))


def count_instances(w2v, sents):
    """Print how many (center word, context) pairs ``sents`` would produce.

    Each sentence is run through ``preprocessing`` first. A position counts
    when it has ``w2v.window_size`` words available on both sides and its
    context is non-empty.
    """
    window = w2v.window_size
    minimum_length = window*2 + 1
    total = 0

    for raw_sentence in sents:
        tokens = preprocessing(raw_sentence)
        if len(tokens) < minimum_length:
            continue

        total += sum(
            1
            for center in range(window, len(tokens) - window)
            if len(tokens[center-window:center] + tokens[center+1:center+window+1]) > 0
        )

    print("Total instances are {}".format(total))

def exists(tokens, sents):
    """Return True when some sentence in ``sents`` equals ``tokens``.

    Both sides are compared as space-joined strings, so two token lists
    match whenever they spell the same text.
    """
    return any(" ".join(candidate) == " ".join(tokens) for candidate in sents)


def _read_analogy_file(path):
    """Parse an analogy file into ``(mappings, words)``.

    Every line with at least four whitespace-separated tokens contributes the
    entry "w1:w2" -> "w3:w4" (lower-cased). Blank or incomplete lines are
    skipped. ``words`` lists each distinct word in order of first appearance.
    """
    mappings = {}
    seen_words = {}
    with open(path) as f:
        for line in f:
            tokens = [word.lower() for word in line.split()]
            if len(tokens) < 4:
                continue
            mappings[tokens[0]+":"+tokens[1]] = tokens[2]+":"+tokens[3]
            for word in tokens[:4]:
                seen_words[word] = 1
    return mappings, list(seen_words.keys())


def main():
    """Build the corpus and vocabulary, train the model and save its weights.

    Reads news files from ``./data/data``, sentences from ``./gutenberg.txt``
    and analogy pairs from ``./Analogy_dataset.txt`` and ``./Validation.txt``;
    writes the trained model to ``w2vecskipgram.pkl``.
    """
    # collecting all the sentences
    sents = []
    seen_sentences = set()   # tuples of tokens, for constant-time duplicate checks
    for f in os.listdir("./data/data"):
        print(f)
        path = "./data/data/{}".format(f)
        with open(path) as file:
            file_sents = file.readlines()

        cnt = 0
        for sent in file_sents:
            tokens = preprocessing(sent.split())
            # keep only sentences of 6 to 25 tokens
            if len(tokens) > 25 or len(tokens) < 6: continue
            key = tuple(tokens)
            if key not in seen_sentences:
                seen_sentences.add(key)
                sents.append(tokens)
                cnt += 1
            # take a bounded number of sentences from each file
            if cnt > 500: break

    with open("./gutenberg.txt") as f:
        for sent in f:
            sents.append(sent.replace("\n", "").split())
    random.shuffle(sents)

    # collect all the words
    words = []
    for sent in sents:
        words += sent

    analogy_mappings, analogy_words = _read_analogy_file("./Analogy_dataset.txt")
    validation_mappings, validation_words = _read_analogy_file("./Validation.txt")

    # add analogy and validation words to words list
    for word in analogy_words: words.append(word.lower())
    for word in validation_words: words.append(word.lower())

    word_to_index, index_to_word = index_word_maps(words)

    w2v = word2vec()
    w2v.initialize(len(word_to_index.keys()), words, word_to_index, validation_mappings, analogy_mappings, index_to_word)
    print("Length of vocab is {}".format(w2v.V))

    # compute count of data instances currently present
    count_instances(w2v, sents)

    train_testmodel(w2v, sents, word_to_index, num_epochs)

    # The probe word is only meaningful if the corpus actually contained it.
    if "delhi" in word_to_index:
        topSimilar(w2v, "delhi")
    else:
        print("word not present")

    # save weights
    with open('w2vecskipgram.pkl', 'wb') as f:
        pickle.dump(w2v, f)
        
# Run the whole pipeline only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

news13_gujarat_gandhinagar.txt
news13_gujarat.txt
news12_portugal_portuguese.txt
news13_krona.txt
news12_London_England.txt
news12_Switzerland_Swiss.txt
news12_Athens_Greece.txt
news12_Madrid_Spain.txt
news13_nigeria.txt
news12_thailand_thai.txt
news12_iran_rial.txt
news13_dollar.txt
news13_telangana.txt
news12_sweden_krona.txt
news12_Australia_Australian.txt
news12_ottawa_canada.txt
news12_Korea_Korean.txt
Newsamanantar_Kerala_Thiruvananthapuram.txt
news12_kenya_africa.txt
news12_nairobi_africa.txt
news12_japan_yen.txt
news13_slovakia.txt
news12_nigeria_naira.txt
news13_russia.txt
news12_Mexico_Mexican.txt
news12_croatia_croatian.txt
news13_usa.txt
news12_netherlands_europe.txt
news12_Ireland_Irish.txt
news12_riga_latvia.txt
news12_budapest_hungary.txt
news12_denmark_krone.txt
news12_kiev_ukraine.txt
news12_serbia_belgrade.txt
news12_slovakia_slovakian.txt
news12_russia_ruble.txt
news12_punjab_chandigarh.txt
news12_lisbon_portugal.txt
news13_assam.txt
news12_sweden_swedish.txt
news12_

  0%|                                                    | 0/20 [00:00<?, ?it/s]
  0%|                                                | 0/122310 [00:00<?, ?it/s][A
  0%|                                      | 5/122310 [00:00<6:40:42,  5.09it/s][A
  0%|                                      | 9/122310 [00:01<7:06:58,  4.77it/s][A
  0%|                                     | 17/122310 [00:02<5:08:25,  6.61it/s][A
  0%|                                     | 29/122310 [00:03<3:40:17,  9.25it/s][A
  0%|                                     | 38/122310 [00:04<3:34:06,  9.52it/s][A
  0%|                                     | 47/122310 [00:05<3:29:51,  9.71it/s][A
  0%|                                     | 51/122310 [00:06<4:12:59,  8.05it/s][A
  0%|                                     | 56/122310 [00:07<4:38:22,  7.32it/s][A
  0%|                                     | 68/122310 [00:08<3:41:23,  9.20it/s][A
  0%|                                     | 76/122310 [00:08<3:42:08,  9.17it/s

step: 20, loss: 159.19384569327812, epoch: 0



  0%|                                    | 182/122310 [00:18<2:52:25, 11.81it/s][A
  0%|                                    | 188/122310 [00:19<3:17:24, 10.31it/s][A
  0%|                                    | 198/122310 [00:20<3:11:15, 10.64it/s][A
  0%|                                    | 200/122310 [00:21<4:10:46,  8.12it/s][A
  0%|                                    | 205/122310 [00:22<4:34:07,  7.42it/s][A
  0%|                                    | 220/122310 [00:23<3:17:31, 10.30it/s][A
  0%|                                    | 232/122310 [00:24<3:00:03, 11.30it/s][A
  0%|                                    | 247/122310 [00:24<2:36:52, 12.97it/s][A
  0%|                                    | 251/122310 [00:25<3:15:41, 10.40it/s][A
  0%|                                    | 254/122310 [00:26<4:05:05,  8.30it/s][A
  0%|                                    | 271/122310 [00:27<2:55:47, 11.57it/s][A
  0%|                                    | 280/122310 [00:28<3:03:44, 11.07

step: 40, loss: 186.7107404019368, epoch: 0



  0%|                                    | 356/122310 [00:36<3:34:02,  9.50it/s][A
  0%|                                    | 366/122310 [00:37<3:25:39,  9.88it/s][A
  0%|                                    | 374/122310 [00:38<3:31:55,  9.59it/s][A
  0%|                                    | 391/122310 [00:39<2:43:46, 12.41it/s][A
  0%|                                    | 396/122310 [00:41<4:15:32,  7.95it/s][A
  0%|                                    | 398/122310 [00:42<5:07:10,  6.61it/s][A
  0%|                                    | 403/122310 [00:42<5:19:18,  6.36it/s][A
  0%|                                    | 409/122310 [00:43<5:18:44,  6.37it/s][A
  0%|                                    | 421/122310 [00:44<4:07:25,  8.21it/s][A
  0%|▏                                   | 426/122310 [00:45<4:36:22,  7.35it/s][A
  0%|▏                                   | 436/122310 [00:46<4:05:31,  8.27it/s][A
  0%|▏                                   | 456/122310 [00:47<2:50:26, 11.92

step: 60, loss: 200.37095717760093, epoch: 0



  0%|▏                                   | 525/122310 [00:55<3:43:24,  9.09it/s][A
  0%|▏                                   | 532/122310 [00:55<3:51:07,  8.78it/s][A
  0%|▏                                   | 551/122310 [00:56<2:40:32, 12.64it/s][A
  0%|▏                                   | 562/122310 [00:57<2:40:22, 12.65it/s][A
  0%|▏                                   | 578/122310 [00:58<2:21:33, 14.33it/s][A
  0%|▏                                   | 593/122310 [00:59<2:13:47, 15.16it/s][A
  0%|▏                                   | 597/122310 [01:00<2:49:11, 11.99it/s][A
  0%|▏                                   | 601/122310 [01:01<3:26:52,  9.81it/s][A
  0%|▏                                   | 608/122310 [01:02<3:39:15,  9.25it/s][A
  1%|▏                                   | 618/122310 [01:02<3:24:34,  9.91it/s][A
  1%|▏                                   | 636/122310 [01:03<2:34:40, 13.11it/s][A
  1%|▏                                   | 646/122310 [01:04<2:41:22, 12.57

step: 80, loss: 204.8249440594978, epoch: 0



  1%|▏                                   | 717/122310 [01:12<2:57:36, 11.41it/s][A
  1%|▏                                   | 725/122310 [01:13<3:08:55, 10.73it/s][A
  1%|▏                                   | 733/122310 [01:14<3:17:02, 10.28it/s][A
  1%|▏                                   | 746/122310 [01:15<2:53:39, 11.67it/s][A
  1%|▏                                   | 758/122310 [01:16<2:44:52, 12.29it/s][A
  1%|▏                                   | 761/122310 [01:16<3:30:01,  9.65it/s][A
  1%|▏                                   | 770/122310 [01:17<3:25:44,  9.85it/s][A
  1%|▏                                   | 785/122310 [01:18<2:47:57, 12.06it/s][A
  1%|▏                                   | 809/122310 [01:19<2:01:31, 16.66it/s][A
  1%|▏                                   | 816/122310 [01:20<2:23:43, 14.09it/s][A
  1%|▏                                   | 819/122310 [01:21<3:06:06, 10.88it/s][A
  1%|▏                                   | 825/122310 [01:22<3:29:43,  9.65

step: 100, loss: 161.61538112492394, epoch: 0



  1%|▎                                   | 895/122310 [01:30<4:34:14,  7.38it/s][A
  1%|▎                                   | 900/122310 [01:30<4:53:58,  6.88it/s][A
  1%|▎                                   | 913/122310 [01:31<3:37:43,  9.29it/s][A
  1%|▎                                   | 914/122310 [01:32<4:54:47,  6.86it/s][A
  1%|▎                                   | 922/122310 [01:33<4:28:12,  7.54it/s][A
  1%|▎                                   | 934/122310 [01:34<3:34:57,  9.41it/s][A
  1%|▎                                   | 945/122310 [01:35<3:14:34, 10.40it/s][A
  1%|▎                                   | 958/122310 [01:36<2:51:56, 11.76it/s][A
  1%|▎                                   | 969/122310 [01:36<2:48:30, 12.00it/s][A
  1%|▎                                   | 979/122310 [01:37<2:50:44, 11.84it/s][A
  1%|▎                                   | 991/122310 [01:38<2:43:56, 12.33it/s][A
  1%|▎                                  | 1001/122310 [01:39<2:51:25, 11.79

step: 120, loss: 166.45656255577035, epoch: 0



  1%|▎                                  | 1051/122310 [01:47<4:01:47,  8.36it/s][A
  1%|▎                                  | 1058/122310 [01:48<4:05:13,  8.24it/s][A
  1%|▎                                  | 1068/122310 [01:49<3:40:32,  9.16it/s][A
  1%|▎                                  | 1072/122310 [01:50<4:19:43,  7.78it/s][A
  1%|▎                                  | 1084/122310 [01:51<3:30:46,  9.59it/s][A
  1%|▎                                  | 1099/122310 [01:51<2:50:59, 11.81it/s][A
  1%|▎                                  | 1104/122310 [01:52<3:21:31, 10.02it/s][A
  1%|▎                                  | 1110/122310 [01:53<3:42:12,  9.09it/s][A
  1%|▎                                  | 1117/122310 [01:55<4:58:35,  6.76it/s][A
  1%|▎                                  | 1123/122310 [01:56<4:57:12,  6.80it/s][A
  1%|▎                                  | 1126/122310 [01:57<5:42:23,  5.90it/s][A
  1%|▎                                  | 1134/122310 [01:58<4:56:32,  6.81

step: 140, loss: 203.97734358208444, epoch: 0



  1%|▎                                  | 1205/122310 [02:04<3:55:54,  8.56it/s][A
  1%|▎                                  | 1215/122310 [02:05<3:34:11,  9.42it/s][A
  1%|▎                                  | 1224/122310 [02:06<3:28:15,  9.69it/s][A
  1%|▎                                  | 1233/122310 [02:07<3:23:48,  9.90it/s][A
  1%|▎                                  | 1241/122310 [02:08<3:28:01,  9.70it/s][A
  1%|▎                                  | 1247/122310 [02:09<3:48:23,  8.83it/s][A
  1%|▎                                  | 1256/122310 [02:10<3:37:02,  9.30it/s][A
  1%|▎                                  | 1266/122310 [02:11<3:22:38,  9.96it/s][A
  1%|▎                                  | 1278/122310 [02:11<3:01:45, 11.10it/s][A
  1%|▎                                  | 1287/122310 [02:12<3:05:43, 10.86it/s][A
  1%|▎                                  | 1296/122310 [02:13<3:08:18, 10.71it/s][A
  1%|▎                                  | 1306/122310 [02:14<3:04:26, 10.93

step: 160, loss: 195.39258345436073, epoch: 0



  1%|▍                                  | 1369/122310 [02:22<4:16:29,  7.86it/s][A
  1%|▍                                  | 1373/122310 [02:23<4:53:27,  6.87it/s][A
  1%|▍                                  | 1374/122310 [02:24<6:27:50,  5.20it/s][A
  1%|▍                                  | 1390/122310 [02:25<3:40:10,  9.15it/s][A
  1%|▍                                  | 1406/122310 [02:25<2:49:09, 11.91it/s][A
  1%|▍                                  | 1413/122310 [02:26<3:07:14, 10.76it/s][A
  1%|▍                                  | 1423/122310 [02:27<3:03:36, 10.97it/s][A
  1%|▍                                  | 1433/122310 [02:28<3:01:27, 11.10it/s][A
  1%|▍                                  | 1441/122310 [02:29<3:11:32, 10.52it/s][A
  1%|▍                                  | 1447/122310 [02:30<3:34:22,  9.40it/s][A
  1%|▍                                  | 1451/122310 [02:31<4:12:23,  7.98it/s][A
  1%|▍                                  | 1458/122310 [02:32<4:11:28,  8.01

step: 180, loss: 179.21571738614554, epoch: 0



  1%|▍                                  | 1531/122310 [02:39<3:07:13, 10.75it/s][A
  1%|▍                                  | 1540/122310 [02:40<3:09:07, 10.64it/s][A
  1%|▍                                  | 1547/122310 [02:41<3:23:45,  9.88it/s][A
  1%|▍                                  | 1557/122310 [02:42<3:14:03, 10.37it/s][A
  1%|▍                                  | 1571/122310 [02:43<2:46:28, 12.09it/s][A
  1%|▍                                  | 1585/122310 [02:44<2:31:12, 13.31it/s][A
  1%|▍                                  | 1599/122310 [02:45<3:04:48, 10.89it/s][A
  1%|▍                                  | 1601/122310 [02:46<3:50:04,  8.74it/s][A
  1%|▍                                  | 1615/122310 [02:47<3:08:07, 10.69it/s][A
  1%|▍                                  | 1626/122310 [02:48<2:59:27, 11.21it/s][A
  1%|▍                                  | 1631/122310 [02:49<3:28:46,  9.63it/s][A
  1%|▍                                  | 1637/122310 [02:50<3:47:09,  8.85

step: 200, loss: 217.55292750566355, epoch: 0



  1%|▍                                  | 1692/122310 [02:57<4:02:32,  8.29it/s][A
  1%|▍                                  | 1710/122310 [02:58<2:47:45, 11.98it/s][A
  1%|▍                                  | 1718/122310 [02:58<2:59:54, 11.17it/s][A
  1%|▍                                  | 1726/122310 [02:59<3:09:54, 10.58it/s][A
  1%|▍                                  | 1736/122310 [03:00<3:05:09, 10.85it/s][A
  1%|▍                                  | 1745/122310 [03:01<3:07:14, 10.73it/s][A
  1%|▌                                  | 1750/122310 [03:02<3:37:19,  9.25it/s][A
  1%|▌                                  | 1753/122310 [03:03<4:28:03,  7.50it/s][A
  1%|▌                                  | 1764/122310 [03:04<3:42:58,  9.01it/s][A
  1%|▌                                  | 1768/122310 [03:05<4:20:08,  7.72it/s][A
  1%|▌                                  | 1778/122310 [03:05<3:46:45,  8.86it/s][A
  1%|▌                                  | 1789/122310 [03:06<3:22:45,  9.91

step: 220, loss: 158.7909013371665, epoch: 0



  2%|▌                                  | 1867/122310 [03:14<2:55:54, 11.41it/s][A
  2%|▌                                  | 1876/122310 [03:15<3:00:40, 11.11it/s][A
  2%|▌                                  | 1884/122310 [03:16<3:10:13, 10.55it/s][A
  2%|▌                                  | 1891/122310 [03:17<3:24:33,  9.81it/s][A
  2%|▌                                  | 1901/122310 [03:18<3:13:58, 10.35it/s][A
  2%|▌                                  | 1905/122310 [03:18<3:53:09,  8.61it/s][A
  2%|▌                                  | 1914/122310 [03:19<3:39:53,  9.13it/s][A
  2%|▌                                  | 1923/122310 [03:20<3:30:51,  9.52it/s][A
  2%|▌                                  | 1932/122310 [03:21<3:25:49,  9.75it/s][A
  2%|▌                                  | 1939/122310 [03:22<3:36:31,  9.27it/s][A
  2%|▌                                  | 1950/122310 [03:23<3:15:13, 10.28it/s][A
  2%|▌                                  | 1957/122310 [03:24<3:29:21,  9.58

step: 240, loss: 186.99965174045442, epoch: 0



  2%|▌                                  | 2018/122310 [03:32<4:39:10,  7.18it/s][A
  2%|▌                                  | 2022/122310 [03:32<5:13:06,  6.40it/s][A
  2%|▌                                  | 2025/122310 [03:33<6:02:52,  5.52it/s][A
  2%|▌                                  | 2028/122310 [03:34<6:50:36,  4.88it/s][A
  2%|▌                                  | 2030/122310 [03:35<8:04:22,  4.14it/s][A
  2%|▌                                  | 2051/122310 [03:36<3:18:10, 10.11it/s][A
  2%|▌                                  | 2054/122310 [03:37<4:05:31,  8.16it/s][A
  2%|▌                                  | 2068/122310 [03:38<3:10:03, 10.54it/s][A
  2%|▌                                  | 2075/122310 [03:38<3:24:47,  9.79it/s][A
  2%|▌                                  | 2081/122310 [03:39<3:45:11,  8.90it/s][A
  2%|▌                                  | 2087/122310 [03:40<4:00:49,  8.32it/s][A
  2%|▌                                  | 2100/122310 [03:41<3:14:41, 10.29

step: 260, loss: 273.76053042056327, epoch: 0



  2%|▋                                  | 2197/122310 [03:49<2:41:28, 12.40it/s][A
  2%|▋                                  | 2202/122310 [03:50<3:11:35, 10.45it/s][A
  2%|▋                                  | 2208/122310 [03:51<3:32:45,  9.41it/s][A
  2%|▋                                  | 2216/122310 [03:52<3:33:59,  9.35it/s][A
  2%|▋                                  | 2223/122310 [03:52<3:42:51,  8.98it/s][A
  2%|▋                                  | 2226/122310 [03:53<4:33:46,  7.31it/s][A
  2%|▋                                  | 2240/122310 [03:54<3:21:08,  9.95it/s][A
  2%|▋                                  | 2245/122310 [03:55<3:49:56,  8.70it/s][A
  2%|▋                                  | 2253/122310 [03:56<3:46:23,  8.84it/s][A
  2%|▋                                  | 2257/122310 [03:57<4:23:08,  7.60it/s][A
  2%|▋                                  | 2273/122310 [03:58<3:04:59, 10.81it/s][A
  2%|▋                                  | 2280/122310 [03:59<3:20:03, 10.00

step: 280, loss: 201.12989765800398, epoch: 0



  2%|▋                                  | 2350/122310 [04:06<3:57:04,  8.43it/s][A
  2%|▋                                  | 2354/122310 [04:07<4:33:55,  7.30it/s][A
  2%|▋                                  | 2368/122310 [04:08<3:21:53,  9.90it/s][A
  2%|▋                                  | 2383/122310 [04:09<2:45:52, 12.05it/s][A
  2%|▋                                  | 2394/122310 [04:10<2:43:33, 12.22it/s][A
  2%|▋                                  | 2399/122310 [04:11<3:14:29, 10.28it/s][A
  2%|▋                                  | 2403/122310 [04:12<3:53:07,  8.57it/s][A
  2%|▋                                  | 2415/122310 [04:13<3:17:53, 10.10it/s][A
  2%|▋                                  | 2425/122310 [04:13<3:10:23, 10.49it/s][A
  2%|▋                                  | 2440/122310 [04:14<2:40:26, 12.45it/s][A
  2%|▋                                  | 2449/122310 [04:15<2:49:25, 11.79it/s][A
  2%|▋                                  | 2453/122310 [04:16<3:27:13,  9.64

step: 300, loss: 176.2447606085959, epoch: 0



  2%|▋                                  | 2527/122310 [04:24<2:49:27, 11.78it/s][A
  2%|▋                                  | 2540/122310 [04:25<2:37:40, 12.66it/s][A
  2%|▋                                  | 2543/122310 [04:26<3:21:19,  9.92it/s][A
  2%|▋                                  | 2552/122310 [04:27<3:19:19, 10.01it/s][A
  2%|▋                                  | 2562/122310 [04:27<3:10:48, 10.46it/s][A
  2%|▋                                  | 2572/122310 [04:28<3:06:09, 10.72it/s][A
  2%|▋                                  | 2586/122310 [04:29<2:42:14, 12.30it/s][A
  2%|▋                                  | 2599/122310 [04:30<2:32:50, 13.05it/s][A
  2%|▋                                  | 2609/122310 [04:31<2:38:59, 12.55it/s][A
  2%|▋                                  | 2616/122310 [04:32<2:57:53, 11.21it/s][A
  2%|▊                                  | 2626/122310 [04:33<2:56:51, 11.28it/s][A
  2%|▊                                  | 2635/122310 [04:34<3:01:54, 10.96

step: 320, loss: 228.8372882131082, epoch: 0



  2%|▊                                  | 2692/122310 [04:41<4:34:41,  7.26it/s][A
  2%|▊                                  | 2699/122310 [04:42<4:25:57,  7.50it/s][A
  2%|▊                                  | 2707/122310 [04:43<4:08:31,  8.02it/s][A
  2%|▊                                  | 2712/122310 [04:44<4:31:42,  7.34it/s][A
  2%|▊                                  | 2717/122310 [04:45<4:50:11,  6.87it/s][A
  2%|▊                                  | 2731/122310 [04:46<3:26:38,  9.64it/s][A
  2%|▊                                  | 2739/122310 [04:47<3:29:20,  9.52it/s][A
  2%|▊                                  | 2742/122310 [04:48<4:18:23,  7.71it/s][A
  2%|▊                                  | 2766/122310 [04:48<2:26:57, 13.56it/s][A
  2%|▊                                  | 2774/122310 [04:49<2:42:29, 12.26it/s][A
  2%|▊                                  | 2781/122310 [04:50<3:01:13, 10.99it/s][A
  2%|▊                                  | 2789/122310 [04:51<3:10:04, 10.48

step: 340, loss: 158.3499229383841, epoch: 0



  2%|▊                                  | 2872/122310 [04:59<2:45:12, 12.05it/s][A
  2%|▊                                  | 2883/122310 [05:00<2:42:42, 12.23it/s][A
  2%|▊                                  | 2894/122310 [05:01<2:41:29, 12.32it/s][A
  2%|▊                                  | 2897/122310 [05:01<3:24:59,  9.71it/s][A
  2%|▊                                  | 2901/122310 [05:02<4:03:50,  8.16it/s][A
  2%|▊                                  | 2906/122310 [05:03<4:26:24,  7.47it/s][A
  2%|▊                                  | 2914/122310 [05:04<4:08:17,  8.01it/s][A
  2%|▊                                  | 2922/122310 [05:05<3:57:58,  8.36it/s][A
  2%|▊                                  | 2928/122310 [05:06<4:10:28,  7.94it/s][A
  2%|▊                                  | 2941/122310 [05:07<3:17:49, 10.06it/s][A
  2%|▊                                  | 2943/122310 [05:07<4:18:09,  7.71it/s][A
  2%|▊                                  | 2948/122310 [05:08<4:38:50,  7.13

step: 360, loss: 160.57714177329987, epoch: 0



  2%|▊                                  | 3018/122310 [05:16<3:18:46, 10.00it/s][A
  2%|▊                                  | 3027/122310 [05:17<3:16:35, 10.11it/s][A
  2%|▊                                  | 3030/122310 [05:18<4:02:37,  8.19it/s][A
  2%|▊                                  | 3044/122310 [05:19<3:08:50, 10.53it/s][A
  2%|▊                                  | 3050/122310 [05:20<3:29:50,  9.47it/s][A
  3%|▉                                  | 3063/122310 [05:21<2:59:01, 11.10it/s][A
  3%|▉                                  | 3071/122310 [05:21<3:08:18, 10.55it/s][A
  3%|▉                                  | 3084/122310 [05:22<2:47:22, 11.87it/s][A
  3%|▉                                  | 3092/122310 [05:23<2:59:34, 11.07it/s][A
  3%|▉                                  | 3100/122310 [05:24<3:08:49, 10.52it/s][A
  3%|▉                                  | 3106/122310 [05:25<3:30:51,  9.42it/s][A
  3%|▉                                  | 3110/122310 [05:26<4:09:29,  7.96

step: 380, loss: 176.51268265032988, epoch: 0



  3%|▉                                  | 3204/122310 [05:34<2:32:04, 13.05it/s][A
  3%|▉                                  | 3214/122310 [05:34<2:37:42, 12.59it/s][A
  3%|▉                                  | 3221/122310 [05:35<2:56:46, 11.23it/s][A
  3%|▉                                  | 3238/122310 [05:36<2:24:01, 13.78it/s][A
  3%|▉                                  | 3248/122310 [05:37<2:31:46, 13.07it/s][A
  3%|▉                                  | 3261/122310 [05:38<2:25:35, 13.63it/s][A
  3%|▉                                  | 3266/122310 [05:39<2:55:53, 11.28it/s][A
  3%|▉                                  | 3269/122310 [05:40<3:41:37,  8.95it/s][A
  3%|▉                                  | 3280/122310 [05:40<3:17:27, 10.05it/s][A
  3%|▉                                  | 3295/122310 [05:41<2:42:31, 12.20it/s][A
  3%|▉                                  | 3302/122310 [05:42<3:00:24, 10.99it/s][A
  3%|▉                                  | 3314/122310 [05:43<2:48:23, 11.78

step: 400, loss: 149.3120013987118, epoch: 0
sim1 and sim2 are 0.14833237102034713, 0.6234065806286979
cosine of pred and queen: 0.6982325368912232
Actual: athens:greece::madrid:spain, pred: royals
Actual: bangkok:thailand::islamabad:pakistan, pred: sevens
Actual: beijing:china::tokyo:japan, pred: hezron
Actual: berlin:germany::rome:italy, pred: idolatresses
Actual: cairo:egypt::ottawa:canada, pred: headless
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: basil
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: unintegrated
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: comestibles
Actual: china:asia::greece:europe, pred: crescents
Actual: nigeria:africa::france:europe, pred: tekoa
Actual: kenya:africa::netherlands:europe, pred: sixes
Actual: mumbai:asia::nairobi:africa, pred:

Actual: convenient:inconvenient::convincing:unconvincing, pred: lim
Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: becamest
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: colonization
Actual: grandfather:grandmother::father:mother, pred: oscillating
Actual: grandpa:grandma::sons:daughters, pred: lazhar
Actual: king:queen::husband:wife, pred: turkeys
Actual: man:woman::brothers:sisters, pred: beaver
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: efficiency
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: loftiest
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: mesmeric
Actual: tripura:agartala::odisha:bhubaneswar, pred: ervadi
Actual: algeria:dinar::japan:yen, pre


  3%|▉                                | 3389/122310 [07:21<116:08:22,  3.52s/it][A

Actual: india:rupee::denmark:krone, pred: jehizkiah
Accuracy is 0.0



  3%|▉                                 | 3394/122310 [07:22<92:44:50,  2.81s/it][A
  3%|▉                                 | 3400/122310 [07:30<80:43:31,  2.44s/it][A
  3%|▉                                 | 3401/122310 [07:31<77:35:04,  2.35s/it][A
  3%|▉                                 | 3412/122310 [07:32<41:17:54,  1.25s/it][A
  3%|▉                                 | 3422/122310 [07:33<26:27:45,  1.25it/s][A
  3%|▉                                 | 3425/122310 [07:34<24:04:08,  1.37it/s][A
  3%|▉                                 | 3430/122310 [07:35<20:52:37,  1.58it/s][A
  3%|▉                                 | 3434/122310 [07:36<17:48:04,  1.85it/s][A
  3%|▉                                 | 3446/122310 [07:37<10:14:47,  3.22it/s][A
  3%|▉                                  | 3455/122310 [07:38<7:48:56,  4.22it/s][A

step: 420, loss: 173.56766859870092, epoch: 0



  3%|▉                                  | 3462/122310 [07:39<6:47:42,  4.86it/s][A
  3%|▉                                  | 3468/122310 [07:40<6:16:48,  5.26it/s][A
  3%|▉                                  | 3482/122310 [07:41<4:20:50,  7.59it/s][A
  3%|▉                                  | 3489/122310 [07:41<4:16:36,  7.72it/s][A
  3%|█                                  | 3497/122310 [07:42<4:04:34,  8.10it/s][A
  3%|█                                  | 3509/122310 [07:43<3:26:19,  9.60it/s][A
  3%|█                                  | 3517/122310 [07:44<3:27:48,  9.53it/s][A
  3%|█                                  | 3520/122310 [07:45<4:13:24,  7.81it/s][A
  3%|█                                  | 3531/122310 [07:46<3:34:59,  9.21it/s][A
  3%|█                                  | 3540/122310 [07:47<3:26:58,  9.56it/s][A
  3%|█                                  | 3557/122310 [07:47<2:38:04, 12.52it/s][A
  3%|█                                  | 3564/122310 [07:48<2:57:47, 11.13

step: 440, loss: 138.97638402335667, epoch: 0



  3%|█                                  | 3640/122310 [07:56<2:40:27, 12.33it/s][A
  3%|█                                  | 3650/122310 [07:57<2:43:37, 12.09it/s][A
  3%|█                                  | 3659/122310 [07:58<2:51:16, 11.55it/s][A
  3%|█                                  | 3663/122310 [07:59<3:28:29,  9.48it/s][A
  3%|█                                  | 3676/122310 [08:00<2:57:22, 11.15it/s][A
  3%|█                                  | 3688/122310 [08:01<2:45:33, 11.94it/s][A
  3%|█                                  | 3693/122310 [08:01<3:21:33,  9.81it/s][A
  3%|█                                  | 3711/122310 [08:02<2:39:07, 12.42it/s][A
  3%|█                                  | 3717/122310 [08:03<3:08:16, 10.50it/s][A
  3%|█                                  | 3719/122310 [08:05<4:20:43,  7.58it/s][A
  3%|█                                  | 3727/122310 [08:06<4:13:53,  7.78it/s][A
  3%|█                                  | 3731/122310 [08:06<4:51:08,  6.79

step: 460, loss: 225.96252309836152, epoch: 0



  3%|█                                 | 3747/122310 [08:32<29:16:50,  1.12it/s][A

step: 480, loss: 209.73072917861566, epoch: 0



  3%|█                                 | 3771/122310 [08:34<15:25:05,  2.14it/s][A
  3%|█                                 | 3774/122310 [08:35<14:57:28,  2.20it/s][A
  3%|█                                 | 3784/122310 [08:36<11:42:09,  2.81it/s][A
  3%|█                                 | 3790/122310 [08:40<13:48:26,  2.38it/s][A
  3%|█                                 | 3796/122310 [08:41<11:56:58,  2.75it/s][A
  3%|█                                  | 3806/122310 [08:42<8:58:20,  3.67it/s][A
  3%|█                                  | 3811/122310 [08:43<8:25:03,  3.91it/s][A
  3%|█                                  | 3828/122310 [08:44<5:17:18,  6.22it/s][A
  3%|█                                  | 3840/122310 [08:45<4:37:49,  7.11it/s][A
  3%|█                                  | 3846/122310 [08:47<5:00:46,  6.56it/s][A
  3%|█                                  | 3854/122310 [08:48<5:06:05,  6.45it/s][A
  3%|█                                  | 3858/122310 [08:49<6:01:57,  5.45

step: 500, loss: 133.73123257283996, epoch: 0



  3%|█                                  | 3899/122310 [08:56<5:14:19,  6.28it/s][A
  3%|█                                  | 3907/122310 [08:57<4:54:30,  6.70it/s][A
  3%|█                                  | 3919/122310 [08:58<4:10:48,  7.87it/s][A
  3%|█▏                                 | 3935/122310 [08:59<3:28:46,  9.45it/s][A
  3%|█▏                                 | 3950/122310 [09:01<3:14:54, 10.12it/s][A
  3%|█▏                                 | 3955/122310 [09:02<3:55:38,  8.37it/s][A
  3%|█▏                                 | 3969/122310 [09:03<3:38:38,  9.02it/s][A
  3%|█▏                                 | 3980/122310 [09:04<3:39:55,  8.97it/s][A
  3%|█▏                                 | 3990/122310 [09:06<3:46:13,  8.72it/s][A
  3%|█▏                                 | 3996/122310 [09:07<4:21:47,  7.53it/s][A
  3%|█▏                                 | 4003/122310 [09:08<4:35:11,  7.16it/s][A
  3%|█▏                                 | 4012/122310 [09:09<4:38:53,  7.07

step: 520, loss: 176.82740905637874, epoch: 0



  3%|█▏                                 | 4063/122310 [09:19<7:35:17,  4.33it/s][A
  3%|█▏                                 | 4071/122310 [09:19<6:09:02,  5.34it/s][A
  3%|█▏                                 | 4080/122310 [09:20<5:07:28,  6.41it/s][A
  3%|█▏                                 | 4092/122310 [09:21<4:03:54,  8.08it/s][A
  3%|█▏                                 | 4099/122310 [09:22<4:08:57,  7.91it/s][A
  3%|█▏                                 | 4107/122310 [09:23<4:05:41,  8.02it/s][A
  3%|█▏                                 | 4113/122310 [09:24<4:23:44,  7.47it/s][A
  3%|█▏                                 | 4117/122310 [09:25<5:06:10,  6.43it/s][A
  3%|█▏                                 | 4125/122310 [09:26<4:47:30,  6.85it/s][A
  3%|█▏                                 | 4132/122310 [09:27<4:39:14,  7.05it/s][A
  3%|█▏                                 | 4143/122310 [09:28<4:08:21,  7.93it/s][A
  3%|█▏                                 | 4157/122310 [09:29<3:30:07,  9.37

step: 540, loss: 218.47113742245114, epoch: 0



  3%|█▏                                 | 4219/122310 [09:38<4:09:58,  7.87it/s][A
  3%|█▏                                 | 4224/122310 [09:40<4:46:54,  6.86it/s][A
  3%|█▏                                 | 4236/122310 [09:41<3:54:49,  8.38it/s][A
  3%|█▏                                 | 4247/122310 [09:42<3:34:44,  9.16it/s][A
  3%|█▏                                 | 4258/122310 [09:42<3:19:29,  9.86it/s][A
  3%|█▏                                 | 4264/122310 [09:43<3:43:51,  8.79it/s][A
  3%|█▏                                 | 4276/122310 [09:44<3:23:52,  9.65it/s][A
  4%|█▏                                 | 4284/122310 [09:45<3:33:46,  9.20it/s][A
  4%|█▏                                 | 4297/122310 [09:46<3:07:37, 10.48it/s][A
  4%|█▏                                 | 4304/122310 [09:47<3:25:21,  9.58it/s][A
  4%|█▏                                 | 4311/122310 [09:49<4:55:36,  6.65it/s][A
  4%|█▏                                 | 4313/122310 [09:50<5:55:34,  5.53

step: 560, loss: 143.99576964860265, epoch: 0



  4%|█▏                                 | 4363/122310 [10:00<5:56:14,  5.52it/s][A
  4%|█▎                                 | 4376/122310 [10:01<4:33:48,  7.18it/s][A
  4%|█▎                                 | 4381/122310 [10:02<4:59:36,  6.56it/s][A
  4%|█▎                                 | 4386/122310 [10:03<5:28:14,  5.99it/s][A
  4%|█▎                                 | 4396/122310 [10:04<4:44:10,  6.92it/s][A
  4%|█▎                                 | 4406/122310 [10:05<4:09:27,  7.88it/s][A
  4%|█▎                                 | 4417/122310 [10:06<3:46:46,  8.66it/s][A
  4%|█▎                                 | 4432/122310 [10:07<3:04:04, 10.67it/s][A
  4%|█▎                                 | 4442/122310 [10:08<3:05:05, 10.61it/s][A
  4%|█▎                                 | 4446/122310 [10:09<3:46:24,  8.68it/s][A
  4%|█▎                                 | 4453/122310 [10:10<3:57:42,  8.26it/s][A
  4%|█▎                                 | 4459/122310 [10:11<4:13:42,  7.74

step: 580, loss: 182.22672690488906, epoch: 0



  4%|█▎                                 | 4538/122310 [10:20<3:52:11,  8.45it/s][A
  4%|█▎                                 | 4546/122310 [10:21<3:51:32,  8.48it/s][A
  4%|█▎                                 | 4559/122310 [10:22<3:14:49, 10.07it/s][A
  4%|█▎                                 | 4563/122310 [10:23<3:53:36,  8.40it/s][A
  4%|█▎                                 | 4584/122310 [10:23<2:36:21, 12.55it/s][A
  4%|█▎                                 | 4590/122310 [10:24<3:02:27, 10.75it/s][A
  4%|█▎                                 | 4604/122310 [10:25<2:43:21, 12.01it/s][A
  4%|█▎                                 | 4609/122310 [10:26<3:15:23, 10.04it/s][A
  4%|█▎                                 | 4613/122310 [10:27<3:55:18,  8.34it/s][A
  4%|█▎                                 | 4632/122310 [10:28<2:45:38, 11.84it/s][A
  4%|█▎                                 | 4645/122310 [10:29<2:46:13, 11.80it/s][A
  4%|█▎                                 | 4666/122310 [10:30<2:24:12, 13.60

step: 600, loss: 157.2219995496521, epoch: 0



  4%|█▎                                 | 4728/122310 [10:40<4:07:03,  7.93it/s][A
  4%|█▎                                 | 4743/122310 [10:41<3:12:27, 10.18it/s][A
  4%|█▎                                 | 4749/122310 [10:42<3:37:19,  9.02it/s][A
  4%|█▎                                 | 4755/122310 [10:43<4:00:10,  8.16it/s][A
  4%|█▎                                 | 4759/122310 [10:44<4:42:25,  6.94it/s][A
  4%|█▎                                 | 4764/122310 [10:44<5:05:37,  6.41it/s][A
  4%|█▎                                 | 4769/122310 [10:45<5:24:51,  6.03it/s][A
  4%|█▎                                 | 4772/122310 [10:46<6:18:12,  5.18it/s][A
  4%|█▎                                 | 4776/122310 [10:47<6:40:05,  4.90it/s][A
  4%|█▎                                 | 4785/122310 [10:48<5:12:27,  6.27it/s][A
  4%|█▎                                 | 4797/122310 [10:49<4:03:30,  8.04it/s][A
  4%|█▍                                 | 4810/122310 [10:50<3:21:06,  9.74

step: 620, loss: 142.69912891139015, epoch: 0



  4%|█▍                                 | 4879/122310 [10:59<3:02:43, 10.71it/s][A
  4%|█▍                                 | 4886/122310 [11:00<3:20:33,  9.76it/s][A
  4%|█▍                                 | 4903/122310 [11:01<2:40:51, 12.16it/s][A
  4%|█▍                                 | 4911/122310 [11:02<2:59:29, 10.90it/s][A
  4%|█▍                                 | 4919/122310 [11:03<3:11:43, 10.20it/s][A
  4%|█▍                                 | 4930/122310 [11:04<3:03:03, 10.69it/s][A
  4%|█▍                                 | 4941/122310 [11:04<2:58:21, 10.97it/s][A
  4%|█▍                                 | 4948/122310 [11:05<3:15:53,  9.98it/s][A
  4%|█▍                                 | 4953/122310 [11:06<3:46:15,  8.64it/s][A
  4%|█▍                                 | 4963/122310 [11:07<3:30:04,  9.31it/s][A
  4%|█▍                                 | 4967/122310 [11:08<4:10:33,  7.81it/s][A
  4%|█▍                                 | 4975/122310 [11:09<4:02:23,  8.07

step: 640, loss: 201.48313499008052, epoch: 0



  4%|█▍                                 | 5078/122310 [11:17<2:40:37, 12.16it/s][A
  4%|█▍                                 | 5092/122310 [11:18<2:29:30, 13.07it/s][A
  4%|█▍                                 | 5101/122310 [11:19<2:41:37, 12.09it/s][A
  4%|█▍                                 | 5111/122310 [11:20<2:45:52, 11.78it/s][A
  4%|█▍                                 | 5123/122310 [11:21<2:41:01, 12.13it/s][A
  4%|█▍                                 | 5128/122310 [11:22<3:12:17, 10.16it/s][A
  4%|█▍                                 | 5131/122310 [11:23<4:01:11,  8.10it/s][A
  4%|█▍                                 | 5143/122310 [11:24<3:26:24,  9.46it/s][A
  4%|█▍                                 | 5151/122310 [11:25<3:34:22,  9.11it/s][A
  4%|█▍                                 | 5159/122310 [11:26<3:46:12,  8.63it/s][A
  4%|█▍                                 | 5164/122310 [11:27<4:14:26,  7.67it/s][A
  4%|█▍                                 | 5177/122310 [11:28<3:24:37,  9.54

step: 660, loss: 197.93563392714285, epoch: 0



  4%|█▌                                 | 5242/122310 [11:36<3:36:32,  9.01it/s][A
  4%|█▌                                 | 5252/122310 [11:37<3:24:43,  9.53it/s][A
  4%|█▌                                 | 5267/122310 [11:38<2:49:34, 11.50it/s][A
  4%|█▌                                 | 5279/122310 [11:39<2:49:58, 11.48it/s][A
  4%|█▌                                 | 5283/122310 [11:40<3:27:17,  9.41it/s][A
  4%|█▌                                 | 5290/122310 [11:41<3:43:39,  8.72it/s][A
  4%|█▌                                 | 5297/122310 [11:42<4:04:00,  7.99it/s][A
  4%|█▌                                 | 5308/122310 [11:43<3:38:41,  8.92it/s][A
  4%|█▌                                 | 5322/122310 [11:44<3:02:59, 10.66it/s][A
  4%|█▌                                 | 5326/122310 [11:45<3:40:12,  8.85it/s][A
  4%|█▌                                 | 5332/122310 [11:46<3:59:02,  8.16it/s][A
  4%|█▌                                 | 5338/122310 [11:47<4:14:08,  7.67

step: 680, loss: 234.4448166625367, epoch: 0



  4%|█▌                                 | 5398/122310 [11:55<4:08:36,  7.84it/s][A
  4%|█▌                                 | 5402/122310 [11:56<4:48:13,  6.76it/s][A
  4%|█▌                                 | 5405/122310 [11:57<5:37:18,  5.78it/s][A
  4%|█▌                                 | 5413/122310 [11:58<5:07:09,  6.34it/s][A
  4%|█▌                                 | 5425/122310 [11:59<3:59:37,  8.13it/s][A
  4%|█▌                                 | 5434/122310 [12:00<3:52:00,  8.40it/s][A
  4%|█▌                                 | 5446/122310 [12:01<3:22:00,  9.64it/s][A
  4%|█▌                                 | 5458/122310 [12:02<3:04:58, 10.53it/s][A
  4%|█▌                                 | 5465/122310 [12:03<3:22:47,  9.60it/s][A
  4%|█▌                                 | 5474/122310 [12:04<3:23:32,  9.57it/s][A
  4%|█▌                                 | 5479/122310 [12:05<3:53:21,  8.34it/s][A
  4%|█▌                                 | 5487/122310 [12:06<3:53:00,  8.36

step: 700, loss: 178.99200187564372, epoch: 0



  5%|█▌                                 | 5560/122310 [12:14<3:53:22,  8.34it/s][A
  5%|█▌                                 | 5569/122310 [12:15<3:42:10,  8.76it/s][A
  5%|█▌                                 | 5581/122310 [12:16<3:13:28, 10.06it/s][A
  5%|█▌                                 | 5592/122310 [12:17<3:01:57, 10.69it/s][A
  5%|█▌                                 | 5603/122310 [12:18<2:57:03, 10.99it/s][A
  5%|█▌                                 | 5613/122310 [12:19<3:00:14, 10.79it/s][A
  5%|█▌                                 | 5624/122310 [12:20<2:56:15, 11.03it/s][A
  5%|█▌                                 | 5635/122310 [12:21<2:57:46, 10.94it/s][A
  5%|█▌                                 | 5641/122310 [12:22<3:21:16,  9.66it/s][A
  5%|█▌                                 | 5647/122310 [12:23<3:44:28,  8.66it/s][A
  5%|█▌                                 | 5650/122310 [12:24<4:36:32,  7.03it/s][A
  5%|█▌                                 | 5664/122310 [12:25<3:38:36,  8.89

step: 720, loss: 176.4793907915963, epoch: 0



  5%|█▋                                 | 5751/122310 [12:34<3:13:09, 10.06it/s][A
  5%|█▋                                 | 5761/122310 [12:35<3:16:46,  9.87it/s][A
  5%|█▋                                 | 5765/122310 [12:36<3:59:58,  8.09it/s][A
  5%|█▋                                 | 5774/122310 [12:37<3:46:00,  8.59it/s][A
  5%|█▋                                 | 5782/122310 [12:38<3:45:30,  8.61it/s][A
  5%|█▋                                 | 5789/122310 [12:39<3:56:15,  8.22it/s][A
  5%|█▋                                 | 5796/122310 [12:40<4:07:08,  7.86it/s][A
  5%|█▋                                 | 5802/122310 [12:41<4:43:30,  6.85it/s][A
  5%|█▋                                 | 5806/122310 [12:42<5:27:09,  5.94it/s][A
  5%|█▋                                 | 5814/122310 [12:43<4:56:58,  6.54it/s][A
  5%|█▋                                 | 5826/122310 [12:44<3:59:49,  8.09it/s][A
  5%|█▋                                 | 5828/122310 [12:45<5:08:31,  6.29

step: 740, loss: 152.51857094863874, epoch: 0



  5%|█▋                                 | 5902/122310 [12:54<3:54:01,  8.29it/s][A
  5%|█▋                                 | 5918/122310 [12:56<3:11:49, 10.11it/s][A
  5%|█▋                                 | 5928/122310 [12:57<3:13:41, 10.01it/s][A
  5%|█▋                                 | 5934/122310 [12:58<3:36:11,  8.97it/s][A
  5%|█▋                                 | 5935/122310 [12:58<4:45:23,  6.80it/s][A
  5%|█▋                                 | 5943/122310 [12:59<4:21:40,  7.41it/s][A
  5%|█▋                                 | 5950/122310 [13:00<4:15:47,  7.58it/s][A
  5%|█▋                                 | 5956/122310 [13:01<4:24:13,  7.34it/s][A
  5%|█▋                                 | 5969/122310 [13:02<3:27:39,  9.34it/s][A
  5%|█▋                                 | 5984/122310 [13:03<2:52:29, 11.24it/s][A
  5%|█▋                                 | 5994/122310 [13:04<2:56:54, 10.96it/s][A
  5%|█▋                                 | 6000/122310 [13:06<4:28:19,  7.22

step: 760, loss: 157.05338246492036, epoch: 0



  5%|█▋                                 | 6056/122310 [13:13<3:56:30,  8.19it/s][A
  5%|█▋                                 | 6064/122310 [13:14<3:50:43,  8.40it/s][A
  5%|█▋                                 | 6068/122310 [13:15<4:25:20,  7.30it/s][A
  5%|█▋                                 | 6083/122310 [13:16<3:11:12, 10.13it/s][A
  5%|█▋                                 | 6087/122310 [13:17<3:49:18,  8.45it/s][A
  5%|█▋                                 | 6092/122310 [13:18<4:15:20,  7.59it/s][A
  5%|█▋                                 | 6097/122310 [13:19<4:36:19,  7.01it/s][A
  5%|█▋                                 | 6110/122310 [13:20<3:28:05,  9.31it/s][A
  5%|█▊                                 | 6122/122310 [13:21<3:03:34, 10.55it/s][A
  5%|█▊                                 | 6130/122310 [13:21<3:11:47, 10.10it/s][A
  5%|█▊                                 | 6134/122310 [13:22<3:49:26,  8.44it/s][A
  5%|█▊                                 | 6148/122310 [13:23<3:02:00, 10.64

step: 780, loss: 138.5039474572346, epoch: 0



  5%|█▊                                 | 6221/122310 [13:31<3:06:30, 10.37it/s][A
  5%|█▊                                 | 6238/122310 [13:32<2:28:38, 13.02it/s][A
  5%|█▊                                 | 6245/122310 [13:33<2:48:24, 11.49it/s][A
  5%|█▊                                 | 6257/122310 [13:34<2:39:50, 12.10it/s][A
  5%|█▊                                 | 6273/122310 [13:35<2:18:48, 13.93it/s][A
  5%|█▊                                 | 6286/122310 [13:36<2:16:39, 14.15it/s][A
  5%|█▊                                 | 6297/122310 [13:37<2:22:08, 13.60it/s][A
  5%|█▊                                 | 6303/122310 [13:37<2:46:48, 11.59it/s][A
  5%|█▊                                 | 6318/122310 [13:38<2:26:46, 13.17it/s][A
  5%|█▊                                 | 6325/122310 [13:39<2:46:25, 11.62it/s][A
  5%|█▊                                 | 6334/122310 [13:40<2:53:14, 11.16it/s][A
  5%|█▊                                 | 6343/122310 [13:41<2:57:27, 10.89

step: 800, loss: 154.63535732907883, epoch: 0
sim1 and sim2 are 0.3194252773786367, 0.3305928837872562
cosine of pred and queen: 0.2361750141411837
Actual: athens:greece::madrid:spain, pred: altafaj
Actual: bangkok:thailand::islamabad:pakistan, pred: henriksen
Actual: beijing:china::tokyo:japan, pred: industry
Actual: berlin:germany::rome:italy, pred: bispebjerg
Actual: cairo:egypt::ottawa:canada, pred: headless
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: basil
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: unintegrated
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: sines
Actual: china:asia::greece:europe, pred: harboured
Actual: nigeria:africa::france:europe, pred: chieftain
Actual: kenya:africa::netherlands:europe, pred: sixes
Actual: mumbai:asia::nairobi:africa, pre

Actual: convenient:inconvenient::convincing:unconvincing, pred: lim
Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: colonization
Actual: grandfather:grandmother::father:mother, pred: man
Actual: grandpa:grandma::sons:daughters, pred: lazhar
Actual: king:queen::husband:wife, pred: turkeys
Actual: man:woman::brothers:sisters, pred: paraguayan
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: efficiency
Actual: fortunate:fortunately::efficient:efficiently, pred: kandla
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: top
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: mesmeric
Actual: tripura:agartala::odisha:bhubaneswar, pred: ervadi
Actual: algeria:dinar::japan:yen, pred: holyhea


  5%|█▋                               | 6399/122310 [15:17<140:27:10,  4.36s/it][A

Actual: india:rupee::denmark:krone, pred: jehizkiah
Accuracy is 0.0



  5%|█▊                                | 6412/122310 [15:18<75:51:30,  2.36s/it][A
  5%|█▊                                | 6418/122310 [15:19<59:05:44,  1.84s/it][A
  5%|█▊                                | 6429/122310 [15:20<37:32:00,  1.17s/it][A
  5%|█▊                                | 6443/122310 [15:21<22:59:36,  1.40it/s][A
  5%|█▊                                | 6447/122310 [15:23<21:44:10,  1.48it/s][A
  5%|█▊                                | 6461/122310 [15:24<13:30:55,  2.38it/s][A
  5%|█▊                                 | 6477/122310 [15:25<8:45:25,  3.67it/s][A
  5%|█▊                                 | 6487/122310 [15:25<7:11:23,  4.47it/s][A
  5%|█▊                                 | 6496/122310 [15:26<6:10:08,  5.21it/s][A
  5%|█▊                                 | 6510/122310 [15:27<4:40:21,  6.88it/s][A
  5%|█▊                                 | 6535/122310 [15:28<2:59:06, 10.77it/s][A
  5%|█▊                                 | 6540/122310 [15:29<3:18:32,  9.72

step: 820, loss: 169.46261586652187, epoch: 0



  5%|█▉                                 | 6619/122310 [15:36<3:03:48, 10.49it/s][A
  5%|█▉                                 | 6628/122310 [15:37<3:05:47, 10.38it/s][A
  5%|█▉                                 | 6636/122310 [15:38<3:13:03,  9.99it/s][A
  5%|█▉                                 | 6638/122310 [15:39<4:09:18,  7.73it/s][A
  5%|█▉                                 | 6650/122310 [15:40<3:23:07,  9.49it/s][A
  5%|█▉                                 | 6655/122310 [15:41<3:51:43,  8.32it/s][A
  5%|█▉                                 | 6662/122310 [15:41<3:54:57,  8.20it/s][A
  5%|█▉                                 | 6675/122310 [15:43<4:06:48,  7.81it/s][A
  5%|█▉                                 | 6683/122310 [15:44<3:57:13,  8.12it/s][A
  5%|█▉                                 | 6689/122310 [15:45<4:08:14,  7.76it/s][A
  5%|█▉                                 | 6702/122310 [15:46<3:19:46,  9.64it/s][A
  5%|█▉                                 | 6709/122310 [15:47<3:30:28,  9.15

step: 840, loss: 138.53621379748787, epoch: 0



  6%|█▉                                 | 6762/122310 [15:54<3:31:27,  9.11it/s][A
  6%|█▉                                 | 6765/122310 [15:55<4:14:56,  7.55it/s][A
  6%|█▉                                 | 6776/122310 [15:56<3:35:30,  8.94it/s][A
  6%|█▉                                 | 6786/122310 [15:56<3:20:06,  9.62it/s][A
  6%|█▉                                 | 6788/122310 [15:57<4:23:35,  7.30it/s][A
  6%|█▉                                 | 6794/122310 [15:58<4:33:55,  7.03it/s][A
  6%|█▉                                 | 6798/122310 [15:59<5:06:59,  6.27it/s][A
  6%|█▉                                 | 6807/122310 [16:00<4:19:51,  7.41it/s][A
  6%|█▉                                 | 6810/122310 [16:03<8:29:22,  3.78it/s][A
  6%|█▉                                 | 6818/122310 [16:04<6:41:21,  4.80it/s][A
  6%|█▉                                 | 6824/122310 [16:05<6:09:16,  5.21it/s][A
  6%|█▉                                 | 6828/122310 [16:06<6:24:30,  5.01

step: 860, loss: 157.47192004688637, epoch: 0



  6%|█▉                                 | 6879/122310 [16:12<4:13:07,  7.60it/s][A
  6%|█▉                                 | 6885/122310 [16:13<4:22:56,  7.32it/s][A
  6%|█▉                                 | 6886/122310 [16:14<5:38:57,  5.68it/s][A
  6%|█▉                                 | 6893/122310 [16:15<5:09:25,  6.22it/s][A
  6%|█▉                                 | 6902/122310 [16:16<4:27:06,  7.20it/s][A
  6%|█▉                                 | 6917/122310 [16:17<3:16:49,  9.77it/s][A
  6%|█▉                                 | 6925/122310 [16:18<3:22:38,  9.49it/s][A
  6%|█▉                                 | 6934/122310 [16:19<3:20:51,  9.57it/s][A
  6%|█▉                                 | 6941/122310 [16:19<3:33:20,  9.01it/s][A
  6%|█▉                                 | 6949/122310 [16:20<3:34:15,  8.97it/s][A
  6%|█▉                                 | 6953/122310 [16:21<4:09:16,  7.71it/s][A
  6%|█▉                                 | 6961/122310 [16:22<3:57:02,  8.11

step: 880, loss: 140.4934002110097, epoch: 0



  6%|██                                 | 7028/122310 [16:30<4:19:43,  7.40it/s][A
  6%|██                                 | 7038/122310 [16:31<3:44:07,  8.57it/s][A
  6%|██                                 | 7050/122310 [16:32<3:12:42,  9.97it/s][A
  6%|██                                 | 7056/122310 [16:33<3:32:24,  9.04it/s][A
  6%|██                                 | 7062/122310 [16:34<3:49:39,  8.36it/s][A
  6%|██                                 | 7073/122310 [16:35<3:20:35,  9.57it/s][A
  6%|██                                 | 7076/122310 [16:35<4:09:14,  7.71it/s][A
  6%|██                                 | 7082/122310 [16:36<4:18:20,  7.43it/s][A
  6%|██                                 | 7087/122310 [16:37<4:38:28,  6.90it/s][A
  6%|██                                 | 7097/122310 [16:38<3:53:40,  8.22it/s][A
  6%|██                                 | 7104/122310 [16:39<3:56:30,  8.12it/s][A
  6%|██                                 | 7119/122310 [16:40<3:00:08, 10.66

step: 900, loss: 123.66007064954552, epoch: 0



  6%|██                                 | 7197/122310 [16:48<3:18:12,  9.68it/s][A
  6%|██                                 | 7215/122310 [16:49<2:29:02, 12.87it/s][A
  6%|██                                 | 7222/122310 [16:50<2:49:59, 11.28it/s][A
  6%|██                                 | 7229/122310 [16:51<3:07:20, 10.24it/s][A
  6%|██                                 | 7241/122310 [16:51<2:50:26, 11.25it/s][A
  6%|██                                 | 7247/122310 [16:52<3:14:31,  9.86it/s][A
  6%|██                                 | 7252/122310 [16:53<3:44:27,  8.54it/s][A
  6%|██                                 | 7256/122310 [16:54<4:23:55,  7.27it/s][A
  6%|██                                 | 7261/122310 [16:55<4:46:13,  6.70it/s][A
  6%|██                                 | 7271/122310 [16:56<4:02:31,  7.91it/s][A
  6%|██                                 | 7279/122310 [16:57<3:55:59,  8.12it/s][A
  6%|██                                 | 7291/122310 [16:58<3:20:33,  9.56

step: 920, loss: 173.01367857748053, epoch: 0



  6%|██                                 | 7344/122310 [17:08<8:11:54,  3.90it/s][A
  6%|██                                 | 7358/122310 [17:09<5:24:59,  5.90it/s][A
  6%|██                                 | 7366/122310 [17:10<5:03:13,  6.32it/s][A
  6%|██                                 | 7371/122310 [17:11<5:22:34,  5.94it/s][A
  6%|██                                 | 7387/122310 [17:12<3:52:11,  8.25it/s][A
  6%|██                                 | 7394/122310 [17:14<4:12:29,  7.59it/s][A
  6%|██                                 | 7400/122310 [17:15<4:28:18,  7.14it/s][A
  6%|██                                 | 7408/122310 [17:16<4:29:48,  7.10it/s][A
  6%|██                                 | 7419/122310 [17:17<4:01:44,  7.92it/s][A
  6%|██                                 | 7422/122310 [17:18<4:46:39,  6.68it/s][A
  6%|██▏                                | 7429/122310 [17:19<4:40:58,  6.81it/s][A
  6%|██▏                                | 7431/122310 [17:20<5:56:48,  5.37

step: 940, loss: 127.25517754624975, epoch: 0



  6%|██▏                                | 7512/122310 [17:29<4:06:58,  7.75it/s][A
  6%|██▏                                | 7519/122310 [17:30<4:22:59,  7.27it/s][A
  6%|██▏                                | 7531/122310 [17:31<3:50:31,  8.30it/s][A
  6%|██▏                                | 7541/122310 [17:32<3:54:32,  8.16it/s][A
  6%|██▏                                | 7549/122310 [17:34<4:20:19,  7.35it/s][A
  6%|██▏                                | 7557/122310 [17:35<4:20:02,  7.35it/s][A
  6%|██▏                                | 7566/122310 [17:36<4:08:55,  7.68it/s][A
  6%|██▏                                | 7577/122310 [17:37<3:42:26,  8.60it/s][A
  6%|██▏                                | 7589/122310 [17:38<3:27:44,  9.20it/s][A
  6%|██▏                                | 7590/122310 [17:39<4:55:52,  6.46it/s][A
  6%|██▏                                | 7597/122310 [17:40<4:49:27,  6.61it/s][A
  6%|██▏                                | 7603/122310 [17:41<4:54:55,  6.48

step: 960, loss: 179.12955192904522, epoch: 0



  6%|██▏                                | 7679/122310 [17:50<3:13:09,  9.89it/s][A
  6%|██▏                                | 7689/122310 [17:51<3:06:44, 10.23it/s][A
  6%|██▏                                | 7705/122310 [17:52<2:33:43, 12.43it/s][A
  6%|██▏                                | 7710/122310 [17:53<3:03:01, 10.44it/s][A
  6%|██▏                                | 7716/122310 [17:54<3:24:03,  9.36it/s][A
  6%|██▏                                | 7725/122310 [17:55<3:25:49,  9.28it/s][A
  6%|██▏                                | 7734/122310 [17:56<3:35:27,  8.86it/s][A
  6%|██▏                                | 7736/122310 [17:57<4:35:07,  6.94it/s][A
  6%|██▏                                | 7740/122310 [17:58<5:17:28,  6.01it/s][A
  6%|██▏                                | 7746/122310 [17:59<5:14:27,  6.07it/s][A
  6%|██▏                                | 7748/122310 [18:00<6:39:01,  4.78it/s][A
  6%|██▏                                | 7753/122310 [18:01<6:30:13,  4.89

step: 980, loss: 134.97437595524403, epoch: 0



  6%|██▏                                | 7814/122310 [18:10<5:05:32,  6.25it/s][A
  6%|██▏                                | 7824/122310 [18:11<4:17:34,  7.41it/s][A
  6%|██▏                                | 7836/122310 [18:12<3:30:57,  9.04it/s][A
  6%|██▏                                | 7847/122310 [18:13<3:12:03,  9.93it/s][A
  6%|██▏                                | 7852/122310 [18:14<3:40:12,  8.66it/s][A
  6%|██▎                                | 7863/122310 [18:14<3:18:47,  9.60it/s][A
  6%|██▎                                | 7872/122310 [18:15<3:19:18,  9.57it/s][A
  6%|██▎                                | 7884/122310 [18:17<3:19:25,  9.56it/s][A
  6%|██▎                                | 7889/122310 [18:18<4:16:44,  7.43it/s][A
  6%|██▎                                | 7894/122310 [18:19<5:05:23,  6.24it/s][A
  6%|██▎                                | 7902/122310 [18:21<5:01:07,  6.33it/s][A
  6%|██▎                                | 7915/122310 [18:22<4:05:38,  7.76

step: 1000, loss: 187.8163184636301, epoch: 0
saving weights



  7%|██▎                                | 7989/122310 [18:32<5:58:50,  5.31it/s][A
  7%|██▎                                | 8002/122310 [18:34<4:41:37,  6.76it/s][A
  7%|██▎                                | 8014/122310 [18:35<4:07:43,  7.69it/s][A
  7%|██▎                                | 8021/122310 [18:36<4:25:17,  7.18it/s][A
  7%|██▎                                | 8027/122310 [18:37<4:46:25,  6.65it/s][A
  7%|██▎                                | 8039/122310 [18:38<4:07:19,  7.70it/s][A
  7%|██▎                                | 8047/122310 [18:39<4:14:54,  7.47it/s][A
  7%|██▎                                | 8052/122310 [18:41<4:49:32,  6.58it/s][A
  7%|██▎                                | 8065/122310 [18:42<4:01:33,  7.88it/s][A
  7%|██▎                                | 8077/122310 [18:43<3:40:44,  8.62it/s][A
  7%|██▎                                | 8090/122310 [18:44<3:23:12,  9.37it/s][A
  7%|██▎                                | 8099/122310 [18:45<3:35:01,  8.85

step: 1020, loss: 154.08895936425515, epoch: 0



  7%|██▎                                | 8202/122310 [18:57<4:40:13,  6.79it/s][A
  7%|██▎                                | 8205/122310 [18:58<5:29:16,  5.78it/s][A
  7%|██▎                                | 8214/122310 [18:59<5:01:46,  6.30it/s][A
  7%|██▎                                | 8224/122310 [19:01<4:35:32,  6.90it/s][A
  7%|██▎                                | 8231/122310 [19:02<4:46:31,  6.64it/s][A
  7%|██▎                                | 8237/122310 [19:03<5:07:23,  6.18it/s][A
  7%|██▎                                | 8239/122310 [19:04<6:24:38,  4.94it/s][A
  7%|██▎                                | 8249/122310 [19:05<5:15:55,  6.02it/s][A
  7%|██▎                                | 8257/122310 [19:06<5:04:39,  6.24it/s][A
  7%|██▎                                | 8265/122310 [19:08<4:56:40,  6.41it/s][A
  7%|██▎                                | 8267/122310 [19:09<6:14:48,  5.07it/s][A
  7%|██▎                                | 8275/122310 [19:10<5:35:12,  5.67

step: 1040, loss: 134.1811420977373, epoch: 0



  7%|██▍                                | 8353/122310 [19:20<4:03:45,  7.79it/s][A
  7%|██▍                                | 8364/122310 [19:21<3:49:02,  8.29it/s][A
  7%|██▍                                | 8372/122310 [19:23<4:02:09,  7.84it/s][A
  7%|██▍                                | 8387/122310 [19:24<3:23:37,  9.32it/s][A
  7%|██▍                                | 8390/122310 [19:25<4:21:10,  7.27it/s][A
  7%|██▍                                | 8395/122310 [19:26<4:56:50,  6.40it/s][A
  7%|██▍                                | 8409/122310 [19:27<3:56:16,  8.03it/s][A
  7%|██▍                                | 8419/122310 [19:29<3:52:51,  8.15it/s][A
  7%|██▍                                | 8431/122310 [19:30<3:36:11,  8.78it/s][A
  7%|██▍                                | 8440/122310 [19:31<3:44:50,  8.44it/s][A
  7%|██▍                                | 8450/122310 [19:32<3:45:55,  8.40it/s][A
  7%|██▍                                | 8461/122310 [19:33<3:37:20,  8.73

step: 1060, loss: 167.92702165419374, epoch: 0



  7%|██▍                                | 8523/122310 [19:44<4:18:51,  7.33it/s][A
  7%|██▍                                | 8532/122310 [19:45<4:16:50,  7.38it/s][A
  7%|██▍                                | 8533/122310 [19:46<5:37:57,  5.61it/s][A
  7%|██▍                                | 8544/122310 [19:47<4:41:05,  6.75it/s][A
  7%|██▍                                | 8552/122310 [19:48<4:39:39,  6.78it/s][A
  7%|██▍                                | 8559/122310 [19:50<4:52:06,  6.49it/s][A
  7%|██▍                                | 8570/122310 [19:51<4:16:59,  7.38it/s][A
  7%|██▍                                | 8577/122310 [19:52<4:34:26,  6.91it/s][A
  7%|██▍                                | 8584/122310 [19:53<4:44:49,  6.65it/s][A
  7%|██▍                                | 8590/122310 [19:54<5:03:30,  6.24it/s][A
  7%|██▍                                | 8603/122310 [19:55<4:05:16,  7.73it/s][A
  7%|██▍                                | 8619/122310 [19:57<3:18:32,  9.54

step: 1080, loss: 135.8179040890581, epoch: 0



  7%|██▍                                | 8685/122310 [20:07<4:34:45,  6.89it/s][A
  7%|██▍                                | 8694/122310 [20:08<4:26:24,  7.11it/s][A
  7%|██▍                                | 8704/122310 [20:09<4:10:47,  7.55it/s][A
  7%|██▍                                | 8718/122310 [20:11<3:32:50,  8.89it/s][A
  7%|██▍                                | 8729/122310 [20:12<3:28:07,  9.10it/s][A
  7%|██▍                                | 8736/122310 [20:13<3:53:06,  8.12it/s][A
  7%|██▌                                | 8749/122310 [20:14<3:29:54,  9.02it/s][A
  7%|██▌                                | 8755/122310 [20:15<3:58:42,  7.93it/s][A
  7%|██▌                                | 8765/122310 [20:16<3:56:07,  8.01it/s][A
  7%|██▌                                | 8769/122310 [20:18<4:44:51,  6.64it/s][A
  7%|██▌                                | 8793/122310 [20:19<2:54:16, 10.86it/s][A
  7%|██▌                                | 8801/122310 [20:20<3:16:26,  9.63

step: 1100, loss: 154.8771934822498, epoch: 0



  7%|██▌                                | 8869/122310 [20:30<4:30:42,  6.98it/s][A
  7%|██▌                                | 8879/122310 [20:31<4:13:01,  7.47it/s][A
  7%|██▌                                | 8889/122310 [20:32<4:02:14,  7.80it/s][A
  7%|██▌                                | 8898/122310 [20:34<4:03:26,  7.76it/s][A
  7%|██▌                                | 8903/122310 [20:35<4:38:37,  6.78it/s][A
  7%|██▌                                | 8909/122310 [20:36<5:01:07,  6.28it/s][A
  7%|██▌                                | 8912/122310 [20:37<6:07:01,  5.15it/s][A
  7%|██▌                                | 8926/122310 [20:38<4:22:53,  7.19it/s][A
  7%|██▌                                | 8934/122310 [20:40<4:31:06,  6.97it/s][A
  7%|██▌                                | 8943/122310 [20:41<4:19:29,  7.28it/s][A
  7%|██▌                                | 8952/122310 [20:42<4:15:01,  7.41it/s][A
  7%|██▌                                | 8955/122310 [20:43<4:53:18,  6.44

step: 1120, loss: 144.5233884184897, epoch: 0



  7%|██▌                                | 9025/122310 [20:52<4:53:08,  6.44it/s][A
  7%|██▌                                | 9041/122310 [20:53<3:29:33,  9.01it/s][A
  7%|██▌                                | 9043/122310 [20:54<4:24:34,  7.14it/s][A
  7%|██▌                                | 9052/122310 [20:55<4:06:56,  7.64it/s][A
  7%|██▌                                | 9057/122310 [20:56<4:32:48,  6.92it/s][A
  7%|██▌                                | 9065/122310 [20:57<4:14:38,  7.41it/s][A
  7%|██▌                                | 9074/122310 [20:58<3:55:33,  8.01it/s][A
  7%|██▌                                | 9086/122310 [20:59<3:23:14,  9.29it/s][A
  7%|██▌                                | 9095/122310 [21:00<3:27:14,  9.11it/s][A
  7%|██▌                                | 9107/122310 [21:01<3:05:07, 10.19it/s][A
  7%|██▌                                | 9120/122310 [21:02<2:47:34, 11.26it/s][A
  7%|██▌                                | 9132/122310 [21:03<2:41:05, 11.71

step: 1140, loss: 114.2175295106956, epoch: 0



  8%|██▋                                | 9232/122310 [21:11<2:18:02, 13.65it/s][A
  8%|██▋                                | 9246/122310 [21:12<2:12:24, 14.23it/s][A
  8%|██▋                                | 9260/122310 [21:13<2:09:03, 14.60it/s][A
  8%|██▋                                | 9267/122310 [21:14<2:31:35, 12.43it/s][A
  8%|██▋                                | 9271/122310 [21:15<3:07:03, 10.07it/s][A
  8%|██▋                                | 9278/122310 [21:16<3:21:21,  9.36it/s][A
  8%|██▋                                | 9289/122310 [21:17<3:05:07, 10.18it/s][A
  8%|██▋                                | 9295/122310 [21:18<3:26:38,  9.12it/s][A
  8%|██▋                                | 9297/122310 [21:19<5:47:14,  5.42it/s][A
  8%|██▋                                | 9302/122310 [21:20<5:46:36,  5.43it/s][A
  8%|██▋                                | 9306/122310 [21:21<6:04:42,  5.16it/s][A
  8%|██▋                                | 9316/122310 [21:22<4:38:38,  6.76

step: 1160, loss: 136.60232746451925, epoch: 0



  8%|██▋                                | 9373/122310 [21:30<4:00:11,  7.84it/s][A
  8%|██▋                                | 9381/122310 [21:31<3:56:33,  7.96it/s][A
  8%|██▋                                | 9389/122310 [21:32<3:51:36,  8.13it/s][A
  8%|██▋                                | 9398/122310 [21:33<3:35:04,  8.75it/s][A
  8%|██▋                                | 9401/122310 [21:33<4:22:01,  7.18it/s][A
  8%|██▋                                | 9408/122310 [21:34<4:19:48,  7.24it/s][A
  8%|██▋                                | 9418/122310 [21:35<3:46:54,  8.29it/s][A
  8%|██▋                                | 9424/122310 [21:36<4:07:41,  7.60it/s][A
  8%|██▋                                | 9437/122310 [21:37<3:19:12,  9.44it/s][A
  8%|██▋                                | 9444/122310 [21:38<3:32:36,  8.85it/s][A
  8%|██▋                                | 9448/122310 [21:39<4:12:36,  7.45it/s][A
  8%|██▋                                | 9454/122310 [21:40<4:22:42,  7.16

step: 1180, loss: 129.7834066980135, epoch: 0



  8%|██▋                                | 9528/122310 [21:49<3:38:30,  8.60it/s][A
  8%|██▋                                | 9537/122310 [21:50<3:34:31,  8.76it/s][A
  8%|██▋                                | 9548/122310 [21:51<3:15:37,  9.61it/s][A
  8%|██▋                                | 9558/122310 [21:51<3:10:07,  9.88it/s][A
  8%|██▋                                | 9572/122310 [21:52<2:46:06, 11.31it/s][A
  8%|██▋                                | 9579/122310 [21:53<3:05:19, 10.14it/s][A
  8%|██▋                                | 9583/122310 [21:54<3:45:18,  8.34it/s][A
  8%|██▋                                | 9592/122310 [21:55<3:40:58,  8.50it/s][A
  8%|██▋                                | 9601/122310 [21:56<3:33:04,  8.82it/s][A
  8%|██▊                                | 9611/122310 [21:57<3:23:54,  9.21it/s][A
  8%|██▊                                | 9627/122310 [21:58<2:42:14, 11.58it/s][A
  8%|██▊                                | 9637/122310 [21:59<2:44:54, 11.39

step: 1200, loss: 1078.3754225550383, epoch: 0
sim1 and sim2 are 0.4449996699898632, 0.08849512026239267
cosine of pred and queen: -0.005000015193760531
Actual: athens:greece::madrid:spain, pred: altafaj
Actual: bangkok:thailand::islamabad:pakistan, pred: henriksen
Actual: beijing:china::tokyo:japan, pred: prooijen
Actual: berlin:germany::rome:italy, pred: statutory
Actual: cairo:egypt::ottawa:canada, pred: enver
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: gas
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: unintegrated
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: lockdown
Actual: china:asia::greece:europe, pred: peaches
Actual: nigeria:africa::france:europe, pred: surprisal
Actual: kenya:africa::netherlands:europe, pred: hovered
Actual: mumbai:asia::nairobi:africa, p


  8%|██▊                                | 9691/122310 [22:20<2:58:09, 10.54it/s][A

Actual: chhattisgarh:raipur::assam:dispur, pred: quaint
Actual: goa:panaji::rajasthan:jaipur, pred: mayflowers
Actual: jharkhand:ranchi::punjab:chandigarh, pred: rasmussen
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: deccan
Actual: india:delhi::serbia:belgrade, pred: archway
Actual: spain:spanish::korea:korean, pred: devour
Actual: syria:arabic::australia:english, pred: one
Actual: mouse:squeak::elephant:trumpet, pred: mongrel
Actual: algeria:dinar::usa:dollar, pred: steffen
Actual: argentina:peso::russia:ruble, pred: starers
Actual: armenia:dram::iran:rial, pred: shemaah
Actual: brazil:real::sweden:krona, pred: exacerbate
Actual: europe:euro::japan:yen, pred: thornes
Actual: india:rupee::denmark:krone, pred: jehizkiah
Actual: usa:dollar::nigeria:naira, pred: strides
Actual: switzerland:swiss::spain:spanish, pred: limping
Actual: thailand:thai::india:indian, pred: brasserie
Actual: sweden:swedish::netherlands:dutch, pred: pbs
Actual: russia:russian::germany:german, pred: 


  8%|██▌                              | 9693/122310 [23:42<127:36:54,  4.08s/it][A

Actual: india:rupee::denmark:krone, pred: jehizkiah
Accuracy is 0.0



  8%|██▋                               | 9700/122310 [23:43<91:53:19,  2.94s/it][A
  8%|██▋                               | 9705/122310 [23:44<72:13:17,  2.31s/it][A
  8%|██▋                               | 9716/122310 [23:45<43:08:03,  1.38s/it][A
  8%|██▋                               | 9723/122310 [23:46<32:20:33,  1.03s/it][A
  8%|██▋                               | 9727/122310 [23:46<27:40:06,  1.13it/s][A
  8%|██▋                               | 9737/122310 [23:47<17:55:35,  1.74it/s][A
  8%|██▋                               | 9745/122310 [23:48<13:35:11,  2.30it/s][A
  8%|██▋                               | 9752/122310 [23:49<11:04:56,  2.82it/s][A
  8%|██▊                                | 9771/122310 [23:50<6:10:38,  5.06it/s][A
  8%|██▊                                | 9775/122310 [23:51<6:26:17,  4.86it/s][A
  8%|██▊                                | 9785/122310 [23:52<5:20:57,  5.84it/s][A
  8%|██▊                                | 9788/122310 [23:53<5:51:09,  5.34

step: 1220, loss: 122.645141602691, epoch: 0



  8%|██▊                                | 9864/122310 [24:02<4:37:09,  6.76it/s][A
  8%|██▊                                | 9872/122310 [24:03<4:17:29,  7.28it/s][A
  8%|██▊                                | 9879/122310 [24:04<4:16:57,  7.29it/s][A
  8%|██▊                                | 9888/122310 [24:05<3:56:16,  7.93it/s][A
  8%|██▊                                | 9892/122310 [24:06<4:33:17,  6.86it/s][A
  8%|██▊                                | 9900/122310 [24:07<4:19:13,  7.23it/s][A
  8%|██▊                                | 9906/122310 [24:08<4:26:55,  7.02it/s][A
  8%|██▊                                | 9910/122310 [24:09<5:02:12,  6.20it/s][A
  8%|██▊                                | 9917/122310 [24:09<4:45:53,  6.55it/s][A
  8%|██▊                                | 9923/122310 [24:10<4:48:35,  6.49it/s][A
  8%|██▊                                | 9932/122310 [24:11<4:14:18,  7.37it/s][A
  8%|██▊                                | 9941/122310 [24:12<3:52:03,  8.07

step: 1240, loss: 125.34239143674844, epoch: 0



  8%|██▊                               | 10025/122310 [24:21<2:31:56, 12.32it/s][A
  8%|██▊                               | 10036/122310 [24:22<2:30:52, 12.40it/s][A
  8%|██▊                               | 10047/122310 [24:22<2:30:03, 12.47it/s][A
  8%|██▊                               | 10055/122310 [24:23<2:42:09, 11.54it/s][A
  8%|██▊                               | 10065/122310 [24:24<2:43:13, 11.46it/s][A
  8%|██▊                               | 10075/122310 [24:25<2:43:23, 11.45it/s][A
  8%|██▊                               | 10081/122310 [24:26<3:05:13, 10.10it/s][A
  8%|██▊                               | 10087/122310 [24:27<3:24:53,  9.13it/s][A
  8%|██▊                               | 10099/122310 [24:28<2:57:36, 10.53it/s][A
  8%|██▊                               | 10113/122310 [24:29<2:33:37, 12.17it/s][A
  8%|██▊                               | 10123/122310 [24:29<2:36:08, 11.98it/s][A
  8%|██▊                               | 10132/122310 [24:30<2:43:00, 11.47

step: 1260, loss: 158.17675271750915, epoch: 0



  8%|██▊                               | 10215/122310 [24:38<2:35:33, 12.01it/s][A
  8%|██▊                               | 10223/122310 [24:39<2:48:17, 11.10it/s][A
  8%|██▊                               | 10232/122310 [24:40<2:50:37, 10.95it/s][A
  8%|██▊                               | 10250/122310 [24:41<2:14:41, 13.87it/s][A
  8%|██▊                               | 10257/122310 [24:42<2:34:21, 12.10it/s][A
  8%|██▊                               | 10261/122310 [24:43<3:09:16,  9.87it/s][A
  8%|██▊                               | 10277/122310 [24:43<2:30:10, 12.43it/s][A
  8%|██▊                               | 10286/122310 [24:44<2:37:58, 11.82it/s][A
  8%|██▊                               | 10293/122310 [24:45<2:54:52, 10.68it/s][A
  8%|██▊                               | 10300/122310 [24:46<3:09:18,  9.86it/s][A
  8%|██▊                               | 10305/122310 [24:47<3:36:48,  8.61it/s][A
  8%|██▊                               | 10316/122310 [24:48<3:10:17,  9.81

step: 1280, loss: 116.22695010735697, epoch: 0



  8%|██▉                               | 10381/122310 [24:56<3:33:48,  8.72it/s][A
  8%|██▉                               | 10389/122310 [24:57<3:31:31,  8.82it/s][A
  8%|██▉                               | 10394/122310 [24:57<3:55:28,  7.92it/s][A
  9%|██▉                               | 10405/122310 [24:58<3:19:39,  9.34it/s][A
  9%|██▉                               | 10413/122310 [24:59<3:20:41,  9.29it/s][A
  9%|██▉                               | 10416/122310 [25:00<4:07:50,  7.52it/s][A
  9%|██▉                               | 10423/122310 [25:01<4:02:57,  7.68it/s][A
  9%|██▉                               | 10429/122310 [25:02<4:10:51,  7.43it/s][A
  9%|██▉                               | 10431/122310 [25:03<5:14:39,  5.93it/s][A
  9%|██▉                               | 10441/122310 [25:04<4:13:00,  7.37it/s][A
  9%|██▉                               | 10446/122310 [25:04<4:34:35,  6.79it/s][A
  9%|██▉                               | 10458/122310 [25:05<3:34:43,  8.68

step: 1300, loss: 167.21455289507648, epoch: 0



  9%|██▉                               | 10552/122310 [25:14<2:27:09, 12.66it/s][A
  9%|██▉                               | 10561/122310 [25:15<2:38:45, 11.73it/s][A
  9%|██▉                               | 10567/122310 [25:16<3:13:16,  9.64it/s][A
  9%|██▉                               | 10572/122310 [25:17<3:43:36,  8.33it/s][A
  9%|██▉                               | 10581/122310 [25:18<3:34:08,  8.70it/s][A
  9%|██▉                               | 10592/122310 [25:19<3:20:58,  9.26it/s][A
  9%|██▉                               | 10603/122310 [25:20<3:07:20,  9.94it/s][A
  9%|██▉                               | 10617/122310 [25:21<2:41:56, 11.50it/s][A
  9%|██▉                               | 10625/122310 [25:22<2:53:03, 10.76it/s][A
  9%|██▉                               | 10632/122310 [25:22<3:09:46,  9.81it/s][A
  9%|██▉                               | 10639/122310 [25:23<3:21:59,  9.21it/s][A
  9%|██▉                               | 10644/122310 [25:24<3:53:10,  7.98

step: 1320, loss: 168.30406444347133, epoch: 0



  9%|██▉                               | 10703/122310 [25:33<4:41:00,  6.62it/s][A
  9%|██▉                               | 10718/122310 [25:34<3:16:43,  9.45it/s][A
  9%|██▉                              | 10721/122310 [25:40<11:31:52,  2.69it/s][A
  9%|██▉                               | 10744/122310 [25:41<5:32:31,  5.59it/s][A
  9%|██▉                               | 10756/122310 [25:42<4:36:35,  6.72it/s][A
  9%|██▉                               | 10763/122310 [25:43<4:29:35,  6.90it/s][A
  9%|██▉                               | 10768/122310 [25:44<4:42:01,  6.59it/s][A
  9%|██▉                               | 10774/122310 [25:45<4:45:21,  6.51it/s][A
  9%|██▉                               | 10785/122310 [25:46<4:01:49,  7.69it/s][A
  9%|███                               | 10794/122310 [25:47<3:43:40,  8.31it/s][A
  9%|███                               | 10802/122310 [25:48<3:42:37,  8.35it/s][A
  9%|███                               | 10820/122310 [25:49<2:40:22, 11.59

step: 1340, loss: 135.38340172959695, epoch: 0



  9%|███                               | 10852/122310 [25:52<2:44:33, 11.29it/s][A
  9%|███                               | 10856/122310 [25:52<3:17:16,  9.42it/s][A
  9%|███                               | 10868/122310 [25:53<2:58:52, 10.38it/s][A
  9%|███                               | 10881/122310 [25:54<2:42:48, 11.41it/s][A
  9%|███                               | 10891/122310 [25:55<2:45:11, 11.24it/s][A
  9%|███                               | 10897/122310 [25:56<3:12:40,  9.64it/s][A
  9%|███                               | 10903/122310 [25:57<3:32:20,  8.74it/s][A
  9%|███                               | 10922/122310 [25:58<2:31:15, 12.27it/s][A
  9%|███                               | 10924/122310 [25:59<3:22:53,  9.15it/s][A
  9%|███                               | 10928/122310 [26:00<4:02:44,  7.65it/s][A
  9%|███                               | 10930/122310 [26:01<5:15:31,  5.88it/s][A
  9%|███                               | 10937/122310 [26:02<4:46:33,  6.48

step: 1360, loss: 134.4039082092896, epoch: 0



  9%|███                               | 11024/122310 [26:10<3:28:56,  8.88it/s][A
  9%|███                               | 11032/122310 [26:11<3:26:25,  8.98it/s][A
  9%|███                               | 11042/122310 [26:11<3:10:35,  9.73it/s][A
  9%|███                               | 11044/122310 [26:12<4:06:30,  7.52it/s][A
  9%|███                               | 11053/122310 [26:13<3:41:46,  8.36it/s][A
  9%|███                               | 11055/122310 [26:14<4:43:59,  6.53it/s][A
  9%|███                               | 11063/122310 [26:15<4:12:48,  7.33it/s][A
  9%|███                               | 11068/122310 [26:16<4:30:06,  6.86it/s][A
  9%|███                               | 11075/122310 [26:17<4:17:36,  7.20it/s][A
  9%|███                               | 11089/122310 [26:18<3:07:49,  9.87it/s][A
  9%|███                               | 11094/122310 [26:18<3:35:54,  8.59it/s][A
  9%|███                               | 11101/122310 [26:19<3:39:52,  8.43

step: 1380, loss: 162.81650270132366, epoch: 0



  9%|███                               | 11175/122310 [26:27<3:00:20, 10.27it/s][A
  9%|███                               | 11179/122310 [26:28<3:35:19,  8.60it/s][A
  9%|███                               | 11187/122310 [26:29<3:30:48,  8.79it/s][A
  9%|███                               | 11212/122310 [26:30<2:05:46, 14.72it/s][A
  9%|███                               | 11215/122310 [26:31<2:43:28, 11.33it/s][A
  9%|███                               | 11219/122310 [26:32<3:18:11,  9.34it/s][A
  9%|███                               | 11229/122310 [26:32<3:05:47,  9.97it/s][A
  9%|███                               | 11238/122310 [26:33<3:03:01, 10.11it/s][A
  9%|███▏                              | 11251/122310 [26:34<2:40:46, 11.51it/s][A
  9%|███▏                              | 11267/122310 [26:35<2:16:54, 13.52it/s][A
  9%|███▏                              | 11277/122310 [26:36<2:24:01, 12.85it/s][A
  9%|███▏                              | 11285/122310 [26:37<2:36:27, 11.83

step: 1400, loss: 117.48070625226515, epoch: 0



  9%|███▏                              | 11354/122310 [26:45<3:07:41,  9.85it/s][A
  9%|███▏                              | 11365/122310 [26:46<2:53:50, 10.64it/s][A
  9%|███▏                              | 11367/122310 [26:46<3:47:24,  8.13it/s][A
  9%|███▏                              | 11372/122310 [26:47<4:09:44,  7.40it/s][A
  9%|███▏                              | 11375/122310 [26:48<4:57:13,  6.22it/s][A
  9%|███▏                              | 11380/122310 [26:49<5:04:45,  6.07it/s][A
  9%|███▏                              | 11392/122310 [26:50<3:41:15,  8.35it/s][A
  9%|███▏                              | 11401/122310 [26:51<3:26:29,  8.95it/s][A
  9%|███▏                              | 11410/122310 [26:53<4:16:36,  7.20it/s][A
  9%|███▏                              | 11422/122310 [26:53<3:29:37,  8.82it/s][A
  9%|███▏                              | 11427/122310 [26:54<3:50:27,  8.02it/s][A
  9%|███▏                              | 11430/122310 [26:55<4:31:59,  6.79

step: 1420, loss: 135.78313279569517, epoch: 0



  9%|███▏                              | 11494/122310 [27:02<3:03:17, 10.08it/s][A
  9%|███▏                              | 11507/122310 [27:03<2:40:33, 11.50it/s][A
  9%|███▏                              | 11516/122310 [27:04<2:46:08, 11.11it/s][A
  9%|███▏                              | 11523/122310 [27:05<3:00:47, 10.21it/s][A
  9%|███▏                              | 11536/122310 [27:06<2:39:35, 11.57it/s][A
  9%|███▏                              | 11545/122310 [27:07<2:45:09, 11.18it/s][A
  9%|███▏                              | 11557/122310 [27:07<2:34:16, 11.96it/s][A
  9%|███▏                              | 11571/122310 [27:08<2:20:04, 13.18it/s][A
  9%|███▏                              | 11576/122310 [27:09<2:49:06, 10.91it/s][A
  9%|███▏                              | 11592/122310 [27:10<2:20:21, 13.15it/s][A
  9%|███▏                              | 11604/122310 [27:11<2:18:26, 13.33it/s][A
 10%|███▏                              | 11621/122310 [27:12<2:01:28, 15.19

step: 1440, loss: 205.0488989664183, epoch: 0



 10%|███▏                              | 11682/122310 [27:20<3:14:18,  9.49it/s][A
 10%|███▏                              | 11689/122310 [27:20<3:23:15,  9.07it/s][A
 10%|███▎                              | 11699/122310 [27:21<3:08:21,  9.79it/s][A
 10%|███▎                              | 11710/122310 [27:22<2:53:13, 10.64it/s][A
 10%|███▎                              | 11718/122310 [27:23<3:00:43, 10.20it/s][A
 10%|███▎                              | 11728/122310 [27:24<2:54:06, 10.59it/s][A
 10%|███▎                              | 11736/122310 [27:25<3:01:49, 10.14it/s][A
 10%|███▎                              | 11752/122310 [27:26<2:26:32, 12.57it/s][A
 10%|███▎                              | 11759/122310 [27:27<2:43:58, 11.24it/s][A
 10%|███▎                              | 11766/122310 [27:27<2:59:33, 10.26it/s][A
 10%|███▎                              | 11772/122310 [27:28<3:20:04,  9.21it/s][A
 10%|███▎                              | 11775/122310 [27:30<5:18:12,  5.79

step: 1460, loss: 153.955050688419, epoch: 0



 10%|███▎                              | 11825/122310 [27:37<4:18:28,  7.12it/s][A
 10%|███▎                              | 11833/122310 [27:38<3:58:18,  7.73it/s][A
 10%|███▎                              | 11836/122310 [27:39<4:45:42,  6.44it/s][A
 10%|███▎                              | 11844/122310 [27:40<4:13:05,  7.27it/s][A
 10%|███▎                              | 11855/122310 [27:41<3:26:49,  8.90it/s][A
 10%|███▎                              | 11858/122310 [27:41<4:13:31,  7.26it/s][A
 10%|███▎                              | 11867/122310 [27:42<3:44:32,  8.20it/s][A
 10%|███▎                              | 11879/122310 [27:43<3:06:37,  9.86it/s][A
 10%|███▎                              | 11886/122310 [27:44<3:17:48,  9.30it/s][A
 10%|███▎                              | 11892/122310 [27:45<3:33:50,  8.61it/s][A
 10%|███▎                              | 11896/122310 [27:46<4:09:09,  7.39it/s][A
 10%|███▎                              | 11904/122310 [27:47<3:51:56,  7.93

step: 1480, loss: 116.65509740693676, epoch: 0



 10%|███▎                              | 11973/122310 [27:55<3:52:35,  7.91it/s][A
 10%|███▎                              | 11987/122310 [27:56<3:05:48,  9.90it/s][A
 10%|███▎                              | 11994/122310 [27:57<3:18:46,  9.25it/s][A
 10%|███▎                              | 11997/122310 [27:58<4:04:49,  7.51it/s][A
 10%|███▎                              | 12009/122310 [27:59<3:22:06,  9.10it/s][A
 10%|███▎                              | 12014/122310 [28:00<3:51:16,  7.95it/s][A
 10%|███▎                              | 12022/122310 [28:01<3:45:39,  8.15it/s][A
 10%|███▎                              | 12033/122310 [28:02<3:17:25,  9.31it/s][A
 10%|███▎                              | 12039/122310 [28:03<3:38:27,  8.41it/s][A
 10%|███▎                              | 12049/122310 [28:03<3:20:34,  9.16it/s][A
 10%|███▎                              | 12055/122310 [28:04<3:40:53,  8.32it/s][A
 10%|███▎                              | 12067/122310 [28:05<3:08:52,  9.73

step: 1500, loss: 114.64880962308125, epoch: 0



 10%|███▍                              | 12155/122310 [28:14<2:40:11, 11.46it/s][A
 10%|███▍                              | 12163/122310 [28:15<2:52:08, 10.66it/s][A
 10%|███▍                              | 12178/122310 [28:16<2:28:25, 12.37it/s][A
 10%|███▍                              | 12185/122310 [28:17<2:47:30, 10.96it/s][A
 10%|███▍                              | 12193/122310 [28:17<3:00:13, 10.18it/s][A
 10%|███▍                              | 12203/122310 [28:18<2:56:07, 10.42it/s][A
 10%|███▍                              | 12212/122310 [28:19<2:59:48, 10.21it/s][A
 10%|███▍                              | 12222/122310 [28:20<2:58:03, 10.30it/s][A
 10%|███▍                              | 12232/122310 [28:21<2:57:12, 10.35it/s][A
 10%|███▍                              | 12234/122310 [28:22<3:47:32,  8.06it/s][A
 10%|███▍                              | 12253/122310 [28:23<2:36:48, 11.70it/s][A
 10%|███▍                              | 12259/122310 [28:24<2:59:45, 10.20

step: 1520, loss: 166.42548515650708, epoch: 0



 10%|███▍                              | 12358/122310 [28:32<2:40:38, 11.41it/s][A
 10%|███▍                              | 12362/122310 [28:33<3:17:14,  9.29it/s][A
 10%|███▍                              | 12375/122310 [28:34<2:50:17, 10.76it/s][A
 10%|███▍                              | 12382/122310 [28:35<3:06:20,  9.83it/s][A
 10%|███▍                              | 12387/122310 [28:40<7:55:33,  3.85it/s][A
 10%|███▍                              | 12391/122310 [28:41<7:44:53,  3.94it/s][A
 10%|███▍                              | 12401/122310 [28:42<5:50:50,  5.22it/s][A
 10%|███▍                              | 12410/122310 [28:42<4:56:46,  6.17it/s][A
 10%|███▍                              | 12423/122310 [28:43<3:48:36,  8.01it/s][A
 10%|███▍                              | 12431/122310 [28:44<3:48:56,  8.00it/s][A
 10%|███▍                              | 12447/122310 [28:45<3:00:32, 10.14it/s][A
 10%|███▍                              | 12456/122310 [28:46<3:06:52,  9.80

step: 1540, loss: 140.11924900700862, epoch: 0



 10%|███▍                              | 12492/122310 [28:51<3:32:45,  8.60it/s][A
 10%|███▍                              | 12507/122310 [28:52<2:47:09, 10.95it/s][A
 10%|███▍                              | 12516/122310 [28:53<2:50:47, 10.71it/s][A
 10%|███▍                              | 12522/122310 [28:54<3:12:46,  9.49it/s][A
 10%|███▍                              | 12532/122310 [28:55<3:01:41, 10.07it/s][A
 10%|███▍                              | 12542/122310 [28:56<2:55:50, 10.40it/s][A
 10%|███▍                              | 12551/122310 [28:58<3:52:57,  7.85it/s][A
 10%|███▍                              | 12555/122310 [28:58<4:18:28,  7.08it/s][A
 10%|███▍                              | 12565/122310 [28:59<3:43:56,  8.17it/s][A
 10%|███▍                              | 12577/122310 [29:00<3:10:12,  9.61it/s][A
 10%|███▍                              | 12590/122310 [29:01<2:46:38, 10.97it/s][A
 10%|███▌                              | 12599/122310 [29:02<2:49:51, 10.76

step: 1560, loss: 130.27953265356925, epoch: 0



 10%|███▌                              | 12665/122310 [29:09<2:48:51, 10.82it/s][A
 10%|███▌                              | 12675/122310 [29:10<2:50:45, 10.70it/s][A
 10%|███▌                              | 12679/122310 [29:11<3:25:30,  8.89it/s][A
 10%|███▌                              | 12691/122310 [29:12<3:00:59, 10.09it/s][A
 10%|███▌                              | 12695/122310 [29:13<3:39:08,  8.34it/s][A
 10%|███▌                              | 12705/122310 [29:14<3:21:59,  9.04it/s][A
 10%|███▌                              | 12716/122310 [29:15<3:06:03,  9.82it/s][A
 10%|███▌                              | 12723/122310 [29:16<3:19:43,  9.15it/s][A
 10%|███▌                              | 12729/122310 [29:17<3:38:00,  8.38it/s][A
 10%|███▌                              | 12731/122310 [29:18<4:41:23,  6.49it/s][A
 10%|███▌                              | 12738/122310 [29:19<4:29:11,  6.78it/s][A
 10%|███▌                              | 12750/122310 [29:20<3:34:31,  8.51

step: 1580, loss: 132.48880797346655, epoch: 0



 10%|███▌                              | 12826/122310 [29:28<4:37:57,  6.56it/s][A
 10%|███▌                              | 12834/122310 [29:29<4:16:19,  7.12it/s][A
 10%|███▌                              | 12838/122310 [29:30<4:49:06,  6.31it/s][A
 11%|███▌                              | 12847/122310 [29:31<4:06:23,  7.40it/s][A
 11%|███▌                              | 12855/122310 [29:32<3:51:10,  7.89it/s][A
 11%|███▌                              | 12863/122310 [29:33<3:41:01,  8.25it/s][A
 11%|███▌                              | 12879/122310 [29:34<2:42:40, 11.21it/s][A
 11%|███▌                              | 12889/122310 [29:35<2:42:57, 11.19it/s][A
 11%|███▌                              | 12897/122310 [29:35<2:53:46, 10.49it/s][A
 11%|███▌                              | 12906/122310 [29:36<2:54:57, 10.42it/s][A
 11%|███▌                              | 12915/122310 [29:37<2:58:45, 10.20it/s][A
 11%|███▌                              | 12925/122310 [29:38<2:58:41, 10.20

step: 1600, loss: 156.53631461267432, epoch: 0
sim1 and sim2 are 0.5236140945250137, -0.05483556742128631
cosine of pred and queen: -0.09465211464783983
Actual: athens:greece::madrid:spain, pred: rest
Actual: bangkok:thailand::islamabad:pakistan, pred: henriksen
Actual: beijing:china::tokyo:japan, pred: prooijen
Actual: berlin:germany::rome:italy, pred: statutory
Actual: cairo:egypt::ottawa:canada, pred: enver
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: china
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: unintegrated
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: folded
Actual: china:asia::greece:europe, pred: peaches
Actual: nigeria:africa::france:europe, pred: could
Actual: kenya:africa::netherlands:europe, pred: hovered
Actual: mumbai:asia::nairobi:africa, pred: ba

Actual: convenient:inconvenient::convincing:unconvincing, pred: lim
Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: unto
Actual: islamabad:pakistan::oslo:norway, pred: colonization
Actual: grandfather:grandmother::father:mother, pred: bring
Actual: grandpa:grandma::sons:daughters, pred: children
Actual: king:queen::husband:wife, pred: turkeys
Actual: man:woman::brothers:sisters, pred: nidhi
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: efficiency
Actual: fortunate:fortunately::efficient:efficiently, pred: kandla
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: year
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: whistlingly
Actual: tripura:agartala::odisha:bhubaneswar, pred: ervadi
Actual: algeria:dinar::japan:yen, pred: holyhead



 11%|███▍                            | 12985/122310 [31:27<103:51:11,  3.42s/it][A
 11%|███▌                             | 12993/122310 [31:28<76:32:05,  2.52s/it][A
 11%|███▌                             | 13000/122310 [31:29<58:06:00,  1.91s/it][A
 11%|███▌                             | 13011/122310 [31:30<37:54:21,  1.25s/it][A
 11%|███▌                             | 13020/122310 [31:31<27:30:20,  1.10it/s][A
 11%|███▌                             | 13023/122310 [31:32<25:12:41,  1.20it/s][A
 11%|███▌                             | 13031/122310 [31:33<18:11:49,  1.67it/s][A
 11%|███▌                             | 13034/122310 [31:34<16:50:58,  1.80it/s][A
 11%|███▌                             | 13046/122310 [31:34<10:11:31,  2.98it/s][A
 11%|███▋                              | 13050/122310 [31:35<9:34:30,  3.17it/s][A
 11%|███▋                              | 13057/122310 [31:36<7:49:41,  3.88it/s][A
 11%|███▌                             | 13059/122310 [31:39<11:29:07,  2.64

step: 1620, loss: 134.3490361684143, epoch: 0



 11%|███▋                              | 13111/122310 [31:45<4:29:49,  6.75it/s][A
 11%|███▋                              | 13118/122310 [31:46<4:20:57,  6.97it/s][A
 11%|███▋                              | 13123/122310 [31:47<4:36:27,  6.58it/s][A
 11%|███▋                              | 13134/122310 [31:48<3:40:53,  8.24it/s][A
 11%|███▋                              | 13142/122310 [31:49<3:36:57,  8.39it/s][A
 11%|███▋                              | 13154/122310 [31:53<5:52:59,  5.15it/s][A
 11%|███▋                              | 13157/122310 [31:53<6:15:18,  4.85it/s][A
 11%|███▋                              | 13164/122310 [31:54<5:33:35,  5.45it/s][A
 11%|███▋                              | 13169/122310 [31:55<5:34:13,  5.44it/s][A
 11%|███▋                              | 13176/122310 [31:56<5:01:41,  6.03it/s][A
 11%|███▋                              | 13186/122310 [31:57<4:06:40,  7.37it/s][A
 11%|███▋                              | 13195/122310 [31:58<3:44:57,  8.08

step: 1640, loss: 128.03931015307043, epoch: 0



 11%|███▋                              | 13244/122310 [32:03<3:29:52,  8.66it/s][A
 11%|███▋                              | 13249/122310 [32:04<3:54:43,  7.74it/s][A
 11%|███▋                              | 13259/122310 [32:05<3:26:45,  8.79it/s][A
 11%|███▋                              | 13271/122310 [32:06<2:58:22, 10.19it/s][A
 11%|███▋                              | 13284/122310 [32:07<2:38:38, 11.45it/s][A
 11%|███▋                              | 13288/122310 [32:08<3:13:05,  9.41it/s][A
 11%|███▋                              | 13297/122310 [32:09<3:09:07,  9.61it/s][A
 11%|███▋                              | 13314/122310 [32:09<2:25:37, 12.47it/s][A
 11%|███▋                              | 13329/122310 [32:10<2:12:05, 13.75it/s][A
 11%|███▋                              | 13339/122310 [32:11<2:19:44, 13.00it/s][A
 11%|███▋                              | 13351/122310 [32:12<2:18:35, 13.10it/s][A
 11%|███▋                              | 13361/122310 [32:13<2:24:11, 12.59

step: 1660, loss: 126.35321551347384, epoch: 0



 11%|███▋                              | 13418/122310 [32:21<4:48:47,  6.28it/s][A
 11%|███▋                              | 13427/122310 [32:22<4:10:47,  7.24it/s][A
 11%|███▋                              | 13443/122310 [32:23<3:00:32, 10.05it/s][A
 11%|███▋                              | 13445/122310 [32:24<3:49:11,  7.92it/s][A
 11%|███▋                              | 13458/122310 [32:25<3:05:15,  9.79it/s][A
 11%|███▋                              | 13466/122310 [32:26<3:09:41,  9.56it/s][A
 11%|███▋                              | 13472/122310 [32:26<3:27:34,  8.74it/s][A
 11%|███▋                              | 13480/122310 [32:27<3:26:03,  8.80it/s][A
 11%|███▋                              | 13484/122310 [32:28<4:00:49,  7.53it/s][A
 11%|███▊                              | 13496/122310 [32:29<3:14:57,  9.30it/s][A
 11%|███▊                              | 13505/122310 [32:30<3:10:08,  9.54it/s][A
 11%|███▊                              | 13510/122310 [32:31<3:37:47,  8.33

step: 1680, loss: 135.0478002668186, epoch: 0



 11%|███▊                              | 13571/122310 [32:39<4:19:15,  6.99it/s][A
 11%|███▊                              | 13580/122310 [32:40<3:48:52,  7.92it/s][A
 11%|███▊                              | 13587/122310 [32:41<3:49:25,  7.90it/s][A
 11%|███▊                              | 13603/122310 [32:42<2:46:27, 10.88it/s][A
 11%|███▊                              | 13612/122310 [32:43<2:51:53, 10.54it/s][A
 11%|███▊                              | 13625/122310 [32:44<2:37:04, 11.53it/s][A
 11%|███▊                              | 13635/122310 [32:44<2:39:34, 11.35it/s][A
 11%|███▊                              | 13639/122310 [32:45<3:14:15,  9.32it/s][A
 11%|███▊                              | 13643/122310 [32:46<3:49:43,  7.88it/s][A
 11%|███▊                              | 13650/122310 [32:47<3:58:03,  7.61it/s][A
 11%|███▊                              | 13658/122310 [32:48<3:54:15,  7.73it/s][A
 11%|███▊                              | 13668/122310 [32:49<3:38:46,  8.28

step: 1700, loss: 138.71395280134712, epoch: 0



 11%|███▊                              | 13718/122310 [32:59<6:36:56,  4.56it/s][A
 11%|███▊                              | 13726/122310 [33:01<5:52:48,  5.13it/s][A
 11%|███▊                              | 13738/122310 [33:02<4:43:27,  6.38it/s][A
 11%|███▊                              | 13749/122310 [33:03<4:11:20,  7.20it/s][A
 11%|███▊                              | 13756/122310 [33:04<4:21:44,  6.91it/s][A
 11%|███▊                              | 13763/122310 [33:06<4:37:09,  6.53it/s][A
 11%|███▊                              | 13773/122310 [33:07<4:09:51,  7.24it/s][A
 11%|███▊                              | 13786/122310 [33:08<3:35:35,  8.39it/s][A
 11%|███▊                              | 13803/122310 [33:09<2:49:19, 10.68it/s][A
 11%|███▊                              | 13809/122310 [33:10<3:11:39,  9.43it/s][A
 11%|███▊                              | 13816/122310 [33:11<3:26:02,  8.78it/s][A
 11%|███▊                              | 13830/122310 [33:12<2:54:47, 10.34

step: 1720, loss: 184.36516636037427, epoch: 0



 11%|███▊                              | 13904/122310 [33:21<3:18:36,  9.10it/s][A
 11%|███▊                              | 13913/122310 [33:22<3:15:42,  9.23it/s][A
 11%|███▊                              | 13920/122310 [33:23<3:34:12,  8.43it/s][A
 11%|███▊                              | 13923/122310 [33:24<4:21:05,  6.92it/s][A
 11%|███▊                              | 13927/122310 [33:25<4:57:37,  6.07it/s][A
 11%|███▊                              | 13938/122310 [33:26<4:03:39,  7.41it/s][A
 11%|███▉                              | 13941/122310 [33:27<4:49:10,  6.25it/s][A
 11%|███▉                              | 13950/122310 [33:28<4:15:04,  7.08it/s][A
 11%|███▉                              | 13966/122310 [33:29<3:03:00,  9.87it/s][A
 11%|███▉                              | 13970/122310 [33:30<3:40:50,  8.18it/s][A
 11%|███▉                              | 13981/122310 [33:31<3:17:45,  9.13it/s][A
 11%|███▉                              | 13983/122310 [33:32<4:15:48,  7.06

step: 1740, loss: 139.9435120452151, epoch: 0



 11%|███▉                              | 14046/122310 [33:41<5:23:25,  5.58it/s][A
 11%|███▉                              | 14049/122310 [33:42<6:33:04,  4.59it/s][A
 11%|███▉                              | 14060/122310 [33:43<4:48:18,  6.26it/s][A
 12%|███▉                              | 14067/122310 [33:44<4:44:18,  6.35it/s][A
 12%|███▉                              | 14080/122310 [33:45<3:37:31,  8.29it/s][A
 12%|███▉                              | 14093/122310 [33:46<3:05:22,  9.73it/s][A
 12%|███▉                              | 14095/122310 [33:48<4:18:00,  6.99it/s][A
 12%|███▉                              | 14107/122310 [33:49<3:47:58,  7.91it/s][A
 12%|███▉                              | 14117/122310 [33:50<3:39:28,  8.22it/s][A
 12%|███▉                              | 14128/122310 [33:51<3:23:19,  8.87it/s][A
 12%|███▉                              | 14132/122310 [33:52<4:04:13,  7.38it/s][A
 12%|███▉                              | 14139/122310 [33:53<4:21:46,  6.89

step: 1760, loss: 160.35529202948513, epoch: 0



 12%|███▉                              | 14203/122310 [34:03<4:09:28,  7.22it/s][A
 12%|███▉                              | 14209/122310 [34:04<4:23:46,  6.83it/s][A
 12%|███▉                              | 14215/122310 [34:05<4:34:28,  6.56it/s][A
 12%|███▉                              | 14224/122310 [34:06<4:06:28,  7.31it/s][A
 12%|███▉                              | 14227/122310 [34:07<4:51:55,  6.17it/s][A
 12%|███▉                              | 14234/122310 [34:08<4:38:18,  6.47it/s][A
 12%|███▉                              | 14238/122310 [34:09<5:08:54,  5.83it/s][A
 12%|███▉                              | 14247/122310 [34:10<4:38:36,  6.46it/s][A
 12%|███▉                              | 14255/122310 [34:11<4:20:59,  6.90it/s][A
 12%|███▉                              | 14267/122310 [34:12<3:31:54,  8.50it/s][A
 12%|███▉                              | 14274/122310 [34:13<3:42:41,  8.09it/s][A
 12%|███▉                              | 14288/122310 [34:14<3:04:18,  9.77

step: 1780, loss: 137.84269463081625, epoch: 0



 12%|███▉                              | 14350/122310 [34:23<4:11:51,  7.14it/s][A
 12%|███▉                              | 14356/122310 [34:24<4:20:41,  6.90it/s][A
 12%|███▉                              | 14365/122310 [34:25<3:55:59,  7.62it/s][A
 12%|███▉                              | 14384/122310 [34:26<2:52:11, 10.45it/s][A
 12%|███▉                              | 14389/122310 [34:27<3:22:59,  8.86it/s][A
 12%|████                              | 14403/122310 [34:28<2:52:05, 10.45it/s][A
 12%|████                              | 14411/122310 [34:29<3:10:35,  9.44it/s][A
 12%|████                              | 14424/122310 [34:30<2:52:23, 10.43it/s][A
 12%|████                              | 14433/122310 [34:31<3:04:59,  9.72it/s][A
 12%|████                              | 14442/122310 [34:32<3:06:28,  9.64it/s][A
 12%|████                              | 14458/122310 [34:33<2:32:26, 11.79it/s][A
 12%|████                              | 14472/122310 [34:34<2:21:25, 12.71

step: 1800, loss: 126.77085227029907, epoch: 0



 12%|████                              | 14558/122310 [34:43<2:56:11, 10.19it/s][A
 12%|████                              | 14565/122310 [34:44<3:11:00,  9.40it/s][A
 12%|████                              | 14576/122310 [34:45<2:59:48,  9.99it/s][A
 12%|████                              | 14586/122310 [34:46<3:10:43,  9.41it/s][A
 12%|████                              | 14590/122310 [34:48<4:00:12,  7.47it/s][A
 12%|████                              | 14598/122310 [34:49<3:59:29,  7.50it/s][A
 12%|████                              | 14608/122310 [34:50<3:44:32,  7.99it/s][A
 12%|████                              | 14620/122310 [34:51<3:17:28,  9.09it/s][A
 12%|████                              | 14626/122310 [34:52<3:46:23,  7.93it/s][A
 12%|████                              | 14630/122310 [34:53<4:43:50,  6.32it/s][A
 12%|████                              | 14633/122310 [34:54<5:51:27,  5.11it/s][A
 12%|████                              | 14638/122310 [34:56<6:17:22,  4.76

step: 1820, loss: 124.71954555743879, epoch: 0



 12%|████                              | 14724/122310 [35:06<3:37:22,  8.25it/s][A
 12%|████                              | 14731/122310 [35:07<4:07:59,  7.23it/s][A
 12%|████                              | 14732/122310 [35:08<5:30:33,  5.42it/s][A
 12%|████                              | 14749/122310 [35:09<3:41:42,  8.09it/s][A
 12%|████                              | 14759/122310 [35:11<3:44:47,  7.97it/s][A
 12%|████                              | 14771/122310 [35:12<3:27:32,  8.64it/s][A
 12%|████                              | 14773/122310 [35:13<4:31:42,  6.60it/s][A
 12%|████                              | 14788/122310 [35:14<3:38:00,  8.22it/s][A
 12%|████                              | 14793/122310 [35:16<4:14:37,  7.04it/s][A
 12%|████                              | 14803/122310 [35:17<3:59:16,  7.49it/s][A
 12%|████                              | 14815/122310 [35:18<3:43:55,  8.00it/s][A
 12%|████                              | 14826/122310 [35:19<3:34:17,  8.36

step: 1840, loss: 93.40530543419293, epoch: 0



 12%|████▏                             | 14886/122310 [35:29<3:42:07,  8.06it/s][A
 12%|████▏                             | 14894/122310 [35:30<3:45:31,  7.94it/s][A
 12%|████▏                             | 14899/122310 [35:31<4:13:31,  7.06it/s][A
 12%|████▏                             | 14907/122310 [35:32<4:03:42,  7.35it/s][A
 12%|████▏                             | 14917/122310 [35:33<3:38:16,  8.20it/s][A
 12%|████▏                             | 14925/122310 [35:34<3:44:12,  7.98it/s][A
 12%|████▏                             | 14937/122310 [35:35<3:22:06,  8.85it/s][A
 12%|████▏                             | 14949/122310 [35:36<3:00:22,  9.92it/s][A
 12%|████▏                             | 14956/122310 [35:37<3:12:10,  9.31it/s][A
 12%|████▏                             | 14962/122310 [35:38<3:38:23,  8.19it/s][A
 12%|████▏                             | 14971/122310 [35:39<3:38:35,  8.18it/s][A
 12%|████▏                             | 14985/122310 [35:40<3:00:22,  9.92

step: 1860, loss: 95.93435681690703, epoch: 0



 12%|████▏                             | 15059/122310 [35:50<3:20:41,  8.91it/s][A
 12%|████▏                             | 15070/122310 [35:51<3:09:04,  9.45it/s][A
 12%|████▏                             | 15079/122310 [35:52<3:11:48,  9.32it/s][A
 12%|████▏                             | 15086/122310 [35:53<3:26:07,  8.67it/s][A
 12%|████▏                             | 15093/122310 [35:54<3:34:12,  8.34it/s][A
 12%|████▏                             | 15106/122310 [35:55<3:05:56,  9.61it/s][A
 12%|████▏                             | 15113/122310 [35:56<3:24:54,  8.72it/s][A
 12%|████▏                             | 15126/122310 [35:57<3:00:11,  9.91it/s][A
 12%|████▏                             | 15136/122310 [35:58<2:59:15,  9.96it/s][A
 12%|████▏                             | 15145/122310 [35:59<3:04:17,  9.69it/s][A
 12%|████▏                             | 15155/122310 [36:00<3:01:01,  9.87it/s][A
 12%|████▏                             | 15158/122310 [36:01<3:56:56,  7.54

step: 1880, loss: 111.01822705224608, epoch: 0



 12%|████▏                             | 15251/122310 [36:10<3:12:41,  9.26it/s][A
 12%|████▏                             | 15256/122310 [36:11<3:46:33,  7.88it/s][A
 12%|████▏                             | 15268/122310 [36:12<3:16:58,  9.06it/s][A
 12%|████▏                             | 15280/122310 [36:13<2:59:38,  9.93it/s][A
 13%|████▎                             | 15292/122310 [36:14<2:49:18, 10.53it/s][A
 13%|████▎                             | 15298/122310 [36:15<3:21:55,  8.83it/s][A
 13%|████▎                             | 15302/122310 [36:16<4:06:53,  7.22it/s][A
 13%|████▎                             | 15306/122310 [36:17<4:50:17,  6.14it/s][A
 13%|████▎                             | 15314/122310 [36:19<4:33:53,  6.51it/s][A
 13%|████▎                             | 15321/122310 [36:19<4:23:29,  6.77it/s][A
 13%|████▎                             | 15331/122310 [36:20<3:51:57,  7.69it/s][A
 13%|████▎                             | 15341/122310 [36:22<3:38:41,  8.15

step: 1900, loss: 156.93898598980167, epoch: 0



 13%|████▎                             | 15400/122310 [36:31<4:29:19,  6.62it/s][A
 13%|████▎                             | 15406/122310 [36:32<4:34:28,  6.49it/s][A
 13%|████▎                             | 15415/122310 [36:33<4:08:25,  7.17it/s][A
 13%|████▎                             | 15423/122310 [36:34<4:02:35,  7.34it/s][A
 13%|████▎                             | 15435/122310 [36:35<3:25:35,  8.66it/s][A
 13%|████▎                             | 15443/122310 [36:36<3:26:39,  8.62it/s][A
 13%|████▎                             | 15447/122310 [36:37<4:10:20,  7.11it/s][A
 13%|████▎                             | 15452/122310 [36:39<4:48:42,  6.17it/s][A
 13%|████▎                             | 15465/122310 [36:40<3:38:36,  8.15it/s][A
 13%|████▎                             | 15475/122310 [36:41<3:25:31,  8.66it/s][A
 13%|████▎                             | 15488/122310 [36:42<2:59:14,  9.93it/s][A
 13%|████▎                             | 15494/122310 [36:43<3:35:12,  8.27

step: 1920, loss: 130.62990938648625, epoch: 0



 13%|████▎                             | 15578/122310 [36:52<2:53:03, 10.28it/s][A
 13%|████▎                             | 15592/122310 [36:53<2:43:55, 10.85it/s][A
 13%|████▎                             | 15608/122310 [36:54<2:26:41, 12.12it/s][A
 13%|████▎                             | 15623/122310 [36:56<2:22:16, 12.50it/s][A
 13%|████▎                             | 15633/122310 [36:57<2:33:09, 11.61it/s][A
 13%|████▎                             | 15635/122310 [36:58<3:28:09,  8.54it/s][A
 13%|████▎                             | 15641/122310 [36:59<3:48:20,  7.79it/s][A
 13%|████▎                             | 15646/122310 [37:00<4:14:21,  6.99it/s][A
 13%|████▎                             | 15655/122310 [37:01<3:56:18,  7.52it/s][A
 13%|████▎                             | 15660/122310 [37:02<4:23:27,  6.75it/s][A
 13%|████▎                             | 15667/122310 [37:03<4:18:51,  6.87it/s][A
 13%|████▎                             | 15670/122310 [37:04<5:07:17,  5.78

step: 1940, loss: 115.24839709423341, epoch: 0



 13%|████▍                             | 15740/122310 [37:13<3:48:12,  7.78it/s][A
 13%|████▍                             | 15749/122310 [37:14<3:39:25,  8.09it/s][A
 13%|████▍                             | 15757/122310 [37:15<3:35:51,  8.23it/s][A
 13%|████▍                             | 15767/122310 [37:16<3:18:54,  8.93it/s][A
 13%|████▍                             | 15776/122310 [37:17<3:17:08,  9.01it/s][A
 13%|████▍                             | 15795/122310 [37:18<2:24:07, 12.32it/s][A
 13%|████▍                             | 15804/122310 [37:19<2:33:17, 11.58it/s][A
 13%|████▍                             | 15812/122310 [37:20<2:46:51, 10.64it/s][A
 13%|████▍                             | 15817/122310 [37:21<3:17:07,  9.00it/s][A
 13%|████▍                             | 15829/122310 [37:22<2:54:15, 10.18it/s][A
 13%|████▍                             | 15836/122310 [37:23<3:09:59,  9.34it/s][A
 13%|████▍                             | 15843/122310 [37:24<3:25:16,  8.64

step: 1960, loss: 118.81544507405091, epoch: 0



 13%|████▍                             | 15896/122310 [37:32<5:08:49,  5.74it/s][A
 13%|████▍                             | 15908/122310 [37:33<3:46:19,  7.84it/s][A
 13%|████▍                             | 15923/122310 [37:34<2:52:03, 10.31it/s][A
 13%|████▍                             | 15931/122310 [37:35<3:01:00,  9.80it/s][A
 13%|████▍                             | 15943/122310 [37:36<2:46:41, 10.64it/s][A
 13%|████▍                             | 15947/122310 [37:37<3:24:37,  8.66it/s][A
 13%|████▍                             | 15952/122310 [37:38<3:50:12,  7.70it/s][A
 13%|████▍                             | 15959/122310 [37:39<3:51:37,  7.65it/s][A
 13%|████▍                             | 15962/122310 [37:40<4:39:39,  6.34it/s][A
 13%|████▍                             | 15973/122310 [37:41<3:41:45,  7.99it/s][A
 13%|████▍                             | 15985/122310 [37:43<4:02:47,  7.30it/s][A
 13%|████▍                             | 15994/122310 [37:44<3:44:30,  7.89

step: 1980, loss: 194.63377476383707, epoch: 0



 13%|████▍                             | 16060/122310 [37:52<3:56:09,  7.50it/s][A
 13%|████▍                             | 16069/122310 [37:53<3:41:56,  7.98it/s][A
 13%|████▍                             | 16073/122310 [37:54<4:09:14,  7.10it/s][A
 13%|████▍                             | 16090/122310 [37:55<3:00:35,  9.80it/s][A
 13%|████▍                             | 16100/122310 [37:56<3:01:03,  9.78it/s][A
 13%|████▍                             | 16105/122310 [37:57<3:28:19,  8.50it/s][A
 13%|████▍                             | 16110/122310 [37:58<3:57:09,  7.46it/s][A
 13%|████▍                             | 16113/122310 [37:59<4:45:37,  6.20it/s][A
 13%|████▍                             | 16116/122310 [38:00<5:37:52,  5.24it/s][A
 13%|████▍                             | 16120/122310 [38:01<5:59:08,  4.93it/s][A
 13%|████▍                             | 16129/122310 [38:02<4:42:05,  6.27it/s][A
 13%|████▍                             | 16136/122310 [38:03<4:31:25,  6.52

step: 2000, loss: 153.40271372766344, epoch: 0
sim1 and sim2 are 0.5808812161508992, 0.031423982640285626
cosine of pred and queen: 0.06807359158221606
Actual: athens:greece::madrid:spain, pred: rest
Actual: bangkok:thailand::islamabad:pakistan, pred: henriksen
Actual: beijing:china::tokyo:japan, pred: would
Actual: berlin:germany::rome:italy, pred: statutory
Actual: cairo:egypt::ottawa:canada, pred: enver
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: sent
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: unintegrated
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: blaze
Actual: china:asia::greece:europe, pred: peaches
Actual: nigeria:africa::france:europe, pred: would
Actual: kenya:africa::netherlands:europe, pred: resigns
Actual: mumbai:asia::nairobi:africa, pred: baleine


Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: unto
Actual: islamabad:pakistan::oslo:norway, pred: zemira
Actual: grandfather:grandmother::father:mother, pred: man
Actual: grandpa:grandma::sons:daughters, pred: children
Actual: king:queen::husband:wife, pred: spree
Actual: man:woman::brothers:sisters, pred: nidhi
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: efficiency
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: year
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: whistlingly
Actual: tripura:agartala::odisha:bhubaneswar, pred: ervadi
Actual: algeria:dinar::japan:yen, pred: holyhead
Actual: argentina:peso::japan:yen, pred: government
Actual: india:rupee::denma


 13%|████▏                           | 16191/122310 [39:56<124:26:16,  4.22s/it][A
 13%|████▎                            | 16199/122310 [39:57<87:22:25,  2.96s/it][A
 13%|████▎                            | 16203/122310 [39:58<72:51:37,  2.47s/it][A
 13%|████▎                            | 16215/122310 [39:59<42:08:21,  1.43s/it][A
 13%|████▍                            | 16219/122310 [40:01<38:14:12,  1.30s/it][A
 13%|████▍                            | 16225/122310 [40:02<29:07:01,  1.01it/s][A
 13%|████▍                            | 16232/122310 [40:03<21:14:52,  1.39it/s][A
 13%|████▍                            | 16239/122310 [40:04<15:51:46,  1.86it/s][A
 13%|████▍                            | 16249/122310 [40:05<10:45:32,  2.74it/s][A
 13%|████▌                             | 16260/122310 [40:06<7:35:35,  3.88it/s][A
 13%|████▌                             | 16266/122310 [40:07<6:52:22,  4.29it/s][A
 13%|████▌                             | 16270/122310 [40:07<6:46:09,  4.35

step: 2020, loss: 142.5513827752446, epoch: 0



 13%|████▌                             | 16327/122310 [40:14<3:06:17,  9.48it/s][A
 13%|████▌                             | 16339/122310 [40:14<2:45:41, 10.66it/s][A
 13%|████▌                             | 16346/122310 [40:15<2:58:49,  9.88it/s][A
 13%|████▌                             | 16352/122310 [40:16<3:16:48,  8.97it/s][A
 13%|████▌                             | 16361/122310 [40:17<3:09:43,  9.31it/s][A
 13%|████▌                             | 16369/122310 [40:18<3:11:58,  9.20it/s][A
 13%|████▌                             | 16372/122310 [40:19<3:55:24,  7.50it/s][A
 13%|████▌                             | 16387/122310 [40:20<2:50:45, 10.34it/s][A
 13%|████▌                             | 16394/122310 [40:21<3:03:06,  9.64it/s][A
 13%|████▌                             | 16412/122310 [40:21<2:17:09, 12.87it/s][A
 13%|████▌                             | 16417/122310 [40:22<2:44:15, 10.74it/s][A
 13%|████▌                             | 16425/122310 [40:23<2:52:10, 10.25

step: 2040, loss: 218.88063825343394, epoch: 0



 13%|████▌                             | 16511/122310 [40:31<3:32:17,  8.31it/s][A
 14%|████▌                             | 16520/122310 [40:32<3:18:22,  8.89it/s][A
 14%|████▌                             | 16534/122310 [40:33<2:40:54, 10.96it/s][A
 14%|████▌                             | 16536/122310 [40:34<3:29:44,  8.41it/s][A
 14%|████▌                             | 16540/122310 [40:36<5:17:31,  5.55it/s][A
 14%|████▌                             | 16546/122310 [40:36<5:00:39,  5.86it/s][A
 14%|████▌                             | 16555/122310 [40:37<4:10:54,  7.02it/s][A
 14%|████▌                             | 16562/122310 [40:38<4:02:24,  7.27it/s][A
 14%|████▌                             | 16578/122310 [40:39<2:50:38, 10.33it/s][A
 14%|████▌                             | 16592/122310 [40:40<2:27:36, 11.94it/s][A
 14%|████▌                             | 16597/122310 [40:41<2:54:13, 10.11it/s][A
 14%|████▌                             | 16611/122310 [40:42<2:29:24, 11.79

step: 2060, loss: 140.3997711177379, epoch: 0



 14%|████▋                             | 16675/122310 [40:49<3:11:09,  9.21it/s][A
 14%|████▋                             | 16680/122310 [40:50<3:35:11,  8.18it/s][A
 14%|████▋                             | 16682/122310 [40:51<4:35:31,  6.39it/s][A
 14%|████▋                             | 16684/122310 [40:51<5:39:08,  5.19it/s][A
 14%|████▋                             | 16703/122310 [40:52<2:53:42, 10.13it/s][A
 14%|████▋                             | 16714/122310 [40:53<2:43:41, 10.75it/s][A
 14%|████▋                             | 16726/122310 [40:54<2:31:21, 11.63it/s][A
 14%|████▋                             | 16736/122310 [40:55<2:31:50, 11.59it/s][A
 14%|████▋                             | 16743/122310 [40:56<2:47:45, 10.49it/s][A
 14%|████▋                             | 16754/122310 [40:57<2:38:20, 11.11it/s][A
 14%|████▋                             | 16757/122310 [40:58<3:20:28,  8.78it/s][A
 14%|████▋                             | 16772/122310 [40:58<2:36:29, 11.24

step: 2080, loss: 137.2175010602358, epoch: 0



 14%|████▋                             | 16849/122310 [41:06<2:33:09, 11.48it/s][A
 14%|████▋                             | 16862/122310 [41:07<2:20:46, 12.48it/s][A
 14%|████▋                             | 16868/122310 [41:08<2:43:27, 10.75it/s][A
 14%|████▋                             | 16874/122310 [41:09<3:03:08,  9.59it/s][A
 14%|████▋                             | 16881/122310 [41:10<3:12:48,  9.11it/s][A
 14%|████▋                             | 16889/122310 [41:11<3:13:01,  9.10it/s][A
 14%|████▋                             | 16896/122310 [41:12<3:19:57,  8.79it/s][A
 14%|████▋                             | 16901/122310 [41:13<3:43:19,  7.87it/s][A
 14%|████▋                             | 16909/122310 [41:13<3:33:34,  8.23it/s][A
 14%|████▋                             | 16913/122310 [41:14<4:05:53,  7.14it/s][A
 14%|████▋                             | 16920/122310 [41:15<3:57:29,  7.40it/s][A
 14%|████▋                             | 16940/122310 [41:16<2:26:40, 11.97

step: 2100, loss: 120.13543071793087, epoch: 0



 14%|████▋                             | 17029/122310 [41:24<3:02:18,  9.62it/s][A
 14%|████▋                             | 17035/122310 [41:25<3:20:01,  8.77it/s][A
 14%|████▋                             | 17043/122310 [41:26<3:17:27,  8.89it/s][A
 14%|████▋                             | 17048/122310 [41:27<3:40:52,  7.94it/s][A
 14%|████▋                             | 17060/122310 [41:27<3:01:05,  9.69it/s][A
 14%|████▋                             | 17072/122310 [41:28<2:42:17, 10.81it/s][A
 14%|████▋                             | 17081/122310 [41:29<2:44:03, 10.69it/s][A
 14%|████▊                             | 17090/122310 [41:30<2:45:47, 10.58it/s][A
 14%|████▊                             | 17098/122310 [41:31<2:53:42, 10.09it/s][A
 14%|████▊                             | 17121/122310 [41:32<1:57:35, 14.91it/s][A
 14%|████▊                             | 17129/122310 [41:33<2:13:22, 13.14it/s][A
 14%|████▊                             | 17139/122310 [41:34<2:19:08, 12.60

step: 2120, loss: 113.10354399015807, epoch: 0



 14%|████▊                             | 17206/122310 [41:42<3:02:10,  9.62it/s][A
 14%|████▊                             | 17212/122310 [41:42<3:20:32,  8.73it/s][A
 14%|████▊                             | 17216/122310 [41:43<3:52:18,  7.54it/s][A
 14%|████▊                             | 17230/122310 [41:44<2:54:08, 10.06it/s][A
 14%|████▊                             | 17237/122310 [41:45<3:05:48,  9.43it/s][A
 14%|████▊                             | 17247/122310 [41:46<2:55:14,  9.99it/s][A
 14%|████▊                             | 17260/122310 [41:47<2:33:06, 11.43it/s][A
 14%|████▊                             | 17274/122310 [41:48<2:16:52, 12.79it/s][A
 14%|████▊                             | 17281/122310 [41:49<2:34:15, 11.35it/s][A
 14%|████▊                             | 17290/122310 [41:49<2:39:05, 11.00it/s][A
 14%|████▊                             | 17296/122310 [41:50<2:59:26,  9.75it/s][A
 14%|████▊                             | 17300/122310 [41:51<3:34:04,  8.18

step: 2140, loss: 144.1750352341387, epoch: 0



 14%|████▊                             | 17368/122310 [42:01<4:01:17,  7.25it/s][A
 14%|████▊                             | 17378/122310 [42:02<3:47:10,  7.70it/s][A
 14%|████▊                             | 17382/122310 [42:03<4:27:55,  6.53it/s][A
 14%|████▊                             | 17388/122310 [42:04<4:43:40,  6.16it/s][A
 14%|████▊                             | 17404/122310 [42:05<3:22:25,  8.64it/s][A
 14%|████▊                             | 17412/122310 [42:06<3:31:56,  8.25it/s][A
 14%|████▊                             | 17421/122310 [42:07<3:36:27,  8.08it/s][A
 14%|████▊                             | 17425/122310 [42:08<4:20:34,  6.71it/s][A
 14%|████▊                             | 17429/122310 [42:10<4:56:16,  5.90it/s][A
 14%|████▊                             | 17438/122310 [42:11<4:27:54,  6.52it/s][A
 14%|████▊                             | 17443/122310 [42:12<4:51:29,  6.00it/s][A
 14%|████▊                             | 17446/122310 [42:13<5:48:48,  5.01

step: 2160, loss: 125.28833741882758, epoch: 0



 14%|████▊                             | 17514/122310 [42:23<5:04:41,  5.73it/s][A
 14%|████▊                             | 17520/122310 [42:24<5:08:51,  5.65it/s][A
 14%|████▊                             | 17529/122310 [42:25<4:32:24,  6.41it/s][A
 14%|████▊                             | 17536/122310 [42:26<4:31:55,  6.42it/s][A
 14%|████▉                             | 17541/122310 [42:27<4:56:16,  5.89it/s][A
 14%|████▉                             | 17551/122310 [42:30<6:40:24,  4.36it/s][A
 14%|████▉                             | 17558/122310 [42:31<6:00:58,  4.84it/s][A
 14%|████▉                             | 17568/122310 [42:32<4:55:02,  5.92it/s][A
 14%|████▉                             | 17569/122310 [42:33<6:19:39,  4.60it/s][A
 14%|████▉                             | 17577/122310 [42:35<5:26:11,  5.35it/s][A
 14%|████▉                             | 17587/122310 [42:36<4:35:55,  6.33it/s][A
 14%|████▉                             | 17596/122310 [42:36<3:58:21,  7.32

step: 2180, loss: 113.90432614305027, epoch: 0



 14%|████▉                             | 17632/122310 [42:42<4:09:13,  7.00it/s][A
 14%|████▉                             | 17644/122310 [42:43<3:08:55,  9.23it/s][A
 14%|████▉                             | 17652/122310 [42:44<3:09:03,  9.23it/s][A
 14%|████▉                             | 17659/122310 [42:45<3:11:53,  9.09it/s][A
 14%|████▉                             | 17665/122310 [42:46<3:19:30,  8.74it/s][A
 14%|████▉                             | 17685/122310 [42:47<2:08:28, 13.57it/s][A
 14%|████▉                             | 17690/122310 [42:47<2:32:37, 11.42it/s][A
 14%|████▉                             | 17701/122310 [42:48<2:24:15, 12.09it/s][A
 14%|████▉                             | 17706/122310 [42:49<2:46:44, 10.46it/s][A
 14%|████▉                             | 17714/122310 [42:50<2:46:32, 10.47it/s][A
 14%|████▉                             | 17724/122310 [42:51<3:20:46,  8.68it/s][A
 14%|████▉                             | 17730/122310 [42:52<3:23:38,  8.56

step: 2200, loss: 127.2972508088447, epoch: 0



 15%|████▉                             | 17814/122310 [42:58<2:24:31, 12.05it/s][A
 15%|████▉                             | 17820/122310 [42:59<2:39:39, 10.91it/s][A
 15%|████▉                             | 17833/122310 [42:59<2:15:10, 12.88it/s][A
 15%|████▉                             | 17837/122310 [43:00<2:43:06, 10.68it/s][A
 15%|████▉                             | 17844/122310 [43:01<2:48:37, 10.32it/s][A
 15%|████▉                             | 17855/122310 [43:02<2:28:55, 11.69it/s][A
 15%|████▉                             | 17864/122310 [43:02<2:27:07, 11.83it/s][A
 15%|████▉                             | 17878/122310 [43:03<2:04:33, 13.97it/s][A
 15%|████▉                             | 17887/122310 [43:04<2:09:36, 13.43it/s][A
 15%|████▉                             | 17898/122310 [43:05<2:06:11, 13.79it/s][A
 15%|████▉                             | 17903/122310 [43:05<2:28:04, 11.75it/s][A
 15%|████▉                             | 17912/122310 [43:06<2:26:27, 11.88

step: 2220, loss: 113.7989378506114, epoch: 0



 15%|█████                             | 18014/122310 [43:13<1:49:40, 15.85it/s][A
 15%|█████                             | 18026/122310 [43:13<1:49:05, 15.93it/s][A
 15%|█████                             | 18036/122310 [43:14<1:54:11, 15.22it/s][A
 15%|█████                             | 18038/122310 [43:15<2:31:59, 11.43it/s][A
 15%|█████                             | 18044/122310 [43:16<2:46:47, 10.42it/s][A
 15%|█████                             | 18049/122310 [43:17<4:03:58,  7.12it/s][A
 15%|█████                             | 18056/122310 [43:18<3:45:00,  7.72it/s][A
 15%|█████                             | 18064/122310 [43:19<3:23:56,  8.52it/s][A
 15%|█████                             | 18075/122310 [43:19<2:49:29, 10.25it/s][A
 15%|█████                             | 18083/122310 [43:20<2:47:24, 10.38it/s][A
 15%|█████                             | 18088/122310 [43:21<3:05:03,  9.39it/s][A
 15%|█████                             | 18096/122310 [43:22<2:57:20,  9.79

step: 2240, loss: 101.01232965061543, epoch: 0



 15%|█████                             | 18150/122310 [43:27<3:02:39,  9.50it/s][A
 15%|█████                             | 18162/122310 [43:28<2:30:47, 11.51it/s][A
 15%|█████                             | 18174/122310 [43:29<2:14:41, 12.89it/s][A
 15%|█████                             | 18187/122310 [43:30<2:01:00, 14.34it/s][A
 15%|█████                             | 18197/122310 [43:30<2:03:02, 14.10it/s][A
 15%|█████                             | 18208/122310 [43:31<2:01:00, 14.34it/s][A
 15%|█████                             | 18220/122310 [43:32<1:56:12, 14.93it/s][A
 15%|█████                             | 18240/122310 [43:33<1:33:20, 18.58it/s][A
 15%|█████                             | 18247/122310 [43:33<1:49:29, 15.84it/s][A
 15%|█████                             | 18257/122310 [43:34<1:54:22, 15.16it/s][A
 15%|█████                             | 18265/122310 [43:35<2:05:18, 13.84it/s][A
 15%|█████                             | 18271/122310 [43:36<2:22:48, 12.14

step: 2260, loss: 110.5270129431675, epoch: 0



 15%|█████                             | 18339/122310 [43:42<2:22:48, 12.13it/s][A
 15%|█████                             | 18346/122310 [43:43<2:32:41, 11.35it/s][A
 15%|█████                             | 18354/122310 [43:44<2:34:44, 11.20it/s][A
 15%|█████                             | 18366/122310 [43:45<2:58:10,  9.72it/s][A
 15%|█████                             | 18373/122310 [43:46<2:59:02,  9.68it/s][A
 15%|█████                             | 18385/122310 [43:47<2:32:18, 11.37it/s][A
 15%|█████                             | 18393/122310 [43:48<2:34:15, 11.23it/s][A
 15%|█████                             | 18400/122310 [43:48<2:41:28, 10.72it/s][A
 15%|█████                             | 18409/122310 [43:49<2:35:16, 11.15it/s][A
 15%|█████                             | 18417/122310 [43:50<2:36:20, 11.08it/s][A
 15%|█████                             | 18427/122310 [43:51<2:27:09, 11.77it/s][A
 15%|█████▏                            | 18445/122310 [43:51<1:51:45, 15.49

step: 2280, loss: 141.57933421424516, epoch: 0



 15%|█████▏                            | 18503/122310 [43:57<3:02:11,  9.50it/s][A
 15%|█████▏                            | 18515/122310 [43:58<2:30:28, 11.50it/s][A
 15%|█████▏                            | 18525/122310 [43:59<2:23:27, 12.06it/s][A
 15%|█████▏                            | 18531/122310 [43:59<2:38:24, 10.92it/s][A
 15%|█████▏                            | 18535/122310 [44:00<3:06:36,  9.27it/s][A
 15%|█████▏                            | 18546/122310 [44:01<2:37:38, 10.97it/s][A
 15%|█████▏                            | 18560/122310 [44:02<2:09:23, 13.36it/s][A
 15%|█████▏                            | 18578/122310 [44:02<1:43:47, 16.66it/s][A
 15%|█████▏                            | 18587/122310 [44:03<1:52:50, 15.32it/s][A
 15%|█████▏                            | 18600/122310 [44:04<1:48:15, 15.97it/s][A
 15%|█████▏                            | 18606/122310 [44:05<2:07:12, 13.59it/s][A
 15%|█████▏                            | 18610/122310 [44:05<2:34:47, 11.16

step: 2300, loss: 123.34709040205053, epoch: 0



 15%|█████▏                            | 18703/122310 [44:12<2:23:52, 12.00it/s][A
 15%|█████▏                            | 18706/122310 [44:13<2:59:07,  9.64it/s][A
 15%|█████▏                            | 18718/122310 [44:14<2:28:58, 11.59it/s][A
 15%|█████▏                            | 18726/122310 [44:14<2:31:48, 11.37it/s][A
 15%|█████▏                            | 18736/122310 [44:15<2:23:47, 12.01it/s][A
 15%|█████▏                            | 18746/122310 [44:16<2:18:45, 12.44it/s][A
 15%|█████▏                            | 18754/122310 [44:16<2:24:40, 11.93it/s][A
 15%|█████▏                            | 18758/122310 [44:17<2:52:22, 10.01it/s][A
 15%|█████▏                            | 18768/122310 [44:18<2:36:20, 11.04it/s][A
 15%|█████▏                            | 18788/122310 [44:19<1:49:21, 15.78it/s][A
 15%|█████▏                            | 18799/122310 [44:19<1:51:22, 15.49it/s][A
 15%|█████▏                            | 18819/122310 [44:20<1:31:10, 18.92

step: 2320, loss: 103.22667277281828, epoch: 0



 15%|█████▎                            | 18900/122310 [44:27<2:14:07, 12.85it/s][A
 15%|█████▎                            | 18906/122310 [44:28<2:30:48, 11.43it/s][A
 15%|█████▎                            | 18913/122310 [44:28<2:39:21, 10.81it/s][A
 15%|█████▎                            | 18924/122310 [44:29<2:23:40, 11.99it/s][A
 15%|█████▎                            | 18945/122310 [44:30<1:42:25, 16.82it/s][A
 16%|█████▎                            | 18960/122310 [44:31<1:36:50, 17.79it/s][A
 16%|█████▎                            | 18963/122310 [44:31<2:05:54, 13.68it/s][A
 16%|█████▎                            | 18969/122310 [44:32<2:23:31, 12.00it/s][A
 16%|█████▎                            | 18978/122310 [44:33<2:23:13, 12.03it/s][A
 16%|█████▎                            | 18986/122310 [44:34<2:28:01, 11.63it/s][A
 16%|█████▎                            | 18993/122310 [44:34<2:37:13, 10.95it/s][A
 16%|█████▎                            | 19004/122310 [44:35<2:22:13, 12.11

step: 2340, loss: 111.77807988390683, epoch: 0



 16%|█████▎                            | 19082/122310 [44:42<2:44:45, 10.44it/s][A
 16%|█████▎                            | 19089/122310 [44:43<2:50:38, 10.08it/s][A
 16%|█████▎                            | 19093/122310 [44:43<3:17:26,  8.71it/s][A
 16%|█████▎                            | 19103/122310 [44:44<2:49:29, 10.15it/s][A
 16%|█████▎                            | 19120/122310 [44:45<2:03:34, 13.92it/s][A
 16%|█████▎                            | 19127/122310 [44:46<2:16:50, 12.57it/s][A
 16%|█████▎                            | 19135/122310 [44:46<2:23:13, 12.01it/s][A
 16%|█████▎                            | 19141/122310 [44:47<2:38:30, 10.85it/s][A
 16%|█████▎                            | 19153/122310 [44:48<2:18:33, 12.41it/s][A
 16%|█████▎                            | 19157/122310 [44:49<2:46:29, 10.33it/s][A
 16%|█████▎                            | 19163/122310 [44:49<2:58:24,  9.64it/s][A
 16%|█████▎                            | 19182/122310 [44:50<1:59:32, 14.38

step: 2360, loss: 112.77717922021395, epoch: 0



 16%|█████▎                            | 19281/122310 [44:57<2:17:25, 12.50it/s][A
 16%|█████▎                            | 19288/122310 [44:57<2:28:15, 11.58it/s][A
 16%|█████▎                            | 19300/122310 [44:58<2:12:27, 12.96it/s][A
 16%|█████▎                            | 19308/122310 [44:59<2:19:36, 12.30it/s][A
 16%|█████▎                            | 19323/122310 [45:00<1:57:19, 14.63it/s][A
 16%|█████▎                            | 19331/122310 [45:00<2:07:18, 13.48it/s][A
 16%|█████▍                            | 19337/122310 [45:01<2:24:31, 11.87it/s][A
 16%|█████▍                            | 19343/122310 [45:03<3:29:25,  8.19it/s][A
 16%|█████▍                            | 19347/122310 [45:03<3:48:22,  7.51it/s][A
 16%|█████▍                            | 19358/122310 [45:04<3:02:37,  9.40it/s][A
 16%|█████▍                            | 19370/122310 [45:05<2:32:48, 11.23it/s][A
 16%|█████▍                            | 19383/122310 [45:06<2:12:24, 12.96

step: 2380, loss: 128.52248445239783, epoch: 0



 16%|█████▍                            | 19441/122310 [45:12<3:14:00,  8.84it/s][A
 16%|█████▍                            | 19453/122310 [45:12<2:35:48, 11.00it/s][A
 16%|█████▍                            | 19462/122310 [45:13<2:31:14, 11.33it/s][A
 16%|█████▍                            | 19477/122310 [45:15<2:39:49, 10.72it/s][A
 16%|█████▍                            | 19487/122310 [45:15<2:31:03, 11.34it/s][A
 16%|█████▍                            | 19502/122310 [45:16<2:05:20, 13.67it/s][A
 16%|█████▍                            | 19516/122310 [45:17<1:54:02, 15.02it/s][A
 16%|█████▍                            | 19519/122310 [45:18<2:22:49, 11.99it/s][A
 16%|█████▍                            | 19530/122310 [45:18<2:14:08, 12.77it/s][A
 16%|█████▍                            | 19541/122310 [45:19<2:07:52, 13.39it/s][A
 16%|█████▍                            | 19548/122310 [45:20<2:20:05, 12.23it/s][A
 16%|█████▍                            | 19556/122310 [45:21<2:25:34, 11.76

step: 2400, loss: 103.91193327239391, epoch: 0
sim1 and sim2 are 0.6242151547906103, 0.021054676072419738
cosine of pred and queen: -0.05449849845912749
Actual: athens:greece::madrid:spain, pred: rest
Actual: bangkok:thailand::islamabad:pakistan, pred: henriksen
Actual: beijing:china::tokyo:japan, pred: president
Actual: berlin:germany::rome:italy, pred: could
Actual: cairo:egypt::ottawa:canada, pred: near
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: year
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: unintegrated
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: disobliging
Actual: china:asia::greece:europe, pred: savaging
Actual: nigeria:africa::france:europe, pred: could
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: bal


 16%|█████▍                            | 19625/122310 [45:40<2:10:01, 13.16it/s][A

Actual: india:rupee::denmark:krone, pred: jehizkiah
Actual: usa:dollar::nigeria:naira, pred: strides
Actual: switzerland:swiss::spain:spanish, pred: commended
Actual: thailand:thai::india:indian, pred: brasserie
Actual: sweden:swedish::netherlands:dutch, pred: chovanec
Actual: russia:russian::germany:german, pred: one
Actual: portugal:portuguese::slovakia:slovakian, pred: thunderstorm
Actual: poland:polish::italy:italian, pred: least
Actual: norway:norwegian::mexico:mexican, pred: nought
Actual: japan:japanese::australia:australian, pred: mechanicall
Actual: italy:italian::ireland:irish, pred: grameen
Actual: croatia:croatian::france:french, pred: life
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.0
Actual: walk:walks::vanish:vanishes, pred: bariloche
Actual: work:works::generate:generates, pred: conventionally
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: shall
Actual: sing:sings::shuffle:shuffles, pred: vikassh


 16%|█████▎                           | 19635/122310 [46:39<65:40:44,  2.30s/it][A

Actual: india:rupee::denmark:krone, pred: jehizkiah
Accuracy is 0.0



 16%|█████▎                           | 19636/122310 [46:40<63:49:27,  2.24s/it][A
 16%|█████▎                           | 19641/122310 [46:41<50:10:11,  1.76s/it][A
 16%|█████▎                           | 19649/122310 [46:41<33:50:09,  1.19s/it][A
 16%|█████▎                           | 19653/122310 [46:42<28:12:11,  1.01it/s][A
 16%|█████▎                           | 19657/122310 [46:43<23:09:57,  1.23it/s][A
 16%|█████▎                           | 19666/122310 [46:44<14:33:36,  1.96it/s][A
 16%|█████▍                            | 19677/122310 [46:44<9:15:58,  3.08it/s][A
 16%|█████▍                            | 19680/122310 [46:45<8:56:22,  3.19it/s][A
 16%|█████▍                            | 19685/122310 [46:46<7:45:43,  3.67it/s][A
 16%|█████▍                            | 19693/122310 [46:47<5:54:10,  4.83it/s][A
 16%|█████▍                            | 19702/122310 [46:47<4:36:45,  6.18it/s][A
 16%|█████▍                            | 19709/122310 [46:48<4:09:18,  6.86

step: 2420, loss: 100.78547217152794, epoch: 0



 16%|█████▌                            | 19786/122310 [46:55<2:06:48, 13.47it/s][A
 16%|█████▌                            | 19800/122310 [46:55<1:53:24, 15.06it/s][A
 16%|█████▌                            | 19811/122310 [46:56<1:54:01, 14.98it/s][A
 16%|█████▌                            | 19819/122310 [46:57<2:04:25, 13.73it/s][A
 16%|█████▌                            | 19828/122310 [46:58<2:08:56, 13.25it/s][A
 16%|█████▌                            | 19841/122310 [46:58<1:57:44, 14.51it/s][A
 16%|█████▌                            | 19857/122310 [46:59<1:42:56, 16.59it/s][A
 16%|█████▌                            | 19868/122310 [47:00<1:46:28, 16.04it/s][A
 16%|█████▌                            | 19879/122310 [47:01<1:49:23, 15.61it/s][A
 16%|█████▌                            | 19883/122310 [47:01<2:15:25, 12.61it/s][A
 16%|█████▌                            | 19889/122310 [47:02<2:31:49, 11.24it/s][A
 16%|█████▌                            | 19901/122310 [47:03<2:14:17, 12.71

step: 2440, loss: 108.14910333771077, epoch: 0



 16%|█████▌                            | 19979/122310 [47:10<2:21:12, 12.08it/s][A
 16%|█████▌                            | 19985/122310 [47:10<2:36:36, 10.89it/s][A
 16%|█████▌                            | 19993/122310 [47:11<2:37:13, 10.85it/s][A
 16%|█████▌                            | 20000/122310 [47:12<2:43:54, 10.40it/s][A
 16%|█████▌                            | 20007/122310 [47:13<2:50:30, 10.00it/s][A
 16%|█████▌                            | 20022/122310 [47:13<2:11:36, 12.95it/s][A
 16%|█████▌                            | 20030/122310 [47:14<2:19:20, 12.23it/s][A
 16%|█████▌                            | 20038/122310 [47:15<2:25:10, 11.74it/s][A
 16%|█████▌                            | 20055/122310 [47:16<1:54:00, 14.95it/s][A
 16%|█████▌                            | 20064/122310 [47:16<2:02:28, 13.91it/s][A
 16%|█████▌                            | 20066/122310 [47:17<2:41:14, 10.57it/s][A
 16%|█████▌                            | 20072/122310 [47:18<2:54:16,  9.78

step: 2460, loss: 244.66174577427722, epoch: 0



 16%|█████▍                           | 20079/122310 [47:29<18:37:48,  1.52it/s][A
 16%|█████▍                           | 20080/122310 [47:29<18:49:44,  1.51it/s][A
 16%|█████▍                           | 20088/122310 [47:30<11:22:35,  2.50it/s][A
 16%|█████▌                            | 20095/122310 [47:31<8:20:26,  3.40it/s][A
 16%|█████▌                            | 20110/122310 [47:32<4:41:22,  6.05it/s][A
 16%|█████▌                            | 20128/122310 [47:32<3:01:46,  9.37it/s][A
 16%|█████▌                            | 20136/122310 [47:33<2:57:05,  9.62it/s][A
 16%|█████▌                            | 20141/122310 [47:34<3:11:00,  8.92it/s][A
 16%|█████▌                            | 20148/122310 [47:35<3:09:11,  9.00it/s][A
 16%|█████▌                            | 20161/122310 [47:35<2:33:04, 11.12it/s][A
 16%|█████▌                            | 20172/122310 [47:36<2:21:14, 12.05it/s][A
 16%|█████▌                            | 20179/122310 [47:37<2:32:18, 11.18

step: 2480, loss: 98.18798844664168, epoch: 0



 17%|█████▌                            | 20209/122310 [47:40<2:48:08, 10.12it/s][A
 17%|█████▌                            | 20214/122310 [47:41<3:07:57,  9.05it/s][A
 17%|█████▌                            | 20216/122310 [47:41<3:56:53,  7.18it/s][A
 17%|█████▌                            | 20228/122310 [47:42<2:54:46,  9.73it/s][A
 17%|█████▋                            | 20236/122310 [47:43<2:50:07, 10.00it/s][A
 17%|█████▋                            | 20239/122310 [47:44<3:27:41,  8.19it/s][A
 17%|█████▋                            | 20247/122310 [47:44<3:10:42,  8.92it/s][A
 17%|█████▋                            | 20254/122310 [47:45<3:09:09,  8.99it/s][A
 17%|█████▋                            | 20265/122310 [47:46<2:39:25, 10.67it/s][A
 17%|█████▋                            | 20270/122310 [47:47<3:00:02,  9.45it/s][A
 17%|█████▋                            | 20278/122310 [47:47<2:54:14,  9.76it/s][A
 17%|█████▋                            | 20281/122310 [47:48<3:32:27,  8.00

step: 2500, loss: 140.5277385189959, epoch: 0



 17%|█████▋                            | 20350/122310 [47:55<2:47:37, 10.14it/s][A
 17%|█████▋                            | 20362/122310 [47:56<2:25:04, 11.71it/s][A
 17%|█████▋                            | 20370/122310 [47:57<2:30:37, 11.28it/s][A
 17%|█████▋                            | 20375/122310 [47:58<2:52:04,  9.87it/s][A
 17%|█████▋                            | 20384/122310 [47:58<2:43:37, 10.38it/s][A
 17%|█████▋                            | 20394/122310 [47:59<2:32:15, 11.16it/s][A
 17%|█████▋                            | 20402/122310 [48:00<2:35:08, 10.95it/s][A
 17%|█████▋                            | 20422/122310 [48:01<1:49:41, 15.48it/s][A
 17%|█████▋                            | 20427/122310 [48:01<2:14:08, 12.66it/s][A
 17%|█████▋                            | 20438/122310 [48:02<2:10:21, 13.02it/s][A
 17%|█████▋                            | 20446/122310 [48:03<2:19:44, 12.15it/s][A
 17%|█████▋                            | 20453/122310 [48:04<2:30:22, 11.29

step: 2520, loss: 119.9884487746991, epoch: 0



 17%|█████▋                            | 20558/122310 [48:11<1:52:26, 15.08it/s][A
 17%|█████▋                            | 20568/122310 [48:11<1:56:46, 14.52it/s][A
 17%|█████▋                            | 20577/122310 [48:12<2:03:17, 13.75it/s][A
 17%|█████▋                            | 20579/122310 [48:13<2:42:12, 10.45it/s][A
 17%|█████▋                            | 20587/122310 [48:14<2:41:44, 10.48it/s][A
 17%|█████▋                            | 20594/122310 [48:14<2:47:08, 10.14it/s][A
 17%|█████▋                            | 20604/122310 [48:15<2:33:16, 11.06it/s][A
 17%|█████▋                            | 20609/122310 [48:16<2:54:08,  9.73it/s][A
 17%|█████▋                            | 20615/122310 [48:17<3:04:21,  9.19it/s][A
 17%|█████▋                            | 20622/122310 [48:17<3:03:43,  9.22it/s][A
 17%|█████▋                            | 20631/122310 [48:18<2:49:23, 10.00it/s][A
 17%|█████▋                            | 20634/122310 [48:19<3:26:55,  8.19

step: 2540, loss: 115.22024074781203, epoch: 0



 17%|█████▊                            | 20718/122310 [48:26<2:04:50, 13.56it/s][A
 17%|█████▊                            | 20729/122310 [48:26<2:02:24, 13.83it/s][A
 17%|█████▊                            | 20743/122310 [48:27<1:50:38, 15.30it/s][A
 17%|█████▊                            | 20749/122310 [48:28<2:09:08, 13.11it/s][A
 17%|█████▊                            | 20767/122310 [48:29<1:43:45, 16.31it/s][A
 17%|█████▊                            | 20775/122310 [48:29<1:55:51, 14.61it/s][A
 17%|█████▊                            | 20780/122310 [48:30<2:18:46, 12.19it/s][A
 17%|█████▊                            | 20788/122310 [48:31<2:24:26, 11.71it/s][A
 17%|█████▊                            | 20800/122310 [48:32<2:10:19, 12.98it/s][A
 17%|█████▊                            | 20807/122310 [48:33<2:23:03, 11.82it/s][A
 17%|█████▊                            | 20814/122310 [48:33<2:32:45, 11.07it/s][A
 17%|█████▊                            | 20826/122310 [48:34<2:15:07, 12.52

step: 2560, loss: 122.82825218507554, epoch: 0



 17%|█████▊                            | 20907/122310 [48:41<2:35:50, 10.84it/s][A
 17%|█████▊                            | 20913/122310 [48:42<2:49:28,  9.97it/s][A
 17%|█████▊                            | 20921/122310 [48:42<2:46:26, 10.15it/s][A
 17%|█████▊                            | 20938/122310 [48:43<2:02:22, 13.81it/s][A
 17%|█████▊                            | 20941/122310 [48:44<2:35:21, 10.87it/s][A
 17%|█████▊                            | 20947/122310 [48:45<2:49:22,  9.97it/s][A
 17%|█████▊                            | 20953/122310 [48:45<2:59:40,  9.40it/s][A
 17%|█████▊                            | 20962/122310 [48:46<2:46:35, 10.14it/s][A
 17%|█████▊                            | 20974/122310 [48:47<2:22:25, 11.86it/s][A
 17%|█████▊                            | 20981/122310 [48:48<2:32:30, 11.07it/s][A
 17%|█████▊                            | 20991/122310 [48:48<2:24:07, 11.72it/s][A
 17%|█████▊                            | 20999/122310 [48:49<2:28:29, 11.37

step: 2580, loss: 91.54691740005889, epoch: 0



 17%|█████▊                            | 21045/122310 [48:56<3:18:12,  8.51it/s][A
 17%|█████▊                            | 21053/122310 [48:57<3:05:44,  9.09it/s][A
 17%|█████▊                            | 21060/122310 [48:57<3:04:11,  9.16it/s][A
 17%|█████▊                            | 21068/122310 [48:58<2:57:12,  9.52it/s][A
 17%|█████▊                            | 21074/122310 [48:59<3:05:45,  9.08it/s][A
 17%|█████▊                            | 21087/122310 [49:00<2:26:56, 11.48it/s][A
 17%|█████▊                            | 21099/122310 [49:01<2:12:15, 12.75it/s][A
 17%|█████▊                            | 21103/122310 [49:01<2:39:42, 10.56it/s][A
 17%|█████▊                            | 21111/122310 [49:02<2:39:40, 10.56it/s][A
 17%|█████▊                            | 21122/122310 [49:03<2:23:53, 11.72it/s][A
 17%|█████▊                            | 21126/122310 [49:04<2:51:41,  9.82it/s][A
 17%|█████▉                            | 21137/122310 [49:04<2:30:13, 11.22

step: 2600, loss: 146.0469045639438, epoch: 0



 17%|█████▉                            | 21219/122310 [49:11<2:27:42, 11.41it/s][A
 17%|█████▉                            | 21224/122310 [49:12<2:48:37,  9.99it/s][A
 17%|█████▉                            | 21229/122310 [49:13<3:07:35,  8.98it/s][A
 17%|█████▉                            | 21235/122310 [49:13<3:14:07,  8.68it/s][A
 17%|█████▉                            | 21239/122310 [49:14<3:39:47,  7.66it/s][A
 17%|█████▉                            | 21247/122310 [49:15<3:17:17,  8.54it/s][A
 17%|█████▉                            | 21255/122310 [49:16<3:03:58,  9.16it/s][A
 17%|█████▉                            | 21269/122310 [49:16<2:20:57, 11.95it/s][A
 17%|█████▉                            | 21276/122310 [49:17<2:31:06, 11.14it/s][A
 17%|█████▉                            | 21279/122310 [49:18<3:07:36,  8.98it/s][A
 17%|█████▉                            | 21291/122310 [49:19<2:33:31, 10.97it/s][A
 17%|█████▉                            | 21296/122310 [49:19<2:54:00,  9.68

step: 2620, loss: 91.87920605306383, epoch: 0



 17%|█████▉                            | 21384/122310 [49:26<2:18:50, 12.11it/s][A
 17%|█████▉                            | 21391/122310 [49:27<2:29:28, 11.25it/s][A
 17%|█████▉                            | 21400/122310 [49:28<2:26:59, 11.44it/s][A
 18%|█████▉                            | 21414/122310 [49:29<2:03:59, 13.56it/s][A
 18%|█████▉                            | 21420/122310 [49:29<2:21:31, 11.88it/s][A
 18%|█████▉                            | 21437/122310 [49:30<1:51:38, 15.06it/s][A
 18%|█████▉                            | 21447/122310 [49:31<1:55:49, 14.51it/s][A
 18%|█████▉                            | 21451/122310 [49:32<3:07:09,  8.98it/s][A
 18%|█████▉                            | 21460/122310 [49:33<2:53:14,  9.70it/s][A
 18%|█████▉                            | 21469/122310 [49:34<2:43:36, 10.27it/s][A
 18%|█████▉                            | 21487/122310 [49:35<2:00:41, 13.92it/s][A
 18%|█████▉                            | 21492/122310 [49:35<2:21:00, 11.92

step: 2640, loss: 110.95964276835835, epoch: 0



 18%|█████▉                            | 21549/122310 [49:41<3:10:58,  8.79it/s][A
 18%|█████▉                            | 21551/122310 [49:42<4:02:33,  6.92it/s][A
 18%|█████▉                            | 21554/122310 [49:43<4:36:47,  6.07it/s][A
 18%|█████▉                            | 21566/122310 [49:44<3:06:51,  8.99it/s][A
 18%|█████▉                            | 21580/122310 [49:44<2:21:58, 11.82it/s][A
 18%|██████                            | 21590/122310 [49:45<2:17:05, 12.25it/s][A
 18%|██████                            | 21599/122310 [49:46<2:18:13, 12.14it/s][A
 18%|██████                            | 21608/122310 [49:47<2:18:48, 12.09it/s][A
 18%|██████                            | 21617/122310 [49:47<2:19:26, 12.04it/s][A
 18%|██████                            | 21625/122310 [49:48<2:24:34, 11.61it/s][A
 18%|██████                            | 21634/122310 [49:51<4:32:39,  6.15it/s][A
 18%|██████                            | 21638/122310 [49:52<4:39:27,  6.00

step: 2660, loss: 106.23557715850401, epoch: 0



 18%|██████                            | 21675/122310 [49:57<3:50:19,  7.28it/s][A
 18%|██████                            | 21685/122310 [49:57<3:07:00,  8.97it/s][A
 18%|██████                            | 21692/122310 [49:58<3:05:45,  9.03it/s][A
 18%|██████                            | 21696/122310 [49:59<3:30:22,  7.97it/s][A
 18%|██████                            | 21705/122310 [50:00<3:03:44,  9.13it/s][A
 18%|██████                            | 21715/122310 [50:00<2:42:04, 10.34it/s][A
 18%|██████                            | 21728/122310 [50:01<2:15:24, 12.38it/s][A
 18%|██████                            | 21739/122310 [50:02<2:08:48, 13.01it/s][A
 18%|██████                            | 21753/122310 [50:03<1:54:07, 14.68it/s][A
 18%|██████                            | 21758/122310 [50:03<2:16:46, 12.25it/s][A
 18%|██████                            | 21768/122310 [50:04<2:13:19, 12.57it/s][A
 18%|██████                            | 21781/122310 [50:05<2:00:28, 13.91

step: 2680, loss: 102.35710434571833, epoch: 0



 18%|██████                            | 21834/122310 [50:12<3:35:20,  7.78it/s][A
 18%|██████                            | 21849/122310 [50:12<2:27:01, 11.39it/s][A
 18%|██████                            | 21858/122310 [50:13<2:25:01, 11.54it/s][A
 18%|██████                            | 21866/122310 [50:14<2:28:53, 11.24it/s][A
 18%|██████                            | 21870/122310 [50:15<2:57:01,  9.46it/s][A
 18%|██████                            | 21878/122310 [50:16<2:50:31,  9.82it/s][A
 18%|██████                            | 21886/122310 [50:16<2:46:31, 10.05it/s][A
 18%|██████                            | 21900/122310 [50:17<2:12:52, 12.59it/s][A
 18%|██████                            | 21923/122310 [50:18<1:33:21, 17.92it/s][A
 18%|██████                            | 21929/122310 [50:19<1:52:10, 14.91it/s][A
 18%|██████                            | 21932/122310 [50:19<2:24:01, 11.62it/s][A
 18%|██████                            | 21943/122310 [50:20<2:13:17, 12.55

step: 2700, loss: 95.18560476014115, epoch: 0



 18%|██████                            | 21989/122310 [50:27<4:11:45,  6.64it/s][A
 18%|██████                            | 21995/122310 [50:27<3:56:46,  7.06it/s][A
 18%|██████                            | 22000/122310 [50:28<3:59:52,  6.97it/s][A
 18%|██████                            | 22005/122310 [50:29<4:04:02,  6.85it/s][A
 18%|██████                            | 22009/122310 [50:30<4:19:04,  6.45it/s][A
 18%|██████                            | 22013/122310 [50:30<4:32:36,  6.13it/s][A
 18%|██████                            | 22019/122310 [50:31<4:12:35,  6.62it/s][A
 18%|██████                            | 22026/122310 [50:32<3:45:12,  7.42it/s][A
 18%|██████                            | 22032/122310 [50:33<3:40:46,  7.57it/s][A
 18%|██████▏                           | 22041/122310 [50:33<3:08:00,  8.89it/s][A
 18%|██████▏                           | 22049/122310 [50:34<2:58:07,  9.38it/s][A
 18%|██████▏                           | 22058/122310 [50:35<2:44:35, 10.15

step: 2720, loss: 121.60003634487904, epoch: 0



 18%|██████▏                           | 22131/122310 [50:42<2:11:08, 12.73it/s][A
 18%|██████▏                           | 22140/122310 [50:43<2:14:23, 12.42it/s][A
 18%|██████▏                           | 22149/122310 [50:43<2:16:02, 12.27it/s][A
 18%|██████▏                           | 22163/122310 [50:44<1:58:12, 14.12it/s][A
 18%|██████▏                           | 22171/122310 [50:45<2:07:21, 13.10it/s][A
 18%|██████▏                           | 22176/122310 [50:46<2:29:36, 11.15it/s][A
 18%|██████▏                           | 22184/122310 [50:46<2:31:42, 11.00it/s][A
 18%|██████▏                           | 22197/122310 [50:47<2:09:56, 12.84it/s][A
 18%|██████▏                           | 22214/122310 [50:48<1:45:58, 15.74it/s][A
 18%|██████▏                           | 22230/122310 [50:49<1:35:59, 17.38it/s][A
 18%|██████▏                           | 22240/122310 [50:49<1:43:17, 16.15it/s][A
 18%|██████▏                           | 22247/122310 [50:50<1:58:26, 14.08

step: 2740, loss: 106.80540456603026, epoch: 0



 18%|██████▏                           | 22329/122310 [50:57<2:35:15, 10.73it/s][A
 18%|██████▏                           | 22339/122310 [50:58<2:25:09, 11.48it/s][A
 18%|██████▏                           | 22345/122310 [50:58<2:39:46, 10.43it/s][A
 18%|██████▏                           | 22360/122310 [50:59<2:05:41, 13.25it/s][A
 18%|██████▏                           | 22365/122310 [51:00<2:27:48, 11.27it/s][A
 18%|██████▏                           | 22371/122310 [51:01<2:42:09, 10.27it/s][A
 18%|██████▏                           | 22380/122310 [51:02<2:35:31, 10.71it/s][A
 18%|██████▏                           | 22386/122310 [51:02<2:48:50,  9.86it/s][A
 18%|██████▏                           | 22389/122310 [51:03<3:24:32,  8.14it/s][A
 18%|██████▏                           | 22411/122310 [51:05<2:31:15, 11.01it/s][A
 18%|██████▏                           | 22425/122310 [51:05<2:09:21, 12.87it/s][A
 18%|██████▏                           | 22433/122310 [51:06<2:16:03, 12.23

step: 2760, loss: 138.08989824305942, epoch: 0



 18%|██████▎                           | 22509/122310 [51:12<1:51:50, 14.87it/s][A
 18%|██████▎                           | 22520/122310 [51:13<1:51:57, 14.86it/s][A
 18%|██████▎                           | 22525/122310 [51:14<2:13:08, 12.49it/s][A
 18%|██████▎                           | 22534/122310 [51:14<2:14:07, 12.40it/s][A
 18%|██████▎                           | 22539/122310 [51:15<2:34:42, 10.75it/s][A
 18%|██████▎                           | 22552/122310 [51:16<2:10:25, 12.75it/s][A
 18%|██████▎                           | 22566/122310 [51:16<1:53:42, 14.62it/s][A
 18%|██████▎                           | 22575/122310 [51:17<1:59:28, 13.91it/s][A
 18%|██████▎                           | 22584/122310 [51:18<2:04:35, 13.34it/s][A
 18%|██████▎                           | 22597/122310 [51:19<1:53:41, 14.62it/s][A
 18%|██████▎                           | 22604/122310 [51:19<2:07:07, 13.07it/s][A
 18%|██████▎                           | 22608/122310 [51:20<2:34:00, 10.79

step: 2780, loss: 126.05330885162104, epoch: 0



 19%|██████▎                           | 22684/122310 [51:27<2:30:13, 11.05it/s][A
 19%|██████▎                           | 22694/122310 [51:28<2:20:38, 11.81it/s][A
 19%|██████▎                           | 22705/122310 [51:28<2:10:03, 12.76it/s][A
 19%|██████▎                           | 22720/122310 [51:29<1:50:43, 14.99it/s][A
 19%|██████▎                           | 22724/122310 [51:30<2:16:13, 12.18it/s][A
 19%|██████▎                           | 22742/122310 [51:30<1:44:39, 15.86it/s][A
 19%|██████▎                           | 22750/122310 [51:31<1:56:01, 14.30it/s][A
 19%|██████▎                           | 22752/122310 [51:32<2:32:25, 10.89it/s][A
 19%|██████▎                           | 22759/122310 [51:33<2:39:02, 10.43it/s][A
 19%|██████▎                           | 22772/122310 [51:33<2:11:30, 12.62it/s][A
 19%|██████▎                           | 22779/122310 [51:34<2:22:10, 11.67it/s][A
 19%|██████▎                           | 22788/122310 [51:35<2:20:10, 11.83

step: 2800, loss: 126.92756671550015, epoch: 0
sim1 and sim2 are 0.6025105840406492, 0.05290484134828417
cosine of pred and queen: 0.035652355989997685
Actual: athens:greece::madrid:spain, pred: back
Actual: bangkok:thailand::islamabad:pakistan, pred: henriksen
Actual: beijing:china::tokyo:japan, pred: president
Actual: berlin:germany::rome:italy, pred: could
Actual: cairo:egypt::ottawa:canada, pred: family
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: year
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: spittle
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: tuareg
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: would
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: baleine
Actual


 19%|██████▎                           | 22844/122310 [51:50<2:22:08, 11.66it/s][A

Actual: jharkhand:ranchi::punjab:chandigarh, pred: macedonian
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: olukolade
Actual: india:delhi::serbia:belgrade, pred: archway
Actual: spain:spanish::korea:korean, pred: turnouts
Actual: syria:arabic::australia:english, pred: world
Actual: mouse:squeak::elephant:trumpet, pred: mongrel
Actual: algeria:dinar::usa:dollar, pred: steffen
Actual: argentina:peso::russia:ruble, pred: bank
Actual: armenia:dram::iran:rial, pred: sowest
Actual: brazil:real::sweden:krona, pred: exacerbate
Actual: europe:euro::japan:yen, pred: shaoping
Actual: india:rupee::denmark:krone, pred: shaun
Actual: usa:dollar::nigeria:naira, pred: pigeons
Actual: switzerland:swiss::spain:spanish, pred: erste
Actual: thailand:thai::india:indian, pred: vayalar
Actual: sweden:swedish::netherlands:dutch, pred: chovanec
Actual: russia:russian::germany:german, pred: back
Actual: portugal:portuguese::slovakia:slovakian, pred: thunderstorm
Actual: poland:polish::italy:italian


 19%|██████▏                          | 22854/122310 [52:53<68:27:28,  2.48s/it][A

Actual: india:rupee::denmark:krone, pred: shaun
Accuracy is 0.005917159763313609



 19%|██████▏                          | 22865/122310 [52:54<45:36:49,  1.65s/it][A
 19%|██████▏                          | 22873/122310 [52:54<34:17:59,  1.24s/it][A
 19%|██████▏                          | 22887/122310 [52:55<21:16:35,  1.30it/s][A
 19%|██████▏                          | 22899/122310 [52:56<14:52:41,  1.86it/s][A
 19%|██████▎                           | 22914/122310 [52:57<9:53:50,  2.79it/s][A
 19%|██████▎                           | 22923/122310 [52:57<8:04:07,  3.42it/s][A
 19%|██████▍                           | 22934/122310 [52:58<6:13:46,  4.43it/s][A
 19%|██████▍                           | 22940/122310 [52:59<5:42:01,  4.84it/s][A
 19%|██████▍                           | 22951/122310 [53:00<4:25:56,  6.23it/s][A
 19%|██████▍                           | 22958/122310 [53:00<4:05:03,  6.76it/s][A
 19%|██████▍                           | 22968/122310 [53:01<3:25:47,  8.05it/s][A
 19%|██████▍                           | 22977/122310 [53:02<3:06:06,  8.90

step: 2820, loss: 92.3297846476041, epoch: 0



 19%|██████▍                           | 23044/122310 [53:09<2:17:42, 12.01it/s][A
 19%|██████▍                           | 23053/122310 [53:09<2:16:57, 12.08it/s][A
 19%|██████▍                           | 23060/122310 [53:10<2:26:44, 11.27it/s][A
 19%|██████▍                           | 23064/122310 [53:11<2:52:34,  9.58it/s][A
 19%|██████▍                           | 23073/122310 [53:11<2:39:34, 10.36it/s][A
 19%|██████▍                           | 23083/122310 [53:12<2:25:56, 11.33it/s][A
 19%|██████▍                           | 23088/122310 [53:13<2:45:45,  9.98it/s][A
 19%|██████▍                           | 23105/122310 [53:14<1:59:07, 13.88it/s][A
 19%|██████▍                           | 23108/122310 [53:15<3:18:06,  8.35it/s][A
 19%|██████▍                           | 23115/122310 [53:16<3:11:45,  8.62it/s][A
 19%|██████▍                           | 23126/122310 [53:17<2:41:32, 10.23it/s][A
 19%|██████▍                           | 23131/122310 [53:17<2:57:27,  9.31

step: 2840, loss: 130.74906541446788, epoch: 0



 19%|██████▍                           | 23193/122310 [53:23<2:58:47,  9.24it/s][A
 19%|██████▍                           | 23209/122310 [53:24<2:07:22, 12.97it/s][A
 19%|██████▍                           | 23220/122310 [53:25<2:01:55, 13.54it/s][A
 19%|██████▍                           | 23226/122310 [53:26<2:18:28, 11.93it/s][A
 19%|██████▍                           | 23243/122310 [53:26<1:50:31, 14.94it/s][A
 19%|██████▍                           | 23251/122310 [53:27<2:02:42, 13.46it/s][A
 19%|██████▍                           | 23253/122310 [53:28<2:41:54, 10.20it/s][A
 19%|██████▍                           | 23260/122310 [53:29<2:47:47,  9.84it/s][A
 19%|██████▍                           | 23266/122310 [53:29<2:59:45,  9.18it/s][A
 19%|██████▍                           | 23272/122310 [53:30<3:08:43,  8.75it/s][A
 19%|██████▍                           | 23278/122310 [53:31<3:15:26,  8.45it/s][A
 19%|██████▍                           | 23285/122310 [53:32<3:11:58,  8.60

step: 2860, loss: 90.10012875402518, epoch: 0



 19%|██████▍                           | 23373/122310 [53:39<2:03:27, 13.36it/s][A
 19%|██████▍                           | 23380/122310 [53:40<2:16:48, 12.05it/s][A
 19%|██████▌                           | 23383/122310 [53:40<2:51:13,  9.63it/s][A
 19%|██████▌                           | 23391/122310 [53:41<2:47:27,  9.84it/s][A
 19%|██████▌                           | 23393/122310 [53:42<3:35:02,  7.67it/s][A
 19%|██████▌                           | 23395/122310 [53:43<4:28:11,  6.15it/s][A
 19%|██████▌                           | 23408/122310 [53:43<2:56:25,  9.34it/s][A
 19%|██████▌                           | 23415/122310 [53:44<2:58:08,  9.25it/s][A
 19%|██████▌                           | 23425/122310 [53:45<2:39:26, 10.34it/s][A
 19%|██████▌                           | 23431/122310 [53:46<2:52:32,  9.55it/s][A
 19%|██████▌                           | 23441/122310 [53:47<2:36:10, 10.55it/s][A
 19%|██████▌                           | 23445/122310 [53:47<3:04:17,  8.94

step: 2880, loss: 115.24635554497945, epoch: 0



 19%|██████▌                           | 23512/122310 [53:54<2:51:21,  9.61it/s][A
 19%|██████▌                           | 23519/122310 [53:55<2:54:51,  9.42it/s][A
 19%|██████▌                           | 23527/122310 [53:56<2:49:42,  9.70it/s][A
 19%|██████▌                           | 23533/122310 [53:57<3:01:12,  9.09it/s][A
 19%|██████▌                           | 23540/122310 [53:57<3:01:34,  9.07it/s][A
 19%|██████▌                           | 23553/122310 [53:58<2:24:51, 11.36it/s][A
 19%|██████▌                           | 23562/122310 [53:59<2:23:40, 11.45it/s][A
 19%|██████▌                           | 23574/122310 [54:00<2:09:47, 12.68it/s][A
 19%|██████▌                           | 23580/122310 [54:00<2:27:10, 11.18it/s][A
 19%|██████▌                           | 23589/122310 [54:01<2:25:44, 11.29it/s][A
 19%|██████▌                           | 23595/122310 [54:02<2:40:26, 10.25it/s][A
 19%|██████▌                           | 23613/122310 [54:03<1:56:12, 14.16

step: 2900, loss: 116.25596325857173, epoch: 0



 19%|██████▌                           | 23694/122310 [54:10<2:50:44,  9.63it/s][A
 19%|██████▌                           | 23706/122310 [54:11<2:24:17, 11.39it/s][A
 19%|██████▌                           | 23721/122310 [54:11<1:59:21, 13.77it/s][A
 19%|██████▌                           | 23730/122310 [54:12<2:05:01, 13.14it/s][A
 19%|██████▌                           | 23735/122310 [54:13<2:27:45, 11.12it/s][A
 19%|██████▌                           | 23739/122310 [54:14<2:55:18,  9.37it/s][A
 19%|██████▌                           | 23747/122310 [54:14<2:49:53,  9.67it/s][A
 19%|██████▌                           | 23761/122310 [54:15<2:14:48, 12.18it/s][A
 19%|██████▌                           | 23778/122310 [54:16<1:48:47, 15.10it/s][A
 19%|██████▌                           | 23786/122310 [54:17<1:59:58, 13.69it/s][A
 19%|██████▌                           | 23793/122310 [54:18<2:13:37, 12.29it/s][A
 19%|██████▌                           | 23797/122310 [54:18<2:42:03, 10.13

step: 2920, loss: 111.3353202665368, epoch: 0



 20%|██████▋                           | 23855/122310 [54:25<3:01:12,  9.06it/s][A
 20%|██████▋                           | 23869/122310 [54:26<2:19:28, 11.76it/s][A
 20%|██████▋                           | 23880/122310 [54:27<2:11:12, 12.50it/s][A
 20%|██████▋                           | 23884/122310 [54:28<2:39:33, 10.28it/s][A
 20%|██████▋                           | 23891/122310 [54:28<2:44:54,  9.95it/s][A
 20%|██████▋                           | 23899/122310 [54:29<2:43:51, 10.01it/s][A
 20%|██████▋                           | 23909/122310 [54:30<2:30:04, 10.93it/s][A
 20%|██████▋                           | 23917/122310 [54:31<2:32:28, 10.76it/s][A
 20%|██████▋                           | 23930/122310 [54:31<2:10:48, 12.54it/s][A
 20%|██████▋                           | 23938/122310 [54:32<2:18:00, 11.88it/s][A
 20%|██████▋                           | 23948/122310 [54:33<2:14:32, 12.18it/s][A
 20%|██████▋                           | 23959/122310 [54:34<2:08:10, 12.79

step: 2940, loss: 114.06231275308153, epoch: 0



 20%|██████▋                           | 24020/122310 [54:41<3:45:54,  7.25it/s][A
 20%|██████▋                           | 24032/122310 [54:42<2:48:13,  9.74it/s][A
 20%|██████▋                           | 24039/122310 [54:42<2:52:01,  9.52it/s][A
 20%|██████▋                           | 24045/122310 [54:43<3:02:03,  9.00it/s][A
 20%|██████▋                           | 24052/122310 [54:44<3:01:37,  9.02it/s][A
 20%|██████▋                           | 24066/122310 [54:45<2:19:35, 11.73it/s][A
 20%|██████▋                           | 24071/122310 [54:45<2:41:07, 10.16it/s][A
 20%|██████▋                           | 24083/122310 [54:46<2:19:31, 11.73it/s][A
 20%|██████▋                           | 24090/122310 [54:47<2:29:44, 10.93it/s][A
 20%|██████▋                           | 24100/122310 [54:48<2:22:42, 11.47it/s][A
 20%|██████▋                           | 24103/122310 [54:49<2:58:06,  9.19it/s][A
 20%|██████▋                           | 24114/122310 [54:49<2:32:56, 10.70

step: 2960, loss: 107.22905300159283, epoch: 0



 20%|██████▋                           | 24203/122310 [54:56<2:27:36, 11.08it/s][A
 20%|██████▋                           | 24210/122310 [54:57<2:34:18, 10.60it/s][A
 20%|██████▋                           | 24214/122310 [54:58<3:00:54,  9.04it/s][A
 20%|██████▋                           | 24224/122310 [54:58<2:37:36, 10.37it/s][A
 20%|██████▋                           | 24227/122310 [54:59<3:11:31,  8.54it/s][A
 20%|██████▋                           | 24235/122310 [55:00<2:56:52,  9.24it/s][A
 20%|██████▋                           | 24244/122310 [55:01<2:41:23, 10.13it/s][A
 20%|██████▋                           | 24250/122310 [55:01<2:51:54,  9.51it/s][A
 20%|██████▋                           | 24258/122310 [55:02<2:44:19,  9.94it/s][A
 20%|██████▋                           | 24260/122310 [55:03<3:31:02,  7.74it/s][A
 20%|██████▋                           | 24263/122310 [55:04<4:04:41,  6.68it/s][A
 20%|██████▋                           | 24270/122310 [55:04<3:37:04,  7.53

step: 2980, loss: 103.07800391862612, epoch: 0



 20%|██████▊                           | 24353/122310 [55:11<2:04:08, 13.15it/s][A
 20%|██████▊                           | 24360/122310 [55:12<2:15:20, 12.06it/s][A
 20%|██████▊                           | 24379/122310 [55:12<1:40:49, 16.19it/s][A
 20%|██████▊                           | 24385/122310 [55:13<1:58:31, 13.77it/s][A
 20%|██████▊                           | 24394/122310 [55:14<2:02:44, 13.29it/s][A
 20%|██████▊                           | 24398/122310 [55:15<2:29:47, 10.89it/s][A
 20%|██████▊                           | 24406/122310 [55:15<2:29:17, 10.93it/s][A
 20%|██████▊                           | 24413/122310 [55:16<2:35:10, 10.51it/s][A
 20%|██████▊                           | 24417/122310 [55:17<3:01:43,  8.98it/s][A
 20%|██████▊                           | 24425/122310 [55:18<2:50:52,  9.55it/s][A
 20%|██████▊                           | 24439/122310 [55:18<2:11:56, 12.36it/s][A
 20%|██████▊                           | 24452/122310 [55:19<1:57:20, 13.90

step: 3000, loss: 101.16808005529377, epoch: 0
saving weights



 20%|██████▌                          | 24490/122310 [55:35<16:59:46,  1.60it/s][A
 20%|██████▌                          | 24497/122310 [55:35<12:42:22,  2.14it/s][A
 20%|██████▌                          | 24503/122310 [55:36<10:09:10,  2.68it/s][A
 20%|██████▊                           | 24507/122310 [55:37<9:07:07,  2.98it/s][A
 20%|██████▊                           | 24512/122310 [55:38<7:45:14,  3.50it/s][A
 20%|██████▊                           | 24523/122310 [55:38<5:02:43,  5.38it/s][A
 20%|██████▊                           | 24533/122310 [55:39<3:55:24,  6.92it/s][A
 20%|██████▊                           | 24553/122310 [55:40<2:25:02, 11.23it/s][A
 20%|██████▊                           | 24560/122310 [55:41<2:30:44, 10.81it/s][A

step: 3020, loss: 115.9437886687076, epoch: 0



 20%|██████▊                           | 24565/122310 [55:41<2:46:13,  9.80it/s][A
 20%|██████▊                           | 24577/122310 [55:42<2:22:01, 11.47it/s][A
 20%|██████▊                           | 24583/122310 [55:43<2:33:46, 10.59it/s][A
 20%|██████▊                           | 24593/122310 [55:43<2:22:22, 11.44it/s][A
 20%|██████▊                           | 24600/122310 [55:44<2:29:44, 10.88it/s][A
 20%|██████▊                           | 24611/122310 [55:45<2:14:59, 12.06it/s][A
 20%|██████▊                           | 24619/122310 [55:46<2:19:06, 11.70it/s][A
 20%|██████▊                           | 24630/122310 [55:46<2:08:25, 12.68it/s][A
 20%|██████▊                           | 24637/122310 [55:47<2:18:55, 11.72it/s][A
 20%|██████▊                           | 24640/122310 [55:48<2:52:23,  9.44it/s][A
 20%|██████▊                           | 24648/122310 [55:49<2:45:07,  9.86it/s][A
 20%|██████▊                           | 24650/122310 [55:50<4:35:50,  5.90

step: 3040, loss: 130.9172043705386, epoch: 0



 20%|██████▊                           | 24717/122310 [55:56<3:19:10,  8.17it/s][A
 20%|██████▊                           | 24729/122310 [55:57<2:34:22, 10.53it/s][A
 20%|██████▉                           | 24735/122310 [55:58<2:46:19,  9.78it/s][A
 20%|██████▉                           | 24740/122310 [55:58<3:02:46,  8.90it/s][A
 20%|██████▉                           | 24746/122310 [55:59<3:07:32,  8.67it/s][A
 20%|██████▉                           | 24759/122310 [56:00<2:22:57, 11.37it/s][A
 20%|██████▉                           | 24772/122310 [56:00<2:02:35, 13.26it/s][A
 20%|██████▉                           | 24782/122310 [56:01<2:01:56, 13.33it/s][A
 20%|██████▉                           | 24793/122310 [56:02<1:57:41, 13.81it/s][A
 20%|██████▉                           | 24805/122310 [56:03<1:51:53, 14.52it/s][A
 20%|██████▉                           | 24814/122310 [56:03<1:57:23, 13.84it/s][A
 20%|██████▉                           | 24819/122310 [56:04<2:18:45, 11.71

step: 3060, loss: 104.00285084561078, epoch: 0



 20%|██████▉                           | 24888/122310 [56:11<2:39:15, 10.20it/s][A
 20%|██████▉                           | 24902/122310 [56:12<2:06:39, 12.82it/s][A
 20%|██████▉                           | 24912/122310 [56:12<2:04:41, 13.02it/s][A
 20%|██████▉                           | 24920/122310 [56:13<2:11:10, 12.37it/s][A
 20%|██████▉                           | 24929/122310 [56:14<2:11:27, 12.35it/s][A
 20%|██████▉                           | 24932/122310 [56:14<2:44:21,  9.87it/s][A
 20%|██████▉                           | 24940/122310 [56:15<2:39:40, 10.16it/s][A
 20%|██████▉                           | 24943/122310 [56:16<3:14:16,  8.35it/s][A
 20%|██████▉                           | 24948/122310 [56:17<3:26:32,  7.86it/s][A
 20%|██████▉                           | 24955/122310 [56:17<3:15:18,  8.31it/s][A
 20%|██████▉                           | 24959/122310 [56:18<3:37:48,  7.45it/s][A
 20%|██████▉                           | 24961/122310 [56:19<4:28:49,  6.04

step: 3080, loss: 112.35969602887424, epoch: 0



 20%|██████▉                           | 25036/122310 [56:26<2:35:51, 10.40it/s][A
 20%|██████▉                           | 25044/122310 [56:26<2:34:14, 10.51it/s][A
 20%|██████▉                           | 25053/122310 [56:27<2:26:31, 11.06it/s][A
 20%|██████▉                           | 25058/122310 [56:28<2:45:51,  9.77it/s][A
 20%|██████▉                           | 25068/122310 [56:30<3:59:24,  6.77it/s][A
 21%|██████▉                           | 25075/122310 [56:31<3:40:20,  7.35it/s][A
 21%|██████▉                           | 25080/122310 [56:31<3:44:56,  7.20it/s][A
 21%|██████▉                           | 25092/122310 [56:32<2:51:25,  9.45it/s][A
 21%|██████▉                           | 25096/122310 [56:33<3:12:47,  8.40it/s][A
 21%|██████▉                           | 25107/122310 [56:34<2:39:23, 10.16it/s][A
 21%|██████▉                           | 25115/122310 [56:34<2:36:00, 10.38it/s][A
 21%|██████▉                           | 25127/122310 [56:35<2:14:14, 12.07

step: 3100, loss: 115.84467747776543, epoch: 0



 21%|███████                           | 25192/122310 [56:40<2:05:08, 12.93it/s][A
 21%|███████                           | 25194/122310 [56:41<2:42:32,  9.96it/s][A
 21%|███████                           | 25205/122310 [56:42<2:21:00, 11.48it/s][A
 21%|███████                           | 25221/122310 [56:43<1:51:15, 14.54it/s][A
 21%|███████                           | 25229/122310 [56:43<2:00:15, 13.46it/s][A
 21%|███████                           | 25237/122310 [56:44<2:07:35, 12.68it/s][A
 21%|███████                           | 25245/122310 [56:45<2:13:25, 12.13it/s][A
 21%|███████                           | 25247/122310 [56:45<2:53:50,  9.31it/s][A
 21%|███████                           | 25257/122310 [56:46<2:32:59, 10.57it/s][A
 21%|███████                           | 25263/122310 [56:47<2:44:46,  9.82it/s][A
 21%|███████                           | 25269/122310 [56:48<2:53:15,  9.33it/s][A
 21%|███████                           | 25273/122310 [56:49<4:18:52,  6.25

step: 3120, loss: 98.45129280822134, epoch: 0



 21%|███████                           | 25336/122310 [56:55<2:16:53, 11.81it/s][A
 21%|███████                           | 25344/122310 [56:56<2:20:21, 11.51it/s][A
 21%|███████                           | 25354/122310 [56:57<2:13:01, 12.15it/s][A
 21%|███████                           | 25362/122310 [56:57<2:17:26, 11.76it/s][A
 21%|███████                           | 25379/122310 [56:58<1:46:43, 15.14it/s][A
 21%|███████                           | 25398/122310 [56:59<1:28:10, 18.32it/s][A
 21%|███████                           | 25403/122310 [57:00<1:48:31, 14.88it/s][A
 21%|███████                           | 25414/122310 [57:00<1:48:14, 14.92it/s][A
 21%|███████                           | 25429/122310 [57:01<1:37:53, 16.49it/s][A
 21%|███████                           | 25436/122310 [57:02<1:52:10, 14.39it/s][A
 21%|███████                           | 25443/122310 [57:02<2:04:30, 12.97it/s][A
 21%|███████                           | 25449/122310 [57:03<2:20:23, 11.50

step: 3140, loss: 107.95158914828335, epoch: 0



 21%|███████                           | 25518/122310 [57:10<2:54:22,  9.25it/s][A
 21%|███████                           | 25527/122310 [57:11<2:39:20, 10.12it/s][A
 21%|███████                           | 25540/122310 [57:11<2:10:18, 12.38it/s][A
 21%|███████                           | 25551/122310 [57:12<2:02:53, 13.12it/s][A
 21%|███████                           | 25566/122310 [57:13<1:45:24, 15.30it/s][A
 21%|███████                           | 25576/122310 [57:14<1:49:28, 14.73it/s][A
 21%|███████                           | 25581/122310 [57:14<2:10:27, 12.36it/s][A
 21%|███████                           | 25590/122310 [57:15<2:10:26, 12.36it/s][A
 21%|███████                           | 25594/122310 [57:16<2:37:14, 10.25it/s][A
 21%|███████                           | 25598/122310 [57:17<3:02:43,  8.82it/s][A
 21%|███████                           | 25610/122310 [57:17<2:23:05, 11.26it/s][A
 21%|███████                           | 25620/122310 [57:18<2:12:20, 12.18

step: 3160, loss: 103.97479924032108, epoch: 0



 21%|███████▏                          | 25688/122310 [57:24<2:23:18, 11.24it/s][A
 21%|███████▏                          | 25700/122310 [57:25<2:03:43, 13.01it/s][A
 21%|███████▏                          | 25709/122310 [57:26<2:04:00, 12.98it/s][A
 21%|███████▏                          | 25719/122310 [57:26<2:00:14, 13.39it/s][A
 21%|███████▏                          | 25729/122310 [57:27<1:57:34, 13.69it/s][A
 21%|███████▏                          | 25743/122310 [57:28<1:43:05, 15.61it/s][A
 21%|███████▏                          | 25766/122310 [57:28<1:17:20, 20.81it/s][A
 21%|███████▏                          | 25770/122310 [57:29<1:38:37, 16.32it/s][A
 21%|███████▏                          | 25777/122310 [57:30<1:51:55, 14.37it/s][A
 21%|███████▏                          | 25790/122310 [57:30<1:42:33, 15.69it/s][A
 21%|███████▏                          | 25797/122310 [57:31<1:55:20, 13.95it/s][A
 21%|███████▏                          | 25802/122310 [57:32<2:15:14, 11.89

step: 3180, loss: 125.77355295563044, epoch: 0



 21%|███████▏                          | 25869/122310 [57:38<2:30:48, 10.66it/s][A
 21%|███████▏                          | 25876/122310 [57:39<2:33:27, 10.47it/s][A
 21%|███████▏                          | 25882/122310 [57:40<2:42:18,  9.90it/s][A
 21%|███████▏                          | 25889/122310 [57:40<2:41:29,  9.95it/s][A
 21%|███████▏                          | 25901/122310 [57:41<2:13:06, 12.07it/s][A
 21%|███████▏                          | 25905/122310 [57:42<2:37:52, 10.18it/s][A
 21%|███████▏                          | 25922/122310 [57:42<1:51:32, 14.40it/s][A
 21%|███████▏                          | 25924/122310 [57:43<2:26:59, 10.93it/s][A
 21%|███████▏                          | 25928/122310 [57:44<2:50:56,  9.40it/s][A
 21%|███████▏                          | 25930/122310 [57:44<3:36:19,  7.43it/s][A
 21%|███████▏                          | 25933/122310 [57:45<4:07:20,  6.49it/s][A
 21%|███████▏                          | 25940/122310 [57:46<3:34:03,  7.50

step: 3200, loss: 112.7175997588443, epoch: 0
sim1 and sim2 are 0.6201217503882138, 0.06041767949165839
cosine of pred and queen: 0.034010464334337846
Actual: athens:greece::madrid:spain, pred: week
Actual: bangkok:thailand::islamabad:pakistan, pred: rather
Actual: beijing:china::tokyo:japan, pred: coolheaded
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: near
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: year
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: dorpang
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: blaze
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: baleine
Actual: ma


 21%|███████▏                          | 26007/122310 [58:10<2:00:16, 13.34it/s][A

Actual: japan:japanese::australia:australian, pred: flaky
Actual: italy:italian::ireland:irish, pred: grameen
Actual: croatia:croatian::france:french, pred: new
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.044444444444444446
Actual: walk:walks::vanish:vanishes, pred: comte
Actual: work:works::generate:generates, pred: conventionally
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: thy
Actual: sing:sings::shuffle:shuffles, pred: vikassheel
Actual: sit:sits::go:goes, pred: connie
Actual: say:says::provide:provides, pred: wonderer
Actual: scream:screams::sing:sings, pred: riga
Actual: play:plays::listen:listens, pred: documentation
Actual: predict:predicts::search:searches, pred: achish
Actual: machine:machines::lion:lions, pred: thamer
Actual: mango:mangoes::onion:onions, pred: mier
Actual: man:men::mango:mangoes, pred: ibbotsons
Actual: melon:melons::pig:pigs, pred: illustrative
Actual: hand:hands::goat:goats, pred:


 21%|███████                          | 26014/122310 [59:10<74:48:27,  2.80s/it][A

Actual: india:rupee::denmark:krone, pred: pernicious
Accuracy is 0.011834319526627219



 21%|███████                          | 26020/122310 [59:11<58:26:39,  2.19s/it][A
 21%|███████                          | 26026/122310 [59:11<44:49:59,  1.68s/it][A
 21%|███████                          | 26038/122310 [59:12<26:58:54,  1.01s/it][A
 21%|███████                          | 26054/122310 [59:13<15:38:48,  1.71it/s][A
 21%|███████                          | 26061/122310 [59:14<12:52:30,  2.08it/s][A
 21%|███████                          | 26067/122310 [59:14<10:52:13,  2.46it/s][A
 21%|███████▏                          | 26080/122310 [59:15<7:11:33,  3.72it/s][A
 21%|███████▎                          | 26095/122310 [59:16<4:53:32,  5.46it/s][A
 21%|███████▎                          | 26112/122310 [59:17<3:26:28,  7.77it/s][A
 21%|███████▎                          | 26124/122310 [59:17<2:56:04,  9.10it/s][A
 21%|███████▎                          | 26132/122310 [59:18<2:50:01,  9.43it/s][A
 21%|███████▎                          | 26139/122310 [59:19<2:50:40,  9.39

step: 3220, loss: 105.90049754344517, epoch: 0



 21%|███████▎                          | 26202/122310 [59:26<3:50:34,  6.95it/s][A
 21%|███████▎                          | 26210/122310 [59:27<3:21:35,  7.94it/s][A
 21%|███████▎                          | 26211/122310 [59:27<4:14:58,  6.28it/s][A
 21%|███████▎                          | 26217/122310 [59:28<3:53:50,  6.85it/s][A
 21%|███████▎                          | 26225/122310 [59:29<3:19:43,  8.02it/s][A
 21%|███████▎                          | 26237/122310 [59:30<2:37:10, 10.19it/s][A
 21%|███████▎                          | 26245/122310 [59:30<2:36:36, 10.22it/s][A
 21%|███████▎                          | 26250/122310 [59:31<2:52:54,  9.26it/s][A
 21%|███████▎                          | 26266/122310 [59:32<2:03:11, 12.99it/s][A
 21%|███████▎                          | 26275/122310 [59:33<2:04:54, 12.81it/s][A
 21%|███████▎                          | 26286/122310 [59:33<1:58:38, 13.49it/s][A
 21%|███████▎                          | 26288/122310 [59:34<2:34:52, 10.33

step: 3240, loss: 109.81295627839704, epoch: 0



 22%|███████▎                          | 26361/122310 [59:40<2:02:10, 13.09it/s][A
 22%|███████▎                          | 26376/122310 [59:41<1:43:48, 15.40it/s][A
 22%|███████▎                          | 26387/122310 [59:41<1:44:17, 15.33it/s][A
 22%|███████▎                          | 26400/122310 [59:42<1:39:03, 16.14it/s][A
 22%|███████▎                          | 26404/122310 [59:43<2:03:38, 12.93it/s][A
 22%|███████▎                          | 26415/122310 [59:44<1:57:12, 13.64it/s][A
 22%|███████▎                          | 26419/122310 [59:44<2:23:08, 11.17it/s][A
 22%|███████▎                          | 26425/122310 [59:45<2:35:01, 10.31it/s][A
 22%|███████▎                          | 26434/122310 [59:46<2:26:13, 10.93it/s][A
 22%|███████▎                          | 26444/122310 [59:46<2:15:58, 11.75it/s][A
 22%|███████▎                          | 26453/122310 [59:47<2:13:08, 12.00it/s][A
 22%|███████▎                          | 26462/122310 [59:48<2:11:42, 12.13

step: 3260, loss: 103.3898156565009, epoch: 0



 22%|███████▍                          | 26545/122310 [59:54<1:52:39, 14.17it/s][A
 22%|███████▍                          | 26548/122310 [59:55<2:21:14, 11.30it/s][A
 22%|███████▍                          | 26556/122310 [59:56<2:22:01, 11.24it/s][A
 22%|███████▍                          | 26564/122310 [59:57<2:23:14, 11.14it/s][A
 22%|███████▍                          | 26569/122310 [59:57<2:40:56,  9.92it/s][A
 22%|███████▍                          | 26580/122310 [59:58<2:19:06, 11.47it/s][A
 22%|███████▍                          | 26591/122310 [59:59<2:07:10, 12.54it/s][A
 22%|██████▉                         | 26594/122310 [1:00:00<2:38:21, 10.07it/s][A
 22%|██████▉                         | 26607/122310 [1:00:00<2:08:16, 12.44it/s][A
 22%|██████▉                         | 26611/122310 [1:00:01<2:34:46, 10.31it/s][A
 22%|██████▉                         | 26622/122310 [1:00:02<2:15:44, 11.75it/s][A
 22%|██████▉                         | 26634/122310 [1:00:02<2:01:04, 13.17

step: 3280, loss: 98.37174830072378, epoch: 0



 22%|██████▉                         | 26714/122310 [1:00:09<1:46:46, 14.92it/s][A
 22%|██████▉                         | 26717/122310 [1:00:10<2:15:44, 11.74it/s][A
 22%|██████▉                         | 26728/122310 [1:00:10<2:04:43, 12.77it/s][A
 22%|██████▉                         | 26735/122310 [1:00:11<2:14:36, 11.83it/s][A
 22%|██████▉                         | 26747/122310 [1:00:12<2:00:22, 13.23it/s][A
 22%|███████                         | 26758/122310 [1:00:13<1:55:42, 13.76it/s][A
 22%|███████                         | 26766/122310 [1:00:13<2:03:22, 12.91it/s][A
 22%|███████                         | 26774/122310 [1:00:14<2:08:54, 12.35it/s][A
 22%|███████                         | 26784/122310 [1:00:15<2:04:42, 12.77it/s][A
 22%|███████                         | 26795/122310 [1:00:16<1:58:33, 13.43it/s][A
 22%|███████                         | 26798/122310 [1:00:16<2:28:56, 10.69it/s][A
 22%|███████                         | 26815/122310 [1:00:17<1:49:51, 14.49

step: 3300, loss: 105.24661027740855, epoch: 0



 22%|███████                         | 26891/122310 [1:00:24<2:31:21, 10.51it/s][A
 22%|███████                         | 26903/122310 [1:00:24<2:09:14, 12.30it/s][A
 22%|███████                         | 26914/122310 [1:00:25<2:00:46, 13.16it/s][A
 22%|███████                         | 26921/122310 [1:00:26<2:11:29, 12.09it/s][A
 22%|███████                         | 26929/122310 [1:00:26<2:15:02, 11.77it/s][A
 22%|███████                         | 26942/122310 [1:00:27<1:56:44, 13.61it/s][A
 22%|███████                         | 26957/122310 [1:00:28<1:41:09, 15.71it/s][A
 22%|███████                         | 26963/122310 [1:00:29<1:57:42, 13.50it/s][A
 22%|███████                         | 26968/122310 [1:00:29<2:18:04, 11.51it/s][A
 22%|███████                         | 26979/122310 [1:00:30<2:06:34, 12.55it/s][A
 22%|███████                         | 26985/122310 [1:00:31<2:21:47, 11.20it/s][A
 22%|███████                         | 26996/122310 [1:00:32<2:08:57, 12.32

step: 3320, loss: 152.1349507697126, epoch: 0



 22%|███████                         | 27063/122310 [1:00:38<2:29:14, 10.64it/s][A
 22%|███████                         | 27072/122310 [1:00:39<2:22:17, 11.16it/s][A
 22%|███████                         | 27090/122310 [1:00:40<1:44:07, 15.24it/s][A
 22%|███████                         | 27096/122310 [1:00:40<2:00:50, 13.13it/s][A
 22%|███████                         | 27102/122310 [1:00:41<2:15:56, 11.67it/s][A
 22%|███████                         | 27111/122310 [1:00:42<2:14:01, 11.84it/s][A
 22%|███████                         | 27120/122310 [1:00:43<2:12:16, 11.99it/s][A
 22%|███████                         | 27125/122310 [1:00:43<2:31:23, 10.48it/s][A
 22%|███████                         | 27131/122310 [1:00:44<2:41:24,  9.83it/s][A
 22%|███████                         | 27144/122310 [1:00:45<2:09:34, 12.24it/s][A
 22%|███████                         | 27150/122310 [1:00:45<2:25:06, 10.93it/s][A
 22%|███████                         | 27158/122310 [1:00:46<2:24:52, 10.95

step: 3340, loss: 111.55986681246169, epoch: 0



 22%|███████                         | 27228/122310 [1:00:53<1:57:40, 13.47it/s][A
 22%|███████▏                        | 27235/122310 [1:00:54<2:08:38, 12.32it/s][A
 22%|███████▏                        | 27247/122310 [1:00:54<1:56:45, 13.57it/s][A
 22%|███████▏                        | 27255/122310 [1:00:55<2:04:15, 12.75it/s][A
 22%|███████▏                        | 27262/122310 [1:00:56<2:14:00, 11.82it/s][A
 22%|███████▏                        | 27272/122310 [1:00:56<2:07:57, 12.38it/s][A
 22%|███████▏                        | 27279/122310 [1:00:57<2:16:49, 11.58it/s][A
 22%|███████▏                        | 27285/122310 [1:00:58<2:30:42, 10.51it/s][A
 22%|███████▏                        | 27290/122310 [1:01:00<4:27:32,  5.92it/s][A
 22%|███████▏                        | 27305/122310 [1:01:01<2:55:32,  9.02it/s][A
 22%|███████▏                        | 27311/122310 [1:01:02<2:59:32,  8.82it/s][A
 22%|███████▏                        | 27317/122310 [1:01:02<3:02:28,  8.68

step: 3360, loss: 96.62095928576913, epoch: 0



 22%|███████▏                        | 27374/122310 [1:01:07<2:54:10,  9.08it/s][A
 22%|███████▏                        | 27383/122310 [1:01:08<2:38:34,  9.98it/s][A
 22%|███████▏                        | 27392/122310 [1:01:09<2:28:10, 10.68it/s][A
 22%|███████▏                        | 27399/122310 [1:01:10<2:33:26, 10.31it/s][A
 22%|███████▏                        | 27404/122310 [1:01:10<2:50:38,  9.27it/s][A
 22%|███████▏                        | 27412/122310 [1:01:11<2:42:12,  9.75it/s][A
 22%|███████▏                        | 27424/122310 [1:01:12<2:14:35, 11.75it/s][A
 22%|███████▏                        | 27433/122310 [1:01:13<2:12:43, 11.91it/s][A
 22%|███████▏                        | 27448/122310 [1:01:13<1:49:05, 14.49it/s][A
 22%|███████▏                        | 27451/122310 [1:01:14<2:18:55, 11.38it/s][A
 22%|███████▏                        | 27468/122310 [1:01:15<2:18:01, 11.45it/s][A
 22%|███████▏                        | 27471/122310 [1:01:16<2:42:25,  9.73

step: 3380, loss: 110.0119061642883, epoch: 0



 23%|███████▏                        | 27533/122310 [1:01:22<2:59:07,  8.82it/s][A
 23%|███████▏                        | 27549/122310 [1:01:23<2:02:07, 12.93it/s][A
 23%|███████▏                        | 27560/122310 [1:01:23<1:55:00, 13.73it/s][A
 23%|███████▏                        | 27569/122310 [1:01:24<1:57:37, 13.42it/s][A
 23%|███████▏                        | 27574/122310 [1:01:25<2:17:00, 11.52it/s][A
 23%|███████▏                        | 27576/122310 [1:01:25<2:56:13,  8.96it/s][A
 23%|███████▏                        | 27577/122310 [1:01:26<3:55:47,  6.70it/s][A
 23%|███████▏                        | 27586/122310 [1:01:27<3:05:30,  8.51it/s][A
 23%|███████▏                        | 27600/122310 [1:01:27<2:12:46, 11.89it/s][A
 23%|███████▏                        | 27607/122310 [1:01:28<2:19:58, 11.28it/s][A
 23%|███████▏                        | 27617/122310 [1:01:29<2:09:59, 12.14it/s][A
 23%|███████▏                        | 27625/122310 [1:01:30<2:12:36, 11.90

step: 3400, loss: 94.00633818966293, epoch: 0



 23%|███████▏                        | 27705/122310 [1:01:36<2:04:43, 12.64it/s][A
 23%|███████▎                        | 27713/122310 [1:01:37<2:08:53, 12.23it/s][A
 23%|███████▎                        | 27718/122310 [1:01:37<2:28:06, 10.64it/s][A
 23%|███████▎                        | 27720/122310 [1:01:38<3:08:52,  8.35it/s][A
 23%|███████▎                        | 27729/122310 [1:01:39<2:43:37,  9.63it/s][A
 23%|███████▎                        | 27737/122310 [1:01:40<2:35:29, 10.14it/s][A
 23%|███████▎                        | 27743/122310 [1:01:41<3:31:42,  7.44it/s][A
 23%|███████▎                        | 27751/122310 [1:01:43<4:40:18,  5.62it/s][A
 23%|███████▎                        | 27758/122310 [1:01:44<4:04:03,  6.46it/s][A
 23%|███████▎                        | 27769/122310 [1:01:44<3:07:08,  8.42it/s][A
 23%|███████▎                        | 27784/122310 [1:01:45<2:17:45, 11.44it/s][A
 23%|███████▎                        | 27796/122310 [1:01:46<2:03:09, 12.79

step: 3420, loss: 96.91558299727622, epoch: 0



 23%|███████▎                        | 27849/122310 [1:01:50<2:05:30, 12.54it/s][A
 23%|███████▎                        | 27857/122310 [1:01:51<2:08:56, 12.21it/s][A
 23%|███████▎                        | 27866/122310 [1:01:52<2:09:56, 12.11it/s][A
 23%|███████▎                        | 27870/122310 [1:01:52<2:37:08, 10.02it/s][A
 23%|███████▎                        | 27875/122310 [1:01:53<2:54:41,  9.01it/s][A
 23%|███████▎                        | 27886/122310 [1:01:54<2:24:32, 10.89it/s][A
 23%|███████▎                        | 27891/122310 [1:01:55<2:41:27,  9.75it/s][A
 23%|███████▎                        | 27904/122310 [1:01:55<2:07:56, 12.30it/s][A
 23%|███████▎                        | 27912/122310 [1:01:56<2:11:33, 11.96it/s][A
 23%|███████▎                        | 27919/122310 [1:01:57<2:19:40, 11.26it/s][A
 23%|███████▎                        | 27928/122310 [1:01:57<2:14:08, 11.73it/s][A
 23%|███████▎                        | 27936/122310 [1:01:58<2:16:48, 11.50

step: 3440, loss: 97.99686513382068, epoch: 0



 23%|███████▎                        | 28012/122310 [1:02:05<2:03:08, 12.76it/s][A
 23%|███████▎                        | 28019/122310 [1:02:05<2:12:00, 11.91it/s][A
 23%|███████▎                        | 28023/122310 [1:02:06<2:37:03, 10.01it/s][A
 23%|███████▎                        | 28029/122310 [1:02:07<3:34:04,  7.34it/s][A
 23%|███████▎                        | 28032/122310 [1:02:08<3:58:32,  6.59it/s][A
 23%|███████▎                        | 28044/122310 [1:02:09<2:49:06,  9.29it/s][A
 23%|███████▎                        | 28049/122310 [1:02:09<3:01:33,  8.65it/s][A
 23%|███████▎                        | 28057/122310 [1:02:10<2:47:29,  9.38it/s][A
 23%|███████▎                        | 28069/122310 [1:02:11<2:16:26, 11.51it/s][A
 23%|███████▎                        | 28076/122310 [1:02:12<2:22:21, 11.03it/s][A
 23%|███████▎                        | 28085/122310 [1:02:12<2:16:47, 11.48it/s][A
 23%|███████▎                        | 28091/122310 [1:02:13<2:28:30, 10.57

step: 3460, loss: 105.52910614229535, epoch: 0



 23%|███████▎                        | 28147/122310 [1:02:19<2:44:11,  9.56it/s][A
 23%|███████▎                        | 28159/122310 [1:02:19<2:14:04, 11.70it/s][A
 23%|███████▎                        | 28162/122310 [1:02:20<2:45:25,  9.49it/s][A
 23%|███████▎                        | 28169/122310 [1:02:21<2:44:04,  9.56it/s][A
 23%|███████▎                        | 28179/122310 [1:02:22<2:24:07, 10.89it/s][A
 23%|███████▍                        | 28189/122310 [1:02:22<2:12:22, 11.85it/s][A
 23%|███████▍                        | 28195/122310 [1:02:23<2:25:06, 10.81it/s][A
 23%|███████▍                        | 28203/122310 [1:02:24<2:23:17, 10.95it/s][A
 23%|███████▍                        | 28213/122310 [1:02:24<2:11:59, 11.88it/s][A
 23%|███████▍                        | 28226/122310 [1:02:25<1:53:34, 13.81it/s][A
 23%|███████▍                        | 28237/122310 [1:02:26<1:49:51, 14.27it/s][A
 23%|███████▍                        | 28259/122310 [1:02:27<1:21:26, 19.25

step: 3480, loss: 92.7419651724313, epoch: 0



 23%|███████▍                        | 28333/122310 [1:02:33<2:12:54, 11.79it/s][A
 23%|███████▍                        | 28344/122310 [1:02:34<2:01:42, 12.87it/s][A
 23%|███████▍                        | 28348/122310 [1:02:34<2:26:50, 10.66it/s][A
 23%|███████▍                        | 28356/122310 [1:02:35<2:23:58, 10.88it/s][A
 23%|███████▍                        | 28369/122310 [1:02:36<1:59:58, 13.05it/s][A
 23%|███████▍                        | 28385/122310 [1:02:37<1:39:29, 15.73it/s][A
 23%|███████▍                        | 28388/122310 [1:02:37<2:07:11, 12.31it/s][A
 23%|███████▍                        | 28394/122310 [1:02:38<2:20:28, 11.14it/s][A
 23%|███████▍                        | 28402/122310 [1:02:39<2:21:10, 11.09it/s][A
 23%|███████▍                        | 28406/122310 [1:02:39<2:44:40,  9.50it/s][A
 23%|███████▍                        | 28412/122310 [1:02:40<2:50:37,  9.17it/s][A
 23%|███████▍                        | 28423/122310 [1:02:41<2:21:44, 11.04

step: 3500, loss: 83.817219081627, epoch: 0



 23%|███████▍                        | 28493/122310 [1:02:47<2:51:28,  9.12it/s][A
 23%|███████▍                        | 28498/122310 [1:02:48<3:04:47,  8.46it/s][A
 23%|███████▍                        | 28501/122310 [1:02:49<3:36:24,  7.22it/s][A
 23%|███████▍                        | 28513/122310 [1:02:50<2:34:35, 10.11it/s][A
 23%|███████▍                        | 28519/122310 [1:02:50<2:42:44,  9.60it/s][A
 23%|███████▍                        | 28530/122310 [1:02:51<2:17:44, 11.35it/s][A
 23%|███████▍                        | 28535/122310 [1:02:52<2:36:27,  9.99it/s][A
 23%|███████▍                        | 28543/122310 [1:02:52<2:30:45, 10.37it/s][A
 23%|███████▍                        | 28558/122310 [1:02:53<1:56:19, 13.43it/s][A
 23%|███████▍                        | 28562/122310 [1:02:54<2:22:13, 10.99it/s][A
 23%|███████▍                        | 28571/122310 [1:02:55<2:18:47, 11.26it/s][A
 23%|███████▍                        | 28580/122310 [1:02:55<2:16:18, 11.46

step: 3520, loss: 112.23166695125907, epoch: 0



 23%|███████▍                        | 28659/122310 [1:03:02<2:17:37, 11.34it/s][A
 23%|███████▍                        | 28662/122310 [1:03:03<2:44:02,  9.51it/s][A
 23%|███████▌                        | 28678/122310 [1:03:03<2:00:09, 12.99it/s][A
 23%|███████▌                        | 28688/122310 [1:03:04<1:57:50, 13.24it/s][A
 23%|███████▌                        | 28697/122310 [1:03:05<1:59:39, 13.04it/s][A
 23%|███████▌                        | 28702/122310 [1:03:06<2:17:36, 11.34it/s][A
 23%|███████▌                        | 28714/122310 [1:03:06<2:00:44, 12.92it/s][A
 23%|███████▌                        | 28725/122310 [1:03:07<1:54:29, 13.62it/s][A
 23%|███████▌                        | 28734/122310 [1:03:08<1:57:01, 13.33it/s][A
 23%|███████▌                        | 28739/122310 [1:03:08<2:16:07, 11.46it/s][A
 24%|███████▌                        | 28744/122310 [1:03:09<2:34:19, 10.11it/s][A
 24%|███████▌                        | 28752/122310 [1:03:10<2:30:01, 10.39

step: 3540, loss: 91.29229468434306, epoch: 0



 24%|███████▌                        | 28825/122310 [1:03:16<2:09:56, 11.99it/s][A
 24%|███████▌                        | 28839/122310 [1:03:17<1:48:53, 14.31it/s][A
 24%|███████▌                        | 28852/122310 [1:03:18<1:40:41, 15.47it/s][A
 24%|███████▌                        | 28859/122310 [1:03:18<1:53:11, 13.76it/s][A
 24%|███████▌                        | 28863/122310 [1:03:19<2:17:43, 11.31it/s][A
 24%|███████▌                        | 28873/122310 [1:03:20<2:09:19, 12.04it/s][A
 24%|███████▌                        | 28876/122310 [1:03:21<2:40:23,  9.71it/s][A
 24%|███████▌                        | 28888/122310 [1:03:21<2:11:15, 11.86it/s][A
 24%|███████▌                        | 28897/122310 [1:03:22<2:08:56, 12.08it/s][A
 24%|███████▌                        | 28913/122310 [1:03:23<1:42:40, 15.16it/s][A
 24%|███████▌                        | 28921/122310 [1:03:24<1:51:22, 13.97it/s][A
 24%|███████▌                        | 28932/122310 [1:03:24<1:47:51, 14.43

step: 3560, loss: 85.07533830732143, epoch: 0



 24%|███████▌                        | 28999/122310 [1:03:31<3:06:16,  8.35it/s][A
 24%|███████▌                        | 29007/122310 [1:03:31<2:50:54,  9.10it/s][A
 24%|███████▌                        | 29014/122310 [1:03:32<2:47:37,  9.28it/s][A
 24%|███████▌                        | 29018/122310 [1:03:33<3:10:53,  8.15it/s][A
 24%|███████▌                        | 29021/122310 [1:03:34<3:45:31,  6.89it/s][A
 24%|███████▌                        | 29024/122310 [1:03:34<4:19:30,  5.99it/s][A
 24%|███████▌                        | 29033/122310 [1:03:35<3:16:29,  7.91it/s][A
 24%|███████▌                        | 29041/122310 [1:03:36<2:57:02,  8.78it/s][A
 24%|███████▌                        | 29047/122310 [1:03:37<3:01:30,  8.56it/s][A
 24%|███████▌                        | 29050/122310 [1:03:37<3:36:17,  7.19it/s][A
 24%|███████▌                        | 29058/122310 [1:03:38<3:08:58,  8.22it/s][A
 24%|███████▌                        | 29061/122310 [1:03:39<3:43:38,  6.95

step: 3580, loss: 101.30131342618894, epoch: 0



 24%|███████▌                        | 29128/122310 [1:03:46<2:45:23,  9.39it/s][A
 24%|███████▌                        | 29138/122310 [1:03:47<2:27:14, 10.55it/s][A
 24%|███████▋                        | 29148/122310 [1:03:47<2:16:36, 11.37it/s][A
 24%|███████▋                        | 29162/122310 [1:03:48<1:55:06, 13.49it/s][A
 24%|███████▋                        | 29169/122310 [1:03:49<2:07:03, 12.22it/s][A
 24%|███████▋                        | 29175/122310 [1:03:50<2:20:45, 11.03it/s][A
 24%|███████▋                        | 29185/122310 [1:03:50<2:11:10, 11.83it/s][A
 24%|███████▋                        | 29188/122310 [1:03:51<2:43:51,  9.47it/s][A
 24%|███████▋                        | 29190/122310 [1:03:52<3:26:11,  7.53it/s][A
 24%|███████▋                        | 29204/122310 [1:03:52<2:21:15, 10.98it/s][A
 24%|███████▋                        | 29212/122310 [1:03:53<2:21:42, 10.95it/s][A
 24%|███████▋                        | 29218/122310 [1:03:54<2:33:24, 10.11

step: 3600, loss: 135.34573777421392, epoch: 0
sim1 and sim2 are 0.6669226363139764, 0.043603447988451345
cosine of pred and queen: 0.012116745333385567
Actual: athens:greece::madrid:spain, pred: week
Actual: bangkok:thailand::islamabad:pakistan, pred: last
Actual: beijing:china::tokyo:japan, pred: coolheaded
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: near
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: government
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: dorpang
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: tuareg
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: greece
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: regularize



 24%|███████▋                        | 29284/122310 [1:04:11<2:16:21, 11.37it/s][A

Actual: india:delhi::serbia:belgrade, pred: confuse
Actual: spain:spanish::korea:korean, pred: run
Actual: syria:arabic::australia:english, pred: friday
Actual: mouse:squeak::elephant:trumpet, pred: mongrel
Actual: algeria:dinar::usa:dollar, pred: steffen
Actual: argentina:peso::russia:ruble, pred: bank
Actual: armenia:dram::iran:rial, pred: sowest
Actual: brazil:real::sweden:krona, pred: cup
Actual: europe:euro::japan:yen, pred: year
Actual: india:rupee::denmark:krone, pred: nestorius
Actual: usa:dollar::nigeria:naira, pred: irreconcilable
Actual: switzerland:swiss::spain:spanish, pred: euro
Actual: thailand:thai::india:indian, pred: expresso
Actual: sweden:swedish::netherlands:dutch, pred: relatively
Actual: russia:russian::germany:german, pred: europe
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: least
Actual: norway:norwegian::mexico:mexican, pred: nought
Actual: japan:japanese::australia:australian, pred: keane
Actua


 24%|███████▍                       | 29289/122310 [1:05:16<83:24:27,  3.23s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.005917159763313609



 24%|███████▍                       | 29293/122310 [1:05:17<68:26:52,  2.65s/it][A
 24%|███████▍                       | 29307/122310 [1:05:18<35:45:40,  1.38s/it][A
 24%|███████▍                       | 29321/122310 [1:05:18<21:37:54,  1.19it/s][A
 24%|███████▍                       | 29327/122310 [1:05:19<17:54:24,  1.44it/s][A
 24%|███████▍                       | 29335/122310 [1:05:20<13:35:38,  1.90it/s][A
 24%|███████▍                       | 29344/122310 [1:05:20<10:03:06,  2.57it/s][A
 24%|███████▋                        | 29351/122310 [1:05:21<8:09:25,  3.17it/s][A
 24%|███████▋                        | 29354/122310 [1:05:22<7:53:13,  3.27it/s][A
 24%|███████▋                        | 29366/122310 [1:05:23<5:07:36,  5.04it/s][A
 24%|███████▋                        | 29373/122310 [1:05:23<4:28:12,  5.78it/s][A
 24%|███████▋                        | 29385/122310 [1:05:24<3:18:56,  7.78it/s][A
 24%|███████▋                        | 29398/122310 [1:05:25<2:35:55,  9.93

step: 3620, loss: 108.11284137941439, epoch: 0



 24%|███████▋                        | 29462/122310 [1:05:31<2:42:36,  9.52it/s][A
 24%|███████▋                        | 29474/122310 [1:05:32<2:17:51, 11.22it/s][A
 24%|███████▋                        | 29485/122310 [1:05:33<2:07:31, 12.13it/s][A
 24%|███████▋                        | 29497/122310 [1:05:34<1:56:29, 13.28it/s][A
 24%|███████▋                        | 29502/122310 [1:05:34<2:15:09, 11.45it/s][A
 24%|███████▋                        | 29506/122310 [1:05:35<2:38:00,  9.79it/s][A
 24%|███████▋                        | 29514/122310 [1:05:36<2:32:11, 10.16it/s][A
 24%|███████▋                        | 29520/122310 [1:05:37<2:42:16,  9.53it/s][A
 24%|███████▋                        | 29523/122310 [1:05:37<3:14:54,  7.93it/s][A
 24%|███████▋                        | 29535/122310 [1:05:38<2:28:26, 10.42it/s][A
 24%|███████▋                        | 29538/122310 [1:05:39<3:04:05,  8.40it/s][A
 24%|███████▋                        | 29558/122310 [1:05:40<1:51:47, 13.83

step: 3640, loss: 109.82656049604182, epoch: 0



 24%|███████▊                        | 29649/122310 [1:05:46<1:54:16, 13.51it/s][A
 24%|███████▊                        | 29655/122310 [1:05:47<2:09:07, 11.96it/s][A
 24%|███████▊                        | 29663/122310 [1:05:48<2:11:41, 11.73it/s][A
 24%|███████▊                        | 29677/122310 [1:05:49<1:50:31, 13.97it/s][A
 24%|███████▊                        | 29683/122310 [1:05:49<2:05:33, 12.30it/s][A
 24%|███████▊                        | 29692/122310 [1:05:50<2:05:14, 12.33it/s][A
 24%|███████▊                        | 29704/122310 [1:05:51<1:53:38, 13.58it/s][A
 24%|███████▊                        | 29714/122310 [1:05:51<1:53:25, 13.61it/s][A
 24%|███████▊                        | 29719/122310 [1:05:52<2:12:50, 11.62it/s][A
 24%|███████▊                        | 29735/122310 [1:05:53<1:44:51, 14.71it/s][A
 24%|███████▊                        | 29744/122310 [1:05:54<1:49:56, 14.03it/s][A
 24%|███████▊                        | 29749/122310 [1:05:54<2:09:32, 11.91

step: 3660, loss: 101.30568032151605, epoch: 0



 24%|███████▊                        | 29834/122310 [1:06:01<1:52:25, 13.71it/s][A
 24%|███████▊                        | 29841/122310 [1:06:02<2:03:20, 12.49it/s][A
 24%|███████▊                        | 29848/122310 [1:06:02<2:12:26, 11.63it/s][A
 24%|███████▊                        | 29859/122310 [1:06:03<2:01:09, 12.72it/s][A
 24%|███████▊                        | 29868/122310 [1:06:04<2:02:02, 12.62it/s][A
 24%|███████▊                        | 29879/122310 [1:06:05<1:55:03, 13.39it/s][A
 24%|███████▊                        | 29884/122310 [1:06:05<2:14:24, 11.46it/s][A
 24%|███████▊                        | 29895/122310 [1:06:06<2:02:55, 12.53it/s][A
 24%|███████▊                        | 29899/122310 [1:06:07<2:27:19, 10.45it/s][A
 24%|███████▊                        | 29905/122310 [1:06:07<2:37:18,  9.79it/s][A
 24%|███████▊                        | 29916/122310 [1:06:08<2:15:00, 11.41it/s][A
 24%|███████▊                        | 29928/122310 [1:06:09<1:59:28, 12.89

step: 3680, loss: 165.38800107920508, epoch: 0



 25%|███████▊                        | 30007/122310 [1:06:15<1:57:41, 13.07it/s][A
 25%|███████▊                        | 30015/122310 [1:06:16<2:04:33, 12.35it/s][A
 25%|███████▊                        | 30017/122310 [1:06:17<2:40:58,  9.56it/s][A
 25%|███████▊                        | 30022/122310 [1:06:18<2:55:30,  8.76it/s][A
 25%|███████▊                        | 30031/122310 [1:06:18<2:35:55,  9.86it/s][A
 25%|███████▊                        | 30037/122310 [1:06:19<2:44:11,  9.37it/s][A
 25%|███████▊                        | 30040/122310 [1:06:20<3:16:51,  7.81it/s][A
 25%|███████▊                        | 30050/122310 [1:06:21<2:39:59,  9.61it/s][A
 25%|███████▊                        | 30061/122310 [1:06:21<2:16:33, 11.26it/s][A
 25%|███████▊                        | 30068/122310 [1:06:22<2:22:38, 10.78it/s][A
 25%|███████▊                        | 30074/122310 [1:06:23<2:33:29, 10.02it/s][A
 25%|███████▊                        | 30080/122310 [1:06:23<2:41:55,  9.49

step: 3700, loss: 91.03359253501687, epoch: 0



 25%|███████▉                        | 30168/122310 [1:06:30<2:24:46, 10.61it/s][A
 25%|███████▉                        | 30172/122310 [1:06:31<2:48:54,  9.09it/s][A
 25%|███████▉                        | 30177/122310 [1:06:31<3:02:12,  8.43it/s][A
 25%|███████▉                        | 30188/122310 [1:06:32<2:27:11, 10.43it/s][A
 25%|███████▉                        | 30200/122310 [1:06:33<2:05:26, 12.24it/s][A
 25%|███████▉                        | 30207/122310 [1:06:34<2:13:41, 11.48it/s][A
 25%|███████▉                        | 30218/122310 [1:06:34<2:01:56, 12.59it/s][A
 25%|███████▉                        | 30233/122310 [1:06:35<1:42:25, 14.98it/s][A
 25%|███████▉                        | 30241/122310 [1:06:36<1:51:24, 13.77it/s][A
 25%|███████▉                        | 30245/122310 [1:06:37<2:16:35, 11.23it/s][A
 25%|███████▉                        | 30250/122310 [1:06:37<2:34:11,  9.95it/s][A
 25%|███████▉                        | 30257/122310 [1:06:38<2:36:11,  9.82

step: 3720, loss: 99.31217588049356, epoch: 0



 25%|███████▉                        | 30329/122310 [1:06:45<2:02:44, 12.49it/s][A
 25%|███████▉                        | 30338/122310 [1:06:45<2:03:10, 12.44it/s][A
 25%|███████▉                        | 30346/122310 [1:06:46<2:07:31, 12.02it/s][A
 25%|███████▉                        | 30351/122310 [1:06:47<2:25:55, 10.50it/s][A
 25%|███████▉                        | 30372/122310 [1:06:47<1:36:15, 15.92it/s][A
 25%|███████▉                        | 30376/122310 [1:06:48<1:59:29, 12.82it/s][A
 25%|███████▉                        | 30384/122310 [1:06:49<2:04:46, 12.28it/s][A
 25%|███████▉                        | 30396/122310 [1:06:50<1:52:59, 13.56it/s][A
 25%|███████▉                        | 30403/122310 [1:06:50<2:03:49, 12.37it/s][A
 25%|███████▉                        | 30417/122310 [1:06:51<1:46:08, 14.43it/s][A
 25%|███████▉                        | 30423/122310 [1:06:52<2:01:56, 12.56it/s][A
 25%|███████▉                        | 30429/122310 [1:06:53<2:16:17, 11.24

step: 3740, loss: 111.9405129283775, epoch: 0



 25%|███████▉                        | 30507/122310 [1:06:59<2:01:50, 12.56it/s][A
 25%|███████▉                        | 30518/122310 [1:07:00<1:54:54, 13.31it/s][A
 25%|███████▉                        | 30527/122310 [1:07:01<1:57:20, 13.04it/s][A
 25%|███████▉                        | 30543/122310 [1:07:01<1:37:05, 15.75it/s][A
 25%|███████▉                        | 30551/122310 [1:07:02<1:46:32, 14.35it/s][A
 25%|███████▉                        | 30556/122310 [1:07:03<2:05:59, 12.14it/s][A
 25%|███████▉                        | 30575/122310 [1:07:04<1:33:50, 16.29it/s][A
 25%|████████                        | 30579/122310 [1:07:04<1:56:36, 13.11it/s][A
 25%|████████                        | 30590/122310 [1:07:05<1:51:05, 13.76it/s][A
 25%|████████                        | 30607/122310 [1:07:06<1:31:38, 16.68it/s][A
 25%|████████                        | 30625/122310 [1:07:06<1:19:59, 19.10it/s][A
 25%|████████                        | 30639/122310 [1:07:07<1:19:44, 19.16

step: 3760, loss: 118.13758518065329, epoch: 0



 25%|████████                        | 30715/122310 [1:07:14<2:49:33,  9.00it/s][A
 25%|████████                        | 30722/122310 [1:07:14<2:45:53,  9.20it/s][A
 25%|████████                        | 30734/122310 [1:07:15<2:13:30, 11.43it/s][A
 25%|████████                        | 30740/122310 [1:07:16<2:25:38, 10.48it/s][A
 25%|████████                        | 30746/122310 [1:07:17<2:35:42,  9.80it/s][A
 25%|████████                        | 30752/122310 [1:07:17<2:43:00,  9.36it/s][A
 25%|████████                        | 30761/122310 [1:07:18<2:28:37, 10.27it/s][A
 25%|████████                        | 30772/122310 [1:07:19<2:09:50, 11.75it/s][A
 25%|████████                        | 30777/122310 [1:07:19<2:28:07, 10.30it/s][A
 25%|████████                        | 30791/122310 [1:07:20<1:57:38, 12.97it/s][A
 25%|████████                        | 30800/122310 [1:07:21<2:00:48, 12.62it/s][A
 25%|████████                        | 30812/122310 [1:07:22<1:51:34, 13.67

step: 3780, loss: 97.93229467302743, epoch: 0



 25%|████████                        | 30872/122310 [1:07:28<2:36:35,  9.73it/s][A
 25%|████████                        | 30876/122310 [1:07:29<2:59:33,  8.49it/s][A
 25%|████████                        | 30883/122310 [1:07:30<2:55:12,  8.70it/s][A
 25%|████████                        | 30892/122310 [1:07:31<2:38:22,  9.62it/s][A
 25%|████████                        | 30896/122310 [1:07:31<3:05:21,  8.22it/s][A
 25%|████████                        | 30915/122310 [1:07:32<1:55:21, 13.20it/s][A
 25%|████████                        | 30925/122310 [1:07:33<1:57:00, 13.02it/s][A
 25%|████████                        | 30933/122310 [1:07:34<2:04:32, 12.23it/s][A
 25%|████████                        | 30946/122310 [1:07:35<1:51:56, 13.60it/s][A
 25%|████████                        | 30959/122310 [1:07:35<1:45:56, 14.37it/s][A
 25%|████████                        | 30970/122310 [1:07:36<1:47:09, 14.21it/s][A
 25%|████████                        | 30976/122310 [1:07:37<2:05:09, 12.16

step: 3800, loss: 95.91826421903606, epoch: 0



 25%|████████▏                       | 31079/122310 [1:07:45<2:42:29,  9.36it/s][A
 25%|████████▏                       | 31087/122310 [1:07:46<2:39:15,  9.55it/s][A
 25%|████████▏                       | 31096/122310 [1:07:46<2:29:53, 10.14it/s][A
 25%|████████▏                       | 31107/122310 [1:07:47<2:15:45, 11.20it/s][A
 25%|████████▏                       | 31114/122310 [1:07:48<2:24:21, 10.53it/s][A
 25%|████████▏                       | 31118/122310 [1:07:49<2:50:58,  8.89it/s][A
 25%|████████▏                       | 31133/122310 [1:07:50<2:08:58, 11.78it/s][A
 25%|████████▏                       | 31140/122310 [1:07:50<2:19:04, 10.93it/s][A
 25%|████████▏                       | 31148/122310 [1:07:51<2:21:39, 10.73it/s][A
 25%|████████▏                       | 31165/122310 [1:07:52<1:48:14, 14.03it/s][A
 25%|████████▏                       | 31177/122310 [1:07:53<1:44:27, 14.54it/s][A
 25%|████████▏                       | 31182/122310 [1:07:53<2:05:11, 12.13

step: 3820, loss: 90.65974280121664, epoch: 0



 26%|████████▏                       | 31220/122310 [1:08:00<4:03:08,  6.24it/s][A
 26%|████████▏                       | 31231/122310 [1:08:01<2:56:57,  8.58it/s][A
 26%|████████▏                       | 31245/122310 [1:08:01<2:19:48, 10.86it/s][A
 26%|████████▏                       | 31253/122310 [1:08:02<2:31:49, 10.00it/s][A
 26%|████████▏                       | 31257/122310 [1:08:03<3:02:11,  8.33it/s][A
 26%|████████▏                       | 31260/122310 [1:08:04<3:36:53,  7.00it/s][A
 26%|████████▏                       | 31266/122310 [1:08:05<3:28:26,  7.28it/s][A
 26%|████████▏                       | 31280/122310 [1:08:06<2:25:19, 10.44it/s][A
 26%|████████▏                       | 31283/122310 [1:08:06<2:57:29,  8.55it/s][A
 26%|████████▏                       | 31285/122310 [1:08:07<3:45:05,  6.74it/s][A
 26%|████████▏                       | 31293/122310 [1:08:08<3:12:11,  7.89it/s][A
 26%|████████▏                       | 31299/122310 [1:08:09<3:12:45,  7.87

step: 3840, loss: 120.78487911967524, epoch: 0



 26%|████████▏                       | 31392/122310 [1:08:16<2:03:40, 12.25it/s][A
 26%|████████▏                       | 31400/122310 [1:08:16<2:08:29, 11.79it/s][A
 26%|████████▏                       | 31421/122310 [1:08:17<1:31:12, 16.61it/s][A
 26%|████████▏                       | 31428/122310 [1:08:18<1:44:52, 14.44it/s][A
 26%|████████▏                       | 31438/122310 [1:08:19<1:47:26, 14.10it/s][A
 26%|████████▏                       | 31446/122310 [1:08:19<1:56:14, 13.03it/s][A
 26%|████████▏                       | 31451/122310 [1:08:20<2:15:22, 11.19it/s][A
 26%|████████▏                       | 31463/122310 [1:08:21<2:02:31, 12.36it/s][A
 26%|████████▏                       | 31471/122310 [1:08:22<2:07:32, 11.87it/s][A
 26%|████████▏                       | 31481/122310 [1:08:22<2:03:24, 12.27it/s][A
 26%|████████▏                       | 31486/122310 [1:08:23<2:21:44, 10.68it/s][A
 26%|████████▏                       | 31496/122310 [1:08:24<2:11:57, 11.47

step: 3860, loss: 92.18407632576563, epoch: 0



 26%|████████▎                       | 31599/122310 [1:08:31<1:35:02, 15.91it/s][A
 26%|████████▎                       | 31607/122310 [1:08:31<1:46:52, 14.14it/s][A
 26%|████████▎                       | 31618/122310 [1:08:32<1:47:29, 14.06it/s][A
 26%|████████▎                       | 31621/122310 [1:08:33<2:17:51, 10.96it/s][A
 26%|████████▎                       | 31629/122310 [1:08:34<2:21:44, 10.66it/s][A
 26%|████████▎                       | 31631/122310 [1:08:34<3:00:34,  8.37it/s][A
 26%|████████▎                       | 31638/122310 [1:08:35<2:56:56,  8.54it/s][A
 26%|████████▎                       | 31649/122310 [1:08:37<3:14:33,  7.77it/s][A
 26%|████████▎                       | 31655/122310 [1:08:38<3:14:46,  7.76it/s][A
 26%|████████▎                       | 31661/122310 [1:08:38<3:15:30,  7.73it/s][A
 26%|████████▎                       | 31669/122310 [1:08:39<2:59:31,  8.42it/s][A
 26%|████████▎                       | 31678/122310 [1:08:40<2:43:52,  9.22

step: 3880, loss: 97.66894799575225, epoch: 0



 26%|████████▎                       | 31740/122310 [1:08:46<2:21:37, 10.66it/s][A
 26%|████████▎                       | 31754/122310 [1:08:47<1:55:59, 13.01it/s][A
 26%|████████▎                       | 31762/122310 [1:08:48<2:02:32, 12.32it/s][A
 26%|████████▎                       | 31766/122310 [1:08:48<2:26:46, 10.28it/s][A
 26%|████████▎                       | 31778/122310 [1:08:49<2:05:33, 12.02it/s][A
 26%|████████▎                       | 31788/122310 [1:08:50<2:01:34, 12.41it/s][A
 26%|████████▎                       | 31798/122310 [1:08:50<1:58:16, 12.75it/s][A
 26%|████████▎                       | 31806/122310 [1:08:51<2:05:57, 11.98it/s][A
 26%|████████▎                       | 31816/122310 [1:08:52<2:02:30, 12.31it/s][A
 26%|████████▎                       | 31822/122310 [1:08:53<2:18:10, 10.92it/s][A
 26%|████████▎                       | 31834/122310 [1:08:54<2:02:51, 12.27it/s][A
 26%|████████▎                       | 31844/122310 [1:08:54<2:05:30, 12.01

step: 3900, loss: 155.3625259282947, epoch: 0



 26%|████████▎                       | 31933/122310 [1:09:02<2:06:06, 11.94it/s][A
 26%|████████▎                       | 31941/122310 [1:09:02<2:10:04, 11.58it/s][A
 26%|████████▎                       | 31951/122310 [1:09:03<2:06:43, 11.88it/s][A
 26%|████████▎                       | 31956/122310 [1:09:04<2:21:31, 10.64it/s][A
 26%|████████▎                       | 31961/122310 [1:09:05<2:37:19,  9.57it/s][A
 26%|████████▎                       | 31974/122310 [1:09:05<2:10:41, 11.52it/s][A
 26%|████████▎                       | 31987/122310 [1:09:06<1:55:43, 13.01it/s][A
 26%|████████▎                       | 31997/122310 [1:09:07<1:57:08, 12.85it/s][A
 26%|████████▍                       | 32012/122310 [1:09:08<1:46:50, 14.09it/s][A
 26%|████████▍                       | 32026/122310 [1:09:09<1:39:46, 15.08it/s][A
 26%|████████▍                       | 32035/122310 [1:09:09<1:48:01, 13.93it/s][A
 26%|████████▍                       | 32037/122310 [1:09:10<2:23:53, 10.46

step: 3920, loss: 83.05749686575786, epoch: 0



 26%|████████▍                       | 32113/122310 [1:09:18<3:18:17,  7.58it/s][A
 26%|████████▍                       | 32117/122310 [1:09:18<3:35:13,  6.98it/s][A
 26%|████████▍                       | 32122/122310 [1:09:19<3:37:24,  6.91it/s][A
 26%|████████▍                       | 32130/122310 [1:09:20<3:05:03,  8.12it/s][A
 26%|████████▍                       | 32131/122310 [1:09:20<4:10:05,  6.01it/s][A
 26%|████████▍                       | 32138/122310 [1:09:21<3:36:33,  6.94it/s][A
 26%|████████▍                       | 32147/122310 [1:09:22<3:01:58,  8.26it/s][A
 26%|████████▍                       | 32160/122310 [1:09:23<2:16:38, 11.00it/s][A
 26%|████████▍                       | 32170/122310 [1:09:24<2:07:02, 11.83it/s][A
 26%|████████▍                       | 32181/122310 [1:09:24<1:56:51, 12.85it/s][A
 26%|████████▍                       | 32190/122310 [1:09:25<1:57:37, 12.77it/s][A
 26%|████████▍                       | 32208/122310 [1:09:26<1:31:42, 16.37

step: 3940, loss: 152.5068935922016, epoch: 0



 26%|████████▍                       | 32280/122310 [1:09:32<2:00:35, 12.44it/s][A
 26%|████████▍                       | 32286/122310 [1:09:33<2:15:27, 11.08it/s][A
 26%|████████▍                       | 32295/122310 [1:09:34<2:10:13, 11.52it/s][A
 26%|████████▍                       | 32304/122310 [1:09:34<2:08:10, 11.70it/s][A
 26%|████████▍                       | 32306/122310 [1:09:35<2:46:53,  8.99it/s][A
 26%|████████▍                       | 32314/122310 [1:09:36<2:41:23,  9.29it/s][A
 26%|████████▍                       | 32323/122310 [1:09:37<2:31:12,  9.92it/s][A
 26%|████████▍                       | 32332/122310 [1:09:37<2:24:29, 10.38it/s][A
 26%|████████▍                       | 32344/122310 [1:09:38<2:03:36, 12.13it/s][A
 26%|████████▍                       | 32355/122310 [1:09:39<1:56:18, 12.89it/s][A
 26%|████████▍                       | 32359/122310 [1:09:40<3:02:16,  8.22it/s][A
 26%|████████▍                       | 32366/122310 [1:09:41<2:58:28,  8.40

step: 3960, loss: 104.30729007919115, epoch: 0



 27%|████████▍                       | 32443/122310 [1:09:47<2:00:25, 12.44it/s][A
 27%|████████▍                       | 32449/122310 [1:09:48<2:14:33, 11.13it/s][A
 27%|████████▍                       | 32458/122310 [1:09:49<2:17:37, 10.88it/s][A
 27%|████████▍                       | 32467/122310 [1:09:50<2:14:34, 11.13it/s][A
 27%|████████▍                       | 32469/122310 [1:09:51<2:52:45,  8.67it/s][A
 27%|████████▍                       | 32473/122310 [1:09:51<3:13:04,  7.76it/s][A
 27%|████████▍                       | 32479/122310 [1:09:52<3:10:17,  7.87it/s][A
 27%|████████▍                       | 32487/122310 [1:09:53<2:51:07,  8.75it/s][A
 27%|████████▌                       | 32494/122310 [1:09:53<2:46:53,  8.97it/s][A
 27%|████████▌                       | 32511/122310 [1:09:54<1:54:39, 13.05it/s][A
 27%|████████▌                       | 32517/122310 [1:09:55<2:10:05, 11.50it/s][A
 27%|████████▌                       | 32528/122310 [1:09:56<2:00:39, 12.40

step: 3980, loss: 109.06579094737808, epoch: 0



 27%|████████▌                       | 32601/122310 [1:10:03<3:04:06,  8.12it/s][A
 27%|████████▌                       | 32604/122310 [1:10:04<3:38:16,  6.85it/s][A
 27%|████████▌                       | 32616/122310 [1:10:05<2:46:11,  9.00it/s][A
 27%|████████▌                       | 32618/122310 [1:10:06<3:29:22,  7.14it/s][A
 27%|████████▌                       | 32619/122310 [1:10:07<4:34:12,  5.45it/s][A
 27%|████████▌                       | 32623/122310 [1:10:07<4:43:04,  5.28it/s][A
 27%|████████▌                       | 32629/122310 [1:10:08<4:07:21,  6.04it/s][A
 27%|████████▌                       | 32641/122310 [1:10:09<2:48:22,  8.88it/s][A
 27%|████████▌                       | 32653/122310 [1:10:10<2:18:43, 10.77it/s][A
 27%|████████▌                       | 32658/122310 [1:10:11<2:41:00,  9.28it/s][A
 27%|████████▌                       | 32664/122310 [1:10:11<2:50:15,  8.78it/s][A
 27%|████████▌                       | 32674/122310 [1:10:12<2:31:01,  9.89

step: 4000, loss: 112.74592370270308, epoch: 0
sim1 and sim2 are 0.6491728838966678, 0.06844409369340716
cosine of pred and queen: -0.0007999447405590677
Actual: athens:greece::madrid:spain, pred: france
Actual: bangkok:thailand::islamabad:pakistan, pred: last
Actual: beijing:china::tokyo:japan, pred: coolheaded
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: family
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: france
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: dorpang
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: disobliging
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: would
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: regula

Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: year
Actual: islamabad:pakistan::oslo:norway, pred: zemira
Actual: grandfather:grandmother::father:mother, pred: man
Actual: grandpa:grandma::sons:daughters, pred: came
Actual: king:queen::husband:wife, pred: turkeys
Actual: man:woman::brothers:sisters, pred: hurled
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: kandla
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: state
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: chief
Actual: tripura:agartala::odisha:bhubaneswar, pred: cambrics
Actual: algeria:dinar::japan:yen, pred: holyhead
Actual: argentina:peso::japan:yen, pred: bank
Actual: india:rupee::denmark:krone, 


 27%|████████▎                      | 32730/122310 [1:11:40<73:14:23,  2.94s/it][A
 27%|████████▎                      | 32739/122310 [1:11:41<51:12:12,  2.06s/it][A
 27%|████████▎                      | 32746/122310 [1:11:42<38:45:56,  1.56s/it][A
 27%|████████▎                      | 32753/122310 [1:11:42<29:05:19,  1.17s/it][A
 27%|████████▎                      | 32762/122310 [1:11:43<20:10:34,  1.23it/s][A
 27%|████████▎                      | 32780/122310 [1:11:44<10:53:41,  2.28it/s][A
 27%|████████▌                       | 32788/122310 [1:11:45<8:52:44,  2.80it/s][A
 27%|████████▌                       | 32790/122310 [1:11:45<8:55:02,  2.79it/s][A
 27%|████████▌                       | 32792/122310 [1:11:46<8:58:29,  2.77it/s][A
 27%|████████▌                       | 32796/122310 [1:11:47<8:00:04,  3.11it/s][A
 27%|████████▌                       | 32809/122310 [1:11:48<4:39:54,  5.33it/s][A
 27%|████████▌                       | 32819/122310 [1:11:48<3:39:54,  6.78

step: 4020, loss: 172.67320591805586, epoch: 0



 27%|████████▌                       | 32914/122310 [1:11:56<1:59:53, 12.43it/s][A
 27%|████████▌                       | 32918/122310 [1:11:57<2:22:47, 10.43it/s][A
 27%|████████▌                       | 32929/122310 [1:11:57<2:08:19, 11.61it/s][A
 27%|████████▌                       | 32935/122310 [1:11:58<2:20:41, 10.59it/s][A
 27%|████████▌                       | 32937/122310 [1:11:59<3:01:00,  8.23it/s][A
 27%|████████▌                       | 32949/122310 [1:12:00<2:21:53, 10.50it/s][A
 27%|████████▌                       | 32956/122310 [1:12:00<2:26:49, 10.14it/s][A
 27%|████████▌                       | 32961/122310 [1:12:01<2:43:08,  9.13it/s][A
 27%|████████▋                       | 32974/122310 [1:12:02<2:08:48, 11.56it/s][A
 27%|████████▋                       | 32978/122310 [1:12:03<2:34:00,  9.67it/s][A
 27%|████████▋                       | 32989/122310 [1:12:03<2:13:41, 11.14it/s][A
 27%|████████▋                       | 32995/122310 [1:12:04<2:25:58, 10.20

step: 4040, loss: 95.68870310796592, epoch: 0



 27%|████████▋                       | 33050/122310 [1:12:11<3:54:05,  6.35it/s][A
 27%|████████▋                       | 33058/122310 [1:12:12<3:19:28,  7.46it/s][A
 27%|████████▋                       | 33064/122310 [1:12:13<3:16:36,  7.57it/s][A
 27%|████████▋                       | 33074/122310 [1:12:14<2:42:09,  9.17it/s][A
 27%|████████▋                       | 33076/122310 [1:12:14<3:21:17,  7.39it/s][A
 27%|████████▋                       | 33083/122310 [1:12:15<3:05:51,  8.00it/s][A
 27%|████████▋                       | 33093/122310 [1:12:16<2:34:51,  9.60it/s][A
 27%|████████▋                       | 33102/122310 [1:12:17<2:26:22, 10.16it/s][A
 27%|████████▋                       | 33113/122310 [1:12:17<2:11:59, 11.26it/s][A
 27%|████████▋                       | 33127/122310 [1:12:18<1:52:45, 13.18it/s][A
 27%|████████▋                       | 33132/122310 [1:12:19<2:13:36, 11.12it/s][A
 27%|████████▋                       | 33136/122310 [1:12:20<2:40:33,  9.26

step: 4060, loss: 107.77464492746229, epoch: 0



 27%|████████▋                       | 33206/122310 [1:12:27<2:21:47, 10.47it/s][A
 27%|████████▋                       | 33217/122310 [1:12:28<2:10:34, 11.37it/s][A
 27%|████████▋                       | 33221/122310 [1:12:29<2:37:51,  9.41it/s][A
 27%|████████▋                       | 33225/122310 [1:12:30<3:03:19,  8.10it/s][A
 27%|████████▋                       | 33238/122310 [1:12:30<2:23:06, 10.37it/s][A
 27%|████████▋                       | 33244/122310 [1:12:31<2:42:33,  9.13it/s][A
 27%|████████▋                       | 33253/122310 [1:12:32<2:32:29,  9.73it/s][A
 27%|████████▋                       | 33270/122310 [1:12:33<1:58:48, 12.49it/s][A
 27%|████████▋                       | 33288/122310 [1:12:34<1:38:32, 15.06it/s][A
 27%|████████▋                       | 33299/122310 [1:12:35<1:42:42, 14.44it/s][A
 27%|████████▋                       | 33307/122310 [1:12:36<1:53:04, 13.12it/s][A
 27%|████████▋                       | 33318/122310 [1:12:36<1:51:40, 13.28

step: 4080, loss: 93.46254756184717, epoch: 0



 27%|████████▋                       | 33401/122310 [1:12:44<1:44:10, 14.22it/s][A
 27%|████████▋                       | 33408/122310 [1:12:44<1:57:31, 12.61it/s][A
 27%|████████▋                       | 33419/122310 [1:12:45<1:54:38, 12.92it/s][A
 27%|████████▋                       | 33430/122310 [1:12:46<1:53:00, 13.11it/s][A
 27%|████████▋                       | 33433/122310 [1:12:47<2:23:03, 10.35it/s][A
 27%|████████▋                       | 33439/122310 [1:12:48<2:37:03,  9.43it/s][A
 27%|████████▊                       | 33450/122310 [1:12:48<2:18:22, 10.70it/s][A
 27%|████████▊                       | 33454/122310 [1:12:49<2:44:21,  9.01it/s][A
 27%|████████▊                       | 33462/122310 [1:12:50<2:39:23,  9.29it/s][A
 27%|████████▊                       | 33469/122310 [1:12:51<2:42:20,  9.12it/s][A
 27%|████████▊                       | 33480/122310 [1:12:52<2:20:42, 10.52it/s][A
 27%|████████▊                       | 33484/122310 [1:12:52<2:47:01,  8.86

step: 4100, loss: 125.01365745101947, epoch: 0



 27%|████████▊                       | 33580/122310 [1:13:00<2:07:12, 11.63it/s][A
 27%|████████▊                       | 33591/122310 [1:13:00<2:01:08, 12.21it/s][A
 27%|████████▊                       | 33608/122310 [1:13:01<1:38:36, 14.99it/s][A
 27%|████████▊                       | 33617/122310 [1:13:02<1:46:56, 13.82it/s][A
 27%|████████▊                       | 33619/122310 [1:13:04<3:48:45,  6.46it/s][A
 27%|████████▊                       | 33621/122310 [1:13:05<4:21:31,  5.65it/s][A
 27%|████████▊                       | 33629/122310 [1:13:06<3:42:18,  6.65it/s][A
 28%|████████▊                       | 33641/122310 [1:13:07<2:49:29,  8.72it/s][A
 28%|████████▊                       | 33654/122310 [1:13:08<2:18:03, 10.70it/s][A
 28%|████████▊                       | 33656/122310 [1:13:08<2:55:13,  8.43it/s][A
 28%|████████▊                       | 33661/122310 [1:13:09<3:08:32,  7.84it/s][A
 28%|████████▊                       | 33670/122310 [1:13:10<2:47:38,  8.81

step: 4120, loss: 88.89389149367533, epoch: 0



 28%|████████▊                       | 33734/122310 [1:13:16<2:00:44, 12.23it/s][A
 28%|████████▊                       | 33748/122310 [1:13:17<1:47:24, 13.74it/s][A
 28%|████████▊                       | 33760/122310 [1:13:17<1:44:29, 14.12it/s][A
 28%|████████▊                       | 33770/122310 [1:13:18<1:47:59, 13.66it/s][A
 28%|████████▊                       | 33774/122310 [1:13:19<2:13:15, 11.07it/s][A
 28%|████████▊                       | 33780/122310 [1:13:20<2:27:46,  9.98it/s][A
 28%|████████▊                       | 33786/122310 [1:13:21<2:39:44,  9.24it/s][A
 28%|████████▊                       | 33795/122310 [1:13:21<2:29:48,  9.85it/s][A
 28%|████████▊                       | 33804/122310 [1:13:22<2:23:47, 10.26it/s][A
 28%|████████▊                       | 33814/122310 [1:13:23<2:14:57, 10.93it/s][A
 28%|████████▊                       | 33823/122310 [1:13:24<2:14:11, 10.99it/s][A
 28%|████████▊                       | 33830/122310 [1:13:25<2:22:37, 10.34

step: 4140, loss: 91.24042604655588, epoch: 0



 28%|████████▊                       | 33906/122310 [1:13:32<2:07:18, 11.57it/s][A
 28%|████████▉                       | 33923/122310 [1:13:33<1:42:10, 14.42it/s][A
 28%|████████▉                       | 33939/122310 [1:13:33<1:31:31, 16.09it/s][A
 28%|████████▉                       | 33950/122310 [1:13:34<1:35:42, 15.39it/s][A
 28%|████████▉                       | 33957/122310 [1:13:35<1:49:53, 13.40it/s][A
 28%|████████▉                       | 33964/122310 [1:13:36<2:02:34, 12.01it/s][A
 28%|████████▉                       | 33970/122310 [1:13:37<2:18:49, 10.61it/s][A
 28%|████████▉                       | 33980/122310 [1:13:37<2:11:47, 11.17it/s][A
 28%|████████▉                       | 33992/122310 [1:13:38<1:59:26, 12.32it/s][A
 28%|████████▉                       | 34000/122310 [1:13:39<2:06:06, 11.67it/s][A
 28%|████████▉                       | 34009/122310 [1:13:40<2:07:13, 11.57it/s][A
 28%|████████▉                       | 34019/122310 [1:13:41<2:04:21, 11.83

step: 4160, loss: 111.59323067979079, epoch: 0



 28%|████████▉                       | 34107/122310 [1:13:48<1:39:52, 14.72it/s][A
 28%|████████▉                       | 34115/122310 [1:13:49<1:50:20, 13.32it/s][A
 28%|████████▉                       | 34128/122310 [1:13:49<1:43:15, 14.23it/s][A
 28%|████████▉                       | 34138/122310 [1:13:50<1:48:54, 13.49it/s][A
 28%|████████▉                       | 34142/122310 [1:13:51<2:13:12, 11.03it/s][A
 28%|████████▉                       | 34147/122310 [1:13:52<2:33:07,  9.60it/s][A
 28%|████████▉                       | 34155/122310 [1:13:53<2:31:46,  9.68it/s][A
 28%|████████▉                       | 34165/122310 [1:13:53<2:19:37, 10.52it/s][A
 28%|████████▉                       | 34167/122310 [1:13:54<3:01:20,  8.10it/s][A
 28%|████████▉                       | 34183/122310 [1:13:55<2:05:49, 11.67it/s][A
 28%|████████▉                       | 34192/122310 [1:13:56<2:07:19, 11.53it/s][A
 28%|████████▉                       | 34200/122310 [1:13:57<2:12:57, 11.05

step: 4180, loss: 93.53838286782215, epoch: 0



 28%|████████▉                       | 34279/122310 [1:14:05<3:22:44,  7.24it/s][A
 28%|████████▉                       | 34280/122310 [1:14:06<4:14:30,  5.76it/s][A
 28%|████████▉                       | 34283/122310 [1:14:06<4:39:50,  5.24it/s][A
 28%|████████▉                       | 34295/122310 [1:14:07<3:06:24,  7.87it/s][A
 28%|████████▉                       | 34307/122310 [1:14:08<2:30:27,  9.75it/s][A
 28%|████████▉                       | 34313/122310 [1:14:09<2:40:07,  9.16it/s][A
 28%|████████▉                       | 34322/122310 [1:14:10<2:30:44,  9.73it/s][A
 28%|████████▉                       | 34326/122310 [1:14:10<2:55:58,  8.33it/s][A
 28%|████████▉                       | 34340/122310 [1:14:11<2:13:23, 10.99it/s][A
 28%|████████▉                       | 34353/122310 [1:14:12<1:57:12, 12.51it/s][A
 28%|████████▉                       | 34360/122310 [1:14:13<2:08:59, 11.36it/s][A
 28%|████████▉                       | 34367/122310 [1:14:14<2:18:37, 10.57

step: 4200, loss: 91.08879375026162, epoch: 0



 28%|█████████                       | 34426/122310 [1:14:20<2:22:14, 10.30it/s][A
 28%|█████████                       | 34434/122310 [1:14:21<2:23:09, 10.23it/s][A
 28%|█████████                       | 34445/122310 [1:14:22<2:10:22, 11.23it/s][A
 28%|█████████                       | 34453/122310 [1:14:23<2:55:05,  8.36it/s][A
 28%|█████████                       | 34454/122310 [1:14:24<3:40:41,  6.63it/s][A
 28%|█████████                       | 34471/122310 [1:14:25<2:21:12, 10.37it/s][A
 28%|█████████                       | 34483/122310 [1:14:26<2:06:08, 11.60it/s][A
 28%|█████████                       | 34495/122310 [1:14:27<1:56:41, 12.54it/s][A
 28%|█████████                       | 34503/122310 [1:14:27<2:03:38, 11.84it/s][A
 28%|█████████                       | 34508/122310 [1:14:28<2:22:03, 10.30it/s][A
 28%|█████████                       | 34513/122310 [1:14:29<2:40:57,  9.09it/s][A
 28%|█████████                       | 34518/122310 [1:14:30<2:56:51,  8.27

step: 4220, loss: 97.21145964236567, epoch: 0



 28%|█████████                       | 34569/122310 [1:14:36<3:15:26,  7.48it/s][A
 28%|█████████                       | 34580/122310 [1:14:37<2:34:33,  9.46it/s][A
 28%|█████████                       | 34584/122310 [1:14:38<2:59:27,  8.15it/s][A
 28%|█████████                       | 34587/122310 [1:14:38<3:32:40,  6.87it/s][A
 28%|█████████                       | 34595/122310 [1:14:39<3:05:11,  7.89it/s][A
 28%|█████████                       | 34608/122310 [1:14:40<2:18:27, 10.56it/s][A
 28%|█████████                       | 34612/122310 [1:14:41<2:43:18,  8.95it/s][A
 28%|█████████                       | 34619/122310 [1:14:41<2:42:24,  9.00it/s][A
 28%|█████████                       | 34629/122310 [1:14:42<2:23:56, 10.15it/s][A
 28%|█████████                       | 34635/122310 [1:14:43<2:34:51,  9.44it/s][A
 28%|█████████                       | 34637/122310 [1:14:44<3:18:19,  7.37it/s][A
 28%|█████████                       | 34643/122310 [1:14:45<3:16:25,  7.44

step: 4240, loss: 88.19767663708495, epoch: 0



 28%|█████████                       | 34704/122310 [1:14:52<2:51:57,  8.49it/s][A
 28%|█████████                       | 34720/122310 [1:14:52<2:00:32, 12.11it/s][A
 28%|█████████                       | 34728/122310 [1:14:53<2:05:48, 11.60it/s][A
 28%|█████████                       | 34737/122310 [1:14:54<2:05:08, 11.66it/s][A
 28%|█████████                       | 34750/122310 [1:14:55<1:51:32, 13.08it/s][A
 28%|█████████                       | 34753/122310 [1:14:55<2:21:26, 10.32it/s][A
 28%|█████████                       | 34759/122310 [1:14:56<2:32:21,  9.58it/s][A
 28%|█████████                       | 34765/122310 [1:14:57<2:41:30,  9.03it/s][A
 28%|█████████                       | 34773/122310 [1:14:58<2:34:21,  9.45it/s][A
 28%|█████████                       | 34786/122310 [1:14:59<2:04:54, 11.68it/s][A
 28%|█████████                       | 34794/122310 [1:14:59<2:09:13, 11.29it/s][A
 28%|█████████                       | 34804/122310 [1:15:00<2:03:31, 11.81

step: 4260, loss: 108.1487058550886, epoch: 0



 29%|█████████▏                      | 34893/122310 [1:15:07<2:36:25,  9.31it/s][A
 29%|█████████▏                      | 34915/122310 [1:15:08<1:36:37, 15.07it/s][A
 29%|█████████▏                      | 34921/122310 [1:15:09<2:26:39,  9.93it/s][A
 29%|█████████▏                      | 34928/122310 [1:15:10<2:30:22,  9.68it/s][A
 29%|█████████▏                      | 34932/122310 [1:15:11<2:51:10,  8.51it/s][A
 29%|█████████▏                      | 34939/122310 [1:15:12<2:47:58,  8.67it/s][A
 29%|█████████▏                      | 34949/122310 [1:15:12<2:27:29,  9.87it/s][A
 29%|█████████▏                      | 34956/122310 [1:15:13<2:31:22,  9.62it/s][A
 29%|█████████▏                      | 34968/122310 [1:15:14<2:08:17, 11.35it/s][A
 29%|█████████▏                      | 34975/122310 [1:15:15<2:16:43, 10.65it/s][A
 29%|█████████▏                      | 34984/122310 [1:15:16<2:13:21, 10.91it/s][A
 29%|█████████▏                      | 34990/122310 [1:15:16<2:25:57,  9.97

step: 4280, loss: 104.89035190796676, epoch: 0



 29%|█████████▏                      | 35052/122310 [1:15:23<2:47:28,  8.68it/s][A
 29%|█████████▏                      | 35055/122310 [1:15:23<3:19:05,  7.30it/s][A
 29%|█████████▏                      | 35061/122310 [1:15:24<3:15:36,  7.43it/s][A
 29%|█████████▏                      | 35067/122310 [1:15:25<3:12:23,  7.56it/s][A
 29%|█████████▏                      | 35075/122310 [1:15:26<2:52:57,  8.41it/s][A
 29%|█████████▏                      | 35084/122310 [1:15:26<2:36:41,  9.28it/s][A
 29%|█████████▏                      | 35104/122310 [1:15:27<1:46:53, 13.60it/s][A
 29%|█████████▏                      | 35115/122310 [1:15:28<1:49:05, 13.32it/s][A
 29%|█████████▏                      | 35121/122310 [1:15:29<2:05:47, 11.55it/s][A
 29%|█████████▏                      | 35127/122310 [1:15:30<2:20:57, 10.31it/s][A
 29%|█████████▏                      | 35133/122310 [1:15:31<2:33:48,  9.45it/s][A
 29%|█████████▏                      | 35139/122310 [1:15:31<2:42:52,  8.92

step: 4300, loss: 84.03151537268924, epoch: 0



 29%|█████████▏                      | 35209/122310 [1:15:39<2:37:40,  9.21it/s][A
 29%|█████████▏                      | 35214/122310 [1:15:40<2:58:11,  8.15it/s][A
 29%|█████████▏                      | 35223/122310 [1:15:41<2:42:22,  8.94it/s][A
 29%|█████████▏                      | 35226/122310 [1:15:41<3:15:03,  7.44it/s][A
 29%|█████████▏                      | 35237/122310 [1:15:42<2:36:34,  9.27it/s][A
 29%|█████████▏                      | 35245/122310 [1:15:43<2:33:05,  9.48it/s][A
 29%|█████████▏                      | 35250/122310 [1:15:44<2:50:39,  8.50it/s][A
 29%|█████████▏                      | 35255/122310 [1:15:45<3:05:56,  7.80it/s][A
 29%|█████████▏                      | 35265/122310 [1:15:45<2:38:33,  9.15it/s][A
 29%|█████████▏                      | 35278/122310 [1:15:46<2:08:44, 11.27it/s][A
 29%|█████████▏                      | 35280/122310 [1:15:47<2:48:59,  8.58it/s][A
 29%|█████████▏                      | 35284/122310 [1:15:48<3:13:35,  7.49

step: 4320, loss: 127.10752465064579, epoch: 0



 29%|█████████▎                      | 35358/122310 [1:15:55<2:22:26, 10.17it/s][A
 29%|█████████▎                      | 35362/122310 [1:15:56<2:46:50,  8.69it/s][A
 29%|█████████▎                      | 35376/122310 [1:15:57<2:06:15, 11.48it/s][A
 29%|█████████▎                      | 35389/122310 [1:15:57<1:50:38, 13.09it/s][A
 29%|█████████▎                      | 35393/122310 [1:15:58<2:14:52, 10.74it/s][A
 29%|█████████▎                      | 35401/122310 [1:15:59<2:16:36, 10.60it/s][A
 29%|█████████▎                      | 35416/122310 [1:16:00<1:49:55, 13.17it/s][A
 29%|█████████▎                      | 35421/122310 [1:16:00<2:09:40, 11.17it/s][A
 29%|█████████▎                      | 35430/122310 [1:16:01<2:08:20, 11.28it/s][A
 29%|█████████▎                      | 35437/122310 [1:16:02<2:15:41, 10.67it/s][A
 29%|█████████▎                      | 35446/122310 [1:16:03<2:12:56, 10.89it/s][A
 29%|█████████▎                      | 35450/122310 [1:16:03<2:37:38,  9.18

step: 4340, loss: 129.65902788138845, epoch: 0



 29%|█████████▎                      | 35508/122310 [1:16:10<2:40:49,  9.00it/s][A
 29%|█████████▎                      | 35513/122310 [1:16:11<2:52:22,  8.39it/s][A
 29%|█████████▎                      | 35523/122310 [1:16:12<2:27:47,  9.79it/s][A
 29%|█████████▎                      | 35531/122310 [1:16:13<2:27:24,  9.81it/s][A
 29%|█████████▎                      | 35540/122310 [1:16:13<2:21:00, 10.26it/s][A
 29%|█████████▎                      | 35563/122310 [1:16:14<1:32:26, 15.64it/s][A
 29%|█████████▎                      | 35573/122310 [1:16:15<1:38:42, 14.65it/s][A
 29%|█████████▎                      | 35577/122310 [1:16:16<2:02:42, 11.78it/s][A
 29%|█████████▎                      | 35584/122310 [1:16:17<2:13:51, 10.80it/s][A
 29%|█████████▎                      | 35597/122310 [1:16:17<1:56:09, 12.44it/s][A
 29%|█████████▎                      | 35602/122310 [1:16:18<2:16:53, 10.56it/s][A
 29%|█████████▎                      | 35609/122310 [1:16:19<2:24:28, 10.00

step: 4360, loss: 101.21646680141613, epoch: 0



 29%|█████████▎                      | 35670/122310 [1:16:26<2:22:12, 10.15it/s][A
 29%|█████████▎                      | 35677/122310 [1:16:27<2:29:17,  9.67it/s][A
 29%|█████████▎                      | 35694/122310 [1:16:28<1:49:35, 13.17it/s][A
 29%|█████████▎                      | 35710/122310 [1:16:28<1:33:41, 15.41it/s][A
 29%|█████████▎                      | 35717/122310 [1:16:29<1:46:34, 13.54it/s][A
 29%|█████████▎                      | 35723/122310 [1:16:30<2:02:55, 11.74it/s][A
 29%|█████████▎                      | 35734/122310 [1:16:31<1:56:03, 12.43it/s][A
 29%|█████████▎                      | 35744/122310 [1:16:32<1:56:05, 12.43it/s][A
 29%|█████████▎                      | 35752/122310 [1:16:32<2:03:46, 11.66it/s][A
 29%|█████████▎                      | 35763/122310 [1:16:33<1:57:59, 12.22it/s][A
 29%|█████████▎                      | 35773/122310 [1:16:34<1:57:35, 12.27it/s][A
 29%|█████████▎                      | 35782/122310 [1:16:35<2:01:19, 11.89

step: 4380, loss: 88.84228360693524, epoch: 0



 29%|█████████▍                      | 35879/122310 [1:16:42<1:39:38, 14.46it/s][A
 29%|█████████▍                      | 35882/122310 [1:16:43<2:07:35, 11.29it/s][A
 29%|█████████▍                      | 35895/122310 [1:16:44<1:52:34, 12.79it/s][A
 29%|█████████▍                      | 35904/122310 [1:16:45<1:56:50, 12.32it/s][A
 29%|█████████▍                      | 35908/122310 [1:16:45<2:21:40, 10.16it/s][A
 29%|█████████▍                      | 35919/122310 [1:16:46<2:09:14, 11.14it/s][A
 29%|█████████▍                      | 35928/122310 [1:16:47<2:09:34, 11.11it/s][A
 29%|█████████▍                      | 35930/122310 [1:16:49<3:39:56,  6.55it/s][A
 29%|█████████▍                      | 35939/122310 [1:16:49<3:07:06,  7.69it/s][A
 29%|█████████▍                      | 35946/122310 [1:16:50<3:02:35,  7.88it/s][A
 29%|█████████▍                      | 35954/122310 [1:16:51<2:53:20,  8.30it/s][A
 29%|█████████▍                      | 35962/122310 [1:16:52<2:47:48,  8.58

step: 4400, loss: 82.72437640853325, epoch: 0
sim1 and sim2 are 0.6696669372954958, 0.017728168996692998
cosine of pred and queen: -0.014590028806152915
Actual: athens:greece::madrid:spain, pred: europe
Actual: bangkok:thailand::islamabad:pakistan, pred: first
Actual: beijing:china::tokyo:japan, pred: following
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: family
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: government
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: dorpang
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: tuareg
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: new
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: regularize

Actual: convenient:inconvenient::convincing:unconvincing, pred: lim
Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: year
Actual: islamabad:pakistan::oslo:norway, pred: zemira
Actual: grandfather:grandmother::father:mother, pred: man
Actual: grandpa:grandma::sons:daughters, pred: came
Actual: king:queen::husband:wife, pred: effluence
Actual: man:woman::brothers:sisters, pred: hurled
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: kandla
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: state
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: chief
Actual: tripura:agartala::odisha:bhubaneswar, pred: jefferson
Actual: algeria:dinar::japan:yen, pred: holyhead
Actual: arg


 29%|█████████▏                     | 36028/122310 [1:18:16<61:09:17,  2.55s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.005917159763313609



 29%|█████████▏                     | 36039/122310 [1:18:17<41:27:27,  1.73s/it][A
 29%|█████████▏                     | 36048/122310 [1:18:17<30:28:48,  1.27s/it][A
 29%|█████████▏                     | 36052/122310 [1:18:18<26:41:32,  1.11s/it][A
 29%|█████████▏                     | 36059/122310 [1:18:19<20:10:55,  1.19it/s][A
 29%|█████████▏                     | 36067/122310 [1:18:20<14:40:14,  1.63it/s][A
 29%|█████████▏                     | 36073/122310 [1:18:21<11:48:31,  2.03it/s][A
 30%|█████████▍                      | 36087/122310 [1:18:22<7:03:40,  3.39it/s][A
 30%|█████████▍                      | 36093/122310 [1:18:22<6:12:14,  3.86it/s][A
 30%|█████████▍                      | 36098/122310 [1:18:23<5:43:57,  4.18it/s][A
 30%|█████████▍                      | 36105/122310 [1:18:24<4:49:49,  4.96it/s][A
 30%|█████████▍                      | 36113/122310 [1:18:25<4:02:17,  5.93it/s][A
 30%|█████████▍                      | 36116/122310 [1:18:26<4:21:16,  5.50

step: 4420, loss: 124.40475527376533, epoch: 0



 30%|█████████▍                      | 36174/122310 [1:18:33<2:39:40,  8.99it/s][A
 30%|█████████▍                      | 36176/122310 [1:18:34<3:24:05,  7.03it/s][A
 30%|█████████▍                      | 36181/122310 [1:18:34<3:28:06,  6.90it/s][A
 30%|█████████▍                      | 36196/122310 [1:18:35<2:17:25, 10.44it/s][A
 30%|█████████▍                      | 36207/122310 [1:18:36<2:07:12, 11.28it/s][A
 30%|█████████▍                      | 36219/122310 [1:18:37<1:54:34, 12.52it/s][A
 30%|█████████▍                      | 36224/122310 [1:18:38<2:13:25, 10.75it/s][A
 30%|█████████▍                      | 36226/122310 [1:18:38<2:52:15,  8.33it/s][A
 30%|█████████▍                      | 36235/122310 [1:18:39<2:35:05,  9.25it/s][A
 30%|█████████▍                      | 36244/122310 [1:18:40<2:26:23,  9.80it/s][A
 30%|█████████▍                      | 36252/122310 [1:18:41<2:25:23,  9.86it/s][A
 30%|█████████▍                      | 36258/122310 [1:18:42<2:35:31,  9.22

step: 4440, loss: 101.47648429349775, epoch: 0



 30%|█████████▌                      | 36360/122310 [1:18:49<1:55:06, 12.45it/s][A
 30%|█████████▌                      | 36366/122310 [1:18:50<2:11:57, 10.86it/s][A
 30%|█████████▌                      | 36384/122310 [1:18:50<1:39:33, 14.39it/s][A
 30%|█████████▌                      | 36393/122310 [1:18:51<1:48:04, 13.25it/s][A
 30%|█████████▌                      | 36401/122310 [1:18:52<2:03:01, 11.64it/s][A
 30%|█████████▌                      | 36408/122310 [1:18:53<2:16:38, 10.48it/s][A
 30%|█████████▌                      | 36413/122310 [1:18:54<2:33:39,  9.32it/s][A
 30%|█████████▌                      | 36418/122310 [1:18:55<2:51:57,  8.33it/s][A
 30%|█████████▌                      | 36427/122310 [1:18:56<2:39:02,  9.00it/s][A
 30%|█████████▌                      | 36431/122310 [1:18:56<3:02:31,  7.84it/s][A
 30%|█████████▌                      | 36438/122310 [1:18:57<2:57:34,  8.06it/s][A
 30%|█████████▌                      | 36443/122310 [1:18:58<3:10:19,  7.52

step: 4460, loss: 102.19698315218302, epoch: 0



 30%|█████████▌                      | 36521/122310 [1:19:06<2:11:38, 10.86it/s][A
 30%|█████████▌                      | 36529/122310 [1:19:07<2:16:22, 10.48it/s][A
 30%|█████████▌                      | 36535/122310 [1:19:07<2:36:13,  9.15it/s][A
 30%|█████████▌                      | 36548/122310 [1:19:08<2:12:04, 10.82it/s][A
 30%|█████████▌                      | 36562/122310 [1:19:09<1:54:12, 12.51it/s][A
 30%|█████████▌                      | 36566/122310 [1:19:10<2:20:32, 10.17it/s][A
 30%|█████████▌                      | 36574/122310 [1:19:11<2:22:43, 10.01it/s][A
 30%|█████████▌                      | 36586/122310 [1:19:12<2:09:35, 11.02it/s][A
 30%|█████████▌                      | 36590/122310 [1:19:13<2:37:33,  9.07it/s][A
 30%|█████████▌                      | 36597/122310 [1:19:14<2:44:03,  8.71it/s][A
 30%|█████████▌                      | 36604/122310 [1:19:14<2:46:45,  8.57it/s][A
 30%|█████████▌                      | 36611/122310 [1:19:15<2:46:24,  8.58

step: 4480, loss: 109.9285634974221, epoch: 0



 30%|█████████▌                      | 36702/122310 [1:19:22<2:10:39, 10.92it/s][A
 30%|█████████▌                      | 36715/122310 [1:19:23<1:53:45, 12.54it/s][A
 30%|█████████▌                      | 36726/122310 [1:19:24<1:49:03, 13.08it/s][A
 30%|█████████▌                      | 36728/122310 [1:19:25<2:22:53,  9.98it/s][A
 30%|█████████▌                      | 36730/122310 [1:19:26<3:01:54,  7.84it/s][A
 30%|█████████▌                      | 36734/122310 [1:19:26<3:33:31,  6.68it/s][A
 30%|█████████▌                      | 36740/122310 [1:19:27<3:24:07,  6.99it/s][A
 30%|█████████▌                      | 36750/122310 [1:19:28<2:44:11,  8.68it/s][A
 30%|█████████▌                      | 36760/122310 [1:19:29<2:23:04,  9.97it/s][A
 30%|█████████▌                      | 36772/122310 [1:19:30<2:02:30, 11.64it/s][A
 30%|█████████▌                      | 36785/122310 [1:19:30<1:48:17, 13.16it/s][A
 30%|█████████▋                      | 36799/122310 [1:19:31<1:38:33, 14.46

step: 4500, loss: 99.08722148054709, epoch: 0



 30%|█████████▋                      | 36878/122310 [1:19:38<3:00:45,  7.88it/s][A
 30%|█████████▋                      | 36879/122310 [1:19:39<4:01:31,  5.90it/s][A
 30%|█████████▋                      | 36887/122310 [1:19:40<3:15:53,  7.27it/s][A
 30%|█████████▋                      | 36896/122310 [1:19:41<2:47:49,  8.48it/s][A
 30%|█████████▋                      | 36909/122310 [1:19:41<2:09:59, 10.95it/s][A
 30%|█████████▋                      | 36917/122310 [1:19:42<2:11:43, 10.80it/s][A
 30%|█████████▋                      | 36928/122310 [1:19:43<1:59:45, 11.88it/s][A
 30%|█████████▋                      | 36941/122310 [1:19:44<1:45:57, 13.43it/s][A
 30%|█████████▋                      | 36949/122310 [1:19:44<1:53:32, 12.53it/s][A
 30%|█████████▋                      | 36955/122310 [1:19:45<2:09:26, 10.99it/s][A
 30%|█████████▋                      | 36975/122310 [1:19:46<1:34:44, 15.01it/s][A
 30%|█████████▋                      | 36980/122310 [1:19:47<1:58:28, 12.00

step: 4520, loss: 96.0301271632903, epoch: 0



 30%|█████████▋                      | 37070/122310 [1:19:55<1:52:19, 12.65it/s][A
 30%|█████████▋                      | 37082/122310 [1:19:56<1:47:13, 13.25it/s][A
 30%|█████████▋                      | 37089/122310 [1:19:56<2:00:07, 11.82it/s][A
 30%|█████████▋                      | 37094/122310 [1:19:57<2:22:46,  9.95it/s][A
 30%|█████████▋                      | 37099/122310 [1:19:58<2:40:51,  8.83it/s][A
 30%|█████████▋                      | 37108/122310 [1:19:59<2:32:03,  9.34it/s][A
 30%|█████████▋                      | 37125/122310 [1:20:00<1:52:24, 12.63it/s][A
 30%|█████████▋                      | 37131/122310 [1:20:01<2:09:26, 10.97it/s][A
 30%|█████████▋                      | 37137/122310 [1:20:01<2:22:04,  9.99it/s][A
 30%|█████████▋                      | 37144/122310 [1:20:02<2:27:37,  9.62it/s][A
 30%|█████████▋                      | 37158/122310 [1:20:03<2:01:56, 11.64it/s][A
 30%|█████████▋                      | 37167/122310 [1:20:04<2:03:36, 11.48

step: 4540, loss: 124.31505060756179, epoch: 0



 30%|█████████▋                      | 37231/122310 [1:20:12<3:33:19,  6.65it/s][A
 30%|█████████▋                      | 37236/122310 [1:20:13<3:48:40,  6.20it/s][A
 30%|█████████▋                      | 37242/122310 [1:20:14<3:49:08,  6.19it/s][A
 30%|█████████▋                      | 37247/122310 [1:20:14<3:51:00,  6.14it/s][A
 30%|█████████▋                      | 37260/122310 [1:20:15<2:44:29,  8.62it/s][A
 30%|█████████▊                      | 37273/122310 [1:20:16<2:17:36, 10.30it/s][A
 30%|█████████▊                      | 37282/122310 [1:20:17<2:19:06, 10.19it/s][A
 30%|█████████▊                      | 37290/122310 [1:20:18<2:20:29, 10.09it/s][A
 30%|█████████▊                      | 37294/122310 [1:20:19<2:41:31,  8.77it/s][A
 31%|█████████▊                      | 37306/122310 [1:20:20<2:12:08, 10.72it/s][A
 31%|█████████▊                      | 37313/122310 [1:20:20<2:16:04, 10.41it/s][A
 31%|█████████▊                      | 37324/122310 [1:20:21<2:05:02, 11.33

step: 4560, loss: 126.50661948418914, epoch: 0



 31%|█████████▊                      | 37387/122310 [1:20:28<2:59:24,  7.89it/s][A
 31%|█████████▊                      | 37394/122310 [1:20:29<2:50:41,  8.29it/s][A
 31%|█████████▊                      | 37408/122310 [1:20:30<2:05:57, 11.23it/s][A
 31%|█████████▊                      | 37417/122310 [1:20:30<2:03:49, 11.43it/s][A
 31%|█████████▊                      | 37431/122310 [1:20:31<1:45:28, 13.41it/s][A
 31%|█████████▊                      | 37435/122310 [1:20:32<2:08:51, 10.98it/s][A
 31%|█████████▊                      | 37440/122310 [1:20:33<2:27:42,  9.58it/s][A
 31%|█████████▊                      | 37443/122310 [1:20:34<3:04:24,  7.67it/s][A
 31%|█████████▊                      | 37448/122310 [1:20:34<3:17:12,  7.17it/s][A
 31%|█████████▊                      | 37465/122310 [1:20:35<2:03:04, 11.49it/s][A
 31%|█████████▊                      | 37475/122310 [1:20:36<1:58:48, 11.90it/s][A
 31%|█████████▊                      | 37485/122310 [1:20:37<1:55:50, 12.20

step: 4580, loss: 89.8368680019776, epoch: 0



 31%|█████████▊                      | 37556/122310 [1:20:45<2:34:27,  9.14it/s][A
 31%|█████████▊                      | 37565/122310 [1:20:45<2:29:32,  9.45it/s][A
 31%|█████████▊                      | 37570/122310 [1:20:46<2:51:18,  8.24it/s][A
 31%|█████████▊                      | 37576/122310 [1:20:47<3:00:18,  7.83it/s][A
 31%|█████████▊                      | 37587/122310 [1:20:48<2:39:44,  8.84it/s][A
 31%|█████████▊                      | 37604/122310 [1:20:49<1:57:37, 12.00it/s][A
 31%|█████████▊                      | 37616/122310 [1:20:50<1:51:20, 12.68it/s][A
 31%|█████████▊                      | 37623/122310 [1:20:51<2:01:29, 11.62it/s][A
 31%|█████████▊                      | 37637/122310 [1:20:52<1:46:25, 13.26it/s][A
 31%|█████████▊                      | 37651/122310 [1:20:52<1:43:16, 13.66it/s][A
 31%|█████████▊                      | 37656/122310 [1:20:53<2:07:38, 11.05it/s][A
 31%|█████████▊                      | 37666/122310 [1:20:54<2:07:10, 11.09

step: 4600, loss: 89.48307419400568, epoch: 0



 31%|█████████▊                      | 37732/122310 [1:21:02<2:23:28,  9.83it/s][A
 31%|█████████▊                      | 37737/122310 [1:21:03<2:41:10,  8.75it/s][A
 31%|█████████▊                      | 37743/122310 [1:21:03<2:46:57,  8.44it/s][A
 31%|█████████▉                      | 37752/122310 [1:21:04<2:32:23,  9.25it/s][A
 31%|█████████▉                      | 37757/122310 [1:21:05<2:59:53,  7.83it/s][A
 31%|█████████▉                      | 37766/122310 [1:21:06<2:43:24,  8.62it/s][A
 31%|█████████▉                      | 37775/122310 [1:21:07<2:31:45,  9.28it/s][A
 31%|█████████▉                      | 37784/122310 [1:21:08<2:27:06,  9.58it/s][A
 31%|█████████▉                      | 37786/122310 [1:21:09<3:11:30,  7.36it/s][A
 31%|█████████▉                      | 37789/122310 [1:21:10<3:47:09,  6.20it/s][A
 31%|█████████▉                      | 37800/122310 [1:21:10<2:49:17,  8.32it/s][A
 31%|█████████▉                      | 37814/122310 [1:21:11<2:09:41, 10.86

step: 4620, loss: 108.33948935931207, epoch: 0



 31%|█████████▉                      | 37893/122310 [1:21:19<2:09:19, 10.88it/s][A
 31%|█████████▉                      | 37898/122310 [1:21:20<2:24:20,  9.75it/s][A
 31%|█████████▉                      | 37901/122310 [1:21:20<2:55:14,  8.03it/s][A
 31%|█████████▉                      | 37909/122310 [1:21:21<2:39:25,  8.82it/s][A
 31%|█████████▉                      | 37924/122310 [1:21:22<1:55:27, 12.18it/s][A
 31%|█████████▉                      | 37940/122310 [1:21:23<1:38:09, 14.32it/s][A
 31%|█████████▉                      | 37950/122310 [1:21:23<1:41:44, 13.82it/s][A
 31%|█████████▉                      | 37959/122310 [1:21:24<1:47:31, 13.07it/s][A
 31%|█████████▉                      | 37970/122310 [1:21:25<1:46:40, 13.18it/s][A
 31%|█████████▉                      | 37981/122310 [1:21:26<1:48:07, 13.00it/s][A
 31%|█████████▉                      | 37988/122310 [1:21:27<1:59:20, 11.78it/s][A
 31%|█████████▉                      | 38000/122310 [1:21:28<1:52:41, 12.47

step: 4640, loss: 95.88175131479213, epoch: 0



 31%|█████████▉                      | 38076/122310 [1:21:35<2:04:44, 11.25it/s][A
 31%|█████████▉                      | 38081/122310 [1:21:36<2:24:15,  9.73it/s][A
 31%|█████████▉                      | 38088/122310 [1:21:37<2:29:03,  9.42it/s][A
 31%|█████████▉                      | 38094/122310 [1:21:37<2:44:05,  8.55it/s][A
 31%|█████████▉                      | 38103/122310 [1:21:39<3:12:50,  7.28it/s][A
 31%|█████████▉                      | 38118/122310 [1:21:40<2:16:52, 10.25it/s][A
 31%|█████████▉                      | 38126/122310 [1:21:41<2:24:29,  9.71it/s][A
 31%|█████████▉                      | 38134/122310 [1:21:42<2:23:09,  9.80it/s][A
 31%|█████████▉                      | 38144/122310 [1:21:42<2:18:11, 10.15it/s][A
 31%|█████████▉                      | 38151/122310 [1:21:43<2:28:34,  9.44it/s][A
 31%|█████████▉                      | 38156/122310 [1:21:44<2:48:19,  8.33it/s][A
 31%|█████████▉                      | 38161/122310 [1:21:45<3:08:42,  7.43

step: 4660, loss: 97.71721063836623, epoch: 0



 31%|██████████                      | 38230/122310 [1:21:52<1:58:31, 11.82it/s][A
 31%|██████████                      | 38240/122310 [1:21:53<1:56:37, 12.01it/s][A
 31%|██████████                      | 38244/122310 [1:21:54<2:18:57, 10.08it/s][A
 31%|██████████                      | 38260/122310 [1:21:55<1:47:35, 13.02it/s][A
 31%|██████████                      | 38270/122310 [1:21:56<1:49:32, 12.79it/s][A
 31%|██████████                      | 38291/122310 [1:21:56<1:23:46, 16.71it/s][A
 31%|██████████                      | 38299/122310 [1:21:57<1:34:43, 14.78it/s][A
 31%|██████████                      | 38312/122310 [1:21:58<1:31:50, 15.24it/s][A
 31%|██████████                      | 38315/122310 [1:21:59<1:57:19, 11.93it/s][A
 31%|██████████                      | 38318/122310 [1:21:59<2:27:54,  9.46it/s][A
 31%|██████████                      | 38331/122310 [1:22:00<2:03:05, 11.37it/s][A
 31%|██████████                      | 38341/122310 [1:22:01<1:59:26, 11.72

step: 4680, loss: 96.64122205819557, epoch: 0



 31%|██████████                      | 38433/122310 [1:22:09<2:11:18, 10.65it/s][A
 31%|██████████                      | 38438/122310 [1:22:10<2:31:08,  9.25it/s][A
 31%|██████████                      | 38445/122310 [1:22:11<2:36:25,  8.94it/s][A
 31%|██████████                      | 38454/122310 [1:22:12<2:25:47,  9.59it/s][A
 31%|██████████                      | 38455/122310 [1:22:12<3:19:23,  7.01it/s][A
 31%|██████████                      | 38462/122310 [1:22:13<3:07:02,  7.47it/s][A
 31%|██████████                      | 38468/122310 [1:22:14<3:07:21,  7.46it/s][A
 31%|██████████                      | 38471/122310 [1:22:15<3:39:29,  6.37it/s][A
 31%|██████████                      | 38480/122310 [1:22:16<3:00:14,  7.75it/s][A
 31%|██████████                      | 38484/122310 [1:22:17<3:30:03,  6.65it/s][A
 31%|██████████                      | 38491/122310 [1:22:17<3:14:44,  7.17it/s][A
 31%|██████████                      | 38501/122310 [1:22:18<2:41:22,  8.66

step: 4700, loss: 111.90110437859319, epoch: 0



 32%|██████████                      | 38587/122310 [1:22:26<1:46:44, 13.07it/s][A
 32%|██████████                      | 38601/122310 [1:22:27<1:37:46, 14.27it/s][A
 32%|██████████                      | 38605/122310 [1:22:28<2:03:07, 11.33it/s][A
 32%|██████████                      | 38614/122310 [1:22:28<2:07:17, 10.96it/s][A
 32%|██████████                      | 38625/122310 [1:22:29<1:59:40, 11.65it/s][A
 32%|██████████                      | 38635/122310 [1:22:30<1:57:53, 11.83it/s][A
 32%|██████████                      | 38643/122310 [1:22:31<2:04:47, 11.17it/s][A
 32%|██████████                      | 38648/122310 [1:22:32<2:24:07,  9.67it/s][A
 32%|██████████                      | 38662/122310 [1:22:33<1:57:43, 11.84it/s][A
 32%|██████████                      | 38671/122310 [1:22:33<1:59:22, 11.68it/s][A
 32%|██████████                      | 38680/122310 [1:22:34<2:06:35, 11.01it/s][A
 32%|██████████                      | 38682/122310 [1:22:36<3:33:09,  6.54

step: 4720, loss: 88.11956837991812, epoch: 0



 32%|██████████▏                     | 38732/122310 [1:22:43<3:16:10,  7.10it/s][A
 32%|██████████▏                     | 38740/122310 [1:22:44<3:02:11,  7.64it/s][A
 32%|██████████▏                     | 38751/122310 [1:22:45<2:30:43,  9.24it/s][A
 32%|██████████▏                     | 38761/122310 [1:22:46<2:18:02, 10.09it/s][A
 32%|██████████▏                     | 38768/122310 [1:22:46<2:22:59,  9.74it/s][A
 32%|██████████▏                     | 38776/122310 [1:22:47<2:22:49,  9.75it/s][A
 32%|██████████▏                     | 38787/122310 [1:22:48<2:10:12, 10.69it/s][A
 32%|██████████▏                     | 38801/122310 [1:22:49<1:54:16, 12.18it/s][A
 32%|██████████▏                     | 38808/122310 [1:22:50<2:02:35, 11.35it/s][A
 32%|██████████▏                     | 38825/122310 [1:22:50<1:39:03, 14.05it/s][A
 32%|██████████▏                     | 38835/122310 [1:22:51<1:42:43, 13.54it/s][A
 32%|██████████▏                     | 38842/122310 [1:22:52<1:58:21, 11.75

step: 4740, loss: 107.64947518138405, epoch: 0



 32%|██████████▏                     | 38935/122310 [1:23:02<3:30:40,  6.60it/s][A
 32%|██████████▏                     | 38938/122310 [1:23:03<3:46:59,  6.12it/s][A
 32%|██████████▏                     | 38949/122310 [1:23:04<3:36:23,  6.42it/s][A
 32%|██████████▏                     | 38968/122310 [1:23:05<2:21:38,  9.81it/s][A
 32%|██████████▏                     | 38979/122310 [1:23:06<2:09:54, 10.69it/s][A
 32%|██████████▏                     | 38990/122310 [1:23:07<2:02:36, 11.33it/s][A
 32%|██████████▏                     | 38996/122310 [1:23:07<2:12:32, 10.48it/s][A
 32%|██████████▏                     | 39006/122310 [1:23:08<2:05:36, 11.05it/s][A
 32%|██████████▏                     | 39011/122310 [1:23:09<2:22:04,  9.77it/s][A
 32%|██████████▏                     | 39018/122310 [1:23:10<2:25:31,  9.54it/s][A
 32%|██████████▏                     | 39025/122310 [1:23:11<2:33:30,  9.04it/s][A
 32%|██████████▏                     | 39030/122310 [1:23:11<2:46:26,  8.34

step: 4760, loss: 99.00510324103514, epoch: 0



 32%|██████████▏                     | 39066/122310 [1:23:16<2:32:54,  9.07it/s][A
 32%|██████████▏                     | 39077/122310 [1:23:17<2:13:20, 10.40it/s][A
 32%|██████████▏                     | 39083/122310 [1:23:18<2:27:21,  9.41it/s][A
 32%|██████████▏                     | 39091/122310 [1:23:19<2:23:41,  9.65it/s][A
 32%|██████████▏                     | 39096/122310 [1:23:19<2:46:43,  8.32it/s][A
 32%|██████████▏                     | 39104/122310 [1:23:20<2:42:46,  8.52it/s][A
 32%|██████████▏                     | 39114/122310 [1:23:21<2:28:17,  9.35it/s][A
 32%|██████████▏                     | 39125/122310 [1:23:22<2:13:42, 10.37it/s][A
 32%|██████████▏                     | 39131/122310 [1:23:23<2:26:54,  9.44it/s][A
 32%|██████████▏                     | 39149/122310 [1:23:24<1:48:51, 12.73it/s][A
 32%|██████████▏                     | 39164/122310 [1:23:25<1:35:06, 14.57it/s][A
 32%|██████████▏                     | 39172/122310 [1:23:25<1:45:47, 13.10

step: 4780, loss: 79.4770393320897, epoch: 0



 32%|██████████▎                     | 39246/122310 [1:23:33<2:47:18,  8.27it/s][A
 32%|██████████▎                     | 39261/122310 [1:23:34<2:03:16, 11.23it/s][A
 32%|██████████▎                     | 39272/122310 [1:23:35<1:55:35, 11.97it/s][A
 32%|██████████▎                     | 39288/122310 [1:23:36<1:41:28, 13.64it/s][A
 32%|██████████▎                     | 39300/122310 [1:23:37<1:41:45, 13.60it/s][A
 32%|██████████▎                     | 39313/122310 [1:23:37<1:38:55, 13.98it/s][A
 32%|██████████▎                     | 39326/122310 [1:23:38<1:37:50, 14.13it/s][A
 32%|██████████▎                     | 39337/122310 [1:23:39<1:42:53, 13.44it/s][A
 32%|██████████▎                     | 39346/122310 [1:23:40<1:47:46, 12.83it/s][A
 32%|██████████▎                     | 39348/122310 [1:23:41<2:25:21,  9.51it/s][A
 32%|██████████▎                     | 39354/122310 [1:23:42<2:33:47,  8.99it/s][A
 32%|██████████▎                     | 39361/122310 [1:23:43<2:43:23,  8.46

step: 4800, loss: 107.55434785227753, epoch: 0
sim1 and sim2 are 0.660560511513319, 0.0661036161883135
cosine of pred and queen: 0.027030076388462936
Actual: athens:greece::madrid:spain, pred: week
Actual: bangkok:thailand::islamabad:pakistan, pred: last
Actual: beijing:china::tokyo:japan, pred: following
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: family
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: government
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: dorpang
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: tuareg
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: new
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: regularize
Actual:


 32%|██████████▎                     | 39427/122310 [1:24:01<1:57:50, 11.72it/s][A

Actual: syria:arabic::australia:english, pred: friday
Actual: mouse:squeak::elephant:trumpet, pred: mongrel
Actual: algeria:dinar::usa:dollar, pred: steffen
Actual: argentina:peso::russia:ruble, pred: bank
Actual: armenia:dram::iran:rial, pred: sowest
Actual: brazil:real::sweden:krona, pred: first
Actual: europe:euro::japan:yen, pred: china
Actual: india:rupee::denmark:krone, pred: nestorius
Actual: usa:dollar::nigeria:naira, pred: per
Actual: switzerland:swiss::spain:spanish, pred: euro
Actual: thailand:thai::india:indian, pred: quotidian
Actual: sweden:swedish::netherlands:dutch, pred: police
Actual: russia:russian::germany:german, pred: europe
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: least
Actual: norway:norwegian::mexico:mexican, pred: earely
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: grameen
Actual: croatia:croatian::france:french, pred: prime
Actual: 


 32%|█████████▉                     | 39439/122310 [1:25:06<52:25:06,  2.28s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.005917159763313609



 32%|█████████▉                     | 39450/122310 [1:25:06<36:35:43,  1.59s/it][A
 32%|██████████                     | 39458/122310 [1:25:07<28:11:10,  1.22s/it][A
 32%|██████████                     | 39470/122310 [1:25:08<19:00:21,  1.21it/s][A
 32%|██████████                     | 39480/122310 [1:25:09<14:00:56,  1.64it/s][A
 32%|██████████                     | 39487/122310 [1:25:10<11:29:17,  2.00it/s][A
 32%|██████████▎                     | 39493/122310 [1:25:10<9:43:33,  2.37it/s][A
 32%|██████████▎                     | 39505/122310 [1:25:11<6:41:12,  3.44it/s][A
 32%|██████████▎                     | 39513/122310 [1:25:12<5:37:52,  4.08it/s][A
 32%|██████████▎                     | 39518/122310 [1:25:13<5:17:46,  4.34it/s][A
 32%|██████████▎                     | 39531/122310 [1:25:14<3:44:55,  6.13it/s][A
 32%|██████████▎                     | 39533/122310 [1:25:15<4:21:14,  5.28it/s][A
 32%|██████████▎                     | 39543/122310 [1:25:16<3:28:06,  6.63

step: 4820, loss: 104.21754300549648, epoch: 0



 32%|██████████▎                     | 39627/122310 [1:25:23<2:08:56, 10.69it/s][A
 32%|██████████▎                     | 39634/122310 [1:25:24<2:14:44, 10.23it/s][A
 32%|██████████▎                     | 39644/122310 [1:25:25<2:06:45, 10.87it/s][A
 32%|██████████▎                     | 39655/122310 [1:25:26<1:57:24, 11.73it/s][A
 32%|██████████▍                     | 39662/122310 [1:25:27<2:05:20, 10.99it/s][A
 32%|██████████▍                     | 39673/122310 [1:25:27<1:55:29, 11.93it/s][A
 32%|██████████▍                     | 39676/122310 [1:25:28<2:23:41,  9.58it/s][A
 32%|██████████▍                     | 39682/122310 [1:25:29<2:31:40,  9.08it/s][A
 32%|██████████▍                     | 39686/122310 [1:25:30<2:58:36,  7.71it/s][A
 32%|██████████▍                     | 39689/122310 [1:25:31<3:30:53,  6.53it/s][A
 32%|██████████▍                     | 39693/122310 [1:25:31<3:46:32,  6.08it/s][A
 32%|██████████▍                     | 39702/122310 [1:25:32<3:03:01,  7.52

step: 4840, loss: 103.41644025910908, epoch: 0



 33%|██████████▍                     | 39771/122310 [1:25:40<2:21:58,  9.69it/s][A
 33%|██████████▍                     | 39775/122310 [1:25:41<2:46:18,  8.27it/s][A
 33%|██████████▍                     | 39781/122310 [1:25:41<2:50:14,  8.08it/s][A
 33%|██████████▍                     | 39792/122310 [1:25:42<2:19:12,  9.88it/s][A
 33%|██████████▍                     | 39795/122310 [1:25:43<2:50:20,  8.07it/s][A
 33%|██████████▍                     | 39796/122310 [1:25:44<3:44:49,  6.12it/s][A
 33%|██████████▍                     | 39799/122310 [1:25:45<4:11:26,  5.47it/s][A
 33%|██████████▍                     | 39808/122310 [1:25:45<3:06:41,  7.37it/s][A
 33%|██████████▍                     | 39813/122310 [1:25:46<3:22:03,  6.80it/s][A
 33%|██████████▍                     | 39822/122310 [1:25:47<2:46:04,  8.28it/s][A
 33%|██████████▍                     | 39829/122310 [1:25:48<2:48:28,  8.16it/s][A
 33%|██████████▍                     | 39839/122310 [1:25:49<2:24:45,  9.50

step: 4860, loss: 87.19344378289665, epoch: 0



 33%|██████████▍                     | 39921/122310 [1:25:56<2:27:28,  9.31it/s][A
 33%|██████████▍                     | 39928/122310 [1:25:56<2:29:44,  9.17it/s][A
 33%|██████████▍                     | 39939/122310 [1:25:57<2:08:11, 10.71it/s][A
 33%|██████████▍                     | 39942/122310 [1:25:58<2:39:30,  8.61it/s][A
 33%|██████████▍                     | 39948/122310 [1:25:59<2:45:32,  8.29it/s][A
 33%|██████████▍                     | 39953/122310 [1:26:00<2:59:39,  7.64it/s][A
 33%|██████████▍                     | 39961/122310 [1:26:00<2:43:16,  8.41it/s][A
 33%|██████████▍                     | 39969/122310 [1:26:01<2:31:43,  9.04it/s][A
 33%|██████████▍                     | 39974/122310 [1:26:02<2:45:15,  8.30it/s][A
 33%|██████████▍                     | 39979/122310 [1:26:03<2:56:53,  7.76it/s][A
 33%|██████████▍                     | 39984/122310 [1:26:03<3:05:45,  7.39it/s][A
 33%|██████████▍                     | 39989/122310 [1:26:04<3:12:05,  7.14

step: 4880, loss: 107.96178680690086, epoch: 0



 33%|██████████▍                     | 40055/122310 [1:26:12<2:37:24,  8.71it/s][A
 33%|██████████▍                     | 40063/122310 [1:26:13<2:34:47,  8.86it/s][A
 33%|██████████▍                     | 40069/122310 [1:26:14<2:49:42,  8.08it/s][A
 33%|██████████▍                     | 40073/122310 [1:26:14<3:12:23,  7.12it/s][A
 33%|██████████▍                     | 40084/122310 [1:26:15<2:32:11,  9.00it/s][A
 33%|██████████▍                     | 40091/122310 [1:26:16<2:33:59,  8.90it/s][A
 33%|██████████▍                     | 40101/122310 [1:26:17<2:19:42,  9.81it/s][A
 33%|██████████▍                     | 40105/122310 [1:26:18<2:43:33,  8.38it/s][A
 33%|██████████▍                     | 40108/122310 [1:26:18<3:16:02,  6.99it/s][A
 33%|██████████▍                     | 40109/122310 [1:26:19<4:15:59,  5.35it/s][A
 33%|██████████▍                     | 40114/122310 [1:26:20<4:07:12,  5.54it/s][A
 33%|██████████▍                     | 40123/122310 [1:26:21<3:13:23,  7.08

step: 4900, loss: 96.94706303294907, epoch: 0



 33%|██████████▌                     | 40194/122310 [1:26:29<2:16:22, 10.04it/s][A
 33%|██████████▌                     | 40199/122310 [1:26:30<2:34:30,  8.86it/s][A
 33%|██████████▌                     | 40212/122310 [1:26:30<2:06:52, 10.78it/s][A
 33%|██████████▌                     | 40219/122310 [1:26:31<2:18:14,  9.90it/s][A
 33%|██████████▌                     | 40224/122310 [1:26:32<2:42:09,  8.44it/s][A
 33%|██████████▌                     | 40231/122310 [1:26:33<2:42:27,  8.42it/s][A
 33%|██████████▌                     | 40237/122310 [1:26:34<2:50:33,  8.02it/s][A
 33%|██████████▌                     | 40246/122310 [1:26:35<2:33:04,  8.93it/s][A
 33%|██████████▌                     | 40265/122310 [1:26:36<1:44:16, 13.11it/s][A
 33%|██████████▌                     | 40271/122310 [1:26:36<1:59:49, 11.41it/s][A
 33%|██████████▌                     | 40278/122310 [1:26:37<2:07:33, 10.72it/s][A
 33%|██████████▌                     | 40282/122310 [1:26:38<2:30:18,  9.10

step: 4920, loss: 97.59790410430357, epoch: 0



 33%|██████████▌                     | 40356/122310 [1:26:45<1:58:27, 11.53it/s][A
 33%|██████████▌                     | 40359/122310 [1:26:46<2:30:35,  9.07it/s][A
 33%|██████████▌                     | 40371/122310 [1:26:47<2:13:26, 10.23it/s][A
 33%|██████████▌                     | 40380/122310 [1:26:48<2:12:25, 10.31it/s][A
 33%|██████████▌                     | 40388/122310 [1:26:49<2:16:02, 10.04it/s][A
 33%|██████████▌                     | 40394/122310 [1:26:50<2:27:53,  9.23it/s][A
 33%|██████████▌                     | 40403/122310 [1:26:51<2:25:52,  9.36it/s][A
 33%|██████████▌                     | 40407/122310 [1:26:52<2:55:25,  7.78it/s][A
 33%|██████████▌                     | 40421/122310 [1:26:53<2:17:42,  9.91it/s][A
 33%|██████████▌                     | 40433/122310 [1:26:54<2:05:23, 10.88it/s][A
 33%|██████████▌                     | 40443/122310 [1:26:54<2:02:17, 11.16it/s][A
 33%|██████████▌                     | 40456/122310 [1:26:55<1:50:04, 12.39

step: 4940, loss: 94.64951224823191, epoch: 0



 33%|██████████▌                     | 40544/122310 [1:27:03<2:02:15, 11.15it/s][A
 33%|██████████▌                     | 40560/122310 [1:27:04<1:41:29, 13.43it/s][A
 33%|██████████▌                     | 40566/122310 [1:27:05<1:57:45, 11.57it/s][A
 33%|██████████▌                     | 40570/122310 [1:27:06<2:24:26,  9.43it/s][A
 33%|██████████▌                     | 40582/122310 [1:27:07<2:05:20, 10.87it/s][A
 33%|██████████▌                     | 40589/122310 [1:27:08<2:16:49,  9.95it/s][A
 33%|██████████▌                     | 40596/122310 [1:27:08<2:25:08,  9.38it/s][A
 33%|██████████▌                     | 40608/122310 [1:27:09<2:11:10, 10.38it/s][A
 33%|██████████▋                     | 40616/122310 [1:27:10<2:18:42,  9.82it/s][A
 33%|██████████▋                     | 40623/122310 [1:27:11<2:26:48,  9.27it/s][A
 33%|██████████▋                     | 40629/122310 [1:27:12<2:38:45,  8.58it/s][A
 33%|██████████▋                     | 40642/122310 [1:27:13<2:08:43, 10.57

step: 4960, loss: 80.72152624461698, epoch: 0



 33%|██████████▋                     | 40727/122310 [1:27:21<2:00:33, 11.28it/s][A
 33%|██████████▋                     | 40735/122310 [1:27:22<2:06:19, 10.76it/s][A
 33%|██████████▋                     | 40750/122310 [1:27:22<1:45:17, 12.91it/s][A
 33%|██████████▋                     | 40759/122310 [1:27:23<1:50:38, 12.28it/s][A
 33%|██████████▋                     | 40766/122310 [1:27:24<2:02:52, 11.06it/s][A
 33%|██████████▋                     | 40781/122310 [1:27:25<1:43:25, 13.14it/s][A
 33%|██████████▋                     | 40793/122310 [1:27:26<1:42:14, 13.29it/s][A
 33%|██████████▋                     | 40798/122310 [1:27:27<2:04:22, 10.92it/s][A
 33%|██████████▋                     | 40804/122310 [1:27:28<2:18:46,  9.79it/s][A
 33%|██████████▋                     | 40811/122310 [1:27:28<2:24:10,  9.42it/s][A
 33%|██████████▋                     | 40815/122310 [1:27:29<2:49:38,  8.01it/s][A
 33%|██████████▋                     | 40829/122310 [1:27:30<2:07:39, 10.64

step: 4980, loss: 91.60612204769822, epoch: 0



 33%|██████████▋                     | 40917/122310 [1:27:38<2:04:38, 10.88it/s][A
 33%|██████████▋                     | 40923/122310 [1:27:39<2:20:09,  9.68it/s][A
 33%|██████████▋                     | 40931/122310 [1:27:39<2:21:29,  9.59it/s][A
 33%|██████████▋                     | 40947/122310 [1:27:40<1:49:19, 12.40it/s][A
 33%|██████████▋                     | 40953/122310 [1:27:41<2:06:14, 10.74it/s][A
 33%|██████████▋                     | 40955/122310 [1:27:42<2:43:24,  8.30it/s][A
 34%|██████████▋                     | 40975/122310 [1:27:43<1:44:48, 12.93it/s][A
 34%|██████████▋                     | 40981/122310 [1:27:44<2:06:00, 10.76it/s][A
 34%|██████████▋                     | 40984/122310 [1:27:45<2:35:59,  8.69it/s][A
 34%|██████████▋                     | 40995/122310 [1:27:45<2:17:10,  9.88it/s][A
 34%|██████████▋                     | 41002/122310 [1:27:46<2:22:33,  9.51it/s][A
 34%|██████████▋                     | 41012/122310 [1:27:47<2:14:49, 10.05

step: 5000, loss: 96.30913958150727, epoch: 0
saving weights



 34%|██████████▊                     | 41095/122310 [1:27:57<3:23:26,  6.65it/s][A
 34%|██████████▊                     | 41097/122310 [1:27:58<3:51:21,  5.85it/s][A
 34%|██████████▊                     | 41112/122310 [1:27:58<2:46:26,  8.13it/s][A
 34%|██████████▊                     | 41117/122310 [1:27:59<2:57:31,  7.62it/s][A
 34%|██████████▊                     | 41136/122310 [1:28:00<2:04:33, 10.86it/s][A
 34%|██████████▊                     | 41143/122310 [1:28:01<2:15:09, 10.01it/s][A
 34%|██████████▊                     | 41148/122310 [1:28:02<2:31:22,  8.94it/s][A
 34%|██████████▊                     | 41164/122310 [1:28:03<1:57:30, 11.51it/s][A
 34%|██████████▊                     | 41176/122310 [1:28:04<1:51:56, 12.08it/s][A
 34%|██████████▊                     | 41185/122310 [1:28:05<1:58:01, 11.46it/s][A
 34%|██████████▊                     | 41199/122310 [1:28:06<1:47:35, 12.56it/s][A
 34%|██████████▊                     | 41202/122310 [1:28:06<2:14:57, 10.02

step: 5020, loss: 90.89733122579466, epoch: 0



 34%|██████████▊                     | 41261/122310 [1:28:13<2:44:29,  8.21it/s][A
 34%|██████████▊                     | 41272/122310 [1:28:14<2:15:57,  9.93it/s][A
 34%|██████████▊                     | 41282/122310 [1:28:15<2:05:56, 10.72it/s][A
 34%|██████████▊                     | 41293/122310 [1:28:15<1:55:44, 11.67it/s][A
 34%|██████████▊                     | 41301/122310 [1:28:16<2:00:01, 11.25it/s][A
 34%|██████████▊                     | 41306/122310 [1:28:17<2:16:50,  9.87it/s][A
 34%|██████████▊                     | 41310/122310 [1:28:18<2:46:43,  8.10it/s][A
 34%|██████████▊                     | 41322/122310 [1:28:19<2:14:20, 10.05it/s][A
 34%|██████████▊                     | 41331/122310 [1:28:20<2:12:45, 10.17it/s][A
 34%|██████████▊                     | 41340/122310 [1:28:20<2:09:44, 10.40it/s][A
 34%|██████████▊                     | 41346/122310 [1:28:21<2:23:34,  9.40it/s][A
 34%|██████████▊                     | 41352/122310 [1:28:22<2:36:07,  8.64

step: 5040, loss: 102.286658332468, epoch: 0



 34%|██████████▊                     | 41444/122310 [1:28:30<1:34:53, 14.20it/s][A
 34%|██████████▊                     | 41449/122310 [1:28:30<1:53:38, 11.86it/s][A
 34%|██████████▊                     | 41456/122310 [1:28:31<2:02:31, 11.00it/s][A
 34%|██████████▊                     | 41465/122310 [1:28:32<2:02:29, 11.00it/s][A
 34%|██████████▊                     | 41479/122310 [1:28:33<1:50:07, 12.23it/s][A
 34%|██████████▊                     | 41485/122310 [1:28:34<2:07:19, 10.58it/s][A
 34%|██████████▊                     | 41491/122310 [1:28:35<2:21:26,  9.52it/s][A
 34%|██████████▊                     | 41500/122310 [1:28:36<2:22:33,  9.45it/s][A
 34%|██████████▊                     | 41503/122310 [1:28:37<3:00:08,  7.48it/s][A
 34%|██████████▊                     | 41506/122310 [1:28:37<3:28:48,  6.45it/s][A
 34%|██████████▊                     | 41515/122310 [1:28:38<2:53:17,  7.77it/s][A
 34%|██████████▊                     | 41525/122310 [1:28:40<3:23:08,  6.63

step: 5060, loss: 136.03999936247013, epoch: 0



 34%|██████████▉                     | 41592/122310 [1:28:47<2:02:58, 10.94it/s][A
 34%|██████████▉                     | 41602/122310 [1:28:47<1:59:55, 11.22it/s][A
 34%|██████████▉                     | 41612/122310 [1:28:48<2:03:15, 10.91it/s][A
 34%|██████████▉                     | 41623/122310 [1:28:49<1:54:05, 11.79it/s][A
 34%|██████████▉                     | 41631/122310 [1:28:50<1:59:29, 11.25it/s][A
 34%|██████████▉                     | 41639/122310 [1:28:51<2:02:37, 10.96it/s][A
 34%|██████████▉                     | 41644/122310 [1:28:52<2:18:47,  9.69it/s][A
 34%|██████████▉                     | 41648/122310 [1:28:52<2:40:11,  8.39it/s][A
 34%|██████████▉                     | 41651/122310 [1:28:53<3:10:16,  7.06it/s][A
 34%|██████████▉                     | 41661/122310 [1:28:54<2:33:15,  8.77it/s][A
 34%|██████████▉                     | 41667/122310 [1:28:55<2:42:18,  8.28it/s][A
 34%|██████████▉                     | 41677/122310 [1:28:55<2:17:59,  9.74

step: 5080, loss: 79.67495075069121, epoch: 0



 34%|██████████▉                     | 41749/122310 [1:29:03<2:24:59,  9.26it/s][A
 34%|██████████▉                     | 41753/122310 [1:29:04<2:47:41,  8.01it/s][A
 34%|██████████▉                     | 41761/122310 [1:29:05<2:39:01,  8.44it/s][A
 34%|██████████▉                     | 41770/122310 [1:29:06<2:25:36,  9.22it/s][A
 34%|██████████▉                     | 41776/122310 [1:29:06<2:33:41,  8.73it/s][A
 34%|██████████▉                     | 41785/122310 [1:29:07<2:22:31,  9.42it/s][A
 34%|██████████▉                     | 41797/122310 [1:29:08<2:08:57, 10.41it/s][A
 34%|██████████▉                     | 41809/122310 [1:29:09<1:57:53, 11.38it/s][A
 34%|██████████▉                     | 41813/122310 [1:29:10<2:19:45,  9.60it/s][A
 34%|██████████▉                     | 41818/122310 [1:29:11<2:35:20,  8.64it/s][A
 34%|██████████▉                     | 41821/122310 [1:29:11<3:07:59,  7.14it/s][A
 34%|██████████▉                     | 41833/122310 [1:29:12<2:21:38,  9.47

step: 5100, loss: 113.52070472941327, epoch: 0



 34%|██████████▉                     | 41898/122310 [1:29:20<3:36:02,  6.20it/s][A
 34%|██████████▉                     | 41912/122310 [1:29:21<2:25:00,  9.24it/s][A
 34%|██████████▉                     | 41926/122310 [1:29:22<1:58:21, 11.32it/s][A
 34%|██████████▉                     | 41938/122310 [1:29:23<1:51:03, 12.06it/s][A
 34%|██████████▉                     | 41946/122310 [1:29:24<1:59:55, 11.17it/s][A
 34%|██████████▉                     | 41959/122310 [1:29:24<1:48:36, 12.33it/s][A
 34%|██████████▉                     | 41965/122310 [1:29:26<2:46:50,  8.03it/s][A
 34%|██████████▉                     | 41968/122310 [1:29:27<3:16:45,  6.81it/s][A
 34%|██████████▉                     | 41978/122310 [1:29:28<2:50:53,  7.83it/s][A
 34%|██████████▉                     | 41987/122310 [1:29:29<2:43:36,  8.18it/s][A
 34%|██████████▉                     | 41997/122310 [1:29:30<2:30:16,  8.91it/s][A
 34%|██████████▉                     | 42009/122310 [1:29:31<2:13:28, 10.03

step: 5120, loss: 108.65336480684091, epoch: 0



 34%|███████████                     | 42089/122310 [1:29:38<2:23:40,  9.31it/s][A
 34%|███████████                     | 42091/122310 [1:29:39<2:57:43,  7.52it/s][A
 34%|███████████                     | 42101/122310 [1:29:40<2:42:34,  8.22it/s][A
 34%|███████████                     | 42108/122310 [1:29:41<2:43:35,  8.17it/s][A
 34%|███████████                     | 42114/122310 [1:29:42<2:49:31,  7.88it/s][A
 34%|███████████                     | 42133/122310 [1:29:43<1:52:15, 11.90it/s][A
 34%|███████████                     | 42143/122310 [1:29:44<1:50:31, 12.09it/s][A
 34%|███████████                     | 42151/122310 [1:29:44<1:55:07, 11.60it/s][A
 34%|███████████                     | 42163/122310 [1:29:45<1:49:48, 12.16it/s][A
 34%|███████████                     | 42171/122310 [1:29:46<1:55:29, 11.57it/s][A
 34%|███████████                     | 42179/122310 [1:29:47<2:03:51, 10.78it/s][A
 34%|███████████                     | 42193/122310 [1:29:48<1:45:59, 12.60

step: 5140, loss: 125.4144380075025, epoch: 0



 35%|███████████                     | 42259/122310 [1:29:56<3:45:27,  5.92it/s][A
 35%|███████████                     | 42275/122310 [1:29:57<2:25:17,  9.18it/s][A
 35%|███████████                     | 42285/122310 [1:29:58<2:14:54,  9.89it/s][A
 35%|███████████                     | 42293/122310 [1:29:59<2:18:52,  9.60it/s][A
 35%|███████████                     | 42306/122310 [1:30:00<1:59:41, 11.14it/s][A
 35%|███████████                     | 42316/122310 [1:30:01<1:55:33, 11.54it/s][A
 35%|███████████                     | 42332/122310 [1:30:01<1:36:17, 13.84it/s][A
 35%|███████████                     | 42341/122310 [1:30:02<1:43:03, 12.93it/s][A
 35%|███████████                     | 42347/122310 [1:30:03<1:58:06, 11.28it/s][A
 35%|███████████                     | 42352/122310 [1:30:04<2:16:26,  9.77it/s][A
 35%|███████████                     | 42356/122310 [1:30:05<2:41:20,  8.26it/s][A
 35%|███████████                     | 42361/122310 [1:30:05<2:53:07,  7.70

step: 5160, loss: 138.41953045199014, epoch: 0



 35%|███████████                     | 42434/122310 [1:30:12<2:06:34, 10.52it/s][A
 35%|███████████                     | 42446/122310 [1:30:13<1:54:25, 11.63it/s][A
 35%|███████████                     | 42458/122310 [1:30:14<1:45:28, 12.62it/s][A
 35%|███████████                     | 42466/122310 [1:30:14<1:51:28, 11.94it/s][A
 35%|███████████                     | 42475/122310 [1:30:15<1:52:41, 11.81it/s][A
 35%|███████████                     | 42480/122310 [1:30:16<2:10:53, 10.17it/s][A
 35%|███████████                     | 42494/122310 [1:30:17<1:48:39, 12.24it/s][A
 35%|███████████                     | 42506/122310 [1:30:18<1:41:39, 13.08it/s][A
 35%|███████████                     | 42515/122310 [1:30:18<1:45:59, 12.55it/s][A
 35%|███████████                     | 42521/122310 [1:30:19<1:59:47, 11.10it/s][A
 35%|███████████▏                    | 42530/122310 [1:30:20<1:58:23, 11.23it/s][A
 35%|███████████▏                    | 42545/122310 [1:30:21<1:37:48, 13.59

step: 5180, loss: 89.89183481160534, epoch: 0



 35%|███████████▏                    | 42611/122310 [1:30:28<2:02:27, 10.85it/s][A
 35%|███████████▏                    | 42625/122310 [1:30:29<1:43:46, 12.80it/s][A
 35%|███████████▏                    | 42637/122310 [1:30:30<1:40:06, 13.26it/s][A
 35%|███████████▏                    | 42646/122310 [1:30:30<1:45:49, 12.55it/s][A
 35%|███████████▏                    | 42656/122310 [1:30:31<1:49:37, 12.11it/s][A
 35%|███████████▏                    | 42664/122310 [1:30:32<2:01:46, 10.90it/s][A
 35%|███████████▏                    | 42669/122310 [1:30:33<2:20:24,  9.45it/s][A
 35%|███████████▏                    | 42679/122310 [1:30:35<2:44:03,  8.09it/s][A
 35%|███████████▏                    | 42682/122310 [1:30:35<3:11:26,  6.93it/s][A
 35%|███████████▏                    | 42694/122310 [1:30:36<2:27:14,  9.01it/s][A
 35%|███████████▏                    | 42696/122310 [1:30:37<3:02:40,  7.26it/s][A
 35%|███████████▏                    | 42703/122310 [1:30:38<2:53:55,  7.63

step: 5200, loss: 102.55644041852749, epoch: 0
sim1 and sim2 are 0.6726079325386468, 0.11042771504349407
cosine of pred and queen: 0.05163633453715677
Actual: athens:greece::madrid:spain, pred: would
Actual: bangkok:thailand::islamabad:pakistan, pred: last
Actual: beijing:china::tokyo:japan, pred: team
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: family
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: government
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: dorpang
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: tuareg
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: mah

Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: time
Actual: islamabad:pakistan::oslo:norway, pred: zemira
Actual: grandfather:grandmother::father:mother, pred: man
Actual: grandpa:grandma::sons:daughters, pred: men
Actual: king:queen::husband:wife, pred: strick
Actual: man:woman::brothers:sisters, pred: nidhi
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: state
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: state
Actual: tripura:agartala::odisha:bhubaneswar, pred: jefferson
Actual: algeria:dinar::japan:yen, pred: holyhead
Actual: argentina:peso::japan:yen, pred: bank



 35%|██████████▊                    | 42761/122310 [1:32:03<67:10:35,  3.04s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.011834319526627219



 35%|██████████▊                    | 42771/122310 [1:32:04<44:36:17,  2.02s/it][A
 35%|██████████▊                    | 42781/122310 [1:32:05<30:28:24,  1.38s/it][A
 35%|██████████▊                    | 42786/122310 [1:32:06<25:21:54,  1.15s/it][A
 35%|██████████▊                    | 42793/122310 [1:32:06<19:06:53,  1.16it/s][A
 35%|██████████▊                    | 42801/122310 [1:32:07<13:57:14,  1.58it/s][A
 35%|██████████▊                    | 42809/122310 [1:32:08<10:26:51,  2.11it/s][A
 35%|███████████▏                    | 42816/122310 [1:32:09<8:16:01,  2.67it/s][A
 35%|███████████▏                    | 42824/122310 [1:32:10<6:21:50,  3.47it/s][A
 35%|███████████▏                    | 42827/122310 [1:32:11<6:17:11,  3.51it/s][A
 35%|███████████▏                    | 42840/122310 [1:32:11<3:57:11,  5.58it/s][A
 35%|███████████▏                    | 42845/122310 [1:32:12<3:52:08,  5.71it/s][A
 35%|███████████▏                    | 42851/122310 [1:32:13<3:35:42,  6.14

step: 5220, loss: 114.83617536775019, epoch: 0



 35%|███████████▏                    | 42915/122310 [1:32:20<2:02:56, 10.76it/s][A
 35%|███████████▏                    | 42927/122310 [1:32:21<1:49:59, 12.03it/s][A
 35%|███████████▏                    | 42932/122310 [1:32:22<2:08:40, 10.28it/s][A
 35%|███████████▏                    | 42942/122310 [1:32:23<2:01:25, 10.89it/s][A
 35%|███████████▏                    | 42952/122310 [1:32:23<1:58:15, 11.18it/s][A
 35%|███████████▏                    | 42957/122310 [1:32:24<2:16:39,  9.68it/s][A
 35%|███████████▏                    | 42963/122310 [1:32:25<2:26:30,  9.03it/s][A
 35%|███████████▏                    | 42975/122310 [1:32:26<2:02:50, 10.76it/s][A
 35%|███████████▏                    | 42989/122310 [1:32:27<1:43:20, 12.79it/s][A
 35%|███████████▏                    | 42992/122310 [1:32:28<2:16:39,  9.67it/s][A
 35%|███████████▎                    | 43002/122310 [1:32:28<2:12:59,  9.94it/s][A
 35%|███████████▎                    | 43012/122310 [1:32:29<2:06:30, 10.45

step: 5240, loss: 94.93787665727199, epoch: 0



 35%|███████████▎                    | 43074/122310 [1:32:37<2:29:05,  8.86it/s][A
 35%|███████████▎                    | 43087/122310 [1:32:38<1:58:04, 11.18it/s][A
 35%|███████████▎                    | 43095/122310 [1:32:39<2:00:29, 10.96it/s][A
 35%|███████████▎                    | 43104/122310 [1:32:39<2:01:23, 10.87it/s][A
 35%|███████████▎                    | 43113/122310 [1:32:40<2:06:18, 10.45it/s][A
 35%|███████████▎                    | 43120/122310 [1:32:41<2:17:58,  9.57it/s][A
 35%|███████████▎                    | 43129/122310 [1:32:42<2:17:00,  9.63it/s][A
 35%|███████████▎                    | 43140/122310 [1:32:43<2:09:05, 10.22it/s][A
 35%|███████████▎                    | 43144/122310 [1:32:44<2:35:42,  8.47it/s][A
 35%|███████████▎                    | 43150/122310 [1:32:45<2:45:50,  7.96it/s][A
 35%|███████████▎                    | 43162/122310 [1:32:46<2:16:46,  9.64it/s][A
 35%|███████████▎                    | 43175/122310 [1:32:47<1:55:57, 11.37

step: 5260, loss: 154.24778626431637, epoch: 0



 35%|███████████▎                    | 43242/122310 [1:32:54<2:15:53,  9.70it/s][A
 35%|███████████▎                    | 43251/122310 [1:32:55<2:07:56, 10.30it/s][A
 35%|███████████▎                    | 43263/122310 [1:32:56<1:50:59, 11.87it/s][A
 35%|███████████▎                    | 43265/122310 [1:32:56<2:23:46,  9.16it/s][A
 35%|███████████▎                    | 43276/122310 [1:32:57<2:03:15, 10.69it/s][A
 35%|███████████▎                    | 43281/122310 [1:32:58<2:19:13,  9.46it/s][A
 35%|███████████▎                    | 43286/122310 [1:32:59<2:33:51,  8.56it/s][A
 35%|███████████▎                    | 43298/122310 [1:32:59<2:04:03, 10.61it/s][A
 35%|███████████▎                    | 43307/122310 [1:33:00<2:02:46, 10.73it/s][A
 35%|███████████▎                    | 43309/122310 [1:33:01<2:38:08,  8.33it/s][A
 35%|███████████▎                    | 43318/122310 [1:33:02<2:26:30,  8.99it/s][A
 35%|███████████▎                    | 43327/122310 [1:33:03<2:20:54,  9.34

step: 5280, loss: 93.69941496882984, epoch: 0



 35%|███████████▎                    | 43407/122310 [1:33:11<2:17:35,  9.56it/s][A
 35%|███████████▎                    | 43413/122310 [1:33:12<2:28:50,  8.83it/s][A
 35%|███████████▎                    | 43420/122310 [1:33:13<2:31:49,  8.66it/s][A
 36%|███████████▎                    | 43432/122310 [1:33:13<2:04:39, 10.55it/s][A
 36%|███████████▎                    | 43446/122310 [1:33:14<1:49:22, 12.02it/s][A
 36%|███████████▎                    | 43458/122310 [1:33:15<1:41:10, 12.99it/s][A
 36%|███████████▎                    | 43473/122310 [1:33:16<1:31:38, 14.34it/s][A
 36%|███████████▍                    | 43478/122310 [1:33:17<1:49:46, 11.97it/s][A
 36%|███████████▍                    | 43487/122310 [1:33:18<1:50:39, 11.87it/s][A
 36%|███████████▍                    | 43496/122310 [1:33:18<1:54:20, 11.49it/s][A
 36%|███████████▍                    | 43501/122310 [1:33:19<2:15:57,  9.66it/s][A
 36%|███████████▍                    | 43510/122310 [1:33:20<2:09:45, 10.12

step: 5300, loss: 90.51171860734718, epoch: 0



 36%|███████████▍                    | 43584/122310 [1:33:27<1:47:46, 12.17it/s][A
 36%|███████████▍                    | 43589/122310 [1:33:28<2:05:20, 10.47it/s][A
 36%|███████████▍                    | 43595/122310 [1:33:29<2:15:32,  9.68it/s][A
 36%|███████████▍                    | 43604/122310 [1:33:30<2:08:53, 10.18it/s][A
 36%|███████████▍                    | 43607/122310 [1:33:31<2:37:34,  8.32it/s][A
 36%|███████████▍                    | 43615/122310 [1:33:31<2:31:34,  8.65it/s][A
 36%|███████████▍                    | 43623/122310 [1:33:32<2:33:34,  8.54it/s][A
 36%|███████████▍                    | 43630/122310 [1:33:33<2:33:37,  8.54it/s][A
 36%|███████████▍                    | 43643/122310 [1:33:34<2:07:24, 10.29it/s][A
 36%|███████████▍                    | 43649/122310 [1:33:35<2:21:02,  9.30it/s][A
 36%|███████████▍                    | 43658/122310 [1:33:36<2:18:58,  9.43it/s][A
 36%|███████████▍                    | 43674/122310 [1:33:37<1:47:16, 12.22

step: 5320, loss: 79.76802784716855, epoch: 0



 36%|███████████▍                    | 43754/122310 [1:33:44<1:38:59, 13.23it/s][A
 36%|███████████▍                    | 43758/122310 [1:33:45<2:03:12, 10.63it/s][A
 36%|███████████▍                    | 43765/122310 [1:33:46<2:09:11, 10.13it/s][A
 36%|███████████▍                    | 43767/122310 [1:33:47<2:46:21,  7.87it/s][A
 36%|███████████▍                    | 43770/122310 [1:33:47<3:16:59,  6.64it/s][A
 36%|███████████▍                    | 43773/122310 [1:33:48<3:43:47,  5.85it/s][A
 36%|███████████▍                    | 43780/122310 [1:33:49<3:12:24,  6.80it/s][A
 36%|███████████▍                    | 43791/122310 [1:33:50<2:25:50,  8.97it/s][A
 36%|███████████▍                    | 43799/122310 [1:33:51<2:20:57,  9.28it/s][A
 36%|███████████▍                    | 43808/122310 [1:33:52<2:18:46,  9.43it/s][A
 36%|███████████▍                    | 43817/122310 [1:33:52<2:11:02,  9.98it/s][A
 36%|███████████▍                    | 43824/122310 [1:33:53<2:15:51,  9.63

step: 5340, loss: 85.44647172219557, epoch: 0



 36%|███████████▍                    | 43894/122310 [1:34:00<2:14:52,  9.69it/s][A
 36%|███████████▍                    | 43899/122310 [1:34:01<2:32:31,  8.57it/s][A
 36%|███████████▍                    | 43904/122310 [1:34:02<2:46:36,  7.84it/s][A
 36%|███████████▍                    | 43915/122310 [1:34:03<2:16:51,  9.55it/s][A
 36%|███████████▍                    | 43924/122310 [1:34:04<2:09:43, 10.07it/s][A
 36%|███████████▍                    | 43929/122310 [1:34:05<2:28:45,  8.78it/s][A
 36%|███████████▍                    | 43940/122310 [1:34:05<2:07:01, 10.28it/s][A
 36%|███████████▍                    | 43943/122310 [1:34:06<2:37:22,  8.30it/s][A
 36%|███████████▍                    | 43947/122310 [1:34:07<2:56:59,  7.38it/s][A
 36%|███████████▍                    | 43952/122310 [1:34:08<3:05:31,  7.04it/s][A
 36%|███████████▌                    | 43956/122310 [1:34:08<3:21:09,  6.49it/s][A
 36%|███████████▌                    | 43961/122310 [1:34:10<4:24:28,  4.94

step: 5360, loss: 146.73133736561664, epoch: 0



 36%|███████████▌                    | 43994/122310 [1:34:17<6:50:03,  3.18it/s][A
 36%|███████████▌                    | 44010/122310 [1:34:17<3:35:03,  6.07it/s][A
 36%|███████████▌                    | 44022/122310 [1:34:18<2:48:33,  7.74it/s][A
 36%|███████████▌                    | 44032/122310 [1:34:19<2:31:16,  8.62it/s][A
 36%|███████████▌                    | 44038/122310 [1:34:20<2:38:07,  8.25it/s][A
 36%|███████████▌                    | 44055/122310 [1:34:21<1:54:05, 11.43it/s][A
 36%|███████████▌                    | 44065/122310 [1:34:24<3:24:52,  6.37it/s][A
 36%|███████████▌                    | 44074/122310 [1:34:25<3:03:06,  7.12it/s][A
 36%|███████████▌                    | 44092/122310 [1:34:26<2:14:25,  9.70it/s][A
 36%|███████████▌                    | 44099/122310 [1:34:27<2:18:12,  9.43it/s][A
 36%|███████████▌                    | 44114/122310 [1:34:28<1:54:11, 11.41it/s][A
 36%|███████████▌                    | 44124/122310 [1:34:28<1:52:13, 11.61

step: 5380, loss: 84.72768380323524, epoch: 0



 36%|███████████▌                    | 44165/122310 [1:34:34<3:24:32,  6.37it/s][A
 36%|███████████▌                    | 44172/122310 [1:34:35<3:11:19,  6.81it/s][A
 36%|███████████▌                    | 44180/122310 [1:34:35<2:54:19,  7.47it/s][A
 36%|███████████▌                    | 44187/122310 [1:34:36<2:47:18,  7.78it/s][A
 36%|███████████▌                    | 44200/122310 [1:34:37<2:11:55,  9.87it/s][A
 36%|███████████▌                    | 44211/122310 [1:34:38<1:59:23, 10.90it/s][A
 36%|███████████▌                    | 44224/122310 [1:34:39<1:44:34, 12.45it/s][A
 36%|███████████▌                    | 44229/122310 [1:34:39<2:01:12, 10.74it/s][A
 36%|███████████▌                    | 44234/122310 [1:34:40<2:17:36,  9.46it/s][A
 36%|███████████▌                    | 44236/122310 [1:34:41<2:53:52,  7.48it/s][A
 36%|███████████▌                    | 44244/122310 [1:34:42<2:38:23,  8.21it/s][A
 36%|███████████▌                    | 44252/122310 [1:34:43<2:28:23,  8.77

step: 5400, loss: 96.9786541746469, epoch: 0



 36%|███████████▌                    | 44352/122310 [1:34:50<1:48:42, 11.95it/s][A
 36%|███████████▌                    | 44358/122310 [1:34:50<2:01:08, 10.72it/s][A
 36%|███████████▌                    | 44364/122310 [1:34:51<2:12:26,  9.81it/s][A
 36%|███████████▌                    | 44374/122310 [1:34:52<2:01:56, 10.65it/s][A
 36%|███████████▌                    | 44385/122310 [1:34:53<1:53:37, 11.43it/s][A
 36%|███████████▌                    | 44395/122310 [1:34:54<1:51:34, 11.64it/s][A
 36%|███████████▌                    | 44408/122310 [1:34:54<1:40:52, 12.87it/s][A
 36%|███████████▌                    | 44419/122310 [1:34:55<1:42:04, 12.72it/s][A
 36%|███████████▌                    | 44422/122310 [1:34:56<2:09:04, 10.06it/s][A
 36%|███████████▌                    | 44432/122310 [1:34:57<2:03:57, 10.47it/s][A
 36%|███████████▋                    | 44437/122310 [1:34:58<2:22:04,  9.13it/s][A
 36%|███████████▋                    | 44448/122310 [1:34:59<2:09:16, 10.04

step: 5420, loss: 106.72757075750206, epoch: 0



 36%|███████████▋                    | 44506/122310 [1:35:06<2:54:26,  7.43it/s][A
 36%|███████████▋                    | 44514/122310 [1:35:07<2:40:52,  8.06it/s][A
 36%|███████████▋                    | 44521/122310 [1:35:08<2:38:31,  8.18it/s][A
 36%|███████████▋                    | 44529/122310 [1:35:09<2:32:28,  8.50it/s][A
 36%|███████████▋                    | 44535/122310 [1:35:10<2:35:51,  8.32it/s][A
 36%|███████████▋                    | 44550/122310 [1:35:11<1:53:40, 11.40it/s][A
 36%|███████████▋                    | 44567/122310 [1:35:11<1:34:05, 13.77it/s][A
 36%|███████████▋                    | 44573/122310 [1:35:12<1:49:10, 11.87it/s][A
 36%|███████████▋                    | 44579/122310 [1:35:13<2:03:15, 10.51it/s][A
 36%|███████████▋                    | 44585/122310 [1:35:14<2:23:58,  9.00it/s][A
 36%|███████████▋                    | 44597/122310 [1:35:15<2:06:35, 10.23it/s][A
 36%|███████████▋                    | 44604/122310 [1:35:16<2:16:28,  9.49

step: 5440, loss: 87.7223203998102, epoch: 0



 37%|███████████▋                    | 44674/122310 [1:35:23<2:09:56,  9.96it/s][A
 37%|███████████▋                    | 44685/122310 [1:35:24<1:59:55, 10.79it/s][A
 37%|███████████▋                    | 44691/122310 [1:35:25<2:10:27,  9.92it/s][A
 37%|███████████▋                    | 44705/122310 [1:35:26<1:49:22, 11.83it/s][A
 37%|███████████▋                    | 44714/122310 [1:35:28<2:31:51,  8.52it/s][A
 37%|███████████▋                    | 44725/122310 [1:35:28<2:15:48,  9.52it/s][A
 37%|███████████▋                    | 44732/122310 [1:35:29<2:21:03,  9.17it/s][A
 37%|███████████▋                    | 44740/122310 [1:35:30<2:21:13,  9.15it/s][A
 37%|███████████▋                    | 44745/122310 [1:35:31<2:38:58,  8.13it/s][A
 37%|███████████▋                    | 44751/122310 [1:35:32<2:44:37,  7.85it/s][A
 37%|███████████▋                    | 44763/122310 [1:35:33<2:15:03,  9.57it/s][A
 37%|███████████▋                    | 44772/122310 [1:35:34<2:14:16,  9.62

step: 5460, loss: 119.9531599815172, epoch: 0



 37%|███████████▋                    | 44856/122310 [1:35:40<1:40:20, 12.86it/s][A
 37%|███████████▋                    | 44860/122310 [1:35:41<2:02:37, 10.53it/s][A
 37%|███████████▋                    | 44862/122310 [1:35:42<2:37:24,  8.20it/s][A
 37%|███████████▋                    | 44867/122310 [1:35:42<2:49:18,  7.62it/s][A
 37%|███████████▋                    | 44881/122310 [1:35:43<2:05:55, 10.25it/s][A
 37%|███████████▋                    | 44888/122310 [1:35:44<2:20:06,  9.21it/s][A
 37%|███████████▋                    | 44892/122310 [1:35:45<2:45:02,  7.82it/s][A
 37%|███████████▋                    | 44895/122310 [1:35:46<3:19:54,  6.45it/s][A
 37%|███████████▋                    | 44907/122310 [1:35:47<2:30:31,  8.57it/s][A
 37%|███████████▊                    | 44915/122310 [1:35:48<2:27:23,  8.75it/s][A
 37%|███████████▊                    | 44921/122310 [1:35:49<2:39:32,  8.08it/s][A
 37%|███████████▊                    | 44934/122310 [1:35:50<2:03:33, 10.44

step: 5480, loss: 117.1331085376404, epoch: 0



 37%|███████████▊                    | 45027/122310 [1:35:57<2:16:55,  9.41it/s][A
 37%|███████████▊                    | 45042/122310 [1:35:58<1:45:56, 12.16it/s][A
 37%|███████████▊                    | 45059/122310 [1:35:59<1:26:16, 14.92it/s][A
 37%|███████████▊                    | 45065/122310 [1:35:59<1:42:08, 12.60it/s][A
 37%|███████████▊                    | 45078/122310 [1:36:00<1:33:29, 13.77it/s][A
 37%|███████████▊                    | 45088/122310 [1:36:01<1:36:01, 13.40it/s][A
 37%|███████████▊                    | 45098/122310 [1:36:02<1:37:37, 13.18it/s][A
 37%|███████████▊                    | 45103/122310 [1:36:03<1:55:50, 11.11it/s][A
 37%|███████████▊                    | 45112/122310 [1:36:03<1:54:50, 11.20it/s][A
 37%|███████████▊                    | 45114/122310 [1:36:05<3:28:22,  6.17it/s][A
 37%|███████████▊                    | 45124/122310 [1:36:06<2:54:26,  7.37it/s][A
 37%|███████████▊                    | 45133/122310 [1:36:07<2:43:22,  7.87

step: 5500, loss: 104.51609428557148, epoch: 0



 37%|███████████▊                    | 45186/122310 [1:36:14<2:56:18,  7.29it/s][A
 37%|███████████▊                    | 45188/122310 [1:36:15<3:31:48,  6.07it/s][A
 37%|███████████▊                    | 45197/122310 [1:36:16<3:03:49,  6.99it/s][A
 37%|███████████▊                    | 45206/122310 [1:36:17<2:47:41,  7.66it/s][A
 37%|███████████▊                    | 45211/122310 [1:36:18<2:59:31,  7.16it/s][A
 37%|███████████▊                    | 45216/122310 [1:36:19<3:09:28,  6.78it/s][A
 37%|███████████▊                    | 45221/122310 [1:36:20<3:20:56,  6.39it/s][A
 37%|███████████▊                    | 45233/122310 [1:36:21<2:35:02,  8.29it/s][A
 37%|███████████▊                    | 45234/122310 [1:36:22<3:27:24,  6.19it/s][A
 37%|███████████▊                    | 45248/122310 [1:36:23<2:22:24,  9.02it/s][A
 37%|███████████▊                    | 45259/122310 [1:36:24<2:10:44,  9.82it/s][A
 37%|███████████▊                    | 45268/122310 [1:36:25<2:13:20,  9.63

step: 5520, loss: 91.64477643539416, epoch: 0



 37%|███████████▊                    | 45346/122310 [1:36:32<2:17:25,  9.33it/s][A
 37%|███████████▊                    | 45356/122310 [1:36:33<2:04:30, 10.30it/s][A
 37%|███████████▊                    | 45371/122310 [1:36:34<1:39:35, 12.88it/s][A
 37%|███████████▊                    | 45378/122310 [1:36:35<1:49:52, 11.67it/s][A
 37%|███████████▊                    | 45386/122310 [1:36:35<1:54:38, 11.18it/s][A
 37%|███████████▉                    | 45394/122310 [1:36:36<2:01:20, 10.56it/s][A
 37%|███████████▉                    | 45400/122310 [1:36:37<2:12:21,  9.68it/s][A
 37%|███████████▉                    | 45406/122310 [1:36:38<2:19:51,  9.16it/s][A
 37%|███████████▉                    | 45414/122310 [1:36:39<2:14:01,  9.56it/s][A
 37%|███████████▉                    | 45419/122310 [1:36:39<2:28:07,  8.65it/s][A
 37%|███████████▉                    | 45427/122310 [1:36:40<2:22:22,  9.00it/s][A
 37%|███████████▉                    | 45429/122310 [1:36:41<3:08:48,  6.79

step: 5540, loss: 101.10154181693069, epoch: 0



 37%|███████████▉                    | 45498/122310 [1:36:49<3:01:53,  7.04it/s][A
 37%|███████████▉                    | 45506/122310 [1:36:50<2:51:17,  7.47it/s][A
 37%|███████████▉                    | 45513/122310 [1:36:51<2:47:19,  7.65it/s][A
 37%|███████████▉                    | 45523/122310 [1:36:52<2:25:46,  8.78it/s][A
 37%|███████████▉                    | 45527/122310 [1:36:53<2:44:47,  7.77it/s][A
 37%|███████████▉                    | 45538/122310 [1:36:53<2:15:13,  9.46it/s][A
 37%|███████████▉                    | 45543/122310 [1:36:54<2:30:47,  8.49it/s][A
 37%|███████████▉                    | 45558/122310 [1:36:55<1:58:56, 10.75it/s][A
 37%|███████████▉                    | 45565/122310 [1:36:56<2:13:06,  9.61it/s][A
 37%|███████████▉                    | 45576/122310 [1:36:57<2:00:29, 10.61it/s][A
 37%|███████████▉                    | 45578/122310 [1:36:59<3:24:46,  6.25it/s][A
 37%|███████████▉                    | 45581/122310 [1:37:00<3:47:58,  5.61

step: 5560, loss: 83.93696713707773, epoch: 0



 37%|███████████▉                    | 45641/122310 [1:37:06<2:01:22, 10.53it/s][A
 37%|███████████▉                    | 45651/122310 [1:37:07<1:55:34, 11.05it/s][A
 37%|███████████▉                    | 45657/122310 [1:37:08<2:08:15,  9.96it/s][A
 37%|███████████▉                    | 45664/122310 [1:37:09<2:17:15,  9.31it/s][A
 37%|███████████▉                    | 45665/122310 [1:37:10<3:03:34,  6.96it/s][A
 37%|███████████▉                    | 45670/122310 [1:37:10<3:08:16,  6.78it/s][A
 37%|███████████▉                    | 45674/122310 [1:37:11<3:22:22,  6.31it/s][A
 37%|███████████▉                    | 45685/122310 [1:37:12<2:28:30,  8.60it/s][A
 37%|███████████▉                    | 45691/122310 [1:37:13<2:35:30,  8.21it/s][A
 37%|███████████▉                    | 45699/122310 [1:37:14<2:27:42,  8.64it/s][A
 37%|███████████▉                    | 45707/122310 [1:37:15<2:23:30,  8.90it/s][A
 37%|███████████▉                    | 45720/122310 [1:37:15<1:56:17, 10.98

step: 5580, loss: 85.37261797230356, epoch: 0



 37%|███████████▉                    | 45775/122310 [1:37:23<2:44:23,  7.76it/s][A
 37%|███████████▉                    | 45780/122310 [1:37:25<3:40:37,  5.78it/s][A
 37%|███████████▉                    | 45789/122310 [1:37:25<3:01:25,  7.03it/s][A
 37%|███████████▉                    | 45800/122310 [1:37:26<2:28:07,  8.61it/s][A
 37%|███████████▉                    | 45806/122310 [1:37:27<2:34:33,  8.25it/s][A
 37%|███████████▉                    | 45815/122310 [1:37:28<2:23:08,  8.91it/s][A
 37%|███████████▉                    | 45826/122310 [1:37:29<2:06:17, 10.09it/s][A
 37%|███████████▉                    | 45838/122310 [1:37:30<1:52:26, 11.34it/s][A
 37%|███████████▉                    | 45845/122310 [1:37:30<2:01:00, 10.53it/s][A
 37%|███████████▉                    | 45857/122310 [1:37:31<1:50:01, 11.58it/s][A
 37%|███████████▉                    | 45864/122310 [1:37:32<2:00:32, 10.57it/s][A
 38%|████████████                    | 45875/122310 [1:37:33<1:52:06, 11.36

step: 5600, loss: 117.16069905886135, epoch: 0
sim1 and sim2 are 0.6499350096535056, 0.1261314857684744
cosine of pred and queen: 0.05560193330249189
Actual: athens:greece::madrid:spain, pred: france
Actual: bangkok:thailand::islamabad:pakistan, pred: last
Actual: beijing:china::tokyo:japan, pred: across
Actual: berlin:germany::rome:italy, pred: would
Actual: cairo:egypt::ottawa:canada, pred: family
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: france
Actual: stockholm:sweden::hanoi:vietnam, pred: josipovic
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: cloak
Actual: lisbon:portugal::riga:latvia, pred: powerhouses
Actual: india:asia::paris:europe, pred: tuareg
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: would
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: maharastr


 38%|████████████                    | 45933/122310 [1:37:51<2:00:45, 10.54it/s]

Actual: spain:spanish::korea:korean, pred: beijing


[A

Actual: syria:arabic::australia:english, pred: friday
Actual: mouse:squeak::elephant:trumpet, pred: mongrel
Actual: algeria:dinar::usa:dollar, pred: holyhead
Actual: argentina:peso::russia:ruble, pred: bank
Actual: armenia:dram::iran:rial, pred: sowest
Actual: brazil:real::sweden:krona, pred: first
Actual: europe:euro::japan:yen, pred: china
Actual: india:rupee::denmark:krone, pred: nestorius
Actual: usa:dollar::nigeria:naira, pred: irreconcilable
Actual: switzerland:swiss::spain:spanish, pred: top
Actual: thailand:thai::india:indian, pred: expresso
Actual: sweden:swedish::netherlands:dutch, pred: security
Actual: russia:russian::germany:german, pred: europe
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: support
Actual: norway:norwegian::mexico:mexican, pred: earely
Actual: japan:japanese::australia:australian, pred: group
Actual: italy:italian::ireland:irish, pred: entitles
Actual: croatia:croatian::france:french, pred: f


 38%|███████████▋                   | 45937/122310 [1:38:58<66:53:40,  3.15s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.005917159763313609



 38%|███████████▋                   | 45944/122310 [1:38:58<48:55:57,  2.31s/it][A
 38%|███████████▋                   | 45957/122310 [1:38:59<28:41:54,  1.35s/it][A
 38%|███████████▋                   | 45962/122310 [1:39:00<23:52:31,  1.13s/it][A
 38%|███████████▋                   | 45967/122310 [1:39:01<19:29:15,  1.09it/s][A
 38%|███████████▋                   | 45970/122310 [1:39:02<17:19:16,  1.22it/s][A
 38%|████████████                    | 45982/122310 [1:39:02<9:52:24,  2.15it/s][A
 38%|████████████                    | 45988/122310 [1:39:03<8:04:30,  2.63it/s][A
 38%|████████████                    | 46000/122310 [1:39:04<5:16:28,  4.02it/s][A
 38%|████████████                    | 46009/122310 [1:39:05<4:12:34,  5.03it/s][A
 38%|████████████                    | 46011/122310 [1:39:06<4:32:27,  4.67it/s][A
 38%|████████████                    | 46023/122310 [1:39:06<3:10:21,  6.68it/s][A
 38%|████████████                    | 46028/122310 [1:39:07<3:11:17,  6.65

step: 5620, loss: 93.9912046862676, epoch: 0



 38%|████████████                    | 46108/122310 [1:39:14<2:01:52, 10.42it/s][A
 38%|████████████                    | 46116/122310 [1:39:15<2:03:35, 10.28it/s][A
 38%|████████████                    | 46135/122310 [1:39:16<1:31:09, 13.93it/s][A
 38%|████████████                    | 46140/122310 [1:39:17<1:48:46, 11.67it/s][A
 38%|████████████                    | 46153/122310 [1:39:17<1:40:22, 12.65it/s][A
 38%|████████████                    | 46164/122310 [1:39:18<1:38:54, 12.83it/s][A
 38%|████████████                    | 46177/122310 [1:39:19<1:30:12, 14.07it/s][A
 38%|████████████                    | 46187/122310 [1:39:20<1:32:25, 13.73it/s][A
 38%|████████████                    | 46200/122310 [1:39:21<1:26:10, 14.72it/s][A
 38%|████████████                    | 46202/122310 [1:39:21<1:53:40, 11.16it/s][A
 38%|████████████                    | 46206/122310 [1:39:22<2:15:00,  9.39it/s][A
 38%|████████████                    | 46213/122310 [1:39:23<2:15:45,  9.34

step: 5640, loss: 116.18090946784643, epoch: 0



 38%|████████████                    | 46270/122310 [1:39:30<2:32:04,  8.33it/s][A
 38%|████████████                    | 46283/122310 [1:39:31<1:59:56, 10.56it/s][A
 38%|████████████                    | 46289/122310 [1:39:31<2:08:57,  9.82it/s][A
 38%|████████████                    | 46301/122310 [1:39:32<1:49:34, 11.56it/s][A
 38%|████████████                    | 46314/122310 [1:39:33<1:38:39, 12.84it/s][A
 38%|████████████                    | 46327/122310 [1:39:34<1:29:33, 14.14it/s][A
 38%|████████████                    | 46341/122310 [1:39:35<1:24:54, 14.91it/s][A
 38%|████████████▏                   | 46355/122310 [1:39:35<1:23:04, 15.24it/s][A
 38%|████████████▏                   | 46360/122310 [1:39:36<1:40:42, 12.57it/s][A
 38%|████████████▏                   | 46375/122310 [1:39:37<1:29:42, 14.11it/s][A
 38%|████████████▏                   | 46383/122310 [1:39:38<1:37:45, 12.94it/s][A
 38%|████████████▏                   | 46395/122310 [1:39:39<1:33:10, 13.58

step: 5660, loss: 108.94338804914416, epoch: 0



 38%|████████████▏                   | 46458/122310 [1:39:46<2:48:39,  7.50it/s][A
 38%|████████████▏                   | 46460/122310 [1:39:47<3:26:31,  6.12it/s][A
 38%|████████████▏                   | 46468/122310 [1:39:48<2:57:27,  7.12it/s][A
 38%|████████████▏                   | 46478/122310 [1:39:49<2:31:24,  8.35it/s][A
 38%|████████████▏                   | 46482/122310 [1:39:49<2:49:38,  7.45it/s][A
 38%|████████████▏                   | 46495/122310 [1:39:50<2:13:30,  9.46it/s][A
 38%|████████████▏                   | 46503/122310 [1:39:51<2:11:08,  9.63it/s][A
 38%|████████████▏                   | 46515/122310 [1:39:52<1:53:45, 11.11it/s][A
 38%|████████████▏                   | 46521/122310 [1:39:53<2:06:36,  9.98it/s][A
 38%|████████████▏                   | 46539/122310 [1:39:54<1:34:19, 13.39it/s][A
 38%|████████████▏                   | 46549/122310 [1:39:55<1:36:21, 13.10it/s][A
 38%|████████████▏                   | 46556/122310 [1:39:55<1:50:51, 11.39

step: 5680, loss: 103.82306625322587, epoch: 0



 38%|████████████▏                   | 46665/122310 [1:40:03<1:17:28, 16.27it/s][A
 38%|████████████▏                   | 46673/122310 [1:40:04<1:27:21, 14.43it/s][A
 38%|████████████▏                   | 46677/122310 [1:40:05<1:50:02, 11.46it/s][A
 38%|████████████▏                   | 46682/122310 [1:40:05<2:09:29,  9.73it/s][A
 38%|████████████▏                   | 46693/122310 [1:40:06<1:56:22, 10.83it/s][A
 38%|████████████▏                   | 46701/122310 [1:40:07<1:59:09, 10.58it/s][A
 38%|████████████▏                   | 46710/122310 [1:40:08<1:57:48, 10.69it/s][A
 38%|████████████▏                   | 46719/122310 [1:40:09<1:56:26, 10.82it/s][A
 38%|████████████▏                   | 46729/122310 [1:40:10<1:53:04, 11.14it/s][A
 38%|████████████▏                   | 46737/122310 [1:40:10<1:59:28, 10.54it/s][A
 38%|████████████▏                   | 46743/122310 [1:40:11<2:11:34,  9.57it/s][A
 38%|████████████▏                   | 46756/122310 [1:40:12<1:50:32, 11.39

step: 5700, loss: 103.53370987131035, epoch: 0



 38%|████████████▎                   | 46838/122310 [1:40:19<1:49:40, 11.47it/s][A
 38%|████████████▎                   | 46862/122310 [1:40:20<1:13:52, 17.02it/s][A
 38%|████████████▎                   | 46867/122310 [1:40:21<1:30:25, 13.90it/s][A
 38%|████████████▎                   | 46876/122310 [1:40:22<1:35:27, 13.17it/s][A
 38%|████████████▎                   | 46878/122310 [1:40:23<2:09:13,  9.73it/s][A
 38%|████████████▎                   | 46889/122310 [1:40:23<1:56:36, 10.78it/s][A
 38%|████████████▎                   | 46893/122310 [1:40:24<2:17:56,  9.11it/s][A
 38%|████████████▎                   | 46909/122310 [1:40:25<1:41:49, 12.34it/s][A
 38%|████████████▎                   | 46912/122310 [1:40:26<2:10:39,  9.62it/s][A
 38%|████████████▎                   | 46918/122310 [1:40:27<2:18:58,  9.04it/s][A
 38%|████████████▎                   | 46931/122310 [1:40:27<1:52:09, 11.20it/s][A
 38%|████████████▎                   | 46936/122310 [1:40:28<2:08:53,  9.75

step: 5720, loss: 98.35928342676888, epoch: 0



 38%|████████████▎                   | 47014/122310 [1:40:35<1:56:02, 10.81it/s][A
 38%|████████████▎                   | 47018/122310 [1:40:36<2:18:12,  9.08it/s][A
 38%|████████████▎                   | 47023/122310 [1:40:37<2:32:23,  8.23it/s][A
 38%|████████████▎                   | 47039/122310 [1:40:38<1:46:50, 11.74it/s][A
 38%|████████████▎                   | 47047/122310 [1:40:39<1:51:15, 11.28it/s][A
 38%|████████████▎                   | 47054/122310 [1:40:39<1:58:47, 10.56it/s][A
 38%|████████████▎                   | 47063/122310 [1:40:40<1:56:17, 10.78it/s][A
 38%|████████████▎                   | 47076/122310 [1:40:41<1:41:04, 12.41it/s][A
 38%|████████████▎                   | 47087/122310 [1:40:42<1:37:34, 12.85it/s][A
 39%|████████████▎                   | 47090/122310 [1:40:43<2:03:33, 10.15it/s][A
 39%|████████████▎                   | 47094/122310 [1:40:43<2:24:57,  8.65it/s][A
 39%|████████████▎                   | 47109/122310 [1:40:44<1:46:36, 11.76

step: 5740, loss: 98.92763820932004, epoch: 0



 39%|████████████▎                   | 47191/122310 [1:40:53<3:00:09,  6.95it/s][A
 39%|████████████▎                   | 47194/122310 [1:40:54<3:18:56,  6.29it/s][A
 39%|████████████▎                   | 47201/122310 [1:40:54<3:03:04,  6.84it/s][A
 39%|████████████▎                   | 47211/122310 [1:40:55<2:31:19,  8.27it/s][A
 39%|████████████▎                   | 47212/122310 [1:40:56<3:15:03,  6.42it/s][A
 39%|████████████▎                   | 47224/122310 [1:40:57<2:22:44,  8.77it/s][A
 39%|████████████▎                   | 47229/122310 [1:40:58<2:34:21,  8.11it/s][A
 39%|████████████▎                   | 47238/122310 [1:40:58<2:19:03,  9.00it/s][A
 39%|████████████▎                   | 47244/122310 [1:40:59<2:26:29,  8.54it/s][A
 39%|████████████▎                   | 47251/122310 [1:41:00<2:25:05,  8.62it/s][A
 39%|████████████▎                   | 47265/122310 [1:41:01<1:50:50, 11.28it/s][A
 39%|████████████▎                   | 47272/122310 [1:41:02<1:58:16, 10.57

step: 5760, loss: 126.90857081656556, epoch: 0



 39%|████████████▍                   | 47351/122310 [1:41:07<1:42:52, 12.14it/s][A
 39%|████████████▍                   | 47358/122310 [1:41:08<1:51:12, 11.23it/s][A
 39%|████████████▍                   | 47361/122310 [1:41:09<2:18:38,  9.01it/s][A
 39%|████████████▍                   | 47374/122310 [1:41:10<1:51:24, 11.21it/s][A
 39%|████████████▍                   | 47381/122310 [1:41:10<1:58:47, 10.51it/s][A
 39%|████████████▍                   | 47389/122310 [1:41:11<2:00:14, 10.39it/s][A
 39%|████████████▍                   | 47396/122310 [1:41:12<2:05:36,  9.94it/s][A
 39%|████████████▍                   | 47402/122310 [1:41:13<2:15:23,  9.22it/s][A
 39%|████████████▍                   | 47414/122310 [1:41:13<1:53:21, 11.01it/s][A
 39%|████████████▍                   | 47423/122310 [1:41:14<1:52:26, 11.10it/s][A
 39%|████████████▍                   | 47441/122310 [1:41:15<1:25:36, 14.58it/s][A
 39%|████████████▍                   | 47451/122310 [1:41:16<1:29:51, 13.89

step: 5780, loss: 131.13265459561194, epoch: 0



 39%|████████████▍                   | 47518/122310 [1:41:23<1:52:31, 11.08it/s][A
 39%|████████████▍                   | 47532/122310 [1:41:24<1:36:12, 12.95it/s][A
 39%|████████████▍                   | 47537/122310 [1:41:25<1:54:35, 10.88it/s][A
 39%|████████████▍                   | 47549/122310 [1:41:26<1:44:27, 11.93it/s][A
 39%|████████████▍                   | 47561/122310 [1:41:26<1:37:38, 12.76it/s][A
 39%|████████████▍                   | 47567/122310 [1:41:27<1:51:31, 11.17it/s][A
 39%|████████████▍                   | 47587/122310 [1:41:28<1:22:08, 15.16it/s][A
 39%|████████████▍                   | 47592/122310 [1:41:29<1:39:31, 12.51it/s][A
 39%|████████████▍                   | 47605/122310 [1:41:30<1:31:50, 13.56it/s][A
 39%|████████████▍                   | 47616/122310 [1:41:30<1:31:37, 13.59it/s][A
 39%|████████████▍                   | 47624/122310 [1:41:31<1:39:54, 12.46it/s][A
 39%|████████████▍                   | 47629/122310 [1:41:32<1:58:04, 10.54

step: 5800, loss: 100.32769875149708, epoch: 0



 39%|████████████▍                   | 47693/122310 [1:41:39<2:44:12,  7.57it/s][A
 39%|████████████▍                   | 47701/122310 [1:41:40<2:30:42,  8.25it/s][A
 39%|████████████▍                   | 47703/122310 [1:41:41<3:12:12,  6.47it/s][A
 39%|████████████▍                   | 47721/122310 [1:41:42<1:52:43, 11.03it/s][A
 39%|████████████▍                   | 47737/122310 [1:41:43<1:31:38, 13.56it/s][A
 39%|████████████▍                   | 47742/122310 [1:41:44<1:51:52, 11.11it/s][A
 39%|████████████▍                   | 47748/122310 [1:41:44<2:03:38, 10.05it/s][A
 39%|████████████▍                   | 47757/122310 [1:41:45<2:00:14, 10.33it/s][A
 39%|████████████▍                   | 47760/122310 [1:41:46<2:34:30,  8.04it/s][A
 39%|████████████▍                   | 47765/122310 [1:41:47<2:49:25,  7.33it/s][A
 39%|████████████▌                   | 47780/122310 [1:41:48<2:00:15, 10.33it/s][A
 39%|████████████▌                   | 47791/122310 [1:41:49<1:51:49, 11.11

step: 5820, loss: 85.02349534954413, epoch: 0



 39%|████████████▌                   | 47873/122310 [1:41:56<1:40:23, 12.36it/s][A
 39%|████████████▌                   | 47881/122310 [1:41:57<1:45:36, 11.75it/s][A
 39%|████████████▌                   | 47887/122310 [1:41:58<1:57:46, 10.53it/s][A
 39%|████████████▌                   | 47898/122310 [1:41:59<1:47:03, 11.59it/s][A
 39%|████████████▌                   | 47905/122310 [1:41:59<1:54:11, 10.86it/s][A
 39%|████████████▌                   | 47913/122310 [1:42:00<1:57:23, 10.56it/s][A
 39%|████████████▌                   | 47926/122310 [1:42:01<1:40:17, 12.36it/s][A
 39%|████████████▌                   | 47936/122310 [1:42:02<1:38:55, 12.53it/s][A
 39%|████████████▌                   | 47945/122310 [1:42:02<1:41:25, 12.22it/s][A
 39%|████████████▌                   | 47950/122310 [1:42:03<1:57:28, 10.55it/s][A
 39%|████████████▌                   | 47958/122310 [1:42:04<1:58:18, 10.47it/s][A
 39%|████████████▌                   | 47967/122310 [1:42:05<1:54:06, 10.86

step: 5840, loss: 83.8713574362071, epoch: 0



 39%|████████████▌                   | 48027/122310 [1:42:12<2:12:26,  9.35it/s][A
 39%|████████████▌                   | 48035/122310 [1:42:13<2:08:39,  9.62it/s][A
 39%|████████████▌                   | 48044/122310 [1:42:13<2:00:58, 10.23it/s][A
 39%|████████████▌                   | 48047/122310 [1:42:14<2:28:13,  8.35it/s][A
 39%|████████████▌                   | 48055/122310 [1:42:15<2:18:04,  8.96it/s][A
 39%|████████████▌                   | 48063/122310 [1:42:16<2:11:07,  9.44it/s][A
 39%|████████████▌                   | 48073/122310 [1:42:16<1:57:07, 10.56it/s][A
 39%|████████████▌                   | 48084/122310 [1:42:17<1:45:28, 11.73it/s][A
 39%|████████████▌                   | 48099/122310 [1:42:18<1:29:58, 13.75it/s][A
 39%|████████████▌                   | 48108/122310 [1:42:19<1:35:26, 12.96it/s][A
 39%|████████████▌                   | 48120/122310 [1:42:20<1:31:24, 13.53it/s][A
 39%|████████████▌                   | 48133/122310 [1:42:20<1:26:17, 14.33

step: 5860, loss: 94.74768470780663, epoch: 0



 39%|████████████▌                   | 48190/122310 [1:42:28<2:07:55,  9.66it/s][A
 39%|████████████▌                   | 48200/122310 [1:42:28<1:58:25, 10.43it/s][A
 39%|████████████▌                   | 48204/122310 [1:42:29<2:18:48,  8.90it/s][A
 39%|████████████▌                   | 48209/122310 [1:42:30<2:32:32,  8.10it/s][A
 39%|████████████▌                   | 48219/122310 [1:42:31<2:12:39,  9.31it/s][A
 39%|████████████▌                   | 48223/122310 [1:42:32<2:33:49,  8.03it/s][A
 39%|████████████▌                   | 48232/122310 [1:42:32<2:18:04,  8.94it/s][A
 39%|████████████▌                   | 48237/122310 [1:42:33<2:32:03,  8.12it/s][A
 39%|████████████▌                   | 48241/122310 [1:42:34<2:52:26,  7.16it/s][A
 39%|████████████▌                   | 48244/122310 [1:42:35<3:24:10,  6.05it/s][A
 39%|████████████▌                   | 48249/122310 [1:42:36<3:26:09,  5.99it/s][A
 39%|████████████▌                   | 48255/122310 [1:42:37<3:14:16,  6.35

step: 5880, loss: 105.76881144990512, epoch: 0



 40%|████████████▋                   | 48343/122310 [1:42:44<1:19:17, 15.55it/s][A
 40%|████████████▋                   | 48351/122310 [1:42:45<1:28:11, 13.98it/s][A
 40%|████████████▋                   | 48363/122310 [1:42:46<1:26:32, 14.24it/s][A
 40%|████████████▋                   | 48366/122310 [1:42:47<1:50:05, 11.19it/s][A
 40%|████████████▋                   | 48378/122310 [1:42:47<1:39:52, 12.34it/s][A
 40%|████████████▋                   | 48395/122310 [1:42:48<1:21:56, 15.03it/s][A
 40%|████████████▋                   | 48406/122310 [1:42:49<1:24:23, 14.59it/s][A
 40%|████████████▋                   | 48410/122310 [1:42:50<1:44:34, 11.78it/s][A
 40%|████████████▋                   | 48420/122310 [1:42:51<1:43:14, 11.93it/s][A
 40%|████████████▋                   | 48423/122310 [1:42:51<2:08:49,  9.56it/s][A
 40%|████████████▋                   | 48431/122310 [1:42:52<2:07:09,  9.68it/s][A
 40%|████████████▋                   | 48447/122310 [1:42:54<2:04:39,  9.88

step: 5900, loss: 86.78611443054746, epoch: 0



 40%|████████████▋                   | 48504/122310 [1:43:00<2:15:20,  9.09it/s][A
 40%|████████████▋                   | 48517/122310 [1:43:01<1:49:55, 11.19it/s][A
 40%|████████████▋                   | 48521/122310 [1:43:02<2:11:56,  9.32it/s][A
 40%|████████████▋                   | 48529/122310 [1:43:03<2:09:27,  9.50it/s][A
 40%|████████████▋                   | 48534/122310 [1:43:04<3:09:11,  6.50it/s][A
 40%|████████████▋                   | 48536/122310 [1:43:05<3:41:28,  5.55it/s][A
 40%|████████████▋                   | 48542/122310 [1:43:06<3:24:18,  6.02it/s][A
 40%|████████████▋                   | 48550/122310 [1:43:07<2:52:30,  7.13it/s][A
 40%|████████████▋                   | 48561/122310 [1:43:07<2:17:13,  8.96it/s][A
 40%|████████████▋                   | 48564/122310 [1:43:08<2:43:31,  7.52it/s][A
 40%|████████████▋                   | 48571/122310 [1:43:09<2:38:26,  7.76it/s][A
 40%|████████████▋                   | 48579/122310 [1:43:10<2:27:09,  8.35

step: 5920, loss: 91.15165820341215, epoch: 0



 40%|████████████▋                   | 48636/122310 [1:43:16<1:53:39, 10.80it/s][A
 40%|████████████▋                   | 48643/122310 [1:43:17<2:01:53, 10.07it/s][A
 40%|████████████▋                   | 48653/122310 [1:43:18<1:53:39, 10.80it/s][A
 40%|████████████▋                   | 48666/122310 [1:43:19<1:39:33, 12.33it/s][A
 40%|████████████▋                   | 48672/122310 [1:43:19<1:52:02, 10.95it/s][A
 40%|████████████▋                   | 48678/122310 [1:43:20<2:04:04,  9.89it/s][A
 40%|████████████▋                   | 48688/122310 [1:43:21<1:55:25, 10.63it/s][A
 40%|████████████▋                   | 48693/122310 [1:43:22<2:10:45,  9.38it/s][A
 40%|████████████▋                   | 48701/122310 [1:43:23<2:06:19,  9.71it/s][A
 40%|████████████▋                   | 48703/122310 [1:43:23<2:40:27,  7.65it/s][A
 40%|████████████▋                   | 48717/122310 [1:43:24<1:53:22, 10.82it/s][A
 40%|████████████▋                   | 48724/122310 [1:43:25<1:58:53, 10.32

step: 5940, loss: 88.44464728454899, epoch: 0



 40%|████████████▊                   | 48799/122310 [1:43:32<1:52:08, 10.93it/s][A
 40%|████████████▊                   | 48808/122310 [1:43:33<1:49:08, 11.22it/s][A
 40%|████████████▊                   | 48820/122310 [1:43:33<1:37:13, 12.60it/s][A
 40%|████████████▊                   | 48828/122310 [1:43:34<1:42:12, 11.98it/s][A
 40%|████████████▊                   | 48836/122310 [1:43:35<1:46:12, 11.53it/s][A
 40%|████████████▊                   | 48844/122310 [1:43:36<1:49:04, 11.23it/s][A
 40%|████████████▊                   | 48850/122310 [1:43:36<1:59:43, 10.23it/s][A
 40%|████████████▊                   | 48856/122310 [1:43:37<2:08:18,  9.54it/s][A
 40%|████████████▊                   | 48862/122310 [1:43:38<2:14:47,  9.08it/s][A
 40%|████████████▊                   | 48869/122310 [1:43:39<2:14:16,  9.12it/s][A
 40%|████████████▊                   | 48883/122310 [1:43:39<1:42:41, 11.92it/s][A
 40%|████████████▊                   | 48894/122310 [1:43:40<1:36:50, 12.64

step: 5960, loss: 77.35544922209857, epoch: 0



 40%|████████████▊                   | 48971/122310 [1:43:47<1:30:43, 13.47it/s][A
 40%|████████████▊                   | 48981/122310 [1:43:48<1:31:06, 13.42it/s][A
 40%|████████████▊                   | 48985/122310 [1:43:48<1:51:49, 10.93it/s][A
 40%|████████████▊                   | 48992/122310 [1:43:49<1:56:09, 10.52it/s][A
 40%|████████████▊                   | 48999/122310 [1:43:50<2:00:43, 10.12it/s][A
 40%|████████████▊                   | 49011/122310 [1:43:51<1:43:39, 11.79it/s][A
 40%|████████████▊                   | 49015/122310 [1:43:51<2:04:00,  9.85it/s][A
 40%|████████████▊                   | 49019/122310 [1:43:52<2:24:06,  8.48it/s][A
 40%|████████████▊                   | 49027/122310 [1:43:53<2:14:16,  9.10it/s][A
 40%|████████████▊                   | 49032/122310 [1:43:54<2:26:13,  8.35it/s][A
 40%|████████████▊                   | 49048/122310 [1:43:55<1:40:11, 12.19it/s][A
 40%|████████████▊                   | 49057/122310 [1:43:55<1:41:13, 12.06

step: 5980, loss: 104.27004395881849, epoch: 0



 40%|████████████▊                   | 49123/122310 [1:44:02<2:38:25,  7.70it/s][A
 40%|████████████▊                   | 49132/122310 [1:44:03<2:15:31,  9.00it/s][A
 40%|████████████▊                   | 49141/122310 [1:44:04<2:03:38,  9.86it/s][A
 40%|████████████▊                   | 49148/122310 [1:44:04<2:05:56,  9.68it/s][A
 40%|████████████▊                   | 49154/122310 [1:44:05<2:13:08,  9.16it/s][A
 40%|████████████▊                   | 49174/122310 [1:44:06<1:25:06, 14.32it/s][A
 40%|████████████▊                   | 49186/122310 [1:44:07<1:22:27, 14.78it/s][A
 40%|████████████▊                   | 49197/122310 [1:44:07<1:23:08, 14.66it/s][A
 40%|████████████▊                   | 49203/122310 [1:44:08<1:36:24, 12.64it/s][A
 40%|████████████▊                   | 49207/122310 [1:44:09<1:56:46, 10.43it/s][A
 40%|████████████▉                   | 49211/122310 [1:44:10<2:17:16,  8.88it/s][A
 40%|████████████▉                   | 49232/122310 [1:44:10<1:23:56, 14.51

step: 6000, loss: 87.46416079521863, epoch: 0
sim1 and sim2 are 0.6154804099289283, 0.13108739969905162
cosine of pred and queen: 0.11717474981304746
Actual: athens:greece::madrid:spain, pred: europe
Actual: bangkok:thailand::islamabad:pakistan, pred: last
Actual: beijing:china::tokyo:japan, pred: across
Actual: berlin:germany::rome:italy, pred: many
Actual: cairo:egypt::ottawa:canada, pred: family
Actual: kabul:afghanistan::hanoi:vietnam, pred: ahmed
Actual: canberra:australia::doha:qatar, pred: france
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: cloak
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: tuareg
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: new
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: maharastra:mum


 40%|████████████▉                   | 49292/122310 [1:44:31<1:34:52, 12.83it/s][A

Actual: europe:euro::japan:yen, pred: china
Actual: india:rupee::denmark:krone, pred: nestorius
Actual: usa:dollar::nigeria:naira, pred: irreconcilable
Actual: switzerland:swiss::spain:spanish, pred: world
Actual: thailand:thai::india:indian, pred: expresso
Actual: sweden:swedish::netherlands:dutch, pred: news
Actual: russia:russian::germany:german, pred: france
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: support
Actual: norway:norwegian::mexico:mexican, pred: borders
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: entitles
Actual: croatia:croatian::france:french, pred: friday
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.022222222222222223
Actual: walk:walks::vanish:vanishes, pred: bariloche
Actual: work:works::generate:generates, pred: conventionally
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, 


 40%|████████████▍                  | 49300/122310 [1:45:35<53:25:11,  2.63s/it][A
 40%|████████████▍                  | 49317/122310 [1:45:36<30:12:09,  1.49s/it][A
 40%|████████████▌                  | 49327/122310 [1:45:36<22:25:52,  1.11s/it][A
 40%|████████████▌                  | 49334/122310 [1:45:37<18:06:33,  1.12it/s][A
 40%|████████████▌                  | 49336/122310 [1:45:38<17:15:47,  1.17it/s][A
 40%|████████████▌                  | 49342/122310 [1:45:39<13:29:11,  1.50it/s][A
 40%|████████████▌                  | 49346/122310 [1:45:39<11:35:44,  1.75it/s][A
 40%|████████████▉                   | 49352/122310 [1:45:40<8:54:02,  2.28it/s][A
 40%|████████████▉                   | 49363/122310 [1:45:41<5:36:57,  3.61it/s][A
 40%|████████████▉                   | 49378/122310 [1:45:42<3:29:48,  5.79it/s][A
 40%|████████████▉                   | 49382/122310 [1:45:42<3:32:14,  5.73it/s][A
 40%|████████████▉                   | 49392/122310 [1:45:43<2:49:58,  7.15

step: 6020, loss: 98.50049338052085, epoch: 0



 40%|████████████▉                   | 49470/122310 [1:45:50<1:46:44, 11.37it/s][A
 40%|████████████▉                   | 49478/122310 [1:45:51<1:47:56, 11.25it/s][A
 40%|████████████▉                   | 49488/122310 [1:45:52<1:41:39, 11.94it/s][A
 40%|████████████▉                   | 49491/122310 [1:45:53<2:06:37,  9.59it/s][A
 40%|████████████▉                   | 49504/122310 [1:45:53<1:41:17, 11.98it/s][A
 40%|████████████▉                   | 49512/122310 [1:45:54<1:44:13, 11.64it/s][A
 40%|████████████▉                   | 49516/122310 [1:45:55<2:04:10,  9.77it/s][A
 40%|████████████▉                   | 49522/122310 [1:45:56<2:10:28,  9.30it/s][A
 40%|████████████▉                   | 49532/122310 [1:45:56<1:54:36, 10.58it/s][A
 41%|████████████▉                   | 49542/122310 [1:45:57<1:45:35, 11.49it/s][A
 41%|████████████▉                   | 49550/122310 [1:45:58<1:47:34, 11.27it/s][A
 41%|████████████▉                   | 49554/122310 [1:45:59<2:07:13,  9.53

step: 6040, loss: 105.33975897059754, epoch: 0



 41%|████████████▉                   | 49634/122310 [1:46:05<1:34:47, 12.78it/s][A
 41%|████████████▉                   | 49646/122310 [1:46:06<1:28:49, 13.63it/s][A
 41%|████████████▉                   | 49658/122310 [1:46:07<1:25:29, 14.16it/s][A
 41%|████████████▉                   | 49660/122310 [1:46:08<1:52:35, 10.75it/s][A
 41%|████████████▉                   | 49675/122310 [1:46:08<1:30:29, 13.38it/s][A
 41%|████████████▉                   | 49681/122310 [1:46:09<1:43:55, 11.65it/s][A
 41%|████████████▉                   | 49683/122310 [1:46:10<2:14:44,  8.98it/s][A
 41%|█████████████                   | 49697/122310 [1:46:11<1:42:56, 11.76it/s][A
 41%|█████████████                   | 49704/122310 [1:46:12<1:50:23, 10.96it/s][A
 41%|█████████████                   | 49711/122310 [1:46:12<1:56:30, 10.39it/s][A
 41%|█████████████                   | 49723/122310 [1:46:13<1:41:22, 11.93it/s][A
 41%|█████████████                   | 49730/122310 [1:46:14<1:50:01, 10.99

step: 6060, loss: 95.49384343707229, epoch: 0



 41%|█████████████                   | 49805/122310 [1:46:21<1:53:58, 10.60it/s][A
 41%|█████████████                   | 49821/122310 [1:46:22<1:27:55, 13.74it/s][A
 41%|█████████████                   | 49828/122310 [1:46:22<1:37:39, 12.37it/s][A
 41%|█████████████                   | 49833/122310 [1:46:23<1:53:49, 10.61it/s][A
 41%|█████████████                   | 49842/122310 [1:46:24<1:50:31, 10.93it/s][A
 41%|█████████████                   | 49856/122310 [1:46:25<1:32:30, 13.05it/s][A
 41%|█████████████                   | 49860/122310 [1:46:25<1:52:37, 10.72it/s][A
 41%|█████████████                   | 49875/122310 [1:46:26<1:30:26, 13.35it/s][A
 41%|█████████████                   | 49882/122310 [1:46:27<1:39:53, 12.09it/s][A
 41%|█████████████                   | 49892/122310 [1:46:28<1:37:41, 12.35it/s][A
 41%|█████████████                   | 49899/122310 [1:46:29<1:45:54, 11.40it/s][A
 41%|█████████████                   | 49912/122310 [1:46:29<1:32:37, 13.03

step: 6080, loss: 187.89410995832904, epoch: 0



 41%|█████████████                   | 49967/122310 [1:46:36<2:28:34,  8.12it/s][A
 41%|█████████████                   | 49975/122310 [1:46:37<2:17:02,  8.80it/s][A
 41%|█████████████                   | 49989/122310 [1:46:38<1:44:08, 11.57it/s][A
 41%|█████████████                   | 49993/122310 [1:46:39<2:04:06,  9.71it/s][A
 41%|█████████████                   | 50005/122310 [1:46:39<1:45:01, 11.47it/s][A
 41%|█████████████                   | 50016/122310 [1:46:40<1:38:06, 12.28it/s][A
 41%|█████████████                   | 50021/122310 [1:46:41<1:53:56, 10.57it/s][A
 41%|█████████████                   | 50031/122310 [1:46:42<1:46:18, 11.33it/s][A
 41%|█████████████                   | 50040/122310 [1:46:42<1:45:14, 11.45it/s][A
 41%|█████████████                   | 50050/122310 [1:46:43<1:41:06, 11.91it/s][A
 41%|█████████████                   | 50056/122310 [1:46:44<1:52:57, 10.66it/s][A
 41%|█████████████                   | 50070/122310 [1:46:45<1:33:07, 12.93

step: 6100, loss: 81.56217375894609, epoch: 0



 41%|█████████████                   | 50139/122310 [1:46:52<2:06:21,  9.52it/s][A
 41%|█████████████                   | 50148/122310 [1:46:52<1:58:29, 10.15it/s][A
 41%|█████████████                   | 50161/122310 [1:46:53<1:38:52, 12.16it/s][A
 41%|█████████████▏                  | 50169/122310 [1:46:54<1:43:18, 11.64it/s][A
 41%|█████████████▏                  | 50183/122310 [1:46:55<1:28:32, 13.58it/s][A
 41%|█████████████▏                  | 50195/122310 [1:46:55<1:24:54, 14.15it/s][A
 41%|█████████████▏                  | 50204/122310 [1:46:56<1:29:36, 13.41it/s][A
 41%|█████████████▏                  | 50207/122310 [1:46:57<1:53:57, 10.55it/s][A
 41%|█████████████▏                  | 50220/122310 [1:46:58<1:36:22, 12.47it/s][A
 41%|█████████████▏                  | 50236/122310 [1:46:59<1:20:14, 14.97it/s][A
 41%|█████████████▏                  | 50246/122310 [1:46:59<1:23:37, 14.36it/s][A
 41%|█████████████▏                  | 50255/122310 [1:47:00<1:28:23, 13.59

step: 6120, loss: 98.17496328507248, epoch: 0



 41%|█████████████▏                  | 50339/122310 [1:47:07<1:37:45, 12.27it/s][A
 41%|█████████████▏                  | 50345/122310 [1:47:08<1:50:11, 10.88it/s][A
 41%|█████████████▏                  | 50353/122310 [1:47:09<1:51:55, 10.71it/s][A
 41%|█████████████▏                  | 50358/122310 [1:47:09<2:06:36,  9.47it/s][A
 41%|█████████████▏                  | 50368/122310 [1:47:10<1:53:31, 10.56it/s][A
 41%|█████████████▏                  | 50382/122310 [1:47:11<1:33:10, 12.87it/s][A
 41%|█████████████▏                  | 50392/122310 [1:47:12<1:32:44, 12.93it/s][A
 41%|█████████████▏                  | 50409/122310 [1:47:12<1:16:19, 15.70it/s][A
 41%|█████████████▏                  | 50416/122310 [1:47:13<1:27:41, 13.66it/s][A
 41%|█████████████▏                  | 50423/122310 [1:47:14<1:36:56, 12.36it/s][A
 41%|█████████████▏                  | 50430/122310 [1:47:15<1:45:18, 11.38it/s][A
 41%|█████████████▏                  | 50436/122310 [1:47:16<1:56:52, 10.25

step: 6140, loss: 108.43789121934984, epoch: 0



 41%|█████████████▏                  | 50504/122310 [1:47:22<2:21:23,  8.46it/s][A
 41%|█████████████▏                  | 50507/122310 [1:47:23<2:49:15,  7.07it/s][A
 41%|█████████████▏                  | 50514/122310 [1:47:24<2:34:49,  7.73it/s][A
 41%|█████████████▏                  | 50521/122310 [1:47:25<2:26:42,  8.16it/s][A
 41%|█████████████▏                  | 50526/122310 [1:47:25<2:36:07,  7.66it/s][A
 41%|█████████████▏                  | 50543/122310 [1:47:26<1:39:30, 12.02it/s][A
 41%|█████████████▏                  | 50551/122310 [1:47:27<1:43:39, 11.54it/s][A
 41%|█████████████▏                  | 50560/122310 [1:47:28<1:43:19, 11.57it/s][A
 41%|█████████████▏                  | 50571/122310 [1:47:29<1:36:04, 12.45it/s][A
 41%|█████████████▏                  | 50584/122310 [1:47:29<1:26:58, 13.74it/s][A
 41%|█████████████▏                  | 50587/122310 [1:47:30<1:50:39, 10.80it/s][A
 41%|█████████████▏                  | 50595/122310 [1:47:31<1:52:05, 10.66

step: 6160, loss: 84.26008277296194, epoch: 0



 41%|█████████████▎                  | 50651/122310 [1:47:39<3:44:11,  5.33it/s][A
 41%|█████████████▎                  | 50662/122310 [1:47:40<2:44:50,  7.24it/s][A
 41%|█████████████▎                  | 50669/122310 [1:47:41<2:35:25,  7.68it/s][A
 41%|█████████████▎                  | 50676/122310 [1:47:42<2:28:47,  8.02it/s][A
 41%|█████████████▎                  | 50681/122310 [1:47:42<2:36:07,  7.65it/s][A
 41%|█████████████▎                  | 50682/122310 [1:47:43<3:21:58,  5.91it/s][A
 41%|█████████████▎                  | 50684/122310 [1:47:44<3:58:51,  5.00it/s][A
 41%|█████████████▎                  | 50695/122310 [1:47:45<2:36:07,  7.65it/s][A
 41%|█████████████▎                  | 50705/122310 [1:47:45<2:09:25,  9.22it/s][A
 41%|█████████████▎                  | 50712/122310 [1:47:46<2:09:49,  9.19it/s][A
 41%|█████████████▎                  | 50721/122310 [1:47:47<2:00:30,  9.90it/s][A
 41%|█████████████▎                  | 50730/122310 [1:47:48<1:54:10, 10.45

step: 6180, loss: 103.57634768627707, epoch: 0



 42%|█████████████▎                  | 50793/122310 [1:47:53<1:50:47, 10.76it/s][A
 42%|█████████████▎                  | 50804/122310 [1:47:54<1:41:29, 11.74it/s][A
 42%|█████████████▎                  | 50815/122310 [1:47:55<1:35:03, 12.54it/s][A
 42%|█████████████▎                  | 50827/122310 [1:47:55<1:28:32, 13.45it/s][A
 42%|█████████████▎                  | 50837/122310 [1:47:56<1:29:23, 13.33it/s][A
 42%|█████████████▎                  | 50844/122310 [1:47:57<1:38:47, 12.06it/s][A
 42%|█████████████▎                  | 50858/122310 [1:47:58<1:25:47, 13.88it/s][A
 42%|█████████████▎                  | 50861/122310 [1:47:59<1:49:21, 10.89it/s][A
 42%|█████████████▎                  | 50866/122310 [1:47:59<2:04:15,  9.58it/s][A
 42%|█████████████▎                  | 50871/122310 [1:48:00<2:17:07,  8.68it/s][A
 42%|█████████████▎                  | 50876/122310 [1:48:01<2:28:34,  8.01it/s][A
 42%|█████████████▎                  | 50887/122310 [1:48:02<2:00:06,  9.91

step: 6200, loss: 82.41945930458336, epoch: 0



 42%|█████████████▎                  | 50971/122310 [1:48:08<1:44:49, 11.34it/s][A
 42%|█████████████▎                  | 50982/122310 [1:48:09<1:37:13, 12.23it/s][A
 42%|█████████████▎                  | 50987/122310 [1:48:10<1:53:07, 10.51it/s][A
 42%|█████████████▎                  | 50996/122310 [1:48:11<1:49:30, 10.85it/s][A
 42%|█████████████▎                  | 51005/122310 [1:48:12<1:46:44, 11.13it/s][A
 42%|█████████████▎                  | 51021/122310 [1:48:12<1:24:39, 14.03it/s][A
 42%|█████████████▎                  | 51024/122310 [1:48:13<1:48:20, 10.97it/s][A
 42%|█████████████▎                  | 51029/122310 [1:48:14<2:03:23,  9.63it/s][A
 42%|█████████████▎                  | 51031/122310 [1:48:15<2:37:48,  7.53it/s][A
 42%|█████████████▎                  | 51035/122310 [1:48:15<2:54:10,  6.82it/s][A
 42%|█████████████▎                  | 51040/122310 [1:48:16<2:56:29,  6.73it/s][A
 42%|█████████████▎                  | 51046/122310 [1:48:17<2:48:31,  7.05

step: 6220, loss: 103.34319781824058, epoch: 0



 42%|█████████████▎                  | 51112/122310 [1:48:24<2:25:38,  8.15it/s][A
 42%|█████████████▎                  | 51120/122310 [1:48:25<2:16:00,  8.72it/s][A
 42%|█████████████▍                  | 51125/122310 [1:48:26<2:27:35,  8.04it/s][A
 42%|█████████████▍                  | 51137/122310 [1:48:26<1:56:06, 10.22it/s][A
 42%|█████████████▍                  | 51149/122310 [1:48:27<1:41:14, 11.71it/s][A
 42%|█████████████▍                  | 51160/122310 [1:48:28<1:35:24, 12.43it/s][A
 42%|█████████████▍                  | 51172/122310 [1:48:29<1:29:13, 13.29it/s][A
 42%|█████████████▍                  | 51181/122310 [1:48:29<1:33:04, 12.74it/s][A
 42%|█████████████▍                  | 51183/122310 [1:48:30<2:01:50,  9.73it/s][A
 42%|█████████████▍                  | 51188/122310 [1:48:31<2:15:05,  8.77it/s][A
 42%|█████████████▍                  | 51195/122310 [1:48:32<2:14:11,  8.83it/s][A
 42%|█████████████▍                  | 51197/122310 [1:48:33<2:51:50,  6.90

step: 6240, loss: 91.30675554570185, epoch: 0



 42%|█████████████▍                  | 51274/122310 [1:48:40<1:32:10, 12.84it/s][A
 42%|█████████████▍                  | 51291/122310 [1:48:40<1:15:35, 15.66it/s][A
 42%|█████████████▍                  | 51297/122310 [1:48:41<1:28:45, 13.33it/s][A
 42%|█████████████▍                  | 51303/122310 [1:48:42<1:41:24, 11.67it/s][A
 42%|█████████████▍                  | 51308/122310 [1:48:43<1:56:36, 10.15it/s][A
 42%|█████████████▍                  | 51316/122310 [1:48:43<1:55:30, 10.24it/s][A
 42%|█████████████▍                  | 51325/122310 [1:48:44<1:50:44, 10.68it/s][A
 42%|█████████████▍                  | 51335/122310 [1:48:45<1:43:35, 11.42it/s][A
 42%|█████████████▍                  | 51341/122310 [1:48:46<1:54:20, 10.34it/s][A
 42%|█████████████▍                  | 51353/122310 [1:48:46<1:39:02, 11.94it/s][A
 42%|█████████████▍                  | 51356/122310 [1:48:47<2:03:52,  9.55it/s][A
 42%|█████████████▍                  | 51368/122310 [1:48:48<1:44:10, 11.35

step: 6260, loss: 105.3367123648422, epoch: 0



 42%|█████████████▍                  | 51432/122310 [1:48:55<2:50:48,  6.92it/s][A
 42%|█████████████▍                  | 51438/122310 [1:48:56<2:43:58,  7.20it/s][A
 42%|█████████████▍                  | 51442/122310 [1:48:56<2:58:40,  6.61it/s][A
 42%|█████████████▍                  | 51450/122310 [1:48:57<2:32:02,  7.77it/s][A
 42%|█████████████▍                  | 51454/122310 [1:48:58<2:45:53,  7.12it/s][A
 42%|█████████████▍                  | 51462/122310 [1:48:59<2:22:42,  8.27it/s][A
 42%|█████████████▍                  | 51469/122310 [1:48:59<2:16:08,  8.67it/s][A
 42%|█████████████▍                  | 51480/122310 [1:49:00<1:51:20, 10.60it/s][A
 42%|█████████████▍                  | 51499/122310 [1:49:01<1:17:37, 15.20it/s][A
 42%|█████████████▍                  | 51508/122310 [1:49:01<1:22:35, 14.29it/s][A
 42%|█████████████▍                  | 51522/122310 [1:49:02<1:15:12, 15.69it/s][A
 42%|█████████████▍                  | 51530/122310 [1:49:03<1:22:47, 14.25

step: 6280, loss: 98.7056053034619, epoch: 0



 42%|█████████████▌                  | 51606/122310 [1:49:10<1:27:05, 13.53it/s][A
 42%|█████████████▌                  | 51616/122310 [1:49:10<1:26:41, 13.59it/s][A
 42%|█████████████▌                  | 51626/122310 [1:49:11<1:26:43, 13.58it/s][A
 42%|█████████████▌                  | 51631/122310 [1:49:12<1:41:48, 11.57it/s][A
 42%|█████████████▌                  | 51633/122310 [1:49:12<2:12:00,  8.92it/s][A
 42%|█████████████▌                  | 51639/122310 [1:49:13<2:14:51,  8.73it/s][A
 42%|█████████████▌                  | 51649/122310 [1:49:14<1:55:11, 10.22it/s][A
 42%|█████████████▌                  | 51662/122310 [1:49:15<1:34:11, 12.50it/s][A
 42%|█████████████▌                  | 51679/122310 [1:49:15<1:15:08, 15.67it/s][A
 42%|█████████████▌                  | 51690/122310 [1:49:16<1:15:54, 15.51it/s][A
 42%|█████████████▌                  | 51700/122310 [1:49:17<1:18:37, 14.97it/s][A
 42%|█████████████▌                  | 51709/122310 [1:49:18<1:22:48, 14.21

step: 6300, loss: 95.77237166603841, epoch: 0



 42%|█████████████▌                  | 51788/122310 [1:49:24<1:43:02, 11.41it/s][A
 42%|█████████████▌                  | 51792/122310 [1:49:25<2:01:54,  9.64it/s][A
 42%|█████████████▌                  | 51808/122310 [1:49:26<1:28:13, 13.32it/s][A
 42%|█████████████▌                  | 51817/122310 [1:49:26<1:30:12, 13.02it/s][A
 42%|█████████████▌                  | 51825/122310 [1:49:27<1:34:38, 12.41it/s][A
 42%|█████████████▌                  | 51833/122310 [1:49:28<1:38:03, 11.98it/s][A
 42%|█████████████▌                  | 51835/122310 [1:49:29<2:46:11,  7.07it/s][A
 42%|█████████████▌                  | 51842/122310 [1:49:30<2:32:43,  7.69it/s][A
 42%|█████████████▌                  | 51853/122310 [1:49:31<2:02:01,  9.62it/s][A
 42%|█████████████▌                  | 51862/122310 [1:49:31<1:53:23, 10.35it/s][A
 42%|█████████████▌                  | 51875/122310 [1:49:32<1:34:22, 12.44it/s][A
 42%|█████████████▌                  | 51886/122310 [1:49:33<1:28:51, 13.21

step: 6320, loss: 101.85596573672075, epoch: 0



 42%|█████████████▌                  | 51961/122310 [1:49:39<1:23:05, 14.11it/s][A
 42%|█████████████▌                  | 51968/122310 [1:49:40<1:32:15, 12.71it/s][A
 42%|█████████████▌                  | 51977/122310 [1:49:40<1:33:08, 12.58it/s][A
 43%|█████████████▌                  | 51984/122310 [1:49:41<1:40:11, 11.70it/s][A
 43%|█████████████▌                  | 51990/122310 [1:49:42<2:23:28,  8.17it/s][A
 43%|█████████████▌                  | 51996/122310 [1:49:43<2:23:03,  8.19it/s][A
 43%|█████████████▌                  | 52003/122310 [1:49:44<2:17:11,  8.54it/s][A
 43%|█████████████▌                  | 52009/122310 [1:49:45<2:18:53,  8.44it/s][A
 43%|█████████████▌                  | 52015/122310 [1:49:45<2:19:59,  8.37it/s][A
 43%|█████████████▌                  | 52023/122310 [1:49:46<2:08:52,  9.09it/s][A
 43%|█████████████▌                  | 52032/122310 [1:49:47<1:57:20,  9.98it/s][A
 43%|█████████████▌                  | 52036/122310 [1:49:48<2:14:56,  8.68

step: 6340, loss: 80.73839834248207, epoch: 0



 43%|█████████████▋                  | 52101/122310 [1:49:53<1:39:06, 11.81it/s][A
 43%|█████████████▋                  | 52112/122310 [1:49:54<1:32:06, 12.70it/s][A
 43%|█████████████▋                  | 52118/122310 [1:49:55<1:42:33, 11.41it/s][A
 43%|█████████████▋                  | 52126/122310 [1:49:56<1:43:51, 11.26it/s][A
 43%|█████████████▋                  | 52132/122310 [1:49:56<1:52:41, 10.38it/s][A
 43%|█████████████▋                  | 52141/122310 [1:49:57<1:46:55, 10.94it/s][A
 43%|█████████████▋                  | 52145/122310 [1:49:58<2:05:32,  9.32it/s][A
 43%|█████████████▋                  | 52152/122310 [1:49:59<2:04:25,  9.40it/s][A
 43%|█████████████▋                  | 52161/122310 [1:49:59<1:54:09, 10.24it/s][A
 43%|█████████████▋                  | 52181/122310 [1:50:00<1:16:11, 15.34it/s][A
 43%|█████████████▋                  | 52189/122310 [1:50:01<1:23:33, 13.99it/s][A
 43%|█████████████▋                  | 52194/122310 [1:50:02<1:38:30, 11.86

step: 6360, loss: 90.67523648458584, epoch: 0



 43%|█████████████▋                  | 52252/122310 [1:50:08<2:20:29,  8.31it/s][A
 43%|█████████████▋                  | 52263/122310 [1:50:09<1:52:53, 10.34it/s][A
 43%|█████████████▋                  | 52274/122310 [1:50:10<1:39:20, 11.75it/s][A
 43%|█████████████▋                  | 52283/122310 [1:50:10<1:37:51, 11.93it/s][A
 43%|█████████████▋                  | 52286/122310 [1:50:18<7:27:01,  2.61it/s][A
 43%|█████████████▋                  | 52293/122310 [1:50:18<5:49:43,  3.34it/s][A
 43%|█████████████▋                  | 52299/122310 [1:50:19<4:53:42,  3.97it/s][A
 43%|█████████████▋                  | 52306/122310 [1:50:20<4:00:29,  4.85it/s][A
 43%|█████████████▋                  | 52316/122310 [1:50:21<3:00:28,  6.46it/s][A
 43%|█████████████▋                  | 52324/122310 [1:50:21<2:37:45,  7.39it/s][A
 43%|█████████████▋                  | 52332/122310 [1:50:22<2:23:01,  8.15it/s][A

step: 6380, loss: 96.35267847931638, epoch: 0



 43%|█████████████▋                  | 52337/122310 [1:50:23<2:28:42,  7.84it/s][A
 43%|█████████████▋                  | 52341/122310 [1:50:23<2:41:52,  7.20it/s][A
 43%|█████████████▋                  | 52350/122310 [1:50:24<2:16:41,  8.53it/s][A
 43%|█████████████▋                  | 52359/122310 [1:50:25<2:01:23,  9.60it/s][A
 43%|█████████████▋                  | 52366/122310 [1:50:26<2:01:23,  9.60it/s][A
 43%|█████████████▋                  | 52377/122310 [1:50:26<1:44:27, 11.16it/s][A
 43%|█████████████▋                  | 52395/122310 [1:50:27<1:17:18, 15.07it/s][A
 43%|██████████████▌                   | 52418/122310 [1:50:28<58:36, 19.87it/s][A
 43%|█████████████▋                  | 52424/122310 [1:50:29<1:11:11, 16.36it/s][A
 43%|█████████████▋                  | 52428/122310 [1:50:29<1:28:26, 13.17it/s][A
 43%|█████████████▋                  | 52439/122310 [1:50:30<1:24:55, 13.71it/s][A
 43%|█████████████▋                  | 52446/122310 [1:50:31<1:33:34, 12.44

step: 6400, loss: 125.24444635575536, epoch: 0
sim1 and sim2 are 0.6217158986587233, 0.1331221369895884
cosine of pred and queen: 0.06964792757937457
Actual: athens:greece::madrid:spain, pred: europe
Actual: bangkok:thailand::islamabad:pakistan, pred: india
Actual: beijing:china::tokyo:japan, pred: used
Actual: berlin:germany::rome:italy, pred: world
Actual: cairo:egypt::ottawa:canada, pred: family
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: france
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: cloak
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: resemblest
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: new
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: maharastra:

Actual: convenient:inconvenient::convincing:unconvincing, pred: lim
Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: year
Actual: islamabad:pakistan::oslo:norway, pred: zemira
Actual: grandfather:grandmother::father:mother, pred: would
Actual: grandpa:grandma::sons:daughters, pred: bring
Actual: king:queen::husband:wife, pred: mbelwa
Actual: man:woman::brothers:sisters, pred: nidhi
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: chief
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: chief
Actual: tripura:agartala::odisha:bhubaneswar, pred: jefferson
Actual: algeria:dinar::japan:yen, pred: holyhead
Actual: arge


 43%|█████████████▎                 | 52491/122310 [1:51:53<80:29:33,  4.15s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.005917159763313609



 43%|█████████████▎                 | 52499/122310 [1:51:53<50:53:44,  2.62s/it][A
 43%|█████████████▎                 | 52506/122310 [1:51:54<35:15:40,  1.82s/it][A
 43%|█████████████▎                 | 52519/122310 [1:51:55<19:34:14,  1.01s/it][A
 43%|█████████████▎                 | 52523/122310 [1:51:55<16:52:32,  1.15it/s][A
 43%|█████████████▎                 | 52527/122310 [1:51:56<14:17:10,  1.36it/s][A
 43%|█████████████▎                 | 52532/122310 [1:51:57<11:20:19,  1.71it/s][A
 43%|█████████████▋                  | 52542/122310 [1:51:58<7:07:56,  2.72it/s][A
 43%|█████████████▋                  | 52554/122310 [1:51:58<4:37:19,  4.19it/s][A
 43%|█████████████▊                  | 52566/122310 [1:51:59<3:19:33,  5.82it/s][A
 43%|█████████████▊                  | 52575/122310 [1:52:00<2:49:21,  6.86it/s][A
 43%|█████████████▊                  | 52588/122310 [1:52:00<2:10:31,  8.90it/s][A
 43%|█████████████▊                  | 52594/122310 [1:52:01<2:12:25,  8.77

step: 6420, loss: 89.16759641140146, epoch: 0



 43%|█████████████▊                  | 52715/122310 [1:52:08<1:02:03, 18.69it/s][A
 43%|█████████████▊                  | 52724/122310 [1:52:08<1:09:16, 16.74it/s][A
 43%|█████████████▊                  | 52726/122310 [1:52:09<1:31:33, 12.67it/s][A
 43%|█████████████▊                  | 52736/122310 [1:52:10<1:29:13, 13.00it/s][A
 43%|█████████████▊                  | 52749/122310 [1:52:11<1:20:15, 14.44it/s][A
 43%|█████████████▊                  | 52758/122310 [1:52:11<1:24:07, 13.78it/s][A
 43%|█████████████▊                  | 52764/122310 [1:52:12<1:35:40, 12.11it/s][A
 43%|█████████████▊                  | 52777/122310 [1:52:13<1:23:55, 13.81it/s][A
 43%|█████████████▊                  | 52788/122310 [1:52:14<1:21:41, 14.18it/s][A
 43%|█████████████▊                  | 52796/122310 [1:52:14<1:27:42, 13.21it/s][A
 43%|█████████████▊                  | 52803/122310 [1:52:15<1:35:23, 12.14it/s][A
 43%|█████████████▊                  | 52808/122310 [1:52:16<1:50:02, 10.53

step: 6440, loss: 94.74209566530102, epoch: 0



 43%|█████████████▊                  | 52895/122310 [1:52:22<1:25:19, 13.56it/s][A
 43%|█████████████▊                  | 52904/122310 [1:52:23<1:27:46, 13.18it/s][A
 43%|█████████████▊                  | 52921/122310 [1:52:24<1:11:37, 16.15it/s][A
 43%|█████████████▊                  | 52926/122310 [1:52:25<1:26:08, 13.42it/s][A
 43%|█████████████▊                  | 52933/122310 [1:52:25<1:33:51, 12.32it/s][A
 43%|█████████████▊                  | 52944/122310 [1:52:26<1:28:04, 13.13it/s][A
 43%|█████████████▊                  | 52947/122310 [1:52:27<1:50:44, 10.44it/s][A
 43%|█████████████▊                  | 52960/122310 [1:52:27<1:31:15, 12.67it/s][A
 43%|█████████████▊                  | 52968/122310 [1:52:28<1:35:06, 12.15it/s][A
 43%|█████████████▊                  | 52988/122310 [1:52:29<1:08:58, 16.75it/s][A
 43%|█████████████▊                  | 53004/122310 [1:52:30<1:03:07, 18.30it/s][A
 43%|█████████████▊                  | 53012/122310 [1:52:30<1:11:42, 16.11

step: 6460, loss: 73.69923974859859, epoch: 0



 43%|█████████████▉                  | 53090/122310 [1:52:37<1:25:36, 13.48it/s][A
 43%|█████████████▉                  | 53094/122310 [1:52:38<1:43:39, 11.13it/s][A
 43%|█████████████▉                  | 53102/122310 [1:52:38<1:44:31, 11.04it/s][A
 43%|█████████████▉                  | 53114/122310 [1:52:39<1:31:28, 12.61it/s][A
 43%|█████████████▉                  | 53127/122310 [1:52:40<1:21:26, 14.16it/s][A
 43%|█████████████▉                  | 53135/122310 [1:52:41<1:27:57, 13.11it/s][A
 43%|█████████████▉                  | 53139/122310 [1:52:41<1:46:13, 10.85it/s][A
 43%|█████████████▉                  | 53148/122310 [1:52:42<1:42:04, 11.29it/s][A
 43%|█████████████▉                  | 53154/122310 [1:52:43<1:51:05, 10.38it/s][A
 43%|█████████████▉                  | 53172/122310 [1:52:44<1:18:43, 14.64it/s][A
 43%|█████████████▉                  | 53180/122310 [1:52:44<1:24:56, 13.56it/s][A
 43%|█████████████▉                  | 53185/122310 [1:52:45<1:39:42, 11.55

step: 6480, loss: 78.69650954427426, epoch: 0



 44%|█████████████▉                  | 53273/122310 [1:52:52<1:32:01, 12.50it/s][A
 44%|█████████████▉                  | 53275/122310 [1:52:52<2:00:02,  9.59it/s][A
 44%|█████████████▉                  | 53284/122310 [1:52:53<1:50:26, 10.42it/s][A
 44%|█████████████▉                  | 53293/122310 [1:52:54<1:44:53, 10.97it/s][A
 44%|█████████████▉                  | 53297/122310 [1:52:54<2:03:04,  9.35it/s][A
 44%|█████████████▉                  | 53302/122310 [1:52:55<2:14:11,  8.57it/s][A
 44%|█████████████▉                  | 53312/122310 [1:52:56<1:53:42, 10.11it/s][A
 44%|█████████████▉                  | 53324/122310 [1:52:57<1:35:35, 12.03it/s][A
 44%|█████████████▉                  | 53326/122310 [1:52:57<2:05:08,  9.19it/s][A
 44%|█████████████▉                  | 53335/122310 [1:52:58<1:52:59, 10.17it/s][A
 44%|█████████████▉                  | 53351/122310 [1:52:59<1:23:52, 13.70it/s][A
 44%|█████████████▉                  | 53355/122310 [1:53:00<1:42:26, 11.22

step: 6500, loss: 88.37380380520484, epoch: 0



 44%|█████████████▉                  | 53424/122310 [1:53:06<2:02:59,  9.34it/s][A
 44%|█████████████▉                  | 53436/122310 [1:53:07<1:40:16, 11.45it/s][A
 44%|█████████████▉                  | 53443/122310 [1:53:08<1:45:22, 10.89it/s][A
 44%|█████████████▉                  | 53447/122310 [1:53:08<2:03:51,  9.27it/s][A
 44%|█████████████▉                  | 53458/122310 [1:53:09<1:44:50, 10.95it/s][A
 44%|█████████████▉                  | 53460/122310 [1:53:10<2:14:14,  8.55it/s][A
 44%|█████████████▉                  | 53464/122310 [1:53:11<2:30:30,  7.62it/s][A
 44%|█████████████▉                  | 53470/122310 [1:53:11<2:27:11,  7.80it/s][A
 44%|█████████████▉                  | 53474/122310 [1:53:12<2:41:34,  7.10it/s][A
 44%|█████████████▉                  | 53483/122310 [1:53:13<2:12:18,  8.67it/s][A
 44%|█████████████▉                  | 53491/122310 [1:53:14<2:03:04,  9.32it/s][A
 44%|█████████████▉                  | 53496/122310 [1:53:14<2:13:26,  8.59

step: 6520, loss: 84.29357263158475, epoch: 0



 44%|██████████████                  | 53572/122310 [1:53:21<1:28:32, 12.94it/s][A
 44%|██████████████                  | 53580/122310 [1:53:22<1:32:53, 12.33it/s][A
 44%|██████████████                  | 53594/122310 [1:53:22<1:19:44, 14.36it/s][A
 44%|██████████████                  | 53599/122310 [1:53:23<1:34:17, 12.15it/s][A
 44%|██████████████                  | 53608/122310 [1:53:24<1:34:13, 12.15it/s][A
 44%|██████████████                  | 53617/122310 [1:53:25<1:33:38, 12.23it/s][A
 44%|██████████████                  | 53629/122310 [1:53:25<1:25:08, 13.44it/s][A
 44%|██████████████                  | 53635/122310 [1:53:26<1:36:15, 11.89it/s][A
 44%|██████████████                  | 53641/122310 [1:53:27<2:18:01,  8.29it/s][A
 44%|██████████████                  | 53648/122310 [1:53:28<2:12:44,  8.62it/s][A
 44%|██████████████                  | 53654/122310 [1:53:29<2:14:12,  8.53it/s][A
 44%|██████████████                  | 53664/122310 [1:53:30<1:55:25,  9.91

step: 6540, loss: 82.89704857470758, epoch: 0



 44%|██████████████                  | 53732/122310 [1:53:35<1:51:05, 10.29it/s][A
 44%|██████████████                  | 53740/122310 [1:53:36<1:49:02, 10.48it/s][A
 44%|██████████████                  | 53744/122310 [1:53:37<2:07:28,  8.96it/s][A
 44%|██████████████                  | 53751/122310 [1:53:38<2:04:52,  9.15it/s][A
 44%|██████████████                  | 53755/122310 [1:53:38<2:22:01,  8.04it/s][A
 44%|██████████████                  | 53764/122310 [1:53:39<2:02:43,  9.31it/s][A
 44%|██████████████                  | 53774/122310 [1:53:40<1:47:43, 10.60it/s][A
 44%|██████████████                  | 53776/122310 [1:53:41<2:18:17,  8.26it/s][A
 44%|██████████████                  | 53789/122310 [1:53:41<1:42:45, 11.11it/s][A
 44%|██████████████                  | 53797/122310 [1:53:42<1:43:10, 11.07it/s][A
 44%|██████████████                  | 53811/122310 [1:53:43<1:24:45, 13.47it/s][A
 44%|██████████████                  | 53818/122310 [1:53:44<1:32:48, 12.30

step: 6560, loss: 87.98178435463316, epoch: 0



 44%|██████████████                  | 53893/122310 [1:53:50<1:53:34, 10.04it/s][A
 44%|██████████████                  | 53898/122310 [1:53:51<2:03:00,  9.27it/s][A
 44%|██████████████                  | 53900/122310 [1:53:52<2:30:25,  7.58it/s][A
 44%|██████████████                  | 53909/122310 [1:53:52<2:08:09,  8.90it/s][A
 44%|██████████████                  | 53917/122310 [1:53:53<2:00:17,  9.48it/s][A
 44%|██████████████                  | 53925/122310 [1:53:55<3:01:12,  6.29it/s][A
 44%|██████████████                  | 53931/122310 [1:53:56<2:50:17,  6.69it/s][A
 44%|██████████████                  | 53940/122310 [1:53:57<2:22:47,  7.98it/s][A
 44%|██████████████                  | 53949/122310 [1:53:57<2:06:08,  9.03it/s][A
 44%|██████████████                  | 53959/122310 [1:53:58<1:51:21, 10.23it/s][A
 44%|██████████████                  | 53963/122310 [1:53:59<2:07:12,  8.95it/s][A
 44%|██████████████                  | 53971/122310 [1:54:00<1:59:54,  9.50

step: 6580, loss: 88.65749243345824, epoch: 0



 44%|██████████████▏                 | 54039/122310 [1:54:05<1:23:08, 13.69it/s][A
 44%|██████████████▏                 | 54047/122310 [1:54:05<1:28:16, 12.89it/s][A
 44%|██████████████▏                 | 54060/122310 [1:54:06<1:19:37, 14.29it/s][A
 44%|██████████████▏                 | 54071/122310 [1:54:07<1:18:18, 14.52it/s][A
 44%|██████████████▏                 | 54081/122310 [1:54:08<1:19:48, 14.25it/s][A
 44%|██████████████▏                 | 54090/122310 [1:54:08<1:23:17, 13.65it/s][A
 44%|██████████████▏                 | 54094/122310 [1:54:09<1:41:26, 11.21it/s][A
 44%|██████████████▏                 | 54105/122310 [1:54:10<1:32:01, 12.35it/s][A
 44%|██████████████▏                 | 54116/122310 [1:54:11<1:26:27, 13.15it/s][A
 44%|██████████████▏                 | 54123/122310 [1:54:11<1:34:16, 12.05it/s][A
 44%|██████████████▏                 | 54135/122310 [1:54:12<1:25:08, 13.35it/s][A
 44%|██████████████▏                 | 54141/122310 [1:54:13<1:36:07, 11.82

step: 6600, loss: 96.16012390457465, epoch: 0



 44%|██████████████▏                 | 54204/122310 [1:54:19<2:01:10,  9.37it/s][A
 44%|██████████████▏                 | 54217/122310 [1:54:20<1:35:43, 11.86it/s][A
 44%|██████████████▏                 | 54226/122310 [1:54:21<1:34:29, 12.01it/s][A
 44%|██████████████▏                 | 54240/122310 [1:54:22<1:20:34, 14.08it/s][A
 44%|██████████████▏                 | 54243/122310 [1:54:22<1:41:53, 11.13it/s][A
 44%|██████████████▏                 | 54252/122310 [1:54:23<1:39:08, 11.44it/s][A
 44%|██████████████▏                 | 54254/122310 [1:54:24<2:08:57,  8.80it/s][A
 44%|██████████████▏                 | 54257/122310 [1:54:25<2:32:42,  7.43it/s][A
 44%|██████████████▏                 | 54263/122310 [1:54:25<2:28:04,  7.66it/s][A
 44%|██████████████▏                 | 54271/122310 [1:54:26<2:10:57,  8.66it/s][A
 44%|██████████████▏                 | 54289/122310 [1:54:27<1:24:28, 13.42it/s][A
 44%|██████████████▏                 | 54299/122310 [1:54:27<1:24:10, 13.47

step: 6620, loss: 90.23158125966518, epoch: 0



 44%|██████████████▏                 | 54360/122310 [1:54:34<1:55:21,  9.82it/s][A
 44%|██████████████▏                 | 54367/122310 [1:54:35<1:56:10,  9.75it/s][A
 44%|██████████████▏                 | 54372/122310 [1:54:36<2:07:28,  8.88it/s][A
 44%|██████████████▏                 | 54382/122310 [1:54:36<1:50:23, 10.25it/s][A
 44%|██████████████▏                 | 54384/122310 [1:54:37<2:20:32,  8.06it/s][A
 44%|██████████████▏                 | 54402/122310 [1:54:38<1:27:13, 12.97it/s][A
 44%|██████████████▏                 | 54413/122310 [1:54:38<1:23:08, 13.61it/s][A
 44%|██████████████▏                 | 54425/122310 [1:54:39<1:18:18, 14.45it/s][A
 45%|██████████████▏                 | 54431/122310 [1:54:40<1:29:58, 12.57it/s][A
 45%|██████████████▏                 | 54442/122310 [1:54:41<1:24:54, 13.32it/s][A
 45%|██████████████▏                 | 54451/122310 [1:54:41<1:27:08, 12.98it/s][A
 45%|██████████████▏                 | 54460/122310 [1:54:42<1:28:29, 12.78

step: 6640, loss: 96.13361338519026, epoch: 0



 45%|██████████████▎                 | 54535/122310 [1:54:49<1:27:15, 12.95it/s][A
 45%|██████████████▎                 | 54546/122310 [1:54:49<1:23:10, 13.58it/s][A
 45%|██████████████▎                 | 54553/122310 [1:54:50<1:31:14, 12.38it/s][A
 45%|██████████████▎                 | 54564/122310 [1:54:51<1:25:46, 13.16it/s][A
 45%|██████████████▎                 | 54574/122310 [1:54:52<1:24:51, 13.30it/s][A
 45%|██████████████▎                 | 54580/122310 [1:54:52<1:35:51, 11.78it/s][A
 45%|██████████████▎                 | 54586/122310 [1:54:53<1:45:39, 10.68it/s][A
 45%|██████████████▎                 | 54592/122310 [1:54:54<1:53:29,  9.94it/s][A
 45%|██████████████▎                 | 54598/122310 [1:54:55<2:00:13,  9.39it/s][A
 45%|██████████████▎                 | 54608/122310 [1:54:55<1:45:45, 10.67it/s][A
 45%|██████████████▎                 | 54619/122310 [1:54:56<1:34:07, 11.99it/s][A
 45%|██████████████▎                 | 54625/122310 [1:54:57<1:44:04, 10.84

step: 6660, loss: 76.57769012907654, epoch: 0



 45%|██████████████▎                 | 54733/122310 [1:55:03<1:07:51, 16.60it/s][A
 45%|██████████████▎                 | 54735/122310 [1:55:04<1:30:07, 12.50it/s][A
 45%|██████████████▎                 | 54740/122310 [1:55:05<1:44:30, 10.78it/s][A
 45%|██████████████▎                 | 54747/122310 [1:55:06<1:48:04, 10.42it/s][A
 45%|██████████████▎                 | 54759/122310 [1:55:06<1:32:11, 12.21it/s][A
 45%|██████████████▎                 | 54771/122310 [1:55:07<1:23:37, 13.46it/s][A
 45%|██████████████▎                 | 54778/122310 [1:55:08<1:31:29, 12.30it/s][A
 45%|██████████████▎                 | 54789/122310 [1:55:09<1:25:41, 13.13it/s][A
 45%|██████████████▎                 | 54799/122310 [1:55:09<1:25:05, 13.22it/s][A
 45%|██████████████▎                 | 54809/122310 [1:55:10<1:23:49, 13.42it/s][A
 45%|██████████████▎                 | 54817/122310 [1:55:11<1:28:44, 12.67it/s][A
 45%|██████████████▎                 | 54829/122310 [1:55:11<1:21:40, 13.77

step: 6680, loss: 121.85998282154404, epoch: 0



 45%|██████████████▎                 | 54924/122310 [1:55:18<1:30:48, 12.37it/s][A
 45%|██████████████▎                 | 54939/122310 [1:55:19<1:15:48, 14.81it/s][A
 45%|██████████████▍                 | 54944/122310 [1:55:20<1:30:15, 12.44it/s][A
 45%|██████████████▍                 | 54955/122310 [1:55:20<1:25:00, 13.21it/s][A
 45%|██████████████▍                 | 54964/122310 [1:55:21<1:26:50, 12.92it/s][A
 45%|██████████████▍                 | 54969/122310 [1:55:22<1:41:40, 11.04it/s][A
 45%|██████████████▍                 | 54972/122310 [1:55:22<2:04:33,  9.01it/s][A
 45%|██████████████▍                 | 54983/122310 [1:55:23<1:43:50, 10.81it/s][A
 45%|██████████████▍                 | 54990/122310 [1:55:24<1:47:46, 10.41it/s][A
 45%|██████████████▍                 | 54996/122310 [1:55:25<1:55:52,  9.68it/s][A
 45%|██████████████▍                 | 55001/122310 [1:55:25<2:07:17,  8.81it/s][A
 45%|██████████████▍                 | 55011/122310 [1:55:26<1:49:26, 10.25

step: 6700, loss: 81.588200000115, epoch: 0



 45%|██████████████▍                 | 55084/122310 [1:55:33<1:58:03,  9.49it/s][A
 45%|██████████████▍                 | 55101/122310 [1:55:33<1:21:43, 13.71it/s][A
 45%|██████████████▍                 | 55115/122310 [1:55:34<1:12:35, 15.43it/s][A
 45%|██████████████▍                 | 55118/122310 [1:55:35<1:33:00, 12.04it/s][A
 45%|██████████████▍                 | 55123/122310 [1:55:36<1:46:39, 10.50it/s][A
 45%|██████████████▍                 | 55137/122310 [1:55:36<1:24:52, 13.19it/s][A
 45%|██████████████▍                 | 55141/122310 [1:55:37<1:42:00, 10.97it/s][A
 45%|██████████████▍                 | 55147/122310 [1:55:38<1:49:30, 10.22it/s][A
 45%|██████████████▍                 | 55155/122310 [1:55:38<1:46:08, 10.54it/s][A
 45%|██████████████▍                 | 55164/122310 [1:55:39<1:40:15, 11.16it/s][A
 45%|██████████████▍                 | 55172/122310 [1:55:40<1:39:56, 11.20it/s][A
 45%|██████████████▍                 | 55176/122310 [1:55:41<1:57:21,  9.53

step: 6720, loss: 78.57608742495322, epoch: 0



 45%|██████████████▍                 | 55265/122310 [1:55:47<1:10:15, 15.90it/s][A
 45%|██████████████▍                 | 55272/122310 [1:55:48<1:19:02, 14.14it/s][A
 45%|██████████████▍                 | 55280/122310 [1:55:48<1:24:03, 13.29it/s][A
 45%|██████████████▍                 | 55287/122310 [1:55:49<1:30:49, 12.30it/s][A
 45%|██████████████▍                 | 55298/122310 [1:55:50<1:24:26, 13.23it/s][A
 45%|██████████████▍                 | 55305/122310 [1:55:51<1:31:27, 12.21it/s][A
 45%|██████████████▍                 | 55317/122310 [1:55:51<1:22:09, 13.59it/s][A
 45%|██████████████▍                 | 55329/122310 [1:55:52<1:16:40, 14.56it/s][A
 45%|██████████████▍                 | 55338/122310 [1:55:53<1:19:58, 13.96it/s][A
 45%|██████████████▍                 | 55344/122310 [1:55:53<1:30:28, 12.34it/s][A
 45%|██████████████▍                 | 55353/122310 [1:55:54<1:29:44, 12.44it/s][A
 45%|██████████████▍                 | 55362/122310 [1:55:55<1:29:42, 12.44

step: 6740, loss: 123.15641444401183, epoch: 0



 45%|██████████████▌                 | 55446/122310 [1:56:01<1:19:14, 14.06it/s][A
 45%|██████████████▌                 | 55452/122310 [1:56:02<1:29:54, 12.39it/s][A
 45%|██████████████▌                 | 55461/122310 [1:56:03<1:29:03, 12.51it/s][A
 45%|██████████████▌                 | 55469/122310 [1:56:03<1:31:52, 12.12it/s][A
 45%|██████████████▌                 | 55484/122310 [1:56:04<1:15:09, 14.82it/s][A
 45%|██████████████▌                 | 55490/122310 [1:56:05<1:26:24, 12.89it/s][A
 45%|██████████████▌                 | 55496/122310 [1:56:06<1:36:36, 11.53it/s][A
 45%|██████████████▌                 | 55502/122310 [1:56:06<1:45:38, 10.54it/s][A
 45%|██████████████▌                 | 55514/122310 [1:56:07<1:29:29, 12.44it/s][A
 45%|██████████████▌                 | 55518/122310 [1:56:08<1:47:11, 10.38it/s][A
 45%|██████████████▌                 | 55529/122310 [1:56:08<1:33:26, 11.91it/s][A
 45%|██████████████▌                 | 55542/122310 [1:56:09<1:20:42, 13.79

step: 6760, loss: 80.24142583453249, epoch: 0



 45%|██████████████▌                 | 55619/122310 [1:56:16<1:42:24, 10.85it/s][A
 45%|██████████████▌                 | 55627/122310 [1:56:16<1:41:03, 11.00it/s][A
 45%|██████████████▌                 | 55639/122310 [1:56:17<1:26:57, 12.78it/s][A
 45%|██████████████▌                 | 55644/122310 [1:56:18<1:40:24, 11.07it/s][A
 46%|██████████████▌                 | 55656/122310 [1:56:18<1:26:57, 12.78it/s][A
 46%|██████████████▌                 | 55662/122310 [1:56:19<1:36:54, 11.46it/s][A
 46%|██████████████▌                 | 55668/122310 [1:56:20<1:45:23, 10.54it/s][A
 46%|██████████████▌                 | 55673/122310 [1:56:21<1:56:45,  9.51it/s][A
 46%|██████████████▌                 | 55685/122310 [1:56:21<1:34:40, 11.73it/s][A
 46%|██████████████▌                 | 55698/122310 [1:56:22<1:21:08, 13.68it/s][A
 46%|██████████████▌                 | 55704/122310 [1:56:23<1:31:52, 12.08it/s][A
 46%|██████████████▌                 | 55711/122310 [1:56:23<1:37:13, 11.42

step: 6780, loss: 117.312973212211, epoch: 0



 46%|██████████████▌                 | 55765/122310 [1:56:31<3:31:04,  5.25it/s][A
 46%|██████████████▌                 | 55770/122310 [1:56:31<3:16:23,  5.65it/s][A
 46%|██████████████▌                 | 55781/122310 [1:56:32<2:19:52,  7.93it/s][A
 46%|██████████████▌                 | 55785/122310 [1:56:33<2:30:56,  7.35it/s][A
 46%|██████████████▌                 | 55789/122310 [1:56:33<2:40:42,  6.90it/s][A
 46%|██████████████▌                 | 55799/122310 [1:56:34<2:05:31,  8.83it/s][A
 46%|██████████████▌                 | 55804/122310 [1:56:35<2:12:44,  8.35it/s][A
 46%|██████████████▌                 | 55817/122310 [1:56:35<1:39:05, 11.18it/s][A
 46%|██████████████▌                 | 55826/122310 [1:56:36<1:35:24, 11.61it/s][A
 46%|██████████████▌                 | 55832/122310 [1:56:37<1:43:46, 10.68it/s][A
 46%|██████████████▌                 | 55847/122310 [1:56:38<1:20:39, 13.73it/s][A
 46%|██████████████▌                 | 55857/122310 [1:56:38<1:19:59, 13.84

step: 6800, loss: 105.19205270594884, epoch: 0
sim1 and sim2 are 0.6254219789385054, 0.13978952520504173
cosine of pred and queen: 0.08719062674302613
Actual: athens:greece::madrid:spain, pred: europe
Actual: bangkok:thailand::islamabad:pakistan, pred: india
Actual: beijing:china::tokyo:japan, pred: used
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: agreed
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: france
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: cloak
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: resemblest
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: mahar

Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: year
Actual: islamabad:pakistan::oslo:norway, pred: zemira
Actual: grandfather:grandmother::father:mother, pred: well
Actual: grandpa:grandma::sons:daughters, pred: children
Actual: king:queen::husband:wife, pred: mbelwa
Actual: man:woman::brothers:sisters, pred: nidhi
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: state
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: state
Actual: tripura:agartala::odisha:bhubaneswar, pred: jefferson
Actual: algeria:dinar::japan:yen, pred: holyhead
Actual: argentina:peso::japan:yen, pred: dollar



 46%|██████████████▏                | 55901/122310 [1:57:56<61:02:42,  3.31s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.005917159763313609



 46%|██████████████▏                | 55915/122310 [1:57:57<32:53:11,  1.78s/it][A
 46%|██████████████▏                | 55919/122310 [1:57:58<28:07:41,  1.53s/it][A
 46%|██████████████▏                | 55933/122310 [1:57:58<16:00:54,  1.15it/s][A
 46%|██████████████▏                | 55943/122310 [1:57:59<11:24:50,  1.62it/s][A
 46%|██████████████▋                 | 55950/122310 [1:58:00<9:08:19,  2.02it/s][A
 46%|██████████████▋                 | 55964/122310 [1:58:00<5:48:42,  3.17it/s][A
 46%|██████████████▋                 | 55978/122310 [1:58:01<4:01:16,  4.58it/s][A
 46%|██████████████▋                 | 55984/122310 [1:58:02<3:40:42,  5.01it/s][A
 46%|██████████████▋                 | 55991/122310 [1:58:02<3:15:20,  5.66it/s][A
 46%|██████████████▋                 | 55996/122310 [1:58:03<3:08:20,  5.87it/s][A
 46%|██████████████▋                 | 56002/122310 [1:58:04<2:53:42,  6.36it/s][A
 46%|██████████████▋                 | 56011/122310 [1:58:05<2:23:26,  7.70

step: 6820, loss: 97.7078729212228, epoch: 0



 46%|██████████████▋                 | 56097/122310 [1:58:11<1:24:25, 13.07it/s][A
 46%|██████████████▋                 | 56101/122310 [1:58:12<1:41:15, 10.90it/s][A
 46%|██████████████▋                 | 56107/122310 [1:58:12<1:47:51, 10.23it/s][A
 46%|██████████████▋                 | 56116/122310 [1:58:13<1:40:33, 10.97it/s][A
 46%|██████████████▋                 | 56127/122310 [1:58:14<1:29:30, 12.32it/s][A
 46%|██████████████▋                 | 56135/122310 [1:58:14<1:31:51, 12.01it/s][A
 46%|██████████████▋                 | 56140/122310 [1:58:15<1:44:45, 10.53it/s][A
 46%|██████████████▋                 | 56148/122310 [1:58:16<1:42:30, 10.76it/s][A
 46%|██████████████▋                 | 56156/122310 [1:58:17<1:41:03, 10.91it/s][A
 46%|██████████████▋                 | 56164/122310 [1:58:17<1:40:00, 11.02it/s][A
 46%|██████████████▋                 | 56171/122310 [1:58:18<1:43:11, 10.68it/s][A
 46%|██████████████▋                 | 56178/122310 [1:58:19<1:45:43, 10.43

step: 6840, loss: 130.1116763367179, epoch: 0



 46%|██████████████▋                 | 56244/122310 [1:58:25<1:38:24, 11.19it/s][A
 46%|██████████████▋                 | 56261/122310 [1:58:26<1:14:11, 14.84it/s][A
 46%|██████████████▋                 | 56268/122310 [1:58:27<1:22:01, 13.42it/s][A
 46%|██████████████▋                 | 56273/122310 [1:58:27<1:35:04, 11.58it/s][A
 46%|██████████████▋                 | 56283/122310 [1:58:28<1:29:13, 12.33it/s][A
 46%|██████████████▋                 | 56291/122310 [1:58:29<1:31:24, 12.04it/s][A
 46%|██████████████▋                 | 56302/122310 [1:58:29<1:24:21, 13.04it/s][A
 46%|██████████████▋                 | 56305/122310 [1:58:30<1:45:36, 10.42it/s][A
 46%|██████████████▋                 | 56312/122310 [1:58:31<1:47:09, 10.26it/s][A
 46%|██████████████▋                 | 56316/122310 [1:58:32<2:04:45,  8.82it/s][A
 46%|██████████████▋                 | 56319/122310 [1:58:32<2:26:36,  7.50it/s][A
 46%|██████████████▋                 | 56323/122310 [1:58:33<2:38:31,  6.94

step: 6860, loss: 112.61942309404775, epoch: 0



 46%|██████████████▊                 | 56404/122310 [1:58:39<1:42:04, 10.76it/s][A
 46%|██████████████▊                 | 56407/122310 [1:58:40<2:04:25,  8.83it/s][A
 46%|██████████████▊                 | 56414/122310 [1:58:41<2:00:02,  9.15it/s][A
 46%|██████████████▊                 | 56426/122310 [1:58:41<1:35:43, 11.47it/s][A
 46%|██████████████▊                 | 56434/122310 [1:58:42<1:36:13, 11.41it/s][A
 46%|██████████████▊                 | 56441/122310 [1:58:43<1:40:22, 10.94it/s][A
 46%|██████████████▊                 | 56453/122310 [1:58:44<1:26:15, 12.73it/s][A
 46%|██████████████▊                 | 56459/122310 [1:58:44<1:36:02, 11.43it/s][A
 46%|██████████████▊                 | 56464/122310 [1:58:45<1:48:19, 10.13it/s][A
 46%|██████████████▊                 | 56475/122310 [1:58:46<1:33:29, 11.74it/s][A
 46%|██████████████▊                 | 56486/122310 [1:58:46<1:25:22, 12.85it/s][A
 46%|██████████████▊                 | 56498/122310 [1:58:47<1:17:59, 14.06

step: 6880, loss: 131.24580821230094, epoch: 0



 46%|██████████████▊                 | 56533/122310 [1:58:54<2:24:10,  7.60it/s][A
 46%|██████████████▊                 | 56550/122310 [1:58:54<1:38:50, 11.09it/s][A
 46%|██████████████▊                 | 56556/122310 [1:58:55<1:44:49, 10.45it/s][A
 46%|██████████████▊                 | 56564/122310 [1:58:56<1:42:31, 10.69it/s][A
 46%|██████████████▊                 | 56580/122310 [1:58:56<1:19:26, 13.79it/s][A
 46%|██████████████▊                 | 56589/122310 [1:58:57<1:21:12, 13.49it/s][A
 46%|██████████████▊                 | 56601/122310 [1:58:58<1:15:47, 14.45it/s][A
 46%|██████████████▊                 | 56606/122310 [1:58:58<1:28:35, 12.36it/s][A
 46%|██████████████▊                 | 56615/122310 [1:58:59<1:28:39, 12.35it/s][A
 46%|██████████████▊                 | 56617/122310 [1:59:00<1:54:06,  9.59it/s][A
 46%|██████████████▊                 | 56621/122310 [1:59:01<2:09:29,  8.45it/s][A
 46%|██████████████▊                 | 56629/122310 [1:59:01<1:58:04,  9.27

step: 6900, loss: 97.51345667412338, epoch: 0



 46%|██████████████▊                 | 56723/122310 [1:59:08<1:28:35, 12.34it/s][A
 46%|██████████████▊                 | 56735/122310 [1:59:08<1:19:46, 13.70it/s][A
 46%|██████████████▊                 | 56741/122310 [1:59:09<1:30:06, 12.13it/s][A
 46%|██████████████▊                 | 56746/122310 [1:59:10<1:43:00, 10.61it/s][A
 46%|██████████████▊                 | 56751/122310 [1:59:11<1:54:30,  9.54it/s][A
 46%|██████████████▊                 | 56761/122310 [1:59:11<1:40:10, 10.91it/s][A
 46%|██████████████▊                 | 56774/122310 [1:59:12<1:23:29, 13.08it/s][A
 46%|██████████████▊                 | 56778/122310 [1:59:13<1:40:21, 10.88it/s][A
 46%|██████████████▊                 | 56788/122310 [1:59:13<1:32:22, 11.82it/s][A
 46%|██████████████▊                 | 56797/122310 [1:59:14<1:30:27, 12.07it/s][A
 46%|██████████████▊                 | 56801/122310 [1:59:15<1:48:13, 10.09it/s][A
 46%|██████████████▊                 | 56806/122310 [1:59:16<1:58:51,  9.19

step: 6920, loss: 87.24611369468279, epoch: 0



 47%|██████████████▉                 | 56881/122310 [1:59:22<1:45:53, 10.30it/s][A
 47%|██████████████▉                 | 56895/122310 [1:59:23<1:23:00, 13.13it/s][A
 47%|██████████████▉                 | 56903/122310 [1:59:23<1:26:49, 12.55it/s][A
 47%|██████████████▉                 | 56910/122310 [1:59:24<1:32:42, 11.76it/s][A
 47%|██████████████▉                 | 56918/122310 [1:59:25<1:34:16, 11.56it/s][A
 47%|██████████████▉                 | 56923/122310 [1:59:26<1:46:27, 10.24it/s][A
 47%|██████████████▉                 | 56929/122310 [1:59:26<1:52:19,  9.70it/s][A
 47%|██████████████▉                 | 56936/122310 [1:59:27<1:52:01,  9.73it/s][A
 47%|██████████████▉                 | 56939/122310 [1:59:28<2:15:23,  8.05it/s][A
 47%|██████████████▉                 | 56943/122310 [1:59:28<2:28:15,  7.35it/s][A
 47%|██████████████▉                 | 56949/122310 [1:59:29<2:21:40,  7.69it/s][A
 47%|██████████████▉                 | 56963/122310 [1:59:30<1:36:29, 11.29

step: 6940, loss: 128.68942655418593, epoch: 0



 47%|██████████████▉                 | 57032/122310 [1:59:36<1:32:42, 11.73it/s][A
 47%|██████████████▉                 | 57034/122310 [1:59:38<2:35:45,  6.98it/s][A
 47%|██████████████▉                 | 57040/122310 [1:59:38<2:28:00,  7.35it/s][A
 47%|██████████████▉                 | 57043/122310 [1:59:39<2:46:22,  6.54it/s][A
 47%|██████████████▉                 | 57046/122310 [1:59:40<3:03:29,  5.93it/s][A
 47%|██████████████▉                 | 57061/122310 [1:59:40<1:46:54, 10.17it/s][A
 47%|██████████████▉                 | 57070/122310 [1:59:41<1:39:51, 10.89it/s][A
 47%|██████████████▉                 | 57075/122310 [1:59:42<1:51:17,  9.77it/s][A
 47%|██████████████▉                 | 57086/122310 [1:59:43<1:34:58, 11.45it/s][A
 47%|██████████████▉                 | 57098/122310 [1:59:43<1:23:23, 13.03it/s][A
 47%|██████████████▉                 | 57107/122310 [1:59:44<1:24:10, 12.91it/s][A
 47%|██████████████▉                 | 57127/122310 [1:59:45<1:02:15, 17.45

step: 6960, loss: 86.05875259350505, epoch: 0



 47%|██████████████▉                 | 57216/122310 [1:59:51<1:04:41, 16.77it/s][A
 47%|██████████████▉                 | 57229/122310 [1:59:51<1:02:58, 17.22it/s][A
 47%|██████████████▉                 | 57240/122310 [1:59:52<1:04:59, 16.69it/s][A
 47%|██████████████▉                 | 57247/122310 [1:59:53<1:14:11, 14.62it/s][A
 47%|██████████████▉                 | 57251/122310 [1:59:53<1:30:47, 11.94it/s][A
 47%|██████████████▉                 | 57260/122310 [1:59:54<1:29:13, 12.15it/s][A
 47%|██████████████▉                 | 57275/122310 [1:59:55<1:13:02, 14.84it/s][A
 47%|██████████████▉                 | 57287/122310 [1:59:56<1:10:44, 15.32it/s][A
 47%|██████████████▉                 | 57289/122310 [1:59:56<1:33:09, 11.63it/s][A
 47%|██████████████▉                 | 57302/122310 [1:59:57<1:19:16, 13.67it/s][A
 47%|██████████████▉                 | 57314/122310 [1:59:58<1:14:11, 14.60it/s][A
 47%|███████████████▉                  | 57338/122310 [1:59:58<53:16, 20.32

step: 6980, loss: 95.16188038073876, epoch: 0



 47%|███████████████                 | 57422/122310 [2:00:05<1:17:05, 14.03it/s][A
 47%|███████████████                 | 57432/122310 [2:00:05<1:16:49, 14.08it/s][A
 47%|███████████████                 | 57438/122310 [2:00:06<1:27:15, 12.39it/s][A
 47%|███████████████                 | 57449/122310 [2:00:07<1:21:09, 13.32it/s][A
 47%|███████████████                 | 57451/122310 [2:00:08<1:46:35, 10.14it/s][A
 47%|███████████████                 | 57459/122310 [2:00:08<1:43:29, 10.44it/s][A
 47%|███████████████                 | 57470/122310 [2:00:09<1:30:12, 11.98it/s][A
 47%|███████████████                 | 57478/122310 [2:00:10<1:32:02, 11.74it/s][A
 47%|███████████████                 | 57492/122310 [2:00:10<1:16:37, 14.10it/s][A
 47%|███████████████                 | 57502/122310 [2:00:11<1:17:02, 14.02it/s][A
 47%|███████████████                 | 57506/122310 [2:00:12<1:33:33, 11.54it/s][A
 47%|███████████████                 | 57517/122310 [2:00:13<1:24:55, 12.71

step: 7000, loss: 95.09594329772044, epoch: 0
saving weights



 47%|███████████████                 | 57592/122310 [2:00:19<1:46:22, 10.14it/s][A
 47%|███████████████                 | 57602/122310 [2:00:20<1:37:17, 11.08it/s][A
 47%|███████████████                 | 57604/122310 [2:00:21<1:59:54,  8.99it/s][A
 47%|███████████████                 | 57611/122310 [2:00:21<1:56:48,  9.23it/s][A
 47%|███████████████                 | 57622/122310 [2:00:22<1:38:31, 10.94it/s][A
 47%|███████████████                 | 57631/122310 [2:00:23<1:34:21, 11.42it/s][A
 47%|███████████████                 | 57634/122310 [2:00:23<1:55:23,  9.34it/s][A
 47%|███████████████                 | 57653/122310 [2:00:24<1:14:45, 14.41it/s][A
 47%|███████████████                 | 57658/122310 [2:00:25<1:28:07, 12.23it/s][A
 47%|███████████████                 | 57664/122310 [2:00:26<1:37:46, 11.02it/s][A
 47%|███████████████                 | 57666/122310 [2:00:26<2:04:47,  8.63it/s][A
 47%|███████████████                 | 57678/122310 [2:00:27<1:37:09, 11.09

step: 7020, loss: 82.57420703264052, epoch: 0



 47%|███████████████                 | 57750/122310 [2:00:34<2:29:11,  7.21it/s][A
 47%|███████████████                 | 57753/122310 [2:00:35<2:50:04,  6.33it/s][A
 47%|███████████████                 | 57767/122310 [2:00:36<1:44:05, 10.34it/s][A
 47%|███████████████                 | 57771/122310 [2:00:36<2:00:21,  8.94it/s][A
 47%|███████████████                 | 57780/122310 [2:00:37<1:46:51, 10.06it/s][A
 47%|███████████████                 | 57787/122310 [2:00:38<1:47:50,  9.97it/s][A
 47%|███████████████                 | 57796/122310 [2:00:38<1:39:42, 10.78it/s][A
 47%|███████████████                 | 57806/122310 [2:00:39<1:31:28, 11.75it/s][A
 47%|███████████████▏                | 57817/122310 [2:00:40<1:23:37, 12.85it/s][A
 47%|███████████████▏                | 57822/122310 [2:00:41<1:36:58, 11.08it/s][A
 47%|███████████████▏                | 57825/122310 [2:00:41<1:59:08,  9.02it/s][A
 47%|███████████████▏                | 57830/122310 [2:00:42<2:08:29,  8.36

step: 7040, loss: 121.22331893948154, epoch: 0



 47%|████████████████                  | 57930/122310 [2:00:49<58:11, 18.44it/s][A
 47%|███████████████▏                | 57935/122310 [2:00:49<1:11:32, 15.00it/s][A
 47%|███████████████▏                | 57947/122310 [2:00:50<1:09:10, 15.51it/s][A
 47%|███████████████▏                | 57951/122310 [2:00:51<1:25:47, 12.50it/s][A
 47%|███████████████▏                | 57962/122310 [2:00:51<1:20:38, 13.30it/s][A
 47%|███████████████▏                | 57971/122310 [2:00:52<1:21:50, 13.10it/s][A
 47%|███████████████▏                | 57983/122310 [2:00:53<1:15:44, 14.15it/s][A
 47%|███████████████▏                | 57991/122310 [2:00:54<1:21:15, 13.19it/s][A
 47%|███████████████▏                | 57999/122310 [2:00:54<1:25:33, 12.53it/s][A
 47%|███████████████▏                | 58002/122310 [2:00:55<1:46:37, 10.05it/s][A
 47%|███████████████▏                | 58011/122310 [2:00:56<1:39:26, 10.78it/s][A
 47%|███████████████▏                | 58018/122310 [2:00:56<1:42:21, 10.47

step: 7060, loss: 106.69012132349562, epoch: 0



 48%|███████████████▏                | 58098/122310 [2:01:03<1:33:53, 11.40it/s][A
 48%|███████████████▏                | 58104/122310 [2:01:04<1:42:11, 10.47it/s][A
 48%|███████████████▏                | 58109/122310 [2:01:04<1:53:31,  9.43it/s][A
 48%|███████████████▏                | 58118/122310 [2:01:05<1:43:38, 10.32it/s][A
 48%|███████████████▏                | 58121/122310 [2:01:06<2:05:56,  8.49it/s][A
 48%|███████████████▏                | 58132/122310 [2:01:07<1:41:30, 10.54it/s][A
 48%|███████████████▏                | 58143/122310 [2:01:07<1:29:30, 11.95it/s][A
 48%|███████████████▏                | 58147/122310 [2:01:08<1:46:34, 10.03it/s][A
 48%|███████████████▏                | 58160/122310 [2:01:09<1:26:05, 12.42it/s][A
 48%|███████████████▏                | 58166/122310 [2:01:09<1:35:29, 11.20it/s][A
 48%|███████████████▏                | 58170/122310 [2:01:10<1:52:18,  9.52it/s][A
 48%|███████████████▏                | 58177/122310 [2:01:11<1:51:51,  9.56

step: 7080, loss: 98.3969023128119, epoch: 0



 48%|███████████████▏                | 58263/122310 [2:01:17<1:23:05, 12.85it/s][A
 48%|███████████████▏                | 58271/122310 [2:01:18<1:26:27, 12.34it/s][A
 48%|███████████████▏                | 58286/122310 [2:01:19<1:11:47, 14.86it/s][A
 48%|███████████████▎                | 58298/122310 [2:01:20<1:09:16, 15.40it/s][A
 48%|███████████████▎                | 58309/122310 [2:01:20<1:09:30, 15.35it/s][A
 48%|███████████████▎                | 58328/122310 [2:01:22<1:14:31, 14.31it/s][A
 48%|███████████████▎                | 58340/122310 [2:01:22<1:11:17, 14.95it/s][A
 48%|███████████████▎                | 58346/122310 [2:01:23<1:20:56, 13.17it/s][A
 48%|███████████████▎                | 58352/122310 [2:01:24<1:29:51, 11.86it/s][A
 48%|███████████████▎                | 58359/122310 [2:01:25<1:34:56, 11.23it/s][A
 48%|███████████████▎                | 58373/122310 [2:01:25<1:18:32, 13.57it/s][A
 48%|███████████████▎                | 58389/122310 [2:01:26<1:06:23, 16.05

step: 7100, loss: 109.13098110952055, epoch: 0



 48%|███████████████▎                | 58472/122310 [2:01:32<1:14:14, 14.33it/s][A
 48%|███████████████▎                | 58481/122310 [2:01:33<1:17:09, 13.79it/s][A
 48%|███████████████▎                | 58486/122310 [2:01:33<1:30:44, 11.72it/s][A
 48%|███████████████▎                | 58500/122310 [2:01:34<1:15:49, 14.02it/s][A
 48%|███████████████▎                | 58515/122310 [2:01:35<1:06:11, 16.06it/s][A
 48%|███████████████▎                | 58528/122310 [2:01:35<1:03:52, 16.64it/s][A
 48%|████████████████▎                 | 58544/122310 [2:01:36<58:02, 18.31it/s][A
 48%|███████████████▎                | 58550/122310 [2:01:37<1:09:22, 15.32it/s][A
 48%|███████████████▎                | 58564/122310 [2:01:38<1:04:09, 16.56it/s][A
 48%|███████████████▎                | 58568/122310 [2:01:38<1:20:07, 13.26it/s][A
 48%|███████████████▎                | 58577/122310 [2:01:39<1:21:35, 13.02it/s][A
 48%|███████████████▎                | 58586/122310 [2:01:40<1:22:28, 12.88

step: 7120, loss: 83.68612045623547, epoch: 0



 48%|███████████████▎                | 58671/122310 [2:01:46<1:28:28, 11.99it/s][A
 48%|███████████████▎                | 58680/122310 [2:01:47<1:28:19, 12.01it/s][A
 48%|███████████████▎                | 58687/122310 [2:01:48<1:33:26, 11.35it/s][A
 48%|███████████████▎                | 58696/122310 [2:01:48<1:30:49, 11.67it/s][A
 48%|███████████████▎                | 58703/122310 [2:01:49<1:35:36, 11.09it/s][A
 48%|███████████████▎                | 58708/122310 [2:01:50<1:47:33,  9.85it/s][A
 48%|███████████████▎                | 58718/122310 [2:01:51<1:35:37, 11.08it/s][A
 48%|███████████████▎                | 58728/122310 [2:01:51<1:28:58, 11.91it/s][A
 48%|███████████████▎                | 58737/122310 [2:01:52<1:27:57, 12.05it/s][A
 48%|███████████████▎                | 58741/122310 [2:01:53<1:44:37, 10.13it/s][A
 48%|███████████████▎                | 58751/122310 [2:01:54<1:34:22, 11.22it/s][A
 48%|███████████████▎                | 58762/122310 [2:01:54<1:25:15, 12.42

step: 7140, loss: 95.53770179807256, epoch: 0



 48%|███████████████▍                | 58856/122310 [2:02:01<1:10:51, 14.92it/s][A
 48%|███████████████▍                | 58871/122310 [2:02:02<1:22:01, 12.89it/s][A
 48%|███████████████▍                | 58873/122310 [2:02:03<1:41:36, 10.41it/s][A
 48%|███████████████▍                | 58887/122310 [2:02:04<1:22:49, 12.76it/s][A
 48%|███████████████▍                | 58898/122310 [2:02:04<1:18:43, 13.43it/s][A
 48%|███████████████▍                | 58906/122310 [2:02:05<1:22:52, 12.75it/s][A
 48%|███████████████▍                | 58911/122310 [2:02:06<1:34:58, 11.12it/s][A
 48%|███████████████▍                | 58921/122310 [2:02:06<1:28:42, 11.91it/s][A
 48%|███████████████▍                | 58933/122310 [2:02:07<1:19:37, 13.26it/s][A
 48%|███████████████▍                | 58944/122310 [2:02:08<1:16:10, 13.86it/s][A
 48%|███████████████▍                | 58954/122310 [2:02:09<1:16:12, 13.86it/s][A
 48%|███████████████▍                | 58961/122310 [2:02:09<1:23:33, 12.64

step: 7160, loss: 83.01117173541599, epoch: 0



 48%|███████████████▍                | 59019/122310 [2:02:15<1:24:51, 12.43it/s][A
 48%|███████████████▍                | 59026/122310 [2:02:16<1:31:32, 11.52it/s][A
 48%|███████████████▍                | 59029/122310 [2:02:17<1:53:40,  9.28it/s][A
 48%|███████████████▍                | 59035/122310 [2:02:17<1:58:35,  8.89it/s][A
 48%|███████████████▍                | 59041/122310 [2:02:18<2:02:02,  8.64it/s][A
 48%|███████████████▍                | 59052/122310 [2:02:19<1:40:17, 10.51it/s][A
 48%|███████████████▍                | 59067/122310 [2:02:20<1:18:47, 13.38it/s][A
 48%|███████████████▍                | 59080/122310 [2:02:20<1:11:47, 14.68it/s][A
 48%|███████████████▍                | 59092/122310 [2:02:21<1:09:39, 15.13it/s][A
 48%|███████████████▍                | 59104/122310 [2:02:22<1:08:16, 15.43it/s][A
 48%|███████████████▍                | 59113/122310 [2:02:23<1:13:05, 14.41it/s][A
 48%|███████████████▍                | 59122/122310 [2:02:23<1:16:53, 13.70

step: 7180, loss: 96.69413498588139, epoch: 0



 48%|███████████████▍                | 59212/122310 [2:02:30<1:20:01, 13.14it/s][A
 48%|███████████████▍                | 59219/122310 [2:02:31<1:26:59, 12.09it/s][A
 48%|███████████████▍                | 59234/122310 [2:02:32<1:12:19, 14.53it/s][A
 48%|███████████████▍                | 59239/122310 [2:02:32<1:26:08, 12.20it/s][A
 48%|███████████████▌                | 59248/122310 [2:02:33<1:26:13, 12.19it/s][A
 48%|███████████████▌                | 59260/122310 [2:02:34<1:18:30, 13.39it/s][A
 48%|███████████████▌                | 59267/122310 [2:02:34<1:26:00, 12.22it/s][A
 48%|███████████████▌                | 59270/122310 [2:02:35<1:47:26,  9.78it/s][A
 48%|███████████████▌                | 59279/122310 [2:02:36<1:40:30, 10.45it/s][A
 48%|███████████████▌                | 59288/122310 [2:02:37<1:35:37, 10.98it/s][A
 48%|███████████████▌                | 59297/122310 [2:02:37<1:32:40, 11.33it/s][A
 48%|███████████████▌                | 59302/122310 [2:02:38<1:45:26,  9.96

step: 7200, loss: 124.86034944197549, epoch: 0
sim1 and sim2 are 0.6059050687026539, 0.14051198681975904
cosine of pred and queen: 0.030656395382436495
Actual: athens:greece::madrid:spain, pred: europe
Actual: bangkok:thailand::islamabad:pakistan, pred: india
Actual: beijing:china::tokyo:japan, pred: across
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: pressure
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: said
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: cloak
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: maharas

Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: lineker
Actual: canberra:australia::dushanbe:tajikistan, pred: year
Actual: islamabad:pakistan::oslo:norway, pred: market
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: children
Actual: king:queen::husband:wife, pred: spat
Actual: man:woman::brothers:sisters, pred: nidhi
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: government
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: government
Actual: tripura:agartala::odisha:bhubaneswar, pred: cambrics
Actual: algeria:dinar::japan:yen, pred: holyhead
Actual: argentina:peso::japan:yen, pred: dollar



 49%|███████████████                | 59364/122310 [2:04:01<46:59:29,  2.69s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.011834319526627219



 49%|███████████████                | 59385/122310 [2:04:02<23:50:49,  1.36s/it][A
 49%|███████████████                | 59388/122310 [2:04:03<22:00:27,  1.26s/it][A
 49%|███████████████                | 59395/122310 [2:04:03<17:12:24,  1.02it/s][A
 49%|███████████████                | 59401/122310 [2:04:04<13:48:21,  1.27it/s][A
 49%|███████████████                | 59409/122310 [2:04:05<10:08:02,  1.72it/s][A
 49%|███████████████▌                | 59415/122310 [2:04:06<8:11:01,  2.13it/s][A
 49%|███████████████▌                | 59417/122310 [2:04:06<7:59:14,  2.19it/s][A
 49%|███████████████▌                | 59426/122310 [2:04:07<5:21:27,  3.26it/s][A
 49%|███████████████▌                | 59437/122310 [2:04:08<3:37:50,  4.81it/s][A
 49%|███████████████▌                | 59443/122310 [2:04:09<3:15:55,  5.35it/s][A
 49%|███████████████▌                | 59444/122310 [2:04:09<3:47:44,  4.60it/s][A
 49%|███████████████▌                | 59448/122310 [2:04:10<3:38:12,  4.80

step: 7220, loss: 95.88598072519217, epoch: 0



 49%|███████████████▌                | 59528/122310 [2:04:16<1:30:20, 11.58it/s][A
 49%|███████████████▌                | 59538/122310 [2:04:17<1:50:44,  9.45it/s][A
 49%|███████████████▌                | 59543/122310 [2:04:18<1:58:36,  8.82it/s][A
 49%|███████████████▌                | 59549/122310 [2:04:19<2:00:57,  8.65it/s][A
 49%|███████████████▌                | 59559/122310 [2:04:20<1:44:23, 10.02it/s][A
 49%|███████████████▌                | 59567/122310 [2:04:20<1:42:00, 10.25it/s][A
 49%|███████████████▌                | 59578/122310 [2:04:21<1:30:02, 11.61it/s][A
 49%|███████████████▌                | 59587/122310 [2:04:22<1:28:21, 11.83it/s][A
 49%|███████████████▌                | 59595/122310 [2:04:22<1:30:32, 11.54it/s][A
 49%|███████████████▌                | 59603/122310 [2:04:23<1:32:09, 11.34it/s][A
 49%|███████████████▌                | 59613/122310 [2:04:24<1:26:52, 12.03it/s][A
 49%|███████████████▌                | 59626/122310 [2:04:25<1:16:00, 13.74

step: 7240, loss: 100.14286527580691, epoch: 0



 49%|███████████████▌                | 59670/122310 [2:04:31<2:39:11,  6.56it/s][A
 49%|███████████████▌                | 59682/122310 [2:04:31<1:49:33,  9.53it/s][A
 49%|███████████████▌                | 59690/122310 [2:04:32<1:44:56,  9.95it/s][A
 49%|███████████████▌                | 59699/122310 [2:04:33<1:37:46, 10.67it/s][A
 49%|███████████████▌                | 59703/122310 [2:04:33<1:54:33,  9.11it/s][A
 49%|███████████████▌                | 59707/122310 [2:04:34<2:10:29,  8.00it/s][A
 49%|███████████████▌                | 59718/122310 [2:04:35<1:44:05, 10.02it/s][A
 49%|███████████████▋                | 59725/122310 [2:04:36<1:45:47,  9.86it/s][A
 49%|███████████████▋                | 59729/122310 [2:04:36<2:01:53,  8.56it/s][A
 49%|███████████████▋                | 59735/122310 [2:04:37<2:03:15,  8.46it/s][A
 49%|███████████████▋                | 59760/122310 [2:04:38<1:04:34, 16.15it/s][A
 49%|███████████████▋                | 59773/122310 [2:04:39<1:02:43, 16.62

step: 7260, loss: 96.7561188100584, epoch: 0



 49%|███████████████▋                | 59859/122310 [2:04:45<1:16:58, 13.52it/s][A
 49%|███████████████▋                | 59874/122310 [2:04:46<1:06:59, 15.53it/s][A
 49%|███████████████▋                | 59879/122310 [2:04:47<1:20:08, 12.98it/s][A
 49%|███████████████▋                | 59884/122310 [2:04:47<1:33:38, 11.11it/s][A
 49%|███████████████▋                | 59891/122310 [2:04:48<1:37:44, 10.64it/s][A
 49%|███████████████▋                | 59903/122310 [2:04:49<1:24:12, 12.35it/s][A
 49%|███████████████▋                | 59917/122310 [2:04:50<1:12:32, 14.33it/s][A
 49%|███████████████▋                | 59924/122310 [2:04:50<1:20:41, 12.89it/s][A
 49%|███████████████▋                | 59931/122310 [2:04:51<1:27:35, 11.87it/s][A
 49%|███████████████▋                | 59939/122310 [2:04:52<1:29:34, 11.61it/s][A
 49%|███████████████▋                | 59947/122310 [2:04:53<1:31:32, 11.35it/s][A
 49%|███████████████▋                | 59955/122310 [2:04:53<1:32:39, 11.22

step: 7280, loss: 97.03250079373109, epoch: 0



 49%|███████████████▋                | 60042/122310 [2:05:00<1:08:26, 15.16it/s][A
 49%|███████████████▋                | 60054/122310 [2:05:01<1:06:51, 15.52it/s][A
 49%|███████████████▋                | 60068/122310 [2:05:01<1:02:32, 16.59it/s][A
 49%|███████████████▋                | 60078/122310 [2:05:02<1:06:14, 15.66it/s][A
 49%|███████████████▋                | 60082/122310 [2:05:03<1:22:11, 12.62it/s][A
 49%|███████████████▋                | 60091/122310 [2:05:04<1:23:04, 12.48it/s][A
 49%|███████████████▋                | 60101/122310 [2:05:04<1:21:10, 12.77it/s][A
 49%|███████████████▋                | 60110/122310 [2:05:05<1:22:02, 12.63it/s][A
 49%|███████████████▋                | 60115/122310 [2:05:06<1:34:48, 10.93it/s][A
 49%|███████████████▋                | 60127/122310 [2:05:07<1:22:33, 12.55it/s][A
 49%|███████████████▋                | 60142/122310 [2:05:07<1:09:40, 14.87it/s][A
 49%|███████████████▋                | 60153/122310 [2:05:08<1:09:27, 14.92

step: 7300, loss: 103.36554731080977, epoch: 0



 49%|███████████████▊                | 60221/122310 [2:05:15<1:45:46,  9.78it/s][A
 49%|███████████████▊                | 60232/122310 [2:05:15<1:32:01, 11.24it/s][A
 49%|███████████████▊                | 60239/122310 [2:05:16<1:36:21, 10.74it/s][A
 49%|███████████████▊                | 60244/122310 [2:05:17<1:47:39,  9.61it/s][A
 49%|███████████████▊                | 60254/122310 [2:05:18<1:36:35, 10.71it/s][A
 49%|███████████████▊                | 60260/122310 [2:05:18<1:43:41,  9.97it/s][A
 49%|███████████████▊                | 60264/122310 [2:05:19<2:00:03,  8.61it/s][A
 49%|███████████████▊                | 60279/122310 [2:05:20<1:25:35, 12.08it/s][A
 49%|███████████████▊                | 60287/122310 [2:05:21<1:28:08, 11.73it/s][A
 49%|███████████████▊                | 60294/122310 [2:05:21<1:33:46, 11.02it/s][A
 49%|███████████████▊                | 60296/122310 [2:05:22<2:00:20,  8.59it/s][A
 49%|███████████████▊                | 60306/122310 [2:05:23<1:42:28, 10.08

step: 7320, loss: 92.84885636588035, epoch: 0



 49%|███████████████▊                | 60388/122310 [2:05:29<1:26:31, 11.93it/s][A
 49%|███████████████▊                | 60393/122310 [2:05:30<1:39:28, 10.37it/s][A
 49%|███████████████▊                | 60396/122310 [2:05:31<2:01:01,  8.53it/s][A
 49%|███████████████▊                | 60407/122310 [2:05:32<1:38:34, 10.47it/s][A
 49%|███████████████▊                | 60424/122310 [2:05:32<1:12:26, 14.24it/s][A
 49%|███████████████▊                | 60435/122310 [2:05:33<1:11:26, 14.44it/s][A
 49%|███████████████▊                | 60442/122310 [2:05:34<1:19:39, 12.95it/s][A
 49%|███████████████▊                | 60449/122310 [2:05:35<1:26:36, 11.91it/s][A
 49%|███████████████▊                | 60465/122310 [2:05:35<1:09:34, 14.82it/s][A
 49%|███████████████▊                | 60473/122310 [2:05:36<1:16:13, 13.52it/s][A
 49%|███████████████▊                | 60475/122310 [2:05:37<1:39:29, 10.36it/s][A
 49%|███████████████▊                | 60485/122310 [2:05:38<1:31:00, 11.32

step: 7340, loss: 83.99152192790781, epoch: 0



 50%|███████████████▊                | 60568/122310 [2:05:44<1:58:16,  8.70it/s][A
 50%|███████████████▊                | 60577/122310 [2:05:45<1:45:07,  9.79it/s][A
 50%|███████████████▊                | 60583/122310 [2:05:46<1:50:41,  9.29it/s][A
 50%|███████████████▊                | 60594/122310 [2:05:46<1:33:41, 10.98it/s][A
 50%|███████████████▊                | 60599/122310 [2:05:47<1:45:49,  9.72it/s][A
 50%|███████████████▊                | 60610/122310 [2:05:48<1:31:37, 11.22it/s][A
 50%|███████████████▊                | 60612/122310 [2:05:49<1:57:45,  8.73it/s][A
 50%|███████████████▊                | 60623/122310 [2:05:49<1:37:07, 10.59it/s][A
 50%|███████████████▊                | 60629/122310 [2:05:50<1:44:32,  9.83it/s][A
 50%|███████████████▊                | 60638/122310 [2:05:51<1:37:28, 10.54it/s][A
 50%|███████████████▊                | 60644/122310 [2:05:52<1:44:36,  9.82it/s][A
 50%|███████████████▊                | 60657/122310 [2:05:52<1:24:33, 12.15

step: 7360, loss: 79.10357632008356, epoch: 0



 50%|███████████████▉                | 60740/122310 [2:05:59<1:42:20, 10.03it/s][A
 50%|███████████████▉                | 60749/122310 [2:06:00<1:36:03, 10.68it/s][A
 50%|███████████████▉                | 60762/122310 [2:06:00<1:20:10, 12.79it/s][A
 50%|███████████████▉                | 60771/122310 [2:06:01<1:21:27, 12.59it/s][A
 50%|███████████████▉                | 60775/122310 [2:06:02<1:38:20, 10.43it/s][A
 50%|███████████████▉                | 60779/122310 [2:06:03<1:54:55,  8.92it/s][A
 50%|███████████████▉                | 60788/122310 [2:06:03<1:43:16,  9.93it/s][A
 50%|███████████████▉                | 60800/122310 [2:06:04<1:26:50, 11.81it/s][A
 50%|███████████████▉                | 60810/122310 [2:06:05<1:23:28, 12.28it/s][A
 50%|███████████████▉                | 60818/122310 [2:06:06<1:26:24, 11.86it/s][A
 50%|███████████████▉                | 60828/122310 [2:06:06<1:23:04, 12.34it/s][A
 50%|███████████████▉                | 60833/122310 [2:06:07<1:36:05, 10.66

step: 7380, loss: 97.29797150542487, epoch: 0



 50%|███████████████▉                | 60915/122310 [2:06:14<1:54:18,  8.95it/s][A
 50%|███████████████▉                | 60923/122310 [2:06:14<1:47:09,  9.55it/s][A
 50%|███████████████▉                | 60931/122310 [2:06:15<1:42:48,  9.95it/s][A
 50%|███████████████▉                | 60938/122310 [2:06:16<1:44:11,  9.82it/s][A
 50%|███████████████▉                | 60945/122310 [2:06:17<1:45:06,  9.73it/s][A
 50%|███████████████▉                | 60960/122310 [2:06:17<1:19:19, 12.89it/s][A
 50%|███████████████▉                | 60968/122310 [2:06:18<1:23:07, 12.30it/s][A
 50%|███████████████▉                | 60979/122310 [2:06:19<1:18:06, 13.09it/s][A
 50%|███████████████▉                | 60988/122310 [2:06:20<1:20:00, 12.77it/s][A
 50%|███████████████▉                | 60997/122310 [2:06:20<1:20:41, 12.66it/s][A
 50%|███████████████▉                | 61006/122310 [2:06:21<1:21:42, 12.51it/s][A
 50%|███████████████▉                | 61011/122310 [2:06:22<1:34:36, 10.80

step: 7400, loss: 107.42047966075967, epoch: 0



 50%|███████████████▉                | 61089/122310 [2:06:28<1:26:53, 11.74it/s][A
 50%|███████████████▉                | 61097/122310 [2:06:29<1:28:42, 11.50it/s][A
 50%|███████████████▉                | 61105/122310 [2:06:30<1:30:11, 11.31it/s][A
 50%|███████████████▉                | 61120/122310 [2:06:31<1:12:58, 13.98it/s][A
 50%|███████████████▉                | 61134/122310 [2:06:31<1:06:03, 15.43it/s][A
 50%|███████████████▉                | 61140/122310 [2:06:32<1:16:44, 13.28it/s][A
 50%|███████████████▉                | 61148/122310 [2:06:33<1:21:00, 12.58it/s][A
 50%|███████████████▉                | 61151/122310 [2:06:34<1:41:25, 10.05it/s][A
 50%|████████████████                | 61167/122310 [2:06:34<1:15:19, 13.53it/s][A
 50%|████████████████                | 61180/122310 [2:06:35<1:08:55, 14.78it/s][A
 50%|████████████████                | 61185/122310 [2:06:36<1:22:25, 12.36it/s][A
 50%|████████████████                | 61189/122310 [2:06:37<1:39:38, 10.22

step: 7420, loss: 99.28278797761428, epoch: 0



 50%|████████████████                | 61255/122310 [2:06:43<1:24:54, 11.98it/s][A
 50%|████████████████                | 61263/122310 [2:06:44<1:27:24, 11.64it/s][A
 50%|████████████████                | 61271/122310 [2:06:45<1:28:55, 11.44it/s][A
 50%|████████████████                | 61274/122310 [2:06:45<1:50:23,  9.21it/s][A
 50%|████████████████                | 61281/122310 [2:06:46<1:49:49,  9.26it/s][A
 50%|████████████████                | 61291/122310 [2:06:47<1:36:49, 10.50it/s][A
 50%|████████████████                | 61294/122310 [2:06:48<1:58:28,  8.58it/s][A
 50%|████████████████                | 61303/122310 [2:06:48<1:45:08,  9.67it/s][A
 50%|████████████████                | 61314/122310 [2:06:49<1:30:21, 11.25it/s][A
 50%|████████████████                | 61320/122310 [2:06:50<1:38:37, 10.31it/s][A
 50%|████████████████                | 61332/122310 [2:06:51<1:24:25, 12.04it/s][A
 50%|████████████████                | 61340/122310 [2:06:51<1:26:45, 11.71

step: 7440, loss: 82.97099233161138, epoch: 0



 50%|████████████████                | 61421/122310 [2:06:58<1:23:40, 12.13it/s][A
 50%|████████████████                | 61438/122310 [2:06:59<1:05:50, 15.41it/s][A
 50%|████████████████                | 61448/122310 [2:06:59<1:08:17, 14.85it/s][A
 50%|████████████████                | 61457/122310 [2:07:00<1:12:13, 14.04it/s][A
 50%|████████████████                | 61464/122310 [2:07:01<1:20:09, 12.65it/s][A
 50%|████████████████                | 61473/122310 [2:07:02<1:20:42, 12.56it/s][A
 50%|████████████████                | 61478/122310 [2:07:02<1:33:48, 10.81it/s][A
 50%|████████████████                | 61488/122310 [2:07:03<1:27:27, 11.59it/s][A
 50%|████████████████                | 61502/122310 [2:07:04<1:13:17, 13.83it/s][A
 50%|████████████████                | 61506/122310 [2:07:05<1:29:43, 11.29it/s][A
 50%|████████████████                | 61513/122310 [2:07:05<1:34:12, 10.76it/s][A
 50%|████████████████                | 61531/122310 [2:07:06<1:08:31, 14.78

step: 7460, loss: 94.51498127324805, epoch: 0



 50%|████████████████                | 61607/122310 [2:07:13<1:34:40, 10.69it/s][A
 50%|████████████████                | 61613/122310 [2:07:14<1:42:16,  9.89it/s][A
 50%|████████████████                | 61618/122310 [2:07:14<1:52:22,  9.00it/s][A
 50%|████████████████                | 61624/122310 [2:07:15<1:55:36,  8.75it/s][A
 50%|████████████████                | 61630/122310 [2:07:16<1:58:13,  8.55it/s][A
 50%|████████████████▏               | 61638/122310 [2:07:17<2:21:52,  7.13it/s][A
 50%|████████████████▏               | 61643/122310 [2:07:18<2:23:40,  7.04it/s][A
 50%|████████████████▏               | 61655/122310 [2:07:19<1:46:44,  9.47it/s][A
 50%|████████████████▏               | 61662/122310 [2:07:19<1:46:41,  9.47it/s][A
 50%|████████████████▏               | 61673/122310 [2:07:20<1:31:53, 11.00it/s][A
 50%|████████████████▏               | 61679/122310 [2:07:21<1:39:23, 10.17it/s][A
 50%|████████████████▏               | 61688/122310 [2:07:22<1:33:55, 10.76

step: 7480, loss: 87.44849082691944, epoch: 0



 50%|████████████████▏               | 61754/122310 [2:07:28<1:33:28, 10.80it/s][A
 50%|████████████████▏               | 61756/122310 [2:07:28<2:00:19,  8.39it/s][A
 50%|████████████████▏               | 61765/122310 [2:07:29<1:45:31,  9.56it/s][A
 51%|████████████████▏               | 61777/122310 [2:07:30<1:27:15, 11.56it/s][A
 51%|████████████████▏               | 61781/122310 [2:07:30<1:43:49,  9.72it/s][A
 51%|████████████████▏               | 61795/122310 [2:07:31<1:20:48, 12.48it/s][A
 51%|████████████████▏               | 61808/122310 [2:07:32<1:12:11, 13.97it/s][A
 51%|████████████████▏               | 61823/122310 [2:07:33<1:03:28, 15.88it/s][A
 51%|████████████████▏               | 61828/122310 [2:07:33<1:16:41, 13.14it/s][A
 51%|████████████████▏               | 61832/122310 [2:07:35<2:01:14,  8.31it/s][A
 51%|████████████████▏               | 61843/122310 [2:07:36<1:40:52,  9.99it/s][A
 51%|████████████████▏               | 61850/122310 [2:07:36<1:42:14,  9.86

step: 7500, loss: 97.5413340200812, epoch: 0



 51%|████████████████▏               | 61912/122310 [2:07:42<1:37:24, 10.33it/s][A
 51%|████████████████▏               | 61919/122310 [2:07:43<1:39:43, 10.09it/s][A
 51%|████████████████▏               | 61928/122310 [2:07:44<1:33:54, 10.72it/s][A
 51%|████████████████▏               | 61942/122310 [2:07:45<1:16:14, 13.20it/s][A
 51%|████████████████▏               | 61960/122310 [2:07:45<1:00:49, 16.54it/s][A
 51%|█████████████████▏                | 61973/122310 [2:07:46<59:40, 16.85it/s][A
 51%|████████████████▏               | 61984/122310 [2:07:47<1:03:02, 15.95it/s][A
 51%|████████████████▏               | 61989/122310 [2:07:48<1:16:42, 13.11it/s][A
 51%|████████████████▏               | 61991/122310 [2:07:48<1:39:50, 10.07it/s][A
 51%|████████████████▏               | 61996/122310 [2:07:49<1:51:07,  9.05it/s][A
 51%|████████████████▏               | 62003/122310 [2:07:50<1:49:44,  9.16it/s][A
 51%|████████████████▏               | 62012/122310 [2:07:51<1:40:20, 10.02

step: 7520, loss: 100.43615579482925, epoch: 0



 51%|████████████████▏               | 62094/122310 [2:07:57<1:15:34, 13.28it/s][A
 51%|████████████████▏               | 62103/122310 [2:07:58<1:17:13, 12.99it/s][A
 51%|████████████████▏               | 62109/122310 [2:07:59<1:26:33, 11.59it/s][A
 51%|████████████████▎               | 62118/122310 [2:07:59<1:25:13, 11.77it/s][A
 51%|████████████████▎               | 62127/122310 [2:08:00<1:24:26, 11.88it/s][A
 51%|████████████████▎               | 62133/122310 [2:08:01<1:33:36, 10.71it/s][A
 51%|████████████████▎               | 62141/122310 [2:08:02<1:32:37, 10.83it/s][A
 51%|████████████████▎               | 62150/122310 [2:08:02<1:29:13, 11.24it/s][A
 51%|████████████████▎               | 62160/122310 [2:08:03<1:24:04, 11.92it/s][A
 51%|████████████████▎               | 62163/122310 [2:08:04<1:44:50,  9.56it/s][A
 51%|████████████████▎               | 62168/122310 [2:08:05<1:54:33,  8.75it/s][A
 51%|████████████████▎               | 62182/122310 [2:08:05<1:24:53, 11.81

step: 7540, loss: 106.94503087176457, epoch: 0



 51%|████████████████▎               | 62248/122310 [2:08:12<2:02:01,  8.20it/s][A
 51%|████████████████▎               | 62259/122310 [2:08:13<1:38:10, 10.20it/s][A
 51%|████████████████▎               | 62262/122310 [2:08:13<1:59:20,  8.39it/s][A
 51%|████████████████▎               | 62275/122310 [2:08:14<1:29:41, 11.16it/s][A
 51%|████████████████▎               | 62286/122310 [2:08:15<1:21:41, 12.25it/s][A
 51%|████████████████▎               | 62295/122310 [2:08:16<1:21:51, 12.22it/s][A
 51%|████████████████▎               | 62299/122310 [2:08:16<1:38:07, 10.19it/s][A
 51%|████████████████▎               | 62308/122310 [2:08:17<1:32:33, 10.80it/s][A
 51%|████████████████▎               | 62312/122310 [2:08:18<1:48:43,  9.20it/s][A
 51%|████████████████▎               | 62326/122310 [2:08:19<1:22:30, 12.12it/s][A
 51%|████████████████▎               | 62340/122310 [2:08:19<1:10:34, 14.16it/s][A
 51%|████████████████▎               | 62344/122310 [2:08:20<1:26:41, 11.53

step: 7560, loss: 87.39620065539883, epoch: 0



 51%|████████████████▎               | 62415/122310 [2:08:27<1:20:21, 12.42it/s][A
 51%|████████████████▎               | 62423/122310 [2:08:27<1:23:17, 11.98it/s][A
 51%|████████████████▎               | 62435/122310 [2:08:28<1:15:09, 13.28it/s][A
 51%|████████████████▎               | 62439/122310 [2:08:29<1:31:33, 10.90it/s][A
 51%|████████████████▎               | 62449/122310 [2:08:30<1:25:08, 11.72it/s][A
 51%|████████████████▎               | 62455/122310 [2:08:30<1:33:43, 10.64it/s][A
 51%|████████████████▎               | 62459/122310 [2:08:31<1:50:01,  9.07it/s][A
 51%|████████████████▎               | 62472/122310 [2:08:32<1:26:08, 11.58it/s][A
 51%|████████████████▎               | 62474/122310 [2:08:33<1:51:19,  8.96it/s][A
 51%|████████████████▎               | 62480/122310 [2:08:33<1:54:41,  8.69it/s][A
 51%|████████████████▎               | 62486/122310 [2:08:34<1:56:54,  8.53it/s][A
 51%|████████████████▎               | 62490/122310 [2:08:35<2:11:57,  7.56

step: 7580, loss: 111.93278887000771, epoch: 0



 51%|████████████████▎               | 62572/122310 [2:08:42<1:08:14, 14.59it/s][A
 51%|████████████████▎               | 62582/122310 [2:08:42<1:09:29, 14.33it/s][A
 51%|████████████████▍               | 62594/122310 [2:08:43<1:06:45, 14.91it/s][A
 51%|████████████████▍               | 62606/122310 [2:08:44<1:05:05, 15.29it/s][A
 51%|████████████████▍               | 62609/122310 [2:08:44<1:23:24, 11.93it/s][A
 51%|████████████████▍               | 62617/122310 [2:08:45<1:25:59, 11.57it/s][A
 51%|████████████████▍               | 62623/122310 [2:08:46<1:34:19, 10.55it/s][A
 51%|████████████████▍               | 62630/122310 [2:08:47<1:37:15, 10.23it/s][A
 51%|████████████████▍               | 62639/122310 [2:08:47<1:32:17, 10.78it/s][A
 51%|████████████████▍               | 62647/122310 [2:08:48<1:32:10, 10.79it/s][A
 51%|████████████████▍               | 62658/122310 [2:08:49<1:23:15, 11.94it/s][A
 51%|████████████████▍               | 62672/122310 [2:08:50<1:11:01, 13.99

step: 7600, loss: 99.18903093193319, epoch: 0
sim1 and sim2 are 0.6045503032425891, 0.19889009503106453
cosine of pred and queen: 0.10380717694756196
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: economic
Actual: beijing:china::tokyo:japan, pred: south
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: enver
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: year
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: broke
Actual: lisbon:portugal::riga:latvia, pred: coerce
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: maharastra:mumb


 51%|████████████████▍               | 62773/122310 [2:09:12<1:02:32, 15.86it/s][A

Actual: thailand:thai::india:indian, pred: ruto
Actual: sweden:swedish::netherlands:dutch, pred: europe
Actual: russia:russian::germany:german, pred: italy
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: says
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: french
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.08888888888888889
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: misrule
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: earth
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: baltics
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:plays:


 51%|███████████████▉               | 62779/122310 [2:10:11<38:37:20,  2.34s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.023668639053254437



 51%|███████████████▉               | 62787/122310 [2:10:12<29:09:05,  1.76s/it][A
 51%|███████████████▉               | 62800/122310 [2:10:13<18:38:58,  1.13s/it][A
 51%|███████████████▉               | 62809/122310 [2:10:13<13:59:22,  1.18it/s][A
 51%|███████████████▉               | 62816/122310 [2:10:14<11:09:50,  1.48it/s][A
 51%|████████████████▍               | 62824/122310 [2:10:15<8:31:35,  1.94it/s][A
 51%|████████████████▍               | 62831/122310 [2:10:15<6:47:50,  2.43it/s][A
 51%|████████████████▍               | 62848/122310 [2:10:16<3:56:27,  4.19it/s][A
 51%|████████████████▍               | 62855/122310 [2:10:17<3:27:21,  4.78it/s][A
 51%|████████████████▍               | 62863/122310 [2:10:18<2:56:54,  5.60it/s][A
 51%|████████████████▍               | 62872/122310 [2:10:18<2:28:03,  6.69it/s][A
 51%|████████████████▍               | 62878/122310 [2:10:19<2:21:50,  6.98it/s][A
 51%|████████████████▍               | 62886/122310 [2:10:20<2:06:37,  7.82

step: 7620, loss: 81.35439270217212, epoch: 0



 51%|████████████████▍               | 62962/122310 [2:10:26<1:14:28, 13.28it/s][A
 51%|████████████████▍               | 62967/122310 [2:10:27<1:26:33, 11.43it/s][A
 51%|████████████████▍               | 62971/122310 [2:10:28<1:42:59,  9.60it/s][A
 51%|████████████████▍               | 62972/122310 [2:10:29<2:17:25,  7.20it/s][A
 51%|████████████████▍               | 62979/122310 [2:10:29<2:05:29,  7.88it/s][A
 51%|████████████████▍               | 62981/122310 [2:10:30<2:35:22,  6.36it/s][A
 51%|████████████████▍               | 62987/122310 [2:10:31<2:22:46,  6.93it/s][A
 52%|████████████████▍               | 62997/122310 [2:10:31<1:50:09,  8.97it/s][A
 52%|████████████████▍               | 63001/122310 [2:10:32<2:04:38,  7.93it/s][A
 52%|████████████████▍               | 63018/122310 [2:10:33<1:18:53, 12.53it/s][A
 52%|████████████████▍               | 63028/122310 [2:10:34<1:16:35, 12.90it/s][A
 52%|████████████████▍               | 63034/122310 [2:10:34<1:26:01, 11.48

step: 7640, loss: 85.86772961830476, epoch: 0



 52%|████████████████▌               | 63121/122310 [2:10:41<1:11:01, 13.89it/s][A
 52%|████████████████▌               | 63128/122310 [2:10:42<1:18:03, 12.64it/s][A
 52%|████████████████▌               | 63142/122310 [2:10:42<1:07:27, 14.62it/s][A
 52%|████████████████▌               | 63149/122310 [2:10:43<1:15:11, 13.11it/s][A
 52%|████████████████▌               | 63157/122310 [2:10:44<1:18:56, 12.49it/s][A
 52%|████████████████▌               | 63170/122310 [2:10:45<1:10:05, 14.06it/s][A
 52%|████████████████▌               | 63181/122310 [2:10:45<1:08:32, 14.38it/s][A
 52%|████████████████▌               | 63192/122310 [2:10:46<1:07:31, 14.59it/s][A
 52%|████████████████▌               | 63199/122310 [2:10:47<1:15:24, 13.06it/s][A
 52%|████████████████▌               | 63206/122310 [2:10:48<1:22:32, 11.93it/s][A
 52%|████████████████▌               | 63210/122310 [2:10:48<1:38:19, 10.02it/s][A
 52%|████████████████▌               | 63221/122310 [2:10:49<1:25:28, 11.52

step: 7660, loss: 80.53223631796769, epoch: 0



 52%|████████████████▌               | 63271/122310 [2:10:56<2:23:08,  6.87it/s][A
 52%|████████████████▌               | 63285/122310 [2:10:56<1:33:09, 10.56it/s][A
 52%|████████████████▌               | 63292/122310 [2:10:57<1:35:43, 10.28it/s][A
 52%|████████████████▌               | 63301/122310 [2:10:58<1:30:27, 10.87it/s][A
 52%|████████████████▌               | 63308/122310 [2:10:58<1:33:25, 10.53it/s][A
 52%|████████████████▌               | 63321/122310 [2:10:59<1:17:39, 12.66it/s][A
 52%|████████████████▌               | 63327/122310 [2:11:00<1:26:30, 11.36it/s][A
 52%|████████████████▌               | 63333/122310 [2:11:01<1:34:56, 10.35it/s][A
 52%|████████████████▌               | 63335/122310 [2:11:01<2:01:09,  8.11it/s][A
 52%|████████████████▌               | 63343/122310 [2:11:02<1:49:37,  8.96it/s][A
 52%|████████████████▌               | 63353/122310 [2:11:03<1:34:40, 10.38it/s][A
 52%|████████████████▌               | 63357/122310 [2:11:04<1:50:30,  8.89

step: 7680, loss: 104.78382258106504, epoch: 0



 52%|████████████████▌               | 63407/122310 [2:11:10<2:22:21,  6.90it/s][A
 52%|████████████████▌               | 63414/122310 [2:11:11<2:09:17,  7.59it/s][A
 52%|████████████████▌               | 63421/122310 [2:11:12<2:00:55,  8.12it/s][A
 52%|████████████████▌               | 63427/122310 [2:11:13<2:34:29,  6.35it/s][A
 52%|████████████████▌               | 63433/122310 [2:11:14<2:24:08,  6.81it/s][A
 52%|████████████████▌               | 63445/122310 [2:11:15<1:46:17,  9.23it/s][A
 52%|████████████████▌               | 63452/122310 [2:11:15<1:45:05,  9.33it/s][A
 52%|████████████████▌               | 63460/122310 [2:11:16<1:40:24,  9.77it/s][A
 52%|████████████████▌               | 63467/122310 [2:11:17<1:40:50,  9.73it/s][A
 52%|████████████████▌               | 63471/122310 [2:11:18<1:56:14,  8.44it/s][A
 52%|████████████████▌               | 63474/122310 [2:11:18<2:16:25,  7.19it/s][A
 52%|████████████████▌               | 63482/122310 [2:11:19<1:57:50,  8.32

step: 7700, loss: 110.3384053151117, epoch: 0



 52%|████████████████▋               | 63554/122310 [2:11:25<1:42:59,  9.51it/s][A
 52%|████████████████▋               | 63559/122310 [2:11:26<1:52:17,  8.72it/s][A
 52%|████████████████▋               | 63566/122310 [2:11:26<1:49:10,  8.97it/s][A
 52%|████████████████▋               | 63573/122310 [2:11:27<1:47:04,  9.14it/s][A
 52%|████████████████▋               | 63587/122310 [2:11:28<1:20:40, 12.13it/s][A
 52%|████████████████▋               | 63597/122310 [2:11:29<1:17:39, 12.60it/s][A
 52%|████████████████▋               | 63605/122310 [2:11:29<1:21:08, 12.06it/s][A
 52%|████████████████▋               | 63618/122310 [2:11:30<1:10:56, 13.79it/s][A
 52%|████████████████▋               | 63626/122310 [2:11:31<1:15:52, 12.89it/s][A
 52%|████████████████▋               | 63631/122310 [2:11:31<1:28:12, 11.09it/s][A
 52%|████████████████▋               | 63642/122310 [2:11:32<1:19:58, 12.23it/s][A
 52%|████████████████▋               | 63644/122310 [2:11:33<1:44:12,  9.38

step: 7720, loss: 86.8511894773673, epoch: 0



 52%|████████████████▋               | 63725/122310 [2:11:40<1:33:19, 10.46it/s][A
 52%|████████████████▋               | 63737/122310 [2:11:40<1:19:52, 12.22it/s][A
 52%|████████████████▋               | 63746/122310 [2:11:41<1:19:32, 12.27it/s][A
 52%|████████████████▋               | 63757/122310 [2:11:42<1:14:39, 13.07it/s][A
 52%|████████████████▋               | 63763/122310 [2:11:43<1:49:07,  8.94it/s][A
 52%|████████████████▋               | 63770/122310 [2:11:44<1:47:45,  9.05it/s][A
 52%|████████████████▋               | 63774/122310 [2:11:45<1:59:36,  8.16it/s][A
 52%|████████████████▋               | 63782/122310 [2:11:46<2:19:24,  7.00it/s][A
 52%|████████████████▋               | 63788/122310 [2:11:47<2:14:27,  7.25it/s][A
 52%|████████████████▋               | 63791/122310 [2:11:48<2:31:31,  6.44it/s][A
 52%|████████████████▋               | 63802/122310 [2:11:48<1:52:15,  8.69it/s][A
 52%|████████████████▋               | 63808/122310 [2:11:49<1:54:19,  8.53

step: 7740, loss: 98.64119642731585, epoch: 0



 52%|████████████████▋               | 63887/122310 [2:11:54<1:10:03, 13.90it/s][A
 52%|████████████████▋               | 63900/122310 [2:11:55<1:04:38, 15.06it/s][A
 52%|████████████████▋               | 63904/122310 [2:11:56<1:19:48, 12.20it/s][A
 52%|████████████████▋               | 63916/122310 [2:11:56<1:12:20, 13.45it/s][A
 52%|████████████████▋               | 63925/122310 [2:11:57<1:14:18, 13.09it/s][A
 52%|████████████████▋               | 63934/122310 [2:11:58<1:15:41, 12.85it/s][A
 52%|████████████████▋               | 63941/122310 [2:11:59<1:22:01, 11.86it/s][A
 52%|████████████████▋               | 63952/122310 [2:11:59<1:17:57, 12.48it/s][A
 52%|████████████████▋               | 63957/122310 [2:12:00<1:33:40, 10.38it/s][A
 52%|████████████████▋               | 63971/122310 [2:12:01<1:16:43, 12.67it/s][A
 52%|████████████████▋               | 63979/122310 [2:12:02<1:20:12, 12.12it/s][A
 52%|████████████████▋               | 63988/122310 [2:12:03<1:20:13, 12.12

step: 7760, loss: 82.9962402282214, epoch: 0



 52%|████████████████▊               | 64048/122310 [2:12:09<2:02:11,  7.95it/s][A
 52%|████████████████▊               | 64058/122310 [2:12:10<1:40:52,  9.62it/s][A
 52%|████████████████▊               | 64062/122310 [2:12:11<1:55:53,  8.38it/s][A
 52%|████████████████▊               | 64074/122310 [2:12:11<1:30:05, 10.77it/s][A
 52%|████████████████▊               | 64080/122310 [2:12:12<1:37:21,  9.97it/s][A
 52%|████████████████▊               | 64088/122310 [2:12:13<1:34:41, 10.25it/s][A
 52%|████████████████▊               | 64094/122310 [2:12:14<1:40:59,  9.61it/s][A
 52%|████████████████▊               | 64103/122310 [2:12:14<1:33:10, 10.41it/s][A
 52%|████████████████▊               | 64111/122310 [2:12:15<1:31:51, 10.56it/s][A
 52%|████████████████▊               | 64118/122310 [2:12:16<1:34:39, 10.25it/s][A
 52%|████████████████▊               | 64122/122310 [2:12:17<1:50:03,  8.81it/s][A
 52%|████████████████▊               | 64131/122310 [2:12:17<1:38:38,  9.83

step: 7780, loss: 87.72989246380985, epoch: 0



 52%|████████████████▊               | 64205/122310 [2:12:24<1:35:09, 10.18it/s][A
 52%|████████████████▊               | 64209/122310 [2:12:25<1:50:27,  8.77it/s][A
 53%|████████████████▊               | 64214/122310 [2:12:25<1:58:14,  8.19it/s][A
 53%|████████████████▊               | 64229/122310 [2:12:26<1:21:41, 11.85it/s][A
 53%|████████████████▊               | 64240/122310 [2:12:27<1:15:33, 12.81it/s][A
 53%|████████████████▊               | 64246/122310 [2:12:28<1:24:49, 11.41it/s][A
 53%|████████████████▊               | 64257/122310 [2:12:28<1:17:33, 12.47it/s][A
 53%|████████████████▊               | 64267/122310 [2:12:29<1:15:13, 12.86it/s][A
 53%|████████████████▊               | 64273/122310 [2:12:30<1:24:30, 11.45it/s][A
 53%|████████████████▊               | 64281/122310 [2:12:31<1:25:39, 11.29it/s][A
 53%|████████████████▊               | 64294/122310 [2:12:31<1:13:11, 13.21it/s][A
 53%|█████████████████▉                | 64312/122310 [2:12:32<58:11, 16.61

step: 7800, loss: 87.58845027221123, epoch: 0



 53%|████████████████▊               | 64385/122310 [2:12:39<1:16:49, 12.57it/s][A
 53%|████████████████▊               | 64394/122310 [2:12:39<1:17:22, 12.47it/s][A
 53%|████████████████▊               | 64399/122310 [2:12:40<1:29:52, 10.74it/s][A
 53%|████████████████▊               | 64410/122310 [2:12:41<1:20:38, 11.97it/s][A
 53%|████████████████▊               | 64418/122310 [2:12:42<1:22:52, 11.64it/s][A
 53%|████████████████▊               | 64433/122310 [2:12:42<1:07:48, 14.23it/s][A
 53%|████████████████▊               | 64436/122310 [2:12:43<1:26:08, 11.20it/s][A
 53%|████████████████▊               | 64441/122310 [2:12:44<1:37:41,  9.87it/s][A
 53%|████████████████▊               | 64444/122310 [2:12:45<1:58:16,  8.15it/s][A
 53%|████████████████▊               | 64453/122310 [2:12:45<1:42:46,  9.38it/s][A
 53%|████████████████▊               | 64460/122310 [2:12:46<1:42:26,  9.41it/s][A
 53%|████████████████▊               | 64465/122310 [2:12:47<1:51:51,  8.62

step: 7820, loss: 88.00908568939026, epoch: 0



 53%|████████████████▉               | 64532/122310 [2:12:53<1:24:03, 11.46it/s][A
 53%|████████████████▉               | 64543/122310 [2:12:54<1:17:24, 12.44it/s][A
 53%|████████████████▉               | 64551/122310 [2:12:55<1:20:01, 12.03it/s][A
 53%|████████████████▉               | 64556/122310 [2:12:56<1:32:04, 10.45it/s][A
 53%|████████████████▉               | 64565/122310 [2:12:56<1:27:57, 10.94it/s][A
 53%|████████████████▉               | 64571/122310 [2:12:57<1:35:00, 10.13it/s][A
 53%|████████████████▉               | 64579/122310 [2:12:58<1:32:58, 10.35it/s][A
 53%|████████████████▉               | 64583/122310 [2:12:59<1:48:17,  8.89it/s][A
 53%|████████████████▉               | 64598/122310 [2:12:59<1:18:30, 12.25it/s][A
 53%|████████████████▉               | 64606/122310 [2:13:00<1:20:55, 11.88it/s][A
 53%|████████████████▉               | 64614/122310 [2:13:01<1:23:08, 11.57it/s][A
 53%|████████████████▉               | 64624/122310 [2:13:01<1:19:15, 12.13

step: 7840, loss: 93.8163585509928, epoch: 0



 53%|████████████████▉               | 64697/122310 [2:13:08<1:14:13, 12.94it/s][A
 53%|████████████████▉               | 64706/122310 [2:13:09<1:15:21, 12.74it/s][A
 53%|████████████████▉               | 64710/122310 [2:13:10<1:31:24, 10.50it/s][A
 53%|████████████████▉               | 64713/122310 [2:13:10<1:51:17,  8.63it/s][A
 53%|████████████████▉               | 64719/122310 [2:13:11<1:52:57,  8.50it/s][A
 53%|████████████████▉               | 64733/122310 [2:13:12<1:22:39, 11.61it/s][A
 53%|████████████████▉               | 64741/122310 [2:13:13<1:24:02, 11.42it/s][A
 53%|████████████████▉               | 64753/122310 [2:13:13<1:14:41, 12.84it/s][A
 53%|████████████████▉               | 64763/122310 [2:13:14<1:13:13, 13.10it/s][A
 53%|████████████████▉               | 64772/122310 [2:13:15<1:14:52, 12.81it/s][A
 53%|████████████████▉               | 64775/122310 [2:13:16<1:34:29, 10.15it/s][A
 53%|████████████████▉               | 64781/122310 [2:13:16<1:40:26,  9.55

step: 7860, loss: 85.76007552054409, epoch: 0



 53%|████████████████▉               | 64868/122310 [2:13:23<1:23:22, 11.48it/s][A
 53%|████████████████▉               | 64873/122310 [2:13:24<1:34:59, 10.08it/s][A
 53%|████████████████▉               | 64894/122310 [2:13:24<1:01:26, 15.57it/s][A
 53%|████████████████▉               | 64904/122310 [2:13:25<1:03:56, 14.96it/s][A
 53%|████████████████▉               | 64912/122310 [2:13:26<1:09:44, 13.72it/s][A
 53%|████████████████▉               | 64923/122310 [2:13:27<1:08:21, 13.99it/s][A
 53%|████████████████▉               | 64926/122310 [2:13:27<1:26:27, 11.06it/s][A
 53%|████████████████▉               | 64933/122310 [2:13:28<1:30:15, 10.59it/s][A
 53%|████████████████▉               | 64947/122310 [2:13:29<1:13:02, 13.09it/s][A
 53%|████████████████▉               | 64951/122310 [2:13:30<1:28:27, 10.81it/s][A
 53%|████████████████▉               | 64958/122310 [2:13:30<1:31:48, 10.41it/s][A
 53%|████████████████▉               | 64963/122310 [2:13:31<1:42:42,  9.31

step: 7880, loss: 97.14145139425072, epoch: 0



 53%|█████████████████               | 65037/122310 [2:13:38<1:37:00,  9.84it/s][A
 53%|█████████████████               | 65046/122310 [2:13:38<1:30:31, 10.54it/s][A
 53%|█████████████████               | 65055/122310 [2:13:39<1:26:15, 11.06it/s][A
 53%|█████████████████               | 65065/122310 [2:13:40<1:20:41, 11.82it/s][A
 53%|█████████████████               | 65069/122310 [2:13:41<1:36:21,  9.90it/s][A
 53%|█████████████████               | 65086/122310 [2:13:41<1:09:10, 13.79it/s][A
 53%|█████████████████               | 65096/122310 [2:13:42<1:09:12, 13.78it/s][A
 53%|█████████████████               | 65101/122310 [2:13:43<1:21:28, 11.70it/s][A
 53%|█████████████████               | 65114/122310 [2:13:44<1:10:40, 13.49it/s][A
 53%|█████████████████               | 65124/122310 [2:13:44<1:10:42, 13.48it/s][A
 53%|█████████████████               | 65138/122310 [2:13:45<1:02:53, 15.15it/s][A
 53%|█████████████████               | 65140/122310 [2:13:46<1:23:20, 11.43

step: 7900, loss: 94.646165974213, epoch: 0



 53%|█████████████████               | 65218/122310 [2:13:52<1:26:42, 10.97it/s][A
 53%|█████████████████               | 65220/122310 [2:13:53<1:52:12,  8.48it/s][A
 53%|█████████████████               | 65227/122310 [2:13:54<1:47:56,  8.81it/s][A
 53%|█████████████████               | 65234/122310 [2:13:55<1:45:20,  9.03it/s][A
 53%|█████████████████               | 65248/122310 [2:13:55<1:19:00, 12.04it/s][A
 53%|█████████████████               | 65252/122310 [2:13:56<1:34:32, 10.06it/s][A
 53%|█████████████████               | 65261/122310 [2:13:57<1:28:51, 10.70it/s][A
 53%|█████████████████               | 65274/122310 [2:13:58<1:14:17, 12.80it/s][A
 53%|█████████████████               | 65277/122310 [2:13:58<1:33:32, 10.16it/s][A
 53%|█████████████████               | 65279/122310 [2:13:59<1:59:35,  7.95it/s][A
 53%|█████████████████               | 65289/122310 [2:14:00<1:38:48,  9.62it/s][A
 53%|█████████████████               | 65298/122310 [2:14:01<1:31:26, 10.39

step: 7920, loss: 67.1232922786284, epoch: 0



 53%|█████████████████               | 65370/122310 [2:14:07<1:55:50,  8.19it/s][A
 53%|█████████████████               | 65385/122310 [2:14:08<1:20:07, 11.84it/s][A
 53%|█████████████████               | 65390/122310 [2:14:09<1:31:54, 10.32it/s][A
 53%|█████████████████               | 65400/122310 [2:14:09<1:23:52, 11.31it/s][A
 53%|█████████████████               | 65409/122310 [2:14:10<1:22:10, 11.54it/s][A
 53%|█████████████████               | 65412/122310 [2:14:11<1:41:35,  9.33it/s][A
 53%|█████████████████               | 65418/122310 [2:14:12<1:45:35,  8.98it/s][A
 53%|█████████████████               | 65432/122310 [2:14:15<2:29:42,  6.33it/s][A
 53%|█████████████████               | 65434/122310 [2:14:15<2:46:50,  5.68it/s][A
 54%|█████████████████               | 65443/122310 [2:14:16<2:14:29,  7.05it/s][A
 54%|█████████████████▏              | 65456/122310 [2:14:17<1:39:48,  9.49it/s][A
 54%|█████████████████▏              | 65463/122310 [2:14:17<1:39:55,  9.48

step: 7940, loss: 97.09837843082117, epoch: 0



 54%|█████████████████▏              | 65523/122310 [2:14:22<1:05:32, 14.44it/s][A
 54%|█████████████████▏              | 65529/122310 [2:14:23<1:15:03, 12.61it/s][A
 54%|█████████████████▏              | 65534/122310 [2:14:23<1:26:51, 10.90it/s][A
 54%|█████████████████▏              | 65541/122310 [2:14:24<1:30:15, 10.48it/s][A
 54%|█████████████████▏              | 65547/122310 [2:14:25<1:36:44,  9.78it/s][A
 54%|█████████████████▏              | 65550/122310 [2:14:26<1:57:06,  8.08it/s][A
 54%|█████████████████▏              | 65572/122310 [2:14:26<1:04:35, 14.64it/s][A
 54%|█████████████████▏              | 65583/122310 [2:14:27<1:04:10, 14.73it/s][A
 54%|█████████████████▏              | 65596/122310 [2:14:29<1:18:47, 12.00it/s][A
 54%|█████████████████▏              | 65600/122310 [2:14:29<1:30:52, 10.40it/s][A
 54%|█████████████████▏              | 65608/122310 [2:14:30<1:29:48, 10.52it/s][A
 54%|█████████████████▏              | 65616/122310 [2:14:31<1:28:59, 10.62

step: 7960, loss: 92.80281774680414, epoch: 0



 54%|█████████████████▏              | 65672/122310 [2:14:45<7:02:06,  2.24it/s][A
 54%|█████████████████▏              | 65675/122310 [2:14:45<6:32:01,  2.41it/s][A
 54%|█████████████████▏              | 65679/122310 [2:14:46<5:42:27,  2.76it/s][A
 54%|█████████████████▏              | 65682/122310 [2:14:47<5:20:28,  2.95it/s][A
 54%|█████████████████▏              | 65693/122310 [2:14:48<3:10:32,  4.95it/s][A
 54%|█████████████████▏              | 65699/122310 [2:14:48<2:49:20,  5.57it/s][A
 54%|█████████████████▏              | 65703/122310 [2:14:49<2:50:00,  5.55it/s][A
 54%|█████████████████▏              | 65714/122310 [2:14:50<2:01:33,  7.76it/s][A
 54%|█████████████████▏              | 65720/122310 [2:14:51<2:00:12,  7.85it/s][A

step: 7980, loss: 85.00068827282252, epoch: 0



 54%|█████████████████▏              | 65742/122310 [2:14:51<1:08:59, 13.66it/s][A
 54%|█████████████████▏              | 65749/122310 [2:14:52<1:15:15, 12.53it/s][A
 54%|█████████████████▏              | 65757/122310 [2:14:53<1:18:13, 12.05it/s][A
 54%|█████████████████▏              | 65762/122310 [2:14:53<1:29:24, 10.54it/s][A
 54%|█████████████████▏              | 65776/122310 [2:14:54<1:12:22, 13.02it/s][A
 54%|█████████████████▏              | 65783/122310 [2:14:55<1:18:47, 11.96it/s][A
 54%|█████████████████▏              | 65796/122310 [2:14:56<1:08:54, 13.67it/s][A
 54%|█████████████████▏              | 65811/122310 [2:14:56<1:00:32, 15.55it/s][A
 54%|█████████████████▏              | 65816/122310 [2:14:58<1:33:59, 10.02it/s][A
 54%|█████████████████▏              | 65819/122310 [2:14:59<1:49:59,  8.56it/s][A
 54%|█████████████████▏              | 65827/122310 [2:14:59<1:42:32,  9.18it/s][A
 54%|█████████████████▏              | 65832/122310 [2:15:00<1:50:20,  8.53

step: 8000, loss: 84.35866052164823, epoch: 0
sim1 and sim2 are 0.6158283483483454, 0.20849656879270295
cosine of pred and queen: 0.1013417448501534
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: economic
Actual: beijing:china::tokyo:japan, pred: market
Actual: berlin:germany::rome:italy, pred: countries
Actual: cairo:egypt::ottawa:canada, pred: agreed
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: year
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: broke
Actual: lisbon:portugal::riga:latvia, pred: coerce
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: greece
Actual: kenya:africa::netherlands:europe, pred: spain
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: maharastra:


 54%|█████████████████▏              | 65881/122310 [2:15:22<1:26:07, 10.92it/s][A

Actual: thailand:thai::india:indian, pred: phds
Actual: sweden:swedish::netherlands:dutch, pred: europe
Actual: russia:russian::germany:german, pred: france
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: prime
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.06666666666666667
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: misrule
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: earth
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: obituary
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:pl


 54%|████████████████▋              | 65892/122310 [2:16:22<41:17:26,  2.63s/it][A
 54%|████████████████▋              | 65905/122310 [2:16:22<25:48:57,  1.65s/it][A
 54%|████████████████▋              | 65911/122310 [2:16:23<20:59:19,  1.34s/it][A
 54%|████████████████▋              | 65927/122310 [2:16:24<12:10:41,  1.29it/s][A
 54%|█████████████████▎              | 65935/122310 [2:16:25<9:37:40,  1.63it/s][A
 54%|█████████████████▎              | 65942/122310 [2:16:25<7:47:48,  2.01it/s][A
 54%|█████████████████▎              | 65961/122310 [2:16:26<4:24:51,  3.55it/s][A
 54%|█████████████████▎              | 65974/122310 [2:16:27<3:17:58,  4.74it/s][A
 54%|█████████████████▎              | 65979/122310 [2:16:28<3:08:54,  4.97it/s][A
 54%|█████████████████▎              | 65983/122310 [2:16:28<3:06:02,  5.05it/s][A
 54%|█████████████████▎              | 65990/122310 [2:16:29<2:42:28,  5.78it/s][A
 54%|█████████████████▎              | 65996/122310 [2:16:30<2:30:15,  6.25

step: 8020, loss: 89.06801217470522, epoch: 0



 54%|█████████████████▎              | 66086/122310 [2:16:37<1:23:27, 11.23it/s][A
 54%|█████████████████▎              | 66096/122310 [2:16:38<1:18:07, 11.99it/s][A
 54%|█████████████████▎              | 66105/122310 [2:16:38<1:17:25, 12.10it/s][A
 54%|█████████████████▎              | 66115/122310 [2:16:39<1:15:00, 12.49it/s][A
 54%|█████████████████▎              | 66117/122310 [2:16:40<1:36:57,  9.66it/s][A
 54%|█████████████████▎              | 66118/122310 [2:16:41<2:11:02,  7.15it/s][A
 54%|█████████████████▎              | 66120/122310 [2:16:41<2:39:46,  5.86it/s][A
 54%|█████████████████▎              | 66136/122310 [2:16:42<1:27:37, 10.68it/s][A
 54%|█████████████████▎              | 66141/122310 [2:16:43<1:38:01,  9.55it/s][A
 54%|█████████████████▎              | 66146/122310 [2:16:45<2:51:01,  5.47it/s][A
 54%|█████████████████▎              | 66155/122310 [2:16:46<2:14:55,  6.94it/s][A
 54%|█████████████████▎              | 66170/122310 [2:16:46<1:33:04, 10.05

step: 8040, loss: 106.50671940757468, epoch: 0



 54%|█████████████████▎              | 66219/122310 [2:16:52<1:37:11,  9.62it/s][A
 54%|█████████████████▎              | 66228/122310 [2:16:53<1:55:49,  8.07it/s][A
 54%|█████████████████▎              | 66235/122310 [2:16:54<1:50:57,  8.42it/s][A
 54%|█████████████████▎              | 66236/122310 [2:16:54<2:21:06,  6.62it/s][A
 54%|█████████████████▎              | 66241/122310 [2:16:55<2:19:47,  6.68it/s][A
 54%|█████████████████▎              | 66246/122310 [2:16:56<2:18:56,  6.73it/s][A
 54%|█████████████████▎              | 66254/122310 [2:16:57<1:57:38,  7.94it/s][A
 54%|█████████████████▎              | 66266/122310 [2:16:57<1:29:59, 10.38it/s][A
 54%|█████████████████▎              | 66269/122310 [2:16:58<1:49:11,  8.55it/s][A
 54%|█████████████████▎              | 66283/122310 [2:16:59<1:19:58, 11.68it/s][A
 54%|█████████████████▎              | 66288/122310 [2:17:00<1:30:51, 10.28it/s][A
 54%|█████████████████▎              | 66293/122310 [2:17:00<1:40:53,  9.25

step: 8060, loss: 94.9276106188387, epoch: 0



 54%|█████████████████▎              | 66350/122310 [2:17:06<1:12:28, 12.87it/s][A
 54%|█████████████████▎              | 66357/122310 [2:17:07<1:18:15, 11.92it/s][A
 54%|█████████████████▎              | 66369/122310 [2:17:08<1:10:11, 13.28it/s][A
 54%|█████████████████▎              | 66379/122310 [2:17:08<1:09:31, 13.41it/s][A
 54%|█████████████████▎              | 66389/122310 [2:17:09<1:09:04, 13.49it/s][A
 54%|█████████████████▎              | 66395/122310 [2:17:10<1:18:15, 11.91it/s][A
 54%|█████████████████▎              | 66399/122310 [2:17:10<1:33:23,  9.98it/s][A
 54%|█████████████████▎              | 66408/122310 [2:17:11<1:27:12, 10.68it/s][A
 54%|█████████████████▍              | 66419/122310 [2:17:12<1:17:42, 11.99it/s][A
 54%|█████████████████▍              | 66427/122310 [2:17:13<1:19:55, 11.65it/s][A
 54%|█████████████████▍              | 66439/122310 [2:17:13<1:11:10, 13.08it/s][A
 54%|█████████████████▍              | 66446/122310 [2:17:14<1:17:29, 12.02

step: 8080, loss: 85.22235843041796, epoch: 0



 54%|█████████████████▍              | 66535/122310 [2:17:21<1:13:08, 12.71it/s][A
 54%|█████████████████▍              | 66541/122310 [2:17:21<1:21:32, 11.40it/s][A
 54%|█████████████████▍              | 66559/122310 [2:17:22<1:00:36, 15.33it/s][A
 54%|█████████████████▍              | 66566/122310 [2:17:23<1:08:11, 13.62it/s][A
 54%|██████████████████▌               | 66583/122310 [2:17:24<56:20, 16.48it/s][A
 54%|█████████████████▍              | 66589/122310 [2:17:24<1:06:21, 13.99it/s][A
 54%|█████████████████▍              | 66600/122310 [2:17:25<1:04:53, 14.31it/s][A
 54%|█████████████████▍              | 66612/122310 [2:17:26<1:02:14, 14.91it/s][A
 54%|█████████████████▍              | 66622/122310 [2:17:27<1:03:42, 14.57it/s][A
 54%|█████████████████▍              | 66628/122310 [2:17:27<1:13:15, 12.67it/s][A
 54%|█████████████████▍              | 66638/122310 [2:17:28<1:11:34, 12.96it/s][A
 54%|█████████████████▍              | 66643/122310 [2:17:29<1:23:24, 11.12

step: 8100, loss: 82.7291901776327, epoch: 0



 55%|██████████████████▌               | 66753/122310 [2:17:35<54:19, 17.04it/s][A
 55%|█████████████████▍              | 66760/122310 [2:17:36<1:02:23, 14.84it/s][A
 55%|██████████████████▌               | 66778/122310 [2:17:37<52:03, 17.78it/s][A
 55%|█████████████████▍              | 66783/122310 [2:17:38<1:03:53, 14.49it/s][A
 55%|█████████████████▍              | 66794/122310 [2:17:38<1:03:07, 14.66it/s][A
 55%|█████████████████▍              | 66797/122310 [2:17:39<1:21:05, 11.41it/s][A
 55%|█████████████████▍              | 66803/122310 [2:17:40<1:27:58, 10.52it/s][A
 55%|█████████████████▍              | 66806/122310 [2:17:41<2:21:21,  6.54it/s][A
 55%|█████████████████▍              | 66811/122310 [2:17:42<2:19:44,  6.62it/s][A
 55%|█████████████████▍              | 66819/122310 [2:17:43<1:59:22,  7.75it/s][A
 55%|█████████████████▍              | 66827/122310 [2:17:43<1:47:25,  8.61it/s][A
 55%|█████████████████▍              | 66840/122310 [2:17:44<1:23:01, 11.14

step: 8120, loss: 83.16744684364548, epoch: 0



 55%|██████████████████▌               | 66931/122310 [2:17:50<49:53, 18.50it/s][A
 55%|██████████████████▌               | 66944/122310 [2:17:51<50:26, 18.29it/s][A
 55%|██████████████████▌               | 66955/122310 [2:17:52<53:17, 17.31it/s][A
 55%|██████████████████▌               | 66963/122310 [2:17:52<59:53, 15.40it/s][A
 55%|█████████████████▌              | 66971/122310 [2:17:53<1:05:34, 14.06it/s][A
 55%|██████████████████▌               | 66985/122310 [2:17:54<59:09, 15.59it/s][A
 55%|█████████████████▌              | 66990/122310 [2:17:54<1:11:10, 12.95it/s][A
 55%|█████████████████▌              | 66998/122310 [2:17:55<1:14:36, 12.36it/s][A
 55%|█████████████████▌              | 67007/122310 [2:17:56<1:14:42, 12.34it/s][A
 55%|█████████████████▌              | 67016/122310 [2:17:57<1:14:50, 12.31it/s][A
 55%|█████████████████▌              | 67026/122310 [2:17:57<1:12:29, 12.71it/s][A
 55%|█████████████████▌              | 67036/122310 [2:17:58<1:10:59, 12.98

step: 8140, loss: 87.27900291152503, epoch: 0



 55%|██████████████████▋               | 67118/122310 [2:18:05<59:01, 15.59it/s][A
 55%|██████████████████▋               | 67135/122310 [2:18:05<51:33, 17.84it/s][A
 55%|██████████████████▋               | 67154/122310 [2:18:06<45:25, 20.24it/s][A
 55%|██████████████████▋               | 67160/122310 [2:18:07<55:10, 16.66it/s][A
 55%|██████████████████▋               | 67169/122310 [2:18:08<59:45, 15.38it/s][A
 55%|██████████████████▋               | 67187/122310 [2:18:08<50:39, 18.14it/s][A
 55%|█████████████████▌              | 67193/122310 [2:18:09<1:00:38, 15.15it/s][A
 55%|█████████████████▌              | 67199/122310 [2:18:10<1:10:31, 13.02it/s][A
 55%|█████████████████▌              | 67209/122310 [2:18:11<1:09:21, 13.24it/s][A
 55%|█████████████████▌              | 67219/122310 [2:18:11<1:08:42, 13.36it/s][A
 55%|█████████████████▌              | 67231/122310 [2:18:12<1:04:38, 14.20it/s][A
 55%|█████████████████▌              | 67242/122310 [2:18:13<1:03:34, 14.44

step: 8160, loss: 97.7256193697732, epoch: 0



 55%|█████████████████▌              | 67321/122310 [2:18:19<1:00:35, 15.13it/s][A
 55%|█████████████████▌              | 67330/122310 [2:18:20<1:04:02, 14.31it/s][A
 55%|█████████████████▌              | 67342/122310 [2:18:21<1:01:39, 14.86it/s][A
 55%|█████████████████▌              | 67344/122310 [2:18:22<1:21:27, 11.25it/s][A
 55%|█████████████████▌              | 67354/122310 [2:18:22<1:16:47, 11.93it/s][A
 55%|█████████████████▌              | 67363/122310 [2:18:23<1:16:01, 12.05it/s][A
 55%|█████████████████▋              | 67374/122310 [2:18:24<1:11:14, 12.85it/s][A
 55%|█████████████████▋              | 67383/122310 [2:18:25<1:12:40, 12.60it/s][A
 55%|█████████████████▋              | 67390/122310 [2:18:25<1:18:05, 11.72it/s][A
 55%|█████████████████▋              | 67400/122310 [2:18:26<1:14:23, 12.30it/s][A
 55%|█████████████████▋              | 67409/122310 [2:18:27<1:14:29, 12.28it/s][A
 55%|█████████████████▋              | 67418/122310 [2:18:28<1:14:40, 12.25

step: 8180, loss: 84.91610237194341, epoch: 0



 55%|█████████████████▋              | 67478/122310 [2:18:34<1:19:00, 11.57it/s][A
 55%|█████████████████▋              | 67484/122310 [2:18:35<1:26:15, 10.59it/s][A
 55%|█████████████████▋              | 67501/122310 [2:18:36<1:04:02, 14.26it/s][A
 55%|█████████████████▋              | 67506/122310 [2:18:36<1:15:30, 12.10it/s][A
 55%|█████████████████▋              | 67514/122310 [2:18:37<1:17:46, 11.74it/s][A
 55%|█████████████████▋              | 67520/122310 [2:18:38<1:25:30, 10.68it/s][A
 55%|█████████████████▋              | 67524/122310 [2:18:39<1:40:21,  9.10it/s][A
 55%|█████████████████▋              | 67527/122310 [2:18:39<1:59:56,  7.61it/s][A
 55%|█████████████████▋              | 67534/122310 [2:18:40<1:51:21,  8.20it/s][A
 55%|█████████████████▋              | 67536/122310 [2:18:41<2:19:14,  6.56it/s][A
 55%|█████████████████▋              | 67547/122310 [2:18:41<1:40:33,  9.08it/s][A
 55%|█████████████████▋              | 67555/122310 [2:18:42<1:34:53,  9.62

step: 8200, loss: 88.17690521756394, epoch: 0



 55%|█████████████████▋              | 67652/122310 [2:18:49<1:08:18, 13.33it/s][A
 55%|█████████████████▋              | 67658/122310 [2:18:50<1:17:22, 11.77it/s][A
 55%|█████████████████▋              | 67662/122310 [2:18:50<1:32:25,  9.85it/s][A
 55%|█████████████████▋              | 67672/122310 [2:18:51<1:23:19, 10.93it/s][A
 55%|█████████████████▋              | 67676/122310 [2:18:52<1:37:37,  9.33it/s][A
 55%|█████████████████▋              | 67685/122310 [2:18:53<1:29:16, 10.20it/s][A
 55%|█████████████████▋              | 67691/122310 [2:18:53<1:35:03,  9.58it/s][A
 55%|█████████████████▋              | 67709/122310 [2:18:54<1:04:51, 14.03it/s][A
 55%|█████████████████▋              | 67717/122310 [2:18:55<1:09:37, 13.07it/s][A
 55%|█████████████████▋              | 67728/122310 [2:18:55<1:06:42, 13.64it/s][A
 55%|█████████████████▋              | 67740/122310 [2:18:56<1:03:03, 14.42it/s][A
 55%|█████████████████▋              | 67750/122310 [2:18:57<1:04:00, 14.21

step: 8220, loss: 107.08761805802853, epoch: 0



 55%|█████████████████▋              | 67813/122310 [2:19:04<1:52:13,  8.09it/s][A
 55%|█████████████████▋              | 67824/122310 [2:19:04<1:29:27, 10.15it/s][A
 55%|█████████████████▋              | 67837/122310 [2:19:05<1:13:06, 12.42it/s][A
 55%|█████████████████▊              | 67848/122310 [2:19:06<1:08:54, 13.17it/s][A
 55%|█████████████████▊              | 67855/122310 [2:19:06<1:15:10, 12.07it/s][A
 55%|█████████████████▊              | 67869/122310 [2:19:07<1:03:59, 14.18it/s][A
 55%|█████████████████▊              | 67874/122310 [2:19:08<1:15:41, 11.99it/s][A
 56%|█████████████████▊              | 67888/122310 [2:19:09<1:04:16, 14.11it/s][A
 56%|█████████████████▊              | 67900/122310 [2:19:09<1:01:22, 14.78it/s][A
 56%|█████████████████▊              | 67908/122310 [2:19:10<1:06:35, 13.62it/s][A
 56%|█████████████████▊              | 67911/122310 [2:19:11<1:24:15, 10.76it/s][A
 56%|█████████████████▊              | 67918/122310 [2:19:12<1:27:12, 10.40

step: 8240, loss: 124.94043500929547, epoch: 0



 56%|█████████████████▊              | 67982/122310 [2:19:18<1:28:13, 10.26it/s][A
 56%|█████████████████▊              | 67991/122310 [2:19:19<1:23:27, 10.85it/s][A
 56%|█████████████████▊              | 67996/122310 [2:19:20<1:34:14,  9.61it/s][A
 56%|█████████████████▊              | 67999/122310 [2:19:20<1:53:25,  7.98it/s][A
 56%|█████████████████▊              | 68006/122310 [2:19:21<1:47:07,  8.45it/s][A
 56%|█████████████████▊              | 68017/122310 [2:19:22<1:27:02, 10.40it/s][A
 56%|█████████████████▊              | 68026/122310 [2:19:23<1:22:35, 10.95it/s][A
 56%|█████████████████▊              | 68044/122310 [2:19:23<1:00:11, 15.03it/s][A
 56%|█████████████████▊              | 68056/122310 [2:19:25<1:16:49, 11.77it/s][A
 56%|█████████████████▊              | 68058/122310 [2:19:26<1:34:23,  9.58it/s][A
 56%|█████████████████▊              | 68067/122310 [2:19:26<1:27:58, 10.28it/s][A
 56%|█████████████████▊              | 68073/122310 [2:19:27<1:33:06,  9.71

step: 8260, loss: 86.98085582427099, epoch: 0



 56%|█████████████████▊              | 68149/122310 [2:19:33<1:17:12, 11.69it/s][A
 56%|█████████████████▊              | 68158/122310 [2:19:34<1:16:13, 11.84it/s][A
 56%|█████████████████▊              | 68164/122310 [2:19:34<1:24:00, 10.74it/s][A
 56%|█████████████████▊              | 68175/122310 [2:19:35<1:15:18, 11.98it/s][A
 56%|█████████████████▊              | 68188/122310 [2:19:36<1:06:00, 13.67it/s][A
 56%|█████████████████▊              | 68194/122310 [2:19:37<1:15:00, 12.02it/s][A
 56%|█████████████████▊              | 68206/122310 [2:19:37<1:07:45, 13.31it/s][A
 56%|█████████████████▊              | 68213/122310 [2:19:38<1:13:57, 12.19it/s][A
 56%|█████████████████▊              | 68221/122310 [2:19:39<1:16:35, 11.77it/s][A
 56%|█████████████████▊              | 68225/122310 [2:19:40<1:31:22,  9.86it/s][A
 56%|█████████████████▊              | 68235/122310 [2:19:40<1:21:51, 11.01it/s][A
 56%|█████████████████▊              | 68241/122310 [2:19:41<1:28:59, 10.13

step: 8280, loss: 87.11074897896455, epoch: 0



 56%|██████████████████▉               | 68336/122310 [2:19:48<53:50, 16.71it/s][A
 56%|███████████████████               | 68350/122310 [2:19:49<51:50, 17.35it/s][A
 56%|█████████████████▉              | 68353/122310 [2:19:49<1:06:49, 13.46it/s][A
 56%|█████████████████▉              | 68362/122310 [2:19:50<1:08:32, 13.12it/s][A
 56%|███████████████████               | 68378/122310 [2:19:51<57:18, 15.68it/s][A
 56%|█████████████████▉              | 68383/122310 [2:19:51<1:09:21, 12.96it/s][A
 56%|█████████████████▉              | 68394/122310 [2:19:52<1:06:08, 13.59it/s][A
 56%|█████████████████▉              | 68403/122310 [2:19:53<1:08:05, 13.20it/s][A
 56%|███████████████████               | 68418/122310 [2:19:54<58:41, 15.30it/s][A
 56%|█████████████████▉              | 68426/122310 [2:19:54<1:04:17, 13.97it/s][A
 56%|█████████████████▉              | 68432/122310 [2:19:55<1:13:16, 12.26it/s][A
 56%|█████████████████▉              | 68436/122310 [2:19:56<1:28:11, 10.18

step: 8300, loss: 96.70567426438436, epoch: 0



 56%|█████████████████▉              | 68502/122310 [2:20:03<1:36:06,  9.33it/s][A
 56%|█████████████████▉              | 68507/122310 [2:20:03<1:44:17,  8.60it/s][A
 56%|█████████████████▉              | 68511/122310 [2:20:04<1:57:27,  7.63it/s][A
 56%|█████████████████▉              | 68522/122310 [2:20:05<1:31:11,  9.83it/s][A
 56%|█████████████████▉              | 68528/122310 [2:20:05<1:36:00,  9.34it/s][A
 56%|█████████████████▉              | 68533/122310 [2:20:06<1:44:25,  8.58it/s][A
 56%|█████████████████▉              | 68543/122310 [2:20:07<1:28:46, 10.09it/s][A
 56%|█████████████████▉              | 68553/122310 [2:20:08<1:20:25, 11.14it/s][A
 56%|█████████████████▉              | 68559/122310 [2:20:08<1:27:30, 10.24it/s][A
 56%|█████████████████▉              | 68564/122310 [2:20:09<1:37:20,  9.20it/s][A
 56%|█████████████████▉              | 68573/122310 [2:20:10<1:28:21, 10.14it/s][A
 56%|█████████████████▉              | 68575/122310 [2:20:11<1:53:05,  7.92

step: 8320, loss: 91.56039301381526, epoch: 0



 56%|█████████████████▉              | 68644/122310 [2:20:17<1:24:41, 10.56it/s][A
 56%|█████████████████▉              | 68658/122310 [2:20:18<1:08:03, 13.14it/s][A
 56%|█████████████████▉              | 68667/122310 [2:20:19<1:09:24, 12.88it/s][A
 56%|█████████████████▉              | 68670/122310 [2:20:19<1:27:16, 10.24it/s][A
 56%|█████████████████▉              | 68676/122310 [2:20:20<1:32:49,  9.63it/s][A
 56%|█████████████████▉              | 68685/122310 [2:20:21<1:25:41, 10.43it/s][A
 56%|█████████████████▉              | 68695/122310 [2:20:22<1:18:22, 11.40it/s][A
 56%|█████████████████▉              | 68698/122310 [2:20:22<1:37:31,  9.16it/s][A
 56%|█████████████████▉              | 68706/122310 [2:20:23<1:32:05,  9.70it/s][A
 56%|█████████████████▉              | 68713/122310 [2:20:24<1:32:18,  9.68it/s][A
 56%|█████████████████▉              | 68725/122310 [2:20:25<1:16:33, 11.66it/s][A
 56%|█████████████████▉              | 68733/122310 [2:20:25<1:18:04, 11.44

step: 8340, loss: 88.5204517480253, epoch: 0



 56%|██████████████████              | 68829/122310 [2:20:32<1:01:03, 14.60it/s][A
 56%|██████████████████              | 68836/122310 [2:20:33<1:08:09, 13.08it/s][A
 56%|██████████████████              | 68843/122310 [2:20:33<1:14:10, 12.01it/s][A
 56%|██████████████████              | 68852/122310 [2:20:34<1:13:53, 12.06it/s][A
 56%|██████████████████              | 68856/122310 [2:20:35<1:28:13, 10.10it/s][A
 56%|██████████████████              | 68860/122310 [2:20:36<2:12:57,  6.70it/s][A
 56%|██████████████████              | 68868/122310 [2:20:37<1:54:35,  7.77it/s][A
 56%|██████████████████              | 68877/122310 [2:20:38<1:39:45,  8.93it/s][A
 56%|██████████████████              | 68888/122310 [2:20:38<1:24:10, 10.58it/s][A
 56%|██████████████████              | 68894/122310 [2:20:39<1:30:06,  9.88it/s][A
 56%|██████████████████              | 68900/122310 [2:20:40<1:34:33,  9.41it/s][A
 56%|██████████████████              | 68914/122310 [2:20:41<1:12:49, 12.22

step: 8360, loss: 100.34675597434594, epoch: 0



 56%|██████████████████              | 68977/122310 [2:20:47<1:12:26, 12.27it/s][A
 56%|██████████████████              | 68984/122310 [2:20:47<1:17:27, 11.47it/s][A
 56%|██████████████████              | 69008/122310 [2:20:49<1:04:32, 13.76it/s][A
 56%|██████████████████              | 69019/122310 [2:20:49<1:03:04, 14.08it/s][A
 56%|██████████████████              | 69028/122310 [2:20:50<1:05:13, 13.62it/s][A
 56%|██████████████████              | 69040/122310 [2:20:51<1:01:48, 14.36it/s][A
 56%|██████████████████              | 69049/122310 [2:20:52<1:04:23, 13.79it/s][A
 56%|██████████████████              | 69055/122310 [2:20:52<1:12:51, 12.18it/s][A
 56%|██████████████████              | 69061/122310 [2:20:53<1:20:46, 10.99it/s][A
 56%|██████████████████              | 69066/122310 [2:20:54<1:30:26,  9.81it/s][A
 56%|██████████████████              | 69076/122310 [2:20:55<1:21:19, 10.91it/s][A
 56%|██████████████████              | 69085/122310 [2:20:55<1:18:15, 11.33

step: 8380, loss: 86.36292755785533, epoch: 0



 57%|██████████████████              | 69149/122310 [2:21:01<1:10:59, 12.48it/s][A
 57%|██████████████████              | 69154/122310 [2:21:02<1:21:03, 10.93it/s][A
 57%|██████████████████              | 69166/122310 [2:21:03<1:11:12, 12.44it/s][A
 57%|██████████████████              | 69176/122310 [2:21:03<1:09:32, 12.74it/s][A
 57%|██████████████████              | 69178/122310 [2:21:04<1:29:47,  9.86it/s][A
 57%|██████████████████              | 69185/122310 [2:21:05<1:30:45,  9.76it/s][A
 57%|██████████████████              | 69199/122310 [2:21:06<1:10:42, 12.52it/s][A
 57%|███████████████████▏              | 69217/122310 [2:21:06<55:01, 16.08it/s][A
 57%|██████████████████              | 69220/122310 [2:21:07<1:10:44, 12.51it/s][A
 57%|██████████████████              | 69225/122310 [2:21:08<1:21:52, 10.81it/s][A
 57%|██████████████████              | 69237/122310 [2:21:09<1:11:02, 12.45it/s][A
 57%|██████████████████              | 69242/122310 [2:21:09<1:22:13, 10.76

step: 8400, loss: 84.97608320068866, epoch: 0
sim1 and sim2 are 0.5775178399065952, 0.17273027841915842
cosine of pred and queen: 0.10708228024410951
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: economic
Actual: beijing:china::tokyo:japan, pred: market
Actual: berlin:germany::rome:italy, pred: countries
Actual: cairo:egypt::ottawa:canada, pred: close
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: country
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: broke
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: crop
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: country
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: belou
Actual: mahar


 57%|██████████████████▏             | 69301/122310 [2:21:32<1:22:48, 10.67it/s][A

Actual: sweden:swedish::netherlands:dutch, pred: also
Actual: russia:russian::germany:german, pred: france
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: spanish
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: become
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.06666666666666667
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: misrule
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: shall
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: obituary
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:plays::listen:listens, pred: documentation
Actual: p


 57%|█████████████████▌             | 69308/122310 [2:22:30<45:20:01,  3.08s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.01775147928994083



 57%|█████████████████▌             | 69325/122310 [2:22:31<23:24:35,  1.59s/it][A
 57%|█████████████████▌             | 69334/122310 [2:22:32<17:23:37,  1.18s/it][A
 57%|█████████████████▌             | 69339/122310 [2:22:33<14:41:37,  1.00it/s][A
 57%|█████████████████▌             | 69347/122310 [2:22:33<10:50:52,  1.36it/s][A
 57%|██████████████████▏             | 69360/122310 [2:22:34<6:49:43,  2.15it/s][A
 57%|██████████████████▏             | 69375/122310 [2:22:35<4:23:44,  3.35it/s][A
 57%|██████████████████▏             | 69381/122310 [2:22:36<3:54:27,  3.76it/s][A
 57%|██████████████████▏             | 69387/122310 [2:22:36<3:27:34,  4.25it/s][A
 57%|██████████████████▏             | 69403/122310 [2:22:37<2:13:07,  6.62it/s][A
 57%|██████████████████▏             | 69418/122310 [2:22:38<1:39:20,  8.87it/s][A
 57%|██████████████████▏             | 69438/122310 [2:22:38<1:11:15, 12.36it/s][A
 57%|██████████████████▏             | 69444/122310 [2:22:39<1:16:40, 11.49

step: 8420, loss: 95.96563317988621, epoch: 0



 57%|███████████████████▎              | 69531/122310 [2:22:46<57:02, 15.42it/s][A
 57%|███████████████████▎              | 69550/122310 [2:22:46<47:10, 18.64it/s][A
 57%|███████████████████▎              | 69560/122310 [2:22:47<51:05, 17.21it/s][A
 57%|███████████████████▎              | 69568/122310 [2:22:48<57:07, 15.39it/s][A
 57%|██████████████████▏             | 69575/122310 [2:22:49<1:04:24, 13.65it/s][A
 57%|██████████████████▏             | 69582/122310 [2:22:49<1:10:24, 12.48it/s][A
 57%|██████████████████▏             | 69593/122310 [2:22:50<1:06:20, 13.24it/s][A
 57%|██████████████████▏             | 69602/122310 [2:22:51<1:07:22, 13.04it/s][A
 57%|██████████████████▏             | 69614/122310 [2:22:52<1:02:40, 14.01it/s][A
 57%|██████████████████▏             | 69623/122310 [2:22:52<1:04:43, 13.57it/s][A
 57%|██████████████████▏             | 69634/122310 [2:22:53<1:02:31, 14.04it/s][A
 57%|███████████████████▎              | 69654/122310 [2:22:54<48:33, 18.07

step: 8440, loss: 86.22795984390865, epoch: 0



 57%|██████████████████▏             | 69739/122310 [2:23:00<1:09:30, 12.61it/s][A
 57%|██████████████████▏             | 69752/122310 [2:23:01<1:01:41, 14.20it/s][A
 57%|███████████████████▍              | 69766/122310 [2:23:02<55:47, 15.70it/s][A
 57%|███████████████████▍              | 69779/122310 [2:23:02<53:35, 16.33it/s][A
 57%|███████████████████▍              | 69789/122310 [2:23:03<56:17, 15.55it/s][A
 57%|███████████████████▍              | 69799/122310 [2:23:04<58:17, 15.01it/s][A
 57%|███████████████████▍              | 69812/122310 [2:23:05<55:04, 15.89it/s][A
 57%|███████████████████▍              | 69824/122310 [2:23:05<54:29, 16.05it/s][A
 57%|██████████████████▎             | 69828/122310 [2:23:06<1:07:59, 12.86it/s][A
 57%|██████████████████▎             | 69836/122310 [2:23:07<1:10:52, 12.34it/s][A
 57%|███████████████████▍              | 69851/122310 [2:23:08<58:58, 14.82it/s][A
 57%|██████████████████▎             | 69860/122310 [2:23:08<1:02:11, 14.06

step: 8460, loss: 126.25011284227504, epoch: 0



 57%|██████████████████▎             | 69908/122310 [2:23:15<1:24:27, 10.34it/s][A
 57%|██████████████████▎             | 69912/122310 [2:23:16<1:37:59,  8.91it/s][A
 57%|██████████████████▎             | 69919/122310 [2:23:16<1:35:44,  9.12it/s][A
 57%|██████████████████▎             | 69924/122310 [2:23:17<1:43:19,  8.45it/s][A
 57%|██████████████████▎             | 69938/122310 [2:23:18<1:14:42, 11.68it/s][A
 57%|██████████████████▎             | 69951/122310 [2:23:18<1:04:50, 13.46it/s][A
 57%|██████████████████▎             | 69953/122310 [2:23:19<1:24:34, 10.32it/s][A
 57%|██████████████████▎             | 69961/122310 [2:23:20<1:22:57, 10.52it/s][A
 57%|██████████████████▎             | 69972/122310 [2:23:21<1:13:33, 11.86it/s][A
 57%|██████████████████▎             | 69979/122310 [2:23:21<1:18:04, 11.17it/s][A
 57%|██████████████████▎             | 69991/122310 [2:23:22<1:08:26, 12.74it/s][A
 57%|██████████████████▎             | 69998/122310 [2:23:23<1:14:11, 11.75

step: 8480, loss: 98.10712512844962, epoch: 0



 57%|███████████████████▍              | 70087/122310 [2:23:29<47:39, 18.26it/s][A
 57%|███████████████████▍              | 70101/122310 [2:23:30<46:55, 18.54it/s][A
 57%|██████████████████▎             | 70112/122310 [2:23:32<1:19:04, 11.00it/s][A
 57%|██████████████████▎             | 70119/122310 [2:23:33<1:21:18, 10.70it/s][A
 57%|██████████████████▎             | 70122/122310 [2:23:34<1:35:11,  9.14it/s][A
 57%|██████████████████▎             | 70125/122310 [2:23:35<1:49:38,  7.93it/s][A
 57%|██████████████████▎             | 70135/122310 [2:23:35<1:32:03,  9.45it/s][A
 57%|██████████████████▎             | 70148/122310 [2:23:36<1:14:08, 11.73it/s][A
 57%|██████████████████▎             | 70158/122310 [2:23:37<1:10:50, 12.27it/s][A
 57%|██████████████████▎             | 70165/122310 [2:23:37<1:15:33, 11.50it/s][A
 57%|██████████████████▎             | 70171/122310 [2:23:39<1:45:42,  8.22it/s][A
 57%|██████████████████▎             | 70182/122310 [2:23:40<1:27:36,  9.92

step: 8500, loss: 90.8341353054158, epoch: 0



 57%|██████████████████▍             | 70247/122310 [2:23:44<1:00:46, 14.28it/s][A
 57%|██████████████████▍             | 70252/122310 [2:23:45<1:11:16, 12.17it/s][A
 57%|███████████████████▌              | 70269/122310 [2:23:45<56:05, 15.46it/s][A
 57%|██████████████████▍             | 70271/122310 [2:23:46<1:14:52, 11.58it/s][A
 57%|██████████████████▍             | 70275/122310 [2:23:47<1:28:40,  9.78it/s][A
 57%|██████████████████▍             | 70285/122310 [2:23:48<1:19:23, 10.92it/s][A
 57%|██████████████████▍             | 70291/122310 [2:23:48<1:25:37, 10.13it/s][A
 57%|██████████████████▍             | 70302/122310 [2:23:49<1:14:54, 11.57it/s][A
 57%|██████████████████▍             | 70308/122310 [2:23:50<1:22:02, 10.56it/s][A
 57%|██████████████████▍             | 70312/122310 [2:23:51<1:35:48,  9.05it/s][A
 57%|██████████████████▍             | 70325/122310 [2:23:51<1:14:37, 11.61it/s][A
 58%|██████████████████▍             | 70330/122310 [2:23:52<1:25:02, 10.19

step: 8520, loss: 93.40142352689334, epoch: 0



 58%|██████████████████▍             | 70420/122310 [2:23:59<1:13:53, 11.70it/s][A
 58%|██████████████████▍             | 70429/122310 [2:24:00<1:13:20, 11.79it/s][A
 58%|██████████████████▍             | 70436/122310 [2:24:00<1:18:18, 11.04it/s][A
 58%|██████████████████▍             | 70449/122310 [2:24:05<2:47:15,  5.17it/s][A
 58%|██████████████████▍             | 70454/122310 [2:24:06<2:40:07,  5.40it/s][A
 58%|██████████████████▍             | 70457/122310 [2:24:06<2:47:54,  5.15it/s][A
 58%|██████████████████▍             | 70470/122310 [2:24:07<1:54:25,  7.55it/s][A
 58%|██████████████████▍             | 70490/122310 [2:24:08<1:13:30, 11.75it/s][A
 58%|██████████████████▍             | 70506/122310 [2:24:09<1:01:37, 14.01it/s][A
 58%|██████████████████▍             | 70515/122310 [2:24:09<1:03:17, 13.64it/s][A
 58%|███████████████████▌              | 70528/122310 [2:24:10<58:03, 14.87it/s][A
 58%|██████████████████▍             | 70535/122310 [2:24:11<1:03:49, 13.52

step: 8540, loss: 80.52069003580345, epoch: 0



 58%|██████████████████▍             | 70566/122310 [2:24:14<1:12:52, 11.83it/s][A
 58%|██████████████████▍             | 70572/122310 [2:24:14<1:18:44, 10.95it/s][A
 58%|██████████████████▍             | 70577/122310 [2:24:15<1:27:21,  9.87it/s][A
 58%|██████████████████▍             | 70584/122310 [2:24:16<1:27:14,  9.88it/s][A
 58%|██████████████████▍             | 70589/122310 [2:24:16<1:34:55,  9.08it/s][A
 58%|██████████████████▍             | 70598/122310 [2:24:17<1:25:03, 10.13it/s][A
 58%|██████████████████▍             | 70612/122310 [2:24:18<1:06:13, 13.01it/s][A
 58%|██████████████████▍             | 70620/122310 [2:24:19<1:08:59, 12.49it/s][A
 58%|███████████████████▋              | 70635/122310 [2:24:19<57:08, 15.07it/s][A
 58%|███████████████████▋              | 70649/122310 [2:24:20<52:16, 16.47it/s][A
 58%|███████████████████▋              | 70657/122310 [2:24:21<57:43, 14.91it/s][A
 58%|██████████████████▍             | 70662/122310 [2:24:21<1:08:56, 12.48

step: 8560, loss: 98.94428365776017, epoch: 0



 58%|███████████████████▋              | 70748/122310 [2:24:28<56:30, 15.21it/s][A
 58%|███████████████████▋              | 70758/122310 [2:24:29<57:33, 14.93it/s][A
 58%|███████████████████▋              | 70771/122310 [2:24:29<53:53, 15.94it/s][A
 58%|███████████████████▋              | 70781/122310 [2:24:30<56:52, 15.10it/s][A
 58%|███████████████████▋              | 70792/122310 [2:24:31<57:34, 14.91it/s][A
 58%|███████████████████▋              | 70808/122310 [2:24:32<51:05, 16.80it/s][A
 58%|███████████████████▋              | 70820/122310 [2:24:32<51:58, 16.51it/s][A
 58%|███████████████████▋              | 70834/122310 [2:24:33<50:03, 17.14it/s][A
 58%|███████████████████▋              | 70844/122310 [2:24:34<53:52, 15.92it/s][A
 58%|██████████████████▌             | 70856/122310 [2:24:35<1:10:12, 12.21it/s][A
 58%|██████████████████▌             | 70866/122310 [2:24:36<1:08:39, 12.49it/s][A
 58%|██████████████████▌             | 70875/122310 [2:24:37<1:09:37, 12.31

step: 8580, loss: 95.21869813833571, epoch: 0



 58%|██████████████████▌             | 70951/122310 [2:24:43<1:11:34, 11.96it/s][A
 58%|██████████████████▌             | 70959/122310 [2:24:44<1:13:18, 11.68it/s][A
 58%|██████████████████▌             | 70968/122310 [2:24:44<1:12:01, 11.88it/s][A
 58%|██████████████████▌             | 70978/122310 [2:24:45<1:09:08, 12.37it/s][A
 58%|██████████████████▌             | 70980/122310 [2:24:46<1:29:43,  9.54it/s][A
 58%|██████████████████▌             | 70990/122310 [2:24:47<1:19:36, 10.75it/s][A
 58%|██████████████████▌             | 70992/122310 [2:24:47<1:42:09,  8.37it/s][A
 58%|██████████████████▌             | 71005/122310 [2:24:49<2:02:28,  6.98it/s][A
 58%|██████████████████▌             | 71008/122310 [2:24:50<2:13:34,  6.40it/s][A
 58%|██████████████████▌             | 71015/122310 [2:24:51<1:59:51,  7.13it/s][A
 58%|██████████████████▌             | 71023/122310 [2:24:52<1:46:00,  8.06it/s][A
 58%|██████████████████▌             | 71030/122310 [2:24:52<1:40:51,  8.47

step: 8600, loss: 90.77102596241424, epoch: 0



 58%|██████████████████▌             | 71111/122310 [2:24:57<1:06:00, 12.93it/s][A
 58%|███████████████████▊              | 71125/122310 [2:24:58<57:47, 14.76it/s][A
 58%|███████████████████▊              | 71138/122310 [2:24:59<54:21, 15.69it/s][A
 58%|██████████████████▌             | 71143/122310 [2:25:00<1:05:25, 13.04it/s][A
 58%|██████████████████▌             | 71155/122310 [2:25:00<1:00:50, 14.01it/s][A
 58%|███████████████████▊              | 71166/122310 [2:25:01<59:27, 14.34it/s][A
 58%|███████████████████▊              | 71177/122310 [2:25:02<58:40, 14.53it/s][A
 58%|██████████████████▌             | 71187/122310 [2:25:03<1:00:07, 14.17it/s][A
 58%|██████████████████▋             | 71194/122310 [2:25:03<1:06:32, 12.80it/s][A
 58%|██████████████████▋             | 71200/122310 [2:25:04<1:14:30, 11.43it/s][A
 58%|██████████████████▋             | 71212/122310 [2:25:05<1:05:58, 12.91it/s][A
 58%|██████████████████▋             | 71221/122310 [2:25:06<1:06:56, 12.72

step: 8620, loss: 87.55438028871401, epoch: 0



 58%|██████████████████▋             | 71294/122310 [2:25:12<1:22:35, 10.29it/s][A
 58%|██████████████████▋             | 71301/122310 [2:25:13<1:24:15, 10.09it/s][A
 58%|██████████████████▋             | 71310/122310 [2:25:14<1:19:09, 10.74it/s][A
 58%|██████████████████▋             | 71321/122310 [2:25:14<1:11:08, 11.95it/s][A
 58%|██████████████████▋             | 71329/122310 [2:25:15<1:12:57, 11.65it/s][A
 58%|███████████████████▊              | 71344/122310 [2:25:16<59:26, 14.29it/s][A
 58%|██████████████████▋             | 71352/122310 [2:25:17<1:04:02, 13.26it/s][A
 58%|██████████████████▋             | 71358/122310 [2:25:17<1:12:14, 11.75it/s][A
 58%|██████████████████▋             | 71368/122310 [2:25:18<1:09:07, 12.28it/s][A
 58%|██████████████████▋             | 71377/122310 [2:25:19<1:08:57, 12.31it/s][A
 58%|██████████████████▋             | 71384/122310 [2:25:20<1:14:13, 11.44it/s][A
 58%|██████████████████▋             | 71387/122310 [2:25:20<1:31:30,  9.27

step: 8640, loss: 89.56806910043389, epoch: 0



 58%|██████████████████▋             | 71462/122310 [2:25:27<1:15:26, 11.23it/s][A
 58%|██████████████████▋             | 71471/122310 [2:25:28<1:13:34, 11.52it/s][A
 58%|██████████████████▋             | 71473/122310 [2:25:28<1:35:01,  8.92it/s][A
 58%|██████████████████▋             | 71483/122310 [2:25:29<1:21:41, 10.37it/s][A
 58%|██████████████████▋             | 71487/122310 [2:25:30<1:35:08,  8.90it/s][A
 58%|██████████████████▋             | 71492/122310 [2:25:30<1:42:14,  8.28it/s][A
 58%|██████████████████▋             | 71505/122310 [2:25:31<1:16:19, 11.09it/s][A
 58%|██████████████████▋             | 71512/122310 [2:25:32<1:19:20, 10.67it/s][A
 58%|██████████████████▋             | 71517/122310 [2:25:33<1:29:26,  9.46it/s][A
 58%|██████████████████▋             | 71523/122310 [2:25:33<1:33:00,  9.10it/s][A
 58%|██████████████████▋             | 71528/122310 [2:25:34<1:40:27,  8.42it/s][A
 58%|██████████████████▋             | 71543/122310 [2:25:35<1:10:13, 12.05

step: 8660, loss: 109.4262567549901, epoch: 0



 59%|██████████████████▋             | 71635/122310 [2:25:41<1:00:44, 13.90it/s][A
 59%|██████████████████▋             | 71642/122310 [2:25:42<1:06:58, 12.61it/s][A
 59%|██████████████████▋             | 71650/122310 [2:25:43<1:09:54, 12.08it/s][A
 59%|██████████████████▋             | 71659/122310 [2:25:44<1:09:29, 12.15it/s][A
 59%|██████████████████▋             | 71664/122310 [2:25:44<1:19:53, 10.56it/s][A
 59%|██████████████████▊             | 71668/122310 [2:25:45<1:33:46,  9.00it/s][A
 59%|██████████████████▊             | 71674/122310 [2:25:46<1:36:03,  8.79it/s][A
 59%|██████████████████▊             | 71680/122310 [2:25:47<1:38:22,  8.58it/s][A
 59%|██████████████████▊             | 71686/122310 [2:25:47<1:39:26,  8.48it/s][A
 59%|██████████████████▊             | 71693/122310 [2:25:48<1:35:47,  8.81it/s][A
 59%|██████████████████▊             | 71704/122310 [2:25:49<1:19:07, 10.66it/s][A
 59%|██████████████████▊             | 71708/122310 [2:25:50<1:32:51,  9.08

step: 8680, loss: 85.87121834064772, epoch: 0



 59%|██████████████████▊             | 71792/122310 [2:25:56<1:01:56, 13.59it/s][A
 59%|██████████████████▊             | 71802/122310 [2:25:57<1:01:54, 13.60it/s][A
 59%|██████████████████▊             | 71807/122310 [2:25:58<1:12:36, 11.59it/s][A
 59%|██████████████████▊             | 71812/122310 [2:25:58<1:23:06, 10.13it/s][A
 59%|██████████████████▊             | 71818/122310 [2:25:59<1:27:47,  9.59it/s][A
 59%|██████████████████▊             | 71823/122310 [2:26:00<1:36:17,  8.74it/s][A
 59%|██████████████████▊             | 71829/122310 [2:26:01<1:38:02,  8.58it/s][A
 59%|██████████████████▊             | 71835/122310 [2:26:01<1:39:40,  8.44it/s][A
 59%|██████████████████▊             | 71842/122310 [2:26:03<2:04:19,  6.77it/s][A
 59%|██████████████████▊             | 71853/122310 [2:26:03<1:35:22,  8.82it/s][A
 59%|██████████████████▊             | 71859/122310 [2:26:04<1:37:14,  8.65it/s][A
 59%|██████████████████▊             | 71863/122310 [2:26:05<1:47:55,  7.79

step: 8700, loss: 86.84801882751265, epoch: 0



 59%|██████████████████▊             | 71942/122310 [2:26:11<1:07:09, 12.50it/s][A
 59%|██████████████████▊             | 71953/122310 [2:26:12<1:03:19, 13.25it/s][A
 59%|████████████████████              | 71966/122310 [2:26:12<57:39, 14.55it/s][A
 59%|██████████████████▊             | 71972/122310 [2:26:13<1:06:09, 12.68it/s][A
 59%|██████████████████▊             | 71980/122310 [2:26:14<1:09:09, 12.13it/s][A
 59%|██████████████████▊             | 71987/122310 [2:26:15<1:13:49, 11.36it/s][A
 59%|██████████████████▊             | 71993/122310 [2:26:15<1:20:40, 10.40it/s][A
 59%|██████████████████▊             | 71998/122310 [2:26:16<1:29:50,  9.33it/s][A
 59%|██████████████████▊             | 72005/122310 [2:26:17<1:29:15,  9.39it/s][A
 59%|██████████████████▊             | 72016/122310 [2:26:17<1:15:43, 11.07it/s][A
 59%|██████████████████▊             | 72019/122310 [2:26:18<1:33:36,  8.95it/s][A
 59%|██████████████████▊             | 72026/122310 [2:26:19<1:31:35,  9.15

step: 8720, loss: 91.10396380544407, epoch: 0



 59%|██████████████████▊             | 72099/122310 [2:26:26<1:12:24, 11.56it/s][A
 59%|██████████████████▊             | 72111/122310 [2:26:26<1:04:11, 13.03it/s][A
 59%|██████████████████▊             | 72117/122310 [2:26:27<1:12:09, 11.59it/s][A
 59%|██████████████████▊             | 72123/122310 [2:26:28<1:19:12, 10.56it/s][A
 59%|██████████████████▊             | 72128/122310 [2:26:28<1:28:31,  9.45it/s][A
 59%|██████████████████▊             | 72133/122310 [2:26:29<1:36:22,  8.68it/s][A
 59%|██████████████████▉             | 72145/122310 [2:26:30<1:16:01, 11.00it/s][A
 59%|██████████████████▉             | 72153/122310 [2:26:31<1:16:09, 10.98it/s][A
 59%|██████████████████▉             | 72166/122310 [2:26:31<1:04:12, 13.02it/s][A
 59%|████████████████████              | 72185/122310 [2:26:32<49:29, 16.88it/s][A
 59%|████████████████████              | 72193/122310 [2:26:33<55:17, 15.11it/s][A
 59%|██████████████████▉             | 72201/122310 [2:26:34<1:00:22, 13.83

step: 8740, loss: 92.4793748181361, epoch: 0



 59%|██████████████████▉             | 72286/122310 [2:26:40<1:03:12, 13.19it/s][A
 59%|██████████████████▉             | 72291/122310 [2:26:41<1:13:32, 11.34it/s][A
 59%|████████████████████              | 72306/122310 [2:26:42<59:15, 14.07it/s][A
 59%|██████████████████▉             | 72310/122310 [2:26:42<1:12:24, 11.51it/s][A
 59%|██████████████████▉             | 72317/122310 [2:26:43<1:16:19, 10.92it/s][A
 59%|██████████████████▉             | 72327/122310 [2:26:44<1:10:59, 11.73it/s][A
 59%|██████████████████▉             | 72338/122310 [2:26:45<1:05:34, 12.70it/s][A
 59%|████████████████████              | 72356/122310 [2:26:45<51:11, 16.26it/s][A
 59%|████████████████████              | 72366/122310 [2:26:46<53:46, 15.48it/s][A
 59%|██████████████████▉             | 72373/122310 [2:26:47<1:00:43, 13.71it/s][A
 59%|████████████████████              | 72384/122310 [2:26:48<59:08, 14.07it/s][A
 59%|██████████████████▉             | 72391/122310 [2:26:48<1:05:29, 12.70

step: 8760, loss: 95.65279735427183, epoch: 0



 59%|██████████████████▉             | 72480/122310 [2:26:55<1:03:22, 13.10it/s][A
 59%|██████████████████▉             | 72482/122310 [2:26:56<1:22:51, 10.02it/s][A
 59%|██████████████████▉             | 72494/122310 [2:26:56<1:09:42, 11.91it/s][A
 59%|██████████████████▉             | 72501/122310 [2:26:57<1:14:05, 11.21it/s][A
 59%|██████████████████▉             | 72505/122310 [2:26:58<1:27:33,  9.48it/s][A
 59%|██████████████████▉             | 72519/122310 [2:26:59<1:07:01, 12.38it/s][A
 59%|██████████████████▉             | 72525/122310 [2:26:59<1:14:34, 11.13it/s][A
 59%|██████████████████▉             | 72539/122310 [2:27:00<1:01:23, 13.51it/s][A
 59%|██████████████████▉             | 72549/122310 [2:27:01<1:01:16, 13.54it/s][A
 59%|██████████████████▉             | 72556/122310 [2:27:01<1:07:10, 12.34it/s][A
 59%|██████████████████▉             | 72562/122310 [2:27:02<1:14:48, 11.08it/s][A
 59%|██████████████████▉             | 72566/122310 [2:27:04<1:54:42,  7.23

step: 8780, loss: 85.54269706827029, epoch: 0



 59%|████████████████████▏             | 72647/122310 [2:27:10<53:23, 15.50it/s][A
 59%|████████████████████▏             | 72655/122310 [2:27:10<58:26, 14.16it/s][A
 59%|████████████████████▏             | 72669/122310 [2:27:11<53:00, 15.61it/s][A
 59%|███████████████████             | 72673/122310 [2:27:12<1:05:50, 12.56it/s][A
 59%|███████████████████             | 72681/122310 [2:27:12<1:08:31, 12.07it/s][A
 59%|███████████████████             | 72690/122310 [2:27:13<1:08:06, 12.14it/s][A
 59%|███████████████████             | 72692/122310 [2:27:14<1:29:02,  9.29it/s][A
 59%|███████████████████             | 72694/122310 [2:27:15<1:52:19,  7.36it/s][A
 59%|███████████████████             | 72703/122310 [2:27:15<1:33:44,  8.82it/s][A
 59%|███████████████████             | 72713/122310 [2:27:16<1:20:35, 10.26it/s][A
 59%|███████████████████             | 72718/122310 [2:27:17<1:29:34,  9.23it/s][A
 59%|███████████████████             | 72723/122310 [2:27:18<1:37:17,  8.49

step: 8800, loss: 112.18674872413008, epoch: 0
sim1 and sim2 are 0.6166245108907775, 0.17280187665233168
cosine of pred and queen: 0.046637030279887665
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: economic
Actual: beijing:china::tokyo:japan, pred: nigeria
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: close
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: country
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: weeke
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: crop
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: country
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: eluana
Actual: maha


 60%|███████████████████             | 72782/122310 [2:27:42<1:02:58, 13.11it/s][A

Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: prime
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.06666666666666667
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: misrule
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: let
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: obituary
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:plays::listen:listens, pred: discarded
Actual: predict:predicts::search:searches, pred: pebbles
Actual: machine:machines::lion:lions, pred: thamer
Actual: mango:mangoes::onion:onions, pred: mier
Actual: man:men::mango:mangoes, pred: ibbotsons
Actual: melon:melons::pig:pigs, pred: 


 60%|██████████████████▍            | 72783/122310 [2:28:39<45:41:08,  3.32s/it][A

Actual: india:rupee::denmark:krone, pred: trackless
Accuracy is 0.01775147928994083



 60%|██████████████████▍            | 72785/122310 [2:28:40<41:47:52,  3.04s/it][A
 60%|██████████████████▍            | 72791/122310 [2:28:41<29:02:13,  2.11s/it][A
 60%|██████████████████▍            | 72798/122310 [2:28:42<19:27:57,  1.42s/it][A
 60%|██████████████████▍            | 72807/122310 [2:28:42<12:19:27,  1.12it/s][A
 60%|███████████████████             | 72826/122310 [2:28:43<5:57:16,  2.31it/s][A
 60%|███████████████████             | 72829/122310 [2:28:44<5:40:20,  2.42it/s][A
 60%|███████████████████             | 72834/122310 [2:28:45<4:54:36,  2.80it/s][A
 60%|███████████████████             | 72837/122310 [2:28:45<4:39:59,  2.94it/s][A
 60%|███████████████████             | 72847/122310 [2:28:46<3:08:23,  4.38it/s][A
 60%|███████████████████             | 72852/122310 [2:28:47<2:52:26,  4.78it/s][A
 60%|███████████████████             | 72859/122310 [2:28:48<2:25:09,  5.68it/s][A
 60%|███████████████████             | 72872/122310 [2:28:48<1:39:57,  8.24

step: 8820, loss: 108.77789329863226, epoch: 0



 60%|████████████████████▎             | 72960/122310 [2:28:54<52:34, 15.64it/s][A
 60%|████████████████████▎             | 72973/122310 [2:28:55<50:40, 16.23it/s][A
 60%|███████████████████             | 72975/122310 [2:28:56<1:06:39, 12.34it/s][A
 60%|███████████████████             | 72982/122310 [2:28:56<1:11:22, 11.52it/s][A
 60%|███████████████████             | 72994/122310 [2:28:57<1:03:08, 13.02it/s][A
 60%|███████████████████             | 73003/122310 [2:28:58<1:04:11, 12.80it/s][A
 60%|███████████████████             | 73007/122310 [2:28:58<1:17:32, 10.60it/s][A
 60%|███████████████████             | 73009/122310 [2:28:59<1:39:04,  8.29it/s][A
 60%|███████████████████             | 73014/122310 [2:29:00<1:44:05,  7.89it/s][A
 60%|███████████████████             | 73025/122310 [2:29:01<1:21:27, 10.08it/s][A
 60%|███████████████████             | 73032/122310 [2:29:01<1:22:46,  9.92it/s][A
 60%|███████████████████             | 73043/122310 [2:29:02<1:11:16, 11.52

step: 8840, loss: 78.4523117383224, epoch: 0



 60%|███████████████████▏            | 73119/122310 [2:29:09<1:14:27, 11.01it/s][A
 60%|███████████████████▏            | 73125/122310 [2:29:09<1:20:30, 10.18it/s][A
 60%|███████████████████▏            | 73137/122310 [2:29:10<1:07:52, 12.08it/s][A
 60%|███████████████████▏            | 73144/122310 [2:29:11<1:12:11, 11.35it/s][A
 60%|███████████████████▏            | 73154/122310 [2:29:12<1:07:52, 12.07it/s][A
 60%|███████████████████▏            | 73156/122310 [2:29:12<1:29:00,  9.20it/s][A
 60%|███████████████████▏            | 73159/122310 [2:29:13<1:45:56,  7.73it/s][A
 60%|███████████████████▏            | 73174/122310 [2:29:14<1:10:36, 11.60it/s][A
 60%|███████████████████▏            | 73183/122310 [2:29:14<1:09:17, 11.82it/s][A
 60%|███████████████████▏            | 73195/122310 [2:29:15<1:01:54, 13.22it/s][A
 60%|███████████████████▏            | 73205/122310 [2:29:16<1:01:24, 13.33it/s][A
 60%|███████████████████▏            | 73212/122310 [2:29:17<1:06:52, 12.24

step: 8860, loss: 87.80840359712121, epoch: 0



 60%|███████████████████▏            | 73296/122310 [2:29:24<1:51:18,  7.34it/s][A
 60%|███████████████████▏            | 73303/122310 [2:29:25<1:43:05,  7.92it/s][A
 60%|███████████████████▏            | 73310/122310 [2:29:25<1:37:31,  8.37it/s][A
 60%|███████████████████▏            | 73321/122310 [2:29:26<1:20:02, 10.20it/s][A
 60%|███████████████████▏            | 73333/122310 [2:29:27<1:08:21, 11.94it/s][A
 60%|███████████████████▏            | 73337/122310 [2:29:28<1:20:47, 10.10it/s][A
 60%|████████████████████▍             | 73355/122310 [2:29:28<56:57, 14.33it/s][A
 60%|████████████████████▍             | 73365/122310 [2:29:29<57:37, 14.16it/s][A
 60%|███████████████████▏            | 73371/122310 [2:29:30<1:06:04, 12.35it/s][A
 60%|███████████████████▏            | 73380/122310 [2:29:31<1:05:45, 12.40it/s][A
 60%|███████████████████▏            | 73390/122310 [2:29:31<1:03:44, 12.79it/s][A
 60%|███████████████████▏            | 73396/122310 [2:29:32<1:11:20, 11.43

step: 8880, loss: 87.29561685202178, epoch: 0



 60%|███████████████████▏            | 73465/122310 [2:29:38<1:12:38, 11.21it/s][A
 60%|████████████████████▍             | 73481/122310 [2:29:39<56:39, 14.36it/s][A
 60%|███████████████████▏            | 73485/122310 [2:29:39<1:09:25, 11.72it/s][A
 60%|███████████████████▏            | 73491/122310 [2:29:40<1:16:07, 10.69it/s][A
 60%|████████████████████▍             | 73506/122310 [2:29:41<59:45, 13.61it/s][A
 60%|████████████████████▍             | 73517/122310 [2:29:41<58:01, 14.01it/s][A
 60%|███████████████████▏            | 73522/122310 [2:29:42<1:08:20, 11.90it/s][A
 60%|███████████████████▏            | 73529/122310 [2:29:43<1:12:33, 11.21it/s][A
 60%|███████████████████▏            | 73535/122310 [2:29:44<1:18:48, 10.32it/s][A
 60%|███████████████████▏            | 73545/122310 [2:29:44<1:11:37, 11.35it/s][A
 60%|███████████████████▏            | 73554/122310 [2:29:45<1:09:56, 11.62it/s][A
 60%|███████████████████▏            | 73565/122310 [2:29:46<1:04:26, 12.61

step: 8900, loss: 109.4328442559209, epoch: 0



 60%|███████████████████▎            | 73638/122310 [2:29:52<1:18:29, 10.33it/s][A
 60%|███████████████████▎            | 73648/122310 [2:29:53<1:11:29, 11.34it/s][A
 60%|███████████████████▎            | 73655/122310 [2:29:54<1:14:47, 10.84it/s][A
 60%|███████████████████▎            | 73664/122310 [2:29:55<1:12:05, 11.25it/s][A
 60%|████████████████████▍             | 73679/122310 [2:29:55<57:46, 14.03it/s][A
 60%|████████████████████▍             | 73694/122310 [2:29:56<50:53, 15.92it/s][A
 60%|████████████████████▍             | 73707/122310 [2:29:57<49:04, 16.51it/s][A
 60%|████████████████████▍             | 73725/122310 [2:29:58<42:45, 18.94it/s][A
 60%|████████████████████▍             | 73732/122310 [2:29:58<50:17, 16.10it/s][A
 60%|████████████████████▍             | 73741/122310 [2:29:59<53:55, 15.01it/s][A
 60%|████████████████████▌             | 73749/122310 [2:30:00<58:40, 13.79it/s][A
 60%|███████████████████▎            | 73755/122310 [2:30:01<1:28:06,  9.18

step: 8920, loss: 81.20432558607041, epoch: 0



 60%|███████████████████▎            | 73822/122310 [2:30:07<1:19:49, 10.12it/s][A
 60%|███████████████████▎            | 73836/122310 [2:30:08<1:03:14, 12.77it/s][A
 60%|███████████████████▎            | 73843/122310 [2:30:09<1:08:18, 11.83it/s][A
 60%|███████████████████▎            | 73851/122310 [2:30:09<1:10:10, 11.51it/s][A
 60%|███████████████████▎            | 73860/122310 [2:30:10<1:08:43, 11.75it/s][A
 60%|███████████████████▎            | 73866/122310 [2:30:11<1:15:32, 10.69it/s][A
 60%|███████████████████▎            | 73868/122310 [2:30:12<1:37:18,  8.30it/s][A
 60%|███████████████████▎            | 73875/122310 [2:30:12<1:32:46,  8.70it/s][A
 60%|███████████████████▎            | 73891/122310 [2:30:13<1:03:51, 12.64it/s][A
 60%|███████████████████▎            | 73898/122310 [2:30:14<1:09:01, 11.69it/s][A
 60%|███████████████████▎            | 73900/122310 [2:30:15<1:29:19,  9.03it/s][A
 60%|███████████████████▎            | 73903/122310 [2:30:15<1:47:42,  7.49

step: 8940, loss: 99.74874603398894, epoch: 0



 60%|███████████████████▎            | 73972/122310 [2:30:22<1:19:02, 10.19it/s][A
 60%|███████████████████▎            | 73975/122310 [2:30:23<1:35:53,  8.40it/s][A
 60%|███████████████████▎            | 73985/122310 [2:30:23<1:20:43,  9.98it/s][A
 60%|███████████████████▎            | 73996/122310 [2:30:24<1:09:47, 11.54it/s][A
 61%|████████████████████▌             | 74010/122310 [2:30:25<58:17, 13.81it/s][A
 61%|████████████████████▌             | 74020/122310 [2:30:26<58:33, 13.74it/s][A
 61%|████████████████████▌             | 74030/122310 [2:30:26<58:40, 13.72it/s][A
 61%|███████████████████▎            | 74034/122310 [2:30:27<1:11:30, 11.25it/s][A
 61%|████████████████████▌             | 74053/122310 [2:30:28<51:21, 15.66it/s][A
 61%|████████████████████▌             | 74062/122310 [2:30:28<54:50, 14.66it/s][A
 61%|████████████████████▌             | 74070/122310 [2:30:29<59:22, 13.54it/s][A
 61%|███████████████████▍            | 74075/122310 [2:30:30<1:09:39, 11.54

step: 8960, loss: 93.15124122981717, epoch: 0



 61%|███████████████████▍            | 74166/122310 [2:30:37<1:03:47, 12.58it/s][A
 61%|███████████████████▍            | 74168/122310 [2:30:37<1:22:41,  9.70it/s][A
 61%|███████████████████▍            | 74181/122310 [2:30:38<1:06:09, 12.13it/s][A
 61%|███████████████████▍            | 74189/122310 [2:30:39<1:08:14, 11.75it/s][A
 61%|███████████████████▍            | 74193/122310 [2:30:39<1:21:09,  9.88it/s][A
 61%|███████████████████▍            | 74198/122310 [2:30:40<1:29:21,  8.97it/s][A
 61%|███████████████████▍            | 74202/122310 [2:30:41<1:41:29,  7.90it/s][A
 61%|███████████████████▍            | 74209/122310 [2:30:42<1:35:12,  8.42it/s][A
 61%|███████████████████▍            | 74215/122310 [2:30:42<1:36:05,  8.34it/s][A
 61%|███████████████████▍            | 74230/122310 [2:30:43<1:07:00, 11.96it/s][A
 61%|███████████████████▍            | 74239/122310 [2:30:44<1:06:20, 12.08it/s][A
 61%|███████████████████▍            | 74246/122310 [2:30:45<1:10:53, 11.30

step: 8980, loss: 98.78248166746394, epoch: 0



 61%|███████████████████▍            | 74298/122310 [2:30:51<1:06:21, 12.06it/s][A
 61%|███████████████████▍            | 74305/122310 [2:30:52<1:10:26, 11.36it/s][A
 61%|███████████████████▍            | 74310/122310 [2:30:53<1:19:55, 10.01it/s][A
 61%|███████████████████▍            | 74312/122310 [2:30:53<1:41:06,  7.91it/s][A
 61%|███████████████████▍            | 74324/122310 [2:30:54<1:16:39, 10.43it/s][A
 61%|███████████████████▍            | 74332/122310 [2:30:55<1:15:35, 10.58it/s][A
 61%|███████████████████▍            | 74339/122310 [2:30:56<1:17:45, 10.28it/s][A
 61%|███████████████████▍            | 74346/122310 [2:30:56<1:19:30, 10.06it/s][A
 61%|███████████████████▍            | 74356/122310 [2:30:57<1:12:05, 11.09it/s][A
 61%|███████████████████▍            | 74362/122310 [2:30:58<1:18:30, 10.18it/s][A
 61%|███████████████████▍            | 74374/122310 [2:30:59<1:06:28, 12.02it/s][A
 61%|███████████████████▍            | 74383/122310 [2:30:59<1:06:21, 12.04

step: 9000, loss: 75.59687303122377, epoch: 0
saving weights



 61%|███████████████████▍            | 74439/122310 [2:31:06<1:53:39,  7.02it/s][A
 61%|███████████████████▍            | 74445/122310 [2:31:07<1:49:07,  7.31it/s][A
 61%|███████████████████▍            | 74458/122310 [2:31:07<1:19:50,  9.99it/s][A
 61%|███████████████████▍            | 74466/122310 [2:31:08<1:17:56, 10.23it/s][A
 61%|███████████████████▍            | 74477/122310 [2:31:09<1:08:52, 11.57it/s][A
 61%|███████████████████▍            | 74486/122310 [2:31:10<1:07:45, 11.76it/s][A
 61%|███████████████████▍            | 74494/122310 [2:31:10<1:09:14, 11.51it/s][A
 61%|███████████████████▍            | 74502/122310 [2:31:11<1:10:13, 11.35it/s][A
 61%|███████████████████▍            | 74511/122310 [2:31:12<1:08:32, 11.62it/s][A
 61%|███████████████████▍            | 74522/122310 [2:31:13<1:03:18, 12.58it/s][A
 61%|███████████████████▍            | 74530/122310 [2:31:13<1:05:58, 12.07it/s][A
 61%|███████████████████▌            | 74539/122310 [2:31:14<1:05:35, 12.14

step: 9020, loss: 96.02476060361761, epoch: 0



 61%|████████████████████▋             | 74642/122310 [2:31:21<51:40, 15.37it/s][A
 61%|████████████████████▊             | 74651/122310 [2:31:22<55:00, 14.44it/s][A
 61%|████████████████████▊             | 74667/122310 [2:31:23<47:36, 16.68it/s][A
 61%|████████████████████▊             | 74675/122310 [2:31:24<53:01, 14.97it/s][A
 61%|████████████████████▊             | 74682/122310 [2:31:24<59:30, 13.34it/s][A
 61%|███████████████████▌            | 74686/122310 [2:31:25<1:12:23, 10.96it/s][A
 61%|███████████████████▌            | 74688/122310 [2:31:26<1:33:18,  8.51it/s][A
 61%|███████████████████▌            | 74691/122310 [2:31:27<1:50:29,  7.18it/s][A
 61%|███████████████████▌            | 74697/122310 [2:31:27<1:45:56,  7.49it/s][A
 61%|███████████████████▌            | 74704/122310 [2:31:28<1:38:00,  8.10it/s][A
 61%|███████████████████▌            | 74712/122310 [2:31:29<1:28:25,  8.97it/s][A
 61%|████████████████████▊             | 74730/122310 [2:31:29<58:04, 13.65

step: 9040, loss: 123.10208800497793, epoch: 0



 61%|███████████████████▌            | 74825/122310 [2:31:36<1:03:00, 12.56it/s][A
 61%|███████████████████▌            | 74832/122310 [2:31:37<1:07:42, 11.69it/s][A
 61%|███████████████████▌            | 74841/122310 [2:31:37<1:06:35, 11.88it/s][A
 61%|████████████████████▊             | 74858/122310 [2:31:38<51:41, 15.30it/s][A
 61%|████████████████████▊             | 74866/122310 [2:31:39<56:34, 13.98it/s][A
 61%|████████████████████▊             | 74876/122310 [2:31:40<57:00, 13.87it/s][A
 61%|███████████████████▌            | 74880/122310 [2:31:40<1:09:41, 11.34it/s][A
 61%|███████████████████▌            | 74890/122310 [2:31:41<1:05:38, 12.04it/s][A
 61%|███████████████████▌            | 74896/122310 [2:31:42<1:12:27, 10.91it/s][A
 61%|███████████████████▌            | 74900/122310 [2:31:43<1:25:19,  9.26it/s][A
 61%|███████████████████▌            | 74908/122310 [2:31:43<1:20:58,  9.76it/s][A
 61%|███████████████████▌            | 74920/122310 [2:31:44<1:07:08, 11.76

step: 9060, loss: 86.43142827127856, epoch: 0



 61%|████████████████████▊             | 75018/122310 [2:31:51<51:10, 15.40it/s][A
 61%|████████████████████▊             | 75027/122310 [2:31:51<54:22, 14.49it/s][A
 61%|███████████████████▋            | 75034/122310 [2:31:52<1:00:31, 13.02it/s][A
 61%|████████████████████▊             | 75047/122310 [2:31:53<54:28, 14.46it/s][A
 61%|████████████████████▊             | 75059/122310 [2:31:54<52:23, 15.03it/s][A
 61%|███████████████████▋            | 75063/122310 [2:31:54<1:04:44, 12.16it/s][A
 61%|███████████████████▋            | 75071/122310 [2:31:55<1:06:38, 11.81it/s][A
 61%|████████████████████▊             | 75083/122310 [2:31:56<59:36, 13.21it/s][A
 61%|███████████████████▋            | 75086/122310 [2:31:57<1:15:20, 10.45it/s][A
 61%|███████████████████▋            | 75093/122310 [2:31:57<1:17:16, 10.18it/s][A
 61%|███████████████████▋            | 75105/122310 [2:31:58<1:05:37, 11.99it/s][A
 61%|███████████████████▋            | 75108/122310 [2:31:59<1:21:20,  9.67

step: 9080, loss: 79.23435267322428, epoch: 0



 61%|███████████████████▋            | 75169/122310 [2:32:05<1:42:56,  7.63it/s][A
 61%|███████████████████▋            | 75186/122310 [2:32:06<1:03:52, 12.29it/s][A
 61%|███████████████████▋            | 75197/122310 [2:32:07<1:00:02, 13.08it/s][A
 61%|███████████████████▋            | 75200/122310 [2:32:08<1:38:55,  7.94it/s][A
 61%|███████████████████▋            | 75203/122310 [2:32:09<1:51:43,  7.03it/s][A
 61%|███████████████████▋            | 75208/122310 [2:32:10<1:52:26,  6.98it/s][A
 61%|███████████████████▋            | 75219/122310 [2:32:11<1:25:33,  9.17it/s][A
 62%|███████████████████▋            | 75228/122310 [2:32:11<1:18:04, 10.05it/s][A
 62%|███████████████████▋            | 75235/122310 [2:32:12<1:19:06,  9.92it/s][A
 62%|███████████████████▋            | 75240/122310 [2:32:13<1:26:55,  9.02it/s][A
 62%|███████████████████▋            | 75249/122310 [2:32:13<1:18:43,  9.96it/s][A
 62%|███████████████████▋            | 75255/122310 [2:32:14<1:23:17,  9.42

step: 9100, loss: 107.86090492023366, epoch: 0



 62%|████████████████████▉             | 75342/122310 [2:32:20<43:39, 17.93it/s][A
 62%|████████████████████▉             | 75353/122310 [2:32:21<45:44, 17.11it/s][A
 62%|████████████████████▉             | 75363/122310 [2:32:22<48:42, 16.06it/s][A
 62%|████████████████████▉             | 75373/122310 [2:32:22<51:08, 15.30it/s][A
 62%|███████████████████▋            | 75377/122310 [2:32:23<1:03:16, 12.36it/s][A
 62%|███████████████████▋            | 75381/122310 [2:32:24<1:15:59, 10.29it/s][A
 62%|███████████████████▋            | 75391/122310 [2:32:24<1:09:25, 11.26it/s][A
 62%|███████████████████▋            | 75400/122310 [2:32:25<1:07:34, 11.57it/s][A
 62%|███████████████████▋            | 75411/122310 [2:32:26<1:02:05, 12.59it/s][A
 62%|███████████████████▋            | 75421/122310 [2:32:27<1:00:34, 12.90it/s][A
 62%|███████████████████▋            | 75429/122310 [2:32:28<1:22:36,  9.46it/s][A
 62%|███████████████████▋            | 75435/122310 [2:32:29<1:25:35,  9.13

step: 9120, loss: 100.94099378713247, epoch: 0



 62%|███████████████████▋            | 75481/122310 [2:32:35<1:49:48,  7.11it/s][A
 62%|███████████████████▋            | 75487/122310 [2:32:35<1:45:49,  7.37it/s][A
 62%|███████████████████▊            | 75493/122310 [2:32:36<1:42:55,  7.58it/s][A
 62%|███████████████████▊            | 75495/122310 [2:32:37<2:04:23,  6.27it/s][A
 62%|███████████████████▊            | 75503/122310 [2:32:38<1:43:00,  7.57it/s][A
 62%|███████████████████▊            | 75510/122310 [2:32:38<1:35:57,  8.13it/s][A
 62%|███████████████████▊            | 75521/122310 [2:32:39<1:17:00, 10.13it/s][A
 62%|███████████████████▊            | 75530/122310 [2:32:40<1:12:20, 10.78it/s][A
 62%|███████████████████▊            | 75536/122310 [2:32:41<1:17:59, 10.00it/s][A
 62%|███████████████████▊            | 75546/122310 [2:32:41<1:10:51, 11.00it/s][A
 62%|███████████████████▊            | 75550/122310 [2:32:42<1:23:04,  9.38it/s][A
 62%|███████████████████▊            | 75555/122310 [2:32:43<1:30:18,  8.63

step: 9140, loss: 92.1653286012878, epoch: 0



 62%|███████████████████▊            | 75625/122310 [2:32:49<1:27:57,  8.85it/s][A
 62%|███████████████████▊            | 75632/122310 [2:32:50<1:25:54,  9.06it/s][A
 62%|███████████████████▊            | 75639/122310 [2:32:51<1:24:44,  9.18it/s][A
 62%|███████████████████▊            | 75646/122310 [2:32:52<1:23:39,  9.30it/s][A
 62%|███████████████████▊            | 75660/122310 [2:32:52<1:03:40, 12.21it/s][A
 62%|███████████████████▊            | 75668/122310 [2:32:53<1:06:00, 11.78it/s][A
 62%|█████████████████████             | 75681/122310 [2:32:54<57:18, 13.56it/s][A
 62%|█████████████████████             | 75694/122310 [2:32:55<52:39, 14.76it/s][A
 62%|███████████████████▊            | 75700/122310 [2:32:55<1:00:44, 12.79it/s][A
 62%|███████████████████▊            | 75704/122310 [2:32:56<1:13:46, 10.53it/s][A
 62%|███████████████████▊            | 75710/122310 [2:32:57<1:18:44,  9.86it/s][A
 62%|███████████████████▊            | 75712/122310 [2:32:58<1:40:32,  7.72

step: 9160, loss: 94.63394034691653, epoch: 0



 62%|█████████████████████             | 75799/122310 [2:33:04<57:38, 13.45it/s][A
 62%|███████████████████▊            | 75802/122310 [2:33:05<1:12:38, 10.67it/s][A
 62%|███████████████████▊            | 75813/122310 [2:33:06<1:04:47, 11.96it/s][A
 62%|█████████████████████             | 75830/122310 [2:33:06<50:33, 15.32it/s][A
 62%|█████████████████████             | 75837/122310 [2:33:07<56:56, 13.60it/s][A
 62%|███████████████████▊            | 75842/122310 [2:33:08<1:06:56, 11.57it/s][A
 62%|███████████████████▊            | 75848/122310 [2:33:09<1:13:21, 10.56it/s][A
 62%|███████████████████▊            | 75860/122310 [2:33:09<1:03:02, 12.28it/s][A
 62%|█████████████████████             | 75878/122310 [2:33:10<48:31, 15.95it/s][A
 62%|█████████████████████             | 75884/122310 [2:33:11<56:50, 13.61it/s][A
 62%|███████████████████▊            | 75886/122310 [2:33:12<1:15:07, 10.30it/s][A
 62%|███████████████████▊            | 75891/122310 [2:33:12<1:23:26,  9.27

step: 9180, loss: 92.62889544232509, epoch: 0



 62%|███████████████████▊            | 75964/122310 [2:33:19<1:32:12,  8.38it/s][A
 62%|███████████████████▉            | 75979/122310 [2:33:20<1:04:10, 12.03it/s][A
 62%|███████████████████▉            | 75986/122310 [2:33:20<1:07:33, 11.43it/s][A
 62%|███████████████████▉            | 75991/122310 [2:33:21<1:15:56, 10.17it/s][A
 62%|███████████████████▉            | 75995/122310 [2:33:22<1:27:30,  8.82it/s][A
 62%|███████████████████▉            | 76001/122310 [2:33:23<1:28:48,  8.69it/s][A
 62%|███████████████████▉            | 76010/122310 [2:33:23<1:17:55,  9.90it/s][A
 62%|███████████████████▉            | 76014/122310 [2:33:24<1:29:29,  8.62it/s][A
 62%|███████████████████▉            | 76021/122310 [2:33:25<1:25:51,  8.99it/s][A
 62%|███████████████████▉            | 76026/122310 [2:33:25<1:32:06,  8.37it/s][A
 62%|███████████████████▉            | 76033/122310 [2:33:26<1:27:26,  8.82it/s][A
 62%|███████████████████▉            | 76042/122310 [2:33:27<1:17:15,  9.98

step: 9200, loss: 102.97524280694618, epoch: 0
sim1 and sim2 are 0.6329892967208334, 0.20013285318761662
cosine of pred and queen: 0.07918872276877956
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: economic
Actual: beijing:china::tokyo:japan, pred: nigeria
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: agreed
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: new
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: weeke
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: crop
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: greece
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: maharastra:mumb


 62%|███████████████████▉            | 76100/122310 [2:33:42<1:04:15, 11.99it/s][A

Actual: jharkhand:ranchi::punjab:chandigarh, pred: bihar
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: vetoes
Actual: india:delhi::serbia:belgrade, pred: keralam
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: football
Actual: mouse:squeak::elephant:trumpet, pred: mongrel
Actual: algeria:dinar::usa:dollar, pred: holyhead
Actual: argentina:peso::russia:ruble, pred: states
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: first
Actual: europe:euro::japan:yen, pred: bank
Actual: india:rupee::denmark:krone, pred: nestorius
Actual: usa:dollar::nigeria:naira, pred: cent
Actual: switzerland:swiss::spain:spanish, pred: union
Actual: thailand:thai::india:indian, pred: ruto
Actual: sweden:swedish::netherlands:dutch, pred: also
Actual: russia:russian::germany:german, pred: italy
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: spanish
Act


 62%|███████████████████▎           | 76103/122310 [2:34:45<41:51:00,  3.26s/it][A

Actual: india:rupee::denmark:krone, pred: nestorius
Accuracy is 0.01775147928994083



 62%|███████████████████▎           | 76116/122310 [2:34:46<23:28:25,  1.83s/it][A
 62%|███████████████████▎           | 76130/122310 [2:34:47<14:10:42,  1.11s/it][A
 62%|███████████████████▎           | 76135/122310 [2:34:47<12:03:02,  1.06it/s][A
 62%|███████████████████▎           | 76137/122310 [2:34:48<11:22:18,  1.13it/s][A
 62%|███████████████████▉            | 76146/122310 [2:34:49<7:35:07,  1.69it/s][A
 62%|███████████████████▉            | 76158/122310 [2:34:49<4:46:57,  2.68it/s][A
 62%|███████████████████▉            | 76174/122310 [2:34:50<2:55:53,  4.37it/s][A
 62%|███████████████████▉            | 76184/122310 [2:34:51<2:21:45,  5.42it/s][A
 62%|███████████████████▉            | 76191/122310 [2:34:52<2:07:37,  6.02it/s][A
 62%|███████████████████▉            | 76200/122310 [2:34:52<1:48:12,  7.10it/s][A
 62%|███████████████████▉            | 76205/122310 [2:34:53<1:48:08,  7.11it/s][A
 62%|███████████████████▉            | 76210/122310 [2:34:54<1:48:13,  7.10

step: 9220, loss: 90.69701939945917, epoch: 0



 62%|█████████████████████▏            | 76297/122310 [2:35:00<52:21, 14.65it/s][A
 62%|█████████████████████▏            | 76305/122310 [2:35:01<56:02, 13.68it/s][A
 62%|█████████████████████▏            | 76316/122310 [2:35:01<53:49, 14.24it/s][A
 62%|█████████████████████▏            | 76326/122310 [2:35:02<53:49, 14.24it/s][A
 62%|█████████████████████▏            | 76333/122310 [2:35:03<59:06, 12.96it/s][A
 62%|███████████████████▉            | 76338/122310 [2:35:04<1:08:25, 11.20it/s][A
 62%|███████████████████▉            | 76345/122310 [2:35:04<1:10:56, 10.80it/s][A
 62%|███████████████████▉            | 76352/122310 [2:35:05<1:12:38, 10.54it/s][A
 62%|███████████████████▉            | 76362/122310 [2:35:06<1:05:51, 11.63it/s][A
 62%|███████████████████▉            | 76366/122310 [2:35:06<1:17:47,  9.84it/s][A
 62%|███████████████████▉            | 76378/122310 [2:35:07<1:03:49, 11.99it/s][A
 62%|█████████████████████▏            | 76389/122310 [2:35:08<58:38, 13.05

step: 9240, loss: 85.58254030534965, epoch: 0



 63%|████████████████████            | 76453/122310 [2:35:14<1:02:30, 12.23it/s][A
 63%|████████████████████            | 76459/122310 [2:35:15<1:08:59, 11.08it/s][A
 63%|████████████████████            | 76466/122310 [2:35:16<1:11:15, 10.72it/s][A
 63%|████████████████████            | 76475/122310 [2:35:16<1:07:42, 11.28it/s][A
 63%|████████████████████            | 76477/122310 [2:35:17<1:26:50,  8.80it/s][A
 63%|████████████████████            | 76480/122310 [2:35:18<1:42:33,  7.45it/s][A
 63%|████████████████████            | 76496/122310 [2:35:18<1:03:39, 12.00it/s][A
 63%|████████████████████            | 76501/122310 [2:35:19<1:12:29, 10.53it/s][A
 63%|████████████████████            | 76513/122310 [2:35:20<1:01:12, 12.47it/s][A
 63%|█████████████████████▎            | 76524/122310 [2:35:21<57:01, 13.38it/s][A
 63%|████████████████████            | 76529/122310 [2:35:21<1:06:21, 11.50it/s][A
 63%|████████████████████            | 76535/122310 [2:35:22<1:12:05, 10.58

step: 9260, loss: 87.25639376352429, epoch: 0



 63%|████████████████████            | 76613/122310 [2:35:28<1:04:14, 11.86it/s][A
 63%|█████████████████████▎            | 76627/122310 [2:35:29<53:32, 14.22it/s][A
 63%|█████████████████████▎            | 76637/122310 [2:35:30<53:47, 14.15it/s][A
 63%|█████████████████████▎            | 76647/122310 [2:35:30<53:40, 14.18it/s][A
 63%|█████████████████████▎            | 76655/122310 [2:35:31<57:13, 13.30it/s][A
 63%|█████████████████████▎            | 76663/122310 [2:35:32<59:52, 12.71it/s][A
 63%|█████████████████████▎            | 76677/122310 [2:35:33<51:37, 14.73it/s][A
 63%|████████████████████            | 76679/122310 [2:35:33<1:07:50, 11.21it/s][A
 63%|█████████████████████▎            | 76693/122310 [2:35:34<55:15, 13.76it/s][A
 63%|████████████████████            | 76699/122310 [2:35:35<1:02:27, 12.17it/s][A
 63%|████████████████████            | 76708/122310 [2:35:35<1:01:38, 12.33it/s][A
 63%|████████████████████            | 76713/122310 [2:35:36<1:10:40, 10.75

step: 9280, loss: 92.16396074952165, epoch: 0



 63%|████████████████████            | 76772/122310 [2:35:43<1:12:22, 10.49it/s][A
 63%|████████████████████            | 76782/122310 [2:35:43<1:05:26, 11.60it/s][A
 63%|█████████████████████▎            | 76793/122310 [2:35:44<59:23, 12.77it/s][A
 63%|████████████████████            | 76797/122310 [2:35:45<1:11:14, 10.65it/s][A
 63%|█████████████████████▎            | 76813/122310 [2:35:45<53:25, 14.19it/s][A
 63%|█████████████████████▎            | 76826/122310 [2:35:46<49:05, 15.44it/s][A
 63%|████████████████████            | 76829/122310 [2:35:47<1:02:51, 12.06it/s][A
 63%|████████████████████            | 76835/122310 [2:35:48<1:09:06, 10.97it/s][A
 63%|████████████████████            | 76844/122310 [2:35:48<1:06:00, 11.48it/s][A
 63%|████████████████████            | 76855/122310 [2:35:49<1:00:01, 12.62it/s][A
 63%|████████████████████            | 76863/122310 [2:35:50<1:02:10, 12.18it/s][A
 63%|████████████████████            | 76865/122310 [2:35:50<1:20:42,  9.38

step: 9300, loss: 83.60769471531607, epoch: 0



 63%|████████████████████▏           | 76952/122310 [2:35:57<1:00:01, 12.59it/s][A
 63%|█████████████████████▍            | 76963/122310 [2:35:57<56:18, 13.42it/s][A
 63%|█████████████████████▍            | 76972/122310 [2:35:58<56:57, 13.27it/s][A
 63%|████████████████████▏           | 76975/122310 [2:35:59<1:11:33, 10.56it/s][A
 63%|████████████████████▏           | 76987/122310 [2:36:00<1:00:31, 12.48it/s][A
 63%|████████████████████▏           | 76992/122310 [2:36:00<1:09:40, 10.84it/s][A
 63%|████████████████████▏           | 77000/122310 [2:36:01<1:08:51, 10.97it/s][A
 63%|█████████████████████▍            | 77013/122310 [2:36:02<57:21, 13.16it/s][A
 63%|█████████████████████▍            | 77025/122310 [2:36:02<52:59, 14.24it/s][A
 63%|████████████████████▏           | 77031/122310 [2:36:03<1:00:12, 12.54it/s][A
 63%|█████████████████████▍            | 77042/122310 [2:36:04<56:18, 13.40it/s][A
 63%|█████████████████████▍            | 77061/122310 [2:36:05<43:25, 17.37

step: 9320, loss: 78.28291353591526, epoch: 0



 63%|████████████████████▏           | 77134/122310 [2:36:11<1:06:09, 11.38it/s][A
 63%|████████████████████▏           | 77143/122310 [2:36:12<1:03:56, 11.77it/s][A
 63%|████████████████████▏           | 77149/122310 [2:36:12<1:09:46, 10.79it/s][A
 63%|████████████████████▏           | 77153/122310 [2:36:13<1:21:20,  9.25it/s][A
 63%|████████████████████▏           | 77164/122310 [2:36:14<1:07:37, 11.13it/s][A
 63%|████████████████████▏           | 77171/122310 [2:36:15<1:10:39, 10.65it/s][A
 63%|████████████████████▏           | 77176/122310 [2:36:15<1:18:17,  9.61it/s][A
 63%|████████████████████▏           | 77186/122310 [2:36:16<1:08:52, 10.92it/s][A
 63%|████████████████████▏           | 77189/122310 [2:36:17<1:24:40,  8.88it/s][A
 63%|████████████████████▏           | 77190/122310 [2:36:17<1:52:36,  6.68it/s][A
 63%|████████████████████▏           | 77193/122310 [2:36:19<2:44:13,  4.58it/s][A
 63%|████████████████████▏           | 77206/122310 [2:36:20<1:57:10,  6.42

step: 9340, loss: 76.6130261377, epoch: 0



 63%|████████████████████▏           | 77255/122310 [2:36:25<1:19:34,  9.44it/s][A
 63%|████████████████████▏           | 77260/122310 [2:36:26<1:26:04,  8.72it/s][A
 63%|████████████████████▏           | 77263/122310 [2:36:27<1:41:05,  7.43it/s][A
 63%|████████████████████▏           | 77269/122310 [2:36:27<1:37:08,  7.73it/s][A
 63%|████████████████████▏           | 77276/122310 [2:36:28<1:29:32,  8.38it/s][A
 63%|████████████████████▏           | 77289/122310 [2:36:29<1:06:11, 11.34it/s][A
 63%|████████████████████▏           | 77295/122310 [2:36:29<1:11:43, 10.46it/s][A
 63%|████████████████████▏           | 77302/122310 [2:36:30<1:13:23, 10.22it/s][A
 63%|████████████████████▏           | 77309/122310 [2:36:31<1:14:16, 10.10it/s][A
 63%|████████████████████▏           | 77317/122310 [2:36:32<1:11:52, 10.43it/s][A
 63%|████████████████████▏           | 77325/122310 [2:36:32<1:10:23, 10.65it/s][A
 63%|████████████████████▏           | 77329/122310 [2:36:33<1:21:44,  9.17

step: 9360, loss: 96.16209103714435, epoch: 0



 63%|████████████████████▎           | 77420/122310 [2:36:39<1:04:26, 11.61it/s][A
 63%|████████████████████▎           | 77426/122310 [2:36:40<1:09:57, 10.69it/s][A
 63%|████████████████████▎           | 77434/122310 [2:36:41<1:08:50, 10.86it/s][A
 63%|█████████████████████▌            | 77446/122310 [2:36:42<59:12, 12.63it/s][A
 63%|████████████████████▎           | 77454/122310 [2:36:42<1:00:56, 12.27it/s][A
 63%|█████████████████████▌            | 77464/122310 [2:36:43<58:50, 12.70it/s][A
 63%|████████████████████▎           | 77478/122310 [2:36:44<1:05:26, 11.42it/s][A
 63%|████████████████████▎           | 77484/122310 [2:36:45<1:09:38, 10.73it/s][A
 63%|████████████████████▎           | 77496/122310 [2:36:46<1:00:31, 12.34it/s][A
 63%|████████████████████▎           | 77504/122310 [2:36:47<1:02:03, 12.03it/s][A
 63%|████████████████████▎           | 77513/122310 [2:36:47<1:01:11, 12.20it/s][A
 63%|█████████████████████▌            | 77523/122310 [2:36:48<58:56, 12.66

step: 9380, loss: 86.17032899926667, epoch: 0



 63%|█████████████████████▌            | 77592/122310 [2:36:54<53:16, 13.99it/s][A
 63%|█████████████████████▌            | 77600/122310 [2:36:54<56:27, 13.20it/s][A
 63%|████████████████████▎           | 77606/122310 [2:36:55<1:03:15, 11.78it/s][A
 63%|████████████████████▎           | 77612/122310 [2:36:56<1:09:05, 10.78it/s][A
 63%|████████████████████▎           | 77620/122310 [2:36:57<1:08:10, 10.93it/s][A
 63%|█████████████████████▌            | 77632/122310 [2:36:57<58:41, 12.69it/s][A
 63%|████████████████████▎           | 77636/122310 [2:36:58<1:10:53, 10.50it/s][A
 63%|████████████████████▎           | 77643/122310 [2:36:59<1:12:12, 10.31it/s][A
 63%|████████████████████▎           | 77649/122310 [2:36:59<1:16:13,  9.76it/s][A
 63%|████████████████████▎           | 77654/122310 [2:37:00<1:23:00,  8.97it/s][A
 63%|████████████████████▎           | 77658/122310 [2:37:01<1:33:26,  7.96it/s][A
 63%|████████████████████▎           | 77662/122310 [2:37:02<1:42:33,  7.26

step: 9400, loss: 84.32199490717831, epoch: 0



 64%|█████████████████████▌            | 77764/122310 [2:37:08<50:55, 14.58it/s][A
 64%|█████████████████████▌            | 77772/122310 [2:37:09<54:32, 13.61it/s][A
 64%|████████████████████▎           | 77774/122310 [2:37:19<5:47:21,  2.14it/s][A
 64%|████████████████████▎           | 77776/122310 [2:37:19<5:38:42,  2.19it/s][A
 64%|████████████████████▎           | 77783/122310 [2:37:20<4:08:15,  2.99it/s][A
 64%|████████████████████▎           | 77785/122310 [2:37:21<4:09:51,  2.97it/s][A
 64%|████████████████████▎           | 77792/122310 [2:37:21<3:03:52,  4.04it/s][A

step: 9420, loss: 79.12354973321663, epoch: 0



 64%|████████████████████▎           | 77798/122310 [2:37:22<2:33:40,  4.83it/s][A
 64%|████████████████████▎           | 77811/122310 [2:37:23<1:37:27,  7.61it/s][A
 64%|████████████████████▎           | 77826/122310 [2:37:24<1:09:25, 10.68it/s][A
 64%|█████████████████████▋            | 77839/122310 [2:37:24<59:01, 12.56it/s][A
 64%|█████████████████████▋            | 77848/122310 [2:37:25<58:54, 12.58it/s][A
 64%|█████████████████████▋            | 77859/122310 [2:37:26<55:25, 13.37it/s][A
 64%|████████████████████▎           | 77865/122310 [2:37:26<1:01:43, 12.00it/s][A
 64%|█████████████████████▋            | 77880/122310 [2:37:27<50:44, 14.60it/s][A
 64%|█████████████████████▋            | 77894/122310 [2:37:28<46:09, 16.04it/s][A
 64%|█████████████████████▋            | 77903/122310 [2:37:28<49:09, 15.05it/s][A
 64%|█████████████████████▋            | 77912/122310 [2:37:29<51:36, 14.34it/s][A
 64%|█████████████████████▋            | 77927/122310 [2:37:30<45:17, 16.33

step: 9440, loss: 91.69755852734568, epoch: 0



 64%|█████████████████████▋            | 78019/122310 [2:37:36<56:18, 13.11it/s][A
 64%|█████████████████████▋            | 78029/122310 [2:37:37<54:59, 13.42it/s][A
 64%|████████████████████▍           | 78031/122310 [2:37:38<1:12:08, 10.23it/s][A
 64%|████████████████████▍           | 78040/122310 [2:37:38<1:07:15, 10.97it/s][A
 64%|████████████████████▍           | 78047/122310 [2:37:39<1:09:27, 10.62it/s][A
 64%|████████████████████▍           | 78053/122310 [2:37:40<1:13:59,  9.97it/s][A
 64%|█████████████████████▋            | 78069/122310 [2:37:41<53:49, 13.70it/s][A
 64%|████████████████████▍           | 78072/122310 [2:37:41<1:08:03, 10.83it/s][A
 64%|████████████████████▍           | 78074/122310 [2:37:42<1:27:06,  8.46it/s][A
 64%|████████████████████▍           | 78079/122310 [2:37:43<1:31:44,  8.04it/s][A
 64%|████████████████████▍           | 78083/122310 [2:37:43<1:40:49,  7.31it/s][A
 64%|████████████████████▍           | 78091/122310 [2:37:44<1:27:08,  8.46

step: 9460, loss: 96.57356129410255, epoch: 0



 64%|████████████████████▍           | 78154/122310 [2:37:51<1:00:28, 12.17it/s][A
 64%|████████████████████▍           | 78159/122310 [2:37:51<1:09:04, 10.65it/s][A
 64%|█████████████████████▋            | 78174/122310 [2:37:52<53:27, 13.76it/s][A
 64%|████████████████████▍           | 78177/122310 [2:37:53<1:07:23, 10.92it/s][A
 64%|█████████████████████▋            | 78191/122310 [2:37:53<54:18, 13.54it/s][A
 64%|█████████████████████▋            | 78207/122310 [2:37:54<45:23, 16.20it/s][A
 64%|█████████████████████▋            | 78214/122310 [2:37:55<51:18, 14.33it/s][A
 64%|█████████████████████▋            | 78224/122310 [2:37:56<51:36, 14.24it/s][A
 64%|█████████████████████▋            | 78234/122310 [2:37:56<51:54, 14.15it/s][A
 64%|█████████████████████▋            | 78240/122310 [2:37:57<59:11, 12.41it/s][A
 64%|█████████████████████▊            | 78257/122310 [2:37:58<46:35, 15.76it/s][A
 64%|█████████████████████▊            | 78266/122310 [2:37:58<49:17, 14.89

step: 9480, loss: 89.515244961011, epoch: 0



 64%|█████████████████████▊            | 78357/122310 [2:38:05<54:05, 13.54it/s][A
 64%|████████████████████▌           | 78363/122310 [2:38:06<1:00:49, 12.04it/s][A
 64%|████████████████████▌           | 78369/122310 [2:38:06<1:06:44, 10.97it/s][A
 64%|████████████████████▌           | 78379/122310 [2:38:07<1:01:32, 11.90it/s][A
 64%|█████████████████████▊            | 78390/122310 [2:38:08<56:29, 12.96it/s][A
 64%|████████████████████▌           | 78396/122310 [2:38:08<1:03:31, 11.52it/s][A
 64%|████████████████████▌           | 78401/122310 [2:38:09<1:11:57, 10.17it/s][A
 64%|████████████████████▌           | 78413/122310 [2:38:10<1:00:25, 12.11it/s][A
 64%|████████████████████▌           | 78419/122310 [2:38:11<1:06:32, 10.99it/s][A
 64%|████████████████████▌           | 78423/122310 [2:38:11<1:17:53,  9.39it/s][A
 64%|████████████████████▌           | 78424/122310 [2:38:12<1:44:28,  7.00it/s][A
 64%|████████████████████▌           | 78440/122310 [2:38:13<1:02:48, 11.64

step: 9500, loss: 89.99225449161263, epoch: 0



 64%|████████████████████▌           | 78492/122310 [2:38:19<1:11:25, 10.23it/s][A
 64%|████████████████████▌           | 78499/122310 [2:38:20<1:12:11, 10.11it/s][A
 64%|████████████████████▌           | 78508/122310 [2:38:21<1:07:18, 10.85it/s][A
 64%|████████████████████▌           | 78510/122310 [2:38:21<1:26:08,  8.47it/s][A
 64%|████████████████████▌           | 78519/122310 [2:38:22<1:15:23,  9.68it/s][A
 64%|████████████████████▌           | 78528/122310 [2:38:23<1:09:15, 10.53it/s][A
 64%|████████████████████▌           | 78534/122310 [2:38:23<1:13:24,  9.94it/s][A
 64%|█████████████████████▊            | 78548/122310 [2:38:24<56:45, 12.85it/s][A
 64%|█████████████████████▊            | 78558/122310 [2:38:25<55:13, 13.20it/s][A
 64%|████████████████████▌           | 78564/122310 [2:38:26<1:01:53, 11.78it/s][A
 64%|█████████████████████▊            | 78576/122310 [2:38:26<54:51, 13.29it/s][A
 64%|█████████████████████▊            | 78584/122310 [2:38:27<57:28, 12.68

step: 9520, loss: 77.99326943928477, epoch: 0



 64%|█████████████████████▊            | 78664/122310 [2:38:33<50:52, 14.30it/s][A
 64%|█████████████████████▊            | 78678/122310 [2:38:34<45:42, 15.91it/s][A
 64%|█████████████████████▊            | 78687/122310 [2:38:35<48:49, 14.89it/s][A
 64%|█████████████████████▉            | 78699/122310 [2:38:36<47:03, 15.44it/s][A
 64%|█████████████████████▉            | 78713/122310 [2:38:36<43:30, 16.70it/s][A
 64%|█████████████████████▉            | 78722/122310 [2:38:37<46:56, 15.48it/s][A
 64%|█████████████████████▉            | 78733/122310 [2:38:38<46:56, 15.47it/s][A
 64%|█████████████████████▉            | 78740/122310 [2:38:38<52:53, 13.73it/s][A
 64%|████████████████████▌           | 78742/122310 [2:38:39<1:09:08, 10.50it/s][A
 64%|████████████████████▌           | 78749/122310 [2:38:40<1:10:37, 10.28it/s][A
 64%|████████████████████▌           | 78755/122310 [2:38:41<1:14:37,  9.73it/s][A
 64%|█████████████████████▉            | 78772/122310 [2:38:41<51:57, 13.96

step: 9540, loss: 84.47885911858292, epoch: 0



 64%|█████████████████████▉            | 78847/122310 [2:38:48<58:44, 12.33it/s][A
 64%|█████████████████████▉            | 78858/122310 [2:38:48<54:29, 13.29it/s][A
 64%|█████████████████████▉            | 78871/122310 [2:38:49<48:55, 14.80it/s][A
 64%|█████████████████████▉            | 78881/122310 [2:38:50<49:47, 14.53it/s][A
 64%|█████████████████████▉            | 78888/122310 [2:38:51<54:59, 13.16it/s][A
 65%|█████████████████████▉            | 78895/122310 [2:38:51<59:29, 12.16it/s][A
 65%|████████████████████▋           | 78900/122310 [2:38:52<1:08:04, 10.63it/s][A
 65%|████████████████████▋           | 78906/122310 [2:38:53<1:12:37,  9.96it/s][A
 65%|████████████████████▋           | 78911/122310 [2:38:53<1:19:36,  9.09it/s][A
 65%|████████████████████▋           | 78918/122310 [2:38:54<1:17:53,  9.29it/s][A
 65%|████████████████████▋           | 78926/122310 [2:38:55<1:13:06,  9.89it/s][A
 65%|████████████████████▋           | 78936/122310 [2:38:56<1:05:13, 11.08

step: 9560, loss: 97.22250994816314, epoch: 0



 65%|████████████████████▋           | 79003/122310 [2:39:02<1:18:17,  9.22it/s][A
 65%|████████████████████▋           | 79011/122310 [2:39:03<1:15:22,  9.57it/s][A
 65%|████████████████████▋           | 79023/122310 [2:39:03<1:02:47, 11.49it/s][A
 65%|████████████████████▋           | 79028/122310 [2:39:04<1:12:17,  9.98it/s][A
 65%|█████████████████████▉            | 79042/122310 [2:39:05<57:28, 12.55it/s][A
 65%|████████████████████▋           | 79044/122310 [2:39:06<1:15:24,  9.56it/s][A
 65%|████████████████████▋           | 79049/122310 [2:39:07<1:22:36,  8.73it/s][A
 65%|████████████████████▋           | 79057/122310 [2:39:07<1:16:57,  9.37it/s][A
 65%|████████████████████▋           | 79063/122310 [2:39:08<1:20:03,  9.00it/s][A
 65%|████████████████████▋           | 79072/122310 [2:39:09<1:12:23,  9.95it/s][A
 65%|████████████████████▋           | 79081/122310 [2:39:09<1:07:50, 10.62it/s][A
 65%|████████████████████▋           | 79085/122310 [2:39:10<1:19:25,  9.07

step: 9580, loss: 87.76771824165066, epoch: 0



 65%|████████████████████▋           | 79149/122310 [2:39:17<1:19:09,  9.09it/s][A
 65%|████████████████████▋           | 79160/122310 [2:39:18<1:06:29, 10.82it/s][A
 65%|████████████████████▋           | 79162/122310 [2:39:18<1:25:53,  8.37it/s][A
 65%|████████████████████▋           | 79173/122310 [2:39:19<1:09:34, 10.33it/s][A
 65%|██████████████████████            | 79185/122310 [2:39:20<59:23, 12.10it/s][A
 65%|████████████████████▋           | 79192/122310 [2:39:21<1:03:25, 11.33it/s][A
 65%|██████████████████████            | 79212/122310 [2:39:21<44:48, 16.03it/s][A
 65%|██████████████████████            | 79217/122310 [2:39:22<54:12, 13.25it/s][A
 65%|██████████████████████            | 79225/122310 [2:39:23<57:14, 12.54it/s][A
 65%|██████████████████████            | 79234/122310 [2:39:23<57:54, 12.40it/s][A
 65%|████████████████████▋           | 79236/122310 [2:39:24<1:15:02,  9.57it/s][A
 65%|████████████████████▋           | 79239/122310 [2:39:25<1:30:49,  7.90

step: 9600, loss: 109.2865612122875, epoch: 0
sim1 and sim2 are 0.6304492226311668, 0.20968165322848986
cosine of pred and queen: 0.08470758848273383
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: korea
Actual: beijing:china::tokyo:japan, pred: asia
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: largement
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: country
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: broke
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: crop
Actual: china:asia::greece:europe, pred: debt
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: belou
Actual: maharastra:mumba


 65%|████████████████████▋           | 79298/122310 [2:39:42<1:04:36, 11.10it/s][A

Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: asia
Actual: mouse:squeak::elephant:trumpet, pred: mongrel
Actual: algeria:dinar::usa:dollar, pred: holyhead
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: france
Actual: europe:euro::japan:yen, pred: year
Actual: india:rupee::denmark:krone, pred: trackless
Actual: usa:dollar::nigeria:naira, pred: asia
Actual: switzerland:swiss::spain:spanish, pred: top
Actual: thailand:thai::india:indian, pred: emabssy
Actual: sweden:swedish::netherlands:dutch, pred: also
Actual: russia:russian::germany:german, pred: italy
Actual: portugal:portuguese::slovakia:slovakian, pred: classicists
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croati


 65%|████████████████████           | 79302/122310 [2:40:47<40:29:36,  3.39s/it][A

Actual: india:rupee::denmark:krone, pred: trackless
Accuracy is 0.023668639053254437



 65%|████████████████████           | 79309/122310 [2:40:48<28:39:32,  2.40s/it][A
 65%|████████████████████           | 79313/122310 [2:40:48<23:25:23,  1.96s/it][A
 65%|████████████████████           | 79315/122310 [2:40:49<21:03:25,  1.76s/it][A
 65%|████████████████████           | 79323/122310 [2:40:50<12:47:06,  1.07s/it][A
 65%|████████████████████▊           | 79338/122310 [2:40:51<6:22:01,  1.87it/s][A
 65%|████████████████████▊           | 79348/122310 [2:40:53<5:23:08,  2.22it/s][A
 65%|████████████████████▊           | 79357/122310 [2:40:54<4:03:15,  2.94it/s][A
 65%|████████████████████▊           | 79366/122310 [2:40:55<3:07:44,  3.81it/s][A
 65%|████████████████████▊           | 79372/122310 [2:40:56<2:45:09,  4.33it/s][A
 65%|████████████████████▊           | 79378/122310 [2:40:56<2:26:00,  4.90it/s][A
 65%|████████████████████▊           | 79389/122310 [2:40:57<1:47:41,  6.64it/s][A
 65%|████████████████████▊           | 79392/122310 [2:40:58<1:56:33,  6.14

step: 9620, loss: 73.05705242132775, epoch: 0



 65%|████████████████████▊           | 79436/122310 [2:41:03<1:27:14,  8.19it/s][A
 65%|████████████████████▊           | 79447/122310 [2:41:04<1:12:17,  9.88it/s][A
 65%|████████████████████▊           | 79453/122310 [2:41:04<1:15:37,  9.44it/s][A
 65%|████████████████████▊           | 79461/122310 [2:41:05<1:12:16,  9.88it/s][A
 65%|████████████████████▊           | 79466/122310 [2:41:06<1:18:57,  9.04it/s][A
 65%|████████████████████▊           | 79472/122310 [2:41:07<1:21:03,  8.81it/s][A
 65%|████████████████████▊           | 79476/122310 [2:41:09<2:48:25,  4.24it/s][A
 65%|████████████████████▊           | 79483/122310 [2:41:10<2:16:55,  5.21it/s][A
 65%|████████████████████▊           | 79491/122310 [2:41:11<1:51:49,  6.38it/s][A
 65%|████████████████████▊           | 79503/122310 [2:41:12<1:22:13,  8.68it/s][A
 65%|████████████████████▊           | 79516/122310 [2:41:12<1:05:23, 10.91it/s][A
 65%|████████████████████▊           | 79524/122310 [2:41:13<1:05:08, 10.95

step: 9640, loss: 79.57284855726975, epoch: 0



 65%|██████████████████████            | 79579/122310 [2:41:17<53:01, 13.43it/s][A
 65%|██████████████████████            | 79586/122310 [2:41:17<57:47, 12.32it/s][A
 65%|████████████████████▊           | 79590/122310 [2:41:18<1:08:48, 10.35it/s][A
 65%|████████████████████▊           | 79598/122310 [2:41:19<1:07:50, 10.49it/s][A
 65%|████████████████████▊           | 79603/122310 [2:41:20<1:15:27,  9.43it/s][A
 65%|████████████████████▊           | 79614/122310 [2:41:21<1:23:06,  8.56it/s][A
 65%|████████████████████▊           | 79627/122310 [2:41:22<1:05:23, 10.88it/s][A
 65%|████████████████████▊           | 79631/122310 [2:41:23<1:15:07,  9.47it/s][A
 65%|████████████████████▊           | 79635/122310 [2:41:23<1:24:50,  8.38it/s][A
 65%|████████████████████▊           | 79641/122310 [2:41:24<1:25:20,  8.33it/s][A
 65%|████████████████████▊           | 79645/122310 [2:41:25<1:34:32,  7.52it/s][A
 65%|██████████████████████▏           | 79668/122310 [2:41:25<49:10, 14.45

step: 9660, loss: 83.28951972338687, epoch: 0



 65%|██████████████████████▏           | 79745/122310 [2:41:31<44:05, 16.09it/s][A
 65%|██████████████████████▏           | 79753/122310 [2:41:32<48:47, 14.54it/s][A
 65%|████████████████████▊           | 79756/122310 [2:41:33<1:02:43, 11.31it/s][A
 65%|████████████████████▊           | 79759/122310 [2:41:33<1:16:51,  9.23it/s][A
 65%|████████████████████▊           | 79766/122310 [2:41:34<1:16:02,  9.33it/s][A
 65%|████████████████████▊           | 79768/122310 [2:41:35<1:36:30,  7.35it/s][A
 65%|████████████████████▊           | 79774/122310 [2:41:36<1:33:30,  7.58it/s][A
 65%|████████████████████▊           | 79782/122310 [2:41:36<1:22:31,  8.59it/s][A
 65%|████████████████████▉           | 79789/122310 [2:41:37<1:19:44,  8.89it/s][A
 65%|████████████████████▉           | 79797/122310 [2:41:38<1:14:31,  9.51it/s][A
 65%|████████████████████▉           | 79807/122310 [2:41:39<1:05:47, 10.77it/s][A
 65%|████████████████████▉           | 79814/122310 [2:41:39<1:08:03, 10.41

step: 9680, loss: 79.23323843557566, epoch: 0



 65%|████████████████████▉           | 79883/122310 [2:41:46<1:03:40, 11.11it/s][A
 65%|████████████████████▉           | 79885/122310 [2:41:47<1:22:15,  8.60it/s][A
 65%|████████████████████▉           | 79891/122310 [2:41:47<1:23:31,  8.46it/s][A
 65%|████████████████████▉           | 79894/122310 [2:41:48<1:38:24,  7.18it/s][A
 65%|████████████████████▉           | 79897/122310 [2:41:49<1:52:46,  6.27it/s][A
 65%|████████████████████▉           | 79914/122310 [2:41:50<1:02:10, 11.36it/s][A
 65%|██████████████████████▏           | 79927/122310 [2:41:50<53:32, 13.19it/s][A
 65%|██████████████████████▏           | 79935/122310 [2:41:51<56:06, 12.59it/s][A
 65%|██████████████████████▏           | 79943/122310 [2:41:52<58:21, 12.10it/s][A
 65%|████████████████████▉           | 79949/122310 [2:41:53<1:04:35, 10.93it/s][A
 65%|████████████████████▉           | 79958/122310 [2:41:53<1:02:19, 11.32it/s][A
 65%|████████████████████▉           | 79964/122310 [2:41:54<1:07:55, 10.39

step: 9700, loss: 95.20225755271899, epoch: 0



 65%|████████████████████▉           | 80028/122310 [2:42:01<1:25:11,  8.27it/s][A
 65%|████████████████████▉           | 80039/122310 [2:42:01<1:08:21, 10.31it/s][A
 65%|████████████████████▉           | 80050/122310 [2:42:02<1:00:23, 11.66it/s][A
 65%|████████████████████▉           | 80053/122310 [2:42:03<1:14:34,  9.44it/s][A
 65%|████████████████████▉           | 80061/122310 [2:42:03<1:11:11,  9.89it/s][A
 65%|████████████████████▉           | 80070/122310 [2:42:04<1:06:16, 10.62it/s][A
 65%|████████████████████▉           | 80078/122310 [2:42:05<1:05:52, 10.68it/s][A
 65%|████████████████████▉           | 80083/122310 [2:42:06<1:13:26,  9.58it/s][A
 65%|████████████████████▉           | 80091/122310 [2:42:06<1:10:28,  9.98it/s][A
 65%|████████████████████▉           | 80100/122310 [2:42:07<1:05:58, 10.66it/s][A
 65%|████████████████████▉           | 80103/122310 [2:42:08<1:20:53,  8.70it/s][A
 65%|████████████████████▉           | 80110/122310 [2:42:09<1:18:32,  8.96

step: 9720, loss: 85.46553219895031, epoch: 0



 66%|████████████████████▉           | 80193/122310 [2:42:15<1:22:08,  8.55it/s][A
 66%|████████████████████▉           | 80206/122310 [2:42:16<1:02:46, 11.18it/s][A
 66%|████████████████████▉           | 80211/122310 [2:42:17<1:11:27,  9.82it/s][A
 66%|████████████████████▉           | 80216/122310 [2:42:18<1:19:28,  8.83it/s][A
 66%|████████████████████▉           | 80222/122310 [2:42:18<1:21:49,  8.57it/s][A
 66%|████████████████████▉           | 80229/122310 [2:42:19<1:19:58,  8.77it/s][A
 66%|██████████████████████▎           | 80245/122310 [2:42:20<55:58, 12.52it/s][A
 66%|██████████████████████▎           | 80254/122310 [2:42:21<56:42, 12.36it/s][A
 66%|██████████████████████▎           | 80262/122310 [2:42:21<59:16, 11.82it/s][A
 66%|██████████████████████▎           | 80277/122310 [2:42:22<49:26, 14.17it/s][A
 66%|██████████████████████▎           | 80287/122310 [2:42:23<50:25, 13.89it/s][A
 66%|█████████████████████           | 80292/122310 [2:42:24<1:00:02, 11.66

step: 9740, loss: 123.64682083855354, epoch: 0



 66%|█████████████████████           | 80369/122310 [2:42:30<1:17:39,  9.00it/s][A
 66%|█████████████████████           | 80379/122310 [2:42:31<1:07:49, 10.30it/s][A
 66%|█████████████████████           | 80389/122310 [2:42:32<1:02:25, 11.19it/s][A
 66%|█████████████████████           | 80391/122310 [2:42:33<1:21:03,  8.62it/s][A
 66%|█████████████████████           | 80400/122310 [2:42:33<1:12:35,  9.62it/s][A
 66%|█████████████████████           | 80405/122310 [2:42:34<1:20:02,  8.72it/s][A
 66%|██████████████████████▎           | 80421/122310 [2:42:35<55:57, 12.48it/s][A
 66%|██████████████████████▎           | 80435/122310 [2:42:36<48:42, 14.33it/s][A
 66%|██████████████████████▎           | 80458/122310 [2:42:36<36:31, 19.09it/s][A
 66%|██████████████████████▎           | 80466/122310 [2:42:37<41:57, 16.62it/s][A
 66%|██████████████████████▎           | 80470/122310 [2:42:38<52:51, 13.19it/s][A
 66%|█████████████████████           | 80473/122310 [2:42:39<1:06:47, 10.44

step: 9760, loss: 87.26338153204641, epoch: 0



 66%|█████████████████████           | 80544/122310 [2:42:46<1:11:11,  9.78it/s][A
 66%|██████████████████████▍           | 80558/122310 [2:42:46<56:00, 12.43it/s][A
 66%|██████████████████████▍           | 80569/122310 [2:42:47<53:17, 13.05it/s][A
 66%|█████████████████████           | 80571/122310 [2:42:48<1:10:07,  9.92it/s][A
 66%|█████████████████████           | 80578/122310 [2:42:50<1:54:23,  6.08it/s][A
 66%|█████████████████████           | 80594/122310 [2:42:51<1:14:54,  9.28it/s][A
 66%|█████████████████████           | 80602/122310 [2:42:52<1:12:26,  9.60it/s][A
 66%|██████████████████████▍           | 80616/122310 [2:42:52<58:45, 11.83it/s][A
 66%|██████████████████████▍           | 80630/122310 [2:42:53<51:09, 13.58it/s][A
 66%|██████████████████████▍           | 80639/122310 [2:42:54<52:54, 13.13it/s][A
 66%|██████████████████████▍           | 80649/122310 [2:42:55<52:37, 13.19it/s][A
 66%|██████████████████████▍           | 80655/122310 [2:42:55<59:19, 11.70

step: 9780, loss: 86.19277853977718, epoch: 0



 66%|██████████████████████▍           | 80714/122310 [2:43:01<56:46, 12.21it/s][A
 66%|█████████████████████           | 80716/122310 [2:43:01<1:13:53,  9.38it/s][A
 66%|█████████████████████           | 80721/122310 [2:43:02<1:21:02,  8.55it/s][A
 66%|█████████████████████           | 80724/122310 [2:43:03<1:35:25,  7.26it/s][A
 66%|█████████████████████           | 80731/122310 [2:43:04<1:27:08,  7.95it/s][A
 66%|█████████████████████           | 80738/122310 [2:43:04<1:22:08,  8.44it/s][A
 66%|█████████████████████           | 80743/122310 [2:43:05<1:26:54,  7.97it/s][A
 66%|█████████████████████▏          | 80750/122310 [2:43:06<1:21:54,  8.46it/s][A
 66%|█████████████████████▏          | 80759/122310 [2:43:06<1:12:00,  9.62it/s][A
 66%|██████████████████████▍           | 80772/122310 [2:43:07<57:25, 12.06it/s][A
 66%|█████████████████████▏          | 80776/122310 [2:43:08<1:09:01, 10.03it/s][A
 66%|█████████████████████▏          | 80778/122310 [2:43:09<1:27:34,  7.90

step: 9800, loss: 101.49914596745671, epoch: 0



 66%|██████████████████████▍           | 80859/122310 [2:43:15<55:42, 12.40it/s][A
 66%|██████████████████████▍           | 80866/122310 [2:43:16<59:37, 11.58it/s][A
 66%|█████████████████████▏          | 80871/122310 [2:43:17<1:07:50, 10.18it/s][A
 66%|██████████████████████▍           | 80884/122310 [2:43:17<55:21, 12.47it/s][A
 66%|█████████████████████▏          | 80890/122310 [2:43:18<1:01:49, 11.17it/s][A
 66%|██████████████████████▍           | 80901/122310 [2:43:19<55:56, 12.34it/s][A
 66%|██████████████████████▍           | 80914/122310 [2:43:20<49:17, 14.00it/s][A
 66%|██████████████████████▍           | 80928/122310 [2:43:20<44:20, 15.56it/s][A
 66%|██████████████████████▍           | 80937/122310 [2:43:21<47:29, 14.52it/s][A
 66%|██████████████████████▌           | 80942/122310 [2:43:22<56:12, 12.27it/s][A
 66%|██████████████████████▌           | 80953/122310 [2:43:23<52:50, 13.05it/s][A
 66%|██████████████████████▌           | 80964/122310 [2:43:23<50:23, 13.67

step: 9820, loss: 102.01993533060893, epoch: 0



 66%|██████████████████████▌           | 81043/122310 [2:43:30<53:18, 12.90it/s][A
 66%|██████████████████████▌           | 81054/122310 [2:43:31<50:41, 13.57it/s][A
 66%|██████████████████████▌           | 81064/122310 [2:43:31<50:30, 13.61it/s][A
 66%|██████████████████████▌           | 81071/122310 [2:43:32<55:29, 12.38it/s][A
 66%|█████████████████████▏          | 81073/122310 [2:43:33<1:12:14,  9.51it/s][A
 66%|█████████████████████▏          | 81081/122310 [2:43:34<1:09:06,  9.94it/s][A
 66%|██████████████████████▌           | 81092/122310 [2:43:34<59:52, 11.47it/s][A
 66%|█████████████████████▏          | 81097/122310 [2:43:35<1:08:02, 10.09it/s][A
 66%|██████████████████████▌           | 81112/122310 [2:43:36<52:04, 13.19it/s][A
 66%|██████████████████████▌           | 81124/122310 [2:43:36<48:28, 14.16it/s][A
 66%|██████████████████████▌           | 81135/122310 [2:43:37<47:32, 14.43it/s][A
 66%|██████████████████████▌           | 81141/122310 [2:43:38<54:40, 12.55

step: 9840, loss: 88.85819611160514, epoch: 0



 66%|█████████████████████▏          | 81195/122310 [2:43:45<2:13:48,  5.12it/s][A
 66%|█████████████████████▏          | 81202/122310 [2:43:46<1:49:40,  6.25it/s][A
 66%|█████████████████████▏          | 81206/122310 [2:43:47<1:53:32,  6.03it/s][A
 66%|█████████████████████▏          | 81213/122310 [2:43:47<1:37:46,  7.01it/s][A
 66%|█████████████████████▎          | 81224/122310 [2:43:48<1:14:03,  9.25it/s][A
 66%|█████████████████████▎          | 81233/122310 [2:43:49<1:07:44, 10.11it/s][A
 66%|█████████████████████▎          | 81237/122310 [2:43:50<1:18:12,  8.75it/s][A
 66%|█████████████████████▎          | 81247/122310 [2:43:50<1:07:02, 10.21it/s][A
 66%|█████████████████████▎          | 81251/122310 [2:43:51<1:17:53,  8.78it/s][A
 66%|█████████████████████▎          | 81259/122310 [2:43:52<1:12:28,  9.44it/s][A
 66%|█████████████████████▎          | 81268/122310 [2:43:53<1:06:33, 10.28it/s][A
 66%|█████████████████████▎          | 81275/122310 [2:43:53<1:07:57, 10.06

step: 9860, loss: 117.28774439876412, epoch: 0



 66%|█████████████████████▎          | 81315/122310 [2:43:59<2:05:15,  5.45it/s][A
 66%|█████████████████████▎          | 81318/122310 [2:44:00<2:11:40,  5.19it/s][A
 66%|█████████████████████▎          | 81334/122310 [2:44:01<1:15:37,  9.03it/s][A
 67%|█████████████████████▎          | 81343/122310 [2:44:01<1:09:05,  9.88it/s][A
 67%|█████████████████████▎          | 81350/122310 [2:44:02<1:09:35,  9.81it/s][A
 67%|██████████████████████▌           | 81366/122310 [2:44:03<52:07, 13.09it/s][A
 67%|██████████████████████▌           | 81376/122310 [2:44:04<51:26, 13.26it/s][A
 67%|██████████████████████▌           | 81388/122310 [2:44:04<48:07, 14.17it/s][A
 67%|██████████████████████▋           | 81393/122310 [2:44:05<56:36, 12.05it/s][A
 67%|█████████████████████▎          | 81400/122310 [2:44:06<1:00:14, 11.32it/s][A
 67%|█████████████████████▎          | 81407/122310 [2:44:06<1:03:09, 10.79it/s][A
 67%|██████████████████████▋           | 81424/122310 [2:44:07<47:23, 14.38

step: 9880, loss: 116.51379087009671, epoch: 0



 67%|█████████████████████▎          | 81485/122310 [2:44:14<1:18:19,  8.69it/s][A
 67%|█████████████████████▎          | 81496/122310 [2:44:15<1:05:45, 10.35it/s][A
 67%|█████████████████████▎          | 81501/122310 [2:44:15<1:12:13,  9.42it/s][A
 67%|██████████████████████▋           | 81514/122310 [2:44:16<57:50, 11.76it/s][A
 67%|██████████████████████▋           | 81523/122310 [2:44:17<57:03, 11.91it/s][A
 67%|██████████████████████▋           | 81535/122310 [2:44:17<51:29, 13.20it/s][A
 67%|██████████████████████▋           | 81542/122310 [2:44:18<56:00, 12.13it/s][A
 67%|█████████████████████▎          | 81547/122310 [2:44:19<1:04:09, 10.59it/s][A
 67%|█████████████████████▎          | 81551/122310 [2:44:20<1:15:57,  8.94it/s][A
 67%|█████████████████████▎          | 81559/122310 [2:44:20<1:12:00,  9.43it/s][A
 67%|█████████████████████▎          | 81569/122310 [2:44:21<1:04:15, 10.57it/s][A
 67%|█████████████████████▎          | 81573/122310 [2:44:22<1:14:25,  9.12

step: 9900, loss: 72.2862213832963, epoch: 0



 67%|██████████████████████▋           | 81643/122310 [2:44:28<56:52, 11.92it/s][A
 67%|██████████████████████▋           | 81654/122310 [2:44:29<52:20, 12.94it/s][A
 67%|██████████████████████▋           | 81661/122310 [2:44:30<56:32, 11.98it/s][A
 67%|██████████████████████▋           | 81668/122310 [2:44:31<59:45, 11.34it/s][A
 67%|█████████████████████▎          | 81676/122310 [2:44:31<1:00:10, 11.25it/s][A
 67%|█████████████████████▎          | 81681/122310 [2:44:32<1:08:21,  9.91it/s][A
 67%|█████████████████████▎          | 81691/122310 [2:44:33<1:00:58, 11.10it/s][A
 67%|██████████████████████▋           | 81700/122310 [2:44:33<58:45, 11.52it/s][A
 67%|██████████████████████▋           | 81709/122310 [2:44:34<57:11, 11.83it/s][A
 67%|█████████████████████▍          | 81713/122310 [2:44:35<1:07:55,  9.96it/s][A
 67%|██████████████████████▋           | 81724/122310 [2:44:36<58:33, 11.55it/s][A
 67%|██████████████████████▋           | 81732/122310 [2:44:36<59:31, 11.36

step: 9920, loss: 92.18718135482224, epoch: 0



 67%|█████████████████████▍          | 81801/122310 [2:44:43<1:00:09, 11.22it/s][A
 67%|█████████████████████▍          | 81803/122310 [2:44:44<1:14:53,  9.02it/s][A
 67%|██████████████████████▋           | 81817/122310 [2:44:44<56:49, 11.88it/s][A
 67%|██████████████████████▋           | 81824/122310 [2:44:45<59:45, 11.29it/s][A
 67%|██████████████████████▋           | 81833/122310 [2:44:46<58:02, 11.62it/s][A
 67%|█████████████████████▍          | 81835/122310 [2:44:46<1:14:35,  9.04it/s][A
 67%|█████████████████████▍          | 81839/122310 [2:44:47<1:24:03,  8.02it/s][A
 67%|█████████████████████▍          | 81848/122310 [2:44:48<1:12:11,  9.34it/s][A
 67%|██████████████████████▊           | 81861/122310 [2:44:49<56:34, 11.91it/s][A
 67%|██████████████████████▊           | 81869/122310 [2:44:49<57:42, 11.68it/s][A
 67%|█████████████████████▍          | 81875/122310 [2:44:50<1:03:01, 10.69it/s][A
 67%|██████████████████████▊           | 81888/122310 [2:44:51<52:21, 12.87

step: 9940, loss: 82.90937720719758, epoch: 0



 67%|██████████████████████▊           | 81955/122310 [2:44:57<52:25, 12.83it/s][A
 67%|██████████████████████▊           | 81963/122310 [2:44:58<54:30, 12.34it/s][A
 67%|██████████████████████▊           | 81972/122310 [2:44:59<54:11, 12.41it/s][A
 67%|██████████████████████▊           | 81984/122310 [2:44:59<49:03, 13.70it/s][A
 67%|██████████████████████▊           | 82001/122310 [2:45:00<40:19, 16.66it/s][A
 67%|██████████████████████▊           | 82003/122310 [2:45:01<53:57, 12.45it/s][A
 67%|██████████████████████▊           | 82018/122310 [2:45:02<44:55, 14.95it/s][A
 67%|██████████████████████▊           | 82029/122310 [2:45:02<44:42, 15.02it/s][A
 67%|██████████████████████▊           | 82033/122310 [2:45:03<55:04, 12.19it/s][A
 67%|██████████████████████▊           | 82041/122310 [2:45:04<56:46, 11.82it/s][A
 67%|█████████████████████▍          | 82046/122310 [2:45:04<1:05:08, 10.30it/s][A
 67%|██████████████████████▊           | 82059/122310 [2:45:05<54:35, 12.29

step: 9960, loss: 75.67924825206184, epoch: 0



 67%|█████████████████████▍          | 82120/122310 [2:45:12<1:08:18,  9.81it/s][A
 67%|█████████████████████▍          | 82130/122310 [2:45:13<1:00:38, 11.04it/s][A
 67%|██████████████████████▊           | 82140/122310 [2:45:13<56:18, 11.89it/s][A
 67%|██████████████████████▊           | 82151/122310 [2:45:14<51:54, 12.89it/s][A
 67%|██████████████████████▊           | 82166/122310 [2:45:15<43:56, 15.23it/s][A
 67%|██████████████████████▊           | 82180/122310 [2:45:16<40:33, 16.49it/s][A
 67%|██████████████████████▊           | 82185/122310 [2:45:16<49:02, 13.64it/s][A
 67%|██████████████████████▊           | 82195/122310 [2:45:17<48:44, 13.72it/s][A
 67%|██████████████████████▊           | 82203/122310 [2:45:18<51:45, 12.91it/s][A
 67%|██████████████████████▊           | 82209/122310 [2:45:18<58:00, 11.52it/s][A
 67%|██████████████████████▊           | 82220/122310 [2:45:19<52:53, 12.63it/s][A
 67%|██████████████████████▊           | 82228/122310 [2:45:20<54:51, 12.18

step: 9980, loss: 93.41937695288871, epoch: 0



 67%|█████████████████████▌          | 82288/122310 [2:45:26<1:27:25,  7.63it/s][A
 67%|█████████████████████▌          | 82299/122310 [2:45:27<1:08:39,  9.71it/s][A
 67%|█████████████████████▌          | 82306/122310 [2:45:28<1:08:35,  9.72it/s][A
 67%|█████████████████████▌          | 82309/122310 [2:45:29<1:21:47,  8.15it/s][A
 67%|█████████████████████▌          | 82321/122310 [2:45:29<1:02:51, 10.60it/s][A
 67%|██████████████████████▉           | 82332/122310 [2:45:30<55:47, 11.94it/s][A
 67%|██████████████████████▉           | 82341/122310 [2:45:31<55:05, 12.09it/s][A
 67%|██████████████████████▉           | 82356/122310 [2:45:31<45:27, 14.65it/s][A
 67%|██████████████████████▉           | 82370/122310 [2:45:32<41:37, 15.99it/s][A
 67%|██████████████████████▉           | 82375/122310 [2:45:33<49:54, 13.33it/s][A
 67%|██████████████████████▉           | 82389/122310 [2:45:34<43:55, 15.15it/s][A
 67%|██████████████████████▉           | 82402/122310 [2:45:34<41:32, 16.01

step: 10000, loss: 94.82919569548824, epoch: 0
sim1 and sim2 are 0.6243511241360442, 0.17167903256546202
cosine of pred and queen: 0.1513763411196059
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: korea
Actual: beijing:china::tokyo:japan, pred: asia
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: agreed
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: country
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: flight
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: crop
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: country
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: belou
Actual: maharastra:

Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: ever
Actual: islamabad:pakistan::oslo:norway, pred: state
Actual: grandfather:grandmother::father:mother, pred: son
Actual: grandpa:grandma::sons:daughters, pred: men
Actual: king:queen::husband:wife, pred: htay
Actual: man:woman::brothers:sisters, pred: jethlah
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: indian
Actual: tripura:agartala::odisha:bhubaneswar, pred: cambrics
Actual: algeria:dinar::japan:yen, pred: holyhead
Actual: argentina:peso::japan:yen, pred: dollar
Actual: india:rupee::denmar


 67%|████████████████████▉          | 82455/122310 [2:46:55<37:46:06,  3.41s/it][A
 67%|████████████████████▉          | 82468/122310 [2:46:56<21:00:22,  1.90s/it][A
 67%|████████████████████▉          | 82483/122310 [2:46:56<12:16:14,  1.11s/it][A
 67%|█████████████████████▌          | 82492/122310 [2:46:57<9:13:12,  1.20it/s][A
 67%|█████████████████████▌          | 82499/122310 [2:46:58<7:21:54,  1.50it/s][A
 67%|█████████████████████▌          | 82510/122310 [2:46:58<5:06:10,  2.17it/s][A
 67%|█████████████████████▌          | 82522/122310 [2:46:59<3:33:40,  3.10it/s][A
 67%|█████████████████████▌          | 82529/122310 [2:47:00<3:00:54,  3.66it/s][A
 67%|█████████████████████▌          | 82540/122310 [2:47:01<2:14:40,  4.92it/s][A
 67%|█████████████████████▌          | 82550/122310 [2:47:01<1:48:19,  6.12it/s][A
 68%|█████████████████████▌          | 82560/122310 [2:47:02<1:29:38,  7.39it/s][A
 68%|█████████████████████▌          | 82575/122310 [2:47:03<1:06:49,  9.91

step: 10020, loss: 92.0723635142923, epoch: 0



 68%|██████████████████████▉           | 82677/122310 [2:47:10<52:14, 12.64it/s][A
 68%|██████████████████████▉           | 82684/122310 [2:47:11<55:56, 11.81it/s][A
 68%|█████████████████████▋          | 82690/122310 [2:47:11<1:01:19, 10.77it/s][A
 68%|█████████████████████▋          | 82698/122310 [2:47:12<1:00:12, 10.97it/s][A
 68%|██████████████████████▉           | 82707/122310 [2:47:13<57:52, 11.40it/s][A
 68%|█████████████████████▋          | 82710/122310 [2:47:17<2:56:16,  3.74it/s][A
 68%|█████████████████████▋          | 82720/122310 [2:47:18<2:05:41,  5.25it/s][A
 68%|█████████████████████▋          | 82731/122310 [2:47:18<1:34:10,  7.00it/s][A
 68%|█████████████████████▋          | 82735/122310 [2:47:19<1:38:13,  6.71it/s][A
 68%|█████████████████████▋          | 82744/122310 [2:47:20<1:22:07,  8.03it/s][A
 68%|█████████████████████▋          | 82756/122310 [2:47:21<1:05:01, 10.14it/s][A
 68%|███████████████████████           | 82769/122310 [2:47:21<54:01, 12.20

step: 10040, loss: 106.57597145813892, epoch: 0



 68%|███████████████████████           | 82805/122310 [2:47:24<54:42, 12.04it/s][A
 68%|█████████████████████▋          | 82809/122310 [2:47:25<1:04:45, 10.17it/s][A
 68%|█████████████████████▋          | 82818/122310 [2:47:26<1:00:32, 10.87it/s][A
 68%|███████████████████████           | 82829/122310 [2:47:26<54:02, 12.18it/s][A
 68%|███████████████████████           | 82836/122310 [2:47:27<57:24, 11.46it/s][A
 68%|███████████████████████           | 82844/122310 [2:47:28<57:52, 11.36it/s][A
 68%|█████████████████████▋          | 82850/122310 [2:47:28<1:02:31, 10.52it/s][A
 68%|███████████████████████           | 82870/122310 [2:47:29<41:51, 15.71it/s][A
 68%|███████████████████████           | 82880/122310 [2:47:30<43:33, 15.09it/s][A
 68%|███████████████████████           | 82888/122310 [2:47:31<47:12, 13.92it/s][A
 68%|█████████████████████▋          | 82890/122310 [2:47:31<1:02:09, 10.57it/s][A
 68%|███████████████████████           | 82901/122310 [2:47:32<54:37, 12.02

step: 10060, loss: 82.76949024777124, epoch: 0



 68%|█████████████████████▋          | 82970/122310 [2:47:39<1:10:57,  9.24it/s][A
 68%|█████████████████████▋          | 82973/122310 [2:47:39<1:24:15,  7.78it/s][A
 68%|█████████████████████▋          | 82982/122310 [2:47:40<1:11:01,  9.23it/s][A
 68%|█████████████████████▋          | 82992/122310 [2:47:41<1:01:42, 10.62it/s][A
 68%|███████████████████████           | 83008/122310 [2:47:41<46:15, 14.16it/s][A
 68%|███████████████████████           | 83018/122310 [2:47:42<46:27, 14.10it/s][A
 68%|███████████████████████           | 83025/122310 [2:47:43<51:19, 12.76it/s][A
 68%|███████████████████████           | 83036/122310 [2:47:44<48:24, 13.52it/s][A
 68%|█████████████████████▋          | 83039/122310 [2:47:44<1:00:50, 10.76it/s][A
 68%|█████████████████████▋          | 83046/122310 [2:47:45<1:02:31, 10.47it/s][A
 68%|█████████████████████▋          | 83051/122310 [2:47:46<1:09:28,  9.42it/s][A
 68%|█████████████████████▋          | 83052/122310 [2:47:46<1:33:20,  7.01

step: 10080, loss: 89.40217635442237, epoch: 0



 68%|███████████████████████           | 83126/122310 [2:47:53<59:57, 10.89it/s][A
 68%|█████████████████████▊          | 83133/122310 [2:47:54<1:02:00, 10.53it/s][A
 68%|█████████████████████▊          | 83135/122310 [2:47:54<1:19:13,  8.24it/s][A
 68%|█████████████████████▊          | 83139/122310 [2:47:55<1:27:57,  7.42it/s][A
 68%|█████████████████████▊          | 83141/122310 [2:47:56<1:47:51,  6.05it/s][A
 68%|█████████████████████▊          | 83150/122310 [2:47:57<1:22:10,  7.94it/s][A
 68%|█████████████████████▊          | 83159/122310 [2:47:57<1:10:03,  9.31it/s][A
 68%|█████████████████████▊          | 83168/122310 [2:47:58<1:03:17, 10.31it/s][A
 68%|███████████████████████           | 83179/122310 [2:47:59<55:14, 11.81it/s][A
 68%|███████████████████████           | 83187/122310 [2:47:59<56:41, 11.50it/s][A
 68%|█████████████████████▊          | 83189/122310 [2:48:00<1:12:52,  8.95it/s][A
 68%|███████████████████████▏          | 83201/122310 [2:48:01<58:02, 11.23

step: 10100, loss: 87.1519442423357, epoch: 0



 68%|███████████████████████▏          | 83277/122310 [2:48:07<59:53, 10.86it/s][A
 68%|█████████████████████▊          | 83284/122310 [2:48:08<1:01:15, 10.62it/s][A
 68%|███████████████████████▏          | 83294/122310 [2:48:09<56:36, 11.49it/s][A
 68%|█████████████████████▊          | 83299/122310 [2:48:09<1:03:20, 10.27it/s][A
 68%|█████████████████████▊          | 83307/122310 [2:48:10<1:01:49, 10.51it/s][A
 68%|███████████████████████▏          | 83322/122310 [2:48:11<48:13, 13.48it/s][A
 68%|███████████████████████▏          | 83341/122310 [2:48:12<37:44, 17.21it/s][A
 68%|███████████████████████▏          | 83351/122310 [2:48:12<39:57, 16.25it/s][A
 68%|███████████████████████▏          | 83358/122310 [2:48:13<45:34, 14.25it/s][A
 68%|███████████████████████▏          | 83366/122310 [2:48:14<48:44, 13.32it/s][A
 68%|███████████████████████▏          | 83373/122310 [2:48:14<52:58, 12.25it/s][A
 68%|███████████████████████▏          | 83381/122310 [2:48:15<54:32, 11.90

step: 10120, loss: 82.46885154728844, epoch: 0



 68%|█████████████████████▊          | 83435/122310 [2:48:22<1:12:02,  8.99it/s][A
 68%|█████████████████████▊          | 83440/122310 [2:48:22<1:17:13,  8.39it/s][A
 68%|█████████████████████▊          | 83446/122310 [2:48:23<1:17:11,  8.39it/s][A
 68%|███████████████████████▏          | 83459/122310 [2:48:24<57:24, 11.28it/s][A
 68%|███████████████████████▏          | 83470/122310 [2:48:25<51:49, 12.49it/s][A
 68%|███████████████████████▏          | 83487/122310 [2:48:25<40:55, 15.81it/s][A
 68%|███████████████████████▏          | 83501/122310 [2:48:26<38:12, 16.93it/s][A
 68%|███████████████████████▏          | 83507/122310 [2:48:27<45:10, 14.31it/s][A
 68%|███████████████████████▏          | 83512/122310 [2:48:27<53:15, 12.14it/s][A
 68%|███████████████████████▏          | 83520/122310 [2:48:28<54:41, 11.82it/s][A
 68%|█████████████████████▊          | 83525/122310 [2:48:29<1:02:15, 10.38it/s][A
 68%|█████████████████████▊          | 83532/122310 [2:48:30<1:03:27, 10.19

step: 10140, loss: 78.19228436255955, epoch: 0



 68%|███████████████████████▏          | 83610/122310 [2:48:36<43:24, 14.86it/s][A
 68%|███████████████████████▏          | 83619/122310 [2:48:37<45:31, 14.17it/s][A
 68%|███████████████████████▏          | 83624/122310 [2:48:38<53:39, 12.02it/s][A
 68%|███████████████████████▏          | 83633/122310 [2:48:38<53:02, 12.15it/s][A
 68%|███████████████████████▎          | 83642/122310 [2:48:39<52:31, 12.27it/s][A
 68%|███████████████████████▎          | 83652/122310 [2:48:40<50:32, 12.75it/s][A
 68%|███████████████████████▎          | 83662/122310 [2:48:40<49:22, 13.05it/s][A
 68%|███████████████████████▎          | 83666/122310 [2:48:41<59:40, 10.79it/s][A
 68%|█████████████████████▉          | 83671/122310 [2:48:42<1:06:34,  9.67it/s][A
 68%|███████████████████████▎          | 83681/122310 [2:48:43<58:50, 10.94it/s][A
 68%|███████████████████████▎          | 83690/122310 [2:48:43<56:21, 11.42it/s][A
 68%|███████████████████████▎          | 83703/122310 [2:48:44<48:18, 13.32

step: 10160, loss: 90.53989476205108, epoch: 0



 69%|███████████████████████▎          | 83788/122310 [2:48:50<50:37, 12.68it/s][A
 69%|███████████████████████▎          | 83803/122310 [2:48:51<42:22, 15.15it/s][A
 69%|███████████████████████▎          | 83815/122310 [2:48:52<41:15, 15.55it/s][A
 69%|███████████████████████▎          | 83820/122310 [2:48:53<49:14, 13.03it/s][A
 69%|███████████████████████▎          | 83829/122310 [2:48:53<49:49, 12.87it/s][A
 69%|█████████████████████▉          | 83832/122310 [2:48:54<1:02:45, 10.22it/s][A
 69%|█████████████████████▉          | 83837/122310 [2:48:55<1:09:26,  9.23it/s][A
 69%|█████████████████████▉          | 83843/122310 [2:48:56<1:11:21,  8.98it/s][A
 69%|█████████████████████▉          | 83853/122310 [2:48:56<1:01:07, 10.48it/s][A
 69%|███████████████████████▎          | 83864/122310 [2:48:57<53:41, 11.93it/s][A
 69%|███████████████████████▎          | 83879/122310 [2:48:58<43:55, 14.58it/s][A
 69%|███████████████████████▎          | 83885/122310 [2:48:58<50:16, 12.74

step: 10180, loss: 86.42538117415232, epoch: 0



 69%|███████████████████████▎          | 83967/122310 [2:49:05<56:54, 11.23it/s][A
 69%|█████████████████████▉          | 83973/122310 [2:49:06<1:01:33, 10.38it/s][A
 69%|█████████████████████▉          | 83981/122310 [2:49:06<1:00:16, 10.60it/s][A
 69%|███████████████████████▎          | 83992/122310 [2:49:07<53:06, 12.02it/s][A
 69%|███████████████████████▎          | 84005/122310 [2:49:08<46:09, 13.83it/s][A
 69%|███████████████████████▎          | 84012/122310 [2:49:08<50:51, 12.55it/s][A
 69%|█████████████████████▉          | 84015/122310 [2:49:09<1:03:30, 10.05it/s][A
 69%|███████████████████████▎          | 84027/122310 [2:49:10<53:11, 12.00it/s][A
 69%|███████████████████████▎          | 84035/122310 [2:49:11<54:12, 11.77it/s][A
 69%|███████████████████████▎          | 84042/122310 [2:49:11<57:13, 11.15it/s][A
 69%|███████████████████████▎          | 84053/122310 [2:49:12<51:31, 12.38it/s][A
 69%|███████████████████████▎          | 84061/122310 [2:49:13<53:16, 11.97

step: 10200, loss: 113.00564214241375, epoch: 0



 69%|███████████████████████▍          | 84129/122310 [2:49:19<56:26, 11.27it/s][A
 69%|██████████████████████          | 84134/122310 [2:49:20<1:03:39,  9.99it/s][A
 69%|██████████████████████          | 84141/122310 [2:49:21<1:04:17,  9.90it/s][A
 69%|███████████████████████▍          | 84151/122310 [2:49:21<57:18, 11.10it/s][A
 69%|███████████████████████▍          | 84160/122310 [2:49:22<55:14, 11.51it/s][A
 69%|███████████████████████▍          | 84167/122310 [2:49:23<58:07, 10.94it/s][A
 69%|███████████████████████▍          | 84177/122310 [2:49:24<53:47, 11.81it/s][A
 69%|███████████████████████▍          | 84185/122310 [2:49:24<55:00, 11.55it/s][A
 69%|███████████████████████▍          | 84195/122310 [2:49:25<51:41, 12.29it/s][A
 69%|███████████████████████▍          | 84203/122310 [2:49:26<53:17, 11.92it/s][A
 69%|███████████████████████▍          | 84221/122310 [2:49:27<40:25, 15.70it/s][A
 69%|███████████████████████▍          | 84224/122310 [2:49:27<51:43, 12.27

step: 10220, loss: 72.37454881058856, epoch: 0



 69%|███████████████████████▍          | 84314/122310 [2:49:34<47:40, 13.28it/s][A
 69%|███████████████████████▍          | 84325/122310 [2:49:34<45:35, 13.89it/s][A
 69%|███████████████████████▍          | 84336/122310 [2:49:35<44:25, 14.24it/s][A
 69%|███████████████████████▍          | 84341/122310 [2:49:36<52:40, 12.01it/s][A
 69%|███████████████████████▍          | 84346/122310 [2:49:37<59:59, 10.55it/s][A
 69%|███████████████████████▍          | 84354/122310 [2:49:37<59:19, 10.66it/s][A
 69%|███████████████████████▍          | 84373/122310 [2:49:38<41:15, 15.32it/s][A
 69%|███████████████████████▍          | 84377/122310 [2:49:39<50:50, 12.43it/s][A
 69%|███████████████████████▍          | 84386/122310 [2:49:40<50:54, 12.41it/s][A
 69%|███████████████████████▍          | 84400/122310 [2:49:40<43:28, 14.54it/s][A
 69%|███████████████████████▍          | 84415/122310 [2:49:41<38:33, 16.38it/s][A
 69%|███████████████████████▍          | 84422/122310 [2:49:42<43:57, 14.36

step: 10240, loss: 82.02821206344845, epoch: 0



 69%|██████████████████████          | 84481/122310 [2:49:48<1:10:17,  8.97it/s][A
 69%|███████████████████████▍          | 84492/122310 [2:49:49<58:08, 10.84it/s][A
 69%|███████████████████████▍          | 84504/122310 [2:49:50<50:06, 12.57it/s][A
 69%|███████████████████████▍          | 84512/122310 [2:49:50<51:57, 12.13it/s][A
 69%|███████████████████████▍          | 84520/122310 [2:49:51<53:12, 11.84it/s][A
 69%|███████████████████████▍          | 84533/122310 [2:49:52<46:07, 13.65it/s][A
 69%|███████████████████████▍          | 84537/122310 [2:49:53<55:52, 11.27it/s][A
 69%|███████████████████████▌          | 84551/122310 [2:49:53<46:00, 13.68it/s][A
 69%|███████████████████████▌          | 84560/122310 [2:49:54<47:08, 13.35it/s][A
 69%|███████████████████████▌          | 84568/122310 [2:49:55<49:39, 12.67it/s][A
 69%|███████████████████████▌          | 84581/122310 [2:49:55<44:03, 14.27it/s][A
 69%|███████████████████████▌          | 84599/122310 [2:49:56<35:59, 17.46

step: 10260, loss: 84.98389365794777, epoch: 0



 69%|███████████████████████▌          | 84675/122310 [2:50:03<50:05, 12.52it/s][A
 69%|███████████████████████▌          | 84682/122310 [2:50:03<53:33, 11.71it/s][A
 69%|███████████████████████▌          | 84703/122310 [2:50:04<37:04, 16.91it/s][A
 69%|███████████████████████▌          | 84712/122310 [2:50:05<40:08, 15.61it/s][A
 69%|███████████████████████▌          | 84717/122310 [2:50:06<48:08, 13.02it/s][A
 69%|███████████████████████▌          | 84723/122310 [2:50:06<54:04, 11.59it/s][A
 69%|██████████████████████▏         | 84727/122310 [2:50:07<1:04:14,  9.75it/s][A
 69%|███████████████████████▌          | 84737/122310 [2:50:08<57:05, 10.97it/s][A
 69%|██████████████████████▏         | 84742/122310 [2:50:08<1:04:24,  9.72it/s][A
 69%|██████████████████████▏         | 84749/122310 [2:50:09<1:05:48,  9.51it/s][A
 69%|██████████████████████▏         | 84757/122310 [2:50:10<1:03:58,  9.78it/s][A
 69%|███████████████████████▌          | 84770/122310 [2:50:11<52:22, 11.95

step: 10280, loss: 98.65477107579329, epoch: 0



 69%|███████████████████████▌          | 84846/122310 [2:50:18<53:12, 11.73it/s][A
 69%|██████████████████████▏         | 84850/122310 [2:50:18<1:03:56,  9.76it/s][A
 69%|██████████████████████▏         | 84861/122310 [2:50:20<1:12:33,  8.60it/s][A
 69%|██████████████████████▏         | 84867/122310 [2:50:21<1:14:07,  8.42it/s][A
 69%|███████████████████████▌          | 84882/122310 [2:50:22<55:02, 11.33it/s][A
 69%|███████████████████████▌          | 84897/122310 [2:50:22<45:56, 13.57it/s][A
 69%|███████████████████████▌          | 84905/122310 [2:50:23<49:11, 12.67it/s][A
 69%|███████████████████████▌          | 84910/122310 [2:50:24<57:07, 10.91it/s][A
 69%|██████████████████████▏         | 84914/122310 [2:50:25<1:07:11,  9.27it/s][A
 69%|██████████████████████▏         | 84924/122310 [2:50:25<1:00:01, 10.38it/s][A
 69%|██████████████████████▏         | 84926/122310 [2:50:26<1:17:20,  8.06it/s][A
 69%|██████████████████████▏         | 84931/122310 [2:50:27<1:21:36,  7.63

step: 10300, loss: 96.78442081033751, epoch: 0



 69%|███████████████████████▋          | 85002/122310 [2:50:33<47:43, 13.03it/s][A
 70%|███████████████████████▋          | 85007/122310 [2:50:34<56:04, 11.09it/s][A
 70%|███████████████████████▋          | 85015/122310 [2:50:35<57:12, 10.86it/s][A
 70%|██████████████████████▏         | 85017/122310 [2:50:35<1:13:35,  8.45it/s][A
 70%|██████████████████████▏         | 85025/122310 [2:50:36<1:08:35,  9.06it/s][A
 70%|██████████████████████▏         | 85033/122310 [2:50:37<1:05:27,  9.49it/s][A
 70%|███████████████████████▋          | 85044/122310 [2:50:38<56:35, 10.97it/s][A
 70%|███████████████████████▋          | 85054/122310 [2:50:38<52:49, 11.76it/s][A
 70%|███████████████████████▋          | 85063/122310 [2:50:39<52:04, 11.92it/s][A
 70%|███████████████████████▋          | 85072/122310 [2:50:40<51:21, 12.08it/s][A
 70%|███████████████████████▋          | 85083/122310 [2:50:40<47:35, 13.04it/s][A
 70%|███████████████████████▋          | 85089/122310 [2:50:41<53:19, 11.63

step: 10320, loss: 88.51949221805604, epoch: 0



 70%|███████████████████████▋          | 85173/122310 [2:50:48<51:11, 12.09it/s][A
 70%|███████████████████████▋          | 85179/122310 [2:50:48<56:20, 10.98it/s][A
 70%|██████████████████████▎         | 85182/122310 [2:50:49<1:09:16,  8.93it/s][A
 70%|██████████████████████▎         | 85186/122310 [2:50:50<1:17:49,  7.95it/s][A
 70%|███████████████████████▋          | 85198/122310 [2:50:51<58:30, 10.57it/s][A
 70%|███████████████████████▋          | 85220/122310 [2:50:51<37:20, 16.55it/s][A
 70%|███████████████████████▋          | 85223/122310 [2:50:52<48:10, 12.83it/s][A
 70%|███████████████████████▋          | 85234/122310 [2:50:53<45:31, 13.57it/s][A
 70%|███████████████████████▋          | 85243/122310 [2:50:53<46:35, 13.26it/s][A
 70%|███████████████████████▋          | 85253/122310 [2:50:54<45:59, 13.43it/s][A
 70%|███████████████████████▋          | 85267/122310 [2:50:55<40:33, 15.22it/s][A
 70%|███████████████████████▋          | 85271/122310 [2:50:56<50:30, 12.22

step: 10340, loss: 79.72139265974958, epoch: 0



 70%|██████████████████████▎         | 85324/122310 [2:51:02<1:14:32,  8.27it/s][A
 70%|██████████████████████▎         | 85333/122310 [2:51:03<1:04:29,  9.56it/s][A
 70%|███████████████████████▋          | 85343/122310 [2:51:04<56:47, 10.85it/s][A
 70%|███████████████████████▋          | 85357/122310 [2:51:04<45:59, 13.39it/s][A
 70%|███████████████████████▋          | 85366/122310 [2:51:05<46:49, 13.15it/s][A
 70%|███████████████████████▋          | 85373/122310 [2:51:06<50:56, 12.08it/s][A
 70%|██████████████████████▎         | 85377/122310 [2:51:06<1:00:44, 10.13it/s][A
 70%|███████████████████████▋          | 85391/122310 [2:51:07<47:48, 12.87it/s][A
 70%|███████████████████████▋          | 85396/122310 [2:51:08<55:11, 11.15it/s][A
 70%|███████████████████████▋          | 85404/122310 [2:51:09<55:16, 11.13it/s][A
 70%|███████████████████████▋          | 85417/122310 [2:51:09<46:33, 13.21it/s][A
 70%|███████████████████████▋          | 85429/122310 [2:51:10<43:16, 14.21

step: 10360, loss: 78.462597519499, epoch: 0



 70%|███████████████████████▊          | 85511/122310 [2:51:17<50:57, 12.03it/s][A
 70%|██████████████████████▎         | 85515/122310 [2:51:17<1:00:31, 10.13it/s][A
 70%|███████████████████████▊          | 85527/122310 [2:51:18<50:37, 12.11it/s][A
 70%|███████████████████████▊          | 85539/122310 [2:51:19<45:29, 13.47it/s][A
 70%|███████████████████████▊          | 85544/122310 [2:51:19<53:11, 11.52it/s][A
 70%|███████████████████████▊          | 85553/122310 [2:51:20<51:51, 11.81it/s][A
 70%|███████████████████████▊          | 85561/122310 [2:51:21<52:50, 11.59it/s][A
 70%|███████████████████████▊          | 85574/122310 [2:51:22<45:18, 13.51it/s][A
 70%|███████████████████████▊          | 85583/122310 [2:51:22<46:19, 13.21it/s][A
 70%|███████████████████████▊          | 85596/122310 [2:51:23<41:43, 14.66it/s][A
 70%|███████████████████████▊          | 85606/122310 [2:51:24<42:25, 14.42it/s][A
 70%|███████████████████████▊          | 85613/122310 [2:51:25<47:06, 12.98

step: 10380, loss: 87.15746284042379, epoch: 0



 70%|███████████████████████▊          | 85689/122310 [2:51:31<58:38, 10.41it/s][A
 70%|███████████████████████▊          | 85697/122310 [2:51:32<57:12, 10.67it/s][A
 70%|███████████████████████▊          | 85706/122310 [2:51:32<54:28, 11.20it/s][A
 70%|███████████████████████▊          | 85717/122310 [2:51:33<49:08, 12.41it/s][A
 70%|███████████████████████▊          | 85728/122310 [2:51:34<45:55, 13.28it/s][A
 70%|███████████████████████▊          | 85735/122310 [2:51:35<50:07, 12.16it/s][A
 70%|███████████████████████▊          | 85742/122310 [2:51:35<53:15, 11.44it/s][A
 70%|███████████████████████▊          | 85753/122310 [2:51:36<48:23, 12.59it/s][A
 70%|███████████████████████▊          | 85764/122310 [2:51:37<45:34, 13.36it/s][A
 70%|███████████████████████▊          | 85769/122310 [2:51:38<53:26, 11.39it/s][A
 70%|███████████████████████▊          | 85778/122310 [2:51:38<52:03, 11.70it/s][A
 70%|███████████████████████▊          | 85788/122310 [2:51:39<49:12, 12.37

step: 10400, loss: 86.95775321094465, epoch: 0
sim1 and sim2 are 0.6428732689081716, 0.13523957670705786
cosine of pred and queen: 0.052481927312186455
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: korea
Actual: beijing:china::tokyo:japan, pred: asia
Actual: berlin:germany::rome:italy, pred: europe
Actual: cairo:egypt::ottawa:canada, pred: pressure
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: country
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: flight
Actual: lisbon:portugal::riga:latvia, pred: eurozone
Actual: india:asia::paris:europe, pred: crop
Actual: china:asia::greece:europe, pred: debt
Actual: nigeria:africa::france:europe, pred: country
Actual: kenya:africa::netherlands:europe, pred: country
Actual: mumbai:asia::nairobi:africa, pred: breather
Actual: maharastr


 70%|███████████████████████▊          | 85843/122310 [2:52:02<48:23, 12.56it/s][A

Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: country
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: prime
Actual: denmark:danish::germany:german, pred: debar
Accuracy is 0.044444444444444446
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: misrule
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: day
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: articulate
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:plays::listen:listens, pred: discarded
Actual: predict:predicts::search:searches, pred: pebbles
Actual: machine:machines::lion:lions, pred: thamer
Actual: mango:mangoes::onion:onions, pred: mier
Actual: man:men::mango:mangoes, pre


 70%|█████████████████████▊         | 85853/122310 [2:53:03<26:18:39,  2.60s/it][A

Actual: india:rupee::denmark:krone, pred: trackless
Accuracy is 0.023668639053254437



 70%|█████████████████████▊         | 85867/122310 [2:53:04<16:14:48,  1.60s/it][A
 70%|█████████████████████▊         | 85875/122310 [2:53:05<12:32:29,  1.24s/it][A
 70%|██████████████████████▍         | 85883/122310 [2:53:06<9:33:40,  1.06it/s][A
 70%|██████████████████████▍         | 85893/122310 [2:53:06<6:47:26,  1.49it/s][A
 70%|██████████████████████▍         | 85902/122310 [2:53:07<5:04:29,  1.99it/s][A
 70%|██████████████████████▍         | 85910/122310 [2:53:08<3:58:08,  2.55it/s][A
 70%|██████████████████████▍         | 85915/122310 [2:53:09<3:29:32,  2.89it/s][A
 70%|██████████████████████▍         | 85924/122310 [2:53:09<2:36:55,  3.86it/s][A
 70%|██████████████████████▍         | 85929/122310 [2:53:10<2:23:02,  4.24it/s][A
 70%|██████████████████████▍         | 85936/122310 [2:53:11<1:59:55,  5.06it/s][A
 70%|██████████████████████▍         | 85941/122310 [2:53:12<1:53:38,  5.33it/s][A
 70%|██████████████████████▍         | 85942/122310 [2:53:12<2:14:02,  4.52

step: 10420, loss: 100.85330336716507, epoch: 0



 70%|███████████████████████▉          | 86024/122310 [2:53:19<59:40, 10.13it/s][A
 70%|██████████████████████▌         | 86029/122310 [2:53:20<1:05:25,  9.24it/s][A
 70%|███████████████████████▉          | 86043/122310 [2:53:21<49:15, 12.27it/s][A
 70%|███████████████████████▉          | 86052/122310 [2:53:21<48:49, 12.37it/s][A
 70%|███████████████████████▉          | 86061/122310 [2:53:22<48:29, 12.46it/s][A
 70%|███████████████████████▉          | 86075/122310 [2:53:23<41:25, 14.58it/s][A
 70%|███████████████████████▉          | 86089/122310 [2:53:23<37:29, 16.10it/s][A
 70%|███████████████████████▉          | 86101/122310 [2:53:24<36:56, 16.34it/s][A
 70%|███████████████████████▉          | 86112/122310 [2:53:25<37:34, 16.05it/s][A
 70%|███████████████████████▉          | 86117/122310 [2:53:26<45:19, 13.31it/s][A
 70%|███████████████████████▉          | 86123/122310 [2:53:26<50:52, 11.85it/s][A
 70%|██████████████████████▌         | 86126/122310 [2:53:27<1:02:50,  9.60

step: 10440, loss: 96.11921930865705, epoch: 0



 71%|███████████████████████▉          | 86231/122310 [2:53:33<46:23, 12.96it/s][A
 71%|███████████████████████▉          | 86237/122310 [2:53:34<51:43, 11.63it/s][A
 71%|███████████████████████▉          | 86248/122310 [2:53:35<47:05, 12.76it/s][A
 71%|███████████████████████▉          | 86255/122310 [2:53:36<50:31, 11.89it/s][A
 71%|███████████████████████▉          | 86263/122310 [2:53:36<51:28, 11.67it/s][A
 71%|██████████████████████▌         | 86265/122310 [2:53:38<1:26:19,  6.96it/s][A
 71%|██████████████████████▌         | 86270/122310 [2:53:38<1:25:51,  7.00it/s][A
 71%|██████████████████████▌         | 86271/122310 [2:53:40<2:17:43,  4.36it/s][A
 71%|██████████████████████▌         | 86272/122310 [2:53:41<2:42:17,  3.70it/s][A
 71%|██████████████████████▌         | 86275/122310 [2:53:41<2:36:41,  3.83it/s][A
 71%|██████████████████████▌         | 86285/122310 [2:53:42<1:32:32,  6.49it/s][A
 71%|██████████████████████▌         | 86288/122310 [2:53:43<1:42:29,  5.86

step: 10460, loss: 94.04920402953117, epoch: 0



 71%|██████████████████████▌         | 86346/122310 [2:53:48<1:04:20,  9.31it/s][A
 71%|████████████████████████          | 86355/122310 [2:53:48<58:04, 10.32it/s][A
 71%|████████████████████████          | 86371/122310 [2:53:49<43:20, 13.82it/s][A
 71%|████████████████████████          | 86383/122310 [2:53:51<52:47, 11.34it/s][A
 71%|██████████████████████▌         | 86387/122310 [2:53:51<1:00:21,  9.92it/s][A
 71%|████████████████████████          | 86398/122310 [2:53:52<52:36, 11.38it/s][A
 71%|████████████████████████          | 86405/122310 [2:53:53<54:47, 10.92it/s][A
 71%|████████████████████████          | 86414/122310 [2:53:53<52:29, 11.40it/s][A
 71%|████████████████████████          | 86423/122310 [2:53:54<51:02, 11.72it/s][A
 71%|████████████████████████          | 86436/122310 [2:53:55<43:55, 13.61it/s][A
 71%|████████████████████████          | 86446/122310 [2:53:56<43:39, 13.69it/s][A
 71%|████████████████████████          | 86452/122310 [2:53:56<49:06, 12.17

step: 10480, loss: 80.33195363108243, epoch: 0



 71%|████████████████████████          | 86525/122310 [2:54:02<58:47, 10.14it/s][A
 71%|████████████████████████          | 86533/122310 [2:54:03<57:14, 10.42it/s][A
 71%|██████████████████████▋         | 86539/122310 [2:54:03<1:00:40,  9.83it/s][A
 71%|████████████████████████          | 86550/122310 [2:54:04<51:58, 11.47it/s][A
 71%|████████████████████████          | 86557/122310 [2:54:05<54:23, 10.96it/s][A
 71%|████████████████████████          | 86568/122310 [2:54:06<48:40, 12.24it/s][A
 71%|████████████████████████          | 86580/122310 [2:54:06<43:54, 13.56it/s][A
 71%|████████████████████████          | 86585/122310 [2:54:07<51:20, 11.60it/s][A
 71%|████████████████████████          | 86594/122310 [2:54:08<50:15, 11.85it/s][A
 71%|████████████████████████          | 86606/122310 [2:54:09<44:51, 13.27it/s][A
 71%|████████████████████████          | 86616/122310 [2:54:09<44:09, 13.47it/s][A
 71%|████████████████████████          | 86626/122310 [2:54:10<43:39, 13.62

step: 10500, loss: 95.80949969557724, epoch: 0



 71%|██████████████████████▋         | 86693/122310 [2:54:16<1:03:20,  9.37it/s][A
 71%|████████████████████████          | 86703/122310 [2:54:17<55:14, 10.74it/s][A
 71%|████████████████████████          | 86714/122310 [2:54:18<48:59, 12.11it/s][A
 71%|████████████████████████          | 86730/122310 [2:54:19<39:06, 15.16it/s][A
 71%|████████████████████████          | 86741/122310 [2:54:19<39:00, 15.19it/s][A
 71%|████████████████████████          | 86745/122310 [2:54:20<48:04, 12.33it/s][A
 71%|████████████████████████          | 86751/122310 [2:54:21<53:12, 11.14it/s][A
 71%|████████████████████████          | 86757/122310 [2:54:21<57:23, 10.32it/s][A
 71%|████████████████████████          | 86779/122310 [2:54:22<36:02, 16.43it/s][A
 71%|████████████████████████          | 86782/122310 [2:54:23<46:24, 12.76it/s][A
 71%|████████████████████████▏         | 86799/122310 [2:54:24<36:52, 16.05it/s][A
 71%|████████████████████████▏         | 86810/122310 [2:54:24<37:19, 15.85

step: 10520, loss: 82.79088600398296, epoch: 0



 71%|████████████████████████▏         | 86907/122310 [2:54:31<44:20, 13.31it/s][A
 71%|████████████████████████▏         | 86913/122310 [2:54:32<49:42, 11.87it/s][A
 71%|████████████████████████▏         | 86924/122310 [2:54:32<45:42, 12.90it/s][A
 71%|████████████████████████▏         | 86936/122310 [2:54:33<41:51, 14.09it/s][A
 71%|████████████████████████▏         | 86943/122310 [2:54:34<46:04, 12.79it/s][A
 71%|████████████████████████▏         | 86954/122310 [2:54:34<43:26, 13.56it/s][A
 71%|████████████████████████▏         | 86959/122310 [2:54:35<50:52, 11.58it/s][A
 71%|████████████████████████▏         | 86969/122310 [2:54:36<47:58, 12.28it/s][A
 71%|████████████████████████▏         | 86980/122310 [2:54:37<44:44, 13.16it/s][A
 71%|████████████████████████▏         | 86985/122310 [2:54:37<52:18, 11.26it/s][A
 71%|████████████████████████▏         | 86998/122310 [2:54:38<44:24, 13.25it/s][A
 71%|████████████████████████▏         | 87006/122310 [2:54:39<46:42, 12.60

step: 10540, loss: 152.7975933522879, epoch: 0



 71%|████████████████████████▏         | 87091/122310 [2:54:45<46:27, 12.63it/s][A
 71%|████████████████████████▏         | 87096/122310 [2:54:46<53:36, 10.95it/s][A
 71%|████████████████████████▏         | 87105/122310 [2:54:47<51:17, 11.44it/s][A
 71%|████████████████████████▏         | 87110/122310 [2:54:47<58:36, 10.01it/s][A
 71%|██████████████████████▊         | 87116/122310 [2:54:48<1:01:34,  9.53it/s][A
 71%|██████████████████████▊         | 87122/122310 [2:54:49<1:04:28,  9.10it/s][A
 71%|████████████████████████▏         | 87135/122310 [2:54:50<49:45, 11.78it/s][A
 71%|████████████████████████▏         | 87149/122310 [2:54:50<41:51, 14.00it/s][A
 71%|████████████████████████▏         | 87155/122310 [2:54:51<47:43, 12.28it/s][A
 71%|████████████████████████▏         | 87167/122310 [2:54:52<43:24, 13.49it/s][A
 71%|████████████████████████▏         | 87173/122310 [2:54:52<48:58, 11.96it/s][A
 71%|████████████████████████▏         | 87178/122310 [2:54:53<55:50, 10.49

step: 10560, loss: 97.9584673474584, epoch: 0



 71%|██████████████████████▊         | 87230/122310 [2:55:00<1:04:50,  9.02it/s][A
 71%|██████████████████████▊         | 87237/122310 [2:55:00<1:03:26,  9.21it/s][A
 71%|████████████████████████▎         | 87247/122310 [2:55:01<55:21, 10.56it/s][A
 71%|████████████████████████▎         | 87259/122310 [2:55:02<47:22, 12.33it/s][A
 71%|████████████████████████▎         | 87268/122310 [2:55:03<47:12, 12.37it/s][A
 71%|████████████████████████▎         | 87272/122310 [2:55:03<56:26, 10.35it/s][A
 71%|████████████████████████▎         | 87279/122310 [2:55:04<57:21, 10.18it/s][A
 71%|████████████████████████▎         | 87297/122310 [2:55:05<40:04, 14.56it/s][A
 71%|████████████████████████▎         | 87303/122310 [2:55:05<45:53, 12.71it/s][A
 71%|████████████████████████▎         | 87305/122310 [2:55:06<59:50,  9.75it/s][A
 71%|████████████████████████▎         | 87312/122310 [2:55:07<59:54,  9.74it/s][A
 71%|██████████████████████▊         | 87317/122310 [2:55:08<1:05:35,  8.89

step: 10580, loss: 90.50329599780311, epoch: 0



 71%|████████████████████████▎         | 87393/122310 [2:55:14<42:05, 13.83it/s][A
 71%|████████████████████████▎         | 87403/122310 [2:55:15<42:01, 13.84it/s][A
 71%|████████████████████████▎         | 87408/122310 [2:55:16<49:15, 11.81it/s][A
 71%|████████████████████████▎         | 87416/122310 [2:55:16<50:06, 11.61it/s][A
 71%|████████████████████████▎         | 87426/122310 [2:55:17<47:21, 12.28it/s][A
 71%|████████████████████████▎         | 87437/122310 [2:55:18<44:12, 13.15it/s][A
 71%|████████████████████████▎         | 87447/122310 [2:55:18<43:26, 13.38it/s][A
 72%|████████████████████████▎         | 87467/122310 [2:55:19<32:51, 17.68it/s][A
 72%|████████████████████████▎         | 87473/122310 [2:55:20<39:00, 14.88it/s][A
 72%|████████████████████████▎         | 87485/122310 [2:55:21<37:44, 15.38it/s][A
 72%|████████████████████████▎         | 87492/122310 [2:55:21<42:20, 13.70it/s][A
 72%|████████████████████████▎         | 87505/122310 [2:55:22<38:41, 14.99

step: 10600, loss: 89.0824732494029, epoch: 0



 72%|████████████████████████▎         | 87584/122310 [2:55:29<43:50, 13.20it/s][A
 72%|████████████████████████▎         | 87597/122310 [2:55:29<39:33, 14.63it/s][A
 72%|████████████████████████▎         | 87604/122310 [2:55:30<44:13, 13.08it/s][A
 72%|████████████████████████▎         | 87610/122310 [2:55:31<49:15, 11.74it/s][A
 72%|████████████████████████▎         | 87616/122310 [2:55:31<53:54, 10.73it/s][A
 72%|████████████████████████▎         | 87624/122310 [2:55:32<53:23, 10.83it/s][A
 72%|████████████████████████▎         | 87634/122310 [2:55:33<49:06, 11.77it/s][A
 72%|████████████████████████▎         | 87648/122310 [2:55:34<41:03, 14.07it/s][A
 72%|████████████████████████▎         | 87653/122310 [2:55:34<48:29, 11.91it/s][A
 72%|████████████████████████▎         | 87668/122310 [2:55:35<39:38, 14.56it/s][A
 72%|████████████████████████▎         | 87676/122310 [2:55:36<42:35, 13.55it/s][A
 72%|████████████████████████▎         | 87684/122310 [2:55:36<45:05, 12.80

step: 10620, loss: 96.67069555075133, epoch: 0



 72%|██████████████████████▉         | 87730/122310 [2:55:43<1:26:28,  6.67it/s][A
 72%|██████████████████████▉         | 87741/122310 [2:55:44<1:03:55,  9.01it/s][A
 72%|████████████████████████▍         | 87751/122310 [2:55:44<55:47, 10.32it/s][A
 72%|████████████████████████▍         | 87761/122310 [2:55:45<51:14, 11.24it/s][A
 72%|████████████████████████▍         | 87769/122310 [2:55:46<51:58, 11.08it/s][A
 72%|████████████████████████▍         | 87775/122310 [2:55:47<56:16, 10.23it/s][A
 72%|████████████████████████▍         | 87782/122310 [2:55:47<57:15, 10.05it/s][A
 72%|████████████████████████▍         | 87793/122310 [2:55:48<49:53, 11.53it/s][A
 72%|████████████████████████▍         | 87800/122310 [2:55:49<52:55, 10.87it/s][A
 72%|████████████████████████▍         | 87810/122310 [2:55:50<49:12, 11.68it/s][A
 72%|████████████████████████▍         | 87814/122310 [2:55:50<58:49,  9.77it/s][A
 72%|██████████████████████▉         | 87818/122310 [2:55:51<1:07:25,  8.53

step: 10640, loss: 88.49038674437797, epoch: 0



 72%|████████████████████████▍         | 87888/122310 [2:55:58<53:38, 10.70it/s][A
 72%|████████████████████████▍         | 87895/122310 [2:55:58<55:16, 10.38it/s][A
 72%|████████████████████████▍         | 87905/122310 [2:55:59<50:46, 11.29it/s][A
 72%|████████████████████████▍         | 87909/122310 [2:56:00<59:52,  9.58it/s][A
 72%|███████████████████████         | 87915/122310 [2:56:01<1:02:40,  9.15it/s][A
 72%|███████████████████████         | 87919/122310 [2:56:01<1:11:15,  8.04it/s][A
 72%|███████████████████████         | 87920/122310 [2:56:02<1:35:17,  6.01it/s][A
 72%|███████████████████████         | 87929/122310 [2:56:03<1:12:36,  7.89it/s][A
 72%|███████████████████████         | 87935/122310 [2:56:04<1:11:44,  7.98it/s][A
 72%|███████████████████████         | 87942/122310 [2:56:04<1:07:40,  8.46it/s][A
 72%|████████████████████████▍         | 87951/122310 [2:56:05<59:42,  9.59it/s][A
 72%|███████████████████████         | 87957/122310 [2:56:06<1:02:26,  9.17

step: 10660, loss: 120.25116952167373, epoch: 0



 72%|████████████████████████▍         | 88027/122310 [2:56:12<58:23,  9.79it/s][A
 72%|████████████████████████▍         | 88038/122310 [2:56:13<50:24, 11.33it/s][A
 72%|███████████████████████         | 88041/122310 [2:56:14<1:02:37,  9.12it/s][A
 72%|███████████████████████         | 88042/122310 [2:56:15<1:23:38,  6.83it/s][A
 72%|███████████████████████         | 88045/122310 [2:56:15<1:35:19,  5.99it/s][A
 72%|███████████████████████         | 88049/122310 [2:56:16<1:37:32,  5.85it/s][A
 72%|███████████████████████         | 88059/122310 [2:56:17<1:09:54,  8.17it/s][A
 72%|███████████████████████         | 88068/122310 [2:56:18<1:00:59,  9.36it/s][A
 72%|███████████████████████         | 88074/122310 [2:56:18<1:03:21,  9.01it/s][A
 72%|███████████████████████         | 88078/122310 [2:56:19<1:11:58,  7.93it/s][A
 72%|███████████████████████         | 88082/122310 [2:56:20<1:19:20,  7.19it/s][A
 72%|████████████████████████▍         | 88094/122310 [2:56:20<57:23,  9.94

step: 10680, loss: 73.08412402220235, epoch: 0



 72%|████████████████████████▌         | 88169/122310 [2:56:27<47:54, 11.88it/s][A
 72%|████████████████████████▌         | 88184/122310 [2:56:28<39:18, 14.47it/s][A
 72%|████████████████████████▌         | 88193/122310 [2:56:29<41:03, 13.85it/s][A
 72%|████████████████████████▌         | 88204/122310 [2:56:29<40:02, 14.20it/s][A
 72%|████████████████████████▌         | 88219/122310 [2:56:30<35:22, 16.06it/s][A
 72%|████████████████████████▌         | 88233/122310 [2:56:31<33:28, 16.96it/s][A
 72%|████████████████████████▌         | 88238/122310 [2:56:31<40:52, 13.89it/s][A
 72%|████████████████████████▌         | 88245/122310 [2:56:32<45:09, 12.57it/s][A
 72%|████████████████████████▌         | 88253/122310 [2:56:33<46:51, 12.11it/s][A
 72%|████████████████████████▌         | 88257/122310 [2:56:34<56:11, 10.10it/s][A
 72%|████████████████████████▌         | 88270/122310 [2:56:34<45:44, 12.40it/s][A
 72%|████████████████████████▌         | 88285/122310 [2:56:35<38:26, 14.75

step: 10700, loss: 98.19691521721047, epoch: 0



 72%|████████████████████████▌         | 88359/122310 [2:56:42<41:59, 13.48it/s][A
 72%|████████████████████████▌         | 88370/122310 [2:56:43<40:37, 13.92it/s][A
 72%|████████████████████████▌         | 88376/122310 [2:56:43<46:31, 12.15it/s][A
 72%|████████████████████████▌         | 88387/122310 [2:56:44<43:16, 13.07it/s][A
 72%|████████████████████████▌         | 88397/122310 [2:56:45<42:50, 13.19it/s][A
 72%|████████████████████████▌         | 88404/122310 [2:56:45<46:41, 12.10it/s][A
 72%|████████████████████████▌         | 88415/122310 [2:56:46<43:44, 12.91it/s][A
 72%|████████████████████████▌         | 88431/122310 [2:56:47<36:21, 15.53it/s][A
 72%|████████████████████████▌         | 88437/122310 [2:56:48<42:44, 13.21it/s][A
 72%|████████████████████████▌         | 88442/122310 [2:56:48<49:53, 11.31it/s][A
 72%|████████████████████████▌         | 88453/122310 [2:56:49<45:32, 12.39it/s][A
 72%|████████████████████████▌         | 88465/122310 [2:56:50<41:25, 13.62

step: 10720, loss: 73.8543546899506, epoch: 0



 72%|████████████████████████▌         | 88536/122310 [2:56:57<45:43, 12.31it/s][A
 72%|████████████████████████▌         | 88539/122310 [2:56:57<57:06,  9.86it/s][A
 72%|███████████████████████▏        | 88545/122310 [2:56:59<1:18:34,  7.16it/s][A
 72%|███████████████████████▏        | 88552/122310 [2:56:59<1:12:27,  7.77it/s][A
 72%|███████████████████████▏        | 88560/122310 [2:57:00<1:05:38,  8.57it/s][A
 72%|███████████████████████▏        | 88568/122310 [2:57:01<1:01:02,  9.21it/s][A
 72%|███████████████████████▏        | 88575/122310 [2:57:02<1:00:34,  9.28it/s][A
 72%|███████████████████████▏        | 88587/122310 [2:57:03<1:04:12,  8.75it/s][A
 72%|███████████████████████▏        | 88589/122310 [2:57:04<1:16:29,  7.35it/s][A
 72%|███████████████████████▏        | 88592/122310 [2:57:05<1:26:20,  6.51it/s][A
 72%|███████████████████████▏        | 88602/122310 [2:57:05<1:06:49,  8.41it/s][A
 72%|████████████████████████▋         | 88614/122310 [2:57:06<52:54, 10.62

step: 10740, loss: 80.23486034259913, epoch: 0



 73%|████████████████████████▋         | 88679/122310 [2:57:11<53:45, 10.43it/s][A
 73%|████████████████████████▋         | 88691/122310 [2:57:12<46:07, 12.15it/s][A
 73%|████████████████████████▋         | 88696/122310 [2:57:13<52:49, 10.61it/s][A
 73%|████████████████████████▋         | 88710/122310 [2:57:13<42:40, 13.12it/s][A
 73%|████████████████████████▋         | 88723/122310 [2:57:14<38:35, 14.51it/s][A
 73%|████████████████████████▋         | 88735/122310 [2:57:15<37:21, 14.98it/s][A
 73%|████████████████████████▋         | 88739/122310 [2:57:16<46:11, 12.11it/s][A
 73%|████████████████████████▋         | 88750/122310 [2:57:16<43:02, 13.00it/s][A
 73%|████████████████████████▋         | 88755/122310 [2:57:17<50:15, 11.13it/s][A
 73%|████████████████████████▋         | 88760/122310 [2:57:18<56:44,  9.85it/s][A
 73%|████████████████████████▋         | 88767/122310 [2:57:19<57:25,  9.73it/s][A
 73%|████████████████████████▋         | 88780/122310 [2:57:19<46:00, 12.14

step: 10760, loss: 125.4078209534896, epoch: 0



 73%|████████████████████████▋         | 88863/122310 [2:57:26<52:26, 10.63it/s][A
 73%|████████████████████████▋         | 88879/122310 [2:57:27<39:49, 13.99it/s][A
 73%|████████████████████████▋         | 88893/122310 [2:57:27<35:56, 15.50it/s][A
 73%|████████████████████████▋         | 88911/122310 [2:57:28<30:30, 18.25it/s][A
 73%|████████████████████████▋         | 88919/122310 [2:57:29<34:35, 16.09it/s][A
 73%|████████████████████████▋         | 88927/122310 [2:57:30<49:52, 11.15it/s][A
 73%|████████████████████████▋         | 88933/122310 [2:57:31<53:17, 10.44it/s][A
 73%|████████████████████████▋         | 88939/122310 [2:57:32<56:46,  9.80it/s][A
 73%|████████████████████████▋         | 88950/122310 [2:57:33<49:42, 11.19it/s][A
 73%|████████████████████████▋         | 88961/122310 [2:57:33<45:24, 12.24it/s][A
 73%|████████████████████████▋         | 88973/122310 [2:57:34<41:41, 13.33it/s][A
 73%|████████████████████████▋         | 88976/122310 [2:57:35<51:56, 10.70

step: 10780, loss: 93.28385293556144, epoch: 0



 73%|████████████████████████▊         | 89050/122310 [2:57:41<37:24, 14.82it/s][A
 73%|████████████████████████▊         | 89062/122310 [2:57:41<36:11, 15.31it/s][A
 73%|████████████████████████▊         | 89078/122310 [2:57:42<32:03, 17.28it/s][A
 73%|████████████████████████▊         | 89084/122310 [2:57:43<38:03, 14.55it/s][A
 73%|████████████████████████▊         | 89101/122310 [2:57:44<32:16, 17.15it/s][A
 73%|████████████████████████▊         | 89115/122310 [2:57:44<31:18, 17.67it/s][A
 73%|████████████████████████▊         | 89128/122310 [2:57:45<31:14, 17.70it/s][A
 73%|████████████████████████▊         | 89138/122310 [2:57:46<33:34, 16.46it/s][A
 73%|████████████████████████▊         | 89146/122310 [2:57:47<37:31, 14.73it/s][A
 73%|████████████████████████▊         | 89156/122310 [2:57:47<38:21, 14.40it/s][A
 73%|████████████████████████▊         | 89161/122310 [2:57:48<45:31, 12.14it/s][A
 73%|████████████████████████▊         | 89166/122310 [2:57:49<52:35, 10.50

step: 10800, loss: 86.1900657188872, epoch: 0
sim1 and sim2 are 0.6477798657314117, 0.1500211131355441
cosine of pred and queen: 0.07953926571852982
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: korea
Actual: beijing:china::tokyo:japan, pred: asia
Actual: berlin:germany::rome:italy, pred: countries
Actual: cairo:egypt::ottawa:canada, pred: panorama
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: country
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: eurozone
Actual: india:asia::paris:europe, pred: crop
Actual: china:asia::greece:europe, pred: debt
Actual: nigeria:africa::france:europe, pred: country
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: betoken
Actual: maha

Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: like
Actual: islamabad:pakistan::oslo:norway, pred: state
Actual: grandfather:grandmother::father:mother, pred: came
Actual: grandpa:grandma::sons:daughters, pred: david
Actual: king:queen::husband:wife, pred: texted
Actual: man:woman::brothers:sisters, pred: jethlah
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: indian
Actual: tripura:agartala::odisha:bhubaneswar, pred: cambrics
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: bank



 73%|██████████████████████▌        | 89222/122310 [2:59:10<24:12:27,  2.63s/it][A

Actual: india:rupee::denmark:krone, pred: trackless
Accuracy is 0.023668639053254437



 73%|██████████████████████▌        | 89227/122310 [2:59:11<19:44:50,  2.15s/it][A
 73%|██████████████████████▌        | 89243/122310 [2:59:11<10:37:54,  1.16s/it][A
 73%|███████████████████████▎        | 89253/122310 [2:59:12<7:40:39,  1.20it/s][A
 73%|███████████████████████▎        | 89262/122310 [2:59:13<5:45:40,  1.59it/s][A
 73%|███████████████████████▎        | 89277/122310 [2:59:14<3:39:01,  2.51it/s][A
 73%|███████████████████████▎        | 89287/122310 [2:59:14<2:49:56,  3.24it/s][A
 73%|███████████████████████▎        | 89299/122310 [2:59:15<2:06:10,  4.36it/s][A
 73%|███████████████████████▎        | 89306/122310 [2:59:16<1:51:21,  4.94it/s][A
 73%|███████████████████████▎        | 89320/122310 [2:59:16<1:20:12,  6.85it/s][A
 73%|███████████████████████▎        | 89333/122310 [2:59:17<1:03:45,  8.62it/s][A
 73%|███████████████████████▎        | 89339/122310 [2:59:18<1:04:15,  8.55it/s][A
 73%|███████████████████████▎        | 89343/122310 [2:59:19<1:09:35,  7.90

step: 10820, loss: 85.0025166112643, epoch: 0



 73%|███████████████████████▍        | 89398/122310 [2:59:25<1:05:28,  8.38it/s][A
 73%|███████████████████████▍        | 89402/122310 [2:59:26<1:12:44,  7.54it/s][A
 73%|███████████████████████▍        | 89410/122310 [2:59:27<1:03:58,  8.57it/s][A
 73%|████████████████████████▊         | 89418/122310 [2:59:27<58:47,  9.32it/s][A
 73%|████████████████████████▊         | 89426/122310 [2:59:28<55:53,  9.81it/s][A
 73%|███████████████████████▍        | 89431/122310 [2:59:29<1:01:12,  8.95it/s][A
 73%|███████████████████████▍        | 89437/122310 [2:59:29<1:02:31,  8.76it/s][A
 73%|████████████████████████▊         | 89455/122310 [2:59:30<40:23, 13.56it/s][A
 73%|████████████████████████▊         | 89465/122310 [2:59:31<40:20, 13.57it/s][A
 73%|████████████████████████▊         | 89468/122310 [2:59:32<50:55, 10.75it/s][A
 73%|████████████████████████▊         | 89480/122310 [2:59:32<44:14, 12.37it/s][A
 73%|████████████████████████▉         | 89493/122310 [2:59:33<39:04, 14.00

step: 10840, loss: 86.63468392973569, epoch: 0



 73%|████████████████████████▉         | 89558/122310 [2:59:40<59:58,  9.10it/s][A
 73%|████████████████████████▉         | 89571/122310 [2:59:40<46:30, 11.73it/s][A
 73%|████████████████████████▉         | 89576/122310 [2:59:41<53:07, 10.27it/s][A
 73%|████████████████████████▉         | 89587/122310 [2:59:42<46:28, 11.73it/s][A
 73%|████████████████████████▉         | 89593/122310 [2:59:43<51:00, 10.69it/s][A
 73%|███████████████████████▍        | 89595/122310 [2:59:43<1:05:40,  8.30it/s][A
 73%|████████████████████████▉         | 89603/122310 [2:59:44<59:48,  9.11it/s][A
 73%|████████████████████████▉         | 89616/122310 [2:59:45<46:31, 11.71it/s][A
 73%|████████████████████████▉         | 89623/122310 [2:59:46<49:08, 11.09it/s][A
 73%|████████████████████████▉         | 89631/122310 [2:59:46<49:12, 11.07it/s][A
 73%|████████████████████████▉         | 89636/122310 [2:59:47<55:49,  9.75it/s][A
 73%|████████████████████████▉         | 89644/122310 [2:59:48<53:57, 10.09

step: 10860, loss: 109.21363255076866, epoch: 0



 73%|████████████████████████▉         | 89724/122310 [2:59:54<51:36, 10.52it/s][A
 73%|████████████████████████▉         | 89736/122310 [2:59:55<44:09, 12.29it/s][A
 73%|████████████████████████▉         | 89744/122310 [2:59:56<45:41, 11.88it/s][A
 73%|████████████████████████▉         | 89748/122310 [2:59:57<54:38,  9.93it/s][A
 73%|███████████████████████▍        | 89753/122310 [2:59:57<1:00:00,  9.04it/s][A
 73%|████████████████████████▉         | 89766/122310 [2:59:58<46:42, 11.61it/s][A
 73%|████████████████████████▉         | 89777/122310 [2:59:59<42:52, 12.64it/s][A
 73%|████████████████████████▉         | 89790/122310 [2:59:59<38:11, 14.19it/s][A
 73%|████████████████████████▉         | 89801/122310 [3:00:00<37:30, 14.45it/s][A
 73%|████████████████████████▉         | 89813/122310 [3:00:01<35:59, 15.05it/s][A
 73%|████████████████████████▉         | 89830/122310 [3:00:02<31:00, 17.46it/s][A
 73%|████████████████████████▉         | 89833/122310 [3:00:03<52:21, 10.34

step: 10880, loss: 105.3606737614478, epoch: 0



 74%|████████████████████████▉         | 89916/122310 [3:00:09<33:48, 15.97it/s][A
 74%|█████████████████████████         | 89934/122310 [3:00:10<29:10, 18.50it/s][A
 74%|█████████████████████████         | 89940/122310 [3:00:10<34:50, 15.48it/s][A
 74%|█████████████████████████         | 89950/122310 [3:00:11<36:02, 14.97it/s][A
 74%|█████████████████████████         | 89956/122310 [3:00:12<41:39, 12.95it/s][A
 74%|█████████████████████████         | 89968/122310 [3:00:13<38:42, 13.93it/s][A
 74%|█████████████████████████         | 89985/122310 [3:00:13<32:15, 16.70it/s][A
 74%|█████████████████████████         | 89998/122310 [3:00:14<31:38, 17.02it/s][A
 74%|█████████████████████████         | 90004/122310 [3:00:15<37:31, 14.35it/s][A
 74%|█████████████████████████         | 90012/122310 [3:00:16<40:20, 13.34it/s][A
 74%|█████████████████████████         | 90029/122310 [3:00:16<33:07, 16.24it/s][A
 74%|█████████████████████████         | 90036/122310 [3:00:17<37:39, 14.28

step: 10900, loss: 92.376594386607, epoch: 0



 74%|█████████████████████████         | 90108/122310 [3:00:24<48:33, 11.05it/s][A
 74%|█████████████████████████         | 90112/122310 [3:00:24<57:25,  9.34it/s][A
 74%|███████████████████████▌        | 90116/122310 [3:00:25<1:05:16,  8.22it/s][A
 74%|█████████████████████████         | 90124/122310 [3:00:26<59:27,  9.02it/s][A
 74%|█████████████████████████         | 90133/122310 [3:00:27<53:35, 10.01it/s][A
 74%|█████████████████████████         | 90149/122310 [3:00:27<39:32, 13.56it/s][A
 74%|█████████████████████████         | 90159/122310 [3:00:28<39:26, 13.58it/s][A
 74%|█████████████████████████         | 90165/122310 [3:00:29<44:42, 11.98it/s][A
 74%|█████████████████████████         | 90172/122310 [3:00:29<47:29, 11.28it/s][A
 74%|█████████████████████████         | 90182/122310 [3:00:30<44:38, 11.99it/s][A
 74%|█████████████████████████         | 90194/122310 [3:00:31<40:17, 13.29it/s][A
 74%|█████████████████████████         | 90201/122310 [3:00:32<43:54, 12.19

step: 10920, loss: 99.67224644091647, epoch: 0



 74%|█████████████████████████         | 90278/122310 [3:00:38<41:51, 12.75it/s][A
 74%|█████████████████████████         | 90284/122310 [3:00:39<46:49, 11.40it/s][A
 74%|█████████████████████████         | 90291/122310 [3:00:40<49:14, 10.84it/s][A
 74%|█████████████████████████         | 90296/122310 [3:00:40<55:11,  9.67it/s][A
 74%|█████████████████████████         | 90303/122310 [3:00:41<55:32,  9.61it/s][A
 74%|█████████████████████████         | 90310/122310 [3:00:42<55:32,  9.60it/s][A
 74%|█████████████████████████         | 90320/122310 [3:00:43<49:11, 10.84it/s][A
 74%|█████████████████████████         | 90327/122310 [3:00:43<51:03, 10.44it/s][A
 74%|█████████████████████████         | 90336/122310 [3:00:44<48:36, 10.96it/s][A
 74%|█████████████████████████         | 90340/122310 [3:00:45<57:21,  9.29it/s][A
 74%|█████████████████████████         | 90349/122310 [3:00:46<52:12, 10.20it/s][A
 74%|█████████████████████████         | 90358/122310 [3:00:46<49:13, 10.82

step: 10940, loss: 95.5687671336861, epoch: 0



 74%|█████████████████████████▏        | 90425/122310 [3:00:53<59:05,  8.99it/s][A
 74%|███████████████████████▋        | 90430/122310 [3:00:54<1:02:45,  8.47it/s][A
 74%|███████████████████████▋        | 90435/122310 [3:00:54<1:06:01,  8.05it/s][A
 74%|█████████████████████████▏        | 90448/122310 [3:00:55<49:38, 10.70it/s][A
 74%|█████████████████████████▏        | 90457/122310 [3:00:56<47:37, 11.15it/s][A
 74%|█████████████████████████▏        | 90469/122310 [3:00:57<42:10, 12.58it/s][A
 74%|█████████████████████████▏        | 90476/122310 [3:00:57<45:07, 11.76it/s][A
 74%|█████████████████████████▏        | 90483/122310 [3:00:58<47:39, 11.13it/s][A
 74%|█████████████████████████▏        | 90498/122310 [3:00:59<38:20, 13.83it/s][A
 74%|█████████████████████████▏        | 90500/122310 [3:01:00<50:06, 10.58it/s][A
 74%|█████████████████████████▏        | 90512/122310 [3:01:00<43:06, 12.29it/s][A
 74%|█████████████████████████▏        | 90519/122310 [3:01:01<46:15, 11.46

step: 10960, loss: 84.77751176196023, epoch: 0



 74%|█████████████████████████▏        | 90583/122310 [3:01:08<44:47, 11.81it/s][A
 74%|█████████████████████████▏        | 90597/122310 [3:01:08<37:47, 13.99it/s][A
 74%|█████████████████████████▏        | 90607/122310 [3:01:09<38:02, 13.89it/s][A
 74%|█████████████████████████▏        | 90611/122310 [3:01:10<46:18, 11.41it/s][A
 74%|█████████████████████████▏        | 90619/122310 [3:01:11<46:53, 11.27it/s][A
 74%|█████████████████████████▏        | 90633/122310 [3:01:11<38:47, 13.61it/s][A
 74%|█████████████████████████▏        | 90642/122310 [3:01:12<39:58, 13.21it/s][A
 74%|█████████████████████████▏        | 90647/122310 [3:01:13<46:41, 11.30it/s][A
 74%|█████████████████████████▏        | 90657/122310 [3:01:13<44:02, 11.98it/s][A
 74%|█████████████████████████▏        | 90670/122310 [3:01:14<38:22, 13.74it/s][A
 74%|█████████████████████████▏        | 90680/122310 [3:01:15<38:29, 13.70it/s][A
 74%|█████████████████████████▏        | 90689/122310 [3:01:16<39:39, 13.29

step: 10980, loss: 79.58427631158607, epoch: 0



 74%|███████████████████████▋        | 90751/122310 [3:01:22<1:01:56,  8.49it/s][A
 74%|█████████████████████████▏        | 90767/122310 [3:01:23<42:04, 12.50it/s][A
 74%|█████████████████████████▏        | 90776/122310 [3:01:24<42:13, 12.45it/s][A
 74%|█████████████████████████▏        | 90788/122310 [3:01:24<38:31, 13.64it/s][A
 74%|█████████████████████████▏        | 90792/122310 [3:01:25<46:53, 11.20it/s][A
 74%|█████████████████████████▏        | 90804/122310 [3:01:26<41:09, 12.76it/s][A
 74%|█████████████████████████▏        | 90812/122310 [3:01:27<43:02, 12.20it/s][A
 74%|█████████████████████████▏        | 90818/122310 [3:01:27<47:39, 11.01it/s][A
 74%|█████████████████████████▏        | 90828/122310 [3:01:28<44:34, 11.77it/s][A
 74%|█████████████████████████▎        | 90834/122310 [3:01:29<49:02, 10.70it/s][A
 74%|█████████████████████████▎        | 90843/122310 [3:01:30<47:04, 11.14it/s][A
 74%|█████████████████████████▎        | 90847/122310 [3:01:30<55:39,  9.42

step: 11000, loss: 83.02264174439199, epoch: 0
saving weights



 74%|█████████████████████████▎        | 90925/122310 [3:01:37<50:42, 10.32it/s][A
 74%|█████████████████████████▎        | 90948/122310 [3:01:38<33:59, 15.38it/s][A
 74%|█████████████████████████▎        | 90962/122310 [3:01:39<32:00, 16.32it/s][A
 74%|█████████████████████████▎        | 90968/122310 [3:01:39<37:06, 14.07it/s][A
 74%|█████████████████████████▎        | 90980/122310 [3:01:40<35:31, 14.70it/s][A
 74%|█████████████████████████▎        | 90988/122310 [3:01:41<38:18, 13.63it/s][A
 74%|█████████████████████████▎        | 90990/122310 [3:01:41<49:34, 10.53it/s][A
 74%|█████████████████████████▎        | 90996/122310 [3:01:42<53:07,  9.82it/s][A
 74%|█████████████████████████▎        | 91004/122310 [3:01:43<51:19, 10.16it/s][A
 74%|█████████████████████████▎        | 91010/122310 [3:01:44<54:36,  9.55it/s][A
 74%|█████████████████████████▎        | 91016/122310 [3:01:44<57:08,  9.13it/s][A
 74%|███████████████████████▊        | 91021/122310 [3:01:45<1:02:14,  8.38

step: 11020, loss: 73.83350299113565, epoch: 0



 74%|█████████████████████████▎        | 91094/122310 [3:01:53<52:20,  9.94it/s][A
 74%|█████████████████████████▎        | 91110/122310 [3:01:53<38:30, 13.50it/s][A
 74%|█████████████████████████▎        | 91118/122310 [3:01:54<40:46, 12.75it/s][A
 75%|█████████████████████████▎        | 91126/122310 [3:01:55<42:39, 12.18it/s][A
 75%|█████████████████████████▎        | 91133/122310 [3:01:55<45:36, 11.39it/s][A
 75%|█████████████████████████▎        | 91148/122310 [3:01:56<36:59, 14.04it/s][A
 75%|█████████████████████████▎        | 91153/122310 [3:01:57<43:29, 11.94it/s][A
 75%|█████████████████████████▎        | 91163/122310 [3:01:58<41:38, 12.47it/s][A
 75%|█████████████████████████▎        | 91171/122310 [3:01:58<43:12, 12.01it/s][A
 75%|█████████████████████████▎        | 91175/122310 [3:01:59<51:46, 10.02it/s][A
 75%|█████████████████████████▎        | 91192/122310 [3:02:00<37:15, 13.92it/s][A
 75%|█████████████████████████▎        | 91198/122310 [3:02:01<42:30, 12.20

step: 11040, loss: 73.42504121480626, epoch: 0



 75%|█████████████████████████▎        | 91267/122310 [3:02:07<43:12, 11.97it/s][A
 75%|█████████████████████████▎        | 91273/122310 [3:02:08<47:47, 10.82it/s][A
 75%|█████████████████████████▎        | 91280/122310 [3:02:09<49:22, 10.47it/s][A
 75%|█████████████████████████▍        | 91289/122310 [3:02:09<47:00, 11.00it/s][A
 75%|█████████████████████████▍        | 91296/122310 [3:02:10<48:51, 10.58it/s][A
 75%|█████████████████████████▍        | 91305/122310 [3:02:11<46:42, 11.06it/s][A
 75%|█████████████████████████▍        | 91310/122310 [3:02:12<52:37,  9.82it/s][A
 75%|█████████████████████████▍        | 91319/122310 [3:02:12<48:49, 10.58it/s][A
 75%|█████████████████████████▍        | 91325/122310 [3:02:13<52:26,  9.85it/s][A
 75%|█████████████████████████▍        | 91330/122310 [3:02:14<57:38,  8.96it/s][A
 75%|███████████████████████▉        | 91333/122310 [3:02:15<1:09:07,  7.47it/s][A
 75%|███████████████████████▉        | 91340/122310 [3:02:15<1:03:29,  8.13

step: 11060, loss: 254.54733910678675, epoch: 0



 75%|███████████████████████▉        | 91365/122310 [3:02:22<2:36:56,  3.29it/s][A
 75%|███████████████████████▉        | 91382/122310 [3:02:23<1:13:05,  7.05it/s][A
 75%|███████████████████████▉        | 91386/122310 [3:02:23<1:16:53,  6.70it/s][A
 75%|███████████████████████▉        | 91394/122310 [3:02:24<1:06:53,  7.70it/s][A
 75%|███████████████████████▉        | 91398/122310 [3:02:25<1:12:22,  7.12it/s][A
 75%|███████████████████████▉        | 91405/122310 [3:02:26<1:06:23,  7.76it/s][A
 75%|███████████████████████▉        | 91409/122310 [3:02:26<1:12:12,  7.13it/s][A
 75%|███████████████████████▉        | 91417/122310 [3:02:27<1:02:49,  8.20it/s][A
 75%|███████████████████████▉        | 91424/122310 [3:02:28<1:00:09,  8.56it/s][A
 75%|█████████████████████████▍        | 91438/122310 [3:02:28<44:16, 11.62it/s][A
 75%|█████████████████████████▍        | 91446/122310 [3:02:29<45:08, 11.40it/s][A
 75%|█████████████████████████▍        | 91450/122310 [3:02:30<53:38,  9.59

step: 11080, loss: 101.7330715777908, epoch: 0



 75%|█████████████████████████▍        | 91524/122310 [3:02:37<34:25, 14.91it/s][A
 75%|█████████████████████████▍        | 91536/122310 [3:02:37<33:29, 15.31it/s][A
 75%|█████████████████████████▍        | 91545/122310 [3:02:38<35:39, 14.38it/s][A
 75%|█████████████████████████▍        | 91551/122310 [3:02:39<40:58, 12.51it/s][A
 75%|█████████████████████████▍        | 91564/122310 [3:02:39<36:23, 14.08it/s][A
 75%|█████████████████████████▍        | 91572/122310 [3:02:40<38:59, 13.14it/s][A
 75%|█████████████████████████▍        | 91579/122310 [3:02:41<42:27, 12.06it/s][A
 75%|█████████████████████████▍        | 91583/122310 [3:02:42<50:49, 10.08it/s][A
 75%|█████████████████████████▍        | 91591/122310 [3:02:42<49:44, 10.29it/s][A
 75%|█████████████████████████▍        | 91603/122310 [3:02:43<42:11, 12.13it/s][A
 75%|█████████████████████████▍        | 91616/122310 [3:02:44<37:02, 13.81it/s][A
 75%|█████████████████████████▍        | 91625/122310 [3:02:45<38:20, 13.34

step: 11100, loss: 118.58773266312488, epoch: 0



 75%|█████████████████████████▍        | 91698/122310 [3:02:51<43:59, 11.60it/s][A
 75%|█████████████████████████▍        | 91709/122310 [3:02:52<40:26, 12.61it/s][A
 75%|█████████████████████████▍        | 91711/122310 [3:02:53<52:48,  9.66it/s][A
 75%|█████████████████████████▍        | 91721/122310 [3:02:53<46:52, 10.88it/s][A
 75%|█████████████████████████▍        | 91726/122310 [3:02:54<52:36,  9.69it/s][A
 75%|█████████████████████████▌        | 91735/122310 [3:02:55<48:26, 10.52it/s][A
 75%|█████████████████████████▌        | 91744/122310 [3:02:56<46:10, 11.03it/s][A
 75%|█████████████████████████▌        | 91750/122310 [3:02:56<49:42, 10.25it/s][A
 75%|█████████████████████████▌        | 91758/122310 [3:02:57<48:40, 10.46it/s][A
 75%|█████████████████████████▌        | 91768/122310 [3:02:58<44:21, 11.47it/s][A
 75%|█████████████████████████▌        | 91775/122310 [3:02:59<46:30, 10.94it/s][A
 75%|█████████████████████████▌        | 91788/122310 [3:02:59<38:54, 13.07

step: 11120, loss: 81.2966631655478, epoch: 0



 75%|█████████████████████████▌        | 91852/122310 [3:03:06<45:21, 11.19it/s][A
 75%|█████████████████████████▌        | 91862/122310 [3:03:07<42:28, 11.95it/s][A
 75%|█████████████████████████▌        | 91870/122310 [3:03:07<43:34, 11.64it/s][A
 75%|█████████████████████████▌        | 91878/122310 [3:03:08<44:31, 11.39it/s][A
 75%|█████████████████████████▌        | 91881/122310 [3:03:09<54:50,  9.25it/s][A
 75%|█████████████████████████▌        | 91886/122310 [3:03:09<59:33,  8.51it/s][A
 75%|████████████████████████        | 91890/122310 [3:03:10<1:07:14,  7.54it/s][A
 75%|████████████████████████        | 91893/122310 [3:03:11<1:17:38,  6.53it/s][A
 75%|█████████████████████████▌        | 91909/122310 [3:03:12<45:34, 11.12it/s][A
 75%|█████████████████████████▌        | 91918/122310 [3:03:12<44:27, 11.39it/s][A
 75%|█████████████████████████▌        | 91932/122310 [3:03:13<36:58, 13.70it/s][A
 75%|█████████████████████████▌        | 91949/122310 [3:03:14<30:40, 16.50

step: 11140, loss: 108.65239868225653, epoch: 0



 75%|█████████████████████████▌        | 92027/122310 [3:03:21<38:29, 13.11it/s][A
 75%|█████████████████████████▌        | 92037/122310 [3:03:21<37:53, 13.32it/s][A
 75%|█████████████████████████▌        | 92050/122310 [3:03:22<34:36, 14.57it/s][A
 75%|█████████████████████████▌        | 92053/122310 [3:03:23<43:53, 11.49it/s][A
 75%|█████████████████████████▌        | 92064/122310 [3:03:23<40:18, 12.50it/s][A
 75%|█████████████████████████▌        | 92073/122310 [3:03:24<40:37, 12.40it/s][A
 75%|█████████████████████████▌        | 92079/122310 [3:03:25<45:11, 11.15it/s][A
 75%|█████████████████████████▌        | 92097/122310 [3:03:26<33:10, 15.18it/s][A
 75%|█████████████████████████▌        | 92112/122310 [3:03:26<30:07, 16.71it/s][A
 75%|█████████████████████████▌        | 92118/122310 [3:03:27<35:29, 14.18it/s][A
 75%|█████████████████████████▌        | 92127/122310 [3:03:28<37:03, 13.57it/s][A
 75%|█████████████████████████▌        | 92129/122310 [3:03:29<48:38, 10.34

step: 11160, loss: 112.37921868545737, epoch: 0



 75%|█████████████████████████▋        | 92214/122310 [3:03:35<45:03, 11.13it/s][A
 75%|█████████████████████████▋        | 92224/122310 [3:03:36<42:11, 11.89it/s][A
 75%|█████████████████████████▋        | 92233/122310 [3:03:37<41:42, 12.02it/s][A
 75%|█████████████████████████▋        | 92240/122310 [3:03:37<44:21, 11.30it/s][A
 75%|█████████████████████████▋        | 92242/122310 [3:03:38<57:54,  8.65it/s][A
 75%|████████████████████████▏       | 92243/122310 [3:03:39<1:17:00,  6.51it/s][A
 75%|████████████████████████▏       | 92249/122310 [3:03:40<1:11:28,  7.01it/s][A
 75%|█████████████████████████▋        | 92260/122310 [3:03:40<53:18,  9.39it/s][A
 75%|█████████████████████████▋        | 92270/122310 [3:03:41<46:58, 10.66it/s][A
 75%|█████████████████████████▋        | 92281/122310 [3:03:42<41:56, 11.93it/s][A
 75%|█████████████████████████▋        | 92292/122310 [3:03:43<38:52, 12.87it/s][A
 75%|█████████████████████████▋        | 92303/122310 [3:03:43<37:07, 13.47

step: 11180, loss: 96.66728012931155, epoch: 0



 76%|█████████████████████████▋        | 92385/122310 [3:03:50<40:40, 12.26it/s][A
 76%|█████████████████████████▋        | 92394/122310 [3:03:51<40:41, 12.26it/s][A
 76%|█████████████████████████▋        | 92398/122310 [3:03:51<48:44, 10.23it/s][A
 76%|█████████████████████████▋        | 92408/122310 [3:03:52<44:16, 11.26it/s][A
 76%|█████████████████████████▋        | 92421/122310 [3:03:53<37:52, 13.15it/s][A
 76%|█████████████████████████▋        | 92428/122310 [3:03:54<41:08, 12.10it/s][A
 76%|█████████████████████████▋        | 92436/122310 [3:03:54<42:29, 11.72it/s][A
 76%|█████████████████████████▋        | 92442/122310 [3:03:55<46:30, 10.70it/s][A
 76%|█████████████████████████▋        | 92452/122310 [3:03:56<42:58, 11.58it/s][A
 76%|█████████████████████████▋        | 92469/122310 [3:03:57<32:59, 15.07it/s][A
 76%|█████████████████████████▋        | 92480/122310 [3:03:57<33:04, 15.04it/s][A
 76%|█████████████████████████▋        | 92491/122310 [3:03:58<33:06, 15.01

step: 11200, loss: 89.84525805074296, epoch: 0
sim1 and sim2 are 0.5999187489725062, 0.16583352582534552
cosine of pred and queen: 0.15002786643066413
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: asia
Actual: berlin:germany::rome:italy, pred: countries
Actual: cairo:egypt::ottawa:canada, pred: panorama
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: since
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: flight
Actual: lisbon:portugal::riga:latvia, pred: eurozone
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: new
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: belou
Actual: maharastr


 76%|█████████████████████████▋        | 92551/122310 [3:04:23<34:56, 14.19it/s][A

Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: prime
Actual: denmark:danish::germany:german, pred: debar
Accuracy is 0.08888888888888889
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: dimly
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: unto
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: marked
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:plays::listen:listens, pred: discarded
Actual: predict:predicts::search:searches, pred: pebbles
Actual: machine:machines::lion:lions, pred: thamer
Actual: mango:mangoes::onion:onions, pred: mier
Actual: man:men::mango:mangoes, pred: ibbotsons
Actual: melon:melons::pig:pigs, pred: illus


 76%|███████████████████████▍       | 92558/122310 [3:05:19<20:31:57,  2.48s/it][A

Actual: india:rupee::denmark:krone, pred: trackless
Accuracy is 0.023668639053254437



 76%|███████████████████████▍       | 92575/122310 [3:05:20<11:42:58,  1.42s/it][A
 76%|███████████████████████▍       | 92577/122310 [3:05:21<11:06:57,  1.35s/it][A
 76%|███████████████████████▍       | 92579/122310 [3:05:21<10:23:18,  1.26s/it][A
 76%|████████████████████████▏       | 92584/122310 [3:05:22<8:10:17,  1.01it/s][A
 76%|████████████████████████▏       | 92590/122310 [3:05:23<6:04:15,  1.36it/s][A
 76%|████████████████████████▏       | 92603/122310 [3:05:24<3:24:09,  2.43it/s][A
 76%|████████████████████████▏       | 92615/122310 [3:05:24<2:16:56,  3.61it/s][A
 76%|████████████████████████▏       | 92620/122310 [3:05:25<2:04:45,  3.97it/s][A
 76%|████████████████████████▏       | 92626/122310 [3:05:26<1:48:55,  4.54it/s][A
 76%|████████████████████████▏       | 92635/122310 [3:05:27<1:25:17,  5.80it/s][A
 76%|████████████████████████▏       | 92645/122310 [3:05:27<1:08:00,  7.27it/s][A
 76%|████████████████████████▏       | 92649/122310 [3:05:28<1:11:45,  6.89

step: 11220, loss: 79.66697071525486, epoch: 0



 76%|█████████████████████████▊        | 92746/122310 [3:05:35<34:26, 14.31it/s][A
 76%|█████████████████████████▊        | 92761/122310 [3:05:35<30:21, 16.23it/s][A
 76%|█████████████████████████▊        | 92769/122310 [3:05:36<33:31, 14.69it/s][A
 76%|█████████████████████████▊        | 92778/122310 [3:05:37<35:17, 13.94it/s][A
 76%|████████████████████████▎       | 92780/122310 [3:05:38<1:00:10,  8.18it/s][A
 76%|█████████████████████████▊        | 92787/122310 [3:05:39<57:28,  8.56it/s][A
 76%|█████████████████████████▊        | 92794/122310 [3:05:40<55:35,  8.85it/s][A
 76%|█████████████████████████▊        | 92809/122310 [3:05:40<40:47, 12.05it/s][A
 76%|█████████████████████████▊        | 92823/122310 [3:05:41<35:01, 14.03it/s][A
 76%|█████████████████████████▊        | 92833/122310 [3:05:42<35:17, 13.92it/s][A
 76%|█████████████████████████▊        | 92837/122310 [3:05:43<42:36, 11.53it/s][A
 76%|█████████████████████████▊        | 92842/122310 [3:05:43<48:15, 10.18

step: 11240, loss: 77.10146555503142, epoch: 0



 76%|█████████████████████████▊        | 92917/122310 [3:05:49<44:43, 10.95it/s][A
 76%|█████████████████████████▊        | 92925/122310 [3:05:50<44:35, 10.98it/s][A
 76%|█████████████████████████▊        | 92936/122310 [3:05:51<40:10, 12.18it/s][A
 76%|█████████████████████████▊        | 92938/122310 [3:05:51<52:19,  9.35it/s][A
 76%|█████████████████████████▊        | 92943/122310 [3:05:52<56:33,  8.65it/s][A
 76%|█████████████████████████▊        | 92957/122310 [3:05:53<41:19, 11.84it/s][A
 76%|█████████████████████████▊        | 92963/122310 [3:05:53<45:26, 10.76it/s][A
 76%|█████████████████████████▊        | 92975/122310 [3:05:54<39:09, 12.49it/s][A
 76%|█████████████████████████▊        | 92990/122310 [3:05:55<32:53, 14.85it/s][A
 76%|█████████████████████████▊        | 93004/122310 [3:05:56<30:11, 16.18it/s][A
 76%|█████████████████████████▊        | 93013/122310 [3:05:56<32:32, 15.00it/s][A
 76%|█████████████████████████▊        | 93016/122310 [3:05:57<41:30, 11.76

step: 11260, loss: 116.48240951642828, epoch: 0



 76%|█████████████████████████▊        | 93076/122310 [3:06:04<47:57, 10.16it/s][A
 76%|█████████████████████████▊        | 93080/122310 [3:06:04<54:41,  8.91it/s][A
 76%|█████████████████████████▉        | 93089/122310 [3:06:05<49:18,  9.88it/s][A
 76%|█████████████████████████▉        | 93105/122310 [3:06:06<36:29, 13.34it/s][A
 76%|█████████████████████████▉        | 93127/122310 [3:06:07<26:40, 18.23it/s][A
 76%|█████████████████████████▉        | 93137/122310 [3:06:07<28:45, 16.91it/s][A
 76%|█████████████████████████▉        | 93144/122310 [3:06:08<33:05, 14.69it/s][A
 76%|█████████████████████████▉        | 93155/122310 [3:06:09<32:51, 14.79it/s][A
 76%|█████████████████████████▉        | 93165/122310 [3:06:10<33:33, 14.48it/s][A
 76%|█████████████████████████▉        | 93182/122310 [3:06:10<28:25, 17.08it/s][A
 76%|█████████████████████████▉        | 93193/122310 [3:06:11<29:33, 16.42it/s][A
 76%|█████████████████████████▉        | 93208/122310 [3:06:12<27:29, 17.64

step: 11280, loss: 90.77041964281892, epoch: 0



 76%|█████████████████████████▉        | 93273/122310 [3:06:18<53:45,  9.00it/s][A
 76%|████████████████████████▍       | 93275/122310 [3:06:19<1:07:45,  7.14it/s][A
 76%|████████████████████████▍       | 93280/122310 [3:06:20<1:08:58,  7.01it/s][A
 76%|████████████████████████▍       | 93287/122310 [3:06:20<1:02:07,  7.79it/s][A
 76%|████████████████████████▍       | 93293/122310 [3:06:21<1:01:07,  7.91it/s][A
 76%|█████████████████████████▉        | 93300/122310 [3:06:22<57:29,  8.41it/s][A
 76%|█████████████████████████▉        | 93311/122310 [3:06:23<46:30, 10.39it/s][A
 76%|█████████████████████████▉        | 93313/122310 [3:06:23<59:46,  8.09it/s][A
 76%|████████████████████████▍       | 93317/122310 [3:06:24<1:05:54,  7.33it/s][A
 76%|████████████████████████▍       | 93324/122310 [3:06:25<1:00:17,  8.01it/s][A
 76%|████████████████████████▍       | 93329/122310 [3:06:26<1:02:55,  7.68it/s][A
 76%|█████████████████████████▉        | 93341/122310 [3:06:26<46:50, 10.31

step: 11300, loss: 109.54518393621423, epoch: 0



 76%|█████████████████████████▉        | 93414/122310 [3:06:33<49:17,  9.77it/s][A
 76%|█████████████████████████▉        | 93417/122310 [3:06:34<59:32,  8.09it/s][A
 76%|█████████████████████████▉        | 93435/122310 [3:06:34<36:51, 13.05it/s][A
 76%|█████████████████████████▉        | 93444/122310 [3:06:35<37:30, 12.83it/s][A
 76%|█████████████████████████▉        | 93452/122310 [3:06:36<39:03, 12.31it/s][A
 76%|█████████████████████████▉        | 93462/122310 [3:06:37<37:51, 12.70it/s][A
 76%|█████████████████████████▉        | 93471/122310 [3:06:37<38:07, 12.61it/s][A
 76%|█████████████████████████▉        | 93479/122310 [3:06:38<39:40, 12.11it/s][A
 76%|█████████████████████████▉        | 93483/122310 [3:06:39<47:40, 10.08it/s][A
 76%|████████████████████████▍       | 93490/122310 [3:06:40<1:02:36,  7.67it/s][A
 76%|████████████████████████▍       | 93495/122310 [3:06:41<1:04:08,  7.49it/s][A
 76%|████████████████████████▍       | 93501/122310 [3:06:42<1:02:42,  7.66

step: 11320, loss: 85.70722401896964, epoch: 0



 76%|██████████████████████████        | 93562/122310 [3:06:47<36:34, 13.10it/s][A
 77%|██████████████████████████        | 93571/122310 [3:06:48<37:11, 12.88it/s][A
 77%|██████████████████████████        | 93592/122310 [3:06:49<27:07, 17.64it/s][A
 77%|██████████████████████████        | 93601/122310 [3:06:50<29:46, 16.07it/s][A
 77%|██████████████████████████        | 93612/122310 [3:06:50<30:19, 15.77it/s][A
 77%|██████████████████████████        | 93621/122310 [3:06:51<32:28, 14.73it/s][A
 77%|██████████████████████████        | 93628/122310 [3:06:52<36:24, 13.13it/s][A
 77%|██████████████████████████        | 93636/122310 [3:06:53<38:07, 12.54it/s][A
 77%|██████████████████████████        | 93650/122310 [3:06:53<32:44, 14.59it/s][A
 77%|██████████████████████████        | 93662/122310 [3:06:54<31:26, 15.18it/s][A
 77%|██████████████████████████        | 93667/122310 [3:06:55<37:23, 12.77it/s][A
 77%|██████████████████████████        | 93674/122310 [3:06:55<40:45, 11.71

step: 11340, loss: 92.17369640975693, epoch: 0



 77%|██████████████████████████        | 93731/122310 [3:07:02<55:11,  8.63it/s][A
 77%|████████████████████████▌       | 93736/122310 [3:07:04<1:14:34,  6.39it/s][A
 77%|████████████████████████▌       | 93739/122310 [3:07:04<1:20:57,  5.88it/s][A
 77%|████████████████████████▌       | 93744/122310 [3:07:05<1:17:46,  6.12it/s][A
 77%|████████████████████████▌       | 93753/122310 [3:07:06<1:01:32,  7.73it/s][A
 77%|████████████████████████▌       | 93759/122310 [3:07:06<1:00:35,  7.85it/s][A

step: 11360, loss: 189.10836979121837, epoch: 0



 77%|████████████████████████▌       | 93760/122310 [3:07:19<7:08:48,  1.11it/s][A
 77%|████████████████████████▌       | 93769/122310 [3:07:19<4:12:58,  1.88it/s][A
 77%|████████████████████████▌       | 93777/122310 [3:07:20<2:56:59,  2.69it/s][A
 77%|████████████████████████▌       | 93798/122310 [3:07:21<1:24:27,  5.63it/s][A
 77%|████████████████████████▌       | 93808/122310 [3:07:22<1:10:20,  6.75it/s][A
 77%|██████████████████████████        | 93821/122310 [3:07:22<55:24,  8.57it/s][A
 77%|██████████████████████████        | 93826/122310 [3:07:23<57:40,  8.23it/s][A
 77%|████████████████████████▌       | 93830/122310 [3:07:24<1:02:04,  7.65it/s][A
 77%|████████████████████████▌       | 93836/122310 [3:07:25<1:01:14,  7.75it/s][A
 77%|████████████████████████▌       | 93839/122310 [3:07:25<1:09:26,  6.83it/s][A
 77%|████████████████████████▌       | 93841/122310 [3:07:26<1:22:04,  5.78it/s][A
 77%|██████████████████████████        | 93851/122310 [3:07:27<59:37,  7.96

step: 11380, loss: 1139.5316151875343, epoch: 0



 77%|████████████████████████▌       | 93856/122310 [3:07:34<3:38:17,  2.17it/s][A
 77%|████████████████████████▌       | 93865/122310 [3:07:35<2:27:36,  3.21it/s][A
 77%|████████████████████████▌       | 93872/122310 [3:07:36<1:57:36,  4.03it/s][A
 77%|████████████████████████▌       | 93882/122310 [3:07:36<1:25:57,  5.51it/s][A
 77%|████████████████████████▌       | 93890/122310 [3:07:37<1:12:57,  6.49it/s][A
 77%|████████████████████████▌       | 93898/122310 [3:07:38<1:04:11,  7.38it/s][A
 77%|██████████████████████████        | 93919/122310 [3:07:38<38:57, 12.14it/s][A
 77%|██████████████████████████        | 93921/122310 [3:07:39<47:52,  9.88it/s][A
 77%|██████████████████████████        | 93931/122310 [3:07:40<43:43, 10.82it/s][A
 77%|██████████████████████████        | 93935/122310 [3:07:41<50:25,  9.38it/s][A
 77%|██████████████████████████        | 93944/122310 [3:07:41<46:21, 10.20it/s][A
 77%|██████████████████████████        | 93951/122310 [3:07:42<47:10, 10.02

step: 11400, loss: 93.48250635512159, epoch: 0



 77%|████████████████████████▌       | 93985/122310 [3:07:47<1:07:41,  6.97it/s][A
 77%|██████████████████████████▏       | 93994/122310 [3:07:47<56:47,  8.31it/s][A
 77%|██████████████████████████▏       | 94010/122310 [3:07:48<39:45, 11.86it/s][A
 77%|██████████████████████████▏       | 94022/122310 [3:07:49<36:02, 13.08it/s][A
 77%|██████████████████████████▏       | 94029/122310 [3:07:49<39:05, 12.06it/s][A
 77%|██████████████████████████▏       | 94040/122310 [3:07:50<36:32, 12.90it/s][A
 77%|██████████████████████████▏       | 94051/122310 [3:07:51<34:51, 13.51it/s][A
 77%|██████████████████████████▏       | 94062/122310 [3:07:52<33:56, 13.87it/s][A
 77%|██████████████████████████▏       | 94067/122310 [3:07:52<40:00, 11.76it/s][A
 77%|██████████████████████████▏       | 94072/122310 [3:07:53<45:31, 10.34it/s][A
 77%|██████████████████████████▏       | 94078/122310 [3:07:54<48:27,  9.71it/s][A
 77%|██████████████████████████▏       | 94081/122310 [3:07:55<58:26,  8.05

step: 11420, loss: 85.8797792333924, epoch: 0



 77%|██████████████████████████▏       | 94150/122310 [3:08:00<36:12, 12.96it/s][A
 77%|██████████████████████████▏       | 94159/122310 [3:08:01<36:49, 12.74it/s][A
 77%|██████████████████████████▏       | 94166/122310 [3:08:02<39:42, 11.81it/s][A
 77%|██████████████████████████▏       | 94172/122310 [3:08:03<43:49, 10.70it/s][A
 77%|██████████████████████████▏       | 94180/122310 [3:08:03<43:26, 10.79it/s][A
 77%|██████████████████████████▏       | 94187/122310 [3:08:04<45:01, 10.41it/s][A
 77%|██████████████████████████▏       | 94199/122310 [3:08:05<38:25, 12.19it/s][A
 77%|██████████████████████████▏       | 94229/122310 [3:08:06<22:27, 20.83it/s][A
 77%|██████████████████████████▏       | 94244/122310 [3:08:06<22:35, 20.70it/s][A
 77%|██████████████████████████▏       | 94251/122310 [3:08:07<26:53, 17.39it/s][A
 77%|██████████████████████████▏       | 94272/122310 [3:08:08<22:32, 20.73it/s][A
 77%|██████████████████████████▏       | 94279/122310 [3:08:09<26:56, 17.34

step: 11440, loss: 80.24961597471483, epoch: 0



 77%|██████████████████████████▏       | 94339/122310 [3:08:15<49:05,  9.49it/s][A
 77%|██████████████████████████▏       | 94348/122310 [3:08:16<44:59, 10.36it/s][A
 77%|██████████████████████████▏       | 94355/122310 [3:08:17<46:10, 10.09it/s][A
 77%|██████████████████████████▏       | 94365/122310 [3:08:17<41:46, 11.15it/s][A
 77%|██████████████████████████▏       | 94370/122310 [3:08:18<47:15,  9.85it/s][A
 77%|████████████████████████▋       | 94372/122310 [3:08:19<1:00:07,  7.74it/s][A
 77%|██████████████████████████▏       | 94378/122310 [3:08:20<59:18,  7.85it/s][A
 77%|██████████████████████████▏       | 94384/122310 [3:08:20<58:28,  7.96it/s][A
 77%|████████████████████████▋       | 94385/122310 [3:08:21<1:17:53,  5.98it/s][A
 77%|██████████████████████████▏       | 94402/122310 [3:08:22<41:59, 11.08it/s][A
 77%|██████████████████████████▏       | 94412/122310 [3:08:22<39:15, 11.84it/s][A
 77%|██████████████████████████▏       | 94418/122310 [3:08:23<43:15, 10.74

step: 11460, loss: 102.70121045769193, epoch: 0



 77%|██████████████████████████▎       | 94481/122310 [3:08:30<59:58,  7.73it/s][A
 77%|██████████████████████████▎       | 94487/122310 [3:08:31<59:00,  7.86it/s][A
 77%|██████████████████████████▎       | 94493/122310 [3:08:31<58:29,  7.93it/s][A
 77%|██████████████████████████▎       | 94499/122310 [3:08:32<57:53,  8.01it/s][A
 77%|████████████████████████▋       | 94504/122310 [3:08:33<1:00:21,  7.68it/s][A
 77%|████████████████████████▋       | 94508/122310 [3:08:33<1:05:33,  7.07it/s][A
 77%|████████████████████████▋       | 94514/122310 [3:08:34<1:03:12,  7.33it/s][A
 77%|██████████████████████████▎       | 94522/122310 [3:08:35<55:34,  8.33it/s][A
 77%|██████████████████████████▎       | 94531/122310 [3:08:36<48:47,  9.49it/s][A
 77%|██████████████████████████▎       | 94538/122310 [3:08:36<48:43,  9.50it/s][A
 77%|██████████████████████████▎       | 94548/122310 [3:08:37<43:04, 10.74it/s][A
 77%|██████████████████████████▎       | 94556/122310 [3:08:38<42:47, 10.81

step: 11480, loss: 79.94247114223788, epoch: 0



 77%|██████████████████████████▎       | 94637/122310 [3:08:45<33:56, 13.59it/s][A
 77%|██████████████████████████▎       | 94642/122310 [3:08:45<39:42, 11.61it/s][A
 77%|██████████████████████████▎       | 94656/122310 [3:08:46<33:14, 13.86it/s][A
 77%|██████████████████████████▎       | 94670/122310 [3:08:47<29:54, 15.40it/s][A
 77%|██████████████████████████▎       | 94681/122310 [3:08:47<30:00, 15.35it/s][A
 77%|██████████████████████████▎       | 94685/122310 [3:08:48<37:15, 12.36it/s][A
 77%|██████████████████████████▎       | 94695/122310 [3:08:49<36:17, 12.68it/s][A
 77%|██████████████████████████▎       | 94698/122310 [3:08:50<45:13, 10.18it/s][A
 77%|██████████████████████████▎       | 94712/122310 [3:08:50<35:47, 12.85it/s][A
 77%|██████████████████████████▎       | 94718/122310 [3:08:51<40:17, 11.41it/s][A
 77%|██████████████████████████▎       | 94725/122310 [3:08:52<42:11, 10.90it/s][A
 77%|██████████████████████████▎       | 94747/122310 [3:08:53<27:39, 16.61

step: 11500, loss: 101.07848279679055, epoch: 0



 78%|██████████████████████████▎       | 94832/122310 [3:08:59<32:49, 13.95it/s][A
 78%|██████████████████████████▎       | 94839/122310 [3:09:01<57:41,  7.94it/s][A
 78%|██████████████████████████▎       | 94844/122310 [3:09:02<59:20,  7.71it/s][A
 78%|██████████████████████████▎       | 94857/122310 [3:09:03<45:39, 10.02it/s][A
 78%|██████████████████████████▎       | 94862/122310 [3:09:04<49:37,  9.22it/s][A
 78%|██████████████████████████▎       | 94865/122310 [3:09:04<58:00,  7.88it/s][A
 78%|██████████████████████████▎       | 94874/122310 [3:09:05<50:22,  9.08it/s][A
 78%|██████████████████████████▎       | 94880/122310 [3:09:06<52:02,  8.78it/s][A
 78%|██████████████████████████▍       | 94888/122310 [3:09:06<48:43,  9.38it/s][A
 78%|██████████████████████████▍       | 94897/122310 [3:09:07<44:59, 10.16it/s][A
 78%|██████████████████████████▍       | 94903/122310 [3:09:08<47:55,  9.53it/s][A
 78%|██████████████████████████▍       | 94910/122310 [3:09:09<47:52,  9.54

step: 11520, loss: 99.74658559900367, epoch: 0



 78%|██████████████████████████▍       | 94960/122310 [3:09:14<49:37,  9.19it/s][A
 78%|██████████████████████████▍       | 94976/122310 [3:09:15<35:05, 12.98it/s][A
 78%|██████████████████████████▍       | 94988/122310 [3:09:15<32:31, 14.00it/s][A
 78%|██████████████████████████▍       | 94995/122310 [3:09:16<35:55, 12.67it/s][A
 78%|████████████████████████▊       | 94997/122310 [3:09:18<1:15:22,  6.04it/s][A
 78%|████████████████████████▊       | 95002/122310 [3:09:19<1:13:09,  6.22it/s][A
 78%|██████████████████████████▍       | 95012/122310 [3:09:20<57:32,  7.91it/s][A
 78%|██████████████████████████▍       | 95023/122310 [3:09:20<47:08,  9.65it/s][A
 78%|████████████████████████▊       | 95025/122310 [3:09:25<2:12:41,  3.43it/s][A
 78%|████████████████████████▊       | 95044/122310 [3:09:26<1:08:59,  6.59it/s][A
 78%|██████████████████████████▍       | 95060/122310 [3:09:26<49:45,  9.13it/s][A
 78%|██████████████████████████▍       | 95062/122310 [3:09:27<57:28,  7.90

step: 11540, loss: 81.53215708698677, epoch: 0



 78%|██████████████████████████▍       | 95085/122310 [3:09:29<43:30, 10.43it/s][A
 78%|██████████████████████████▍       | 95093/122310 [3:09:29<42:55, 10.57it/s][A
 78%|██████████████████████████▍       | 95099/122310 [3:09:30<45:41,  9.93it/s][A
 78%|██████████████████████████▍       | 95102/122310 [3:09:31<54:17,  8.35it/s][A
 78%|████████████████████████▉       | 95106/122310 [3:09:31<1:00:14,  7.53it/s][A
 78%|██████████████████████████▍       | 95117/122310 [3:09:32<47:07,  9.62it/s][A
 78%|██████████████████████████▍       | 95126/122310 [3:09:33<43:38, 10.38it/s][A
 78%|██████████████████████████▍       | 95135/122310 [3:09:34<41:30, 10.91it/s][A
 78%|██████████████████████████▍       | 95144/122310 [3:09:34<39:57, 11.33it/s][A
 78%|██████████████████████████▍       | 95156/122310 [3:09:35<35:17, 12.82it/s][A
 78%|██████████████████████████▍       | 95164/122310 [3:09:36<36:55, 12.25it/s][A
 78%|██████████████████████████▍       | 95178/122310 [3:09:37<31:36, 14.31

step: 11560, loss: 98.6652523514501, epoch: 0



 78%|██████████████████████████▍       | 95255/122310 [3:09:43<45:05, 10.00it/s][A
 78%|██████████████████████████▍       | 95259/122310 [3:09:44<52:02,  8.66it/s][A
 78%|██████████████████████████▍       | 95268/122310 [3:09:45<46:20,  9.73it/s][A
 78%|██████████████████████████▍       | 95275/122310 [3:09:45<46:33,  9.68it/s][A
 78%|██████████████████████████▍       | 95282/122310 [3:09:46<46:48,  9.62it/s][A
 78%|██████████████████████████▍       | 95288/122310 [3:09:47<49:28,  9.10it/s][A
 78%|██████████████████████████▍       | 95299/122310 [3:09:48<41:32, 10.84it/s][A
 78%|██████████████████████████▍       | 95307/122310 [3:09:48<41:22, 10.88it/s][A
 78%|██████████████████████████▍       | 95320/122310 [3:09:49<34:58, 12.86it/s][A
 78%|██████████████████████████▌       | 95333/122310 [3:09:50<31:31, 14.26it/s][A
 78%|██████████████████████████▌       | 95339/122310 [3:09:51<36:03, 12.47it/s][A
 78%|██████████████████████████▌       | 95346/122310 [3:09:51<38:53, 11.56

step: 11580, loss: 78.8098938664638, epoch: 0



 78%|██████████████████████████▌       | 95440/122310 [3:09:58<30:06, 14.88it/s][A
 78%|██████████████████████████▌       | 95445/122310 [3:09:59<35:51, 12.49it/s][A
 78%|██████████████████████████▌       | 95455/122310 [3:09:59<34:54, 12.82it/s][A
 78%|██████████████████████████▌       | 95467/122310 [3:10:00<32:11, 13.90it/s][A
 78%|██████████████████████████▌       | 95472/122310 [3:10:01<38:02, 11.76it/s][A
 78%|██████████████████████████▌       | 95481/122310 [3:10:02<37:28, 11.93it/s][A
 78%|██████████████████████████▌       | 95499/122310 [3:10:02<28:24, 15.73it/s][A
 78%|██████████████████████████▌       | 95503/122310 [3:10:03<35:20, 12.64it/s][A
 78%|██████████████████████████▌       | 95512/122310 [3:10:04<35:37, 12.54it/s][A
 78%|██████████████████████████▌       | 95522/122310 [3:10:05<34:44, 12.85it/s][A
 78%|██████████████████████████▌       | 95531/122310 [3:10:05<35:13, 12.67it/s][A
 78%|██████████████████████████▌       | 95538/122310 [3:10:06<38:05, 11.71

step: 11600, loss: 91.0502028773415, epoch: 0
sim1 and sim2 are 0.5575104594330165, 0.15677800701397276
cosine of pred and queen: 0.04449501427606468
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: korea
Actual: beijing:china::tokyo:japan, pred: outside
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: son
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: country
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: bethhogla
Actual: india:asia::paris:europe, pred: annexation
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: spain
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: regularize
Actual: m

Actual: decided:undecided::pleasant:unpleasant, pred: propelled
Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: like
Actual: islamabad:pakistan::oslo:norway, pred: state
Actual: grandfather:grandmother::father:mother, pred: came
Actual: grandpa:grandma::sons:daughters, pred: david
Actual: king:queen::husband:wife, pred: madest
Actual: man:woman::brothers:sisters, pred: jethlah
Actual: stepson:stepdaughter::stepfather:stepmother, pred: despairing
Actual: uncle:aunt::grandson:granddaughter, pred: broadening
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: state
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: state
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: bank



 78%|████████████████████████▏      | 95599/122310 [3:11:28<21:36:05,  2.91s/it][A

Actual: india:rupee::denmark:krone, pred: trackless
Accuracy is 0.029585798816568046



 78%|████████████████████████▏      | 95609/122310 [3:11:29<14:18:43,  1.93s/it][A
 78%|█████████████████████████       | 95620/122310 [3:11:30<9:26:23,  1.27s/it][A
 78%|█████████████████████████       | 95625/122310 [3:11:30<7:52:55,  1.06s/it][A
 78%|█████████████████████████       | 95627/122310 [3:11:32<7:38:23,  1.03s/it][A
 78%|█████████████████████████       | 95630/122310 [3:11:32<6:38:07,  1.12it/s][A
 78%|█████████████████████████       | 95637/122310 [3:11:33<4:30:17,  1.64it/s][A
 78%|█████████████████████████       | 95644/122310 [3:11:34<3:13:31,  2.30it/s][A
 78%|█████████████████████████       | 95654/122310 [3:11:35<2:07:27,  3.49it/s][A
 78%|█████████████████████████       | 95662/122310 [3:11:35<1:39:35,  4.46it/s][A
 78%|█████████████████████████       | 95677/122310 [3:11:36<1:03:29,  6.99it/s][A
 78%|██████████████████████████▌       | 95691/122310 [3:11:37<48:05,  9.23it/s][A
 78%|██████████████████████████▌       | 95700/122310 [3:11:38<44:50,  9.89

step: 11620, loss: 217.57828965591594, epoch: 0



 78%|█████████████████████████       | 95714/122310 [3:11:43<2:18:21,  3.20it/s][A
 78%|█████████████████████████       | 95724/122310 [3:11:44<1:34:28,  4.69it/s][A
 78%|█████████████████████████       | 95739/122310 [3:11:45<1:00:12,  7.36it/s][A
 78%|██████████████████████████▌       | 95752/122310 [3:11:45<47:09,  9.39it/s][A
 78%|██████████████████████████▌       | 95762/122310 [3:11:46<42:51, 10.32it/s][A
 78%|██████████████████████████▌       | 95765/122310 [3:11:47<50:11,  8.82it/s][A
 78%|██████████████████████████▌       | 95771/122310 [3:11:48<50:51,  8.70it/s][A
 78%|██████████████████████████▌       | 95777/122310 [3:11:48<51:29,  8.59it/s][A
 78%|██████████████████████████▋       | 95784/122310 [3:11:49<50:06,  8.82it/s][A
 78%|██████████████████████████▋       | 95790/122310 [3:11:50<51:04,  8.65it/s][A
 78%|██████████████████████████▋       | 95795/122310 [3:11:51<54:12,  8.15it/s][A
 78%|██████████████████████████▋       | 95803/122310 [3:11:51<49:11,  8.98

step: 11640, loss: 78.29243335823041, epoch: 0



 78%|██████████████████████████▋       | 95883/122310 [3:11:58<36:33, 12.05it/s][A
 78%|██████████████████████████▋       | 95892/122310 [3:11:59<36:18, 12.13it/s][A
 78%|██████████████████████████▋       | 95899/122310 [3:11:59<38:37, 11.39it/s][A
 78%|██████████████████████████▋       | 95905/122310 [3:12:00<42:05, 10.46it/s][A
 78%|██████████████████████████▋       | 95921/122310 [3:12:01<31:40, 13.88it/s][A
 78%|██████████████████████████▋       | 95930/122310 [3:12:02<33:10, 13.26it/s][A
 78%|██████████████████████████▋       | 95943/122310 [3:12:02<30:04, 14.61it/s][A
 78%|██████████████████████████▋       | 95949/122310 [3:12:03<34:28, 12.75it/s][A
 78%|██████████████████████████▋       | 95960/122310 [3:12:04<33:01, 13.30it/s][A
 78%|██████████████████████████▋       | 95969/122310 [3:12:04<33:48, 12.98it/s][A
 78%|██████████████████████████▋       | 95973/122310 [3:12:05<40:50, 10.75it/s][A
 78%|██████████████████████████▋       | 95979/122310 [3:12:06<44:06,  9.95

step: 11660, loss: 99.96750060138122, epoch: 0



 79%|██████████████████████████▋       | 96046/122310 [3:12:12<40:53, 10.70it/s][A
 79%|██████████████████████████▋       | 96061/122310 [3:12:13<31:56, 13.70it/s][A
 79%|██████████████████████████▋       | 96067/122310 [3:12:14<36:20, 12.03it/s][A
 79%|██████████████████████████▋       | 96076/122310 [3:12:15<35:53, 12.18it/s][A
 79%|██████████████████████████▋       | 96087/122310 [3:12:15<33:29, 13.05it/s][A
 79%|██████████████████████████▋       | 96093/122310 [3:12:16<37:48, 11.56it/s][A
 79%|██████████████████████████▋       | 96102/122310 [3:12:17<37:03, 11.79it/s][A
 79%|██████████████████████████▋       | 96111/122310 [3:12:18<36:27, 11.98it/s][A
 79%|██████████████████████████▋       | 96115/122310 [3:12:18<43:40, 10.00it/s][A
 79%|██████████████████████████▋       | 96120/122310 [3:12:19<48:04,  9.08it/s][A
 79%|██████████████████████████▋       | 96132/122310 [3:12:20<38:41, 11.28it/s][A
 79%|██████████████████████████▋       | 96139/122310 [3:12:21<40:24, 10.79

step: 11680, loss: 127.31576732939543, epoch: 0



 79%|██████████████████████████▋       | 96216/122310 [3:12:27<38:44, 11.22it/s][A
 79%|██████████████████████████▋       | 96222/122310 [3:12:28<42:07, 10.32it/s][A
 79%|██████████████████████████▋       | 96228/122310 [3:12:29<44:52,  9.69it/s][A
 79%|██████████████████████████▊       | 96236/122310 [3:12:29<43:02, 10.10it/s][A
 79%|██████████████████████████▊       | 96242/122310 [3:12:30<45:30,  9.55it/s][A
 79%|██████████████████████████▊       | 96253/122310 [3:12:31<38:46, 11.20it/s][A
 79%|██████████████████████████▊       | 96266/122310 [3:12:31<33:07, 13.11it/s][A
 79%|██████████████████████████▊       | 96277/122310 [3:12:32<31:38, 13.71it/s][A
 79%|██████████████████████████▊       | 96291/122310 [3:12:33<28:16, 15.34it/s][A
 79%|██████████████████████████▊       | 96303/122310 [3:12:34<27:39, 15.67it/s][A
 79%|█████████████████████████▏      | 96305/122310 [3:12:37<1:20:17,  5.40it/s][A
 79%|█████████████████████████▏      | 96313/122310 [3:12:38<1:08:47,  6.30

step: 11700, loss: 83.44274204442911, epoch: 0



 79%|██████████████████████████▊       | 96357/122310 [3:12:42<45:32,  9.50it/s][A
 79%|██████████████████████████▊       | 96364/122310 [3:12:42<45:19,  9.54it/s][A
 79%|██████████████████████████▊       | 96372/122310 [3:12:43<43:37,  9.91it/s][A
 79%|██████████████████████████▊       | 96379/122310 [3:12:44<43:52,  9.85it/s][A
 79%|██████████████████████████▊       | 96388/122310 [3:12:45<40:49, 10.58it/s][A
 79%|██████████████████████████▊       | 96393/122310 [3:12:45<45:38,  9.46it/s][A
 79%|██████████████████████████▊       | 96395/122310 [3:12:46<57:35,  7.50it/s][A
 79%|██████████████████████████▊       | 96404/122310 [3:12:47<48:23,  8.92it/s][A
 79%|█████████████████████████▏      | 96406/122310 [3:12:48<1:00:51,  7.09it/s][A
 79%|██████████████████████████▊       | 96420/122310 [3:12:48<40:25, 10.67it/s][A
 79%|██████████████████████████▊       | 96429/122310 [3:12:49<38:31, 11.20it/s][A
 79%|██████████████████████████▊       | 96442/122310 [3:12:50<32:43, 13.18

step: 11720, loss: 84.15075267543475, epoch: 0



 79%|██████████████████████████▊       | 96514/122310 [3:12:56<43:55,  9.79it/s][A
 79%|██████████████████████████▊       | 96525/122310 [3:12:57<37:43, 11.39it/s][A
 79%|██████████████████████████▊       | 96538/122310 [3:12:58<32:22, 13.26it/s][A
 79%|██████████████████████████▊       | 96541/122310 [3:12:58<40:34, 10.58it/s][A
 79%|██████████████████████████▊       | 96546/122310 [3:12:59<45:27,  9.45it/s][A
 79%|██████████████████████████▊       | 96551/122310 [3:13:00<49:55,  8.60it/s][A
 79%|█████████████████████████▎      | 96553/122310 [3:13:01<1:02:08,  6.91it/s][A
 79%|██████████████████████████▊       | 96560/122310 [3:13:01<55:39,  7.71it/s][A
 79%|██████████████████████████▊       | 96575/122310 [3:13:02<37:05, 11.57it/s][A
 79%|██████████████████████████▊       | 96594/122310 [3:13:03<27:02, 15.85it/s][A
 79%|██████████████████████████▊       | 96599/122310 [3:13:04<32:35, 13.15it/s][A
 79%|██████████████████████████▊       | 96609/122310 [3:13:04<32:18, 13.26

step: 11740, loss: 76.99070914451033, epoch: 0



 79%|██████████████████████████▉       | 96702/122310 [3:13:11<30:57, 13.79it/s][A
 79%|██████████████████████████▉       | 96711/122310 [3:13:12<31:50, 13.40it/s][A
 79%|██████████████████████████▉       | 96717/122310 [3:13:12<36:00, 11.85it/s][A
 79%|██████████████████████████▉       | 96730/122310 [3:13:13<31:18, 13.62it/s][A
 79%|██████████████████████████▉       | 96742/122310 [3:13:14<29:29, 14.45it/s][A
 79%|██████████████████████████▉       | 96750/122310 [3:13:15<31:46, 13.41it/s][A
 79%|██████████████████████████▉       | 96759/122310 [3:13:15<32:34, 13.07it/s][A
 79%|██████████████████████████▉       | 96771/122310 [3:13:16<30:15, 14.07it/s][A
 79%|██████████████████████████▉       | 96786/122310 [3:13:17<26:34, 16.01it/s][A
 79%|██████████████████████████▉       | 96796/122310 [3:13:17<27:46, 15.31it/s][A
 79%|██████████████████████████▉       | 96808/122310 [3:13:18<27:13, 15.61it/s][A
 79%|██████████████████████████▉       | 96814/122310 [3:13:19<31:46, 13.38

step: 11760, loss: 98.40786475650577, epoch: 0



 79%|██████████████████████████▉       | 96883/122310 [3:13:26<40:49, 10.38it/s][A
 79%|██████████████████████████▉       | 96892/122310 [3:13:26<38:38, 10.96it/s][A
 79%|██████████████████████████▉       | 96898/122310 [3:13:27<41:51, 10.12it/s][A
 79%|██████████████████████████▉       | 96910/122310 [3:13:28<35:18, 11.99it/s][A
 79%|██████████████████████████▉       | 96918/122310 [3:13:28<36:13, 11.68it/s][A
 79%|██████████████████████████▉       | 96926/122310 [3:13:29<36:56, 11.45it/s][A
 79%|██████████████████████████▉       | 96939/122310 [3:13:30<31:47, 13.30it/s][A
 79%|██████████████████████████▉       | 96949/122310 [3:13:31<31:24, 13.46it/s][A
 79%|██████████████████████████▉       | 96953/122310 [3:13:31<38:22, 11.01it/s][A
 79%|██████████████████████████▉       | 96962/122310 [3:13:32<37:02, 11.40it/s][A
 79%|██████████████████████████▉       | 96970/122310 [3:13:33<37:38, 11.22it/s][A
 79%|██████████████████████████▉       | 96972/122310 [3:13:34<48:39,  8.68

step: 11780, loss: 84.5332798795739, epoch: 0



 79%|██████████████████████████▉       | 97042/122310 [3:13:40<34:31, 12.20it/s][A
 79%|██████████████████████████▉       | 97053/122310 [3:13:41<32:16, 13.04it/s][A
 79%|██████████████████████████▉       | 97057/122310 [3:13:42<38:59, 10.79it/s][A
 79%|██████████████████████████▉       | 97061/122310 [3:13:42<45:51,  9.18it/s][A
 79%|█████████████████████████▍      | 97062/122310 [3:13:43<1:01:19,  6.86it/s][A
 79%|██████████████████████████▉       | 97071/122310 [3:13:44<49:31,  8.49it/s][A
 79%|██████████████████████████▉       | 97079/122310 [3:13:45<45:36,  9.22it/s][A
 79%|██████████████████████████▉       | 97084/122310 [3:13:45<49:16,  8.53it/s][A
 79%|██████████████████████████▉       | 97092/122310 [3:13:46<45:24,  9.25it/s][A
 79%|██████████████████████████▉       | 97098/122310 [3:13:47<47:13,  8.90it/s][A
 79%|██████████████████████████▉       | 97106/122310 [3:13:48<44:10,  9.51it/s][A
 79%|██████████████████████████▉       | 97115/122310 [3:13:48<40:33, 10.35

step: 11800, loss: 80.45194414372658, epoch: 0



 79%|███████████████████████████       | 97167/122310 [3:13:55<54:51,  7.64it/s][A
 79%|███████████████████████████       | 97177/122310 [3:13:56<44:41,  9.37it/s][A
 79%|█████████████████████████▍      | 97181/122310 [3:13:57<1:06:05,  6.34it/s][A
 79%|█████████████████████████▍      | 97183/122310 [3:13:58<1:16:07,  5.50it/s][A
 79%|███████████████████████████       | 97200/122310 [3:13:59<41:33, 10.07it/s][A
 79%|███████████████████████████       | 97208/122310 [3:13:59<40:42, 10.28it/s][A
 79%|███████████████████████████       | 97218/122310 [3:14:00<37:32, 11.14it/s][A
 79%|███████████████████████████       | 97227/122310 [3:14:01<36:22, 11.49it/s][A
 80%|███████████████████████████       | 97240/122310 [3:14:02<31:26, 13.29it/s][A
 80%|███████████████████████████       | 97246/122310 [3:14:02<35:24, 11.80it/s][A
 80%|███████████████████████████       | 97252/122310 [3:14:03<38:50, 10.75it/s][A
 80%|███████████████████████████       | 97262/122310 [3:14:04<35:55, 11.62

step: 11820, loss: 90.02467514662952, epoch: 0



 80%|███████████████████████████       | 97339/122310 [3:14:10<34:34, 12.04it/s][A
 80%|███████████████████████████       | 97346/122310 [3:14:10<36:41, 11.34it/s][A
 80%|███████████████████████████       | 97359/122310 [3:14:11<31:17, 13.29it/s][A
 80%|███████████████████████████       | 97366/122310 [3:14:12<34:14, 12.14it/s][A
 80%|███████████████████████████       | 97377/122310 [3:14:12<31:51, 13.04it/s][A
 80%|███████████████████████████       | 97382/122310 [3:14:13<37:12, 11.17it/s][A
 80%|███████████████████████████       | 97390/122310 [3:14:14<37:24, 11.10it/s][A
 80%|███████████████████████████       | 97398/122310 [3:14:15<37:31, 11.06it/s][A
 80%|███████████████████████████       | 97406/122310 [3:14:15<37:38, 11.03it/s][A
 80%|███████████████████████████       | 97416/122310 [3:14:16<35:09, 11.80it/s][A
 80%|███████████████████████████       | 97426/122310 [3:14:17<33:28, 12.39it/s][A
 80%|███████████████████████████       | 97430/122310 [3:14:18<40:12, 10.31

step: 11840, loss: 76.72530273495543, epoch: 0



 80%|███████████████████████████       | 97499/122310 [3:14:24<43:39,  9.47it/s][A
 80%|███████████████████████████       | 97508/122310 [3:14:25<39:49, 10.38it/s][A
 80%|███████████████████████████       | 97517/122310 [3:14:26<37:43, 10.95it/s][A
 80%|███████████████████████████       | 97529/122310 [3:14:26<32:44, 12.61it/s][A
 80%|███████████████████████████       | 97544/122310 [3:14:27<27:33, 14.98it/s][A
 80%|███████████████████████████       | 97556/122310 [3:14:28<26:48, 15.39it/s][A
 80%|███████████████████████████       | 97563/122310 [3:14:29<30:13, 13.65it/s][A
 80%|███████████████████████████       | 97570/122310 [3:14:29<33:11, 12.42it/s][A
 80%|███████████████████████████       | 97574/122310 [3:14:30<39:54, 10.33it/s][A
 80%|███████████████████████████▏      | 97583/122310 [3:14:31<37:48, 10.90it/s][A
 80%|███████████████████████████▏      | 97591/122310 [3:14:32<37:47, 10.90it/s][A
 80%|███████████████████████████▏      | 97603/122310 [3:14:32<32:59, 12.48

step: 11860, loss: 80.8435734683312, epoch: 0



 80%|███████████████████████████▏      | 97680/122310 [3:14:39<37:17, 11.01it/s][A
 80%|███████████████████████████▏      | 97690/122310 [3:14:40<35:17, 11.63it/s][A
 80%|███████████████████████████▏      | 97697/122310 [3:14:41<37:43, 10.88it/s][A
 80%|███████████████████████████▏      | 97706/122310 [3:14:41<36:47, 11.15it/s][A
 80%|███████████████████████████▏      | 97721/122310 [3:14:42<29:56, 13.69it/s][A
 80%|███████████████████████████▏      | 97732/122310 [3:14:43<29:33, 13.86it/s][A
 80%|███████████████████████████▏      | 97740/122310 [3:14:44<31:53, 12.84it/s][A
 80%|███████████████████████████▏      | 97745/122310 [3:14:44<37:23, 10.95it/s][A
 80%|███████████████████████████▏      | 97756/122310 [3:14:45<34:27, 11.88it/s][A
 80%|███████████████████████████▏      | 97765/122310 [3:14:46<34:29, 11.86it/s][A
 80%|███████████████████████████▏      | 97770/122310 [3:14:47<40:03, 10.21it/s][A
 80%|███████████████████████████▏      | 97779/122310 [3:14:48<38:24, 10.64

step: 11880, loss: 91.6867641153881, epoch: 0



 80%|███████████████████████████▏      | 97846/122310 [3:14:54<44:20,  9.20it/s][A
 80%|███████████████████████████▏      | 97857/122310 [3:14:55<37:55, 10.74it/s][A
 80%|███████████████████████████▏      | 97861/122310 [3:14:56<44:39,  9.12it/s][A
 80%|███████████████████████████▏      | 97866/122310 [3:14:57<48:57,  8.32it/s][A
 80%|███████████████████████████▏      | 97883/122310 [3:14:57<32:37, 12.48it/s][A
 80%|███████████████████████████▏      | 97891/122310 [3:14:58<34:17, 11.87it/s][A
 80%|███████████████████████████▏      | 97896/122310 [3:14:59<39:23, 10.33it/s][A
 80%|███████████████████████████▏      | 97905/122310 [3:15:00<37:51, 10.74it/s][A
 80%|███████████████████████████▏      | 97914/122310 [3:15:01<47:46,  8.51it/s][A
 80%|███████████████████████████▏      | 97925/122310 [3:15:02<40:36, 10.01it/s][A
 80%|███████████████████████████▏      | 97943/122310 [3:15:03<30:02, 13.52it/s][A
 80%|███████████████████████████▏      | 97951/122310 [3:15:04<32:03, 12.67

step: 11900, loss: 87.01398143126444, epoch: 0



 80%|███████████████████████████▏      | 98009/122310 [3:15:10<36:31, 11.09it/s][A
 80%|███████████████████████████▏      | 98021/122310 [3:15:10<32:33, 12.43it/s][A
 80%|███████████████████████████▏      | 98023/122310 [3:15:11<42:28,  9.53it/s][A
 80%|███████████████████████████▎      | 98032/122310 [3:15:12<39:32, 10.23it/s][A
 80%|███████████████████████████▎      | 98038/122310 [3:15:13<42:40,  9.48it/s][A
 80%|███████████████████████████▎      | 98042/122310 [3:15:14<49:13,  8.22it/s][A
 80%|███████████████████████████▎      | 98046/122310 [3:15:14<54:54,  7.36it/s][A
 80%|███████████████████████████▎      | 98056/122310 [3:15:15<44:23,  9.11it/s][A
 80%|███████████████████████████▎      | 98066/122310 [3:15:16<39:13, 10.30it/s][A
 80%|███████████████████████████▎      | 98081/122310 [3:15:17<30:51, 13.09it/s][A
 80%|███████████████████████████▎      | 98084/122310 [3:15:17<38:59, 10.35it/s][A
 80%|███████████████████████████▎      | 98091/122310 [3:15:18<40:21, 10.00

step: 11920, loss: 83.73052368461776, epoch: 0



 80%|███████████████████████████▎      | 98181/122310 [3:15:25<36:06, 11.14it/s][A
 80%|███████████████████████████▎      | 98183/122310 [3:15:26<46:53,  8.57it/s][A
 80%|███████████████████████████▎      | 98190/122310 [3:15:26<45:51,  8.77it/s][A
 80%|███████████████████████████▎      | 98201/122310 [3:15:27<38:19, 10.48it/s][A
 80%|███████████████████████████▎      | 98219/122310 [3:15:28<27:50, 14.42it/s][A
 80%|███████████████████████████▎      | 98225/122310 [3:15:29<32:16, 12.44it/s][A
 80%|███████████████████████████▎      | 98233/122310 [3:15:30<33:53, 11.84it/s][A
 80%|███████████████████████████▎      | 98241/122310 [3:15:30<35:05, 11.43it/s][A
 80%|███████████████████████████▎      | 98245/122310 [3:15:31<41:54,  9.57it/s][A
 80%|███████████████████████████▎      | 98247/122310 [3:15:32<53:21,  7.52it/s][A
 80%|███████████████████████████▎      | 98253/122310 [3:15:33<52:35,  7.62it/s][A
 80%|███████████████████████████▎      | 98264/122310 [3:15:33<41:27,  9.67

step: 11940, loss: 99.5840978844172, epoch: 0



 80%|███████████████████████████▎      | 98340/122310 [3:15:40<46:08,  8.66it/s][A
 80%|███████████████████████████▎      | 98362/122310 [3:15:41<27:16, 14.63it/s][A
 80%|███████████████████████████▎      | 98364/122310 [3:15:42<35:52, 11.12it/s][A
 80%|███████████████████████████▎      | 98374/122310 [3:15:42<33:59, 11.73it/s][A
 80%|███████████████████████████▎      | 98380/122310 [3:15:43<37:43, 10.57it/s][A
 80%|███████████████████████████▎      | 98389/122310 [3:15:44<36:29, 10.93it/s][A
 80%|███████████████████████████▎      | 98396/122310 [3:15:45<38:10, 10.44it/s][A
 80%|███████████████████████████▎      | 98409/122310 [3:15:46<32:04, 12.42it/s][A
 80%|███████████████████████████▎      | 98419/122310 [3:15:46<31:28, 12.65it/s][A
 80%|███████████████████████████▎      | 98433/122310 [3:15:47<27:48, 14.31it/s][A
 80%|███████████████████████████▎      | 98457/122310 [3:15:48<20:27, 19.43it/s][A
 81%|███████████████████████████▎      | 98468/122310 [3:15:49<22:14, 17.87

step: 11960, loss: 86.61868021038299, epoch: 0



 81%|███████████████████████████▍      | 98536/122310 [3:15:55<40:37,  9.75it/s][A
 81%|███████████████████████████▍      | 98550/122310 [3:15:56<32:02, 12.36it/s][A
 81%|███████████████████████████▍      | 98560/122310 [3:15:57<31:24, 12.60it/s][A
 81%|███████████████████████████▍      | 98567/122310 [3:15:58<34:17, 11.54it/s][A
 81%|███████████████████████████▍      | 98582/122310 [3:15:58<28:10, 14.04it/s][A
 81%|███████████████████████████▍      | 98593/122310 [3:15:59<28:04, 14.08it/s][A
 81%|███████████████████████████▍      | 98597/122310 [3:16:00<34:29, 11.46it/s][A
 81%|███████████████████████████▍      | 98602/122310 [3:16:01<39:30, 10.00it/s][A
 81%|███████████████████████████▍      | 98608/122310 [3:16:02<42:22,  9.32it/s][A
 81%|███████████████████████████▍      | 98621/122310 [3:16:02<33:55, 11.64it/s][A
 81%|███████████████████████████▍      | 98631/122310 [3:16:03<32:38, 12.09it/s][A
 81%|███████████████████████████▍      | 98637/122310 [3:16:04<36:29, 10.81

step: 11980, loss: 78.34486321387146, epoch: 0



 81%|███████████████████████████▍      | 98678/122310 [3:16:11<59:57,  6.57it/s][A
 81%|█████████████████████████▊      | 98682/122310 [3:16:11<1:03:33,  6.20it/s][A
 81%|███████████████████████████▍      | 98689/122310 [3:16:12<55:39,  7.07it/s][A
 81%|███████████████████████████▍      | 98705/122310 [3:16:13<35:23, 11.12it/s][A
 81%|███████████████████████████▍      | 98715/122310 [3:16:14<33:42, 11.67it/s][A
 81%|███████████████████████████▍      | 98721/122310 [3:16:15<37:10, 10.57it/s][A
 81%|███████████████████████████▍      | 98729/122310 [3:16:15<37:17, 10.54it/s][A
 81%|███████████████████████████▍      | 98733/122310 [3:16:16<44:04,  8.91it/s][A
 81%|███████████████████████████▍      | 98740/122310 [3:16:17<43:32,  9.02it/s][A
 81%|███████████████████████████▍      | 98758/122310 [3:16:18<29:24, 13.35it/s][A
 81%|███████████████████████████▍      | 98768/122310 [3:16:18<29:32, 13.29it/s][A
 81%|███████████████████████████▍      | 98775/122310 [3:16:19<32:35, 12.04

step: 12000, loss: 72.1185827690717, epoch: 0
sim1 and sim2 are 0.6015358617917683, 0.13671093808161458
cosine of pred and queen: 0.09013685747130527
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: outside
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: son
Actual: kabul:afghanistan::hanoi:vietnam, pred: maladie
Actual: canberra:australia::doha:qatar, pred: australian
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: opposition
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: spain
Actual: nigeria:africa::france:europe, pred: spain
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: regularize
Actu


 81%|███████████████████████████▍      | 98838/122310 [3:16:43<32:02, 12.21it/s][A

Actual: russia:russian::germany:german, pred: france
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: friday
Actual: croatia:croatian::france:french, pred: defense
Actual: denmark:danish::germany:german, pred: european
Accuracy is 0.044444444444444446
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: dimly
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: shall
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: dozen
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:plays::listen:listens, pred: discarded
Actual: predict:predicts::search:searches, pred: pebbles
Actual: machine


 81%|█████████████████████████      | 98841/122310 [3:17:45<21:37:26,  3.32s/it][A
 81%|█████████████████████████      | 98847/122310 [3:17:45<16:15:42,  2.50s/it][A
 81%|█████████████████████████      | 98854/122310 [3:17:46<11:35:13,  1.78s/it][A
 81%|█████████████████████████▊      | 98858/122310 [3:17:47<9:32:59,  1.47s/it][A
 81%|█████████████████████████▊      | 98864/122310 [3:17:48<6:57:18,  1.07s/it][A
 81%|█████████████████████████▊      | 98875/122310 [3:17:48<4:06:21,  1.59it/s][A
 81%|█████████████████████████▊      | 98890/122310 [3:17:49<2:22:00,  2.75it/s][A
 81%|█████████████████████████▉      | 98904/122310 [3:17:50<1:35:56,  4.07it/s][A
 81%|█████████████████████████▉      | 98917/122310 [3:17:51<1:11:18,  5.47it/s][A
 81%|█████████████████████████▉      | 98922/122310 [3:17:51<1:09:26,  5.61it/s][A
 81%|█████████████████████████▉      | 98927/122310 [3:17:52<1:07:14,  5.80it/s][A
 81%|███████████████████████████▌      | 98938/122310 [3:17:53<52:32,  7.41

step: 12020, loss: 88.59974867168803, epoch: 0



 81%|███████████████████████████▌      | 99031/122310 [3:18:01<34:25, 11.27it/s][A
 81%|███████████████████████████▌      | 99033/122310 [3:18:01<44:13,  8.77it/s][A
 81%|███████████████████████████▌      | 99038/122310 [3:18:02<47:27,  8.17it/s][A
 81%|███████████████████████████▌      | 99050/122310 [3:18:03<37:14, 10.41it/s][A
 81%|███████████████████████████▌      | 99052/122310 [3:18:04<47:29,  8.16it/s][A
 81%|███████████████████████████▌      | 99074/122310 [3:18:04<26:51, 14.42it/s][A
 81%|███████████████████████████▌      | 99086/122310 [3:18:05<25:54, 14.94it/s][A
 81%|███████████████████████████▌      | 99096/122310 [3:18:06<26:52, 14.40it/s][A
 81%|███████████████████████████▌      | 99104/122310 [3:18:07<29:07, 13.28it/s][A
 81%|███████████████████████████▌      | 99110/122310 [3:18:07<33:04, 11.69it/s][A
 81%|███████████████████████████▌      | 99121/122310 [3:18:08<30:48, 12.55it/s][A
 81%|███████████████████████████▌      | 99132/122310 [3:18:09<29:18, 13.18

step: 12040, loss: 75.75311849999133, epoch: 0



 81%|███████████████████████████▌      | 99178/122310 [3:18:16<47:41,  8.09it/s][A
 81%|███████████████████████████▌      | 99186/122310 [3:18:16<43:35,  8.84it/s][A
 81%|███████████████████████████▌      | 99198/122310 [3:18:17<35:12, 10.94it/s][A
 81%|███████████████████████████▌      | 99204/122310 [3:18:18<38:16, 10.06it/s][A
 81%|███████████████████████████▌      | 99220/122310 [3:18:19<28:42, 13.41it/s][A
 81%|███████████████████████████▌      | 99230/122310 [3:18:19<28:48, 13.35it/s][A
 81%|███████████████████████████▌      | 99246/122310 [3:18:20<24:31, 15.67it/s][A
 81%|███████████████████████████▌      | 99260/122310 [3:18:21<23:11, 16.57it/s][A
 81%|███████████████████████████▌      | 99264/122310 [3:18:22<29:08, 13.18it/s][A
 81%|███████████████████████████▌      | 99272/122310 [3:18:22<30:58, 12.40it/s][A
 81%|███████████████████████████▌      | 99289/122310 [3:18:23<24:54, 15.40it/s][A
 81%|███████████████████████████▌      | 99296/122310 [3:18:24<28:13, 13.59

step: 12060, loss: 99.7106843846917, epoch: 0



 81%|███████████████████████████▋      | 99395/122310 [3:18:31<33:04, 11.55it/s][A
 81%|███████████████████████████▋      | 99404/122310 [3:18:31<32:44, 11.66it/s][A
 81%|███████████████████████████▋      | 99410/122310 [3:18:32<35:45, 10.67it/s][A
 81%|███████████████████████████▋      | 99414/122310 [3:18:33<41:27,  9.20it/s][A
 81%|███████████████████████████▋      | 99423/122310 [3:18:34<38:18,  9.96it/s][A
 81%|███████████████████████████▋      | 99428/122310 [3:18:34<42:18,  9.01it/s][A
 81%|███████████████████████████▋      | 99443/122310 [3:18:35<31:20, 12.16it/s][A
 81%|███████████████████████████▋      | 99452/122310 [3:18:36<31:36, 12.05it/s][A
 81%|███████████████████████████▋      | 99458/122310 [3:18:37<35:11, 10.82it/s][A
 81%|███████████████████████████▋      | 99463/122310 [3:18:38<39:33,  9.63it/s][A
 81%|███████████████████████████▋      | 99473/122310 [3:18:38<35:31, 10.71it/s][A
 81%|███████████████████████████▋      | 99477/122310 [3:18:39<41:56,  9.07

step: 12080, loss: 72.48587152280528, epoch: 0



 81%|███████████████████████████▋      | 99572/122310 [3:18:46<24:28, 15.48it/s][A
 81%|███████████████████████████▋      | 99578/122310 [3:18:47<28:39, 13.22it/s][A
 81%|███████████████████████████▋      | 99590/122310 [3:18:47<27:02, 14.00it/s][A
 81%|███████████████████████████▋      | 99594/122310 [3:18:48<33:15, 11.38it/s][A
 81%|███████████████████████████▋      | 99599/122310 [3:18:49<37:59,  9.96it/s][A
 81%|███████████████████████████▋      | 99609/122310 [3:18:50<34:38, 10.92it/s][A
 81%|███████████████████████████▋      | 99622/122310 [3:18:50<29:42, 12.73it/s][A
 81%|███████████████████████████▋      | 99628/122310 [3:18:51<33:22, 11.33it/s][A
 81%|███████████████████████████▋      | 99637/122310 [3:18:52<32:54, 11.48it/s][A
 81%|███████████████████████████▋      | 99640/122310 [3:18:53<40:54,  9.24it/s][A
 81%|███████████████████████████▋      | 99654/122310 [3:18:53<31:28, 12.00it/s][A
 81%|███████████████████████████▋      | 99664/122310 [3:18:54<30:34, 12.35

step: 12100, loss: 126.48796089667809, epoch: 0



 82%|███████████████████████████▋      | 99736/122310 [3:19:01<31:39, 11.89it/s][A
 82%|███████████████████████████▋      | 99738/122310 [3:19:02<39:21,  9.56it/s][A
 82%|███████████████████████████▋      | 99745/122310 [3:19:02<39:28,  9.53it/s][A
 82%|███████████████████████████▋      | 99752/122310 [3:19:03<39:45,  9.46it/s][A
 82%|███████████████████████████▋      | 99758/122310 [3:19:04<41:42,  9.01it/s][A
 82%|███████████████████████████▋      | 99767/122310 [3:19:05<38:09,  9.85it/s][A
 82%|███████████████████████████▋      | 99778/122310 [3:19:06<33:31, 11.20it/s][A
 82%|███████████████████████████▋      | 99793/122310 [3:19:06<27:20, 13.72it/s][A
 82%|███████████████████████████▋      | 99806/122310 [3:19:07<25:26, 14.74it/s][A
 82%|███████████████████████████▋      | 99819/122310 [3:19:08<24:14, 15.46it/s][A
 82%|███████████████████████████▊      | 99828/122310 [3:19:09<26:05, 14.36it/s][A
 82%|███████████████████████████▊      | 99838/122310 [3:19:09<26:42, 14.02

step: 12120, loss: 91.75807038018887, epoch: 0



 82%|███████████████████████████▊      | 99900/122310 [3:19:16<50:32,  7.39it/s][A
 82%|███████████████████████████▊      | 99919/122310 [3:19:17<31:39, 11.78it/s][A
 82%|███████████████████████████▊      | 99935/122310 [3:19:18<26:18, 14.18it/s][A
 82%|███████████████████████████▊      | 99941/122310 [3:19:18<29:53, 12.47it/s][A
 82%|███████████████████████████▊      | 99957/122310 [3:19:19<25:00, 14.90it/s][A
 82%|███████████████████████████▊      | 99964/122310 [3:19:20<28:04, 13.26it/s][A
 82%|███████████████████████████▊      | 99970/122310 [3:19:21<31:47, 11.71it/s][A
 82%|███████████████████████████▊      | 99981/122310 [3:19:21<29:51, 12.46it/s][A
 82%|███████████████████████████▊      | 99986/122310 [3:19:22<34:31, 10.78it/s][A
 82%|███████████████████████████▊      | 99994/122310 [3:19:23<34:45, 10.70it/s][A
 82%|██████████████████████████▉      | 100002/122310 [3:19:24<34:54, 10.65it/s][A
 82%|██████████████████████████▉      | 100010/122310 [3:19:24<34:59, 10.62

step: 12140, loss: 80.40689316140117, epoch: 0



 82%|███████████████████████████      | 100089/122310 [3:19:31<33:17, 11.13it/s][A
 82%|███████████████████████████      | 100094/122310 [3:19:32<37:50,  9.78it/s][A
 82%|███████████████████████████      | 100100/122310 [3:19:33<40:35,  9.12it/s][A
 82%|███████████████████████████      | 100103/122310 [3:19:34<48:30,  7.63it/s][A
 82%|███████████████████████████      | 100112/122310 [3:19:34<41:42,  8.87it/s][A
 82%|███████████████████████████      | 100118/122310 [3:19:35<43:03,  8.59it/s][A
 82%|███████████████████████████      | 100124/122310 [3:19:36<44:12,  8.37it/s][A
 82%|███████████████████████████      | 100132/122310 [3:19:37<40:58,  9.02it/s][A
 82%|███████████████████████████      | 100139/122310 [3:19:37<40:43,  9.07it/s][A
 82%|███████████████████████████      | 100150/122310 [3:19:38<34:30, 10.70it/s][A
 82%|███████████████████████████      | 100159/122310 [3:19:39<33:24, 11.05it/s][A
 82%|███████████████████████████      | 100166/122310 [3:19:40<35:09, 10.50

step: 12160, loss: 82.49503990934318, epoch: 0



 82%|███████████████████████████      | 100259/122310 [3:19:47<30:27, 12.07it/s][A
 82%|███████████████████████████      | 100262/122310 [3:19:47<38:01,  9.66it/s][A
 82%|███████████████████████████      | 100271/122310 [3:19:48<35:35, 10.32it/s][A
 82%|███████████████████████████      | 100278/122310 [3:19:49<36:49,  9.97it/s][A
 82%|███████████████████████████      | 100280/122310 [3:19:50<47:02,  7.80it/s][A
 82%|███████████████████████████      | 100289/122310 [3:19:50<40:37,  9.03it/s][A
 82%|███████████████████████████      | 100295/122310 [3:19:51<42:11,  8.70it/s][A
 82%|███████████████████████████      | 100300/122310 [3:19:52<45:28,  8.07it/s][A
 82%|███████████████████████████      | 100314/122310 [3:19:53<32:59, 11.11it/s][A
 82%|███████████████████████████      | 100322/122310 [3:19:53<33:19, 11.00it/s][A
 82%|███████████████████████████      | 100332/122310 [3:19:54<31:26, 11.65it/s][A
 82%|███████████████████████████      | 100347/122310 [3:19:55<26:03, 14.04

step: 12180, loss: 75.33633341340483, epoch: 0



 82%|███████████████████████████      | 100429/122310 [3:20:02<28:29, 12.80it/s][A
 82%|███████████████████████████      | 100433/122310 [3:20:02<34:28, 10.57it/s][A
 82%|███████████████████████████      | 100440/122310 [3:20:03<35:52, 10.16it/s][A
 82%|███████████████████████████      | 100448/122310 [3:20:04<35:28, 10.27it/s][A
 82%|███████████████████████████      | 100455/122310 [3:20:05<36:42,  9.92it/s][A
 82%|███████████████████████████      | 100467/122310 [3:20:06<31:12, 11.67it/s][A
 82%|███████████████████████████      | 100476/122310 [3:20:06<31:00, 11.74it/s][A
 82%|███████████████████████████      | 100485/122310 [3:20:07<30:56, 11.76it/s][A
 82%|███████████████████████████      | 100494/122310 [3:20:08<30:50, 11.79it/s][A
 82%|███████████████████████████      | 100503/122310 [3:20:09<30:46, 11.81it/s][A
 82%|███████████████████████████      | 100518/122310 [3:20:09<25:38, 14.17it/s][A
 82%|███████████████████████████      | 100528/122310 [3:20:10<26:10, 13.87

step: 12200, loss: 78.87829788711758, epoch: 0



 82%|███████████████████████████▏     | 100600/122310 [3:20:17<32:48, 11.03it/s][A
 82%|███████████████████████████▏     | 100609/122310 [3:20:18<32:08, 11.25it/s][A
 82%|███████████████████████████▏     | 100616/122310 [3:20:18<33:52, 10.68it/s][A
 82%|███████████████████████████▏     | 100625/122310 [3:20:19<32:42, 11.05it/s][A
 82%|███████████████████████████▏     | 100643/122310 [3:20:20<24:20, 14.83it/s][A
 82%|███████████████████████████▏     | 100652/122310 [3:20:21<25:58, 13.90it/s][A
 82%|███████████████████████████▏     | 100669/122310 [3:20:21<21:56, 16.44it/s][A
 82%|███████████████████████████▏     | 100680/122310 [3:20:22<22:42, 15.87it/s][A
 82%|███████████████████████████▏     | 100693/122310 [3:20:23<22:13, 16.21it/s][A
 82%|███████████████████████████▏     | 100703/122310 [3:20:24<23:27, 15.35it/s][A
 82%|███████████████████████████▏     | 100711/122310 [3:20:25<25:54, 13.89it/s][A
 82%|███████████████████████████▏     | 100722/122310 [3:20:25<25:45, 13.97

step: 12220, loss: 104.97660760441009, epoch: 0



 82%|███████████████████████████▏     | 100799/122310 [3:20:32<34:52, 10.28it/s][A
 82%|███████████████████████████▏     | 100807/122310 [3:20:33<34:36, 10.35it/s][A
 82%|███████████████████████████▏     | 100816/122310 [3:20:34<33:07, 10.81it/s][A
 82%|███████████████████████████▏     | 100824/122310 [3:20:34<33:19, 10.74it/s][A
 82%|███████████████████████████▏     | 100831/122310 [3:20:35<34:50, 10.28it/s][A
 82%|███████████████████████████▏     | 100835/122310 [3:20:36<40:45,  8.78it/s][A
 82%|███████████████████████████▏     | 100845/122310 [3:20:37<35:27, 10.09it/s][A
 82%|███████████████████████████▏     | 100854/122310 [3:20:37<33:46, 10.59it/s][A
 82%|███████████████████████████▏     | 100859/122310 [3:20:38<38:07,  9.38it/s][A
 82%|███████████████████████████▏     | 100868/122310 [3:20:39<35:15, 10.14it/s][A
 82%|███████████████████████████▏     | 100876/122310 [3:20:40<34:50, 10.25it/s][A
 82%|███████████████████████████▏     | 100889/122310 [3:20:40<28:58, 12.32

step: 12240, loss: 83.57049261550688, epoch: 0



 83%|███████████████████████████▏     | 100982/122310 [3:20:47<24:49, 14.32it/s][A
 83%|███████████████████████████▏     | 100991/122310 [3:20:48<26:02, 13.64it/s][A
 83%|███████████████████████████▎     | 101007/122310 [3:20:49<22:20, 15.89it/s][A
 83%|███████████████████████████▎     | 101016/122310 [3:20:50<24:12, 14.66it/s][A
 83%|███████████████████████████▎     | 101022/122310 [3:20:50<28:04, 12.63it/s][A
 83%|███████████████████████████▎     | 101027/122310 [3:20:51<32:44, 10.84it/s][A
 83%|███████████████████████████▎     | 101038/122310 [3:20:52<29:42, 11.94it/s][A
 83%|███████████████████████████▎     | 101043/122310 [3:20:53<34:22, 10.31it/s][A
 83%|███████████████████████████▎     | 101046/122310 [3:20:53<42:11,  8.40it/s][A
 83%|███████████████████████████▎     | 101053/122310 [3:20:54<40:59,  8.64it/s][A
 83%|███████████████████████████▎     | 101059/122310 [3:20:55<42:01,  8.43it/s][A
 83%|███████████████████████████▎     | 101067/122310 [3:20:56<39:05,  9.06

step: 12260, loss: 83.6913109048034, epoch: 0



 83%|███████████████████████████▎     | 101132/122310 [3:21:03<34:33, 10.22it/s][A
 83%|███████████████████████████▎     | 101137/122310 [3:21:03<38:38,  9.13it/s][A
 83%|███████████████████████████▎     | 101146/122310 [3:21:04<35:24,  9.96it/s][A
 83%|███████████████████████████▎     | 101161/122310 [3:21:05<27:17, 12.91it/s][A
 83%|███████████████████████████▎     | 101176/122310 [3:21:06<23:31, 14.97it/s][A
 83%|███████████████████████████▎     | 101183/122310 [3:21:06<26:35, 13.25it/s][A
 83%|███████████████████████████▎     | 101190/122310 [3:21:07<29:13, 12.05it/s][A
 83%|███████████████████████████▎     | 101204/122310 [3:21:08<25:13, 13.95it/s][A
 83%|███████████████████████████▎     | 101207/122310 [3:21:09<32:04, 10.97it/s][A
 83%|███████████████████████████▎     | 101224/122310 [3:21:09<24:27, 14.37it/s][A
 83%|███████████████████████████▎     | 101230/122310 [3:21:10<28:16, 12.43it/s][A
 83%|███████████████████████████▎     | 101239/122310 [3:21:11<28:43, 12.22

step: 12280, loss: 95.41740005518048, epoch: 0



 83%|███████████████████████████▎     | 101331/122310 [3:21:18<25:42, 13.60it/s][A
 83%|███████████████████████████▎     | 101336/122310 [3:21:18<30:19, 11.52it/s][A
 83%|███████████████████████████▎     | 101350/122310 [3:21:19<25:45, 13.56it/s][A
 83%|███████████████████████████▎     | 101357/122310 [3:21:20<28:21, 12.32it/s][A
 83%|███████████████████████████▎     | 101368/122310 [3:21:21<26:55, 12.97it/s][A
 83%|███████████████████████████▎     | 101377/122310 [3:21:22<27:37, 12.63it/s][A
 83%|███████████████████████████▎     | 101386/122310 [3:21:23<36:32,  9.54it/s][A
 83%|███████████████████████████▎     | 101399/122310 [3:21:24<30:34, 11.40it/s][A
 83%|███████████████████████████▎     | 101412/122310 [3:21:25<27:04, 12.86it/s][A
 83%|███████████████████████████▎     | 101420/122310 [3:21:25<28:24, 12.25it/s][A
 83%|███████████████████████████▎     | 101433/122310 [3:21:26<25:34, 13.60it/s][A
 83%|███████████████████████████▎     | 101442/122310 [3:21:27<26:32, 13.10

step: 12300, loss: 99.0401962813046, epoch: 0



 83%|███████████████████████████▍     | 101515/122310 [3:21:33<26:43, 12.97it/s][A
 83%|███████████████████████████▍     | 101527/122310 [3:21:34<25:04, 13.81it/s][A
 83%|███████████████████████████▍     | 101535/122310 [3:21:34<26:57, 12.84it/s][A
 83%|███████████████████████████▍     | 101542/122310 [3:21:35<29:34, 11.70it/s][A
 83%|███████████████████████████▍     | 101547/122310 [3:21:36<33:56, 10.20it/s][A
 83%|███████████████████████████▍     | 101552/122310 [3:21:37<37:59,  9.11it/s][A
 83%|███████████████████████████▍     | 101556/122310 [3:21:37<43:34,  7.94it/s][A
 83%|███████████████████████████▍     | 101563/122310 [3:21:38<41:32,  8.33it/s][A
 83%|███████████████████████████▍     | 101564/122310 [3:21:39<55:28,  6.23it/s][A
 83%|███████████████████████████▍     | 101577/122310 [3:21:40<36:21,  9.50it/s][A
 83%|███████████████████████████▍     | 101583/122310 [3:21:41<38:19,  9.01it/s][A
 83%|███████████████████████████▍     | 101597/122310 [3:21:41<29:10, 11.83

step: 12320, loss: 84.1938885709873, epoch: 0



 83%|███████████████████████████▍     | 101662/122310 [3:21:48<27:51, 12.36it/s][A
 83%|███████████████████████████▍     | 101666/122310 [3:21:49<33:28, 10.28it/s][A
 83%|███████████████████████████▍     | 101669/122310 [3:21:50<40:58,  8.39it/s][A
 83%|███████████████████████████▍     | 101680/122310 [3:21:50<33:37, 10.22it/s][A
 83%|███████████████████████████▍     | 101691/122310 [3:21:51<29:55, 11.48it/s][A
 83%|███████████████████████████▍     | 101704/122310 [3:21:52<26:03, 13.18it/s][A
 83%|███████████████████████████▍     | 101713/122310 [3:21:53<26:49, 12.80it/s][A
 83%|███████████████████████████▍     | 101729/122310 [3:21:53<22:26, 15.29it/s][A
 83%|███████████████████████████▍     | 101740/122310 [3:21:54<22:52, 14.98it/s][A
 83%|███████████████████████████▍     | 101753/122310 [3:21:55<21:55, 15.63it/s][A
 83%|███████████████████████████▍     | 101767/122310 [3:21:56<20:47, 16.47it/s][A
 83%|███████████████████████████▍     | 101771/122310 [3:21:56<26:06, 13.12

step: 12340, loss: 77.96668308261567, epoch: 0



 83%|███████████████████████████▍     | 101861/122310 [3:22:03<32:28, 10.50it/s][A
 83%|███████████████████████████▍     | 101871/122310 [3:22:04<30:21, 11.22it/s][A
 83%|███████████████████████████▍     | 101873/122310 [3:22:05<39:07,  8.71it/s][A
 83%|███████████████████████████▍     | 101884/122310 [3:22:06<32:29, 10.47it/s][A
 83%|███████████████████████████▍     | 101891/122310 [3:22:06<33:41, 10.10it/s][A
 83%|███████████████████████████▍     | 101901/122310 [3:22:07<31:02, 10.96it/s][A
 83%|███████████████████████████▍     | 101906/122310 [3:22:08<35:06,  9.69it/s][A
 83%|███████████████████████████▍     | 101907/122310 [3:22:09<47:32,  7.15it/s][A
 83%|███████████████████████████▍     | 101911/122310 [3:22:09<51:30,  6.60it/s][A
 83%|█████████████████████████▊     | 101914/122310 [3:22:12<1:33:42,  3.63it/s][A
 83%|█████████████████████████▊     | 101918/122310 [3:22:12<1:25:13,  3.99it/s][A
 83%|███████████████████████████▌     | 101933/122310 [3:22:13<44:21,  7.66

step: 12360, loss: 76.112919643966, epoch: 0



 83%|███████████████████████████▌     | 101999/122310 [3:22:19<32:17, 10.48it/s][A
 83%|███████████████████████████▌     | 102008/122310 [3:22:19<31:04, 10.89it/s][A
 83%|███████████████████████████▌     | 102011/122310 [3:22:20<38:13,  8.85it/s][A
 83%|███████████████████████████▌     | 102020/122310 [3:22:21<34:50,  9.71it/s][A
 83%|███████████████████████████▌     | 102025/122310 [3:22:22<38:18,  8.83it/s][A
 83%|███████████████████████████▌     | 102032/122310 [3:22:22<37:46,  8.95it/s][A
 83%|███████████████████████████▌     | 102038/122310 [3:22:23<39:20,  8.59it/s][A
 83%|███████████████████████████▌     | 102045/122310 [3:22:24<38:18,  8.82it/s][A
 83%|███████████████████████████▌     | 102053/122310 [3:22:25<36:11,  9.33it/s][A
 83%|███████████████████████████▌     | 102062/122310 [3:22:25<33:27, 10.09it/s][A
 83%|███████████████████████████▌     | 102072/122310 [3:22:26<30:40, 11.00it/s][A
 83%|███████████████████████████▌     | 102077/122310 [3:22:27<34:55,  9.66

step: 12380, loss: 84.9416758317154, epoch: 0



 84%|███████████████████████████▌     | 102152/122310 [3:22:34<33:16, 10.10it/s][A
 84%|███████████████████████████▌     | 102163/122310 [3:22:34<29:23, 11.42it/s][A
 84%|███████████████████████████▌     | 102173/122310 [3:22:35<28:06, 11.94it/s][A
 84%|███████████████████████████▌     | 102182/122310 [3:22:36<28:05, 11.94it/s][A
 84%|███████████████████████████▌     | 102189/122310 [3:22:37<30:07, 11.13it/s][A
 84%|███████████████████████████▌     | 102197/122310 [3:22:38<30:36, 10.95it/s][A
 84%|███████████████████████████▌     | 102206/122310 [3:22:38<29:49, 11.23it/s][A
 84%|███████████████████████████▌     | 102216/122310 [3:22:39<28:19, 11.83it/s][A
 84%|███████████████████████████▌     | 102223/122310 [3:22:40<30:19, 11.04it/s][A
 84%|███████████████████████████▌     | 102232/122310 [3:22:41<29:39, 11.28it/s][A
 84%|███████████████████████████▌     | 102242/122310 [3:22:41<28:12, 11.85it/s][A
 84%|███████████████████████████▌     | 102249/122310 [3:22:42<30:13, 11.06

step: 12400, loss: 102.3892214263626, epoch: 0
sim1 and sim2 are 0.5708994040250499, 0.14113259721889296
cosine of pred and queen: 0.050381903691638176
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: outside
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: pressure
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: australian
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: council
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: eluana
Actual


 84%|███████████████████████████▌     | 102316/122310 [3:23:03<27:18, 12.20it/s][A

Actual: brazil:real::sweden:krona, pred: france
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: europe
Actual: thailand:thai::india:indian, pred: ruto
Actual: sweden:swedish::netherlands:dutch, pred: monday
Actual: russia:russian::germany:german, pred: france
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: friday
Actual: croatia:croatian::france:french, pred: defense
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.044444444444444446
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: dimly
Actual: think:thinks::talk:talks, pred: somnambulisms
Actu


 84%|█████████████████████████     | 102329/122310 [3:24:07<13:02:26,  2.35s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.029585798816568046



 84%|█████████████████████████▉     | 102345/122310 [3:24:08<7:56:49,  1.43s/it][A
 84%|█████████████████████████▉     | 102350/122310 [3:24:08<6:52:15,  1.24s/it][A
 84%|█████████████████████████▉     | 102367/122310 [3:24:09<4:03:53,  1.36it/s][A
 84%|█████████████████████████▉     | 102378/122310 [3:24:10<3:01:35,  1.83it/s][A
 84%|█████████████████████████▉     | 102387/122310 [3:24:11<2:23:28,  2.31it/s][A
 84%|█████████████████████████▉     | 102401/122310 [3:24:11<1:38:48,  3.36it/s][A
 84%|█████████████████████████▉     | 102408/122310 [3:24:12<1:25:57,  3.86it/s][A
 84%|█████████████████████████▉     | 102414/122310 [3:24:13<1:17:10,  4.30it/s][A
 84%|█████████████████████████▉     | 102421/122310 [3:24:14<1:06:47,  4.96it/s][A
 84%|█████████████████████████▉     | 102426/122310 [3:24:14<1:03:16,  5.24it/s][A
 84%|███████████████████████████▋     | 102434/122310 [3:24:15<53:12,  6.23it/s][A
 84%|███████████████████████████▋     | 102440/122310 [3:24:16<50:06,  6.61

step: 12420, loss: 78.67438780153498, epoch: 0



 84%|███████████████████████████▋     | 102507/122310 [3:24:23<40:06,  8.23it/s][A
 84%|███████████████████████████▋     | 102511/122310 [3:24:23<44:55,  7.34it/s][A
 84%|███████████████████████████▋     | 102512/122310 [3:24:24<59:04,  5.59it/s][A
 84%|███████████████████████████▋     | 102527/122310 [3:24:25<33:18,  9.90it/s][A
 84%|███████████████████████████▋     | 102534/122310 [3:24:26<33:58,  9.70it/s][A
 84%|███████████████████████████▋     | 102537/122310 [3:24:26<41:12,  8.00it/s][A
 84%|███████████████████████████▋     | 102542/122310 [3:24:27<43:23,  7.59it/s][A
 84%|███████████████████████████▋     | 102558/122310 [3:24:28<28:06, 11.71it/s][A
 84%|███████████████████████████▋     | 102565/122310 [3:24:29<30:04, 10.94it/s][A
 84%|███████████████████████████▋     | 102571/122310 [3:24:29<32:38, 10.08it/s][A
 84%|███████████████████████████▋     | 102578/122310 [3:24:30<33:40,  9.76it/s][A
 84%|███████████████████████████▋     | 102589/122310 [3:24:31<29:12, 11.25

step: 12440, loss: 84.16796507566806, epoch: 0



 84%|███████████████████████████▋     | 102685/122310 [3:24:38<25:53, 12.64it/s][A
 84%|███████████████████████████▋     | 102695/122310 [3:24:38<25:32, 12.80it/s][A
 84%|███████████████████████████▋     | 102704/122310 [3:24:39<25:59, 12.57it/s][A
 84%|███████████████████████████▋     | 102718/122310 [3:24:40<22:43, 14.37it/s][A
 84%|███████████████████████████▋     | 102727/122310 [3:24:41<24:01, 13.59it/s][A
 84%|███████████████████████████▋     | 102734/122310 [3:24:41<26:30, 12.31it/s][A
 84%|███████████████████████████▋     | 102739/122310 [3:24:42<30:44, 10.61it/s][A
 84%|███████████████████████████▋     | 102747/122310 [3:24:43<30:44, 10.61it/s][A
 84%|███████████████████████████▋     | 102754/122310 [3:24:44<32:11, 10.12it/s][A
 84%|███████████████████████████▋     | 102764/122310 [3:24:45<29:31, 11.03it/s][A
 84%|███████████████████████████▋     | 102770/122310 [3:24:45<32:12, 10.11it/s][A
 84%|███████████████████████████▋     | 102779/122310 [3:24:46<30:33, 10.65

step: 12460, loss: 96.49165700425027, epoch: 0



 84%|███████████████████████████▊     | 102858/122310 [3:24:53<25:26, 12.74it/s][A
 84%|███████████████████████████▊     | 102862/122310 [3:24:53<30:36, 10.59it/s][A
 84%|███████████████████████████▊     | 102868/122310 [3:24:54<32:43,  9.90it/s][A
 84%|███████████████████████████▊     | 102875/122310 [3:24:55<32:55,  9.84it/s][A
 84%|███████████████████████████▊     | 102884/122310 [3:24:55<30:34, 10.59it/s][A
 84%|███████████████████████████▊     | 102894/122310 [3:24:56<28:06, 11.51it/s][A
 84%|███████████████████████████▊     | 102896/122310 [3:24:57<36:18,  8.91it/s][A
 84%|███████████████████████████▊     | 102909/122310 [3:24:58<27:47, 11.63it/s][A
 84%|███████████████████████████▊     | 102913/122310 [3:24:58<32:58,  9.80it/s][A
 84%|███████████████████████████▊     | 102921/122310 [3:24:59<31:55, 10.12it/s][A
 84%|███████████████████████████▊     | 102923/122310 [3:25:00<40:33,  7.97it/s][A
 84%|███████████████████████████▊     | 102936/122310 [3:25:01<29:31, 10.94

step: 12480, loss: 78.0845150315101, epoch: 0



 84%|███████████████████████████▊     | 102999/122310 [3:25:07<36:27,  8.83it/s][A
 84%|███████████████████████████▊     | 103007/122310 [3:25:08<33:51,  9.50it/s][A
 84%|███████████████████████████▊     | 103017/122310 [3:25:09<29:45, 10.80it/s][A
 84%|███████████████████████████▊     | 103026/122310 [3:25:09<28:30, 11.28it/s][A
 84%|███████████████████████████▊     | 103033/122310 [3:25:10<30:02, 10.70it/s][A
 84%|███████████████████████████▊     | 103043/122310 [3:25:11<27:36, 11.63it/s][A
 84%|███████████████████████████▊     | 103055/122310 [3:25:12<31:45, 10.10it/s][A
 84%|███████████████████████████▊     | 103061/122310 [3:25:13<33:08,  9.68it/s][A
 84%|███████████████████████████▊     | 103069/122310 [3:25:14<31:56, 10.04it/s][A
 84%|███████████████████████████▊     | 103085/122310 [3:25:14<24:06, 13.29it/s][A
 84%|███████████████████████████▊     | 103092/122310 [3:25:15<26:05, 12.27it/s][A
 84%|███████████████████████████▊     | 103095/122310 [3:25:16<32:13,  9.94

step: 12500, loss: 110.41745198319838, epoch: 0



 84%|███████████████████████████▊     | 103173/122310 [3:25:22<26:24, 12.08it/s][A
 84%|███████████████████████████▊     | 103182/122310 [3:25:22<26:08, 12.20it/s][A
 84%|███████████████████████████▊     | 103194/122310 [3:25:23<23:38, 13.48it/s][A
 84%|███████████████████████████▊     | 103198/122310 [3:25:24<28:37, 11.13it/s][A
 84%|███████████████████████████▊     | 103204/122310 [3:25:24<30:59, 10.27it/s][A
 84%|███████████████████████████▊     | 103209/122310 [3:25:25<34:24,  9.25it/s][A
 84%|███████████████████████████▊     | 103217/122310 [3:25:26<32:33,  9.77it/s][A
 84%|███████████████████████████▊     | 103223/122310 [3:25:27<34:13,  9.30it/s][A
 84%|███████████████████████████▊     | 103240/122310 [3:25:27<23:33, 13.49it/s][A
 84%|███████████████████████████▊     | 103246/122310 [3:25:28<26:38, 11.93it/s][A
 84%|███████████████████████████▊     | 103252/122310 [3:25:29<29:19, 10.83it/s][A
 84%|███████████████████████████▊     | 103260/122310 [3:25:30<29:09, 10.89

step: 12520, loss: 79.88269572833862, epoch: 0



 84%|███████████████████████████▉     | 103334/122310 [3:25:36<29:10, 10.84it/s][A
 84%|███████████████████████████▉     | 103350/122310 [3:25:37<22:19, 14.16it/s][A
 85%|███████████████████████████▉     | 103359/122310 [3:25:38<23:05, 13.68it/s][A
 85%|███████████████████████████▉     | 103365/122310 [3:25:38<26:13, 12.04it/s][A
 85%|███████████████████████████▉     | 103370/122310 [3:25:39<30:04, 10.49it/s][A
 85%|███████████████████████████▉     | 103379/122310 [3:25:40<28:33, 11.05it/s][A
 85%|███████████████████████████▉     | 103384/122310 [3:25:40<32:09,  9.81it/s][A
 85%|███████████████████████████▉     | 103388/122310 [3:25:41<37:07,  8.50it/s][A
 85%|███████████████████████████▉     | 103404/122310 [3:25:42<25:06, 12.55it/s][A
 85%|███████████████████████████▉     | 103417/122310 [3:25:43<22:15, 14.15it/s][A
 85%|███████████████████████████▉     | 103426/122310 [3:25:43<23:01, 13.67it/s][A
 85%|███████████████████████████▉     | 103432/122310 [3:25:44<26:06, 12.05

step: 12540, loss: 114.7690323829928, epoch: 0



 85%|███████████████████████████▉     | 103496/122310 [3:25:51<27:30, 11.40it/s][A
 85%|███████████████████████████▉     | 103503/122310 [3:25:51<28:43, 10.91it/s][A
 85%|███████████████████████████▉     | 103506/122310 [3:25:52<34:43,  9.02it/s][A
 85%|███████████████████████████▉     | 103515/122310 [3:25:53<31:20,  9.99it/s][A
 85%|███████████████████████████▉     | 103524/122310 [3:25:54<29:15, 10.70it/s][A
 85%|███████████████████████████▉     | 103532/122310 [3:25:54<28:59, 10.79it/s][A
 85%|███████████████████████████▉     | 103539/122310 [3:25:55<30:00, 10.43it/s][A
 85%|███████████████████████████▉     | 103544/122310 [3:25:56<33:22,  9.37it/s][A
 85%|███████████████████████████▉     | 103554/122310 [3:25:57<29:18, 10.66it/s][A
 85%|███████████████████████████▉     | 103563/122310 [3:25:57<27:57, 11.17it/s][A
 85%|███████████████████████████▉     | 103574/122310 [3:25:58<25:19, 12.33it/s][A
 85%|███████████████████████████▉     | 103580/122310 [3:25:59<28:03, 11.13

step: 12560, loss: 81.94668085774988, epoch: 0



 85%|███████████████████████████▉     | 103662/122310 [3:26:05<25:11, 12.34it/s][A
 85%|███████████████████████████▉     | 103666/122310 [3:26:06<30:05, 10.32it/s][A
 85%|███████████████████████████▉     | 103671/122310 [3:26:07<33:42,  9.22it/s][A
 85%|███████████████████████████▉     | 103676/122310 [3:26:07<36:35,  8.49it/s][A
 85%|███████████████████████████▉     | 103692/122310 [3:26:08<24:52, 12.47it/s][A
 85%|███████████████████████████▉     | 103702/122310 [3:26:09<24:11, 12.82it/s][A
 85%|███████████████████████████▉     | 103704/122310 [3:26:10<31:34,  9.82it/s][A
 85%|███████████████████████████▉     | 103716/122310 [3:26:10<26:11, 11.83it/s][A
 85%|███████████████████████████▉     | 103733/122310 [3:26:11<20:17, 15.26it/s][A
 85%|███████████████████████████▉     | 103741/122310 [3:26:12<22:11, 13.94it/s][A
 85%|███████████████████████████▉     | 103747/122310 [3:26:13<25:24, 12.18it/s][A
 85%|███████████████████████████▉     | 103750/122310 [3:26:13<31:45,  9.74

step: 12580, loss: 101.98129559616343, epoch: 0



 85%|████████████████████████████     | 103824/122310 [3:26:20<26:01, 11.84it/s][A
 85%|████████████████████████████     | 103833/122310 [3:26:21<25:39, 12.00it/s][A
 85%|████████████████████████████     | 103839/122310 [3:26:21<28:18, 10.87it/s][A
 85%|████████████████████████████     | 103844/122310 [3:26:22<31:52,  9.66it/s][A
 85%|████████████████████████████     | 103850/122310 [3:26:23<33:13,  9.26it/s][A
 85%|████████████████████████████     | 103854/122310 [3:26:24<37:49,  8.13it/s][A
 85%|████████████████████████████     | 103861/122310 [3:26:24<36:00,  8.54it/s][A
 85%|████████████████████████████     | 103866/122310 [3:26:25<38:08,  8.06it/s][A
 85%|████████████████████████████     | 103876/122310 [3:26:26<31:29,  9.76it/s][A
 85%|████████████████████████████     | 103884/122310 [3:26:26<30:32, 10.05it/s][A
 85%|████████████████████████████     | 103886/122310 [3:26:27<38:59,  7.87it/s][A
 85%|████████████████████████████     | 103892/122310 [3:26:28<38:22,  8.00

step: 12600, loss: 100.45659457145695, epoch: 0



 85%|████████████████████████████     | 103958/122310 [3:26:34<39:43,  7.70it/s][A
 85%|████████████████████████████     | 103961/122310 [3:26:35<46:13,  6.62it/s][A
 85%|████████████████████████████     | 103965/122310 [3:26:36<48:37,  6.29it/s][A
 85%|████████████████████████████     | 103971/122310 [3:26:37<44:28,  6.87it/s][A
 85%|████████████████████████████     | 103976/122310 [3:26:37<44:26,  6.87it/s][A
 85%|████████████████████████████     | 103986/122310 [3:26:38<34:16,  8.91it/s][A
 85%|████████████████████████████     | 103990/122310 [3:26:39<38:40,  7.90it/s][A
 85%|████████████████████████████     | 103997/122310 [3:26:40<36:21,  8.40it/s][A
 85%|████████████████████████████     | 104003/122310 [3:26:40<36:46,  8.30it/s][A
 85%|████████████████████████████     | 104009/122310 [3:26:41<36:55,  8.26it/s][A
 85%|████████████████████████████     | 104013/122310 [3:26:42<41:02,  7.43it/s][A
 85%|████████████████████████████     | 104025/122310 [3:26:43<30:09, 10.11

step: 12620, loss: 80.58271410535566, epoch: 0



 85%|████████████████████████████     | 104105/122310 [3:26:49<29:15, 10.37it/s][A
 85%|████████████████████████████     | 104108/122310 [3:26:50<35:33,  8.53it/s][A
 85%|████████████████████████████     | 104118/122310 [3:26:51<30:06, 10.07it/s][A
 85%|████████████████████████████     | 104130/122310 [3:26:51<25:15, 12.00it/s][A
 85%|████████████████████████████     | 104142/122310 [3:26:52<22:43, 13.32it/s][A
 85%|████████████████████████████     | 104152/122310 [3:26:53<22:33, 13.42it/s][A
 85%|████████████████████████████     | 104158/122310 [3:26:54<25:28, 11.87it/s][A
 85%|████████████████████████████     | 104173/122310 [3:26:54<20:50, 14.50it/s][A
 85%|████████████████████████████     | 104185/122310 [3:26:55<20:03, 15.06it/s][A
 85%|████████████████████████████     | 104188/122310 [3:26:56<25:50, 11.69it/s][A
 85%|████████████████████████████     | 104190/122310 [3:26:56<33:23,  9.05it/s][A
 85%|████████████████████████████     | 104196/122310 [3:26:57<34:36,  8.72

step: 12640, loss: 118.46031445623248, epoch: 0



 85%|████████████████████████████▏    | 104276/122310 [3:27:04<26:24, 11.38it/s][A
 85%|████████████████████████████▏    | 104283/122310 [3:27:04<27:38, 10.87it/s][A
 85%|████████████████████████████▏    | 104289/122310 [3:27:05<29:55, 10.04it/s][A
 85%|████████████████████████████▏    | 104295/122310 [3:27:06<31:33,  9.52it/s][A
 85%|████████████████████████████▏    | 104302/122310 [3:27:07<31:37,  9.49it/s][A
 85%|████████████████████████████▏    | 104311/122310 [3:27:07<29:13, 10.26it/s][A
 85%|████████████████████████████▏    | 104325/122310 [3:27:08<23:13, 12.91it/s][A
 85%|████████████████████████████▏    | 104335/122310 [3:27:09<22:53, 13.08it/s][A
 85%|████████████████████████████▏    | 104344/122310 [3:27:10<23:15, 12.88it/s][A
 85%|████████████████████████████▏    | 104351/122310 [3:27:10<25:06, 11.92it/s][A
 85%|████████████████████████████▏    | 104375/122310 [3:27:11<16:26, 18.17it/s][A
 85%|████████████████████████████▏    | 104379/122310 [3:27:12<20:53, 14.30

step: 12660, loss: 82.64684366204106, epoch: 0



 85%|████████████████████████████▏    | 104450/122310 [3:27:18<31:03,  9.59it/s][A
 85%|████████████████████████████▏    | 104455/122310 [3:27:19<33:54,  8.78it/s][A
 85%|████████████████████████████▏    | 104463/122310 [3:27:20<31:45,  9.37it/s][A
 85%|████████████████████████████▏    | 104469/122310 [3:27:21<33:03,  8.99it/s][A
 85%|████████████████████████████▏    | 104476/122310 [3:27:21<32:33,  9.13it/s][A
 85%|████████████████████████████▏    | 104484/122310 [3:27:22<30:44,  9.66it/s][A
 85%|████████████████████████████▏    | 104491/122310 [3:27:23<30:51,  9.62it/s][A
 85%|████████████████████████████▏    | 104496/122310 [3:27:24<33:44,  8.80it/s][A
 85%|████████████████████████████▏    | 104504/122310 [3:27:24<31:29,  9.42it/s][A
 85%|████████████████████████████▏    | 104509/122310 [3:27:25<34:17,  8.65it/s][A
 85%|████████████████████████████▏    | 104514/122310 [3:27:26<36:32,  8.12it/s][A
 85%|████████████████████████████▏    | 104521/122310 [3:27:27<34:45,  8.53

step: 12680, loss: 80.5407030583695, epoch: 0



 86%|████████████████████████████▏    | 104585/122310 [3:27:33<30:14,  9.77it/s][A
 86%|████████████████████████████▏    | 104593/122310 [3:27:34<29:10, 10.12it/s][A
 86%|████████████████████████████▏    | 104605/122310 [3:27:35<24:37, 11.98it/s][A
 86%|████████████████████████████▏    | 104610/122310 [3:27:35<28:12, 10.46it/s][A
 86%|████████████████████████████▏    | 104613/122310 [3:27:36<34:33,  8.54it/s][A
 86%|████████████████████████████▏    | 104624/122310 [3:27:37<28:08, 10.48it/s][A
 86%|████████████████████████████▏    | 104641/122310 [3:27:38<20:44, 14.20it/s][A
 86%|████████████████████████████▏    | 104643/122310 [3:27:38<27:16, 10.79it/s][A
 86%|████████████████████████████▏    | 104650/122310 [3:27:39<28:09, 10.45it/s][A
 86%|████████████████████████████▏    | 104659/122310 [3:27:40<26:43, 11.01it/s][A
 86%|████████████████████████████▏    | 104672/122310 [3:27:40<22:33, 13.03it/s][A
 86%|████████████████████████████▏    | 104681/122310 [3:27:41<22:53, 12.83

step: 12700, loss: 128.7178203722759, epoch: 0



 86%|████████████████████████████▎    | 104763/122310 [3:27:48<25:02, 11.68it/s][A
 86%|████████████████████████████▎    | 104769/122310 [3:27:49<27:37, 10.58it/s][A
 86%|████████████████████████████▎    | 104774/122310 [3:27:49<30:54,  9.46it/s][A
 86%|████████████████████████████▎    | 104779/122310 [3:27:50<33:36,  8.69it/s][A
 86%|████████████████████████████▎    | 104794/122310 [3:27:51<24:04, 12.12it/s][A
 86%|████████████████████████████▎    | 104801/122310 [3:27:52<25:47, 11.31it/s][A
 86%|████████████████████████████▎    | 104811/122310 [3:27:52<24:12, 12.05it/s][A
 86%|████████████████████████████▎    | 104824/122310 [3:27:53<21:11, 13.76it/s][A
 86%|████████████████████████████▎    | 104830/122310 [3:27:54<24:07, 12.07it/s][A
 86%|████████████████████████████▎    | 104839/122310 [3:27:54<24:03, 12.10it/s][A
 86%|████████████████████████████▎    | 104850/122310 [3:27:55<22:30, 12.92it/s][A
 86%|████████████████████████████▎    | 104865/122310 [3:27:56<19:09, 15.17

step: 12720, loss: 75.37057863844807, epoch: 0



 86%|████████████████████████████▎    | 104941/122310 [3:28:03<24:41, 11.72it/s][A
 86%|████████████████████████████▎    | 104948/122310 [3:28:03<26:04, 11.09it/s][A
 86%|████████████████████████████▎    | 104951/122310 [3:28:04<32:14,  8.97it/s][A
 86%|████████████████████████████▎    | 104953/122310 [3:28:05<40:34,  7.13it/s][A
 86%|████████████████████████████▎    | 104967/122310 [3:28:05<26:55, 10.74it/s][A
 86%|████████████████████████████▎    | 104976/122310 [3:28:06<25:46, 11.21it/s][A
 86%|████████████████████████████▎    | 104980/122310 [3:28:07<30:24,  9.50it/s][A
 86%|████████████████████████████▎    | 104997/122310 [3:28:08<21:11, 13.61it/s][A
 86%|████████████████████████████▎    | 105005/122310 [3:28:08<22:35, 12.77it/s][A
 86%|████████████████████████████▎    | 105009/122310 [3:28:09<27:07, 10.63it/s][A
 86%|████████████████████████████▎    | 105015/122310 [3:28:10<29:10,  9.88it/s][A
 86%|████████████████████████████▎    | 105028/122310 [3:28:11<23:34, 12.22

step: 12740, loss: 98.52226817691847, epoch: 0



 86%|████████████████████████████▎    | 105112/122310 [3:28:17<24:30, 11.70it/s][A
 86%|████████████████████████████▎    | 105117/122310 [3:28:18<28:12, 10.16it/s][A
 86%|████████████████████████████▎    | 105129/122310 [3:28:19<23:48, 12.03it/s][A
 86%|████████████████████████████▎    | 105141/122310 [3:28:19<21:39, 13.21it/s][A
 86%|████████████████████████████▎    | 105145/122310 [3:28:20<26:08, 10.94it/s][A
 86%|████████████████████████████▎    | 105152/122310 [3:28:21<27:09, 10.53it/s][A
 86%|████████████████████████████▎    | 105162/122310 [3:28:22<24:55, 11.46it/s][A
 86%|████████████████████████████▍    | 105172/122310 [3:28:22<23:36, 12.10it/s][A
 86%|████████████████████████████▍    | 105178/122310 [3:28:23<26:08, 10.92it/s][A
 86%|████████████████████████████▍    | 105183/122310 [3:28:24<29:20,  9.73it/s][A
 86%|████████████████████████████▍    | 105185/122310 [3:28:25<37:37,  7.58it/s][A
 86%|████████████████████████████▍    | 105192/122310 [3:28:25<34:45,  8.21

step: 12760, loss: 106.48548925329014, epoch: 0



 86%|████████████████████████████▍    | 105280/122310 [3:28:32<25:38, 11.07it/s][A
 86%|████████████████████████████▍    | 105293/122310 [3:28:33<21:42, 13.06it/s][A
 86%|████████████████████████████▍    | 105297/122310 [3:28:33<26:13, 10.81it/s][A
 86%|████████████████████████████▍    | 105307/122310 [3:28:34<24:18, 11.65it/s][A
 86%|████████████████████████████▍    | 105321/122310 [3:28:35<20:21, 13.90it/s][A
 86%|████████████████████████████▍    | 105328/122310 [3:28:36<22:27, 12.60it/s][A
 86%|████████████████████████████▍    | 105337/122310 [3:28:36<22:38, 12.49it/s][A
 86%|████████████████████████████▍    | 105349/122310 [3:28:37<20:39, 13.68it/s][A
 86%|████████████████████████████▍    | 105353/122310 [3:28:38<25:10, 11.22it/s][A
 86%|████████████████████████████▍    | 105374/122310 [3:28:38<17:09, 16.45it/s][A
 86%|████████████████████████████▍    | 105386/122310 [3:28:39<17:09, 16.43it/s][A
 86%|████████████████████████████▍    | 105396/122310 [3:28:40<18:06, 15.57

step: 12780, loss: 93.68928876868944, epoch: 0



 86%|████████████████████████████▍    | 105456/122310 [3:28:46<26:39, 10.54it/s][A
 86%|████████████████████████████▍    | 105463/122310 [3:28:47<27:20, 10.27it/s][A
 86%|████████████████████████████▍    | 105475/122310 [3:28:48<23:15, 12.07it/s][A
 86%|████████████████████████████▍    | 105488/122310 [3:28:49<20:23, 13.75it/s][A
 86%|████████████████████████████▍    | 105500/122310 [3:28:49<19:16, 14.54it/s][A
 86%|████████████████████████████▍    | 105507/122310 [3:28:50<21:24, 13.08it/s][A
 86%|████████████████████████████▍    | 105522/122310 [3:28:51<18:15, 15.33it/s][A
 86%|████████████████████████████▍    | 105530/122310 [3:28:52<19:58, 14.00it/s][A
 86%|████████████████████████████▍    | 105539/122310 [3:28:52<20:50, 13.41it/s][A
 86%|████████████████████████████▍    | 105548/122310 [3:28:53<21:20, 13.09it/s][A
 86%|████████████████████████████▍    | 105558/122310 [3:28:54<21:02, 13.27it/s][A
 86%|████████████████████████████▍    | 105567/122310 [3:28:55<21:30, 12.97

step: 12800, loss: 85.1318422209302, epoch: 0
sim1 and sim2 are 0.5752956173447359, 0.1826462702915177
cosine of pred and queen: 0.16537376158749909
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: outside
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: country
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: council
Actual: india:asia::paris:europe, pred: collective
Actual: china:asia::greece:europe, pred: spain
Actual: nigeria:africa::france:europe, pred: spain
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: eluana
Actual: maha


 86%|████████████████████████████▍    | 105621/122310 [3:29:13<27:20, 10.17it/s][A

Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: global
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: france
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: europe
Actual: thailand:thai::india:indian, pred: staffers
Actual: sweden:swedish::netherlands:dutch, pred: family
Actual: russia:russian::germany:german, pred: france
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: friday
Actual: croatia:croatian::france:french, pred: defense
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.044444444444444446
Actual: walk:w


 86%|█████████████████████████▉    | 105626/122310 [3:30:16<16:10:55,  3.49s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.023668639053254437



 86%|█████████████████████████▉    | 105634/122310 [3:30:17<10:50:26,  2.34s/it][A
 86%|██████████████████████████▊    | 105643/122310 [3:30:17<7:08:53,  1.54s/it][A
 86%|██████████████████████████▊    | 105654/122310 [3:30:18<4:32:15,  1.02it/s][A
 86%|██████████████████████████▊    | 105664/122310 [3:30:19<3:08:40,  1.47it/s][A
 86%|██████████████████████████▊    | 105672/122310 [3:30:20<2:23:49,  1.93it/s][A
 86%|██████████████████████████▊    | 105676/122310 [3:30:20<2:08:22,  2.16it/s][A
 86%|██████████████████████████▊    | 105684/122310 [3:30:21<1:35:21,  2.91it/s][A
 86%|██████████████████████████▊    | 105691/122310 [3:30:22<1:16:17,  3.63it/s][A
 86%|██████████████████████████▊    | 105694/122310 [3:30:22<1:14:53,  3.70it/s][A
 86%|████████████████████████████▌    | 105703/122310 [3:30:23<54:22,  5.09it/s][A
 86%|████████████████████████████▌    | 105710/122310 [3:30:24<46:34,  5.94it/s][A
 86%|████████████████████████████▌    | 105716/122310 [3:30:25<43:01,  6.43

step: 12820, loss: 79.38024420705278, epoch: 0



 86%|████████████████████████████▌    | 105789/122310 [3:30:31<26:12, 10.51it/s][A
 86%|████████████████████████████▌    | 105796/122310 [3:30:32<26:42, 10.30it/s][A
 87%|████████████████████████████▌    | 105808/122310 [3:30:33<22:40, 12.13it/s][A
 87%|████████████████████████████▌    | 105817/122310 [3:30:33<22:30, 12.21it/s][A
 87%|████████████████████████████▌    | 105821/122310 [3:30:34<26:53, 10.22it/s][A
 87%|████████████████████████████▌    | 105827/122310 [3:30:35<28:35,  9.61it/s][A
 87%|████████████████████████████▌    | 105835/122310 [3:30:36<27:20, 10.05it/s][A
 87%|████████████████████████████▌    | 105845/122310 [3:30:36<24:37, 11.14it/s][A
 87%|████████████████████████████▌    | 105853/122310 [3:30:37<24:41, 11.11it/s][A
 87%|████████████████████████████▌    | 105868/122310 [3:30:38<19:34, 14.00it/s][A
 87%|████████████████████████████▌    | 105885/122310 [3:30:38<16:21, 16.74it/s][A
 87%|████████████████████████████▌    | 105903/122310 [3:30:39<14:17, 19.14

step: 12840, loss: 100.28007576981537, epoch: 0



 87%|████████████████████████████▌    | 105986/122310 [3:30:46<24:01, 11.33it/s][A
 87%|████████████████████████████▌    | 105997/122310 [3:30:46<21:47, 12.48it/s][A
 87%|████████████████████████████▌    | 106013/122310 [3:30:47<17:42, 15.34it/s][A
 87%|████████████████████████████▌    | 106018/122310 [3:30:48<21:14, 12.78it/s][A
 87%|████████████████████████████▌    | 106025/122310 [3:30:49<22:52, 11.86it/s][A
 87%|████████████████████████████▌    | 106033/122310 [3:30:49<23:32, 11.52it/s][A
 87%|████████████████████████████▌    | 106040/122310 [3:30:50<24:41, 10.98it/s][A
 87%|████████████████████████████▌    | 106048/122310 [3:30:51<24:47, 10.93it/s][A
 87%|████████████████████████████▌    | 106050/122310 [3:30:52<31:44,  8.54it/s][A
 87%|████████████████████████████▌    | 106055/122310 [3:30:52<33:40,  8.04it/s][A
 87%|████████████████████████████▌    | 106064/122310 [3:30:53<29:02,  9.33it/s][A
 87%|████████████████████████████▌    | 106068/122310 [3:30:54<33:08,  8.17

step: 12860, loss: 104.51557702623049, epoch: 0



 87%|████████████████████████████▋    | 106145/122310 [3:31:00<28:37,  9.41it/s][A
 87%|████████████████████████████▋    | 106148/122310 [3:31:01<34:26,  7.82it/s][A
 87%|████████████████████████████▋    | 106158/122310 [3:31:02<27:58,  9.62it/s][A
 87%|████████████████████████████▋    | 106170/122310 [3:31:02<23:05, 11.65it/s][A
 87%|████████████████████████████▋    | 106172/122310 [3:31:03<29:56,  8.98it/s][A
 87%|████████████████████████████▋    | 106180/122310 [3:31:04<27:57,  9.62it/s][A
 87%|████████████████████████████▋    | 106189/122310 [3:31:05<25:41, 10.46it/s][A
 87%|████████████████████████████▋    | 106198/122310 [3:31:05<24:23, 11.01it/s][A
 87%|████████████████████████████▋    | 106210/122310 [3:31:06<21:12, 12.65it/s][A
 87%|████████████████████████████▋    | 106213/122310 [3:31:07<26:34, 10.09it/s][A
 87%|████████████████████████████▋    | 106229/122310 [3:31:08<19:36, 13.67it/s][A
 87%|████████████████████████████▋    | 106242/122310 [3:31:08<18:02, 14.84

step: 12880, loss: 93.03054184322242, epoch: 0



 87%|████████████████████████████▋    | 106322/122310 [3:31:15<20:49, 12.80it/s][A
 87%|████████████████████████████▋    | 106328/122310 [3:31:16<23:01, 11.57it/s][A
 87%|████████████████████████████▋    | 106335/122310 [3:31:16<24:06, 11.05it/s][A
 87%|████████████████████████████▋    | 106347/122310 [3:31:17<21:06, 12.61it/s][A
 87%|████████████████████████████▋    | 106355/122310 [3:31:18<21:57, 12.11it/s][A
 87%|████████████████████████████▋    | 106365/122310 [3:31:19<21:06, 12.59it/s][A
 87%|████████████████████████████▋    | 106374/122310 [3:31:19<21:12, 12.53it/s][A
 87%|████████████████████████████▋    | 106377/122310 [3:31:20<26:29, 10.02it/s][A
 87%|████████████████████████████▋    | 106381/122310 [3:31:21<30:34,  8.68it/s][A
 87%|████████████████████████████▋    | 106387/122310 [3:31:21<30:59,  8.56it/s][A
 87%|████████████████████████████▋    | 106399/122310 [3:31:22<24:18, 10.91it/s][A
 87%|████████████████████████████▋    | 106406/122310 [3:31:23<25:11, 10.52

step: 12900, loss: 91.38765348383248, epoch: 0



 87%|████████████████████████████▋    | 106466/122310 [3:31:29<26:48,  9.85it/s][A
 87%|████████████████████████████▋    | 106478/122310 [3:31:30<22:26, 11.76it/s][A
 87%|████████████████████████████▋    | 106490/122310 [3:31:31<20:05, 13.12it/s][A
 87%|████████████████████████████▋    | 106498/122310 [3:31:32<21:08, 12.47it/s][A
 87%|████████████████████████████▋    | 106504/122310 [3:31:32<23:32, 11.19it/s][A
 87%|████████████████████████████▋    | 106512/122310 [3:31:33<23:33, 11.18it/s][A
 87%|████████████████████████████▋    | 106520/122310 [3:31:34<23:48, 11.05it/s][A
 87%|████████████████████████████▋    | 106523/122310 [3:31:35<29:09,  9.02it/s][A
 87%|████████████████████████████▋    | 106531/122310 [3:31:35<27:25,  9.59it/s][A
 87%|████████████████████████████▋    | 106540/122310 [3:31:36<25:12, 10.43it/s][A
 87%|████████████████████████████▋    | 106543/122310 [3:31:38<49:44,  5.28it/s][A
 87%|████████████████████████████▋    | 106544/122310 [3:31:39<58:33,  4.49

step: 12920, loss: 78.16213172948657, epoch: 0



 87%|████████████████████████████▊    | 106618/122310 [3:31:44<19:56, 13.12it/s][A
 87%|████████████████████████████▊    | 106632/122310 [3:31:45<17:43, 14.74it/s][A
 87%|████████████████████████████▊    | 106637/122310 [3:31:46<21:04, 12.40it/s][A
 87%|████████████████████████████▊    | 106640/122310 [3:31:47<26:27,  9.87it/s][A
 87%|████████████████████████████▊    | 106645/122310 [3:31:47<29:11,  8.94it/s][A
 87%|████████████████████████████▊    | 106653/122310 [3:31:48<27:36,  9.45it/s][A
 87%|████████████████████████████▊    | 106658/122310 [3:31:49<30:53,  8.45it/s][A
 87%|████████████████████████████▊    | 106677/122310 [3:31:50<19:24, 13.42it/s][A
 87%|████████████████████████████▊    | 106686/122310 [3:31:50<20:25, 12.75it/s][A
 87%|████████████████████████████▊    | 106700/122310 [3:31:51<18:03, 14.41it/s][A
 87%|████████████████████████████▊    | 106714/122310 [3:31:52<16:43, 15.55it/s][A
 87%|████████████████████████████▊    | 106720/122310 [3:31:53<19:43, 13.18

step: 12940, loss: 101.70042348681483, epoch: 0



 87%|████████████████████████████▊    | 106801/122310 [3:32:00<20:48, 12.42it/s][A
 87%|████████████████████████████▊    | 106806/122310 [3:32:00<24:09, 10.70it/s][A
 87%|████████████████████████████▊    | 106819/122310 [3:32:01<20:25, 12.64it/s][A
 87%|████████████████████████████▊    | 106830/122310 [3:32:02<19:33, 13.20it/s][A
 87%|████████████████████████████▊    | 106837/122310 [3:32:03<21:27, 12.02it/s][A
 87%|████████████████████████████▊    | 106847/122310 [3:32:03<20:50, 12.36it/s][A
 87%|████████████████████████████▊    | 106861/122310 [3:32:04<18:08, 14.19it/s][A
 87%|████████████████████████████▊    | 106870/122310 [3:32:05<19:10, 13.43it/s][A
 87%|████████████████████████████▊    | 106878/122310 [3:32:06<20:26, 12.58it/s][A
 87%|████████████████████████████▊    | 106888/122310 [3:32:06<20:10, 12.75it/s][A
 87%|████████████████████████████▊    | 106899/122310 [3:32:07<19:18, 13.30it/s][A
 87%|████████████████████████████▊    | 106914/122310 [3:32:08<16:51, 15.22

step: 12960, loss: 88.24068404431397, epoch: 0



 87%|████████████████████████████▊    | 106973/122310 [3:32:15<30:13,  8.46it/s][A
 87%|████████████████████████████▊    | 106984/122310 [3:32:16<24:48, 10.29it/s][A
 87%|████████████████████████████▊    | 106993/122310 [3:32:16<23:50, 10.71it/s][A
 87%|████████████████████████████▊    | 107004/122310 [3:32:17<21:26, 11.89it/s][A
 87%|████████████████████████████▊    | 107013/122310 [3:32:18<21:29, 11.86it/s][A
 88%|████████████████████████████▉    | 107023/122310 [3:32:19<20:48, 12.25it/s][A
 88%|████████████████████████████▉    | 107031/122310 [3:32:19<21:37, 11.78it/s][A
 88%|████████████████████████████▉    | 107041/122310 [3:32:20<20:50, 12.21it/s][A
 88%|████████████████████████████▉    | 107051/122310 [3:32:21<20:19, 12.51it/s][A
 88%|████████████████████████████▉    | 107058/122310 [3:32:22<22:05, 11.51it/s][A
 88%|████████████████████████████▉    | 107068/122310 [3:32:22<21:14, 11.96it/s][A
 88%|████████████████████████████▉    | 107073/122310 [3:32:23<24:24, 10.40

step: 12980, loss: 86.1137220652121, epoch: 0



 88%|████████████████████████████▉    | 107161/122310 [3:32:30<17:39, 14.30it/s][A
 88%|████████████████████████████▉    | 107163/122310 [3:32:32<37:13,  6.78it/s][A
 88%|████████████████████████████▉    | 107171/122310 [3:32:33<33:12,  7.60it/s][A
 88%|████████████████████████████▉    | 107175/122310 [3:32:34<35:42,  7.06it/s][A
 88%|████████████████████████████▉    | 107181/122310 [3:32:34<34:37,  7.28it/s][A
 88%|████████████████████████████▉    | 107190/122310 [3:32:35<29:36,  8.51it/s][A
 88%|████████████████████████████▉    | 107201/122310 [3:32:36<24:47, 10.16it/s][A
 88%|████████████████████████████▉    | 107213/122310 [3:32:37<21:25, 11.74it/s][A
 88%|████████████████████████████▉    | 107220/122310 [3:32:37<22:48, 11.03it/s][A
 88%|████████████████████████████▉    | 107228/122310 [3:32:38<23:03, 10.90it/s][A
 88%|████████████████████████████▉    | 107237/122310 [3:32:39<22:25, 11.20it/s][A
 88%|████████████████████████████▉    | 107251/122310 [3:32:40<18:50, 13.32

step: 13000, loss: 120.53187954427031, epoch: 0
saving weights



 88%|████████████████████████████▉    | 107300/122310 [3:32:45<36:43,  6.81it/s][A
 88%|████████████████████████████▉    | 107310/122310 [3:32:46<30:00,  8.33it/s][A
 88%|████████████████████████████▉    | 107315/122310 [3:32:47<31:36,  7.91it/s][A
 88%|████████████████████████████▉    | 107321/122310 [3:32:47<31:30,  7.93it/s][A
 88%|████████████████████████████▉    | 107325/122310 [3:32:48<34:41,  7.20it/s][A
 88%|████████████████████████████▉    | 107329/122310 [3:32:49<37:32,  6.65it/s][A
 88%|████████████████████████████▉    | 107339/122310 [3:32:50<29:12,  8.54it/s][A
 88%|████████████████████████████▉    | 107353/122310 [3:32:51<21:46, 11.45it/s][A
 88%|████████████████████████████▉    | 107361/122310 [3:32:51<22:14, 11.20it/s][A
 88%|████████████████████████████▉    | 107370/122310 [3:32:52<21:50, 11.40it/s][A
 88%|████████████████████████████▉    | 107377/122310 [3:32:53<23:07, 10.76it/s][A
 88%|████████████████████████████▉    | 107383/122310 [3:32:54<25:08,  9.89

step: 13020, loss: 91.89250438850803, epoch: 0



 88%|████████████████████████████▉    | 107471/122310 [3:33:01<24:06, 10.26it/s][A
 88%|████████████████████████████▉    | 107473/122310 [3:33:02<29:25,  8.40it/s][A
 88%|████████████████████████████▉    | 107476/122310 [3:33:03<34:18,  7.21it/s][A
 88%|████████████████████████████▉    | 107479/122310 [3:33:03<38:51,  6.36it/s][A
 88%|█████████████████████████████    | 107491/122310 [3:33:04<27:23,  9.02it/s][A
 88%|█████████████████████████████    | 107498/122310 [3:33:05<27:09,  9.09it/s][A
 88%|█████████████████████████████    | 107505/122310 [3:33:06<27:02,  9.12it/s][A
 88%|█████████████████████████████    | 107511/122310 [3:33:06<28:16,  8.72it/s][A
 88%|█████████████████████████████    | 107518/122310 [3:33:07<27:42,  8.90it/s][A
 88%|█████████████████████████████    | 107523/122310 [3:33:08<29:59,  8.22it/s][A
 88%|█████████████████████████████    | 107529/122310 [3:33:09<30:17,  8.13it/s][A
 88%|█████████████████████████████    | 107538/122310 [3:33:09<26:43,  9.21

step: 13040, loss: 83.87739734870269, epoch: 0



 88%|█████████████████████████████    | 107618/122310 [3:33:16<21:56, 11.16it/s][A
 88%|█████████████████████████████    | 107625/122310 [3:33:17<23:05, 10.60it/s][A
 88%|█████████████████████████████    | 107629/122310 [3:33:18<27:16,  8.97it/s][A
 88%|█████████████████████████████    | 107643/122310 [3:33:19<20:48, 11.75it/s][A
 88%|█████████████████████████████    | 107658/122310 [3:33:19<17:25, 14.01it/s][A
 88%|█████████████████████████████    | 107665/122310 [3:33:20<19:22, 12.60it/s][A
 88%|█████████████████████████████    | 107675/122310 [3:33:21<19:08, 12.74it/s][A
 88%|█████████████████████████████    | 107683/122310 [3:33:22<20:11, 12.07it/s][A
 88%|█████████████████████████████    | 107693/122310 [3:33:23<19:42, 12.36it/s][A
 88%|█████████████████████████████    | 107697/122310 [3:33:23<23:51, 10.21it/s][A
 88%|█████████████████████████████    | 107709/122310 [3:33:24<20:30, 11.87it/s][A
 88%|█████████████████████████████    | 107719/122310 [3:33:25<19:56, 12.20

step: 13060, loss: 76.57892757998343, epoch: 0



 88%|█████████████████████████████    | 107785/122310 [3:33:32<30:04,  8.05it/s][A
 88%|█████████████████████████████    | 107788/122310 [3:33:32<35:23,  6.84it/s][A
 88%|█████████████████████████████    | 107800/122310 [3:33:33<25:28,  9.49it/s][A
 88%|█████████████████████████████    | 107807/122310 [3:33:34<25:45,  9.38it/s][A
 88%|█████████████████████████████    | 107811/122310 [3:33:35<29:46,  8.12it/s][A
 88%|█████████████████████████████    | 107817/122310 [3:33:36<30:03,  8.04it/s][A
 88%|█████████████████████████████    | 107822/122310 [3:33:36<31:47,  7.59it/s][A
 88%|█████████████████████████████    | 107828/122310 [3:33:37<31:25,  7.68it/s][A
 88%|█████████████████████████████    | 107843/122310 [3:33:38<21:26, 11.25it/s][A
 88%|█████████████████████████████    | 107850/122310 [3:33:39<22:44, 10.60it/s][A
 88%|█████████████████████████████    | 107864/122310 [3:33:39<18:42, 12.87it/s][A
 88%|█████████████████████████████    | 107874/122310 [3:33:40<18:35, 12.94

step: 13080, loss: 74.66531087805426, epoch: 0



 88%|█████████████████████████████    | 107931/122310 [3:33:47<33:01,  7.26it/s][A
 88%|█████████████████████████████    | 107935/122310 [3:33:48<35:56,  6.67it/s][A
 88%|█████████████████████████████▏   | 107951/122310 [3:33:49<21:55, 10.92it/s][A
 88%|█████████████████████████████▏   | 107958/122310 [3:33:49<22:56, 10.43it/s][A
 88%|█████████████████████████████▏   | 107963/122310 [3:33:50<25:53,  9.23it/s][A
 88%|█████████████████████████████▏   | 107976/122310 [3:33:51<20:41, 11.55it/s][A
 88%|█████████████████████████████▏   | 107984/122310 [3:33:52<21:16, 11.23it/s][A
 88%|█████████████████████████████▏   | 107995/122310 [3:33:52<19:34, 12.18it/s][A
 88%|█████████████████████████████▏   | 108000/122310 [3:33:53<22:46, 10.47it/s][A
 88%|█████████████████████████████▏   | 108011/122310 [3:33:54<20:28, 11.64it/s][A
 88%|█████████████████████████████▏   | 108021/122310 [3:33:55<19:46, 12.04it/s][A
 88%|█████████████████████████████▏   | 108030/122310 [3:33:56<19:56, 11.93

step: 13100, loss: 83.53879474677815, epoch: 0



 88%|█████████████████████████████▏   | 108088/122310 [3:34:02<25:35,  9.26it/s][A
 88%|█████████████████████████████▏   | 108098/122310 [3:34:03<22:44, 10.42it/s][A
 88%|█████████████████████████████▏   | 108102/122310 [3:34:04<26:48,  8.84it/s][A
 88%|█████████████████████████████▏   | 108115/122310 [3:34:05<20:54, 11.31it/s][A
 88%|█████████████████████████████▏   | 108126/122310 [3:34:05<19:19, 12.23it/s][A
 88%|█████████████████████████████▏   | 108142/122310 [3:34:06<15:54, 14.84it/s][A
 88%|█████████████████████████████▏   | 108154/122310 [3:34:07<15:37, 15.11it/s][A
 88%|█████████████████████████████▏   | 108161/122310 [3:34:08<17:39, 13.35it/s][A
 88%|█████████████████████████████▏   | 108168/122310 [3:34:09<19:33, 12.05it/s][A
 88%|█████████████████████████████▏   | 108175/122310 [3:34:09<21:01, 11.21it/s][A
 88%|█████████████████████████████▏   | 108188/122310 [3:34:10<18:09, 12.96it/s][A
 88%|█████████████████████████████▏   | 108192/122310 [3:34:11<22:07, 10.64

step: 13120, loss: 106.18999407467182, epoch: 0



 89%|█████████████████████████████▏   | 108270/122310 [3:34:18<18:21, 12.75it/s][A
 89%|█████████████████████████████▏   | 108272/122310 [3:34:18<24:02,  9.73it/s][A
 89%|█████████████████████████████▏   | 108283/122310 [3:34:19<21:00, 11.12it/s][A
 89%|█████████████████████████████▏   | 108285/122310 [3:34:20<27:15,  8.57it/s][A
 89%|█████████████████████████████▏   | 108288/122310 [3:34:21<32:35,  7.17it/s][A
 89%|█████████████████████████████▏   | 108296/122310 [3:34:22<28:32,  8.18it/s][A
 89%|█████████████████████████████▏   | 108305/122310 [3:34:22<25:09,  9.28it/s][A
 89%|█████████████████████████████▏   | 108316/122310 [3:34:23<21:31, 10.84it/s][A
 89%|█████████████████████████████▏   | 108325/122310 [3:34:24<20:59, 11.10it/s][A
 89%|█████████████████████████████▏   | 108331/122310 [3:34:25<22:59, 10.13it/s][A
 89%|█████████████████████████████▏   | 108340/122310 [3:34:25<21:55, 10.62it/s][A
 89%|█████████████████████████████▏   | 108349/122310 [3:34:26<21:16, 10.94

step: 13140, loss: 86.4013165514464, epoch: 0



 89%|█████████████████████████████▏   | 108409/122310 [3:34:33<20:39, 11.22it/s][A
 89%|█████████████████████████████▎   | 108423/122310 [3:34:34<17:20, 13.34it/s][A
 89%|█████████████████████████████▎   | 108428/122310 [3:34:35<20:29, 11.29it/s][A
 89%|█████████████████████████████▎   | 108434/122310 [3:34:35<22:39, 10.21it/s][A
 89%|█████████████████████████████▎   | 108436/122310 [3:34:36<29:24,  7.86it/s][A
 89%|█████████████████████████████▎   | 108443/122310 [3:34:37<28:01,  8.25it/s][A
 89%|█████████████████████████████▎   | 108448/122310 [3:34:38<29:52,  7.73it/s][A
 89%|█████████████████████████████▎   | 108458/122310 [3:34:38<24:49,  9.30it/s][A
 89%|█████████████████████████████▎   | 108467/122310 [3:34:39<22:57, 10.05it/s][A
 89%|█████████████████████████████▎   | 108473/122310 [3:34:40<24:34,  9.38it/s][A
 89%|█████████████████████████████▎   | 108479/122310 [3:34:41<25:54,  8.90it/s][A
 89%|█████████████████████████████▎   | 108489/122310 [3:34:41<22:44, 10.13

step: 13160, loss: 109.76113027525095, epoch: 0



 89%|█████████████████████████████▎   | 108585/122310 [3:34:48<15:59, 14.30it/s][A
 89%|█████████████████████████████▎   | 108599/122310 [3:34:49<14:42, 15.53it/s][A
 89%|█████████████████████████████▎   | 108611/122310 [3:34:50<14:38, 15.59it/s][A
 89%|█████████████████████████████▎   | 108620/122310 [3:34:51<15:46, 14.46it/s][A
 89%|█████████████████████████████▎   | 108627/122310 [3:34:51<17:47, 12.82it/s][A
 89%|█████████████████████████████▎   | 108631/122310 [3:34:52<21:36, 10.55it/s][A
 89%|█████████████████████████████▎   | 108634/122310 [3:34:53<26:34,  8.58it/s][A
 89%|█████████████████████████████▎   | 108640/122310 [3:34:54<27:19,  8.34it/s][A
 89%|█████████████████████████████▎   | 108646/122310 [3:34:54<27:46,  8.20it/s][A
 89%|█████████████████████████████▎   | 108656/122310 [3:34:55<23:32,  9.67it/s][A
 89%|█████████████████████████████▎   | 108662/122310 [3:34:56<24:57,  9.11it/s][A
 89%|█████████████████████████████▎   | 108669/122310 [3:34:57<24:52,  9.14

step: 13180, loss: 105.35437037971373, epoch: 0



 89%|█████████████████████████████▎   | 108753/122310 [3:35:04<15:43, 14.37it/s][A
 89%|█████████████████████████████▎   | 108761/122310 [3:35:04<17:07, 13.18it/s][A
 89%|█████████████████████████████▎   | 108765/122310 [3:35:05<20:52, 10.82it/s][A
 89%|█████████████████████████████▎   | 108771/122310 [3:35:06<22:43,  9.93it/s][A
 89%|█████████████████████████████▎   | 108777/122310 [3:35:07<24:19,  9.27it/s][A
 89%|█████████████████████████████▎   | 108789/122310 [3:35:07<20:07, 11.20it/s][A
 89%|█████████████████████████████▎   | 108795/122310 [3:35:08<21:52, 10.30it/s][A
 89%|█████████████████████████████▎   | 108806/122310 [3:35:09<19:23, 11.60it/s][A
 89%|█████████████████████████████▎   | 108813/122310 [3:35:10<20:22, 11.04it/s][A
 89%|█████████████████████████████▎   | 108820/122310 [3:35:10<21:14, 10.59it/s][A
 89%|█████████████████████████████▎   | 108835/122310 [3:35:11<16:34, 13.55it/s][A
 89%|█████████████████████████████▎   | 108846/122310 [3:35:12<16:03, 13.97

step: 13200, loss: 103.55279053740341, epoch: 0
sim1 and sim2 are 0.6325193890911914, 0.1774759957731975
cosine of pred and queen: 0.09558032665581721
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: outside
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: pressure
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: australian
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: council
Actual: india:asia::paris:europe, pred: tremlett
Actual: china:asia::greece:europe, pred: spain
Actual: nigeria:africa::france:europe, pred: spain
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: eluana
Actual: mah


 89%|█████████████████████████████▍   | 108907/122310 [3:35:33<19:21, 11.54it/s][A

Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: europe
Actual: thailand:thai::india:indian, pred: staffers
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german, pred: spain
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: friday
Actual: croatia:croatian::france:french, pred: defense
Actual: denmark:danish::germany:german, pred: battleship
Accuracy is 0.1111111111111111
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: dimly
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: thy
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:


 89%|███████████████████████████▌   | 108923/122310 [3:36:33<8:02:53,  2.16s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.05325443786982249



 89%|███████████████████████████▌   | 108927/122310 [3:36:34<7:01:57,  1.89s/it][A
 89%|███████████████████████████▌   | 108931/122310 [3:36:34<5:59:03,  1.61s/it][A
 89%|███████████████████████████▌   | 108936/122310 [3:36:35<4:44:49,  1.28s/it][A
 89%|███████████████████████████▌   | 108951/122310 [3:36:36<2:29:31,  1.49it/s][A
 89%|███████████████████████████▌   | 108957/122310 [3:36:36<2:02:31,  1.82it/s][A
 89%|███████████████████████████▌   | 108967/122310 [3:36:37<1:25:57,  2.59it/s][A
 89%|█████████████████████████████▍   | 108980/122310 [3:36:38<57:15,  3.88it/s][A
 89%|█████████████████████████████▍   | 108984/122310 [3:36:39<54:54,  4.04it/s][A
 89%|█████████████████████████████▍   | 108988/122310 [3:36:39<52:16,  4.25it/s][A
 89%|█████████████████████████████▍   | 108994/122310 [3:36:40<45:23,  4.89it/s][A
 89%|█████████████████████████████▍   | 109000/122310 [3:36:41<40:17,  5.51it/s][A
 89%|█████████████████████████████▍   | 109010/122310 [3:36:42<30:41,  7.22

step: 13220, loss: 90.60690953627702, epoch: 0



 89%|█████████████████████████████▍   | 109076/122310 [3:36:48<19:19, 11.42it/s][A
 89%|█████████████████████████████▍   | 109090/122310 [3:36:49<16:06, 13.67it/s][A
 89%|█████████████████████████████▍   | 109096/122310 [3:36:50<18:14, 12.08it/s][A
 89%|█████████████████████████████▍   | 109106/122310 [3:36:50<17:31, 12.55it/s][A
 89%|█████████████████████████████▍   | 109116/122310 [3:36:51<17:02, 12.90it/s][A
 89%|█████████████████████████████▍   | 109123/122310 [3:36:52<18:33, 11.85it/s][A
 89%|█████████████████████████████▍   | 109131/122310 [3:36:52<18:57, 11.58it/s][A
 89%|█████████████████████████████▍   | 109140/122310 [3:36:53<18:34, 11.81it/s][A
 89%|█████████████████████████████▍   | 109151/122310 [3:36:54<17:10, 12.77it/s][A
 89%|█████████████████████████████▍   | 109163/122310 [3:36:55<15:49, 13.84it/s][A
 89%|█████████████████████████████▍   | 109169/122310 [3:36:55<18:12, 12.03it/s][A
 89%|█████████████████████████████▍   | 109178/122310 [3:36:56<18:18, 11.96

step: 13240, loss: 112.6175514099434, epoch: 0



 89%|█████████████████████████████▍   | 109230/122310 [3:37:03<32:35,  6.69it/s][A
 89%|█████████████████████████████▍   | 109242/122310 [3:37:04<23:19,  9.34it/s][A
 89%|█████████████████████████████▍   | 109262/122310 [3:37:04<15:07, 14.37it/s][A
 89%|█████████████████████████████▍   | 109274/122310 [3:37:05<14:40, 14.80it/s][A
 89%|█████████████████████████████▍   | 109278/122310 [3:37:06<18:02, 12.04it/s][A
 89%|█████████████████████████████▍   | 109281/122310 [3:37:07<22:21,  9.71it/s][A
 89%|█████████████████████████████▍   | 109290/122310 [3:37:07<20:43, 10.47it/s][A
 89%|█████████████████████████████▍   | 109295/122310 [3:37:08<23:07,  9.38it/s][A
 89%|█████████████████████████████▍   | 109298/122310 [3:37:10<36:10,  5.99it/s][A
 89%|█████████████████████████████▍   | 109301/122310 [3:37:10<39:04,  5.55it/s][A
 89%|█████████████████████████████▍   | 109318/122310 [3:37:11<21:23, 10.12it/s][A
 89%|█████████████████████████████▍   | 109323/122310 [3:37:12<23:27,  9.23

step: 13260, loss: 101.22985027622207, epoch: 0



 89%|█████████████████████████████▌   | 109395/122310 [3:37:18<16:57, 12.69it/s][A
 89%|█████████████████████████████▌   | 109405/122310 [3:37:18<16:33, 12.99it/s][A
 89%|█████████████████████████████▌   | 109421/122310 [3:37:19<13:49, 15.54it/s][A
 89%|█████████████████████████████▌   | 109426/122310 [3:37:20<16:26, 13.06it/s][A
 89%|█████████████████████████████▌   | 109439/122310 [3:37:21<14:39, 14.63it/s][A
 89%|█████████████████████████████▌   | 109446/122310 [3:37:21<16:12, 13.23it/s][A
 89%|█████████████████████████████▌   | 109458/122310 [3:37:22<14:57, 14.31it/s][A
 89%|█████████████████████████████▌   | 109465/122310 [3:37:23<16:28, 12.99it/s][A
 90%|█████████████████████████████▌   | 109472/122310 [3:37:23<17:54, 11.95it/s][A
 90%|█████████████████████████████▌   | 109484/122310 [3:37:24<15:52, 13.47it/s][A
 90%|█████████████████████████████▌   | 109487/122310 [3:37:25<19:59, 10.69it/s][A
 90%|█████████████████████████████▌   | 109490/122310 [3:37:26<24:16,  8.80

step: 13280, loss: 80.32918729912238, epoch: 0



 90%|█████████████████████████████▌   | 109571/122310 [3:37:32<15:57, 13.30it/s][A
 90%|█████████████████████████████▌   | 109578/122310 [3:37:33<17:15, 12.30it/s][A
 90%|█████████████████████████████▌   | 109591/122310 [3:37:33<15:05, 14.04it/s][A
 90%|█████████████████████████████▌   | 109602/122310 [3:37:34<14:40, 14.43it/s][A
 90%|█████████████████████████████▌   | 109607/122310 [3:37:35<17:17, 12.24it/s][A
 90%|█████████████████████████████▌   | 109619/122310 [3:37:36<15:34, 13.58it/s][A
 90%|█████████████████████████████▌   | 109632/122310 [3:37:36<14:03, 15.03it/s][A
 90%|█████████████████████████████▌   | 109635/122310 [3:37:37<17:54, 11.80it/s][A
 90%|█████████████████████████████▌   | 109645/122310 [3:37:38<16:56, 12.46it/s][A
 90%|█████████████████████████████▌   | 109654/122310 [3:37:38<16:48, 12.55it/s][A
 90%|█████████████████████████████▌   | 109666/122310 [3:37:39<15:18, 13.77it/s][A
 90%|█████████████████████████████▌   | 109672/122310 [3:37:40<17:12, 12.24

step: 13300, loss: 79.97405959805357, epoch: 0



 90%|█████████████████████████████▌   | 109748/122310 [3:37:46<20:16, 10.33it/s][A
 90%|█████████████████████████████▌   | 109762/122310 [3:37:47<16:23, 12.76it/s][A
 90%|█████████████████████████████▌   | 109768/122310 [3:37:48<18:05, 11.56it/s][A
 90%|█████████████████████████████▌   | 109777/122310 [3:37:48<17:35, 11.87it/s][A
 90%|█████████████████████████████▌   | 109786/122310 [3:37:49<17:16, 12.09it/s][A
 90%|█████████████████████████████▌   | 109790/122310 [3:37:50<20:27, 10.20it/s][A
 90%|█████████████████████████████▌   | 109799/122310 [3:37:51<24:35,  8.48it/s][A
 90%|█████████████████████████████▋   | 109804/122310 [3:37:52<25:33,  8.16it/s][A
 90%|█████████████████████████████▋   | 109810/122310 [3:37:53<25:24,  8.20it/s][A
 90%|█████████████████████████████▋   | 109819/122310 [3:37:53<22:02,  9.44it/s][A
 90%|█████████████████████████████▋   | 109825/122310 [3:37:54<22:42,  9.16it/s][A
 90%|█████████████████████████████▋   | 109835/122310 [3:37:55<19:40, 10.57

step: 13320, loss: 76.2283246889262, epoch: 0



 90%|█████████████████████████████▋   | 109898/122310 [3:38:00<15:17, 13.52it/s][A
 90%|█████████████████████████████▋   | 109902/122310 [3:38:01<18:26, 11.21it/s][A
 90%|█████████████████████████████▋   | 109908/122310 [3:38:02<19:57, 10.36it/s][A
 90%|█████████████████████████████▋   | 109914/122310 [3:38:03<21:10,  9.76it/s][A
 90%|█████████████████████████████▋   | 109918/122310 [3:38:03<24:14,  8.52it/s][A
 90%|█████████████████████████████▋   | 109922/122310 [3:38:04<26:55,  7.67it/s][A
 90%|█████████████████████████████▋   | 109928/122310 [3:38:05<26:06,  7.90it/s][A
 90%|█████████████████████████████▋   | 109934/122310 [3:38:05<25:39,  8.04it/s][A
 90%|█████████████████████████████▋   | 109943/122310 [3:38:06<21:57,  9.39it/s][A
 90%|█████████████████████████████▋   | 109947/122310 [3:38:07<24:53,  8.28it/s][A
 90%|█████████████████████████████▋   | 109952/122310 [3:38:08<26:02,  7.91it/s][A
 90%|█████████████████████████████▋   | 109957/122310 [3:38:08<26:54,  7.65

step: 13340, loss: 88.9877891110332, epoch: 0



 90%|█████████████████████████████▋   | 110018/122310 [3:38:15<30:47,  6.65it/s][A
 90%|█████████████████████████████▋   | 110024/122310 [3:38:15<28:25,  7.20it/s][A
 90%|█████████████████████████████▋   | 110032/122310 [3:38:16<24:41,  8.29it/s][A
 90%|█████████████████████████████▋   | 110038/122310 [3:38:17<24:49,  8.24it/s][A
 90%|█████████████████████████████▋   | 110048/122310 [3:38:18<20:41,  9.88it/s][A
 90%|█████████████████████████████▋   | 110057/122310 [3:38:18<19:17, 10.59it/s][A
 90%|█████████████████████████████▋   | 110064/122310 [3:38:19<19:51, 10.28it/s][A
 90%|█████████████████████████████▋   | 110077/122310 [3:38:20<16:17, 12.51it/s][A
 90%|█████████████████████████████▋   | 110092/122310 [3:38:21<13:40, 14.89it/s][A
 90%|█████████████████████████████▋   | 110099/122310 [3:38:21<15:17, 13.31it/s][A
 90%|█████████████████████████████▋   | 110103/122310 [3:38:22<18:25, 11.04it/s][A
 90%|█████████████████████████████▋   | 110110/122310 [3:38:23<19:08, 10.63

step: 13360, loss: 111.20365408030044, epoch: 0



 90%|█████████████████████████████▋   | 110201/122310 [3:38:29<13:46, 14.64it/s][A
 90%|█████████████████████████████▋   | 110208/122310 [3:38:30<15:23, 13.10it/s][A
 90%|█████████████████████████████▋   | 110214/122310 [3:38:31<17:11, 11.73it/s][A
 90%|█████████████████████████████▋   | 110223/122310 [3:38:31<16:48, 11.99it/s][A
 90%|█████████████████████████████▋   | 110226/122310 [3:38:32<20:54,  9.63it/s][A
 90%|█████████████████████████████▋   | 110231/122310 [3:38:33<22:44,  8.85it/s][A
 90%|█████████████████████████████▋   | 110237/122310 [3:38:33<23:05,  8.71it/s][A
 90%|█████████████████████████████▋   | 110243/122310 [3:38:34<23:16,  8.64it/s][A
 90%|█████████████████████████████▋   | 110248/122310 [3:38:35<24:40,  8.15it/s][A
 90%|█████████████████████████████▋   | 110253/122310 [3:38:36<25:39,  7.83it/s][A
 90%|█████████████████████████████▊   | 110271/122310 [3:38:36<15:23, 13.04it/s][A
 90%|█████████████████████████████▊   | 110280/122310 [3:38:37<15:30, 12.93

step: 13380, loss: 97.20635230342953, epoch: 0



 90%|█████████████████████████████▊   | 110328/122310 [3:38:43<25:13,  7.92it/s][A
 90%|███████████████████████████▉   | 110332/122310 [3:38:48<1:06:06,  3.02it/s][A
 90%|█████████████████████████████▊   | 110349/122310 [3:38:48<33:08,  6.02it/s][A
 90%|█████████████████████████████▊   | 110356/122310 [3:38:49<29:55,  6.66it/s][A
 90%|█████████████████████████████▊   | 110363/122310 [3:38:50<27:24,  7.27it/s][A
 90%|█████████████████████████████▊   | 110374/122310 [3:38:51<21:58,  9.06it/s][A
 90%|█████████████████████████████▊   | 110383/122310 [3:38:51<20:05,  9.90it/s][A
 90%|█████████████████████████████▊   | 110392/122310 [3:38:52<18:45, 10.59it/s][A
 90%|█████████████████████████████▊   | 110400/122310 [3:38:53<18:27, 10.75it/s][A
 90%|█████████████████████████████▊   | 110406/122310 [3:38:53<19:42, 10.07it/s][A
 90%|█████████████████████████████▊   | 110415/122310 [3:38:54<18:25, 10.76it/s][A
 90%|█████████████████████████████▊   | 110423/122310 [3:38:55<18:09, 10.91

step: 13400, loss: 97.98075902373232, epoch: 0



 90%|█████████████████████████████▊   | 110451/122310 [3:38:58<20:04,  9.84it/s][A
 90%|█████████████████████████████▊   | 110457/122310 [3:38:58<20:58,  9.42it/s][A
 90%|█████████████████████████████▊   | 110458/122310 [3:38:59<28:10,  7.01it/s][A
 90%|█████████████████████████████▊   | 110465/122310 [3:39:00<25:07,  7.86it/s][A
 90%|█████████████████████████████▊   | 110471/122310 [3:39:01<24:33,  8.03it/s][A
 90%|█████████████████████████████▊   | 110478/122310 [3:39:01<23:09,  8.51it/s][A
 90%|█████████████████████████████▊   | 110482/122310 [3:39:02<25:39,  7.68it/s][A
 90%|█████████████████████████████▊   | 110488/122310 [3:39:03<24:57,  7.90it/s][A
 90%|█████████████████████████████▊   | 110496/122310 [3:39:03<22:05,  8.91it/s][A
 90%|█████████████████████████████▊   | 110501/122310 [3:39:04<23:35,  8.34it/s][A
 90%|█████████████████████████████▊   | 110508/122310 [3:39:05<22:25,  8.77it/s][A
 90%|█████████████████████████████▊   | 110515/122310 [3:39:06<21:35,  9.11

step: 13420, loss: 79.27289868349857, epoch: 0



 90%|█████████████████████████████▊   | 110577/122310 [3:39:12<18:43, 10.44it/s][A
 90%|█████████████████████████████▊   | 110586/122310 [3:39:13<17:39, 11.06it/s][A
 90%|█████████████████████████████▊   | 110606/122310 [3:39:13<12:12, 15.97it/s][A
 90%|█████████████████████████████▊   | 110620/122310 [3:39:14<11:27, 17.01it/s][A
 90%|█████████████████████████████▊   | 110628/122310 [3:39:15<12:40, 15.36it/s][A
 90%|█████████████████████████████▊   | 110640/122310 [3:39:16<12:18, 15.80it/s][A
 90%|█████████████████████████████▊   | 110649/122310 [3:39:16<13:04, 14.86it/s][A
 90%|█████████████████████████████▊   | 110658/122310 [3:39:17<13:39, 14.22it/s][A
 90%|█████████████████████████████▊   | 110668/122310 [3:39:18<13:46, 14.08it/s][A
 90%|█████████████████████████████▊   | 110674/122310 [3:39:18<15:41, 12.35it/s][A
 90%|█████████████████████████████▊   | 110678/122310 [3:39:19<18:42, 10.36it/s][A
 90%|█████████████████████████████▊   | 110687/122310 [3:39:20<17:33, 11.03

step: 13440, loss: 92.63586230290683, epoch: 0



 91%|█████████████████████████████▉   | 110750/122310 [3:39:26<21:48,  8.84it/s][A
 91%|█████████████████████████████▉   | 110758/122310 [3:39:27<20:08,  9.56it/s][A
 91%|█████████████████████████████▉   | 110776/122310 [3:39:28<13:30, 14.23it/s][A
 91%|█████████████████████████████▉   | 110784/122310 [3:39:28<14:23, 13.35it/s][A
 91%|█████████████████████████████▉   | 110799/122310 [3:39:29<12:16, 15.63it/s][A
 91%|█████████████████████████████▉   | 110801/122310 [3:39:30<16:14, 11.80it/s][A
 91%|█████████████████████████████▉   | 110809/122310 [3:39:31<16:26, 11.66it/s][A
 91%|█████████████████████████████▉   | 110823/122310 [3:39:31<13:37, 14.05it/s][A
 91%|█████████████████████████████▉   | 110832/122310 [3:39:32<14:03, 13.60it/s][A
 91%|█████████████████████████████▉   | 110844/122310 [3:39:33<13:07, 14.56it/s][A
 91%|█████████████████████████████▉   | 110858/122310 [3:39:33<11:53, 16.06it/s][A
 91%|█████████████████████████████▉   | 110863/122310 [3:39:34<14:17, 13.35

step: 13460, loss: 75.29267927804143, epoch: 0



 91%|█████████████████████████████▉   | 110960/122310 [3:39:41<12:27, 15.19it/s][A
 91%|█████████████████████████████▉   | 110972/122310 [3:39:41<12:05, 15.63it/s][A
 91%|█████████████████████████████▉   | 110976/122310 [3:39:42<14:56, 12.65it/s][A
 91%|█████████████████████████████▉   | 110982/122310 [3:39:43<16:33, 11.40it/s][A
 91%|█████████████████████████████▉   | 110991/122310 [3:39:43<16:02, 11.76it/s][A
 91%|█████████████████████████████▉   | 110997/122310 [3:39:44<17:31, 10.76it/s][A
 91%|█████████████████████████████▉   | 111004/122310 [3:39:45<17:58, 10.48it/s][A
 91%|█████████████████████████████▉   | 111012/122310 [3:39:46<17:34, 10.71it/s][A
 91%|█████████████████████████████▉   | 111028/122310 [3:39:46<13:14, 14.21it/s][A
 91%|█████████████████████████████▉   | 111040/122310 [3:39:47<12:34, 14.94it/s][A
 91%|█████████████████████████████▉   | 111050/122310 [3:39:48<12:50, 14.62it/s][A
 91%|█████████████████████████████▉   | 111056/122310 [3:39:48<14:40, 12.78

step: 13480, loss: 76.49286446494527, epoch: 0



 91%|█████████████████████████████▉   | 111113/122310 [3:39:55<14:56, 12.50it/s][A
 91%|█████████████████████████████▉   | 111125/122310 [3:39:56<13:31, 13.79it/s][A
 91%|█████████████████████████████▉   | 111131/122310 [3:39:56<15:14, 12.23it/s][A
 91%|█████████████████████████████▉   | 111135/122310 [3:39:57<18:04, 10.31it/s][A
 91%|█████████████████████████████▉   | 111140/122310 [3:39:58<19:57,  9.33it/s][A
 91%|█████████████████████████████▉   | 111153/122310 [3:39:58<15:30, 11.99it/s][A
 91%|█████████████████████████████▉   | 111163/122310 [3:39:59<14:46, 12.58it/s][A
 91%|█████████████████████████████▉   | 111173/122310 [3:40:00<14:13, 13.04it/s][A
 91%|█████████████████████████████▉   | 111182/122310 [3:40:01<18:36,  9.96it/s][A
 91%|██████████████████████████████   | 111191/122310 [3:40:02<17:25, 10.63it/s][A
 91%|██████████████████████████████   | 111198/122310 [3:40:03<17:46, 10.42it/s][A
 91%|██████████████████████████████   | 111203/122310 [3:40:03<19:39,  9.41

step: 13500, loss: 88.70650751789398, epoch: 0



 91%|██████████████████████████████   | 111253/122310 [3:40:09<16:17, 11.31it/s][A
 91%|██████████████████████████████   | 111261/122310 [3:40:10<16:20, 11.27it/s][A
 91%|██████████████████████████████   | 111269/122310 [3:40:10<16:20, 11.26it/s][A
 91%|██████████████████████████████   | 111277/122310 [3:40:11<16:21, 11.24it/s][A
 91%|██████████████████████████████   | 111279/122310 [3:40:12<21:05,  8.72it/s][A
 91%|██████████████████████████████   | 111288/122310 [3:40:13<18:40,  9.84it/s][A
 91%|██████████████████████████████   | 111291/122310 [3:40:13<22:23,  8.20it/s][A
 91%|██████████████████████████████   | 111300/122310 [3:40:14<19:16,  9.52it/s][A
 91%|██████████████████████████████   | 111307/122310 [3:40:15<19:11,  9.56it/s][A
 91%|██████████████████████████████   | 111309/122310 [3:40:15<24:12,  7.57it/s][A
 91%|██████████████████████████████   | 111316/122310 [3:40:16<22:13,  8.25it/s][A
 91%|██████████████████████████████   | 111326/122310 [3:40:17<18:23,  9.95

step: 13520, loss: 83.39098793936526, epoch: 0



 91%|██████████████████████████████   | 111398/122310 [3:40:23<18:39,  9.75it/s][A
 91%|██████████████████████████████   | 111405/122310 [3:40:24<18:34,  9.79it/s][A
 91%|██████████████████████████████   | 111412/122310 [3:40:25<18:30,  9.81it/s][A
 91%|██████████████████████████████   | 111420/122310 [3:40:25<17:42, 10.25it/s][A
 91%|██████████████████████████████   | 111434/122310 [3:40:26<13:52, 13.06it/s][A
 91%|██████████████████████████████   | 111449/122310 [3:40:27<11:43, 15.44it/s][A
 91%|██████████████████████████████   | 111453/122310 [3:40:28<14:29, 12.48it/s][A
 91%|██████████████████████████████   | 111459/122310 [3:40:28<16:02, 11.27it/s][A
 91%|██████████████████████████████   | 111474/122310 [3:40:29<12:42, 14.21it/s][A
 91%|██████████████████████████████   | 111483/122310 [3:40:30<13:08, 13.73it/s][A
 91%|██████████████████████████████   | 111495/122310 [3:40:31<15:59, 11.27it/s][A
 91%|██████████████████████████████   | 111503/122310 [3:40:32<15:57, 11.29

step: 13540, loss: 86.34353692895314, epoch: 0



 91%|██████████████████████████████   | 111585/122310 [3:40:38<12:54, 13.86it/s][A
 91%|██████████████████████████████   | 111593/122310 [3:40:38<13:38, 13.10it/s][A
 91%|██████████████████████████████   | 111600/122310 [3:40:39<14:44, 12.11it/s][A
 91%|██████████████████████████████   | 111609/122310 [3:40:40<14:33, 12.25it/s][A
 91%|██████████████████████████████   | 111614/122310 [3:40:40<16:40, 10.69it/s][A
 91%|██████████████████████████████   | 111629/122310 [3:40:41<12:54, 13.79it/s][A
 91%|██████████████████████████████   | 111639/122310 [3:40:42<12:50, 13.85it/s][A
 91%|██████████████████████████████   | 111652/122310 [3:40:43<11:43, 15.15it/s][A
 91%|██████████████████████████████▏  | 111657/122310 [3:40:43<14:06, 12.59it/s][A
 91%|██████████████████████████████▏  | 111663/122310 [3:40:44<15:35, 11.38it/s][A
 91%|██████████████████████████████▏  | 111673/122310 [3:40:45<14:36, 12.14it/s][A
 91%|██████████████████████████████▏  | 111687/122310 [3:40:45<12:20, 14.35

step: 13560, loss: 118.28222803800163, epoch: 0



 91%|██████████████████████████████▏  | 111750/122310 [3:40:52<20:29,  8.59it/s][A
 91%|██████████████████████████████▏  | 111755/122310 [3:40:53<21:40,  8.12it/s][A
 91%|██████████████████████████████▏  | 111766/122310 [3:40:53<17:04, 10.30it/s][A
 91%|██████████████████████████████▏  | 111779/122310 [3:40:54<13:48, 12.71it/s][A
 91%|██████████████████████████████▏  | 111783/122310 [3:40:55<16:36, 10.56it/s][A
 91%|██████████████████████████████▏  | 111790/122310 [3:40:55<16:58, 10.33it/s][A
 91%|██████████████████████████████▏  | 111799/122310 [3:40:56<15:55, 11.01it/s][A
 91%|██████████████████████████████▏  | 111807/122310 [3:40:57<15:48, 11.08it/s][A
 91%|██████████████████████████████▏  | 111815/122310 [3:40:58<15:45, 11.10it/s][A
 91%|██████████████████████████████▏  | 111822/122310 [3:40:58<16:20, 10.69it/s][A
 91%|██████████████████████████████▏  | 111836/122310 [3:40:59<12:59, 13.43it/s][A
 91%|██████████████████████████████▏  | 111845/122310 [3:41:00<13:14, 13.16

step: 13580, loss: 75.07765701421046, epoch: 0



 92%|██████████████████████████████▏  | 111934/122310 [3:41:06<10:41, 16.17it/s][A
 92%|██████████████████████████████▏  | 111941/122310 [3:41:07<12:07, 14.24it/s][A
 92%|██████████████████████████████▏  | 111950/122310 [3:41:08<12:31, 13.78it/s][A
 92%|██████████████████████████████▏  | 111962/122310 [3:41:08<11:44, 14.69it/s][A
 92%|██████████████████████████████▏  | 111967/122310 [3:41:09<14:00, 12.31it/s][A
 92%|██████████████████████████████▏  | 111969/122310 [3:41:10<18:17,  9.42it/s][A
 92%|██████████████████████████████▏  | 111978/122310 [3:41:11<16:35, 10.38it/s][A
 92%|██████████████████████████████▏  | 111995/122310 [3:41:11<11:55, 14.42it/s][A
 92%|██████████████████████████████▏  | 112001/122310 [3:41:12<13:35, 12.63it/s][A
 92%|██████████████████████████████▏  | 112011/122310 [3:41:13<13:12, 13.00it/s][A
 92%|██████████████████████████████▏  | 112018/122310 [3:41:13<14:19, 11.97it/s][A
 92%|██████████████████████████████▏  | 112028/122310 [3:41:14<13:40, 12.53

step: 13600, loss: 109.17247580105763, epoch: 0
sim1 and sim2 are 0.6006843694276828, 0.15640611667644247
cosine of pred and queen: 0.15244405478094075
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: outside
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: pressure
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: australian
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: economy
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: spain
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: vallecillo
Actual: 


 92%|██████████████████████████████▏  | 112104/122310 [3:41:33<10:50, 15.70it/s][A

Actual: brazil:real::sweden:krona, pred: france
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: europe
Actual: thailand:thai::india:indian, pred: staffers
Actual: sweden:swedish::netherlands:dutch, pred: woman
Actual: russia:russian::germany:german, pred: spain
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: friday
Actual: croatia:croatian::france:french, pred: defense
Actual: denmark:danish::germany:german, pred: european
Accuracy is 0.13333333333333333
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: reunited
Actual: think:thinks::talk:talks, pred: arza
Actual: van


 92%|████████████████████████████▍  | 112112/122310 [3:42:33<6:16:07,  2.21s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.05325443786982249



 92%|████████████████████████████▍  | 112121/122310 [3:42:34<4:36:39,  1.63s/it][A
 92%|████████████████████████████▍  | 112129/122310 [3:42:34<3:29:00,  1.23s/it][A
 92%|████████████████████████████▍  | 112142/122310 [3:42:35<2:14:03,  1.26it/s][A
 92%|████████████████████████████▍  | 112145/122310 [3:42:36<2:03:41,  1.37it/s][A
 92%|████████████████████████████▍  | 112161/122310 [3:42:37<1:10:37,  2.40it/s][A
 92%|██████████████████████████████▎  | 112170/122310 [3:42:37<55:13,  3.06it/s][A
 92%|██████████████████████████████▎  | 112180/122310 [3:42:38<42:18,  3.99it/s][A
 92%|██████████████████████████████▎  | 112185/122310 [3:42:39<43:18,  3.90it/s][A
 92%|██████████████████████████████▎  | 112189/122310 [3:42:40<40:58,  4.12it/s][A
 92%|██████████████████████████████▎  | 112199/122310 [3:42:41<30:06,  5.60it/s][A
 92%|██████████████████████████████▎  | 112211/122310 [3:42:42<22:10,  7.59it/s][A
 92%|██████████████████████████████▎  | 112218/122310 [3:42:42<20:52,  8.06

step: 13620, loss: 96.02774777887109, epoch: 0



 92%|██████████████████████████████▎  | 112295/122310 [3:42:48<11:55, 14.00it/s][A
 92%|██████████████████████████████▎  | 112309/122310 [3:42:49<10:36, 15.71it/s][A
 92%|██████████████████████████████▎  | 112318/122310 [3:42:49<11:18, 14.72it/s][A
 92%|██████████████████████████████▎  | 112322/122310 [3:42:50<13:45, 12.09it/s][A
 92%|██████████████████████████████▎  | 112327/122310 [3:42:51<15:40, 10.62it/s][A
 92%|██████████████████████████████▎  | 112330/122310 [3:42:51<19:05,  8.71it/s][A
 92%|██████████████████████████████▎  | 112336/122310 [3:42:52<19:15,  8.63it/s][A
 92%|██████████████████████████████▎  | 112345/122310 [3:42:53<16:46,  9.90it/s][A
 92%|██████████████████████████████▎  | 112357/122310 [3:42:54<13:50, 11.99it/s][A
 92%|██████████████████████████████▎  | 112362/122310 [3:42:54<15:44, 10.53it/s][A
 92%|██████████████████████████████▎  | 112375/122310 [3:42:55<12:53, 12.85it/s][A
 92%|██████████████████████████████▎  | 112381/122310 [3:42:56<14:22, 11.51

step: 13640, loss: 90.94598185917953, epoch: 0



 92%|██████████████████████████████▎  | 112452/122310 [3:43:02<13:46, 11.92it/s][A
 92%|██████████████████████████████▎  | 112457/122310 [3:43:03<15:39, 10.49it/s][A
 92%|██████████████████████████████▎  | 112468/122310 [3:43:03<13:41, 11.99it/s][A
 92%|██████████████████████████████▎  | 112472/122310 [3:43:04<16:14, 10.10it/s][A
 92%|██████████████████████████████▎  | 112492/122310 [3:43:05<10:34, 15.48it/s][A
 92%|██████████████████████████████▎  | 112500/122310 [3:43:06<11:28, 14.25it/s][A
 92%|██████████████████████████████▎  | 112511/122310 [3:43:06<11:09, 14.64it/s][A
 92%|██████████████████████████████▎  | 112522/122310 [3:43:07<10:57, 14.89it/s][A
 92%|██████████████████████████████▎  | 112529/122310 [3:43:08<12:10, 13.39it/s][A
 92%|██████████████████████████████▎  | 112538/122310 [3:43:08<12:20, 13.20it/s][A
 92%|██████████████████████████████▎  | 112546/122310 [3:43:09<12:53, 12.63it/s][A
 92%|██████████████████████████████▎  | 112556/122310 [3:43:10<12:26, 13.06

step: 13660, loss: 84.2613969665645, epoch: 0



 92%|██████████████████████████████▍  | 112634/122310 [3:43:16<10:54, 14.78it/s][A
 92%|██████████████████████████████▍  | 112643/122310 [3:43:17<11:23, 14.14it/s][A
 92%|██████████████████████████████▍  | 112649/122310 [3:43:18<12:55, 12.46it/s][A
 92%|██████████████████████████████▍  | 112661/122310 [3:43:18<11:37, 13.84it/s][A
 92%|██████████████████████████████▍  | 112672/122310 [3:43:19<11:12, 14.34it/s][A
 92%|██████████████████████████████▍  | 112681/122310 [3:43:20<11:37, 13.81it/s][A
 92%|██████████████████████████████▍  | 112692/122310 [3:43:21<11:10, 14.34it/s][A
 92%|██████████████████████████████▍  | 112694/122310 [3:43:21<14:41, 10.91it/s][A
 92%|██████████████████████████████▍  | 112697/122310 [3:43:22<18:02,  8.88it/s][A
 92%|██████████████████████████████▍  | 112705/122310 [3:43:23<16:40,  9.60it/s][A
 92%|██████████████████████████████▍  | 112711/122310 [3:43:23<17:16,  9.26it/s][A
 92%|██████████████████████████████▍  | 112722/122310 [3:43:24<14:26, 11.06

step: 13680, loss: 96.91708653486123, epoch: 0



 92%|██████████████████████████████▍  | 112813/122310 [3:43:30<09:26, 16.77it/s][A
 92%|██████████████████████████████▍  | 112819/122310 [3:43:31<11:02, 14.32it/s][A
 92%|██████████████████████████████▍  | 112835/122310 [3:43:32<09:25, 16.75it/s][A
 92%|██████████████████████████████▍  | 112851/122310 [3:43:33<08:29, 18.55it/s][A
 92%|██████████████████████████████▍  | 112862/122310 [3:43:33<08:57, 17.58it/s][A
 92%|██████████████████████████████▍  | 112872/122310 [3:43:34<09:28, 16.59it/s][A
 92%|██████████████████████████████▍  | 112884/122310 [3:43:35<09:26, 16.65it/s][A
 92%|██████████████████████████████▍  | 112890/122310 [3:43:35<11:04, 14.18it/s][A
 92%|██████████████████████████████▍  | 112900/122310 [3:43:36<11:08, 14.07it/s][A
 92%|██████████████████████████████▍  | 112902/122310 [3:43:37<14:37, 10.73it/s][A
 92%|██████████████████████████████▍  | 112909/122310 [3:43:38<14:59, 10.45it/s][A
 92%|██████████████████████████████▍  | 112913/122310 [3:43:38<17:28,  8.96

step: 13700, loss: 95.32364331680674, epoch: 0



 92%|██████████████████████████████▍  | 112977/122310 [3:43:45<17:07,  9.08it/s][A
 92%|██████████████████████████████▍  | 112989/122310 [3:43:45<13:35, 11.43it/s][A
 92%|██████████████████████████████▍  | 112995/122310 [3:43:46<14:45, 10.52it/s][A
 92%|██████████████████████████████▍  | 113007/122310 [3:43:47<12:32, 12.37it/s][A
 92%|██████████████████████████████▍  | 113015/122310 [3:43:48<12:54, 12.00it/s][A
 92%|██████████████████████████████▍  | 113019/122310 [3:43:48<15:16, 10.13it/s][A
 92%|██████████████████████████████▍  | 113026/122310 [3:43:49<15:29,  9.99it/s][A
 92%|██████████████████████████████▍  | 113032/122310 [3:43:50<16:12,  9.54it/s][A
 92%|██████████████████████████████▌  | 113046/122310 [3:43:50<12:17, 12.56it/s][A
 92%|██████████████████████████████▌  | 113058/122310 [3:43:51<11:09, 13.82it/s][A
 92%|██████████████████████████████▌  | 113065/122310 [3:43:52<12:12, 12.63it/s][A
 92%|██████████████████████████████▌  | 113083/122310 [3:43:53<09:22, 16.42

step: 13720, loss: 123.14271649770279, epoch: 0



 93%|██████████████████████████████▌  | 113160/122310 [3:43:59<13:57, 10.93it/s][A
 93%|██████████████████████████████▌  | 113162/122310 [3:44:00<17:55,  8.51it/s][A
 93%|██████████████████████████████▌  | 113167/122310 [3:44:00<18:52,  8.07it/s][A
 93%|██████████████████████████████▌  | 113178/122310 [3:44:01<14:48, 10.28it/s][A
 93%|██████████████████████████████▌  | 113191/122310 [3:44:03<15:34,  9.76it/s][A
 93%|██████████████████████████████▌  | 113199/122310 [3:44:03<14:56, 10.16it/s][A
 93%|██████████████████████████████▌  | 113209/122310 [3:44:04<13:35, 11.16it/s][A
 93%|██████████████████████████████▌  | 113217/122310 [3:44:05<13:33, 11.17it/s][A
 93%|██████████████████████████████▌  | 113226/122310 [3:44:05<13:08, 11.52it/s][A
 93%|██████████████████████████████▌  | 113232/122310 [3:44:06<14:10, 10.67it/s][A
 93%|██████████████████████████████▌  | 113237/122310 [3:44:07<15:43,  9.62it/s][A
 93%|██████████████████████████████▌  | 113248/122310 [3:44:08<13:20, 11.32

step: 13740, loss: 89.26764288347371, epoch: 0



 93%|██████████████████████████████▌  | 113328/122310 [3:44:13<10:26, 14.33it/s][A
 93%|██████████████████████████████▌  | 113331/122310 [3:44:14<13:06, 11.42it/s][A
 93%|██████████████████████████████▌  | 113336/122310 [3:44:15<14:47, 10.11it/s][A
 93%|██████████████████████████████▌  | 113347/122310 [3:44:15<12:45, 11.71it/s][A
 93%|██████████████████████████████▌  | 113354/122310 [3:44:16<13:25, 11.11it/s][A
 93%|██████████████████████████████▌  | 113365/122310 [3:44:17<12:01, 12.39it/s][A
 93%|██████████████████████████████▌  | 113375/122310 [3:44:18<11:32, 12.90it/s][A
 93%|██████████████████████████████▌  | 113384/122310 [3:44:18<11:36, 12.81it/s][A
 93%|██████████████████████████████▌  | 113397/122310 [3:44:19<10:17, 14.43it/s][A
 93%|██████████████████████████████▌  | 113403/122310 [3:44:20<11:44, 12.64it/s][A
 93%|██████████████████████████████▌  | 113410/122310 [3:44:20<12:35, 11.78it/s][A
 93%|██████████████████████████████▌  | 113421/122310 [3:44:21<11:30, 12.87

step: 13760, loss: 81.95429184788202, epoch: 0



 93%|██████████████████████████████▌  | 113491/122310 [3:44:28<15:36,  9.42it/s][A
 93%|██████████████████████████████▌  | 113502/122310 [3:44:28<13:27, 10.91it/s][A
 93%|██████████████████████████████▋  | 113510/122310 [3:44:29<13:19, 11.00it/s][A
 93%|██████████████████████████████▋  | 113518/122310 [3:44:30<13:13, 11.07it/s][A
 93%|██████████████████████████████▋  | 113524/122310 [3:44:30<14:12, 10.31it/s][A
 93%|██████████████████████████████▋  | 113532/122310 [3:44:31<13:50, 10.57it/s][A
 93%|██████████████████████████████▋  | 113541/122310 [3:44:32<13:06, 11.15it/s][A
 93%|██████████████████████████████▋  | 113545/122310 [3:44:33<15:21,  9.51it/s][A
 93%|██████████████████████████████▋  | 113552/122310 [3:44:33<15:10,  9.62it/s][A
 93%|██████████████████████████████▋  | 113559/122310 [3:44:34<15:04,  9.68it/s][A
 93%|██████████████████████████████▋  | 113575/122310 [3:44:35<10:49, 13.46it/s][A
 93%|██████████████████████████████▋  | 113581/122310 [3:44:35<12:12, 11.91

step: 13780, loss: 76.70216468010408, epoch: 0



 93%|██████████████████████████████▋  | 113654/122310 [3:44:42<12:11, 11.84it/s][A
 93%|██████████████████████████████▋  | 113665/122310 [3:44:43<11:08, 12.93it/s][A
 93%|██████████████████████████████▋  | 113672/122310 [3:44:43<11:58, 12.02it/s][A
 93%|██████████████████████████████▋  | 113676/122310 [3:44:44<14:20, 10.03it/s][A
 93%|██████████████████████████████▋  | 113678/122310 [3:44:45<18:11,  7.91it/s][A
 93%|██████████████████████████████▋  | 113687/122310 [3:44:45<15:25,  9.31it/s][A
 93%|██████████████████████████████▋  | 113691/122310 [3:44:46<17:33,  8.18it/s][A
 93%|██████████████████████████████▋  | 113696/122310 [3:44:47<18:16,  7.85it/s][A
 93%|██████████████████████████████▋  | 113707/122310 [3:44:48<14:10, 10.11it/s][A
 93%|██████████████████████████████▋  | 113716/122310 [3:44:48<13:13, 10.83it/s][A
 93%|██████████████████████████████▋  | 113726/122310 [3:44:49<12:05, 11.84it/s][A
 93%|██████████████████████████████▋  | 113734/122310 [3:44:50<12:16, 11.65

step: 13800, loss: 77.25196095376396, epoch: 0



 93%|██████████████████████████████▋  | 113819/122310 [3:44:56<11:12, 12.63it/s][A
 93%|██████████████████████████████▋  | 113831/122310 [3:44:57<10:12, 13.85it/s][A
 93%|██████████████████████████████▋  | 113836/122310 [3:44:58<11:55, 11.85it/s][A
 93%|██████████████████████████████▋  | 113849/122310 [3:44:58<10:17, 13.69it/s][A
 93%|██████████████████████████████▋  | 113859/122310 [3:44:59<10:12, 13.80it/s][A
 93%|██████████████████████████████▋  | 113873/122310 [3:45:00<09:03, 15.51it/s][A
 93%|██████████████████████████████▋  | 113880/122310 [3:45:00<10:13, 13.75it/s][A
 93%|██████████████████████████████▋  | 113887/122310 [3:45:01<11:09, 12.58it/s][A
 93%|██████████████████████████████▋  | 113891/122310 [3:45:02<13:21, 10.51it/s][A
 93%|██████████████████████████████▋  | 113899/122310 [3:45:03<13:04, 10.72it/s][A
 93%|██████████████████████████████▋  | 113911/122310 [3:45:03<11:10, 12.53it/s][A
 93%|██████████████████████████████▋  | 113922/122310 [3:45:04<10:24, 13.43

step: 13820, loss: 126.98956916532156, epoch: 0



 93%|██████████████████████████████▊  | 114000/122310 [3:45:10<11:15, 12.30it/s][A
 93%|██████████████████████████████▊  | 114007/122310 [3:45:11<11:53, 11.64it/s][A
 93%|██████████████████████████████▊  | 114026/122310 [3:45:12<08:43, 15.82it/s][A
 93%|██████████████████████████████▊  | 114041/122310 [3:45:13<07:59, 17.24it/s][A
 93%|██████████████████████████████▊  | 114053/122310 [3:45:13<08:00, 17.18it/s][A
 93%|██████████████████████████████▊  | 114064/122310 [3:45:14<08:15, 16.64it/s][A
 93%|██████████████████████████████▊  | 114066/122310 [3:45:15<10:59, 12.51it/s][A
 93%|██████████████████████████████▊  | 114071/122310 [3:45:15<12:32, 10.95it/s][A
 93%|██████████████████████████████▊  | 114087/122310 [3:45:16<09:31, 14.38it/s][A
 93%|██████████████████████████████▊  | 114097/122310 [3:45:17<09:35, 14.27it/s][A
 93%|██████████████████████████████▊  | 114106/122310 [3:45:18<10:08, 13.49it/s][A
 93%|██████████████████████████████▊  | 114108/122310 [3:45:19<17:30,  7.81

step: 13840, loss: 100.31628605467034, epoch: 0



 93%|██████████████████████████████▊  | 114178/122310 [3:45:25<11:05, 12.21it/s][A
 93%|██████████████████████████████▊  | 114187/122310 [3:45:26<11:05, 12.21it/s][A
 93%|██████████████████████████████▊  | 114196/122310 [3:45:26<11:03, 12.23it/s][A
 93%|██████████████████████████████▊  | 114201/122310 [3:45:27<12:46, 10.58it/s][A
 93%|██████████████████████████████▊  | 114208/122310 [3:45:28<13:08, 10.27it/s][A
 93%|██████████████████████████████▊  | 114221/122310 [3:45:29<10:48, 12.47it/s][A
 93%|██████████████████████████████▊  | 114229/122310 [3:45:29<11:18, 11.91it/s][A
 93%|██████████████████████████████▊  | 114239/122310 [3:45:30<10:51, 12.40it/s][A
 93%|██████████████████████████████▊  | 114244/122310 [3:45:31<12:35, 10.67it/s][A
 93%|██████████████████████████████▊  | 114248/122310 [3:45:32<14:41,  9.14it/s][A
 93%|██████████████████████████████▊  | 114254/122310 [3:45:32<15:11,  8.84it/s][A
 93%|██████████████████████████████▊  | 114263/122310 [3:45:33<13:37,  9.85

step: 13860, loss: 83.89912892892218, epoch: 0



 94%|██████████████████████████████▊  | 114367/122310 [3:45:40<10:38, 12.43it/s][A
 94%|██████████████████████████████▊  | 114377/122310 [3:45:41<10:31, 12.55it/s][A
 94%|██████████████████████████████▊  | 114389/122310 [3:45:42<09:49, 13.43it/s][A
 94%|██████████████████████████████▊  | 114393/122310 [3:45:42<11:53, 11.09it/s][A
 94%|██████████████████████████████▊  | 114397/122310 [3:45:43<13:59,  9.42it/s][A
 94%|██████████████████████████████▊  | 114406/122310 [3:45:44<16:47,  7.84it/s][A
 94%|██████████████████████████████▊  | 114407/122310 [3:45:46<26:05,  5.05it/s][A
 94%|██████████████████████████████▊  | 114418/122310 [3:45:47<18:24,  7.14it/s][A
 94%|██████████████████████████████▊  | 114423/122310 [3:45:47<18:38,  7.05it/s][A
 94%|██████████████████████████████▉  | 114436/122310 [3:45:48<13:36,  9.64it/s][A
 94%|██████████████████████████████▉  | 114438/122310 [3:45:49<16:56,  7.75it/s][A
 94%|██████████████████████████████▉  | 114440/122310 [3:45:50<20:30,  6.40

step: 13880, loss: 100.19680004080168, epoch: 0



 94%|██████████████████████████████▉  | 114495/122310 [3:45:55<11:24, 11.42it/s][A
 94%|██████████████████████████████▉  | 114507/122310 [3:45:56<10:07, 12.84it/s][A
 94%|██████████████████████████████▉  | 114513/122310 [3:45:56<11:22, 11.42it/s][A
 94%|██████████████████████████████▉  | 114521/122310 [3:45:57<11:26, 11.35it/s][A
 94%|██████████████████████████████▉  | 114526/122310 [3:45:58<12:50, 10.10it/s][A
 94%|██████████████████████████████▉  | 114538/122310 [3:45:59<10:44, 12.05it/s][A
 94%|██████████████████████████████▉  | 114561/122310 [3:45:59<07:10, 18.00it/s][A
 94%|██████████████████████████████▉  | 114571/122310 [3:46:00<07:40, 16.79it/s][A
 94%|██████████████████████████████▉  | 114576/122310 [3:46:01<09:18, 13.86it/s][A
 94%|██████████████████████████████▉  | 114592/122310 [3:46:01<07:50, 16.39it/s][A
 94%|██████████████████████████████▉  | 114600/122310 [3:46:02<08:40, 14.81it/s][A
 94%|██████████████████████████████▉  | 114608/122310 [3:46:03<09:30, 13.50

step: 13900, loss: 86.01633252692253, epoch: 0



 94%|██████████████████████████████▉  | 114680/122310 [3:46:10<13:50,  9.19it/s][A
 94%|██████████████████████████████▉  | 114694/122310 [3:46:10<10:29, 12.10it/s][A
 94%|██████████████████████████████▉  | 114701/122310 [3:46:11<11:11, 11.33it/s][A
 94%|██████████████████████████████▉  | 114706/122310 [3:46:12<12:43,  9.96it/s][A
 94%|██████████████████████████████▉  | 114708/122310 [3:46:13<16:17,  7.78it/s][A
 94%|██████████████████████████████▉  | 114716/122310 [3:46:13<14:33,  8.69it/s][A
 94%|██████████████████████████████▉  | 114721/122310 [3:46:14<15:35,  8.11it/s][A
 94%|██████████████████████████████▉  | 114728/122310 [3:46:15<14:51,  8.50it/s][A
 94%|██████████████████████████████▉  | 114738/122310 [3:46:15<12:34, 10.03it/s][A
 94%|██████████████████████████████▉  | 114749/122310 [3:46:16<10:57, 11.49it/s][A
 94%|██████████████████████████████▉  | 114754/122310 [3:46:17<12:30, 10.07it/s][A
 94%|██████████████████████████████▉  | 114761/122310 [3:46:18<12:43,  9.88

step: 13920, loss: 77.15320762587959, epoch: 0



 94%|██████████████████████████████▉  | 114843/122310 [3:46:24<11:10, 11.13it/s][A
 94%|██████████████████████████████▉  | 114853/122310 [3:46:25<10:29, 11.84it/s][A
 94%|██████████████████████████████▉  | 114867/122310 [3:46:26<08:51, 14.00it/s][A
 94%|██████████████████████████████▉  | 114874/122310 [3:46:27<09:48, 12.64it/s][A
 94%|██████████████████████████████▉  | 114883/122310 [3:46:27<09:56, 12.44it/s][A
 94%|██████████████████████████████▉  | 114886/122310 [3:46:28<12:25,  9.96it/s][A
 94%|██████████████████████████████▉  | 114889/122310 [3:46:29<15:08,  8.17it/s][A
 94%|██████████████████████████████▉  | 114896/122310 [3:46:30<14:23,  8.59it/s][A
 94%|███████████████████████████████  | 114902/122310 [3:46:30<14:40,  8.41it/s][A
 94%|███████████████████████████████  | 114906/122310 [3:46:31<16:25,  7.51it/s][A
 94%|███████████████████████████████  | 114915/122310 [3:46:32<13:49,  8.92it/s][A
 94%|███████████████████████████████  | 114937/122310 [3:46:33<08:07, 15.13

step: 13940, loss: 82.69544910110471, epoch: 0



 94%|███████████████████████████████  | 115022/122310 [3:46:39<09:57, 12.20it/s][A
 94%|███████████████████████████████  | 115041/122310 [3:46:40<07:27, 16.24it/s][A
 94%|███████████████████████████████  | 115050/122310 [3:46:41<08:02, 15.03it/s][A
 94%|███████████████████████████████  | 115061/122310 [3:46:41<08:03, 14.98it/s][A
 94%|███████████████████████████████  | 115063/122310 [3:46:42<10:43, 11.26it/s][A
 94%|███████████████████████████████  | 115067/122310 [3:46:43<12:39,  9.54it/s][A
 94%|███████████████████████████████  | 115075/122310 [3:46:44<12:08,  9.94it/s][A
 94%|███████████████████████████████  | 115082/122310 [3:46:44<12:16,  9.81it/s][A
 94%|███████████████████████████████  | 115091/122310 [3:46:45<11:27, 10.50it/s][A
 94%|███████████████████████████████  | 115103/122310 [3:46:46<09:50, 12.21it/s][A
 94%|███████████████████████████████  | 115111/122310 [3:46:47<10:09, 11.80it/s][A
 94%|███████████████████████████████  | 115116/122310 [3:46:47<11:39, 10.28

step: 13960, loss: 95.25288369007637, epoch: 0



 94%|███████████████████████████████  | 115185/122310 [3:46:54<13:14,  8.96it/s][A
 94%|███████████████████████████████  | 115192/122310 [3:46:55<13:01,  9.11it/s][A
 94%|███████████████████████████████  | 115203/122310 [3:46:56<10:55, 10.84it/s][A
 94%|███████████████████████████████  | 115208/122310 [3:46:56<12:17,  9.63it/s][A
 94%|███████████████████████████████  | 115210/122310 [3:46:57<15:40,  7.55it/s][A
 94%|███████████████████████████████  | 115221/122310 [3:46:58<12:06,  9.75it/s][A
 94%|███████████████████████████████  | 115234/122310 [3:46:58<09:44, 12.10it/s][A
 94%|███████████████████████████████  | 115238/122310 [3:46:59<11:40, 10.09it/s][A
 94%|███████████████████████████████  | 115246/122310 [3:47:00<11:24, 10.33it/s][A
 94%|███████████████████████████████  | 115257/122310 [3:47:01<10:03, 11.68it/s][A
 94%|███████████████████████████████  | 115265/122310 [3:47:01<10:14, 11.47it/s][A
 94%|███████████████████████████████  | 115271/122310 [3:47:02<11:07, 10.54

step: 13980, loss: 85.97221324426569, epoch: 0



 94%|███████████████████████████████  | 115335/122310 [3:47:09<10:59, 10.57it/s][A
 94%|███████████████████████████████  | 115345/122310 [3:47:09<10:00, 11.59it/s][A
 94%|███████████████████████████████  | 115349/122310 [3:47:10<11:50,  9.79it/s][A
 94%|███████████████████████████████  | 115353/122310 [3:47:11<13:34,  8.55it/s][A
 94%|███████████████████████████████▏ | 115365/122310 [3:47:12<10:32, 10.98it/s][A
 94%|███████████████████████████████▏ | 115377/122310 [3:47:12<09:07, 12.67it/s][A
 94%|███████████████████████████████▏ | 115382/122310 [3:47:13<10:30, 10.99it/s][A
 94%|███████████████████████████████▏ | 115388/122310 [3:47:14<11:18, 10.21it/s][A
 94%|███████████████████████████████▏ | 115395/122310 [3:47:14<11:27, 10.05it/s][A
 94%|███████████████████████████████▏ | 115403/122310 [3:47:15<11:05, 10.37it/s][A
 94%|███████████████████████████████▏ | 115412/122310 [3:47:16<10:28, 10.98it/s][A
 94%|███████████████████████████████▏ | 115419/122310 [3:47:17<10:47, 10.64

step: 14000, loss: 80.84016170216942, epoch: 0
sim1 and sim2 are 0.6321174218371828, 0.19419774887146563
cosine of pred and queen: 0.10964765588171609
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: outside
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: pressure
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: least
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: collective
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: vallecillo
Actual:


 94%|███████████████████████████████▏ | 115481/122310 [3:47:33<08:14, 13.80it/s][A

Actual: syria:arabic::australia:english, pred: media
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: global
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: ukraine
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: belgium
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: second
Actual: thailand:thai::india:indian, pred: ruto
Actual: sweden:swedish::netherlands:dutch, pred: police
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: adadah
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: company
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: friday
Actual: croatia:croatian::france:french, pred: offic


 94%|█████████████████████████████▎ | 115490/122310 [3:48:39<4:38:47,  2.45s/it][A
 94%|█████████████████████████████▎ | 115495/122310 [3:48:39<3:50:51,  2.03s/it][A
 94%|█████████████████████████████▎ | 115511/122310 [3:48:40<2:08:02,  1.13s/it][A
 94%|█████████████████████████████▎ | 115520/122310 [3:48:41<1:36:11,  1.18it/s][A
 94%|█████████████████████████████▎ | 115524/122310 [3:48:41<1:25:08,  1.33it/s][A
 94%|███████████████████████████████▏ | 115541/122310 [3:48:42<47:24,  2.38it/s][A
 94%|███████████████████████████████▏ | 115556/122310 [3:48:43<31:46,  3.54it/s][A
 94%|███████████████████████████████▏ | 115564/122310 [3:48:44<27:01,  4.16it/s][A
 94%|███████████████████████████████▏ | 115576/122310 [3:48:44<20:30,  5.47it/s][A
 95%|███████████████████████████████▏ | 115583/122310 [3:48:45<18:41,  6.00it/s][A
 95%|███████████████████████████████▏ | 115589/122310 [3:48:46<17:36,  6.36it/s][A
 95%|███████████████████████████████▏ | 115598/122310 [3:48:47<15:07,  7.40

step: 14020, loss: 90.39893183563527, epoch: 0



 95%|███████████████████████████████▏ | 115655/122310 [3:48:54<11:33,  9.60it/s][A
 95%|███████████████████████████████▏ | 115661/122310 [3:48:55<11:57,  9.27it/s][A
 95%|███████████████████████████████▏ | 115663/122310 [3:48:55<15:07,  7.33it/s][A
 95%|███████████████████████████████▏ | 115669/122310 [3:48:56<14:24,  7.68it/s][A
 95%|███████████████████████████████▏ | 115683/122310 [3:48:57<09:51, 11.21it/s][A
 95%|███████████████████████████████▏ | 115685/122310 [3:48:57<12:37,  8.75it/s][A
 95%|███████████████████████████████▏ | 115692/122310 [3:48:58<12:09,  9.07it/s][A
 95%|███████████████████████████████▏ | 115698/122310 [3:48:59<12:23,  8.89it/s][A
 95%|███████████████████████████████▏ | 115704/122310 [3:49:00<12:34,  8.76it/s][A
 95%|███████████████████████████████▏ | 115717/122310 [3:49:00<09:29, 11.58it/s][A
 95%|███████████████████████████████▏ | 115723/122310 [3:49:01<10:17, 10.67it/s][A
 95%|███████████████████████████████▏ | 115730/122310 [3:49:02<10:31, 10.42

step: 14040, loss: 100.16105274245493, epoch: 0



 95%|███████████████████████████████▏ | 115801/122310 [3:49:08<10:30, 10.32it/s][A
 95%|███████████████████████████████▏ | 115811/122310 [3:49:09<09:27, 11.45it/s][A
 95%|███████████████████████████████▏ | 115818/122310 [3:49:10<09:53, 10.94it/s][A
 95%|███████████████████████████████▎ | 115829/122310 [3:49:10<08:47, 12.29it/s][A
 95%|███████████████████████████████▎ | 115832/122310 [3:49:11<10:57,  9.86it/s][A
 95%|███████████████████████████████▎ | 115841/122310 [3:49:12<10:05, 10.69it/s][A
 95%|███████████████████████████████▎ | 115846/122310 [3:49:12<11:14,  9.58it/s][A
 95%|███████████████████████████████▎ | 115858/122310 [3:49:13<09:09, 11.74it/s][A
 95%|███████████████████████████████▎ | 115869/122310 [3:49:14<08:23, 12.79it/s][A
 95%|███████████████████████████████▎ | 115876/122310 [3:49:15<09:03, 11.84it/s][A
 95%|███████████████████████████████▎ | 115893/122310 [3:49:15<06:56, 15.40it/s][A
 95%|███████████████████████████████▎ | 115895/122310 [3:49:16<09:12, 11.60

step: 14060, loss: 104.53993314680869, epoch: 0



 95%|███████████████████████████████▎ | 115957/122310 [3:49:22<10:18, 10.28it/s][A
 95%|███████████████████████████████▎ | 115962/122310 [3:49:23<11:25,  9.26it/s][A
 95%|███████████████████████████████▎ | 115967/122310 [3:49:24<12:14,  8.64it/s][A
 95%|███████████████████████████████▎ | 115978/122310 [3:49:25<09:54, 10.66it/s][A
 95%|███████████████████████████████▎ | 115985/122310 [3:49:25<10:09, 10.38it/s][A
 95%|███████████████████████████████▎ | 115988/122310 [3:49:26<12:20,  8.54it/s][A
 95%|███████████████████████████████▎ | 115992/122310 [3:49:27<13:45,  7.66it/s][A
 95%|███████████████████████████████▎ | 116002/122310 [3:49:27<10:59,  9.56it/s][A
 95%|███████████████████████████████▎ | 116008/122310 [3:49:28<11:24,  9.21it/s][A
 95%|███████████████████████████████▎ | 116020/122310 [3:49:29<09:07, 11.49it/s][A
 95%|███████████████████████████████▎ | 116038/122310 [3:49:30<06:42, 15.58it/s][A
 95%|███████████████████████████████▎ | 116047/122310 [3:49:30<07:06, 14.69

step: 14080, loss: 103.10915825024978, epoch: 0



 95%|███████████████████████████████▎ | 116120/122310 [3:49:37<09:41, 10.64it/s][A
 95%|███████████████████████████████▎ | 116129/122310 [3:49:38<09:10, 11.24it/s][A
 95%|███████████████████████████████▎ | 116144/122310 [3:49:38<07:15, 14.16it/s][A
 95%|███████████████████████████████▎ | 116149/122310 [3:49:39<08:32, 12.02it/s][A
 95%|███████████████████████████████▎ | 116162/122310 [3:49:40<07:23, 13.85it/s][A
 95%|███████████████████████████████▎ | 116170/122310 [3:49:40<07:51, 13.01it/s][A
 95%|███████████████████████████████▎ | 116179/122310 [3:49:41<07:54, 12.91it/s][A
 95%|███████████████████████████████▎ | 116188/122310 [3:49:42<07:58, 12.80it/s][A
 95%|███████████████████████████████▎ | 116193/122310 [3:49:43<09:13, 11.05it/s][A
 95%|███████████████████████████████▎ | 116205/122310 [3:49:43<07:59, 12.74it/s][A
 95%|███████████████████████████████▎ | 116214/122310 [3:49:44<08:00, 12.69it/s][A
 95%|███████████████████████████████▎ | 116230/122310 [3:49:45<06:30, 15.56

step: 14100, loss: 84.08301834178968, epoch: 0



 95%|███████████████████████████████▍ | 116292/122310 [3:49:51<07:35, 13.20it/s][A
 95%|███████████████████████████████▍ | 116302/122310 [3:49:52<07:27, 13.44it/s][A
 95%|███████████████████████████████▍ | 116309/122310 [3:49:53<08:05, 12.36it/s][A
 95%|███████████████████████████████▍ | 116319/122310 [3:49:53<07:45, 12.86it/s][A
 95%|███████████████████████████████▍ | 116330/122310 [3:50:00<24:26,  4.08it/s][A
 95%|███████████████████████████████▍ | 116331/122310 [3:50:00<26:20,  3.78it/s][A
 95%|███████████████████████████████▍ | 116341/122310 [3:50:01<19:08,  5.20it/s][A
 95%|███████████████████████████████▍ | 116348/122310 [3:50:02<16:42,  5.95it/s][A
 95%|███████████████████████████████▍ | 116361/122310 [3:50:03<11:59,  8.26it/s][A
 95%|███████████████████████████████▍ | 116369/122310 [3:50:03<11:08,  8.89it/s][A
 95%|███████████████████████████████▍ | 116389/122310 [3:50:04<07:24, 13.33it/s][A
 95%|███████████████████████████████▍ | 116393/122310 [3:50:05<08:40, 11.37

step: 14120, loss: 77.0183396487316, epoch: 0



 95%|███████████████████████████████▍ | 116402/122310 [3:50:05<08:26, 11.66it/s][A
 95%|███████████████████████████████▍ | 116409/122310 [3:50:06<08:47, 11.19it/s][A
 95%|███████████████████████████████▍ | 116416/122310 [3:50:07<09:05, 10.81it/s][A
 95%|███████████████████████████████▍ | 116421/122310 [3:50:08<10:05,  9.72it/s][A
 95%|███████████████████████████████▍ | 116432/122310 [3:50:08<08:37, 11.36it/s][A
 95%|███████████████████████████████▍ | 116442/122310 [3:50:09<08:06, 12.05it/s][A
 95%|███████████████████████████████▍ | 116451/122310 [3:50:10<08:02, 12.15it/s][A
 95%|███████████████████████████████▍ | 116462/122310 [3:50:10<07:26, 13.08it/s][A
 95%|███████████████████████████████▍ | 116476/122310 [3:50:11<06:28, 15.03it/s][A
 95%|███████████████████████████████▍ | 116487/122310 [3:50:12<06:25, 15.10it/s][A
 95%|███████████████████████████████▍ | 116493/122310 [3:50:13<07:24, 13.10it/s][A
 95%|███████████████████████████████▍ | 116504/122310 [3:50:13<07:01, 13.76

step: 14140, loss: 96.9852648720382, epoch: 0



 95%|███████████████████████████████▍ | 116593/122310 [3:50:20<07:17, 13.08it/s][A
 95%|███████████████████████████████▍ | 116603/122310 [3:50:20<07:08, 13.33it/s][A
 95%|███████████████████████████████▍ | 116610/122310 [3:50:21<07:43, 12.30it/s][A
 95%|███████████████████████████████▍ | 116623/122310 [3:50:22<06:45, 14.02it/s][A
 95%|███████████████████████████████▍ | 116628/122310 [3:50:23<07:58, 11.87it/s][A
 95%|███████████████████████████████▍ | 116632/122310 [3:50:23<09:28,  9.99it/s][A
 95%|███████████████████████████████▍ | 116635/122310 [3:50:24<11:27,  8.25it/s][A
 95%|███████████████████████████████▍ | 116646/122310 [3:50:25<09:06, 10.37it/s][A
 95%|███████████████████████████████▍ | 116652/122310 [3:50:26<09:40,  9.75it/s][A
 95%|███████████████████████████████▍ | 116665/122310 [3:50:26<07:39, 12.27it/s][A
 95%|███████████████████████████████▍ | 116667/122310 [3:50:27<09:58,  9.43it/s][A
 95%|███████████████████████████████▍ | 116677/122310 [3:50:28<08:43, 10.77

step: 14160, loss: 104.63850696233723, epoch: 0



 95%|███████████████████████████████▌ | 116756/122310 [3:50:34<08:18, 11.15it/s][A
 95%|███████████████████████████████▌ | 116765/122310 [3:50:35<08:00, 11.54it/s][A
 95%|███████████████████████████████▌ | 116769/122310 [3:50:36<09:25,  9.80it/s][A
 95%|███████████████████████████████▌ | 116784/122310 [3:50:36<07:00, 13.13it/s][A
 95%|███████████████████████████████▌ | 116793/122310 [3:50:37<07:05, 12.98it/s][A
 96%|███████████████████████████████▌ | 116807/122310 [3:50:38<06:08, 14.95it/s][A
 96%|███████████████████████████████▌ | 116812/122310 [3:50:38<07:18, 12.55it/s][A
 96%|███████████████████████████████▌ | 116820/122310 [3:50:39<07:33, 12.11it/s][A
 96%|███████████████████████████████▌ | 116825/122310 [3:50:40<08:41, 10.51it/s][A
 96%|███████████████████████████████▌ | 116839/122310 [3:50:41<06:56, 13.14it/s][A
 96%|███████████████████████████████▌ | 116847/122310 [3:50:41<07:16, 12.51it/s][A
 96%|███████████████████████████████▌ | 116854/122310 [3:50:42<07:46, 11.70

step: 14180, loss: 90.1241360586686, epoch: 0



 96%|███████████████████████████████▌ | 116954/122310 [3:50:49<05:37, 15.87it/s][A
 96%|███████████████████████████████▌ | 116961/122310 [3:50:49<06:20, 14.05it/s][A
 96%|███████████████████████████████▌ | 116972/122310 [3:50:50<06:10, 14.42it/s][A
 96%|███████████████████████████████▌ | 116979/122310 [3:50:51<06:51, 12.97it/s][A
 96%|███████████████████████████████▌ | 116981/122310 [3:50:52<11:32,  7.69it/s][A
 96%|███████████████████████████████▌ | 116989/122310 [3:50:53<10:21,  8.56it/s][A
 96%|███████████████████████████████▌ | 116993/122310 [3:50:54<11:22,  7.79it/s][A
 96%|███████████████████████████████▌ | 117000/122310 [3:50:54<10:38,  8.31it/s][A
 96%|███████████████████████████████▌ | 117017/122310 [3:50:55<06:59, 12.62it/s][A
 96%|███████████████████████████████▌ | 117024/122310 [3:50:56<07:29, 11.77it/s][A
 96%|███████████████████████████████▌ | 117030/122310 [3:50:56<08:11, 10.74it/s][A
 96%|███████████████████████████████▌ | 117044/122310 [3:50:57<06:35, 13.31

step: 14200, loss: 89.65000915411733, epoch: 0



 96%|███████████████████████████████▌ | 117115/122310 [3:51:03<09:15,  9.35it/s][A
 96%|███████████████████████████████▌ | 117120/122310 [3:51:04<10:06,  8.56it/s][A
 96%|███████████████████████████████▌ | 117144/122310 [3:51:05<05:35, 15.39it/s][A
 96%|███████████████████████████████▌ | 117149/122310 [3:51:06<06:45, 12.72it/s][A
 96%|███████████████████████████████▌ | 117169/122310 [3:51:06<05:07, 16.72it/s][A
 96%|███████████████████████████████▌ | 117179/122310 [3:51:07<05:33, 15.40it/s][A
 96%|███████████████████████████████▌ | 117181/122310 [3:51:08<07:20, 11.63it/s][A
 96%|███████████████████████████████▌ | 117189/122310 [3:51:09<07:33, 11.29it/s][A
 96%|███████████████████████████████▌ | 117202/122310 [3:51:09<06:34, 12.96it/s][A
 96%|███████████████████████████████▌ | 117210/122310 [3:51:10<06:57, 12.22it/s][A
 96%|███████████████████████████████▋ | 117217/122310 [3:51:11<07:30, 11.30it/s][A
 96%|███████████████████████████████▋ | 117224/122310 [3:51:12<07:57, 10.65

step: 14220, loss: 402.72253174184925, epoch: 0



 96%|███████████████████████████████▋ | 117224/122310 [3:51:24<07:57, 10.65it/s][A
 96%|███████████████████████████████▋ | 117234/122310 [3:51:25<44:52,  1.89it/s][A
 96%|███████████████████████████████▋ | 117239/122310 [3:51:26<38:30,  2.19it/s][A
 96%|███████████████████████████████▋ | 117245/122310 [3:51:27<31:27,  2.68it/s][A
 96%|███████████████████████████████▋ | 117252/122310 [3:51:28<27:34,  3.06it/s][A
 96%|███████████████████████████████▋ | 117259/122310 [3:51:29<22:05,  3.81it/s][A
 96%|███████████████████████████████▋ | 117267/122310 [3:51:30<17:27,  4.81it/s][A
 96%|███████████████████████████████▋ | 117276/122310 [3:51:31<13:53,  6.04it/s][A
 96%|███████████████████████████████▋ | 117284/122310 [3:51:31<12:05,  6.93it/s][A
 96%|███████████████████████████████▋ | 117287/122310 [3:51:32<13:20,  6.27it/s][A
 96%|███████████████████████████████▋ | 117300/122310 [3:51:33<09:26,  8.84it/s][A

step: 14240, loss: 81.73314729691214, epoch: 0



 96%|███████████████████████████████▋ | 117305/122310 [3:51:34<10:07,  8.24it/s][A
 96%|███████████████████████████████▋ | 117313/122310 [3:51:35<09:25,  8.83it/s][A
 96%|███████████████████████████████▋ | 117318/122310 [3:51:35<10:06,  8.23it/s][A
 96%|███████████████████████████████▋ | 117325/122310 [3:51:36<09:47,  8.49it/s][A
 96%|███████████████████████████████▋ | 117330/122310 [3:51:37<10:27,  7.94it/s][A
 96%|███████████████████████████████▋ | 117345/122310 [3:51:38<07:17, 11.36it/s][A
 96%|███████████████████████████████▋ | 117351/122310 [3:51:38<08:01, 10.31it/s][A
 96%|███████████████████████████████▋ | 117357/122310 [3:51:39<08:37,  9.57it/s][A
 96%|███████████████████████████████▋ | 117365/122310 [3:51:40<08:23,  9.82it/s][A
 96%|███████████████████████████████▋ | 117369/122310 [3:51:41<09:42,  8.48it/s][A
 96%|███████████████████████████████▋ | 117374/122310 [3:51:41<10:24,  7.91it/s][A
 96%|███████████████████████████████▋ | 117382/122310 [3:51:42<09:28,  8.67

step: 14260, loss: 72.31476519456658, epoch: 0



 96%|███████████████████████████████▋ | 117479/122310 [3:51:49<05:16, 15.27it/s][A
 96%|███████████████████████████████▋ | 117491/122310 [3:51:50<05:05, 15.76it/s][A
 96%|███████████████████████████████▋ | 117501/122310 [3:51:50<05:16, 15.21it/s][A
 96%|███████████████████████████████▋ | 117510/122310 [3:51:51<05:33, 14.40it/s][A
 96%|███████████████████████████████▋ | 117524/122310 [3:51:52<05:01, 15.87it/s][A
 96%|███████████████████████████████▋ | 117527/122310 [3:51:52<06:28, 12.31it/s][A
 96%|███████████████████████████████▋ | 117529/122310 [3:51:53<08:22,  9.52it/s][A
 96%|███████████████████████████████▋ | 117538/122310 [3:51:54<07:36, 10.45it/s][A
 96%|███████████████████████████████▋ | 117541/122310 [3:51:55<09:17,  8.56it/s][A
 96%|███████████████████████████████▋ | 117547/122310 [3:51:55<09:21,  8.48it/s][A
 96%|███████████████████████████████▋ | 117553/122310 [3:51:56<09:22,  8.46it/s][A
 96%|███████████████████████████████▋ | 117561/122310 [3:51:57<08:32,  9.27

step: 14280, loss: 78.61922930816432, epoch: 0



 96%|███████████████████████████████▋ | 117645/122310 [3:52:03<05:46, 13.48it/s][A
 96%|███████████████████████████████▋ | 117655/122310 [3:52:04<05:41, 13.61it/s][A
 96%|███████████████████████████████▋ | 117665/122310 [3:52:05<05:38, 13.73it/s][A
 96%|███████████████████████████████▋ | 117673/122310 [3:52:05<05:58, 12.95it/s][A
 96%|███████████████████████████████▊ | 117680/122310 [3:52:06<06:26, 11.99it/s][A
 96%|███████████████████████████████▊ | 117684/122310 [3:52:07<07:40, 10.04it/s][A
 96%|███████████████████████████████▊ | 117688/122310 [3:52:08<08:48,  8.74it/s][A
 96%|███████████████████████████████▊ | 117694/122310 [3:52:08<08:54,  8.63it/s][A
 96%|███████████████████████████████▊ | 117699/122310 [3:52:09<09:27,  8.13it/s][A
 96%|███████████████████████████████▊ | 117709/122310 [3:52:10<07:46,  9.86it/s][A
 96%|███████████████████████████████▊ | 117714/122310 [3:52:10<08:35,  8.92it/s][A
 96%|███████████████████████████████▊ | 117719/122310 [3:52:11<09:13,  8.30

step: 14300, loss: 103.95765303856886, epoch: 0



 96%|███████████████████████████████▊ | 117815/122310 [3:52:18<04:58, 15.05it/s][A
 96%|███████████████████████████████▊ | 117817/122310 [3:52:22<16:53,  4.43it/s][A
 96%|███████████████████████████████▊ | 117829/122310 [3:52:23<12:11,  6.13it/s][A
 96%|███████████████████████████████▊ | 117836/122310 [3:52:24<11:13,  6.64it/s][A
 96%|███████████████████████████████▊ | 117844/122310 [3:52:24<10:04,  7.39it/s][A
 96%|███████████████████████████████▊ | 117853/122310 [3:52:25<08:56,  8.31it/s][A
 96%|███████████████████████████████▊ | 117856/122310 [3:52:27<12:42,  5.84it/s][A
 96%|███████████████████████████████▊ | 117868/122310 [3:52:28<09:18,  7.95it/s][A
 96%|███████████████████████████████▊ | 117872/122310 [3:52:28<10:07,  7.31it/s][A
 96%|███████████████████████████████▊ | 117880/122310 [3:52:29<09:08,  8.08it/s][A
 96%|███████████████████████████████▊ | 117889/122310 [3:52:30<08:10,  9.02it/s][A
 96%|███████████████████████████████▊ | 117900/122310 [3:52:31<07:01, 10.47

step: 14320, loss: 95.66195796562748, epoch: 0



 96%|███████████████████████████████▊ | 117925/122310 [3:52:33<06:24, 11.41it/s][A
 96%|███████████████████████████████▊ | 117928/122310 [3:52:34<07:55,  9.21it/s][A
 96%|███████████████████████████████▊ | 117932/122310 [3:52:35<09:03,  8.06it/s][A
 96%|███████████████████████████████▊ | 117940/122310 [3:52:35<08:20,  8.72it/s][A
 96%|███████████████████████████████▊ | 117945/122310 [3:52:36<08:57,  8.12it/s][A
 96%|███████████████████████████████▊ | 117950/122310 [3:52:37<09:31,  7.63it/s][A
 96%|███████████████████████████████▊ | 117957/122310 [3:52:38<08:56,  8.11it/s][A
 96%|███████████████████████████████▊ | 117968/122310 [3:52:38<07:15,  9.96it/s][A
 96%|███████████████████████████████▊ | 117970/122310 [3:52:39<09:17,  7.79it/s][A
 96%|███████████████████████████████▊ | 117975/122310 [3:52:40<09:44,  7.42it/s][A
 96%|███████████████████████████████▊ | 117988/122310 [3:52:41<07:00, 10.28it/s][A
 96%|███████████████████████████████▊ | 117995/122310 [3:52:41<07:14,  9.94

step: 14340, loss: 77.79667038731735, epoch: 0



 97%|███████████████████████████████▊ | 118069/122310 [3:52:48<06:13, 11.36it/s][A
 97%|███████████████████████████████▊ | 118078/122310 [3:52:49<06:09, 11.47it/s][A
 97%|███████████████████████████████▊ | 118087/122310 [3:52:50<06:04, 11.59it/s][A
 97%|███████████████████████████████▊ | 118089/122310 [3:52:51<07:56,  8.85it/s][A
 97%|███████████████████████████████▊ | 118097/122310 [3:52:51<07:30,  9.35it/s][A
 97%|███████████████████████████████▊ | 118112/122310 [3:52:52<05:38, 12.41it/s][A
 97%|███████████████████████████████▊ | 118118/122310 [3:52:53<06:19, 11.05it/s][A
 97%|███████████████████████████████▊ | 118132/122310 [3:52:54<05:15, 13.22it/s][A
 97%|███████████████████████████████▊ | 118135/122310 [3:52:54<06:40, 10.43it/s][A
 97%|███████████████████████████████▉ | 118146/122310 [3:52:55<05:58, 11.61it/s][A
 97%|███████████████████████████████▉ | 118157/122310 [3:52:56<05:34, 12.43it/s][A
 97%|███████████████████████████████▉ | 118172/122310 [3:52:57<04:44, 14.53

step: 14360, loss: 108.82915372725809, epoch: 0



 97%|███████████████████████████████▉ | 118249/122310 [3:53:04<09:51,  6.87it/s][A
 97%|███████████████████████████████▉ | 118258/122310 [3:53:05<08:28,  7.97it/s][A
 97%|███████████████████████████████▉ | 118265/122310 [3:53:06<08:08,  8.28it/s][A
 97%|███████████████████████████████▉ | 118270/122310 [3:53:07<08:36,  7.81it/s][A
 97%|███████████████████████████████▉ | 118275/122310 [3:53:07<08:59,  7.48it/s][A
 97%|███████████████████████████████▉ | 118285/122310 [3:53:08<07:26,  9.02it/s][A
 97%|███████████████████████████████▉ | 118291/122310 [3:53:09<07:42,  8.70it/s][A
 97%|███████████████████████████████▉ | 118296/122310 [3:53:10<08:18,  8.05it/s][A
 97%|███████████████████████████████▉ | 118306/122310 [3:53:11<07:01,  9.50it/s][A
 97%|███████████████████████████████▉ | 118312/122310 [3:53:11<07:21,  9.06it/s][A
 97%|███████████████████████████████▉ | 118324/122310 [3:53:12<06:01, 11.01it/s][A
 97%|███████████████████████████████▉ | 118330/122310 [3:53:13<06:35, 10.07

step: 14380, loss: 86.65998122377026, epoch: 0



 97%|███████████████████████████████▉ | 118406/122310 [3:53:19<05:07, 12.70it/s][A
 97%|███████████████████████████████▉ | 118417/122310 [3:53:20<04:55, 13.20it/s][A
 97%|███████████████████████████████▉ | 118424/122310 [3:53:20<05:24, 11.97it/s][A
 97%|███████████████████████████████▉ | 118429/122310 [3:53:21<06:13, 10.39it/s][A
 97%|███████████████████████████████▉ | 118449/122310 [3:53:22<04:14, 15.16it/s][A
 97%|███████████████████████████████▉ | 118459/122310 [3:53:23<04:25, 14.52it/s][A
 97%|███████████████████████████████▉ | 118467/122310 [3:53:23<04:48, 13.30it/s][A
 97%|███████████████████████████████▉ | 118480/122310 [3:53:24<04:24, 14.45it/s][A
 97%|███████████████████████████████▉ | 118489/122310 [3:53:25<04:39, 13.66it/s][A
 97%|███████████████████████████████▉ | 118494/122310 [3:53:26<05:32, 11.48it/s][A
 97%|███████████████████████████████▉ | 118501/122310 [3:53:27<05:52, 10.82it/s][A
 97%|███████████████████████████████▉ | 118512/122310 [3:53:27<05:19, 11.90

step: 14400, loss: 77.56305392099726, epoch: 0
sim1 and sim2 are 0.60213121351474, 0.18675644600142502
cosine of pred and queen: 0.14947796609115185
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: size
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: pressure
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: collective
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actual: maharast


 97%|███████████████████████████████▉ | 118577/122310 [3:53:44<05:42, 10.91it/s][A

Actual: jharkhand:ranchi::punjab:chandigarh, pred: bihar
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: tiresome
Actual: india:delhi::serbia:belgrade, pred: returne
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: media
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: almost
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: belgium
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: second
Actual: thailand:thai::india:indian, pred: eataly
Actual: sweden:swedish::netherlands:dutch, pred: says
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: denial
Actual: poland:polish::italy:italian, pred: offic


 97%|██████████████████████████████ | 118585/122310 [3:54:53<3:07:05,  3.01s/it][A

Actual: india:rupee::denmark:krone, pred: belgium
Accuracy is 0.05325443786982249



 97%|██████████████████████████████ | 118590/122310 [3:54:54<2:30:00,  2.42s/it][A
 97%|██████████████████████████████ | 118599/122310 [3:54:55<1:39:24,  1.61s/it][A
 97%|██████████████████████████████ | 118606/122310 [3:54:55<1:13:24,  1.19s/it][A
 97%|████████████████████████████████ | 118614/122310 [3:54:56<52:07,  1.18it/s][A
 97%|████████████████████████████████ | 118619/122310 [3:54:57<42:41,  1.44it/s][A
 97%|████████████████████████████████ | 118630/122310 [3:54:58<26:56,  2.28it/s][A
 97%|████████████████████████████████ | 118634/122310 [3:54:58<24:11,  2.53it/s][A
 97%|████████████████████████████████ | 118636/122310 [3:55:00<26:34,  2.30it/s][A
 97%|████████████████████████████████ | 118643/122310 [3:55:01<19:20,  3.16it/s][A
 97%|████████████████████████████████ | 118644/122310 [3:55:01<21:06,  2.89it/s][A
 97%|████████████████████████████████ | 118653/122310 [3:55:02<13:36,  4.48it/s][A
 97%|████████████████████████████████ | 118658/122310 [3:55:03<12:21,  4.93

step: 14420, loss: 103.25997615566703, epoch: 0



 97%|████████████████████████████████ | 118697/122310 [3:55:09<10:05,  5.97it/s][A
 97%|████████████████████████████████ | 118706/122310 [3:55:10<07:59,  7.51it/s][A
 97%|████████████████████████████████ | 118714/122310 [3:55:10<07:10,  8.36it/s][A
 97%|████████████████████████████████ | 118722/122310 [3:55:11<06:40,  8.96it/s][A
 97%|████████████████████████████████ | 118724/122310 [3:55:12<08:15,  7.24it/s][A
 97%|████████████████████████████████ | 118731/122310 [3:55:13<07:35,  7.86it/s][A
 97%|████████████████████████████████ | 118744/122310 [3:55:13<05:35, 10.62it/s][A
 97%|████████████████████████████████ | 118749/122310 [3:55:14<06:15,  9.48it/s][A
 97%|████████████████████████████████ | 118761/122310 [3:55:15<05:10, 11.43it/s][A
 97%|████████████████████████████████ | 118767/122310 [3:55:16<05:39, 10.43it/s][A
 97%|████████████████████████████████ | 118780/122310 [3:55:16<04:42, 12.50it/s][A
 97%|████████████████████████████████ | 118791/122310 [3:55:17<04:28, 13.12

step: 14440, loss: 84.32671439879708, epoch: 0



 97%|████████████████████████████████ | 118882/122310 [3:55:24<04:03, 14.07it/s][A
 97%|████████████████████████████████ | 118894/122310 [3:55:25<03:53, 14.64it/s][A
 97%|████████████████████████████████ | 118908/122310 [3:55:25<03:34, 15.86it/s][A
 97%|████████████████████████████████ | 118915/122310 [3:55:26<04:04, 13.91it/s][A
 97%|████████████████████████████████ | 118925/122310 [3:55:27<04:06, 13.73it/s][A
 97%|████████████████████████████████ | 118933/122310 [3:55:28<04:23, 12.82it/s][A
 97%|████████████████████████████████ | 118940/122310 [3:55:28<04:46, 11.77it/s][A
 97%|████████████████████████████████ | 118950/122310 [3:55:29<04:34, 12.23it/s][A
 97%|████████████████████████████████ | 118958/122310 [3:55:31<06:13,  8.98it/s][A
 97%|████████████████████████████████ | 118964/122310 [3:55:31<06:20,  8.78it/s][A
 97%|████████████████████████████████ | 118969/122310 [3:55:32<06:46,  8.22it/s][A
 97%|████████████████████████████████ | 118983/122310 [3:55:33<05:00, 11.07

step: 14460, loss: 88.37095067839292, epoch: 0



 97%|████████████████████████████████ | 119031/122310 [3:55:39<06:43,  8.12it/s][A
 97%|████████████████████████████████ | 119035/122310 [3:55:40<07:23,  7.39it/s][A
 97%|████████████████████████████████ | 119044/122310 [3:55:40<06:16,  8.67it/s][A
 97%|████████████████████████████████ | 119050/122310 [3:55:41<06:24,  8.49it/s][A
 97%|████████████████████████████████ | 119060/122310 [3:55:42<05:29,  9.87it/s][A
 97%|████████████████████████████████▏| 119067/122310 [3:55:43<05:34,  9.68it/s][A
 97%|████████████████████████████████▏| 119083/122310 [3:55:43<04:06, 13.10it/s][A
 97%|████████████████████████████████▏| 119095/122310 [3:55:44<03:50, 13.95it/s][A
 97%|████████████████████████████████▏| 119101/122310 [3:55:45<04:23, 12.17it/s][A
 97%|████████████████████████████████▏| 119105/122310 [3:55:46<05:17, 10.09it/s][A
 97%|████████████████████████████████▏| 119110/122310 [3:55:46<05:51,  9.10it/s][A
 97%|████████████████████████████████▏| 119128/122310 [3:55:47<03:57, 13.41

step: 14480, loss: 100.05167188639793, epoch: 0



 97%|████████████████████████████████▏| 119225/122310 [3:55:54<03:12, 16.05it/s][A
 97%|████████████████████████████████▏| 119228/122310 [3:55:55<04:07, 12.46it/s][A
 97%|████████████████████████████████▏| 119246/122310 [3:55:55<03:13, 15.87it/s][A
 97%|████████████████████████████████▏| 119252/122310 [3:55:56<03:45, 13.55it/s][A
 98%|████████████████████████████████▏| 119257/122310 [3:55:57<04:25, 11.49it/s][A
 98%|████████████████████████████████▏| 119273/122310 [3:55:58<03:30, 14.43it/s][A
 98%|████████████████████████████████▏| 119282/122310 [3:55:58<03:40, 13.71it/s][A
 98%|████████████████████████████████▏| 119294/122310 [3:55:59<03:29, 14.42it/s][A
 98%|████████████████████████████████▏| 119299/122310 [3:56:00<04:09, 12.08it/s][A
 98%|████████████████████████████████▏| 119311/122310 [3:56:01<03:46, 13.25it/s][A
 98%|████████████████████████████████▏| 119316/122310 [3:56:01<04:25, 11.28it/s][A
 98%|████████████████████████████████▏| 119326/122310 [3:56:02<04:11, 11.88

step: 14500, loss: 77.43817804241756, epoch: 0



 98%|████████████████████████████████▏| 119411/122310 [3:56:09<05:40,  8.52it/s][A
 98%|████████████████████████████████▏| 119420/122310 [3:56:10<05:01,  9.57it/s][A
 98%|████████████████████████████████▏| 119431/122310 [3:56:10<04:19, 11.11it/s][A
 98%|████████████████████████████████▏| 119441/122310 [3:56:11<04:04, 11.74it/s][A
 98%|████████████████████████████████▏| 119451/122310 [3:56:12<03:53, 12.24it/s][A
 98%|████████████████████████████████▏| 119456/122310 [3:56:13<04:30, 10.55it/s][A
 98%|████████████████████████████████▏| 119464/122310 [3:56:13<04:29, 10.56it/s][A
 98%|████████████████████████████████▏| 119476/122310 [3:56:14<03:52, 12.18it/s][A
 98%|████████████████████████████████▏| 119481/122310 [3:56:15<04:29, 10.51it/s][A
 98%|████████████████████████████████▏| 119490/122310 [3:56:16<04:17, 10.97it/s][A
 98%|████████████████████████████████▏| 119492/122310 [3:56:16<05:32,  8.48it/s][A
 98%|████████████████████████████████▏| 119507/122310 [3:56:17<03:55, 11.91

step: 14520, loss: 81.55047989623446, epoch: 0



 98%|████████████████████████████████▎| 119557/122310 [3:56:24<05:29,  8.36it/s][A
 98%|████████████████████████████████▎| 119564/122310 [3:56:25<05:18,  8.63it/s][A
 98%|████████████████████████████████▎| 119576/122310 [3:56:25<04:13, 10.77it/s][A
 98%|████████████████████████████████▎| 119584/122310 [3:56:26<04:12, 10.78it/s][A
 98%|████████████████████████████████▎| 119592/122310 [3:56:27<04:12, 10.77it/s][A
 98%|████████████████████████████████▎| 119598/122310 [3:56:28<04:31, 10.00it/s][A
 98%|████████████████████████████████▎| 119605/122310 [3:56:28<04:34,  9.84it/s][A
 98%|████████████████████████████████▎| 119616/122310 [3:56:29<03:57, 11.36it/s][A
 98%|████████████████████████████████▎| 119621/122310 [3:56:30<04:30,  9.93it/s][A
 98%|████████████████████████████████▎| 119629/122310 [3:56:31<04:24, 10.15it/s][A
 98%|████████████████████████████████▎| 119631/122310 [3:56:31<05:35,  7.98it/s][A
 98%|████████████████████████████████▎| 119642/122310 [3:56:32<04:26, 10.03

step: 14540, loss: 97.05803080866168, epoch: 0



 98%|████████████████████████████████▎| 119718/122310 [3:56:39<03:41, 11.71it/s][A
 98%|████████████████████████████████▎| 119723/122310 [3:56:39<04:10, 10.34it/s][A
 98%|████████████████████████████████▎| 119726/122310 [3:56:40<05:03,  8.52it/s][A
 98%|████████████████████████████████▎| 119732/122310 [3:56:41<05:02,  8.52it/s][A
 98%|████████████████████████████████▎| 119737/122310 [3:56:41<05:18,  8.08it/s][A
 98%|████████████████████████████████▎| 119747/122310 [3:56:42<04:19,  9.87it/s][A
 98%|████████████████████████████████▎| 119759/122310 [3:56:43<03:32, 11.98it/s][A
 98%|████████████████████████████████▎| 119766/122310 [3:56:44<03:44, 11.34it/s][A
 98%|████████████████████████████████▎| 119778/122310 [3:56:44<03:14, 12.99it/s][A
 98%|████████████████████████████████▎| 119787/122310 [3:56:45<03:16, 12.84it/s][A
 98%|████████████████████████████████▎| 119790/122310 [3:56:46<04:04, 10.31it/s][A
 98%|████████████████████████████████▎| 119805/122310 [3:56:46<03:04, 13.55

step: 14560, loss: 92.29887741117774, epoch: 0



 98%|████████████████████████████████▎| 119888/122310 [3:56:53<02:55, 13.81it/s][A
 98%|████████████████████████████████▎| 119897/122310 [3:56:54<02:59, 13.46it/s][A
 98%|████████████████████████████████▎| 119911/122310 [3:56:54<02:36, 15.33it/s][A
 98%|████████████████████████████████▎| 119916/122310 [3:56:55<03:06, 12.84it/s][A
 98%|████████████████████████████████▎| 119925/122310 [3:56:56<03:06, 12.78it/s][A
 98%|████████████████████████████████▎| 119936/122310 [3:56:56<02:55, 13.54it/s][A
 98%|████████████████████████████████▎| 119941/122310 [3:56:57<03:24, 11.61it/s][A
 98%|████████████████████████████████▎| 119948/122310 [3:56:58<03:33, 11.09it/s][A
 98%|████████████████████████████████▎| 119954/122310 [3:56:59<03:49, 10.24it/s][A
 98%|████████████████████████████████▎| 119956/122310 [3:56:59<04:51,  8.08it/s][A
 98%|████████████████████████████████▎| 119973/122310 [3:57:00<03:03, 12.73it/s][A
 98%|████████████████████████████████▎| 119977/122310 [3:57:01<03:39, 10.63

step: 14580, loss: 88.19567696956736, epoch: 0



 98%|████████████████████████████████▍| 120041/122310 [3:57:07<03:05, 12.23it/s][A
 98%|████████████████████████████████▍| 120055/122310 [3:57:08<02:36, 14.45it/s][A
 98%|████████████████████████████████▍| 120063/122310 [3:57:09<02:46, 13.47it/s][A
 98%|████████████████████████████████▍| 120073/122310 [3:57:09<02:43, 13.68it/s][A
 98%|████████████████████████████████▍| 120088/122310 [3:57:10<02:20, 15.84it/s][A
 98%|████████████████████████████████▍| 120096/122310 [3:57:11<02:32, 14.48it/s][A
 98%|████████████████████████████████▍| 120103/122310 [3:57:11<02:48, 13.11it/s][A
 98%|████████████████████████████████▍| 120113/122310 [3:57:12<02:44, 13.39it/s][A
 98%|████████████████████████████████▍| 120119/122310 [3:57:13<03:04, 11.89it/s][A
 98%|████████████████████████████████▍| 120125/122310 [3:57:13<03:21, 10.87it/s][A
 98%|████████████████████████████████▍| 120131/122310 [3:57:14<03:34, 10.15it/s][A
 98%|████████████████████████████████▍| 120146/122310 [3:57:15<02:41, 13.43

step: 14600, loss: 85.17094694606436, epoch: 0



 98%|████████████████████████████████▍| 120201/122310 [3:57:21<04:13,  8.32it/s][A
 98%|████████████████████████████████▍| 120210/122310 [3:57:22<03:40,  9.54it/s][A
 98%|████████████████████████████████▍| 120224/122310 [3:57:23<02:47, 12.43it/s][A
 98%|████████████████████████████████▍| 120236/122310 [3:57:23<02:31, 13.72it/s][A
 98%|████████████████████████████████▍| 120242/122310 [3:57:24<02:49, 12.17it/s][A
 98%|████████████████████████████████▍| 120248/122310 [3:57:25<03:07, 11.00it/s][A
 98%|████████████████████████████████▍| 120251/122310 [3:57:26<03:49,  8.98it/s][A
 98%|████████████████████████████████▍| 120263/122310 [3:57:26<03:00, 11.32it/s][A
 98%|████████████████████████████████▍| 120271/122310 [3:57:27<03:00, 11.28it/s][A
 98%|████████████████████████████████▍| 120277/122310 [3:57:28<04:12,  8.05it/s][A
 98%|████████████████████████████████▍| 120289/122310 [3:57:29<03:17, 10.22it/s][A
 98%|████████████████████████████████▍| 120296/122310 [3:57:30<03:18, 10.12

step: 14620, loss: 87.07060062972279, epoch: 0



 98%|████████████████████████████████▍| 120362/122310 [3:57:36<02:55, 11.08it/s][A
 98%|████████████████████████████████▍| 120372/122310 [3:57:36<02:41, 11.96it/s][A
 98%|████████████████████████████████▍| 120378/122310 [3:57:37<02:57, 10.87it/s][A
 98%|████████████████████████████████▍| 120388/122310 [3:57:38<02:42, 11.81it/s][A
 98%|████████████████████████████████▍| 120401/122310 [3:57:38<02:19, 13.68it/s][A
 98%|████████████████████████████████▍| 120407/122310 [3:57:39<02:36, 12.16it/s][A
 98%|████████████████████████████████▍| 120418/122310 [3:57:40<02:24, 13.13it/s][A
 98%|████████████████████████████████▍| 120437/122310 [3:57:41<01:48, 17.25it/s][A
 98%|████████████████████████████████▍| 120443/122310 [3:57:41<02:08, 14.57it/s][A
 98%|████████████████████████████████▍| 120456/122310 [3:57:42<01:58, 15.69it/s][A
 98%|████████████████████████████████▌| 120460/122310 [3:57:43<02:26, 12.64it/s][A
 98%|████████████████████████████████▌| 120466/122310 [3:57:43<02:42, 11.36

step: 14640, loss: 86.83379063018478, epoch: 0



 99%|████████████████████████████████▌| 120553/122310 [3:57:50<02:22, 12.29it/s][A
 99%|████████████████████████████████▌| 120558/122310 [3:57:51<02:45, 10.58it/s][A
 99%|████████████████████████████████▌| 120570/122310 [3:57:52<02:23, 12.16it/s][A
 99%|████████████████████████████████▌| 120581/122310 [3:57:52<02:14, 12.82it/s][A
 99%|████████████████████████████████▌| 120583/122310 [3:57:53<02:56,  9.79it/s][A
 99%|████████████████████████████████▌| 120586/122310 [3:57:54<03:34,  8.03it/s][A
 99%|████████████████████████████████▌| 120600/122310 [3:57:55<02:33, 11.15it/s][A
 99%|████████████████████████████████▌| 120614/122310 [3:57:55<02:07, 13.27it/s][A
 99%|████████████████████████████████▌| 120624/122310 [3:57:56<02:07, 13.25it/s][A
 99%|████████████████████████████████▌| 120635/122310 [3:57:57<02:03, 13.60it/s][A
 99%|████████████████████████████████▌| 120641/122310 [3:57:58<02:20, 11.87it/s][A
 99%|████████████████████████████████▌| 120649/122310 [3:57:58<02:24, 11.49

step: 14660, loss: 80.57532140200564, epoch: 0



 99%|████████████████████████████████▌| 120757/122310 [3:58:05<01:42, 15.12it/s][A
 99%|████████████████████████████████▌| 120770/122310 [3:58:06<01:37, 15.76it/s][A
 99%|████████████████████████████████▌| 120779/122310 [3:58:07<01:44, 14.59it/s][A
 99%|████████████████████████████████▌| 120785/122310 [3:58:08<02:37,  9.66it/s][A
 99%|████████████████████████████████▌| 120795/122310 [3:58:09<02:23, 10.55it/s][A
 99%|████████████████████████████████▌| 120805/122310 [3:58:10<02:13, 11.23it/s][A
 99%|████████████████████████████████▌| 120815/122310 [3:58:11<02:07, 11.77it/s][A
 99%|████████████████████████████████▌| 120821/122310 [3:58:11<02:19, 10.68it/s][A
 99%|████████████████████████████████▌| 120833/122310 [3:58:12<02:01, 12.14it/s][A
 99%|████████████████████████████████▌| 120843/122310 [3:58:13<01:57, 12.45it/s][A
 99%|████████████████████████████████▌| 120852/122310 [3:58:14<01:58, 12.27it/s][A
 99%|████████████████████████████████▌| 120863/122310 [3:58:14<01:52, 12.91

step: 14680, loss: 75.52731805933105, epoch: 0



 99%|████████████████████████████████▋| 120927/122310 [3:58:20<02:36,  8.81it/s][A
 99%|████████████████████████████████▋| 120930/122310 [3:58:21<03:06,  7.40it/s][A
 99%|████████████████████████████████▋| 120949/122310 [3:58:22<01:47, 12.68it/s][A
 99%|████████████████████████████████▋| 120957/122310 [3:58:23<01:51, 12.11it/s][A
 99%|████████████████████████████████▋| 120972/122310 [3:58:23<01:32, 14.40it/s][A
 99%|████████████████████████████████▋| 120984/122310 [3:58:24<01:29, 14.80it/s][A
 99%|████████████████████████████████▋| 120991/122310 [3:58:25<01:40, 13.10it/s][A
 99%|████████████████████████████████▋| 121006/122310 [3:58:26<01:26, 15.11it/s][A
 99%|████████████████████████████████▋| 121022/122310 [3:58:26<01:16, 16.93it/s][A
 99%|████████████████████████████████▋| 121034/122310 [3:58:27<01:16, 16.60it/s][A
 99%|████████████████████████████████▋| 121047/122310 [3:58:28<01:15, 16.79it/s][A
 99%|████████████████████████████████▋| 121052/122310 [3:58:29<01:31, 13.68

step: 14700, loss: 80.19324143924757, epoch: 0



 99%|████████████████████████████████▋| 121116/122310 [3:58:36<01:32, 12.87it/s][A
 99%|████████████████████████████████▋| 121121/122310 [3:58:36<01:47, 11.02it/s][A
 99%|████████████████████████████████▋| 121123/122310 [3:58:37<02:19,  8.48it/s][A
 99%|████████████████████████████████▋| 121127/122310 [3:58:38<02:37,  7.51it/s][A
 99%|████████████████████████████████▋| 121131/122310 [3:58:39<02:51,  6.88it/s][A
 99%|████████████████████████████████▋| 121139/122310 [3:58:39<02:26,  7.99it/s][A
 99%|████████████████████████████████▋| 121140/122310 [3:58:40<03:16,  5.95it/s][A
 99%|████████████████████████████████▋| 121146/122310 [3:58:41<02:57,  6.56it/s][A
 99%|████████████████████████████████▋| 121153/122310 [3:58:42<02:37,  7.33it/s][A
 99%|████████████████████████████████▋| 121163/122310 [3:58:42<02:06,  9.08it/s][A
 99%|████████████████████████████████▋| 121165/122310 [3:58:43<02:39,  7.16it/s][A
 99%|████████████████████████████████▋| 121175/122310 [3:58:44<02:06,  8.97

step: 14720, loss: 91.52685060269295, epoch: 0



 99%|████████████████████████████████▋| 121239/122310 [3:58:51<01:58,  9.01it/s][A
 99%|████████████████████████████████▋| 121248/122310 [3:58:51<01:47,  9.88it/s][A
 99%|████████████████████████████████▋| 121256/122310 [3:58:52<01:44, 10.10it/s][A
 99%|████████████████████████████████▋| 121261/122310 [3:58:53<01:56,  9.02it/s][A
 99%|████████████████████████████████▋| 121276/122310 [3:58:54<01:24, 12.25it/s][A
 99%|████████████████████████████████▋| 121289/122310 [3:58:55<01:14, 13.75it/s][A
 99%|████████████████████████████████▋| 121308/122310 [3:58:55<00:58, 17.16it/s][A
 99%|████████████████████████████████▋| 121311/122310 [3:58:56<01:15, 13.21it/s][A
 99%|████████████████████████████████▋| 121322/122310 [3:58:57<01:12, 13.57it/s][A
 99%|████████████████████████████████▋| 121325/122310 [3:58:58<02:00,  8.20it/s][A
 99%|████████████████████████████████▋| 121327/122310 [3:58:59<02:23,  6.86it/s][A
 99%|████████████████████████████████▋| 121342/122310 [3:59:00<01:34, 10.26

step: 14740, loss: 76.97879011904362, epoch: 0



 99%|████████████████████████████████▊| 121421/122310 [3:59:06<01:07, 13.12it/s][A
 99%|████████████████████████████████▊| 121425/122310 [3:59:07<01:21, 10.82it/s][A
 99%|████████████████████████████████▊| 121437/122310 [3:59:07<01:10, 12.30it/s][A
 99%|████████████████████████████████▊| 121443/122310 [3:59:08<01:18, 10.99it/s][A
 99%|████████████████████████████████▊| 121449/122310 [3:59:09<01:25, 10.07it/s][A
 99%|████████████████████████████████▊| 121455/122310 [3:59:10<01:30,  9.43it/s][A
 99%|████████████████████████████████▊| 121466/122310 [3:59:10<01:16, 10.97it/s][A
 99%|████████████████████████████████▊| 121481/122310 [3:59:11<01:00, 13.63it/s][A
 99%|████████████████████████████████▊| 121494/122310 [3:59:12<00:55, 14.69it/s][A
 99%|████████████████████████████████▊| 121507/122310 [3:59:13<00:51, 15.45it/s][A
 99%|████████████████████████████████▊| 121520/122310 [3:59:13<00:49, 15.98it/s][A
 99%|████████████████████████████████▊| 121527/122310 [3:59:14<00:56, 13.91

step: 14760, loss: 96.8394624472403, epoch: 0



 99%|████████████████████████████████▊| 121607/122310 [3:59:21<01:05, 10.79it/s][A
 99%|████████████████████████████████▊| 121609/122310 [3:59:22<01:24,  8.34it/s][A
 99%|████████████████████████████████▊| 121616/122310 [3:59:23<01:20,  8.65it/s][A
 99%|████████████████████████████████▊| 121620/122310 [3:59:23<01:30,  7.62it/s][A
 99%|████████████████████████████████▊| 121627/122310 [3:59:24<01:23,  8.14it/s][A
 99%|████████████████████████████████▊| 121636/122310 [3:59:25<01:13,  9.23it/s][A
 99%|████████████████████████████████▊| 121640/122310 [3:59:26<01:22,  8.09it/s][A
 99%|████████████████████████████████▊| 121648/122310 [3:59:26<01:15,  8.81it/s][A
 99%|████████████████████████████████▊| 121660/122310 [3:59:27<00:59, 10.96it/s][A
 99%|████████████████████████████████▊| 121663/122310 [3:59:28<01:12,  8.87it/s][A
 99%|████████████████████████████████▊| 121674/122310 [3:59:29<01:00, 10.55it/s][A
 99%|████████████████████████████████▊| 121676/122310 [3:59:29<01:17,  8.21

step: 14780, loss: 91.1995517847105, epoch: 0



100%|████████████████████████████████▊| 121763/122310 [3:59:36<00:50, 10.77it/s][A
100%|████████████████████████████████▊| 121772/122310 [3:59:37<00:48, 11.11it/s][A
100%|████████████████████████████████▊| 121776/122310 [3:59:38<00:56,  9.38it/s][A
100%|████████████████████████████████▊| 121782/122310 [3:59:38<00:59,  8.94it/s][A
100%|████████████████████████████████▊| 121790/122310 [3:59:39<00:55,  9.44it/s][A
100%|████████████████████████████████▊| 121793/122310 [3:59:40<01:06,  7.81it/s][A
100%|████████████████████████████████▊| 121803/122310 [3:59:41<00:53,  9.44it/s][A
100%|████████████████████████████████▊| 121807/122310 [3:59:41<01:01,  8.14it/s][A
100%|████████████████████████████████▊| 121821/122310 [3:59:42<00:43, 11.21it/s][A
100%|████████████████████████████████▊| 121832/122310 [3:59:43<00:39, 12.18it/s][A
100%|████████████████████████████████▊| 121841/122310 [3:59:44<00:38, 12.08it/s][A
100%|████████████████████████████████▊| 121844/122310 [3:59:44<00:48,  9.67

step: 14800, loss: 89.66077208014464, epoch: 0
sim1 and sim2 are 0.6334257084832099, 0.2367043869433527
cosine of pred and queen: 0.15255270345472738
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: united
Actual: beijing:china::tokyo:japan, pred: size
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: largement
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: australian
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: collective
Actual: china:asia::greece:europe, pred: spain
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tory
Actua

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: made
Actual: islamabad:pakistan::oslo:norway, pred: friday
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: david
Actual: king:queen::husband:wife, pred: htay
Actual: man:woman::brothers:sisters, pred: woven
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: pradesh
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



100%|████████████████████████████████▉| 121910/122310 [4:01:10<25:47,  3.87s/it][A

Actual: india:rupee::denmark:krone, pred: hampstead
Accuracy is 0.05325443786982249



100%|████████████████████████████████▉| 121924/122310 [4:01:11<12:59,  2.02s/it][A
100%|████████████████████████████████▉| 121930/122310 [4:01:12<10:02,  1.58s/it][A
100%|████████████████████████████████▉| 121937/122310 [4:01:12<07:18,  1.18s/it][A
100%|████████████████████████████████▉| 121942/122310 [4:01:13<05:49,  1.05it/s][A
100%|████████████████████████████████▉| 121953/122310 [4:01:14<03:30,  1.69it/s][A
100%|████████████████████████████████▉| 121960/122310 [4:01:15<02:40,  2.18it/s][A
100%|████████████████████████████████▉| 121966/122310 [4:01:15<02:09,  2.67it/s][A
100%|████████████████████████████████▉| 121977/122310 [4:01:16<01:24,  3.94it/s][A
100%|████████████████████████████████▉| 121987/122310 [4:01:17<01:02,  5.18it/s][A
100%|████████████████████████████████▉| 121995/122310 [4:01:18<00:51,  6.06it/s][A
100%|████████████████████████████████▉| 122005/122310 [4:01:18<00:41,  7.40it/s][A
100%|████████████████████████████████▉| 122007/122310 [4:01:19<00:47,  6.37

step: 14820, loss: 77.86952296278308, epoch: 0



100%|████████████████████████████████▉| 122072/122310 [4:01:26<00:19, 11.92it/s][A
100%|████████████████████████████████▉| 122079/122310 [4:01:27<00:20, 11.21it/s][A
100%|████████████████████████████████▉| 122084/122310 [4:01:27<00:22,  9.93it/s][A
100%|████████████████████████████████▉| 122095/122310 [4:01:28<00:18, 11.33it/s][A
100%|████████████████████████████████▉| 122103/122310 [4:01:29<00:18, 11.15it/s][A
100%|████████████████████████████████▉| 122110/122310 [4:01:30<00:18, 10.66it/s][A
100%|████████████████████████████████▉| 122119/122310 [4:01:30<00:17, 11.11it/s][A
100%|████████████████████████████████▉| 122134/122310 [4:01:31<00:12, 13.81it/s][A
100%|████████████████████████████████▉| 122138/122310 [4:01:32<00:15, 11.28it/s][A
100%|████████████████████████████████▉| 122145/122310 [4:01:33<00:15, 10.69it/s][A
100%|████████████████████████████████▉| 122148/122310 [4:01:33<00:18,  8.73it/s][A
100%|████████████████████████████████▉| 122154/122310 [4:01:34<00:18,  8.51

step: 14840, loss: 83.75788524189113, epoch: 0



100%|████████████████████████████████▉| 122239/122310 [4:01:41<00:06, 10.95it/s][A
100%|████████████████████████████████▉| 122244/122310 [4:01:42<00:06,  9.69it/s][A
100%|████████████████████████████████▉| 122253/122310 [4:01:42<00:05, 10.40it/s][A
100%|████████████████████████████████▉| 122264/122310 [4:01:43<00:03, 11.67it/s][A
100%|████████████████████████████████▉| 122272/122310 [4:01:44<00:03, 11.41it/s][A
100%|████████████████████████████████▉| 122281/122310 [4:01:45<00:02, 11.60it/s][A
100%|████████████████████████████████▉| 122289/122310 [4:01:45<00:01, 11.30it/s][A
100%|████████████████████████████████▉| 122294/122310 [4:01:46<00:01,  9.88it/s][A
100%|████████████████████████████████▉| 122298/122310 [4:01:47<00:01,  8.52it/s][A
100%|█████████████████████████████████| 122310/122310 [4:01:48<00:00,  8.43it/s][A
  5%|█▊                                  | 1/20 [4:01:48<76:34:12, 14508.05s/it]
  0%|                                                | 0/122310 [00:00<?, ?it/

step: 14860, loss: 94.47459549442105, epoch: 1



  0%|                                     | 79/122310 [00:08<3:40:50,  9.22it/s][A
  0%|                                     | 88/122310 [00:09<3:22:27, 10.06it/s][A
  0%|                                     | 97/122310 [00:09<3:10:53, 10.67it/s][A
  0%|                                    | 108/122310 [00:10<2:51:46, 11.86it/s][A
  0%|                                    | 111/122310 [00:11<3:34:30,  9.49it/s][A
  0%|                                    | 115/122310 [00:12<4:06:35,  8.26it/s][A
  0%|                                    | 130/122310 [00:12<2:52:42, 11.79it/s][A
  0%|                                    | 142/122310 [00:13<2:35:45, 13.07it/s][A
  0%|                                    | 156/122310 [00:14<2:18:09, 14.74it/s][A
  0%|                                    | 168/122310 [00:15<2:14:31, 15.13it/s][A
  0%|                                    | 179/122310 [00:15<2:15:57, 14.97it/s][A
  0%|                                    | 187/122310 [00:16<2:28:53, 13.67

step: 14880, loss: 79.40665174148748, epoch: 1



  0%|                                    | 266/122310 [00:23<2:37:46, 12.89it/s][A
  0%|                                    | 277/122310 [00:24<2:31:13, 13.45it/s][A
  0%|                                    | 283/122310 [00:24<2:52:23, 11.80it/s][A
  0%|                                    | 293/122310 [00:25<2:45:57, 12.25it/s][A
  0%|                                    | 303/122310 [00:26<2:41:55, 12.56it/s][A
  0%|                                    | 311/122310 [00:27<2:49:45, 11.98it/s][A
  0%|                                    | 315/122310 [00:27<3:23:03, 10.01it/s][A
  0%|                                    | 329/122310 [00:28<2:41:10, 12.61it/s][A
  0%|                                    | 334/122310 [00:29<3:08:48, 10.77it/s][A
  0%|                                    | 344/122310 [00:30<2:55:56, 11.55it/s][A
  0%|                                    | 355/122310 [00:30<2:42:54, 12.48it/s][A
  0%|                                    | 362/122310 [00:31<2:56:32, 11.51

step: 14900, loss: 91.28493543066757, epoch: 1



  0%|▏                                   | 425/122310 [00:38<3:44:07,  9.06it/s][A
  0%|▏                                   | 432/122310 [00:39<3:41:56,  9.15it/s][A
  0%|▏                                   | 446/122310 [00:39<2:52:11, 11.80it/s][A
  0%|▏                                   | 458/122310 [00:40<2:36:29, 12.98it/s][A
  0%|▏                                   | 463/122310 [00:41<3:01:39, 11.18it/s][A
  0%|▏                                   | 480/122310 [00:42<2:20:23, 14.46it/s][A
  0%|▏                                   | 482/122310 [00:42<3:04:26, 11.01it/s][A
  0%|▏                                   | 492/122310 [00:43<2:52:55, 11.74it/s][A
  0%|▏                                   | 509/122310 [00:44<2:15:25, 14.99it/s][A
  0%|▏                                   | 520/122310 [00:45<2:16:44, 14.84it/s][A
  0%|▏                                   | 524/122310 [00:45<2:49:11, 12.00it/s][A
  0%|▏                                   | 529/122310 [00:46<3:14:40, 10.43

step: 14920, loss: 86.92956179698442, epoch: 1



  1%|▏                                   | 631/122310 [00:53<2:23:09, 14.17it/s][A
  1%|▏                                   | 645/122310 [00:54<2:11:00, 15.48it/s][A
  1%|▏                                   | 649/122310 [00:54<2:42:55, 12.45it/s][A
  1%|▏                                   | 653/122310 [00:55<3:17:42, 10.26it/s][A
  1%|▏                                   | 655/122310 [00:56<4:12:39,  8.02it/s][A
  1%|▏                                   | 663/122310 [00:57<3:50:06,  8.81it/s][A
  1%|▏                                   | 672/122310 [00:57<3:28:12,  9.74it/s][A
  1%|▏                                   | 677/122310 [00:58<3:49:36,  8.83it/s][A
  1%|▏                                   | 690/122310 [00:59<2:58:12, 11.37it/s][A
  1%|▏                                   | 704/122310 [01:00<2:29:34, 13.55it/s][A
  1%|▏                                   | 714/122310 [01:00<2:30:22, 13.48it/s][A
  1%|▏                                   | 721/122310 [01:01<2:45:42, 12.23

step: 14940, loss: 99.83313633622714, epoch: 1



  1%|▏                                   | 819/122310 [01:08<2:25:18, 13.93it/s][A
  1%|▏                                   | 823/122310 [01:09<2:57:17, 11.42it/s][A
  1%|▏                                   | 827/122310 [01:09<3:30:49,  9.60it/s][A
  1%|▏                                   | 844/122310 [01:10<2:29:40, 13.53it/s][A
  1%|▎                                   | 857/122310 [01:11<2:18:35, 14.61it/s][A
  1%|▎                                   | 863/122310 [01:12<2:40:00, 12.65it/s][A
  1%|▎                                   | 868/122310 [01:12<3:06:13, 10.87it/s][A
  1%|▎                                   | 876/122310 [01:13<3:07:42, 10.78it/s][A
  1%|▎                                   | 883/122310 [01:14<3:15:17, 10.36it/s][A
  1%|▎                                   | 886/122310 [01:15<3:59:33,  8.45it/s][A
  1%|▎                                   | 893/122310 [01:15<3:52:01,  8.72it/s][A
  1%|▎                                   | 899/122310 [01:16<3:58:36,  8.48

step: 14960, loss: 78.76011610697293, epoch: 1



  1%|▎                                   | 991/122310 [01:23<2:12:57, 15.21it/s][A
  1%|▎                                   | 998/122310 [01:24<2:30:45, 13.41it/s][A
  1%|▎                                  | 1005/122310 [01:25<3:37:56,  9.28it/s][A
  1%|▎                                  | 1012/122310 [01:26<3:37:54,  9.28it/s][A
  1%|▎                                  | 1014/122310 [01:27<4:28:54,  7.52it/s][A
  1%|▎                                  | 1022/122310 [01:27<4:02:36,  8.33it/s][A
  1%|▎                                  | 1024/122310 [01:28<4:59:45,  6.74it/s][A
  1%|▎                                  | 1027/122310 [01:29<5:40:41,  5.93it/s][A
  1%|▎                                  | 1042/122310 [01:30<3:24:01,  9.91it/s][A
  1%|▎                                  | 1050/122310 [01:31<3:20:02, 10.10it/s][A
  1%|▎                                  | 1056/122310 [01:31<3:34:03,  9.44it/s][A
  1%|▎                                  | 1062/122310 [01:32<3:44:57,  8.98

step: 14980, loss: 175.26424940098448, epoch: 1



  1%|▎                                  | 1122/122310 [01:38<4:12:04,  8.01it/s][A
  1%|▎                                  | 1126/122310 [01:39<4:37:04,  7.29it/s][A
  1%|▎                                  | 1132/122310 [01:39<4:22:55,  7.68it/s][A
  1%|▎                                  | 1139/122310 [01:40<4:02:49,  8.32it/s][A
  1%|▎                                  | 1151/122310 [01:41<3:05:11, 10.90it/s][A
  1%|▎                                  | 1163/122310 [01:41<2:38:48, 12.71it/s][A
  1%|▎                                  | 1173/122310 [01:42<2:33:45, 13.13it/s][A
  1%|▎                                  | 1181/122310 [01:43<2:40:39, 12.57it/s][A
  1%|▎                                  | 1187/122310 [01:44<2:58:00, 11.34it/s][A
  1%|▎                                  | 1195/122310 [01:44<2:58:23, 11.32it/s][A
  1%|▎                                  | 1203/122310 [01:45<2:58:25, 11.31it/s][A
  1%|▎                                  | 1215/122310 [01:46<2:35:53, 12.95

step: 15000, loss: 74.4413442768126, epoch: 1
saving weights



  1%|▎                                  | 1283/122310 [01:52<3:36:07,  9.33it/s][A
  1%|▎                                  | 1294/122310 [01:53<3:06:40, 10.80it/s][A
  1%|▎                                  | 1305/122310 [01:54<2:48:24, 11.98it/s][A
  1%|▍                                  | 1311/122310 [01:54<3:02:25, 11.05it/s][A
  1%|▍                                  | 1320/122310 [01:55<2:55:13, 11.51it/s][A
  1%|▍                                  | 1326/122310 [01:56<3:10:37, 10.58it/s][A
  1%|▍                                  | 1329/122310 [01:57<3:50:09,  8.76it/s][A
  1%|▍                                  | 1340/122310 [01:57<3:07:41, 10.74it/s][A
  1%|▍                                  | 1344/122310 [01:58<3:38:30,  9.23it/s][A
  1%|▍                                  | 1352/122310 [01:59<3:26:35,  9.76it/s][A
  1%|▍                                  | 1361/122310 [01:59<3:09:20, 10.65it/s][A
  1%|▍                                  | 1369/122310 [02:00<3:07:28, 10.75

step: 15020, loss: 76.47388689773295, epoch: 1



  1%|▍                                  | 1447/122310 [02:07<3:35:41,  9.34it/s][A
  1%|▍                                  | 1455/122310 [02:08<3:23:04,  9.92it/s][A
  1%|▍                                  | 1460/122310 [02:09<3:42:57,  9.03it/s][A
  1%|▍                                  | 1466/122310 [02:09<3:47:20,  8.86it/s][A
  1%|▍                                  | 1480/122310 [02:10<2:46:21, 12.11it/s][A
  1%|▍                                  | 1486/122310 [02:11<3:03:10, 10.99it/s][A
  1%|▍                                  | 1492/122310 [02:12<3:16:46, 10.23it/s][A
  1%|▍                                  | 1498/122310 [02:12<3:27:29,  9.70it/s][A
  1%|▍                                  | 1505/122310 [02:13<3:26:40,  9.74it/s][A
  1%|▍                                  | 1518/122310 [02:14<2:45:14, 12.18it/s][A
  1%|▍                                  | 1525/122310 [02:14<2:54:50, 11.51it/s][A
  1%|▍                                  | 1536/122310 [02:15<2:38:35, 12.69

step: 15040, loss: 83.26806705307783, epoch: 1



  1%|▍                                  | 1623/122310 [02:22<2:38:44, 12.67it/s][A
  1%|▍                                  | 1630/122310 [02:22<2:51:10, 11.75it/s][A
  1%|▍                                  | 1634/122310 [02:23<3:24:37,  9.83it/s][A
  1%|▍                                  | 1644/122310 [02:24<3:03:26, 10.96it/s][A
  1%|▍                                  | 1646/122310 [02:25<3:57:11,  8.48it/s][A
  1%|▍                                  | 1651/122310 [02:25<4:11:15,  8.00it/s][A
  1%|▍                                  | 1661/122310 [02:26<3:27:58,  9.67it/s][A
  1%|▍                                  | 1665/122310 [02:27<3:58:25,  8.43it/s][A
  1%|▍                                  | 1674/122310 [02:28<3:30:19,  9.56it/s][A
  1%|▍                                  | 1681/122310 [02:28<3:29:52,  9.58it/s][A
  1%|▍                                  | 1690/122310 [02:29<3:13:43, 10.38it/s][A
  1%|▍                                  | 1706/122310 [02:30<2:25:36, 13.80

step: 15060, loss: 82.45183595592472, epoch: 1



  1%|▌                                  | 1775/122310 [02:36<3:19:52, 10.05it/s][A
  1%|▌                                  | 1785/122310 [02:37<3:00:13, 11.15it/s][A
  1%|▌                                  | 1789/122310 [02:38<3:33:10,  9.42it/s][A
  1%|▌                                  | 1796/122310 [02:39<3:32:07,  9.47it/s][A
  1%|▌                                  | 1801/122310 [02:39<3:52:25,  8.64it/s][A
  1%|▌                                  | 1806/122310 [02:40<4:08:44,  8.07it/s][A
  1%|▌                                  | 1815/122310 [02:41<3:35:10,  9.33it/s][A
  1%|▌                                  | 1823/122310 [02:42<3:24:43,  9.81it/s][A
  2%|▌                                  | 1850/122310 [02:42<1:52:29, 17.85it/s][A
  2%|▌                                  | 1858/122310 [02:43<2:07:26, 15.75it/s][A
  2%|▌                                  | 1866/122310 [02:44<2:20:48, 14.26it/s][A
  2%|▌                                  | 1872/122310 [02:44<2:41:09, 12.46

step: 15080, loss: 79.94954868929199, epoch: 1



  2%|▌                                  | 1948/122310 [02:51<2:44:08, 12.22it/s][A
  2%|▌                                  | 1955/122310 [02:52<2:55:14, 11.45it/s][A
  2%|▌                                  | 1958/122310 [02:53<3:37:31,  9.22it/s][A
  2%|▌                                  | 1972/122310 [02:53<2:44:47, 12.17it/s][A
  2%|▌                                  | 1974/122310 [02:54<3:34:39,  9.34it/s][A
  2%|▌                                  | 1981/122310 [02:55<3:34:29,  9.35it/s][A
  2%|▌                                  | 1990/122310 [02:56<3:16:29, 10.21it/s][A
  2%|▌                                  | 1998/122310 [02:56<3:12:13, 10.43it/s][A
  2%|▌                                  | 2003/122310 [02:57<3:34:36,  9.34it/s][A
  2%|▌                                  | 2013/122310 [02:58<3:08:40, 10.63it/s][A
  2%|▌                                  | 2018/122310 [02:58<3:32:42,  9.43it/s][A
  2%|▌                                  | 2019/122310 [02:59<4:45:11,  7.03

step: 15100, loss: 89.83899737060732, epoch: 1



  2%|▌                                  | 2084/122310 [03:06<3:28:22,  9.62it/s][A
  2%|▌                                  | 2095/122310 [03:07<2:58:52, 11.20it/s][A
  2%|▌                                  | 2116/122310 [03:07<2:02:33, 16.34it/s][A
  2%|▌                                  | 2130/122310 [03:08<1:57:08, 17.10it/s][A
  2%|▌                                  | 2134/122310 [03:09<2:26:15, 13.69it/s][A
  2%|▌                                  | 2142/122310 [03:09<2:35:48, 12.85it/s][A
  2%|▌                                  | 2150/122310 [03:10<2:43:25, 12.25it/s][A
  2%|▌                                  | 2163/122310 [03:11<2:24:17, 13.88it/s][A
  2%|▌                                  | 2167/122310 [03:12<2:56:39, 11.33it/s][A
  2%|▌                                  | 2177/122310 [03:12<2:46:35, 12.02it/s][A
  2%|▋                                  | 2191/122310 [03:13<2:21:54, 14.11it/s][A
  2%|▋                                  | 2202/122310 [03:14<2:20:05, 14.29

step: 15120, loss: 75.59647376864264, epoch: 1



  2%|▋                                  | 2273/122310 [03:21<2:26:38, 13.64it/s][A
  2%|▋                                  | 2278/122310 [03:21<2:51:38, 11.66it/s][A
  2%|▋                                  | 2286/122310 [03:22<2:54:39, 11.45it/s][A
  2%|▋                                  | 2296/122310 [03:23<2:46:00, 12.05it/s][A
  2%|▋                                  | 2300/122310 [03:23<3:17:39, 10.12it/s][A
  2%|▋                                  | 2305/122310 [03:24<3:39:01,  9.13it/s][A
  2%|▋                                  | 2315/122310 [03:25<3:10:57, 10.47it/s][A
  2%|▋                                  | 2323/122310 [03:26<3:08:15, 10.62it/s][A
  2%|▋                                  | 2338/122310 [03:26<2:27:48, 13.53it/s][A
  2%|▋                                  | 2342/122310 [03:27<2:59:58, 11.11it/s][A
  2%|▋                                  | 2349/122310 [03:28<3:08:11, 10.62it/s][A
  2%|▋                                  | 2353/122310 [03:29<3:40:14,  9.08

step: 15140, loss: 75.40282631362388, epoch: 1



  2%|▋                                  | 2446/122310 [03:35<2:15:18, 14.76it/s][A
  2%|▋                                  | 2452/122310 [03:36<2:35:54, 12.81it/s][A
  2%|▋                                  | 2459/122310 [03:37<2:49:03, 11.82it/s][A
  2%|▋                                  | 2464/122310 [03:37<3:13:17, 10.33it/s][A
  2%|▋                                  | 2472/122310 [03:38<3:10:13, 10.50it/s][A
  2%|▋                                  | 2475/122310 [03:39<3:52:43,  8.58it/s][A
  2%|▋                                  | 2483/122310 [03:40<3:34:53,  9.29it/s][A
  2%|▋                                  | 2486/122310 [03:40<4:18:48,  7.72it/s][A
  2%|▋                                  | 2499/122310 [03:41<3:06:22, 10.71it/s][A
  2%|▋                                  | 2510/122310 [03:42<2:47:11, 11.94it/s][A
  2%|▋                                  | 2526/122310 [03:43<2:14:06, 14.89it/s][A
  2%|▋                                  | 2534/122310 [03:43<2:25:29, 13.72

step: 15160, loss: 81.28839623214196, epoch: 1



  2%|▊                                  | 2622/122310 [03:50<2:33:00, 13.04it/s][A
  2%|▊                                  | 2631/122310 [03:51<2:35:19, 12.84it/s][A
  2%|▊                                  | 2644/122310 [03:51<2:19:26, 14.30it/s][A
  2%|▊                                  | 2649/122310 [03:52<2:45:37, 12.04it/s][A
  2%|▊                                  | 2651/122310 [03:53<3:35:37,  9.25it/s][A
  2%|▊                                  | 2656/122310 [03:54<3:54:03,  8.52it/s][A
  2%|▊                                  | 2667/122310 [03:54<3:10:30, 10.47it/s][A
  2%|▊                                  | 2673/122310 [03:55<3:24:00,  9.77it/s][A
  2%|▊                                  | 2679/122310 [03:56<3:34:25,  9.30it/s][A
  2%|▊                                  | 2684/122310 [03:57<3:53:55,  8.52it/s][A
  2%|▊                                  | 2691/122310 [03:57<3:45:17,  8.85it/s][A
  2%|▊                                  | 2696/122310 [03:58<4:03:07,  8.20

step: 15180, loss: 86.39143070348742, epoch: 1



  2%|▊                                  | 2777/122310 [04:05<2:40:05, 12.44it/s][A
  2%|▊                                  | 2785/122310 [04:05<2:46:02, 12.00it/s][A
  2%|▊                                  | 2797/122310 [04:06<2:30:04, 13.27it/s][A
  2%|▊                                  | 2803/122310 [04:07<2:49:26, 11.76it/s][A
  2%|▊                                  | 2809/122310 [04:08<3:05:43, 10.72it/s][A
  2%|▊                                  | 2824/122310 [04:08<2:27:08, 13.53it/s][A
  2%|▊                                  | 2826/122310 [04:09<3:11:58, 10.37it/s][A
  2%|▊                                  | 2833/122310 [04:10<3:16:34, 10.13it/s][A
  2%|▊                                  | 2845/122310 [04:11<2:45:53, 12.00it/s][A
  2%|▊                                  | 2855/122310 [04:11<2:39:25, 12.49it/s][A
  2%|▊                                  | 2871/122310 [04:12<2:10:41, 15.23it/s][A
  2%|▊                                  | 2877/122310 [04:13<2:31:38, 13.13

step: 15200, loss: 88.76021914509478, epoch: 1
sim1 and sim2 are 0.5642389449662573, 0.21761437733192593
cosine of pred and queen: 0.12887263628672052
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: talks
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: largement
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: emblem
Actual: china:asia::greece:europe, pred: spain
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: ma

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: made
Actual: islamabad:pakistan::oslo:norway, pred: friday
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: david
Actual: king:queen::husband:wife, pred: htay
Actual: man:woman::brothers:sisters, pred: woven
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: indian
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



  2%|▊                                | 2935/122310 [05:34<104:35:29,  3.15s/it][A

Actual: india:rupee::denmark:krone, pred: belgium
Accuracy is 0.05917159763313609



  2%|▊                                 | 2943/122310 [05:35<72:31:08,  2.19s/it][A
  2%|▊                                 | 2946/122310 [05:36<63:09:37,  1.90s/it][A
  2%|▊                                 | 2954/122310 [05:36<41:46:44,  1.26s/it][A
  2%|▊                                 | 2963/122310 [05:37<27:33:16,  1.20it/s][A
  2%|▊                                 | 2970/122310 [05:38<20:36:49,  1.61it/s][A
  2%|▊                                 | 2979/122310 [05:39<14:25:38,  2.30it/s][A
  2%|▊                                 | 2984/122310 [05:39<12:22:33,  2.68it/s][A
  2%|▊                                 | 2986/122310 [05:40<12:18:59,  2.69it/s][A
  2%|▊                                  | 2992/122310 [05:41<9:39:04,  3.43it/s][A
  2%|▊                                  | 3001/122310 [05:42<6:49:06,  4.86it/s][A
  2%|▊                                  | 3012/122310 [05:42<4:54:02,  6.76it/s][A
  2%|▊                                  | 3022/122310 [05:43<4:01:27,  8.23

step: 15220, loss: 73.8023916928006, epoch: 1



  3%|▉                                  | 3103/122310 [05:50<3:01:12, 10.96it/s][A
  3%|▉                                  | 3108/122310 [05:50<3:23:27,  9.76it/s][A
  3%|▉                                  | 3121/122310 [05:51<2:43:43, 12.13it/s][A
  3%|▉                                  | 3125/122310 [05:52<3:15:01, 10.19it/s][A
  3%|▉                                  | 3141/122310 [05:52<2:24:31, 13.74it/s][A
  3%|▉                                  | 3152/122310 [05:53<2:21:03, 14.08it/s][A
  3%|▉                                  | 3160/122310 [05:54<2:30:36, 13.19it/s][A
  3%|▉                                  | 3165/122310 [05:55<2:55:38, 11.31it/s][A
  3%|▉                                  | 3170/122310 [05:55<3:19:31,  9.95it/s][A
  3%|▉                                  | 3187/122310 [05:56<2:22:12, 13.96it/s][A
  3%|▉                                  | 3200/122310 [05:57<2:11:57, 15.04it/s][A
  3%|▉                                  | 3211/122310 [05:58<2:11:34, 15.09

step: 15240, loss: 94.39414441861405, epoch: 1



  3%|▉                                  | 3300/122310 [06:04<2:28:09, 13.39it/s][A
  3%|▉                                  | 3307/122310 [06:05<2:42:50, 12.18it/s][A
  3%|▉                                  | 3318/122310 [06:06<2:33:03, 12.96it/s][A
  3%|▉                                  | 3341/122310 [06:06<1:48:29, 18.28it/s][A
  3%|▉                                  | 3350/122310 [06:07<2:00:53, 16.40it/s][A
  3%|▉                                  | 3361/122310 [06:08<2:04:41, 15.90it/s][A
  3%|▉                                  | 3367/122310 [06:09<2:27:07, 13.47it/s][A
  3%|▉                                  | 3371/122310 [06:09<2:58:39, 11.10it/s][A
  3%|▉                                  | 3379/122310 [06:10<3:01:07, 10.94it/s][A
  3%|▉                                  | 3387/122310 [06:11<3:02:02, 10.89it/s][A
  3%|▉                                  | 3394/122310 [06:12<3:10:53, 10.38it/s][A

step: 15260, loss: 97.6799422899205, epoch: 1



  3%|▉                                 | 3400/122310 [06:20<13:32:47,  2.44it/s][A
  3%|▉                                 | 3405/122310 [06:21<11:34:16,  2.85it/s][A
  3%|▉                                  | 3419/122310 [06:21<6:53:41,  4.79it/s][A
  3%|▉                                  | 3425/122310 [06:22<6:17:47,  5.24it/s][A
  3%|▉                                  | 3429/122310 [06:23<6:15:50,  5.27it/s][A
  3%|▉                                  | 3430/122310 [06:24<7:23:09,  4.47it/s][A
  3%|▉                                  | 3433/122310 [06:24<7:32:57,  4.37it/s][A
  3%|▉                                  | 3438/122310 [06:25<6:42:12,  4.93it/s][A
  3%|▉                                  | 3454/122310 [06:26<3:37:06,  9.12it/s][A
  3%|▉                                  | 3457/122310 [06:27<4:15:34,  7.75it/s][A
  3%|▉                                  | 3464/122310 [06:27<4:01:26,  8.20it/s][A
  3%|▉                                  | 3480/122310 [06:28<2:46:12, 11.92

step: 15280, loss: 121.5703623676487, epoch: 1



  3%|█                                  | 3550/122310 [06:34<2:31:57, 13.03it/s][A
  3%|█                                  | 3561/122310 [06:35<2:26:16, 13.53it/s][A
  3%|█                                  | 3566/122310 [06:36<2:53:46, 11.39it/s][A
  3%|█                                  | 3568/122310 [06:36<3:44:28,  8.82it/s][A
  3%|█                                  | 3573/122310 [06:37<4:01:40,  8.19it/s][A
  3%|█                                  | 3579/122310 [06:38<4:04:43,  8.09it/s][A
  3%|█                                  | 3589/122310 [06:39<3:25:59,  9.61it/s][A
  3%|█                                  | 3599/122310 [06:39<3:05:06, 10.69it/s][A
  3%|█                                  | 3608/122310 [06:40<2:58:38, 11.07it/s][A
  3%|█                                  | 3627/122310 [06:41<2:09:15, 15.30it/s][A
  3%|█                                  | 3633/122310 [06:42<2:30:48, 13.12it/s][A
  3%|█                                  | 3647/122310 [06:42<2:13:42, 14.79

step: 15300, loss: 78.08853081093945, epoch: 1



  3%|█                                  | 3724/122310 [06:49<3:16:04, 10.08it/s][A
  3%|█                                  | 3729/122310 [06:50<3:37:36,  9.08it/s][A
  3%|█                                  | 3743/122310 [06:51<2:45:14, 11.96it/s][A

step: 15320, loss: 106.51013116077021, epoch: 1



  3%|█                                  | 3743/122310 [07:06<2:45:14, 11.96it/s][A
  3%|█                                 | 3747/122310 [07:10<27:49:37,  1.18it/s][A
  3%|█                                 | 3765/122310 [07:11<14:23:16,  2.29it/s][A
  3%|█                                 | 3771/122310 [07:12<13:11:39,  2.50it/s][A
  3%|█                                 | 3779/122310 [07:13<10:26:55,  3.15it/s][A
  3%|█                                  | 3788/122310 [07:14<8:06:51,  4.06it/s][A
  3%|█                                 | 3790/122310 [07:16<10:39:31,  3.09it/s][A
  3%|█                                  | 3794/122310 [07:17<9:44:45,  3.38it/s][A
  3%|█                                  | 3801/122310 [07:18<7:40:29,  4.29it/s][A
  3%|█                                  | 3811/122310 [07:18<5:35:13,  5.89it/s][A

step: 15340, loss: 80.8948815487777, epoch: 1



  3%|█                                  | 3823/122310 [07:19<4:08:23,  7.95it/s][A
  3%|█                                  | 3838/122310 [07:20<3:05:05, 10.67it/s][A
  3%|█                                  | 3846/122310 [07:21<3:05:28, 10.64it/s][A
  3%|█                                  | 3850/122310 [07:21<3:32:25,  9.29it/s][A
  3%|█                                  | 3857/122310 [07:22<3:32:35,  9.29it/s][A
  3%|█                                  | 3862/122310 [07:23<3:51:11,  8.54it/s][A
  3%|█                                  | 3865/122310 [07:24<4:33:02,  7.23it/s][A
  3%|█                                  | 3871/122310 [07:24<4:24:40,  7.46it/s][A
  3%|█                                  | 3882/122310 [07:25<3:26:11,  9.57it/s][A
  3%|█                                  | 3896/122310 [07:26<2:41:56, 12.19it/s][A
  3%|█                                  | 3898/122310 [07:27<3:30:22,  9.38it/s][A
  3%|█                                  | 3906/122310 [07:27<3:22:00,  9.77

step: 15360, loss: 92.4128110396037, epoch: 1



  3%|█▏                                 | 4002/122310 [07:34<2:38:38, 12.43it/s][A
  3%|█▏                                 | 4009/122310 [07:35<2:50:04, 11.59it/s][A
  3%|█▏                                 | 4014/122310 [07:36<3:13:51, 10.17it/s][A
  3%|█▏                                 | 4024/122310 [07:36<2:55:17, 11.25it/s][A
  3%|█▏                                 | 4032/122310 [07:37<2:56:42, 11.16it/s][A
  3%|█▏                                 | 4039/122310 [07:38<3:04:53, 10.66it/s][A
  3%|█▏                                 | 4047/122310 [07:38<3:03:48, 10.72it/s][A
  3%|█▏                                 | 4052/122310 [07:39<3:25:49,  9.58it/s][A
  3%|█▏                                 | 4056/122310 [07:40<3:55:30,  8.37it/s][A
  3%|█▏                                 | 4061/122310 [07:41<4:09:18,  7.91it/s][A
  3%|█▏                                 | 4063/122310 [07:41<5:11:46,  6.32it/s][A
  3%|█▏                                 | 4067/122310 [07:42<5:24:18,  6.08

step: 15380, loss: 84.08827287989126, epoch: 1



  3%|█▏                                 | 4135/122310 [07:49<3:57:04,  8.31it/s][A
  3%|█▏                                 | 4152/122310 [07:49<2:34:02, 12.78it/s][A
  3%|█▏                                 | 4161/122310 [07:50<2:36:02, 12.62it/s][A
  3%|█▏                                 | 4168/122310 [07:51<2:48:18, 11.70it/s][A
  3%|█▏                                 | 4175/122310 [07:52<3:51:28,  8.51it/s][A
  3%|█▏                                 | 4187/122310 [07:53<3:07:28, 10.50it/s][A
  3%|█▏                                 | 4192/122310 [07:54<3:26:37,  9.53it/s][A
  3%|█▏                                 | 4198/122310 [07:55<3:34:03,  9.20it/s][A
  3%|█▏                                 | 4211/122310 [07:55<2:49:23, 11.62it/s][A
  3%|█▏                                 | 4217/122310 [07:56<3:03:22, 10.73it/s][A
  3%|█▏                                 | 4224/122310 [07:57<3:08:41, 10.43it/s][A
  3%|█▏                                 | 4236/122310 [07:57<2:39:49, 12.31

step: 15400, loss: 125.02121875345023, epoch: 1



  4%|█▏                                 | 4307/122310 [08:03<2:59:55, 10.93it/s][A
  4%|█▏                                 | 4311/122310 [08:05<4:31:44,  7.24it/s][A
  4%|█▏                                 | 4328/122310 [08:06<3:35:36,  9.12it/s][A
  4%|█▏                                 | 4329/122310 [08:07<5:25:36,  6.04it/s][A
  4%|█▏                                 | 4339/122310 [08:08<4:17:12,  7.64it/s][A
  4%|█▏                                 | 4345/122310 [08:09<4:14:19,  7.73it/s][A
  4%|█▏                                 | 4355/122310 [08:10<3:34:16,  9.17it/s][A
  4%|█▏                                 | 4363/122310 [08:10<3:24:43,  9.60it/s][A
  4%|█▎                                 | 4374/122310 [08:11<2:57:02, 11.10it/s][A
  4%|█▎                                 | 4379/122310 [08:12<3:18:26,  9.90it/s][A
  4%|█▎                                 | 4383/122310 [08:13<3:47:06,  8.65it/s][A
  4%|█▎                                 | 4392/122310 [08:13<3:23:23,  9.66

step: 15420, loss: 96.04280620363512, epoch: 1



  4%|█▎                                 | 4447/122310 [08:18<3:23:08,  9.67it/s][A
  4%|█▎                                 | 4458/122310 [08:19<2:56:24, 11.13it/s][A
  4%|█▎                                 | 4466/122310 [08:19<2:58:27, 11.01it/s][A
  4%|█▎                                 | 4477/122310 [08:20<2:42:33, 12.08it/s][A
  4%|█▎                                 | 4487/122310 [08:21<2:38:00, 12.43it/s][A
  4%|█▎                                 | 4491/122310 [08:22<3:11:26, 10.26it/s][A
  4%|█▎                                 | 4501/122310 [08:22<2:55:23, 11.19it/s][A
  4%|█▎                                 | 4510/122310 [08:23<2:51:42, 11.43it/s][A
  4%|█▎                                 | 4520/122310 [08:24<2:44:12, 11.95it/s][A
  4%|█▎                                 | 4529/122310 [08:25<2:44:14, 11.95it/s][A
  4%|█▎                                 | 4538/122310 [08:25<2:44:54, 11.90it/s][A
  4%|█▎                                 | 4543/122310 [08:26<3:09:35, 10.35

step: 15440, loss: 78.27615988152334, epoch: 1



  4%|█▎                                 | 4641/122310 [08:33<2:06:29, 15.50it/s][A
  4%|█▎                                 | 4655/122310 [08:34<1:59:05, 16.47it/s][A
  4%|█▎                                 | 4668/122310 [08:34<1:57:35, 16.67it/s][A
  4%|█▎                                 | 4678/122310 [08:35<2:05:43, 15.59it/s][A
  4%|█▎                                 | 4686/122310 [08:36<2:20:10, 13.99it/s][A
  4%|█▎                                 | 4689/122310 [08:37<2:57:21, 11.05it/s][A
  4%|█▎                                 | 4696/122310 [08:37<3:06:32, 10.51it/s][A
  4%|█▎                                 | 4703/122310 [08:38<3:12:53, 10.16it/s][A
  4%|█▎                                 | 4709/122310 [08:39<3:27:18,  9.45it/s][A
  4%|█▎                                 | 4716/122310 [08:40<3:28:24,  9.40it/s][A
  4%|█▎                                 | 4723/122310 [08:40<3:28:56,  9.38it/s][A
  4%|█▎                                 | 4734/122310 [08:41<2:58:53, 10.95

step: 15460, loss: 94.85058976088781, epoch: 1



  4%|█▎                                 | 4792/122310 [08:48<3:36:59,  9.03it/s][A
  4%|█▍                                 | 4809/122310 [08:49<2:30:18, 13.03it/s][A
  4%|█▍                                 | 4816/122310 [08:50<2:44:21, 11.91it/s][A
  4%|█▍                                 | 4820/122310 [08:50<3:18:08,  9.88it/s][A
  4%|█▍                                 | 4824/122310 [08:51<3:48:59,  8.55it/s][A
  4%|█▍                                 | 4834/122310 [08:52<3:17:53,  9.89it/s][A
  4%|█▍                                 | 4839/122310 [08:53<3:40:31,  8.88it/s][A
  4%|█▍                                 | 4848/122310 [08:53<3:21:54,  9.70it/s][A
  4%|█▍                                 | 4851/122310 [08:54<4:04:40,  8.00it/s][A
  4%|█▍                                 | 4856/122310 [08:55<4:17:52,  7.59it/s][A
  4%|█▍                                 | 4874/122310 [08:56<2:37:13, 12.45it/s][A
  4%|█▍                                 | 4885/122310 [08:56<2:30:00, 13.05

step: 15480, loss: 113.3963123316886, epoch: 1



  4%|█▍                                 | 4965/122310 [09:03<3:08:51, 10.36it/s][A
  4%|█▍                                 | 4973/122310 [09:04<3:07:07, 10.45it/s][A
  4%|█▍                                 | 4991/122310 [09:05<2:15:06, 14.47it/s][A
  4%|█▍                                 | 5002/122310 [09:05<2:14:50, 14.50it/s][A
  4%|█▍                                 | 5006/122310 [09:06<2:46:46, 11.72it/s][A
  4%|█▍                                 | 5015/122310 [09:07<2:45:36, 11.80it/s][A
  4%|█▍                                 | 5035/122310 [09:08<2:00:37, 16.20it/s][A
  4%|█▍                                 | 5043/122310 [09:08<2:14:18, 14.55it/s][A
  4%|█▍                                 | 5054/122310 [09:09<2:14:15, 14.56it/s][A
  4%|█▍                                 | 5061/122310 [09:10<2:30:36, 12.98it/s][A
  4%|█▍                                 | 5071/122310 [09:11<2:29:41, 13.05it/s][A
  4%|█▍                                 | 5089/122310 [09:12<2:00:04, 16.27

step: 15500, loss: 73.87170027554988, epoch: 1



  4%|█▍                                 | 5162/122310 [09:18<3:08:54, 10.34it/s][A
  4%|█▍                                 | 5169/122310 [09:19<3:11:34, 10.19it/s][A
  4%|█▍                                 | 5180/122310 [09:20<2:47:51, 11.63it/s][A
  4%|█▍                                 | 5182/122310 [09:20<3:35:23,  9.06it/s][A
  4%|█▍                                 | 5189/122310 [09:21<3:30:47,  9.26it/s][A
  4%|█▍                                 | 5199/122310 [09:22<3:03:12, 10.65it/s][A
  4%|█▍                                 | 5204/122310 [09:23<3:24:58,  9.52it/s][A
  4%|█▍                                 | 5207/122310 [09:23<4:05:51,  7.94it/s][A
  4%|█▍                                 | 5223/122310 [09:24<2:39:44, 12.22it/s][A
  4%|█▍                                 | 5230/122310 [09:25<2:50:10, 11.47it/s][A
  4%|█▍                                 | 5240/122310 [09:25<2:39:53, 12.20it/s][A
  4%|█▌                                 | 5252/122310 [09:26<2:24:26, 13.51

step: 15520, loss: 78.57221388772858, epoch: 1



  4%|█▌                                 | 5331/122310 [09:33<2:44:58, 11.82it/s][A
  4%|█▌                                 | 5337/122310 [09:33<3:00:24, 10.81it/s][A
  4%|█▌                                 | 5343/122310 [09:34<3:12:58, 10.10it/s][A
  4%|█▌                                 | 5356/122310 [09:35<2:35:34, 12.53it/s][A
  4%|█▌                                 | 5365/122310 [09:36<2:35:31, 12.53it/s][A
  4%|█▌                                 | 5371/122310 [09:36<2:52:06, 11.32it/s][A
  4%|█▌                                 | 5375/122310 [09:38<5:22:22,  6.05it/s][A
  4%|█▌                                 | 5380/122310 [09:39<5:13:15,  6.22it/s][A
  4%|█▌                                 | 5390/122310 [09:40<4:02:28,  8.04it/s][A
  4%|█▌                                 | 5402/122310 [09:41<3:11:25, 10.18it/s][A
  4%|█▌                                 | 5404/122310 [09:41<3:54:54,  8.29it/s][A
  4%|█▌                                 | 5412/122310 [09:42<3:33:55,  9.11

step: 15540, loss: 89.35184094389115, epoch: 1



  4%|█▌                                 | 5476/122310 [09:47<2:54:18, 11.17it/s][A
  4%|█▌                                 | 5485/122310 [09:48<2:47:07, 11.65it/s][A
  4%|█▌                                 | 5492/122310 [09:48<2:55:14, 11.11it/s][A
  4%|█▌                                 | 5499/122310 [09:49<3:02:07, 10.69it/s][A
  5%|█▌                                 | 5506/122310 [09:50<3:06:11, 10.46it/s][A
  5%|█▌                                 | 5512/122310 [09:50<3:17:01,  9.88it/s][A
  5%|█▌                                 | 5523/122310 [09:51<2:48:26, 11.56it/s][A
  5%|█▌                                 | 5531/122310 [09:52<2:49:42, 11.47it/s][A
  5%|█▌                                 | 5543/122310 [09:53<2:28:38, 13.09it/s][A
  5%|█▌                                 | 5553/122310 [09:53<2:25:19, 13.39it/s][A
  5%|█▌                                 | 5559/122310 [09:54<2:43:44, 11.88it/s][A
  5%|█▌                                 | 5563/122310 [09:55<3:14:26, 10.01

step: 15560, loss: 91.63072028516137, epoch: 1



  5%|█▌                                 | 5650/122310 [10:01<3:16:42,  9.88it/s][A
  5%|█▌                                 | 5662/122310 [10:02<2:42:13, 11.98it/s][A
  5%|█▌                                 | 5671/122310 [10:03<2:39:26, 12.19it/s][A
  5%|█▋                                 | 5679/122310 [10:03<2:43:06, 11.92it/s][A
  5%|█▋                                 | 5688/122310 [10:04<2:40:10, 12.14it/s][A
  5%|█▋                                 | 5704/122310 [10:05<2:07:30, 15.24it/s][A
  5%|█▋                                 | 5711/122310 [10:05<2:23:10, 13.57it/s][A
  5%|█▋                                 | 5717/122310 [10:06<2:41:23, 12.04it/s][A
  5%|█▋                                 | 5727/122310 [10:07<2:33:53, 12.63it/s][A
  5%|█▋                                 | 5732/122310 [10:08<2:58:24, 10.89it/s][A
  5%|█▋                                 | 5747/122310 [10:08<2:19:20, 13.94it/s][A
  5%|█▋                                 | 5759/122310 [10:09<2:11:23, 14.78

step: 15580, loss: 79.03799664113896, epoch: 1



  5%|█▋                                 | 5821/122310 [10:15<3:09:21, 10.25it/s][A
  5%|█▋                                 | 5827/122310 [10:16<3:19:51,  9.71it/s][A
  5%|█▋                                 | 5830/122310 [10:17<4:02:19,  8.01it/s][A
  5%|█▋                                 | 5833/122310 [10:18<4:41:09,  6.90it/s][A
  5%|█▋                                 | 5837/122310 [10:18<4:57:56,  6.52it/s][A
  5%|█▋                                 | 5849/122310 [10:19<3:21:32,  9.63it/s][A
  5%|█▋                                 | 5859/122310 [10:20<2:57:04, 10.96it/s][A
  5%|█▋                                 | 5870/122310 [10:20<2:37:34, 12.32it/s][A
  5%|█▋                                 | 5877/122310 [10:21<2:47:44, 11.57it/s][A
  5%|█▋                                 | 5891/122310 [10:22<2:18:29, 14.01it/s][A
  5%|█▋                                 | 5899/122310 [10:23<2:27:16, 13.17it/s][A
  5%|█▋                                 | 5911/122310 [10:23<2:16:39, 14.20

step: 15600, loss: 79.57111266439955, epoch: 1
sim1 and sim2 are 0.5489229770861974, 0.21451896928350428
cosine of pred and queen: 0.1271881116805349
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: talks
Actual: beijing:china::tokyo:japan, pred: taiwan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: largement
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: group
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: collective
Actual: china:asia::greece:europe, pred: spain
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: vallecillo
Actual: ma


  5%|█▋                                 | 5963/122310 [10:46<2:53:12, 11.19it/s][A

Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: indian
Actual: italy:italian::ireland:irish, pred: friday
Actual: croatia:croatian::france:french, pred: friday
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.15555555555555556
Actual: walk:walks::vanish:vanishes, pred: kalingrad
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: let
Actual: sing:sings::shuffle:shuffles, pred: emblem
Actual: sit:sits::go:goes, pred: emblem
Actual: say:says::provide:provides, pred: combating
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:plays::listen:listens, pred: discarded
Actual: predict:predicts::search:searches, pred: pebbles
Actual: machine:machines::lion:lions, pred: thamer
Actual: mango:mangoes::oni


  5%|█▋                                | 5976/122310 [11:42<73:23:13,  2.27s/it][A

Actual: india:rupee::denmark:krone, pred: belgium
Accuracy is 0.05325443786982249



  5%|█▋                                | 5988/122310 [11:43<49:10:14,  1.52s/it][A
  5%|█▋                                | 6000/122310 [11:45<34:58:21,  1.08s/it][A
  5%|█▋                                | 6010/122310 [11:46<25:49:37,  1.25it/s][A
  5%|█▋                                | 6016/122310 [11:47<21:34:13,  1.50it/s][A
  5%|█▋                                | 6026/122310 [11:47<15:30:19,  2.08it/s][A
  5%|█▋                                | 6028/122310 [11:48<15:09:35,  2.13it/s][A
  5%|█▋                                | 6033/122310 [11:49<12:45:07,  2.53it/s][A
  5%|█▋                                 | 6046/122310 [11:50<7:42:57,  4.19it/s][A
  5%|█▋                                 | 6053/122310 [11:50<6:33:34,  4.92it/s][A
  5%|█▋                                 | 6064/122310 [11:51<4:55:58,  6.55it/s][A
  5%|█▋                                 | 6066/122310 [11:52<5:29:17,  5.88it/s][A
  5%|█▋                                 | 6079/122310 [11:52<3:49:08,  8.45

step: 15620, loss: 81.32426527648717, epoch: 1



  5%|█▊                                 | 6132/122310 [11:57<3:08:57, 10.25it/s][A
  5%|█▊                                 | 6140/122310 [11:58<3:03:06, 10.57it/s][A
  5%|█▊                                 | 6149/122310 [11:59<2:59:55, 10.76it/s][A
  5%|█▊                                 | 6157/122310 [12:00<3:01:04, 10.69it/s][A
  5%|█▊                                 | 6163/122310 [12:00<3:15:49,  9.89it/s][A
  5%|█▊                                 | 6170/122310 [12:01<3:20:27,  9.66it/s][A
  5%|█▊                                 | 6179/122310 [12:02<3:06:41, 10.37it/s][A
  5%|█▊                                 | 6190/122310 [12:03<2:46:27, 11.63it/s][A
  5%|█▊                                 | 6202/122310 [12:03<2:29:27, 12.95it/s][A
  5%|█▊                                 | 6207/122310 [12:04<2:55:33, 11.02it/s][A
  5%|█▊                                 | 6218/122310 [12:05<2:40:06, 12.09it/s][A
  5%|█▊                                 | 6232/122310 [12:06<2:18:01, 14.02

step: 15640, loss: 98.42436939609172, epoch: 1



  5%|█▊                                 | 6333/122310 [12:12<2:18:49, 13.92it/s][A
  5%|█▊                                 | 6337/122310 [12:13<2:50:31, 11.33it/s][A
  5%|█▊                                 | 6348/122310 [12:14<2:36:11, 12.37it/s][A
  5%|█▊                                 | 6355/122310 [12:15<2:46:05, 11.64it/s][A
  5%|█▊                                 | 6368/122310 [12:15<2:23:04, 13.51it/s][A
  5%|█▊                                 | 6370/122310 [12:16<3:06:30, 10.36it/s][A
  5%|█▊                                 | 6384/122310 [12:17<2:27:48, 13.07it/s][A
  5%|█▊                                 | 6387/122310 [12:18<3:05:41, 10.40it/s][A
  5%|█▊                                 | 6393/122310 [12:18<3:16:48,  9.82it/s][A
  5%|█▊                                 | 6397/122310 [12:19<3:45:26,  8.57it/s][A
  5%|█▊                                 | 6407/122310 [12:20<3:09:47, 10.18it/s][A
  5%|█▊                                 | 6416/122310 [12:20<2:57:34, 10.88

step: 15660, loss: 74.17095269522186, epoch: 1



  5%|█▊                                 | 6503/122310 [12:27<2:20:35, 13.73it/s][A
  5%|█▊                                 | 6526/122310 [12:28<1:42:07, 18.90it/s][A
  5%|█▊                                 | 6537/122310 [12:28<1:48:45, 17.74it/s][A
  5%|█▊                                 | 6545/122310 [12:29<2:02:40, 15.73it/s][A
  5%|█▊                                 | 6552/122310 [12:31<3:00:50, 10.67it/s][A
  5%|█▉                                 | 6563/122310 [12:31<2:43:57, 11.77it/s][A
  5%|█▉                                 | 6583/122310 [12:32<2:02:05, 15.80it/s][A
  5%|█▉                                 | 6608/122310 [12:33<1:32:57, 20.74it/s][A
  5%|█▉                                 | 6614/122310 [12:34<1:51:26, 17.30it/s][A
  5%|█▉                                 | 6618/122310 [12:34<2:18:25, 13.93it/s][A
  5%|█▉                                 | 6628/122310 [12:35<2:19:50, 13.79it/s][A
  5%|█▉                                 | 6634/122310 [12:36<2:37:59, 12.20

step: 15680, loss: 72.90308340497668, epoch: 1



  5%|█▉                                 | 6687/122310 [12:42<3:26:59,  9.31it/s][A
  5%|█▉                                 | 6699/122310 [12:42<2:48:37, 11.43it/s][A
  5%|█▉                                 | 6708/122310 [12:43<2:44:43, 11.70it/s][A
  5%|█▉                                 | 6713/122310 [12:44<3:08:35, 10.22it/s][A
  5%|█▉                                 | 6722/122310 [12:45<2:58:45, 10.78it/s][A
  5%|█▉                                 | 6725/122310 [12:45<3:38:26,  8.82it/s][A
  6%|█▉                                 | 6737/122310 [12:47<3:46:23,  8.51it/s][A
  6%|█▉                                 | 6738/122310 [12:48<4:43:59,  6.78it/s][A
  6%|█▉                                 | 6743/122310 [12:48<4:43:25,  6.80it/s][A
  6%|█▉                                 | 6757/122310 [12:49<3:09:37, 10.16it/s][A
  6%|█▉                                 | 6763/122310 [12:50<3:20:36,  9.60it/s][A
  6%|█▉                                 | 6775/122310 [12:50<2:46:53, 11.54

step: 15700, loss: 175.54843480118132, epoch: 1



  6%|█▉                                 | 6810/122310 [12:56<5:51:01,  5.48it/s][A
  6%|█▉                                 | 6815/122310 [12:57<5:32:48,  5.78it/s][A
  6%|█▉                                 | 6822/122310 [12:58<4:49:00,  6.66it/s][A
  6%|█▉                                 | 6827/122310 [12:58<4:46:26,  6.72it/s][A
  6%|█▉                                 | 6831/122310 [12:59<5:01:20,  6.39it/s][A
  6%|█▉                                 | 6835/122310 [13:00<5:14:43,  6.12it/s][A
  6%|█▉                                 | 6845/122310 [13:01<3:54:29,  8.21it/s][A
  6%|█▉                                 | 6857/122310 [13:01<3:03:24, 10.49it/s][A
  6%|█▉                                 | 6865/122310 [13:02<3:01:23, 10.61it/s][A
  6%|█▉                                 | 6879/122310 [13:04<3:10:39, 10.09it/s][A
  6%|█▉                                 | 6884/122310 [13:04<3:26:16,  9.33it/s][A
  6%|█▉                                 | 6886/122310 [13:05<4:13:07,  7.60

step: 15720, loss: 73.6475517574454, epoch: 1



  6%|█▉                                 | 6951/122310 [13:11<3:13:37,  9.93it/s][A
  6%|█▉                                 | 6957/122310 [13:12<3:23:46,  9.43it/s][A
  6%|█▉                                 | 6964/122310 [13:12<3:22:56,  9.47it/s][A
  6%|█▉                                 | 6971/122310 [13:13<3:21:47,  9.53it/s][A
  6%|█▉                                 | 6983/122310 [13:14<2:46:03, 11.58it/s][A
  6%|██                                 | 6995/122310 [13:15<2:28:12, 12.97it/s][A
  6%|██                                 | 7004/122310 [13:15<2:29:54, 12.82it/s][A
  6%|██                                 | 7009/122310 [13:16<2:54:02, 11.04it/s][A
  6%|██                                 | 7016/122310 [13:17<3:01:49, 10.57it/s][A
  6%|██                                 | 7019/122310 [13:18<3:43:24,  8.60it/s][A
  6%|██                                 | 7027/122310 [13:18<3:26:48,  9.29it/s][A
  6%|██                                 | 7035/122310 [13:19<3:16:39,  9.77

step: 15740, loss: 76.70528623512172, epoch: 1



  6%|██                                 | 7102/122310 [13:26<3:21:54,  9.51it/s][A
  6%|██                                 | 7113/122310 [13:26<2:52:12, 11.15it/s][A
  6%|██                                 | 7124/122310 [13:27<2:36:38, 12.26it/s][A
  6%|██                                 | 7132/122310 [13:28<2:41:54, 11.86it/s][A
  6%|██                                 | 7151/122310 [13:29<1:59:45, 16.03it/s][A
  6%|██                                 | 7158/122310 [13:29<2:16:28, 14.06it/s][A
  6%|██                                 | 7162/122310 [13:30<2:47:03, 11.49it/s][A
  6%|██                                 | 7169/122310 [13:31<2:55:24, 10.94it/s][A
  6%|██                                 | 7175/122310 [13:32<3:10:19, 10.08it/s][A
  6%|██                                 | 7187/122310 [13:32<2:40:19, 11.97it/s][A
  6%|██                                 | 7195/122310 [13:33<2:44:56, 11.63it/s][A
  6%|██                                 | 7204/122310 [13:34<2:42:16, 11.82

step: 15760, loss: 89.65881556618012, epoch: 1



  6%|██                                 | 7273/122310 [13:40<3:34:57,  8.92it/s][A
  6%|██                                 | 7290/122310 [13:41<2:25:21, 13.19it/s][A
  6%|██                                 | 7293/122310 [13:42<3:02:47, 10.49it/s][A
  6%|██                                 | 7301/122310 [13:43<3:00:15, 10.63it/s][A
  6%|██                                 | 7307/122310 [13:43<3:14:08,  9.87it/s][A
  6%|██                                 | 7316/122310 [13:44<3:00:50, 10.60it/s][A
  6%|██                                 | 7323/122310 [13:45<3:07:20, 10.23it/s][A
  6%|██                                 | 7331/122310 [13:45<3:02:56, 10.47it/s][A
  6%|██                                 | 7336/122310 [13:46<3:24:23,  9.38it/s][A
  6%|██                                 | 7340/122310 [13:47<3:53:49,  8.19it/s][A
  6%|██                                 | 7344/122310 [13:48<5:37:56,  5.67it/s][A
  6%|██                                 | 7352/122310 [13:49<4:33:32,  7.00

step: 15780, loss: 84.48092210392635, epoch: 1



  6%|██                                 | 7421/122310 [13:55<3:05:27, 10.32it/s][A
  6%|██                                 | 7423/122310 [13:56<3:56:40,  8.09it/s][A
  6%|██▏                                | 7430/122310 [13:56<3:44:58,  8.51it/s][A
  6%|██▏                                | 7440/122310 [13:57<3:10:33, 10.05it/s][A
  6%|██▏                                | 7453/122310 [13:58<2:35:14, 12.33it/s][A
  6%|██▏                                | 7457/122310 [13:59<3:07:16, 10.22it/s][A
  6%|██▏                                | 7468/122310 [13:59<2:43:59, 11.67it/s][A
  6%|██▏                                | 7480/122310 [14:00<2:26:27, 13.07it/s][A
  6%|██▏                                | 7490/122310 [14:01<2:25:04, 13.19it/s][A
  6%|██▏                                | 7509/122310 [14:02<1:52:49, 16.96it/s][A
  6%|██▏                                | 7512/122310 [14:02<2:26:53, 13.03it/s][A
  6%|██▏                                | 7515/122310 [14:03<3:03:15, 10.44

step: 15800, loss: 90.37002224591608, epoch: 1



  6%|██▏                                | 7593/122310 [14:10<3:23:34,  9.39it/s][A
  6%|██▏                                | 7602/122310 [14:11<3:06:07, 10.27it/s][A
  6%|██▏                                | 7609/122310 [14:11<3:10:08, 10.05it/s][A
  6%|██▏                                | 7615/122310 [14:12<3:22:00,  9.46it/s][A
  6%|██▏                                | 7623/122310 [14:13<3:12:47,  9.91it/s][A
  6%|██▏                                | 7631/122310 [14:13<3:07:31, 10.19it/s][A
  6%|██▏                                | 7636/122310 [14:14<3:28:13,  9.18it/s][A
  6%|██▏                                | 7642/122310 [14:15<3:35:50,  8.85it/s][A
  6%|██▏                                | 7653/122310 [14:16<2:58:13, 10.72it/s][A
  6%|██▏                                | 7666/122310 [14:16<2:29:07, 12.81it/s][A
  6%|██▏                                | 7675/122310 [14:17<2:30:46, 12.67it/s][A
  6%|██▏                                | 7686/122310 [14:18<2:22:55, 13.37

step: 15820, loss: 88.75429779097074, epoch: 1



  6%|██▏                                | 7747/122310 [14:24<4:35:39,  6.93it/s][A
  6%|██▏                                | 7752/122310 [14:25<4:37:05,  6.89it/s][A
  6%|██▏                                | 7755/122310 [14:26<5:14:58,  6.06it/s][A
  6%|██▏                                | 7766/122310 [14:27<3:38:01,  8.76it/s][A
  6%|██▏                                | 7774/122310 [14:27<3:23:04,  9.40it/s][A
  6%|██▏                                | 7780/122310 [14:28<3:31:49,  9.01it/s][A
  6%|██▏                                | 7789/122310 [14:29<3:10:46, 10.01it/s][A
  6%|██▏                                | 7794/122310 [14:30<3:30:50,  9.05it/s][A
  6%|██▏                                | 7802/122310 [14:30<3:18:57,  9.59it/s][A
  6%|██▏                                | 7806/122310 [14:31<3:49:25,  8.32it/s][A
  6%|██▏                                | 7810/122310 [14:32<4:15:26,  7.47it/s][A
  6%|██▏                                | 7821/122310 [14:33<3:16:31,  9.71

step: 15840, loss: 89.30245558247938, epoch: 1



  6%|██▎                                | 7899/122310 [14:39<3:05:15, 10.29it/s][A
  6%|██▎                                | 7906/122310 [14:40<3:09:14, 10.08it/s][A
  6%|██▎                                | 7921/122310 [14:41<2:24:39, 13.18it/s][A
  6%|██▎                                | 7927/122310 [14:41<2:43:05, 11.69it/s][A
  6%|██▎                                | 7943/122310 [14:42<2:10:32, 14.60it/s][A
  7%|██▎                                | 7953/122310 [14:43<2:13:18, 14.30it/s][A
  7%|██▎                                | 7964/122310 [14:44<2:11:23, 14.50it/s][A
  7%|██▎                                | 7973/122310 [14:44<2:17:47, 13.83it/s][A
  7%|██▎                                | 7978/122310 [14:45<2:42:25, 11.73it/s][A
  7%|██▎                                | 7989/122310 [14:46<2:30:33, 12.66it/s][A
  7%|██▎                                | 7998/122310 [14:47<2:31:33, 12.57it/s][A
  7%|██▎                                | 8008/122310 [14:47<2:27:44, 12.89

step: 15860, loss: 73.03304804054584, epoch: 1



  7%|██▎                                | 8093/122310 [14:54<2:31:01, 12.60it/s][A
  7%|██▎                                | 8107/122310 [14:55<2:10:54, 14.54it/s][A
  7%|██▎                                | 8129/122310 [14:55<1:39:31, 19.12it/s][A
  7%|██▎                                | 8138/122310 [14:56<1:51:38, 17.05it/s][A
  7%|██▎                                | 8148/122310 [14:57<1:59:16, 15.95it/s][A
  7%|██▎                                | 8156/122310 [14:58<2:11:42, 14.45it/s][A
  7%|██▎                                | 8159/122310 [14:58<2:47:54, 11.33it/s][A
  7%|██▎                                | 8176/122310 [14:59<2:08:01, 14.86it/s][A
  7%|██▎                                | 8193/122310 [15:00<1:49:38, 17.35it/s][A
  7%|██▎                                | 8199/122310 [15:01<2:10:02, 14.62it/s][A
  7%|██▎                                | 8202/122310 [15:02<3:35:49,  8.81it/s][A
  7%|██▎                                | 8213/122310 [15:03<3:03:44, 10.35

step: 15880, loss: 73.32403692198466, epoch: 1



  7%|██▎                                | 8266/122310 [15:09<3:42:45,  8.53it/s][A
  7%|██▎                                | 8271/122310 [15:09<3:56:25,  8.04it/s][A
  7%|██▎                                | 8280/122310 [15:10<3:24:25,  9.30it/s][A
  7%|██▎                                | 8290/122310 [15:11<2:58:57, 10.62it/s][A
  7%|██▎                                | 8298/122310 [15:12<2:57:31, 10.70it/s][A
  7%|██▍                                | 8307/122310 [15:12<2:51:27, 11.08it/s][A
  7%|██▍                                | 8310/122310 [15:13<3:30:25,  9.03it/s][A
  7%|██▍                                | 8320/122310 [15:14<3:02:03, 10.44it/s][A
  7%|██▍                                | 8331/122310 [15:14<2:41:01, 11.80it/s][A
  7%|██▍                                | 8343/122310 [15:15<2:24:39, 13.13it/s][A
  7%|██▍                                | 8349/122310 [15:16<2:43:04, 11.65it/s][A
  7%|██▍                                | 8361/122310 [15:17<2:25:29, 13.05

step: 15900, loss: 77.57903386907314, epoch: 1



  7%|██▍                                | 8447/122310 [15:23<2:15:43, 13.98it/s][A
  7%|██▍                                | 8455/122310 [15:24<2:25:19, 13.06it/s][A
  7%|██▍                                | 8467/122310 [15:25<2:15:04, 14.05it/s][A
  7%|██▍                                | 8478/122310 [15:27<3:32:47,  8.92it/s][A
  7%|██▍                                | 8482/122310 [15:28<3:50:30,  8.23it/s][A
  7%|██▍                                | 8490/122310 [15:28<3:34:31,  8.84it/s][A
  7%|██▍                                | 8497/122310 [15:29<3:30:34,  9.01it/s][A
  7%|██▍                                | 8505/122310 [15:30<3:19:34,  9.50it/s][A
  7%|██▍                                | 8523/122310 [15:31<2:20:05, 13.54it/s][A
  7%|██▍                                | 8532/122310 [15:32<3:03:44, 10.32it/s][A
  7%|██▍                                | 8537/122310 [15:33<3:19:20,  9.51it/s][A
  7%|██▍                                | 8547/122310 [15:34<2:59:28, 10.56

step: 15920, loss: 76.19827203706613, epoch: 1



  7%|██▍                                | 8602/122310 [15:38<2:32:06, 12.46it/s][A
  7%|██▍                                | 8617/122310 [15:39<2:07:42, 14.84it/s][A
  7%|██▍                                | 8624/122310 [15:39<2:23:16, 13.23it/s][A
  7%|██▍                                | 8639/122310 [15:40<2:03:01, 15.40it/s][A
  7%|██▍                                | 8646/122310 [15:41<2:19:29, 13.58it/s][A
  7%|██▍                                | 8649/122310 [15:42<2:55:13, 10.81it/s][A
  7%|██▍                                | 8654/122310 [15:43<4:16:08,  7.40it/s][A
  7%|██▍                                | 8664/122310 [15:44<3:31:26,  8.96it/s][A
  7%|██▍                                | 8672/122310 [15:45<3:20:00,  9.47it/s][A
  7%|██▍                                | 8681/122310 [15:45<3:05:12, 10.23it/s][A
  7%|██▍                                | 8692/122310 [15:46<2:43:53, 11.55it/s][A
  7%|██▍                                | 8700/122310 [15:47<2:46:41, 11.36

step: 15940, loss: 79.07256520057483, epoch: 1



  7%|██▌                                | 8788/122310 [15:53<1:57:08, 16.15it/s][A
  7%|██▌                                | 8796/122310 [15:53<2:09:24, 14.62it/s][A
  7%|██▌                                | 8808/122310 [15:54<2:04:55, 15.14it/s][A
  7%|██▌                                | 8820/122310 [15:55<2:02:09, 15.48it/s][A
  7%|██▌                                | 8822/122310 [15:56<2:42:55, 11.61it/s][A
  7%|██▌                                | 8824/122310 [15:56<3:30:19,  8.99it/s][A
  7%|██▌                                | 8834/122310 [15:57<3:02:42, 10.35it/s][A
  7%|██▌                                | 8842/122310 [15:58<2:59:33, 10.53it/s][A
  7%|██▌                                | 8849/122310 [15:59<3:05:37, 10.19it/s][A
  7%|██▌                                | 8856/122310 [15:59<3:08:23, 10.04it/s][A
  7%|██▌                                | 8866/122310 [16:00<2:50:22, 11.10it/s][A
  7%|██▌                                | 8875/122310 [16:01<2:45:14, 11.44

step: 15960, loss: 72.35871950404336, epoch: 1



  7%|██▌                                | 8949/122310 [16:07<2:41:41, 11.68it/s][A
  7%|██▌                                | 8953/122310 [16:08<3:11:51,  9.85it/s][A
  7%|██▌                                | 8959/122310 [16:09<3:22:04,  9.35it/s][A
  7%|██▌                                | 8968/122310 [16:10<3:04:42, 10.23it/s][A
  7%|██▌                                | 8975/122310 [16:10<3:08:32, 10.02it/s][A
  7%|██▌                                | 8986/122310 [16:11<2:44:06, 11.51it/s][A
  7%|██▌                                | 8994/122310 [16:12<2:46:48, 11.32it/s][A
  7%|██▌                                | 9001/122310 [16:13<2:55:09, 10.78it/s][A
  7%|██▌                                | 9010/122310 [16:13<2:47:40, 11.26it/s][A
  7%|██▌                                | 9019/122310 [16:14<2:43:23, 11.56it/s][A
  7%|██▌                                | 9024/122310 [16:15<3:06:21, 10.13it/s][A
  7%|██▌                                | 9037/122310 [16:15<2:32:30, 12.38

step: 15980, loss: 96.01850781199417, epoch: 1



  7%|██▌                                | 9115/122310 [16:22<2:13:51, 14.09it/s][A
  7%|██▌                                | 9127/122310 [16:23<2:07:42, 14.77it/s][A
  7%|██▌                                | 9139/122310 [16:24<2:03:35, 15.26it/s][A
  7%|██▌                                | 9152/122310 [16:24<1:57:48, 16.01it/s][A
  7%|██▌                                | 9158/122310 [16:25<2:18:58, 13.57it/s][A
  7%|██▌                                | 9160/122310 [16:26<3:01:23, 10.40it/s][A
  7%|██▌                                | 9170/122310 [16:26<2:46:07, 11.35it/s][A
  8%|██▋                                | 9188/122310 [16:27<2:02:56, 15.34it/s][A
  8%|██▋                                | 9200/122310 [16:28<2:00:37, 15.63it/s][A
  8%|██▋                                | 9212/122310 [16:29<1:58:59, 15.84it/s][A
  8%|██▋                                | 9229/122310 [16:29<1:44:43, 18.00it/s][A
  8%|██▋                                | 9239/122310 [16:30<1:52:48, 16.71

step: 16000, loss: 102.82974283137264, epoch: 1
sim1 and sim2 are 0.5604768065323119, 0.21542775301038247
cosine of pred and queen: 0.18628120984240804
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: talks
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: chamber
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: group
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharas


  8%|██▋                                | 9294/122310 [16:46<2:40:10, 11.76it/s][A

Actual: jharkhand:ranchi::punjab:chandigarh, pred: singh
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: thiruvananthapuram
Actual: india:delhi::serbia:belgrade, pred: emblem
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: media
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: ukraine
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: italy
Actual: thailand:thai::india:indian, pred: ruto
Actual: sweden:swedish::netherlands:dutch, pred: government
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: dovish
Actual: poland:polish::italy:italian


  8%|██▌                              | 9297/122310 [17:52<107:02:25,  3.41s/it][A
  8%|██▌                              | 9298/122310 [17:53<102:08:46,  3.25s/it][A
  8%|██▌                               | 9305/122310 [17:54<65:52:44,  2.10s/it][A
  8%|██▌                               | 9314/122310 [17:54<40:23:48,  1.29s/it][A
  8%|██▌                               | 9322/122310 [17:55<27:40:50,  1.13it/s][A
  8%|██▌                               | 9328/122310 [17:56<21:19:27,  1.47it/s][A
  8%|██▌                               | 9335/122310 [17:56<15:46:16,  1.99it/s][A
  8%|██▌                               | 9336/122310 [17:57<16:10:47,  1.94it/s][A
  8%|██▌                               | 9343/122310 [17:58<11:23:18,  2.76it/s][A
  8%|██▋                                | 9352/122310 [17:59<7:47:57,  4.02it/s][A
  8%|██▋                                | 9363/122310 [17:59<5:25:33,  5.78it/s][A
  8%|██▋                                | 9373/122310 [18:00<4:20:41,  7.22

step: 16020, loss: 77.53669721346957, epoch: 1



  8%|██▋                                | 9446/122310 [18:07<3:12:48,  9.76it/s][A
  8%|██▋                                | 9453/122310 [18:08<3:13:20,  9.73it/s][A
  8%|██▋                                | 9457/122310 [18:09<3:41:59,  8.47it/s][A
  8%|██▋                                | 9465/122310 [18:10<3:23:38,  9.24it/s][A
  8%|██▋                                | 9476/122310 [18:10<2:50:51, 11.01it/s][A
  8%|██▋                                | 9487/122310 [18:11<2:33:34, 12.24it/s][A
  8%|██▋                                | 9493/122310 [18:12<2:52:26, 10.90it/s][A
  8%|██▋                                | 9500/122310 [18:12<2:57:30, 10.59it/s][A
  8%|██▋                                | 9511/122310 [18:13<2:37:45, 11.92it/s][A
  8%|██▋                                | 9518/122310 [18:14<2:47:24, 11.23it/s][A
  8%|██▋                                | 9525/122310 [18:15<2:54:56, 10.74it/s][A
  8%|██▋                                | 9534/122310 [18:15<2:47:46, 11.20

step: 16040, loss: 82.54731070636034, epoch: 1



  8%|██▊                                | 9622/122310 [18:22<2:13:08, 14.11it/s][A
  8%|██▊                                | 9633/122310 [18:23<2:10:36, 14.38it/s][A
  8%|██▊                                | 9645/122310 [18:23<2:05:04, 15.01it/s][A
  8%|██▊                                | 9660/122310 [18:24<1:52:44, 16.65it/s][A
  8%|██▊                                | 9676/122310 [18:25<1:43:03, 18.22it/s][A
  8%|██▊                                | 9681/122310 [18:26<2:06:53, 14.79it/s][A
  8%|██▊                                | 9688/122310 [18:26<2:22:35, 13.16it/s][A
  8%|██▊                                | 9693/122310 [18:29<4:25:56,  7.06it/s][A
  8%|██▊                                | 9698/122310 [18:29<4:26:53,  7.03it/s][A
  8%|██▊                                | 9705/122310 [18:30<4:06:57,  7.60it/s][A
  8%|██▊                                | 9715/122310 [18:31<3:25:36,  9.13it/s][A
  8%|██▊                                | 9719/122310 [18:32<3:49:25,  8.18

step: 16060, loss: 81.27833595127511, epoch: 1



  8%|██▊                                | 9782/122310 [18:37<2:36:06, 12.01it/s][A
  8%|██▊                                | 9787/122310 [18:37<2:59:04, 10.47it/s][A
  8%|██▊                                | 9798/122310 [18:38<2:38:12, 11.85it/s][A
  8%|██▊                                | 9813/122310 [18:39<2:09:42, 14.45it/s][A
  8%|██▊                                | 9821/122310 [18:40<2:19:57, 13.40it/s][A
  8%|██▊                                | 9836/122310 [18:40<2:00:56, 15.50it/s][A
  8%|██▊                                | 9838/122310 [18:41<2:40:21, 11.69it/s][A
  8%|██▊                                | 9846/122310 [18:42<2:44:23, 11.40it/s][A
  8%|██▊                                | 9851/122310 [18:42<3:05:40, 10.09it/s][A
  8%|██▊                                | 9856/122310 [18:43<3:25:37,  9.12it/s][A
  8%|██▊                                | 9862/122310 [18:44<3:32:41,  8.81it/s][A
  8%|██▊                                | 9870/122310 [18:45<3:18:10,  9.46

step: 16080, loss: 105.48267813654984, epoch: 1



  8%|██▊                                | 9930/122310 [18:51<3:22:01,  9.27it/s][A
  8%|██▊                                | 9940/122310 [18:52<2:56:43, 10.60it/s][A
  8%|██▊                                | 9948/122310 [18:53<2:56:13, 10.63it/s][A
  8%|██▊                                | 9956/122310 [18:54<2:56:01, 10.64it/s][A
  8%|██▊                                | 9966/122310 [18:54<2:43:01, 11.49it/s][A
  8%|██▊                                | 9971/122310 [18:55<3:06:20, 10.05it/s][A
  8%|██▊                                | 9976/122310 [18:56<3:24:46,  9.14it/s][A
  8%|██▊                                | 9982/122310 [18:56<3:31:13,  8.86it/s][A
  8%|██▊                                | 9992/122310 [18:57<3:02:28, 10.26it/s][A
  8%|██▊                               | 10011/122310 [18:58<2:05:23, 14.93it/s][A
  8%|██▊                               | 10023/122310 [18:59<2:01:38, 15.39it/s][A
  8%|██▊                               | 10034/122310 [18:59<2:02:48, 15.24

step: 16100, loss: 94.94891311288164, epoch: 1



  8%|██▊                               | 10120/122310 [19:06<2:15:30, 13.80it/s][A
  8%|██▊                               | 10130/122310 [19:07<2:15:40, 13.78it/s][A
  8%|██▊                               | 10138/122310 [19:07<2:24:32, 12.93it/s][A
  8%|██▊                               | 10147/122310 [19:08<2:28:17, 12.61it/s][A
  8%|██▊                               | 10151/122310 [19:09<2:57:35, 10.53it/s][A
  8%|██▊                               | 10158/122310 [19:10<3:02:34, 10.24it/s][A
  8%|██▊                               | 10164/122310 [19:10<3:14:07,  9.63it/s][A
  8%|██▊                               | 10174/122310 [19:11<2:52:12, 10.85it/s][A
  8%|██▊                               | 10190/122310 [19:12<2:12:23, 14.12it/s][A
  8%|██▊                               | 10202/122310 [19:13<2:06:04, 14.82it/s][A
  8%|██▊                               | 10212/122310 [19:13<2:09:22, 14.44it/s][A
  8%|██▊                               | 10219/122310 [19:14<2:24:07, 12.96

step: 16120, loss: 85.42103454509277, epoch: 1



  8%|██▊                               | 10302/122310 [19:21<3:02:21, 10.24it/s][A
  8%|██▊                               | 10315/122310 [19:21<2:29:46, 12.46it/s][A
  8%|██▊                               | 10322/122310 [19:22<2:40:33, 11.63it/s][A
  8%|██▊                               | 10332/122310 [19:23<2:32:28, 12.24it/s][A
  8%|██▊                               | 10338/122310 [19:24<2:49:37, 11.00it/s][A
  8%|██▉                               | 10344/122310 [19:24<3:03:07, 10.19it/s][A
  8%|██▉                               | 10348/122310 [19:25<3:32:54,  8.76it/s][A
  8%|██▉                               | 10359/122310 [19:26<2:55:25, 10.64it/s][A
  8%|██▉                               | 10361/122310 [19:27<3:46:57,  8.22it/s][A
  8%|██▉                               | 10368/122310 [19:27<3:36:57,  8.60it/s][A
  8%|██▉                               | 10379/122310 [19:28<2:57:29, 10.51it/s][A
  8%|██▉                               | 10386/122310 [19:29<3:02:14, 10.24

step: 16140, loss: 79.30392085158957, epoch: 1



  9%|██▉                               | 10443/122310 [19:35<4:04:56,  7.61it/s][A
  9%|██▉                               | 10451/122310 [19:36<3:36:31,  8.61it/s][A
  9%|██▉                               | 10469/122310 [19:37<2:19:27, 13.37it/s][A
  9%|██▉                               | 10478/122310 [19:38<2:23:19, 13.00it/s][A
  9%|██▉                               | 10491/122310 [19:38<2:10:09, 14.32it/s][A
  9%|██▉                               | 10497/122310 [19:39<2:30:20, 12.40it/s][A
  9%|██▉                               | 10505/122310 [19:40<2:37:11, 11.85it/s][A
  9%|██▉                               | 10511/122310 [19:41<2:52:00, 10.83it/s][A
  9%|██▉                               | 10520/122310 [19:41<2:46:05, 11.22it/s][A
  9%|██▉                               | 10532/122310 [19:42<2:26:43, 12.70it/s][A
  9%|██▉                               | 10545/122310 [19:43<2:11:10, 14.20it/s][A
  9%|██▉                               | 10559/122310 [19:44<1:59:21, 15.60

step: 16160, loss: 87.50670969785227, epoch: 1



  9%|██▉                               | 10634/122310 [19:50<2:52:58, 10.76it/s][A
  9%|██▉                               | 10643/122310 [19:51<2:45:39, 11.24it/s][A
  9%|██▉                               | 10650/122310 [19:52<2:53:20, 10.74it/s][A
  9%|██▉                               | 10658/122310 [19:52<2:52:54, 10.76it/s][A
  9%|██▉                               | 10666/122310 [19:53<2:52:04, 10.81it/s][A
  9%|██▉                               | 10676/122310 [19:54<2:39:46, 11.64it/s][A
  9%|██▉                               | 10682/122310 [19:55<2:55:45, 10.58it/s][A
  9%|██▉                               | 10684/122310 [19:55<3:43:14,  8.33it/s][A
  9%|██▉                               | 10688/122310 [19:56<4:07:39,  7.51it/s][A
  9%|██▉                               | 10692/122310 [19:57<4:27:35,  6.95it/s][A
  9%|██▉                               | 10703/122310 [19:57<3:17:31,  9.42it/s][A
  9%|██▉                               | 10714/122310 [19:58<2:45:39, 11.23

step: 16180, loss: 92.4901646174059, epoch: 1



  9%|██▉                               | 10755/122310 [20:05<3:19:42,  9.31it/s][A
  9%|██▉                               | 10760/122310 [20:05<3:28:57,  8.90it/s][A
  9%|██▉                               | 10766/122310 [20:06<3:31:11,  8.80it/s][A
  9%|██▉                               | 10773/122310 [20:07<3:25:36,  9.04it/s][A
  9%|██▉                               | 10779/122310 [20:07<3:29:18,  8.88it/s][A
  9%|██▉                               | 10790/122310 [20:08<2:55:23, 10.60it/s][A
  9%|███                               | 10796/122310 [20:09<3:05:38, 10.01it/s][A
  9%|███                               | 10820/122310 [20:10<1:52:07, 16.57it/s][A
  9%|███                               | 10832/122310 [20:10<1:51:33, 16.66it/s][A
  9%|███                               | 10840/122310 [20:11<2:02:55, 15.11it/s][A
  9%|███                               | 10845/122310 [20:12<2:25:50, 12.74it/s][A
  9%|███                               | 10855/122310 [20:12<2:21:39, 13.11

step: 16200, loss: 88.90030018535137, epoch: 1



  9%|███                               | 10930/122310 [20:19<3:25:03,  9.05it/s][A
  9%|███                               | 10935/122310 [20:20<3:38:32,  8.49it/s][A
  9%|███                               | 10947/122310 [20:20<2:49:13, 10.97it/s][A
  9%|███                               | 10967/122310 [20:21<1:55:18, 16.09it/s][A
  9%|███                               | 10973/122310 [20:22<2:14:36, 13.79it/s][A
  9%|███                               | 10982/122310 [20:22<2:18:08, 13.43it/s][A
  9%|███                               | 10987/122310 [20:23<2:41:11, 11.51it/s][A
  9%|███                               | 10996/122310 [20:24<2:36:24, 11.86it/s][A
  9%|███                               | 11007/122310 [20:25<2:23:21, 12.94it/s][A
  9%|███                               | 11016/122310 [20:25<2:24:42, 12.82it/s][A
  9%|███                               | 11020/122310 [20:26<2:53:26, 10.69it/s][A
  9%|███                               | 11029/122310 [20:27<2:44:24, 11.28

step: 16220, loss: 98.30604755467604, epoch: 1



  9%|███                               | 11093/122310 [20:33<2:44:23, 11.28it/s][A
  9%|███                               | 11098/122310 [20:34<3:04:45, 10.03it/s][A
  9%|███                               | 11105/122310 [20:35<3:06:01,  9.96it/s][A
  9%|███                               | 11115/122310 [20:35<2:45:45, 11.18it/s][A
  9%|███                               | 11120/122310 [20:36<3:06:43,  9.92it/s][A
  9%|███                               | 11129/122310 [20:37<2:52:16, 10.76it/s][A
  9%|███                               | 11136/122310 [20:37<2:57:55, 10.41it/s][A
  9%|███                               | 11141/122310 [20:38<3:15:56,  9.46it/s][A
  9%|███                               | 11150/122310 [20:39<2:58:27, 10.38it/s][A
  9%|███                               | 11166/122310 [20:40<2:12:12, 14.01it/s][A
  9%|███                               | 11175/122310 [20:40<2:17:21, 13.49it/s][A
  9%|███                               | 11177/122310 [20:41<2:59:27, 10.32

step: 16240, loss: 89.6639395377914, epoch: 1



  9%|███▏                              | 11276/122310 [20:47<1:51:11, 16.64it/s][A
  9%|███▏                              | 11279/122310 [20:48<2:22:45, 12.96it/s][A
  9%|███▏                              | 11290/122310 [20:49<2:15:13, 13.68it/s][A
  9%|███▏                              | 11294/122310 [20:50<2:44:22, 11.26it/s][A
  9%|███▏                              | 11307/122310 [20:50<2:20:16, 13.19it/s][A
  9%|███▏                              | 11311/122310 [20:51<2:48:52, 10.95it/s][A
  9%|███▏                              | 11319/122310 [20:52<2:48:37, 10.97it/s][A
  9%|███▏                              | 11328/122310 [20:52<2:41:09, 11.48it/s][A
  9%|███▏                              | 11333/122310 [20:53<3:02:23, 10.14it/s][A
  9%|███▏                              | 11338/122310 [20:54<3:21:36,  9.17it/s][A
  9%|███▏                              | 11351/122310 [20:55<2:36:02, 11.85it/s][A
  9%|███▏                              | 11362/122310 [20:55<2:23:00, 12.93

step: 16260, loss: 84.78983171797628, epoch: 1



  9%|███▏                              | 11420/122310 [21:02<2:55:51, 10.51it/s][A
  9%|███▏                              | 11425/122310 [21:02<3:15:03,  9.47it/s][A
  9%|███▏                              | 11430/122310 [21:03<3:32:17,  8.71it/s][A
  9%|███▏                              | 11437/122310 [21:04<3:24:10,  9.05it/s][A
  9%|███▏                              | 11443/122310 [21:05<3:28:46,  8.85it/s][A
  9%|███▏                              | 11452/122310 [21:06<4:00:41,  7.68it/s][A
  9%|███▏                              | 11457/122310 [21:07<4:05:22,  7.53it/s][A
  9%|███▏                              | 11471/122310 [21:07<2:53:21, 10.66it/s][A
  9%|███▏                              | 11482/122310 [21:08<2:34:23, 11.96it/s][A
  9%|███▏                              | 11489/122310 [21:09<2:42:39, 11.36it/s][A
  9%|███▏                              | 11505/122310 [21:10<2:07:25, 14.49it/s][A
  9%|███▏                              | 11514/122310 [21:10<2:12:38, 13.92

step: 16280, loss: 83.16358315503847, epoch: 1



  9%|███▏                              | 11601/122310 [21:16<1:46:44, 17.28it/s][A
  9%|███▏                              | 11611/122310 [21:17<1:52:46, 16.36it/s][A
 10%|███▏                              | 11622/122310 [21:18<2:29:54, 12.31it/s][A
 10%|███▏                              | 11624/122310 [21:19<3:04:55,  9.98it/s][A
 10%|███▏                              | 11634/122310 [21:20<2:47:00, 11.04it/s][A
 10%|███▏                              | 11642/122310 [21:20<2:46:13, 11.10it/s][A
 10%|███▏                              | 11659/122310 [21:21<2:06:24, 14.59it/s][A
 10%|███▏                              | 11664/122310 [21:22<2:28:20, 12.43it/s][A
 10%|███▏                              | 11668/122310 [21:22<2:56:40, 10.44it/s][A
 10%|███▏                              | 11681/122310 [21:23<2:25:06, 12.71it/s][A
 10%|███▏                              | 11685/122310 [21:24<2:53:53, 10.60it/s][A
 10%|███▎                              | 11694/122310 [21:25<2:48:59, 10.91

step: 16300, loss: 90.00830035396004, epoch: 1



 10%|███▎                              | 11770/122310 [21:31<2:42:26, 11.34it/s][A
 10%|███▎                              | 11773/122310 [21:31<3:20:37,  9.18it/s][A
 10%|███▎                              | 11775/122310 [21:32<4:15:09,  7.22it/s][A
 10%|███▎                              | 11778/122310 [21:33<4:53:30,  6.28it/s][A
 10%|███▎                              | 11782/122310 [21:34<5:04:26,  6.05it/s][A
 10%|███▎                              | 11788/122310 [21:34<4:36:39,  6.66it/s][A
 10%|███▎                              | 11797/122310 [21:35<3:40:41,  8.35it/s][A
 10%|███▎                              | 11807/122310 [21:36<3:06:51,  9.86it/s][A
 10%|███▎                              | 11812/122310 [21:37<3:25:01,  8.98it/s][A
 10%|███▎                              | 11816/122310 [21:37<3:53:01,  7.90it/s][A
 10%|███▎                              | 11822/122310 [21:38<3:50:28,  7.99it/s][A
 10%|███▎                              | 11831/122310 [21:39<3:18:24,  9.28

step: 16320, loss: 102.75215451717271, epoch: 1



 10%|███▎                              | 11894/122310 [21:45<3:19:34,  9.22it/s][A
 10%|███▎                              | 11904/122310 [21:46<2:52:57, 10.64it/s][A
 10%|███▎                              | 11907/122310 [21:47<3:30:14,  8.75it/s][A
 10%|███▎                              | 11918/122310 [21:47<2:51:13, 10.75it/s][A
 10%|███▎                              | 11926/122310 [21:48<2:49:03, 10.88it/s][A
 10%|███▎                              | 11928/122310 [21:49<3:37:20,  8.46it/s][A
 10%|███▎                              | 11933/122310 [21:49<3:48:19,  8.06it/s][A
 10%|███▎                              | 11948/122310 [21:50<2:35:57, 11.79it/s][A
 10%|███▎                              | 11954/122310 [21:51<2:52:33, 10.66it/s][A
 10%|███▎                              | 11965/122310 [21:52<2:34:04, 11.94it/s][A
 10%|███▎                              | 11971/122310 [21:52<2:50:41, 10.77it/s][A
 10%|███▎                              | 11981/122310 [21:53<2:38:16, 11.62

step: 16340, loss: 88.47835311357677, epoch: 1



 10%|███▎                              | 12050/122310 [22:00<3:02:32, 10.07it/s][A
 10%|███▎                              | 12063/122310 [22:01<2:29:40, 12.28it/s][A
 10%|███▎                              | 12069/122310 [22:01<2:46:18, 11.05it/s][A
 10%|███▎                              | 12079/122310 [22:02<2:35:57, 11.78it/s][A
 10%|███▎                              | 12087/122310 [22:03<2:39:54, 11.49it/s][A
 10%|███▎                              | 12101/122310 [22:04<2:14:20, 13.67it/s][A
 10%|███▎                              | 12106/122310 [22:04<2:38:44, 11.57it/s][A
 10%|███▎                              | 12113/122310 [22:05<2:47:52, 10.94it/s][A
 10%|███▎                              | 12128/122310 [22:06<2:14:05, 13.70it/s][A
 10%|███▎                              | 12139/122310 [22:07<2:10:46, 14.04it/s][A
 10%|███▍                              | 12152/122310 [22:07<2:02:07, 15.03it/s][A
 10%|███▍                              | 12158/122310 [22:08<2:21:45, 12.95

step: 16360, loss: 88.97989890522884, epoch: 1



 10%|███▍                              | 12244/122310 [22:15<2:27:24, 12.44it/s][A
 10%|███▍                              | 12258/122310 [22:15<2:09:41, 14.14it/s][A
 10%|███▍                              | 12267/122310 [22:16<2:14:48, 13.60it/s][A
 10%|███▍                              | 12273/122310 [22:17<2:31:44, 12.09it/s][A
 10%|███▍                              | 12293/122310 [22:18<1:52:05, 16.36it/s][A
 10%|███▍                              | 12301/122310 [22:18<2:04:24, 14.74it/s][A
 10%|███▍                              | 12306/122310 [22:19<2:27:57, 12.39it/s][A
 10%|███▍                              | 12318/122310 [22:20<2:15:41, 13.51it/s][A
 10%|███▍                              | 12328/122310 [22:21<2:15:30, 13.53it/s][A
 10%|███▍                              | 12349/122310 [22:21<1:42:12, 17.93it/s][A
 10%|███▍                              | 12354/122310 [22:22<2:05:35, 14.59it/s][A
 10%|███▍                              | 12361/122310 [22:23<2:20:28, 13.05

step: 16380, loss: 105.31820507806006, epoch: 1



 10%|███▍                              | 12397/122310 [22:30<5:43:47,  5.33it/s][A
 10%|███▍                              | 12406/122310 [22:30<4:25:27,  6.90it/s][A
 10%|███▍                              | 12421/122310 [22:31<2:59:29, 10.20it/s][A
 10%|███▍                              | 12429/122310 [22:32<2:56:39, 10.37it/s][A
 10%|███▍                              | 12445/122310 [22:32<2:16:20, 13.43it/s][A
 10%|███▍                              | 12448/122310 [22:33<2:49:33, 10.80it/s][A
 10%|███▍                              | 12463/122310 [22:34<2:15:32, 13.51it/s][A
 10%|███▍                              | 12471/122310 [22:35<2:23:55, 12.72it/s][A
 10%|███▍                              | 12474/122310 [22:35<3:00:29, 10.14it/s][A
 10%|███▍                              | 12476/122310 [22:36<3:50:27,  7.94it/s][A
 10%|███▍                              | 12488/122310 [22:37<2:57:09, 10.33it/s][A
 10%|███▍                              | 12504/122310 [22:38<2:14:17, 13.63

step: 16400, loss: 73.39205664134778, epoch: 1
sim1 and sim2 are 0.580674261568368, 0.17123287661247671
cosine of pred and queen: 0.09157484522283496
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: talks
Actual: beijing:china::tokyo:japan, pred: size
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: unobnoxious
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: group
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mah

Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: became
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: child
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: htay
Actual: man:woman::brothers:sisters, pred: multiplying
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: central
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: karnataka
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 10%|███▍                             | 12560/122310 [23:59<93:37:09,  3.07s/it][A

Actual: india:rupee::denmark:krone, pred: rome
Accuracy is 0.07692307692307693



 10%|███▍                             | 12574/122310 [24:00<53:14:29,  1.75s/it][A
 10%|███▍                             | 12581/122310 [24:01<41:17:21,  1.35s/it][A
 10%|███▍                             | 12591/122310 [24:01<28:29:46,  1.07it/s][A
 10%|███▍                             | 12603/122310 [24:02<18:54:40,  1.61it/s][A
 10%|███▍                             | 12608/122310 [24:03<16:21:02,  1.86it/s][A
 10%|███▍                             | 12613/122310 [24:04<13:57:21,  2.18it/s][A
 10%|███▍                             | 12621/122310 [24:04<10:25:47,  2.92it/s][A
 10%|███▌                              | 12631/122310 [24:05<7:28:23,  4.08it/s][A
 10%|███▌                              | 12636/122310 [24:06<6:51:21,  4.44it/s][A
 10%|███▌                              | 12645/122310 [24:06<5:20:03,  5.71it/s][A
 10%|███▌                              | 12661/122310 [24:07<3:29:34,  8.72it/s][A
 10%|███▌                              | 12673/122310 [24:08<2:56:42, 10.34

step: 16420, loss: 91.54137690133484, epoch: 1



 10%|███▌                              | 12737/122310 [24:15<3:19:10,  9.17it/s][A
 10%|███▌                              | 12744/122310 [24:15<3:15:58,  9.32it/s][A
 10%|███▌                              | 12758/122310 [24:16<2:28:45, 12.27it/s][A
 10%|███▌                              | 12771/122310 [24:17<2:11:41, 13.86it/s][A
 10%|███▌                              | 12783/122310 [24:17<2:04:09, 14.70it/s][A
 10%|███▌                              | 12791/122310 [24:18<2:14:44, 13.55it/s][A
 10%|███▌                              | 12804/122310 [24:19<2:03:23, 14.79it/s][A
 10%|███▌                              | 12810/122310 [24:20<2:22:18, 12.82it/s][A
 10%|███▌                              | 12813/122310 [24:20<2:58:33, 10.22it/s][A
 10%|███▌                              | 12820/122310 [24:21<3:02:38,  9.99it/s][A
 10%|███▌                              | 12825/122310 [24:22<3:21:42,  9.05it/s][A
 10%|███▌                              | 12830/122310 [24:23<3:37:31,  8.39

step: 16440, loss: 86.88620986576251, epoch: 1



 11%|███▌                              | 12909/122310 [24:29<2:38:07, 11.53it/s][A
 11%|███▌                              | 12921/122310 [24:30<2:20:20, 12.99it/s][A
 11%|███▌                              | 12931/122310 [24:31<2:17:57, 13.21it/s][A
 11%|███▌                              | 12939/122310 [24:31<2:26:10, 12.47it/s][A
 11%|███▌                              | 12943/122310 [24:33<3:48:18,  7.98it/s][A
 11%|███▌                              | 12949/122310 [24:34<3:45:57,  8.07it/s][A
 11%|███▌                              | 12962/122310 [24:34<2:51:41, 10.61it/s][A
 11%|███▌                              | 12969/122310 [24:35<2:56:37, 10.32it/s][A
 11%|███▌                              | 12985/122310 [24:36<2:52:53, 10.54it/s][A
 11%|███▌                              | 12991/122310 [24:37<3:01:29, 10.04it/s][A
 11%|███▌                              | 12997/122310 [24:38<3:10:17,  9.57it/s][A
 11%|███▌                              | 13010/122310 [24:39<2:34:52, 11.76

step: 16460, loss: 102.61141620590337, epoch: 1



 11%|███▋                              | 13055/122310 [24:44<3:21:56,  9.02it/s][A
 11%|███▋                              | 13059/122310 [24:46<6:03:40,  5.01it/s][A
 11%|███▋                              | 13060/122310 [24:47<7:07:30,  4.26it/s][A
 11%|███▋                              | 13072/122310 [24:47<4:17:46,  7.06it/s][A
 11%|███▋                              | 13085/122310 [24:48<3:07:27,  9.71it/s][A
 11%|███▋                              | 13089/122310 [24:49<3:32:36,  8.56it/s][A
 11%|███▋                              | 13092/122310 [24:50<4:06:31,  7.38it/s][A
 11%|███▋                              | 13104/122310 [24:50<3:04:00,  9.89it/s][A
 11%|███▋                              | 13111/122310 [24:51<3:06:17,  9.77it/s][A
 11%|███▋                              | 13113/122310 [24:52<3:54:35,  7.76it/s][A
 11%|███▋                              | 13120/122310 [24:53<3:39:30,  8.29it/s][A
 11%|███▋                              | 13130/122310 [24:53<3:04:20,  9.87

step: 16480, loss: 148.45876730047374, epoch: 1



 11%|███▋                              | 13163/122310 [24:58<4:16:41,  7.09it/s][A
 11%|███▋                              | 13167/122310 [24:59<4:30:38,  6.72it/s][A
 11%|███▋                              | 13172/122310 [25:00<4:29:07,  6.76it/s][A
 11%|███▋                              | 13185/122310 [25:01<3:08:54,  9.63it/s][A
 11%|███▋                              | 13193/122310 [25:01<3:02:29,  9.97it/s][A
 11%|███▋                              | 13200/122310 [25:02<3:05:45,  9.79it/s][A
 11%|███▋                              | 13205/122310 [25:03<3:23:10,  8.95it/s][A
 11%|███▋                              | 13218/122310 [25:04<2:38:27, 11.47it/s][A
 11%|███▋                              | 13227/122310 [25:04<2:35:05, 11.72it/s][A
 11%|███▋                              | 13236/122310 [25:05<2:32:54, 11.89it/s][A
 11%|███▋                              | 13238/122310 [25:06<3:18:16,  9.17it/s][A
 11%|███▋                              | 13247/122310 [25:07<3:01:00, 10.04

step: 16500, loss: 78.99352153505588, epoch: 1



 11%|███▋                              | 13347/122310 [25:13<1:52:04, 16.20it/s][A
 11%|███▋                              | 13360/122310 [25:14<1:48:55, 16.67it/s][A
 11%|███▋                              | 13363/122310 [25:15<2:21:32, 12.83it/s][A
 11%|███▋                              | 13373/122310 [25:15<2:18:24, 13.12it/s][A
 11%|███▋                              | 13377/122310 [25:16<2:47:30, 10.84it/s][A
 11%|███▋                              | 13387/122310 [25:17<2:35:44, 11.66it/s][A
 11%|███▋                              | 13397/122310 [25:18<2:28:33, 12.22it/s][A
 11%|███▋                              | 13403/122310 [25:18<2:44:30, 11.03it/s][A
 11%|███▋                              | 13412/122310 [25:19<2:39:37, 11.37it/s][A
 11%|███▋                              | 13416/122310 [25:20<3:08:42,  9.62it/s][A
 11%|███▋                              | 13418/122310 [25:20<4:01:32,  7.51it/s][A
 11%|███▋                              | 13423/122310 [25:21<4:07:51,  7.32

step: 16520, loss: 90.64567352536244, epoch: 1



 11%|███▊                              | 13502/122310 [25:28<2:45:02, 10.99it/s][A
 11%|███▊                              | 13507/122310 [25:29<3:06:01,  9.75it/s][A
 11%|███▊                              | 13513/122310 [25:29<3:16:23,  9.23it/s][A
 11%|███▊                              | 13522/122310 [25:30<2:58:01, 10.18it/s][A
 11%|███▊                              | 13532/122310 [25:31<2:42:06, 11.18it/s][A
 11%|███▊                              | 13538/122310 [25:32<2:56:25, 10.28it/s][A
 11%|███▊                              | 13543/122310 [25:32<3:16:23,  9.23it/s][A
 11%|███▊                              | 13547/122310 [25:33<3:44:43,  8.07it/s][A
 11%|███▊                              | 13559/122310 [25:34<2:51:54, 10.54it/s][A
 11%|███▊                              | 13566/122310 [25:34<2:56:55, 10.24it/s][A
 11%|███▊                              | 13568/122310 [25:35<3:47:58,  7.95it/s][A
 11%|███▊                              | 13575/122310 [25:36<3:35:58,  8.39

step: 16540, loss: 128.90224921130263, epoch: 1



 11%|███▊                              | 13658/122310 [25:43<2:48:45, 10.73it/s][A
 11%|███▊                              | 13663/122310 [25:43<3:08:28,  9.61it/s][A
 11%|███▊                              | 13670/122310 [25:44<3:09:01,  9.58it/s][A
 11%|███▊                              | 13674/122310 [25:45<3:37:22,  8.33it/s][A
 11%|███▊                              | 13683/122310 [25:46<4:08:16,  7.29it/s][A
 11%|███▊                              | 13689/122310 [25:47<4:00:31,  7.53it/s][A
 11%|███▊                              | 13702/122310 [25:48<2:57:53, 10.18it/s][A
 11%|███▊                              | 13709/122310 [25:49<3:02:05,  9.94it/s][A
 11%|███▊                              | 13711/122310 [25:49<3:47:26,  7.96it/s][A
 11%|███▊                              | 13717/122310 [25:50<3:45:34,  8.02it/s][A
 11%|███▊                              | 13721/122310 [25:51<4:08:37,  7.28it/s][A
 11%|███▊                              | 13730/122310 [25:51<3:27:36,  8.72

step: 16560, loss: 96.39689781033627, epoch: 1



 11%|███▊                              | 13814/122310 [25:57<2:18:56, 13.01it/s][A
 11%|███▊                              | 13828/122310 [25:58<2:01:45, 14.85it/s][A
 11%|███▊                              | 13842/122310 [25:59<1:52:43, 16.04it/s][A
 11%|███▊                              | 13845/122310 [26:00<3:08:32,  9.59it/s][A
 11%|███▊                              | 13856/122310 [26:01<2:46:07, 10.88it/s][A
 11%|███▊                              | 13861/122310 [26:02<3:03:49,  9.83it/s][A
 11%|███▊                              | 13872/122310 [26:03<2:41:48, 11.17it/s][A
 11%|███▊                              | 13884/122310 [26:03<2:23:19, 12.61it/s][A
 11%|███▊                              | 13892/122310 [26:04<2:29:42, 12.07it/s][A
 11%|███▊                              | 13896/122310 [26:05<2:57:21, 10.19it/s][A
 11%|███▊                              | 13909/122310 [26:06<2:26:01, 12.37it/s][A
 11%|███▊                              | 13917/122310 [26:06<2:31:33, 11.92

step: 16580, loss: 89.66051424870227, epoch: 1



 11%|███▉                              | 13975/122310 [26:12<2:56:39, 10.22it/s][A
 11%|███▉                              | 13981/122310 [26:13<3:07:33,  9.63it/s][A
 11%|███▉                              | 13993/122310 [26:14<2:35:04, 11.64it/s][A
 11%|███▉                              | 14000/122310 [26:14<2:44:00, 11.01it/s][A
 11%|███▉                              | 14007/122310 [26:15<2:51:31, 10.52it/s][A
 11%|███▉                              | 14013/122310 [26:16<3:03:42,  9.83it/s][A
 11%|███▉                              | 14021/122310 [26:17<2:58:12, 10.13it/s][A
 11%|███▉                              | 14026/122310 [26:17<3:17:41,  9.13it/s][A
 11%|███▉                              | 14035/122310 [26:19<3:52:58,  7.75it/s][A
 11%|███▉                              | 14042/122310 [26:20<3:40:17,  8.19it/s][A
 11%|███▉                              | 14049/122310 [26:20<3:31:54,  8.51it/s][A
 11%|███▉                              | 14056/122310 [26:21<3:25:05,  8.80

step: 16600, loss: 81.0095085561975, epoch: 1



 12%|███▉                              | 14130/122310 [26:27<2:35:33, 11.59it/s][A
 12%|███▉                              | 14136/122310 [26:28<2:50:37, 10.57it/s][A
 12%|███▉                              | 14148/122310 [26:28<2:27:13, 12.24it/s][A
 12%|███▉                              | 14156/122310 [26:29<2:31:42, 11.88it/s][A
 12%|███▉                              | 14165/122310 [26:30<2:31:08, 11.93it/s][A
 12%|███▉                              | 14169/122310 [26:31<2:59:29, 10.04it/s][A
 12%|███▉                              | 14173/122310 [26:31<3:28:13,  8.66it/s][A
 12%|███▉                              | 14177/122310 [26:32<3:54:13,  7.69it/s][A
 12%|███▉                              | 14181/122310 [26:33<4:17:59,  6.99it/s][A
 12%|███▉                              | 14187/122310 [26:34<4:04:31,  7.37it/s][A
 12%|███▉                              | 14199/122310 [26:34<2:59:18, 10.05it/s][A
 12%|███▉                              | 14203/122310 [26:35<3:27:48,  8.67

step: 16620, loss: 92.39620301589835, epoch: 1



 12%|███▉                              | 14274/122310 [26:42<2:35:36, 11.57it/s][A
 12%|███▉                              | 14282/122310 [26:42<2:38:31, 11.36it/s][A
 12%|███▉                              | 14292/122310 [26:43<2:29:58, 12.00it/s][A
 12%|███▉                              | 14301/122310 [26:44<2:28:48, 12.10it/s][A
 12%|███▉                              | 14303/122310 [26:45<3:13:42,  9.29it/s][A
 12%|███▉                              | 14311/122310 [26:45<3:04:07,  9.78it/s][A
 12%|███▉                              | 14323/122310 [26:46<2:33:37, 11.72it/s][A
 12%|███▉                              | 14326/122310 [26:47<3:11:47,  9.38it/s][A
 12%|███▉                              | 14328/122310 [26:48<4:02:37,  7.42it/s][A
 12%|███▉                              | 14336/122310 [26:48<3:33:34,  8.43it/s][A
 12%|███▉                              | 14348/122310 [26:49<2:47:21, 10.75it/s][A
 12%|███▉                              | 14352/122310 [26:50<3:17:55,  9.09

step: 16640, loss: 77.59060055626261, epoch: 1



 12%|████                              | 14456/122310 [26:56<1:46:33, 16.87it/s][A
 12%|████                              | 14467/122310 [26:57<1:50:22, 16.29it/s][A
 12%|████                              | 14474/122310 [26:58<2:06:14, 14.24it/s][A
 12%|████                              | 14488/122310 [26:59<1:54:53, 15.64it/s][A
 12%|████                              | 14497/122310 [26:59<2:02:42, 14.64it/s][A
 12%|████                              | 14504/122310 [27:00<2:17:11, 13.10it/s][A
 12%|████                              | 14511/122310 [27:01<2:29:37, 12.01it/s][A
 12%|████                              | 14523/122310 [27:02<2:15:13, 13.29it/s][A
 12%|████                              | 14532/122310 [27:02<2:18:41, 12.95it/s][A
 12%|████                              | 14544/122310 [27:03<2:08:43, 13.95it/s][A
 12%|████                              | 14553/122310 [27:04<2:13:41, 13.43it/s][A
 12%|████                              | 14562/122310 [27:05<2:17:44, 13.04

step: 16660, loss: 122.8686439598593, epoch: 1



 12%|████                              | 14633/122310 [27:11<3:00:53,  9.92it/s][A
 12%|████                              | 14636/122310 [27:12<3:33:26,  8.41it/s][A
 12%|████                              | 14645/122310 [27:13<3:09:38,  9.46it/s][A
 12%|████                              | 14647/122310 [27:13<3:57:02,  7.57it/s][A
 12%|████                              | 14655/122310 [27:14<3:31:01,  8.50it/s][A
 12%|████                              | 14672/122310 [27:15<2:21:07, 12.71it/s][A
 12%|████                              | 14686/122310 [27:16<2:03:20, 14.54it/s][A
 12%|████                              | 14695/122310 [27:16<2:09:23, 13.86it/s][A
 12%|████                              | 14699/122310 [27:17<2:35:44, 11.52it/s][A
 12%|████                              | 14710/122310 [27:18<2:21:28, 12.68it/s][A
 12%|████                              | 14721/122310 [27:18<2:13:06, 13.47it/s][A
 12%|████                              | 14730/122310 [27:19<2:15:43, 13.21

step: 16680, loss: 70.65051468608831, epoch: 1



 12%|████                              | 14811/122310 [27:26<2:15:54, 13.18it/s][A
 12%|████                              | 14821/122310 [27:26<2:15:47, 13.19it/s][A
 12%|████                              | 14829/122310 [27:27<2:24:57, 12.36it/s][A
 12%|████                              | 14832/122310 [27:28<3:03:14,  9.78it/s][A
 12%|████                              | 14836/122310 [27:29<3:32:05,  8.45it/s][A
 12%|████▏                             | 14845/122310 [27:30<3:09:22,  9.46it/s][A
 12%|████▏                             | 14852/122310 [27:30<3:10:39,  9.39it/s][A
 12%|████▏                             | 14857/122310 [27:31<3:29:31,  8.55it/s][A
 12%|████▏                             | 14866/122310 [27:32<3:07:59,  9.53it/s][A
 12%|████▏                             | 14871/122310 [27:33<3:26:43,  8.66it/s][A
 12%|████▏                             | 14880/122310 [27:33<3:06:10,  9.62it/s][A
 12%|████▏                             | 14891/122310 [27:34<2:41:23, 11.09

step: 16700, loss: 80.62087376658182, epoch: 1



 12%|████▏                             | 14967/122310 [27:41<2:46:01, 10.78it/s][A
 12%|████▏                             | 14977/122310 [27:42<2:35:28, 11.51it/s][A
 12%|████▏                             | 14989/122310 [27:42<2:20:49, 12.70it/s][A
 12%|████▏                             | 14993/122310 [27:43<2:49:24, 10.56it/s][A
 12%|████▏                             | 15005/122310 [27:44<2:27:27, 12.13it/s][A
 12%|████▏                             | 15017/122310 [27:45<2:15:46, 13.17it/s][A
 12%|████▏                             | 15023/122310 [27:45<2:33:39, 11.64it/s][A
 12%|████▏                             | 15027/122310 [27:46<3:03:31,  9.74it/s][A
 12%|████▏                             | 15038/122310 [27:47<2:40:02, 11.17it/s][A
 12%|████▏                             | 15043/122310 [27:48<3:03:14,  9.76it/s][A
 12%|████▏                             | 15051/122310 [27:48<2:58:16, 10.03it/s][A
 12%|████▏                             | 15064/122310 [27:49<2:26:50, 12.17

step: 16720, loss: 86.2582238119813, epoch: 1



 12%|████▏                             | 15153/122310 [27:56<2:08:37, 13.88it/s][A
 12%|████▏                             | 15156/122310 [27:57<2:43:30, 10.92it/s][A
 12%|████▏                             | 15168/122310 [27:58<2:24:55, 12.32it/s][A
 12%|████▏                             | 15176/122310 [27:58<2:30:01, 11.90it/s][A
 12%|████▏                             | 15183/122310 [27:59<2:38:36, 11.26it/s][A
 12%|████▏                             | 15196/122310 [28:00<2:14:31, 13.27it/s][A
 12%|████▏                             | 15213/122310 [28:01<1:49:36, 16.28it/s][A
 12%|████▏                             | 15222/122310 [28:01<1:56:46, 15.28it/s][A
 12%|████▏                             | 15227/122310 [28:02<2:19:03, 12.83it/s][A
 12%|████▏                             | 15240/122310 [28:03<2:03:35, 14.44it/s][A
 12%|████▏                             | 15248/122310 [28:03<2:12:38, 13.45it/s][A
 12%|████▏                             | 15256/122310 [28:04<2:19:58, 12.75

step: 16740, loss: 90.02770071828527, epoch: 1



 13%|████▎                             | 15330/122310 [28:11<2:32:42, 11.68it/s][A
 13%|████▎                             | 15338/122310 [28:11<2:35:46, 11.44it/s][A
 13%|████▎                             | 15346/122310 [28:12<2:38:55, 11.22it/s][A
 13%|████▎                             | 15356/122310 [28:13<2:29:03, 11.96it/s][A
 13%|████▎                             | 15364/122310 [28:14<2:33:49, 11.59it/s][A
 13%|████▎                             | 15369/122310 [28:14<2:55:20, 10.17it/s][A
 13%|████▎                             | 15373/122310 [28:16<4:25:43,  6.71it/s][A
 13%|████▎                             | 15377/122310 [28:16<4:38:35,  6.40it/s][A
 13%|████▎                             | 15390/122310 [28:17<3:10:42,  9.34it/s][A
 13%|████▎                             | 15398/122310 [28:18<3:02:36,  9.76it/s][A
 13%|████▎                             | 15402/122310 [28:19<3:29:16,  8.51it/s][A
 13%|████▎                             | 15412/122310 [28:19<2:58:51,  9.96

step: 16760, loss: 80.28001999334374, epoch: 1



 13%|████▎                             | 15486/122310 [28:25<2:05:45, 14.16it/s][A
 13%|████▎                             | 15493/122310 [28:26<2:19:00, 12.81it/s][A
 13%|████▎                             | 15497/122310 [28:27<2:48:00, 10.60it/s][A
 13%|████▎                             | 15503/122310 [28:28<3:00:43,  9.85it/s][A
 13%|████▎                             | 15506/122310 [28:28<3:38:33,  8.14it/s][A
 13%|████▎                             | 15514/122310 [28:29<3:18:46,  8.95it/s][A
 13%|████▎                             | 15533/122310 [28:30<2:07:18, 13.98it/s][A
 13%|████▎                             | 15543/122310 [28:30<2:08:24, 13.86it/s][A
 13%|████▎                             | 15556/122310 [28:31<2:00:08, 14.81it/s][A
 13%|████▎                             | 15558/122310 [28:32<2:38:06, 11.25it/s][A
 13%|████▎                             | 15568/122310 [28:33<2:28:55, 11.95it/s][A
 13%|████▎                             | 15587/122310 [28:33<1:50:24, 16.11

step: 16780, loss: 75.64527221355766, epoch: 1



 13%|████▎                             | 15665/122310 [28:40<2:58:06,  9.98it/s][A
 13%|████▎                             | 15668/122310 [28:41<3:36:03,  8.23it/s][A
 13%|████▎                             | 15676/122310 [28:42<3:17:29,  9.00it/s][A
 13%|████▎                             | 15679/122310 [28:42<3:56:05,  7.53it/s][A
 13%|████▎                             | 15688/122310 [28:43<3:18:57,  8.93it/s][A
 13%|████▎                             | 15699/122310 [28:44<2:45:26, 10.74it/s][A
 13%|████▎                             | 15706/122310 [28:44<2:51:28, 10.36it/s][A
 13%|████▎                             | 15713/122310 [28:45<2:56:07, 10.09it/s][A
 13%|████▎                             | 15718/122310 [28:46<3:15:24,  9.09it/s][A
 13%|████▎                             | 15730/122310 [28:47<2:38:04, 11.24it/s][A
 13%|████▍                             | 15739/122310 [28:47<2:34:02, 11.53it/s][A
 13%|████▍                             | 15747/122310 [28:48<2:36:41, 11.33

step: 16800, loss: 95.71914255846103, epoch: 1
sim1 and sim2 are 0.5410451620501543, 0.17932391966847558
cosine of pred and queen: 0.16933201251668972
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: france
Actual: cairo:egypt::ottawa:canada, pred: unobnoxious
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: group
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actu


 13%|████▍                             | 15815/122310 [29:06<2:21:35, 12.54it/s][A

Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: currency
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: belgium
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: europe
Actual: thailand:thai::india:indian, pred: ruto
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german, pred: france
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: official
Actual: denmark:danish::germany:german, pred: size



 13%|████▎                            | 15825/122310 [30:10<73:06:56,  2.47s/it][A

Actual: india:rupee::denmark:krone, pred: belgium
Accuracy is 0.08284023668639054



 13%|████▎                            | 15834/122310 [30:10<52:37:27,  1.78s/it][A
 13%|████▎                            | 15840/122310 [30:11<42:01:08,  1.42s/it][A
 13%|████▎                            | 15848/122310 [30:12<30:32:06,  1.03s/it][A
 13%|████▎                            | 15854/122310 [30:12<24:06:25,  1.23it/s][A
 13%|████▎                            | 15866/122310 [30:13<15:04:28,  1.96it/s][A
 13%|████▎                            | 15872/122310 [30:14<12:30:04,  2.37it/s][A
 13%|████▎                            | 15877/122310 [30:15<10:46:44,  2.74it/s][A
 13%|████▎                            | 15880/122310 [30:16<11:19:46,  2.61it/s][A
 13%|████▍                             | 15888/122310 [30:17<8:08:20,  3.63it/s][A
 13%|████▍                             | 15895/122310 [30:18<6:32:21,  4.52it/s][A
 13%|████▍                             | 15905/122310 [30:18<4:49:06,  6.13it/s][A
 13%|████▍                             | 15916/122310 [30:19<3:43:30,  7.93

step: 16820, loss: 82.12532065586252, epoch: 1



 13%|████▍                             | 15983/122310 [30:25<2:28:07, 11.96it/s][A
 13%|████▍                             | 15985/122310 [30:26<3:11:38,  9.25it/s][A
 13%|████▍                             | 15993/122310 [30:26<3:01:07,  9.78it/s][A
 13%|████▍                             | 15997/122310 [30:27<3:27:45,  8.53it/s][A
 13%|████▍                             | 16007/122310 [30:28<2:56:30, 10.04it/s][A
 13%|████▍                             | 16011/122310 [30:29<3:23:42,  8.70it/s][A
 13%|████▍                             | 16021/122310 [30:29<2:53:31, 10.21it/s][A
 13%|████▍                             | 16035/122310 [30:30<2:17:27, 12.89it/s][A
 13%|████▍                             | 16039/122310 [30:31<2:47:05, 10.60it/s][A
 13%|████▍                             | 16041/122310 [30:31<3:33:07,  8.31it/s][A
 13%|████▍                             | 16058/122310 [30:32<2:18:51, 12.75it/s][A
 13%|████▍                             | 16060/122310 [30:33<3:02:08,  9.72

step: 16840, loss: 88.54309171641839, epoch: 1



 13%|████▍                             | 16120/122310 [30:39<4:02:11,  7.31it/s][A
 13%|████▍                             | 16129/122310 [30:40<3:20:26,  8.83it/s][A
 13%|████▍                             | 16136/122310 [30:41<3:15:00,  9.07it/s][A
 13%|████▍                             | 16140/122310 [30:42<3:40:34,  8.02it/s][A
 13%|████▍                             | 16142/122310 [30:42<4:35:08,  6.43it/s][A
 13%|████▍                             | 16158/122310 [30:43<2:39:44, 11.07it/s][A
 13%|████▍                             | 16161/122310 [30:44<3:16:49,  8.99it/s][A
 13%|████▍                             | 16176/122310 [30:45<2:21:51, 12.47it/s][A
 13%|████▍                             | 16181/122310 [30:45<2:45:00, 10.72it/s][A
 13%|████▍                             | 16186/122310 [30:46<3:05:11,  9.55it/s][A
 13%|████▌                             | 16191/122310 [30:47<3:24:25,  8.65it/s][A
 13%|████▌                             | 16199/122310 [30:48<3:09:24,  9.34

step: 16860, loss: 87.54357954425487, epoch: 1



 13%|████▌                             | 16247/122310 [30:54<3:12:43,  9.17it/s][A
 13%|████▌                             | 16255/122310 [30:55<3:02:33,  9.68it/s][A
 13%|████▌                             | 16266/122310 [30:56<3:22:27,  8.73it/s][A
 13%|████▌                             | 16280/122310 [30:57<2:37:11, 11.24it/s][A
 13%|████▌                             | 16288/122310 [30:58<3:17:55,  8.93it/s][A
 13%|████▌                             | 16297/122310 [30:59<3:02:19,  9.69it/s][A
 13%|████▌                             | 16307/122310 [31:00<2:46:20, 10.62it/s][A
 13%|████▌                             | 16309/122310 [31:01<3:25:36,  8.59it/s][A
 13%|████▌                             | 16325/122310 [31:01<2:25:29, 12.14it/s][A
 13%|████▌                             | 16335/122310 [31:02<2:20:36, 12.56it/s][A
 13%|████▌                             | 16343/122310 [31:03<2:26:06, 12.09it/s][A
 13%|████▌                             | 16348/122310 [31:04<2:47:04, 10.57

step: 16880, loss: 74.84354107275864, epoch: 1



 13%|████▌                             | 16415/122310 [31:09<2:02:28, 14.41it/s][A
 13%|████▌                             | 16424/122310 [31:09<2:07:19, 13.86it/s][A
 13%|████▌                             | 16437/122310 [31:10<1:56:47, 15.11it/s][A
 13%|████▌                             | 16445/122310 [31:11<2:06:15, 13.97it/s][A
 13%|████▌                             | 16466/122310 [31:12<1:35:22, 18.49it/s][A
 13%|████▌                             | 16475/122310 [31:12<1:45:46, 16.68it/s][A
 13%|████▌                             | 16482/122310 [31:13<2:00:56, 14.58it/s][A
 13%|████▌                             | 16489/122310 [31:14<2:14:07, 13.15it/s][A
 13%|████▌                             | 16496/122310 [31:15<2:25:33, 12.12it/s][A
 13%|████▌                             | 16506/122310 [31:15<2:19:46, 12.62it/s][A
 13%|████▌                             | 16510/122310 [31:16<2:47:37, 10.52it/s][A
 14%|████▌                             | 16516/122310 [31:17<2:58:53,  9.86

step: 16900, loss: 97.18856494325134, epoch: 1



 14%|████▌                             | 16591/122310 [31:23<1:57:50, 14.95it/s][A
 14%|████▌                             | 16594/122310 [31:24<2:28:45, 11.84it/s][A
 14%|████▌                             | 16610/122310 [31:25<1:58:30, 14.87it/s][A
 14%|████▌                             | 16617/122310 [31:25<2:12:03, 13.34it/s][A
 14%|████▌                             | 16622/122310 [31:26<2:34:51, 11.38it/s][A
 14%|████▌                             | 16630/122310 [31:27<2:37:04, 11.21it/s][A
 14%|████▋                             | 16639/122310 [31:28<2:31:34, 11.62it/s][A
 14%|████▋                             | 16646/122310 [31:28<2:38:59, 11.08it/s][A
 14%|████▋                             | 16657/122310 [31:29<2:23:00, 12.31it/s][A
 14%|████▋                             | 16664/122310 [31:30<2:32:36, 11.54it/s][A
 14%|████▋                             | 16671/122310 [31:30<2:40:10, 10.99it/s][A
 14%|████▋                             | 16678/122310 [31:31<2:45:59, 10.61

step: 16920, loss: 79.13981623682648, epoch: 1



 14%|████▋                             | 16756/122310 [31:38<2:31:42, 11.60it/s][A
 14%|████▋                             | 16765/122310 [31:38<2:27:48, 11.90it/s][A
 14%|████▋                             | 16776/122310 [31:39<2:16:11, 12.91it/s][A
 14%|████▋                             | 16780/122310 [31:40<2:44:09, 10.71it/s][A
 14%|████▋                             | 16789/122310 [31:40<2:36:20, 11.25it/s][A
 14%|████▋                             | 16797/122310 [31:41<2:36:48, 11.21it/s][A
 14%|████▋                             | 16804/122310 [31:42<2:43:17, 10.77it/s][A
 14%|████▋                             | 16813/122310 [31:43<2:36:09, 11.26it/s][A
 14%|████▋                             | 16823/122310 [31:43<2:26:23, 12.01it/s][A
 14%|████▋                             | 16830/122310 [31:44<2:34:38, 11.37it/s][A
 14%|████▋                             | 16841/122310 [31:45<2:20:23, 12.52it/s][A
 14%|████▋                             | 16861/122310 [31:46<1:42:53, 17.08

step: 16940, loss: 75.89152978801285, epoch: 1



 14%|████▋                             | 16917/122310 [31:52<3:16:25,  8.94it/s][A
 14%|████▋                             | 16931/122310 [31:53<2:25:33, 12.07it/s][A
 14%|████▋                             | 16944/122310 [31:54<2:06:52, 13.84it/s][A
 14%|████▋                             | 16959/122310 [31:54<1:50:21, 15.91it/s][A
 14%|████▋                             | 16967/122310 [31:55<2:01:44, 14.42it/s][A
 14%|████▋                             | 16975/122310 [31:56<2:10:52, 13.41it/s][A
 14%|████▋                             | 16991/122310 [31:56<1:49:30, 16.03it/s][A
 14%|████▋                             | 17005/122310 [31:57<1:42:57, 17.05it/s][A
 14%|████▋                             | 17016/122310 [31:58<1:46:22, 16.50it/s][A
 14%|████▋                             | 17020/122310 [31:59<2:12:59, 13.19it/s][A
 14%|████▋                             | 17023/122310 [31:59<2:47:27, 10.48it/s][A
 14%|████▋                             | 17033/122310 [32:00<2:32:50, 11.48

step: 16960, loss: 90.26676166559518, epoch: 1



 14%|████▊                             | 17128/122310 [32:07<1:40:12, 17.49it/s][A
 14%|████▊                             | 17134/122310 [32:07<1:58:35, 14.78it/s][A
 14%|████▊                             | 17146/122310 [32:08<1:54:38, 15.29it/s][A
 14%|████▊                             | 17152/122310 [32:09<2:12:36, 13.22it/s][A
 14%|████▊                             | 17157/122310 [32:09<2:34:41, 11.33it/s][A
 14%|████▊                             | 17162/122310 [32:10<2:55:14, 10.00it/s][A
 14%|████▊                             | 17169/122310 [32:11<2:56:57,  9.90it/s][A
 14%|████▊                             | 17174/122310 [32:12<3:14:12,  9.02it/s][A
 14%|████▊                             | 17183/122310 [32:12<2:54:27, 10.04it/s][A
 14%|████▊                             | 17191/122310 [32:13<2:48:57, 10.37it/s][A
 14%|████▊                             | 17202/122310 [32:14<2:28:11, 11.82it/s][A
 14%|████▊                             | 17212/122310 [32:15<3:02:51,  9.58

step: 16980, loss: 91.135740678914, epoch: 1



 14%|████▊                             | 17294/122310 [32:21<2:25:47, 12.01it/s][A
 14%|████▊                             | 17298/122310 [32:22<2:53:16, 10.10it/s][A
 14%|████▊                             | 17306/122310 [32:22<2:49:12, 10.34it/s][A
 14%|████▊                             | 17308/122310 [32:24<4:40:15,  6.24it/s][A
 14%|████▊                             | 17325/122310 [32:25<2:46:15, 10.52it/s][A
 14%|████▊                             | 17331/122310 [32:25<2:56:04,  9.94it/s][A
 14%|████▊                             | 17339/122310 [32:26<2:50:27, 10.26it/s][A
 14%|████▊                             | 17352/122310 [32:27<2:21:00, 12.41it/s][A
 14%|████▊                             | 17360/122310 [32:28<2:25:49, 12.00it/s][A
 14%|████▊                             | 17366/122310 [32:28<2:39:30, 10.96it/s][A
 14%|████▊                             | 17377/122310 [32:29<2:23:41, 12.17it/s][A
 14%|████▊                             | 17380/122310 [32:30<2:58:11,  9.81

step: 17000, loss: 88.48895064145036, epoch: 1
saving weights



 14%|████▊                             | 17434/122310 [32:36<4:18:15,  6.77it/s][A
 14%|████▊                             | 17443/122310 [32:36<3:35:39,  8.10it/s][A
 14%|████▊                             | 17445/122310 [32:37<4:18:31,  6.76it/s][A
 14%|████▊                             | 17453/122310 [32:38<3:40:31,  7.92it/s][A
 14%|████▊                             | 17462/122310 [32:38<3:10:30,  9.17it/s][A
 14%|████▊                             | 17469/122310 [32:39<3:07:00,  9.34it/s][A
 14%|████▊                             | 17486/122310 [32:40<2:10:07, 13.43it/s][A
 14%|████▊                             | 17492/122310 [32:41<2:26:23, 11.93it/s][A
 14%|████▊                             | 17496/122310 [32:41<2:53:36, 10.06it/s][A
 14%|████▊                             | 17500/122310 [32:42<3:20:36,  8.71it/s][A
 14%|████▊                             | 17505/122310 [32:43<3:34:21,  8.15it/s][A
 14%|████▊                             | 17511/122310 [32:44<3:32:30,  8.22

step: 17020, loss: 97.04898922443205, epoch: 1



 14%|████▉                             | 17569/122310 [32:51<3:55:14,  7.42it/s][A
 14%|████▉                             | 17572/122310 [32:52<4:26:49,  6.54it/s][A
 14%|████▉                             | 17583/122310 [32:52<3:14:00,  9.00it/s][A
 14%|████▉                             | 17593/122310 [32:53<2:48:30, 10.36it/s][A
 14%|████▉                             | 17599/122310 [32:54<3:00:08,  9.69it/s][A
 14%|████▉                             | 17603/122310 [32:54<3:25:40,  8.49it/s][A
 14%|████▉                             | 17609/122310 [32:55<3:28:54,  8.35it/s][A
 14%|████▉                             | 17611/122310 [32:56<4:20:10,  6.71it/s][A
 14%|████▉                             | 17614/122310 [32:57<4:52:46,  5.96it/s][A
 14%|████▉                             | 17619/122310 [32:57<4:39:26,  6.24it/s][A
 14%|████▉                             | 17630/122310 [32:58<3:15:42,  8.91it/s][A
 14%|████▉                             | 17637/122310 [32:59<3:11:10,  9.13

step: 17040, loss: 79.39753610431639, epoch: 1



 14%|████▉                             | 17720/122310 [33:05<2:29:01, 11.70it/s][A
 14%|████▉                             | 17724/122310 [33:06<2:57:10,  9.84it/s][A
 14%|████▉                             | 17729/122310 [33:07<3:14:03,  8.98it/s][A
 15%|████▉                             | 17738/122310 [33:08<2:54:02, 10.01it/s][A
 15%|████▉                             | 17755/122310 [33:08<2:04:24, 14.01it/s][A
 15%|████▉                             | 17761/122310 [33:09<2:22:14, 12.25it/s][A
 15%|████▉                             | 17774/122310 [33:10<2:04:35, 13.98it/s][A
 15%|████▉                             | 17786/122310 [33:10<1:57:50, 14.78it/s][A
 15%|████▉                             | 17796/122310 [33:11<2:00:13, 14.49it/s][A
 15%|████▉                             | 17804/122310 [33:12<2:09:04, 13.49it/s][A
 15%|████▉                             | 17812/122310 [33:13<2:16:38, 12.75it/s][A
 15%|████▉                             | 17819/122310 [33:13<2:27:19, 11.82

step: 17060, loss: 77.59791414224753, epoch: 1



 15%|████▉                             | 17898/122310 [33:20<2:31:00, 11.52it/s][A
 15%|████▉                             | 17908/122310 [33:21<2:23:32, 12.12it/s][A
 15%|████▉                             | 17921/122310 [33:21<2:08:19, 13.56it/s][A
 15%|████▉                             | 17929/122310 [33:22<2:14:43, 12.91it/s][A
 15%|████▉                             | 17938/122310 [33:23<2:15:57, 12.79it/s][A
 15%|████▉                             | 17952/122310 [33:23<1:58:42, 14.65it/s][A
 15%|████▉                             | 17971/122310 [33:24<1:37:07, 17.90it/s][A
 15%|████▉                             | 17973/122310 [33:25<2:08:15, 13.56it/s][A
 15%|████▉                             | 17979/122310 [33:26<2:24:53, 12.00it/s][A
 15%|█████                             | 17994/122310 [33:26<1:59:13, 14.58it/s][A
 15%|█████                             | 18010/122310 [33:27<1:43:30, 16.79it/s][A
 15%|█████                             | 18023/122310 [33:28<1:41:16, 17.16

step: 17080, loss: 69.72923906204134, epoch: 1



 15%|█████                             | 18077/122310 [33:34<3:03:20,  9.48it/s][A
 15%|█████                             | 18084/122310 [33:35<3:01:55,  9.55it/s][A
 15%|█████                             | 18094/122310 [33:36<2:41:06, 10.78it/s][A
 15%|█████                             | 18100/122310 [33:37<2:53:16, 10.02it/s][A
 15%|█████                             | 18108/122310 [33:37<2:47:21, 10.38it/s][A
 15%|█████                             | 18114/122310 [33:38<2:58:31,  9.73it/s][A
 15%|█████                             | 18123/122310 [33:39<2:44:54, 10.53it/s][A
 15%|█████                             | 18125/122310 [33:39<3:30:54,  8.23it/s][A
 15%|█████                             | 18135/122310 [33:40<2:55:34,  9.89it/s][A
 15%|█████                             | 18141/122310 [33:41<3:03:32,  9.46it/s][A
 15%|█████                             | 18150/122310 [33:42<2:48:16, 10.32it/s][A
 15%|█████                             | 18157/122310 [33:42<2:51:24, 10.13

step: 17100, loss: 77.14918224860492, epoch: 1



 15%|█████                             | 18262/122310 [33:49<1:59:41, 14.49it/s][A
 15%|█████                             | 18267/122310 [33:50<2:21:38, 12.24it/s][A
 15%|█████                             | 18277/122310 [33:50<2:16:24, 12.71it/s][A
 15%|█████                             | 18285/122310 [33:51<2:21:54, 12.22it/s][A
 15%|█████                             | 18288/122310 [33:52<2:57:52,  9.75it/s][A
 15%|█████                             | 18292/122310 [33:52<3:23:20,  8.53it/s][A
 15%|█████                             | 18299/122310 [33:53<3:16:18,  8.83it/s][A
 15%|█████                             | 18309/122310 [33:54<2:48:33, 10.28it/s][A
 15%|█████                             | 18318/122310 [33:55<2:39:09, 10.89it/s][A
 15%|█████                             | 18322/122310 [33:55<3:07:16,  9.25it/s][A
 15%|█████                             | 18337/122310 [33:56<2:16:44, 12.67it/s][A
 15%|█████                             | 18343/122310 [33:57<2:32:35, 11.36

step: 17120, loss: 96.78155968097776, epoch: 1



 15%|█████                             | 18415/122310 [34:03<2:34:53, 11.18it/s][A
 15%|█████                             | 18426/122310 [34:04<2:19:57, 12.37it/s][A
 15%|█████▏                            | 18437/122310 [34:05<2:10:55, 13.22it/s][A
 15%|█████▏                            | 18451/122310 [34:06<1:55:28, 14.99it/s][A
 15%|█████▏                            | 18454/122310 [34:06<2:26:53, 11.78it/s][A
 15%|█████▏                            | 18462/122310 [34:07<2:30:49, 11.48it/s][A
 15%|█████▏                            | 18469/122310 [34:08<2:37:48, 10.97it/s][A
 15%|█████▏                            | 18484/122310 [34:09<2:04:51, 13.86it/s][A
 15%|█████▏                            | 18486/122310 [34:09<2:44:18, 10.53it/s][A
 15%|█████▏                            | 18496/122310 [34:10<2:29:29, 11.57it/s][A
 15%|█████▏                            | 18500/122310 [34:11<2:57:21,  9.75it/s][A
 15%|█████▏                            | 18508/122310 [34:11<2:50:29, 10.15

step: 17140, loss: 83.84905230489093, epoch: 1



 15%|█████▏                            | 18604/122310 [34:18<1:58:40, 14.56it/s][A
 15%|█████▏                            | 18608/122310 [34:19<2:25:42, 11.86it/s][A
 15%|█████▏                            | 18614/122310 [34:19<2:39:50, 10.81it/s][A
 15%|█████▏                            | 18626/122310 [34:20<2:17:29, 12.57it/s][A
 15%|█████▏                            | 18634/122310 [34:21<2:22:41, 12.11it/s][A
 15%|█████▏                            | 18643/122310 [34:22<2:21:30, 12.21it/s][A
 15%|█████▏                            | 18665/122310 [34:22<1:37:55, 17.64it/s][A
 15%|█████▏                            | 18680/122310 [34:23<1:33:00, 18.57it/s][A
 15%|█████▏                            | 18687/122310 [34:24<1:48:36, 15.90it/s][A
 15%|█████▏                            | 18692/122310 [34:24<2:10:47, 13.20it/s][A
 15%|█████▏                            | 18703/122310 [34:25<2:06:00, 13.70it/s][A
 15%|█████▏                            | 18705/122310 [34:26<2:44:38, 10.49

step: 17160, loss: 76.5780889380224, epoch: 1



 15%|█████▏                            | 18795/122310 [34:32<1:46:22, 16.22it/s][A
 15%|█████▏                            | 18816/122310 [34:33<1:26:00, 20.06it/s][A
 15%|█████▏                            | 18822/122310 [34:34<1:44:33, 16.49it/s][A
 15%|█████▏                            | 18832/122310 [34:35<1:49:36, 15.73it/s][A
 15%|█████▏                            | 18845/122310 [34:35<1:45:05, 16.41it/s][A
 15%|█████▏                            | 18849/122310 [34:36<2:11:21, 13.13it/s][A
 15%|█████▏                            | 18855/122310 [34:37<2:28:35, 11.60it/s][A
 15%|█████▏                            | 18871/122310 [34:38<1:56:51, 14.75it/s][A
 15%|█████▏                            | 18876/122310 [34:38<2:18:57, 12.41it/s][A
 15%|█████▏                            | 18886/122310 [34:39<2:14:14, 12.84it/s][A
 15%|█████▎                            | 18897/122310 [34:40<2:07:20, 13.53it/s][A
 15%|█████▎                            | 18906/122310 [34:40<2:10:44, 13.18

step: 17180, loss: 73.68938548663871, epoch: 1



 16%|█████▎                            | 18988/122310 [34:47<2:44:42, 10.46it/s][A
 16%|█████▎                            | 18999/122310 [34:48<2:24:45, 11.89it/s][A
 16%|█████▎                            | 19007/122310 [34:48<2:28:10, 11.62it/s][A
 16%|█████▎                            | 19025/122310 [34:49<1:50:10, 15.62it/s][A
 16%|█████▎                            | 19032/122310 [34:50<2:04:30, 13.83it/s][A
 16%|█████▎                            | 19036/122310 [34:51<2:31:40, 11.35it/s][A
 16%|█████▎                            | 19051/122310 [34:51<2:02:26, 14.06it/s][A
 16%|█████▎                            | 19053/122310 [34:52<2:40:49, 10.70it/s][A
 16%|█████▎                            | 19067/122310 [34:53<2:09:46, 13.26it/s][A
 16%|█████▎                            | 19073/122310 [34:54<2:26:26, 11.75it/s][A
 16%|█████▎                            | 19081/122310 [34:54<2:28:52, 11.56it/s][A
 16%|█████▎                            | 19086/122310 [34:55<2:49:28, 10.15

step: 17200, loss: 88.4461471433994, epoch: 1
sim1 and sim2 are 0.5210990646672674, 0.1674851091267477
cosine of pred and queen: 0.13011372547051497
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: talks
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: unobnoxious
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual:


 16%|█████▎                            | 19147/122310 [35:16<2:31:34, 11.34it/s][A

Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: ruto
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german, pred: europe
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: indian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: government
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.26666666666666666
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: emblem
Actual: vanish:vanishes::eat:eats, pred: shall
Actual: sing:sings::shuffle:shuffles, pred: emblem
Actual: sit:sits::go:goes, pred: emblem
Actual: say:says::provide:provides, pred: emblem
Actual: scream:screams::sing:si


 16%|█████▏                           | 19157/122310 [36:15<75:17:22,  2.63s/it][A

Actual: india:rupee::denmark:krone, pred: belgium
Accuracy is 0.09467455621301775



 16%|█████▏                           | 19162/122310 [36:16<61:16:48,  2.14s/it][A
 16%|█████▏                           | 19175/122310 [36:16<36:04:08,  1.26s/it][A
 16%|█████▏                           | 19189/122310 [36:17<22:26:29,  1.28it/s][A
 16%|█████▏                           | 19200/122310 [36:18<16:07:38,  1.78it/s][A
 16%|█████▏                           | 19217/122310 [36:19<10:06:04,  2.84it/s][A
 16%|█████▎                            | 19231/122310 [36:19<7:19:14,  3.91it/s][A
 16%|█████▎                            | 19242/122310 [36:20<5:52:04,  4.88it/s][A
 16%|█████▎                            | 19253/122310 [36:21<4:45:46,  6.01it/s][A
 16%|█████▎                            | 19266/122310 [36:21<3:45:10,  7.63it/s][A
 16%|█████▎                            | 19273/122310 [36:22<3:35:43,  7.96it/s][A
 16%|█████▎                            | 19280/122310 [36:23<3:26:43,  8.31it/s][A
 16%|█████▎                            | 19286/122310 [36:24<3:26:12,  8.33

step: 17220, loss: 77.51208672010172, epoch: 1



 16%|█████▍                            | 19353/122310 [36:30<3:14:50,  8.81it/s][A
 16%|█████▍                            | 19361/122310 [36:31<3:03:37,  9.34it/s][A
 16%|█████▍                            | 19375/122310 [36:31<2:25:08, 11.82it/s][A
 16%|█████▍                            | 19386/122310 [36:32<2:15:43, 12.64it/s][A
 16%|█████▍                            | 19390/122310 [36:33<2:39:20, 10.76it/s][A
 16%|█████▍                            | 19402/122310 [36:34<2:17:57, 12.43it/s][A
 16%|█████▍                            | 19414/122310 [36:34<2:05:13, 13.69it/s][A
 16%|█████▍                            | 19429/122310 [36:36<2:19:56, 12.25it/s][A
 16%|█████▍                            | 19431/122310 [36:37<2:51:45,  9.98it/s][A
 16%|█████▍                            | 19438/122310 [36:37<2:53:09,  9.90it/s][A
 16%|█████▍                            | 19450/122310 [36:38<2:25:35, 11.77it/s][A
 16%|█████▍                            | 19455/122310 [36:39<2:43:23, 10.49

step: 17240, loss: 74.56504908554227, epoch: 1



 16%|█████▍                            | 19539/122310 [36:44<1:56:07, 14.75it/s][A
 16%|█████▍                            | 19544/122310 [36:45<2:17:25, 12.46it/s][A
 16%|█████▍                            | 19556/122310 [36:46<2:04:28, 13.76it/s][A
 16%|█████▍                            | 19563/122310 [36:46<2:15:17, 12.66it/s][A
 16%|█████▍                            | 19585/122310 [36:47<1:34:32, 18.11it/s][A
 16%|█████▍                            | 19599/122310 [36:48<1:31:51, 18.63it/s][A
 16%|█████▍                            | 19604/122310 [36:49<1:53:02, 15.14it/s][A
 16%|█████▍                            | 19613/122310 [36:49<1:58:40, 14.42it/s][A
 16%|█████▍                            | 19618/122310 [36:50<2:20:13, 12.21it/s][A
 16%|█████▍                            | 19631/122310 [36:51<2:02:04, 14.02it/s][A
 16%|█████▍                            | 19636/122310 [36:51<2:23:57, 11.89it/s][A
 16%|█████▍                            | 19639/122310 [36:52<2:58:00,  9.61

step: 17260, loss: 104.98536277930084, epoch: 1



 16%|█████▍                            | 19700/122310 [36:59<2:45:39, 10.32it/s][A
 16%|█████▍                            | 19706/122310 [36:59<2:55:14,  9.76it/s][A
 16%|█████▍                            | 19711/122310 [37:00<3:10:55,  8.96it/s][A
 16%|█████▍                            | 19717/122310 [37:01<3:14:13,  8.80it/s][A
 16%|█████▍                            | 19728/122310 [37:01<2:38:04, 10.82it/s][A
 16%|█████▍                            | 19732/122310 [37:02<3:04:43,  9.26it/s][A
 16%|█████▍                            | 19743/122310 [37:03<2:33:25, 11.14it/s][A
 16%|█████▍                            | 19754/122310 [37:04<2:17:40, 12.42it/s][A
 16%|█████▍                            | 19758/122310 [37:04<2:44:10, 10.41it/s][A
 16%|█████▍                            | 19768/122310 [37:05<2:28:39, 11.50it/s][A
 16%|█████▍                            | 19778/122310 [37:06<2:19:13, 12.27it/s][A
 16%|█████▌                            | 19791/122310 [37:06<2:01:29, 14.06

step: 17280, loss: 81.90651047674862, epoch: 1



 16%|█████▌                            | 19888/122310 [37:13<2:09:31, 13.18it/s][A
 16%|█████▌                            | 19896/122310 [37:14<2:15:01, 12.64it/s][A
 16%|█████▌                            | 19907/122310 [37:14<2:06:29, 13.49it/s][A
 16%|█████▌                            | 19920/122310 [37:15<1:54:58, 14.84it/s][A
 16%|█████▌                            | 19930/122310 [37:16<1:58:48, 14.36it/s][A
 16%|█████▌                            | 19937/122310 [37:16<2:11:52, 12.94it/s][A
 16%|█████▌                            | 19941/122310 [37:17<2:39:48, 10.68it/s][A
 16%|█████▌                            | 19945/122310 [37:18<3:07:48,  9.08it/s][A
 16%|█████▌                            | 19955/122310 [37:19<2:43:23, 10.44it/s][A
 16%|█████▌                            | 19966/122310 [37:19<2:24:55, 11.77it/s][A
 16%|█████▌                            | 19978/122310 [37:20<2:10:05, 13.11it/s][A
 16%|█████▌                            | 19982/122310 [37:21<2:37:47, 10.81

step: 17300, loss: 85.62564999018788, epoch: 1



 16%|█████▌                            | 20064/122310 [37:27<2:32:24, 11.18it/s][A
 16%|█████▌                            | 20070/122310 [37:28<2:45:21, 10.31it/s][A
 16%|█████▌                            | 20075/122310 [37:29<3:03:55,  9.26it/s][A
 16%|█████▍                           | 20079/122310 [37:39<16:57:02,  1.68it/s][A
 16%|█████▍                           | 20085/122310 [37:40<12:53:13,  2.20it/s][A
 16%|█████▌                            | 20093/122310 [37:41<9:08:19,  3.11it/s][A
 16%|█████▌                            | 20107/122310 [37:41<5:29:38,  5.17it/s][A

step: 17320, loss: 75.16156105208185, epoch: 1



 16%|█████▌                            | 20126/122310 [37:42<3:23:06,  8.38it/s][A
 16%|█████▌                            | 20131/122310 [37:43<3:29:42,  8.12it/s][A
 16%|█████▌                            | 20140/122310 [37:43<3:09:42,  8.98it/s][A
 16%|█████▌                            | 20146/122310 [37:44<3:13:24,  8.80it/s][A
 16%|█████▌                            | 20158/122310 [37:45<2:40:03, 10.64it/s][A
 16%|█████▌                            | 20165/122310 [37:46<2:44:28, 10.35it/s][A
 16%|█████▌                            | 20174/122310 [37:46<2:36:35, 10.87it/s][A
 17%|█████▌                            | 20184/122310 [37:47<2:26:07, 11.65it/s][A
 17%|█████▌                            | 20195/122310 [37:48<2:15:45, 12.54it/s][A
 17%|█████▌                            | 20199/122310 [37:49<2:41:23, 10.54it/s][A
 17%|█████▌                            | 20209/122310 [37:49<2:29:01, 11.42it/s][A
 17%|█████▌                            | 20214/122310 [37:50<2:49:23, 10.05

step: 17340, loss: 87.9768137895241, epoch: 1



 17%|█████▋                            | 20277/122310 [37:57<2:36:32, 10.86it/s][A
 17%|█████▋                            | 20281/122310 [37:58<3:59:08,  7.11it/s][A
 17%|█████▋                            | 20286/122310 [37:59<4:03:01,  7.00it/s][A
 17%|█████▋                            | 20299/122310 [38:00<2:53:38,  9.79it/s][A
 17%|█████▋                            | 20310/122310 [38:00<2:32:27, 11.15it/s][A
 17%|█████▋                            | 20316/122310 [38:01<2:45:07, 10.30it/s][A
 17%|█████▋                            | 20320/122310 [38:02<3:10:27,  8.93it/s][A
 17%|█████▋                            | 20333/122310 [38:03<2:29:06, 11.40it/s][A
 17%|█████▋                            | 20341/122310 [38:03<2:30:39, 11.28it/s][A
 17%|█████▋                            | 20347/122310 [38:04<2:44:23, 10.34it/s][A
 17%|█████▋                            | 20357/122310 [38:05<2:30:12, 11.31it/s][A
 17%|█████▋                            | 20368/122310 [38:06<2:17:22, 12.37

step: 17360, loss: 80.96390517401527, epoch: 1



 17%|█████▋                            | 20441/122310 [38:11<2:19:21, 12.18it/s][A
 17%|█████▋                            | 20451/122310 [38:12<2:14:29, 12.62it/s][A
 17%|█████▋                            | 20457/122310 [38:13<2:30:59, 11.24it/s][A
 17%|█████▋                            | 20465/122310 [38:14<2:31:39, 11.19it/s][A
 17%|█████▋                            | 20477/122310 [38:14<2:13:21, 12.73it/s][A
 17%|█████▋                            | 20495/122310 [38:15<1:45:01, 16.16it/s][A
 17%|█████▋                            | 20515/122310 [38:16<1:27:07, 19.47it/s][A
 17%|█████▋                            | 20520/122310 [38:17<1:48:24, 15.65it/s][A
 17%|█████▋                            | 20526/122310 [38:17<2:06:13, 13.44it/s][A
 17%|█████▋                            | 20547/122310 [38:18<1:34:28, 17.95it/s][A
 17%|█████▋                            | 20558/122310 [38:19<1:39:32, 17.04it/s][A
 17%|█████▋                            | 20565/122310 [38:20<1:54:33, 14.80

step: 17380, loss: 84.02440043343078, epoch: 1



 17%|█████▋                            | 20628/122310 [38:26<2:50:45,  9.92it/s][A
 17%|█████▋                            | 20633/122310 [38:27<3:08:38,  8.98it/s][A
 17%|█████▋                            | 20635/122310 [38:28<3:58:33,  7.10it/s][A
 17%|█████▋                            | 20646/122310 [38:28<2:59:22,  9.45it/s][A
 17%|█████▋                            | 20649/122310 [38:29<3:36:06,  7.84it/s][A
 17%|█████▋                            | 20655/122310 [38:30<3:33:07,  7.95it/s][A
 17%|█████▋                            | 20667/122310 [38:31<2:41:50, 10.47it/s][A
 17%|█████▋                            | 20676/122310 [38:31<2:33:48, 11.01it/s][A
 17%|█████▊                            | 20701/122310 [38:32<1:34:34, 17.91it/s][A
 17%|█████▊                            | 20706/122310 [38:33<1:55:57, 14.60it/s][A
 17%|█████▊                            | 20711/122310 [38:34<2:18:18, 12.24it/s][A
 17%|█████▊                            | 20721/122310 [38:34<2:13:48, 12.65

step: 17400, loss: 87.33114712445636, epoch: 1



 17%|█████▊                            | 20810/122310 [38:41<2:26:38, 11.54it/s][A
 17%|█████▊                            | 20826/122310 [38:42<1:56:12, 14.55it/s][A
 17%|█████▊                            | 20833/122310 [38:42<2:09:35, 13.05it/s][A
 17%|█████▊                            | 20843/122310 [38:43<2:08:07, 13.20it/s][A
 17%|█████▊                            | 20849/122310 [38:44<2:25:09, 11.65it/s][A
 17%|█████▊                            | 20856/122310 [38:45<2:33:26, 11.02it/s][A
 17%|█████▊                            | 20871/122310 [38:45<2:02:11, 13.84it/s][A
 17%|█████▊                            | 20875/122310 [38:46<2:29:06, 11.34it/s][A
 17%|█████▊                            | 20888/122310 [38:47<2:07:53, 13.22it/s][A
 17%|█████▊                            | 20899/122310 [38:48<2:02:45, 13.77it/s][A
 17%|█████▊                            | 20906/122310 [38:48<2:15:11, 12.50it/s][A
 17%|█████▊                            | 20910/122310 [38:49<2:42:49, 10.38

step: 17420, loss: 74.20014484237844, epoch: 1



 17%|█████▊                            | 20991/122310 [38:56<2:11:00, 12.89it/s][A
 17%|█████▊                            | 20998/122310 [38:56<2:21:31, 11.93it/s][A
 17%|█████▊                            | 21004/122310 [38:59<4:09:22,  6.77it/s][A
 17%|█████▊                            | 21009/122310 [38:59<4:09:11,  6.78it/s][A
 17%|█████▊                            | 21011/122310 [39:00<4:46:27,  5.89it/s][A
 17%|█████▊                            | 21019/122310 [39:01<3:54:27,  7.20it/s][A
 17%|█████▊                            | 21027/122310 [39:01<3:24:45,  8.24it/s][A
 17%|█████▊                            | 21034/122310 [39:02<3:14:39,  8.67it/s][A
 17%|█████▊                            | 21042/122310 [39:03<3:00:12,  9.37it/s][A
 17%|█████▊                            | 21051/122310 [39:04<2:43:51, 10.30it/s][A
 17%|█████▊                            | 21057/122310 [39:04<2:53:04,  9.75it/s][A
 17%|█████▊                            | 21065/122310 [39:05<2:45:17, 10.21

step: 17440, loss: 72.08407601091574, epoch: 1



 17%|█████▊                            | 21123/122310 [39:10<2:37:10, 10.73it/s][A
 17%|█████▊                            | 21134/122310 [39:11<2:19:01, 12.13it/s][A
 17%|█████▉                            | 21143/122310 [39:11<2:17:26, 12.27it/s][A
 17%|█████▉                            | 21152/122310 [39:12<2:16:04, 12.39it/s][A
 17%|█████▉                            | 21163/122310 [39:13<2:06:44, 13.30it/s][A
 17%|█████▉                            | 21176/122310 [39:14<1:54:05, 14.77it/s][A
 17%|█████▉                            | 21185/122310 [39:14<1:59:16, 14.13it/s][A
 17%|█████▉                            | 21193/122310 [39:15<2:07:01, 13.27it/s][A
 17%|█████▉                            | 21198/122310 [39:16<2:28:17, 11.36it/s][A
 17%|█████▉                            | 21203/122310 [39:16<2:47:16, 10.07it/s][A
 17%|█████▉                            | 21216/122310 [39:17<2:14:32, 12.52it/s][A
 17%|█████▉                            | 21222/122310 [39:18<2:29:23, 11.28

step: 17460, loss: 77.78055323326097, epoch: 1



 17%|█████▉                            | 21287/122310 [39:24<2:34:09, 10.92it/s][A
 17%|█████▉                            | 21293/122310 [39:25<2:45:19, 10.18it/s][A
 17%|█████▉                            | 21305/122310 [39:26<2:18:17, 12.17it/s][A
 17%|█████▉                            | 21311/122310 [39:26<2:32:25, 11.04it/s][A
 17%|█████▉                            | 21321/122310 [39:27<2:20:59, 11.94it/s][A
 17%|█████▉                            | 21330/122310 [39:28<2:18:43, 12.13it/s][A
 17%|█████▉                            | 21340/122310 [39:29<2:12:36, 12.69it/s][A
 17%|█████▉                            | 21351/122310 [39:29<2:04:22, 13.53it/s][A
 17%|█████▉                            | 21364/122310 [39:30<1:53:14, 14.86it/s][A
 17%|█████▉                            | 21376/122310 [39:31<1:48:53, 15.45it/s][A
 17%|█████▉                            | 21382/122310 [39:31<2:06:00, 13.35it/s][A
 17%|█████▉                            | 21388/122310 [39:32<2:21:54, 11.85

step: 17480, loss: 114.47409237842868, epoch: 1



 18%|█████▉                            | 21466/122310 [39:39<2:36:33, 10.74it/s][A
 18%|█████▉                            | 21481/122310 [39:39<2:04:01, 13.55it/s][A
 18%|█████▉                            | 21491/122310 [39:40<2:02:38, 13.70it/s][A
 18%|█████▉                            | 21499/122310 [39:41<2:09:17, 12.99it/s][A
 18%|█████▉                            | 21505/122310 [39:41<2:24:16, 11.64it/s][A
 18%|█████▉                            | 21513/122310 [39:42<2:26:38, 11.46it/s][A
 18%|█████▉                            | 21523/122310 [39:43<2:17:39, 12.20it/s][A
 18%|█████▉                            | 21529/122310 [39:44<2:31:46, 11.07it/s][A
 18%|█████▉                            | 21534/122310 [39:44<2:50:07,  9.87it/s][A
 18%|█████▉                            | 21542/122310 [39:45<2:43:56, 10.24it/s][A
 18%|█████▉                            | 21546/122310 [39:46<3:09:10,  8.88it/s][A
 18%|█████▉                            | 21551/122310 [39:47<4:22:10,  6.41

step: 17500, loss: 96.89107698037387, epoch: 1



 18%|██████                            | 21634/122310 [39:55<3:52:32,  7.22it/s][A
 18%|██████                            | 21637/122310 [39:56<4:10:52,  6.69it/s][A
 18%|██████                            | 21641/122310 [39:56<4:20:17,  6.45it/s][A
 18%|██████                            | 21650/122310 [39:57<3:34:00,  7.84it/s][A
 18%|██████                            | 21657/122310 [39:58<3:21:19,  8.33it/s][A
 18%|██████                            | 21663/122310 [39:59<3:21:13,  8.34it/s][A
 18%|██████                            | 21667/122310 [39:59<3:40:28,  7.61it/s][A
 18%|██████                            | 21674/122310 [40:00<3:24:11,  8.21it/s][A
 18%|██████                            | 21683/122310 [40:01<2:57:33,  9.45it/s][A
 18%|██████                            | 21692/122310 [40:01<2:43:16, 10.27it/s][A
 18%|██████                            | 21694/122310 [40:02<3:26:04,  8.14it/s][A
 18%|██████                            | 21702/122310 [40:03<3:05:29,  9.04

step: 17520, loss: 86.68105431510689, epoch: 1



 18%|██████                            | 21764/122310 [40:07<2:18:04, 12.14it/s][A
 18%|██████                            | 21777/122310 [40:08<1:59:51, 13.98it/s][A
 18%|██████                            | 21785/122310 [40:09<2:07:59, 13.09it/s][A
 18%|██████                            | 21794/122310 [40:09<2:10:01, 12.88it/s][A
 18%|██████                            | 21796/122310 [40:10<2:48:59,  9.91it/s][A
 18%|██████                            | 21799/122310 [40:11<3:23:37,  8.23it/s][A
 18%|██████                            | 21807/122310 [40:11<3:03:46,  9.11it/s][A
 18%|██████                            | 21818/122310 [40:12<2:32:36, 10.98it/s][A
 18%|██████                            | 21822/122310 [40:13<2:59:06,  9.35it/s][A
 18%|██████                            | 21829/122310 [40:14<2:56:55,  9.47it/s][A
 18%|██████                            | 21832/122310 [40:14<3:31:23,  7.92it/s][A
 18%|██████                            | 21847/122310 [40:15<2:21:22, 11.84

step: 17540, loss: 92.4116307376117, epoch: 1



 18%|██████                            | 21930/122310 [40:22<2:07:00, 13.17it/s][A
 18%|██████                            | 21940/122310 [40:22<2:05:14, 13.36it/s][A
 18%|██████                            | 21950/122310 [40:23<2:04:21, 13.45it/s][A
 18%|██████                            | 21955/122310 [40:24<2:25:51, 11.47it/s][A
 18%|██████                            | 21959/122310 [40:24<2:52:09,  9.71it/s][A
 18%|██████                            | 21962/122310 [40:25<3:28:49,  8.01it/s][A
 18%|██████                            | 21971/122310 [40:26<2:59:22,  9.32it/s][A
 18%|██████                            | 21976/122310 [40:27<3:14:48,  8.58it/s][A
 18%|██████                            | 21979/122310 [40:27<3:50:45,  7.25it/s][A
 18%|██████                            | 21983/122310 [40:28<4:09:35,  6.70it/s][A
 18%|██████                            | 21985/122310 [40:29<5:02:02,  5.54it/s][A
 18%|██████                            | 21995/122310 [40:30<3:29:48,  7.97

step: 17560, loss: 93.0157458927697, epoch: 1



 18%|██████▏                           | 22048/122310 [40:36<2:43:19, 10.23it/s][A
 18%|██████▏                           | 22057/122310 [40:37<2:33:03, 10.92it/s][A
 18%|██████▏                           | 22062/122310 [40:38<2:52:13,  9.70it/s][A
 18%|██████▏                           | 22070/122310 [40:38<2:46:17, 10.05it/s][A
 18%|██████▏                           | 22072/122310 [40:39<3:31:15,  7.91it/s][A
 18%|██████▏                           | 22080/122310 [40:40<3:08:47,  8.85it/s][A
 18%|██████▏                           | 22091/122310 [40:40<2:35:28, 10.74it/s][A
 18%|██████▏                           | 22097/122310 [40:41<2:47:02, 10.00it/s][A
 18%|██████▏                           | 22100/122310 [40:42<3:22:49,  8.23it/s][A
 18%|██████▏                           | 22113/122310 [40:43<2:29:28, 11.17it/s][A
 18%|██████▏                           | 22129/122310 [40:43<1:55:54, 14.40it/s][A
 18%|██████▏                           | 22134/122310 [40:44<2:17:24, 12.15

step: 17580, loss: 91.0450617463365, epoch: 1



 18%|██████▏                           | 22237/122310 [40:51<1:32:56, 17.94it/s][A
 18%|██████▏                           | 22246/122310 [40:51<1:42:08, 16.33it/s][A
 18%|██████▏                           | 22254/122310 [40:52<1:53:11, 14.73it/s][A
 18%|██████▏                           | 22261/122310 [40:53<2:06:08, 13.22it/s][A
 18%|██████▏                           | 22270/122310 [40:53<2:08:19, 12.99it/s][A
 18%|██████▏                           | 22283/122310 [40:54<1:55:41, 14.41it/s][A
 18%|██████▏                           | 22292/122310 [40:55<2:00:40, 13.81it/s][A
 18%|██████▏                           | 22305/122310 [40:56<1:50:46, 15.05it/s][A
 18%|██████▏                           | 22313/122310 [40:56<2:00:11, 13.87it/s][A
 18%|██████▏                           | 22319/122310 [40:57<2:15:36, 12.29it/s][A
 18%|██████▏                           | 22326/122310 [40:58<2:24:17, 11.55it/s][A
 18%|██████▏                           | 22332/122310 [40:59<2:37:33, 10.58

step: 17600, loss: 85.51726458806152, epoch: 1
sim1 and sim2 are 0.13781549557725206, 0.19053095631540168
cosine of pred and queen: 0.6872402298921386
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: talks
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: group
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: germany
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharastr

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: became
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: child
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: ever
Actual: man:woman::brothers:sisters, pred: multiplying
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: indian
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 18%|██████                           | 22401/122310 [42:17<70:33:54,  2.54s/it][A

Actual: india:rupee::denmark:krone, pred: belgium
Accuracy is 0.08284023668639054



 18%|██████                           | 22411/122310 [42:18<48:15:26,  1.74s/it][A
 18%|██████                           | 22421/122310 [42:19<33:33:51,  1.21s/it][A
 18%|██████                           | 22430/122310 [42:19<24:28:29,  1.13it/s][A
 18%|██████                           | 22440/122310 [42:20<17:22:56,  1.60it/s][A
 18%|██████                           | 22445/122310 [42:21<14:54:36,  1.86it/s][A
 18%|██████                           | 22451/122310 [42:21<12:07:45,  2.29it/s][A
 18%|██████                           | 22456/122310 [42:22<10:19:13,  2.69it/s][A
 18%|██████▏                           | 22470/122310 [42:23<6:08:23,  4.52it/s][A
 18%|██████▏                           | 22480/122310 [42:24<4:46:53,  5.80it/s][A
 18%|██████▎                           | 22490/122310 [42:24<3:54:00,  7.11it/s][A
 18%|██████▎                           | 22508/122310 [42:25<2:38:09, 10.52it/s][A
 18%|██████▎                           | 22516/122310 [42:26<2:35:42, 10.68

step: 17620, loss: 73.70883089141448, epoch: 1



 18%|██████▎                           | 22602/122310 [42:32<1:54:34, 14.50it/s][A
 18%|██████▎                           | 22607/122310 [42:33<2:14:52, 12.32it/s][A
 18%|██████▎                           | 22612/122310 [42:34<2:34:19, 10.77it/s][A
 18%|██████▎                           | 22625/122310 [42:34<2:07:32, 13.03it/s][A
 19%|██████▎                           | 22633/122310 [42:35<2:12:54, 12.50it/s][A
 19%|██████▎                           | 22637/122310 [42:36<2:39:23, 10.42it/s][A
 19%|██████▎                           | 22644/122310 [42:36<2:41:24, 10.29it/s][A
 19%|██████▎                           | 22653/122310 [42:37<2:30:55, 11.00it/s][A
 19%|██████▎                           | 22663/122310 [42:38<2:19:21, 11.92it/s][A
 19%|██████▎                           | 22675/122310 [42:39<2:03:42, 13.42it/s][A
 19%|██████▎                           | 22682/122310 [42:39<2:14:13, 12.37it/s][A
 19%|██████▎                           | 22688/122310 [42:40<2:28:14, 11.20

step: 17640, loss: 87.78661679725788, epoch: 1



 19%|██████▎                           | 22774/122310 [42:46<2:29:12, 11.12it/s][A
 19%|██████▎                           | 22782/122310 [42:47<2:28:20, 11.18it/s][A
 19%|██████▎                           | 22792/122310 [42:48<2:17:58, 12.02it/s][A
 19%|██████▎                           | 22800/122310 [42:49<2:21:34, 11.72it/s][A
 19%|██████▎                           | 22804/122310 [42:49<2:46:12,  9.98it/s][A
 19%|██████▎                           | 22818/122310 [42:50<2:09:00, 12.85it/s][A
 19%|██████▎                           | 22825/122310 [42:51<2:18:16, 11.99it/s][A
 19%|██████▎                           | 22829/122310 [42:51<2:44:17, 10.09it/s][A
 19%|██████▎                           | 22841/122310 [42:52<2:16:52, 12.11it/s][A
 19%|██████▎                           | 22853/122310 [42:53<2:02:26, 13.54it/s][A
 19%|██████▎                           | 22865/122310 [42:53<1:54:26, 14.48it/s][A
 19%|██████▎                           | 22873/122310 [42:54<2:02:33, 13.52

step: 17660, loss: 92.49329133577234, epoch: 1



 19%|██████▍                           | 22968/122310 [43:01<2:00:38, 13.72it/s][A
 19%|██████▍                           | 22973/122310 [43:01<2:20:31, 11.78it/s][A
 19%|██████▍                           | 22977/122310 [43:02<2:46:39,  9.93it/s][A
 19%|██████▍                           | 22984/122310 [43:03<2:47:16,  9.90it/s][A
 19%|██████▍                           | 22987/122310 [43:03<3:22:02,  8.19it/s][A
 19%|██████▍                           | 22996/122310 [43:04<2:53:43,  9.53it/s][A
 19%|██████▍                           | 23001/122310 [43:05<3:08:24,  8.78it/s][A
 19%|██████▍                           | 23011/122310 [43:06<2:39:53, 10.35it/s][A
 19%|██████▍                           | 23024/122310 [43:06<2:10:04, 12.72it/s][A
 19%|██████▍                           | 23030/122310 [43:07<2:24:49, 11.43it/s][A
 19%|██████▍                           | 23039/122310 [43:08<2:20:15, 11.80it/s][A
 19%|██████▍                           | 23049/122310 [43:08<2:12:50, 12.45

step: 17680, loss: 77.59142603068376, epoch: 1



 19%|██████▍                           | 23114/122310 [43:15<3:05:50,  8.90it/s][A
 19%|██████▍                           | 23124/122310 [43:16<2:41:09, 10.26it/s][A
 19%|██████▍                           | 23129/122310 [43:16<2:56:28,  9.37it/s][A
 19%|██████▍                           | 23140/122310 [43:17<2:29:16, 11.07it/s][A
 19%|██████▍                           | 23148/122310 [43:18<2:28:38, 11.12it/s][A
 19%|██████▍                           | 23152/122310 [43:18<2:53:38,  9.52it/s][A
 19%|██████▍                           | 23164/122310 [43:19<2:21:48, 11.65it/s][A
 19%|██████▍                           | 23173/122310 [43:20<2:18:05, 11.96it/s][A
 19%|██████▍                           | 23175/122310 [43:21<2:58:50,  9.24it/s][A
 19%|██████▍                           | 23182/122310 [43:21<2:55:50,  9.40it/s][A
 19%|██████▍                           | 23191/122310 [43:22<2:39:36, 10.35it/s][A
 19%|██████▍                           | 23197/122310 [43:23<2:49:12,  9.76

step: 17700, loss: 79.87196796721025, epoch: 1



 19%|██████▍                           | 23276/122310 [43:29<2:56:11,  9.37it/s][A
 19%|██████▍                           | 23284/122310 [43:30<2:46:08,  9.93it/s][A
 19%|██████▍                           | 23291/122310 [43:31<2:46:48,  9.89it/s][A
 19%|██████▍                           | 23302/122310 [43:31<2:22:52, 11.55it/s][A
 19%|██████▍                           | 23316/122310 [43:32<1:58:15, 13.95it/s][A
 19%|██████▍                           | 23324/122310 [43:33<2:05:28, 13.15it/s][A
 19%|██████▍                           | 23330/122310 [43:33<2:20:26, 11.75it/s][A
 19%|██████▍                           | 23339/122310 [43:34<2:17:46, 11.97it/s][A
 19%|██████▍                           | 23348/122310 [43:35<2:15:11, 12.20it/s][A
 19%|██████▍                           | 23356/122310 [43:36<2:18:31, 11.91it/s][A
 19%|██████▍                           | 23367/122310 [43:36<2:07:23, 12.94it/s][A
 19%|██████▍                           | 23377/122310 [43:37<2:04:25, 13.25

step: 17720, loss: 99.06619117614441, epoch: 1



 19%|██████▌                           | 23436/122310 [43:43<2:36:54, 10.50it/s][A
 19%|██████▌                           | 23443/122310 [43:44<2:39:52, 10.31it/s][A
 19%|██████▌                           | 23452/122310 [43:45<2:29:49, 11.00it/s][A
 19%|██████▌                           | 23459/122310 [43:46<2:36:13, 10.55it/s][A
 19%|██████▌                           | 23464/122310 [43:46<2:53:16,  9.51it/s][A
 19%|██████▌                           | 23470/122310 [43:47<3:00:11,  9.14it/s][A
 19%|██████▌                           | 23483/122310 [43:48<2:20:02, 11.76it/s][A
 19%|██████▌                           | 23493/122310 [43:49<2:13:09, 12.37it/s][A
 19%|██████▌                           | 23496/122310 [43:49<2:45:36,  9.94it/s][A
 19%|██████▌                           | 23500/122310 [43:50<3:10:12,  8.66it/s][A
 19%|██████▌                           | 23508/122310 [43:51<2:54:53,  9.42it/s][A
 19%|██████▌                           | 23518/122310 [43:51<2:33:04, 10.76

step: 17740, loss: 105.44071967724685, epoch: 1



 19%|██████▌                           | 23592/122310 [43:58<2:33:09, 10.74it/s][A
 19%|██████▌                           | 23612/122310 [43:59<1:43:32, 15.89it/s][A
 19%|██████▌                           | 23621/122310 [43:59<1:49:54, 14.97it/s][A
 19%|██████▌                           | 23631/122310 [44:00<1:51:54, 14.70it/s][A
 19%|██████▌                           | 23643/122310 [44:01<1:47:29, 15.30it/s][A
 19%|██████▌                           | 23653/122310 [44:01<1:50:52, 14.83it/s][A
 19%|██████▌                           | 23664/122310 [44:02<1:49:43, 14.98it/s][A
 19%|██████▌                           | 23673/122310 [44:03<1:55:41, 14.21it/s][A
 19%|██████▌                           | 23676/122310 [44:04<2:26:38, 11.21it/s][A
 19%|██████▌                           | 23689/122310 [44:04<2:04:03, 13.25it/s][A
 19%|██████▌                           | 23694/122310 [44:05<2:24:29, 11.37it/s][A
 19%|██████▌                           | 23705/122310 [44:06<2:10:34, 12.59

step: 17760, loss: 77.55858965071457, epoch: 1



 19%|██████▌                           | 23792/122310 [44:12<1:56:54, 14.05it/s][A
 19%|██████▌                           | 23795/122310 [44:13<2:27:31, 11.13it/s][A
 19%|██████▌                           | 23801/122310 [44:14<2:39:09, 10.32it/s][A
 19%|██████▌                           | 23808/122310 [44:14<2:41:37, 10.16it/s][A
 19%|██████▌                           | 23816/122310 [44:15<2:37:25, 10.43it/s][A
 19%|██████▌                           | 23821/122310 [44:16<2:54:25,  9.41it/s][A
 19%|██████▌                           | 23825/122310 [44:16<3:18:16,  8.28it/s][A
 19%|██████▋                           | 23836/122310 [44:17<2:38:18, 10.37it/s][A
 19%|██████▋                           | 23838/122310 [44:18<3:22:11,  8.12it/s][A
 19%|██████▋                           | 23843/122310 [44:19<3:30:26,  7.80it/s][A
 20%|██████▋                           | 23854/122310 [44:19<2:42:58, 10.07it/s][A
 20%|██████▋                           | 23860/122310 [44:20<2:50:59,  9.60

step: 17780, loss: 85.45288840026485, epoch: 1



 20%|██████▋                           | 23943/122310 [44:26<2:15:07, 12.13it/s][A
 20%|██████▋                           | 23955/122310 [44:27<2:01:01, 13.54it/s][A
 20%|██████▋                           | 23965/122310 [44:28<1:59:46, 13.68it/s][A
 20%|██████▋                           | 23969/122310 [44:29<2:25:47, 11.24it/s][A
 20%|██████▋                           | 23973/122310 [44:29<2:52:03,  9.53it/s][A
 20%|██████▋                           | 23982/122310 [44:30<2:37:02, 10.43it/s][A
 20%|██████▋                           | 23995/122310 [44:31<2:08:43, 12.73it/s][A
 20%|██████▋                           | 24004/122310 [44:31<2:09:04, 12.69it/s][A
 20%|██████▋                           | 24012/122310 [44:32<2:14:31, 12.18it/s][A
 20%|██████▋                           | 24015/122310 [44:33<2:46:16,  9.85it/s][A
 20%|██████▋                           | 24020/122310 [44:34<3:03:06,  8.95it/s][A
 20%|██████▋                           | 24029/122310 [44:34<2:42:50, 10.06

step: 17800, loss: 74.18363100923547, epoch: 1



 20%|██████▋                           | 24103/122310 [44:41<2:25:29, 11.25it/s][A
 20%|██████▋                           | 24110/122310 [44:42<2:30:18, 10.89it/s][A
 20%|██████▋                           | 24122/122310 [44:42<2:09:22, 12.65it/s][A
 20%|██████▋                           | 24134/122310 [44:43<1:57:55, 13.88it/s][A
 20%|██████▋                           | 24145/122310 [44:44<1:53:53, 14.36it/s][A
 20%|██████▋                           | 24157/122310 [44:44<1:48:31, 15.07it/s][A
 20%|██████▋                           | 24166/122310 [44:45<1:54:18, 14.31it/s][A
 20%|██████▋                           | 24180/122310 [44:46<1:43:05, 15.86it/s][A
 20%|██████▋                           | 24185/122310 [44:47<2:04:05, 13.18it/s][A
 20%|██████▋                           | 24192/122310 [44:47<2:14:40, 12.14it/s][A
 20%|██████▋                           | 24200/122310 [44:48<2:18:21, 11.82it/s][A
 20%|██████▋                           | 24209/122310 [44:49<2:16:10, 12.01

step: 17820, loss: 96.3951287240543, epoch: 1



 20%|██████▋                           | 24260/122310 [44:55<3:47:24,  7.19it/s][A
 20%|██████▋                           | 24270/122310 [44:56<3:04:27,  8.86it/s][A
 20%|██████▋                           | 24273/122310 [44:57<3:33:21,  7.66it/s][A
 20%|██████▊                           | 24286/122310 [44:57<2:35:14, 10.52it/s][A
 20%|██████▊                           | 24295/122310 [44:58<2:27:08, 11.10it/s][A
 20%|██████▊                           | 24302/122310 [44:59<2:32:08, 10.74it/s][A
 20%|██████▊                           | 24312/122310 [44:59<2:19:46, 11.69it/s][A
 20%|██████▊                           | 24319/122310 [45:00<2:26:39, 11.14it/s][A
 20%|██████▊                           | 24325/122310 [45:01<2:38:10, 10.32it/s][A
 20%|██████▊                           | 24337/122310 [45:02<2:13:38, 12.22it/s][A
 20%|██████▊                           | 24353/122310 [45:02<1:47:26, 15.19it/s][A
 20%|██████▊                           | 24358/122310 [45:03<2:07:40, 12.79

step: 17840, loss: 99.50933116057917, epoch: 1



 20%|██████▊                           | 24437/122310 [45:10<2:07:50, 12.76it/s][A
 20%|██████▊                           | 24447/122310 [45:10<2:04:12, 13.13it/s][A
 20%|██████▊                           | 24461/122310 [45:11<1:48:08, 15.08it/s][A
 20%|██████▊                           | 24470/122310 [45:12<1:53:29, 14.37it/s][A
 20%|██████▊                           | 24478/122310 [45:12<2:02:00, 13.36it/s][A
 20%|██████▊                           | 24485/122310 [45:13<2:12:54, 12.27it/s][A

step: 17860, loss: 101.80892952742153, epoch: 1



 20%|██████▌                          | 24490/122310 [45:24<13:48:43,  1.97it/s][A
 20%|██████▌                          | 24492/122310 [45:25<13:22:11,  2.03it/s][A
 20%|██████▊                           | 24501/122310 [45:26<8:58:03,  3.03it/s][A
 20%|██████▊                           | 24506/122310 [45:27<7:45:31,  3.50it/s][A
 20%|██████▊                           | 24510/122310 [45:27<7:08:42,  3.80it/s][A
 20%|██████▊                           | 24520/122310 [45:28<4:53:45,  5.55it/s][A
 20%|██████▊                           | 24527/122310 [45:29<4:15:06,  6.39it/s][A
 20%|██████▊                           | 24544/122310 [45:29<2:39:47, 10.20it/s][A
 20%|██████▊                           | 24558/122310 [45:30<2:11:03, 12.43it/s][A
 20%|██████▊                           | 24563/122310 [45:31<2:27:24, 11.05it/s][A
 20%|██████▊                           | 24577/122310 [45:32<2:03:45, 13.16it/s][A
 20%|██████▊                           | 24583/122310 [45:32<2:17:09, 11.88

step: 17880, loss: 99.03150791203981, epoch: 1



 20%|██████▊                           | 24647/122310 [45:38<2:33:10, 10.63it/s][A
 20%|██████▊                           | 24650/122310 [45:40<4:56:28,  5.49it/s][A
 20%|██████▊                           | 24671/122310 [45:41<2:34:01, 10.57it/s][A
 20%|██████▊                           | 24683/122310 [45:42<2:15:31, 12.01it/s][A
 20%|██████▊                           | 24693/122310 [45:42<2:10:10, 12.50it/s][A
 20%|██████▊                           | 24700/122310 [45:43<2:18:24, 11.75it/s][A
 20%|██████▊                           | 24706/122310 [45:44<2:29:37, 10.87it/s][A
 20%|██████▊                           | 24713/122310 [45:44<2:33:58, 10.56it/s][A
 20%|██████▊                           | 24716/122310 [45:45<3:05:43,  8.76it/s][A
 20%|██████▊                           | 24727/122310 [45:46<2:32:19, 10.68it/s][A
 20%|██████▉                           | 24735/122310 [45:47<2:30:42, 10.79it/s][A
 20%|██████▉                           | 24738/122310 [45:47<3:02:54,  8.89

step: 17900, loss: 86.52826069453273, epoch: 1



 20%|██████▉                           | 24813/122310 [45:52<1:49:04, 14.90it/s][A
 20%|██████▉                           | 24816/122310 [45:53<2:18:36, 11.72it/s][A
 20%|██████▉                           | 24825/122310 [45:54<2:15:36, 11.98it/s][A
 20%|██████▉                           | 24831/122310 [45:54<2:28:46, 10.92it/s][A
 20%|██████▉                           | 24845/122310 [45:55<1:59:56, 13.54it/s][A
 20%|██████▉                           | 24848/122310 [45:56<2:31:24, 10.73it/s][A
 20%|██████▉                           | 24855/122310 [45:57<2:35:24, 10.45it/s][A
 20%|██████▉                           | 24860/122310 [45:57<2:52:42,  9.40it/s][A
 20%|██████▉                           | 24869/122310 [45:58<2:37:00, 10.34it/s][A
 20%|██████▉                           | 24883/122310 [45:59<2:04:13, 13.07it/s][A
 20%|██████▉                           | 24888/122310 [45:59<2:23:58, 11.28it/s][A
 20%|██████▉                           | 24894/122310 [46:00<2:35:15, 10.46

step: 17920, loss: 78.75097857011147, epoch: 1



 20%|██████▉                           | 24959/122310 [46:07<3:04:54,  8.77it/s][A
 20%|██████▉                           | 24960/122310 [46:07<4:06:01,  6.59it/s][A
 20%|██████▉                           | 24964/122310 [46:08<4:16:56,  6.31it/s][A
 20%|██████▉                           | 24974/122310 [46:09<3:08:14,  8.62it/s][A
 20%|██████▉                           | 24982/122310 [46:09<2:52:44,  9.39it/s][A
 20%|██████▉                           | 24989/122310 [46:10<2:50:13,  9.53it/s][A
 20%|██████▉                           | 24996/122310 [46:11<2:48:26,  9.63it/s][A
 20%|██████▉                           | 25013/122310 [46:12<1:56:52, 13.88it/s][A
 20%|██████▉                           | 25018/122310 [46:12<2:17:24, 11.80it/s][A
 20%|██████▉                           | 25024/122310 [46:13<2:30:26, 10.78it/s][A
 20%|██████▉                           | 25035/122310 [46:14<2:14:07, 12.09it/s][A
 20%|██████▉                           | 25041/122310 [46:14<2:27:55, 10.96

step: 17940, loss: 71.63207344554678, epoch: 1



 21%|██████▉                           | 25095/122310 [46:21<2:57:18,  9.14it/s][A
 21%|██████▉                           | 25101/122310 [46:22<3:00:49,  8.96it/s][A
 21%|██████▉                           | 25112/122310 [46:22<2:30:30, 10.76it/s][A
 21%|██████▉                           | 25120/122310 [46:23<2:28:37, 10.90it/s][A
 21%|██████▉                           | 25131/122310 [46:24<2:12:36, 12.21it/s][A
 21%|██████▉                           | 25141/122310 [46:24<2:07:12, 12.73it/s][A
 21%|██████▉                           | 25155/122310 [46:25<1:49:46, 14.75it/s][A
 21%|██████▉                           | 25160/122310 [46:26<2:09:53, 12.47it/s][A
 21%|██████▉                           | 25167/122310 [46:27<2:18:35, 11.68it/s][A
 21%|██████▉                           | 25177/122310 [46:27<2:11:10, 12.34it/s][A
 21%|███████                           | 25187/122310 [46:28<2:05:56, 12.85it/s][A
 21%|███████                           | 25192/122310 [46:29<2:26:10, 11.07

step: 17960, loss: 77.40656026333046, epoch: 1



 21%|███████                           | 25266/122310 [46:35<2:54:48,  9.25it/s][A
 21%|███████                           | 25273/122310 [46:37<3:45:56,  7.16it/s][A
 21%|███████                           | 25281/122310 [46:37<3:19:56,  8.09it/s][A
 21%|███████                           | 25290/122310 [46:38<2:56:09,  9.18it/s][A
 21%|███████                           | 25294/122310 [46:39<3:18:39,  8.14it/s][A
 21%|███████                           | 25298/122310 [46:40<3:39:44,  7.36it/s][A
 21%|███████                           | 25304/122310 [46:40<3:32:59,  7.59it/s][A
 21%|███████                           | 25315/122310 [46:41<2:46:22,  9.72it/s][A
 21%|███████                           | 25324/122310 [46:42<2:35:16, 10.41it/s][A
 21%|███████                           | 25331/122310 [46:43<2:38:58, 10.17it/s][A
 21%|███████                           | 25341/122310 [46:43<2:24:39, 11.17it/s][A
 21%|███████                           | 25351/122310 [46:44<2:16:18, 11.86

step: 17980, loss: 85.2504305939114, epoch: 1



 21%|███████                           | 25440/122310 [46:50<2:03:45, 13.05it/s][A
 21%|███████                           | 25447/122310 [46:51<2:14:39, 11.99it/s][A
 21%|███████                           | 25456/122310 [46:52<2:14:00, 12.05it/s][A
 21%|███████                           | 25463/122310 [46:52<2:23:04, 11.28it/s][A
 21%|███████                           | 25470/122310 [46:53<2:30:34, 10.72it/s][A
 21%|███████                           | 25488/122310 [46:54<1:49:31, 14.73it/s][A
 21%|███████                           | 25491/122310 [46:54<2:19:14, 11.59it/s][A
 21%|███████                           | 25494/122310 [46:55<2:52:43,  9.34it/s][A
 21%|███████                           | 25502/122310 [46:56<2:44:59,  9.78it/s][A
 21%|███████                           | 25509/122310 [46:57<2:46:31,  9.69it/s][A
 21%|███████                           | 25517/122310 [46:57<2:41:03, 10.02it/s][A
 21%|███████                           | 25526/122310 [46:58<2:32:37, 10.57

step: 18000, loss: 89.10445517949574, epoch: 1
sim1 and sim2 are 0.1513602035363777, 0.17797092791575553
cosine of pred and queen: 0.6122880309227791
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: two
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: germany
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharast


 21%|███████                           | 25594/122310 [47:17<2:19:54, 11.52it/s][A

Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: currency
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: first
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: belgium
Actual: usa:dollar::nigeria:naira, pred: cent
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: indian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: official
Actual: denmark:danish::germany:german, pred: size
Accur


 21%|██████▉                          | 25596/122310 [48:20<95:31:33,  3.56s/it][A
 21%|██████▉                          | 25607/122310 [48:21<56:34:42,  2.11s/it][A
 21%|██████▉                          | 25618/122310 [48:22<36:09:53,  1.35s/it][A
 21%|██████▉                          | 25629/122310 [48:22<24:14:05,  1.11it/s][A
 21%|██████▉                          | 25637/122310 [48:23<18:26:49,  1.46it/s][A
 21%|██████▉                          | 25643/122310 [48:24<15:05:44,  1.78it/s][A
 21%|██████▉                          | 25648/122310 [48:25<12:45:45,  2.10it/s][A
 21%|██████▉                          | 25653/122310 [48:25<10:42:55,  2.51it/s][A
 21%|███████▏                          | 25661/122310 [48:26<7:52:15,  3.41it/s][A
 21%|███████▏                          | 25667/122310 [48:27<6:37:45,  4.05it/s][A
 21%|███████▏                          | 25674/122310 [48:28<5:26:21,  4.94it/s][A
 21%|███████▏                          | 25686/122310 [48:28<3:47:19,  7.08

step: 18020, loss: 90.16159467076949, epoch: 1



 21%|███████▏                          | 25797/122310 [48:36<1:46:29, 15.10it/s][A
 21%|███████▏                          | 25800/122310 [48:36<2:14:42, 11.94it/s][A
 21%|███████▏                          | 25813/122310 [48:37<1:57:24, 13.70it/s][A
 21%|███████▏                          | 25825/122310 [48:38<1:51:01, 14.48it/s][A
 21%|███████▏                          | 25831/122310 [48:39<2:07:09, 12.64it/s][A
 21%|███████▏                          | 25839/122310 [48:40<2:52:41,  9.31it/s][A
 21%|███████▏                          | 25842/122310 [48:41<3:19:49,  8.05it/s][A
 21%|███████▏                          | 25849/122310 [48:41<3:10:17,  8.45it/s][A
 21%|███████▏                          | 25855/122310 [48:42<3:11:41,  8.39it/s][A
 21%|███████▏                          | 25867/122310 [48:43<2:31:22, 10.62it/s][A
 21%|███████▏                          | 25871/122310 [48:44<2:56:50,  9.09it/s][A
 21%|███████▏                          | 25880/122310 [48:44<2:40:22, 10.02

step: 18040, loss: 80.73370016162073, epoch: 1



 21%|███████▏                          | 25932/122310 [48:50<3:59:31,  6.71it/s][A
 21%|███████▏                          | 25938/122310 [48:51<3:44:40,  7.15it/s][A
 21%|███████▏                          | 25949/122310 [48:52<2:48:40,  9.52it/s][A
 21%|███████▏                          | 25959/122310 [48:52<2:29:27, 10.74it/s][A
 21%|███████▏                          | 25963/122310 [48:53<2:54:40,  9.19it/s][A
 21%|███████▏                          | 25981/122310 [48:54<1:56:31, 13.78it/s][A
 21%|███████▏                          | 25984/122310 [48:55<2:28:15, 10.83it/s][A
 21%|███████▏                          | 25994/122310 [48:55<2:16:57, 11.72it/s][A
 21%|███████▏                          | 26004/122310 [48:56<2:10:50, 12.27it/s][A
 21%|███████▏                          | 26010/122310 [48:57<2:24:53, 11.08it/s][A
 21%|███████▏                          | 26017/122310 [48:58<2:30:59, 10.63it/s][A
 21%|███████▏                          | 26023/122310 [48:58<2:42:10,  9.90

step: 18060, loss: 85.11468612976398, epoch: 1



 21%|███████▎                          | 26129/122310 [49:05<1:40:02, 16.02it/s][A
 21%|███████▎                          | 26137/122310 [49:06<1:49:51, 14.59it/s][A
 21%|███████▎                          | 26139/122310 [49:06<2:24:36, 11.08it/s][A
 21%|███████▎                          | 26146/122310 [49:07<2:29:56, 10.69it/s][A
 21%|███████▎                          | 26150/122310 [49:08<2:55:01,  9.16it/s][A
 21%|███████▎                          | 26159/122310 [49:09<2:37:39, 10.16it/s][A
 21%|███████▎                          | 26179/122310 [49:09<1:44:12, 15.37it/s][A
 21%|███████▎                          | 26181/122310 [49:10<2:17:29, 11.65it/s][A
 21%|███████▎                          | 26187/122310 [49:11<2:30:07, 10.67it/s][A
 21%|███████▎                          | 26195/122310 [49:11<2:28:15, 10.81it/s][A
 21%|███████▎                          | 26202/122310 [49:13<3:19:18,  8.04it/s][A
 21%|███████▎                          | 26206/122310 [49:14<3:34:09,  7.48

step: 18080, loss: 110.37294807524097, epoch: 1



 21%|███████▎                          | 26271/122310 [49:19<2:08:17, 12.48it/s][A
 21%|███████▎                          | 26280/122310 [49:20<2:08:07, 12.49it/s][A
 21%|███████▎                          | 26288/122310 [49:21<2:12:41, 12.06it/s][A
 22%|███████▎                          | 26298/122310 [49:21<2:06:32, 12.65it/s][A
 22%|███████▎                          | 26309/122310 [49:22<1:59:12, 13.42it/s][A
 22%|███████▎                          | 26316/122310 [49:23<2:09:46, 12.33it/s][A
 22%|███████▎                          | 26319/122310 [49:24<2:41:57,  9.88it/s][A
 22%|███████▎                          | 26329/122310 [49:24<2:24:30, 11.07it/s][A
 22%|███████▎                          | 26339/122310 [49:25<2:14:20, 11.91it/s][A
 22%|███████▎                          | 26344/122310 [49:26<2:33:21, 10.43it/s][A
 22%|███████▎                          | 26350/122310 [49:27<2:43:03,  9.81it/s][A
 22%|███████▎                          | 26364/122310 [49:27<2:06:33, 12.64

step: 18100, loss: 81.94470906962921, epoch: 1



 22%|███████▎                          | 26447/122310 [49:34<2:20:55, 11.34it/s][A
 22%|███████▎                          | 26457/122310 [49:34<2:12:00, 12.10it/s][A
 22%|███████▎                          | 26468/122310 [49:35<2:02:56, 12.99it/s][A
 22%|███████▎                          | 26477/122310 [49:36<2:03:47, 12.90it/s][A
 22%|███████▎                          | 26490/122310 [49:37<2:24:16, 11.07it/s][A
 22%|███████▎                          | 26498/122310 [49:38<2:23:29, 11.13it/s][A
 22%|███████▎                          | 26505/122310 [49:39<2:28:29, 10.75it/s][A
 22%|███████▎                          | 26521/122310 [49:40<1:55:07, 13.87it/s][A
 22%|███████▎                          | 26530/122310 [49:40<1:58:31, 13.47it/s][A
 22%|███████▍                          | 26540/122310 [49:41<1:57:16, 13.61it/s][A
 22%|███████▍                          | 26547/122310 [49:42<2:08:17, 12.44it/s][A
 22%|███████▍                          | 26555/122310 [49:42<2:12:21, 12.06

step: 18120, loss: 90.1455181308409, epoch: 1



 22%|███████▍                          | 26615/122310 [49:48<2:47:29,  9.52it/s][A
 22%|███████▍                          | 26632/122310 [49:49<1:56:42, 13.66it/s][A
 22%|███████▍                          | 26645/122310 [49:50<1:46:20, 14.99it/s][A
 22%|███████▍                          | 26650/122310 [49:50<2:07:04, 12.55it/s][A
 22%|███████▍                          | 26656/122310 [49:51<2:21:19, 11.28it/s][A
 22%|███████▍                          | 26660/122310 [49:52<2:46:14,  9.59it/s][A
 22%|███████▍                          | 26667/122310 [49:53<2:45:39,  9.62it/s][A
 22%|███████▍                          | 26681/122310 [49:53<2:06:41, 12.58it/s][A
 22%|███████▍                          | 26687/122310 [49:54<2:21:44, 11.24it/s][A
 22%|███████▍                          | 26694/122310 [49:55<2:27:27, 10.81it/s][A
 22%|███████▍                          | 26705/122310 [49:55<2:11:38, 12.10it/s][A
 22%|███████▍                          | 26715/122310 [49:56<2:06:21, 12.61

step: 18140, loss: 79.70592615250797, epoch: 1



 22%|███████▍                          | 26797/122310 [50:03<2:20:22, 11.34it/s][A
 22%|███████▍                          | 26807/122310 [50:03<2:11:32, 12.10it/s][A
 22%|███████▍                          | 26821/122310 [50:04<1:51:27, 14.28it/s][A
 22%|███████▍                          | 26831/122310 [50:05<1:52:29, 14.15it/s][A
 22%|███████▍                          | 26843/122310 [50:06<1:47:00, 14.87it/s][A
 22%|███████▍                          | 26847/122310 [50:06<2:11:22, 12.11it/s][A
 22%|███████▍                          | 26858/122310 [50:07<2:02:35, 12.98it/s][A
 22%|███████▍                          | 26862/122310 [50:08<2:27:20, 10.80it/s][A
 22%|███████▍                          | 26876/122310 [50:08<1:58:53, 13.38it/s][A
 22%|███████▍                          | 26883/122310 [50:09<2:09:47, 12.25it/s][A
 22%|███████▍                          | 26890/122310 [50:10<2:18:30, 11.48it/s][A
 22%|███████▍                          | 26899/122310 [50:11<2:14:45, 11.80

step: 18160, loss: 75.99600462385096, epoch: 1



 22%|███████▌                          | 26983/122310 [50:17<2:05:32, 12.65it/s][A
 22%|███████▌                          | 26995/122310 [50:18<1:54:59, 13.81it/s][A
 22%|███████▌                          | 27008/122310 [50:19<1:45:24, 15.07it/s][A
 22%|███████▌                          | 27013/122310 [50:19<2:05:55, 12.61it/s][A
 22%|███████▌                          | 27021/122310 [50:20<2:10:23, 12.18it/s][A
 22%|███████▌                          | 27028/122310 [50:21<2:20:11, 11.33it/s][A
 22%|███████▌                          | 27030/122310 [50:22<3:00:07,  8.82it/s][A
 22%|███████▌                          | 27039/122310 [50:22<2:40:19,  9.90it/s][A
 22%|███████▌                          | 27043/122310 [50:23<3:05:22,  8.57it/s][A
 22%|███████▌                          | 27049/122310 [50:24<3:07:14,  8.48it/s][A
 22%|███████▌                          | 27062/122310 [50:24<2:20:13, 11.32it/s][A
 22%|███████▌                          | 27071/122310 [50:25<2:15:55, 11.68

step: 18180, loss: 71.6271989903662, epoch: 1



 22%|███████▌                          | 27150/122310 [50:32<2:08:49, 12.31it/s][A
 22%|███████▌                          | 27154/122310 [50:32<2:33:09, 10.35it/s][A
 22%|███████▌                          | 27159/122310 [50:33<2:50:22,  9.31it/s][A
 22%|███████▌                          | 27169/122310 [50:34<2:28:31, 10.68it/s][A
 22%|███████▌                          | 27187/122310 [50:35<1:46:22, 14.90it/s][A
 22%|███████▌                          | 27189/122310 [50:35<2:20:23, 11.29it/s][A
 22%|███████▌                          | 27195/122310 [50:37<3:18:02,  8.00it/s][A
 22%|███████▌                          | 27198/122310 [50:37<3:44:46,  7.05it/s][A
 22%|███████▌                          | 27207/122310 [50:38<3:07:14,  8.47it/s][A
 22%|███████▌                          | 27225/122310 [50:39<2:03:07, 12.87it/s][A
 22%|███████▌                          | 27233/122310 [50:40<2:08:08, 12.37it/s][A
 22%|███████▌                          | 27242/122310 [50:40<2:07:50, 12.39

step: 18200, loss: 96.75804712361487, epoch: 1



 22%|███████▌                          | 27291/122310 [50:46<4:20:57,  6.07it/s][A
 22%|███████▌                          | 27298/122310 [50:47<3:46:56,  6.98it/s][A
 22%|███████▌                          | 27309/122310 [50:48<2:53:40,  9.12it/s][A
 22%|███████▌                          | 27314/122310 [50:48<3:06:40,  8.48it/s][A
 22%|███████▌                          | 27321/122310 [50:49<2:59:28,  8.82it/s][A
 22%|███████▌                          | 27337/122310 [50:50<2:05:38, 12.60it/s][A
 22%|███████▌                          | 27343/122310 [50:51<2:19:34, 11.34it/s][A
 22%|███████▌                          | 27351/122310 [50:51<2:20:40, 11.25it/s][A
 22%|███████▌                          | 27365/122310 [50:52<1:56:05, 13.63it/s][A
 22%|███████▌                          | 27367/122310 [50:53<2:31:41, 10.43it/s][A
 22%|███████▌                          | 27372/122310 [50:53<2:48:46,  9.37it/s][A
 22%|███████▌                          | 27382/122310 [50:54<2:27:53, 10.70

step: 18220, loss: 75.8922797793819, epoch: 1



 22%|███████▋                          | 27464/122310 [51:01<1:48:11, 14.61it/s][A
 22%|███████▋                          | 27468/122310 [51:02<2:52:17,  9.17it/s][A
 22%|███████▋                          | 27475/122310 [51:03<2:49:37,  9.32it/s][A
 22%|███████▋                          | 27482/122310 [51:04<2:48:00,  9.41it/s][A
 22%|███████▋                          | 27490/122310 [51:04<2:40:23,  9.85it/s][A
 22%|███████▋                          | 27502/122310 [51:05<2:14:44, 11.73it/s][A
 22%|███████▋                          | 27516/122310 [51:06<1:53:50, 13.88it/s][A
 23%|███████▋                          | 27527/122310 [51:07<2:22:37, 11.08it/s][A
 23%|███████▋                          | 27531/122310 [51:08<2:42:21,  9.73it/s][A
 23%|███████▋                          | 27541/122310 [51:09<2:27:07, 10.74it/s][A
 23%|███████▋                          | 27556/122310 [51:09<1:58:36, 13.31it/s][A
 23%|███████▋                          | 27569/122310 [51:10<1:48:40, 14.53

step: 18240, loss: 80.00796248618099, epoch: 1



 23%|███████▋                          | 27613/122310 [51:15<2:18:22, 11.41it/s][A
 23%|███████▋                          | 27623/122310 [51:16<2:09:59, 12.14it/s][A
 23%|███████▋                          | 27634/122310 [51:17<2:00:58, 13.04it/s][A
 23%|███████▋                          | 27642/122310 [51:17<2:06:41, 12.45it/s][A
 23%|███████▋                          | 27654/122310 [51:18<1:55:18, 13.68it/s][A
 23%|███████▋                          | 27664/122310 [51:19<1:55:23, 13.67it/s][A
 23%|███████▋                          | 27670/122310 [51:20<2:10:12, 12.11it/s][A
 23%|███████▋                          | 27673/122310 [51:20<2:42:17,  9.72it/s][A
 23%|███████▋                          | 27682/122310 [51:21<2:29:32, 10.55it/s][A
 23%|███████▋                          | 27692/122310 [51:22<2:16:44, 11.53it/s][A
 23%|███████▋                          | 27702/122310 [51:22<2:09:00, 12.22it/s][A
 23%|███████▋                          | 27710/122310 [51:23<2:13:11, 11.84

step: 18260, loss: 74.84443410997214, epoch: 1



 23%|███████▋                          | 27751/122310 [51:30<5:11:53,  5.05it/s][A
 23%|███████▋                          | 27755/122310 [51:30<5:02:46,  5.20it/s][A
 23%|███████▋                          | 27766/122310 [51:31<3:21:19,  7.83it/s][A
 23%|███████▋                          | 27776/122310 [51:32<2:46:34,  9.46it/s][A
 23%|███████▋                          | 27791/122310 [51:33<2:05:01, 12.60it/s][A
 23%|███████▋                          | 27804/122310 [51:33<1:51:32, 14.12it/s][A
 23%|███████▋                          | 27810/122310 [51:34<2:07:01, 12.40it/s][A
 23%|███████▋                          | 27820/122310 [51:35<2:02:54, 12.81it/s][A
 23%|███████▋                          | 27825/122310 [51:35<2:21:46, 11.11it/s][A
 23%|███████▋                          | 27839/122310 [51:36<1:56:24, 13.53it/s][A
 23%|███████▋                          | 27844/122310 [51:37<2:15:59, 11.58it/s][A
 23%|███████▋                          | 27851/122310 [51:38<2:23:14, 10.99

step: 18280, loss: 90.89853037371351, epoch: 1



 23%|███████▊                          | 27919/122310 [51:44<3:04:06,  8.55it/s][A
 23%|███████▊                          | 27935/122310 [51:45<2:04:49, 12.60it/s][A
 23%|███████▊                          | 27937/122310 [51:46<2:43:45,  9.61it/s][A
 23%|███████▊                          | 27939/122310 [51:46<3:27:17,  7.59it/s][A
 23%|███████▊                          | 27948/122310 [51:47<2:55:02,  8.98it/s][A
 23%|███████▊                          | 27963/122310 [51:48<2:06:38, 12.42it/s][A
 23%|███████▊                          | 27968/122310 [51:49<2:25:45, 10.79it/s][A
 23%|███████▊                          | 27981/122310 [51:49<2:01:37, 12.93it/s][A
 23%|███████▊                          | 27984/122310 [51:50<2:32:43, 10.29it/s][A
 23%|███████▊                          | 28004/122310 [51:51<1:41:44, 15.45it/s][A
 23%|███████▊                          | 28010/122310 [51:51<1:57:58, 13.32it/s][A
 23%|███████▊                          | 28018/122310 [51:52<2:04:30, 12.62

step: 18300, loss: 103.81335688728555, epoch: 1



 23%|███████▊                          | 28075/122310 [51:59<2:22:47, 11.00it/s][A
 23%|███████▊                          | 28080/122310 [51:59<2:40:37,  9.78it/s][A
 23%|███████▊                          | 28088/122310 [52:00<2:34:40, 10.15it/s][A
 23%|███████▊                          | 28096/122310 [52:01<2:31:15, 10.38it/s][A
 23%|███████▊                          | 28102/122310 [52:02<2:41:27,  9.72it/s][A
 23%|███████▊                          | 28110/122310 [52:02<2:35:19, 10.11it/s][A
 23%|███████▊                          | 28114/122310 [52:03<2:59:06,  8.77it/s][A
 23%|███████▊                          | 28124/122310 [52:04<2:33:03, 10.26it/s][A
 23%|███████▊                          | 28130/122310 [52:05<2:42:17,  9.67it/s][A
 23%|███████▊                          | 28136/122310 [52:05<2:49:17,  9.27it/s][A
 23%|███████▊                          | 28145/122310 [52:06<2:34:12, 10.18it/s][A
 23%|███████▊                          | 28155/122310 [52:07<2:19:55, 11.21

step: 18320, loss: 70.84524595885483, epoch: 1



 23%|███████▊                          | 28233/122310 [52:13<1:55:51, 13.53it/s][A
 23%|███████▊                          | 28255/122310 [52:14<1:24:18, 18.59it/s][A
 23%|███████▊                          | 28263/122310 [52:15<1:35:58, 16.33it/s][A
 23%|███████▊                          | 28273/122310 [52:15<1:40:33, 15.59it/s][A
 23%|███████▊                          | 28283/122310 [52:16<1:44:13, 15.04it/s][A
 23%|███████▊                          | 28291/122310 [52:17<1:53:31, 13.80it/s][A
 23%|███████▊                          | 28299/122310 [52:18<2:00:26, 13.01it/s][A
 23%|███████▊                          | 28304/122310 [52:18<2:20:24, 11.16it/s][A
 23%|███████▊                          | 28317/122310 [52:19<1:59:26, 13.12it/s][A
 23%|███████▊                          | 28323/122310 [52:20<2:13:57, 11.69it/s][A
 23%|███████▉                          | 28330/122310 [52:21<2:21:45, 11.05it/s][A
 23%|███████▉                          | 28338/122310 [52:21<2:21:40, 11.05

step: 18340, loss: 100.00919444434494, epoch: 1



 23%|███████▉                          | 28410/122310 [52:28<2:47:35,  9.34it/s][A
 23%|███████▉                          | 28418/122310 [52:29<2:38:36,  9.87it/s][A
 23%|███████▉                          | 28430/122310 [52:29<2:11:45, 11.88it/s][A
 23%|███████▉                          | 28448/122310 [52:30<1:39:23, 15.74it/s][A
 23%|███████▉                          | 28453/122310 [52:31<1:59:28, 13.09it/s][A
 23%|███████▉                          | 28457/122310 [52:31<2:24:28, 10.83it/s][A
 23%|███████▉                          | 28459/122310 [52:32<3:05:34,  8.43it/s][A
 23%|███████▉                          | 28470/122310 [52:33<2:29:33, 10.46it/s][A
 23%|███████▉                          | 28480/122310 [52:34<2:16:43, 11.44it/s][A
 23%|███████▉                          | 28486/122310 [52:34<2:28:57, 10.50it/s][A
 23%|███████▉                          | 28492/122310 [52:35<2:39:20,  9.81it/s][A
 23%|███████▉                          | 28496/122310 [52:36<3:03:25,  8.52

step: 18360, loss: 89.02762047976819, epoch: 1



 23%|███████▉                          | 28566/122310 [52:42<2:26:18, 10.68it/s][A
 23%|███████▉                          | 28579/122310 [52:43<2:01:42, 12.84it/s][A
 23%|███████▉                          | 28584/122310 [52:44<2:21:20, 11.05it/s][A
 23%|███████▉                          | 28591/122310 [52:45<2:26:14, 10.68it/s][A
 23%|███████▉                          | 28610/122310 [52:45<1:41:58, 15.31it/s][A
 23%|███████▉                          | 28618/122310 [52:46<1:51:24, 14.02it/s][A
 23%|███████▉                          | 28629/122310 [52:47<1:49:10, 14.30it/s][A
 23%|███████▉                          | 28638/122310 [52:47<1:53:17, 13.78it/s][A
 23%|███████▉                          | 28644/122310 [52:48<2:08:55, 12.11it/s][A
 23%|███████▉                          | 28646/122310 [52:49<2:48:28,  9.27it/s][A
 23%|███████▉                          | 28658/122310 [52:50<2:16:10, 11.46it/s][A
 23%|███████▉                          | 28660/122310 [52:50<2:56:25,  8.85

step: 18380, loss: 72.64348197962285, epoch: 1



 24%|███████▉                          | 28743/122310 [52:57<2:25:21, 10.73it/s][A
 24%|███████▉                          | 28747/122310 [52:58<2:49:57,  9.17it/s][A
 24%|███████▉                          | 28756/122310 [52:58<2:33:47, 10.14it/s][A
 24%|███████▉                          | 28764/122310 [52:59<2:29:39, 10.42it/s][A
 24%|███████▉                          | 28775/122310 [53:00<2:11:34, 11.85it/s][A
 24%|████████                          | 28786/122310 [53:01<2:01:18, 12.85it/s][A
 24%|████████                          | 28788/122310 [53:01<2:38:27,  9.84it/s][A
 24%|████████                          | 28796/122310 [53:02<2:33:54, 10.13it/s][A
 24%|████████                          | 28804/122310 [53:03<2:30:09, 10.38it/s][A
 24%|████████                          | 28811/122310 [53:03<2:33:39, 10.14it/s][A
 24%|████████                          | 28822/122310 [53:04<2:13:36, 11.66it/s][A
 24%|████████                          | 28831/122310 [53:05<2:10:57, 11.90

step: 18400, loss: 92.25020110660826, epoch: 1
sim1 and sim2 are 0.24618666250826116, 0.1636683242955942
cosine of pred and queen: 0.6181112736026444
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: largement
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: group
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: germany
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: ma


 24%|████████                          | 28894/122310 [53:27<2:10:15, 11.95it/s][A

Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: group
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: official
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.24444444444444444
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: emblem
Actual: vanish:vanishes::eat:eats, pred: let
Actual: sing:sings::shuffle:shuffles, pred: emblem
Actual: sit:sits::go:goes, pred: emblem
Actual: say:says::provide:provides, pred: emblem
Actual: scream:screams::sing:sings, pred: asketh
Actual: play:plays::listen:listens, pred


 24%|███████▊                         | 28908/122310 [54:25<58:12:20,  2.24s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.09467455621301775



 24%|███████▊                         | 28916/122310 [54:26<44:12:26,  1.70s/it][A
 24%|███████▊                         | 28929/122310 [54:26<28:28:38,  1.10s/it][A
 24%|███████▊                         | 28938/122310 [54:27<21:26:08,  1.21it/s][A
 24%|███████▊                         | 28948/122310 [54:28<15:36:12,  1.66it/s][A
 24%|███████▊                         | 28962/122310 [54:29<10:17:03,  2.52it/s][A
 24%|████████                          | 28969/122310 [54:29<8:40:20,  2.99it/s][A
 24%|████████                          | 28975/122310 [54:30<7:31:25,  3.45it/s][A
 24%|████████                          | 28982/122310 [54:31<6:17:41,  4.12it/s][A
 24%|████████                          | 28988/122310 [54:32<5:32:05,  4.68it/s][A
 24%|████████                          | 28994/122310 [54:32<4:54:36,  5.28it/s][A
 24%|████████                          | 28996/122310 [54:33<5:23:59,  4.80it/s][A
 24%|████████                          | 29002/122310 [54:34<4:40:38,  5.54

step: 18420, loss: 76.16696646037155, epoch: 1



 24%|████████                          | 29056/122310 [54:40<3:09:52,  8.19it/s][A
 24%|████████                          | 29061/122310 [54:41<3:18:33,  7.83it/s][A
 24%|████████                          | 29067/122310 [54:42<3:14:44,  7.98it/s][A
 24%|████████                          | 29073/122310 [54:42<3:12:19,  8.08it/s][A
 24%|████████                          | 29080/122310 [54:43<3:01:05,  8.58it/s][A
 24%|████████                          | 29090/122310 [54:44<2:33:43, 10.11it/s][A
 24%|████████                          | 29092/122310 [54:44<3:14:33,  7.99it/s][A
 24%|████████                          | 29099/122310 [54:45<3:03:30,  8.47it/s][A
 24%|████████                          | 29111/122310 [54:46<2:22:15, 10.92it/s][A
 24%|████████                          | 29120/122310 [54:47<2:16:26, 11.38it/s][A
 24%|████████                          | 29127/122310 [54:47<2:23:04, 10.85it/s][A
 24%|████████                          | 29135/122310 [54:48<2:21:56, 10.94

step: 18440, loss: 73.44912796321034, epoch: 1



 24%|████████                          | 29210/122310 [54:55<2:13:33, 11.62it/s][A
 24%|████████                          | 29217/122310 [54:55<2:19:31, 11.12it/s][A
 24%|████████                          | 29226/122310 [54:56<2:14:43, 11.51it/s][A
 24%|████████▏                         | 29235/122310 [54:57<2:12:02, 11.75it/s][A
 24%|████████▏                         | 29253/122310 [54:57<1:39:38, 15.57it/s][A
 24%|████████▏                         | 29262/122310 [54:58<1:45:39, 14.68it/s][A
 24%|████████▏                         | 29264/122310 [54:59<2:18:51, 11.17it/s][A
 24%|████████▏                         | 29275/122310 [55:00<2:05:25, 12.36it/s][A
 24%|████████▏                         | 29280/122310 [55:00<2:24:58, 10.69it/s][A
 24%|████████▏                         | 29287/122310 [55:01<2:29:38, 10.36it/s][A
 24%|████████▏                         | 29291/122310 [55:02<2:53:45,  8.92it/s][A
 24%|████████▏                         | 29303/122310 [55:02<2:17:54, 11.24

step: 18460, loss: 76.08399441532845, epoch: 1



 24%|████████▏                         | 29380/122310 [55:09<2:14:31, 11.51it/s][A
 24%|████████▏                         | 29394/122310 [55:10<1:51:26, 13.90it/s][A
 24%|████████▏                         | 29399/122310 [55:10<2:10:58, 11.82it/s][A
 24%|████████▏                         | 29401/122310 [55:11<2:51:01,  9.05it/s][A
 24%|████████▏                         | 29410/122310 [55:12<2:33:59, 10.05it/s][A
 24%|████████▏                         | 29420/122310 [55:13<2:18:38, 11.17it/s][A
 24%|████████▏                         | 29429/122310 [55:13<2:13:43, 11.58it/s][A
 24%|████████▏                         | 29438/122310 [55:14<2:10:44, 11.84it/s][A
 24%|████████▏                         | 29449/122310 [55:15<2:00:04, 12.89it/s][A
 24%|████████▏                         | 29460/122310 [55:15<1:53:45, 13.60it/s][A
 24%|████████▏                         | 29462/122310 [55:16<2:30:48, 10.26it/s][A
 24%|████████▏                         | 29470/122310 [55:17<2:26:42, 10.55

step: 18480, loss: 78.96544661944307, epoch: 1



 24%|████████▏                         | 29538/122310 [55:23<2:41:46,  9.56it/s][A
 24%|████████▏                         | 29555/122310 [55:24<1:51:48, 13.83it/s][A
 24%|████████▏                         | 29562/122310 [55:25<2:02:51, 12.58it/s][A
 24%|████████▏                         | 29572/122310 [55:26<1:58:59, 12.99it/s][A
 24%|████████▏                         | 29583/122310 [55:26<1:53:50, 13.58it/s][A
 24%|████████▏                         | 29592/122310 [55:27<1:56:10, 13.30it/s][A
 24%|████████▏                         | 29609/122310 [55:28<1:34:33, 16.34it/s][A
 24%|████████▏                         | 29613/122310 [55:29<1:57:53, 13.11it/s][A
 24%|████████▏                         | 29627/122310 [55:29<1:43:16, 14.96it/s][A
 24%|████████▏                         | 29635/122310 [55:30<1:51:38, 13.83it/s][A
 24%|████████▏                         | 29647/122310 [55:31<1:45:18, 14.66it/s][A
 24%|████████▏                         | 29653/122310 [55:31<2:01:00, 12.76

step: 18500, loss: 78.44905123676028, epoch: 1



 24%|████████▎                         | 29739/122310 [55:38<1:59:42, 12.89it/s][A
 24%|████████▎                         | 29749/122310 [55:39<1:57:14, 13.16it/s][A
 24%|████████▎                         | 29758/122310 [55:39<1:58:32, 13.01it/s][A
 24%|████████▎                         | 29769/122310 [55:40<1:52:50, 13.67it/s][A
 24%|████████▎                         | 29779/122310 [55:41<1:52:20, 13.73it/s][A
 24%|████████▎                         | 29783/122310 [55:42<2:16:53, 11.26it/s][A
 24%|████████▎                         | 29793/122310 [55:42<2:08:07, 12.03it/s][A
 24%|████████▎                         | 29800/122310 [55:43<2:16:08, 11.33it/s][A
 24%|████████▎                         | 29807/122310 [55:44<2:22:53, 10.79it/s][A
 24%|████████▎                         | 29820/122310 [55:44<1:59:02, 12.95it/s][A
 24%|████████▎                         | 29833/122310 [55:45<1:49:35, 14.06it/s][A
 24%|████████▎                         | 29837/122310 [55:46<2:13:26, 11.55

step: 18520, loss: 80.26942567320559, epoch: 1



 24%|████████▎                         | 29914/122310 [55:52<2:12:52, 11.59it/s][A
 24%|████████▎                         | 29926/122310 [55:53<1:57:17, 13.13it/s][A
 24%|████████▎                         | 29930/122310 [55:54<2:21:58, 10.84it/s][A
 24%|████████▎                         | 29938/122310 [55:55<2:21:16, 10.90it/s][A
 24%|████████▎                         | 29945/122310 [55:55<2:27:21, 10.45it/s][A
 24%|████████▎                         | 29952/122310 [55:56<2:31:26, 10.16it/s][A
 25%|████████▎                         | 29974/122310 [55:57<1:34:59, 16.20it/s][A
 25%|████████▎                         | 29980/122310 [55:58<1:51:47, 13.77it/s][A
 25%|████████▎                         | 29982/122310 [55:58<2:26:41, 10.49it/s][A
 25%|████████▎                         | 29996/122310 [55:59<1:58:18, 13.00it/s][A
 25%|████████▎                         | 30005/122310 [56:00<1:59:14, 12.90it/s][A
 25%|████████▎                         | 30013/122310 [56:00<2:04:39, 12.34

step: 18540, loss: 71.30264487471439, epoch: 1



 25%|████████▎                         | 30071/122310 [56:07<2:39:47,  9.62it/s][A
 25%|████████▎                         | 30079/122310 [56:08<2:32:56, 10.05it/s][A
 25%|████████▎                         | 30087/122310 [56:08<2:29:31, 10.28it/s][A
 25%|████████▎                         | 30101/122310 [56:09<1:58:31, 12.97it/s][A
 25%|████████▎                         | 30108/122310 [56:10<2:07:37, 12.04it/s][A
 25%|████████▎                         | 30120/122310 [56:11<1:54:30, 13.42it/s][A
 25%|████████▍                         | 30129/122310 [56:11<1:56:53, 13.14it/s][A
 25%|████████▍                         | 30147/122310 [56:12<1:32:25, 16.62it/s][A
 25%|████████▍                         | 30155/122310 [56:13<1:43:14, 14.88it/s][A
 25%|████████▍                         | 30159/122310 [56:14<2:06:41, 12.12it/s][A
 25%|████████▍                         | 30168/122310 [56:14<2:06:24, 12.15it/s][A
 25%|████████▍                         | 30172/122310 [56:15<2:30:44, 10.19

step: 18560, loss: 180.27593544230785, epoch: 1



 25%|████████▍                         | 30248/122310 [56:22<2:26:48, 10.45it/s][A
 25%|████████▍                         | 30255/122310 [56:22<2:29:32, 10.26it/s][A
 25%|████████▍                         | 30268/122310 [56:23<2:02:40, 12.50it/s][A
 25%|████████▍                         | 30280/122310 [56:24<1:51:36, 13.74it/s][A
 25%|████████▍                         | 30283/122310 [56:25<3:03:48,  8.34it/s][A
 25%|████████▍                         | 30286/122310 [56:26<3:29:29,  7.32it/s][A
 25%|████████▍                         | 30294/122310 [56:27<3:04:32,  8.31it/s][A
 25%|████████▍                         | 30306/122310 [56:27<2:25:53, 10.51it/s][A
 25%|████████▍                         | 30315/122310 [56:28<2:18:35, 11.06it/s][A
 25%|████████▍                         | 30326/122310 [56:29<2:05:01, 12.26it/s][A
 25%|████████▍                         | 30335/122310 [56:30<2:04:36, 12.30it/s][A
 25%|████████▍                         | 30340/122310 [56:30<2:22:47, 10.73

step: 18580, loss: 78.35607716853141, epoch: 1



 25%|████████▍                         | 30421/122310 [56:36<1:50:46, 13.82it/s][A
 25%|████████▍                         | 30428/122310 [56:37<2:01:47, 12.57it/s][A
 25%|████████▍                         | 30430/122310 [56:38<2:38:37,  9.65it/s][A
 25%|████████▍                         | 30434/122310 [56:38<3:02:08,  8.41it/s][A
 25%|████████▍                         | 30441/122310 [56:39<2:54:17,  8.78it/s][A
 25%|████████▍                         | 30454/122310 [56:40<2:12:41, 11.54it/s][A
 25%|████████▍                         | 30469/122310 [56:40<1:47:00, 14.30it/s][A
 25%|████████▍                         | 30475/122310 [56:41<2:02:13, 12.52it/s][A
 25%|████████▍                         | 30487/122310 [56:42<1:51:23, 13.74it/s][A
 25%|████████▍                         | 30492/122310 [56:43<2:10:34, 11.72it/s][A
 25%|████████▍                         | 30503/122310 [56:43<2:00:08, 12.74it/s][A
 25%|████████▍                         | 30514/122310 [56:44<1:53:39, 13.46

step: 18600, loss: 82.3435086982328, epoch: 1



 25%|████████▌                         | 30618/122310 [56:51<1:22:24, 18.54it/s][A
 25%|████████▌                         | 30633/122310 [56:51<1:19:36, 19.19it/s][A
 25%|████████▌                         | 30648/122310 [56:52<1:17:38, 19.67it/s][A
 25%|████████▌                         | 30659/122310 [56:53<1:23:24, 18.31it/s][A
 25%|████████▌                         | 30672/122310 [56:53<1:23:50, 18.22it/s][A
 25%|████████▌                         | 30681/122310 [56:54<1:32:50, 16.45it/s][A
 25%|████████▌                         | 30686/122310 [56:55<1:51:59, 13.64it/s][A
 25%|████████▌                         | 30698/122310 [56:56<1:45:17, 14.50it/s][A
 25%|████████▌                         | 30703/122310 [56:56<2:05:32, 12.16it/s][A
 25%|████████▌                         | 30711/122310 [56:57<2:09:34, 11.78it/s][A
 25%|████████▌                         | 30713/122310 [56:58<2:47:47,  9.10it/s][A
 25%|████████▌                         | 30719/122310 [56:59<2:52:08,  8.87

step: 18620, loss: 94.23406472000016, epoch: 1



 25%|████████▌                         | 30800/122310 [57:05<1:52:17, 13.58it/s][A
 25%|████████▌                         | 30806/122310 [57:06<2:06:16, 12.08it/s][A
 25%|████████▌                         | 30820/122310 [57:07<1:47:08, 14.23it/s][A
 25%|████████▌                         | 30828/122310 [57:07<1:54:40, 13.30it/s][A
 25%|████████▌                         | 30835/122310 [57:08<2:04:56, 12.20it/s][A
 25%|████████▌                         | 30839/122310 [57:09<2:29:41, 10.18it/s][A
 25%|████████▌                         | 30842/122310 [57:09<3:01:52,  8.38it/s][A
 25%|████████▌                         | 30848/122310 [57:10<3:02:38,  8.35it/s][A
 25%|████████▌                         | 30851/122310 [57:11<3:34:54,  7.09it/s][A
 25%|████████▌                         | 30863/122310 [57:12<2:33:46,  9.91it/s][A
 25%|████████▌                         | 30868/122310 [57:12<2:48:48,  9.03it/s][A
 25%|████████▌                         | 30872/122310 [57:13<3:11:30,  7.96

step: 18640, loss: 79.7491183696963, epoch: 1



 25%|████████▌                         | 30966/122310 [57:20<1:37:10, 15.67it/s][A
 25%|████████▌                         | 30974/122310 [57:20<1:46:29, 14.29it/s][A
 25%|████████▌                         | 30982/122310 [57:21<1:53:53, 13.36it/s][A
 25%|████████▌                         | 30996/122310 [57:22<1:40:26, 15.15it/s][A
 25%|████████▌                         | 31012/122310 [57:22<1:28:40, 17.16it/s][A
 25%|████████▌                         | 31019/122310 [57:23<1:42:02, 14.91it/s][A
 25%|████████▋                         | 31033/122310 [57:24<1:33:43, 16.23it/s][A
 25%|████████▋                         | 31048/122310 [57:25<1:26:42, 17.54it/s][A
 25%|████████▋                         | 31061/122310 [57:25<1:26:23, 17.60it/s][A
 25%|████████▋                         | 31070/122310 [57:26<1:34:57, 16.01it/s][A
 25%|████████▋                         | 31079/122310 [57:28<2:12:31, 11.47it/s][A
 25%|████████▋                         | 31085/122310 [57:28<2:21:39, 10.73

step: 18660, loss: 77.633725177925, epoch: 1



 25%|████████▋                         | 31160/122310 [57:34<1:48:59, 13.94it/s][A
 25%|████████▋                         | 31171/122310 [57:35<1:45:54, 14.34it/s][A
 25%|████████▋                         | 31179/122310 [57:36<1:53:39, 13.36it/s][A
 25%|████████▋                         | 31184/122310 [57:36<2:12:53, 11.43it/s][A
 26%|████████▋                         | 31190/122310 [57:37<2:25:37, 10.43it/s][A
 26%|████████▋                         | 31193/122310 [57:38<2:57:41,  8.55it/s][A
 26%|████████▋                         | 31197/122310 [57:38<3:18:35,  7.65it/s][A
 26%|████████▋                         | 31201/122310 [57:39<3:37:06,  6.99it/s][A
 26%|████████▋                         | 31210/122310 [57:40<2:56:22,  8.61it/s][A
 26%|████████▋                         | 31214/122310 [57:41<3:17:40,  7.68it/s][A
 26%|████████▋                         | 31217/122310 [57:41<3:49:23,  6.62it/s][A
 26%|████████▋                         | 31227/122310 [57:42<2:53:03,  8.77

step: 18680, loss: 82.06595769833129, epoch: 1



 26%|████████▋                         | 31286/122310 [57:49<3:53:57,  6.48it/s][A
 26%|████████▋                         | 31296/122310 [57:49<2:54:37,  8.69it/s][A
 26%|████████▋                         | 31304/122310 [57:50<2:41:28,  9.39it/s][A
 26%|████████▋                         | 31314/122310 [57:51<2:21:44, 10.70it/s][A
 26%|████████▋                         | 31324/122310 [57:52<2:10:20, 11.63it/s][A
 26%|████████▋                         | 31340/122310 [57:52<1:42:36, 14.78it/s][A
 26%|████████▋                         | 31349/122310 [57:53<1:48:04, 14.03it/s][A
 26%|████████▋                         | 31360/122310 [57:54<1:45:37, 14.35it/s][A
 26%|████████▋                         | 31374/122310 [57:54<1:35:29, 15.87it/s][A
 26%|████████▋                         | 31385/122310 [57:55<1:37:25, 15.55it/s][A
 26%|████████▋                         | 31388/122310 [57:56<2:04:59, 12.12it/s][A
 26%|████████▋                         | 31397/122310 [57:57<2:04:38, 12.16

step: 18700, loss: 73.62130955894354, epoch: 1



 26%|████████▊                         | 31484/122310 [58:03<2:18:17, 10.95it/s][A
 26%|████████▊                         | 31495/122310 [58:04<2:04:04, 12.20it/s][A
 26%|████████▊                         | 31498/122310 [58:05<2:34:24,  9.80it/s][A
 26%|████████▊                         | 31510/122310 [58:05<2:07:59, 11.82it/s][A
 26%|████████▊                         | 31526/122310 [58:06<1:41:46, 14.87it/s][A
 26%|████████▊                         | 31530/122310 [58:07<2:05:20, 12.07it/s][A
 26%|████████▊                         | 31546/122310 [58:08<1:40:23, 15.07it/s][A
 26%|████████▊                         | 31550/122310 [58:08<2:03:45, 12.22it/s][A
 26%|████████▊                         | 31570/122310 [58:09<1:29:50, 16.83it/s][A
 26%|████████▊                         | 31580/122310 [58:10<1:35:04, 15.90it/s][A
 26%|████████▊                         | 31594/122310 [58:10<1:29:18, 16.93it/s][A
 26%|████████▊                         | 31601/122310 [58:11<1:43:06, 14.66

step: 18720, loss: 95.87184216958786, epoch: 1



 26%|████████▊                         | 31659/122310 [58:18<3:06:59,  8.08it/s][A
 26%|████████▊                         | 31664/122310 [58:19<3:15:12,  7.74it/s][A
 26%|████████▊                         | 31675/122310 [58:19<2:31:50,  9.95it/s][A
 26%|████████▊                         | 31678/122310 [58:20<3:03:35,  8.23it/s][A
 26%|████████▊                         | 31683/122310 [58:21<3:12:40,  7.84it/s][A
 26%|████████▊                         | 31693/122310 [58:21<2:37:09,  9.61it/s][A
 26%|████████▊                         | 31702/122310 [58:22<2:24:23, 10.46it/s][A
 26%|████████▊                         | 31712/122310 [58:23<2:11:52, 11.45it/s][A
 26%|████████▊                         | 31721/122310 [58:24<2:08:40, 11.73it/s][A
 26%|████████▊                         | 31732/122310 [58:24<1:58:27, 12.74it/s][A
 26%|████████▊                         | 31739/122310 [58:25<2:07:27, 11.84it/s][A
 26%|████████▊                         | 31746/122310 [58:26<2:14:51, 11.19

step: 18740, loss: 80.20072333458785, epoch: 1



 26%|████████▊                         | 31832/122310 [58:32<1:53:03, 13.34it/s][A
 26%|████████▊                         | 31840/122310 [58:33<1:59:01, 12.67it/s][A
 26%|████████▊                         | 31849/122310 [58:34<1:59:52, 12.58it/s][A
 26%|████████▊                         | 31855/122310 [58:34<2:13:28, 11.29it/s][A
 26%|████████▊                         | 31867/122310 [58:35<1:57:01, 12.88it/s][A
 26%|████████▊                         | 31883/122310 [58:36<1:36:27, 15.62it/s][A
 26%|████████▊                         | 31888/122310 [58:37<1:55:40, 13.03it/s][A
 26%|████████▊                         | 31898/122310 [58:37<1:53:36, 13.26it/s][A
 26%|████████▊                         | 31913/122310 [58:38<1:37:16, 15.49it/s][A
 26%|████████▉                         | 31930/122310 [58:39<1:24:16, 17.88it/s][A
 26%|████████▉                         | 31933/122310 [58:40<1:50:18, 13.65it/s][A
 26%|████████▉                         | 31937/122310 [58:40<2:13:39, 11.27

step: 18760, loss: 87.94078641029115, epoch: 1



 26%|████████▉                         | 32033/122310 [58:47<1:34:11, 15.98it/s][A
 26%|████████▉                         | 32035/122310 [58:48<2:04:57, 12.04it/s][A
 26%|████████▉                         | 32041/122310 [58:48<2:17:42, 10.92it/s][A
 26%|████████▉                         | 32056/122310 [58:49<1:48:28, 13.87it/s][A
 26%|████████▉                         | 32073/122310 [58:50<1:29:52, 16.73it/s][A
 26%|████████▉                         | 32076/122310 [58:50<1:55:46, 12.99it/s][A
 26%|████████▉                         | 32085/122310 [58:51<1:57:42, 12.77it/s][A
 26%|████████▉                         | 32098/122310 [58:52<1:44:45, 14.35it/s][A
 26%|████████▉                         | 32102/122310 [58:53<2:09:00, 11.65it/s][A
 26%|████████▉                         | 32106/122310 [58:53<2:34:11,  9.75it/s][A
 26%|████████▉                         | 32113/122310 [58:55<3:21:04,  7.48it/s][A
 26%|████████▉                         | 32120/122310 [58:56<3:07:29,  8.02

step: 18780, loss: 79.53381953966014, epoch: 1



 26%|████████▉                         | 32187/122310 [59:01<2:00:04, 12.51it/s][A
 26%|████████▉                         | 32199/122310 [59:02<1:49:29, 13.72it/s][A
 26%|████████▉                         | 32210/122310 [59:03<1:45:50, 14.19it/s][A
 26%|████████▉                         | 32220/122310 [59:04<1:46:52, 14.05it/s][A
 26%|████████▉                         | 32231/122310 [59:04<1:44:21, 14.39it/s][A
 26%|████████▉                         | 32240/122310 [59:05<1:49:31, 13.71it/s][A
 26%|████████▉                         | 32244/122310 [59:06<2:13:47, 11.22it/s][A
 26%|████████▉                         | 32247/122310 [59:06<2:44:44,  9.11it/s][A
 26%|████████▉                         | 32256/122310 [59:07<2:28:56, 10.08it/s][A
 26%|████████▉                         | 32270/122310 [59:08<1:56:53, 12.84it/s][A
 26%|████████▉                         | 32278/122310 [59:09<2:01:38, 12.34it/s][A
 26%|████████▉                         | 32285/122310 [59:09<2:10:41, 11.48

step: 18800, loss: 87.20981031274317, epoch: 1
sim1 and sim2 are 0.17060074249656113, 0.19234885102165014
cosine of pred and queen: 0.6198664356136644
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: argentina
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: germany
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: vallecillo
Actual: m


 26%|████████▉                         | 32337/122310 [59:27<2:13:12, 11.26it/s][A

Actual: syria:arabic::australia:english, pred: football
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: currency
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: first
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: led
Ac


 26%|████████▏                      | 32354/122310 [1:00:30<53:17:21,  2.13s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.10059171597633136



 26%|████████▏                      | 32357/122310 [1:00:30<48:14:51,  1.93s/it][A
 26%|████████▏                      | 32359/122310 [1:00:31<44:38:22,  1.79s/it][A
 26%|████████▏                      | 32365/122310 [1:00:32<32:51:10,  1.31s/it][A
 26%|████████▏                      | 32367/122310 [1:00:32<30:00:15,  1.20s/it][A
 26%|████████▏                      | 32379/122310 [1:00:33<15:37:24,  1.60it/s][A
 26%|████████▍                       | 32393/122310 [1:00:34<9:04:12,  2.75it/s][A
 26%|████████▍                       | 32405/122310 [1:00:35<6:21:00,  3.93it/s][A
 26%|████████▍                       | 32411/122310 [1:00:35<5:39:31,  4.41it/s][A
 27%|████████▍                       | 32417/122310 [1:00:36<5:03:33,  4.94it/s][A
 27%|████████▍                       | 32427/122310 [1:00:37<3:54:37,  6.38it/s][A
 27%|████████▍                       | 32443/122310 [1:00:37<2:40:28,  9.33it/s][A
 27%|████████▍                       | 32449/122310 [1:00:38<2:44:21,  9.11

step: 18820, loss: 80.52631856830659, epoch: 1



 27%|████████▌                       | 32515/122310 [1:00:45<2:03:41, 12.10it/s][A
 27%|████████▌                       | 32525/122310 [1:00:45<1:58:23, 12.64it/s][A
 27%|████████▌                       | 32532/122310 [1:00:46<2:06:34, 11.82it/s][A
 27%|████████▌                       | 32545/122310 [1:00:47<1:49:28, 13.67it/s][A
 27%|████████▌                       | 32555/122310 [1:00:47<1:48:48, 13.75it/s][A
 27%|████████▌                       | 32563/122310 [1:00:48<1:55:21, 12.97it/s][A
 27%|████████▌                       | 32574/122310 [1:00:49<1:49:35, 13.65it/s][A
 27%|████████▌                       | 32584/122310 [1:00:50<1:49:39, 13.64it/s][A
 27%|████████▌                       | 32588/122310 [1:00:50<2:13:41, 11.18it/s][A
 27%|████████▌                       | 32593/122310 [1:00:51<2:30:03,  9.96it/s][A
 27%|████████▌                       | 32600/122310 [1:00:52<2:31:28,  9.87it/s][A
 27%|████████▌                       | 32604/122310 [1:00:53<2:55:23,  8.52

step: 18840, loss: 78.00328809897754, epoch: 1



 27%|████████▌                       | 32663/122310 [1:00:59<2:26:19, 10.21it/s][A
 27%|████████▌                       | 32668/122310 [1:01:00<2:41:31,  9.25it/s][A
 27%|████████▌                       | 32677/122310 [1:01:00<2:25:50, 10.24it/s][A
 27%|████████▌                       | 32687/122310 [1:01:02<2:50:48,  8.74it/s][A
 27%|████████▌                       | 32690/122310 [1:01:03<3:16:04,  7.62it/s][A
 27%|████████▌                       | 32701/122310 [1:01:03<2:35:21,  9.61it/s][A
 27%|████████▌                       | 32707/122310 [1:01:04<2:40:58,  9.28it/s][A
 27%|████████▌                       | 32717/122310 [1:01:05<2:21:53, 10.52it/s][A
 27%|████████▌                       | 32727/122310 [1:01:06<2:09:37, 11.52it/s][A
 27%|████████▌                       | 32736/122310 [1:01:06<2:06:22, 11.81it/s][A
 27%|████████▌                       | 32744/122310 [1:01:07<2:09:06, 11.56it/s][A
 27%|████████▌                       | 32751/122310 [1:01:08<2:16:06, 10.97

step: 18860, loss: 76.03529237690262, epoch: 1



 27%|████████▌                       | 32817/122310 [1:01:13<2:07:01, 11.74it/s][A
 27%|████████▌                       | 32825/122310 [1:01:14<2:09:03, 11.56it/s][A
 27%|████████▌                       | 32834/122310 [1:01:15<2:06:01, 11.83it/s][A
 27%|████████▌                       | 32850/122310 [1:01:16<1:39:55, 14.92it/s][A
 27%|████████▌                       | 32861/122310 [1:01:16<1:38:55, 15.07it/s][A
 27%|████████▌                       | 32871/122310 [1:01:17<1:41:46, 14.65it/s][A
 27%|████████▌                       | 32875/122310 [1:01:18<2:06:01, 11.83it/s][A
 27%|████████▌                       | 32880/122310 [1:01:19<2:23:49, 10.36it/s][A
 27%|████████▌                       | 32889/122310 [1:01:19<2:16:31, 10.92it/s][A
 27%|████████▌                       | 32899/122310 [1:01:20<2:06:28, 11.78it/s][A
 27%|████████▌                       | 32912/122310 [1:01:21<1:49:52, 13.56it/s][A
 27%|████████▌                       | 32917/122310 [1:01:21<2:09:17, 11.52

step: 18880, loss: 71.2449134007674, epoch: 1



 27%|████████▋                       | 32986/122310 [1:01:28<2:12:11, 11.26it/s][A
 27%|████████▋                       | 32992/122310 [1:01:29<2:23:38, 10.36it/s][A
 27%|████████▋                       | 33003/122310 [1:01:30<2:06:34, 11.76it/s][A
 27%|████████▋                       | 33006/122310 [1:01:30<2:37:24,  9.46it/s][A
 27%|████████▋                       | 33015/122310 [1:01:31<2:24:28, 10.30it/s][A
 27%|████████▋                       | 33026/122310 [1:01:32<2:07:03, 11.71it/s][A
 27%|████████▋                       | 33037/122310 [1:01:32<1:57:27, 12.67it/s][A
 27%|████████▋                       | 33039/122310 [1:01:33<2:33:34,  9.69it/s][A
 27%|████████▋                       | 33040/122310 [1:01:34<3:27:27,  7.17it/s][A
 27%|████████▋                       | 33041/122310 [1:01:35<4:33:02,  5.45it/s][A
 27%|████████▋                       | 33048/122310 [1:01:35<3:42:30,  6.69it/s][A
 27%|████████▋                       | 33056/122310 [1:01:36<3:07:22,  7.94

step: 18900, loss: 95.39929715989427, epoch: 1



 27%|████████▋                       | 33132/122310 [1:01:43<1:51:36, 13.32it/s][A
 27%|████████▋                       | 33134/122310 [1:01:43<2:25:45, 10.20it/s][A
 27%|████████▋                       | 33138/122310 [1:01:44<2:49:47,  8.75it/s][A
 27%|████████▋                       | 33142/122310 [1:01:45<3:10:45,  7.79it/s][A
 27%|████████▋                       | 33148/122310 [1:01:46<3:07:55,  7.91it/s][A
 27%|████████▋                       | 33160/122310 [1:01:46<2:22:26, 10.43it/s][A
 27%|████████▋                       | 33167/122310 [1:01:47<2:25:59, 10.18it/s][A
 27%|████████▋                       | 33182/122310 [1:01:48<1:52:07, 13.25it/s][A
 27%|████████▋                       | 33189/122310 [1:01:49<2:02:35, 12.12it/s][A
 27%|████████▋                       | 33196/122310 [1:01:49<2:11:05, 11.33it/s][A
 27%|████████▋                       | 33204/122310 [1:01:50<2:12:35, 11.20it/s][A
 27%|████████▋                       | 33212/122310 [1:01:51<2:13:36, 11.11

step: 18920, loss: 105.43294398377598, epoch: 1



 27%|████████▋                       | 33303/122310 [1:01:57<1:37:17, 15.25it/s][A
 27%|████████▋                       | 33314/122310 [1:01:58<1:37:35, 15.20it/s][A
 27%|████████▋                       | 33324/122310 [1:01:59<1:41:37, 14.59it/s][A
 27%|████████▋                       | 33329/122310 [1:02:00<2:00:56, 12.26it/s][A
 27%|████████▋                       | 33331/122310 [1:02:00<2:36:26,  9.48it/s][A
 27%|████████▋                       | 33345/122310 [1:02:01<2:00:20, 12.32it/s][A
 27%|████████▋                       | 33353/122310 [1:02:02<2:04:32, 11.90it/s][A
 27%|████████▋                       | 33358/122310 [1:02:03<2:22:42, 10.39it/s][A
 27%|████████▋                       | 33368/122310 [1:02:03<2:10:30, 11.36it/s][A
 27%|████████▋                       | 33381/122310 [1:02:04<1:51:46, 13.26it/s][A
 27%|████████▋                       | 33394/122310 [1:02:05<1:41:35, 14.59it/s][A
 27%|████████▋                       | 33406/122310 [1:02:06<1:38:42, 15.01

step: 18940, loss: 79.04738973155422, epoch: 1



 27%|████████▊                       | 33477/122310 [1:02:12<2:13:13, 11.11it/s][A
 27%|████████▊                       | 33483/122310 [1:02:13<2:24:37, 10.24it/s][A
 27%|████████▊                       | 33497/122310 [1:02:14<1:54:43, 12.90it/s][A
 27%|████████▊                       | 33503/122310 [1:02:14<2:08:56, 11.48it/s][A
 27%|████████▊                       | 33518/122310 [1:02:15<1:44:25, 14.17it/s][A
 27%|████████▊                       | 33533/122310 [1:02:16<1:32:11, 16.05it/s][A
 27%|████████▊                       | 33543/122310 [1:02:17<1:36:32, 15.32it/s][A
 27%|████████▊                       | 33551/122310 [1:02:17<1:45:59, 13.96it/s][A
 27%|████████▊                       | 33559/122310 [1:02:18<1:54:04, 12.97it/s][A
 27%|████████▊                       | 33566/122310 [1:02:19<2:03:48, 11.95it/s][A
 27%|████████▊                       | 33573/122310 [1:02:20<2:12:40, 11.15it/s][A
 27%|████████▊                       | 33587/122310 [1:02:20<1:49:31, 13.50

step: 18960, loss: 81.01841918227525, epoch: 1



 28%|████████▊                       | 33651/122310 [1:02:27<2:10:02, 11.36it/s][A
 28%|████████▊                       | 33656/122310 [1:02:28<2:26:04, 10.12it/s][A
 28%|████████▊                       | 33660/122310 [1:02:28<2:47:07,  8.84it/s][A
 28%|████████▊                       | 33667/122310 [1:02:29<2:43:25,  9.04it/s][A
 28%|████████▊                       | 33674/122310 [1:02:30<2:41:09,  9.17it/s][A
 28%|████████▊                       | 33682/122310 [1:02:31<2:32:55,  9.66it/s][A
 28%|████████▊                       | 33687/122310 [1:02:31<2:48:17,  8.78it/s][A
 28%|████████▊                       | 33706/122310 [1:02:32<1:46:46, 13.83it/s][A
 28%|████████▊                       | 33709/122310 [1:02:33<2:15:27, 10.90it/s][A
 28%|████████▊                       | 33715/122310 [1:02:34<2:25:43, 10.13it/s][A
 28%|████████▊                       | 33732/122310 [1:02:34<1:45:24, 14.01it/s][A
 28%|████████▊                       | 33743/122310 [1:02:35<1:43:26, 14.27

step: 18980, loss: 90.82161739665914, epoch: 1



 28%|████████▊                       | 33821/122310 [1:02:42<1:55:29, 12.77it/s][A
 28%|████████▊                       | 33829/122310 [1:02:42<2:01:02, 12.18it/s][A
 28%|████████▊                       | 33840/122310 [1:02:43<1:53:21, 13.01it/s][A
 28%|████████▊                       | 33846/122310 [1:02:44<2:07:39, 11.55it/s][A
 28%|████████▊                       | 33856/122310 [1:02:45<2:01:01, 12.18it/s][A
 28%|████████▊                       | 33860/122310 [1:02:45<2:25:11, 10.15it/s][A
 28%|████████▊                       | 33867/122310 [1:02:46<2:28:21,  9.94it/s][A
 28%|████████▊                       | 33873/122310 [1:02:47<2:36:15,  9.43it/s][A
 28%|████████▊                       | 33880/122310 [1:02:48<2:36:04,  9.44it/s][A
 28%|████████▊                       | 33889/122310 [1:02:48<2:23:40, 10.26it/s][A
 28%|████████▊                       | 33905/122310 [1:02:49<1:47:41, 13.68it/s][A
 28%|████████▊                       | 33911/122310 [1:02:50<2:02:15, 12.05

step: 19000, loss: 92.79087991575435, epoch: 1
saving weights



 28%|████████▉                       | 33998/122310 [1:02:57<2:28:55,  9.88it/s][A
 28%|████████▉                       | 34002/122310 [1:02:57<2:47:11,  8.80it/s][A
 28%|████████▉                       | 34013/122310 [1:02:58<2:22:01, 10.36it/s][A
 28%|████████▉                       | 34025/122310 [1:02:59<2:02:59, 11.96it/s][A
 28%|████████▉                       | 34035/122310 [1:03:00<1:58:18, 12.44it/s][A
 28%|████████▉                       | 34044/122310 [1:03:00<1:58:52, 12.38it/s][A
 28%|████████▉                       | 34051/122310 [1:03:01<2:08:11, 11.48it/s][A
 28%|████████▉                       | 34054/122310 [1:03:02<2:36:43,  9.39it/s][A
 28%|████████▉                       | 34067/122310 [1:03:02<2:04:22, 11.82it/s][A
 28%|████████▉                       | 34077/122310 [1:03:03<1:59:18, 12.33it/s][A
 28%|████████▉                       | 34085/122310 [1:03:04<2:03:25, 11.91it/s][A
 28%|████████▉                       | 34101/122310 [1:03:05<1:39:06, 14.83

step: 19020, loss: 80.75782824391717, epoch: 1



 28%|████████▉                       | 34190/122310 [1:03:12<1:53:38, 12.92it/s][A
 28%|████████▉                       | 34199/122310 [1:03:13<1:55:53, 12.67it/s][A
 28%|████████▉                       | 34206/122310 [1:03:13<2:04:42, 11.77it/s][A
 28%|████████▉                       | 34216/122310 [1:03:14<1:58:59, 12.34it/s][A
 28%|████████▉                       | 34232/122310 [1:03:15<1:37:12, 15.10it/s][A
 28%|████████▉                       | 34237/122310 [1:03:16<1:56:43, 12.58it/s][A
 28%|████████▉                       | 34244/122310 [1:03:16<2:05:14, 11.72it/s][A
 28%|████████▉                       | 34254/122310 [1:03:17<2:00:20, 12.20it/s][A
 28%|████████▉                       | 34260/122310 [1:03:18<2:12:46, 11.05it/s][A
 28%|████████▉                       | 34269/122310 [1:03:19<2:08:30, 11.42it/s][A
 28%|████████▉                       | 34276/122310 [1:03:19<2:15:24, 10.84it/s][A
 28%|████████▉                       | 34279/122310 [1:03:20<2:47:26,  8.76

step: 19040, loss: 71.40296399966036, epoch: 1



 28%|████████▉                       | 34347/122310 [1:03:27<1:53:50, 12.88it/s][A
 28%|████████▉                       | 34359/122310 [1:03:27<1:45:27, 13.90it/s][A
 28%|████████▉                       | 34363/122310 [1:03:28<2:08:51, 11.38it/s][A
 28%|████████▉                       | 34370/122310 [1:03:29<2:16:11, 10.76it/s][A
 28%|████████▉                       | 34379/122310 [1:03:30<2:10:45, 11.21it/s][A
 28%|████████▉                       | 34386/122310 [1:03:31<2:58:38,  8.20it/s][A
 28%|████████▉                       | 34392/122310 [1:03:32<2:57:50,  8.24it/s][A
 28%|█████████                       | 34403/122310 [1:03:33<2:26:46,  9.98it/s][A
 28%|█████████                       | 34415/122310 [1:03:33<2:04:55, 11.73it/s][A
 28%|█████████                       | 34422/122310 [1:03:34<2:11:45, 11.12it/s][A
 28%|█████████                       | 34433/122310 [1:03:35<1:59:52, 12.22it/s][A
 28%|█████████                       | 34444/122310 [1:03:36<1:53:15, 12.93

step: 19060, loss: 98.19969130285612, epoch: 1



 28%|█████████                       | 34506/122310 [1:03:41<2:07:55, 11.44it/s][A
 28%|█████████                       | 34511/122310 [1:03:42<2:24:40, 10.11it/s][A
 28%|█████████                       | 34518/122310 [1:03:43<2:27:23,  9.93it/s][A
 28%|█████████                       | 34522/122310 [1:03:44<2:49:19,  8.64it/s][A
 28%|█████████                       | 34527/122310 [1:03:44<3:00:46,  8.09it/s][A
 28%|█████████                       | 34531/122310 [1:03:45<3:20:24,  7.30it/s][A
 28%|█████████                       | 34543/122310 [1:03:46<2:26:10, 10.01it/s][A
 28%|█████████                       | 34554/122310 [1:03:47<2:07:04, 11.51it/s][A
 28%|█████████                       | 34559/122310 [1:03:47<2:25:30, 10.05it/s][A
 28%|█████████                       | 34563/122310 [1:03:48<2:48:30,  8.68it/s][A
 28%|█████████                       | 34569/122310 [1:03:49<2:51:45,  8.51it/s][A
 28%|█████████                       | 34575/122310 [1:03:50<2:53:01,  8.45

step: 19080, loss: 73.15046260164232, epoch: 1



 28%|█████████                       | 34636/122310 [1:03:56<2:58:03,  8.21it/s][A
 28%|█████████                       | 34643/122310 [1:03:57<2:50:07,  8.59it/s][A
 28%|█████████                       | 34650/122310 [1:03:58<3:33:08,  6.85it/s][A
 28%|█████████                       | 34657/122310 [1:03:59<3:14:54,  7.50it/s][A
 28%|█████████                       | 34661/122310 [1:04:00<3:29:31,  6.97it/s][A
 28%|█████████                       | 34672/122310 [1:04:01<2:39:19,  9.17it/s][A
 28%|█████████                       | 34681/122310 [1:04:01<2:26:15,  9.99it/s][A
 28%|█████████                       | 34689/122310 [1:04:02<2:22:30, 10.25it/s][A
 28%|█████████                       | 34696/122310 [1:04:03<2:25:28, 10.04it/s][A
 28%|█████████                       | 34703/122310 [1:04:04<2:27:52,  9.87it/s][A
 28%|█████████                       | 34719/122310 [1:04:04<1:49:17, 13.36it/s][A
 28%|█████████                       | 34726/122310 [1:04:05<1:59:31, 12.21

step: 19100, loss: 77.17387520551503, epoch: 1



 28%|█████████                       | 34790/122310 [1:04:11<2:12:52, 10.98it/s][A
 28%|█████████                       | 34803/122310 [1:04:12<1:52:13, 13.00it/s][A
 28%|█████████                       | 34814/122310 [1:04:12<1:47:20, 13.59it/s][A
 28%|█████████                       | 34821/122310 [1:04:13<1:57:43, 12.39it/s][A
 28%|█████████                       | 34842/122310 [1:04:14<1:24:32, 17.24it/s][A
 28%|█████████                       | 34855/122310 [1:04:15<1:23:52, 17.38it/s][A
 29%|█████████                       | 34866/122310 [1:04:15<1:28:08, 16.54it/s][A
 29%|█████████                       | 34872/122310 [1:04:16<1:43:24, 14.09it/s][A
 29%|█████████▏                      | 34881/122310 [1:04:17<1:48:05, 13.48it/s][A
 29%|█████████▏                      | 34887/122310 [1:04:18<2:02:15, 11.92it/s][A
 29%|█████████▏                      | 34890/122310 [1:04:18<2:32:42,  9.54it/s][A
 29%|█████████▏                      | 34911/122310 [1:04:19<1:35:25, 15.26

step: 19120, loss: 81.09046301154213, epoch: 1



 29%|█████████▏                      | 34975/122310 [1:04:26<1:52:11, 12.97it/s][A
 29%|█████████▏                      | 34984/122310 [1:04:26<1:53:56, 12.77it/s][A
 29%|█████████▏                      | 34987/122310 [1:04:27<2:22:12, 10.23it/s][A
 29%|█████████▏                      | 34992/122310 [1:04:28<2:38:09,  9.20it/s][A
 29%|█████████▏                      | 35002/122310 [1:04:29<2:18:13, 10.53it/s][A
 29%|█████████▏                      | 35014/122310 [1:04:29<1:58:48, 12.25it/s][A
 29%|█████████▏                      | 35023/122310 [1:04:30<1:58:59, 12.23it/s][A
 29%|█████████▏                      | 35033/122310 [1:04:31<1:54:46, 12.67it/s][A
 29%|█████████▏                      | 35037/122310 [1:04:32<2:18:53, 10.47it/s][A
 29%|█████████▏                      | 35045/122310 [1:04:32<2:18:22, 10.51it/s][A
 29%|█████████▏                      | 35048/122310 [1:04:33<2:49:35,  8.58it/s][A
 29%|█████████▏                      | 35052/122310 [1:04:34<3:10:13,  7.64

step: 19140, loss: 76.15895266429274, epoch: 1



 29%|█████████▏                      | 35133/122310 [1:04:40<2:03:53, 11.73it/s][A
 29%|█████████▏                      | 35136/122310 [1:04:41<2:33:00,  9.50it/s][A
 29%|█████████▏                      | 35147/122310 [1:04:42<2:10:30, 11.13it/s][A
 29%|█████████▏                      | 35154/122310 [1:04:43<2:16:23, 10.65it/s][A
 29%|█████████▏                      | 35162/122310 [1:04:43<2:15:37, 10.71it/s][A
 29%|█████████▏                      | 35166/122310 [1:04:44<2:39:22,  9.11it/s][A
 29%|█████████▏                      | 35179/122310 [1:04:45<2:04:16, 11.69it/s][A
 29%|█████████▏                      | 35184/122310 [1:04:46<2:22:48, 10.17it/s][A
 29%|█████████▏                      | 35190/122310 [1:04:46<2:31:57,  9.56it/s][A
 29%|█████████▏                      | 35197/122310 [1:04:47<2:32:06,  9.55it/s][A
 29%|█████████▏                      | 35207/122310 [1:04:48<2:15:22, 10.72it/s][A
 29%|█████████▏                      | 35214/122310 [1:04:49<2:20:28, 10.33

step: 19160, loss: 85.12854116031068, epoch: 1



 29%|█████████▏                      | 35279/122310 [1:04:55<2:13:50, 10.84it/s][A
 29%|█████████▏                      | 35282/122310 [1:04:56<2:43:09,  8.89it/s][A
 29%|█████████▏                      | 35288/122310 [1:04:57<2:47:22,  8.67it/s][A
 29%|█████████▏                      | 35299/122310 [1:04:57<2:17:29, 10.55it/s][A
 29%|█████████▏                      | 35308/122310 [1:04:58<2:11:24, 11.03it/s][A
 29%|█████████▏                      | 35320/122310 [1:04:59<1:55:08, 12.59it/s][A
 29%|█████████▏                      | 35327/122310 [1:05:00<2:04:33, 11.64it/s][A
 29%|█████████▏                      | 35333/122310 [1:05:00<2:16:19, 10.63it/s][A
 29%|█████████▏                      | 35337/122310 [1:05:01<2:39:47,  9.07it/s][A
 29%|█████████▏                      | 35343/122310 [1:05:02<2:44:50,  8.79it/s][A
 29%|█████████▏                      | 35354/122310 [1:05:03<2:16:50, 10.59it/s][A
 29%|█████████▎                      | 35360/122310 [1:05:03<2:27:07,  9.85

step: 19180, loss: 74.74138976489208, epoch: 1



 29%|█████████▎                      | 35443/122310 [1:05:10<2:06:38, 11.43it/s][A
 29%|█████████▎                      | 35448/122310 [1:05:11<2:24:05, 10.05it/s][A
 29%|█████████▎                      | 35456/122310 [1:05:11<2:21:05, 10.26it/s][A
 29%|█████████▎                      | 35462/122310 [1:05:12<2:29:44,  9.67it/s][A
 29%|█████████▎                      | 35471/122310 [1:05:13<2:18:50, 10.42it/s][A
 29%|█████████▎                      | 35473/122310 [1:05:14<2:58:31,  8.11it/s][A
 29%|█████████▎                      | 35486/122310 [1:05:14<2:11:46, 10.98it/s][A
 29%|█████████▎                      | 35492/122310 [1:05:16<3:06:08,  7.77it/s][A
 29%|█████████▎                      | 35493/122310 [1:05:17<3:52:16,  6.23it/s][A
 29%|█████████▎                      | 35503/122310 [1:05:17<2:57:17,  8.16it/s][A
 29%|█████████▎                      | 35511/122310 [1:05:18<2:43:13,  8.86it/s][A
 29%|█████████▎                      | 35517/122310 [1:05:19<2:46:42,  8.68

step: 19200, loss: 71.17253198166124, epoch: 1
sim1 and sim2 are 0.29903376504716733, 0.16597925718302742
cosine of pred and queen: 0.570651623876707
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: group
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahar


 29%|█████████▎                      | 35584/122310 [1:05:37<1:51:53, 12.92it/s][A

Actual: argentina:peso::russia:ruble, pred: currency
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: first
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: belgium
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: italy
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: official
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.26666666666666666
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generat


 29%|█████████                      | 35591/122310 [1:06:40<64:07:37,  2.66s/it][A

Actual: india:rupee::denmark:krone, pred: belgium
Accuracy is 0.08875739644970414



 29%|█████████                      | 35602/122310 [1:06:41<42:22:00,  1.76s/it][A
 29%|█████████                      | 35606/122310 [1:06:41<36:31:56,  1.52s/it][A
 29%|█████████                      | 35610/122310 [1:06:43<31:32:05,  1.31s/it][A
 29%|█████████                      | 35617/122310 [1:06:43<22:31:53,  1.07it/s][A
 29%|█████████                      | 35624/122310 [1:06:44<16:23:43,  1.47it/s][A
 29%|█████████                      | 35629/122310 [1:06:45<13:18:42,  1.81it/s][A
 29%|█████████                      | 35632/122310 [1:06:46<12:02:25,  2.00it/s][A
 29%|█████████▎                      | 35647/122310 [1:06:46<6:04:46,  3.96it/s][A
 29%|█████████▎                      | 35660/122310 [1:06:47<4:09:56,  5.78it/s][A
 29%|█████████▎                      | 35669/122310 [1:06:48<3:32:40,  6.79it/s][A
 29%|█████████▎                      | 35673/122310 [1:06:49<3:39:58,  6.56it/s][A
 29%|█████████▎                      | 35683/122310 [1:06:49<2:59:11,  8.06

step: 19220, loss: 81.41168696838191, epoch: 1



 29%|█████████▎                      | 35773/122310 [1:06:55<1:38:46, 14.60it/s][A
 29%|█████████▎                      | 35777/122310 [1:06:56<2:00:10, 12.00it/s][A
 29%|█████████▎                      | 35789/122310 [1:06:57<1:48:20, 13.31it/s][A
 29%|█████████▎                      | 35799/122310 [1:06:57<1:46:55, 13.48it/s][A
 29%|█████████▎                      | 35807/122310 [1:06:58<1:53:10, 12.74it/s][A
 29%|█████████▎                      | 35820/122310 [1:06:59<1:41:02, 14.27it/s][A
 29%|█████████▎                      | 35830/122310 [1:06:59<1:42:02, 14.12it/s][A
 29%|█████████▍                      | 35840/122310 [1:07:00<1:42:49, 14.02it/s][A
 29%|█████████▍                      | 35850/122310 [1:07:01<1:43:56, 13.86it/s][A
 29%|█████████▍                      | 35858/122310 [1:07:02<1:50:44, 13.01it/s][A
 29%|█████████▍                      | 35877/122310 [1:07:02<1:25:13, 16.90it/s][A
 29%|█████████▍                      | 35880/122310 [1:07:03<1:50:07, 13.08

step: 19240, loss: 91.58665770771175, epoch: 1



 29%|█████████▍                      | 35944/122310 [1:07:10<2:35:21,  9.26it/s][A
 29%|█████████▍                      | 35950/122310 [1:07:10<2:40:14,  8.98it/s][A
 29%|█████████▍                      | 35961/122310 [1:07:11<2:14:36, 10.69it/s][A
 29%|█████████▍                      | 35966/122310 [1:07:12<2:29:34,  9.62it/s][A
 29%|█████████▍                      | 35972/122310 [1:07:13<2:36:22,  9.20it/s][A
 29%|█████████▍                      | 35989/122310 [1:07:13<1:48:02, 13.32it/s][A
 29%|█████████▍                      | 35996/122310 [1:07:14<1:57:34, 12.23it/s][A
 29%|█████████▍                      | 36008/122310 [1:07:15<1:46:46, 13.47it/s][A
 29%|█████████▍                      | 36014/122310 [1:07:15<2:00:38, 11.92it/s][A
 29%|█████████▍                      | 36019/122310 [1:07:16<2:17:57, 10.42it/s][A
 29%|█████████▍                      | 36034/122310 [1:07:17<1:47:08, 13.42it/s][A
 29%|█████████▍                      | 36043/122310 [1:07:18<1:50:26, 13.02

step: 19260, loss: 72.70323545704588, epoch: 1



 30%|█████████▍                      | 36109/122310 [1:07:24<2:33:48,  9.34it/s][A
 30%|█████████▍                      | 36116/122310 [1:07:25<2:33:13,  9.38it/s][A
 30%|█████████▍                      | 36117/122310 [1:07:26<3:24:47,  7.01it/s][A
 30%|█████████▍                      | 36133/122310 [1:07:27<2:05:58, 11.40it/s][A
 30%|█████████▍                      | 36135/122310 [1:07:27<2:43:09,  8.80it/s][A
 30%|█████████▍                      | 36136/122310 [1:07:28<3:37:14,  6.61it/s][A
 30%|█████████▍                      | 36144/122310 [1:07:29<3:02:25,  7.87it/s][A
 30%|█████████▍                      | 36145/122310 [1:07:29<4:01:18,  5.95it/s][A
 30%|█████████▍                      | 36150/122310 [1:07:30<3:51:19,  6.21it/s][A
 30%|█████████▍                      | 36157/122310 [1:07:31<3:18:58,  7.22it/s][A
 30%|█████████▍                      | 36168/122310 [1:07:32<2:30:02,  9.57it/s][A
 30%|█████████▍                      | 36176/122310 [1:07:32<2:24:37,  9.93

step: 19280, loss: 91.71288937530827, epoch: 1



 30%|█████████▍                      | 36249/122310 [1:07:39<2:12:36, 10.82it/s][A
 30%|█████████▍                      | 36258/122310 [1:07:40<2:07:18, 11.27it/s][A
 30%|█████████▍                      | 36266/122310 [1:07:40<2:07:54, 11.21it/s][A
 30%|█████████▍                      | 36280/122310 [1:07:41<1:46:21, 13.48it/s][A
 30%|█████████▍                      | 36292/122310 [1:07:42<1:39:32, 14.40it/s][A
 30%|█████████▍                      | 36305/122310 [1:07:43<1:32:55, 15.42it/s][A
 30%|█████████▌                      | 36311/122310 [1:07:43<1:48:01, 13.27it/s][A
 30%|█████████▌                      | 36327/122310 [1:07:44<1:30:11, 15.89it/s][A
 30%|█████████▌                      | 36334/122310 [1:07:45<1:42:36, 13.97it/s][A
 30%|█████████▌                      | 36348/122310 [1:07:46<1:32:10, 15.54it/s][A
 30%|█████████▌                      | 36359/122310 [1:07:46<1:33:12, 15.37it/s][A
 30%|█████████▌                      | 36366/122310 [1:07:47<1:45:10, 13.62

step: 19300, loss: 105.28205924786296, epoch: 1



 30%|█████████▌                      | 36438/122310 [1:07:54<2:28:17,  9.65it/s][A
 30%|█████████▌                      | 36439/122310 [1:07:54<3:18:34,  7.21it/s][A
 30%|█████████▌                      | 36461/122310 [1:07:55<1:41:49, 14.05it/s][A
 30%|█████████▌                      | 36463/122310 [1:07:56<2:13:49, 10.69it/s][A
 30%|█████████▌                      | 36471/122310 [1:07:57<2:13:06, 10.75it/s][A
 30%|█████████▌                      | 36476/122310 [1:07:57<2:29:58,  9.54it/s][A
 30%|█████████▌                      | 36481/122310 [1:07:58<2:43:27,  8.75it/s][A
 30%|█████████▌                      | 36492/122310 [1:07:59<2:14:39, 10.62it/s][A
 30%|█████████▌                      | 36497/122310 [1:07:59<2:31:03,  9.47it/s][A
 30%|█████████▌                      | 36506/122310 [1:08:00<2:18:23, 10.33it/s][A
 30%|█████████▌                      | 36516/122310 [1:08:01<2:06:06, 11.34it/s][A
 30%|█████████▌                      | 36529/122310 [1:08:02<1:48:24, 13.19

step: 19320, loss: 80.83808183495108, epoch: 1



 30%|█████████▌                      | 36603/122310 [1:08:08<2:14:24, 10.63it/s][A
 30%|█████████▌                      | 36609/122310 [1:08:09<2:24:18,  9.90it/s][A
 30%|█████████▌                      | 36615/122310 [1:08:10<2:31:57,  9.40it/s][A
 30%|█████████▌                      | 36631/122310 [1:08:10<1:48:46, 13.13it/s][A
 30%|█████████▌                      | 36651/122310 [1:08:11<1:22:10, 17.37it/s][A
 30%|█████████▌                      | 36662/122310 [1:08:12<1:25:47, 16.64it/s][A
 30%|█████████▌                      | 36668/122310 [1:08:13<1:41:02, 14.13it/s][A
 30%|█████████▌                      | 36675/122310 [1:08:13<1:51:49, 12.76it/s][A
 30%|█████████▌                      | 36682/122310 [1:08:14<2:01:26, 11.75it/s][A
 30%|█████████▌                      | 36690/122310 [1:08:15<2:03:55, 11.51it/s][A
 30%|█████████▌                      | 36701/122310 [1:08:16<1:53:50, 12.53it/s][A
 30%|█████████▌                      | 36712/122310 [1:08:16<1:47:52, 13.23

step: 19340, loss: 90.803070770421, epoch: 1



 30%|█████████▌                      | 36781/122310 [1:08:23<1:42:12, 13.95it/s][A
 30%|█████████▋                      | 36789/122310 [1:08:24<1:49:03, 13.07it/s][A
 30%|█████████▋                      | 36811/122310 [1:08:24<1:18:53, 18.06it/s][A
 30%|█████████▋                      | 36822/122310 [1:08:25<1:23:35, 17.05it/s][A
 30%|█████████▋                      | 36840/122310 [1:08:26<1:13:54, 19.27it/s][A
 30%|█████████▋                      | 36846/122310 [1:08:27<1:29:19, 15.95it/s][A
 30%|█████████▋                      | 36851/122310 [1:08:27<1:48:17, 13.15it/s][A
 30%|█████████▋                      | 36859/122310 [1:08:28<1:54:45, 12.41it/s][A
 30%|█████████▋                      | 36866/122310 [1:08:29<2:03:30, 11.53it/s][A
 30%|█████████▋                      | 36873/122310 [1:08:30<2:10:02, 10.95it/s][A
 30%|█████████▋                      | 36876/122310 [1:08:30<2:40:34,  8.87it/s][A
 30%|█████████▋                      | 36879/122310 [1:08:31<3:12:33,  7.39

step: 19360, loss: 90.98400079409944, epoch: 1



 30%|█████████▋                      | 36964/122310 [1:08:38<1:52:02, 12.70it/s][A
 30%|█████████▋                      | 36980/122310 [1:08:39<1:32:27, 15.38it/s][A
 30%|█████████▋                      | 36990/122310 [1:08:39<1:35:26, 14.90it/s][A
 30%|█████████▋                      | 37008/122310 [1:08:40<1:20:30, 17.66it/s][A
 30%|█████████▋                      | 37015/122310 [1:08:41<1:33:14, 15.25it/s][A
 30%|█████████▋                      | 37021/122310 [1:08:41<1:48:20, 13.12it/s][A
 30%|█████████▋                      | 37030/122310 [1:08:42<1:50:24, 12.87it/s][A
 30%|█████████▋                      | 37035/122310 [1:08:43<2:08:40, 11.05it/s][A
 30%|█████████▋                      | 37043/122310 [1:08:44<2:09:19, 10.99it/s][A
 30%|█████████▋                      | 37049/122310 [1:08:44<2:20:52, 10.09it/s][A
 30%|█████████▋                      | 37063/122310 [1:08:45<1:51:19, 12.76it/s][A
 30%|█████████▋                      | 37081/122310 [1:08:46<1:27:35, 16.22

step: 19380, loss: 91.12165700228256, epoch: 1



 30%|█████████▋                      | 37147/122310 [1:08:53<2:23:57,  9.86it/s][A
 30%|█████████▋                      | 37163/122310 [1:08:53<1:45:47, 13.41it/s][A
 30%|█████████▋                      | 37172/122310 [1:08:54<1:48:36, 13.06it/s][A
 30%|█████████▋                      | 37183/122310 [1:08:55<1:44:44, 13.54it/s][A
 30%|█████████▋                      | 37190/122310 [1:08:55<1:54:46, 12.36it/s][A
 30%|█████████▋                      | 37205/122310 [1:08:56<1:36:01, 14.77it/s][A
 30%|█████████▋                      | 37208/122310 [1:08:57<2:02:26, 11.58it/s][A
 30%|█████████▋                      | 37212/122310 [1:08:58<2:26:19,  9.69it/s][A
 30%|█████████▋                      | 37216/122310 [1:08:58<2:48:24,  8.42it/s][A
 30%|█████████▋                      | 37225/122310 [1:08:59<2:28:45,  9.53it/s][A
 30%|█████████▋                      | 37231/122310 [1:09:01<3:23:03,  6.98it/s][A
 30%|█████████▋                      | 37242/122310 [1:09:01<2:39:25,  8.89

step: 19400, loss: 88.67199534463536, epoch: 1



 31%|█████████▊                      | 37310/122310 [1:09:07<2:08:09, 11.05it/s][A
 31%|█████████▊                      | 37323/122310 [1:09:08<1:49:00, 12.99it/s][A
 31%|█████████▊                      | 37328/122310 [1:09:09<2:06:57, 11.16it/s][A
 31%|█████████▊                      | 37335/122310 [1:09:10<2:12:48, 10.66it/s][A
 31%|█████████▊                      | 37352/122310 [1:09:10<1:38:53, 14.32it/s][A
 31%|█████████▊                      | 37355/122310 [1:09:11<2:05:09, 11.31it/s][A
 31%|█████████▊                      | 37362/122310 [1:09:12<2:11:55, 10.73it/s][A
 31%|█████████▊                      | 37369/122310 [1:09:12<2:16:47, 10.35it/s][A
 31%|█████████▊                      | 37376/122310 [1:09:13<2:20:48, 10.05it/s][A
 31%|█████████▊                      | 37378/122310 [1:09:14<3:00:10,  7.86it/s][A
 31%|█████████▊                      | 37385/122310 [1:09:15<2:49:22,  8.36it/s][A
 31%|█████████▊                      | 37392/122310 [1:09:15<2:43:22,  8.66

step: 19420, loss: 97.22818310238915, epoch: 1



 31%|█████████▊                      | 37473/122310 [1:09:22<1:49:42, 12.89it/s][A
 31%|█████████▊                      | 37481/122310 [1:09:23<1:54:54, 12.30it/s][A
 31%|█████████▊                      | 37493/122310 [1:09:24<1:44:47, 13.49it/s][A
 31%|█████████▊                      | 37497/122310 [1:09:24<2:07:46, 11.06it/s][A
 31%|█████████▊                      | 37502/122310 [1:09:25<2:24:13,  9.80it/s][A
 31%|█████████▊                      | 37508/122310 [1:09:26<2:32:06,  9.29it/s][A
 31%|█████████▊                      | 37528/122310 [1:09:27<1:36:41, 14.61it/s][A
 31%|█████████▊                      | 37530/122310 [1:09:27<2:07:39, 11.07it/s][A
 31%|█████████▊                      | 37537/122310 [1:09:28<2:13:46, 10.56it/s][A
 31%|█████████▊                      | 37546/122310 [1:09:29<2:07:25, 11.09it/s][A
 31%|█████████▊                      | 37553/122310 [1:09:29<2:13:11, 10.61it/s][A
 31%|█████████▊                      | 37559/122310 [1:09:30<2:23:18,  9.86

step: 19440, loss: 79.55012070118046, epoch: 1



 31%|█████████▊                      | 37654/122310 [1:09:37<1:38:22, 14.34it/s][A
 31%|█████████▊                      | 37664/122310 [1:09:38<1:39:49, 14.13it/s][A
 31%|█████████▊                      | 37675/122310 [1:09:38<1:38:06, 14.38it/s][A
 31%|█████████▊                      | 37684/122310 [1:09:39<1:42:35, 13.75it/s][A
 31%|█████████▊                      | 37690/122310 [1:09:41<2:31:54,  9.28it/s][A
 31%|█████████▊                      | 37696/122310 [1:09:41<2:36:40,  9.00it/s][A
 31%|█████████▊                      | 37706/122310 [1:09:42<2:18:40, 10.17it/s][A
 31%|█████████▊                      | 37713/122310 [1:09:43<2:20:45, 10.02it/s][A
 31%|█████████▊                      | 37719/122310 [1:09:43<2:28:52,  9.47it/s][A
 31%|█████████▊                      | 37729/122310 [1:09:44<2:12:35, 10.63it/s][A
 31%|█████████▊                      | 37733/122310 [1:09:45<2:34:26,  9.13it/s][A
 31%|█████████▊                      | 37741/122310 [1:09:46<2:26:23,  9.63

step: 19460, loss: 110.35062245026566, epoch: 1



 31%|█████████▉                      | 37800/122310 [1:09:52<2:19:45, 10.08it/s][A
 31%|█████████▉                      | 37809/122310 [1:09:52<2:12:07, 10.66it/s][A
 31%|█████████▉                      | 37817/122310 [1:09:53<2:11:17, 10.73it/s][A
 31%|█████████▉                      | 37829/122310 [1:09:54<1:54:55, 12.25it/s][A
 31%|█████████▉                      | 37834/122310 [1:09:55<2:11:58, 10.67it/s][A
 31%|█████████▉                      | 37843/122310 [1:09:55<2:06:51, 11.10it/s][A
 31%|█████████▉                      | 37852/122310 [1:09:56<2:03:18, 11.42it/s][A
 31%|█████████▉                      | 37861/122310 [1:09:57<2:00:46, 11.65it/s][A
 31%|█████████▉                      | 37874/122310 [1:09:58<1:44:57, 13.41it/s][A
 31%|█████████▉                      | 37882/122310 [1:09:58<1:51:23, 12.63it/s][A
 31%|█████████▉                      | 37889/122310 [1:09:59<2:03:27, 11.40it/s][A
 31%|█████████▉                      | 37893/122310 [1:10:00<2:25:48,  9.65

step: 19480, loss: 79.0205957953287, epoch: 1



 31%|█████████▉                      | 37986/122310 [1:10:06<1:44:02, 13.51it/s][A
 31%|█████████▉                      | 37991/122310 [1:10:07<2:01:54, 11.53it/s][A
 31%|█████████▉                      | 38002/122310 [1:10:08<1:52:07, 12.53it/s][A
 31%|█████████▉                      | 38007/122310 [1:10:09<2:10:17, 10.78it/s][A
 31%|█████████▉                      | 38015/122310 [1:10:09<2:10:10, 10.79it/s][A
 31%|█████████▉                      | 38023/122310 [1:10:10<2:10:16, 10.78it/s][A
 31%|█████████▉                      | 38037/122310 [1:10:11<1:46:16, 13.22it/s][A
 31%|█████████▉                      | 38047/122310 [1:10:12<1:45:16, 13.34it/s][A
 31%|█████████▉                      | 38055/122310 [1:10:12<1:51:38, 12.58it/s][A
 31%|█████████▉                      | 38063/122310 [1:10:13<1:56:25, 12.06it/s][A
 31%|█████████▉                      | 38073/122310 [1:10:14<1:52:21, 12.49it/s][A
 31%|█████████▉                      | 38080/122310 [1:10:15<2:01:59, 11.51

step: 19500, loss: 87.96134707096382, epoch: 1



 31%|█████████▉                      | 38149/122310 [1:10:21<2:04:02, 11.31it/s][A
 31%|█████████▉                      | 38155/122310 [1:10:22<2:14:56, 10.39it/s][A
 31%|█████████▉                      | 38157/122310 [1:10:23<2:53:35,  8.08it/s][A
 31%|█████████▉                      | 38163/122310 [1:10:24<2:53:21,  8.09it/s][A
 31%|█████████▉                      | 38168/122310 [1:10:24<3:01:32,  7.72it/s][A
 31%|█████████▉                      | 38178/122310 [1:10:25<2:28:23,  9.45it/s][A
 31%|█████████▉                      | 38183/122310 [1:10:26<2:41:42,  8.67it/s][A
 31%|█████████▉                      | 38193/122310 [1:10:26<2:17:33, 10.19it/s][A
 31%|█████████▉                      | 38208/122310 [1:10:27<1:45:37, 13.27it/s][A
 31%|█████████▉                      | 38210/122310 [1:10:28<2:16:59, 10.23it/s][A
 31%|█████████▉                      | 38221/122310 [1:10:29<1:59:14, 11.75it/s][A
 31%|██████████                      | 38236/122310 [1:10:29<1:36:43, 14.49

step: 19520, loss: 86.67159862898058, epoch: 1



 31%|██████████                      | 38324/122310 [1:10:36<2:07:19, 10.99it/s][A
 31%|██████████                      | 38340/122310 [1:10:36<1:37:14, 14.39it/s][A
 31%|██████████                      | 38352/122310 [1:10:37<1:32:26, 15.14it/s][A
 31%|██████████                      | 38361/122310 [1:10:38<1:37:20, 14.37it/s][A
 31%|██████████                      | 38373/122310 [1:10:39<1:33:10, 15.01it/s][A
 31%|██████████                      | 38378/122310 [1:10:39<1:50:39, 12.64it/s][A
 31%|██████████                      | 38390/122310 [1:10:40<1:40:48, 13.88it/s][A
 31%|██████████                      | 38395/122310 [1:10:41<1:57:51, 11.87it/s][A
 31%|██████████                      | 38409/122310 [1:10:41<1:38:44, 14.16it/s][A
 31%|██████████                      | 38425/122310 [1:10:42<1:24:11, 16.61it/s][A
 31%|██████████                      | 38432/122310 [1:10:43<1:36:11, 14.53it/s][A
 31%|██████████                      | 38437/122310 [1:10:44<1:53:33, 12.31

step: 19540, loss: 104.27041549492678, epoch: 1



 31%|██████████                      | 38489/122310 [1:10:50<2:45:11,  8.46it/s][A
 31%|██████████                      | 38500/122310 [1:10:51<2:12:28, 10.54it/s][A
 31%|██████████                      | 38509/122310 [1:10:51<2:04:58, 11.18it/s][A
 31%|██████████                      | 38516/122310 [1:10:52<2:09:46, 10.76it/s][A
 31%|██████████                      | 38526/122310 [1:10:53<1:58:50, 11.75it/s][A
 32%|██████████                      | 38536/122310 [1:10:54<1:52:35, 12.40it/s][A
 32%|██████████                      | 38540/122310 [1:10:54<2:14:36, 10.37it/s][A
 32%|██████████                      | 38543/122310 [1:10:55<2:43:41,  8.53it/s][A
 32%|██████████                      | 38560/122310 [1:10:56<1:47:09, 13.03it/s][A
 32%|██████████                      | 38568/122310 [1:10:57<1:52:16, 12.43it/s][A
 32%|██████████                      | 38579/122310 [1:10:57<1:45:31, 13.22it/s][A
 32%|██████████                      | 38597/122310 [1:10:58<1:23:09, 16.78

step: 19560, loss: 85.6882373503209, epoch: 1



 32%|██████████                      | 38676/122310 [1:11:04<1:46:43, 13.06it/s][A
 32%|██████████                      | 38681/122310 [1:11:06<2:41:37,  8.62it/s][A
 32%|██████████                      | 38683/122310 [1:11:07<3:13:29,  7.20it/s][A
 32%|██████████                      | 38686/122310 [1:11:07<3:35:58,  6.45it/s][A
 32%|██████████                      | 38694/122310 [1:11:08<2:59:56,  7.74it/s][A
 32%|██████████▏                     | 38704/122310 [1:11:09<2:26:48,  9.49it/s][A
 32%|██████████▏                     | 38709/122310 [1:11:09<2:38:18,  8.80it/s][A
 32%|██████████▏                     | 38717/122310 [1:11:10<2:26:41,  9.50it/s][A
 32%|██████████▏                     | 38727/122310 [1:11:11<2:09:18, 10.77it/s][A
 32%|██████████▏                     | 38731/122310 [1:11:12<2:29:45,  9.30it/s][A
 32%|██████████▏                     | 38739/122310 [1:11:12<2:21:28,  9.85it/s][A
 32%|██████████▏                     | 38745/122310 [1:11:13<2:27:42,  9.43

step: 19580, loss: 69.66493501107144, epoch: 1



 32%|██████████▏                     | 38834/122310 [1:11:19<1:21:12, 17.13it/s][A
 32%|██████████▏                     | 38837/122310 [1:11:19<1:44:31, 13.31it/s][A
 32%|██████████▏                     | 38847/122310 [1:11:20<1:43:00, 13.50it/s][A
 32%|██████████▏                     | 38862/122310 [1:11:21<1:28:05, 15.79it/s][A
 32%|██████████▏                     | 38879/122310 [1:11:22<1:16:21, 18.21it/s][A
 32%|██████████▏                     | 38887/122310 [1:11:22<1:26:35, 16.06it/s][A
 32%|██████████▏                     | 38901/122310 [1:11:23<1:21:01, 17.16it/s][A
 32%|██████████▏                     | 38912/122310 [1:11:24<1:23:35, 16.63it/s][A
 32%|██████████▏                     | 38919/122310 [1:11:24<1:35:23, 14.57it/s][A
 32%|██████████▏                     | 38931/122310 [1:11:25<1:31:11, 15.24it/s][A
 32%|██████████▏                     | 38935/122310 [1:11:28<3:33:16,  6.52it/s][A
 32%|██████████▏                     | 38949/122310 [1:11:29<3:05:18,  7.50

step: 19600, loss: 89.34831475292587, epoch: 1
sim1 and sim2 are 0.34206250840734437, 0.18913850928562037
cosine of pred and queen: 0.49279951687220663
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: group
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahar


 32%|██████████▏                     | 38986/122310 [1:11:47<1:54:50, 12.09it/s][A

Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: government
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: official
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.28888888888888886
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: emblem
Actual: vanish:vanishes::eat:eats, pred: drink
Actual: sing:sings::shuffle:shuffles, pred: emblem
Actual: sit:sits::go:goes, pred: emblem
Actual: say:says::provide


 32%|█████████▉                     | 38995/122310 [1:12:50<52:03:05,  2.25s/it][A

Actual: india:rupee::denmark:krone, pred: belgium
Accuracy is 0.10059171597633136



 32%|█████████▉                     | 39002/122310 [1:12:50<41:12:37,  1.78s/it][A
 32%|█████████▉                     | 39008/122310 [1:12:51<33:06:49,  1.43s/it][A
 32%|█████████▉                     | 39017/122310 [1:12:52<23:23:39,  1.01s/it][A
 32%|█████████▉                     | 39024/122310 [1:12:53<18:00:32,  1.28it/s][A
 32%|█████████▉                     | 39028/122310 [1:12:53<15:37:49,  1.48it/s][A
 32%|█████████▉                     | 39032/122310 [1:12:54<13:23:16,  1.73it/s][A
 32%|█████████▉                     | 39035/122310 [1:12:55<12:02:05,  1.92it/s][A
 32%|█████████▉                     | 39039/122310 [1:12:56<10:06:17,  2.29it/s][A
 32%|██████████▏                     | 39047/122310 [1:12:56<6:46:45,  3.41it/s][A
 32%|██████████▏                     | 39054/122310 [1:12:57<5:18:19,  4.36it/s][A
 32%|██████████▏                     | 39063/122310 [1:12:58<4:00:31,  5.77it/s][A
 32%|██████████▏                     | 39077/122310 [1:12:59<2:44:01,  8.46

step: 19620, loss: 75.91285905388185, epoch: 1



 32%|██████████▏                     | 39155/122310 [1:13:05<1:40:40, 13.77it/s][A
 32%|██████████▏                     | 39170/122310 [1:13:06<1:28:37, 15.63it/s][A
 32%|██████████▏                     | 39176/122310 [1:13:07<1:43:29, 13.39it/s][A
 32%|██████████▎                     | 39191/122310 [1:13:08<1:30:07, 15.37it/s][A
 32%|██████████▎                     | 39195/122310 [1:13:08<1:51:47, 12.39it/s][A
 32%|██████████▎                     | 39207/122310 [1:13:09<1:43:14, 13.42it/s][A
 32%|██████████▎                     | 39215/122310 [1:13:10<1:49:26, 12.66it/s][A
 32%|██████████▎                     | 39222/122310 [1:13:11<1:58:52, 11.65it/s][A
 32%|██████████▎                     | 39234/122310 [1:13:11<1:46:51, 12.96it/s][A
 32%|██████████▎                     | 39241/122310 [1:13:12<1:57:21, 11.80it/s][A
 32%|██████████▎                     | 39245/122310 [1:13:13<2:19:59,  9.89it/s][A
 32%|██████████▎                     | 39259/122310 [1:13:14<1:50:32, 12.52

step: 19640, loss: 81.58709951234454, epoch: 1



 32%|██████████▎                     | 39352/122310 [1:13:20<2:09:58, 10.64it/s][A
 32%|██████████▎                     | 39358/122310 [1:13:21<2:20:06,  9.87it/s][A
 32%|██████████▎                     | 39368/122310 [1:13:22<2:06:49, 10.90it/s][A
 32%|██████████▎                     | 39378/122310 [1:13:23<1:58:41, 11.64it/s][A
 32%|██████████▎                     | 39391/122310 [1:13:23<1:43:37, 13.34it/s][A
 32%|██████████▎                     | 39397/122310 [1:13:24<1:57:28, 11.76it/s][A
 32%|██████████▎                     | 39401/122310 [1:13:25<2:21:09,  9.79it/s][A
 32%|██████████▎                     | 39411/122310 [1:13:26<2:06:44, 10.90it/s][A
 32%|██████████▎                     | 39423/122310 [1:13:26<1:51:13, 12.42it/s][A
 32%|██████████▎                     | 39433/122310 [1:13:27<1:48:51, 12.69it/s][A
 32%|██████████▎                     | 39450/122310 [1:13:28<1:28:34, 15.59it/s][A
 32%|██████████▎                     | 39457/122310 [1:13:29<1:40:25, 13.75

step: 19660, loss: 89.28404226169488, epoch: 1



 32%|██████████▎                     | 39532/122310 [1:13:35<2:06:15, 10.93it/s][A
 32%|██████████▎                     | 39541/122310 [1:13:36<2:01:06, 11.39it/s][A
 32%|██████████▎                     | 39546/122310 [1:13:37<2:17:32, 10.03it/s][A
 32%|██████████▎                     | 39554/122310 [1:13:38<2:13:32, 10.33it/s][A
 32%|██████████▎                     | 39562/122310 [1:13:38<2:11:25, 10.49it/s][A
 32%|██████████▎                     | 39573/122310 [1:13:39<1:56:19, 11.86it/s][A
 32%|██████████▎                     | 39585/122310 [1:13:40<1:44:11, 13.23it/s][A
 32%|██████████▎                     | 39593/122310 [1:13:41<1:49:32, 12.58it/s][A
 32%|██████████▎                     | 39605/122310 [1:13:41<1:40:18, 13.74it/s][A
 32%|██████████▎                     | 39621/122310 [1:13:42<1:25:10, 16.18it/s][A
 32%|██████████▎                     | 39624/122310 [1:13:43<1:49:40, 12.57it/s][A
 32%|██████████▎                     | 39628/122310 [1:13:43<2:12:13, 10.42

step: 19680, loss: 186.92225946593607, epoch: 1



 32%|██████████▍                     | 39692/122310 [1:13:50<3:17:54,  6.96it/s][A
 32%|██████████▍                     | 39695/122310 [1:13:51<3:45:14,  6.11it/s][A
 32%|██████████▍                     | 39703/122310 [1:13:52<3:02:16,  7.55it/s][A
 32%|██████████▍                     | 39717/122310 [1:13:52<2:05:05, 11.00it/s][A
 32%|██████████▍                     | 39720/122310 [1:13:53<2:33:30,  8.97it/s][A
 32%|██████████▍                     | 39732/122310 [1:13:54<2:03:09, 11.17it/s][A
 32%|██████████▍                     | 39741/122310 [1:13:54<1:59:18, 11.53it/s][A
 32%|██████████▍                     | 39748/122310 [1:13:55<2:05:28, 10.97it/s][A
 33%|██████████▍                     | 39753/122310 [1:13:56<2:22:13,  9.67it/s][A
 33%|██████████▍                     | 39757/122310 [1:13:57<2:43:02,  8.44it/s][A
 33%|██████████▍                     | 39769/122310 [1:13:57<2:07:01, 10.83it/s][A
 33%|██████████▍                     | 39773/122310 [1:13:58<2:29:30,  9.20

step: 19700, loss: 97.67868047274933, epoch: 1



 33%|██████████▍                     | 39828/122310 [1:14:05<2:20:51,  9.76it/s][A
 33%|██████████▍                     | 39831/122310 [1:14:05<2:49:57,  8.09it/s][A
 33%|██████████▍                     | 39850/122310 [1:14:06<1:42:14, 13.44it/s][A
 33%|██████████▍                     | 39854/122310 [1:14:07<2:04:08, 11.07it/s][A
 33%|██████████▍                     | 39865/122310 [1:14:08<1:52:22, 12.23it/s][A
 33%|██████████▍                     | 39872/122310 [1:14:08<2:00:04, 11.44it/s][A
 33%|██████████▍                     | 39889/122310 [1:14:09<1:31:50, 14.96it/s][A
 33%|██████████▍                     | 39899/122310 [1:14:10<1:34:19, 14.56it/s][A
 33%|██████████▍                     | 39908/122310 [1:14:11<1:38:45, 13.91it/s][A
 33%|██████████▍                     | 39917/122310 [1:14:11<1:42:42, 13.37it/s][A
 33%|██████████▍                     | 39921/122310 [1:14:12<2:04:44, 11.01it/s][A
 33%|██████████▍                     | 39928/122310 [1:14:13<2:09:51, 10.57

step: 19720, loss: 83.92293242137399, epoch: 1



 33%|██████████▍                     | 39982/122310 [1:14:19<2:58:21,  7.69it/s][A
 33%|██████████▍                     | 39986/122310 [1:14:20<3:14:52,  7.04it/s][A
 33%|██████████▍                     | 39994/122310 [1:14:21<2:46:46,  8.23it/s][A
 33%|██████████▍                     | 39999/122310 [1:14:22<2:55:32,  7.82it/s][A
 33%|██████████▍                     | 40008/122310 [1:14:22<2:29:59,  9.15it/s][A
 33%|██████████▍                     | 40014/122310 [1:14:23<2:34:52,  8.86it/s][A
 33%|██████████▍                     | 40020/122310 [1:14:24<2:38:41,  8.64it/s][A
 33%|██████████▍                     | 40024/122310 [1:14:24<2:57:58,  7.71it/s][A
 33%|██████████▍                     | 40039/122310 [1:14:25<1:59:13, 11.50it/s][A
 33%|██████████▍                     | 40046/122310 [1:14:26<2:05:25, 10.93it/s][A
 33%|██████████▍                     | 40054/122310 [1:14:27<2:05:10, 10.95it/s][A
 33%|██████████▍                     | 40062/122310 [1:14:27<2:05:46, 10.90

step: 19740, loss: 96.88705479499174, epoch: 1



 33%|██████████▍                     | 40111/122310 [1:14:34<3:39:42,  6.24it/s][A
 33%|██████████▍                     | 40123/122310 [1:14:35<2:34:15,  8.88it/s][A
 33%|██████████▍                     | 40124/122310 [1:14:35<3:19:32,  6.86it/s][A
 33%|██████████▌                     | 40137/122310 [1:14:36<2:18:20,  9.90it/s][A
 33%|██████████▌                     | 40145/122310 [1:14:37<2:14:24, 10.19it/s][A
 33%|██████████▌                     | 40156/122310 [1:14:38<1:58:57, 11.51it/s][A
 33%|██████████▌                     | 40158/122310 [1:14:38<2:32:30,  8.98it/s][A
 33%|██████████▌                     | 40162/122310 [1:14:39<2:51:48,  7.97it/s][A
 33%|██████████▌                     | 40174/122310 [1:14:40<2:11:00, 10.45it/s][A
 33%|██████████▌                     | 40180/122310 [1:14:41<2:19:34,  9.81it/s][A
 33%|██████████▌                     | 40191/122310 [1:14:41<2:00:25, 11.37it/s][A
 33%|██████████▌                     | 40198/122310 [1:14:42<2:06:43, 10.80

step: 19760, loss: 65.82124424501173, epoch: 1



 33%|██████████▌                     | 40277/122310 [1:14:49<1:45:35, 12.95it/s][A
 33%|██████████▌                     | 40279/122310 [1:14:49<2:18:10,  9.89it/s][A
 33%|██████████▌                     | 40290/122310 [1:14:50<1:59:48, 11.41it/s][A
 33%|██████████▌                     | 40292/122310 [1:14:51<2:35:41,  8.78it/s][A
 33%|██████████▌                     | 40297/122310 [1:14:52<2:46:15,  8.22it/s][A
 33%|██████████▌                     | 40306/122310 [1:14:52<2:25:10,  9.41it/s][A
 33%|██████████▌                     | 40313/122310 [1:14:53<2:24:34,  9.45it/s][A
 33%|██████████▌                     | 40319/122310 [1:14:54<2:30:42,  9.07it/s][A
 33%|██████████▌                     | 40328/122310 [1:14:55<2:16:08, 10.04it/s][A
 33%|██████████▌                     | 40334/122310 [1:14:55<2:24:16,  9.47it/s][A
 33%|██████████▌                     | 40351/122310 [1:14:56<1:40:33, 13.58it/s][A
 33%|██████████▌                     | 40357/122310 [1:14:57<1:54:52, 11.89

step: 19780, loss: 99.2205682187351, epoch: 1



 33%|██████████▌                     | 40443/122310 [1:15:03<1:36:39, 14.12it/s][A
 33%|██████████▌                     | 40449/122310 [1:15:04<1:50:05, 12.39it/s][A
 33%|██████████▌                     | 40461/122310 [1:15:05<1:40:29, 13.57it/s][A
 33%|██████████▌                     | 40477/122310 [1:15:06<1:25:11, 16.01it/s][A
 33%|██████████▌                     | 40486/122310 [1:15:06<1:31:42, 14.87it/s][A
 33%|██████████▌                     | 40495/122310 [1:15:07<1:36:34, 14.12it/s][A
 33%|██████████▌                     | 40503/122310 [1:15:08<1:43:31, 13.17it/s][A
 33%|██████████▌                     | 40516/122310 [1:15:09<1:33:56, 14.51it/s][A
 33%|██████████▌                     | 40523/122310 [1:15:09<1:44:40, 13.02it/s][A
 33%|██████████▌                     | 40528/122310 [1:15:10<2:01:50, 11.19it/s][A
 33%|██████████▌                     | 40543/122310 [1:15:11<1:37:28, 13.98it/s][A
 33%|██████████▌                     | 40558/122310 [1:15:11<1:25:35, 15.92

step: 19800, loss: 69.09529340436988, epoch: 1



 33%|██████████▋                     | 40629/122310 [1:15:18<1:59:11, 11.42it/s][A
 33%|██████████▋                     | 40636/122310 [1:15:19<2:04:50, 10.90it/s][A
 33%|██████████▋                     | 40646/122310 [1:15:20<1:55:58, 11.74it/s][A
 33%|██████████▋                     | 40655/122310 [1:15:20<1:54:30, 11.89it/s][A
 33%|██████████▋                     | 40672/122310 [1:15:21<1:29:07, 15.27it/s][A
 33%|██████████▋                     | 40682/122310 [1:15:22<1:32:33, 14.70it/s][A
 33%|██████████▋                     | 40686/122310 [1:15:22<1:54:05, 11.92it/s][A
 33%|██████████▋                     | 40694/122310 [1:15:23<1:57:11, 11.61it/s][A
 33%|██████████▋                     | 40698/122310 [1:15:24<2:19:14,  9.77it/s][A
 33%|██████████▋                     | 40711/122310 [1:15:25<1:52:10, 12.12it/s][A
 33%|██████████▋                     | 40722/122310 [1:15:25<1:44:31, 13.01it/s][A
 33%|██████████▋                     | 40732/122310 [1:15:26<1:43:14, 13.17

step: 19820, loss: 96.65419126752451, epoch: 1



 33%|██████████▋                     | 40814/122310 [1:15:33<2:10:52, 10.38it/s][A
 33%|██████████▋                     | 40823/122310 [1:15:34<2:03:59, 10.95it/s][A
 33%|██████████▋                     | 40837/122310 [1:15:34<1:41:17, 13.41it/s][A
 33%|██████████▋                     | 40844/122310 [1:15:35<1:50:54, 12.24it/s][A
 33%|██████████▋                     | 40853/122310 [1:15:36<1:50:56, 12.24it/s][A
 33%|██████████▋                     | 40867/122310 [1:15:36<1:35:13, 14.26it/s][A
 33%|██████████▋                     | 40878/122310 [1:15:37<1:33:36, 14.50it/s][A
 33%|██████████▋                     | 40885/122310 [1:15:38<1:44:16, 13.01it/s][A
 33%|██████████▋                     | 40894/122310 [1:15:39<1:46:40, 12.72it/s][A
 33%|██████████▋                     | 40900/122310 [1:15:39<1:59:12, 11.38it/s][A
 33%|██████████▋                     | 40915/122310 [1:15:40<1:36:24, 14.07it/s][A
 33%|██████████▋                     | 40921/122310 [1:15:41<1:50:19, 12.30

step: 19840, loss: 117.47763809162308, epoch: 1



 34%|██████████▋                     | 40999/122310 [1:15:48<1:59:52, 11.30it/s][A
 34%|██████████▋                     | 41005/122310 [1:15:48<2:10:42, 10.37it/s][A
 34%|██████████▋                     | 41020/122310 [1:15:50<2:11:45, 10.28it/s][A
 34%|██████████▋                     | 41034/122310 [1:15:50<1:48:50, 12.45it/s][A
 34%|██████████▋                     | 41044/122310 [1:15:51<1:46:18, 12.74it/s][A
 34%|██████████▋                     | 41051/122310 [1:15:52<1:53:51, 11.90it/s][A
 34%|██████████▋                     | 41067/122310 [1:15:53<1:32:36, 14.62it/s][A
 34%|██████████▋                     | 41081/122310 [1:15:53<1:25:18, 15.87it/s][A
 34%|██████████▊                     | 41095/122310 [1:15:56<2:07:39, 10.60it/s][A
 34%|██████████▊                     | 41097/122310 [1:15:56<2:30:25,  9.00it/s][A
 34%|██████████▊                     | 41108/122310 [1:15:57<2:10:15, 10.39it/s][A
 34%|██████████▊                     | 41115/122310 [1:15:58<2:13:20, 10.15

step: 19860, loss: 102.52670180127501, epoch: 1



 34%|██████████▊                     | 41183/122310 [1:16:02<1:32:07, 14.68it/s][A
 34%|██████████▊                     | 41190/122310 [1:16:03<1:42:33, 13.18it/s][A
 34%|██████████▊                     | 41200/122310 [1:16:04<1:41:34, 13.31it/s][A
 34%|██████████▊                     | 41204/122310 [1:16:04<2:03:17, 10.96it/s][A
 34%|██████████▊                     | 41213/122310 [1:16:05<1:59:43, 11.29it/s][A
 34%|██████████▊                     | 41222/122310 [1:16:06<1:57:01, 11.55it/s][A
 34%|██████████▊                     | 41227/122310 [1:16:07<2:13:37, 10.11it/s][A
 34%|██████████▊                     | 41244/122310 [1:16:07<1:36:36, 13.99it/s][A
 34%|██████████▊                     | 41248/122310 [1:16:08<1:58:05, 11.44it/s][A
 34%|██████████▊                     | 41252/122310 [1:16:09<2:20:00,  9.65it/s][A
 34%|██████████▊                     | 41260/122310 [1:16:10<2:14:59, 10.01it/s][A
 34%|██████████▊                     | 41269/122310 [1:16:10<2:06:14, 10.70

step: 19880, loss: 81.42188843808766, epoch: 1



 34%|██████████▊                     | 41344/122310 [1:16:17<2:02:23, 11.02it/s][A
 34%|██████████▊                     | 41352/122310 [1:16:18<2:03:40, 10.91it/s][A
 34%|██████████▊                     | 41361/122310 [1:16:18<1:58:29, 11.39it/s][A
 34%|██████████▊                     | 41369/122310 [1:16:19<1:59:49, 11.26it/s][A
 34%|██████████▊                     | 41378/122310 [1:16:20<1:57:06, 11.52it/s][A
 34%|██████████▊                     | 41388/122310 [1:16:21<1:51:06, 12.14it/s][A
 34%|██████████▊                     | 41395/122310 [1:16:21<1:58:26, 11.39it/s][A
 34%|██████████▊                     | 41401/122310 [1:16:22<2:09:33, 10.41it/s][A
 34%|██████████▊                     | 41418/122310 [1:16:23<1:34:53, 14.21it/s][A
 34%|██████████▊                     | 41420/122310 [1:16:24<2:04:48, 10.80it/s][A
 34%|██████████▊                     | 41433/122310 [1:16:24<1:44:48, 12.86it/s][A
 34%|██████████▊                     | 41447/122310 [1:16:25<1:31:37, 14.71

step: 19900, loss: 85.06544474437702, epoch: 1



 34%|██████████▊                     | 41511/122310 [1:16:32<2:27:08,  9.15it/s][A
 34%|██████████▊                     | 41518/122310 [1:16:32<2:25:23,  9.26it/s][A
 34%|██████████▊                     | 41525/122310 [1:16:33<2:24:40,  9.31it/s][A
 34%|██████████▊                     | 41526/122310 [1:16:34<3:10:16,  7.08it/s][A
 34%|██████████▊                     | 41530/122310 [1:16:35<3:22:54,  6.64it/s][A
 34%|██████████▊                     | 41542/122310 [1:16:35<2:22:17,  9.46it/s][A
 34%|██████████▊                     | 41553/122310 [1:16:36<2:01:40, 11.06it/s][A
 34%|██████████▊                     | 41562/122310 [1:16:37<1:58:08, 11.39it/s][A
 34%|██████████▉                     | 41571/122310 [1:16:38<1:55:46, 11.62it/s][A
 34%|██████████▉                     | 41576/122310 [1:16:38<2:11:48, 10.21it/s][A
 34%|██████████▉                     | 41585/122310 [1:16:39<2:04:22, 10.82it/s][A
 34%|██████████▉                     | 41599/122310 [1:16:40<1:41:25, 13.26

step: 19920, loss: 77.15084906568744, epoch: 1



 34%|██████████▉                     | 41663/122310 [1:16:46<2:46:20,  8.08it/s][A
 34%|██████████▉                     | 41673/122310 [1:16:47<2:18:47,  9.68it/s][A
 34%|██████████▉                     | 41678/122310 [1:16:48<2:33:04,  8.78it/s][A
 34%|██████████▉                     | 41686/122310 [1:16:49<2:23:32,  9.36it/s][A
 34%|██████████▉                     | 41702/122310 [1:16:49<1:43:30, 12.98it/s][A
 34%|██████████▉                     | 41710/122310 [1:16:50<1:49:31, 12.27it/s][A
 34%|██████████▉                     | 41717/122310 [1:16:51<1:58:34, 11.33it/s][A
 34%|██████████▉                     | 41724/122310 [1:16:52<2:05:08, 10.73it/s][A
 34%|██████████▉                     | 41731/122310 [1:16:52<2:10:08, 10.32it/s][A
 34%|██████████▉                     | 41737/122310 [1:16:53<2:19:45,  9.61it/s][A
 34%|██████████▉                     | 41747/122310 [1:16:54<2:04:29, 10.79it/s][A
 34%|██████████▉                     | 41753/122310 [1:16:55<2:16:40,  9.82

step: 19940, loss: 121.83005933332362, epoch: 1



 34%|██████████▉                     | 41821/122310 [1:17:02<2:36:29,  8.57it/s][A
 34%|██████████▉                     | 41828/122310 [1:17:02<2:33:13,  8.75it/s][A
 34%|██████████▉                     | 41842/122310 [1:17:03<1:55:32, 11.61it/s][A
 34%|██████████▉                     | 41859/122310 [1:17:04<1:30:46, 14.77it/s][A
 34%|██████████▉                     | 41862/122310 [1:17:05<1:56:15, 11.53it/s][A
 34%|██████████▉                     | 41867/122310 [1:17:06<2:14:03, 10.00it/s][A
 34%|██████████▉                     | 41872/122310 [1:17:06<2:29:31,  8.97it/s][A
 34%|██████████▉                     | 41877/122310 [1:17:07<2:42:58,  8.23it/s][A
 34%|██████████▉                     | 41890/122310 [1:17:08<2:03:53, 10.82it/s][A
 34%|██████████▉                     | 41896/122310 [1:17:09<2:55:30,  7.64it/s][A
 34%|██████████▉                     | 41908/122310 [1:17:10<2:19:13,  9.62it/s][A
 34%|██████████▉                     | 41925/122310 [1:17:11<1:43:54, 12.89

step: 19960, loss: 77.24953434915416, epoch: 1



 34%|██████████▉                     | 41984/122310 [1:17:17<2:16:59,  9.77it/s][A
 34%|██████████▉                     | 41994/122310 [1:17:18<2:04:58, 10.71it/s][A
 34%|██████████▉                     | 42005/122310 [1:17:19<1:54:05, 11.73it/s][A
 34%|██████████▉                     | 42016/122310 [1:17:19<1:47:11, 12.48it/s][A
 34%|██████████▉                     | 42022/122310 [1:17:20<2:00:29, 11.10it/s][A
 34%|██████████▉                     | 42037/122310 [1:17:21<1:38:46, 13.55it/s][A
 34%|███████████                     | 42049/122310 [1:17:22<1:34:25, 14.17it/s][A
 34%|███████████                     | 42072/122310 [1:17:22<1:11:06, 18.81it/s][A
 34%|███████████                     | 42081/122310 [1:17:23<1:19:56, 16.73it/s][A
 34%|███████████                     | 42083/122310 [1:17:24<1:47:01, 12.49it/s][A
 34%|███████████                     | 42088/122310 [1:17:25<2:04:44, 10.72it/s][A
 34%|███████████                     | 42090/122310 [1:17:25<2:41:15,  8.29

step: 19980, loss: 89.97159666020455, epoch: 1



 34%|███████████                     | 42178/122310 [1:17:32<1:43:14, 12.94it/s][A
 34%|███████████                     | 42186/122310 [1:17:33<1:49:19, 12.22it/s][A
 35%|███████████                     | 42199/122310 [1:17:34<1:37:53, 13.64it/s][A
 35%|███████████                     | 42204/122310 [1:17:35<1:55:58, 11.51it/s][A
 35%|███████████                     | 42215/122310 [1:17:35<1:48:47, 12.27it/s][A
 35%|███████████                     | 42217/122310 [1:17:36<2:21:46,  9.42it/s][A
 35%|███████████                     | 42228/122310 [1:17:37<2:02:36, 10.89it/s][A
 35%|███████████                     | 42242/122310 [1:17:38<1:42:50, 12.98it/s][A
 35%|███████████                     | 42246/122310 [1:17:39<2:05:10, 10.66it/s][A
 35%|███████████                     | 42248/122310 [1:17:39<2:41:32,  8.26it/s][A
 35%|███████████                     | 42259/122310 [1:17:41<2:52:07,  7.75it/s][A
 35%|███████████                     | 42269/122310 [1:17:42<2:26:40,  9.10

step: 20000, loss: 74.91852448963535, epoch: 1
sim1 and sim2 are 0.44479214915914855, 0.23112400355824883
cosine of pred and queen: 0.42391469413977445
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: disputed
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: saturday
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: 


 35%|███████████                     | 42340/122310 [1:17:57<1:29:21, 14.91it/s][A

Actual: jharkhand:ranchi::punjab:chandigarh, pred: singh
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: thiruvananthapuram
Actual: india:delhi::serbia:belgrade, pred: emblem
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: football
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: currency
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: first
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: government
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:i


 35%|██████████▋                    | 42345/122310 [1:19:07<56:43:23,  2.55s/it][A
 35%|██████████▋                    | 42348/122310 [1:19:08<50:43:58,  2.28s/it][A
 35%|██████████▋                    | 42355/122310 [1:19:08<37:10:21,  1.67s/it][A
 35%|██████████▋                    | 42360/122310 [1:19:09<29:38:53,  1.33s/it][A
 35%|██████████▋                    | 42372/122310 [1:19:10<17:24:53,  1.28it/s][A
 35%|██████████▋                    | 42379/122310 [1:19:11<13:25:50,  1.65it/s][A
 35%|███████████                     | 42390/122310 [1:19:11<8:59:14,  2.47it/s][A
 35%|███████████                     | 42394/122310 [1:19:12<8:12:11,  2.71it/s][A
 35%|███████████                     | 42402/122310 [1:19:13<6:16:01,  3.54it/s][A
 35%|███████████                     | 42413/122310 [1:19:14<4:27:00,  4.99it/s][A
 35%|███████████                     | 42424/122310 [1:19:14<3:24:48,  6.50it/s][A
 35%|███████████                     | 42433/122310 [1:19:15<2:57:31,  7.50

step: 20020, loss: 73.36430179683906, epoch: 1



 35%|███████████▏                    | 42528/122310 [1:19:23<1:51:29, 11.93it/s][A
 35%|███████████▏                    | 42540/122310 [1:19:24<1:41:20, 13.12it/s][A
 35%|███████████▏                    | 42547/122310 [1:19:24<1:51:09, 11.96it/s][A
 35%|███████████▏                    | 42551/122310 [1:19:25<2:13:22,  9.97it/s][A
 35%|███████████▏                    | 42555/122310 [1:19:26<2:35:10,  8.57it/s][A
 35%|███████████▏                    | 42568/122310 [1:19:27<1:59:07, 11.16it/s][A
 35%|███████████▏                    | 42577/122310 [1:19:27<1:56:51, 11.37it/s][A
 35%|███████████▏                    | 42580/122310 [1:19:28<2:25:01,  9.16it/s][A
 35%|███████████▏                    | 42588/122310 [1:19:29<2:18:36,  9.59it/s][A
 35%|███████████▏                    | 42596/122310 [1:19:30<2:14:34,  9.87it/s][A
 35%|███████████▏                    | 42605/122310 [1:19:30<2:06:57, 10.46it/s][A
 35%|███████████▏                    | 42625/122310 [1:19:31<1:27:30, 15.18

step: 20040, loss: 88.02320883529578, epoch: 1



 35%|███████████▏                    | 42686/122310 [1:19:38<3:08:48,  7.03it/s][A
 35%|███████████▏                    | 42696/122310 [1:19:39<2:29:51,  8.85it/s][A
 35%|███████████▏                    | 42702/122310 [1:19:39<2:34:17,  8.60it/s][A
 35%|███████████▏                    | 42707/122310 [1:19:40<2:45:26,  8.02it/s][A
 35%|███████████▏                    | 42711/122310 [1:19:41<3:04:16,  7.20it/s][A
 35%|███████████▏                    | 42715/122310 [1:19:42<3:21:02,  6.60it/s][A
 35%|███████████▏                    | 42736/122310 [1:19:42<1:41:35, 13.06it/s][A
 35%|███████████▏                    | 42744/122310 [1:19:43<1:45:27, 12.57it/s][A
 35%|███████████▏                    | 42752/122310 [1:19:44<1:48:39, 12.20it/s][A
 35%|███████████▏                    | 42759/122310 [1:19:45<1:55:10, 11.51it/s][A
 35%|███████████▏                    | 42765/122310 [1:19:45<2:04:43, 10.63it/s][A
 35%|███████████▏                    | 42775/122310 [1:19:46<1:53:37, 11.67

step: 20060, loss: 82.42309098245634, epoch: 1



 35%|███████████▏                    | 42843/122310 [1:19:52<2:06:04, 10.51it/s][A
 35%|███████████▏                    | 42845/122310 [1:19:53<2:44:35,  8.05it/s][A
 35%|███████████▏                    | 42853/122310 [1:19:54<2:29:31,  8.86it/s][A
 35%|███████████▏                    | 42862/122310 [1:19:55<2:15:06,  9.80it/s][A
 35%|███████████▏                    | 42872/122310 [1:19:55<2:01:53, 10.86it/s][A
 35%|███████████▏                    | 42874/122310 [1:19:57<3:25:47,  6.43it/s][A
 35%|███████████▏                    | 42878/122310 [1:19:58<3:33:53,  6.19it/s][A
 35%|███████████▏                    | 42891/122310 [1:19:58<2:25:48,  9.08it/s][A
 35%|███████████▏                    | 42902/122310 [1:19:59<2:05:17, 10.56it/s][A
 35%|███████████▏                    | 42913/122310 [1:20:00<1:51:55, 11.82it/s][A
 35%|███████████▏                    | 42920/122310 [1:20:01<1:56:59, 11.31it/s][A
 35%|███████████▏                    | 42929/122310 [1:20:01<1:52:55, 11.72

step: 20080, loss: 88.19566654276584, epoch: 1



 35%|███████████▏                    | 42998/122310 [1:20:07<1:51:09, 11.89it/s][A
 35%|███████████▎                    | 43011/122310 [1:20:08<1:35:28, 13.84it/s][A
 35%|███████████▎                    | 43015/122310 [1:20:08<1:56:08, 11.38it/s][A
 35%|███████████▎                    | 43025/122310 [1:20:09<1:48:18, 12.20it/s][A
 35%|███████████▎                    | 43032/122310 [1:20:10<1:54:51, 11.50it/s][A
 35%|███████████▎                    | 43038/122310 [1:20:11<2:04:26, 10.62it/s][A
 35%|███████████▎                    | 43049/122310 [1:20:11<1:49:21, 12.08it/s][A
 35%|███████████▎                    | 43052/122310 [1:20:12<2:15:34,  9.74it/s][A
 35%|███████████▎                    | 43057/122310 [1:20:13<2:28:41,  8.88it/s][A
 35%|███████████▎                    | 43060/122310 [1:20:13<2:55:54,  7.51it/s][A
 35%|███████████▎                    | 43072/122310 [1:20:14<2:07:43, 10.34it/s][A
 35%|███████████▎                    | 43086/122310 [1:20:15<1:40:25, 13.15

step: 20100, loss: 84.9215100121561, epoch: 1



 35%|███████████▎                    | 43159/122310 [1:20:21<1:50:57, 11.89it/s][A
 35%|███████████▎                    | 43173/122310 [1:20:22<1:32:25, 14.27it/s][A
 35%|███████████▎                    | 43177/122310 [1:20:23<1:52:56, 11.68it/s][A
 35%|███████████▎                    | 43186/122310 [1:20:23<1:49:51, 12.00it/s][A
 35%|███████████▎                    | 43193/122310 [1:20:24<1:56:00, 11.37it/s][A
 35%|███████████▎                    | 43197/122310 [1:20:25<2:16:41,  9.65it/s][A
 35%|███████████▎                    | 43200/122310 [1:20:25<2:44:14,  8.03it/s][A
 35%|███████████▎                    | 43207/122310 [1:20:26<2:33:32,  8.59it/s][A
 35%|███████████▎                    | 43222/122310 [1:20:27<1:46:42, 12.35it/s][A
 35%|███████████▎                    | 43235/122310 [1:20:28<1:33:41, 14.07it/s][A
 35%|███████████▎                    | 43240/122310 [1:20:28<1:50:50, 11.89it/s][A
 35%|███████████▎                    | 43247/122310 [1:20:29<1:56:50, 11.28

step: 20120, loss: 83.10944173592802, epoch: 1



 35%|███████████▎                    | 43311/122310 [1:20:35<2:39:01,  8.28it/s][A
 35%|███████████▎                    | 43323/122310 [1:20:36<2:01:16, 10.86it/s][A
 35%|███████████▎                    | 43331/122310 [1:20:37<1:59:48, 10.99it/s][A
 35%|███████████▎                    | 43335/122310 [1:20:38<2:20:08,  9.39it/s][A
 35%|███████████▎                    | 43349/122310 [1:20:38<1:46:06, 12.40it/s][A
 35%|███████████▎                    | 43359/122310 [1:20:39<1:42:13, 12.87it/s][A
 35%|███████████▎                    | 43368/122310 [1:20:40<1:42:27, 12.84it/s][A
 35%|███████████▎                    | 43378/122310 [1:20:40<1:39:17, 13.25it/s][A
 35%|███████████▎                    | 43389/122310 [1:20:41<1:34:43, 13.89it/s][A
 35%|███████████▎                    | 43395/122310 [1:20:42<1:47:19, 12.26it/s][A
 35%|███████████▎                    | 43405/122310 [1:20:43<1:42:59, 12.77it/s][A
 35%|███████████▎                    | 43410/122310 [1:20:43<1:58:57, 11.05

step: 20140, loss: 82.02198311409101, epoch: 1



 36%|███████████▍                    | 43501/122310 [1:20:50<1:42:30, 12.81it/s][A
 36%|███████████▍                    | 43507/122310 [1:20:50<1:53:33, 11.56it/s][A
 36%|███████████▍                    | 43517/122310 [1:20:51<1:46:42, 12.31it/s][A
 36%|███████████▍                    | 43523/122310 [1:20:52<1:57:40, 11.16it/s][A
 36%|███████████▍                    | 43525/122310 [1:20:53<2:31:57,  8.64it/s][A
 36%|███████████▍                    | 43532/122310 [1:20:53<2:25:53,  9.00it/s][A
 36%|███████████▍                    | 43538/122310 [1:20:54<2:28:25,  8.85it/s][A
 36%|███████████▍                    | 43544/122310 [1:20:55<2:31:14,  8.68it/s][A
 36%|███████████▍                    | 43550/122310 [1:20:55<2:32:09,  8.63it/s][A
 36%|███████████▍                    | 43573/122310 [1:20:56<1:23:21, 15.74it/s][A
 36%|███████████▍                    | 43582/122310 [1:20:57<1:28:40, 14.80it/s][A
 36%|███████████▍                    | 43585/122310 [1:20:57<1:53:13, 11.59

step: 20160, loss: 100.19251143962869, epoch: 1



 36%|███████████▍                    | 43653/122310 [1:21:04<2:04:25, 10.54it/s][A
 36%|███████████▍                    | 43671/122310 [1:21:05<1:27:40, 14.95it/s][A
 36%|███████████▍                    | 43680/122310 [1:21:05<1:32:05, 14.23it/s][A
 36%|███████████▍                    | 43685/122310 [1:21:06<1:48:52, 12.04it/s][A
 36%|███████████▍                    | 43690/122310 [1:21:07<2:04:22, 10.53it/s][A
 36%|███████████▍                    | 43696/122310 [1:21:08<2:12:25,  9.89it/s][A
 36%|███████████▍                    | 43703/122310 [1:21:08<2:13:36,  9.81it/s][A
 36%|███████████▍                    | 43716/122310 [1:21:09<1:46:06, 12.35it/s][A
 36%|███████████▍                    | 43728/122310 [1:21:10<1:37:28, 13.44it/s][A
 36%|███████████▍                    | 43736/122310 [1:21:10<1:44:22, 12.55it/s][A
 36%|███████████▍                    | 43745/122310 [1:21:11<1:46:10, 12.33it/s][A
 36%|███████████▍                    | 43757/122310 [1:21:12<1:38:19, 13.32

step: 20180, loss: 76.67360404340215, epoch: 1



 36%|███████████▍                    | 43817/122310 [1:21:19<1:52:48, 11.60it/s][A
 36%|███████████▍                    | 43824/122310 [1:21:20<1:59:40, 10.93it/s][A
 36%|███████████▍                    | 43831/122310 [1:21:20<2:05:24, 10.43it/s][A
 36%|███████████▍                    | 43844/122310 [1:21:21<1:45:02, 12.45it/s][A
 36%|███████████▍                    | 43853/122310 [1:21:22<1:46:18, 12.30it/s][A
 36%|███████████▍                    | 43855/122310 [1:21:23<2:18:39,  9.43it/s][A
 36%|███████████▍                    | 43858/122310 [1:21:23<2:44:21,  7.96it/s][A
 36%|███████████▍                    | 43867/122310 [1:21:24<2:19:59,  9.34it/s][A
 36%|███████████▍                    | 43877/122310 [1:21:25<2:02:05, 10.71it/s][A
 36%|███████████▍                    | 43884/122310 [1:21:25<2:04:44, 10.48it/s][A
 36%|███████████▍                    | 43893/122310 [1:21:26<1:57:41, 11.10it/s][A
 36%|███████████▍                    | 43898/122310 [1:21:27<2:11:54,  9.91

step: 20200, loss: 73.08155413830049, epoch: 1



 36%|███████████▍                    | 43955/122310 [1:21:33<2:54:32,  7.48it/s][A
 36%|███████████▌                    | 43959/122310 [1:21:34<3:08:02,  6.94it/s][A
 36%|███████████▌                    | 43961/122310 [1:21:35<3:49:39,  5.69it/s][A
 36%|███████████▌                    | 43969/122310 [1:21:35<2:57:02,  7.37it/s][A
 36%|███████████▌                    | 43978/122310 [1:21:36<2:25:50,  8.95it/s][A
 36%|███████████▌                    | 43987/122310 [1:21:37<2:10:22, 10.01it/s][A
 36%|███████████▌                    | 43990/122310 [1:21:38<2:37:07,  8.31it/s][A
 36%|███████████▌                    | 43994/122310 [1:21:40<5:30:46,  3.95it/s][A
 36%|███████████▌                    | 43999/122310 [1:21:41<4:48:46,  4.52it/s][A
 36%|███████████▌                    | 44017/122310 [1:21:42<2:27:50,  8.83it/s][A
 36%|███████████▌                    | 44028/122310 [1:21:43<2:06:20, 10.33it/s][A
 36%|███████████▌                    | 44038/122310 [1:21:43<1:57:07, 11.14

step: 20220, loss: 110.6183965314972, epoch: 1



 36%|███████████▌                    | 44071/122310 [1:21:48<2:37:13,  8.29it/s][A
 36%|███████████▌                    | 44084/122310 [1:21:48<2:06:34, 10.30it/s][A
 36%|███████████▌                    | 44096/122310 [1:21:49<1:51:00, 11.74it/s][A
 36%|███████████▌                    | 44112/122310 [1:21:50<1:30:44, 14.36it/s][A
 36%|███████████▌                    | 44121/122310 [1:21:50<1:33:40, 13.91it/s][A
 36%|███████████▌                    | 44134/122310 [1:21:51<1:26:57, 14.98it/s][A
 36%|███████████▌                    | 44140/122310 [1:21:52<1:38:58, 13.16it/s][A
 36%|███████████▌                    | 44150/122310 [1:21:53<1:37:08, 13.41it/s][A
 36%|███████████▌                    | 44161/122310 [1:21:53<1:33:08, 13.98it/s][A
 36%|███████████▌                    | 44165/122310 [1:21:55<2:26:26,  8.89it/s][A
 36%|███████████▌                    | 44171/122310 [1:21:55<2:27:47,  8.81it/s][A
 36%|███████████▌                    | 44173/122310 [1:21:56<2:59:04,  7.27

step: 20240, loss: 81.6852370012088, epoch: 1



 36%|███████████▌                    | 44240/122310 [1:22:02<2:38:31,  8.21it/s][A
 36%|███████████▌                    | 44248/122310 [1:22:03<2:22:37,  9.12it/s][A
 36%|███████████▌                    | 44262/122310 [1:22:03<1:46:07, 12.26it/s][A
 36%|███████████▌                    | 44276/122310 [1:22:04<1:30:03, 14.44it/s][A
 36%|███████████▌                    | 44284/122310 [1:22:05<1:36:14, 13.51it/s][A
 36%|███████████▌                    | 44291/122310 [1:22:05<1:44:50, 12.40it/s][A
 36%|███████████▌                    | 44311/122310 [1:22:06<1:16:00, 17.10it/s][A
 36%|███████████▌                    | 44325/122310 [1:22:07<1:12:45, 17.86it/s][A
 36%|███████████▌                    | 44334/122310 [1:22:08<1:19:45, 16.29it/s][A
 36%|███████████▌                    | 44345/122310 [1:22:08<1:21:26, 15.95it/s][A
 36%|███████████▌                    | 44351/122310 [1:22:09<1:34:28, 13.75it/s][A
 36%|███████████▌                    | 44354/122310 [1:22:10<1:59:07, 10.91

step: 20260, loss: 71.99670787714963, epoch: 1



 36%|███████████▋                    | 44435/122310 [1:22:16<1:54:15, 11.36it/s][A
 36%|███████████▋                    | 44444/122310 [1:22:17<1:50:25, 11.75it/s][A
 36%|███████████▋                    | 44453/122310 [1:22:18<1:47:58, 12.02it/s][A
 36%|███████████▋                    | 44460/122310 [1:22:18<1:54:00, 11.38it/s][A
 36%|███████████▋                    | 44469/122310 [1:22:19<1:50:29, 11.74it/s][A
 36%|███████████▋                    | 44479/122310 [1:22:20<1:45:22, 12.31it/s][A
 36%|███████████▋                    | 44484/122310 [1:22:20<2:01:00, 10.72it/s][A
 36%|███████████▋                    | 44487/122310 [1:22:21<2:27:27,  8.80it/s][A
 36%|███████████▋                    | 44489/122310 [1:22:22<3:05:30,  6.99it/s][A
 36%|███████████▋                    | 44498/122310 [1:22:23<2:29:42,  8.66it/s][A
 36%|███████████▋                    | 44505/122310 [1:22:23<2:23:40,  9.03it/s][A
 36%|███████████▋                    | 44514/122310 [1:22:24<2:09:38, 10.00

step: 20280, loss: 116.50668527364475, epoch: 1



 36%|███████████▋                    | 44592/122310 [1:22:30<1:45:56, 12.23it/s][A
 36%|███████████▋                    | 44599/122310 [1:22:31<1:52:27, 11.52it/s][A
 36%|███████████▋                    | 44606/122310 [1:22:32<1:57:37, 11.01it/s][A
 36%|███████████▋                    | 44611/122310 [1:22:33<2:12:12,  9.79it/s][A
 36%|███████████▋                    | 44616/122310 [1:22:33<2:24:16,  8.98it/s][A
 36%|███████████▋                    | 44626/122310 [1:22:34<2:03:23, 10.49it/s][A
 36%|███████████▋                    | 44643/122310 [1:22:35<1:29:10, 14.52it/s][A
 37%|███████████▋                    | 44647/122310 [1:22:35<1:49:27, 11.82it/s][A
 37%|███████████▋                    | 44652/122310 [1:22:36<2:05:16, 10.33it/s][A
 37%|███████████▋                    | 44667/122310 [1:22:37<1:35:35, 13.54it/s][A
 37%|███████████▋                    | 44673/122310 [1:22:38<1:47:53, 11.99it/s][A
 37%|███████████▋                    | 44682/122310 [1:22:38<1:46:22, 12.16

step: 20300, loss: 85.28670759347754, epoch: 1



 37%|███████████▋                    | 44751/122310 [1:22:45<2:07:26, 10.14it/s][A
 37%|███████████▋                    | 44757/122310 [1:22:45<2:13:45,  9.66it/s][A
 37%|███████████▋                    | 44768/122310 [1:22:46<1:53:14, 11.41it/s][A
 37%|███████████▋                    | 44777/122310 [1:22:47<1:49:40, 11.78it/s][A
 37%|███████████▋                    | 44787/122310 [1:22:48<1:43:36, 12.47it/s][A
 37%|███████████▋                    | 44798/122310 [1:22:48<1:36:54, 13.33it/s][A
 37%|███████████▋                    | 44808/122310 [1:22:49<1:35:22, 13.54it/s][A
 37%|███████████▋                    | 44814/122310 [1:22:50<1:47:31, 12.01it/s][A
 37%|███████████▋                    | 44833/122310 [1:22:50<1:18:41, 16.41it/s][A
 37%|███████████▋                    | 44845/122310 [1:22:51<1:18:11, 16.51it/s][A
 37%|███████████▋                    | 44854/122310 [1:22:52<1:23:58, 15.37it/s][A
 37%|███████████▋                    | 44858/122310 [1:22:53<1:43:58, 12.42

step: 20320, loss: 81.55282122031517, epoch: 1



 37%|███████████▊                    | 44918/122310 [1:22:59<2:15:07,  9.55it/s][A
 37%|███████████▊                    | 44930/122310 [1:23:00<1:51:17, 11.59it/s][A
 37%|███████████▊                    | 44942/122310 [1:23:00<1:38:36, 13.08it/s][A
 37%|███████████▊                    | 44959/122310 [1:23:01<1:19:45, 16.16it/s][A
 37%|███████████▊                    | 44973/122310 [1:23:02<1:14:59, 17.19it/s][A
 37%|███████████▊                    | 44980/122310 [1:23:03<1:26:08, 14.96it/s][A
 37%|███████████▊                    | 44995/122310 [1:23:03<1:17:20, 16.66it/s][A
 37%|███████████▊                    | 44999/122310 [1:23:04<1:35:41, 13.47it/s][A
 37%|███████████▊                    | 45010/122310 [1:23:05<1:31:57, 14.01it/s][A
 37%|███████████▊                    | 45019/122310 [1:23:05<1:34:53, 13.57it/s][A
 37%|███████████▊                    | 45026/122310 [1:23:06<1:43:35, 12.43it/s][A
 37%|███████████▊                    | 45037/122310 [1:23:07<1:36:33, 13.34

step: 20340, loss: 68.6417505673078, epoch: 1



 37%|███████████▊                    | 45114/122310 [1:23:13<2:40:41,  8.01it/s][A
 37%|███████████▊                    | 45121/122310 [1:23:14<2:31:39,  8.48it/s][A
 37%|███████████▊                    | 45128/122310 [1:23:15<2:25:27,  8.84it/s][A
 37%|███████████▊                    | 45140/122310 [1:23:15<1:56:44, 11.02it/s][A
 37%|███████████▊                    | 45147/122310 [1:23:16<2:00:31, 10.67it/s][A
 37%|███████████▊                    | 45151/122310 [1:23:17<2:19:20,  9.23it/s][A
 37%|███████████▊                    | 45155/122310 [1:23:18<2:37:39,  8.16it/s][A
 37%|███████████▊                    | 45163/122310 [1:23:18<2:21:44,  9.07it/s][A
 37%|███████████▊                    | 45173/122310 [1:23:19<2:01:58, 10.54it/s][A
 37%|███████████▊                    | 45185/122310 [1:23:20<1:43:42, 12.39it/s][A
 37%|███████████▊                    | 45187/122310 [1:23:21<2:55:06,  7.34it/s][A
 37%|███████████▊                    | 45192/122310 [1:23:22<2:56:25,  7.29

step: 20360, loss: 94.39425898484052, epoch: 1



 37%|███████████▊                    | 45258/122310 [1:23:28<1:34:26, 13.60it/s][A
 37%|███████████▊                    | 45267/122310 [1:23:28<1:36:25, 13.32it/s][A
 37%|███████████▊                    | 45275/122310 [1:23:29<1:40:56, 12.72it/s][A
 37%|███████████▊                    | 45293/122310 [1:23:30<1:17:56, 16.47it/s][A
 37%|███████████▊                    | 45300/122310 [1:23:30<1:28:35, 14.49it/s][A
 37%|███████████▊                    | 45308/122310 [1:23:31<1:35:13, 13.48it/s][A
 37%|███████████▊                    | 45315/122310 [1:23:32<1:43:56, 12.35it/s][A
 37%|███████████▊                    | 45323/122310 [1:23:33<1:46:24, 12.06it/s][A
 37%|███████████▊                    | 45328/122310 [1:23:33<2:01:41, 10.54it/s][A
 37%|███████████▊                    | 45337/122310 [1:23:34<1:54:57, 11.16it/s][A
 37%|███████████▊                    | 45344/122310 [1:23:35<1:59:10, 10.76it/s][A
 37%|███████████▊                    | 45353/122310 [1:23:35<1:53:13, 11.33

step: 20380, loss: 82.7936671798072, epoch: 1



 37%|███████████▉                    | 45427/122310 [1:23:42<2:00:42, 10.62it/s][A
 37%|███████████▉                    | 45429/122310 [1:23:43<2:35:47,  8.22it/s][A
 37%|███████████▉                    | 45434/122310 [1:23:43<2:45:57,  7.72it/s][A
 37%|███████████▉                    | 45443/122310 [1:23:44<2:22:35,  8.98it/s][A
 37%|███████████▉                    | 45451/122310 [1:23:45<2:15:49,  9.43it/s][A
 37%|███████████▉                    | 45460/122310 [1:23:46<2:06:19, 10.14it/s][A
 37%|███████████▉                    | 45473/122310 [1:23:47<1:44:42, 12.23it/s][A
 37%|███████████▉                    | 45484/122310 [1:23:47<1:39:29, 12.87it/s][A
 37%|███████████▉                    | 45488/122310 [1:23:48<2:00:52, 10.59it/s][A
 37%|███████████▉                    | 45491/122310 [1:23:49<2:29:17,  8.58it/s][A
 37%|███████████▉                    | 45496/122310 [1:23:50<2:40:10,  7.99it/s][A
 37%|███████████▉                    | 45500/122310 [1:23:50<2:59:23,  7.14

step: 20400, loss: 76.30634272446831, epoch: 1
sim1 and sim2 are 0.400824590752756, 0.22496283936854522
cosine of pred and queen: 0.38522446429397
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharast


 37%|███████████▉                    | 45563/122310 [1:24:07<1:43:48, 12.32it/s][A

Actual: jharkhand:ranchi::punjab:chandigarh, pred: singh
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: thiruvananthapuram
Actual: india:delhi::serbia:belgrade, pred: emblem
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: football
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: currency
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: first
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italia


 37%|███████████▌                   | 45573/122310 [1:25:16<54:42:17,  2.57s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 37%|███████████▌                   | 45577/122310 [1:25:18<47:37:29,  2.23s/it][A
 37%|███████████▌                   | 45579/122310 [1:25:18<43:43:45,  2.05s/it][A
 37%|███████████▌                   | 45587/122310 [1:25:19<28:46:56,  1.35s/it][A
 37%|███████████▌                   | 45598/122310 [1:25:20<17:29:33,  1.22it/s][A
 37%|███████████▌                   | 45600/122310 [1:25:21<16:34:18,  1.29it/s][A
 37%|███████████▌                   | 45605/122310 [1:25:21<13:07:08,  1.62it/s][A
 37%|███████████▉                    | 45611/122310 [1:25:22<9:55:05,  2.15it/s][A
 37%|███████████▉                    | 45616/122310 [1:25:23<8:06:34,  2.63it/s][A
 37%|███████████▉                    | 45627/122310 [1:25:24<5:04:03,  4.20it/s][A
 37%|███████████▉                    | 45640/122310 [1:25:24<3:24:08,  6.26it/s][A
 37%|███████████▉                    | 45646/122310 [1:25:25<3:13:57,  6.59it/s][A
 37%|███████████▉                    | 45656/122310 [1:25:26<2:39:36,  8.00

step: 20420, loss: 85.20003616443995, epoch: 1



 37%|███████████▉                    | 45705/122310 [1:25:32<2:08:32,  9.93it/s][A
 37%|███████████▉                    | 45716/122310 [1:25:33<1:52:16, 11.37it/s][A
 37%|███████████▉                    | 45723/122310 [1:25:33<1:58:37, 10.76it/s][A
 37%|███████████▉                    | 45728/122310 [1:25:34<2:13:32,  9.56it/s][A
 37%|███████████▉                    | 45735/122310 [1:25:35<2:14:16,  9.50it/s][A
 37%|███████████▉                    | 45744/122310 [1:25:36<2:04:14, 10.27it/s][A
 37%|███████████▉                    | 45753/122310 [1:25:36<1:58:22, 10.78it/s][A
 37%|███████████▉                    | 45760/122310 [1:25:38<2:40:18,  7.96it/s][A
 37%|███████████▉                    | 45763/122310 [1:25:39<3:02:03,  7.01it/s][A
 37%|███████████▉                    | 45772/122310 [1:25:39<2:32:56,  8.34it/s][A
 37%|███████████▉                    | 45780/122310 [1:25:41<3:01:14,  7.04it/s][A
 37%|███████████▉                    | 45787/122310 [1:25:42<2:47:43,  7.60

step: 20440, loss: 83.6270704687693, epoch: 1



 37%|███████████▉                    | 45852/122310 [1:25:47<1:43:25, 12.32it/s][A
 37%|███████████▉                    | 45863/122310 [1:25:48<1:37:57, 13.01it/s][A
 38%|████████████                    | 45872/122310 [1:25:48<1:40:00, 12.74it/s][A
 38%|████████████                    | 45880/122310 [1:25:49<1:44:59, 12.13it/s][A
 38%|████████████                    | 45894/122310 [1:25:50<1:30:31, 14.07it/s][A
 38%|████████████                    | 45899/122310 [1:25:51<1:48:12, 11.77it/s][A
 38%|████████████                    | 45906/122310 [1:25:51<1:54:57, 11.08it/s][A
 38%|████████████                    | 45923/122310 [1:25:52<1:27:36, 14.53it/s][A
 38%|████████████                    | 45929/122310 [1:25:53<1:41:19, 12.56it/s][A
 38%|████████████                    | 45934/122310 [1:25:54<1:57:55, 10.79it/s][A
 38%|████████████                    | 45943/122310 [1:25:54<1:54:24, 11.13it/s][A
 38%|████████████                    | 45954/122310 [1:25:55<1:44:30, 12.18

step: 20460, loss: 101.61871887505087, epoch: 1



 38%|████████████                    | 46022/122310 [1:26:01<1:44:18, 12.19it/s][A
 38%|████████████                    | 46027/122310 [1:26:02<1:57:11, 10.85it/s][A
 38%|████████████                    | 46031/122310 [1:26:03<2:15:07,  9.41it/s][A
 38%|████████████                    | 46038/122310 [1:26:04<2:12:59,  9.56it/s][A
 38%|████████████                    | 46046/122310 [1:26:04<2:05:52, 10.10it/s][A
 38%|████████████                    | 46059/122310 [1:26:05<1:41:45, 12.49it/s][A
 38%|████████████                    | 46071/122310 [1:26:06<1:32:07, 13.79it/s][A
 38%|████████████                    | 46086/122310 [1:26:06<1:19:33, 15.97it/s][A
 38%|████████████                    | 46092/122310 [1:26:07<1:32:27, 13.74it/s][A
 38%|████████████                    | 46099/122310 [1:26:08<1:41:01, 12.57it/s][A
 38%|████████████                    | 46104/122310 [1:26:09<1:55:50, 10.96it/s][A
 38%|████████████                    | 46115/122310 [1:26:09<1:43:03, 12.32

step: 20480, loss: 97.53121136277493, epoch: 1



 38%|████████████                    | 46205/122310 [1:26:16<1:52:37, 11.26it/s][A
 38%|████████████                    | 46212/122310 [1:26:16<1:56:41, 10.87it/s][A
 38%|████████████                    | 46219/122310 [1:26:17<1:59:44, 10.59it/s][A
 38%|████████████                    | 46229/122310 [1:26:18<1:49:05, 11.62it/s][A
 38%|████████████                    | 46233/122310 [1:26:18<2:08:43,  9.85it/s][A
 38%|████████████                    | 46236/122310 [1:26:19<2:35:31,  8.15it/s][A
 38%|████████████                    | 46240/122310 [1:26:20<2:51:12,  7.41it/s][A
 38%|████████████                    | 46249/122310 [1:26:21<2:21:58,  8.93it/s][A
 38%|████████████                    | 46255/122310 [1:26:21<2:23:55,  8.81it/s][A
 38%|████████████                    | 46262/122310 [1:26:22<2:19:17,  9.10it/s][A
 38%|████████████                    | 46268/122310 [1:26:23<2:22:11,  8.91it/s][A
 38%|████████████                    | 46278/122310 [1:26:23<2:01:14, 10.45

step: 20500, loss: 77.96537575613114, epoch: 1



 38%|████████████▏                   | 46378/122310 [1:26:30<1:28:15, 14.34it/s][A
 38%|████████████▏                   | 46394/122310 [1:26:31<1:15:19, 16.80it/s][A
 38%|████████████▏                   | 46403/122310 [1:26:31<1:21:21, 15.55it/s][A
 38%|████████████▏                   | 46407/122310 [1:26:32<1:40:17, 12.61it/s][A
 38%|████████████▏                   | 46421/122310 [1:26:33<1:25:40, 14.76it/s][A
 38%|████████████▏                   | 46429/122310 [1:26:33<1:32:26, 13.68it/s][A
 38%|████████████▏                   | 46431/122310 [1:26:34<2:01:28, 10.41it/s][A
 38%|████████████▏                   | 46441/122310 [1:26:35<1:49:37, 11.53it/s][A
 38%|████████████▏                   | 46446/122310 [1:26:36<2:04:18, 10.17it/s][A
 38%|████████████▏                   | 46450/122310 [1:26:36<2:23:23,  8.82it/s][A
 38%|████████████▏                   | 46458/122310 [1:26:37<2:13:29,  9.47it/s][A
 38%|████████████▏                   | 46460/122310 [1:26:38<2:49:01,  7.48

step: 20520, loss: 88.11741988144338, epoch: 1



 38%|████████████▏                   | 46544/122310 [1:26:44<1:29:54, 14.05it/s][A
 38%|████████████▏                   | 46555/122310 [1:26:45<1:27:07, 14.49it/s][A
 38%|████████████▏                   | 46567/122310 [1:26:46<1:23:18, 15.15it/s][A
 38%|████████████▏                   | 46577/122310 [1:26:46<1:24:58, 14.85it/s][A
 38%|████████████▏                   | 46585/122310 [1:26:47<1:32:22, 13.66it/s][A
 38%|████████████▏                   | 46596/122310 [1:26:48<1:28:48, 14.21it/s][A
 38%|████████████▏                   | 46605/122310 [1:26:48<1:32:14, 13.68it/s][A
 38%|████████████▏                   | 46619/122310 [1:26:49<1:21:48, 15.42it/s][A
 38%|████████████▏                   | 46632/122310 [1:26:50<1:18:05, 16.15it/s][A
 38%|████████████▏                   | 46645/122310 [1:26:51<1:15:28, 16.71it/s][A
 38%|████████████▏                   | 46662/122310 [1:26:51<1:07:06, 18.79it/s][A
 38%|████████████▏                   | 46669/122310 [1:26:52<1:18:46, 16.00

step: 20540, loss: 77.06255389656988, epoch: 1



 38%|████████████▏                   | 46740/122310 [1:26:58<1:48:43, 11.58it/s][A
 38%|████████████▏                   | 46747/122310 [1:26:59<1:53:36, 11.08it/s][A
 38%|████████████▏                   | 46766/122310 [1:27:00<1:20:14, 15.69it/s][A
 38%|████████████▏                   | 46770/122310 [1:27:01<1:39:10, 12.70it/s][A
 38%|████████████▏                   | 46778/122310 [1:27:01<1:43:11, 12.20it/s][A
 38%|████████████▏                   | 46785/122310 [1:27:02<1:49:07, 11.53it/s][A
 38%|████████████▏                   | 46791/122310 [1:27:03<1:58:33, 10.62it/s][A
 38%|████████████▏                   | 46810/122310 [1:27:03<1:21:30, 15.44it/s][A
 38%|████████████▏                   | 46815/122310 [1:27:04<1:37:35, 12.89it/s][A
 38%|████████████▎                   | 46827/122310 [1:27:05<1:30:02, 13.97it/s][A
 38%|████████████▎                   | 46833/122310 [1:27:06<1:41:55, 12.34it/s][A
 38%|████████████▎                   | 46855/122310 [1:27:06<1:10:26, 17.85

step: 20560, loss: 82.80586719516381, epoch: 1



 38%|████████████▎                   | 46926/122310 [1:27:13<1:47:05, 11.73it/s][A
 38%|████████████▎                   | 46936/122310 [1:27:14<1:41:40, 12.36it/s][A
 38%|████████████▎                   | 46938/122310 [1:27:14<2:11:39,  9.54it/s][A
 38%|████████████▎                   | 46942/122310 [1:27:15<2:29:43,  8.39it/s][A
 38%|████████████▎                   | 46953/122310 [1:27:16<1:59:33, 10.50it/s][A
 38%|████████████▎                   | 46959/122310 [1:27:16<2:07:23,  9.86it/s][A
 38%|████████████▎                   | 46977/122310 [1:27:17<1:26:43, 14.48it/s][A
 38%|████████████▎                   | 46989/122310 [1:27:18<1:22:38, 15.19it/s][A
 38%|████████████▎                   | 46998/122310 [1:27:19<1:27:01, 14.42it/s][A
 38%|████████████▎                   | 47002/122310 [1:27:19<1:46:42, 11.76it/s][A
 38%|████████████▎                   | 47012/122310 [1:27:20<1:40:55, 12.44it/s][A
 38%|████████████▎                   | 47017/122310 [1:27:21<1:55:52, 10.83

step: 20580, loss: 75.75495800136363, epoch: 1



 39%|████████████▎                   | 47094/122310 [1:27:27<1:55:52, 10.82it/s][A
 39%|████████████▎                   | 47102/122310 [1:27:28<1:54:16, 10.97it/s][A
 39%|████████████▎                   | 47117/122310 [1:27:29<1:29:34, 13.99it/s][A
 39%|████████████▎                   | 47127/122310 [1:27:29<1:29:24, 14.01it/s][A
 39%|████████████▎                   | 47133/122310 [1:27:30<1:41:25, 12.35it/s][A
 39%|████████████▎                   | 47144/122310 [1:27:31<1:35:23, 13.13it/s][A
 39%|████████████▎                   | 47151/122310 [1:27:31<1:44:40, 11.97it/s][A
 39%|████████████▎                   | 47157/122310 [1:27:32<1:54:49, 10.91it/s][A
 39%|████████████▎                   | 47174/122310 [1:27:33<1:25:01, 14.73it/s][A
 39%|████████████▎                   | 47181/122310 [1:27:34<1:34:25, 13.26it/s][A
 39%|████████████▎                   | 47186/122310 [1:27:34<1:49:45, 11.41it/s][A
 39%|████████████▎                   | 47191/122310 [1:27:36<2:41:18,  7.76

step: 20600, loss: 99.06509938152186, epoch: 1



 39%|████████████▎                   | 47241/122310 [1:27:41<2:15:45,  9.22it/s][A
 39%|████████████▎                   | 47248/122310 [1:27:42<2:13:08,  9.40it/s][A
 39%|████████████▎                   | 47258/122310 [1:27:43<1:56:23, 10.75it/s][A
 39%|████████████▎                   | 47269/122310 [1:27:44<1:43:05, 12.13it/s][A
 39%|████████████▎                   | 47281/122310 [1:27:44<1:32:27, 13.52it/s][A
 39%|████████████▎                   | 47291/122310 [1:27:45<1:31:24, 13.68it/s][A
 39%|████████████▍                   | 47308/122310 [1:27:46<1:14:56, 16.68it/s][A
 39%|████████████▍                   | 47322/122310 [1:27:46<1:11:17, 17.53it/s][A
 39%|████████████▍                   | 47333/122310 [1:27:47<1:14:29, 16.77it/s][A
 39%|████████████▍                   | 47341/122310 [1:27:48<1:22:31, 15.14it/s][A
 39%|████████████▍                   | 47349/122310 [1:27:49<1:29:35, 13.95it/s][A
 39%|████████████▍                   | 47353/122310 [1:27:49<1:49:08, 11.45

step: 20620, loss: 101.0399088979037, epoch: 1



 39%|████████████▍                   | 47439/122310 [1:27:56<1:17:04, 16.19it/s][A
 39%|████████████▍                   | 47450/122310 [1:27:56<1:18:12, 15.95it/s][A
 39%|████████████▍                   | 47455/122310 [1:27:57<1:33:58, 13.27it/s][A
 39%|████████████▍                   | 47459/122310 [1:27:58<1:53:33, 10.99it/s][A
 39%|████████████▍                   | 47468/122310 [1:27:59<1:48:45, 11.47it/s][A
 39%|████████████▍                   | 47474/122310 [1:27:59<1:58:12, 10.55it/s][A
 39%|████████████▍                   | 47479/122310 [1:28:00<2:11:01,  9.52it/s][A
 39%|████████████▍                   | 47486/122310 [1:28:01<2:10:15,  9.57it/s][A
 39%|████████████▍                   | 47492/122310 [1:28:01<2:15:46,  9.18it/s][A
 39%|████████████▍                   | 47503/122310 [1:28:02<1:53:07, 11.02it/s][A
 39%|████████████▍                   | 47516/122310 [1:28:03<1:35:02, 13.11it/s][A
 39%|████████████▍                   | 47531/122310 [1:28:04<1:20:31, 15.48

step: 20640, loss: 70.31785291426203, epoch: 1



 39%|████████████▍                   | 47624/122310 [1:28:10<1:21:38, 15.25it/s][A
 39%|████████████▍                   | 47628/122310 [1:28:11<1:40:15, 12.41it/s][A
 39%|████████████▍                   | 47634/122310 [1:28:12<1:51:46, 11.13it/s][A
 39%|████████████▍                   | 47646/122310 [1:28:12<1:37:07, 12.81it/s][A
 39%|████████████▍                   | 47653/122310 [1:28:13<1:44:16, 11.93it/s][A
 39%|████████████▍                   | 47664/122310 [1:28:14<1:36:29, 12.89it/s][A
 39%|████████████▍                   | 47670/122310 [1:28:14<1:47:22, 11.59it/s][A
 39%|████████████▍                   | 47673/122310 [1:28:15<2:12:25,  9.39it/s][A
 39%|████████████▍                   | 47680/122310 [1:28:16<2:11:17,  9.47it/s][A
 39%|████████████▍                   | 47685/122310 [1:28:17<2:22:09,  8.75it/s][A
 39%|████████████▍                   | 47690/122310 [1:28:17<2:32:33,  8.15it/s][A
 39%|████████████▍                   | 47700/122310 [1:28:18<2:06:26,  9.83

step: 20660, loss: 81.82667274188672, epoch: 1



 39%|████████████▍                   | 47771/122310 [1:28:24<2:12:57,  9.34it/s][A
 39%|████████████▌                   | 47784/122310 [1:28:25<1:43:24, 12.01it/s][A
 39%|████████████▌                   | 47794/122310 [1:28:26<1:39:10, 12.52it/s][A
 39%|████████████▌                   | 47797/122310 [1:28:27<2:03:13, 10.08it/s][A
 39%|████████████▌                   | 47807/122310 [1:28:27<1:50:24, 11.25it/s][A
 39%|████████████▌                   | 47815/122310 [1:28:28<1:50:28, 11.24it/s][A
 39%|████████████▌                   | 47829/122310 [1:28:29<1:30:15, 13.75it/s][A
 39%|████████████▌                   | 47838/122310 [1:28:29<1:32:42, 13.39it/s][A
 39%|████████████▌                   | 47850/122310 [1:28:30<1:26:01, 14.43it/s][A
 39%|████████████▌                   | 47859/122310 [1:28:31<1:29:56, 13.80it/s][A
 39%|████████████▌                   | 47870/122310 [1:28:32<1:26:54, 14.27it/s][A
 39%|████████████▌                   | 47880/122310 [1:28:32<1:27:39, 14.15

step: 20680, loss: 86.64521508739458, epoch: 1



 39%|████████████▌                   | 47956/122310 [1:28:39<1:43:17, 12.00it/s][A
 39%|████████████▌                   | 47965/122310 [1:28:39<1:41:43, 12.18it/s][A
 39%|████████████▌                   | 47971/122310 [1:28:40<1:52:08, 11.05it/s][A
 39%|████████████▌                   | 47981/122310 [1:28:41<1:43:45, 11.94it/s][A
 39%|████████████▌                   | 47986/122310 [1:28:42<1:58:07, 10.49it/s][A
 39%|████████████▌                   | 47995/122310 [1:28:42<1:51:49, 11.08it/s][A
 39%|████████████▌                   | 47999/122310 [1:28:43<2:11:20,  9.43it/s][A
 39%|████████████▌                   | 48003/122310 [1:28:44<2:29:00,  8.31it/s][A
 39%|████████████▌                   | 48013/122310 [1:28:45<2:04:33,  9.94it/s][A
 39%|████████████▌                   | 48014/122310 [1:28:45<2:46:33,  7.43it/s][A
 39%|████████████▌                   | 48025/122310 [1:28:46<2:06:20,  9.80it/s][A
 39%|████████████▌                   | 48032/122310 [1:28:47<2:06:00,  9.82

step: 20700, loss: 81.29932617028459, epoch: 1



 39%|████████████▌                   | 48114/122310 [1:28:53<1:31:37, 13.50it/s][A
 39%|████████████▌                   | 48126/122310 [1:28:54<1:25:24, 14.48it/s][A
 39%|████████████▌                   | 48135/122310 [1:28:55<1:28:33, 13.96it/s][A
 39%|████████████▌                   | 48139/122310 [1:28:55<1:48:01, 11.44it/s][A
 39%|████████████▌                   | 48148/122310 [1:28:56<1:44:46, 11.80it/s][A
 39%|████████████▌                   | 48162/122310 [1:28:57<1:27:40, 14.09it/s][A
 39%|████████████▌                   | 48167/122310 [1:28:59<2:44:35,  7.51it/s][A
 39%|████████████▌                   | 48174/122310 [1:29:00<2:34:54,  7.98it/s][A
 39%|████████████▌                   | 48185/122310 [1:29:00<2:07:18,  9.70it/s][A
 39%|████████████▌                   | 48199/122310 [1:29:01<1:41:31, 12.17it/s][A
 39%|████████████▌                   | 48203/122310 [1:29:02<1:58:31, 10.42it/s][A
 39%|████████████▌                   | 48206/122310 [1:29:02<2:21:12,  8.75

step: 20720, loss: 79.32119753471419, epoch: 1



 39%|████████████▌                   | 48247/122310 [1:29:07<2:49:47,  7.27it/s][A
 39%|████████████▌                   | 48254/122310 [1:29:08<2:34:00,  8.01it/s][A
 39%|████████████▋                   | 48257/122310 [1:29:09<2:59:30,  6.88it/s][A
 39%|████████████▋                   | 48266/122310 [1:29:10<2:23:40,  8.59it/s][A
 39%|████████████▋                   | 48270/122310 [1:29:10<2:40:04,  7.71it/s][A
 39%|████████████▋                   | 48275/122310 [1:29:11<2:44:17,  7.51it/s][A
 39%|████████████▋                   | 48285/122310 [1:29:12<2:10:35,  9.45it/s][A
 39%|████████████▋                   | 48298/122310 [1:29:12<1:42:25, 12.04it/s][A
 39%|████████████▋                   | 48304/122310 [1:29:13<1:52:31, 10.96it/s][A
 40%|████████████▋                   | 48321/122310 [1:29:14<1:23:12, 14.82it/s][A
 40%|████████████▋                   | 48335/122310 [1:29:15<1:16:01, 16.22it/s][A
 40%|████████████▋                   | 48348/122310 [1:29:15<1:13:11, 16.84

step: 20740, loss: 74.58802281168923, epoch: 1



 40%|████████████▋                   | 48430/122310 [1:29:22<1:46:33, 11.56it/s][A
 40%|████████████▋                   | 48443/122310 [1:29:22<1:30:38, 13.58it/s][A
 40%|████████████▋                   | 48447/122310 [1:29:23<1:50:32, 11.14it/s][A
 40%|████████████▋                   | 48450/122310 [1:29:24<2:15:51,  9.06it/s][A
 40%|████████████▋                   | 48454/122310 [1:29:25<2:32:37,  8.06it/s][A
 40%|████████████▋                   | 48468/122310 [1:29:25<1:47:00, 11.50it/s][A
 40%|████████████▋                   | 48472/122310 [1:29:26<2:06:05,  9.76it/s][A
 40%|████████████▋                   | 48480/122310 [1:29:27<2:00:58, 10.17it/s][A
 40%|████████████▋                   | 48488/122310 [1:29:27<1:57:27, 10.48it/s][A
 40%|████████████▋                   | 48491/122310 [1:29:28<2:23:13,  8.59it/s][A
 40%|████████████▋                   | 48501/122310 [1:29:29<2:00:36, 10.20it/s][A
 40%|████████████▋                   | 48512/122310 [1:29:30<1:44:36, 11.76

step: 20760, loss: 76.24273036100094, epoch: 1



 40%|████████████▋                   | 48561/122310 [1:29:36<2:35:22,  7.91it/s][A
 40%|████████████▋                   | 48568/122310 [1:29:37<2:25:22,  8.45it/s][A
 40%|████████████▋                   | 48577/122310 [1:29:37<2:07:24,  9.65it/s][A
 40%|████████████▋                   | 48583/122310 [1:29:38<2:12:08,  9.30it/s][A
 40%|████████████▋                   | 48590/122310 [1:29:39<2:09:52,  9.46it/s][A
 40%|████████████▋                   | 48596/122310 [1:29:40<2:15:42,  9.05it/s][A
 40%|████████████▋                   | 48599/122310 [1:29:40<2:41:06,  7.63it/s][A
 40%|████████████▋                   | 48606/122310 [1:29:41<2:28:40,  8.26it/s][A
 40%|████████████▋                   | 48610/122310 [1:29:42<2:43:49,  7.50it/s][A
 40%|████████████▋                   | 48621/122310 [1:29:42<2:04:53,  9.83it/s][A
 40%|████████████▋                   | 48634/122310 [1:29:43<1:39:32, 12.34it/s][A
 40%|████████████▋                   | 48639/122310 [1:29:44<1:54:21, 10.74

step: 20780, loss: 75.63680428981657, epoch: 1



 40%|████████████▋                   | 48710/122310 [1:29:50<2:07:50,  9.60it/s][A
 40%|████████████▋                   | 48720/122310 [1:29:51<1:52:15, 10.93it/s][A
 40%|████████████▋                   | 48731/122310 [1:29:52<1:40:13, 12.24it/s][A
 40%|████████████▊                   | 48734/122310 [1:29:52<2:04:23,  9.86it/s][A
 40%|████████████▊                   | 48750/122310 [1:29:53<1:30:05, 13.61it/s][A
 40%|████████████▊                   | 48756/122310 [1:29:54<1:41:45, 12.05it/s][A
 40%|████████████▊                   | 48762/122310 [1:29:55<1:51:57, 10.95it/s][A
 40%|████████████▊                   | 48768/122310 [1:29:55<2:00:08, 10.20it/s][A
 40%|████████████▊                   | 48781/122310 [1:29:56<1:37:27, 12.58it/s][A
 40%|████████████▊                   | 48790/122310 [1:29:57<1:37:31, 12.56it/s][A
 40%|████████████▊                   | 48798/122310 [1:29:57<1:40:44, 12.16it/s][A
 40%|████████████▊                   | 48806/122310 [1:29:58<1:43:18, 11.86

step: 20800, loss: 71.12306577355292, epoch: 1
sim1 and sim2 are 0.41263073412162676, 0.23843342191358105
cosine of pred and queen: 0.32985163427413994
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: largement
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: 


 40%|████████████▊                   | 48862/122310 [1:30:17<1:58:43, 10.31it/s][A

Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: africa
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: says
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: official
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.3333333333333333
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: emblem
Actual: vanish:vanishes::


 40%|████████████▍                  | 48867/122310 [1:31:17<68:48:24,  3.37s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 40%|████████████▍                  | 48879/122310 [1:31:17<39:25:32,  1.93s/it][A
 40%|████████████▍                  | 48894/122310 [1:31:18<22:37:12,  1.11s/it][A
 40%|████████████▍                  | 48896/122310 [1:31:19<21:25:41,  1.05s/it][A
 40%|████████████▍                  | 48903/122310 [1:31:20<16:02:41,  1.27it/s][A
 40%|████████████▊                   | 48918/122310 [1:31:20<9:06:51,  2.24it/s][A
 40%|████████████▊                   | 48922/122310 [1:31:21<8:18:40,  2.45it/s][A
 40%|████████████▊                   | 48924/122310 [1:31:22<8:11:39,  2.49it/s][A
 40%|████████████▊                   | 48934/122310 [1:31:22<5:26:40,  3.74it/s][A
 40%|████████████▊                   | 48946/122310 [1:31:23<3:42:01,  5.51it/s][A
 40%|████████████▊                   | 48955/122310 [1:31:24<3:03:28,  6.66it/s][A
 40%|████████████▊                   | 48967/122310 [1:31:25<2:22:34,  8.57it/s][A
 40%|████████████▊                   | 48980/122310 [1:31:25<1:55:16, 10.60

step: 20820, loss: 81.82890944575182, epoch: 1



 40%|████████████▊                   | 49045/122310 [1:31:32<1:37:56, 12.47it/s][A
 40%|████████████▊                   | 49051/122310 [1:31:32<1:48:00, 11.30it/s][A
 40%|████████████▊                   | 49063/122310 [1:31:33<1:34:02, 12.98it/s][A
 40%|████████████▊                   | 49072/122310 [1:31:34<1:34:37, 12.90it/s][A
 40%|████████████▊                   | 49083/122310 [1:31:34<1:29:10, 13.69it/s][A
 40%|████████████▊                   | 49092/122310 [1:31:35<1:31:11, 13.38it/s][A
 40%|████████████▊                   | 49098/122310 [1:31:36<1:42:30, 11.90it/s][A
 40%|████████████▊                   | 49107/122310 [1:31:37<1:40:29, 12.14it/s][A
 40%|████████████▊                   | 49115/122310 [1:31:37<1:42:41, 11.88it/s][A
 40%|████████████▊                   | 49117/122310 [1:31:38<2:14:14,  9.09it/s][A
 40%|████████████▊                   | 49118/122310 [1:31:39<2:58:26,  6.84it/s][A
 40%|████████████▊                   | 49127/122310 [1:31:39<2:22:11,  8.58

step: 20840, loss: 78.00259035770956, epoch: 1



 40%|████████████▊                   | 49209/122310 [1:31:46<2:05:03,  9.74it/s][A
 40%|████████████▉                   | 49225/122310 [1:31:47<1:29:41, 13.58it/s][A
 40%|████████████▉                   | 49234/122310 [1:31:47<1:31:38, 13.29it/s][A
 40%|████████████▉                   | 49246/122310 [1:31:48<1:24:42, 14.37it/s][A
 40%|████████████▉                   | 49251/122310 [1:31:49<1:40:06, 12.16it/s][A
 40%|████████████▉                   | 49259/122310 [1:31:49<1:43:19, 11.78it/s][A
 40%|████████████▉                   | 49270/122310 [1:31:50<1:34:28, 12.88it/s][A
 40%|████████████▉                   | 49275/122310 [1:31:51<1:49:21, 11.13it/s][A
 40%|████████████▉                   | 49290/122310 [1:31:52<1:26:30, 14.07it/s][A
 40%|████████████▉                   | 49297/122310 [1:31:52<1:35:12, 12.78it/s][A
 40%|████████████▉                   | 49309/122310 [1:31:53<1:26:41, 14.03it/s][A
 40%|████████████▉                   | 49322/122310 [1:31:54<1:19:34, 15.29

step: 20860, loss: 92.8658265804774, epoch: 1



 40%|████████████▉                   | 49390/122310 [1:32:00<1:43:18, 11.76it/s][A
 40%|████████████▉                   | 49405/122310 [1:32:01<1:23:23, 14.57it/s][A
 40%|████████████▉                   | 49409/122310 [1:32:02<1:42:19, 11.87it/s][A
 40%|████████████▉                   | 49418/122310 [1:32:02<1:40:37, 12.07it/s][A
 40%|████████████▉                   | 49421/122310 [1:32:03<2:04:22,  9.77it/s][A
 40%|████████████▉                   | 49430/122310 [1:32:04<1:54:28, 10.61it/s][A
 40%|████████████▉                   | 49433/122310 [1:32:04<2:20:01,  8.67it/s][A
 40%|████████████▉                   | 49438/122310 [1:32:05<2:28:13,  8.19it/s][A
 40%|████████████▉                   | 49449/122310 [1:32:06<1:57:08, 10.37it/s][A
 40%|████████████▉                   | 49457/122310 [1:32:07<1:54:11, 10.63it/s][A
 40%|████████████▉                   | 49462/122310 [1:32:07<2:07:54,  9.49it/s][A
 40%|████████████▉                   | 49478/122310 [1:32:08<1:31:21, 13.29

step: 20880, loss: 68.58025057720701, epoch: 1



 41%|████████████▉                   | 49548/122310 [1:32:14<1:42:43, 11.80it/s][A
 41%|████████████▉                   | 49554/122310 [1:32:15<1:52:27, 10.78it/s][A
 41%|████████████▉                   | 49564/122310 [1:32:16<1:42:57, 11.78it/s][A
 41%|████████████▉                   | 49568/122310 [1:32:17<2:02:07,  9.93it/s][A
 41%|████████████▉                   | 49574/122310 [1:32:17<2:07:51,  9.48it/s][A
 41%|████████████▉                   | 49585/122310 [1:32:18<1:47:44, 11.25it/s][A
 41%|████████████▉                   | 49590/122310 [1:32:19<2:01:19,  9.99it/s][A
 41%|████████████▉                   | 49598/122310 [1:32:19<1:56:50, 10.37it/s][A
 41%|████████████▉                   | 49612/122310 [1:32:20<1:32:21, 13.12it/s][A
 41%|████████████▉                   | 49621/122310 [1:32:21<1:34:07, 12.87it/s][A
 41%|████████████▉                   | 49628/122310 [1:32:22<1:41:58, 11.88it/s][A
 41%|████████████▉                   | 49639/122310 [1:32:22<1:33:33, 12.95

step: 20900, loss: 88.50004219170172, epoch: 1



 41%|█████████████                   | 49722/122310 [1:32:29<1:32:58, 13.01it/s][A
 41%|█████████████                   | 49725/122310 [1:32:29<1:56:24, 10.39it/s][A
 41%|█████████████                   | 49731/122310 [1:32:30<2:03:31,  9.79it/s][A
 41%|█████████████                   | 49738/122310 [1:32:31<2:03:29,  9.80it/s][A
 41%|█████████████                   | 49745/122310 [1:32:32<2:04:02,  9.75it/s][A
 41%|█████████████                   | 49758/122310 [1:32:32<1:39:09, 12.19it/s][A
 41%|█████████████                   | 49772/122310 [1:32:33<1:23:50, 14.42it/s][A
 41%|█████████████                   | 49774/122310 [1:32:34<1:50:44, 10.92it/s][A
 41%|█████████████                   | 49786/122310 [1:32:34<1:34:58, 12.73it/s][A
 41%|█████████████                   | 49794/122310 [1:32:35<1:38:28, 12.27it/s][A
 41%|█████████████                   | 49805/122310 [1:32:36<1:32:15, 13.10it/s][A
 41%|█████████████                   | 49807/122310 [1:32:37<1:59:45, 10.09

step: 20920, loss: 79.7838408360431, epoch: 1



 41%|█████████████                   | 49897/122310 [1:32:43<1:35:56, 12.58it/s][A
 41%|█████████████                   | 49910/122310 [1:32:44<1:24:32, 14.27it/s][A
 41%|█████████████                   | 49914/122310 [1:32:44<1:43:23, 11.67it/s][A
 41%|█████████████                   | 49917/122310 [1:32:45<2:09:14,  9.34it/s][A
 41%|█████████████                   | 49925/122310 [1:32:46<2:01:21,  9.94it/s][A
 41%|█████████████                   | 49935/122310 [1:32:47<1:48:35, 11.11it/s][A
 41%|█████████████                   | 49950/122310 [1:32:47<1:25:44, 14.07it/s][A
 41%|█████████████                   | 49952/122310 [1:32:48<1:52:52, 10.68it/s][A
 41%|█████████████                   | 49955/122310 [1:32:49<2:18:54,  8.68it/s][A
 41%|█████████████                   | 49956/122310 [1:32:50<3:05:03,  6.52it/s][A
 41%|█████████████                   | 49964/122310 [1:32:50<2:33:28,  7.86it/s][A
 41%|█████████████                   | 49969/122310 [1:32:51<2:39:34,  7.56

step: 20940, loss: 73.4063248495248, epoch: 1



 41%|█████████████                   | 50055/122310 [1:32:57<1:38:32, 12.22it/s][A
 41%|█████████████                   | 50064/122310 [1:32:58<1:37:24, 12.36it/s][A
 41%|█████████████                   | 50075/122310 [1:32:59<1:30:50, 13.25it/s][A
 41%|█████████████                   | 50084/122310 [1:33:00<1:32:49, 12.97it/s][A
 41%|█████████████                   | 50094/122310 [1:33:00<1:30:41, 13.27it/s][A
 41%|█████████████                   | 50101/122310 [1:33:01<1:38:36, 12.20it/s][A
 41%|█████████████                   | 50110/122310 [1:33:02<1:37:49, 12.30it/s][A
 41%|█████████████                   | 50118/122310 [1:33:03<1:40:15, 12.00it/s][A
 41%|█████████████                   | 50121/122310 [1:33:03<2:05:02,  9.62it/s][A
 41%|█████████████                   | 50126/122310 [1:33:04<2:15:48,  8.86it/s][A
 41%|█████████████                   | 50137/122310 [1:33:05<1:51:05, 10.83it/s][A
 41%|█████████████                   | 50144/122310 [1:33:05<1:54:26, 10.51

step: 20960, loss: 73.66595690722338, epoch: 1



 41%|█████████████▏                  | 50244/122310 [1:33:12<1:16:24, 15.72it/s][A
 41%|█████████████▏                  | 50250/122310 [1:33:13<1:28:44, 13.53it/s][A
 41%|█████████████▏                  | 50262/122310 [1:33:13<1:22:41, 14.52it/s][A
 41%|█████████████▏                  | 50269/122310 [1:33:14<1:31:34, 13.11it/s][A
 41%|█████████████▏                  | 50276/122310 [1:33:15<1:39:05, 12.12it/s][A
 41%|█████████████▏                  | 50284/122310 [1:33:15<1:41:24, 11.84it/s][A
 41%|█████████████▏                  | 50293/122310 [1:33:16<1:39:23, 12.08it/s][A
 41%|█████████████▏                  | 50307/122310 [1:33:17<1:23:38, 14.35it/s][A
 41%|█████████████▏                  | 50318/122310 [1:33:18<1:22:06, 14.61it/s][A
 41%|█████████████▏                  | 50329/122310 [1:33:18<1:20:38, 14.88it/s][A
 41%|█████████████▏                  | 50338/122310 [1:33:19<1:24:35, 14.18it/s][A
 41%|█████████████▏                  | 50343/122310 [1:33:20<1:39:40, 12.03

step: 20980, loss: 79.74795436486231, epoch: 1



 41%|█████████████▏                  | 50427/122310 [1:33:26<1:35:46, 12.51it/s][A
 41%|█████████████▏                  | 50434/122310 [1:33:27<1:42:16, 11.71it/s][A
 41%|█████████████▏                  | 50436/122310 [1:33:28<2:12:58,  9.01it/s][A
 41%|█████████████▏                  | 50448/122310 [1:33:28<1:45:46, 11.32it/s][A
 41%|█████████████▏                  | 50459/122310 [1:33:29<1:35:41, 12.51it/s][A
 41%|█████████████▏                  | 50469/122310 [1:33:30<1:32:33, 12.94it/s][A
 41%|█████████████▏                  | 50480/122310 [1:33:30<1:28:05, 13.59it/s][A
 41%|█████████████▏                  | 50486/122310 [1:33:31<1:39:57, 11.98it/s][A
 41%|█████████████▏                  | 50492/122310 [1:33:32<1:49:31, 10.93it/s][A
 41%|█████████████▏                  | 50494/122310 [1:33:33<2:21:00,  8.49it/s][A
 41%|█████████████▏                  | 50501/122310 [1:33:33<2:14:37,  8.89it/s][A
 41%|█████████████▏                  | 50507/122310 [1:33:35<2:57:33,  6.74

step: 21000, loss: 79.00120254303438, epoch: 1
saving weights



 41%|█████████████▏                  | 50581/122310 [1:33:41<1:48:08, 11.06it/s][A
 41%|█████████████▏                  | 50586/122310 [1:33:41<1:58:07, 10.12it/s][A
 41%|█████████████▏                  | 50589/122310 [1:33:42<2:18:33,  8.63it/s][A
 41%|█████████████▏                  | 50598/122310 [1:33:43<2:03:38,  9.67it/s][A
 41%|█████████████▏                  | 50600/122310 [1:33:43<2:33:06,  7.81it/s][A
 41%|█████████████▏                  | 50613/122310 [1:33:44<1:51:30, 10.72it/s][A
 41%|█████████████▏                  | 50619/122310 [1:33:45<1:58:43, 10.06it/s][A
 41%|█████████████▏                  | 50627/122310 [1:33:46<1:55:09, 10.37it/s][A
 41%|█████████████▏                  | 50630/122310 [1:33:46<2:19:53,  8.54it/s][A
 41%|█████████████▏                  | 50636/122310 [1:33:47<2:20:20,  8.51it/s][A
 41%|█████████████▏                  | 50642/122310 [1:33:48<2:21:37,  8.43it/s][A
 41%|█████████████▎                  | 50649/122310 [1:33:48<2:15:31,  8.81

step: 21020, loss: 73.78379623104618, epoch: 1



 41%|█████████████▎                  | 50700/122310 [1:33:56<2:22:22,  8.38it/s][A
 41%|█████████████▎                  | 50709/122310 [1:33:56<2:03:44,  9.64it/s][A
 41%|█████████████▎                  | 50721/122310 [1:33:57<1:41:39, 11.74it/s][A
 41%|█████████████▎                  | 50729/122310 [1:33:58<1:42:41, 11.62it/s][A
 41%|█████████████▎                  | 50741/122310 [1:33:59<1:30:50, 13.13it/s][A
 41%|█████████████▎                  | 50745/122310 [1:33:59<1:49:19, 10.91it/s][A
 41%|█████████████▎                  | 50750/122310 [1:34:00<2:02:26,  9.74it/s][A
 42%|█████████████▎                  | 50765/122310 [1:34:01<1:31:33, 13.02it/s][A
 42%|█████████████▎                  | 50776/122310 [1:34:01<1:27:22, 13.64it/s][A
 42%|█████████████▎                  | 50785/122310 [1:34:02<1:29:59, 13.25it/s][A
 42%|█████████████▎                  | 50792/122310 [1:34:03<1:38:07, 12.15it/s][A
 42%|█████████████▎                  | 50799/122310 [1:34:04<1:44:20, 11.42

step: 21040, loss: 84.1026692949144, epoch: 1



 42%|█████████████▎                  | 50876/122310 [1:34:10<2:06:29,  9.41it/s][A
 42%|█████████████▎                  | 50887/122310 [1:34:11<1:46:13, 11.21it/s][A
 42%|█████████████▎                  | 50901/122310 [1:34:11<1:26:27, 13.77it/s][A
 42%|█████████████▎                  | 50909/122310 [1:34:12<1:31:37, 12.99it/s][A
 42%|█████████████▎                  | 50921/122310 [1:34:13<1:24:16, 14.12it/s][A
 42%|█████████████▎                  | 50926/122310 [1:34:14<1:39:23, 11.97it/s][A
 42%|█████████████▎                  | 50933/122310 [1:34:14<1:45:01, 11.33it/s][A
 42%|█████████████▎                  | 50941/122310 [1:34:15<1:45:15, 11.30it/s][A
 42%|█████████████▎                  | 50947/122310 [1:34:16<1:55:02, 10.34it/s][A
 42%|█████████████▎                  | 50956/122310 [1:34:17<1:48:33, 10.95it/s][A
 42%|█████████████▎                  | 50967/122310 [1:34:17<1:37:18, 12.22it/s][A
 42%|█████████████▎                  | 50977/122310 [1:34:18<1:33:32, 12.71

step: 21060, loss: 107.56579942795335, epoch: 1



 42%|█████████████▎                  | 51037/122310 [1:34:24<3:00:07,  6.59it/s][A
 42%|█████████████▎                  | 51044/122310 [1:34:25<2:37:05,  7.56it/s][A
 42%|█████████████▎                  | 51053/122310 [1:34:26<2:11:12,  9.05it/s][A
 42%|█████████████▎                  | 51064/122310 [1:34:27<1:48:28, 10.95it/s][A
 42%|█████████████▎                  | 51069/122310 [1:34:27<2:01:42,  9.76it/s][A
 42%|█████████████▎                  | 51076/122310 [1:34:28<2:02:28,  9.69it/s][A
 42%|█████████████▎                  | 51085/122310 [1:34:29<1:52:06, 10.59it/s][A
 42%|█████████████▎                  | 51092/122310 [1:34:29<1:55:27, 10.28it/s][A
 42%|█████████████▎                  | 51099/122310 [1:34:30<1:57:09, 10.13it/s][A
 42%|█████████████▎                  | 51101/122310 [1:34:31<2:29:05,  7.96it/s][A
 42%|█████████████▎                  | 51110/122310 [1:34:32<2:06:47,  9.36it/s][A
 42%|█████████████▎                  | 51116/122310 [1:34:32<2:10:59,  9.06

step: 21080, loss: 86.07423796885305, epoch: 1



 42%|█████████████▍                  | 51194/122310 [1:34:39<2:02:56,  9.64it/s][A
 42%|█████████████▍                  | 51197/122310 [1:34:40<3:11:58,  6.17it/s][A
 42%|█████████████▍                  | 51202/122310 [1:34:41<3:05:25,  6.39it/s][A
 42%|█████████████▍                  | 51209/122310 [1:34:42<2:42:44,  7.28it/s][A
 42%|█████████████▍                  | 51219/122310 [1:34:42<2:10:18,  9.09it/s][A
 42%|█████████████▍                  | 51235/122310 [1:34:43<1:32:40, 12.78it/s][A
 42%|█████████████▍                  | 51241/122310 [1:34:44<1:42:31, 11.55it/s][A
 42%|█████████████▍                  | 51247/122310 [1:34:45<1:51:38, 10.61it/s][A
 42%|█████████████▍                  | 51258/122310 [1:34:45<1:38:40, 12.00it/s][A
 42%|█████████████▍                  | 51266/122310 [1:34:46<1:40:22, 11.80it/s][A
 42%|█████████████▍                  | 51287/122310 [1:34:47<1:09:41, 16.99it/s][A
 42%|█████████████▍                  | 51297/122310 [1:34:47<1:13:45, 16.05

step: 21100, loss: 80.29007210330677, epoch: 1



 42%|█████████████▍                  | 51355/122310 [1:34:53<1:50:06, 10.74it/s][A
 42%|█████████████▍                  | 51365/122310 [1:34:54<1:40:53, 11.72it/s][A
 42%|█████████████▍                  | 51371/122310 [1:34:55<1:50:11, 10.73it/s][A
 42%|█████████████▍                  | 51381/122310 [1:34:55<1:41:00, 11.70it/s][A
 42%|█████████████▍                  | 51394/122310 [1:34:56<1:26:39, 13.64it/s][A
 42%|█████████████▍                  | 51399/122310 [1:34:57<1:41:31, 11.64it/s][A
 42%|█████████████▍                  | 51406/122310 [1:34:57<1:46:31, 11.09it/s][A
 42%|█████████████▍                  | 51417/122310 [1:34:58<1:35:36, 12.36it/s][A
 42%|█████████████▍                  | 51423/122310 [1:34:59<1:45:35, 11.19it/s][A
 42%|█████████████▍                  | 51428/122310 [1:35:00<1:59:45,  9.87it/s][A
 42%|█████████████▍                  | 51430/122310 [1:35:00<2:31:35,  7.79it/s][A
 42%|█████████████▍                  | 51435/122310 [1:35:01<2:36:30,  7.55

step: 21120, loss: 70.99379119129111, epoch: 1



 42%|█████████████▍                  | 51515/122310 [1:35:08<1:21:40, 14.45it/s][A
 42%|█████████████▍                  | 51528/122310 [1:35:08<1:16:16, 15.47it/s][A
 42%|█████████████▍                  | 51535/122310 [1:35:09<1:26:07, 13.70it/s][A
 42%|█████████████▍                  | 51538/122310 [1:35:10<1:48:36, 10.86it/s][A
 42%|█████████████▍                  | 51545/122310 [1:35:10<1:52:11, 10.51it/s][A
 42%|█████████████▍                  | 51557/122310 [1:35:11<1:35:43, 12.32it/s][A
 42%|█████████████▍                  | 51563/122310 [1:35:12<1:46:09, 11.11it/s][A
 42%|█████████████▍                  | 51572/122310 [1:35:13<1:43:07, 11.43it/s][A
 42%|█████████████▍                  | 51579/122310 [1:35:13<1:48:14, 10.89it/s][A
 42%|█████████████▍                  | 51584/122310 [1:35:14<2:01:36,  9.69it/s][A
 42%|█████████████▍                  | 51598/122310 [1:35:15<1:34:14, 12.51it/s][A
 42%|█████████████▌                  | 51608/122310 [1:35:16<1:31:56, 12.82

step: 21140, loss: 87.41022395204028, epoch: 1



 42%|█████████████▌                  | 51695/122310 [1:35:22<1:23:42, 14.06it/s][A
 42%|█████████████▌                  | 51703/122310 [1:35:23<1:29:33, 13.14it/s][A
 42%|█████████████▌                  | 51715/122310 [1:35:24<1:23:00, 14.17it/s][A
 42%|█████████████▌                  | 51719/122310 [1:35:24<1:41:35, 11.58it/s][A
 42%|█████████████▌                  | 51731/122310 [1:35:25<1:30:21, 13.02it/s][A
 42%|█████████████▌                  | 51747/122310 [1:35:26<1:14:36, 15.76it/s][A
 42%|█████████████▌                  | 51755/122310 [1:35:26<1:21:56, 14.35it/s][A
 42%|█████████████▌                  | 51760/122310 [1:35:27<1:36:51, 12.14it/s][A
 42%|█████████████▌                  | 51771/122310 [1:35:28<1:30:19, 13.02it/s][A
 42%|█████████████▌                  | 51778/122310 [1:35:29<1:37:44, 12.03it/s][A
 42%|█████████████▌                  | 51785/122310 [1:35:29<1:44:07, 11.29it/s][A
 42%|█████████████▌                  | 51789/122310 [1:35:30<2:02:37,  9.59

step: 21160, loss: 77.37879069262549, epoch: 1



 42%|█████████████▌                  | 51860/122310 [1:35:37<1:54:03, 10.29it/s][A
 42%|█████████████▌                  | 51872/122310 [1:35:37<1:37:14, 12.07it/s][A
 42%|█████████████▌                  | 51881/122310 [1:35:38<1:36:32, 12.16it/s][A
 42%|█████████████▌                  | 51890/122310 [1:35:39<1:37:08, 12.08it/s][A
 42%|█████████████▌                  | 51893/122310 [1:35:40<1:59:13,  9.84it/s][A
 42%|█████████████▌                  | 51898/122310 [1:35:40<2:10:44,  8.98it/s][A
 42%|█████████████▌                  | 51907/122310 [1:35:41<1:57:22, 10.00it/s][A
 42%|█████████████▌                  | 51922/122310 [1:35:42<1:28:47, 13.21it/s][A
 42%|█████████████▌                  | 51939/122310 [1:35:42<1:12:19, 16.22it/s][A
 42%|█████████████▌                  | 51950/122310 [1:35:43<1:13:55, 15.86it/s][A
 42%|█████████████▌                  | 51957/122310 [1:35:44<1:23:34, 14.03it/s][A
 42%|█████████████▌                  | 51965/122310 [1:35:45<1:29:12, 13.14

step: 21180, loss: 76.80383653747576, epoch: 1



 43%|█████████████▌                  | 52022/122310 [1:35:51<2:05:21,  9.34it/s][A
 43%|█████████████▌                  | 52031/122310 [1:35:52<1:54:05, 10.27it/s][A
 43%|█████████████▌                  | 52036/122310 [1:35:53<2:06:49,  9.24it/s][A
 43%|█████████████▌                  | 52042/122310 [1:35:53<2:10:23,  8.98it/s][A
 43%|█████████████▌                  | 52053/122310 [1:35:54<1:48:20, 10.81it/s][A
 43%|█████████████▌                  | 52063/122310 [1:35:55<1:39:58, 11.71it/s][A
 43%|█████████████▌                  | 52072/122310 [1:35:56<2:07:52,  9.15it/s][A
 43%|█████████████▌                  | 52077/122310 [1:35:57<2:15:55,  8.61it/s][A
 43%|█████████████▋                  | 52082/122310 [1:35:58<2:23:13,  8.17it/s][A
 43%|█████████████▋                  | 52094/122310 [1:35:58<1:52:08, 10.44it/s][A
 43%|█████████████▋                  | 52109/122310 [1:35:59<1:28:22, 13.24it/s][A
 43%|█████████████▋                  | 52116/122310 [1:36:00<1:35:39, 12.23

step: 21200, loss: 78.59032201286395, epoch: 1
sim1 and sim2 are 0.46301562883224917, 0.24847071584542932
cosine of pred and queen: 0.27017538199537056
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahara


 43%|█████████████▋                  | 52158/122310 [1:36:17<1:54:21, 10.22it/s][A

Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: cent
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: says
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: official
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.28888888888888886
Actual: walk:walks::vanish:va


 43%|█████████████▏                 | 52172/122310 [1:37:19<47:40:26,  2.45s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.10650887573964497



 43%|█████████████▏                 | 52188/122310 [1:37:20<27:56:59,  1.43s/it][A
 43%|█████████████▏                 | 52192/122310 [1:37:21<24:44:46,  1.27s/it][A
 43%|█████████████▏                 | 52203/122310 [1:37:22<16:46:27,  1.16it/s][A
 43%|█████████████▏                 | 52207/122310 [1:37:22<14:47:01,  1.32it/s][A
 43%|█████████████▏                 | 52214/122310 [1:37:23<11:18:08,  1.72it/s][A
 43%|█████████████▋                  | 52220/122310 [1:37:24<9:02:52,  2.15it/s][A
 43%|█████████████▋                  | 52225/122310 [1:37:24<7:36:30,  2.56it/s][A
 43%|█████████████▋                  | 52231/122310 [1:37:25<6:06:42,  3.19it/s][A
 43%|█████████████▋                  | 52236/122310 [1:37:26<5:16:27,  3.69it/s][A
 43%|█████████████▋                  | 52247/122310 [1:37:27<3:30:25,  5.55it/s][A
 43%|█████████████▋                  | 52250/122310 [1:37:27<3:40:17,  5.30it/s][A
 43%|█████████████▋                  | 52258/122310 [1:37:28<2:59:05,  6.52

step: 21220, loss: 107.00595349901832, epoch: 1



 43%|█████████████▋                  | 52286/122310 [1:37:37<6:50:48,  2.84it/s][A
 43%|█████████████▋                  | 52289/122310 [1:37:37<6:31:33,  2.98it/s][A
 43%|█████████████▋                  | 52297/122310 [1:37:38<4:51:04,  4.01it/s][A
 43%|█████████████▋                  | 52303/122310 [1:37:39<4:10:24,  4.66it/s][A
 43%|█████████████▋                  | 52314/122310 [1:37:39<2:58:21,  6.54it/s][A
 43%|█████████████▋                  | 52321/122310 [1:37:40<2:42:13,  7.19it/s][A
 43%|█████████████▋                  | 52330/122310 [1:37:41<2:18:57,  8.39it/s][A
 43%|█████████████▋                  | 52332/122310 [1:37:42<2:45:45,  7.04it/s][A
 43%|█████████████▋                  | 52340/122310 [1:37:42<2:24:12,  8.09it/s][A
 43%|█████████████▋                  | 52345/122310 [1:37:43<2:29:40,  7.79it/s][A
 43%|█████████████▋                  | 52353/122310 [1:37:44<2:13:32,  8.73it/s][A
 43%|█████████████▋                  | 52364/122310 [1:37:44<1:49:53, 10.61

step: 21240, loss: 92.6877617258118, epoch: 1



 43%|█████████████▋                  | 52437/122310 [1:37:49<1:23:45, 13.90it/s][A
 43%|█████████████▋                  | 52444/122310 [1:37:50<1:31:46, 12.69it/s][A
 43%|█████████████▋                  | 52449/122310 [1:37:50<1:46:17, 10.95it/s][A
 43%|█████████████▋                  | 52454/122310 [1:37:51<1:59:06,  9.78it/s][A
 43%|█████████████▋                  | 52464/122310 [1:37:52<1:45:41, 11.01it/s][A
 43%|█████████████▋                  | 52474/122310 [1:37:52<1:37:54, 11.89it/s][A
 43%|█████████████▋                  | 52486/122310 [1:37:53<1:27:37, 13.28it/s][A
 43%|█████████████▋                  | 52488/122310 [1:37:55<2:29:08,  7.80it/s][A
 43%|█████████████▋                  | 52491/122310 [1:37:55<2:48:37,  6.90it/s][A
 43%|█████████████▋                  | 52494/122310 [1:37:56<3:07:52,  6.19it/s][A
 43%|█████████████▋                  | 52504/122310 [1:37:57<2:20:37,  8.27it/s][A
 43%|█████████████▋                  | 52517/122310 [1:37:57<1:46:00, 10.97

step: 21260, loss: 78.9446981542786, epoch: 1



 43%|█████████████▊                  | 52585/122310 [1:38:03<1:22:57, 14.01it/s][A
 43%|█████████████▊                  | 52592/122310 [1:38:04<1:31:03, 12.76it/s][A
 43%|█████████████▊                  | 52596/122310 [1:38:05<1:50:02, 10.56it/s][A
 43%|█████████████▊                  | 52601/122310 [1:38:05<2:02:23,  9.49it/s][A
 43%|█████████████▊                  | 52617/122310 [1:38:06<1:27:16, 13.31it/s][A
 43%|█████████████▊                  | 52635/122310 [1:38:07<1:09:02, 16.82it/s][A
 43%|██████████████▋                   | 52656/122310 [1:38:08<56:32, 20.53it/s][A
 43%|██████████████▋                   | 52677/122310 [1:38:08<50:08, 23.14it/s][A
 43%|█████████████▊                  | 52681/122310 [1:38:09<1:04:57, 17.86it/s][A
 43%|█████████████▊                  | 52692/122310 [1:38:10<1:08:01, 17.06it/s][A
 43%|██████████████▋                   | 52712/122310 [1:38:10<57:09, 20.30it/s][A
 43%|█████████████▊                  | 52719/122310 [1:38:11<1:07:52, 17.09

step: 21280, loss: 74.21253801032144, epoch: 1



 43%|█████████████▊                  | 52802/122310 [1:38:18<1:29:16, 12.98it/s][A
 43%|█████████████▊                  | 52807/122310 [1:38:18<1:43:34, 11.18it/s][A
 43%|█████████████▊                  | 52815/122310 [1:38:19<1:43:54, 11.15it/s][A
 43%|█████████████▊                  | 52828/122310 [1:38:20<1:27:49, 13.19it/s][A
 43%|█████████████▊                  | 52833/122310 [1:38:21<1:42:06, 11.34it/s][A
 43%|█████████████▊                  | 52841/122310 [1:38:21<1:42:50, 11.26it/s][A
 43%|█████████████▊                  | 52853/122310 [1:38:22<1:30:07, 12.84it/s][A
 43%|█████████████▊                  | 52858/122310 [1:38:23<1:44:32, 11.07it/s][A
 43%|█████████████▊                  | 52869/122310 [1:38:23<1:34:01, 12.31it/s][A
 43%|█████████████▊                  | 52883/122310 [1:38:24<1:20:13, 14.42it/s][A
 43%|█████████████▊                  | 52894/122310 [1:38:25<1:18:54, 14.66it/s][A
 43%|█████████████▊                  | 52901/122310 [1:38:26<1:27:51, 13.17

step: 21300, loss: 86.79021580103773, epoch: 1



 43%|█████████████▊                  | 53000/122310 [1:38:32<1:02:06, 18.60it/s][A
 43%|█████████████▊                  | 53012/122310 [1:38:33<1:04:09, 18.00it/s][A
 43%|█████████████▊                  | 53022/122310 [1:38:34<1:08:37, 16.83it/s][A
 43%|█████████████▉                  | 53035/122310 [1:38:34<1:07:22, 17.14it/s][A
 43%|█████████████▉                  | 53039/122310 [1:38:35<1:24:21, 13.69it/s][A
 43%|█████████████▉                  | 53043/122310 [1:38:36<1:42:34, 11.26it/s][A
 43%|█████████████▉                  | 53050/122310 [1:38:36<1:46:59, 10.79it/s][A
 43%|█████████████▉                  | 53054/122310 [1:38:37<2:05:09,  9.22it/s][A
 43%|█████████████▉                  | 53059/122310 [1:38:38<2:16:36,  8.45it/s][A
 43%|█████████████▉                  | 53075/122310 [1:38:39<1:32:13, 12.51it/s][A
 43%|█████████████▉                  | 53085/122310 [1:38:39<1:29:21, 12.91it/s][A
 43%|█████████████▉                  | 53091/122310 [1:38:40<1:40:37, 11.46

step: 21320, loss: 81.66465108665659, epoch: 1



 43%|█████████████▉                  | 53178/122310 [1:38:47<1:17:57, 14.78it/s][A
 43%|█████████████▉                  | 53184/122310 [1:38:47<1:29:58, 12.80it/s][A
 43%|█████████████▉                  | 53194/122310 [1:38:48<1:27:45, 13.13it/s][A
 43%|█████████████▉                  | 53201/122310 [1:38:49<1:35:03, 12.12it/s][A
 44%|█████████████▉                  | 53209/122310 [1:38:50<1:37:26, 11.82it/s][A
 44%|█████████████▉                  | 53222/122310 [1:38:50<1:24:18, 13.66it/s][A
 44%|█████████████▉                  | 53232/122310 [1:38:51<1:23:51, 13.73it/s][A
 44%|█████████████▉                  | 53242/122310 [1:38:52<1:23:45, 13.74it/s][A
 44%|█████████████▉                  | 53252/122310 [1:38:52<1:23:36, 13.77it/s][A
 44%|█████████████▉                  | 53262/122310 [1:38:53<1:23:27, 13.79it/s][A
 44%|█████████████▉                  | 53269/122310 [1:38:54<1:31:35, 12.56it/s][A
 44%|█████████████▉                  | 53274/122310 [1:38:55<1:46:13, 10.83

step: 21340, loss: 88.16110914583103, epoch: 1



 44%|█████████████▉                  | 53346/122310 [1:39:01<1:27:03, 13.20it/s][A
 44%|█████████████▉                  | 53353/122310 [1:39:02<1:34:27, 12.17it/s][A
 44%|█████████████▉                  | 53365/122310 [1:39:03<1:25:09, 13.49it/s][A
 44%|█████████████▉                  | 53375/122310 [1:39:03<1:24:22, 13.62it/s][A
 44%|█████████████▉                  | 53379/122310 [1:39:04<1:42:49, 11.17it/s][A
 44%|█████████████▉                  | 53388/122310 [1:39:05<1:39:21, 11.56it/s][A
 44%|█████████████▉                  | 53397/122310 [1:39:05<1:37:02, 11.84it/s][A
 44%|█████████████▉                  | 53403/122310 [1:39:06<1:47:14, 10.71it/s][A
 44%|█████████████▉                  | 53409/122310 [1:39:07<1:54:49, 10.00it/s][A
 44%|█████████████▉                  | 53415/122310 [1:39:08<2:00:25,  9.54it/s][A
 44%|█████████████▉                  | 53423/122310 [1:39:08<1:55:09,  9.97it/s][A
 44%|█████████████▉                  | 53434/122310 [1:39:09<1:39:34, 11.53

step: 21360, loss: 76.3666687042885, epoch: 1



 44%|█████████████▉                  | 53487/122310 [1:39:16<2:10:45,  8.77it/s][A
 44%|█████████████▉                  | 53493/122310 [1:39:16<2:12:38,  8.65it/s][A
 44%|█████████████▉                  | 53500/122310 [1:39:17<2:07:50,  8.97it/s][A
 44%|█████████████▉                  | 53510/122310 [1:39:18<1:50:17, 10.40it/s][A
 44%|██████████████                  | 53514/122310 [1:39:18<2:08:01,  8.96it/s][A
 44%|██████████████                  | 53519/122310 [1:39:19<2:17:03,  8.37it/s][A
 44%|██████████████                  | 53532/122310 [1:39:20<1:41:51, 11.25it/s][A
 44%|██████████████                  | 53535/122310 [1:39:21<2:05:45,  9.12it/s][A
 44%|██████████████                  | 53547/122310 [1:39:21<1:41:07, 11.33it/s][A
 44%|██████████████                  | 53554/122310 [1:39:22<1:46:22, 10.77it/s][A
 44%|██████████████                  | 53570/122310 [1:39:23<1:20:57, 14.15it/s][A
 44%|██████████████                  | 53578/122310 [1:39:24<1:27:09, 13.14

step: 21380, loss: 74.75324941389434, epoch: 1



 44%|██████████████                  | 53645/122310 [1:39:30<2:24:29,  7.92it/s][A
 44%|██████████████                  | 53651/122310 [1:39:31<2:22:47,  8.01it/s][A
 44%|██████████████                  | 53662/122310 [1:39:32<1:52:42, 10.15it/s][A
 44%|██████████████                  | 53677/122310 [1:39:32<1:25:39, 13.36it/s][A
 44%|██████████████                  | 53685/122310 [1:39:33<1:30:18, 12.66it/s][A
 44%|██████████████                  | 53692/122310 [1:39:34<1:37:14, 11.76it/s][A
 44%|██████████████                  | 53699/122310 [1:39:34<1:42:47, 11.12it/s][A
 44%|██████████████                  | 53701/122310 [1:39:35<2:12:34,  8.63it/s][A
 44%|██████████████                  | 53716/122310 [1:39:36<1:33:25, 12.24it/s][A
 44%|██████████████                  | 53723/122310 [1:39:37<1:39:38, 11.47it/s][A
 44%|██████████████                  | 53731/122310 [1:39:37<1:40:38, 11.36it/s][A
 44%|██████████████                  | 53736/122310 [1:39:38<1:54:03, 10.02

step: 21400, loss: 79.60461579080616, epoch: 1



 44%|██████████████                  | 53808/122310 [1:39:45<1:24:14, 13.55it/s][A
 44%|██████████████                  | 53816/122310 [1:39:45<1:29:06, 12.81it/s][A
 44%|██████████████                  | 53825/122310 [1:39:46<1:29:50, 12.70it/s][A
 44%|██████████████                  | 53832/122310 [1:39:47<1:36:37, 11.81it/s][A
 44%|██████████████                  | 53841/122310 [1:39:48<1:35:20, 11.97it/s][A
 44%|██████████████                  | 53846/122310 [1:39:48<1:49:15, 10.44it/s][A
 44%|██████████████                  | 53857/122310 [1:39:49<1:35:59, 11.88it/s][A
 44%|██████████████                  | 53872/122310 [1:39:50<1:18:50, 14.47it/s][A
 44%|██████████████                  | 53882/122310 [1:39:50<1:19:55, 14.27it/s][A
 44%|██████████████                  | 53891/122310 [1:39:51<1:23:16, 13.69it/s][A
 44%|██████████████                  | 53893/122310 [1:39:53<2:22:09,  8.02it/s][A
 44%|██████████████                  | 53899/122310 [1:39:53<2:21:18,  8.07

step: 21420, loss: 104.3649799838541, epoch: 1



 44%|██████████████                  | 53949/122310 [1:39:59<1:54:36,  9.94it/s][A
 44%|██████████████                  | 53958/122310 [1:40:00<1:46:55, 10.65it/s][A
 44%|██████████████                  | 53962/122310 [1:40:01<2:03:41,  9.21it/s][A
 44%|██████████████                  | 53970/122310 [1:40:01<1:57:23,  9.70it/s][A
 44%|██████████████                  | 53978/122310 [1:40:02<1:53:27, 10.04it/s][A
 44%|██████████████                  | 53986/122310 [1:40:03<1:50:07, 10.34it/s][A
 44%|██████████████▏                 | 53994/122310 [1:40:03<1:48:10, 10.53it/s][A
 44%|██████████████▏                 | 54007/122310 [1:40:04<1:29:32, 12.71it/s][A
 44%|██████████████▏                 | 54013/122310 [1:40:05<1:40:02, 11.38it/s][A
 44%|██████████████▏                 | 54022/122310 [1:40:06<1:37:09, 11.71it/s][A
 44%|██████████████▏                 | 54034/122310 [1:40:06<1:26:26, 13.17it/s][A
 44%|██████████████▏                 | 54045/122310 [1:40:07<1:22:35, 13.78

step: 21440, loss: 88.73998251639881, epoch: 1



 44%|██████████████▏                 | 54132/122310 [1:40:14<1:22:32, 13.77it/s][A
 44%|██████████████▏                 | 54140/122310 [1:40:14<1:27:45, 12.95it/s][A
 44%|██████████████▏                 | 54148/122310 [1:40:15<1:31:47, 12.38it/s][A
 44%|██████████████▏                 | 54158/122310 [1:40:16<1:28:35, 12.82it/s][A
 44%|██████████████▏                 | 54167/122310 [1:40:17<1:29:45, 12.65it/s][A
 44%|██████████████▏                 | 54170/122310 [1:40:17<1:52:02, 10.14it/s][A
 44%|██████████████▏                 | 54172/122310 [1:40:18<2:23:04,  7.94it/s][A
 44%|██████████████▏                 | 54179/122310 [1:40:19<2:14:10,  8.46it/s][A
 44%|██████████████▏                 | 54186/122310 [1:40:19<2:08:40,  8.82it/s][A
 44%|██████████████▏                 | 54197/122310 [1:40:20<1:45:52, 10.72it/s][A
 44%|██████████████▏                 | 54202/122310 [1:40:21<1:58:24,  9.59it/s][A
 44%|██████████████▏                 | 54213/122310 [1:40:22<1:40:37, 11.28

step: 21460, loss: 72.75938683756944, epoch: 1



 44%|██████████████▏                 | 54281/122310 [1:40:28<1:37:34, 11.62it/s][A
 44%|██████████████▏                 | 54297/122310 [1:40:29<1:16:48, 14.76it/s][A
 44%|██████████████▏                 | 54303/122310 [1:40:30<1:28:30, 12.81it/s][A
 44%|██████████████▏                 | 54312/122310 [1:40:30<1:29:21, 12.68it/s][A
 44%|██████████████▏                 | 54320/122310 [1:40:31<1:32:47, 12.21it/s][A
 44%|██████████████▏                 | 54323/122310 [1:40:32<1:55:51,  9.78it/s][A
 44%|██████████████▏                 | 54328/122310 [1:40:32<2:06:59,  8.92it/s][A
 44%|██████████████▏                 | 54332/122310 [1:40:33<2:23:30,  7.89it/s][A
 44%|██████████████▏                 | 54338/122310 [1:40:34<2:20:58,  8.04it/s][A
 44%|██████████████▏                 | 54352/122310 [1:40:35<1:39:13, 11.41it/s][A
 44%|██████████████▏                 | 54358/122310 [1:40:35<1:48:06, 10.48it/s][A
 44%|██████████████▏                 | 54366/122310 [1:40:36<1:47:07, 10.57

step: 21480, loss: 78.27289356415974, epoch: 1



 45%|██████████████▏                 | 54446/122310 [1:40:43<1:26:44, 13.04it/s][A
 45%|██████████████▏                 | 54455/122310 [1:40:43<1:27:46, 12.88it/s][A
 45%|██████████████▎                 | 54468/122310 [1:40:44<1:18:46, 14.35it/s][A
 45%|██████████████▎                 | 54475/122310 [1:40:45<1:27:11, 12.97it/s][A
 45%|██████████████▎                 | 54483/122310 [1:40:46<1:31:16, 12.39it/s][A
 45%|██████████████▎                 | 54488/122310 [1:40:46<1:45:16, 10.74it/s][A
 45%|██████████████▎                 | 54492/122310 [1:40:47<2:02:58,  9.19it/s][A
 45%|██████████████▎                 | 54505/122310 [1:40:48<1:35:47, 11.80it/s][A
 45%|██████████████▎                 | 54510/122310 [1:40:48<1:49:16, 10.34it/s][A
 45%|██████████████▎                 | 54518/122310 [1:40:49<1:46:59, 10.56it/s][A
 45%|██████████████▎                 | 54533/122310 [1:40:50<1:23:08, 13.59it/s][A
 45%|██████████████▎                 | 54542/122310 [1:40:51<1:25:17, 13.24

step: 21500, loss: 77.09547795761165, epoch: 1



 45%|██████████████▎                 | 54615/122310 [1:40:57<1:40:12, 11.26it/s][A
 45%|██████████████▎                 | 54625/122310 [1:40:58<1:34:03, 11.99it/s][A
 45%|██████████████▎                 | 54628/122310 [1:40:59<1:56:40,  9.67it/s][A
 45%|██████████████▎                 | 54641/122310 [1:40:59<1:32:51, 12.15it/s][A
 45%|██████████████▎                 | 54645/122310 [1:41:00<1:50:57, 10.16it/s][A
 45%|██████████████▎                 | 54666/122310 [1:41:01<1:11:29, 15.77it/s][A
 45%|██████████████▎                 | 54670/122310 [1:41:02<1:28:36, 12.72it/s][A
 45%|██████████████▎                 | 54684/122310 [1:41:02<1:16:37, 14.71it/s][A
 45%|██████████████▎                 | 54700/122310 [1:41:03<1:06:44, 16.88it/s][A
 45%|██████████████▎                 | 54710/122310 [1:41:04<1:11:11, 15.83it/s][A
 45%|███████████████▏                  | 54730/122310 [1:41:04<58:16, 19.33it/s][A
 45%|██████████████▎                 | 54734/122310 [1:41:05<1:14:03, 15.21

step: 21520, loss: 79.60162433565802, epoch: 1



 45%|██████████████▎                 | 54814/122310 [1:41:12<1:31:55, 12.24it/s][A
 45%|██████████████▎                 | 54828/122310 [1:41:12<1:18:10, 14.39it/s][A
 45%|██████████████▎                 | 54833/122310 [1:41:13<1:32:37, 12.14it/s][A
 45%|██████████████▎                 | 54840/122310 [1:41:14<1:38:50, 11.38it/s][A
 45%|██████████████▎                 | 54847/122310 [1:41:15<1:43:15, 10.89it/s][A
 45%|██████████████▎                 | 54867/122310 [1:41:15<1:10:43, 15.89it/s][A
 45%|██████████████▎                 | 54883/122310 [1:41:16<1:03:18, 17.75it/s][A
 45%|███████████████▎                  | 54900/122310 [1:41:17<57:48, 19.43it/s][A
 45%|██████████████▎                 | 54904/122310 [1:41:17<1:13:40, 15.25it/s][A
 45%|██████████████▎                 | 54920/122310 [1:41:18<1:05:00, 17.28it/s][A
 45%|██████████████▎                 | 54923/122310 [1:41:19<1:24:05, 13.36it/s][A
 45%|██████████████▎                 | 54934/122310 [1:41:20<1:20:36, 13.93

step: 21540, loss: 66.14462392614163, epoch: 1



 45%|██████████████▍                 | 55001/122310 [1:41:26<1:53:59,  9.84it/s][A
 45%|██████████████▍                 | 55009/122310 [1:41:27<1:49:27, 10.25it/s][A
 45%|██████████████▍                 | 55019/122310 [1:41:28<1:39:20, 11.29it/s][A
 45%|██████████████▍                 | 55031/122310 [1:41:28<1:27:18, 12.84it/s][A
 45%|██████████████▍                 | 55038/122310 [1:41:29<1:34:01, 11.92it/s][A
 45%|██████████████▍                 | 55048/122310 [1:41:30<1:30:19, 12.41it/s][A
 45%|██████████████▍                 | 55052/122310 [1:41:31<1:47:38, 10.41it/s][A
 45%|██████████████▍                 | 55066/122310 [1:41:31<1:25:52, 13.05it/s][A
 45%|██████████████▍                 | 55069/122310 [1:41:32<1:47:42, 10.41it/s][A
 45%|██████████████▍                 | 55077/122310 [1:41:33<1:45:32, 10.62it/s][A
 45%|██████████████▍                 | 55082/122310 [1:41:33<1:57:38,  9.52it/s][A
 45%|██████████████▍                 | 55095/122310 [1:41:34<1:33:04, 12.04

step: 21560, loss: 70.81660356335404, epoch: 1



 45%|██████████████▍                 | 55167/122310 [1:41:41<1:47:16, 10.43it/s][A
 45%|██████████████▍                 | 55174/122310 [1:41:41<1:49:35, 10.21it/s][A
 45%|██████████████▍                 | 55184/122310 [1:41:42<1:39:26, 11.25it/s][A
 45%|██████████████▍                 | 55193/122310 [1:41:43<1:36:17, 11.62it/s][A
 45%|██████████████▍                 | 55195/122310 [1:41:44<2:05:16,  8.93it/s][A
 45%|██████████████▍                 | 55200/122310 [1:41:44<2:15:19,  8.27it/s][A
 45%|██████████████▍                 | 55211/122310 [1:41:45<1:48:03, 10.35it/s][A
 45%|██████████████▍                 | 55227/122310 [1:41:46<1:20:44, 13.85it/s][A
 45%|██████████████▍                 | 55236/122310 [1:41:46<1:23:07, 13.45it/s][A
 45%|██████████████▍                 | 55252/122310 [1:41:47<1:09:50, 16.00it/s][A
 45%|██████████████▍                 | 55263/122310 [1:41:48<1:10:51, 15.77it/s][A
 45%|██████████████▍                 | 55266/122310 [1:41:49<1:30:50, 12.30

step: 21580, loss: 79.15180713167669, epoch: 1



 45%|██████████████▍                 | 55349/122310 [1:41:55<1:35:06, 11.73it/s][A
 45%|██████████████▍                 | 55359/122310 [1:41:56<1:30:04, 12.39it/s][A
 45%|██████████████▍                 | 55366/122310 [1:41:57<1:36:37, 11.55it/s][A
 45%|██████████████▍                 | 55380/122310 [1:41:57<1:20:24, 13.87it/s][A
 45%|██████████████▍                 | 55389/122310 [1:41:58<1:22:50, 13.46it/s][A
 45%|██████████████▍                 | 55399/122310 [1:41:59<1:22:34, 13.51it/s][A
 45%|██████████████▍                 | 55405/122310 [1:42:00<1:33:26, 11.93it/s][A
 45%|██████████████▍                 | 55411/122310 [1:42:00<1:42:37, 10.86it/s][A
 45%|██████████████▍                 | 55419/122310 [1:42:01<1:43:04, 10.82it/s][A
 45%|██████████████▌                 | 55430/122310 [1:42:02<1:32:10, 12.09it/s][A
 45%|██████████████▌                 | 55441/122310 [1:42:02<1:25:35, 13.02it/s][A
 45%|██████████████▌                 | 55449/122310 [1:42:03<1:29:45, 12.41

step: 21600, loss: 71.2822324377146, epoch: 1
sim1 and sim2 are 0.4845796004431638, 0.2564396816790837
cosine of pred and queen: 0.3128418753073583
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharast


 45%|██████████████▌                 | 55509/122310 [1:42:28<1:38:22, 11.32it/s][A

Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: efforts
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.28888888888888886
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: emblem
Actual: vanish:vanishes::eat:eats, pred: bread
Actual: sing:sings::shuffle:shuffles, pred: emblem
Actual: sit:sits::go:goes, pred: emblem
Actual: say:says::provide:provides, pred: emblem
Actual: scream:screams::sing:sings, pred: shebna
Actual: play:plays::listen:listens, pred: emblem
Actual: predict:predicts::search:searches, pred: pebbles
Actual: machine:machines::lion:lions, pred: thamer
Actual: mango:mangoes::onion:onions, pred: mier
Actual: man:men::mango:mangoes, pred: emblem
Actual: melon:melons::pig:pigs, pred: illustrative
Actual: hand:hands::goat:goats, pred: effigies
Actual: horse:hors


 45%|██████████████                 | 55518/122310 [1:43:23<50:25:48,  2.72s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.10059171597633136



 45%|██████████████                 | 55523/122310 [1:43:24<40:45:27,  2.20s/it][A
 45%|██████████████                 | 55541/122310 [1:43:25<20:21:02,  1.10s/it][A
 45%|██████████████                 | 55547/122310 [1:43:25<16:48:20,  1.10it/s][A
 45%|██████████████                 | 55555/122310 [1:43:26<12:41:42,  1.46it/s][A
 45%|██████████████▌                 | 55573/122310 [1:43:27<7:07:32,  2.60it/s][A
 45%|██████████████▌                 | 55577/122310 [1:43:27<6:38:00,  2.79it/s][A
 45%|██████████████▌                 | 55587/122310 [1:43:28<4:58:09,  3.73it/s][A
 45%|██████████████▌                 | 55589/122310 [1:43:29<5:06:09,  3.63it/s][A
 45%|██████████████▌                 | 55602/122310 [1:43:30<3:20:17,  5.55it/s][A
 45%|██████████████▌                 | 55609/122310 [1:43:30<2:58:13,  6.24it/s][A
 45%|██████████████▌                 | 55619/122310 [1:43:31<2:25:21,  7.65it/s][A
 45%|██████████████▌                 | 55624/122310 [1:43:32<2:27:44,  7.52

step: 21620, loss: 83.07110437363946, epoch: 1



 46%|██████████████▌                 | 55699/122310 [1:43:38<1:44:15, 10.65it/s][A
 46%|██████████████▌                 | 55710/122310 [1:43:39<1:32:04, 12.05it/s][A
 46%|██████████████▌                 | 55718/122310 [1:43:40<1:34:21, 11.76it/s][A
 46%|██████████████▌                 | 55722/122310 [1:43:40<1:51:20,  9.97it/s][A
 46%|██████████████▌                 | 55729/122310 [1:43:41<1:52:06,  9.90it/s][A
 46%|██████████████▌                 | 55736/122310 [1:43:42<1:52:37,  9.85it/s][A
 46%|██████████████▌                 | 55746/122310 [1:43:43<1:40:15, 11.07it/s][A
 46%|██████████████▌                 | 55755/122310 [1:43:43<1:36:34, 11.49it/s][A
 46%|██████████████▌                 | 55758/122310 [1:43:44<1:58:57,  9.32it/s][A
 46%|██████████████▌                 | 55765/122310 [1:43:46<3:07:50,  5.90it/s][A
 46%|██████████████▌                 | 55770/122310 [1:43:47<3:01:02,  6.13it/s][A
 46%|██████████████▌                 | 55776/122310 [1:43:48<2:46:24,  6.66

step: 21640, loss: 75.95311197298423, epoch: 1



 46%|██████████████▌                 | 55829/122310 [1:43:53<1:47:25, 10.31it/s][A
 46%|██████████████▌                 | 55842/122310 [1:43:53<1:27:53, 12.60it/s][A
 46%|██████████████▌                 | 55853/122310 [1:43:54<1:22:36, 13.41it/s][A
 46%|██████████████▌                 | 55865/122310 [1:43:55<1:16:59, 14.38it/s][A
 46%|██████████████▌                 | 55869/122310 [1:43:56<1:34:06, 11.77it/s][A
 46%|██████████████▌                 | 55873/122310 [1:43:56<1:51:44,  9.91it/s][A
 46%|██████████████▌                 | 55878/122310 [1:43:57<2:02:45,  9.02it/s][A
 46%|██████████████▌                 | 55883/122310 [1:43:58<2:11:46,  8.40it/s][A
 46%|██████████████▌                 | 55892/122310 [1:43:58<1:54:48,  9.64it/s][A
 46%|██████████████▌                 | 55899/122310 [1:43:59<1:55:46,  9.56it/s][A
 46%|██████████████▋                 | 55908/122310 [1:44:00<1:46:07, 10.43it/s][A
 46%|██████████████▋                 | 55917/122310 [1:44:01<1:40:13, 11.04

step: 21660, loss: 96.03855317913876, epoch: 1



 46%|██████████████▋                 | 56001/122310 [1:44:07<1:43:17, 10.70it/s][A
 46%|██████████████▋                 | 56006/122310 [1:44:08<1:54:59,  9.61it/s][A
 46%|██████████████▋                 | 56018/122310 [1:44:09<1:34:35, 11.68it/s][A
 46%|██████████████▋                 | 56021/122310 [1:44:09<1:56:31,  9.48it/s][A
 46%|██████████████▋                 | 56035/122310 [1:44:10<1:28:38, 12.46it/s][A
 46%|██████████████▋                 | 56041/122310 [1:44:11<1:38:18, 11.23it/s][A
 46%|██████████████▋                 | 56055/122310 [1:44:11<1:20:54, 13.65it/s][A
 46%|██████████████▋                 | 56069/122310 [1:44:12<1:11:39, 15.41it/s][A
 46%|██████████████▋                 | 56078/122310 [1:44:13<1:15:57, 14.53it/s][A
 46%|██████████████▋                 | 56086/122310 [1:44:14<1:21:43, 13.51it/s][A
 46%|██████████████▋                 | 56091/122310 [1:44:14<1:35:29, 11.56it/s][A
 46%|██████████████▋                 | 56097/122310 [1:44:15<1:44:13, 10.59

step: 21680, loss: 84.66303550278502, epoch: 1



 46%|██████████████▋                 | 56168/122310 [1:44:22<1:45:26, 10.45it/s][A
 46%|██████████████▋                 | 56177/122310 [1:44:22<1:39:27, 11.08it/s][A
 46%|██████████████▋                 | 56184/122310 [1:44:23<1:43:21, 10.66it/s][A
 46%|██████████████▋                 | 56191/122310 [1:44:24<1:46:16, 10.37it/s][A
 46%|██████████████▋                 | 56198/122310 [1:44:24<1:48:05, 10.19it/s][A
 46%|██████████████▋                 | 56214/122310 [1:44:26<1:44:09, 10.58it/s][A
 46%|██████████████▋                 | 56216/122310 [1:44:27<2:06:38,  8.70it/s][A
 46%|██████████████▋                 | 56222/122310 [1:44:27<2:08:25,  8.58it/s][A
 46%|██████████████▋                 | 56230/122310 [1:44:28<1:59:09,  9.24it/s][A
 46%|██████████████▋                 | 56238/122310 [1:44:29<1:52:49,  9.76it/s][A
 46%|██████████████▋                 | 56255/122310 [1:44:29<1:20:31, 13.67it/s][A
 46%|██████████████▋                 | 56266/122310 [1:44:30<1:17:46, 14.15

step: 21700, loss: 86.66522922712687, epoch: 1



 46%|██████████████▋                 | 56316/122310 [1:44:36<2:38:14,  6.95it/s][A
 46%|██████████████▋                 | 56323/122310 [1:44:37<2:21:52,  7.75it/s][A
 46%|██████████████▋                 | 56328/122310 [1:44:37<2:25:53,  7.54it/s][A
 46%|██████████████▋                 | 56340/122310 [1:44:38<1:47:06, 10.27it/s][A
 46%|██████████████▋                 | 56347/122310 [1:44:39<1:49:19, 10.06it/s][A
 46%|██████████████▋                 | 56353/122310 [1:44:40<1:55:29,  9.52it/s][A
 46%|██████████████▋                 | 56376/122310 [1:44:40<1:07:42, 16.23it/s][A
 46%|██████████████▊                 | 56385/122310 [1:44:41<1:12:31, 15.15it/s][A
 46%|██████████████▊                 | 56388/122310 [1:44:42<1:33:04, 11.80it/s][A
 46%|██████████████▊                 | 56393/122310 [1:44:42<1:45:48, 10.38it/s][A
 46%|██████████████▊                 | 56401/122310 [1:44:43<1:43:59, 10.56it/s][A
 46%|██████████████▊                 | 56406/122310 [1:44:44<1:55:59,  9.47

step: 21720, loss: 87.55771833375897, epoch: 1



 46%|██████████████▊                 | 56485/122310 [1:44:50<1:23:20, 13.16it/s][A
 46%|██████████████▊                 | 56497/122310 [1:44:51<1:17:12, 14.21it/s][A
 46%|██████████████▊                 | 56501/122310 [1:44:52<1:34:38, 11.59it/s][A
 46%|██████████████▊                 | 56508/122310 [1:44:53<1:39:19, 11.04it/s][A
 46%|██████████████▊                 | 56513/122310 [1:44:56<4:04:59,  4.48it/s][A
 46%|██████████████▊                 | 56517/122310 [1:44:57<3:55:50,  4.65it/s][A
 46%|██████████████▊                 | 56528/122310 [1:44:58<2:44:58,  6.65it/s][A
 46%|██████████████▊                 | 56546/122310 [1:44:58<1:44:14, 10.51it/s][A
 46%|██████████████▊                 | 56555/122310 [1:44:59<1:39:54, 10.97it/s][A
 46%|██████████████▊                 | 56560/122310 [1:45:00<1:49:49,  9.98it/s][A
 46%|██████████████▊                 | 56572/122310 [1:45:01<1:33:26, 11.73it/s][A
 46%|██████████████▊                 | 56587/122310 [1:45:01<1:17:10, 14.19

step: 21740, loss: 93.29067870682782, epoch: 1



 46%|██████████████▊                 | 56618/122310 [1:45:05<2:20:53,  7.77it/s][A
 46%|██████████████▊                 | 56626/122310 [1:45:06<2:04:50,  8.77it/s][A
 46%|██████████████▊                 | 56639/122310 [1:45:06<1:35:10, 11.50it/s][A
 46%|██████████████▊                 | 56650/122310 [1:45:07<1:26:37, 12.63it/s][A
 46%|███████████████▊                  | 56674/122310 [1:45:08<58:42, 18.63it/s][A
 46%|██████████████▊                 | 56676/122310 [1:45:08<1:18:37, 13.91it/s][A
 46%|██████████████▊                 | 56689/122310 [1:45:09<1:12:11, 15.15it/s][A
 46%|██████████████▊                 | 56696/122310 [1:45:10<1:20:57, 13.51it/s][A
 46%|██████████████▊                 | 56702/122310 [1:45:11<1:31:22, 11.97it/s][A
 46%|██████████████▊                 | 56710/122310 [1:45:11<1:33:34, 11.68it/s][A
 46%|██████████████▊                 | 56720/122310 [1:45:12<1:28:44, 12.32it/s][A
 46%|██████████████▊                 | 56732/122310 [1:45:13<1:20:16, 13.61

step: 21760, loss: 89.27960620637805, epoch: 1



 46%|██████████████▊                 | 56800/122310 [1:45:19<1:38:08, 11.13it/s][A
 46%|██████████████▊                 | 56805/122310 [1:45:20<1:50:40,  9.86it/s][A
 46%|██████████████▊                 | 56814/122310 [1:45:21<1:42:42, 10.63it/s][A
 46%|██████████████▊                 | 56826/122310 [1:45:22<1:28:00, 12.40it/s][A
 46%|██████████████▊                 | 56836/122310 [1:45:22<1:25:36, 12.75it/s][A
 46%|██████████████▊                 | 56842/122310 [1:45:23<1:35:35, 11.41it/s][A
 46%|██████████████▊                 | 56848/122310 [1:45:24<1:45:18, 10.36it/s][A
 46%|██████████████▉                 | 56857/122310 [1:45:24<1:39:02, 11.01it/s][A
 46%|██████████████▉                 | 56867/122310 [1:45:25<1:32:22, 11.81it/s][A
 46%|██████████████▉                 | 56871/122310 [1:45:26<1:49:44,  9.94it/s][A
 47%|██████████████▉                 | 56879/122310 [1:45:27<1:46:18, 10.26it/s][A
 47%|██████████████▉                 | 56889/122310 [1:45:27<1:36:50, 11.26

step: 21780, loss: 103.95812479479744, epoch: 1



 47%|██████████████▉                 | 56947/122310 [1:45:34<2:25:46,  7.47it/s][A
 47%|██████████████▉                 | 56961/122310 [1:45:35<1:38:47, 11.02it/s][A
 47%|██████████████▉                 | 56966/122310 [1:45:35<1:50:59,  9.81it/s][A
 47%|██████████████▉                 | 56977/122310 [1:45:36<1:35:17, 11.43it/s][A
 47%|██████████████▉                 | 56981/122310 [1:45:37<1:52:54,  9.64it/s][A
 47%|██████████████▉                 | 56988/122310 [1:45:38<1:52:45,  9.66it/s][A
 47%|██████████████▉                 | 56992/122310 [1:45:38<2:09:25,  8.41it/s][A
 47%|██████████████▉                 | 57005/122310 [1:45:39<1:36:23, 11.29it/s][A
 47%|██████████████▉                 | 57011/122310 [1:45:40<1:44:50, 10.38it/s][A
 47%|██████████████▉                 | 57024/122310 [1:45:40<1:26:20, 12.60it/s][A
 47%|██████████████▉                 | 57026/122310 [1:45:41<1:52:09,  9.70it/s][A
 47%|██████████████▉                 | 57034/122310 [1:45:43<2:20:44,  7.73

step: 21800, loss: 79.86343121479865, epoch: 1



 47%|██████████████▉                 | 57094/122310 [1:45:48<1:31:16, 11.91it/s][A
 47%|██████████████▉                 | 57103/122310 [1:45:49<1:30:04, 12.06it/s][A
 47%|██████████████▉                 | 57120/122310 [1:45:50<1:10:17, 15.46it/s][A
 47%|██████████████▉                 | 57131/122310 [1:45:51<1:10:41, 15.37it/s][A
 47%|██████████████▉                 | 57140/122310 [1:45:51<1:14:52, 14.51it/s][A
 47%|██████████████▉                 | 57154/122310 [1:45:52<1:08:22, 15.88it/s][A
 47%|██████████████▉                 | 57163/122310 [1:45:53<1:14:46, 14.52it/s][A
 47%|██████████████▉                 | 57173/122310 [1:45:54<1:16:54, 14.12it/s][A
 47%|██████████████▉                 | 57189/122310 [1:45:54<1:06:02, 16.44it/s][A
 47%|██████████████▉                 | 57197/122310 [1:45:55<1:13:08, 14.84it/s][A
 47%|██████████████▉                 | 57212/122310 [1:45:56<1:05:32, 16.55it/s][A
 47%|██████████████▉                 | 57226/122310 [1:45:56<1:02:24, 17.38

step: 21820, loss: 76.87968404143277, epoch: 1



 47%|██████████████▉                 | 57307/122310 [1:46:03<1:18:56, 13.72it/s][A
 47%|███████████████▉                  | 57330/122310 [1:46:04<56:46, 19.08it/s][A
 47%|███████████████▉                  | 57345/122310 [1:46:05<55:25, 19.53it/s][A
 47%|███████████████                 | 57349/122310 [1:46:05<1:10:17, 15.40it/s][A
 47%|███████████████                 | 57358/122310 [1:46:06<1:14:32, 14.52it/s][A
 47%|███████████████                 | 57365/122310 [1:46:07<1:22:52, 13.06it/s][A
 47%|███████████████                 | 57378/122310 [1:46:07<1:14:24, 14.55it/s][A
 47%|███████████████                 | 57388/122310 [1:46:08<1:15:46, 14.28it/s][A
 47%|███████████████                 | 57401/122310 [1:46:09<1:10:15, 15.40it/s][A
 47%|███████████████                 | 57410/122310 [1:46:10<1:14:26, 14.53it/s][A
 47%|███████████████                 | 57416/122310 [1:46:10<1:25:36, 12.63it/s][A
 47%|███████████████                 | 57426/122310 [1:46:11<1:23:08, 13.01

step: 21840, loss: 82.60207385674028, epoch: 1



 47%|███████████████                 | 57503/122310 [1:46:18<1:33:23, 11.56it/s][A
 47%|███████████████                 | 57513/122310 [1:46:18<1:28:13, 12.24it/s][A
 47%|███████████████                 | 57529/122310 [1:46:19<1:11:05, 15.19it/s][A
 47%|███████████████                 | 57537/122310 [1:46:20<1:17:40, 13.90it/s][A
 47%|███████████████                 | 57543/122310 [1:46:20<1:28:01, 12.26it/s][A
 47%|███████████████                 | 57554/122310 [1:46:21<1:22:31, 13.08it/s][A
 47%|███████████████                 | 57563/122310 [1:46:22<1:23:32, 12.92it/s][A
 47%|███████████████                 | 57565/122310 [1:46:23<1:49:28,  9.86it/s][A
 47%|███████████████                 | 57573/122310 [1:46:23<1:45:51, 10.19it/s][A
 47%|███████████████                 | 57589/122310 [1:46:24<1:18:36, 13.72it/s][A
 47%|███████████████                 | 57595/122310 [1:46:25<1:29:09, 12.10it/s][A
 47%|███████████████                 | 57604/122310 [1:46:26<1:29:02, 12.11

step: 21860, loss: 86.02172749056328, epoch: 1



 47%|███████████████                 | 57671/122310 [1:46:32<1:52:00,  9.62it/s][A
 47%|███████████████                 | 57680/122310 [1:46:33<1:44:19, 10.33it/s][A
 47%|███████████████                 | 57682/122310 [1:46:34<2:09:59,  8.29it/s][A
 47%|███████████████                 | 57709/122310 [1:46:34<1:05:50, 16.35it/s][A
 47%|███████████████                 | 57714/122310 [1:46:35<1:18:36, 13.70it/s][A
 47%|███████████████                 | 57722/122310 [1:46:36<1:23:34, 12.88it/s][A
 47%|███████████████                 | 57728/122310 [1:46:36<1:33:05, 11.56it/s][A
 47%|███████████████                 | 57739/122310 [1:46:37<1:25:07, 12.64it/s][A
 47%|███████████████                 | 57742/122310 [1:46:38<1:46:32, 10.10it/s][A
 47%|███████████████                 | 57744/122310 [1:46:39<2:16:29,  7.88it/s][A
 47%|███████████████                 | 57747/122310 [1:46:39<2:38:04,  6.81it/s][A
 47%|███████████████                 | 57753/122310 [1:46:40<2:29:02,  7.22

step: 21880, loss: 78.34083349921205, epoch: 1



 47%|███████████████▏                | 57824/122310 [1:46:47<1:59:15,  9.01it/s][A
 47%|███████████████▏                | 57829/122310 [1:46:47<2:08:02,  8.39it/s][A
 47%|███████████████▏                | 57839/122310 [1:46:48<1:47:01, 10.04it/s][A
 47%|███████████████▏                | 57846/122310 [1:46:49<1:48:11,  9.93it/s][A
 47%|███████████████▏                | 57850/122310 [1:46:49<2:04:48,  8.61it/s][A
 47%|███████████████▏                | 57867/122310 [1:46:50<1:22:18, 13.05it/s][A
 47%|███████████████▏                | 57876/122310 [1:46:51<1:23:42, 12.83it/s][A
 47%|███████████████▏                | 57883/122310 [1:46:52<1:30:04, 11.92it/s][A
 47%|███████████████▏                | 57893/122310 [1:46:52<1:26:03, 12.48it/s][A
 47%|███████████████▏                | 57909/122310 [1:46:53<1:10:06, 15.31it/s][A
 47%|███████████████▏                | 57921/122310 [1:46:54<1:08:30, 15.66it/s][A
 47%|███████████████▏                | 57934/122310 [1:46:55<1:06:00, 16.26

step: 21900, loss: 87.06931057676091, epoch: 1



 47%|███████████████▏                | 58009/122310 [1:47:01<1:38:45, 10.85it/s][A
 47%|███████████████▏                | 58017/122310 [1:47:02<1:37:58, 10.94it/s][A
 47%|███████████████▏                | 58027/122310 [1:47:03<1:30:38, 11.82it/s][A
 47%|███████████████▏                | 58039/122310 [1:47:03<1:21:05, 13.21it/s][A
 47%|███████████████▏                | 58046/122310 [1:47:04<1:28:02, 12.17it/s][A
 47%|███████████████▏                | 58053/122310 [1:47:05<1:34:14, 11.36it/s][A
 47%|███████████████▏                | 58061/122310 [1:47:05<1:34:35, 11.32it/s][A
 47%|███████████████▏                | 58071/122310 [1:47:06<1:28:55, 12.04it/s][A
 47%|███████████████▏                | 58081/122310 [1:47:07<1:25:22, 12.54it/s][A
 47%|███████████████▏                | 58087/122310 [1:47:08<1:34:39, 11.31it/s][A
 47%|███████████████▏                | 58097/122310 [1:47:08<1:28:51, 12.04it/s][A
 48%|███████████████▏                | 58102/122310 [1:47:09<1:42:04, 10.48

step: 21920, loss: 76.37166966251083, epoch: 1



 48%|███████████████▏                | 58168/122310 [1:47:16<1:52:56,  9.47it/s][A
 48%|███████████████▏                | 58175/122310 [1:47:16<1:52:11,  9.53it/s][A
 48%|███████████████▏                | 58180/122310 [1:47:17<2:02:15,  8.74it/s][A
 48%|███████████████▏                | 58184/122310 [1:47:18<2:17:00,  7.80it/s][A
 48%|███████████████▏                | 58195/122310 [1:47:18<1:46:30, 10.03it/s][A
 48%|███████████████▏                | 58202/122310 [1:47:19<1:47:51,  9.91it/s][A
 48%|███████████████▏                | 58219/122310 [1:47:20<1:16:31, 13.96it/s][A
 48%|███████████████▏                | 58229/122310 [1:47:21<1:16:39, 13.93it/s][A
 48%|███████████████▏                | 58241/122310 [1:47:21<1:12:38, 14.70it/s][A
 48%|███████████████▏                | 58256/122310 [1:47:22<1:04:36, 16.52it/s][A
 48%|███████████████▏                | 58263/122310 [1:47:23<1:14:06, 14.41it/s][A
 48%|███████████████▏                | 58270/122310 [1:47:24<1:22:03, 13.01

step: 21940, loss: 70.11496412406676, epoch: 1



 48%|███████████████▎                | 58355/122310 [1:47:30<1:40:36, 10.60it/s][A
 48%|███████████████▎                | 58367/122310 [1:47:31<1:26:53, 12.26it/s][A
 48%|███████████████▎                | 58381/122310 [1:47:32<1:14:29, 14.30it/s][A
 48%|███████████████▎                | 58394/122310 [1:47:32<1:09:25, 15.34it/s][A
 48%|███████████████▎                | 58411/122310 [1:47:33<1:01:14, 17.39it/s][A
 48%|███████████████▎                | 58425/122310 [1:47:34<1:00:37, 17.56it/s][A
 48%|███████████████▎                | 58433/122310 [1:47:35<1:09:07, 15.40it/s][A
 48%|███████████████▎                | 58440/122310 [1:47:35<1:18:38, 13.54it/s][A
 48%|███████████████▎                | 58447/122310 [1:47:36<1:27:15, 12.20it/s][A
 48%|███████████████▎                | 58454/122310 [1:47:37<1:32:42, 11.48it/s][A
 48%|███████████████▎                | 58470/122310 [1:47:38<1:12:57, 14.58it/s][A
 48%|███████████████▎                | 58476/122310 [1:47:38<1:23:36, 12.72

step: 21960, loss: 76.51168260937378, epoch: 1



 48%|███████████████▎                | 58574/122310 [1:47:45<1:26:10, 12.33it/s][A
 48%|███████████████▎                | 58584/122310 [1:47:46<1:23:03, 12.79it/s][A
 48%|███████████████▎                | 58591/122310 [1:47:46<1:30:03, 11.79it/s][A
 48%|███████████████▎                | 58608/122310 [1:47:47<1:09:28, 15.28it/s][A
 48%|███████████████▎                | 58618/122310 [1:47:48<1:11:29, 14.85it/s][A
 48%|███████████████▎                | 58625/122310 [1:47:48<1:19:47, 13.30it/s][A
 48%|███████████████▎                | 58635/122310 [1:47:49<1:18:44, 13.48it/s][A
 48%|███████████████▎                | 58646/122310 [1:47:50<1:15:51, 13.99it/s][A
 48%|███████████████▎                | 58649/122310 [1:47:51<1:36:01, 11.05it/s][A
 48%|███████████████▎                | 58658/122310 [1:47:51<1:32:32, 11.46it/s][A
 48%|███████████████▎                | 58669/122310 [1:47:52<1:24:21, 12.57it/s][A
 48%|███████████████▎                | 58678/122310 [1:47:53<1:24:45, 12.51

step: 21980, loss: 79.18141052748139, epoch: 1



 48%|███████████████▎                | 58749/122310 [1:47:59<1:31:43, 11.55it/s][A
 48%|███████████████▎                | 58759/122310 [1:48:00<1:26:40, 12.22it/s][A
 48%|███████████████▎                | 58766/122310 [1:48:01<1:32:31, 11.45it/s][A
 48%|███████████████▍                | 58780/122310 [1:48:02<1:16:46, 13.79it/s][A
 48%|███████████████▍                | 58788/122310 [1:48:02<1:21:38, 12.97it/s][A
 48%|███████████████▍                | 58807/122310 [1:48:03<1:02:35, 16.91it/s][A
 48%|███████████████▍                | 58817/122310 [1:48:04<1:06:09, 15.99it/s][A
 48%|███████████████▍                | 58825/122310 [1:48:04<1:12:47, 14.54it/s][A
 48%|███████████████▍                | 58829/122310 [1:48:05<1:29:52, 11.77it/s][A
 48%|███████████████▍                | 58835/122310 [1:48:06<1:38:24, 10.75it/s][A
 48%|███████████████▍                | 58853/122310 [1:48:07<1:10:39, 14.97it/s][A
 48%|███████████████▍                | 58869/122310 [1:48:07<1:01:54, 17.08

step: 22000, loss: 70.88996422087155, epoch: 1
sim1 and sim2 are 0.42835533615303995, 0.2575658107993139
cosine of pred and queen: 0.28244656638936505
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maha


 48%|███████████████▍                | 58920/122310 [1:48:28<1:25:25, 12.37it/s][A

Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: cent
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: says
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: media
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: efforts
Actual: denmark:danish::germany:german, pred: zone
Accuracy is 0.24444444444444444
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: emblem
Actual: vanish:vanishes::eat:eats, pred: let
Actual: sing:sings::shuffle:shuffles, pred: emblem
Actual: sit:sits::go:goes, p


 48%|██████████████▉                | 58933/122310 [1:49:28<40:17:48,  2.29s/it][A
 48%|██████████████▉                | 58936/122310 [1:49:29<36:14:20,  2.06s/it][A
 48%|██████████████▉                | 58953/122310 [1:49:30<19:21:10,  1.10s/it][A
 48%|██████████████▉                | 58959/122310 [1:49:31<16:00:36,  1.10it/s][A
 48%|██████████████▉                | 58967/122310 [1:49:31<12:07:00,  1.45it/s][A
 48%|██████████████▉                | 58970/122310 [1:49:32<11:06:53,  1.58it/s][A
 48%|███████████████▍                | 58980/122310 [1:49:33<7:24:51,  2.37it/s][A
 48%|███████████████▍                | 58985/122310 [1:49:33<6:23:20,  2.75it/s][A
 48%|███████████████▍                | 58992/122310 [1:49:34<5:02:00,  3.49it/s][A
 48%|███████████████▍                | 58997/122310 [1:49:35<4:27:31,  3.94it/s][A
 48%|███████████████▍                | 58998/122310 [1:49:36<4:57:38,  3.55it/s][A
 48%|███████████████▍                | 59010/122310 [1:49:36<2:56:19,  5.98

step: 22020, loss: 93.79995376295602, epoch: 1



 48%|███████████████▍                | 59109/122310 [1:49:44<1:12:09, 14.60it/s][A
 48%|███████████████▍                | 59117/122310 [1:49:44<1:17:32, 13.58it/s][A
 48%|███████████████▍                | 59128/122310 [1:49:45<1:14:32, 14.13it/s][A
 48%|███████████████▍                | 59141/122310 [1:49:46<1:08:33, 15.36it/s][A
 48%|███████████████▍                | 59146/122310 [1:49:46<1:22:03, 12.83it/s][A
 48%|███████████████▍                | 59158/122310 [1:49:47<1:15:37, 13.92it/s][A
 48%|███████████████▍                | 59165/122310 [1:49:48<1:22:38, 12.73it/s][A
 48%|███████████████▍                | 59179/122310 [1:49:49<1:11:12, 14.78it/s][A
 48%|███████████████▍                | 59190/122310 [1:49:49<1:10:22, 14.95it/s][A
 48%|███████████████▍                | 59204/122310 [1:49:50<1:04:23, 16.33it/s][A
 48%|███████████████▍                | 59210/122310 [1:49:51<1:15:32, 13.92it/s][A
 48%|███████████████▍                | 59218/122310 [1:49:51<1:20:34, 13.05

step: 22040, loss: 86.29023067846782, epoch: 1



 48%|███████████████▌                | 59291/122310 [1:49:58<1:43:18, 10.17it/s][A
 48%|███████████████▌                | 59301/122310 [1:49:59<1:33:00, 11.29it/s][A
 48%|███████████████▌                | 59312/122310 [1:49:59<1:23:45, 12.54it/s][A
 48%|███████████████▌                | 59318/122310 [1:50:00<1:33:05, 11.28it/s][A
 49%|███████████████▌                | 59324/122310 [1:50:01<1:40:40, 10.43it/s][A
 49%|███████████████▌                | 59329/122310 [1:50:01<1:52:31,  9.33it/s][A
 49%|███████████████▌                | 59330/122310 [1:50:02<2:31:21,  6.93it/s][A
 49%|███████████████▌                | 59343/122310 [1:50:03<1:41:59, 10.29it/s][A
 49%|███████████████▌                | 59349/122310 [1:50:04<1:48:49,  9.64it/s][A
 49%|███████████████▌                | 59357/122310 [1:50:04<1:43:56, 10.09it/s][A
 49%|███████████████▌                | 59372/122310 [1:50:05<1:18:59, 13.28it/s][A
 49%|███████████████▌                | 59385/122310 [1:50:06<1:11:42, 14.63

step: 22060, loss: 78.49678054445012, epoch: 1



 49%|███████████████▌                | 59440/122310 [1:50:12<1:46:57,  9.80it/s][A
 49%|███████████████▌                | 59444/122310 [1:50:14<2:39:35,  6.57it/s][A
 49%|███████████████▌                | 59460/122310 [1:50:15<1:40:18, 10.44it/s][A
 49%|███████████████▌                | 59470/122310 [1:50:15<1:32:28, 11.33it/s][A
 49%|███████████████▌                | 59483/122310 [1:50:16<1:20:01, 13.09it/s][A
 49%|███████████████▌                | 59486/122310 [1:50:17<1:39:22, 10.54it/s][A
 49%|███████████████▌                | 59501/122310 [1:50:17<1:18:21, 13.36it/s][A
 49%|███████████████▌                | 59507/122310 [1:50:18<1:27:57, 11.90it/s][A
 49%|███████████████▌                | 59516/122310 [1:50:19<1:26:40, 12.07it/s][A
 49%|███████████████▌                | 59526/122310 [1:50:20<1:23:01, 12.60it/s][A
 49%|███████████████▌                | 59538/122310 [1:50:21<1:38:28, 10.62it/s][A
 49%|███████████████▌                | 59543/122310 [1:50:22<1:47:27,  9.74

step: 22080, loss: 82.80751596196394, epoch: 1



 49%|███████████████▌                | 59602/122310 [1:50:27<1:25:19, 12.25it/s][A
 49%|███████████████▌                | 59610/122310 [1:50:28<1:28:16, 11.84it/s][A
 49%|███████████████▌                | 59621/122310 [1:50:28<1:21:22, 12.84it/s][A
 49%|███████████████▌                | 59632/122310 [1:50:29<1:17:32, 13.47it/s][A
 49%|███████████████▌                | 59634/122310 [1:50:30<1:40:56, 10.35it/s][A
 49%|███████████████▌                | 59643/122310 [1:50:30<1:35:09, 10.98it/s][A
 49%|███████████████▌                | 59649/122310 [1:50:31<1:42:27, 10.19it/s][A
 49%|███████████████▌                | 59657/122310 [1:50:32<1:40:02, 10.44it/s][A
 49%|███████████████▌                | 59664/122310 [1:50:33<1:44:53,  9.95it/s][A
 49%|███████████████▌                | 59667/122310 [1:50:33<2:08:55,  8.10it/s][A
 49%|███████████████▌                | 59670/122310 [1:50:34<2:33:01,  6.82it/s][A
 49%|███████████████▌                | 59676/122310 [1:50:35<2:25:41,  7.17

step: 22100, loss: 70.5598277694657, epoch: 1



 49%|███████████████▋                | 59752/122310 [1:50:42<1:19:05, 13.18it/s][A
 49%|███████████████▋                | 59772/122310 [1:50:43<1:01:13, 17.02it/s][A
 49%|███████████████▋                | 59783/122310 [1:50:43<1:04:09, 16.24it/s][A
 49%|███████████████▋                | 59793/122310 [1:50:44<1:08:10, 15.29it/s][A
 49%|███████████████▋                | 59799/122310 [1:50:45<1:19:43, 13.07it/s][A
 49%|███████████████▋                | 59806/122310 [1:50:46<1:27:15, 11.94it/s][A
 49%|███████████████▋                | 59813/122310 [1:50:46<1:34:09, 11.06it/s][A
 49%|███████████████▋                | 59822/122310 [1:50:47<1:32:04, 11.31it/s][A
 49%|███████████████▋                | 59839/122310 [1:50:48<1:11:24, 14.58it/s][A
 49%|███████████████▋                | 59848/122310 [1:50:49<1:15:43, 13.75it/s][A
 49%|███████████████▋                | 59855/122310 [1:50:50<1:24:02, 12.38it/s][A
 49%|███████████████▋                | 59867/122310 [1:50:50<1:17:51, 13.37

step: 22120, loss: 85.67456642707468, epoch: 1



 49%|███████████████▋                | 59942/122310 [1:50:57<1:44:21,  9.96it/s][A
 49%|███████████████▋                | 59951/122310 [1:50:58<1:38:25, 10.56it/s][A
 49%|███████████████▋                | 59962/122310 [1:50:59<1:28:44, 11.71it/s][A
 49%|███████████████▋                | 59968/122310 [1:50:59<1:38:17, 10.57it/s][A
 49%|███████████████▋                | 59978/122310 [1:51:00<1:31:37, 11.34it/s][A
 49%|███████████████▋                | 59984/122310 [1:51:01<1:40:52, 10.30it/s][A
 49%|███████████████▋                | 59993/122310 [1:51:02<1:36:46, 10.73it/s][A
 49%|███████████████▋                | 60001/122310 [1:51:03<1:37:06, 10.69it/s][A
 49%|███████████████▋                | 60011/122310 [1:51:03<1:30:58, 11.41it/s][A
 49%|███████████████▋                | 60024/122310 [1:51:04<1:19:05, 13.12it/s][A
 49%|███████████████▋                | 60038/122310 [1:51:05<1:10:42, 14.68it/s][A
 49%|███████████████▋                | 60053/122310 [1:51:06<1:04:15, 16.15

step: 22140, loss: 74.42483333432082, epoch: 1



 49%|███████████████▋                | 60136/122310 [1:51:12<1:17:18, 13.41it/s][A
 49%|███████████████▋                | 60150/122310 [1:51:13<1:09:36, 14.88it/s][A
 49%|███████████████▋                | 60158/122310 [1:51:14<1:16:20, 13.57it/s][A
 49%|███████████████▋                | 60166/122310 [1:51:15<1:21:55, 12.64it/s][A
 49%|███████████████▋                | 60177/122310 [1:51:16<1:18:55, 13.12it/s][A
 49%|███████████████▋                | 60187/122310 [1:51:16<1:19:09, 13.08it/s][A
 49%|███████████████▋                | 60199/122310 [1:51:17<1:15:00, 13.80it/s][A
 49%|███████████████▊                | 60202/122310 [1:51:18<1:36:24, 10.74it/s][A
 49%|███████████████▊                | 60205/122310 [1:51:19<1:58:21,  8.75it/s][A
 49%|███████████████▊                | 60209/122310 [1:51:19<2:14:15,  7.71it/s][A
 49%|███████████████▊                | 60217/122310 [1:51:20<2:01:24,  8.52it/s][A
 49%|███████████████▊                | 60227/122310 [1:51:21<1:45:11,  9.84

step: 22160, loss: 80.5494531314823, epoch: 1



 49%|███████████████▊                | 60295/122310 [1:51:28<2:01:32,  8.50it/s][A
 49%|███████████████▊                | 60299/122310 [1:51:29<2:16:42,  7.56it/s][A
 49%|███████████████▊                | 60317/122310 [1:51:29<1:23:42, 12.34it/s][A
 49%|███████████████▊                | 60323/122310 [1:51:30<1:33:37, 11.03it/s][A
 49%|███████████████▊                | 60333/122310 [1:51:31<1:28:44, 11.64it/s][A
 49%|███████████████▊                | 60336/122310 [1:51:32<1:50:41,  9.33it/s][A
 49%|███████████████▊                | 60345/122310 [1:51:32<1:42:27, 10.08it/s][A
 49%|███████████████▊                | 60358/122310 [1:51:33<1:25:02, 12.14it/s][A
 49%|███████████████▊                | 60368/122310 [1:51:34<1:23:09, 12.41it/s][A
 49%|███████████████▊                | 60374/122310 [1:51:35<1:33:21, 11.06it/s][A
 49%|███████████████▊                | 60388/122310 [1:51:35<1:18:20, 13.17it/s][A
 49%|███████████████▊                | 60393/122310 [1:51:36<1:32:30, 11.16

step: 22180, loss: 70.60929868393788, epoch: 1



 49%|███████████████▊                | 60473/122310 [1:51:43<1:39:57, 10.31it/s][A
 49%|███████████████▊                | 60479/122310 [1:51:44<1:47:11,  9.61it/s][A
 49%|███████████████▊                | 60492/122310 [1:51:45<1:27:05, 11.83it/s][A
 49%|███████████████▊                | 60505/122310 [1:51:45<1:16:46, 13.42it/s][A
 49%|███████████████▊                | 60516/122310 [1:51:46<1:14:59, 13.73it/s][A
 49%|████████████████▊                 | 60537/122310 [1:51:47<57:40, 17.85it/s][A
 49%|███████████████▊                | 60541/122310 [1:51:48<1:13:04, 14.09it/s][A
 50%|███████████████▊                | 60550/122310 [1:51:48<1:17:01, 13.36it/s][A
 50%|███████████████▊                | 60557/122310 [1:51:49<1:24:48, 12.13it/s][A
 50%|███████████████▊                | 60564/122310 [1:51:50<1:31:29, 11.25it/s][A
 50%|███████████████▊                | 60567/122310 [1:51:51<1:54:38,  8.98it/s][A
 50%|███████████████▊                | 60570/122310 [1:51:51<2:17:43,  7.47

step: 22200, loss: 86.18785256602594, epoch: 1



 50%|███████████████▊                | 60644/122310 [1:51:58<1:40:40, 10.21it/s][A
 50%|███████████████▊                | 60647/122310 [1:51:59<2:03:58,  8.29it/s][A
 50%|███████████████▊                | 60663/122310 [1:52:00<1:25:08, 12.07it/s][A
 50%|███████████████▊                | 60675/122310 [1:52:01<1:18:10, 13.14it/s][A
 50%|███████████████▉                | 60685/122310 [1:52:01<1:18:04, 13.15it/s][A
 50%|███████████████▉                | 60693/122310 [1:52:02<1:22:58, 12.38it/s][A
 50%|███████████████▉                | 60703/122310 [1:52:03<1:21:27, 12.60it/s][A
 50%|███████████████▉                | 60710/122310 [1:52:04<1:28:36, 11.59it/s][A
 50%|███████████████▉                | 60729/122310 [1:52:04<1:05:53, 15.58it/s][A
 50%|███████████████▉                | 60736/122310 [1:52:06<1:37:42, 10.50it/s][A
 50%|███████████████▉                | 60747/122310 [1:52:07<1:29:07, 11.51it/s][A
 50%|███████████████▉                | 60759/122310 [1:52:08<1:21:16, 12.62

step: 22220, loss: 83.1348884766872, epoch: 1



 50%|███████████████▉                | 60824/122310 [1:52:14<1:29:24, 11.46it/s][A
 50%|███████████████▉                | 60832/122310 [1:52:14<1:31:37, 11.18it/s][A
 50%|███████████████▉                | 60836/122310 [1:52:15<1:48:44,  9.42it/s][A
 50%|███████████████▉                | 60849/122310 [1:52:16<1:27:31, 11.70it/s][A
 50%|███████████████▉                | 60858/122310 [1:52:17<1:27:26, 11.71it/s][A
 50%|███████████████▉                | 60873/122310 [1:52:17<1:12:52, 14.05it/s][A
 50%|███████████████▉                | 60888/122310 [1:52:18<1:05:07, 15.72it/s][A
 50%|███████████████▉                | 60895/122310 [1:52:19<1:14:13, 13.79it/s][A
 50%|███████████████▉                | 60902/122310 [1:52:20<1:22:33, 12.40it/s][A
 50%|███████████████▉                | 60912/122310 [1:52:21<1:21:07, 12.61it/s][A
 50%|███████████████▉                | 60914/122310 [1:52:21<1:46:19,  9.62it/s][A
 50%|███████████████▉                | 60918/122310 [1:52:22<2:03:03,  8.31

step: 22240, loss: 82.35907598404076, epoch: 1



 50%|███████████████▉                | 61004/122310 [1:52:29<1:21:18, 12.57it/s][A
 50%|███████████████▉                | 61011/122310 [1:52:30<1:28:36, 11.53it/s][A
 50%|███████████████▉                | 61018/122310 [1:52:30<1:34:02, 10.86it/s][A
 50%|███████████████▉                | 61027/122310 [1:52:31<1:31:32, 11.16it/s][A
 50%|███████████████▉                | 61034/122310 [1:52:32<1:36:48, 10.55it/s][A
 50%|███████████████▉                | 61049/122310 [1:52:33<1:16:45, 13.30it/s][A
 50%|███████████████▉                | 61055/122310 [1:52:34<1:53:45,  8.97it/s][A
 50%|███████████████▉                | 61067/122310 [1:52:35<1:35:42, 10.66it/s][A
 50%|███████████████▉                | 61077/122310 [1:52:36<1:30:13, 11.31it/s][A
 50%|███████████████▉                | 61081/122310 [1:52:37<1:45:37,  9.66it/s][A
 50%|███████████████▉                | 61096/122310 [1:52:37<1:21:58, 12.45it/s][A
 50%|███████████████▉                | 61103/122310 [1:52:38<1:28:20, 11.55

step: 22260, loss: 74.48573316714271, epoch: 1



 50%|████████████████                | 61185/122310 [1:52:44<1:18:31, 12.97it/s][A
 50%|████████████████                | 61189/122310 [1:52:45<1:35:15, 10.69it/s][A
 50%|████████████████                | 61192/122310 [1:52:46<1:56:52,  8.72it/s][A
 50%|████████████████                | 61198/122310 [1:52:47<2:37:01,  6.49it/s][A
 50%|████████████████                | 61204/122310 [1:52:48<2:29:13,  6.83it/s][A
 50%|████████████████                | 61213/122310 [1:52:49<2:05:22,  8.12it/s][A
 50%|████████████████                | 61220/122310 [1:52:50<2:02:01,  8.34it/s][A
 50%|████████████████                | 61223/122310 [1:52:50<2:22:39,  7.14it/s][A
 50%|████████████████                | 61240/122310 [1:52:51<1:29:11, 11.41it/s][A
 50%|████████████████                | 61255/122310 [1:52:52<1:14:19, 13.69it/s][A
 50%|████████████████                | 61260/122310 [1:52:53<1:27:11, 11.67it/s][A
 50%|████████████████                | 61268/122310 [1:52:53<1:30:09, 11.28

step: 22280, loss: 78.34613097813639, epoch: 1



 50%|████████████████                | 61328/122310 [1:52:59<1:32:07, 11.03it/s][A
 50%|████████████████                | 61335/122310 [1:53:00<1:36:56, 10.48it/s][A
 50%|████████████████                | 61344/122310 [1:53:01<1:33:23, 10.88it/s][A
 50%|████████████████                | 61348/122310 [1:53:02<1:50:27,  9.20it/s][A
 50%|████████████████                | 61358/122310 [1:53:03<1:37:48, 10.39it/s][A
 50%|████████████████                | 61370/122310 [1:53:03<1:25:12, 11.92it/s][A
 50%|████████████████                | 61375/122310 [1:53:04<1:38:23, 10.32it/s][A
 50%|████████████████                | 61384/122310 [1:53:05<1:34:14, 10.78it/s][A
 50%|████████████████                | 61400/122310 [1:53:06<1:13:27, 13.82it/s][A
 50%|████████████████                | 61414/122310 [1:53:06<1:07:12, 15.10it/s][A
 50%|████████████████                | 61417/122310 [1:53:07<1:26:21, 11.75it/s][A
 50%|████████████████                | 61431/122310 [1:53:08<1:13:54, 13.73

step: 22300, loss: 86.19348130646853, epoch: 1



 50%|████████████████                | 61513/122310 [1:53:15<1:28:31, 11.45it/s][A
 50%|████████████████                | 61527/122310 [1:53:16<1:14:36, 13.58it/s][A
 50%|████████████████                | 61532/122310 [1:53:16<1:28:12, 11.48it/s][A
 50%|████████████████                | 61546/122310 [1:53:17<1:14:56, 13.51it/s][A
 50%|████████████████                | 61552/122310 [1:53:18<1:25:23, 11.86it/s][A
 50%|████████████████                | 61560/122310 [1:53:19<1:28:34, 11.43it/s][A
 50%|████████████████                | 61577/122310 [1:53:19<1:09:13, 14.62it/s][A
 50%|████████████████                | 61582/122310 [1:53:20<1:22:36, 12.25it/s][A
 50%|████████████████                | 61587/122310 [1:53:21<1:35:53, 10.55it/s][A
 50%|████████████████                | 61598/122310 [1:53:22<1:26:31, 11.70it/s][A
 50%|████████████████                | 61606/122310 [1:53:22<1:29:10, 11.35it/s][A
 50%|████████████████                | 61613/122310 [1:53:23<1:35:09, 10.63

step: 22320, loss: 95.5784888317099, epoch: 1



 50%|████████████████▏               | 61671/122310 [1:53:30<1:29:07, 11.34it/s][A
 50%|████████████████▏               | 61678/122310 [1:53:31<1:34:37, 10.68it/s][A
 50%|████████████████▏               | 61684/122310 [1:53:32<1:43:14,  9.79it/s][A
 50%|████████████████▏               | 61692/122310 [1:53:32<1:41:09,  9.99it/s][A
 50%|████████████████▏               | 61701/122310 [1:53:33<1:35:55, 10.53it/s][A
 50%|████████████████▏               | 61709/122310 [1:53:34<1:36:10, 10.50it/s][A
 50%|████████████████▏               | 61721/122310 [1:53:35<1:24:07, 12.00it/s][A
 50%|████████████████▏               | 61730/122310 [1:53:35<1:24:18, 11.98it/s][A
 50%|████████████████▏               | 61735/122310 [1:53:36<1:37:40, 10.34it/s][A
 50%|████████████████▏               | 61742/122310 [1:53:37<1:40:56, 10.00it/s][A
 50%|████████████████▏               | 61749/122310 [1:53:38<1:43:36,  9.74it/s][A
 50%|████████████████▏               | 61755/122310 [1:53:38<1:50:04,  9.17

step: 22340, loss: 81.98523883421127, epoch: 1



 51%|████████████████▏               | 61833/122310 [1:53:45<1:55:19,  8.74it/s][A
 51%|████████████████▏               | 61835/122310 [1:53:46<2:24:28,  6.98it/s][A
 51%|████████████████▏               | 61848/122310 [1:53:47<1:40:44, 10.00it/s][A
 51%|████████████████▏               | 61854/122310 [1:53:48<1:47:37,  9.36it/s][A
 51%|████████████████▏               | 61862/122310 [1:53:48<1:43:56,  9.69it/s][A
 51%|████████████████▏               | 61868/122310 [1:53:49<1:50:13,  9.14it/s][A
 51%|████████████████▏               | 61880/122310 [1:53:50<1:30:35, 11.12it/s][A
 51%|████████████████▏               | 61890/122310 [1:53:51<1:25:58, 11.71it/s][A
 51%|████████████████▏               | 61896/122310 [1:53:51<1:35:16, 10.57it/s][A
 51%|████████████████▏               | 61902/122310 [1:53:52<1:43:20,  9.74it/s][A
 51%|████████████████▏               | 61911/122310 [1:53:53<1:37:09, 10.36it/s][A
 51%|████████████████▏               | 61919/122310 [1:53:54<1:36:51, 10.39

step: 22360, loss: 406.1967685158846, epoch: 1



 51%|████████████████▏               | 62002/122310 [1:54:01<1:47:13,  9.37it/s][A
 51%|████████████████▏               | 62008/122310 [1:54:01<1:52:50,  8.91it/s][A
 51%|████████████████▏               | 62019/122310 [1:54:02<1:35:08, 10.56it/s][A
 51%|████████████████▏               | 62028/122310 [1:54:03<1:31:51, 10.94it/s][A
 51%|████████████████▏               | 62040/122310 [1:54:04<1:21:18, 12.35it/s][A
 51%|████████████████▏               | 62048/122310 [1:54:05<1:24:53, 11.83it/s][A
 51%|████████████████▏               | 62051/122310 [1:54:05<1:46:45,  9.41it/s][A
 51%|████████████████▏               | 62057/122310 [1:54:06<1:51:39,  8.99it/s][A
 51%|████████████████▏               | 62065/122310 [1:54:07<1:46:36,  9.42it/s][A
 51%|████████████████▏               | 62083/122310 [1:54:08<1:13:31, 13.65it/s][A
 51%|████████████████▏               | 62092/122310 [1:54:08<1:16:43, 13.08it/s][A
 51%|████████████████▏               | 62098/122310 [1:54:09<1:27:09, 11.51

step: 22380, loss: 79.84964144532415, epoch: 1



 51%|████████████████▎               | 62165/122310 [1:54:16<2:01:31,  8.25it/s][A
 51%|████████████████▎               | 62175/122310 [1:54:17<1:43:43,  9.66it/s][A
 51%|████████████████▎               | 62187/122310 [1:54:17<1:27:54, 11.40it/s][A
 51%|████████████████▎               | 62198/122310 [1:54:18<1:22:12, 12.19it/s][A
 51%|████████████████▎               | 62203/122310 [1:54:19<1:34:47, 10.57it/s][A
 51%|████████████████▎               | 62216/122310 [1:54:20<1:20:21, 12.47it/s][A
 51%|████████████████▎               | 62225/122310 [1:54:21<1:21:34, 12.28it/s][A
 51%|████████████████▎               | 62229/122310 [1:54:21<1:38:21, 10.18it/s][A
 51%|████████████████▎               | 62237/122310 [1:54:23<2:06:28,  7.92it/s][A
 51%|████████████████▎               | 62247/122310 [1:54:24<1:48:46,  9.20it/s][A
 51%|████████████████▎               | 62257/122310 [1:54:24<1:37:55, 10.22it/s][A
 51%|████████████████▎               | 62261/122310 [1:54:25<1:53:36,  8.81

step: 22400, loss: 88.4370335216694, epoch: 1
sim1 and sim2 are 0.3979141585186639, 0.26999625535460475
cosine of pred and queen: 0.2529269735581193
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: saisai
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahara

Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: suddenly
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: htay
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: indian
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 51%|███████████████▊               | 62323/122310 [1:55:49<46:06:10,  2.77s/it][A

Actual: india:rupee::denmark:krone, pred: netherlands
Accuracy is 0.10650887573964497



 51%|███████████████▊               | 62331/122310 [1:55:50<33:38:58,  2.02s/it][A
 51%|███████████████▊               | 62344/122310 [1:55:51<20:47:23,  1.25s/it][A
 51%|███████████████▊               | 62347/122310 [1:55:52<18:52:19,  1.13s/it][A
 51%|███████████████▊               | 62355/122310 [1:55:52<13:36:01,  1.22it/s][A
 51%|████████████████▎               | 62364/122310 [1:55:53<9:35:14,  1.74it/s][A
 51%|████████████████▎               | 62371/122310 [1:55:54<7:30:02,  2.22it/s][A
 51%|████████████████▎               | 62378/122310 [1:55:55<5:55:23,  2.81it/s][A
 51%|████████████████▎               | 62383/122310 [1:55:55<5:10:17,  3.22it/s][A
 51%|████████████████▎               | 62393/122310 [1:55:56<3:39:41,  4.55it/s][A
 51%|████████████████▎               | 62399/122310 [1:55:57<3:16:02,  5.09it/s][A
 51%|████████████████▎               | 62413/122310 [1:55:58<2:11:47,  7.57it/s][A
 51%|████████████████▎               | 62422/122310 [1:55:59<1:57:58,  8.46

step: 22420, loss: 70.90950718098327, epoch: 1



 51%|████████████████▎               | 62485/122310 [1:56:05<1:56:36,  8.55it/s][A
 51%|████████████████▎               | 62489/122310 [1:56:06<2:10:55,  7.62it/s][A
 51%|████████████████▎               | 62492/122310 [1:56:07<2:32:26,  6.54it/s][A
 51%|████████████████▎               | 62502/122310 [1:56:08<1:56:32,  8.55it/s][A
 51%|████████████████▎               | 62509/122310 [1:56:08<1:54:12,  8.73it/s][A
 51%|████████████████▎               | 62518/122310 [1:56:09<1:43:09,  9.66it/s][A
 51%|████████████████▎               | 62530/122310 [1:56:10<1:26:15, 11.55it/s][A
 51%|████████████████▎               | 62537/122310 [1:56:11<1:31:43, 10.86it/s][A
 51%|████████████████▎               | 62544/122310 [1:56:11<1:35:57, 10.38it/s][A
 51%|████████████████▎               | 62548/122310 [1:56:12<1:52:36,  8.85it/s][A
 51%|████████████████▎               | 62562/122310 [1:56:13<1:25:01, 11.71it/s][A
 51%|████████████████▎               | 62579/122310 [1:56:14<1:06:46, 14.91

step: 22440, loss: 70.32336787044679, epoch: 1



 51%|████████████████▍               | 62656/122310 [1:56:20<1:22:40, 12.03it/s][A
 51%|████████████████▍               | 62665/122310 [1:56:21<1:22:47, 12.01it/s][A
 51%|████████████████▍               | 62682/122310 [1:56:22<1:05:36, 15.15it/s][A
 51%|█████████████████▍                | 62707/122310 [1:56:23<48:28, 20.50it/s][A
 51%|█████████████████▍                | 62718/122310 [1:56:24<53:02, 18.73it/s][A
 51%|█████████████████▍                | 62743/122310 [1:56:24<43:05, 23.04it/s][A
 51%|█████████████████▍                | 62747/122310 [1:56:25<56:02, 17.71it/s][A
 51%|█████████████████▍                | 62759/122310 [1:56:26<57:54, 17.14it/s][A
 51%|█████████████████▍                | 62771/122310 [1:56:27<59:13, 16.75it/s][A
 51%|████████████████▍               | 62777/122310 [1:56:27<1:10:22, 14.10it/s][A
 51%|████████████████▍               | 62785/122310 [1:56:28<1:16:03, 13.04it/s][A
 51%|████████████████▍               | 62799/122310 [1:56:29<1:07:40, 14.65

step: 22460, loss: 80.42060259983373, epoch: 1



 51%|████████████████▍               | 62877/122310 [1:56:36<1:22:06, 12.06it/s][A
 51%|████████████████▍               | 62884/122310 [1:56:36<1:28:22, 11.21it/s][A
 51%|████████████████▍               | 62888/122310 [1:56:37<1:45:17,  9.41it/s][A
 51%|████████████████▍               | 62890/122310 [1:56:38<2:13:42,  7.41it/s][A
 51%|████████████████▍               | 62898/122310 [1:56:39<1:58:23,  8.36it/s][A
 51%|████████████████▍               | 62908/122310 [1:56:39<1:40:49,  9.82it/s][A
 51%|████████████████▍               | 62918/122310 [1:56:40<1:31:16, 10.84it/s][A
 51%|████████████████▍               | 62928/122310 [1:56:41<1:25:42, 11.55it/s][A
 51%|████████████████▍               | 62940/122310 [1:56:42<1:17:15, 12.81it/s][A
 51%|████████████████▍               | 62944/122310 [1:56:42<1:33:58, 10.53it/s][A
 51%|████████████████▍               | 62955/122310 [1:56:43<1:24:31, 11.70it/s][A
 51%|████████████████▍               | 62964/122310 [1:56:44<1:24:14, 11.74

step: 22480, loss: 110.90739996822056, epoch: 1



 52%|████████████████▍               | 63022/122310 [1:56:51<1:30:45, 10.89it/s][A
 52%|████████████████▍               | 63034/122310 [1:56:52<1:20:17, 12.30it/s][A
 52%|████████████████▍               | 63037/122310 [1:56:52<1:40:30,  9.83it/s][A
 52%|████████████████▍               | 63040/122310 [1:56:53<2:01:59,  8.10it/s][A
 52%|████████████████▍               | 63049/122310 [1:56:54<1:47:26,  9.19it/s][A
 52%|████████████████▍               | 63057/122310 [1:56:55<1:42:46,  9.61it/s][A
 52%|████████████████▌               | 63079/122310 [1:56:55<1:04:07, 15.39it/s][A
 52%|████████████████▌               | 63093/122310 [1:56:56<1:00:43, 16.25it/s][A
 52%|████████████████▌               | 63099/122310 [1:56:57<1:11:40, 13.77it/s][A
 52%|████████████████▌               | 63104/122310 [1:56:58<1:25:06, 11.59it/s][A
 52%|████████████████▌               | 63115/122310 [1:56:58<1:19:21, 12.43it/s][A
 52%|████████████████▌               | 63127/122310 [1:56:59<1:13:41, 13.39

step: 22500, loss: 76.0155412520397, epoch: 1



 52%|████████████████▌               | 63209/122310 [1:57:06<1:35:17, 10.34it/s][A
 52%|████████████████▌               | 63216/122310 [1:57:07<1:38:25, 10.01it/s][A
 52%|████████████████▌               | 63224/122310 [1:57:08<1:37:18, 10.12it/s][A
 52%|████████████████▌               | 63230/122310 [1:57:08<1:43:59,  9.47it/s][A
 52%|████████████████▌               | 63242/122310 [1:57:09<1:26:50, 11.34it/s][A
 52%|████████████████▌               | 63247/122310 [1:57:10<1:39:46,  9.87it/s][A
 52%|████████████████▌               | 63249/122310 [1:57:11<2:07:59,  7.69it/s][A
 52%|████████████████▌               | 63256/122310 [1:57:11<2:00:21,  8.18it/s][A
 52%|████████████████▌               | 63260/122310 [1:57:12<2:14:58,  7.29it/s][A
 52%|████████████████▌               | 63267/122310 [1:57:13<2:05:38,  7.83it/s][A
 52%|████████████████▌               | 63270/122310 [1:57:14<2:27:17,  6.68it/s][A
 52%|████████████████▌               | 63279/122310 [1:57:15<1:59:49,  8.21

step: 22520, loss: 80.19777216991733, epoch: 1



 52%|████████████████▌               | 63349/122310 [1:57:21<1:44:49,  9.37it/s][A
 52%|████████████████▌               | 63356/122310 [1:57:22<1:45:11,  9.34it/s][A
 52%|████████████████▌               | 63361/122310 [1:57:23<1:55:41,  8.49it/s][A
 52%|████████████████▌               | 63366/122310 [1:57:24<2:03:42,  7.94it/s][A
 52%|████████████████▌               | 63374/122310 [1:57:24<1:52:45,  8.71it/s][A
 52%|████████████████▌               | 63378/122310 [1:57:25<2:08:16,  7.66it/s][A
 52%|████████████████▌               | 63381/122310 [1:57:26<2:29:50,  6.55it/s][A
 52%|████████████████▌               | 63392/122310 [1:57:27<1:49:58,  8.93it/s][A
 52%|████████████████▌               | 63403/122310 [1:57:28<2:01:10,  8.10it/s][A
 52%|████████████████▌               | 63404/122310 [1:57:29<2:32:00,  6.46it/s][A
 52%|████████████████▌               | 63411/122310 [1:57:30<2:17:05,  7.16it/s][A
 52%|████████████████▌               | 63419/122310 [1:57:31<2:01:53,  8.05

step: 22540, loss: 100.63112662954438, epoch: 1



 52%|████████████████▌               | 63471/122310 [1:57:37<2:21:51,  6.91it/s][A
 52%|████████████████▌               | 63479/122310 [1:57:38<2:05:44,  7.80it/s][A
 52%|████████████████▌               | 63488/122310 [1:57:39<1:51:02,  8.83it/s][A
 52%|████████████████▌               | 63500/122310 [1:57:40<1:31:37, 10.70it/s][A
 52%|████████████████▌               | 63512/122310 [1:57:40<1:20:50, 12.12it/s][A
 52%|████████████████▌               | 63529/122310 [1:57:41<1:05:12, 15.02it/s][A
 52%|████████████████▌               | 63533/122310 [1:57:42<1:20:51, 12.12it/s][A
 52%|████████████████▌               | 63537/122310 [1:57:43<1:36:35, 10.14it/s][A
 52%|████████████████▋               | 63550/122310 [1:57:44<1:20:17, 12.20it/s][A
 52%|████████████████▋               | 63554/122310 [1:57:44<1:37:22, 10.06it/s][A
 52%|████████████████▋               | 63558/122310 [1:57:45<1:52:55,  8.67it/s][A
 52%|████████████████▋               | 63565/122310 [1:57:46<1:50:56,  8.83

step: 22560, loss: 72.88592501451079, epoch: 1



 52%|████████████████▋               | 63641/122310 [1:57:52<1:20:11, 12.19it/s][A
 52%|████████████████▋               | 63644/122310 [1:57:53<1:40:36,  9.72it/s][A
 52%|████████████████▋               | 63648/122310 [1:57:53<1:56:08,  8.42it/s][A
 52%|████████████████▋               | 63665/122310 [1:57:54<1:17:47, 12.56it/s][A
 52%|████████████████▋               | 63675/122310 [1:57:55<1:16:23, 12.79it/s][A
 52%|████████████████▋               | 63683/122310 [1:57:56<1:20:46, 12.10it/s][A
 52%|████████████████▋               | 63689/122310 [1:57:57<1:30:45, 10.77it/s][A
 52%|████████████████▋               | 63699/122310 [1:57:57<1:24:51, 11.51it/s][A
 52%|████████████████▋               | 63711/122310 [1:57:58<1:16:16, 12.80it/s][A
 52%|████████████████▋               | 63717/122310 [1:57:59<1:26:34, 11.28it/s][A
 52%|████████████████▋               | 63722/122310 [1:58:00<1:38:54,  9.87it/s][A
 52%|████████████████▋               | 63731/122310 [1:58:00<1:33:43, 10.42

step: 22580, loss: 84.12006339240645, epoch: 1



 52%|████████████████▋               | 63785/122310 [1:58:07<2:42:36,  6.00it/s][A
 52%|████████████████▋               | 63790/122310 [1:58:08<2:37:51,  6.18it/s][A
 52%|████████████████▋               | 63798/122310 [1:58:09<2:10:19,  7.48it/s][A
 52%|████████████████▋               | 63804/122310 [1:58:09<2:08:04,  7.61it/s][A
 52%|████████████████▋               | 63813/122310 [1:58:10<1:49:44,  8.88it/s][A
 52%|████████████████▋               | 63824/122310 [1:58:11<1:32:17, 10.56it/s][A
 52%|████████████████▋               | 63843/122310 [1:58:12<1:05:50, 14.80it/s][A
 52%|████████████████▋               | 63855/122310 [1:58:13<1:04:42, 15.05it/s][A
 52%|████████████████▋               | 63867/122310 [1:58:13<1:04:06, 15.19it/s][A
 52%|████████████████▋               | 63874/122310 [1:58:14<1:12:46, 13.38it/s][A
 52%|████████████████▋               | 63882/122310 [1:58:15<1:17:29, 12.57it/s][A
 52%|████████████████▋               | 63897/122310 [1:58:16<1:06:15, 14.69

step: 22600, loss: 80.21209689387543, epoch: 1



 52%|████████████████▋               | 63978/122310 [1:58:22<1:13:13, 13.28it/s][A
 52%|████████████████▋               | 63985/122310 [1:58:23<1:20:22, 12.09it/s][A
 52%|████████████████▋               | 63995/122310 [1:58:24<1:18:27, 12.39it/s][A
 52%|████████████████▋               | 64005/122310 [1:58:25<1:17:12, 12.59it/s][A
 52%|████████████████▋               | 64012/122310 [1:58:26<1:24:09, 11.55it/s][A
 52%|████████████████▋               | 64019/122310 [1:58:26<1:30:22, 10.75it/s][A
 52%|████████████████▊               | 64023/122310 [1:58:27<1:46:27,  9.13it/s][A
 52%|████████████████▊               | 64031/122310 [1:58:28<1:41:47,  9.54it/s][A
 52%|████████████████▊               | 64035/122310 [1:58:29<1:57:29,  8.27it/s][A
 52%|████████████████▊               | 64041/122310 [1:58:29<1:59:16,  8.14it/s][A
 52%|████████████████▊               | 64046/122310 [1:58:30<2:06:13,  7.69it/s][A
 52%|████████████████▊               | 64054/122310 [1:58:31<1:53:46,  8.53

step: 22620, loss: 72.65527474973376, epoch: 1



 52%|████████████████▊               | 64122/122310 [1:58:38<1:42:26,  9.47it/s][A
 52%|████████████████▊               | 64131/122310 [1:58:39<1:35:07, 10.19it/s][A
 52%|████████████████▊               | 64137/122310 [1:58:39<1:41:31,  9.55it/s][A
 52%|████████████████▊               | 64149/122310 [1:58:40<1:25:04, 11.39it/s][A
 52%|████████████████▊               | 64154/122310 [1:58:41<1:37:30,  9.94it/s][A
 52%|████████████████▊               | 64161/122310 [1:58:42<1:39:36,  9.73it/s][A
 52%|████████████████▊               | 64166/122310 [1:58:42<1:51:10,  8.72it/s][A
 52%|████████████████▊               | 64173/122310 [1:58:43<1:48:59,  8.89it/s][A
 52%|████████████████▊               | 64195/122310 [1:58:44<1:05:09, 14.87it/s][A
 52%|████████████████▊               | 64197/122310 [1:58:45<1:26:42, 11.17it/s][A
 52%|████████████████▊               | 64205/122310 [1:58:45<1:28:17, 10.97it/s][A
 52%|████████████████▊               | 64208/122310 [1:58:46<1:48:48,  8.90

step: 22640, loss: 80.17711201757277, epoch: 1



 53%|████████████████▊               | 64293/122310 [1:58:53<1:12:34, 13.32it/s][A
 53%|████████████████▊               | 64309/122310 [1:58:54<1:01:47, 15.64it/s][A
 53%|█████████████████▉                | 64324/122310 [1:58:55<57:22, 16.84it/s][A
 53%|████████████████▊               | 64326/122310 [1:58:55<1:16:38, 12.61it/s][A
 53%|████████████████▊               | 64330/122310 [1:58:56<1:32:51, 10.41it/s][A
 53%|████████████████▊               | 64336/122310 [1:58:57<1:40:24,  9.62it/s][A
 53%|████████████████▊               | 64339/122310 [1:58:58<2:01:47,  7.93it/s][A
 53%|████████████████▊               | 64350/122310 [1:58:58<1:37:46,  9.88it/s][A
 53%|████████████████▊               | 64357/122310 [1:58:59<1:40:01,  9.66it/s][A
 53%|████████████████▊               | 64371/122310 [1:59:00<1:18:38, 12.28it/s][A
 53%|████████████████▊               | 64383/122310 [1:59:01<1:12:29, 13.32it/s][A
 53%|████████████████▊               | 64391/122310 [1:59:01<1:17:29, 12.46

step: 22660, loss: 76.0415811626337, epoch: 1



 53%|████████████████▊               | 64458/122310 [1:59:08<1:45:56,  9.10it/s][A
 53%|████████████████▊               | 64464/122310 [1:59:09<1:50:20,  8.74it/s][A
 53%|████████████████▊               | 64467/122310 [1:59:10<2:12:06,  7.30it/s][A
 53%|████████████████▊               | 64472/122310 [1:59:11<2:15:30,  7.11it/s][A
 53%|████████████████▊               | 64482/122310 [1:59:11<1:48:03,  8.92it/s][A
 53%|████████████████▊               | 64489/122310 [1:59:12<1:47:04,  9.00it/s][A
 53%|████████████████▊               | 64493/122310 [1:59:13<2:02:31,  7.86it/s][A
 53%|████████████████▉               | 64501/122310 [1:59:14<1:51:28,  8.64it/s][A
 53%|████████████████▉               | 64506/122310 [1:59:14<2:00:22,  8.00it/s][A
 53%|████████████████▉               | 64518/122310 [1:59:15<1:33:16, 10.33it/s][A
 53%|████████████████▉               | 64531/122310 [1:59:16<1:18:08, 12.32it/s][A
 53%|████████████████▉               | 64539/122310 [1:59:17<1:21:53, 11.76

step: 22680, loss: 106.41592823006147, epoch: 1



 53%|████████████████▉               | 64611/122310 [1:59:23<1:22:39, 11.63it/s][A
 53%|████████████████▉               | 64619/122310 [1:59:24<1:25:09, 11.29it/s][A
 53%|████████████████▉               | 64628/122310 [1:59:25<1:23:50, 11.47it/s][A
 53%|████████████████▉               | 64634/122310 [1:59:26<1:32:31, 10.39it/s][A
 53%|████████████████▉               | 64636/122310 [1:59:27<1:59:35,  8.04it/s][A
 53%|████████████████▉               | 64640/122310 [1:59:27<2:12:53,  7.23it/s][A
 53%|████████████████▉               | 64650/122310 [1:59:28<1:46:39,  9.01it/s][A
 53%|████████████████▉               | 64657/122310 [1:59:29<1:46:05,  9.06it/s][A
 53%|████████████████▉               | 64669/122310 [1:59:30<1:26:53, 11.06it/s][A
 53%|████████████████▉               | 64687/122310 [1:59:30<1:04:59, 14.78it/s][A
 53%|████████████████▉               | 64693/122310 [1:59:31<1:15:37, 12.70it/s][A
 53%|████████████████▉               | 64701/122310 [1:59:32<1:19:46, 12.04

step: 22700, loss: 77.07602935109989, epoch: 1



 53%|████████████████▉               | 64774/122310 [1:59:39<1:23:10, 11.53it/s][A
 53%|████████████████▉               | 64780/122310 [1:59:40<1:31:38, 10.46it/s][A
 53%|████████████████▉               | 64786/122310 [1:59:40<1:38:48,  9.70it/s][A
 53%|████████████████▉               | 64799/122310 [1:59:41<1:20:30, 11.91it/s][A
 53%|████████████████▉               | 64811/122310 [1:59:42<1:13:37, 13.02it/s][A
 53%|████████████████▉               | 64818/122310 [1:59:43<1:20:27, 11.91it/s][A
 53%|████████████████▉               | 64832/122310 [1:59:43<1:09:12, 13.84it/s][A
 53%|████████████████▉               | 64837/122310 [1:59:44<1:22:11, 11.65it/s][A
 53%|████████████████▉               | 64847/122310 [1:59:45<1:19:07, 12.10it/s][A
 53%|████████████████▉               | 64860/122310 [1:59:46<1:10:23, 13.60it/s][A
 53%|████████████████▉               | 64866/122310 [1:59:46<1:20:40, 11.87it/s][A
 53%|████████████████▉               | 64871/122310 [1:59:47<1:33:53, 10.20

step: 22720, loss: 67.04293488635147, epoch: 1



 53%|████████████████▉               | 64956/122310 [1:59:54<1:31:02, 10.50it/s][A
 53%|████████████████▉               | 64960/122310 [1:59:55<1:47:15,  8.91it/s][A
 53%|████████████████▉               | 64973/122310 [1:59:56<1:24:05, 11.36it/s][A
 53%|█████████████████               | 64980/122310 [1:59:56<1:29:19, 10.70it/s][A
 53%|█████████████████               | 64987/122310 [1:59:57<1:33:12, 10.25it/s][A
 53%|█████████████████               | 64990/122310 [1:59:58<1:54:44,  8.33it/s][A
 53%|█████████████████               | 65004/122310 [1:59:59<1:24:38, 11.28it/s][A
 53%|█████████████████               | 65012/122310 [1:59:59<1:26:05, 11.09it/s][A
 53%|█████████████████               | 65021/122310 [2:00:00<1:24:19, 11.32it/s][A
 53%|█████████████████               | 65032/122310 [2:00:01<1:18:20, 12.19it/s][A
 53%|█████████████████               | 65035/122310 [2:00:02<1:38:12,  9.72it/s][A
 53%|█████████████████               | 65044/122310 [2:00:02<1:32:28, 10.32

step: 22740, loss: 77.39105448986517, epoch: 1



 53%|█████████████████               | 65130/122310 [2:00:09<1:14:21, 12.82it/s][A
 53%|█████████████████               | 65139/122310 [2:00:10<1:16:06, 12.52it/s][A
 53%|█████████████████               | 65143/122310 [2:00:11<1:32:21, 10.32it/s][A
 53%|█████████████████               | 65149/122310 [2:00:12<1:39:26,  9.58it/s][A
 53%|█████████████████               | 65160/122310 [2:00:12<1:26:11, 11.05it/s][A
 53%|█████████████████               | 65170/122310 [2:00:13<1:21:25, 11.69it/s][A
 53%|█████████████████               | 65183/122310 [2:00:14<1:11:33, 13.30it/s][A
 53%|█████████████████               | 65187/122310 [2:00:15<1:27:24, 10.89it/s][A
 53%|█████████████████               | 65197/122310 [2:00:15<1:22:28, 11.54it/s][A
 53%|█████████████████               | 65209/122310 [2:00:16<1:14:13, 12.82it/s][A
 53%|█████████████████               | 65215/122310 [2:00:17<1:23:55, 11.34it/s][A
 53%|█████████████████               | 65219/122310 [2:00:18<2:10:17,  7.30

step: 22760, loss: 89.90039991817481, epoch: 1



 53%|█████████████████               | 65285/122310 [2:00:25<1:52:14,  8.47it/s][A
 53%|█████████████████               | 65295/122310 [2:00:25<1:36:40,  9.83it/s][A
 53%|█████████████████               | 65306/122310 [2:00:26<1:24:36, 11.23it/s][A
 53%|█████████████████               | 65324/122310 [2:00:27<1:03:45, 14.90it/s][A
 53%|█████████████████               | 65336/122310 [2:00:28<1:02:38, 15.16it/s][A
 53%|█████████████████               | 65338/122310 [2:00:28<1:23:31, 11.37it/s][A
 53%|█████████████████               | 65344/122310 [2:00:29<1:31:57, 10.32it/s][A
 53%|█████████████████               | 65351/122310 [2:00:30<1:35:12,  9.97it/s][A
 53%|█████████████████               | 65356/122310 [2:00:31<1:45:38,  8.99it/s][A
 53%|█████████████████               | 65364/122310 [2:00:31<1:41:27,  9.35it/s][A
 53%|█████████████████               | 65368/122310 [2:00:32<1:56:25,  8.15it/s][A
 53%|█████████████████               | 65383/122310 [2:00:33<1:21:57, 11.58

step: 22780, loss: 170.77976966589918, epoch: 1



 53%|█████████████████               | 65432/122310 [2:00:41<3:18:21,  4.78it/s][A
 54%|█████████████████               | 65441/122310 [2:00:41<2:36:22,  6.06it/s][A
 54%|█████████████████               | 65453/122310 [2:00:42<1:57:34,  8.06it/s][A
 54%|█████████████████▏              | 65460/122310 [2:00:43<1:54:07,  8.30it/s][A
 54%|█████████████████▏              | 65474/122310 [2:00:44<1:28:07, 10.75it/s][A
 54%|█████████████████▏              | 65479/122310 [2:00:44<1:38:24,  9.63it/s][A
 54%|█████████████████▏              | 65488/122310 [2:00:45<1:32:24, 10.25it/s][A
 54%|█████████████████▏              | 65491/122310 [2:00:46<1:51:48,  8.47it/s][A
 54%|█████████████████▏              | 65505/122310 [2:00:47<1:24:01, 11.27it/s][A
 54%|█████████████████▏              | 65521/122310 [2:00:48<1:07:16, 14.07it/s][A
 54%|█████████████████▏              | 65529/122310 [2:00:48<1:13:01, 12.96it/s][A
 54%|█████████████████▏              | 65533/122310 [2:00:49<1:28:24, 10.70

step: 22800, loss: 73.97509696405308, epoch: 1
sim1 and sim2 are 0.4830372392284747, 0.2684187877280937
cosine of pred and queen: 0.22724813358232515
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: hollande
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: ma


 54%|█████████████████▏              | 65579/122310 [2:01:08<1:04:47, 14.59it/s][A

Actual: argentina:peso::russia:ruble, pred: sales
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: higher
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: italy
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: says
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: media
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: cameron
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.2222222222222222
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:gene


 54%|████████████████▋              | 65596/122310 [2:02:13<30:02:05,  1.91s/it][A

Actual: india:rupee::denmark:krone, pred: higher
Accuracy is 0.07692307692307693



 54%|████████████████▋              | 65598/122310 [2:02:14<28:31:54,  1.81s/it][A
 54%|████████████████▋              | 65607/122310 [2:02:15<20:45:07,  1.32s/it][A
 54%|████████████████▋              | 65613/122310 [2:02:15<16:43:52,  1.06s/it][A
 54%|████████████████▋              | 65622/122310 [2:02:16<11:52:54,  1.33it/s][A
 54%|█████████████████▏              | 65634/122310 [2:02:17<7:48:12,  2.02it/s][A
 54%|█████████████████▏              | 65636/122310 [2:02:18<7:38:51,  2.06it/s][A
 54%|█████████████████▏              | 65641/122310 [2:02:18<6:26:59,  2.44it/s][A
 54%|█████████████████▏              | 65651/122310 [2:02:19<4:23:01,  3.59it/s][A
 54%|█████████████████▏              | 65658/122310 [2:02:20<3:37:20,  4.34it/s][A
 54%|█████████████████▏              | 65661/122310 [2:02:21<3:40:35,  4.28it/s][A

step: 22820, loss: 130.08842145890122, epoch: 1



 54%|█████████████████▏              | 65672/122310 [2:02:30<7:39:38,  2.05it/s][A
 54%|█████████████████▏              | 65674/122310 [2:02:30<7:28:15,  2.11it/s][A
 54%|█████████████████▏              | 65678/122310 [2:02:31<6:27:51,  2.43it/s][A
 54%|█████████████████▏              | 65682/122310 [2:02:32<5:36:53,  2.80it/s][A
 54%|█████████████████▏              | 65690/122310 [2:02:33<3:55:01,  4.02it/s][A
 54%|█████████████████▏              | 65698/122310 [2:02:33<3:01:09,  5.21it/s][A
 54%|█████████████████▏              | 65703/122310 [2:02:34<2:51:52,  5.49it/s][A
 54%|█████████████████▏              | 65713/122310 [2:02:35<2:09:56,  7.26it/s][A
 54%|█████████████████▏              | 65717/122310 [2:02:36<2:19:08,  6.78it/s][A
 54%|█████████████████▏              | 65737/122310 [2:02:36<1:19:18, 11.89it/s][A
 54%|█████████████████▏              | 65747/122310 [2:02:37<1:16:59, 12.24it/s][A
 54%|█████████████████▏              | 65754/122310 [2:02:38<1:22:29, 11.43

step: 22840, loss: 87.28817850758496, epoch: 1



 54%|█████████████████▏              | 65818/122310 [2:02:44<1:54:11,  8.25it/s][A
 54%|█████████████████▏              | 65825/122310 [2:02:45<1:49:55,  8.56it/s][A
 54%|█████████████████▏              | 65831/122310 [2:02:46<1:52:31,  8.37it/s][A
 54%|█████████████████▏              | 65840/122310 [2:02:47<2:10:21,  7.22it/s][A
 54%|█████████████████▏              | 65848/122310 [2:02:48<1:56:52,  8.05it/s][A
 54%|█████████████████▏              | 65860/122310 [2:02:49<1:33:27, 10.07it/s][A
 54%|█████████████████▏              | 65870/122310 [2:02:49<1:26:04, 10.93it/s][A
 54%|█████████████████▏              | 65879/122310 [2:02:50<1:24:06, 11.18it/s][A
 54%|█████████████████▏              | 65884/122310 [2:02:51<1:35:10,  9.88it/s][A
 54%|█████████████████▏              | 65900/122310 [2:02:52<1:11:37, 13.13it/s][A
 54%|█████████████████▏              | 65910/122310 [2:02:52<1:11:53, 13.07it/s][A
 54%|█████████████████▏              | 65918/122310 [2:02:53<1:16:01, 12.36

step: 22860, loss: 67.34261370107288, epoch: 1



 54%|█████████████████▎              | 65995/122310 [2:02:59<1:26:57, 10.79it/s][A
 54%|█████████████████▎              | 66006/122310 [2:03:00<1:18:37, 11.93it/s][A
 54%|█████████████████▎              | 66013/122310 [2:03:01<1:24:25, 11.11it/s][A
 54%|█████████████████▎              | 66027/122310 [2:03:02<1:10:32, 13.30it/s][A
 54%|█████████████████▎              | 66029/122310 [2:03:02<1:32:34, 10.13it/s][A
 54%|█████████████████▎              | 66042/122310 [2:03:03<1:16:38, 12.24it/s][A
 54%|█████████████████▎              | 66051/122310 [2:03:04<1:17:33, 12.09it/s][A
 54%|█████████████████▎              | 66059/122310 [2:03:05<1:20:28, 11.65it/s][A
 54%|█████████████████▎              | 66068/122310 [2:03:05<1:20:16, 11.68it/s][A
 54%|█████████████████▎              | 66072/122310 [2:03:06<1:35:41,  9.80it/s][A
 54%|█████████████████▎              | 66086/122310 [2:03:07<1:15:55, 12.34it/s][A
 54%|█████████████████▎              | 66094/122310 [2:03:08<1:19:11, 11.83

step: 22880, loss: 87.547364073102, epoch: 1



 54%|█████████████████▎              | 66146/122310 [2:03:15<2:49:14,  5.53it/s][A
 54%|█████████████████▎              | 66151/122310 [2:03:16<2:40:46,  5.82it/s][A
 54%|█████████████████▎              | 66165/122310 [2:03:17<1:42:26,  9.13it/s][A
 54%|█████████████████▎              | 66170/122310 [2:03:17<1:50:54,  8.44it/s][A
 54%|█████████████████▎              | 66176/122310 [2:03:18<1:53:00,  8.28it/s][A
 54%|█████████████████▎              | 66185/122310 [2:03:19<1:40:45,  9.28it/s][A
 54%|█████████████████▎              | 66196/122310 [2:03:20<1:26:56, 10.76it/s][A
 54%|█████████████████▎              | 66204/122310 [2:03:21<1:27:20, 10.71it/s][A
 54%|█████████████████▎              | 66212/122310 [2:03:21<1:27:54, 10.64it/s][A
 54%|█████████████████▎              | 66218/122310 [2:03:22<1:35:04,  9.83it/s][A
 54%|█████████████████▎              | 66225/122310 [2:03:23<1:36:47,  9.66it/s][A
 54%|█████████████████▎              | 66228/122310 [2:03:24<1:58:05,  7.92

step: 22900, loss: 133.21146561122418, epoch: 1



 54%|█████████████████▎              | 66279/122310 [2:03:30<1:31:21, 10.22it/s][A
 54%|█████████████████▎              | 66285/122310 [2:03:30<1:37:47,  9.55it/s][A
 54%|█████████████████▎              | 66292/122310 [2:03:31<1:38:49,  9.45it/s][A
 54%|█████████████████▎              | 66295/122310 [2:03:32<2:00:30,  7.75it/s][A
 54%|█████████████████▎              | 66298/122310 [2:03:33<2:21:07,  6.62it/s][A
 54%|█████████████████▎              | 66302/122310 [2:03:33<2:30:30,  6.20it/s][A
 54%|█████████████████▎              | 66308/122310 [2:03:34<2:18:43,  6.73it/s][A
 54%|█████████████████▎              | 66312/122310 [2:03:35<2:28:09,  6.30it/s][A
 54%|█████████████████▎              | 66328/122310 [2:03:36<1:27:00, 10.72it/s][A
 54%|█████████████████▎              | 66333/122310 [2:03:36<1:38:18,  9.49it/s][A
 54%|█████████████████▎              | 66348/122310 [2:03:37<1:14:19, 12.55it/s][A
 54%|█████████████████▎              | 66357/122310 [2:03:38<1:15:38, 12.33

step: 22920, loss: 69.21510656525105, epoch: 1



 54%|█████████████████▍              | 66434/122310 [2:03:45<1:20:29, 11.57it/s][A
 54%|█████████████████▍              | 66443/122310 [2:03:46<1:19:47, 11.67it/s][A
 54%|█████████████████▍              | 66450/122310 [2:03:46<1:25:08, 10.94it/s][A
 54%|█████████████████▍              | 66461/122310 [2:03:47<1:17:57, 11.94it/s][A
 54%|█████████████████▍              | 66476/122310 [2:03:48<1:05:06, 14.29it/s][A
 54%|█████████████████▍              | 66487/122310 [2:03:49<1:04:56, 14.33it/s][A
 54%|█████████████████▍              | 66500/122310 [2:03:49<1:01:21, 15.16it/s][A
 54%|█████████████████▍              | 66507/122310 [2:03:50<1:09:50, 13.32it/s][A
 54%|█████████████████▍              | 66509/122310 [2:03:51<1:31:56, 10.12it/s][A
 54%|█████████████████▍              | 66522/122310 [2:03:52<1:16:33, 12.15it/s][A
 54%|█████████████████▍              | 66533/122310 [2:03:53<1:12:32, 12.82it/s][A
 54%|█████████████████▍              | 66536/122310 [2:03:53<1:31:23, 10.17

step: 22940, loss: 137.59191298090104, epoch: 1



 54%|█████████████████▍              | 66635/122310 [2:04:00<1:11:18, 13.01it/s][A
 54%|█████████████████▍              | 66642/122310 [2:04:01<1:18:02, 11.89it/s][A
 54%|█████████████████▍              | 66646/122310 [2:04:02<1:33:44,  9.90it/s][A
 55%|█████████████████▍              | 66660/122310 [2:04:02<1:14:20, 12.48it/s][A
 55%|█████████████████▍              | 66674/122310 [2:04:03<1:04:54, 14.29it/s][A
 55%|█████████████████▍              | 66686/122310 [2:04:04<1:03:13, 14.66it/s][A
 55%|█████████████████▍              | 66697/122310 [2:04:05<1:03:20, 14.63it/s][A
 55%|█████████████████▍              | 66707/122310 [2:04:05<1:05:14, 14.20it/s][A
 55%|██████████████████▌               | 66729/122310 [2:04:06<49:47, 18.60it/s][A
 55%|██████████████████▌               | 66737/122310 [2:04:07<57:15, 16.18it/s][A
 55%|██████████████████▌               | 66750/122310 [2:04:08<56:12, 16.47it/s][A
 55%|█████████████████▍              | 66756/122310 [2:04:09<1:06:37, 13.90

step: 22960, loss: 93.30398315077764, epoch: 1



 55%|█████████████████▍              | 66816/122310 [2:04:15<2:08:11,  7.22it/s][A
 55%|█████████████████▍              | 66824/122310 [2:04:16<1:52:27,  8.22it/s][A
 55%|█████████████████▍              | 66837/122310 [2:04:17<1:25:17, 10.84it/s][A
 55%|█████████████████▍              | 66841/122310 [2:04:18<1:40:43,  9.18it/s][A
 55%|█████████████████▍              | 66854/122310 [2:04:18<1:19:55, 11.56it/s][A
 55%|█████████████████▍              | 66866/122310 [2:04:19<1:12:05, 12.82it/s][A
 55%|█████████████████▍              | 66875/122310 [2:04:20<1:13:58, 12.49it/s][A
 55%|█████████████████▍              | 66887/122310 [2:04:21<1:08:14, 13.53it/s][A
 55%|█████████████████▌              | 66894/122310 [2:04:21<1:15:33, 12.22it/s][A
 55%|█████████████████▌              | 66905/122310 [2:04:22<1:11:30, 12.91it/s][A
 55%|██████████████████▌               | 66923/122310 [2:04:23<57:13, 16.13it/s][A
 55%|██████████████████▌               | 66940/122310 [2:04:24<51:21, 17.97

step: 22980, loss: 93.3530913589237, epoch: 1



 55%|█████████████████▌              | 67024/122310 [2:04:31<1:10:54, 13.00it/s][A
 55%|█████████████████▌              | 67032/122310 [2:04:31<1:15:09, 12.26it/s][A
 55%|█████████████████▌              | 67040/122310 [2:04:32<1:18:34, 11.72it/s][A
 55%|█████████████████▌              | 67045/122310 [2:04:33<1:30:20, 10.20it/s][A
 55%|█████████████████▌              | 67051/122310 [2:04:34<1:36:53,  9.51it/s][A
 55%|█████████████████▌              | 67063/122310 [2:04:34<1:21:06, 11.35it/s][A
 55%|█████████████████▌              | 67070/122310 [2:04:35<1:26:20, 10.66it/s][A
 55%|█████████████████▌              | 67081/122310 [2:04:36<1:18:03, 11.79it/s][A
 55%|█████████████████▌              | 67087/122310 [2:04:37<1:26:16, 10.67it/s][A
 55%|█████████████████▌              | 67093/122310 [2:04:37<1:34:00,  9.79it/s][A
 55%|█████████████████▌              | 67109/122310 [2:04:38<1:10:17, 13.09it/s][A
 55%|██████████████████▋               | 67132/122310 [2:04:39<50:36, 18.17

step: 23000, loss: 85.64029006574077, epoch: 1
saving weights



 55%|█████████████████▌              | 67214/122310 [2:04:46<1:41:31,  9.04it/s][A
 55%|█████████████████▌              | 67225/122310 [2:04:47<1:28:28, 10.38it/s][A
 55%|█████████████████▌              | 67240/122310 [2:04:48<1:11:56, 12.76it/s][A
 55%|█████████████████▌              | 67250/122310 [2:04:48<1:11:23, 12.85it/s][A
 55%|█████████████████▌              | 67254/122310 [2:04:49<1:25:06, 10.78it/s][A
 55%|█████████████████▌              | 67257/122310 [2:04:50<1:43:59,  8.82it/s][A
 55%|█████████████████▌              | 67259/122310 [2:04:51<2:09:49,  7.07it/s][A
 55%|█████████████████▌              | 67271/122310 [2:04:51<1:35:16,  9.63it/s][A
 55%|█████████████████▌              | 67276/122310 [2:04:52<1:45:25,  8.70it/s][A
 55%|█████████████████▌              | 67289/122310 [2:04:53<1:22:19, 11.14it/s][A
 55%|█████████████████▌              | 67305/122310 [2:04:54<1:04:59, 14.10it/s][A
 55%|█████████████████▌              | 67315/122310 [2:04:54<1:06:17, 13.83

step: 23020, loss: 77.61354375647664, epoch: 1



 55%|█████████████████▋              | 67406/122310 [2:05:02<1:13:03, 12.53it/s][A
 55%|█████████████████▋              | 67416/122310 [2:05:03<1:11:55, 12.72it/s][A
 55%|█████████████████▋              | 67421/122310 [2:05:03<1:23:58, 10.89it/s][A
 55%|█████████████████▋              | 67424/122310 [2:05:04<1:44:15,  8.77it/s][A
 55%|█████████████████▋              | 67430/122310 [2:05:05<1:47:16,  8.53it/s][A
 55%|█████████████████▋              | 67434/122310 [2:05:06<2:02:38,  7.46it/s][A
 55%|█████████████████▋              | 67435/122310 [2:05:07<2:42:19,  5.63it/s][A
 55%|█████████████████▋              | 67443/122310 [2:05:07<2:08:42,  7.10it/s][A
 55%|█████████████████▋              | 67457/122310 [2:05:08<1:27:21, 10.46it/s][A
 55%|█████████████████▋              | 67467/122310 [2:05:09<1:21:37, 11.20it/s][A
 55%|█████████████████▋              | 67477/122310 [2:05:10<1:17:46, 11.75it/s][A
 55%|█████████████████▋              | 67482/122310 [2:05:10<1:29:37, 10.20

step: 23040, loss: 88.43026321165598, epoch: 1



 55%|█████████████████▋              | 67546/122310 [2:05:17<1:44:10,  8.76it/s][A
 55%|█████████████████▋              | 67554/122310 [2:05:18<1:38:19,  9.28it/s][A
 55%|█████████████████▋              | 67567/122310 [2:05:19<1:18:21, 11.64it/s][A
 55%|█████████████████▋              | 67573/122310 [2:05:20<1:26:34, 10.54it/s][A
 55%|█████████████████▋              | 67583/122310 [2:05:20<1:20:33, 11.32it/s][A
 55%|█████████████████▋              | 67600/122310 [2:05:21<1:02:20, 14.62it/s][A
 55%|█████████████████▋              | 67610/122310 [2:05:22<1:04:22, 14.16it/s][A
 55%|█████████████████▋              | 67614/122310 [2:05:23<1:19:11, 11.51it/s][A
 55%|█████████████████▋              | 67621/122310 [2:05:23<1:24:08, 10.83it/s][A
 55%|██████████████████▊               | 67641/122310 [2:05:24<58:48, 15.50it/s][A
 55%|█████████████████▋              | 67649/122310 [2:05:25<1:05:08, 13.99it/s][A
 55%|█████████████████▋              | 67657/122310 [2:05:26<1:10:33, 12.91

step: 23060, loss: 68.01812686967452, epoch: 1



 55%|██████████████████▊               | 67740/122310 [2:05:33<58:40, 15.50it/s][A
 55%|█████████████████▋              | 67745/122310 [2:05:33<1:10:17, 12.94it/s][A
 55%|█████████████████▋              | 67756/122310 [2:05:34<1:07:55, 13.39it/s][A
 55%|█████████████████▋              | 67764/122310 [2:05:35<1:13:03, 12.44it/s][A
 55%|█████████████████▋              | 67776/122310 [2:05:36<1:07:35, 13.45it/s][A
 55%|█████████████████▋              | 67779/122310 [2:05:36<1:25:52, 10.58it/s][A
 55%|█████████████████▋              | 67784/122310 [2:05:37<1:36:40,  9.40it/s][A
 55%|█████████████████▋              | 67794/122310 [2:05:38<1:26:19, 10.52it/s][A
 55%|█████████████████▋              | 67799/122310 [2:05:39<1:37:18,  9.34it/s][A
 55%|█████████████████▋              | 67808/122310 [2:05:39<1:30:37, 10.02it/s][A
 55%|█████████████████▋              | 67812/122310 [2:05:40<1:45:20,  8.62it/s][A
 55%|█████████████████▋              | 67822/122310 [2:05:41<1:30:41, 10.01

step: 23080, loss: 70.81810472908892, epoch: 1



 56%|█████████████████▊              | 67911/122310 [2:05:48<1:18:18, 11.58it/s][A
 56%|█████████████████▊              | 67917/122310 [2:05:49<1:26:11, 10.52it/s][A
 56%|█████████████████▊              | 67920/122310 [2:05:49<1:46:13,  8.53it/s][A
 56%|█████████████████▊              | 67924/122310 [2:05:50<1:59:38,  7.58it/s][A
 56%|█████████████████▊              | 67930/122310 [2:05:51<1:58:16,  7.66it/s][A
 56%|█████████████████▊              | 67936/122310 [2:05:52<1:57:14,  7.73it/s][A
 56%|█████████████████▊              | 67943/122310 [2:05:52<1:50:38,  8.19it/s][A
 56%|█████████████████▊              | 67957/122310 [2:05:53<1:20:34, 11.24it/s][A
 56%|█████████████████▊              | 67966/122310 [2:05:54<1:19:05, 11.45it/s][A
 56%|█████████████████▊              | 67975/122310 [2:05:55<1:18:23, 11.55it/s][A
 56%|█████████████████▊              | 67979/122310 [2:05:55<1:33:33,  9.68it/s][A
 56%|█████████████████▊              | 67989/122310 [2:05:56<1:24:31, 10.71

step: 23100, loss: 93.34386717611046, epoch: 1



 56%|█████████████████▊              | 68056/122310 [2:06:03<1:37:25,  9.28it/s][A
 56%|█████████████████▊              | 68067/122310 [2:06:04<1:25:37, 10.56it/s][A
 56%|█████████████████▊              | 68070/122310 [2:06:05<1:41:53,  8.87it/s][A
 56%|█████████████████▊              | 68085/122310 [2:06:05<1:16:24, 11.83it/s][A
 56%|█████████████████▊              | 68098/122310 [2:06:06<1:08:01, 13.28it/s][A
 56%|█████████████████▊              | 68106/122310 [2:06:07<1:12:25, 12.47it/s][A
 56%|█████████████████▊              | 68119/122310 [2:06:08<1:05:16, 13.84it/s][A
 56%|█████████████████▊              | 68123/122310 [2:06:08<1:20:16, 11.25it/s][A
 56%|█████████████████▊              | 68125/122310 [2:06:09<1:42:57,  8.77it/s][A
 56%|█████████████████▊              | 68133/122310 [2:06:10<1:37:04,  9.30it/s][A
 56%|█████████████████▊              | 68148/122310 [2:06:11<1:12:39, 12.42it/s][A
 56%|█████████████████▊              | 68153/122310 [2:06:11<1:24:27, 10.69

step: 23120, loss: 96.8882860712074, epoch: 1



 56%|█████████████████▊              | 68230/122310 [2:06:18<1:32:17,  9.77it/s][A
 56%|█████████████████▊              | 68239/122310 [2:06:19<1:26:42, 10.39it/s][A
 56%|█████████████████▊              | 68249/122310 [2:06:20<1:20:08, 11.24it/s][A
 56%|█████████████████▊              | 68264/122310 [2:06:21<1:05:21, 13.78it/s][A
 56%|█████████████████▊              | 68273/122310 [2:06:21<1:08:18, 13.19it/s][A
 56%|█████████████████▊              | 68277/122310 [2:06:22<1:23:27, 10.79it/s][A
 56%|█████████████████▊              | 68290/122310 [2:06:23<1:10:52, 12.70it/s][A
 56%|█████████████████▊              | 68297/122310 [2:06:24<1:17:52, 11.56it/s][A
 56%|█████████████████▊              | 68301/122310 [2:06:24<1:32:56,  9.69it/s][A
 56%|█████████████████▊              | 68309/122310 [2:06:25<1:30:30,  9.94it/s][A
 56%|█████████████████▉              | 68329/122310 [2:06:26<1:00:28, 14.88it/s][A
 56%|██████████████████▉               | 68347/122310 [2:06:27<51:26, 17.48

step: 23140, loss: 107.29556766004708, epoch: 1



 56%|█████████████████▉              | 68427/122310 [2:06:33<1:20:05, 11.21it/s][A
 56%|█████████████████▉              | 68436/122310 [2:06:34<1:19:03, 11.36it/s][A
 56%|█████████████████▉              | 68439/122310 [2:06:35<1:37:48,  9.18it/s][A
 56%|█████████████████▉              | 68445/122310 [2:06:36<1:42:09,  8.79it/s][A
 56%|█████████████████▉              | 68451/122310 [2:06:37<1:45:16,  8.53it/s][A
 56%|█████████████████▉              | 68462/122310 [2:06:37<1:27:40, 10.24it/s][A
 56%|█████████████████▉              | 68464/122310 [2:06:38<1:52:12,  8.00it/s][A
 56%|█████████████████▉              | 68481/122310 [2:06:39<1:12:48, 12.32it/s][A
 56%|█████████████████▉              | 68488/122310 [2:06:40<1:19:05, 11.34it/s][A
 56%|█████████████████▉              | 68494/122310 [2:06:40<1:27:05, 10.30it/s][A
 56%|█████████████████▉              | 68501/122310 [2:06:41<1:29:54,  9.97it/s][A
 56%|█████████████████▉              | 68506/122310 [2:06:42<1:40:01,  8.97

step: 23160, loss: 85.46022244906864, epoch: 1



 56%|█████████████████▉              | 68569/122310 [2:06:49<1:37:46,  9.16it/s][A
 56%|█████████████████▉              | 68574/122310 [2:06:50<1:46:51,  8.38it/s][A
 56%|█████████████████▉              | 68581/122310 [2:06:50<1:44:16,  8.59it/s][A
 56%|█████████████████▉              | 68590/122310 [2:06:51<1:33:28,  9.58it/s][A
 56%|█████████████████▉              | 68596/122310 [2:06:52<1:38:54,  9.05it/s][A
 56%|█████████████████▉              | 68600/122310 [2:06:53<1:52:55,  7.93it/s][A
 56%|█████████████████▉              | 68607/122310 [2:06:53<1:47:36,  8.32it/s][A
 56%|█████████████████▉              | 68622/122310 [2:06:54<1:16:16, 11.73it/s][A
 56%|█████████████████▉              | 68626/122310 [2:06:55<1:31:16,  9.80it/s][A
 56%|█████████████████▉              | 68631/122310 [2:06:56<1:41:17,  8.83it/s][A
 56%|█████████████████▉              | 68640/122310 [2:06:56<1:32:07,  9.71it/s][A
 56%|█████████████████▉              | 68655/122310 [2:06:57<1:10:14, 12.73

step: 23180, loss: 93.04075329701445, epoch: 1



 56%|█████████████████▉              | 68724/122310 [2:07:04<1:15:09, 11.88it/s][A
 56%|█████████████████▉              | 68730/122310 [2:07:05<1:23:10, 10.74it/s][A
 56%|█████████████████▉              | 68738/122310 [2:07:06<1:23:35, 10.68it/s][A
 56%|█████████████████▉              | 68741/122310 [2:07:06<1:43:09,  8.65it/s][A
 56%|█████████████████▉              | 68758/122310 [2:07:07<1:09:51, 12.78it/s][A
 56%|█████████████████▉              | 68770/122310 [2:07:08<1:05:14, 13.68it/s][A
 56%|███████████████████               | 68789/122310 [2:07:09<52:16, 17.06it/s][A
 56%|█████████████████▉              | 68795/122310 [2:07:09<1:02:37, 14.24it/s][A
 56%|██████████████████              | 68803/122310 [2:07:10<1:08:14, 13.07it/s][A
 56%|███████████████████▏              | 68818/122310 [2:07:11<59:18, 15.03it/s][A
 56%|██████████████████              | 68823/122310 [2:07:12<1:11:15, 12.51it/s][A
 56%|██████████████████              | 68834/122310 [2:07:12<1:08:35, 12.99

step: 23200, loss: 78.54533416428814, epoch: 1
sim1 and sim2 are 0.45037448467177255, 0.23678890237446273
cosine of pred and queen: 0.21127648658524692
Actual: athens:greece::madrid:spain, pred: real
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharas

Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: suddenly
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: happy
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: thaws
Actual: man:woman::brothers:sisters, pred: multiplying
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: rajasthan
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: rajasthan
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 56%|█████████████████▍             | 68887/122310 [2:08:37<42:23:25,  2.86s/it][A

Actual: india:rupee::denmark:krone, pred: higher
Accuracy is 0.10650887573964497



 56%|█████████████████▍             | 68891/122310 [2:08:38<35:45:11,  2.41s/it][A
 56%|█████████████████▍             | 68897/122310 [2:08:39<26:47:47,  1.81s/it][A
 56%|█████████████████▍             | 68908/122310 [2:08:39<16:15:01,  1.10s/it][A
 56%|█████████████████▍             | 68918/122310 [2:08:40<10:59:51,  1.35it/s][A
 56%|██████████████████              | 68926/122310 [2:08:41<8:16:30,  1.79it/s][A
 56%|██████████████████              | 68930/122310 [2:08:42<7:21:06,  2.02it/s][A
 56%|██████████████████              | 68940/122310 [2:08:42<5:00:26,  2.96it/s][A
 56%|██████████████████              | 68949/122310 [2:08:43<3:47:09,  3.92it/s][A
 56%|██████████████████              | 68951/122310 [2:08:44<3:56:51,  3.75it/s][A
 56%|██████████████████              | 68965/122310 [2:08:45<2:25:27,  6.11it/s][A
 56%|██████████████████              | 68974/122310 [2:08:45<2:03:23,  7.20it/s][A
 56%|██████████████████              | 68982/122310 [2:08:46<1:52:15,  7.92

step: 23220, loss: 70.00373076295014, epoch: 1



 56%|██████████████████              | 69064/122310 [2:08:53<1:32:01,  9.64it/s][A
 56%|██████████████████              | 69073/122310 [2:08:54<1:26:09, 10.30it/s][A
 56%|██████████████████              | 69082/122310 [2:08:54<1:21:56, 10.83it/s][A
 56%|██████████████████              | 69092/122310 [2:08:55<1:16:41, 11.57it/s][A
 56%|██████████████████              | 69096/122310 [2:08:57<1:59:01,  7.45it/s][A
 57%|██████████████████              | 69107/122310 [2:08:57<1:35:30,  9.28it/s][A
 57%|██████████████████              | 69117/122310 [2:08:58<1:25:10, 10.41it/s][A
 57%|██████████████████              | 69127/122310 [2:08:59<1:18:57, 11.23it/s][A
 57%|██████████████████              | 69133/122310 [2:09:00<1:25:11, 10.40it/s][A
 57%|██████████████████              | 69144/122310 [2:09:00<1:15:52, 11.68it/s][A
 57%|██████████████████              | 69152/122310 [2:09:01<1:17:16, 11.46it/s][A
 57%|██████████████████              | 69161/122310 [2:09:02<1:15:25, 11.75

step: 23240, loss: 71.5763924462003, epoch: 1



 57%|██████████████████              | 69233/122310 [2:09:08<1:12:22, 12.22it/s][A
 57%|██████████████████              | 69241/122310 [2:09:08<1:14:25, 11.88it/s][A
 57%|██████████████████              | 69249/122310 [2:09:09<1:16:02, 11.63it/s][A
 57%|██████████████████              | 69256/122310 [2:09:10<1:20:00, 11.05it/s][A
 57%|██████████████████              | 69265/122310 [2:09:11<1:17:05, 11.47it/s][A
 57%|██████████████████              | 69271/122310 [2:09:11<1:24:05, 10.51it/s][A
 57%|██████████████████▏             | 69286/122310 [2:09:12<1:05:14, 13.54it/s][A
 57%|██████████████████▏             | 69296/122310 [2:09:13<1:24:47, 10.42it/s][A
 57%|██████████████████▏             | 69306/122310 [2:09:14<1:18:38, 11.23it/s][A
 57%|██████████████████▏             | 69315/122310 [2:09:15<1:16:37, 11.53it/s][A
 57%|██████████████████▏             | 69327/122310 [2:09:16<1:08:37, 12.87it/s][A
 57%|██████████████████▏             | 69337/122310 [2:09:16<1:07:26, 13.09

step: 23260, loss: 89.729808350434, epoch: 1



 57%|███████████████████▎              | 69433/122310 [2:09:22<46:26, 18.98it/s][A
 57%|███████████████████▎              | 69443/122310 [2:09:23<50:39, 17.39it/s][A
 57%|███████████████████▎              | 69452/122310 [2:09:24<55:29, 15.88it/s][A
 57%|██████████████████▏             | 69459/122310 [2:09:25<1:07:26, 13.06it/s][A
 57%|██████████████████▏             | 69461/122310 [2:09:25<1:27:44, 10.04it/s][A
 57%|██████████████████▏             | 69472/122310 [2:09:26<1:17:01, 11.43it/s][A
 57%|██████████████████▏             | 69484/122310 [2:09:27<1:08:36, 12.83it/s][A
 57%|██████████████████▏             | 69491/122310 [2:09:28<1:13:43, 11.94it/s][A
 57%|██████████████████▏             | 69502/122310 [2:09:28<1:08:07, 12.92it/s][A
 57%|██████████████████▏             | 69509/122310 [2:09:29<1:13:25, 11.99it/s][A
 57%|███████████████████▎              | 69528/122310 [2:09:30<54:10, 16.24it/s][A
 57%|███████████████████▎              | 69547/122310 [2:09:30<45:56, 19.14

step: 23280, loss: 78.42244400619245, epoch: 1



 57%|██████████████████▏             | 69629/122310 [2:09:37<1:02:19, 14.09it/s][A
 57%|███████████████████▎              | 69648/122310 [2:09:38<49:20, 17.79it/s][A
 57%|███████████████████▎              | 69659/122310 [2:09:38<51:40, 16.98it/s][A
 57%|███████████████████▎              | 69673/122310 [2:09:39<49:19, 17.78it/s][A
 57%|███████████████████▎              | 69680/122310 [2:09:40<57:29, 15.26it/s][A
 57%|███████████████████▎              | 69691/122310 [2:09:41<57:24, 15.28it/s][A
 57%|██████████████████▏             | 69696/122310 [2:09:41<1:08:34, 12.79it/s][A
 57%|██████████████████▏             | 69704/122310 [2:09:42<1:11:22, 12.28it/s][A
 57%|███████████████████▍              | 69721/122310 [2:09:43<56:06, 15.62it/s][A
 57%|██████████████████▏             | 69728/122310 [2:09:43<1:03:10, 13.87it/s][A
 57%|██████████████████▏             | 69736/122310 [2:09:44<1:07:11, 13.04it/s][A
 57%|██████████████████▏             | 69747/122310 [2:09:45<1:03:59, 13.69

step: 23300, loss: 70.80456275633377, epoch: 1



 57%|███████████████████▍              | 69846/122310 [2:09:51<59:32, 14.69it/s][A
 57%|██████████████████▎             | 69856/122310 [2:09:52<1:00:41, 14.40it/s][A
 57%|██████████████████▎             | 69862/122310 [2:09:53<1:09:33, 12.57it/s][A
 57%|██████████████████▎             | 69865/122310 [2:09:54<1:27:33,  9.98it/s][A
 57%|██████████████████▎             | 69867/122310 [2:09:54<1:50:58,  7.88it/s][A
 57%|██████████████████▎             | 69875/122310 [2:09:55<1:38:58,  8.83it/s][A
 57%|██████████████████▎             | 69883/122310 [2:09:56<1:32:20,  9.46it/s][A
 57%|██████████████████▎             | 69884/122310 [2:09:56<2:03:01,  7.10it/s][A
 57%|██████████████████▎             | 69890/122310 [2:09:57<1:58:10,  7.39it/s][A
 57%|██████████████████▎             | 69891/122310 [2:09:58<2:35:36,  5.61it/s][A
 57%|██████████████████▎             | 69906/122310 [2:09:59<1:26:07, 10.14it/s][A
 57%|██████████████████▎             | 69910/122310 [2:09:59<1:39:30,  8.78

step: 23320, loss: 80.47200696438519, epoch: 1



 57%|██████████████████▎             | 69989/122310 [2:10:06<1:07:55, 12.84it/s][A
 57%|██████████████████▎             | 69995/122310 [2:10:07<1:16:01, 11.47it/s][A
 57%|██████████████████▎             | 70003/122310 [2:10:07<1:16:41, 11.37it/s][A
 57%|██████████████████▎             | 70019/122310 [2:10:08<1:00:05, 14.50it/s][A
 57%|██████████████████▎             | 70021/122310 [2:10:09<1:18:45, 11.07it/s][A
 57%|██████████████████▎             | 70023/122310 [2:10:10<1:41:36,  8.58it/s][A
 57%|██████████████████▎             | 70029/122310 [2:10:10<1:42:07,  8.53it/s][A
 57%|██████████████████▎             | 70041/122310 [2:10:11<1:19:23, 10.97it/s][A
 57%|██████████████████▎             | 70049/122310 [2:10:12<1:19:04, 11.01it/s][A
 57%|██████████████████▎             | 70063/122310 [2:10:12<1:04:21, 13.53it/s][A
 57%|███████████████████▍              | 70083/122310 [2:10:13<48:56, 17.78it/s][A
 57%|███████████████████▍              | 70097/122310 [2:10:14<47:44, 18.23

step: 23340, loss: 72.4509455986086, epoch: 1



 57%|██████████████████▎             | 70154/122310 [2:10:20<1:15:01, 11.59it/s][A
 57%|██████████████████▎             | 70163/122310 [2:10:21<1:13:27, 11.83it/s][A
 57%|██████████████████▎             | 70168/122310 [2:10:22<1:23:02, 10.46it/s][A
 57%|██████████████████▎             | 70171/122310 [2:10:23<1:40:26,  8.65it/s][A
 57%|██████████████████▎             | 70181/122310 [2:10:23<1:25:28, 10.16it/s][A
 57%|██████████████████▎             | 70189/122310 [2:10:24<1:23:16, 10.43it/s][A
 57%|██████████████████▎             | 70202/122310 [2:10:25<1:08:52, 12.61it/s][A
 57%|██████████████████▎             | 70209/122310 [2:10:25<1:13:56, 11.74it/s][A
 57%|██████████████████▎             | 70223/122310 [2:10:26<1:01:51, 14.03it/s][A
 57%|███████████████████▌              | 70237/122310 [2:10:27<55:52, 15.53it/s][A
 57%|███████████████████▌              | 70247/122310 [2:10:28<57:59, 14.96it/s][A
 57%|██████████████████▍             | 70251/122310 [2:10:28<1:11:06, 12.20

step: 23360, loss: 95.35780661672626, epoch: 1



 57%|██████████████████▍             | 70317/122310 [2:10:35<1:37:53,  8.85it/s][A
 58%|██████████████████▍             | 70330/122310 [2:10:36<1:14:58, 11.55it/s][A
 58%|██████████████████▍             | 70337/122310 [2:10:36<1:18:34, 11.03it/s][A
 58%|███████████████████▌              | 70354/122310 [2:10:37<58:36, 14.78it/s][A
 58%|██████████████████▍             | 70362/122310 [2:10:38<1:03:19, 13.67it/s][A
 58%|██████████████████▍             | 70369/122310 [2:10:38<1:09:22, 12.48it/s][A
 58%|██████████████████▍             | 70377/122310 [2:10:39<1:12:02, 12.01it/s][A
 58%|███████████████████▌              | 70395/122310 [2:10:40<54:32, 15.86it/s][A
 58%|██████████████████▍             | 70402/122310 [2:10:41<1:01:47, 14.00it/s][A
 58%|███████████████████▌              | 70415/122310 [2:10:41<56:43, 15.25it/s][A
 58%|██████████████████▍             | 70419/122310 [2:10:42<1:10:09, 12.33it/s][A
 58%|██████████████████▍             | 70423/122310 [2:10:43<1:24:16, 10.26

step: 23380, loss: 84.8513552421653, epoch: 1



 58%|██████████████████▍             | 70456/122310 [2:10:49<2:52:12,  5.02it/s][A
 58%|██████████████████▍             | 70469/122310 [2:10:50<1:58:50,  7.27it/s][A
 58%|██████████████████▍             | 70487/122310 [2:10:51<1:20:15, 10.76it/s][A
 58%|██████████████████▍             | 70499/122310 [2:10:51<1:11:42, 12.04it/s][A
 58%|██████████████████▍             | 70514/122310 [2:10:52<1:01:12, 14.10it/s][A
 58%|██████████████████▍             | 70520/122310 [2:10:53<1:08:33, 12.59it/s][A
 58%|██████████████████▍             | 70533/122310 [2:10:54<1:01:38, 14.00it/s][A
 58%|██████████████████▍             | 70542/122310 [2:10:54<1:03:45, 13.53it/s][A
 58%|██████████████████▍             | 70550/122310 [2:10:55<1:07:40, 12.75it/s][A
 58%|██████████████████▍             | 70553/122310 [2:10:56<1:23:38, 10.31it/s][A
 58%|██████████████████▍             | 70564/122310 [2:10:57<1:13:43, 11.70it/s][A
 58%|██████████████████▍             | 70570/122310 [2:10:57<1:20:41, 10.69

step: 23400, loss: 78.4069410860617, epoch: 1



 58%|███████████████████▋              | 70653/122310 [2:11:04<56:55, 15.12it/s][A
 58%|██████████████████▍             | 70662/122310 [2:11:05<1:17:57, 11.04it/s][A
 58%|██████████████████▍             | 70672/122310 [2:11:06<1:13:10, 11.76it/s][A
 58%|██████████████████▍             | 70681/122310 [2:11:07<1:12:13, 11.91it/s][A
 58%|███████████████████▋              | 70699/122310 [2:11:07<55:47, 15.42it/s][A
 58%|██████████████████▍             | 70706/122310 [2:11:08<1:02:13, 13.82it/s][A
 58%|██████████████████▍             | 70708/122310 [2:11:09<1:20:43, 10.65it/s][A
 58%|██████████████████▌             | 70717/122310 [2:11:10<1:16:46, 11.20it/s][A
 58%|██████████████████▌             | 70730/122310 [2:11:10<1:05:20, 13.16it/s][A
 58%|███████████████████▋              | 70748/122310 [2:11:11<51:59, 16.53it/s][A
 58%|██████████████████▌             | 70751/122310 [2:11:12<1:06:38, 12.90it/s][A
 58%|███████████████████▋              | 70768/122310 [2:11:13<53:32, 16.04

step: 23420, loss: 83.55243115866917, epoch: 1



 58%|██████████████████▌             | 70856/122310 [2:11:18<1:03:54, 13.42it/s][A
 58%|██████████████████▌             | 70861/122310 [2:11:19<1:14:18, 11.54it/s][A
 58%|██████████████████▌             | 70874/122310 [2:11:20<1:03:51, 13.43it/s][A
 58%|██████████████████▌             | 70880/122310 [2:11:21<1:12:01, 11.90it/s][A
 58%|██████████████████▌             | 70887/122310 [2:11:21<1:16:26, 11.21it/s][A
 58%|███████████████████▋              | 70904/122310 [2:11:22<57:38, 14.86it/s][A
 58%|██████████████████▌             | 70911/122310 [2:11:23<1:04:25, 13.30it/s][A
 58%|██████████████████▌             | 70921/122310 [2:11:23<1:03:52, 13.41it/s][A
 58%|██████████████████▌             | 70932/122310 [2:11:24<1:01:12, 13.99it/s][A
 58%|███████████████████▋              | 70943/122310 [2:11:25<59:37, 14.36it/s][A
 58%|██████████████████▌             | 70949/122310 [2:11:26<1:08:26, 12.51it/s][A
 58%|██████████████████▌             | 70955/122310 [2:11:26<1:16:07, 11.24

step: 23440, loss: 128.68960188468995, epoch: 1



 58%|██████████████████▌             | 71005/122310 [2:11:33<2:24:46,  5.91it/s][A
 58%|██████████████████▌             | 71012/122310 [2:11:34<2:07:29,  6.71it/s][A
 58%|██████████████████▌             | 71017/122310 [2:11:34<2:06:35,  6.75it/s][A
 58%|██████████████████▌             | 71029/122310 [2:11:35<1:33:06,  9.18it/s][A
 58%|██████████████████▌             | 71041/122310 [2:11:36<1:16:50, 11.12it/s][A
 58%|██████████████████▌             | 71050/122310 [2:11:36<1:14:34, 11.46it/s][A
 58%|███████████████████▊              | 71069/122310 [2:11:37<54:58, 15.53it/s][A
 58%|███████████████████▊              | 71083/122310 [2:11:38<51:18, 16.64it/s][A
 58%|███████████████████▊              | 71094/122310 [2:11:39<52:47, 16.17it/s][A
 58%|███████████████████▊              | 71102/122310 [2:11:39<58:11, 14.67it/s][A
 58%|██████████████████▌             | 71110/122310 [2:11:40<1:03:05, 13.53it/s][A
 58%|██████████████████▌             | 71121/122310 [2:11:41<1:00:41, 14.06

step: 23460, loss: 82.62314488993485, epoch: 1



 58%|██████████████████▋             | 71206/122310 [2:11:47<1:12:29, 11.75it/s][A
 58%|██████████████████▋             | 71218/122310 [2:11:48<1:04:34, 13.19it/s][A
 58%|██████████████████▋             | 71229/122310 [2:11:49<1:02:02, 13.72it/s][A
 58%|██████████████████▋             | 71232/122310 [2:11:50<1:17:47, 10.94it/s][A
 58%|██████████████████▋             | 71242/122310 [2:11:50<1:12:16, 11.78it/s][A
 58%|██████████████████▋             | 71251/122310 [2:11:51<1:10:58, 11.99it/s][A
 58%|███████████████████▊              | 71267/122310 [2:11:52<56:34, 15.04it/s][A
 58%|██████████████████▋             | 71270/122310 [2:11:52<1:12:28, 11.74it/s][A
 58%|██████████████████▋             | 71279/122310 [2:11:53<1:11:09, 11.95it/s][A
 58%|██████████████████▋             | 71284/122310 [2:11:54<1:21:23, 10.45it/s][A
 58%|██████████████████▋             | 71293/122310 [2:11:55<1:17:15, 11.01it/s][A
 58%|██████████████████▋             | 71300/122310 [2:11:55<1:20:08, 10.61

step: 23480, loss: 94.20665382675946, epoch: 1



 58%|██████████████████▋             | 71382/122310 [2:12:02<1:11:30, 11.87it/s][A
 58%|██████████████████▋             | 71387/122310 [2:12:03<1:21:51, 10.37it/s][A
 58%|██████████████████▋             | 71396/122310 [2:12:03<1:17:10, 10.99it/s][A
 58%|██████████████████▋             | 71406/122310 [2:12:04<1:11:28, 11.87it/s][A
 58%|██████████████████▋             | 71414/122310 [2:12:05<1:13:18, 11.57it/s][A
 58%|██████████████████▋             | 71422/122310 [2:12:06<1:14:21, 11.41it/s][A
 58%|██████████████████▋             | 71427/122310 [2:12:06<1:24:25, 10.04it/s][A
 58%|██████████████████▋             | 71435/122310 [2:12:07<1:22:12, 10.31it/s][A
 58%|██████████████████▋             | 71443/122310 [2:12:08<1:20:35, 10.52it/s][A
 58%|██████████████████▋             | 71450/122310 [2:12:08<1:22:35, 10.26it/s][A
 58%|██████████████████▋             | 71457/122310 [2:12:09<1:24:37, 10.01it/s][A
 58%|██████████████████▋             | 71469/122310 [2:12:10<1:10:53, 11.95

step: 23500, loss: 86.44013683548674, epoch: 1



 58%|██████████████████▋             | 71527/122310 [2:12:16<1:32:14,  9.18it/s][A
 58%|██████████████████▋             | 71537/122310 [2:12:17<1:20:05, 10.57it/s][A
 58%|██████████████████▋             | 71548/122310 [2:12:18<1:10:43, 11.96it/s][A
 59%|██████████████████▋             | 71558/122310 [2:12:19<1:07:45, 12.48it/s][A
 59%|██████████████████▋             | 71563/122310 [2:12:19<1:17:54, 10.86it/s][A
 59%|██████████████████▋             | 71572/122310 [2:12:20<1:15:01, 11.27it/s][A
 59%|██████████████████▋             | 71586/122310 [2:12:21<1:02:00, 13.63it/s][A
 59%|███████████████████▉              | 71602/122310 [2:12:22<52:16, 16.17it/s][A
 59%|███████████████████▉              | 71614/122310 [2:12:22<52:08, 16.21it/s][A
 59%|███████████████████▉              | 71625/122310 [2:12:23<53:13, 15.87it/s][A
 59%|██████████████████▋             | 71630/122310 [2:12:24<1:03:46, 13.24it/s][A
 59%|██████████████████▋             | 71641/122310 [2:12:24<1:01:18, 13.78

step: 23520, loss: 67.1627565758211, epoch: 1



 59%|██████████████████▊             | 71700/122310 [2:12:31<1:26:57,  9.70it/s][A
 59%|██████████████████▊             | 71707/122310 [2:12:32<1:27:01,  9.69it/s][A
 59%|██████████████████▊             | 71713/122310 [2:12:32<1:30:48,  9.29it/s][A
 59%|██████████████████▊             | 71722/122310 [2:12:33<1:22:28, 10.22it/s][A
 59%|███████████████████▉              | 71739/122310 [2:12:34<59:26, 14.18it/s][A
 59%|██████████████████▊             | 71745/122310 [2:12:35<1:07:58, 12.40it/s][A
 59%|██████████████████▊             | 71751/122310 [2:12:35<1:15:27, 11.17it/s][A
 59%|██████████████████▊             | 71760/122310 [2:12:36<1:13:06, 11.52it/s][A
 59%|██████████████████▊             | 71767/122310 [2:12:37<1:16:48, 10.97it/s][A
 59%|██████████████████▊             | 71775/122310 [2:12:38<1:17:09, 10.91it/s][A
 59%|██████████████████▊             | 71781/122310 [2:12:38<1:23:29, 10.09it/s][A
 59%|███████████████████▉              | 71801/122310 [2:12:39<55:01, 15.30

step: 23540, loss: 104.84233215067711, epoch: 1



 59%|██████████████████▊             | 71851/122310 [2:12:46<1:37:23,  8.64it/s][A
 59%|██████████████████▊             | 71858/122310 [2:12:46<1:33:38,  8.98it/s][A
 59%|██████████████████▊             | 71862/122310 [2:12:47<1:46:04,  7.93it/s][A
 59%|██████████████████▊             | 71870/122310 [2:12:48<1:34:42,  8.88it/s][A
 59%|██████████████████▊             | 71882/122310 [2:12:48<1:15:11, 11.18it/s][A
 59%|██████████████████▊             | 71890/122310 [2:12:49<1:15:28, 11.13it/s][A
 59%|██████████████████▊             | 71895/122310 [2:12:50<1:25:06,  9.87it/s][A
 59%|██████████████████▊             | 71909/122310 [2:12:51<1:06:09, 12.70it/s][A
 59%|██████████████████▊             | 71919/122310 [2:12:51<1:04:29, 13.02it/s][A
 59%|███████████████████▉              | 71933/122310 [2:12:52<56:13, 14.93it/s][A
 59%|███████████████████▉              | 71942/122310 [2:12:53<59:28, 14.11it/s][A
 59%|██████████████████▊             | 71947/122310 [2:12:54<1:10:05, 11.98

step: 23560, loss: 94.23103374665676, epoch: 1



 59%|██████████████████▊             | 72018/122310 [2:13:00<1:29:06,  9.41it/s][A
 59%|██████████████████▊             | 72021/122310 [2:13:01<1:47:02,  7.83it/s][A
 59%|██████████████████▊             | 72032/122310 [2:13:02<1:23:33, 10.03it/s][A
 59%|██████████████████▊             | 72043/122310 [2:13:02<1:12:20, 11.58it/s][A
 59%|██████████████████▊             | 72053/122310 [2:13:03<1:08:35, 12.21it/s][A
 59%|██████████████████▊             | 72057/122310 [2:13:04<1:22:12, 10.19it/s][A
 59%|██████████████████▊             | 72064/122310 [2:13:04<1:23:17, 10.05it/s][A
 59%|██████████████████▊             | 72072/122310 [2:13:05<1:20:49, 10.36it/s][A
 59%|██████████████████▊             | 72079/122310 [2:13:06<1:22:46, 10.11it/s][A
 59%|██████████████████▊             | 72083/122310 [2:13:07<1:35:35,  8.76it/s][A
 59%|██████████████████▊             | 72096/122310 [2:13:07<1:12:53, 11.48it/s][A
 59%|██████████████████▊             | 72106/122310 [2:13:08<1:08:49, 12.16

step: 23580, loss: 80.62181960452341, epoch: 1



 59%|████████████████████              | 72189/122310 [2:13:15<54:06, 15.44it/s][A
 59%|████████████████████              | 72200/122310 [2:13:15<54:21, 15.36it/s][A
 59%|██████████████████▉             | 72207/122310 [2:13:16<1:01:16, 13.63it/s][A
 59%|██████████████████▉             | 72211/122310 [2:13:18<1:36:34,  8.65it/s][A
 59%|██████████████████▉             | 72219/122310 [2:13:18<1:30:22,  9.24it/s][A
 59%|██████████████████▉             | 72234/122310 [2:13:19<1:08:20, 12.21it/s][A
 59%|████████████████████              | 72251/122310 [2:13:20<54:44, 15.24it/s][A
 59%|████████████████████              | 72273/122310 [2:13:20<42:55, 19.43it/s][A
 59%|████████████████████              | 72276/122310 [2:13:21<55:15, 15.09it/s][A
 59%|██████████████████▉             | 72284/122310 [2:13:22<1:00:03, 13.88it/s][A
 59%|██████████████████▉             | 72290/122310 [2:13:23<1:08:20, 12.20it/s][A
 59%|██████████████████▉             | 72300/122310 [2:13:23<1:06:57, 12.45

step: 23600, loss: 87.17508480954056, epoch: 1
sim1 and sim2 are 0.475620757415778, 0.23400224202636197
cosine of pred and queen: 0.17857511505980428
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: largement
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mah


 59%|████████████████████              | 72365/122310 [2:13:38<53:20, 15.60it/s][A

Actual: chhattisgarh:raipur::assam:dispur, pred: mohamed
Actual: goa:panaji::rajasthan:jaipur, pred: tempura
Actual: jharkhand:ranchi::punjab:chandigarh, pred: bihar
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: thiruvananthapuram
Actual: india:delhi::serbia:belgrade, pred: emblem
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: football
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: currency
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: cent
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: diplomats
Actual: sweden:swedish::netherlands:dutch, pred: major
Actual: russia:russian::germany:german


 59%|██████████████████▎            | 72372/122310 [2:14:49<32:52:07,  2.37s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.10059171597633136



 59%|██████████████████▎            | 72382/122310 [2:14:49<23:35:20,  1.70s/it][A
 59%|██████████████████▎            | 72388/122310 [2:14:50<19:12:47,  1.39s/it][A
 59%|██████████████████▎            | 72397/122310 [2:14:51<13:47:35,  1.01it/s][A
 59%|██████████████████▎            | 72401/122310 [2:14:52<11:59:50,  1.16it/s][A
 59%|██████████████████▉             | 72417/122310 [2:14:52<6:34:28,  2.11it/s][A
 59%|██████████████████▉             | 72423/122310 [2:14:53<5:35:04,  2.48it/s][A
 59%|██████████████████▉             | 72434/122310 [2:14:54<3:58:04,  3.49it/s][A
 59%|██████████████████▉             | 72453/122310 [2:14:54<2:23:06,  5.81it/s][A
 59%|██████████████████▉             | 72462/122310 [2:14:55<2:05:25,  6.62it/s][A
 59%|██████████████████▉             | 72466/122310 [2:14:56<2:09:24,  6.42it/s][A
 59%|██████████████████▉             | 72475/122310 [2:14:57<1:51:25,  7.45it/s][A
 59%|██████████████████▉             | 72482/122310 [2:14:57<1:45:51,  7.85

step: 23620, loss: 79.3699621536917, epoch: 1



 59%|██████████████████▉             | 72560/122310 [2:15:04<1:16:04, 10.90it/s][A
 59%|██████████████████▉             | 72564/122310 [2:15:05<1:29:31,  9.26it/s][A
 59%|██████████████████▉             | 72566/122310 [2:15:06<2:26:44,  5.65it/s][A
 59%|██████████████████▉             | 72575/122310 [2:15:07<1:54:35,  7.23it/s][A
 59%|██████████████████▉             | 72586/122310 [2:15:08<1:29:56,  9.21it/s][A
 59%|██████████████████▉             | 72598/122310 [2:15:09<1:14:48, 11.08it/s][A
 59%|██████████████████▉             | 72608/122310 [2:15:09<1:10:35, 11.74it/s][A
 59%|██████████████████▉             | 72621/122310 [2:15:10<1:02:04, 13.34it/s][A
 59%|███████████████████             | 72628/122310 [2:15:11<1:07:31, 12.26it/s][A
 59%|████████████████████▏             | 72643/122310 [2:15:12<56:05, 14.76it/s][A
 59%|████████████████████▏             | 72654/122310 [2:15:12<55:31, 14.90it/s][A
 59%|████████████████████▏             | 72667/122310 [2:15:13<52:06, 15.88

step: 23640, loss: 73.58612801233303, epoch: 1



 59%|███████████████████             | 72715/122310 [2:15:19<1:32:32,  8.93it/s][A
 59%|███████████████████             | 72721/122310 [2:15:19<1:33:56,  8.80it/s][A
 59%|███████████████████             | 72724/122310 [2:15:20<1:51:40,  7.40it/s][A
 59%|███████████████████             | 72727/122310 [2:15:21<2:07:52,  6.46it/s][A
 59%|███████████████████             | 72740/122310 [2:15:22<1:23:02,  9.95it/s][A
 59%|███████████████████             | 72745/122310 [2:15:22<1:30:59,  9.08it/s][A
 59%|███████████████████             | 72756/122310 [2:15:23<1:15:32, 10.93it/s][A
 59%|███████████████████             | 72767/122310 [2:15:24<1:07:18, 12.27it/s][A
 60%|████████████████████▏             | 72781/122310 [2:15:25<57:09, 14.44it/s][A
 60%|███████████████████             | 72783/122310 [2:15:25<1:15:36, 10.92it/s][A
 60%|███████████████████             | 72785/122310 [2:15:27<2:06:31,  6.52it/s][A
 60%|███████████████████             | 72787/122310 [2:15:27<2:26:34,  5.63

step: 23660, loss: 75.92340065965917, epoch: 1



 60%|███████████████████             | 72851/122310 [2:15:33<1:26:07,  9.57it/s][A
 60%|███████████████████             | 72857/122310 [2:15:34<1:29:27,  9.21it/s][A
 60%|███████████████████             | 72868/122310 [2:15:35<1:14:34, 11.05it/s][A
 60%|███████████████████             | 72879/122310 [2:15:35<1:06:49, 12.33it/s][A
 60%|███████████████████             | 72889/122310 [2:15:36<1:04:25, 12.79it/s][A
 60%|████████████████████▎             | 72901/122310 [2:15:37<59:03, 13.94it/s][A
 60%|████████████████████▎             | 72911/122310 [2:15:37<59:16, 13.89it/s][A
 60%|████████████████████▎             | 72922/122310 [2:15:38<57:34, 14.30it/s][A
 60%|███████████████████             | 72927/122310 [2:15:39<1:08:03, 12.09it/s][A
 60%|████████████████████▎             | 72941/122310 [2:15:40<57:38, 14.28it/s][A
 60%|████████████████████▎             | 72956/122310 [2:15:40<50:38, 16.24it/s][A
 60%|████████████████████▎             | 72966/122310 [2:15:41<52:49, 15.57

step: 23680, loss: 84.98374148995634, epoch: 1



 60%|███████████████████             | 73029/122310 [2:15:48<1:22:32,  9.95it/s][A
 60%|███████████████████             | 73041/122310 [2:15:48<1:08:43, 11.95it/s][A
 60%|███████████████████             | 73048/122310 [2:15:49<1:12:41, 11.30it/s][A
 60%|███████████████████             | 73056/122310 [2:15:50<1:12:55, 11.26it/s][A
 60%|███████████████████             | 73063/122310 [2:15:50<1:16:30, 10.73it/s][A
 60%|███████████████████             | 73069/122310 [2:15:51<1:22:36,  9.94it/s][A
 60%|████████████████████▎             | 73086/122310 [2:15:52<59:08, 13.87it/s][A
 60%|███████████████████             | 73094/122310 [2:15:53<1:02:38, 13.09it/s][A
 60%|███████████████████▏            | 73104/122310 [2:15:53<1:01:45, 13.28it/s][A
 60%|███████████████████▏            | 73109/122310 [2:15:54<1:12:13, 11.35it/s][A
 60%|███████████████████▏            | 73116/122310 [2:15:55<1:15:53, 10.80it/s][A
 60%|███████████████████▏            | 73123/122310 [2:15:56<1:19:39, 10.29

step: 23700, loss: 85.3722712851256, epoch: 1



 60%|███████████████████▏            | 73201/122310 [2:16:02<1:04:40, 12.65it/s][A
 60%|███████████████████▏            | 73209/122310 [2:16:03<1:07:48, 12.07it/s][A
 60%|███████████████████▏            | 73220/122310 [2:16:04<1:03:56, 12.80it/s][A
 60%|███████████████████▏            | 73231/122310 [2:16:05<1:01:13, 13.36it/s][A
 60%|███████████████████▏            | 73241/122310 [2:16:05<1:01:35, 13.28it/s][A
 60%|████████████████████▎             | 73255/122310 [2:16:06<54:59, 14.87it/s][A
 60%|███████████████████▏            | 73261/122310 [2:16:07<1:03:47, 12.81it/s][A
 60%|████████████████████▎             | 73275/122310 [2:16:08<56:15, 14.53it/s][A
 60%|████████████████████▎             | 73287/122310 [2:16:08<54:41, 14.94it/s][A
 60%|███████████████████▏            | 73292/122310 [2:16:09<1:05:38, 12.44it/s][A
 60%|███████████████████▏            | 73296/122310 [2:16:11<1:43:22,  7.90it/s][A
 60%|███████████████████▏            | 73299/122310 [2:16:12<1:57:13,  6.97

step: 23720, loss: 81.91756211761287, epoch: 1



 60%|███████████████████▏            | 73371/122310 [2:16:18<1:19:06, 10.31it/s][A
 60%|████████████████████▍             | 73390/122310 [2:16:18<58:24, 13.96it/s][A
 60%|███████████████████▏            | 73396/122310 [2:16:19<1:05:47, 12.39it/s][A
 60%|████████████████████▍             | 73411/122310 [2:16:20<56:14, 14.49it/s][A
 60%|███████████████████▏            | 73417/122310 [2:16:21<1:04:25, 12.65it/s][A
 60%|███████████████████▏            | 73425/122310 [2:16:21<1:07:27, 12.08it/s][A
 60%|███████████████████▏            | 73429/122310 [2:16:22<1:20:34, 10.11it/s][A
 60%|███████████████████▏            | 73433/122310 [2:16:23<1:33:26,  8.72it/s][A
 60%|███████████████████▏            | 73442/122310 [2:16:24<1:24:19,  9.66it/s][A
 60%|███████████████████▏            | 73458/122310 [2:16:24<1:02:21, 13.06it/s][A
 60%|███████████████████▏            | 73464/122310 [2:16:25<1:10:31, 11.54it/s][A
 60%|███████████████████▏            | 73476/122310 [2:16:26<1:03:16, 12.86

step: 23740, loss: 73.07653476597814, epoch: 1



 60%|███████████████████▏            | 73553/122310 [2:16:33<1:07:16, 12.08it/s][A
 60%|███████████████████▏            | 73561/122310 [2:16:33<1:09:42, 11.65it/s][A
 60%|███████████████████▏            | 73572/122310 [2:16:34<1:05:02, 12.49it/s][A
 60%|███████████████████▎            | 73578/122310 [2:16:35<1:12:43, 11.17it/s][A
 60%|███████████████████▎            | 73588/122310 [2:16:36<1:08:47, 11.80it/s][A
 60%|███████████████████▎            | 73597/122310 [2:16:36<1:08:34, 11.84it/s][A
 60%|███████████████████▎            | 73605/122310 [2:16:37<1:10:41, 11.48it/s][A
 60%|███████████████████▎            | 73617/122310 [2:16:38<1:03:24, 12.80it/s][A
 60%|███████████████████▎            | 73625/122310 [2:16:39<1:06:49, 12.14it/s][A
 60%|███████████████████▎            | 73628/122310 [2:16:39<1:23:59,  9.66it/s][A
 60%|███████████████████▎            | 73634/122310 [2:16:40<1:28:30,  9.17it/s][A
 60%|███████████████████▎            | 73646/122310 [2:16:41<1:12:18, 11.22

step: 23760, loss: 90.1281659718611, epoch: 1



 60%|███████████████████▎            | 73745/122310 [2:16:48<1:00:52, 13.30it/s][A
 60%|███████████████████▎            | 73755/122310 [2:16:49<1:18:21, 10.33it/s][A
 60%|███████████████████▎            | 73760/122310 [2:16:50<1:25:17,  9.49it/s][A
 60%|███████████████████▎            | 73767/122310 [2:16:51<1:25:33,  9.46it/s][A
 60%|███████████████████▎            | 73783/122310 [2:16:51<1:03:30, 12.74it/s][A
 60%|███████████████████▎            | 73789/122310 [2:16:52<1:10:47, 11.42it/s][A
 60%|███████████████████▎            | 73796/122310 [2:16:53<1:14:37, 10.84it/s][A
 60%|███████████████████▎            | 73807/122310 [2:16:54<1:07:26, 11.99it/s][A
 60%|███████████████████▎            | 73812/122310 [2:16:54<1:17:32, 10.42it/s][A
 60%|███████████████████▎            | 73817/122310 [2:16:55<1:26:39,  9.33it/s][A
 60%|████████████████████▌             | 73836/122310 [2:16:56<57:27, 14.06it/s][A
 60%|███████████████████▎            | 73839/122310 [2:16:57<1:12:24, 11.16

step: 23780, loss: 75.08452262157756, epoch: 1



 60%|███████████████████▎            | 73900/122310 [2:17:03<1:20:21, 10.04it/s][A
 60%|███████████████████▎            | 73902/122310 [2:17:03<1:42:45,  7.85it/s][A
 60%|███████████████████▎            | 73903/122310 [2:17:04<2:16:37,  5.91it/s][A
 60%|███████████████████▎            | 73917/122310 [2:17:05<1:22:32,  9.77it/s][A
 60%|███████████████████▎            | 73924/122310 [2:17:05<1:23:57,  9.60it/s][A
 60%|███████████████████▎            | 73929/122310 [2:17:06<1:32:05,  8.76it/s][A
 60%|███████████████████▎            | 73936/122310 [2:17:07<1:30:00,  8.96it/s][A
 60%|███████████████████▎            | 73946/122310 [2:17:08<1:18:15, 10.30it/s][A
 60%|███████████████████▎            | 73953/122310 [2:17:08<1:20:25, 10.02it/s][A
 60%|███████████████████▎            | 73964/122310 [2:17:09<1:10:27, 11.44it/s][A
 60%|███████████████████▎            | 73971/122310 [2:17:10<1:14:24, 10.83it/s][A
 60%|███████████████████▎            | 73974/122310 [2:17:11<1:31:35,  8.80

step: 23800, loss: 77.32823111466932, epoch: 1



 61%|████████████████████▌             | 74069/122310 [2:17:17<56:45, 14.17it/s][A
 61%|███████████████████▍            | 74075/122310 [2:17:18<1:05:12, 12.33it/s][A
 61%|███████████████████▍            | 74086/122310 [2:17:19<1:01:22, 13.09it/s][A
 61%|███████████████████▍            | 74094/122310 [2:17:20<1:04:54, 12.38it/s][A
 61%|███████████████████▍            | 74099/122310 [2:17:20<1:15:13, 10.68it/s][A
 61%|███████████████████▍            | 74111/122310 [2:17:21<1:05:14, 12.31it/s][A
 61%|███████████████████▍            | 74122/122310 [2:17:22<1:01:30, 13.06it/s][A
 61%|███████████████████▍            | 74130/122310 [2:17:23<1:04:58, 12.36it/s][A
 61%|████████████████████▌             | 74147/122310 [2:17:23<51:50, 15.48it/s][A
 61%|███████████████████▍            | 74151/122310 [2:17:24<1:04:23, 12.46it/s][A
 61%|████████████████████▌             | 74163/122310 [2:17:25<59:13, 13.55it/s][A
 61%|███████████████████▍            | 74166/122310 [2:17:26<1:15:02, 10.69

step: 23820, loss: 75.60444035777586, epoch: 1



 61%|███████████████████▍            | 74236/122310 [2:17:32<1:07:50, 11.81it/s][A
 61%|███████████████████▍            | 74245/122310 [2:17:33<1:07:12, 11.92it/s][A
 61%|███████████████████▍            | 74249/122310 [2:17:34<1:20:45,  9.92it/s][A
 61%|███████████████████▍            | 74251/122310 [2:17:35<1:43:19,  7.75it/s][A
 61%|███████████████████▍            | 74257/122310 [2:17:35<1:42:08,  7.84it/s][A
 61%|███████████████████▍            | 74260/122310 [2:17:36<2:00:06,  6.67it/s][A
 61%|███████████████████▍            | 74262/122310 [2:17:37<2:26:45,  5.46it/s][A
 61%|███████████████████▍            | 74265/122310 [2:17:38<2:38:28,  5.05it/s][A
 61%|███████████████████▍            | 74271/122310 [2:17:38<2:15:26,  5.91it/s][A
 61%|███████████████████▍            | 74276/122310 [2:17:39<2:09:50,  6.17it/s][A
 61%|███████████████████▍            | 74296/122310 [2:17:40<1:04:48, 12.35it/s][A
 61%|███████████████████▍            | 74303/122310 [2:17:41<1:09:51, 11.45

step: 23840, loss: 82.0627645597663, epoch: 1



 61%|███████████████████▍            | 74370/122310 [2:17:47<1:12:52, 10.96it/s][A
 61%|███████████████████▍            | 74379/122310 [2:17:48<1:10:43, 11.30it/s][A
 61%|███████████████████▍            | 74386/122310 [2:17:49<1:14:33, 10.71it/s][A
 61%|███████████████████▍            | 74392/122310 [2:17:50<1:20:28,  9.92it/s][A
 61%|███████████████████▍            | 74399/122310 [2:17:50<1:21:34,  9.79it/s][A
 61%|███████████████████▍            | 74406/122310 [2:17:51<1:22:40,  9.66it/s][A
 61%|███████████████████▍            | 74413/122310 [2:17:52<1:23:10,  9.60it/s][A
 61%|███████████████████▍            | 74426/122310 [2:17:52<1:06:41, 11.97it/s][A
 61%|███████████████████▍            | 74434/122310 [2:17:53<1:09:06, 11.54it/s][A
 61%|███████████████████▍            | 74439/122310 [2:17:54<1:19:12, 10.07it/s][A
 61%|███████████████████▍            | 74441/122310 [2:17:55<1:41:03,  7.89it/s][A
 61%|███████████████████▍            | 74455/122310 [2:17:55<1:11:38, 11.13

step: 23860, loss: 77.01039344196975, epoch: 1



 61%|███████████████████▌            | 74538/122310 [2:18:02<1:02:20, 12.77it/s][A
 61%|███████████████████▌            | 74546/122310 [2:18:03<1:05:17, 12.19it/s][A
 61%|███████████████████▌            | 74557/122310 [2:18:04<1:01:26, 12.95it/s][A
 61%|████████████████████▋             | 74571/122310 [2:18:04<54:07, 14.70it/s][A
 61%|████████████████████▋             | 74580/122310 [2:18:05<57:28, 13.84it/s][A
 61%|███████████████████▌            | 74582/122310 [2:18:06<1:15:40, 10.51it/s][A
 61%|███████████████████▌            | 74593/122310 [2:18:07<1:07:34, 11.77it/s][A
 61%|███████████████████▌            | 74602/122310 [2:18:07<1:07:16, 11.82it/s][A
 61%|███████████████████▌            | 74613/122310 [2:18:08<1:02:45, 12.67it/s][A
 61%|███████████████████▌            | 74623/122310 [2:18:09<1:01:36, 12.90it/s][A
 61%|████████████████████▋             | 74634/122310 [2:18:10<59:12, 13.42it/s][A
 61%|████████████████████▊             | 74651/122310 [2:18:10<49:14, 16.13

step: 23880, loss: 80.85537537618697, epoch: 1



 61%|███████████████████▌            | 74711/122310 [2:18:17<1:27:01,  9.12it/s][A
 61%|███████████████████▌            | 74721/122310 [2:18:18<1:16:11, 10.41it/s][A
 61%|████████████████████▊             | 74740/122310 [2:18:19<53:37, 14.79it/s][A
 61%|████████████████████▊             | 74754/122310 [2:18:19<49:32, 16.00it/s][A
 61%|████████████████████▊             | 74767/122310 [2:18:20<48:16, 16.41it/s][A
 61%|████████████████████▊             | 74779/122310 [2:18:21<48:41, 16.27it/s][A
 61%|███████████████████▌            | 74783/122310 [2:18:22<1:00:50, 13.02it/s][A
 61%|███████████████████▌            | 74793/122310 [2:18:22<1:00:38, 13.06it/s][A
 61%|████████████████████▊             | 74808/122310 [2:18:23<52:14, 15.16it/s][A
 61%|███████████████████▌            | 74814/122310 [2:18:24<1:00:38, 13.05it/s][A
 61%|███████████████████▌            | 74822/122310 [2:18:25<1:04:00, 12.36it/s][A
 61%|███████████████████▌            | 74828/122310 [2:18:25<1:11:36, 11.05

step: 23900, loss: 67.8054303729588, epoch: 1



 61%|███████████████████▌            | 74904/122310 [2:18:32<1:28:20,  8.94it/s][A
 61%|███████████████████▌            | 74916/122310 [2:18:33<1:10:45, 11.16it/s][A
 61%|███████████████████▌            | 74927/122310 [2:18:34<1:04:17, 12.28it/s][A
 61%|███████████████████▌            | 74935/122310 [2:18:34<1:06:17, 11.91it/s][A
 61%|███████████████████▌            | 74945/122310 [2:18:35<1:03:29, 12.43it/s][A
 61%|████████████████████▊             | 74960/122310 [2:18:36<53:11, 14.84it/s][A
 61%|████████████████████▊             | 74974/122310 [2:18:37<49:01, 16.09it/s][A
 61%|████████████████████▊             | 74990/122310 [2:18:37<44:26, 17.74it/s][A
 61%|████████████████████▊             | 74993/122310 [2:18:38<57:50, 13.64it/s][A
 61%|███████████████████▌            | 75000/122310 [2:18:39<1:03:17, 12.46it/s][A
 61%|███████████████████▌            | 75008/122310 [2:18:39<1:05:46, 11.99it/s][A
 61%|████████████████████▊             | 75022/122310 [2:18:40<55:58, 14.08

step: 23920, loss: 77.9934469472821, epoch: 1



 61%|███████████████████▋            | 75100/122310 [2:18:47<1:13:00, 10.78it/s][A
 61%|███████████████████▋            | 75107/122310 [2:18:48<1:15:30, 10.42it/s][A
 61%|███████████████████▋            | 75112/122310 [2:18:48<1:24:03,  9.36it/s][A
 61%|███████████████████▋            | 75123/122310 [2:18:49<1:11:34, 10.99it/s][A
 61%|███████████████████▋            | 75128/122310 [2:18:50<1:20:41,  9.75it/s][A
 61%|███████████████████▋            | 75136/122310 [2:18:50<1:17:47, 10.11it/s][A
 61%|████████████████████▉             | 75151/122310 [2:18:51<59:33, 13.20it/s][A
 61%|███████████████████▋            | 75153/122310 [2:18:52<1:18:11, 10.05it/s][A
 61%|███████████████████▋            | 75158/122310 [2:18:53<1:26:23,  9.10it/s][A
 61%|███████████████████▋            | 75161/122310 [2:18:53<1:43:27,  7.60it/s][A
 61%|███████████████████▋            | 75169/122310 [2:18:54<1:31:40,  8.57it/s][A
 61%|███████████████████▋            | 75181/122310 [2:18:55<1:11:51, 10.93

step: 23940, loss: 86.06653343163951, epoch: 1



 62%|███████████████████▋            | 75237/122310 [2:19:01<1:30:23,  8.68it/s][A
 62%|███████████████████▋            | 75247/122310 [2:19:02<1:18:03, 10.05it/s][A
 62%|███████████████████▋            | 75254/122310 [2:19:03<1:19:04,  9.92it/s][A
 62%|███████████████████▋            | 75258/122310 [2:19:04<1:30:51,  8.63it/s][A
 62%|███████████████████▋            | 75268/122310 [2:19:04<1:17:47, 10.08it/s][A
 62%|███████████████████▋            | 75274/122310 [2:19:05<1:22:34,  9.49it/s][A
 62%|████████████████████▉             | 75291/122310 [2:19:06<58:14, 13.45it/s][A
 62%|███████████████████▋            | 75294/122310 [2:19:07<1:13:15, 10.70it/s][A
 62%|████████████████████▉             | 75308/122310 [2:19:07<59:39, 13.13it/s][A
 62%|████████████████████▉             | 75320/122310 [2:19:08<55:26, 14.12it/s][A
 62%|████████████████████▉             | 75329/122310 [2:19:09<57:42, 13.57it/s][A
 62%|████████████████████▉             | 75350/122310 [2:19:10<43:18, 18.07

step: 23960, loss: 80.95760124734177, epoch: 1



 62%|███████████████████▋            | 75428/122310 [2:19:16<1:00:41, 12.87it/s][A
 62%|███████████████████▋            | 75430/122310 [2:19:17<1:19:29,  9.83it/s][A
 62%|███████████████████▋            | 75435/122310 [2:19:18<1:27:40,  8.91it/s][A
 62%|███████████████████▋            | 75437/122310 [2:19:18<1:50:18,  7.08it/s][A
 62%|███████████████████▋            | 75445/122310 [2:19:19<1:34:32,  8.26it/s][A
 62%|███████████████████▋            | 75450/122310 [2:19:20<1:40:18,  7.79it/s][A
 62%|███████████████████▋            | 75456/122310 [2:19:21<1:38:34,  7.92it/s][A
 62%|███████████████████▋            | 75473/122310 [2:19:21<1:02:29, 12.49it/s][A
 62%|███████████████████▋            | 75477/122310 [2:19:23<1:37:50,  7.98it/s][A
 62%|███████████████████▋            | 75480/122310 [2:19:24<1:50:44,  7.05it/s][A
 62%|███████████████████▋            | 75484/122310 [2:19:24<1:57:55,  6.62it/s][A
 62%|███████████████████▊            | 75491/122310 [2:19:25<1:45:05,  7.43

step: 23980, loss: 66.33424091453158, epoch: 1



 62%|███████████████████▊            | 75547/122310 [2:19:31<1:27:49,  8.88it/s][A
 62%|███████████████████▊            | 75554/122310 [2:19:32<1:26:39,  8.99it/s][A
 62%|███████████████████▊            | 75559/122310 [2:19:33<1:34:00,  8.29it/s][A
 62%|███████████████████▊            | 75569/122310 [2:19:33<1:20:19,  9.70it/s][A
 62%|███████████████████▊            | 75583/122310 [2:19:34<1:03:13, 12.32it/s][A
 62%|███████████████████▊            | 75589/122310 [2:19:35<1:10:51, 10.99it/s][A
 62%|███████████████████▊            | 75600/122310 [2:19:36<1:05:02, 11.97it/s][A
 62%|███████████████████▊            | 75603/122310 [2:19:36<1:21:06,  9.60it/s][A
 62%|███████████████████▊            | 75612/122310 [2:19:37<1:16:04, 10.23it/s][A
 62%|███████████████████▊            | 75619/122310 [2:19:38<1:18:43,  9.89it/s][A
 62%|███████████████████▊            | 75623/122310 [2:19:39<1:31:52,  8.47it/s][A
 62%|███████████████████▊            | 75630/122310 [2:19:39<1:29:42,  8.67

step: 24000, loss: 86.11758867224573, epoch: 1
sim1 and sim2 are 0.5124095064488104, 0.2341395979194794
cosine of pred and queen: 0.1877772541705383
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharastra


 62%|███████████████████▊            | 75697/122310 [2:19:58<1:04:52, 11.97it/s][A

Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: currency
Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: yen
Actual: india:rupee::denmark:krone, pred: higher
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: limited
Actual: sweden:swedish::netherlands:dutch, pred: also
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: budgetary
Actual: poland:polish::italy:italian, pred: official
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: efforts
Actual: denmark:danish::germany:german, pred: size
Accur


 62%|███████████████████▏           | 75703/122310 [2:21:06<39:37:49,  3.06s/it][A
 62%|███████████████████▏           | 75704/122310 [2:21:07<38:07:37,  2.95s/it][A
 62%|███████████████████▏           | 75711/122310 [2:21:08<25:52:59,  2.00s/it][A
 62%|███████████████████▏           | 75715/122310 [2:21:08<20:51:27,  1.61s/it][A
 62%|███████████████████▏           | 75727/122310 [2:21:09<11:16:02,  1.15it/s][A
 62%|███████████████████▏           | 75729/122310 [2:21:10<10:36:25,  1.22it/s][A
 62%|███████████████████▊            | 75738/122310 [2:21:11<6:49:08,  1.90it/s][A
 62%|███████████████████▊            | 75748/122310 [2:21:11<4:33:13,  2.84it/s][A
 62%|███████████████████▊            | 75765/122310 [2:21:12<2:37:30,  4.93it/s][A
 62%|███████████████████▊            | 75781/122310 [2:21:13<1:49:39,  7.07it/s][A
 62%|███████████████████▊            | 75786/122310 [2:21:14<1:50:22,  7.02it/s][A
 62%|███████████████████▊            | 75793/122310 [2:21:14<1:44:02,  7.45

step: 24020, loss: 73.17926798799675, epoch: 1



 62%|███████████████████▊            | 75886/122310 [2:21:22<1:11:16, 10.86it/s][A
 62%|███████████████████▊            | 75889/122310 [2:21:23<1:26:59,  8.89it/s][A
 62%|███████████████████▊            | 75895/122310 [2:21:23<1:29:36,  8.63it/s][A
 62%|█████████████████████             | 75913/122310 [2:21:24<58:21, 13.25it/s][A
 62%|███████████████████▊            | 75919/122310 [2:21:25<1:06:16, 11.67it/s][A
 62%|███████████████████▊            | 75929/122310 [2:21:26<1:03:27, 12.18it/s][A
 62%|███████████████████▊            | 75935/122310 [2:21:26<1:10:41, 10.93it/s][A
 62%|███████████████████▊            | 75947/122310 [2:21:27<1:01:53, 12.49it/s][A
 62%|███████████████████▊            | 75954/122310 [2:21:28<1:06:58, 11.54it/s][A
 62%|███████████████████▊            | 75957/122310 [2:21:29<1:23:19,  9.27it/s][A
 62%|███████████████████▊            | 75964/122310 [2:21:29<1:23:08,  9.29it/s][A
 62%|███████████████████▉            | 75976/122310 [2:21:30<1:08:07, 11.33

step: 24040, loss: 87.19544058325552, epoch: 1



 62%|███████████████████▉            | 76032/122310 [2:21:37<1:26:50,  8.88it/s][A
 62%|███████████████████▉            | 76039/122310 [2:21:38<1:25:31,  9.02it/s][A
 62%|███████████████████▉            | 76049/122310 [2:21:38<1:14:29, 10.35it/s][A
 62%|███████████████████▉            | 76056/122310 [2:21:40<1:39:34,  7.74it/s][A
 62%|███████████████████▉            | 76072/122310 [2:21:41<1:09:09, 11.14it/s][A
 62%|███████████████████▉            | 76079/122310 [2:21:41<1:12:13, 10.67it/s][A
 62%|███████████████████▉            | 76084/122310 [2:21:42<1:20:23,  9.58it/s][A
 62%|███████████████████▉            | 76091/122310 [2:21:43<1:21:06,  9.50it/s][A
 62%|███████████████████▉            | 76102/122310 [2:21:44<1:10:20, 10.95it/s][A
 62%|███████████████████▉            | 76109/122310 [2:21:44<1:13:19, 10.50it/s][A
 62%|█████████████████████▏            | 76127/122310 [2:21:45<53:19, 14.43it/s][A
 62%|█████████████████████▏            | 76135/122310 [2:21:46<57:58, 13.27

step: 24060, loss: 81.85995831802983, epoch: 1



 62%|███████████████████▉            | 76204/122310 [2:21:52<1:06:41, 11.52it/s][A
 62%|███████████████████▉            | 76208/122310 [2:21:53<1:19:25,  9.67it/s][A
 62%|███████████████████▉            | 76214/122310 [2:21:53<1:23:30,  9.20it/s][A
 62%|███████████████████▉            | 76222/122310 [2:21:54<1:19:43,  9.63it/s][A
 62%|███████████████████▉            | 76233/122310 [2:21:55<1:08:45, 11.17it/s][A
 62%|███████████████████▉            | 76241/122310 [2:21:56<1:09:55, 10.98it/s][A
 62%|█████████████████████▏            | 76258/122310 [2:21:56<53:06, 14.45it/s][A
 62%|███████████████████▉            | 76263/122310 [2:21:57<1:03:18, 12.12it/s][A
 62%|███████████████████▉            | 76269/122310 [2:21:58<1:10:21, 10.91it/s][A
 62%|███████████████████▉            | 76278/122310 [2:21:59<1:08:16, 11.24it/s][A
 62%|█████████████████████▏            | 76292/122310 [2:21:59<57:24, 13.36it/s][A
 62%|█████████████████████▏            | 76303/122310 [2:22:00<55:47, 13.74

step: 24080, loss: 86.31359842317666, epoch: 1



 62%|███████████████████▉            | 76375/122310 [2:22:07<1:09:13, 11.06it/s][A
 62%|█████████████████████▏            | 76388/122310 [2:22:08<59:05, 12.95it/s][A
 62%|███████████████████▉            | 76391/122310 [2:22:08<1:14:40, 10.25it/s][A
 62%|███████████████████▉            | 76395/122310 [2:22:09<1:26:16,  8.87it/s][A
 62%|███████████████████▉            | 76403/122310 [2:22:10<1:21:09,  9.43it/s][A
 62%|███████████████████▉            | 76407/122310 [2:22:10<1:33:09,  8.21it/s][A
 62%|███████████████████▉            | 76415/122310 [2:22:11<1:25:11,  8.98it/s][A
 62%|███████████████████▉            | 76424/122310 [2:22:12<1:16:49,  9.95it/s][A
 62%|███████████████████▉            | 76434/122310 [2:22:13<1:09:40, 10.97it/s][A
 62%|███████████████████▉            | 76440/122310 [2:22:13<1:15:26, 10.13it/s][A
 63%|████████████████████            | 76449/122310 [2:22:14<1:11:27, 10.70it/s][A
 63%|████████████████████            | 76458/122310 [2:22:15<1:08:34, 11.14

step: 24100, loss: 75.37441536373488, epoch: 1



 63%|████████████████████            | 76528/122310 [2:22:22<1:03:28, 12.02it/s][A
 63%|████████████████████            | 76533/122310 [2:22:22<1:13:03, 10.44it/s][A
 63%|████████████████████            | 76540/122310 [2:22:23<1:15:11, 10.15it/s][A
 63%|█████████████████████▎            | 76555/122310 [2:22:24<57:57, 13.16it/s][A
 63%|█████████████████████▎            | 76566/122310 [2:22:25<56:01, 13.61it/s][A
 63%|█████████████████████▎            | 76574/122310 [2:22:25<59:34, 12.79it/s][A
 63%|████████████████████            | 76579/122310 [2:22:26<1:09:24, 10.98it/s][A
 63%|████████████████████            | 76585/122310 [2:22:27<1:15:23, 10.11it/s][A
 63%|████████████████████            | 76588/122310 [2:22:28<1:32:26,  8.24it/s][A
 63%|████████████████████            | 76603/122310 [2:22:28<1:04:25, 11.82it/s][A
 63%|████████████████████            | 76609/122310 [2:22:29<1:11:13, 10.69it/s][A
 63%|█████████████████████▎            | 76624/122310 [2:22:30<56:16, 13.53

step: 24120, loss: 79.11677523758867, epoch: 1



 63%|████████████████████            | 76708/122310 [2:22:37<1:01:05, 12.44it/s][A
 63%|████████████████████            | 76710/122310 [2:22:37<1:19:30,  9.56it/s][A
 63%|████████████████████            | 76715/122310 [2:22:38<1:27:18,  8.70it/s][A
 63%|████████████████████            | 76723/122310 [2:22:39<1:21:27,  9.33it/s][A
 63%|████████████████████            | 76730/122310 [2:22:40<1:21:14,  9.35it/s][A
 63%|████████████████████            | 76738/122310 [2:22:40<1:17:45,  9.77it/s][A
 63%|████████████████████            | 76745/122310 [2:22:41<1:18:39,  9.66it/s][A
 63%|████████████████████            | 76746/122310 [2:22:42<1:45:48,  7.18it/s][A
 63%|████████████████████            | 76750/122310 [2:22:42<1:54:39,  6.62it/s][A
 63%|████████████████████            | 76760/122310 [2:22:43<1:27:42,  8.66it/s][A
 63%|████████████████████            | 76770/122310 [2:22:44<1:15:13, 10.09it/s][A
 63%|████████████████████            | 76780/122310 [2:22:45<1:08:29, 11.08

step: 24140, loss: 76.95036456483348, epoch: 1



 63%|████████████████████            | 76859/122310 [2:22:51<1:09:14, 10.94it/s][A
 63%|████████████████████            | 76863/122310 [2:22:52<1:21:37,  9.28it/s][A
 63%|████████████████████            | 76870/122310 [2:22:53<1:21:13,  9.32it/s][A
 63%|████████████████████            | 76879/122310 [2:22:54<1:14:29, 10.17it/s][A
 63%|████████████████████            | 76886/122310 [2:22:54<1:16:14,  9.93it/s][A
 63%|█████████████████████▍            | 76905/122310 [2:22:55<51:46, 14.61it/s][A
 63%|█████████████████████▍            | 76912/122310 [2:22:56<57:58, 13.05it/s][A
 63%|█████████████████████▍            | 76921/122310 [2:22:57<59:21, 12.75it/s][A
 63%|█████████████████████▍            | 76931/122310 [2:22:57<58:24, 12.95it/s][A
 63%|█████████████████████▍            | 76943/122310 [2:22:58<54:25, 13.89it/s][A
 63%|████████████████████▏           | 76950/122310 [2:22:59<1:00:16, 12.54it/s][A
 63%|████████████████████▏           | 76959/122310 [2:23:00<1:01:15, 12.34

step: 24160, loss: 102.07828428824756, epoch: 1



 63%|█████████████████████▍            | 77039/122310 [2:23:06<59:53, 12.60it/s][A
 63%|█████████████████████▍            | 77055/122310 [2:23:07<49:19, 15.29it/s][A
 63%|█████████████████████▍            | 77063/122310 [2:23:08<54:07, 13.93it/s][A
 63%|█████████████████████▍            | 77070/122310 [2:23:09<59:58, 12.57it/s][A
 63%|█████████████████████▍            | 77080/122310 [2:23:09<58:52, 12.80it/s][A
 63%|█████████████████████▍            | 77090/122310 [2:23:10<58:03, 12.98it/s][A
 63%|████████████████████▏           | 77096/122310 [2:23:11<1:05:19, 11.53it/s][A
 63%|█████████████████████▍            | 77112/122310 [2:23:12<51:57, 14.50it/s][A
 63%|█████████████████████▍            | 77119/122310 [2:23:12<58:01, 12.98it/s][A
 63%|████████████████████▏           | 77123/122310 [2:23:13<1:10:30, 10.68it/s][A
 63%|████████████████████▏           | 77132/122310 [2:23:14<1:08:02, 11.06it/s][A
 63%|████████████████████▏           | 77142/122310 [2:23:15<1:03:54, 11.78

step: 24180, loss: 79.1543151797388, epoch: 1



 63%|████████████████████▏           | 77192/122310 [2:23:21<2:01:34,  6.19it/s][A
 63%|████████████████████▏           | 77193/122310 [2:23:22<2:32:54,  4.92it/s][A
 63%|████████████████████▏           | 77199/122310 [2:23:23<2:09:48,  5.79it/s][A
 63%|████████████████████▏           | 77206/122310 [2:23:24<1:51:37,  6.73it/s][A
 63%|████████████████████▏           | 77207/122310 [2:23:24<2:24:23,  5.21it/s][A
 63%|████████████████████▏           | 77216/122310 [2:23:25<1:45:36,  7.12it/s][A
 63%|████████████████████▏           | 77226/122310 [2:23:26<1:24:46,  8.86it/s][A
 63%|████████████████████▏           | 77229/122310 [2:23:27<1:40:17,  7.49it/s][A
 63%|████████████████████▏           | 77239/122310 [2:23:27<1:21:26,  9.22it/s][A
 63%|████████████████████▏           | 77249/122310 [2:23:28<1:11:42, 10.47it/s][A
 63%|████████████████████▏           | 77252/122310 [2:23:29<1:27:00,  8.63it/s][A
 63%|████████████████████▏           | 77259/122310 [2:23:29<1:24:33,  8.88

step: 24200, loss: 95.39112044789384, epoch: 1



 63%|████████████████████▏           | 77320/122310 [2:23:36<1:20:49,  9.28it/s][A
 63%|████████████████████▏           | 77327/122310 [2:23:37<1:20:05,  9.36it/s][A
 63%|█████████████████████▌            | 77347/122310 [2:23:38<50:56, 14.71it/s][A
 63%|█████████████████████▌            | 77354/122310 [2:23:38<56:53, 13.17it/s][A
 63%|█████████████████████▌            | 77362/122310 [2:23:39<59:58, 12.49it/s][A
 63%|█████████████████████▌            | 77372/122310 [2:23:40<58:42, 12.76it/s][A
 63%|█████████████████████▌            | 77381/122310 [2:23:41<59:13, 12.64it/s][A
 63%|█████████████████████▌            | 77397/122310 [2:23:41<48:33, 15.42it/s][A
 63%|█████████████████████▌            | 77402/122310 [2:23:42<58:26, 12.81it/s][A
 63%|████████████████████▎           | 77409/122310 [2:23:43<1:03:28, 11.79it/s][A
 63%|████████████████████▎           | 77418/122310 [2:23:43<1:02:46, 11.92it/s][A
 63%|████████████████████▎           | 77425/122310 [2:23:44<1:06:45, 11.21

step: 24220, loss: 72.63446847250641, epoch: 1



 63%|████████████████████▎           | 77503/122310 [2:23:51<1:00:59, 12.24it/s][A
 63%|████████████████████▎           | 77512/122310 [2:23:52<1:00:55, 12.25it/s][A
 63%|█████████████████████▌            | 77523/122310 [2:23:52<57:10, 13.05it/s][A
 63%|████████████████████▎           | 77528/122310 [2:23:53<1:06:34, 11.21it/s][A
 63%|████████████████████▎           | 77531/122310 [2:23:54<1:22:22,  9.06it/s][A
 63%|████████████████████▎           | 77545/122310 [2:23:55<1:02:02, 12.03it/s][A
 63%|████████████████████▎           | 77553/122310 [2:23:55<1:03:45, 11.70it/s][A
 63%|████████████████████▎           | 77560/122310 [2:23:56<1:07:31, 11.05it/s][A
 63%|████████████████████▎           | 77568/122310 [2:23:57<1:07:55, 10.98it/s][A
 63%|████████████████████▎           | 77571/122310 [2:23:57<1:23:34,  8.92it/s][A
 63%|████████████████████▎           | 77578/122310 [2:23:58<1:21:58,  9.09it/s][A
 63%|█████████████████████▌            | 77598/122310 [2:23:59<51:34, 14.45

step: 24240, loss: 72.06912135036899, epoch: 1



 63%|████████████████████▎           | 77657/122310 [2:24:06<1:28:08,  8.44it/s][A
 63%|████████████████████▎           | 77661/122310 [2:24:06<1:38:40,  7.54it/s][A
 63%|████████████████████▎           | 77666/122310 [2:24:07<1:41:40,  7.32it/s][A
 64%|████████████████████▎           | 77679/122310 [2:24:08<1:11:18, 10.43it/s][A
 64%|████████████████████▎           | 77690/122310 [2:24:09<1:03:11, 11.77it/s][A
 64%|█████████████████████▌            | 77702/122310 [2:24:09<56:34, 13.14it/s][A
 64%|█████████████████████▌            | 77716/122310 [2:24:10<49:59, 14.87it/s][A
 64%|█████████████████████▌            | 77735/122310 [2:24:11<41:09, 18.05it/s][A
 64%|█████████████████████▌            | 77738/122310 [2:24:12<53:21, 13.92it/s][A
 64%|█████████████████████▌            | 77751/122310 [2:24:12<49:29, 15.01it/s][A
 64%|█████████████████████▌            | 77763/122310 [2:24:13<48:19, 15.37it/s][A
 64%|█████████████████████▌            | 77770/122310 [2:24:14<54:34, 13.60

step: 24260, loss: 102.70639995160239, epoch: 1



 64%|████████████████████▎           | 77773/122310 [2:24:24<5:34:56,  2.22it/s][A
 64%|████████████████████▎           | 77775/122310 [2:24:25<5:28:28,  2.26it/s][A
 64%|████████████████████▎           | 77777/122310 [2:24:25<5:21:24,  2.31it/s][A
 64%|████████████████████▎           | 77784/122310 [2:24:26<3:49:12,  3.24it/s][A
 64%|████████████████████▎           | 77791/122310 [2:24:27<2:56:08,  4.21it/s][A
 64%|████████████████████▎           | 77796/122310 [2:24:28<2:38:30,  4.68it/s][A
 64%|████████████████████▎           | 77811/122310 [2:24:28<1:33:48,  7.91it/s][A
 64%|████████████████████▎           | 77824/122310 [2:24:29<1:13:07, 10.14it/s][A
 64%|████████████████████▎           | 77831/122310 [2:24:30<1:14:14,  9.98it/s][A
 64%|████████████████████▎           | 77845/122310 [2:24:31<1:00:07, 12.33it/s][A
 64%|█████████████████████▋            | 77858/122310 [2:24:31<54:10, 13.68it/s][A
 64%|████████████████████▎           | 77864/122310 [2:24:32<1:01:23, 12.07

step: 24280, loss: 70.278930704015, epoch: 1



 64%|█████████████████████▋            | 77908/122310 [2:24:35<55:36, 13.31it/s][A
 64%|█████████████████████▋            | 77920/122310 [2:24:36<52:15, 14.16it/s][A
 64%|█████████████████████▋            | 77931/122310 [2:24:37<51:38, 14.32it/s][A
 64%|█████████████████████▋            | 77953/122310 [2:24:37<39:15, 18.83it/s][A
 64%|█████████████████████▋            | 77965/122310 [2:24:38<41:06, 17.98it/s][A
 64%|█████████████████████▋            | 77971/122310 [2:24:39<49:14, 15.01it/s][A
 64%|█████████████████████▋            | 77980/122310 [2:24:40<52:19, 14.12it/s][A
 64%|█████████████████████▋            | 77989/122310 [2:24:40<54:56, 13.45it/s][A
 64%|█████████████████████▋            | 77997/122310 [2:24:41<58:24, 12.65it/s][A
 64%|█████████████████████▋            | 78007/122310 [2:24:42<57:26, 12.85it/s][A
 64%|████████████████████▍           | 78015/122310 [2:24:43<1:00:20, 12.24it/s][A
 64%|█████████████████████▋            | 78025/122310 [2:24:43<58:43, 12.57

step: 24300, loss: 75.67500289192223, epoch: 1



 64%|████████████████████▍           | 78083/122310 [2:24:50<1:26:18,  8.54it/s][A
 64%|████████████████████▍           | 78087/122310 [2:24:51<1:35:32,  7.71it/s][A
 64%|████████████████████▍           | 78093/122310 [2:24:52<1:34:24,  7.81it/s][A
 64%|████████████████████▍           | 78100/122310 [2:24:52<1:29:14,  8.26it/s][A
 64%|████████████████████▍           | 78108/122310 [2:24:53<1:22:06,  8.97it/s][A
 64%|████████████████████▍           | 78114/122310 [2:24:54<1:25:14,  8.64it/s][A
 64%|████████████████████▍           | 78123/122310 [2:24:55<1:16:34,  9.62it/s][A
 64%|████████████████████▍           | 78126/122310 [2:24:55<1:32:18,  7.98it/s][A
 64%|████████████████████▍           | 78130/122310 [2:24:56<1:42:10,  7.21it/s][A
 64%|████████████████████▍           | 78136/122310 [2:24:57<1:39:19,  7.41it/s][A
 64%|████████████████████▍           | 78149/122310 [2:24:58<1:10:59, 10.37it/s][A
 64%|████████████████████▍           | 78156/122310 [2:24:58<1:13:05, 10.07

step: 24320, loss: 65.87020038121904, epoch: 1



 64%|█████████████████████▊            | 78251/122310 [2:25:05<52:24, 14.01it/s][A
 64%|█████████████████████▊            | 78262/122310 [2:25:06<51:30, 14.25it/s][A
 64%|█████████████████████▊            | 78277/122310 [2:25:07<45:56, 15.97it/s][A
 64%|████████████████████▍           | 78279/122310 [2:25:07<1:01:05, 12.01it/s][A
 64%|█████████████████████▊            | 78297/122310 [2:25:08<46:53, 15.64it/s][A
 64%|█████████████████████▊            | 78311/122310 [2:25:09<44:33, 16.46it/s][A
 64%|█████████████████████▊            | 78313/122310 [2:25:10<59:20, 12.36it/s][A
 64%|████████████████████▍           | 78320/122310 [2:25:10<1:04:15, 11.41it/s][A
 64%|█████████████████████▊            | 78331/122310 [2:25:11<58:59, 12.42it/s][A
 64%|█████████████████████▊            | 78346/122310 [2:25:12<49:48, 14.71it/s][A
 64%|█████████████████████▊            | 78354/122310 [2:25:13<54:14, 13.51it/s][A
 64%|█████████████████████▊            | 78361/122310 [2:25:13<59:42, 12.27

step: 24340, loss: 72.58750270425917, epoch: 1



 64%|████████████████████▌           | 78424/122310 [2:25:20<1:32:38,  7.90it/s][A
 64%|████████████████████▌           | 78435/122310 [2:25:21<1:13:08, 10.00it/s][A
 64%|████████████████████▌           | 78443/122310 [2:25:22<1:11:35, 10.21it/s][A
 64%|████████████████████▌           | 78451/122310 [2:25:22<1:10:35, 10.35it/s][A
 64%|████████████████████▌           | 78453/122310 [2:25:24<1:57:48,  6.20it/s][A
 64%|████████████████████▌           | 78461/122310 [2:25:24<1:39:49,  7.32it/s][A
 64%|████████████████████▌           | 78465/122310 [2:25:25<1:47:11,  6.82it/s][A
 64%|████████████████████▌           | 78468/122310 [2:25:26<2:00:41,  6.05it/s][A
 64%|████████████████████▌           | 78479/122310 [2:25:27<1:26:16,  8.47it/s][A
 64%|████████████████████▌           | 78492/122310 [2:25:27<1:06:24, 11.00it/s][A
 64%|████████████████████▌           | 78496/122310 [2:25:28<1:17:44,  9.39it/s][A
 64%|████████████████████▌           | 78507/122310 [2:25:29<1:06:51, 10.92

step: 24360, loss: 84.5103364826159, epoch: 1



 64%|████████████████████▌           | 78570/122310 [2:25:35<1:05:49, 11.07it/s][A
 64%|████████████████████▌           | 78580/122310 [2:25:36<1:02:07, 11.73it/s][A
 64%|█████████████████████▊            | 78592/122310 [2:25:37<55:57, 13.02it/s][A
 64%|█████████████████████▊            | 78600/122310 [2:25:37<58:59, 12.35it/s][A
 64%|█████████████████████▊            | 78609/122310 [2:25:38<59:29, 12.24it/s][A
 64%|████████████████████▌           | 78619/122310 [2:25:39<1:15:04,  9.70it/s][A
 64%|████████████████████▌           | 78630/122310 [2:25:40<1:06:42, 10.91it/s][A
 64%|████████████████████▌           | 78636/122310 [2:25:41<1:11:14, 10.22it/s][A
 64%|████████████████████▌           | 78643/122310 [2:25:42<1:12:48, 10.00it/s][A
 64%|████████████████████▌           | 78656/122310 [2:25:42<1:00:16, 12.07it/s][A
 64%|█████████████████████▊            | 78675/122310 [2:25:43<45:47, 15.88it/s][A
 64%|█████████████████████▊            | 78685/122310 [2:25:44<47:57, 15.16

step: 24380, loss: 102.0142589916012, epoch: 1



 64%|████████████████████▌           | 78753/122310 [2:25:50<1:18:42,  9.22it/s][A
 64%|████████████████████▌           | 78765/122310 [2:25:51<1:04:23, 11.27it/s][A
 64%|████████████████████▌           | 78773/122310 [2:25:51<1:05:48, 11.02it/s][A
 64%|████████████████████▌           | 78776/122310 [2:25:52<1:21:01,  8.95it/s][A
 64%|████████████████████▌           | 78779/122310 [2:25:53<1:36:39,  7.51it/s][A
 64%|████████████████████▌           | 78794/122310 [2:25:54<1:04:29, 11.25it/s][A
 64%|████████████████████▌           | 78801/122310 [2:25:54<1:07:49, 10.69it/s][A
 64%|█████████████████████▉            | 78819/122310 [2:25:55<49:32, 14.63it/s][A
 64%|█████████████████████▉            | 78831/122310 [2:25:56<48:08, 15.05it/s][A
 64%|█████████████████████▉            | 78837/122310 [2:25:57<55:53, 12.96it/s][A
 64%|████████████████████▋           | 78844/122310 [2:25:57<1:00:54, 11.89it/s][A
 64%|████████████████████▋           | 78853/122310 [2:25:58<1:00:43, 11.93

step: 24400, loss: 75.82974470260466, epoch: 1
sim1 and sim2 are 0.5138574670078665, 0.2319341749954567
cosine of pred and queen: 0.1548374448614567
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: largement
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahara

Actual: cairo:egypt::manila:philippines, pred: hawser
Actual: canberra:australia::dushanbe:tajikistan, pred: view
Actual: islamabad:pakistan::oslo:norway, pred: april
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: thaws
Actual: man:woman::brothers:sisters, pred: soused
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: rajasthan
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 65%|████████████████████           | 78915/122310 [2:27:21<46:07:44,  3.83s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.1242603550295858



 65%|████████████████████           | 78923/122310 [2:27:22<30:07:18,  2.50s/it][A
 65%|████████████████████           | 78931/122310 [2:27:23<20:20:42,  1.69s/it][A
 65%|████████████████████           | 78939/122310 [2:27:23<14:05:58,  1.17s/it][A
 65%|████████████████████           | 78942/122310 [2:27:24<12:26:32,  1.03s/it][A
 65%|████████████████████▋           | 78950/122310 [2:27:25<8:21:02,  1.44it/s][A
 65%|████████████████████▋           | 78964/122310 [2:27:25<4:41:25,  2.57it/s][A
 65%|████████████████████▋           | 78973/122310 [2:27:26<3:34:05,  3.37it/s][A
 65%|████████████████████▋           | 78983/122310 [2:27:27<2:41:44,  4.46it/s][A
 65%|████████████████████▋           | 78990/122310 [2:27:28<2:20:58,  5.12it/s][A
 65%|████████████████████▋           | 78997/122310 [2:27:28<2:04:03,  5.82it/s][A
 65%|████████████████████▋           | 79002/122310 [2:27:29<2:00:28,  5.99it/s][A
 65%|████████████████████▋           | 79010/122310 [2:27:30<1:43:06,  7.00

step: 24420, loss: 83.70932098711215, epoch: 1



 65%|████████████████████▋           | 79080/122310 [2:27:37<1:03:00, 11.43it/s][A
 65%|████████████████████▋           | 79083/122310 [2:27:37<1:17:35,  9.29it/s][A
 65%|████████████████████▋           | 79094/122310 [2:27:38<1:06:03, 10.90it/s][A
 65%|████████████████████▋           | 79098/122310 [2:27:39<1:17:30,  9.29it/s][A
 65%|████████████████████▋           | 79106/122310 [2:27:40<1:13:58,  9.73it/s][A
 65%|████████████████████▋           | 79112/122310 [2:27:40<1:18:05,  9.22it/s][A
 65%|████████████████████▋           | 79119/122310 [2:27:41<1:17:11,  9.33it/s][A
 65%|████████████████████▋           | 79128/122310 [2:27:42<1:10:48, 10.16it/s][A
 65%|████████████████████▋           | 79136/122310 [2:27:43<1:09:48, 10.31it/s][A
 65%|████████████████████▋           | 79138/122310 [2:27:43<1:29:48,  8.01it/s][A
 65%|████████████████████▋           | 79147/122310 [2:27:44<1:18:00,  9.22it/s][A
 65%|████████████████████▋           | 79156/122310 [2:27:45<1:11:15, 10.09

step: 24440, loss: 77.47408305065227, epoch: 1



 65%|████████████████████▋           | 79234/122310 [2:27:51<1:13:50,  9.72it/s][A
 65%|████████████████████▋           | 79239/122310 [2:27:52<1:21:26,  8.81it/s][A
 65%|████████████████████▋           | 79245/122310 [2:27:53<1:23:11,  8.63it/s][A
 65%|████████████████████▋           | 79251/122310 [2:27:54<1:24:52,  8.46it/s][A
 65%|████████████████████▋           | 79257/122310 [2:27:54<1:26:00,  8.34it/s][A
 65%|████████████████████▋           | 79269/122310 [2:27:55<1:07:03, 10.70it/s][A
 65%|████████████████████▋           | 79280/122310 [2:27:56<1:00:18, 11.89it/s][A
 65%|████████████████████▋           | 79287/122310 [2:27:57<1:04:18, 11.15it/s][A
 65%|████████████████████▋           | 79295/122310 [2:27:57<1:05:00, 11.03it/s][A
 65%|████████████████████▋           | 79302/122310 [2:27:58<1:08:16, 10.50it/s][A
 65%|████████████████████▋           | 79306/122310 [2:27:59<1:19:52,  8.97it/s][A
 65%|████████████████████▊           | 79313/122310 [2:28:00<1:19:00,  9.07

step: 24460, loss: 140.14538036981907, epoch: 1



 65%|████████████████████▊           | 79361/122310 [2:28:06<1:40:55,  7.09it/s][A
 65%|████████████████████▊           | 79368/122310 [2:28:07<1:33:14,  7.68it/s][A
 65%|████████████████████▊           | 79374/122310 [2:28:08<1:31:57,  7.78it/s][A
 65%|████████████████████▊           | 79386/122310 [2:28:09<1:11:04, 10.07it/s][A
 65%|████████████████████▊           | 79391/122310 [2:28:09<1:18:21,  9.13it/s][A
 65%|████████████████████▊           | 79394/122310 [2:28:10<1:33:15,  7.67it/s][A
 65%|████████████████████▊           | 79398/122310 [2:28:11<1:41:59,  7.01it/s][A
 65%|████████████████████▊           | 79405/122310 [2:28:12<1:32:58,  7.69it/s][A
 65%|████████████████████▊           | 79409/122310 [2:28:12<1:42:10,  7.00it/s][A
 65%|████████████████████▊           | 79425/122310 [2:28:13<1:03:14, 11.30it/s][A
 65%|████████████████████▊           | 79432/122310 [2:28:14<1:06:34, 10.73it/s][A
 65%|████████████████████▊           | 79436/122310 [2:28:15<1:18:49,  9.06

step: 24480, loss: 130.10837173470486, epoch: 1



 65%|████████████████████▊           | 79476/122310 [2:28:21<2:32:52,  4.67it/s][A
 65%|████████████████████▊           | 79481/122310 [2:28:22<2:20:58,  5.06it/s][A
 65%|████████████████████▊           | 79489/122310 [2:28:23<1:54:17,  6.24it/s][A
 65%|████████████████████▊           | 79500/122310 [2:28:23<1:26:57,  8.21it/s][A
 65%|████████████████████▊           | 79512/122310 [2:28:24<1:10:09, 10.17it/s][A
 65%|████████████████████▊           | 79518/122310 [2:28:25<1:14:11,  9.61it/s][A
 65%|██████████████████████            | 79532/122310 [2:28:26<58:51, 12.11it/s][A
 65%|██████████████████████            | 79544/122310 [2:28:26<54:10, 13.16it/s][A
 65%|██████████████████████            | 79551/122310 [2:28:27<58:57, 12.09it/s][A
 65%|██████████████████████            | 79565/122310 [2:28:28<50:55, 13.99it/s][A
 65%|██████████████████████            | 79579/122310 [2:28:29<46:32, 15.30it/s][A
 65%|████████████████████▊           | 79581/122310 [2:28:29<1:01:13, 11.63

step: 24500, loss: 90.12244864192242, epoch: 1



 65%|████████████████████▊           | 79641/122310 [2:28:36<1:23:36,  8.51it/s][A
 65%|████████████████████▊           | 79645/122310 [2:28:37<1:33:50,  7.58it/s][A
 65%|████████████████████▊           | 79653/122310 [2:28:38<1:23:12,  8.54it/s][A
 65%|██████████████████████▏           | 79674/122310 [2:28:38<49:15, 14.43it/s][A
 65%|██████████████████████▏           | 79682/122310 [2:28:39<53:28, 13.29it/s][A
 65%|████████████████████▊           | 79688/122310 [2:28:40<1:00:24, 11.76it/s][A
 65%|██████████████████████▏           | 79701/122310 [2:28:41<52:53, 13.43it/s][A
 65%|████████████████████▊           | 79704/122310 [2:28:41<1:06:37, 10.66it/s][A
 65%|████████████████████▊           | 79712/122310 [2:28:42<1:06:39, 10.65it/s][A
 65%|██████████████████████▏           | 79724/122310 [2:28:43<57:42, 12.30it/s][A
 65%|██████████████████████▏           | 79736/122310 [2:28:44<52:40, 13.47it/s][A
 65%|██████████████████████▏           | 79751/122310 [2:28:44<45:52, 15.46

step: 24520, loss: 79.7312044450382, epoch: 1



 65%|████████████████████▉           | 79800/122310 [2:28:51<1:20:23,  8.81it/s][A
 65%|████████████████████▉           | 79811/122310 [2:28:52<1:06:48, 10.60it/s][A
 65%|████████████████████▉           | 79817/122310 [2:28:53<1:11:56,  9.84it/s][A
 65%|████████████████████▉           | 79821/122310 [2:28:53<1:23:32,  8.48it/s][A
 65%|████████████████████▉           | 79829/122310 [2:28:54<1:17:08,  9.18it/s][A
 65%|████████████████████▉           | 79836/122310 [2:28:55<1:16:35,  9.24it/s][A
 65%|████████████████████▉           | 79846/122310 [2:28:56<1:07:34, 10.47it/s][A
 65%|████████████████████▉           | 79853/122310 [2:28:56<1:09:33, 10.17it/s][A
 65%|██████████████████████▏           | 79866/122310 [2:28:57<57:22, 12.33it/s][A
 65%|████████████████████▉           | 79869/122310 [2:28:58<1:11:49,  9.85it/s][A
 65%|████████████████████▉           | 79879/122310 [2:28:59<1:04:44, 10.92it/s][A
 65%|████████████████████▉           | 79885/122310 [2:28:59<1:10:29, 10.03

step: 24540, loss: 80.00221772152472, epoch: 1



 65%|████████████████████▉           | 79954/122310 [2:29:06<1:09:17, 10.19it/s][A
 65%|████████████████████▉           | 79960/122310 [2:29:07<1:13:48,  9.56it/s][A
 65%|████████████████████▉           | 79967/122310 [2:29:07<1:14:28,  9.48it/s][A
 65%|████████████████████▉           | 79979/122310 [2:29:08<1:01:39, 11.44it/s][A
 65%|████████████████████▉           | 79981/122310 [2:29:09<1:19:53,  8.83it/s][A
 65%|██████████████████████▏           | 79999/122310 [2:29:10<52:30, 13.43it/s][A
 65%|██████████████████████▏           | 80006/122310 [2:29:10<57:57, 12.17it/s][A
 65%|████████████████████▉           | 80012/122310 [2:29:11<1:04:47, 10.88it/s][A
 65%|████████████████████▉           | 80016/122310 [2:29:12<1:16:11,  9.25it/s][A
 65%|████████████████████▉           | 80021/122310 [2:29:13<1:22:57,  8.50it/s][A
 65%|████████████████████▉           | 80025/122310 [2:29:13<1:33:22,  7.55it/s][A
 65%|████████████████████▉           | 80034/122310 [2:29:14<1:19:18,  8.88

step: 24560, loss: 86.24438093534089, epoch: 1



 65%|████████████████████▉           | 80102/122310 [2:29:21<1:18:06,  9.01it/s][A
 65%|████████████████████▉           | 80108/122310 [2:29:22<1:20:22,  8.75it/s][A
 66%|██████████████████████▎           | 80131/122310 [2:29:22<45:43, 15.37it/s][A
 66%|██████████████████████▎           | 80143/122310 [2:29:23<45:05, 15.59it/s][A
 66%|██████████████████████▎           | 80152/122310 [2:29:24<48:16, 14.55it/s][A
 66%|██████████████████████▎           | 80160/122310 [2:29:25<52:25, 13.40it/s][A
 66%|████████████████████▉           | 80165/122310 [2:29:25<1:01:37, 11.40it/s][A
 66%|██████████████████████▎           | 80177/122310 [2:29:26<54:59, 12.77it/s][A
 66%|████████████████████▉           | 80181/122310 [2:29:27<1:06:22, 10.58it/s][A
 66%|████████████████████▉           | 80186/122310 [2:29:28<1:14:25,  9.43it/s][A
 66%|████████████████████▉           | 80190/122310 [2:29:28<1:25:48,  8.18it/s][A
 66%|██████████████████████▎           | 80205/122310 [2:29:29<59:44, 11.75

step: 24580, loss: 84.34287486212193, epoch: 1



 66%|██████████████████████▎           | 80281/122310 [2:29:36<55:08, 12.70it/s][A
 66%|██████████████████████▎           | 80292/122310 [2:29:37<54:15, 12.91it/s][A
 66%|█████████████████████           | 80295/122310 [2:29:38<1:08:37, 10.20it/s][A
 66%|█████████████████████           | 80306/122310 [2:29:38<1:01:02, 11.47it/s][A
 66%|██████████████████████▎           | 80323/122310 [2:29:39<47:26, 14.75it/s][A
 66%|██████████████████████▎           | 80333/122310 [2:29:40<49:08, 14.24it/s][A
 66%|██████████████████████▎           | 80342/122310 [2:29:41<51:56, 13.47it/s][A
 66%|██████████████████████▎           | 80348/122310 [2:29:41<59:49, 11.69it/s][A
 66%|█████████████████████           | 80354/122310 [2:29:42<1:07:04, 10.43it/s][A
 66%|█████████████████████           | 80364/122310 [2:29:43<1:03:01, 11.09it/s][A
 66%|█████████████████████           | 80369/122310 [2:29:44<1:14:36,  9.37it/s][A
 66%|█████████████████████           | 80373/122310 [2:29:45<1:25:33,  8.17

step: 24600, loss: 73.1655420287556, epoch: 1



 66%|██████████████████████▎           | 80470/122310 [2:29:52<51:13, 13.61it/s][A
 66%|█████████████████████           | 80472/122310 [2:29:53<1:07:07, 10.39it/s][A
 66%|█████████████████████           | 80479/122310 [2:29:53<1:10:11,  9.93it/s][A
 66%|█████████████████████           | 80487/122310 [2:29:54<1:08:32, 10.17it/s][A
 66%|█████████████████████           | 80496/122310 [2:29:55<1:05:03, 10.71it/s][A
 66%|██████████████████████▍           | 80509/122310 [2:29:56<54:59, 12.67it/s][A
 66%|█████████████████████           | 80516/122310 [2:29:56<1:00:35, 11.50it/s][A
 66%|█████████████████████           | 80522/122310 [2:29:57<1:08:16, 10.20it/s][A
 66%|█████████████████████           | 80528/122310 [2:29:58<1:14:09,  9.39it/s][A
 66%|█████████████████████           | 80534/122310 [2:29:59<1:18:09,  8.91it/s][A
 66%|█████████████████████           | 80544/122310 [2:30:00<1:09:59,  9.95it/s][A
 66%|█████████████████████           | 80554/122310 [2:30:00<1:04:29, 10.79

step: 24620, loss: 74.09730852458681, epoch: 1



 66%|██████████████████████▍           | 80627/122310 [2:30:08<53:14, 13.05it/s][A
 66%|██████████████████████▍           | 80636/122310 [2:30:08<55:04, 12.61it/s][A
 66%|██████████████████████▍           | 80648/122310 [2:30:09<52:11, 13.30it/s][A
 66%|█████████████████████           | 80653/122310 [2:30:10<1:01:35, 11.27it/s][A
 66%|██████████████████████▍           | 80666/122310 [2:30:11<54:57, 12.63it/s][A
 66%|██████████████████████▍           | 80675/122310 [2:30:12<56:43, 12.23it/s][A
 66%|█████████████████████           | 80679/122310 [2:30:12<1:09:04, 10.05it/s][A
 66%|█████████████████████           | 80688/122310 [2:30:13<1:07:16, 10.31it/s][A
 66%|█████████████████████           | 80691/122310 [2:30:14<1:22:43,  8.38it/s][A
 66%|█████████████████████           | 80697/122310 [2:30:15<1:25:21,  8.13it/s][A
 66%|█████████████████████           | 80711/122310 [2:30:16<1:03:47, 10.87it/s][A
 66%|█████████████████████           | 80715/122310 [2:30:16<1:15:31,  9.18

step: 24640, loss: 90.17832312914754, epoch: 1



 66%|█████████████████████▏          | 80775/122310 [2:30:23<1:02:25, 11.09it/s][A
 66%|█████████████████████▏          | 80777/122310 [2:30:24<1:20:34,  8.59it/s][A
 66%|█████████████████████▏          | 80786/122310 [2:30:25<1:11:57,  9.62it/s][A
 66%|█████████████████████▏          | 80795/122310 [2:30:26<1:07:09, 10.30it/s][A
 66%|█████████████████████▏          | 80798/122310 [2:30:26<1:22:05,  8.43it/s][A
 66%|█████████████████████▏          | 80810/122310 [2:30:27<1:04:48, 10.67it/s][A
 66%|█████████████████████▏          | 80818/122310 [2:30:28<1:04:43, 10.68it/s][A
 66%|█████████████████████▏          | 80827/122310 [2:30:29<1:02:35, 11.05it/s][A
 66%|██████████████████████▍           | 80838/122310 [2:30:29<57:04, 12.11it/s][A
 66%|██████████████████████▍           | 80847/122310 [2:30:30<57:13, 12.08it/s][A
 66%|██████████████████████▍           | 80857/122310 [2:30:31<55:39, 12.41it/s][A
 66%|█████████████████████▏          | 80864/122310 [2:30:32<1:00:04, 11.50

step: 24660, loss: 76.82424470167797, epoch: 1



 66%|██████████████████████▌           | 80953/122310 [2:30:38<49:47, 13.84it/s][A
 66%|█████████████████████▏          | 80956/122310 [2:30:39<1:02:44, 10.98it/s][A
 66%|██████████████████████▌           | 80967/122310 [2:30:40<57:05, 12.07it/s][A
 66%|█████████████████████▏          | 80973/122310 [2:30:41<1:03:27, 10.86it/s][A
 66%|█████████████████████▏          | 80980/122310 [2:30:41<1:06:08, 10.41it/s][A
 66%|██████████████████████▌           | 80998/122310 [2:30:42<47:28, 14.50it/s][A
 66%|██████████████████████▌           | 81003/122310 [2:30:43<56:35, 12.17it/s][A
 66%|██████████████████████▌           | 81013/122310 [2:30:44<54:55, 12.53it/s][A
 66%|██████████████████████▌           | 81020/122310 [2:30:44<59:37, 11.54it/s][A
 66%|██████████████████████▌           | 81031/122310 [2:30:45<55:09, 12.47it/s][A
 66%|██████████████████████▌           | 81041/122310 [2:30:46<53:59, 12.74it/s][A
 66%|█████████████████████▏          | 81047/122310 [2:30:47<1:01:16, 11.22

step: 24680, loss: 85.527970989924, epoch: 1



 66%|██████████████████████▌           | 81131/122310 [2:30:54<50:17, 13.65it/s][A
 66%|██████████████████████▌           | 81138/122310 [2:30:54<55:22, 12.39it/s][A
 66%|█████████████████████▏          | 81144/122310 [2:30:55<1:01:57, 11.07it/s][A
 66%|█████████████████████▏          | 81146/122310 [2:30:56<1:20:42,  8.50it/s][A
 66%|██████████████████████▌           | 81161/122310 [2:30:57<57:33, 11.92it/s][A
 66%|██████████████████████▌           | 81169/122310 [2:30:57<59:29, 11.53it/s][A
 66%|█████████████████████▏          | 81177/122310 [2:30:58<1:01:19, 11.18it/s][A
 66%|█████████████████████▏          | 81183/122310 [2:31:00<1:26:47,  7.90it/s][A
 66%|█████████████████████▏          | 81190/122310 [2:31:00<1:22:57,  8.26it/s][A
 66%|█████████████████████▏          | 81195/122310 [2:31:02<1:50:41,  6.19it/s][A
 66%|█████████████████████▏          | 81202/122310 [2:31:03<1:38:56,  6.92it/s][A
 66%|█████████████████████▏          | 81204/122310 [2:31:03<1:56:17,  5.89

step: 24700, loss: 67.55055226444692, epoch: 1



 66%|█████████████████████▎          | 81257/122310 [2:31:09<1:18:22,  8.73it/s][A
 66%|█████████████████████▎          | 81267/122310 [2:31:09<1:07:43, 10.10it/s][A
 66%|█████████████████████▎          | 81273/122310 [2:31:10<1:12:12,  9.47it/s][A
 66%|█████████████████████▎          | 81280/122310 [2:31:11<1:12:27,  9.44it/s][A
 66%|█████████████████████▎          | 81290/122310 [2:31:12<1:04:22, 10.62it/s][A
 66%|█████████████████████▎          | 81298/122310 [2:31:12<1:04:24, 10.61it/s][A
 66%|█████████████████████▎          | 81302/122310 [2:31:13<1:16:20,  8.95it/s][A
 66%|█████████████████████▎          | 81307/122310 [2:31:14<1:23:02,  8.23it/s][A
 66%|█████████████████████▎          | 81312/122310 [2:31:15<1:27:54,  7.77it/s][A
 66%|█████████████████████▎          | 81315/122310 [2:31:17<2:42:27,  4.21it/s][A
 66%|█████████████████████▎          | 81329/122310 [2:31:18<1:33:34,  7.30it/s][A
 66%|█████████████████████▎          | 81334/122310 [2:31:18<1:35:14,  7.17

step: 24720, loss: 68.89639106403614, epoch: 1



 67%|█████████████████████▎          | 81405/122310 [2:31:24<1:04:17, 10.60it/s][A
 67%|██████████████████████▋           | 81419/122310 [2:31:24<52:24, 13.00it/s][A
 67%|██████████████████████▋           | 81434/122310 [2:31:25<45:10, 15.08it/s][A
 67%|██████████████████████▋           | 81444/122310 [2:31:26<46:52, 14.53it/s][A
 67%|██████████████████████▋           | 81450/122310 [2:31:27<54:15, 12.55it/s][A
 67%|█████████████████████▎          | 81453/122310 [2:31:27<1:07:54, 10.03it/s][A
 67%|█████████████████████▎          | 81459/122310 [2:31:28<1:12:21,  9.41it/s][A
 67%|█████████████████████▎          | 81465/122310 [2:31:29<1:16:03,  8.95it/s][A
 67%|█████████████████████▎          | 81475/122310 [2:31:30<1:06:30, 10.23it/s][A
 67%|█████████████████████▎          | 81477/122310 [2:31:30<1:26:18,  7.89it/s][A
 67%|█████████████████████▎          | 81481/122310 [2:31:31<1:35:24,  7.13it/s][A
 67%|█████████████████████▎          | 81493/122310 [2:31:32<1:09:36,  9.77

step: 24740, loss: 84.01864047687232, epoch: 1



 67%|█████████████████████▎          | 81568/122310 [2:31:39<1:02:40, 10.83it/s][A
 67%|█████████████████████▎          | 81572/122310 [2:31:39<1:13:42,  9.21it/s][A
 67%|█████████████████████▎          | 81576/122310 [2:31:40<1:24:14,  8.06it/s][A
 67%|█████████████████████▎          | 81583/122310 [2:31:41<1:20:28,  8.43it/s][A
 67%|█████████████████████▎          | 81593/122310 [2:31:42<1:08:38,  9.89it/s][A
 67%|█████████████████████▎          | 81598/122310 [2:31:42<1:16:20,  8.89it/s][A
 67%|█████████████████████▎          | 81606/122310 [2:31:43<1:12:07,  9.40it/s][A
 67%|█████████████████████▎          | 81613/122310 [2:31:44<1:12:12,  9.39it/s][A
 67%|█████████████████████▎          | 81619/122310 [2:31:45<1:15:51,  8.94it/s][A
 67%|█████████████████████▎          | 81631/122310 [2:31:45<1:01:29, 11.03it/s][A
 67%|██████████████████████▋           | 81640/122310 [2:31:46<59:54, 11.31it/s][A
 67%|██████████████████████▋           | 81651/122310 [2:31:47<54:56, 12.33

step: 24760, loss: 76.97242582815663, epoch: 1



 67%|██████████████████████▋           | 81723/122310 [2:31:54<58:05, 11.64it/s][A
 67%|██████████████████████▋           | 81731/122310 [2:31:54<59:31, 11.36it/s][A
 67%|█████████████████████▍          | 81734/122310 [2:31:55<1:13:51,  9.16it/s][A
 67%|█████████████████████▍          | 81745/122310 [2:31:56<1:02:30, 10.81it/s][A
 67%|██████████████████████▋           | 81761/122310 [2:31:57<48:55, 13.81it/s][A
 67%|██████████████████████▋           | 81768/122310 [2:31:57<54:06, 12.49it/s][A
 67%|█████████████████████▍          | 81773/122310 [2:31:59<1:22:51,  8.15it/s][A
 67%|█████████████████████▍          | 81779/122310 [2:32:00<1:23:16,  8.11it/s][A
 67%|█████████████████████▍          | 81786/122310 [2:32:01<1:20:01,  8.44it/s][A
 67%|█████████████████████▍          | 81798/122310 [2:32:01<1:05:10, 10.36it/s][A
 67%|█████████████████████▍          | 81803/122310 [2:32:02<1:12:42,  9.29it/s][A
 67%|█████████████████████▍          | 81814/122310 [2:32:03<1:02:22, 10.82

step: 24780, loss: 72.7528190502349, epoch: 1



 67%|█████████████████████▍          | 81872/122310 [2:32:09<1:07:09, 10.04it/s][A
 67%|██████████████████████▊           | 81885/122310 [2:32:10<55:06, 12.23it/s][A
 67%|██████████████████████▊           | 81894/122310 [2:32:10<55:32, 12.13it/s][A
 67%|█████████████████████▍          | 81898/122310 [2:32:11<1:06:37, 10.11it/s][A
 67%|█████████████████████▍          | 81900/122310 [2:32:12<1:25:13,  7.90it/s][A
 67%|█████████████████████▍          | 81912/122310 [2:32:13<1:05:27, 10.28it/s][A
 67%|█████████████████████▍          | 81916/122310 [2:32:13<1:16:14,  8.83it/s][A
 67%|█████████████████████▍          | 81922/122310 [2:32:14<1:18:30,  8.57it/s][A
 67%|█████████████████████▍          | 81928/122310 [2:32:15<1:20:06,  8.40it/s][A
 67%|██████████████████████▊           | 81942/122310 [2:32:16<58:42, 11.46it/s][A
 67%|█████████████████████▍          | 81949/122310 [2:32:16<1:02:54, 10.69it/s][A
 67%|█████████████████████▍          | 81958/122310 [2:32:17<1:01:02, 11.02

step: 24800, loss: 70.30700154222814, epoch: 1
sim1 and sim2 are 0.5271054567969978, 0.23789791377855124
cosine of pred and queen: 0.18743413093407776
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: hags
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharas


 67%|██████████████████████▊           | 82032/122310 [2:32:38<55:50, 12.02it/s][A

Actual: armenia:dram::iran:rial, pred: biscay
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: services
Actual: sweden:swedish::netherlands:dutch, pred: also
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: official
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.35555555555555557
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks


 67%|████████████████████▊          | 82038/122310 [2:33:45<33:23:09,  2.98s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.1242603550295858



 67%|████████████████████▊          | 82045/122310 [2:33:46<24:58:00,  2.23s/it][A
 67%|████████████████████▊          | 82054/122310 [2:33:47<17:09:56,  1.54s/it][A
 67%|████████████████████▊          | 82063/122310 [2:33:47<12:01:09,  1.08s/it][A
 67%|█████████████████████▍          | 82068/122310 [2:33:48<9:56:15,  1.12it/s][A
 67%|█████████████████████▍          | 82074/122310 [2:33:49<7:44:49,  1.44it/s][A
 67%|█████████████████████▍          | 82083/122310 [2:33:50<5:19:56,  2.10it/s][A
 67%|█████████████████████▍          | 82086/122310 [2:33:50<4:57:28,  2.25it/s][A
 67%|█████████████████████▍          | 82099/122310 [2:33:51<2:55:26,  3.82it/s][A
 67%|█████████████████████▍          | 82103/122310 [2:33:52<2:46:34,  4.02it/s][A
 67%|█████████████████████▍          | 82110/122310 [2:33:53<2:18:15,  4.85it/s][A
 67%|█████████████████████▍          | 82117/122310 [2:33:53<1:58:03,  5.67it/s][A
 67%|█████████████████████▍          | 82123/122310 [2:33:54<1:48:56,  6.15

step: 24820, loss: 78.23213390068126, epoch: 1



 67%|██████████████████████▊           | 82218/122310 [2:34:01<54:34, 12.24it/s][A
 67%|██████████████████████▊           | 82226/122310 [2:34:02<56:38, 11.80it/s][A
 67%|██████████████████████▊           | 82236/122310 [2:34:02<54:27, 12.26it/s][A
 67%|█████████████████████▌          | 82239/122310 [2:34:03<1:07:52,  9.84it/s][A
 67%|██████████████████████▊           | 82257/122310 [2:34:04<47:23, 14.09it/s][A
 67%|██████████████████████▊           | 82265/122310 [2:34:05<51:05, 13.06it/s][A
 67%|██████████████████████▊           | 82271/122310 [2:34:05<57:58, 11.51it/s][A
 67%|█████████████████████▌          | 82277/122310 [2:34:06<1:03:31, 10.50it/s][A
 67%|█████████████████████▌          | 82279/122310 [2:34:08<1:46:26,  6.27it/s][A
 67%|█████████████████████▌          | 82287/122310 [2:34:08<1:30:28,  7.37it/s][A
 67%|█████████████████████▌          | 82298/122310 [2:34:09<1:11:46,  9.29it/s][A
 67%|█████████████████████▌          | 82304/122310 [2:34:10<1:15:08,  8.87

step: 24840, loss: 82.64515170543436, epoch: 1



 67%|██████████████████████▉           | 82379/122310 [2:34:16<55:33, 11.98it/s][A
 67%|██████████████████████▉           | 82401/122310 [2:34:17<38:41, 17.19it/s][A
 67%|██████████████████████▉           | 82404/122310 [2:34:17<50:13, 13.24it/s][A
 67%|██████████████████████▉           | 82414/122310 [2:34:18<50:16, 13.23it/s][A
 67%|██████████████████████▉           | 82422/122310 [2:34:19<53:14, 12.48it/s][A
 67%|██████████████████████▉           | 82428/122310 [2:34:20<59:39, 11.14it/s][A
 67%|█████████████████████▌          | 82435/122310 [2:34:20<1:02:54, 10.57it/s][A
 67%|█████████████████████▌          | 82440/122310 [2:34:21<1:10:21,  9.44it/s][A
 67%|█████████████████████▌          | 82447/122310 [2:34:22<1:10:41,  9.40it/s][A
 67%|█████████████████████▌          | 82450/122310 [2:34:23<1:25:13,  7.80it/s][A
 67%|██████████████████████▉           | 82467/122310 [2:34:23<54:11, 12.26it/s][A
 67%|██████████████████████▉           | 82478/122310 [2:34:24<51:06, 12.99

step: 24860, loss: 94.2194884052611, epoch: 1



 68%|██████████████████████▉           | 82571/122310 [2:34:31<43:08, 15.35it/s][A
 68%|██████████████████████▉           | 82582/122310 [2:34:32<43:48, 15.11it/s][A
 68%|██████████████████████▉           | 82589/122310 [2:34:32<49:17, 13.43it/s][A
 68%|██████████████████████▉           | 82605/122310 [2:34:33<41:47, 15.84it/s][A
 68%|██████████████████████▉           | 82612/122310 [2:34:34<47:34, 13.91it/s][A
 68%|██████████████████████▉           | 82624/122310 [2:34:35<45:37, 14.50it/s][A
 68%|██████████████████████▉           | 82638/122310 [2:34:35<41:51, 15.80it/s][A
 68%|██████████████████████▉           | 82647/122310 [2:34:36<45:00, 14.69it/s][A
 68%|██████████████████████▉           | 82654/122310 [2:34:37<50:32, 13.08it/s][A
 68%|██████████████████████▉           | 82665/122310 [2:34:38<48:57, 13.50it/s][A
 68%|██████████████████████▉           | 82676/122310 [2:34:38<47:49, 13.81it/s][A
 68%|██████████████████████▉           | 82681/122310 [2:34:39<56:27, 11.70

step: 24880, loss: 84.48538286445851, epoch: 1



 68%|█████████████████████▋          | 82711/122310 [2:34:46<3:01:54,  3.63it/s][A
 68%|█████████████████████▋          | 82715/122310 [2:34:47<2:48:14,  3.92it/s][A
 68%|█████████████████████▋          | 82726/122310 [2:34:47<1:50:34,  5.97it/s][A
 68%|█████████████████████▋          | 82733/122310 [2:34:48<1:38:40,  6.68it/s][A
 68%|█████████████████████▋          | 82739/122310 [2:34:49<1:34:14,  7.00it/s][A
 68%|█████████████████████▋          | 82752/122310 [2:34:50<1:08:40,  9.60it/s][A
 68%|█████████████████████▋          | 82763/122310 [2:34:50<1:00:13, 10.95it/s][A
 68%|███████████████████████           | 82779/122310 [2:34:51<47:56, 13.74it/s][A
 68%|███████████████████████           | 82786/122310 [2:34:52<52:36, 12.52it/s][A
 68%|███████████████████████           | 82795/122310 [2:34:53<53:16, 12.36it/s][A
 68%|███████████████████████           | 82803/122310 [2:34:53<55:27, 11.87it/s][A
 68%|█████████████████████▋          | 82807/122310 [2:34:54<1:06:02,  9.97

step: 24900, loss: 76.706908919833, epoch: 1



 68%|███████████████████████           | 82890/122310 [2:35:01<56:01, 11.73it/s][A
 68%|█████████████████████▋          | 82892/122310 [2:35:02<1:12:16,  9.09it/s][A
 68%|███████████████████████           | 82906/122310 [2:35:02<54:55, 11.96it/s][A
 68%|███████████████████████           | 82914/122310 [2:35:03<56:46, 11.57it/s][A
 68%|█████████████████████▋          | 82921/122310 [2:35:04<1:00:25, 10.86it/s][A
 68%|█████████████████████▋          | 82928/122310 [2:35:05<1:02:46, 10.46it/s][A
 68%|███████████████████████           | 82942/122310 [2:35:05<50:48, 12.92it/s][A
 68%|███████████████████████           | 82947/122310 [2:35:06<59:25, 11.04it/s][A
 68%|█████████████████████▋          | 82954/122310 [2:35:07<1:02:16, 10.53it/s][A
 68%|█████████████████████▋          | 82962/122310 [2:35:08<1:01:57, 10.59it/s][A
 68%|█████████████████████▋          | 82969/122310 [2:35:08<1:04:21, 10.19it/s][A
 68%|█████████████████████▋          | 82971/122310 [2:35:09<1:22:38,  7.93

step: 24920, loss: 83.08274051656957, epoch: 1



 68%|█████████████████████▋          | 83049/122310 [2:35:16<1:11:18,  9.18it/s][A
 68%|█████████████████████▋          | 83052/122310 [2:35:17<1:25:46,  7.63it/s][A
 68%|█████████████████████▋          | 83062/122310 [2:35:17<1:10:05,  9.33it/s][A
 68%|█████████████████████▋          | 83071/122310 [2:35:18<1:04:38, 10.12it/s][A
 68%|█████████████████████▋          | 83074/122310 [2:35:19<1:18:49,  8.30it/s][A
 68%|█████████████████████▋          | 83077/122310 [2:35:20<1:33:34,  6.99it/s][A
 68%|█████████████████████▋          | 83081/122310 [2:35:20<1:40:45,  6.49it/s][A
 68%|█████████████████████▋          | 83095/122310 [2:35:21<1:04:35, 10.12it/s][A
 68%|███████████████████████           | 83105/122310 [2:35:22<59:03, 11.06it/s][A
 68%|███████████████████████           | 83119/122310 [2:35:23<49:07, 13.30it/s][A
 68%|███████████████████████           | 83124/122310 [2:35:23<57:56, 11.27it/s][A
 68%|███████████████████████           | 83133/122310 [2:35:24<57:15, 11.40

step: 24940, loss: 77.3643212344922, epoch: 1



 68%|█████████████████████▊          | 83187/122310 [2:35:31<1:13:42,  8.85it/s][A
 68%|███████████████████████▏          | 83199/122310 [2:35:32<59:11, 11.01it/s][A
 68%|█████████████████████▊          | 83204/122310 [2:35:33<1:06:39,  9.78it/s][A
 68%|███████████████████████▏          | 83214/122310 [2:35:33<59:44, 10.91it/s][A
 68%|███████████████████████▏          | 83226/122310 [2:35:34<52:09, 12.49it/s][A
 68%|███████████████████████▏          | 83234/122310 [2:35:35<54:22, 11.98it/s][A
 68%|███████████████████████▏          | 83248/122310 [2:35:36<46:23, 14.03it/s][A
 68%|███████████████████████▏          | 83253/122310 [2:35:36<54:38, 11.91it/s][A
 68%|███████████████████████▏          | 83260/122310 [2:35:37<58:10, 11.19it/s][A
 68%|███████████████████████▏          | 83276/122310 [2:35:38<45:21, 14.34it/s][A
 68%|█████████████████████▊          | 83278/122310 [2:35:39<1:00:34, 10.74it/s][A
 68%|█████████████████████▊          | 83282/122310 [2:35:39<1:10:47,  9.19

step: 24960, loss: 81.64897276421445, epoch: 1



 68%|███████████████████████▏          | 83369/122310 [2:35:46<57:53, 11.21it/s][A
 68%|███████████████████████▏          | 83380/122310 [2:35:47<52:34, 12.34it/s][A
 68%|█████████████████████▊          | 83384/122310 [2:35:47<1:03:02, 10.29it/s][A
 68%|█████████████████████▊          | 83389/122310 [2:35:48<1:10:17,  9.23it/s][A
 68%|█████████████████████▊          | 83392/122310 [2:35:49<1:24:22,  7.69it/s][A
 68%|█████████████████████▊          | 83403/122310 [2:35:50<1:05:48,  9.85it/s][A
 68%|█████████████████████▊          | 83406/122310 [2:35:50<1:20:17,  8.08it/s][A
 68%|█████████████████████▊          | 83410/122310 [2:35:51<1:29:00,  7.28it/s][A
 68%|█████████████████████▊          | 83418/122310 [2:35:52<1:17:43,  8.34it/s][A
 68%|█████████████████████▊          | 83427/122310 [2:35:53<1:08:12,  9.50it/s][A
 68%|█████████████████████▊          | 83433/122310 [2:35:53<1:11:16,  9.09it/s][A
 68%|█████████████████████▊          | 83439/122310 [2:35:54<1:13:40,  8.79

step: 24980, loss: 85.76835987261329, epoch: 1



 68%|███████████████████████▏          | 83524/122310 [2:36:01<59:00, 10.96it/s][A
 68%|█████████████████████▊          | 83527/122310 [2:36:01<1:12:30,  8.92it/s][A
 68%|█████████████████████▊          | 83535/122310 [2:36:02<1:08:04,  9.49it/s][A
 68%|█████████████████████▊          | 83540/122310 [2:36:03<1:14:49,  8.64it/s][A
 68%|█████████████████████▊          | 83549/122310 [2:36:04<1:07:10,  9.62it/s][A
 68%|█████████████████████▊          | 83556/122310 [2:36:04<1:07:42,  9.54it/s][A
 68%|█████████████████████▊          | 83563/122310 [2:36:05<1:07:54,  9.51it/s][A
 68%|█████████████████████▊          | 83567/122310 [2:36:06<1:17:53,  8.29it/s][A
 68%|█████████████████████▊          | 83577/122310 [2:36:07<1:05:34,  9.85it/s][A
 68%|███████████████████████▏          | 83590/122310 [2:36:07<53:04, 12.16it/s][A
 68%|███████████████████████▏          | 83601/122310 [2:36:08<49:46, 12.96it/s][A
 68%|███████████████████████▏          | 83617/122310 [2:36:09<41:30, 15.53

step: 25000, loss: 82.10137810834796, epoch: 1
saving weights



 68%|█████████████████████▉          | 83674/122310 [2:36:16<1:33:32,  6.88it/s][A
 68%|█████████████████████▉          | 83687/122310 [2:36:16<1:08:06,  9.45it/s][A
 68%|███████████████████████▎          | 83702/122310 [2:36:17<52:49, 12.18it/s][A
 68%|█████████████████████▉          | 83704/122310 [2:36:18<1:06:46,  9.64it/s][A
 68%|███████████████████████▎          | 83717/122310 [2:36:19<54:17, 11.85it/s][A
 68%|███████████████████████▎          | 83724/122310 [2:36:19<57:35, 11.17it/s][A
 68%|█████████████████████▉          | 83729/122310 [2:36:20<1:05:09,  9.87it/s][A
 68%|█████████████████████▉          | 83738/122310 [2:36:21<1:00:51, 10.56it/s][A
 68%|███████████████████████▎          | 83754/122310 [2:36:22<46:29, 13.82it/s][A
 68%|███████████████████████▎          | 83762/122310 [2:36:22<50:11, 12.80it/s][A
 69%|███████████████████████▎          | 83784/122310 [2:36:23<36:06, 17.79it/s][A
 69%|███████████████████████▎          | 83787/122310 [2:36:24<46:45, 13.73

step: 25020, loss: 73.59003356555618, epoch: 1



 69%|███████████████████████▎          | 83869/122310 [2:36:31<54:40, 11.72it/s][A
 69%|███████████████████████▎          | 83884/122310 [2:36:32<44:46, 14.30it/s][A
 69%|███████████████████████▎          | 83899/122310 [2:36:33<39:45, 16.10it/s][A
 69%|███████████████████████▎          | 83903/122310 [2:36:33<49:34, 12.91it/s][A
 69%|███████████████████████▎          | 83912/122310 [2:36:34<50:23, 12.70it/s][A
 69%|███████████████████████▎          | 83924/122310 [2:36:35<46:41, 13.70it/s][A
 69%|███████████████████████▎          | 83928/122310 [2:36:36<56:45, 11.27it/s][A
 69%|███████████████████████▎          | 83937/122310 [2:36:36<55:23, 11.55it/s][A
 69%|███████████████████████▎          | 83944/122310 [2:36:37<58:27, 10.94it/s][A
 69%|███████████████████████▎          | 83959/122310 [2:36:38<46:28, 13.75it/s][A
 69%|███████████████████████▎          | 83967/122310 [2:36:39<49:46, 12.84it/s][A
 69%|███████████████████████▎          | 83972/122310 [2:36:39<57:50, 11.05

step: 25040, loss: 82.23484332159912, epoch: 1



 69%|█████████████████████▉          | 84047/122310 [2:36:46<1:00:30, 10.54it/s][A
 69%|███████████████████████▎          | 84060/122310 [2:36:47<50:19, 12.67it/s][A
 69%|███████████████████████▎          | 84069/122310 [2:36:48<50:50, 12.54it/s][A
 69%|███████████████████████▎          | 84079/122310 [2:36:48<49:40, 12.83it/s][A
 69%|███████████████████████▍          | 84090/122310 [2:36:49<47:27, 13.42it/s][A
 69%|███████████████████████▍          | 84095/122310 [2:36:50<55:46, 11.42it/s][A
 69%|██████████████████████          | 84101/122310 [2:36:50<1:00:55, 10.45it/s][A
 69%|██████████████████████          | 84105/122310 [2:36:51<1:11:36,  8.89it/s][A
 69%|██████████████████████          | 84108/122310 [2:36:52<1:25:36,  7.44it/s][A
 69%|██████████████████████          | 84111/122310 [2:36:53<1:38:42,  6.45it/s][A
 69%|███████████████████████▍          | 84128/122310 [2:36:53<55:44, 11.42it/s][A
 69%|██████████████████████          | 84131/122310 [2:36:54<1:09:06,  9.21

step: 25060, loss: 84.05483076512603, epoch: 1



 69%|███████████████████████▍          | 84215/122310 [2:37:01<43:45, 14.51it/s][A
 69%|███████████████████████▍          | 84222/122310 [2:37:02<48:52, 12.99it/s][A
 69%|███████████████████████▍          | 84228/122310 [2:37:02<54:58, 11.54it/s][A
 69%|███████████████████████▍          | 84243/122310 [2:37:03<44:50, 14.15it/s][A
 69%|███████████████████████▍          | 84250/122310 [2:37:04<49:54, 12.71it/s][A
 69%|███████████████████████▍          | 84258/122310 [2:37:05<52:11, 12.15it/s][A
 69%|███████████████████████▍          | 84272/122310 [2:37:05<44:39, 14.20it/s][A
 69%|███████████████████████▍          | 84282/122310 [2:37:06<45:23, 13.96it/s][A
 69%|███████████████████████▍          | 84291/122310 [2:37:07<47:07, 13.45it/s][A
 69%|███████████████████████▍          | 84304/122310 [2:37:08<43:09, 14.68it/s][A
 69%|███████████████████████▍          | 84311/122310 [2:37:08<48:18, 13.11it/s][A
 69%|███████████████████████▍          | 84320/122310 [2:37:09<49:19, 12.84

step: 25080, loss: 87.32951863785398, epoch: 1



 69%|███████████████████████▍          | 84410/122310 [2:37:16<41:00, 15.40it/s][A
 69%|███████████████████████▍          | 84417/122310 [2:37:16<46:15, 13.65it/s][A
 69%|███████████████████████▍          | 84427/122310 [2:37:17<46:32, 13.57it/s][A
 69%|███████████████████████▍          | 84431/122310 [2:37:18<56:31, 11.17it/s][A
 69%|██████████████████████          | 84437/122310 [2:37:19<1:01:42, 10.23it/s][A
 69%|██████████████████████          | 84444/122310 [2:37:19<1:02:56, 10.03it/s][A
 69%|███████████████████████▍          | 84456/122310 [2:37:20<53:15, 11.85it/s][A
 69%|██████████████████████          | 84461/122310 [2:37:21<1:01:01, 10.34it/s][A
 69%|██████████████████████          | 84467/122310 [2:37:22<1:05:28,  9.63it/s][A
 69%|██████████████████████          | 84475/122310 [2:37:22<1:03:13,  9.97it/s][A
 69%|██████████████████████          | 84479/122310 [2:37:23<1:13:29,  8.58it/s][A
 69%|██████████████████████          | 84488/122310 [2:37:24<1:05:22,  9.64

step: 25100, loss: 81.66548495374704, epoch: 1



 69%|███████████████████████▌          | 84577/122310 [2:37:31<45:22, 13.86it/s][A
 69%|███████████████████████▌          | 84597/122310 [2:37:31<35:22, 17.77it/s][A
 69%|███████████████████████▌          | 84606/122310 [2:37:32<38:58, 16.13it/s][A
 69%|███████████████████████▌          | 84618/122310 [2:37:33<38:57, 16.12it/s][A
 69%|███████████████████████▌          | 84622/122310 [2:37:34<48:46, 12.88it/s][A
 69%|███████████████████████▌          | 84629/122310 [2:37:34<53:17, 11.78it/s][A
 69%|███████████████████████▌          | 84640/122310 [2:37:35<49:33, 12.67it/s][A
 69%|███████████████████████▌          | 84649/122310 [2:37:36<50:09, 12.52it/s][A
 69%|██████████████████████▏         | 84652/122310 [2:37:37<1:03:04,  9.95it/s][A
 69%|██████████████████████▏         | 84658/122310 [2:37:37<1:06:38,  9.42it/s][A
 69%|███████████████████████▌          | 84668/122310 [2:37:38<59:01, 10.63it/s][A
 69%|███████████████████████▌          | 84679/122310 [2:37:39<52:51, 11.86

step: 25120, loss: 87.59305240117327, epoch: 1



 69%|███████████████████████▌          | 84757/122310 [2:37:45<57:30, 10.88it/s][A
 69%|██████████████████████▏         | 84762/122310 [2:37:46<1:04:29,  9.70it/s][A
 69%|███████████████████████▌          | 84773/122310 [2:37:47<56:02, 11.16it/s][A
 69%|███████████████████████▌          | 84792/122310 [2:37:48<40:21, 15.49it/s][A
 69%|███████████████████████▌          | 84796/122310 [2:37:48<50:06, 12.48it/s][A
 69%|███████████████████████▌          | 84806/122310 [2:37:49<48:56, 12.77it/s][A
 69%|██████████████████████▏         | 84808/122310 [2:37:50<1:04:08,  9.74it/s][A
 69%|███████████████████████▌          | 84821/122310 [2:37:51<51:59, 12.02it/s][A
 69%|██████████████████████▏         | 84823/122310 [2:37:51<1:07:21,  9.28it/s][A
 69%|██████████████████████▏         | 84832/122310 [2:37:52<1:01:43, 10.12it/s][A
 69%|███████████████████████▌          | 84841/122310 [2:37:53<58:35, 10.66it/s][A
 69%|██████████████████████▏         | 84848/122310 [2:37:54<1:00:48, 10.27

step: 25140, loss: 75.1235545353411, epoch: 1



 69%|██████████████████████▏         | 84920/122310 [2:38:00<1:00:41, 10.27it/s][A
 69%|██████████████████████▏         | 84926/122310 [2:38:01<1:05:15,  9.55it/s][A
 69%|██████████████████████▏         | 84930/122310 [2:38:02<1:14:18,  8.38it/s][A
 69%|██████████████████████▏         | 84937/122310 [2:38:03<1:11:27,  8.72it/s][A
 69%|██████████████████████▏         | 84942/122310 [2:38:03<1:16:54,  8.10it/s][A
 69%|██████████████████████▏         | 84949/122310 [2:38:04<1:12:54,  8.54it/s][A
 69%|██████████████████████▏         | 84957/122310 [2:38:05<1:07:30,  9.22it/s][A
 69%|███████████████████████▌          | 84967/122310 [2:38:06<59:13, 10.51it/s][A
 69%|███████████████████████▌          | 84977/122310 [2:38:06<54:31, 11.41it/s][A
 69%|███████████████████████▌          | 84987/122310 [2:38:07<51:38, 12.05it/s][A
 69%|███████████████████████▋          | 84999/122310 [2:38:08<46:59, 13.23it/s][A
 70%|███████████████████████▋          | 85006/122310 [2:38:09<51:45, 12.01

step: 25160, loss: 99.54255253734078, epoch: 1



 70%|███████████████████████▋          | 85082/122310 [2:38:15<47:11, 13.15it/s][A
 70%|███████████████████████▋          | 85087/122310 [2:38:16<55:07, 11.26it/s][A
 70%|██████████████████████▎         | 85093/122310 [2:38:17<1:00:14, 10.30it/s][A
 70%|███████████████████████▋          | 85109/122310 [2:38:17<45:09, 13.73it/s][A
 70%|███████████████████████▋          | 85126/122310 [2:38:18<37:34, 16.49it/s][A
 70%|███████████████████████▋          | 85129/122310 [2:38:19<48:46, 12.71it/s][A
 70%|███████████████████████▋          | 85135/122310 [2:38:20<54:51, 11.29it/s][A
 70%|███████████████████████▋          | 85146/122310 [2:38:20<50:11, 12.34it/s][A
 70%|███████████████████████▋          | 85156/122310 [2:38:21<48:52, 12.67it/s][A
 70%|███████████████████████▋          | 85161/122310 [2:38:22<56:53, 10.88it/s][A
 70%|███████████████████████▋          | 85173/122310 [2:38:23<49:54, 12.40it/s][A
 70%|███████████████████████▋          | 85178/122310 [2:38:23<57:48, 10.71

step: 25180, loss: 84.14170506237322, epoch: 1



 70%|███████████████████████▋          | 85263/122310 [2:38:30<44:44, 13.80it/s][A
 70%|███████████████████████▋          | 85268/122310 [2:38:31<52:46, 11.70it/s][A
 70%|██████████████████████▎         | 85273/122310 [2:38:32<1:00:35, 10.19it/s][A
 70%|██████████████████████▎         | 85277/122310 [2:38:32<1:10:36,  8.74it/s][A
 70%|███████████████████████▋          | 85289/122310 [2:38:33<56:22, 10.94it/s][A
 70%|██████████████████████▎         | 85294/122310 [2:38:34<1:03:51,  9.66it/s][A
 70%|██████████████████████▎         | 85298/122310 [2:38:35<1:13:39,  8.38it/s][A
 70%|██████████████████████▎         | 85305/122310 [2:38:35<1:10:58,  8.69it/s][A
 70%|██████████████████████▎         | 85310/122310 [2:38:36<1:16:21,  8.08it/s][A
 70%|██████████████████████▎         | 85314/122310 [2:38:37<1:24:53,  7.26it/s][A
 70%|██████████████████████▎         | 85320/122310 [2:38:38<1:22:13,  7.50it/s][A
 70%|██████████████████████▎         | 85327/122310 [2:38:38<1:16:38,  8.04

step: 25200, loss: 83.10004788892314, epoch: 1
sim1 and sim2 are 0.5133769429423951, 0.22747242430219292
cosine of pred and queen: 0.22493756561169306
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: maladie
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahar

Actual: cairo:egypt::manila:philippines, pred: internationalist
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: child
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: ipa
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: indian
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 70%|█████████████████████▋         | 85401/122310 [2:40:00<29:25:41,  2.87s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.1301775147928994



 70%|█████████████████████▋         | 85413/122310 [2:40:01<18:18:26,  1.79s/it][A
 70%|█████████████████████▋         | 85427/122310 [2:40:02<11:20:50,  1.11s/it][A
 70%|██████████████████████▎         | 85442/122310 [2:40:02<7:16:50,  1.41it/s][A
 70%|██████████████████████▎         | 85446/122310 [2:40:03<6:36:13,  1.55it/s][A
 70%|██████████████████████▎         | 85452/122310 [2:40:04<5:28:02,  1.87it/s][A
 70%|██████████████████████▎         | 85460/122310 [2:40:04<4:09:59,  2.46it/s][A
 70%|██████████████████████▎         | 85469/122310 [2:40:05<3:06:36,  3.29it/s][A
 70%|██████████████████████▎         | 85477/122310 [2:40:06<2:29:01,  4.12it/s][A
 70%|██████████████████████▎         | 85490/122310 [2:40:07<1:43:12,  5.95it/s][A
 70%|██████████████████████▎         | 85505/122310 [2:40:07<1:14:00,  8.29it/s][A
 70%|██████████████████████▎         | 85510/122310 [2:40:08<1:16:38,  8.00it/s][A
 70%|██████████████████████▎         | 85513/122310 [2:40:09<1:25:03,  7.21

step: 25220, loss: 71.87507503238444, epoch: 1



 70%|███████████████████████▊          | 85605/122310 [2:40:15<40:19, 15.17it/s][A
 70%|███████████████████████▊          | 85611/122310 [2:40:16<46:32, 13.14it/s][A
 70%|███████████████████████▊          | 85620/122310 [2:40:17<47:26, 12.89it/s][A
 70%|███████████████████████▊          | 85634/122310 [2:40:18<41:28, 14.74it/s][A
 70%|███████████████████████▊          | 85639/122310 [2:40:18<49:20, 12.39it/s][A
 70%|███████████████████████▊          | 85651/122310 [2:40:19<45:08, 13.54it/s][A
 70%|███████████████████████▊          | 85661/122310 [2:40:20<44:58, 13.58it/s][A
 70%|███████████████████████▊          | 85664/122310 [2:40:21<56:46, 10.76it/s][A
 70%|██████████████████████▍         | 85669/122310 [2:40:21<1:03:48,  9.57it/s][A
 70%|███████████████████████▊          | 85681/122310 [2:40:22<52:33, 11.62it/s][A
 70%|███████████████████████▊          | 85687/122310 [2:40:23<57:34, 10.60it/s][A
 70%|███████████████████████▊          | 85694/122310 [2:40:23<59:13, 10.30

step: 25240, loss: 76.99469691306953, epoch: 1



 70%|███████████████████████▊          | 85776/122310 [2:40:30<52:21, 11.63it/s][A
 70%|███████████████████████▊          | 85786/122310 [2:40:31<49:46, 12.23it/s][A
 70%|███████████████████████▊          | 85793/122310 [2:40:32<53:23, 11.40it/s][A
 70%|███████████████████████▊          | 85799/122310 [2:40:32<58:23, 10.42it/s][A
 70%|██████████████████████▍         | 85805/122310 [2:40:33<1:02:14,  9.78it/s][A
 70%|██████████████████████▍         | 85813/122310 [2:40:34<1:00:15, 10.09it/s][A
 70%|██████████████████████▍         | 85819/122310 [2:40:34<1:03:46,  9.54it/s][A
 70%|███████████████████████▊          | 85831/122310 [2:40:35<52:32, 11.57it/s][A
 70%|███████████████████████▊          | 85841/122310 [2:40:36<49:47, 12.21it/s][A
 70%|███████████████████████▊          | 85848/122310 [2:40:37<53:11, 11.43it/s][A
 70%|███████████████████████▊          | 85860/122310 [2:40:37<47:04, 12.90it/s][A
 70%|███████████████████████▊          | 85870/122310 [2:40:38<46:19, 13.11

step: 25260, loss: 102.1652236102146, epoch: 1



 70%|██████████████████████▍         | 85940/122310 [2:40:45<1:02:32,  9.69it/s][A
 70%|██████████████████████▍         | 85941/122310 [2:40:45<1:24:07,  7.21it/s][A
 70%|██████████████████████▍         | 85951/122310 [2:40:46<1:06:26,  9.12it/s][A
 70%|███████████████████████▉          | 85962/122310 [2:40:47<55:33, 10.90it/s][A
 70%|███████████████████████▉          | 85971/122310 [2:40:48<53:38, 11.29it/s][A
 70%|███████████████████████▉          | 85980/122310 [2:40:48<52:15, 11.59it/s][A
 70%|███████████████████████▉          | 85990/122310 [2:40:49<49:35, 12.21it/s][A
 70%|███████████████████████▉          | 86003/122310 [2:40:50<43:40, 13.86it/s][A
 70%|███████████████████████▉          | 86009/122310 [2:40:51<49:55, 12.12it/s][A
 70%|███████████████████████▉          | 86015/122310 [2:40:51<55:24, 10.92it/s][A
 70%|███████████████████████▉          | 86021/122310 [2:40:52<59:48, 10.11it/s][A
 70%|██████████████████████▌         | 86026/122310 [2:40:53<1:06:20,  9.12

step: 25280, loss: 65.19163070527676, epoch: 1



 70%|███████████████████████▉          | 86119/122310 [2:40:59<56:38, 10.65it/s][A
 70%|██████████████████████▌         | 86124/122310 [2:41:00<1:03:29,  9.50it/s][A
 70%|███████████████████████▉          | 86135/122310 [2:41:01<54:07, 11.14it/s][A
 70%|███████████████████████▉          | 86146/122310 [2:41:02<49:14, 12.24it/s][A
 70%|███████████████████████▉          | 86169/122310 [2:41:02<33:30, 17.98it/s][A
 70%|███████████████████████▉          | 86187/122310 [2:41:03<30:12, 19.93it/s][A
 70%|███████████████████████▉          | 86191/122310 [2:41:04<38:37, 15.58it/s][A
 70%|███████████████████████▉          | 86200/122310 [2:41:05<41:12, 14.60it/s][A
 70%|███████████████████████▉          | 86213/122310 [2:41:05<38:46, 15.51it/s][A
 70%|███████████████████████▉          | 86220/122310 [2:41:06<43:53, 13.71it/s][A
 71%|███████████████████████▉          | 86229/122310 [2:41:07<45:16, 13.28it/s][A
 71%|███████████████████████▉          | 86237/122310 [2:41:08<48:35, 12.37

step: 25300, loss: 78.51956629021679, epoch: 1



 71%|██████████████████████▌         | 86271/122310 [2:41:14<2:23:56,  4.17it/s][A
 71%|██████████████████████▌         | 86274/122310 [2:41:15<2:24:46,  4.15it/s][A
 71%|██████████████████████▌         | 86282/122310 [2:41:16<1:47:38,  5.58it/s][A
 71%|██████████████████████▌         | 86287/122310 [2:41:17<1:42:55,  5.83it/s][A
 71%|██████████████████████▌         | 86299/122310 [2:41:17<1:11:13,  8.43it/s][A
 71%|██████████████████████▌         | 86307/122310 [2:41:18<1:06:28,  9.03it/s][A
 71%|██████████████████████▌         | 86312/122310 [2:41:19<1:11:38,  8.38it/s][A
 71%|██████████████████████▌         | 86320/122310 [2:41:20<1:06:29,  9.02it/s][A
 71%|███████████████████████▉          | 86335/122310 [2:41:20<49:26, 12.13it/s][A
 71%|████████████████████████          | 86343/122310 [2:41:21<51:24, 11.66it/s][A
 71%|██████████████████████▌         | 86345/122310 [2:41:22<1:07:10,  8.92it/s][A
 71%|██████████████████████▌         | 86353/122310 [2:41:23<1:03:40,  9.41

step: 25320, loss: 80.22073359546481, epoch: 1



 71%|████████████████████████          | 86429/122310 [2:41:29<52:44, 11.34it/s][A
 71%|████████████████████████          | 86446/122310 [2:41:30<40:19, 14.82it/s][A
 71%|████████████████████████          | 86450/122310 [2:41:31<49:32, 12.06it/s][A
 71%|████████████████████████          | 86461/122310 [2:41:31<46:11, 12.93it/s][A
 71%|████████████████████████          | 86475/122310 [2:41:32<40:28, 14.76it/s][A
 71%|████████████████████████          | 86486/122310 [2:41:33<40:15, 14.83it/s][A
 71%|████████████████████████          | 86497/122310 [2:41:34<40:10, 14.86it/s][A
 71%|████████████████████████          | 86504/122310 [2:41:34<44:59, 13.26it/s][A
 71%|████████████████████████          | 86507/122310 [2:41:35<56:47, 10.51it/s][A
 71%|████████████████████████          | 86515/122310 [2:41:36<56:08, 10.63it/s][A
 71%|████████████████████████          | 86522/122310 [2:41:37<57:56, 10.30it/s][A
 71%|████████████████████████          | 86533/122310 [2:41:37<51:14, 11.64

step: 25340, loss: 72.5888501489822, epoch: 1



 71%|████████████████████████          | 86614/122310 [2:41:44<45:00, 13.22it/s][A
 71%|████████████████████████          | 86620/122310 [2:41:45<50:48, 11.71it/s][A
 71%|████████████████████████          | 86629/122310 [2:41:45<50:11, 11.85it/s][A
 71%|████████████████████████          | 86639/122310 [2:41:46<48:06, 12.36it/s][A
 71%|████████████████████████          | 86648/122310 [2:41:47<48:10, 12.34it/s][A
 71%|████████████████████████          | 86657/122310 [2:41:48<48:16, 12.31it/s][A
 71%|████████████████████████          | 86668/122310 [2:41:48<45:31, 13.05it/s][A
 71%|████████████████████████          | 86672/122310 [2:41:49<54:55, 10.81it/s][A
 71%|████████████████████████          | 86678/122310 [2:41:50<59:18, 10.01it/s][A
 71%|██████████████████████▋         | 86685/122310 [2:41:51<1:00:07,  9.88it/s][A
 71%|██████████████████████▋         | 86692/122310 [2:41:51<1:00:57,  9.74it/s][A
 71%|████████████████████████          | 86702/122310 [2:41:52<54:29, 10.89

step: 25360, loss: 71.40520430617616, epoch: 1



 71%|████████████████████████▏         | 86797/122310 [2:41:59<37:22, 15.83it/s][A
 71%|████████████████████████▏         | 86803/122310 [2:41:59<43:36, 13.57it/s][A
 71%|████████████████████████▏         | 86817/122310 [2:42:00<38:55, 15.20it/s][A
 71%|████████████████████████▏         | 86834/122310 [2:42:01<33:44, 17.52it/s][A
 71%|████████████████████████▏         | 86840/122310 [2:42:02<40:04, 14.75it/s][A
 71%|████████████████████████▏         | 86848/122310 [2:42:02<43:41, 13.53it/s][A
 71%|████████████████████████▏         | 86862/122310 [2:42:03<38:57, 15.16it/s][A
 71%|████████████████████████▏         | 86878/122310 [2:42:04<34:31, 17.10it/s][A
 71%|████████████████████████▏         | 86888/122310 [2:42:05<36:48, 16.04it/s][A
 71%|████████████████████████▏         | 86896/122310 [2:42:05<40:50, 14.45it/s][A
 71%|████████████████████████▏         | 86906/122310 [2:42:06<41:29, 14.22it/s][A
 71%|████████████████████████▏         | 86910/122310 [2:42:07<50:58, 11.57

step: 25380, loss: 73.004869160239, epoch: 1



 71%|████████████████████████▏         | 86996/122310 [2:42:13<42:46, 13.76it/s][A
 71%|████████████████████████▏         | 87005/122310 [2:42:14<44:12, 13.31it/s][A
 71%|████████████████████████▏         | 87010/122310 [2:42:15<51:53, 11.34it/s][A
 71%|████████████████████████▏         | 87021/122310 [2:42:16<47:23, 12.41it/s][A
 71%|████████████████████████▏         | 87036/122310 [2:42:16<39:42, 14.81it/s][A
 71%|████████████████████████▏         | 87042/122310 [2:42:17<45:48, 12.83it/s][A
 71%|████████████████████████▏         | 87050/122310 [2:42:18<47:57, 12.25it/s][A
 71%|████████████████████████▏         | 87058/122310 [2:42:19<49:42, 11.82it/s][A
 71%|████████████████████████▏         | 87074/122310 [2:42:19<39:40, 14.80it/s][A
 71%|████████████████████████▏         | 87081/122310 [2:42:20<44:34, 13.17it/s][A
 71%|████████████████████████▏         | 87088/122310 [2:42:21<48:36, 12.08it/s][A
 71%|████████████████████████▏         | 87095/122310 [2:42:22<51:56, 11.30

step: 25400, loss: 88.05592439484982, epoch: 1



 71%|████████████████████████▏         | 87172/122310 [2:42:28<46:24, 12.62it/s][A
 71%|████████████████████████▏         | 87176/122310 [2:42:29<55:55, 10.47it/s][A
 71%|██████████████████████▊         | 87181/122310 [2:42:30<1:02:38,  9.35it/s][A
 71%|████████████████████████▏         | 87189/122310 [2:42:30<59:48,  9.79it/s][A
 71%|██████████████████████▊         | 87196/122310 [2:42:31<1:00:22,  9.69it/s][A
 71%|██████████████████████▊         | 87199/122310 [2:42:32<1:12:59,  8.02it/s][A
 71%|██████████████████████▊         | 87205/122310 [2:42:33<1:34:12,  6.21it/s][A
 71%|██████████████████████▊         | 87212/122310 [2:42:34<1:23:28,  7.01it/s][A
 71%|██████████████████████▊         | 87220/122310 [2:42:35<1:13:22,  7.97it/s][A
 71%|██████████████████████▊         | 87226/122310 [2:42:36<1:13:09,  7.99it/s][A
 71%|██████████████████████▊         | 87235/122310 [2:42:36<1:04:15,  9.10it/s][A
 71%|████████████████████████▎         | 87246/122310 [2:42:37<54:31, 10.72

step: 25420, loss: 91.32433251132935, epoch: 1



 71%|████████████████████████▎         | 87311/122310 [2:42:43<57:30, 10.14it/s][A
 71%|██████████████████████▊         | 87314/122310 [2:42:44<1:10:12,  8.31it/s][A
 71%|██████████████████████▊         | 87320/122310 [2:42:45<1:10:56,  8.22it/s][A
 71%|████████████████████████▎         | 87331/122310 [2:42:45<57:25, 10.15it/s][A
 71%|██████████████████████▊         | 87335/122310 [2:42:46<1:06:45,  8.73it/s][A
 71%|████████████████████████▎         | 87345/122310 [2:42:47<57:35, 10.12it/s][A
 71%|██████████████████████▊         | 87348/122310 [2:42:48<1:10:18,  8.29it/s][A
 71%|██████████████████████▊         | 87355/122310 [2:42:48<1:07:37,  8.62it/s][A
 71%|██████████████████████▊         | 87364/122310 [2:42:49<1:00:28,  9.63it/s][A
 71%|████████████████████████▎         | 87374/122310 [2:42:50<54:09, 10.75it/s][A
 71%|████████████████████████▎         | 87388/122310 [2:42:51<44:34, 13.06it/s][A
 71%|████████████████████████▎         | 87398/122310 [2:42:51<44:38, 13.03

step: 25440, loss: 72.70606812461075, epoch: 1



 72%|████████████████████████▎         | 87488/122310 [2:42:58<44:57, 12.91it/s][A
 72%|████████████████████████▎         | 87500/122310 [2:42:59<41:55, 13.84it/s][A
 72%|████████████████████████▎         | 87513/122310 [2:43:00<39:03, 14.85it/s][A
 72%|████████████████████████▎         | 87517/122310 [2:43:00<48:12, 12.03it/s][A
 72%|████████████████████████▎         | 87523/122310 [2:43:01<53:29, 10.84it/s][A
 72%|████████████████████████▎         | 87529/122310 [2:43:02<58:00,  9.99it/s][A
 72%|████████████████████████▎         | 87538/122310 [2:43:03<54:38, 10.61it/s][A
 72%|████████████████████████▎         | 87552/122310 [2:43:03<44:40, 12.97it/s][A
 72%|████████████████████████▎         | 87562/122310 [2:43:04<44:28, 13.02it/s][A
 72%|████████████████████████▎         | 87571/122310 [2:43:05<45:24, 12.75it/s][A
 72%|████████████████████████▎         | 87582/122310 [2:43:06<43:26, 13.32it/s][A
 72%|████████████████████████▎         | 87595/122310 [2:43:06<40:00, 14.46

step: 25460, loss: 81.38034651839786, epoch: 1



 72%|████████████████████████▎         | 87673/122310 [2:43:13<43:43, 13.20it/s][A
 72%|████████████████████████▎         | 87680/122310 [2:43:14<47:16, 12.21it/s][A
 72%|████████████████████████▍         | 87691/122310 [2:43:14<43:56, 13.13it/s][A
 72%|████████████████████████▍         | 87704/122310 [2:43:15<39:24, 14.63it/s][A
 72%|██████████████████████▉         | 87709/122310 [2:43:17<1:00:32,  9.53it/s][A
 72%|██████████████████████▉         | 87713/122310 [2:43:18<1:24:17,  6.84it/s][A
 72%|██████████████████████▉         | 87714/122310 [2:43:19<1:41:46,  5.67it/s][A
 72%|██████████████████████▉         | 87723/122310 [2:43:19<1:18:42,  7.32it/s][A
 72%|██████████████████████▉         | 87729/122310 [2:43:20<1:15:53,  7.59it/s][A
 72%|██████████████████████▉         | 87735/122310 [2:43:21<1:13:57,  7.79it/s][A
 72%|████████████████████████▍         | 87747/122310 [2:43:22<56:12, 10.25it/s][A
 72%|████████████████████████▍         | 87758/122310 [2:43:22<49:13, 11.70

step: 25480, loss: 90.58483411839715, epoch: 1



 72%|██████████████████████▉         | 87814/122310 [2:43:28<1:08:46,  8.36it/s][A
 72%|██████████████████████▉         | 87821/122310 [2:43:29<1:05:53,  8.72it/s][A
 72%|██████████████████████▉         | 87828/122310 [2:43:29<1:03:56,  8.99it/s][A
 72%|██████████████████████▉         | 87836/122310 [2:43:30<1:00:09,  9.55it/s][A
 72%|████████████████████████▍         | 87851/122310 [2:43:31<45:19, 12.67it/s][A
 72%|████████████████████████▍         | 87853/122310 [2:43:32<58:14,  9.86it/s][A
 72%|████████████████████████▍         | 87863/122310 [2:43:32<51:53, 11.06it/s][A
 72%|████████████████████████▍         | 87868/122310 [2:43:33<58:11,  9.87it/s][A
 72%|████████████████████████▍         | 87876/122310 [2:43:34<55:53, 10.27it/s][A
 72%|██████████████████████▉         | 87881/122310 [2:43:34<1:01:56,  9.26it/s][A
 72%|████████████████████████▍         | 87892/122310 [2:43:35<52:13, 10.98it/s][A
 72%|████████████████████████▍         | 87904/122310 [2:43:36<45:19, 12.65

step: 25500, loss: 79.91189227875104, epoch: 1



 72%|████████████████████████▍         | 87951/122310 [2:43:42<55:20, 10.35it/s][A
 72%|███████████████████████         | 87955/122310 [2:43:42<1:03:56,  8.96it/s][A
 72%|████████████████████████▍         | 87964/122310 [2:43:43<56:57, 10.05it/s][A
 72%|███████████████████████         | 87967/122310 [2:43:44<1:09:11,  8.27it/s][A
 72%|███████████████████████         | 87975/122310 [2:43:45<1:02:43,  9.12it/s][A
 72%|████████████████████████▍         | 87986/122310 [2:43:45<52:03, 10.99it/s][A
 72%|████████████████████████▍         | 88000/122310 [2:43:46<42:11, 13.55it/s][A
 72%|████████████████████████▍         | 88002/122310 [2:43:47<55:18, 10.34it/s][A
 72%|████████████████████████▍         | 88011/122310 [2:43:47<51:53, 11.02it/s][A
 72%|████████████████████████▍         | 88019/122310 [2:43:48<51:58, 11.00it/s][A
 72%|████████████████████████▍         | 88024/122310 [2:43:49<58:45,  9.73it/s][A
 72%|████████████████████████▍         | 88033/122310 [2:43:50<53:59, 10.58

step: 25520, loss: 77.56339034853536, epoch: 1



 72%|███████████████████████         | 88082/122310 [2:43:56<1:10:21,  8.11it/s][A
 72%|███████████████████████         | 88091/122310 [2:43:57<1:00:11,  9.48it/s][A
 72%|████████████████████████▍         | 88102/122310 [2:43:57<50:44, 11.24it/s][A
 72%|████████████████████████▍         | 88109/122310 [2:43:58<52:42, 10.81it/s][A
 72%|███████████████████████         | 88112/122310 [2:43:59<1:04:53,  8.78it/s][A
 72%|████████████████████████▍         | 88125/122310 [2:44:00<49:23, 11.54it/s][A
 72%|████████████████████████▍         | 88130/122310 [2:44:00<56:10, 10.14it/s][A
 72%|████████████████████████▌         | 88140/122310 [2:44:01<50:47, 11.21it/s][A
 72%|████████████████████████▌         | 88147/122310 [2:44:02<52:37, 10.82it/s][A
 72%|████████████████████████▌         | 88155/122310 [2:44:02<52:11, 10.91it/s][A
 72%|████████████████████████▌         | 88167/122310 [2:44:03<44:56, 12.66it/s][A
 72%|████████████████████████▌         | 88177/122310 [2:44:04<43:42, 13.02

step: 25540, loss: 68.98946106505089, epoch: 1



 72%|████████████████████████▌         | 88268/122310 [2:44:10<43:57, 12.91it/s][A
 72%|████████████████████████▌         | 88282/122310 [2:44:11<38:06, 14.88it/s][A
 72%|████████████████████████▌         | 88286/122310 [2:44:12<47:01, 12.06it/s][A
 72%|████████████████████████▌         | 88291/122310 [2:44:13<53:43, 10.55it/s][A
 72%|████████████████████████▌         | 88302/122310 [2:44:13<47:37, 11.90it/s][A
 72%|████████████████████████▌         | 88309/122310 [2:44:14<50:19, 11.26it/s][A
 72%|████████████████████████▌         | 88318/122310 [2:44:15<48:35, 11.66it/s][A
 72%|████████████████████████▌         | 88328/122310 [2:44:15<46:00, 12.31it/s][A
 72%|████████████████████████▌         | 88334/122310 [2:44:16<50:55, 11.12it/s][A
 72%|████████████████████████▌         | 88340/122310 [2:44:17<54:58, 10.30it/s][A
 72%|███████████████████████         | 88345/122310 [2:44:18<1:00:58,  9.28it/s][A
 72%|████████████████████████▌         | 88365/122310 [2:44:18<38:06, 14.84

step: 25560, loss: 77.7365062508648, epoch: 1



 72%|████████████████████████▌         | 88450/122310 [2:44:25<45:31, 12.39it/s][A
 72%|████████████████████████▌         | 88463/122310 [2:44:26<39:57, 14.12it/s][A
 72%|████████████████████████▌         | 88469/122310 [2:44:26<45:55, 12.28it/s][A
 72%|████████████████████████▌         | 88475/122310 [2:44:27<50:27, 11.18it/s][A
 72%|████████████████████████▌         | 88485/122310 [2:44:28<47:15, 11.93it/s][A
 72%|████████████████████████▌         | 88490/122310 [2:44:28<54:19, 10.38it/s][A
 72%|████████████████████████▌         | 88499/122310 [2:44:29<51:06, 11.03it/s][A
 72%|████████████████████████▌         | 88505/122310 [2:44:30<54:56, 10.25it/s][A
 72%|████████████████████████▌         | 88514/122310 [2:44:31<51:36, 10.92it/s][A
 72%|███████████████████████▏        | 88517/122310 [2:44:31<1:03:09,  8.92it/s][A
 72%|████████████████████████▌         | 88530/122310 [2:44:32<48:31, 11.60it/s][A
 72%|████████████████████████▌         | 88539/122310 [2:44:33<47:26, 11.87

step: 25580, loss: 81.15913181699202, epoch: 1



 72%|███████████████████████▏        | 88587/122310 [2:44:39<1:08:13,  8.24it/s][A
 72%|███████████████████████▏        | 88591/122310 [2:44:40<1:13:00,  7.70it/s][A
 72%|███████████████████████▏        | 88598/122310 [2:44:41<1:08:47,  8.17it/s][A
 72%|████████████████████████▋         | 88610/122310 [2:44:41<54:48, 10.25it/s][A
 72%|████████████████████████▋         | 88628/122310 [2:44:42<39:51, 14.08it/s][A
 72%|████████████████████████▋         | 88636/122310 [2:44:43<42:16, 13.28it/s][A
 72%|████████████████████████▋         | 88647/122310 [2:44:44<40:34, 13.83it/s][A
 72%|████████████████████████▋         | 88660/122310 [2:44:45<47:50, 11.72it/s][A
 72%|████████████████████████▋         | 88672/122310 [2:44:46<43:23, 12.92it/s][A
 73%|████████████████████████▋         | 88678/122310 [2:44:46<47:39, 11.76it/s][A
 73%|████████████████████████▋         | 88683/122310 [2:44:47<53:22, 10.50it/s][A
 73%|████████████████████████▋         | 88692/122310 [2:44:48<50:40, 11.06

step: 25600, loss: 75.12655151113984, epoch: 1
sim1 and sim2 are 0.5151611123716987, 0.2436488289033618
cosine of pred and queen: 0.17845736741627058
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: maladie
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: country
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mah


 73%|████████████████████████▋         | 88754/122310 [2:45:09<44:51, 12.47it/s][A

Actual: thailand:thai::india:indian, pred: services
Actual: sweden:swedish::netherlands:dutch, pred: also
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: start
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: victory
Actual: denmark:danish::germany:german, pred: size
Accuracy is 0.3111111111111111
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: emblem
Actual: vanish:vanishes::eat:eats, pred: drink
Actual: sing:sings::shuffle:shuffles, pred: emblem
Actual: sit:sits::go:goes, pred: emblem
Actual: say:says::provide:provides, pred: marked
Actual: scream:screams::sing:sings, pred: shebna
Actual: play:plays::listen:listens, p


 73%|██████████████████████▍        | 88759/122310 [2:46:07<27:07:47,  2.91s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.10650887573964497



 73%|██████████████████████▍        | 88767/122310 [2:46:07<19:07:52,  2.05s/it][A
 73%|██████████████████████▌        | 88774/122310 [2:46:08<14:07:07,  1.52s/it][A
 73%|███████████████████████▏        | 88789/122310 [2:46:09<7:52:22,  1.18it/s][A
 73%|███████████████████████▏        | 88799/122310 [2:46:09<5:41:03,  1.64it/s][A
 73%|███████████████████████▏        | 88809/122310 [2:46:10<4:10:09,  2.23it/s][A
 73%|███████████████████████▏        | 88817/122310 [2:46:11<3:19:24,  2.80it/s][A
 73%|███████████████████████▏        | 88826/122310 [2:46:12<2:34:40,  3.61it/s][A
 73%|███████████████████████▏        | 88837/122310 [2:46:12<1:55:31,  4.83it/s][A
 73%|███████████████████████▏        | 88850/122310 [2:46:13<1:25:47,  6.50it/s][A
 73%|███████████████████████▏        | 88857/122310 [2:46:14<1:20:42,  6.91it/s][A
 73%|███████████████████████▏        | 88862/122310 [2:46:15<1:21:39,  6.83it/s][A
 73%|███████████████████████▎        | 88873/122310 [2:46:16<1:05:55,  8.45

step: 25620, loss: 75.73917445249317, epoch: 1



 73%|████████████████████████▋         | 88955/122310 [2:46:22<52:33, 10.58it/s][A
 73%|████████████████████████▋         | 88964/122310 [2:46:23<50:15, 11.06it/s][A
 73%|████████████████████████▋         | 88975/122310 [2:46:24<45:36, 12.18it/s][A
 73%|████████████████████████▋         | 88980/122310 [2:46:25<52:21, 10.61it/s][A
 73%|████████████████████████▋         | 88985/122310 [2:46:25<58:49,  9.44it/s][A
 73%|███████████████████████▎        | 88987/122310 [2:46:26<1:14:38,  7.44it/s][A
 73%|███████████████████████▎        | 88994/122310 [2:46:27<1:08:52,  8.06it/s][A
 73%|████████████████████████▋         | 89012/122310 [2:46:28<42:55, 12.93it/s][A
 73%|████████████████████████▋         | 89018/122310 [2:46:28<48:18, 11.49it/s][A
 73%|████████████████████████▋         | 89030/122310 [2:46:29<43:00, 12.90it/s][A
 73%|████████████████████████▊         | 89046/122310 [2:46:30<35:47, 15.49it/s][A
 73%|████████████████████████▊         | 89060/122310 [2:46:31<33:32, 16.52

step: 25640, loss: 79.83005959554194, epoch: 1



 73%|████████████████████████▊         | 89159/122310 [2:46:37<43:11, 12.79it/s][A
 73%|████████████████████████▊         | 89163/122310 [2:46:38<52:05, 10.61it/s][A
 73%|████████████████████████▊         | 89174/122310 [2:46:39<46:31, 11.87it/s][A
 73%|████████████████████████▊         | 89177/122310 [2:46:39<57:55,  9.53it/s][A
 73%|███████████████████████▎        | 89179/122310 [2:46:40<1:13:58,  7.46it/s][A
 73%|███████████████████████▎        | 89183/122310 [2:46:41<1:20:28,  6.86it/s][A
 73%|███████████████████████▎        | 89192/122310 [2:46:42<1:05:45,  8.39it/s][A
 73%|███████████████████████▎        | 89196/122310 [2:46:42<1:13:43,  7.49it/s][A
 73%|███████████████████████▎        | 89204/122310 [2:46:43<1:05:27,  8.43it/s][A
 73%|████████████████████████▊         | 89221/122310 [2:46:44<43:18, 12.73it/s][A
 73%|████████████████████████▊         | 89225/122310 [2:46:45<52:26, 10.51it/s][A
 73%|████████████████████████▊         | 89239/122310 [2:46:45<42:25, 12.99

step: 25660, loss: 81.38398769822965, epoch: 1



 73%|████████████████████████▊         | 89338/122310 [2:46:52<40:50, 13.45it/s][A
 73%|███████████████████████▎        | 89343/122310 [2:46:54<1:02:50,  8.74it/s][A
 73%|███████████████████████▍        | 89348/122310 [2:46:55<1:06:48,  8.22it/s][A
 73%|███████████████████████▍        | 89354/122310 [2:46:56<1:07:42,  8.11it/s][A
 73%|███████████████████████▍        | 89363/122310 [2:46:56<1:00:34,  9.06it/s][A
 73%|███████████████████████▍        | 89370/122310 [2:46:57<1:00:49,  9.03it/s][A
 73%|████████████████████████▊         | 89380/122310 [2:46:58<54:09, 10.13it/s][A
 73%|███████████████████████▍        | 89385/122310 [2:46:59<1:00:39,  9.05it/s][A
 73%|███████████████████████▍        | 89390/122310 [2:46:59<1:06:31,  8.25it/s][A
 73%|███████████████████████▍        | 89396/122310 [2:47:00<1:07:45,  8.10it/s][A
 73%|███████████████████████▍        | 89401/122310 [2:47:01<1:12:12,  7.60it/s][A
 73%|███████████████████████▍        | 89409/122310 [2:47:02<1:05:16,  8.40

step: 25680, loss: 68.75374965738236, epoch: 1



 73%|████████████████████████▊         | 89478/122310 [2:47:08<46:39, 11.73it/s][A
 73%|████████████████████████▉         | 89490/122310 [2:47:09<42:19, 12.92it/s][A
 73%|████████████████████████▉         | 89500/122310 [2:47:09<42:09, 12.97it/s][A
 73%|████████████████████████▉         | 89506/122310 [2:47:10<48:08, 11.36it/s][A
 73%|████████████████████████▉         | 89511/122310 [2:47:11<54:59,  9.94it/s][A
 73%|████████████████████████▉         | 89517/122310 [2:47:12<58:32,  9.34it/s][A
 73%|████████████████████████▉         | 89525/122310 [2:47:13<56:21,  9.70it/s][A
 73%|████████████████████████▉         | 89535/122310 [2:47:13<51:03, 10.70it/s][A
 73%|████████████████████████▉         | 89545/122310 [2:47:14<47:49, 11.42it/s][A
 73%|████████████████████████▉         | 89551/122310 [2:47:15<52:45, 10.35it/s][A
 73%|███████████████████████▍        | 89555/122310 [2:47:16<1:02:06,  8.79it/s][A
 73%|████████████████████████▉         | 89570/122310 [2:47:16<45:25, 12.01

step: 25700, loss: 87.57697750498126, epoch: 1



 73%|████████████████████████▉         | 89634/122310 [2:47:23<58:36,  9.29it/s][A
 73%|████████████████████████▉         | 89643/122310 [2:47:24<53:57, 10.09it/s][A
 73%|███████████████████████▍        | 89647/122310 [2:47:25<1:02:35,  8.70it/s][A
 73%|████████████████████████▉         | 89659/122310 [2:47:26<49:50, 10.92it/s][A
 73%|████████████████████████▉         | 89670/122310 [2:47:26<44:59, 12.09it/s][A
 73%|████████████████████████▉         | 89681/122310 [2:47:27<42:06, 12.91it/s][A
 73%|████████████████████████▉         | 89689/122310 [2:47:28<44:18, 12.27it/s][A
 73%|████████████████████████▉         | 89693/122310 [2:47:28<53:08, 10.23it/s][A
 73%|████████████████████████▉         | 89705/122310 [2:47:29<45:16, 12.00it/s][A
 73%|████████████████████████▉         | 89718/122310 [2:47:30<39:53, 13.62it/s][A
 73%|████████████████████████▉         | 89722/122310 [2:47:31<48:40, 11.16it/s][A
 73%|████████████████████████▉         | 89730/122310 [2:47:31<49:28, 10.97

step: 25720, loss: 71.35496185906497, epoch: 1



 73%|████████████████████████▉         | 89821/122310 [2:47:38<36:12, 14.96it/s][A
 73%|████████████████████████▉         | 89833/122310 [2:47:40<46:03, 11.75it/s][A
 73%|████████████████████████▉         | 89836/122310 [2:47:40<54:48,  9.88it/s][A
 73%|████████████████████████▉         | 89845/122310 [2:47:41<51:55, 10.42it/s][A
 73%|████████████████████████▉         | 89853/122310 [2:47:42<51:16, 10.55it/s][A
 73%|████████████████████████▉         | 89869/122310 [2:47:43<39:37, 13.64it/s][A
 73%|████████████████████████▉         | 89877/122310 [2:47:43<42:00, 12.87it/s][A
 73%|████████████████████████▉         | 89885/122310 [2:47:44<45:09, 11.97it/s][A
 73%|████████████████████████▉         | 89897/122310 [2:47:45<42:30, 12.71it/s][A
 74%|████████████████████████▉         | 89910/122310 [2:47:46<38:34, 14.00it/s][A
 74%|████████████████████████▉         | 89928/122310 [2:47:47<32:10, 16.78it/s][A
 74%|█████████████████████████         | 89935/122310 [2:47:47<36:49, 14.65

step: 25740, loss: 72.31119443099294, epoch: 1



 74%|█████████████████████████         | 90023/122310 [2:47:53<38:11, 14.09it/s][A
 74%|█████████████████████████         | 90031/122310 [2:47:54<40:57, 13.13it/s][A
 74%|█████████████████████████         | 90037/122310 [2:47:55<46:15, 11.63it/s][A
 74%|█████████████████████████         | 90043/122310 [2:47:56<50:29, 10.65it/s][A
 74%|█████████████████████████         | 90057/122310 [2:47:56<41:00, 13.11it/s][A
 74%|█████████████████████████         | 90070/122310 [2:47:57<37:10, 14.45it/s][A
 74%|█████████████████████████         | 90079/122310 [2:47:58<38:52, 13.82it/s][A
 74%|█████████████████████████         | 90083/122310 [2:47:59<47:57, 11.20it/s][A
 74%|███████████████████████▌        | 90085/122310 [2:47:59<1:01:33,  8.72it/s][A
 74%|█████████████████████████         | 90098/122310 [2:48:00<47:46, 11.24it/s][A
 74%|█████████████████████████         | 90108/122310 [2:48:01<45:19, 11.84it/s][A
 74%|█████████████████████████         | 90110/122310 [2:48:02<59:08,  9.07

step: 25760, loss: 79.45168804342691, epoch: 1



 74%|█████████████████████████         | 90190/122310 [2:48:09<43:20, 12.35it/s][A
 74%|█████████████████████████         | 90200/122310 [2:48:09<42:38, 12.55it/s][A
 74%|█████████████████████████         | 90208/122310 [2:48:10<44:38, 11.98it/s][A
 74%|█████████████████████████         | 90217/122310 [2:48:11<44:39, 11.98it/s][A
 74%|█████████████████████████         | 90227/122310 [2:48:12<43:44, 12.22it/s][A
 74%|█████████████████████████         | 90233/122310 [2:48:12<49:36, 10.78it/s][A
 74%|███████████████████████▌        | 90235/122310 [2:48:13<1:03:25,  8.43it/s][A
 74%|███████████████████████▌        | 90241/122310 [2:48:14<1:04:50,  8.24it/s][A
 74%|█████████████████████████         | 90250/122310 [2:48:15<57:36,  9.27it/s][A
 74%|█████████████████████████         | 90268/122310 [2:48:15<39:25, 13.54it/s][A
 74%|█████████████████████████         | 90273/122310 [2:48:16<46:01, 11.60it/s][A
 74%|█████████████████████████         | 90280/122310 [2:48:17<49:06, 10.87

step: 25780, loss: 70.31963345691719, epoch: 1



 74%|█████████████████████████         | 90345/122310 [2:48:24<56:14,  9.47it/s][A
 74%|█████████████████████████         | 90357/122310 [2:48:24<46:15, 11.51it/s][A
 74%|█████████████████████████         | 90363/122310 [2:48:25<50:29, 10.54it/s][A
 74%|█████████████████████████         | 90371/122310 [2:48:26<49:57, 10.66it/s][A
 74%|█████████████████████████         | 90377/122310 [2:48:26<53:36,  9.93it/s][A
 74%|█████████████████████████▏        | 90389/122310 [2:48:27<44:46, 11.88it/s][A
 74%|█████████████████████████▏        | 90399/122310 [2:48:28<42:53, 12.40it/s][A
 74%|█████████████████████████▏        | 90402/122310 [2:48:29<53:52,  9.87it/s][A
 74%|█████████████████████████▏        | 90414/122310 [2:48:29<44:54, 11.84it/s][A
 74%|█████████████████████████▏        | 90420/122310 [2:48:30<49:30, 10.74it/s][A
 74%|█████████████████████████▏        | 90425/122310 [2:48:31<55:40,  9.55it/s][A
 74%|███████████████████████▋        | 90429/122310 [2:48:32<1:03:38,  8.35

step: 25800, loss: 82.55994949205174, epoch: 1



 74%|█████████████████████████▏        | 90506/122310 [2:48:38<50:54, 10.41it/s][A
 74%|█████████████████████████▏        | 90518/122310 [2:48:39<43:32, 12.17it/s][A
 74%|█████████████████████████▏        | 90520/122310 [2:48:40<56:32,  9.37it/s][A
 74%|█████████████████████████▏        | 90526/122310 [2:48:40<58:48,  9.01it/s][A
 74%|█████████████████████████▏        | 90540/122310 [2:48:41<44:03, 12.02it/s][A
 74%|█████████████████████████▏        | 90544/122310 [2:48:42<52:38, 10.06it/s][A
 74%|███████████████████████▋        | 90548/122310 [2:48:43<1:01:04,  8.67it/s][A
 74%|███████████████████████▋        | 90553/122310 [2:48:43<1:05:19,  8.10it/s][A
 74%|█████████████████████████▏        | 90561/122310 [2:48:44<58:59,  8.97it/s][A
 74%|███████████████████████▋        | 90567/122310 [2:48:45<1:00:29,  8.75it/s][A
 74%|█████████████████████████▏        | 90580/122310 [2:48:46<46:12, 11.45it/s][A
 74%|█████████████████████████▏        | 90591/122310 [2:48:46<42:13, 12.52

step: 25820, loss: 91.9855645149769, epoch: 1



 74%|█████████████████████████▏        | 90679/122310 [2:48:53<35:23, 14.90it/s][A
 74%|█████████████████████████▏        | 90684/122310 [2:48:54<42:03, 12.53it/s][A
 74%|█████████████████████████▏        | 90691/122310 [2:48:54<45:18, 11.63it/s][A
 74%|█████████████████████████▏        | 90705/122310 [2:48:55<38:02, 13.85it/s][A
 74%|█████████████████████████▏        | 90707/122310 [2:48:56<50:01, 10.53it/s][A
 74%|█████████████████████████▏        | 90712/122310 [2:48:57<55:53,  9.42it/s][A
 74%|█████████████████████████▏        | 90727/122310 [2:48:57<41:17, 12.75it/s][A
 74%|█████████████████████████▏        | 90732/122310 [2:48:58<47:56, 10.98it/s][A
 74%|█████████████████████████▏        | 90738/122310 [2:48:59<52:06, 10.10it/s][A
 74%|█████████████████████████▏        | 90745/122310 [2:49:00<52:40,  9.99it/s][A
 74%|█████████████████████████▏        | 90750/122310 [2:49:00<58:10,  9.04it/s][A
 74%|█████████████████████████▏        | 90765/122310 [2:49:01<42:14, 12.45

step: 25840, loss: 84.67655071508813, epoch: 1



 74%|█████████████████████████▎        | 90839/122310 [2:49:08<48:28, 10.82it/s][A
 74%|█████████████████████████▎        | 90846/122310 [2:49:08<50:19, 10.42it/s][A
 74%|█████████████████████████▎        | 90853/122310 [2:49:09<51:27, 10.19it/s][A
 74%|█████████████████████████▎        | 90857/122310 [2:49:10<59:48,  8.76it/s][A
 74%|█████████████████████████▎        | 90867/122310 [2:49:10<51:20, 10.21it/s][A
 74%|█████████████████████████▎        | 90879/122310 [2:49:11<43:25, 12.07it/s][A
 74%|█████████████████████████▎        | 90892/122310 [2:49:12<38:01, 13.77it/s][A
 74%|█████████████████████████▎        | 90900/122310 [2:49:13<40:32, 12.91it/s][A
 74%|█████████████████████████▎        | 90913/122310 [2:49:13<36:30, 14.33it/s][A
 74%|█████████████████████████▎        | 90923/122310 [2:49:14<37:02, 14.13it/s][A
 74%|█████████████████████████▎        | 90940/122310 [2:49:15<31:00, 16.86it/s][A
 74%|█████████████████████████▎        | 90957/122310 [2:49:16<27:52, 18.75

step: 25860, loss: 74.86407829292693, epoch: 1



 74%|█████████████████████████▎        | 91019/122310 [2:49:22<58:47,  8.87it/s][A
 74%|███████████████████████▊        | 91022/122310 [2:49:23<1:10:06,  7.44it/s][A
 74%|█████████████████████████▎        | 91032/122310 [2:49:24<56:08,  9.28it/s][A
 74%|███████████████████████▊        | 91035/122310 [2:49:24<1:07:17,  7.75it/s][A
 74%|█████████████████████████▎        | 91043/122310 [2:49:25<59:51,  8.70it/s][A
 74%|█████████████████████████▎        | 91052/122310 [2:49:26<53:24,  9.75it/s][A
 74%|█████████████████████████▎        | 91064/122310 [2:49:27<44:10, 11.79it/s][A
 74%|█████████████████████████▎        | 91073/122310 [2:49:27<43:33, 11.95it/s][A
 74%|█████████████████████████▎        | 91079/122310 [2:49:28<48:06, 10.82it/s][A
 74%|█████████████████████████▎        | 91086/122310 [2:49:29<49:59, 10.41it/s][A
 74%|█████████████████████████▎        | 91093/122310 [2:49:30<51:08, 10.17it/s][A
 74%|█████████████████████████▎        | 91104/122310 [2:49:30<44:49, 11.60

step: 25880, loss: 74.79952468852716, epoch: 1



 75%|█████████████████████████▎        | 91191/122310 [2:49:37<36:44, 14.12it/s][A
 75%|█████████████████████████▎        | 91196/122310 [2:49:38<43:25, 11.94it/s][A
 75%|█████████████████████████▎        | 91203/122310 [2:49:38<46:10, 11.23it/s][A
 75%|█████████████████████████▎        | 91208/122310 [2:49:39<52:31,  9.87it/s][A
 75%|███████████████████████▊        | 91209/122310 [2:49:40<1:10:29,  7.35it/s][A
 75%|█████████████████████████▎        | 91220/122310 [2:49:41<53:41,  9.65it/s][A
 75%|█████████████████████████▎        | 91233/122310 [2:49:41<42:53, 12.08it/s][A
 75%|█████████████████████████▎        | 91237/122310 [2:49:42<51:21, 10.08it/s][A
 75%|█████████████████████████▎        | 91243/122310 [2:49:43<54:25,  9.51it/s][A
 75%|█████████████████████████▎        | 91259/122310 [2:49:43<39:08, 13.22it/s][A
 75%|█████████████████████████▎        | 91266/122310 [2:49:44<42:44, 12.11it/s][A
 75%|█████████████████████████▎        | 91273/122310 [2:49:45<45:50, 11.28

step: 25900, loss: 74.47473086328831, epoch: 1



 75%|███████████████████████▉        | 91333/122310 [2:49:52<1:01:58,  8.33it/s][A
 75%|███████████████████████▉        | 91336/122310 [2:49:52<1:12:30,  7.12it/s][A
 75%|███████████████████████▉        | 91344/122310 [2:49:53<1:02:27,  8.26it/s][A
 75%|███████████████████████▉        | 91348/122310 [2:49:54<1:09:29,  7.43it/s][A
 75%|█████████████████████████▍        | 91359/122310 [2:49:54<53:07,  9.71it/s][A
 75%|███████████████████████▉        | 91362/122310 [2:49:58<2:21:44,  3.64it/s][A
 75%|███████████████████████▉        | 91365/122310 [2:49:59<2:18:39,  3.72it/s][A
 75%|███████████████████████▉        | 91376/122310 [2:50:00<1:28:00,  5.86it/s][A
 75%|███████████████████████▉        | 91386/122310 [2:50:00<1:08:43,  7.50it/s][A
 75%|███████████████████████▉        | 91391/122310 [2:50:01<1:10:02,  7.36it/s][A
 75%|███████████████████████▉        | 91396/122310 [2:50:02<1:11:19,  7.22it/s][A
 75%|███████████████████████▉        | 91401/122310 [2:50:03<1:12:28,  7.11

step: 25920, loss: 84.08410998823638, epoch: 1



 75%|█████████████████████████▍        | 91442/122310 [2:50:06<45:17, 11.36it/s][A
 75%|█████████████████████████▍        | 91449/122310 [2:50:07<47:35, 10.81it/s][A
 75%|███████████████████████▉        | 91451/122310 [2:50:08<1:01:07,  8.41it/s][A
 75%|███████████████████████▉        | 91455/122310 [2:50:08<1:07:58,  7.57it/s][A
 75%|███████████████████████▉        | 91463/122310 [2:50:09<1:00:09,  8.55it/s][A
 75%|███████████████████████▉        | 91468/122310 [2:50:10<1:03:54,  8.04it/s][A
 75%|█████████████████████████▍        | 91476/122310 [2:50:11<57:42,  8.91it/s][A
 75%|█████████████████████████▍        | 91484/122310 [2:50:11<53:58,  9.52it/s][A
 75%|█████████████████████████▍        | 91491/122310 [2:50:12<53:56,  9.52it/s][A
 75%|█████████████████████████▍        | 91506/122310 [2:50:13<40:05, 12.81it/s][A
 75%|█████████████████████████▍        | 91522/122310 [2:50:14<33:03, 15.52it/s][A
 75%|█████████████████████████▍        | 91532/122310 [2:50:14<34:22, 14.92

step: 25940, loss: 112.4452675014401, epoch: 1



 75%|█████████████████████████▍        | 91613/122310 [2:50:21<35:15, 14.51it/s][A
 75%|█████████████████████████▍        | 91619/122310 [2:50:22<40:29, 12.63it/s][A
 75%|█████████████████████████▍        | 91632/122310 [2:50:22<36:06, 14.16it/s][A
 75%|█████████████████████████▍        | 91635/122310 [2:50:23<45:50, 11.15it/s][A
 75%|█████████████████████████▍        | 91646/122310 [2:50:24<41:29, 12.32it/s][A
 75%|█████████████████████████▍        | 91659/122310 [2:50:25<36:39, 13.94it/s][A
 75%|█████████████████████████▍        | 91661/122310 [2:50:25<48:26, 10.54it/s][A
 75%|█████████████████████████▍        | 91667/122310 [2:50:26<51:43,  9.87it/s][A
 75%|█████████████████████████▍        | 91679/122310 [2:50:27<43:13, 11.81it/s][A
 75%|█████████████████████████▍        | 91687/122310 [2:50:27<44:13, 11.54it/s][A
 75%|█████████████████████████▍        | 91695/122310 [2:50:28<44:54, 11.36it/s][A
 75%|█████████████████████████▍        | 91707/122310 [2:50:29<39:45, 12.83

step: 25960, loss: 68.69633760525194, epoch: 1



 75%|█████████████████████████▌        | 91774/122310 [2:50:36<45:32, 11.18it/s][A
 75%|█████████████████████████▌        | 91783/122310 [2:50:36<44:10, 11.52it/s][A
 75%|████████████████████████        | 91789/122310 [2:50:38<1:02:54,  8.09it/s][A
 75%|█████████████████████████▌        | 91796/122310 [2:50:38<59:56,  8.48it/s][A
 75%|█████████████████████████▌        | 91811/122310 [2:50:39<43:46, 11.61it/s][A
 75%|█████████████████████████▌        | 91821/122310 [2:50:40<41:46, 12.16it/s][A
 75%|█████████████████████████▌        | 91830/122310 [2:50:41<53:14,  9.54it/s][A
 75%|█████████████████████████▌        | 91838/122310 [2:50:42<51:23,  9.88it/s][A
 75%|█████████████████████████▌        | 91845/122310 [2:50:43<51:53,  9.79it/s][A
 75%|█████████████████████████▌        | 91856/122310 [2:50:44<45:19, 11.20it/s][A
 75%|█████████████████████████▌        | 91866/122310 [2:50:44<42:45, 11.87it/s][A
 75%|█████████████████████████▌        | 91876/122310 [2:50:45<41:06, 12.34

step: 25980, loss: 81.53981276518738, epoch: 1



 75%|█████████████████████████▌        | 91930/122310 [2:50:50<36:33, 13.85it/s][A
 75%|█████████████████████████▌        | 91945/122310 [2:50:51<32:15, 15.69it/s][A
 75%|█████████████████████████▌        | 91955/122310 [2:50:52<33:30, 15.10it/s][A
 75%|█████████████████████████▌        | 91967/122310 [2:50:52<32:43, 15.45it/s][A
 75%|█████████████████████████▌        | 91971/122310 [2:50:53<40:17, 12.55it/s][A
 75%|█████████████████████████▌        | 91977/122310 [2:50:54<44:56, 11.25it/s][A
 75%|█████████████████████████▌        | 91982/122310 [2:50:55<50:45,  9.96it/s][A
 75%|█████████████████████████▌        | 91990/122310 [2:50:55<49:19, 10.24it/s][A
 75%|█████████████████████████▌        | 92000/122310 [2:50:56<44:49, 11.27it/s][A
 75%|█████████████████████████▌        | 92014/122310 [2:50:57<37:05, 13.61it/s][A
 75%|█████████████████████████▌        | 92024/122310 [2:50:57<37:08, 13.59it/s][A
 75%|█████████████████████████▌        | 92036/122310 [2:50:58<34:58, 14.42

step: 26000, loss: 172.07172531108301, epoch: 1
sim1 and sim2 are 0.5144512826382164, 0.2479587468792517
cosine of pred and queen: 0.2385091100632627
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: europe
Actual: nigeria:africa::france:europe, pred: europe
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mah


 75%|█████████████████████████▌        | 92112/122310 [2:51:19<27:20, 18.41it/s][A

Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: services
Actual: sweden:swedish::netherlands:dutch, pred: also
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: start
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: victory
Actual: denmark:danish::germany:german, pred: european
Accuracy is 0.37777777777777777
Actual: walk:walks::vanish:vanishes, pred: emblem
Actual: work:works::generate:generates, pred: emblem
Actual: think:thinks::talk:talks, pred: emblem
Actual: vanish:vanishes::eat:eats, pred: drink
Actual: sing:sings


 75%|███████████████████████▎       | 92117/122310 [2:52:21<18:05:27,  2.16s/it][A
 75%|███████████████████████▎       | 92124/122310 [2:52:21<14:23:40,  1.72s/it][A
 75%|███████████████████████▎       | 92129/122310 [2:52:22<12:01:39,  1.43s/it][A
 75%|████████████████████████        | 92137/122310 [2:52:23<8:44:33,  1.04s/it][A
 75%|████████████████████████        | 92144/122310 [2:52:24<6:38:19,  1.26it/s][A
 75%|████████████████████████        | 92158/122310 [2:52:24<3:57:10,  2.12it/s][A
 75%|████████████████████████        | 92162/122310 [2:52:25<3:35:08,  2.34it/s][A
 75%|████████████████████████        | 92175/122310 [2:52:26<2:18:09,  3.64it/s][A
 75%|████████████████████████        | 92186/122310 [2:52:26<1:43:08,  4.87it/s][A
 75%|████████████████████████        | 92195/122310 [2:52:27<1:25:28,  5.87it/s][A
 75%|████████████████████████        | 92208/122310 [2:52:28<1:04:18,  7.80it/s][A
 75%|████████████████████████▏       | 92211/122310 [2:52:29<1:10:24,  7.12

step: 26020, loss: 71.0126254672553, epoch: 1



 75%|█████████████████████████▋        | 92290/122310 [2:52:36<37:07, 13.48it/s][A
 75%|█████████████████████████▋        | 92299/122310 [2:52:37<37:53, 13.20it/s][A
 75%|█████████████████████████▋        | 92305/122310 [2:52:37<42:29, 11.77it/s][A
 75%|█████████████████████████▋        | 92320/122310 [2:52:38<34:35, 14.45it/s][A
 75%|█████████████████████████▋        | 92326/122310 [2:52:39<39:35, 12.62it/s][A
 75%|█████████████████████████▋        | 92335/122310 [2:52:39<39:43, 12.58it/s][A
 76%|█████████████████████████▋        | 92345/122310 [2:52:40<38:29, 12.97it/s][A
 76%|█████████████████████████▋        | 92353/122310 [2:52:41<40:18, 12.38it/s][A
 76%|█████████████████████████▋        | 92363/122310 [2:52:42<39:03, 12.78it/s][A
 76%|█████████████████████████▋        | 92378/122310 [2:52:42<32:55, 15.15it/s][A
 76%|█████████████████████████▋        | 92383/122310 [2:52:43<39:19, 12.68it/s][A
 76%|█████████████████████████▋        | 92393/122310 [2:52:44<38:18, 13.01

step: 26040, loss: 75.94219592218619, epoch: 1



 76%|█████████████████████████▋        | 92475/122310 [2:52:50<32:23, 15.35it/s][A
 76%|█████████████████████████▋        | 92485/122310 [2:52:51<33:23, 14.89it/s][A
 76%|█████████████████████████▋        | 92495/122310 [2:52:52<34:05, 14.58it/s][A
 76%|█████████████████████████▋        | 92501/122310 [2:52:52<39:06, 12.70it/s][A
 76%|█████████████████████████▋        | 92503/122310 [2:52:53<51:08,  9.71it/s][A
 76%|█████████████████████████▋        | 92511/122310 [2:52:54<49:05, 10.12it/s][A
 76%|█████████████████████████▋        | 92526/122310 [2:52:55<37:18, 13.30it/s][A
 76%|█████████████████████████▋        | 92531/122310 [2:52:55<43:35, 11.38it/s][A
 76%|█████████████████████████▋        | 92544/122310 [2:52:56<37:07, 13.36it/s][A
 76%|█████████████████████████▋        | 92558/122310 [2:52:57<32:49, 15.11it/s][A
 76%|█████████████████████████▋        | 92566/122310 [2:52:58<35:36, 13.92it/s][A
 76%|█████████████████████████▋        | 92576/122310 [2:52:58<35:51, 13.82

step: 26060, loss: 90.57464898705352, epoch: 1



 76%|█████████████████████████▊        | 92641/122310 [2:53:05<46:44, 10.58it/s][A
 76%|█████████████████████████▊        | 92647/122310 [2:53:06<50:05,  9.87it/s][A
 76%|█████████████████████████▊        | 92652/122310 [2:53:06<55:03,  8.98it/s][A
 76%|█████████████████████████▊        | 92661/122310 [2:53:07<49:23, 10.01it/s][A
 76%|█████████████████████████▊        | 92668/122310 [2:53:08<49:59,  9.88it/s][A
 76%|█████████████████████████▊        | 92682/122310 [2:53:08<38:56, 12.68it/s][A
 76%|█████████████████████████▊        | 92692/122310 [2:53:09<37:58, 13.00it/s][A
 76%|█████████████████████████▊        | 92702/122310 [2:53:10<37:15, 13.24it/s][A
 76%|█████████████████████████▊        | 92726/122310 [2:53:11<25:45, 19.14it/s][A
 76%|█████████████████████████▊        | 92735/122310 [2:53:11<28:46, 17.13it/s][A
 76%|█████████████████████████▊        | 92743/122310 [2:53:12<32:11, 15.31it/s][A
 76%|█████████████████████████▊        | 92751/122310 [2:53:13<35:09, 14.01

step: 26080, loss: 76.60615480169436, epoch: 1



 76%|█████████████████████████▊        | 92828/122310 [2:53:19<36:47, 13.36it/s][A
 76%|█████████████████████████▊        | 92835/122310 [2:53:20<40:13, 12.21it/s][A
 76%|█████████████████████████▊        | 92842/122310 [2:53:21<43:11, 11.37it/s][A
 76%|█████████████████████████▊        | 92849/122310 [2:53:22<45:17, 10.84it/s][A
 76%|█████████████████████████▊        | 92865/122310 [2:53:22<34:56, 14.04it/s][A
 76%|█████████████████████████▊        | 92875/122310 [2:53:23<35:19, 13.89it/s][A
 76%|█████████████████████████▊        | 92886/122310 [2:53:24<34:33, 14.19it/s][A
 76%|█████████████████████████▊        | 92890/122310 [2:53:25<42:10, 11.63it/s][A
 76%|█████████████████████████▊        | 92897/122310 [2:53:25<44:23, 11.04it/s][A
 76%|█████████████████████████▊        | 92910/122310 [2:53:26<37:46, 12.97it/s][A
 76%|█████████████████████████▊        | 92914/122310 [2:53:27<45:40, 10.73it/s][A
 76%|█████████████████████████▊        | 92921/122310 [2:53:28<47:18, 10.35

step: 26100, loss: 91.54589848762778, epoch: 1



 76%|█████████████████████████▊        | 93009/122310 [2:53:34<33:28, 14.59it/s][A
 76%|█████████████████████████▊        | 93015/122310 [2:53:35<38:31, 12.67it/s][A
 76%|█████████████████████████▊        | 93020/122310 [2:53:36<45:00, 10.85it/s][A
 76%|█████████████████████████▊        | 93026/122310 [2:53:36<48:30, 10.06it/s][A
 76%|████████████████████████▎       | 93028/122310 [2:53:37<1:01:47,  7.90it/s][A
 76%|████████████████████████▎       | 93033/122310 [2:53:38<1:04:15,  7.59it/s][A
 76%|█████████████████████████▊        | 93051/122310 [2:53:39<38:23, 12.70it/s][A
 76%|█████████████████████████▊        | 93057/122310 [2:53:40<55:56,  8.72it/s][A
 76%|█████████████████████████▊        | 93064/122310 [2:53:41<54:24,  8.96it/s][A
 76%|█████████████████████████▊        | 93070/122310 [2:53:41<55:31,  8.78it/s][A
 76%|█████████████████████████▊        | 93079/122310 [2:53:42<50:05,  9.72it/s][A
 76%|█████████████████████████▉        | 93086/122310 [2:53:43<50:13,  9.70

step: 26120, loss: 74.3078850175377, epoch: 1



 76%|█████████████████████████▉        | 93192/122310 [2:53:49<27:24, 17.70it/s][A
 76%|█████████████████████████▉        | 93204/122310 [2:53:49<27:56, 17.36it/s][A
 76%|█████████████████████████▉        | 93219/122310 [2:53:50<26:26, 18.33it/s][A
 76%|█████████████████████████▉        | 93228/122310 [2:53:51<29:17, 16.54it/s][A
 76%|█████████████████████████▉        | 93239/122310 [2:53:52<30:01, 16.13it/s][A
 76%|█████████████████████████▉        | 93245/122310 [2:53:52<35:12, 13.76it/s][A
 76%|█████████████████████████▉        | 93249/122310 [2:53:53<42:58, 11.27it/s][A
 76%|█████████████████████████▉        | 93251/122310 [2:53:54<55:30,  8.72it/s][A
 76%|█████████████████████████▉        | 93260/122310 [2:53:55<49:30,  9.78it/s][A
 76%|████████████████████████▍       | 93262/122310 [2:53:55<1:02:55,  7.69it/s][A
 76%|█████████████████████████▉        | 93270/122310 [2:53:56<55:44,  8.68it/s][A
 76%|████████████████████████▍       | 93274/122310 [2:53:57<1:02:42,  7.72

step: 26140, loss: 86.72938201302546, epoch: 1



 76%|█████████████████████████▉        | 93329/122310 [2:54:03<57:10,  8.45it/s][A
 76%|█████████████████████████▉        | 93339/122310 [2:54:04<47:35, 10.15it/s][A
 76%|█████████████████████████▉        | 93345/122310 [2:54:05<49:58,  9.66it/s][A
 76%|█████████████████████████▉        | 93357/122310 [2:54:05<40:44, 11.84it/s][A
 76%|█████████████████████████▉        | 93364/122310 [2:54:06<42:59, 11.22it/s][A
 76%|█████████████████████████▉        | 93371/122310 [2:54:07<44:29, 10.84it/s][A
 76%|█████████████████████████▉        | 93381/122310 [2:54:08<40:49, 11.81it/s][A
 76%|█████████████████████████▉        | 93390/122310 [2:54:08<39:51, 12.09it/s][A
 76%|█████████████████████████▉        | 93397/122310 [2:54:09<42:03, 11.46it/s][A
 76%|█████████████████████████▉        | 93407/122310 [2:54:10<39:18, 12.25it/s][A
 76%|█████████████████████████▉        | 93413/122310 [2:54:10<43:19, 11.12it/s][A
 76%|█████████████████████████▉        | 93417/122310 [2:54:11<50:57,  9.45

step: 26160, loss: 73.17895903454432, epoch: 1



 76%|█████████████████████████▉        | 93490/122310 [2:54:17<59:46,  8.04it/s][A
 76%|████████████████████████▍       | 93492/122310 [2:54:18<1:13:54,  6.50it/s][A
 76%|████████████████████████▍       | 93498/122310 [2:54:19<1:07:37,  7.10it/s][A
 76%|████████████████████████▍       | 93503/122310 [2:54:20<1:07:57,  7.07it/s][A
 76%|████████████████████████▍       | 93508/122310 [2:54:20<1:07:37,  7.10it/s][A
 76%|████████████████████████▍       | 93512/122310 [2:54:21<1:11:55,  6.67it/s][A
 76%|█████████████████████████▉        | 93522/122310 [2:54:22<53:44,  8.93it/s][A
 76%|█████████████████████████▉        | 93529/122310 [2:54:22<52:00,  9.22it/s][A
 76%|██████████████████████████        | 93535/122310 [2:54:23<53:20,  8.99it/s][A
 76%|██████████████████████████        | 93543/122310 [2:54:24<49:34,  9.67it/s][A
 76%|██████████████████████████        | 93557/122310 [2:54:25<37:40, 12.72it/s][A
 76%|██████████████████████████        | 93567/122310 [2:54:25<36:26, 13.15

step: 26180, loss: 79.43999293943878, epoch: 1



 77%|██████████████████████████        | 93663/122310 [2:54:32<34:46, 13.73it/s][A
 77%|██████████████████████████        | 93672/122310 [2:54:32<35:36, 13.40it/s][A
 77%|██████████████████████████        | 93676/122310 [2:54:33<42:30, 11.22it/s][A
 77%|██████████████████████████        | 93683/122310 [2:54:34<44:04, 10.83it/s][A
 77%|██████████████████████████        | 93691/122310 [2:54:34<43:25, 10.98it/s][A
 77%|██████████████████████████        | 93699/122310 [2:54:35<43:04, 11.07it/s][A
 77%|██████████████████████████        | 93707/122310 [2:54:36<42:54, 11.11it/s][A
 77%|██████████████████████████        | 93713/122310 [2:54:37<46:00, 10.36it/s][A
 77%|██████████████████████████        | 93718/122310 [2:54:37<51:12,  9.31it/s][A
 77%|██████████████████████████        | 93722/122310 [2:54:38<57:39,  8.26it/s][A
 77%|██████████████████████████        | 93731/122310 [2:54:39<49:49,  9.56it/s][A
 77%|████████████████████████▌       | 93736/122310 [2:54:40<1:10:49,  6.72

step: 26200, loss: 152.47303073381303, epoch: 1



 77%|████████████████████████▌       | 93760/122310 [2:54:55<6:38:50,  1.19it/s][A
 77%|████████████████████████▌       | 93761/122310 [2:54:56<6:33:15,  1.21it/s][A
 77%|████████████████████████▌       | 93773/122310 [2:54:56<3:02:20,  2.61it/s][A
 77%|████████████████████████▌       | 93786/122310 [2:54:57<1:48:27,  4.38it/s][A
 77%|████████████████████████▌       | 93804/122310 [2:54:58<1:05:04,  7.30it/s][A
 77%|██████████████████████████        | 93820/122310 [2:54:59<48:18,  9.83it/s][A
 77%|██████████████████████████        | 93826/122310 [2:54:59<49:49,  9.53it/s][A

step: 26220, loss: 77.58551639209702, epoch: 1



 77%|██████████████████████████        | 93829/122310 [2:55:00<56:23,  8.42it/s][A
 77%|██████████████████████████        | 93835/122310 [2:55:01<56:17,  8.43it/s][A
 77%|████████████████████████▌       | 93839/122310 [2:55:02<1:16:45,  6.18it/s][A
 77%|████████████████████████▌       | 93847/122310 [2:55:03<1:04:50,  7.32it/s][A
 77%|████████████████████████▌       | 93856/122310 [2:55:10<2:54:24,  2.72it/s][A
 77%|████████████████████████▌       | 93863/122310 [2:55:11<2:18:26,  3.42it/s][A
 77%|████████████████████████▌       | 93870/122310 [2:55:11<1:52:21,  4.22it/s][A
 77%|████████████████████████▌       | 93881/122310 [2:55:12<1:20:16,  5.90it/s][A
 77%|████████████████████████▌       | 93888/122310 [2:55:13<1:11:44,  6.60it/s][A
 77%|████████████████████████▌       | 93897/122310 [2:55:13<1:00:35,  7.81it/s][A

step: 26240, loss: 91.50515401260687, epoch: 1



 77%|██████████████████████████        | 93919/122310 [2:55:15<44:35, 10.61it/s][A
 77%|██████████████████████████        | 93928/122310 [2:55:16<42:49, 11.05it/s][A
 77%|██████████████████████████        | 93935/122310 [2:55:16<44:03, 10.73it/s][A
 77%|██████████████████████████        | 93938/122310 [2:55:17<51:33,  9.17it/s][A
 77%|██████████████████████████        | 93948/122310 [2:55:18<45:14, 10.45it/s][A
 77%|██████████████████████████        | 93955/122310 [2:55:18<45:56, 10.29it/s][A
 77%|██████████████████████████        | 93963/122310 [2:55:19<44:48, 10.54it/s][A
 77%|██████████████████████████        | 93971/122310 [2:55:20<43:54, 10.76it/s][A
 77%|██████████████████████████        | 93978/122310 [2:55:21<44:57, 10.50it/s][A
 77%|██████████████████████████▏       | 93984/122310 [2:55:21<47:43,  9.89it/s][A
 77%|████████████████████████▌       | 93985/122310 [2:55:22<1:04:02,  7.37it/s][A
 77%|████████████████████████▌       | 93989/122310 [2:55:23<1:08:36,  6.88

step: 26260, loss: 89.60564480312307, epoch: 1



 77%|██████████████████████████▏       | 94069/122310 [2:55:28<42:18, 11.12it/s][A
 77%|██████████████████████████▏       | 94077/122310 [2:55:29<42:07, 11.17it/s][A
 77%|██████████████████████████▏       | 94080/122310 [2:55:30<51:49,  9.08it/s][A
 77%|██████████████████████████▏       | 94088/122310 [2:55:31<48:15,  9.75it/s][A
 77%|██████████████████████████▏       | 94099/122310 [2:55:31<41:06, 11.44it/s][A
 77%|██████████████████████████▏       | 94106/122310 [2:55:32<42:44, 11.00it/s][A
 77%|██████████████████████████▏       | 94114/122310 [2:55:33<42:19, 11.10it/s][A
 77%|██████████████████████████▏       | 94119/122310 [2:55:33<47:33,  9.88it/s][A
 77%|██████████████████████████▏       | 94124/122310 [2:55:34<52:07,  9.01it/s][A
 77%|██████████████████████████▏       | 94130/122310 [2:55:35<52:59,  8.86it/s][A
 77%|██████████████████████████▏       | 94146/122310 [2:55:35<36:10, 12.98it/s][A
 77%|██████████████████████████▏       | 94158/122310 [2:55:36<33:11, 14.14

step: 26280, loss: 78.51593514700674, epoch: 1



 77%|██████████████████████████▏       | 94272/122310 [2:55:43<20:08, 23.20it/s][A
 77%|██████████████████████████▏       | 94275/122310 [2:55:43<26:33, 17.60it/s][A
 77%|██████████████████████████▏       | 94283/122310 [2:55:44<29:50, 15.66it/s][A
 77%|██████████████████████████▏       | 94289/122310 [2:55:45<34:27, 13.55it/s][A
 77%|██████████████████████████▏       | 94298/122310 [2:55:45<35:10, 13.27it/s][A
 77%|██████████████████████████▏       | 94303/122310 [2:55:46<40:57, 11.40it/s][A
 77%|██████████████████████████▏       | 94313/122310 [2:55:47<38:11, 12.22it/s][A
 77%|██████████████████████████▏       | 94319/122310 [2:55:48<42:02, 11.10it/s][A
 77%|██████████████████████████▏       | 94327/122310 [2:55:49<54:25,  8.57it/s][A
 77%|██████████████████████████▏       | 94336/122310 [2:55:50<48:35,  9.60it/s][A
 77%|██████████████████████████▏       | 94346/122310 [2:55:50<43:22, 10.74it/s][A
 77%|██████████████████████████▏       | 94352/122310 [2:55:51<45:56, 10.14

step: 26300, loss: 81.14241220247779, epoch: 1



 77%|██████████████████████████▏       | 94406/122310 [2:55:57<44:20, 10.49it/s][A
 77%|██████████████████████████▏       | 94418/122310 [2:55:57<37:35, 12.36it/s][A
 77%|██████████████████████████▏       | 94423/122310 [2:55:58<42:58, 10.81it/s][A
 77%|██████████████████████████▎       | 94434/122310 [2:55:59<37:58, 12.24it/s][A
 77%|██████████████████████████▎       | 94442/122310 [2:56:00<38:54, 11.94it/s][A
 77%|██████████████████████████▎       | 94455/122310 [2:56:00<33:29, 13.86it/s][A
 77%|██████████████████████████▎       | 94461/122310 [2:56:01<37:57, 12.23it/s][A
 77%|██████████████████████████▎       | 94471/122310 [2:56:02<36:16, 12.79it/s][A
 77%|██████████████████████████▎       | 94477/122310 [2:56:02<40:20, 11.50it/s][A
 77%|████████████████████████▋       | 94481/122310 [2:56:04<1:02:19,  7.44it/s][A
 77%|████████████████████████▋       | 94484/122310 [2:56:05<1:09:24,  6.68it/s][A
 77%|██████████████████████████▎       | 94492/122310 [2:56:05<58:49,  7.88

step: 26320, loss: 90.40853421177057, epoch: 1



 77%|██████████████████████████▎       | 94542/122310 [2:56:11<47:42,  9.70it/s][A
 77%|██████████████████████████▎       | 94554/122310 [2:56:12<39:07, 11.83it/s][A
 77%|██████████████████████████▎       | 94561/122310 [2:56:12<41:00, 11.28it/s][A
 77%|██████████████████████████▎       | 94571/122310 [2:56:13<38:10, 12.11it/s][A
 77%|██████████████████████████▎       | 94580/122310 [2:56:14<37:41, 12.26it/s][A
 77%|██████████████████████████▎       | 94587/122310 [2:56:15<51:58,  8.89it/s][A
 77%|██████████████████████████▎       | 94601/122310 [2:56:16<39:56, 11.56it/s][A
 77%|██████████████████████████▎       | 94613/122310 [2:56:17<35:35, 12.97it/s][A
 77%|██████████████████████████▎       | 94620/122310 [2:56:17<38:04, 12.12it/s][A
 77%|██████████████████████████▎       | 94635/122310 [2:56:18<31:31, 14.63it/s][A
 77%|██████████████████████████▎       | 94641/122310 [2:56:19<35:51, 12.86it/s][A
 77%|██████████████████████████▎       | 94651/122310 [2:56:19<34:50, 13.23

step: 26340, loss: 77.41387982145811, epoch: 1



 77%|██████████████████████████▎       | 94720/122310 [2:56:25<45:23, 10.13it/s][A
 77%|██████████████████████████▎       | 94741/122310 [2:56:26<28:49, 15.94it/s][A
 77%|██████████████████████████▎       | 94752/122310 [2:56:27<29:04, 15.79it/s][A
 77%|██████████████████████████▎       | 94761/122310 [2:56:27<30:52, 14.87it/s][A
 77%|██████████████████████████▎       | 94772/122310 [2:56:28<30:27, 15.07it/s][A
 77%|██████████████████████████▎       | 94781/122310 [2:56:29<32:05, 14.30it/s][A
 77%|██████████████████████████▎       | 94787/122310 [2:56:29<36:27, 12.58it/s][A
 78%|██████████████████████████▎       | 94799/122310 [2:56:30<33:07, 13.84it/s][A
 78%|██████████████████████████▎       | 94804/122310 [2:56:31<38:49, 11.81it/s][A
 78%|██████████████████████████▎       | 94818/122310 [2:56:32<32:20, 14.17it/s][A
 78%|██████████████████████████▎       | 94830/122310 [2:56:32<30:37, 14.95it/s][A
 78%|██████████████████████████▎       | 94839/122310 [2:56:34<51:23,  8.91

step: 26360, loss: 82.22243250136086, epoch: 1



 78%|██████████████████████████▍       | 94885/122310 [2:56:39<48:24,  9.44it/s][A
 78%|██████████████████████████▍       | 94894/122310 [2:56:40<43:58, 10.39it/s][A
 78%|██████████████████████████▍       | 94899/122310 [2:56:41<48:35,  9.40it/s][A
 78%|██████████████████████████▍       | 94909/122310 [2:56:42<42:24, 10.77it/s][A
 78%|██████████████████████████▍       | 94914/122310 [2:56:42<47:12,  9.67it/s][A
 78%|██████████████████████████▍       | 94925/122310 [2:56:43<40:13, 11.35it/s][A
 78%|██████████████████████████▍       | 94931/122310 [2:56:44<43:23, 10.52it/s][A
 78%|██████████████████████████▍       | 94936/122310 [2:56:44<48:10,  9.47it/s][A
 78%|██████████████████████████▍       | 94947/122310 [2:56:45<40:30, 11.26it/s][A
 78%|██████████████████████████▍       | 94951/122310 [2:56:46<47:35,  9.58it/s][A
 78%|██████████████████████████▍       | 94958/122310 [2:56:47<47:13,  9.65it/s][A
 78%|██████████████████████████▍       | 94970/122310 [2:56:47<38:33, 11.82

step: 26380, loss: 100.6673271272322, epoch: 1



 78%|████████████████████████▊       | 95024/122310 [2:56:57<1:53:35,  4.00it/s][A
 78%|████████████████████████▊       | 95043/122310 [2:56:58<1:03:18,  7.18it/s][A
 78%|██████████████████████████▍       | 95051/122310 [2:56:59<57:46,  7.86it/s][A
 78%|██████████████████████████▍       | 95061/122310 [2:56:59<50:14,  9.04it/s][A
 78%|██████████████████████████▍       | 95073/122310 [2:57:00<42:16, 10.74it/s][A
 78%|██████████████████████████▍       | 95082/122310 [2:57:01<40:34, 11.19it/s][A
 78%|██████████████████████████▍       | 95091/122310 [2:57:01<39:17, 11.54it/s][A
 78%|██████████████████████████▍       | 95097/122310 [2:57:02<42:15, 10.73it/s][A
 78%|██████████████████████████▍       | 95102/122310 [2:57:03<46:46,  9.69it/s][A
 78%|██████████████████████████▍       | 95105/122310 [2:57:04<55:30,  8.17it/s][A
 78%|██████████████████████████▍       | 95115/122310 [2:57:04<45:57,  9.86it/s][A
 78%|██████████████████████████▍       | 95125/122310 [2:57:05<40:56, 11.07

step: 26400, loss: 76.6740568174842, epoch: 1
sim1 and sim2 are 0.47436437669967685, 0.23023165392955666
cosine of pred and queen: 0.20798130018770186
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: middle
Actual: kenya:africa::netherlands:europe, pred: middle
Actual: mumbai:asia::nairobi:africa, pred: europe
Actual: maharastr


 78%|██████████████████████████▍       | 95139/122310 [2:57:19<43:17, 10.46it/s][A

Actual: syria:arabic::australia:english, pred: scrubs
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: sales
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: wilderness
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: europe
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: v


 78%|████████████████████████       | 95153/122310 [2:58:21<18:11:16,  2.41s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.1301775147928994



 78%|████████████████████████       | 95162/122310 [2:58:22<13:05:50,  1.74s/it][A
 78%|████████████████████████▉       | 95170/122310 [2:58:22<9:45:21,  1.29s/it][A
 78%|████████████████████████▉       | 95185/122310 [2:58:23<5:49:16,  1.29it/s][A
 78%|████████████████████████▉       | 95192/122310 [2:58:24<4:42:48,  1.60it/s][A
 78%|████████████████████████▉       | 95202/122310 [2:58:25<3:25:23,  2.20it/s][A
 78%|████████████████████████▉       | 95209/122310 [2:58:25<2:47:42,  2.69it/s][A
 78%|████████████████████████▉       | 95219/122310 [2:58:26<2:03:30,  3.66it/s][A
 78%|████████████████████████▉       | 95235/122310 [2:58:27<1:19:20,  5.69it/s][A
 78%|████████████████████████▉       | 95241/122310 [2:58:27<1:14:21,  6.07it/s][A
 78%|████████████████████████▉       | 95246/122310 [2:58:28<1:12:21,  6.23it/s][A
 78%|████████████████████████▉       | 95253/122310 [2:58:29<1:05:14,  6.91it/s][A
 78%|████████████████████████▉       | 95256/122310 [2:58:29<1:10:53,  6.36

step: 26420, loss: 84.13712531342935, epoch: 1



 78%|██████████████████████████▌       | 95336/122310 [2:58:36<33:28, 13.43it/s][A
 78%|██████████████████████████▌       | 95345/122310 [2:58:36<34:00, 13.22it/s][A
 78%|██████████████████████████▌       | 95353/122310 [2:58:37<35:24, 12.69it/s][A
 78%|██████████████████████████▌       | 95364/122310 [2:58:38<33:07, 13.56it/s][A
 78%|██████████████████████████▌       | 95374/122310 [2:58:39<32:55, 13.63it/s][A
 78%|██████████████████████████▌       | 95379/122310 [2:58:39<38:26, 11.68it/s][A
 78%|██████████████████████████▌       | 95386/122310 [2:58:40<39:57, 11.23it/s][A
 78%|██████████████████████████▌       | 95404/122310 [2:58:41<28:56, 15.49it/s][A
 78%|██████████████████████████▌       | 95410/122310 [2:58:41<33:24, 13.42it/s][A
 78%|██████████████████████████▌       | 95425/122310 [2:58:42<28:24, 15.77it/s][A
 78%|██████████████████████████▌       | 95434/122310 [2:58:43<30:09, 14.86it/s][A
 78%|██████████████████████████▌       | 95443/122310 [2:58:44<31:27, 14.23

step: 26440, loss: 87.75561952000263, epoch: 1



 78%|██████████████████████████▌       | 95527/122310 [2:58:50<36:16, 12.30it/s][A
 78%|██████████████████████████▌       | 95535/122310 [2:58:51<37:12, 11.99it/s][A
 78%|██████████████████████████▌       | 95543/122310 [2:58:51<37:53, 11.77it/s][A
 78%|██████████████████████████▌       | 95552/122310 [2:58:52<36:53, 12.09it/s][A
 78%|██████████████████████████▌       | 95560/122310 [2:58:53<37:34, 11.87it/s][A
 78%|██████████████████████████▌       | 95565/122310 [2:58:53<42:49, 10.41it/s][A
 78%|██████████████████████████▌       | 95571/122310 [2:58:54<45:17,  9.84it/s][A
 78%|██████████████████████████▌       | 95579/122310 [2:58:55<43:23, 10.27it/s][A
 78%|██████████████████████████▌       | 95585/122310 [2:58:56<45:52,  9.71it/s][A
 78%|██████████████████████████▌       | 95590/122310 [2:58:56<49:51,  8.93it/s][A
 78%|██████████████████████████▌       | 95608/122310 [2:58:57<32:06, 13.86it/s][A
 78%|██████████████████████████▌       | 95616/122310 [2:58:58<33:57, 13.10

step: 26460, loss: 72.96664146767839, epoch: 1



 78%|██████████████████████████▌       | 95673/122310 [2:59:04<34:30, 12.86it/s][A
 78%|██████████████████████████▌       | 95686/122310 [2:59:05<30:37, 14.49it/s][A
 78%|██████████████████████████▌       | 95697/122310 [2:59:05<29:58, 14.80it/s][A
 78%|██████████████████████████▌       | 95707/122310 [2:59:06<30:20, 14.62it/s][A
 78%|██████████████████████████▌       | 95711/122310 [2:59:07<37:07, 11.94it/s][A
 78%|█████████████████████████       | 95714/122310 [2:59:11<1:54:25,  3.87it/s][A
 78%|█████████████████████████       | 95718/122310 [2:59:12<1:47:07,  4.14it/s][A
 78%|█████████████████████████       | 95733/122310 [2:59:13<1:02:30,  7.09it/s][A
 78%|██████████████████████████▌       | 95748/122310 [2:59:13<44:50,  9.87it/s][A
 78%|██████████████████████████▌       | 95756/122310 [2:59:14<43:22, 10.20it/s][A
 78%|██████████████████████████▌       | 95764/122310 [2:59:15<42:23, 10.44it/s][A
 78%|██████████████████████████▌       | 95767/122310 [2:59:15<49:50,  8.88

step: 26480, loss: 70.2708333971877, epoch: 1



 78%|██████████████████████████▋       | 95793/122310 [2:59:18<51:31,  8.58it/s][A
 78%|██████████████████████████▋       | 95800/122310 [2:59:19<49:17,  8.96it/s][A
 78%|██████████████████████████▋       | 95807/122310 [2:59:20<48:06,  9.18it/s][A
 78%|██████████████████████████▋       | 95816/122310 [2:59:20<43:01, 10.26it/s][A
 78%|██████████████████████████▋       | 95824/122310 [2:59:21<41:48, 10.56it/s][A
 78%|██████████████████████████▋       | 95835/122310 [2:59:22<36:43, 12.02it/s][A
 78%|██████████████████████████▋       | 95845/122310 [2:59:22<34:52, 12.65it/s][A
 78%|██████████████████████████▋       | 95853/122310 [2:59:23<36:00, 12.25it/s][A
 78%|██████████████████████████▋       | 95860/122310 [2:59:24<38:10, 11.55it/s][A
 78%|██████████████████████████▋       | 95870/122310 [2:59:25<35:49, 12.30it/s][A
 78%|██████████████████████████▋       | 95879/122310 [2:59:25<35:27, 12.42it/s][A
 78%|██████████████████████████▋       | 95891/122310 [2:59:26<32:00, 13.76

step: 26500, loss: 75.10142915071272, epoch: 1



 78%|██████████████████████████▋       | 95972/122310 [2:59:32<34:49, 12.61it/s][A
 78%|██████████████████████████▋       | 95977/122310 [2:59:33<40:05, 10.95it/s][A
 78%|██████████████████████████▋       | 95981/122310 [2:59:34<46:50,  9.37it/s][A
 78%|██████████████████████████▋       | 95991/122310 [2:59:35<40:49, 10.75it/s][A
 78%|██████████████████████████▋       | 95994/122310 [2:59:35<49:44,  8.82it/s][A
 78%|██████████████████████████▋       | 96005/122310 [2:59:36<40:37, 10.79it/s][A
 79%|██████████████████████████▋       | 96015/122310 [2:59:37<37:10, 11.79it/s][A
 79%|██████████████████████████▋       | 96020/122310 [2:59:37<42:10, 10.39it/s][A
 79%|██████████████████████████▋       | 96028/122310 [2:59:38<41:14, 10.62it/s][A
 79%|██████████████████████████▋       | 96034/122310 [2:59:39<43:51,  9.99it/s][A
 79%|██████████████████████████▋       | 96043/122310 [2:59:40<40:39, 10.77it/s][A
 79%|██████████████████████████▋       | 96057/122310 [2:59:40<32:29, 13.46

step: 26520, loss: 122.47806192282923, epoch: 1



 79%|██████████████████████████▋       | 96129/122310 [2:59:47<38:44, 11.26it/s][A
 79%|██████████████████████████▋       | 96135/122310 [2:59:47<41:53, 10.41it/s][A
 79%|██████████████████████████▋       | 96147/122310 [2:59:48<35:17, 12.36it/s][A
 79%|██████████████████████████▋       | 96156/122310 [2:59:49<34:59, 12.46it/s][A
 79%|██████████████████████████▋       | 96165/122310 [2:59:49<34:49, 12.51it/s][A
 79%|██████████████████████████▋       | 96171/122310 [2:59:50<38:34, 11.29it/s][A
 79%|██████████████████████████▋       | 96183/122310 [2:59:51<33:37, 12.95it/s][A
 79%|██████████████████████████▋       | 96189/122310 [2:59:52<37:34, 11.59it/s][A
 79%|██████████████████████████▋       | 96196/122310 [2:59:52<39:17, 11.08it/s][A
 79%|██████████████████████████▋       | 96202/122310 [2:59:53<42:16, 10.29it/s][A
 79%|██████████████████████████▋       | 96212/122310 [2:59:54<38:02, 11.43it/s][A
 79%|██████████████████████████▋       | 96220/122310 [2:59:54<38:13, 11.38

step: 26540, loss: 67.30637660543594, epoch: 1



 79%|█████████████████████████▏      | 96304/122310 [3:00:04<1:16:56,  5.63it/s][A
 79%|█████████████████████████▏      | 96307/122310 [3:00:05<1:21:00,  5.35it/s][A
 79%|██████████████████████████▊       | 96319/122310 [3:00:05<58:39,  7.38it/s][A
 79%|██████████████████████████▊       | 96329/122310 [3:00:06<49:50,  8.69it/s][A
 79%|██████████████████████████▊       | 96343/122310 [3:00:07<39:16, 11.02it/s][A
 79%|██████████████████████████▊       | 96350/122310 [3:00:08<40:47, 10.61it/s][A
 79%|██████████████████████████▊       | 96356/122310 [3:00:08<43:46,  9.88it/s][A
 79%|██████████████████████████▊       | 96361/122310 [3:00:09<48:00,  9.01it/s][A
 79%|██████████████████████████▊       | 96370/122310 [3:00:10<43:59,  9.83it/s][A
 79%|██████████████████████████▊       | 96378/122310 [3:00:11<42:59, 10.05it/s][A
 79%|██████████████████████████▊       | 96386/122310 [3:00:11<42:15, 10.22it/s][A
 79%|██████████████████████████▊       | 96389/122310 [3:00:12<51:35,  8.37

step: 26560, loss: 71.2983159488748, epoch: 1



 79%|██████████████████████████▊       | 96426/122310 [3:00:16<42:38, 10.12it/s][A
 79%|██████████████████████████▊       | 96436/122310 [3:00:17<39:00, 11.05it/s][A
 79%|██████████████████████████▊       | 96447/122310 [3:00:17<35:35, 12.11it/s][A
 79%|██████████████████████████▊       | 96457/122310 [3:00:18<34:42, 12.42it/s][A
 79%|██████████████████████████▊       | 96463/122310 [3:00:19<39:37, 10.87it/s][A
 79%|██████████████████████████▊       | 96477/122310 [3:00:22<57:30,  7.49it/s][A
 79%|█████████████████████████▏      | 96481/122310 [3:00:22<1:02:48,  6.85it/s][A
 79%|██████████████████████████▊       | 96491/122310 [3:00:23<54:29,  7.90it/s][A
 79%|██████████████████████████▊       | 96502/122310 [3:00:24<47:14,  9.10it/s][A
 79%|██████████████████████████▊       | 96505/122310 [3:00:25<56:28,  7.61it/s][A
 79%|█████████████████████████▏      | 96509/122310 [3:00:26<1:03:43,  6.75it/s][A
 79%|█████████████████████████▎      | 96516/122310 [3:00:27<1:01:58,  6.94

step: 26580, loss: 75.01267725252383, epoch: 1



 79%|██████████████████████████▊       | 96597/122310 [3:00:36<43:41,  9.81it/s][A
 79%|██████████████████████████▊       | 96605/122310 [3:00:37<45:50,  9.35it/s][A
 79%|██████████████████████████▊       | 96613/122310 [3:00:38<48:31,  8.82it/s][A
 79%|██████████████████████████▊       | 96621/122310 [3:00:39<50:27,  8.49it/s][A
 79%|█████████████████████████▎      | 96624/122310 [3:00:40<1:01:59,  6.91it/s][A
 79%|██████████████████████████▊       | 96632/122310 [3:00:41<59:57,  7.14it/s][A
 79%|██████████████████████████▊       | 96647/122310 [3:00:42<45:11,  9.47it/s][A
 79%|██████████████████████████▊       | 96667/122310 [3:00:43<33:15, 12.85it/s][A
 79%|██████████████████████████▉       | 96682/122310 [3:00:44<31:47, 13.44it/s][A
 79%|██████████████████████████▉       | 96694/122310 [3:00:45<31:52, 13.39it/s][A
 79%|██████████████████████████▉       | 96702/122310 [3:00:46<35:29, 12.03it/s][A
 79%|██████████████████████████▉       | 96708/122310 [3:00:47<40:36, 10.51

step: 26600, loss: 86.10231338305927, epoch: 1



 79%|██████████████████████████▉       | 96807/122310 [3:00:55<32:22, 13.13it/s][A
 79%|██████████████████████████▉       | 96812/122310 [3:00:56<40:30, 10.49it/s][A
 79%|██████████████████████████▉       | 96817/122310 [3:00:57<47:17,  8.98it/s][A
 79%|██████████████████████████▉       | 96827/122310 [3:00:58<45:20,  9.37it/s][A
 79%|██████████████████████████▉       | 96839/122310 [3:00:59<40:33, 10.47it/s][A
 79%|██████████████████████████▉       | 96843/122310 [3:01:00<49:35,  8.56it/s][A
 79%|██████████████████████████▉       | 96853/122310 [3:01:01<47:05,  9.01it/s][A
 79%|█████████████████████████▎      | 96855/122310 [3:01:02<1:00:55,  6.96it/s][A
 79%|██████████████████████████▉       | 96864/122310 [3:01:03<55:42,  7.61it/s][A
 79%|██████████████████████████▉       | 96873/122310 [3:01:04<54:21,  7.80it/s][A
 79%|██████████████████████████▉       | 96881/122310 [3:01:05<52:42,  8.04it/s][A
 79%|██████████████████████████▉       | 96887/122310 [3:01:06<56:23,  7.51

step: 26620, loss: 84.83912309143678, epoch: 1



 79%|██████████████████████████▉       | 96965/122310 [3:01:14<54:07,  7.80it/s][A
 79%|██████████████████████████▉       | 96972/122310 [3:01:15<54:30,  7.75it/s][A
 79%|█████████████████████████▎      | 96973/122310 [3:01:16<1:12:01,  5.86it/s][A
 79%|█████████████████████████▎      | 96976/122310 [3:01:17<1:22:37,  5.11it/s][A
 79%|██████████████████████████▉       | 96989/122310 [3:01:18<54:00,  7.81it/s][A
 79%|██████████████████████████▉       | 96997/122310 [3:01:19<52:13,  8.08it/s][A
 79%|██████████████████████████▉       | 97004/122310 [3:01:20<53:39,  7.86it/s][A
 79%|██████████████████████████▉       | 97010/122310 [3:01:21<55:42,  7.57it/s][A
 79%|██████████████████████████▉       | 97016/122310 [3:01:22<57:34,  7.32it/s][A
 79%|██████████████████████████▉       | 97029/122310 [3:01:23<44:27,  9.48it/s][A
 79%|██████████████████████████▉       | 97038/122310 [3:01:23<43:39,  9.65it/s][A
 79%|██████████████████████████▉       | 97048/122310 [3:01:24<41:27, 10.16

step: 26640, loss: 79.380484246097, epoch: 1



 79%|██████████████████████████▉       | 97106/122310 [3:01:32<50:55,  8.25it/s][A
 79%|██████████████████████████▉       | 97111/122310 [3:01:33<55:51,  7.52it/s][A
 79%|██████████████████████████▉       | 97120/122310 [3:01:34<50:20,  8.34it/s][A
 79%|███████████████████████████       | 97129/122310 [3:01:35<47:34,  8.82it/s][A
 79%|███████████████████████████       | 97134/122310 [3:01:36<53:04,  7.91it/s][A
 79%|█████████████████████████▍      | 97142/122310 [3:01:38<1:07:05,  6.25it/s][A
 79%|█████████████████████████▍      | 97150/122310 [3:01:39<1:01:41,  6.80it/s][A
 79%|█████████████████████████▍      | 97156/122310 [3:01:39<1:02:12,  6.74it/s][A
 79%|█████████████████████████▍      | 97160/122310 [3:01:40<1:08:18,  6.14it/s][A
 79%|█████████████████████████▍      | 97163/122310 [3:01:41<1:17:44,  5.39it/s][A
 79%|███████████████████████████       | 97174/122310 [3:01:42<56:39,  7.39it/s][A
 79%|█████████████████████████▍      | 97181/122310 [3:01:45<1:27:53,  4.77

step: 26660, loss: 81.43550790624715, epoch: 1



 80%|███████████████████████████       | 97246/122310 [3:01:50<38:21, 10.89it/s][A
 80%|███████████████████████████       | 97251/122310 [3:01:51<44:12,  9.45it/s][A
 80%|███████████████████████████       | 97259/122310 [3:01:52<44:36,  9.36it/s][A
 80%|███████████████████████████       | 97266/122310 [3:01:53<46:39,  8.95it/s][A
 80%|███████████████████████████       | 97275/122310 [3:01:54<44:43,  9.33it/s][A
 80%|███████████████████████████       | 97284/122310 [3:01:54<43:27,  9.60it/s][A
 80%|███████████████████████████       | 97296/122310 [3:01:55<38:38, 10.79it/s][A
 80%|███████████████████████████       | 97311/122310 [3:01:56<32:53, 12.67it/s][A
 80%|███████████████████████████       | 97323/122310 [3:01:57<32:04, 12.98it/s][A
 80%|███████████████████████████       | 97330/122310 [3:01:58<36:13, 11.49it/s][A
 80%|███████████████████████████       | 97338/122310 [3:01:59<38:37, 10.78it/s][A
 80%|███████████████████████████       | 97342/122310 [3:02:00<46:43,  8.91

step: 26680, loss: 75.00561965318838, epoch: 1



 80%|███████████████████████████       | 97420/122310 [3:02:08<45:08,  9.19it/s][A
 80%|███████████████████████████       | 97430/122310 [3:02:09<42:11,  9.83it/s][A
 80%|███████████████████████████       | 97436/122310 [3:02:10<46:30,  8.91it/s][A
 80%|███████████████████████████       | 97443/122310 [3:02:10<47:49,  8.67it/s][A
 80%|███████████████████████████       | 97446/122310 [3:02:11<58:19,  7.11it/s][A
 80%|███████████████████████████       | 97461/122310 [3:02:12<41:05, 10.08it/s][A
 80%|███████████████████████████       | 97470/122310 [3:02:13<41:03, 10.08it/s][A
 80%|███████████████████████████       | 97475/122310 [3:02:14<47:13,  8.77it/s][A
 80%|███████████████████████████       | 97480/122310 [3:02:15<52:38,  7.86it/s][A
 80%|███████████████████████████       | 97491/122310 [3:02:16<44:48,  9.23it/s][A
 80%|███████████████████████████       | 97499/122310 [3:02:18<59:28,  6.95it/s][A
 80%|███████████████████████████       | 97511/122310 [3:02:18<48:04,  8.60

step: 26700, loss: 75.35984668271554, epoch: 1



 80%|███████████████████████████▏      | 97590/122310 [3:02:26<42:57,  9.59it/s][A
 80%|███████████████████████████▏      | 97597/122310 [3:02:26<45:30,  9.05it/s][A
 80%|███████████████████████████▏      | 97605/122310 [3:02:27<45:23,  9.07it/s][A
 80%|███████████████████████████▏      | 97615/122310 [3:02:28<42:03,  9.79it/s][A
 80%|███████████████████████████▏      | 97625/122310 [3:02:29<40:11, 10.24it/s][A
 80%|███████████████████████████▏      | 97629/122310 [3:02:30<48:24,  8.50it/s][A
 80%|███████████████████████████▏      | 97639/122310 [3:02:31<44:12,  9.30it/s][A
 80%|███████████████████████████▏      | 97649/122310 [3:02:32<41:27,  9.91it/s][A
 80%|███████████████████████████▏      | 97664/122310 [3:02:33<34:08, 12.03it/s][A
 80%|███████████████████████████▏      | 97671/122310 [3:02:33<37:57, 10.82it/s][A
 80%|███████████████████████████▏      | 97676/122310 [3:02:34<44:17,  9.27it/s][A
 80%|███████████████████████████▏      | 97686/122310 [3:02:35<41:30,  9.89

step: 26720, loss: 71.0765645912079, epoch: 1



 80%|███████████████████████████▏      | 97768/122310 [3:02:44<46:25,  8.81it/s][A
 80%|███████████████████████████▏      | 97778/122310 [3:02:45<44:30,  9.19it/s][A
 80%|███████████████████████████▏      | 97786/122310 [3:02:46<44:35,  9.17it/s][A
 80%|███████████████████████████▏      | 97791/122310 [3:02:46<50:02,  8.17it/s][A
 80%|███████████████████████████▏      | 97799/122310 [3:02:47<48:20,  8.45it/s][A
 80%|███████████████████████████▏      | 97805/122310 [3:02:48<51:47,  7.89it/s][A
 80%|███████████████████████████▏      | 97816/122310 [3:02:49<44:25,  9.19it/s][A
 80%|███████████████████████████▏      | 97822/122310 [3:02:50<48:24,  8.43it/s][A
 80%|███████████████████████████▏      | 97830/122310 [3:02:51<47:20,  8.62it/s][A
 80%|███████████████████████████▏      | 97837/122310 [3:02:52<48:25,  8.42it/s][A
 80%|███████████████████████████▏      | 97844/122310 [3:02:53<49:42,  8.20it/s][A
 80%|███████████████████████████▏      | 97852/122310 [3:02:54<48:09,  8.47

step: 26740, loss: 131.32366302925428, epoch: 1



 80%|███████████████████████████▏      | 97923/122310 [3:03:01<48:40,  8.35it/s][A
 80%|███████████████████████████▏      | 97934/122310 [3:03:02<42:20,  9.60it/s][A
 80%|███████████████████████████▏      | 97946/122310 [3:03:03<37:32, 10.82it/s][A
 80%|███████████████████████████▏      | 97951/122310 [3:03:04<44:02,  9.22it/s][A
 80%|███████████████████████████▏      | 97958/122310 [3:03:05<45:50,  8.85it/s][A
 80%|███████████████████████████▏      | 97965/122310 [3:03:06<47:35,  8.53it/s][A
 80%|███████████████████████████▏      | 97972/122310 [3:03:07<48:33,  8.35it/s][A
 80%|███████████████████████████▏      | 97979/122310 [3:03:08<49:08,  8.25it/s][A
 80%|███████████████████████████▏      | 97989/122310 [3:03:09<44:10,  9.17it/s][A
 80%|███████████████████████████▏      | 97999/122310 [3:03:09<41:14,  9.83it/s][A
 80%|███████████████████████████▏      | 98008/122310 [3:03:10<40:58,  9.88it/s][A
 80%|███████████████████████████▏      | 98015/122310 [3:03:11<43:25,  9.32

step: 26760, loss: 94.21167878973768, epoch: 1



 80%|███████████████████████████▎      | 98082/122310 [3:03:19<44:27,  9.08it/s][A
 80%|███████████████████████████▎      | 98087/122310 [3:03:20<50:07,  8.06it/s][A
 80%|███████████████████████████▎      | 98103/122310 [3:03:21<36:25, 11.07it/s][A
 80%|███████████████████████████▎      | 98112/122310 [3:03:22<37:12, 10.84it/s][A
 80%|███████████████████████████▎      | 98128/122310 [3:03:23<30:57, 13.02it/s][A
 80%|███████████████████████████▎      | 98136/122310 [3:03:24<34:00, 11.84it/s][A
 80%|███████████████████████████▎      | 98149/122310 [3:03:24<31:38, 12.73it/s][A
 80%|███████████████████████████▎      | 98157/122310 [3:03:25<35:33, 11.32it/s][A
 80%|███████████████████████████▎      | 98159/122310 [3:03:26<48:41,  8.27it/s][A
 80%|███████████████████████████▎      | 98170/122310 [3:03:27<43:35,  9.23it/s][A
 80%|███████████████████████████▎      | 98180/122310 [3:03:28<42:55,  9.37it/s][A
 80%|███████████████████████████▎      | 98182/122310 [3:03:29<56:15,  7.15

step: 26780, loss: 79.8458234467456, epoch: 1



 80%|█████████████████████████▋      | 98252/122310 [3:03:38<1:02:07,  6.45it/s][A
 80%|███████████████████████████▎      | 98260/122310 [3:03:39<56:36,  7.08it/s][A
 80%|███████████████████████████▎      | 98268/122310 [3:03:40<53:10,  7.53it/s][A
 80%|███████████████████████████▎      | 98282/122310 [3:03:41<42:05,  9.51it/s][A
 80%|███████████████████████████▎      | 98293/122310 [3:03:42<39:54, 10.03it/s][A
 80%|███████████████████████████▎      | 98304/122310 [3:03:43<38:13, 10.46it/s][A
 80%|███████████████████████████▎      | 98312/122310 [3:03:44<41:06,  9.73it/s][A
 80%|███████████████████████████▎      | 98321/122310 [3:03:45<42:41,  9.37it/s][A
 80%|███████████████████████████▎      | 98330/122310 [3:03:46<42:52,  9.32it/s][A
 80%|███████████████████████████▎      | 98334/122310 [3:03:47<51:14,  7.80it/s][A
 80%|███████████████████████████▎      | 98340/122310 [3:03:48<53:47,  7.43it/s][A
 80%|███████████████████████████▎      | 98354/122310 [3:03:49<41:43,  9.57

step: 26800, loss: 87.46100183891278, epoch: 1
sim1 and sim2 are 0.488611896959494, 0.22889702562375827
cosine of pred and queen: 0.18826100130467877
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: pacific
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: middle
Actual: kenya:africa::netherlands:europe, pred: middle
Actual: mumbai:asia::nairobi:africa, pred: europe
Actual: maharas

Actual: cairo:egypt::manila:philippines, pred: hawser
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: april
Actual: grandfather:grandmother::father:mother, pred: sister
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: thaws
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: main
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: main
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 80%|████████████████████████▉      | 98426/122310 [3:05:33<20:06:20,  3.03s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 80%|█████████████████████████▊      | 98450/122310 [3:05:34<9:57:26,  1.50s/it][A
 81%|█████████████████████████▊      | 98466/122310 [3:05:35<6:49:01,  1.03s/it][A
 81%|█████████████████████████▊      | 98470/122310 [3:05:36<6:15:34,  1.06it/s][A
 81%|█████████████████████████▊      | 98483/122310 [3:05:37<4:21:27,  1.52it/s][A
 81%|█████████████████████████▊      | 98487/122310 [3:05:38<3:59:32,  1.66it/s][A
 81%|█████████████████████████▊      | 98497/122310 [3:05:38<2:56:23,  2.25it/s][A
 81%|█████████████████████████▊      | 98500/122310 [3:05:39<2:49:20,  2.34it/s][A
 81%|█████████████████████████▊      | 98511/122310 [3:05:40<1:56:55,  3.39it/s][A
 81%|█████████████████████████▊      | 98519/122310 [3:05:41<1:36:06,  4.13it/s][A
 81%|█████████████████████████▊      | 98527/122310 [3:05:42<1:21:01,  4.89it/s][A
 81%|█████████████████████████▊      | 98533/122310 [3:05:43<1:15:46,  5.23it/s][A
 81%|███████████████████████████▍      | 98546/122310 [3:05:44<55:17,  7.16

step: 26820, loss: 92.39693014414101, epoch: 1



 81%|███████████████████████████▍      | 98628/122310 [3:05:52<37:30, 10.52it/s][A
 81%|███████████████████████████▍      | 98634/122310 [3:05:53<41:56,  9.41it/s][A
 81%|███████████████████████████▍      | 98643/122310 [3:05:53<41:05,  9.60it/s][A
 81%|███████████████████████████▍      | 98645/122310 [3:05:54<52:47,  7.47it/s][A
 81%|█████████████████████████▊      | 98649/122310 [3:05:55<1:00:00,  6.57it/s][A
 81%|███████████████████████████▍      | 98658/122310 [3:05:56<51:55,  7.59it/s][A
 81%|█████████████████████████▊      | 98663/122310 [3:05:59<1:29:50,  4.39it/s][A
 81%|█████████████████████████▊      | 98670/122310 [3:06:00<1:17:06,  5.11it/s][A
 81%|█████████████████████████▊      | 98675/122310 [3:06:01<1:15:11,  5.24it/s][A
 81%|█████████████████████████▊      | 98682/122310 [3:06:01<1:06:59,  5.88it/s][A
 81%|█████████████████████████▊      | 98686/122310 [3:06:02<1:10:43,  5.57it/s][A
 81%|███████████████████████████▍      | 98700/122310 [3:06:03<47:09,  8.35

step: 26840, loss: 77.70568747752611, epoch: 1



 81%|███████████████████████████▍      | 98765/122310 [3:06:09<33:17, 11.79it/s][A
 81%|███████████████████████████▍      | 98775/122310 [3:06:10<33:40, 11.65it/s][A
 81%|███████████████████████████▍      | 98780/122310 [3:06:11<39:30,  9.93it/s][A
 81%|███████████████████████████▍      | 98792/122310 [3:06:12<35:42, 10.98it/s][A
 81%|███████████████████████████▍      | 98795/122310 [3:06:13<44:45,  8.76it/s][A
 81%|███████████████████████████▍      | 98806/122310 [3:06:14<39:58,  9.80it/s][A
 81%|███████████████████████████▍      | 98816/122310 [3:06:15<38:22, 10.20it/s][A
 81%|███████████████████████████▍      | 98822/122310 [3:06:16<42:34,  9.19it/s][A
 81%|███████████████████████████▍      | 98832/122310 [3:06:17<39:45,  9.84it/s][A
 81%|███████████████████████████▍      | 98840/122310 [3:06:17<40:44,  9.60it/s][A
 81%|███████████████████████████▍      | 98845/122310 [3:06:18<46:22,  8.43it/s][A
 81%|███████████████████████████▍      | 98851/122310 [3:06:19<49:12,  7.95

step: 26860, loss: 71.13362407788456, epoch: 1



 81%|███████████████████████████▌      | 98932/122310 [3:06:27<41:43,  9.34it/s][A
 81%|███████████████████████████▌      | 98940/122310 [3:06:28<42:14,  9.22it/s][A
 81%|███████████████████████████▌      | 98950/122310 [3:06:29<40:05,  9.71it/s][A
 81%|███████████████████████████▌      | 98958/122310 [3:06:30<41:52,  9.29it/s][A
 81%|███████████████████████████▌      | 98971/122310 [3:06:31<36:29, 10.66it/s][A
 81%|███████████████████████████▌      | 98980/122310 [3:06:32<38:11, 10.18it/s][A
 81%|███████████████████████████▌      | 98989/122310 [3:06:33<39:29,  9.84it/s][A
 81%|███████████████████████████▌      | 99002/122310 [3:06:34<35:31, 10.94it/s][A
 81%|███████████████████████████▌      | 99011/122310 [3:06:35<36:03, 10.77it/s][A
 81%|███████████████████████████▌      | 99022/122310 [3:06:36<35:10, 11.03it/s][A
 81%|███████████████████████████▌      | 99031/122310 [3:06:37<45:55,  8.45it/s][A
 81%|███████████████████████████▌      | 99036/122310 [3:06:38<49:40,  7.81

step: 26880, loss: 85.92045131984555, epoch: 1



 81%|███████████████████████████▌      | 99115/122310 [3:06:45<39:26,  9.80it/s][A
 81%|███████████████████████████▌      | 99129/122310 [3:06:46<33:04, 11.68it/s][A
 81%|███████████████████████████▌      | 99138/122310 [3:06:48<44:11,  8.74it/s][A
 81%|███████████████████████████▌      | 99142/122310 [3:06:49<50:11,  7.69it/s][A
 81%|█████████████████████████▉      | 99144/122310 [3:06:50<1:01:02,  6.32it/s][A
 81%|███████████████████████████▌      | 99153/122310 [3:06:50<52:31,  7.35it/s][A
 81%|███████████████████████████▌      | 99159/122310 [3:06:51<53:39,  7.19it/s][A
 81%|█████████████████████████▉      | 99162/122310 [3:06:52<1:03:27,  6.08it/s][A
 81%|███████████████████████████▌      | 99171/122310 [3:06:53<53:19,  7.23it/s][A
 81%|███████████████████████████▌      | 99177/122310 [3:06:54<53:52,  7.16it/s][A
 81%|███████████████████████████▌      | 99185/122310 [3:06:55<49:42,  7.75it/s][A
 81%|███████████████████████████▌      | 99195/122310 [3:06:56<43:21,  8.89

step: 26900, loss: 76.3051077281628, epoch: 1



 81%|███████████████████████████▌      | 99286/122310 [3:07:03<28:31, 13.46it/s][A
 81%|███████████████████████████▌      | 99294/122310 [3:07:04<31:29, 12.18it/s][A
 81%|███████████████████████████▌      | 99307/122310 [3:07:04<29:20, 13.06it/s][A
 81%|███████████████████████████▌      | 99315/122310 [3:07:05<32:07, 11.93it/s][A
 81%|███████████████████████████▌      | 99337/122310 [3:07:06<23:56, 15.99it/s][A
 81%|███████████████████████████▌      | 99347/122310 [3:07:07<26:08, 14.64it/s][A
 81%|███████████████████████████▌      | 99358/122310 [3:07:08<27:07, 14.11it/s][A
 81%|███████████████████████████▌      | 99370/122310 [3:07:09<27:13, 14.05it/s][A
 81%|███████████████████████████▋      | 99381/122310 [3:07:10<28:03, 13.62it/s][A
 81%|███████████████████████████▋      | 99392/122310 [3:07:10<28:42, 13.31it/s][A
 81%|███████████████████████████▋      | 99395/122310 [3:07:11<36:59, 10.32it/s][A
 81%|███████████████████████████▋      | 99400/122310 [3:07:12<42:32,  8.98

step: 26920, loss: 102.67142282298119, epoch: 1



 81%|███████████████████████████▋      | 99473/122310 [3:07:20<39:07,  9.73it/s][A
 81%|███████████████████████████▋      | 99476/122310 [3:07:21<49:14,  7.73it/s][A
 81%|███████████████████████████▋      | 99482/122310 [3:07:22<51:02,  7.45it/s][A
 81%|███████████████████████████▋      | 99490/122310 [3:07:23<48:52,  7.78it/s][A
 81%|███████████████████████████▋      | 99495/122310 [3:07:24<54:01,  7.04it/s][A
 81%|███████████████████████████▋      | 99502/122310 [3:07:25<54:36,  6.96it/s][A
 81%|███████████████████████████▋      | 99514/122310 [3:07:26<46:42,  8.14it/s][A
 81%|███████████████████████████▋      | 99526/122310 [3:07:27<43:34,  8.72it/s][A
 81%|███████████████████████████▋      | 99541/122310 [3:07:29<39:27,  9.62it/s][A
 81%|███████████████████████████▋      | 99556/122310 [3:07:30<35:46, 10.60it/s][A
 81%|███████████████████████████▋      | 99571/122310 [3:07:31<33:03, 11.46it/s][A
 81%|███████████████████████████▋      | 99578/122310 [3:07:32<36:22, 10.42

step: 26940, loss: 80.56849006184046, epoch: 1



 81%|███████████████████████████▋      | 99649/122310 [3:07:40<42:01,  8.99it/s][A
 81%|███████████████████████████▋      | 99659/122310 [3:07:41<39:46,  9.49it/s][A
 81%|███████████████████████████▋      | 99670/122310 [3:07:42<37:29, 10.06it/s][A
 81%|███████████████████████████▋      | 99677/122310 [3:07:43<40:41,  9.27it/s][A
 81%|███████████████████████████▋      | 99681/122310 [3:07:44<49:28,  7.62it/s][A
 82%|███████████████████████████▋      | 99686/122310 [3:07:45<54:18,  6.94it/s][A
 82%|██████████████████████████      | 99691/122310 [3:07:46<1:00:50,  6.20it/s][A
 82%|███████████████████████████▋      | 99705/122310 [3:07:47<43:15,  8.71it/s][A
 82%|███████████████████████████▋      | 99716/122310 [3:07:48<39:25,  9.55it/s][A
 82%|███████████████████████████▋      | 99736/122310 [3:07:50<38:20,  9.81it/s][A
 82%|███████████████████████████▋      | 99737/122310 [3:07:51<50:28,  7.45it/s][A
 82%|███████████████████████████▋      | 99741/122310 [3:07:52<57:45,  6.51

step: 26960, loss: 93.21641772907275, epoch: 1



 82%|███████████████████████████▊      | 99827/122310 [3:07:59<29:10, 12.84it/s][A
 82%|███████████████████████████▊      | 99835/122310 [3:08:00<32:20, 11.58it/s][A
 82%|███████████████████████████▊      | 99845/122310 [3:08:01<33:01, 11.34it/s][A
 82%|███████████████████████████▊      | 99848/122310 [3:08:02<43:39,  8.57it/s][A
 82%|███████████████████████████▊      | 99859/122310 [3:08:03<39:47,  9.41it/s][A
 82%|███████████████████████████▊      | 99872/122310 [3:08:04<36:07, 10.35it/s][A
 82%|███████████████████████████▊      | 99877/122310 [3:08:05<42:18,  8.84it/s][A
 82%|███████████████████████████▊      | 99887/122310 [3:08:06<40:27,  9.24it/s][A
 82%|███████████████████████████▊      | 99891/122310 [3:08:07<48:52,  7.65it/s][A
 82%|██████████████████████████▏     | 99900/122310 [3:08:09<1:01:10,  6.11it/s][A
 82%|███████████████████████████▊      | 99915/122310 [3:08:10<44:24,  8.40it/s][A
 82%|███████████████████████████▊      | 99930/122310 [3:08:11<35:56, 10.38

step: 26980, loss: 82.4387550752722, epoch: 1



 82%|███████████████████████████▊      | 99996/122310 [3:08:19<51:34,  7.21it/s][A
 82%|██████████████████████████▉      | 100006/122310 [3:08:20<46:38,  7.97it/s][A
 82%|██████████████████████████▉      | 100018/122310 [3:08:21<39:53,  9.31it/s][A
 82%|██████████████████████████▉      | 100025/122310 [3:08:22<43:32,  8.53it/s][A
 82%|██████████████████████████▉      | 100031/122310 [3:08:23<46:44,  7.94it/s][A
 82%|██████████████████████████▉      | 100042/122310 [3:08:24<41:42,  8.90it/s][A
 82%|██████████████████████████▉      | 100051/122310 [3:08:25<41:09,  9.01it/s][A
 82%|██████████████████████████▉      | 100062/122310 [3:08:26<37:36,  9.86it/s][A
 82%|██████████████████████████▉      | 100065/122310 [3:08:27<47:01,  7.88it/s][A
 82%|███████████████████████████      | 100080/122310 [3:08:28<36:04, 10.27it/s][A
 82%|███████████████████████████      | 100087/122310 [3:08:28<39:05,  9.48it/s][A
 82%|███████████████████████████      | 100094/122310 [3:08:29<41:46,  8.86

step: 27000, loss: 81.44257986088024, epoch: 1
saving weights



 82%|███████████████████████████      | 100146/122310 [3:08:38<58:02,  6.36it/s][A
 82%|███████████████████████████      | 100156/122310 [3:08:39<51:20,  7.19it/s][A
 82%|███████████████████████████      | 100163/122310 [3:08:40<51:56,  7.11it/s][A
 82%|███████████████████████████      | 100181/122310 [3:08:41<36:17, 10.16it/s][A
 82%|███████████████████████████      | 100195/122310 [3:08:42<32:35, 11.31it/s][A
 82%|███████████████████████████      | 100203/122310 [3:08:43<36:27, 10.10it/s][A
 82%|███████████████████████████      | 100206/122310 [3:08:44<46:53,  7.86it/s][A
 82%|███████████████████████████      | 100215/122310 [3:08:45<44:53,  8.20it/s][A
 82%|███████████████████████████      | 100229/122310 [3:08:46<37:47,  9.74it/s][A
 82%|███████████████████████████      | 100234/122310 [3:08:47<44:55,  8.19it/s][A
 82%|███████████████████████████      | 100252/122310 [3:08:48<33:46, 10.89it/s][A
 82%|███████████████████████████      | 100257/122310 [3:08:49<39:54,  9.21

step: 27020, loss: 86.75273244931661, epoch: 1



 82%|███████████████████████████      | 100331/122310 [3:08:59<38:08,  9.60it/s][A
 82%|███████████████████████████      | 100343/122310 [3:09:00<34:47, 10.53it/s][A
 82%|███████████████████████████      | 100350/122310 [3:09:01<38:25,  9.53it/s][A
 82%|███████████████████████████      | 100360/122310 [3:09:02<37:58,  9.63it/s][A
 82%|███████████████████████████      | 100364/122310 [3:09:03<45:14,  8.09it/s][A
 82%|███████████████████████████      | 100373/122310 [3:09:04<42:59,  8.51it/s][A
 82%|███████████████████████████      | 100390/122310 [3:09:05<32:49, 11.13it/s][A
 82%|███████████████████████████      | 100399/122310 [3:09:06<34:11, 10.68it/s][A
 82%|███████████████████████████      | 100409/122310 [3:09:07<33:51, 10.78it/s][A
 82%|███████████████████████████      | 100415/122310 [3:09:08<38:15,  9.54it/s][A
 82%|███████████████████████████      | 100424/122310 [3:09:09<37:45,  9.66it/s][A
 82%|███████████████████████████      | 100432/122310 [3:09:09<38:36,  9.44

step: 27040, loss: 97.924100534196, epoch: 1



 82%|███████████████████████████      | 100512/122310 [3:09:18<34:49, 10.43it/s][A
 82%|███████████████████████████      | 100523/122310 [3:09:19<32:46, 11.08it/s][A
 82%|███████████████████████████      | 100533/122310 [3:09:20<32:30, 11.16it/s][A
 82%|███████████████████████████▏     | 100548/122310 [3:09:20<28:07, 12.90it/s][A
 82%|███████████████████████████▏     | 100550/122310 [3:09:21<36:58,  9.81it/s][A
 82%|███████████████████████████▏     | 100558/122310 [3:09:22<37:45,  9.60it/s][A
 82%|███████████████████████████▏     | 100567/122310 [3:09:23<36:55,  9.81it/s][A
 82%|███████████████████████████▏     | 100572/122310 [3:09:24<42:12,  8.58it/s][A
 82%|███████████████████████████▏     | 100582/122310 [3:09:25<38:15,  9.47it/s][A
 82%|███████████████████████████▏     | 100588/122310 [3:09:26<41:35,  8.71it/s][A
 82%|███████████████████████████▏     | 100594/122310 [3:09:27<44:20,  8.16it/s][A
 82%|███████████████████████████▏     | 100606/122310 [3:09:27<36:48,  9.83

step: 27060, loss: 72.48404740655478, epoch: 1



 82%|███████████████████████████▏     | 100709/122310 [3:09:35<28:40, 12.56it/s][A
 82%|███████████████████████████▏     | 100721/122310 [3:09:36<27:49, 12.93it/s][A
 82%|███████████████████████████▏     | 100725/122310 [3:09:37<34:25, 10.45it/s][A
 82%|███████████████████████████▏     | 100741/122310 [3:09:38<28:01, 12.83it/s][A
 82%|███████████████████████████▏     | 100754/122310 [3:09:39<26:46, 13.42it/s][A
 82%|███████████████████████████▏     | 100756/122310 [3:09:40<35:36, 10.09it/s][A
 82%|███████████████████████████▏     | 100767/122310 [3:09:40<33:12, 10.81it/s][A
 82%|███████████████████████████▏     | 100774/122310 [3:09:41<35:58,  9.98it/s][A
 82%|███████████████████████████▏     | 100781/122310 [3:09:42<38:38,  9.28it/s][A
 82%|███████████████████████████▏     | 100789/122310 [3:09:43<39:15,  9.14it/s][A
 82%|███████████████████████████▏     | 100798/122310 [3:09:44<37:58,  9.44it/s][A
 82%|███████████████████████████▏     | 100804/122310 [3:09:45<41:17,  8.68

step: 27080, loss: 105.57281243820496, epoch: 1



 82%|███████████████████████████▏     | 100874/122310 [3:09:53<41:32,  8.60it/s][A
 82%|███████████████████████████▏     | 100884/122310 [3:09:54<38:13,  9.34it/s][A
 82%|███████████████████████████▏     | 100890/122310 [3:09:55<41:47,  8.54it/s][A
 82%|███████████████████████████▏     | 100895/122310 [3:09:56<46:10,  7.73it/s][A
 82%|███████████████████████████▏     | 100901/122310 [3:09:57<48:01,  7.43it/s][A
 83%|███████████████████████████▏     | 100928/122310 [3:09:58<24:55, 14.30it/s][A
 83%|███████████████████████████▏     | 100938/122310 [3:09:59<26:31, 13.43it/s][A
 83%|███████████████████████████▏     | 100948/122310 [3:10:00<27:40, 12.86it/s][A
 83%|███████████████████████████▏     | 100954/122310 [3:10:00<32:14, 11.04it/s][A
 83%|███████████████████████████▏     | 100965/122310 [3:10:01<32:05, 11.09it/s][A
 83%|███████████████████████████▏     | 100979/122310 [3:10:02<28:42, 12.39it/s][A
 83%|███████████████████████████▏     | 100985/122310 [3:10:03<32:59, 10.77

step: 27100, loss: 81.17537696559873, epoch: 1



 83%|███████████████████████████▎     | 101058/122310 [3:10:12<50:53,  6.96it/s][A
 83%|███████████████████████████▎     | 101065/122310 [3:10:13<49:46,  7.11it/s][A
 83%|███████████████████████████▎     | 101072/122310 [3:10:14<49:50,  7.10it/s][A
 83%|███████████████████████████▎     | 101075/122310 [3:10:15<58:26,  6.06it/s][A
 83%|███████████████████████████▎     | 101080/122310 [3:10:15<59:08,  5.98it/s][A
 83%|███████████████████████████▎     | 101086/122310 [3:10:16<56:35,  6.25it/s][A
 83%|███████████████████████████▎     | 101097/122310 [3:10:17<43:48,  8.07it/s][A
 83%|███████████████████████████▎     | 101106/122310 [3:10:18<40:28,  8.73it/s][A
 83%|███████████████████████████▎     | 101116/122310 [3:10:19<37:55,  9.31it/s][A
 83%|███████████████████████████▎     | 101123/122310 [3:10:20<40:57,  8.62it/s][A
 83%|███████████████████████████▎     | 101131/122310 [3:10:21<41:37,  8.48it/s][A
 83%|███████████████████████████▎     | 101137/122310 [3:10:22<46:55,  7.52

step: 27120, loss: 69.29466220464833, epoch: 1



 83%|███████████████████████████▎     | 101228/122310 [3:10:30<30:56, 11.36it/s][A
 83%|███████████████████████████▎     | 101234/122310 [3:10:31<36:13,  9.70it/s][A
 83%|███████████████████████████▎     | 101244/122310 [3:10:32<34:44, 10.11it/s][A
 83%|███████████████████████████▎     | 101256/122310 [3:10:33<32:04, 10.94it/s][A
 83%|███████████████████████████▎     | 101261/122310 [3:10:34<39:31,  8.88it/s][A
 83%|███████████████████████████▎     | 101264/122310 [3:10:35<49:45,  7.05it/s][A
 83%|███████████████████████████▎     | 101280/122310 [3:10:36<36:07,  9.70it/s][A
 83%|███████████████████████████▎     | 101298/122310 [3:10:37<28:46, 12.17it/s][A
 83%|███████████████████████████▎     | 101310/122310 [3:10:38<29:20, 11.93it/s][A
 83%|███████████████████████████▎     | 101320/122310 [3:10:39<30:08, 11.61it/s][A
 83%|███████████████████████████▎     | 101329/122310 [3:10:40<31:44, 11.02it/s][A
 83%|███████████████████████████▎     | 101336/122310 [3:10:41<34:33, 10.12

step: 27140, loss: 74.31757966416764, epoch: 1



 83%|███████████████████████████▎     | 101416/122310 [3:10:50<40:53,  8.52it/s][A
 83%|███████████████████████████▎     | 101426/122310 [3:10:51<38:40,  9.00it/s][A
 83%|███████████████████████████▎     | 101440/122310 [3:10:52<32:35, 10.67it/s][A
 83%|███████████████████████████▎     | 101447/122310 [3:10:53<35:40,  9.75it/s][A
 83%|███████████████████████████▍     | 101463/122310 [3:10:54<29:04, 11.95it/s][A
 83%|███████████████████████████▍     | 101468/122310 [3:10:55<34:13, 10.15it/s][A
 83%|███████████████████████████▍     | 101476/122310 [3:10:56<35:20,  9.82it/s][A
 83%|███████████████████████████▍     | 101481/122310 [3:10:56<40:29,  8.57it/s][A
 83%|███████████████████████████▍     | 101489/122310 [3:10:57<39:48,  8.72it/s][A
 83%|███████████████████████████▍     | 101499/122310 [3:10:58<36:36,  9.47it/s][A
 83%|███████████████████████████▍     | 101512/122310 [3:10:59<31:24, 11.04it/s][A
 83%|███████████████████████████▍     | 101518/122310 [3:11:00<35:26,  9.78

step: 27160, loss: 109.14736551506263, epoch: 1



 83%|███████████████████████████▍     | 101581/122310 [3:11:08<44:15,  7.81it/s][A
 83%|███████████████████████████▍     | 101595/122310 [3:11:09<33:53, 10.19it/s][A
 83%|███████████████████████████▍     | 101599/122310 [3:11:11<53:26,  6.46it/s][A
 83%|███████████████████████████▍     | 101607/122310 [3:11:12<48:27,  7.12it/s][A
 83%|███████████████████████████▍     | 101614/122310 [3:11:12<47:16,  7.30it/s][A
 83%|███████████████████████████▍     | 101622/122310 [3:11:13<44:37,  7.73it/s][A
 83%|███████████████████████████▍     | 101629/122310 [3:11:14<44:15,  7.79it/s][A
 83%|███████████████████████████▍     | 101637/122310 [3:11:15<42:18,  8.14it/s][A
 83%|███████████████████████████▍     | 101646/122310 [3:11:16<39:25,  8.73it/s][A
 83%|███████████████████████████▍     | 101661/122310 [3:11:17<30:55, 11.13it/s][A
 83%|███████████████████████████▍     | 101666/122310 [3:11:18<36:21,  9.46it/s][A
 83%|███████████████████████████▍     | 101668/122310 [3:11:19<46:42,  7.37

step: 27180, loss: 75.5317125004641, epoch: 1



 83%|███████████████████████████▍     | 101761/122310 [3:11:26<26:43, 12.82it/s][A
 83%|███████████████████████████▍     | 101768/122310 [3:11:27<30:08, 11.36it/s][A
 83%|███████████████████████████▍     | 101779/122310 [3:11:28<29:15, 11.70it/s][A
 83%|███████████████████████████▍     | 101791/122310 [3:11:28<27:52, 12.27it/s][A
 83%|███████████████████████████▍     | 101813/122310 [3:11:29<21:17, 16.05it/s][A
 83%|███████████████████████████▍     | 101821/122310 [3:11:30<24:30, 13.94it/s][A
 83%|███████████████████████████▍     | 101829/122310 [3:11:31<27:20, 12.49it/s][A
 83%|███████████████████████████▍     | 101835/122310 [3:11:32<31:36, 10.79it/s][A
 83%|███████████████████████████▍     | 101840/122310 [3:11:33<37:03,  9.21it/s][A
 83%|███████████████████████████▍     | 101853/122310 [3:11:34<31:22, 10.87it/s][A
 83%|███████████████████████████▍     | 101857/122310 [3:11:35<38:01,  8.97it/s][A
 83%|███████████████████████████▍     | 101867/122310 [3:11:35<35:15,  9.66

step: 27200, loss: 106.19559117769688, epoch: 1
sim1 and sim2 are 0.46515597941661563, 0.20160701856387775
cosine of pred and queen: 0.17013706183756
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: pacific
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: middle
Actual: kenya:africa::netherlands:europe, pred: middle
Actual: mumbai:asia::nairobi:africa, pred: europe
Actual: maharastr

Actual: cairo:egypt::manila:philippines, pred: hawser
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: april
Actual: grandfather:grandmother::father:mother, pred: sister
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: thaws
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: pradesh
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 83%|████████████████████████▉     | 101912/122310 [3:13:12<34:49:17,  6.15s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 83%|████████████████████████▉     | 101914/122310 [3:13:14<30:06:26,  5.31s/it][A
 83%|████████████████████████▉     | 101917/122310 [3:13:15<22:50:44,  4.03s/it][A
 83%|█████████████████████████▊     | 101933/122310 [3:13:15<7:57:31,  1.41s/it][A
 83%|█████████████████████████▊     | 101947/122310 [3:13:16<4:31:02,  1.25it/s][A
 83%|█████████████████████████▊     | 101956/122310 [3:13:17<3:21:12,  1.69it/s][A
 83%|█████████████████████████▊     | 101965/122310 [3:13:18<2:31:50,  2.23it/s][A
 83%|█████████████████████████▊     | 101970/122310 [3:13:19<2:14:16,  2.52it/s][A
 83%|█████████████████████████▊     | 101984/122310 [3:13:20<1:25:28,  3.96it/s][A
 83%|█████████████████████████▊     | 101990/122310 [3:13:21<1:18:00,  4.34it/s][A
 83%|█████████████████████████▊     | 101996/122310 [3:13:22<1:11:39,  4.72it/s][A
 83%|███████████████████████████▌     | 102005/122310 [3:13:23<59:01,  5.73it/s][A
 83%|███████████████████████████▌     | 102010/122310 [3:13:23<59:18,  5.70

step: 27220, loss: 69.36431810220608, epoch: 1



 83%|███████████████████████████▌     | 102070/122310 [3:13:31<39:07,  8.62it/s][A
 83%|███████████████████████████▌     | 102077/122310 [3:13:32<40:40,  8.29it/s][A
 83%|███████████████████████████▌     | 102085/122310 [3:13:33<40:04,  8.41it/s][A
 83%|███████████████████████████▌     | 102091/122310 [3:13:34<42:47,  7.87it/s][A
 83%|███████████████████████████▌     | 102096/122310 [3:13:35<47:16,  7.13it/s][A
 83%|███████████████████████████▌     | 102106/122310 [3:13:36<41:35,  8.10it/s][A
 83%|███████████████████████████▌     | 102114/122310 [3:13:37<41:02,  8.20it/s][A
 83%|███████████████████████████▌     | 102121/122310 [3:13:38<42:05,  7.99it/s][A
 84%|███████████████████████████▌     | 102136/122310 [3:13:38<32:37, 10.31it/s][A
 84%|███████████████████████████▌     | 102144/122310 [3:13:39<34:29,  9.75it/s][A
 84%|███████████████████████████▌     | 102151/122310 [3:13:40<36:50,  9.12it/s][A
 84%|███████████████████████████▌     | 102161/122310 [3:13:41<35:01,  9.59

step: 27240, loss: 75.0264919466383, epoch: 1



 84%|███████████████████████████▌     | 102236/122310 [3:13:49<37:06,  9.02it/s][A
 84%|███████████████████████████▌     | 102247/122310 [3:13:50<33:15, 10.05it/s][A
 84%|███████████████████████████▌     | 102253/122310 [3:13:51<36:43,  9.10it/s][A
 84%|███████████████████████████▌     | 102263/122310 [3:13:52<34:06,  9.80it/s][A
 84%|███████████████████████████▌     | 102275/122310 [3:13:53<30:30, 10.94it/s][A
 84%|███████████████████████████▌     | 102285/122310 [3:13:54<30:45, 10.85it/s][A
 84%|███████████████████████████▌     | 102291/122310 [3:13:55<35:15,  9.46it/s][A
 84%|███████████████████████████▌     | 102306/122310 [3:13:56<29:08, 11.44it/s][A
 84%|███████████████████████████▌     | 102311/122310 [3:13:57<34:45,  9.59it/s][A
 84%|███████████████████████████▌     | 102327/122310 [3:13:58<27:47, 11.98it/s][A
 84%|███████████████████████████▌     | 102337/122310 [3:13:58<28:40, 11.61it/s][A
 84%|███████████████████████████▌     | 102349/122310 [3:13:59<28:19, 11.75

step: 27260, loss: 82.84195063928014, epoch: 1



 84%|███████████████████████████▋     | 102430/122310 [3:14:08<42:15,  7.84it/s][A
 84%|███████████████████████████▋     | 102437/122310 [3:14:09<42:06,  7.86it/s][A
 84%|███████████████████████████▋     | 102450/122310 [3:14:09<33:35,  9.85it/s][A
 84%|███████████████████████████▋     | 102458/122310 [3:14:10<34:23,  9.62it/s][A
 84%|███████████████████████████▋     | 102474/122310 [3:14:11<27:12, 12.15it/s][A
 84%|███████████████████████████▋     | 102477/122310 [3:14:12<34:27,  9.59it/s][A
 84%|███████████████████████████▋     | 102482/122310 [3:14:13<39:13,  8.42it/s][A
 84%|███████████████████████████▋     | 102485/122310 [3:14:14<47:34,  6.94it/s][A
 84%|███████████████████████████▋     | 102492/122310 [3:14:15<45:37,  7.24it/s][A
 84%|███████████████████████████▋     | 102498/122310 [3:14:16<46:27,  7.11it/s][A
 84%|███████████████████████████▋     | 102504/122310 [3:14:16<47:00,  7.02it/s][A
 84%|███████████████████████████▋     | 102511/122310 [3:14:17<45:40,  7.23

step: 27280, loss: 96.14556514944194, epoch: 1



 84%|███████████████████████████▋     | 102577/122310 [3:14:25<38:07,  8.63it/s][A
 84%|███████████████████████████▋     | 102582/122310 [3:14:26<42:24,  7.75it/s][A
 84%|███████████████████████████▋     | 102605/122310 [3:14:27<24:48, 13.23it/s][A
 84%|███████████████████████████▋     | 102610/122310 [3:14:28<29:48, 11.02it/s][A
 84%|███████████████████████████▋     | 102615/122310 [3:14:29<34:46,  9.44it/s][A
 84%|███████████████████████████▋     | 102637/122310 [3:14:30<23:24, 14.01it/s][A
 84%|███████████████████████████▋     | 102639/122310 [3:14:31<31:03, 10.55it/s][A
 84%|███████████████████████████▋     | 102655/122310 [3:14:31<25:28, 12.86it/s][A
 84%|███████████████████████████▋     | 102663/122310 [3:14:32<27:55, 11.73it/s][A
 84%|███████████████████████████▋     | 102674/122310 [3:14:33<27:20, 11.97it/s][A
 84%|███████████████████████████▋     | 102681/122310 [3:14:34<30:24, 10.76it/s][A
 84%|███████████████████████████▋     | 102694/122310 [3:14:35<27:18, 11.98

step: 27300, loss: 74.44319065167765, epoch: 1



 84%|███████████████████████████▋     | 102768/122310 [3:14:43<36:24,  8.95it/s][A
 84%|███████████████████████████▋     | 102772/122310 [3:14:44<42:31,  7.66it/s][A
 84%|███████████████████████████▋     | 102784/122310 [3:14:45<34:24,  9.46it/s][A
 84%|███████████████████████████▋     | 102791/122310 [3:14:46<36:16,  8.97it/s][A
 84%|███████████████████████████▋     | 102800/122310 [3:14:46<34:45,  9.36it/s][A
 84%|███████████████████████████▋     | 102814/122310 [3:14:47<28:40, 11.33it/s][A
 84%|███████████████████████████▋     | 102818/122310 [3:14:48<34:57,  9.29it/s][A
 84%|███████████████████████████▋     | 102821/122310 [3:14:49<43:02,  7.55it/s][A
 84%|███████████████████████████▋     | 102839/122310 [3:14:50<28:31, 11.37it/s][A
 84%|███████████████████████████▋     | 102846/122310 [3:14:51<31:20, 10.35it/s][A
 84%|███████████████████████████▊     | 102854/122310 [3:14:52<32:26, 10.00it/s][A
 84%|███████████████████████████▊     | 102861/122310 [3:14:53<34:38,  9.36

step: 27320, loss: 90.11294258065915, epoch: 1



 84%|███████████████████████████▊     | 102921/122310 [3:15:00<44:24,  7.28it/s][A
 84%|███████████████████████████▊     | 102932/122310 [3:15:01<37:38,  8.58it/s][A
 84%|███████████████████████████▊     | 102940/122310 [3:15:02<36:59,  8.73it/s][A
 84%|███████████████████████████▊     | 102952/122310 [3:15:03<31:59, 10.09it/s][A
 84%|███████████████████████████▊     | 102960/122310 [3:15:04<32:51,  9.82it/s][A
 84%|███████████████████████████▊     | 102968/122310 [3:15:05<33:31,  9.61it/s][A
 84%|███████████████████████████▊     | 102975/122310 [3:15:06<35:16,  9.14it/s][A
 84%|███████████████████████████▊     | 102980/122310 [3:15:07<39:45,  8.10it/s][A
 84%|███████████████████████████▊     | 102983/122310 [3:15:07<47:52,  6.73it/s][A
 84%|███████████████████████████▊     | 102991/122310 [3:15:08<43:19,  7.43it/s][A
 84%|███████████████████████████▊     | 102999/122310 [3:15:09<40:39,  7.91it/s][A
 84%|███████████████████████████▊     | 103006/122310 [3:15:10<40:28,  7.95

step: 27340, loss: 77.60747011107, epoch: 1



 84%|███████████████████████████▊     | 103077/122310 [3:15:18<35:35,  9.01it/s][A
 84%|███████████████████████████▊     | 103089/122310 [3:15:19<30:51, 10.38it/s][A
 84%|███████████████████████████▊     | 103095/122310 [3:15:20<34:28,  9.29it/s][A
 84%|███████████████████████████▊     | 103098/122310 [3:15:21<42:38,  7.51it/s][A
 84%|███████████████████████████▊     | 103108/122310 [3:15:22<36:46,  8.70it/s][A
 84%|███████████████████████████▊     | 103123/122310 [3:15:22<28:37, 11.17it/s][A
 84%|███████████████████████████▊     | 103128/122310 [3:15:23<33:28,  9.55it/s][A
 84%|███████████████████████████▊     | 103143/122310 [3:15:24<27:02, 11.81it/s][A
 84%|███████████████████████████▊     | 103152/122310 [3:15:25<28:08, 11.35it/s][A
 84%|███████████████████████████▊     | 103163/122310 [3:15:26<27:13, 11.72it/s][A
 84%|███████████████████████████▊     | 103171/122310 [3:15:27<29:10, 10.93it/s][A
 84%|███████████████████████████▊     | 103179/122310 [3:15:28<30:44, 10.37

step: 27360, loss: 79.19667765414681, epoch: 1



 84%|███████████████████████████▊     | 103251/122310 [3:15:36<32:36,  9.74it/s][A
 84%|███████████████████████████▊     | 103259/122310 [3:15:37<33:12,  9.56it/s][A
 84%|███████████████████████████▊     | 103267/122310 [3:15:37<33:42,  9.41it/s][A
 84%|███████████████████████████▊     | 103271/122310 [3:15:38<39:45,  7.98it/s][A
 84%|███████████████████████████▊     | 103280/122310 [3:15:39<36:39,  8.65it/s][A
 84%|███████████████████████████▊     | 103286/122310 [3:15:40<39:11,  8.09it/s][A
 84%|███████████████████████████▊     | 103293/122310 [3:15:41<39:20,  8.06it/s][A
 84%|███████████████████████████▊     | 103303/122310 [3:15:42<34:58,  9.06it/s][A
 84%|███████████████████████████▉     | 103316/122310 [3:15:43<29:36, 10.69it/s][A
 84%|███████████████████████████▉     | 103327/122310 [3:15:44<28:08, 11.24it/s][A
 84%|███████████████████████████▉     | 103334/122310 [3:15:44<30:58, 10.21it/s][A
 84%|███████████████████████████▉     | 103343/122310 [3:15:45<30:49, 10.25

step: 27380, loss: 63.81349407277728, epoch: 1



 85%|███████████████████████████▉     | 103422/122310 [3:15:53<28:20, 11.11it/s][A
 85%|███████████████████████████▉     | 103430/122310 [3:15:54<29:59, 10.49it/s][A
 85%|███████████████████████████▉     | 103439/122310 [3:15:56<39:37,  7.94it/s][A
 85%|███████████████████████████▉     | 103448/122310 [3:15:57<37:03,  8.48it/s][A
 85%|███████████████████████████▉     | 103451/122310 [3:15:58<43:41,  7.19it/s][A
 85%|███████████████████████████▉     | 103461/122310 [3:15:59<37:42,  8.33it/s][A
 85%|███████████████████████████▉     | 103474/122310 [3:15:59<31:08, 10.08it/s][A
 85%|███████████████████████████▉     | 103476/122310 [3:16:00<40:12,  7.81it/s][A
 85%|███████████████████████████▉     | 103482/122310 [3:16:01<41:25,  7.57it/s][A
 85%|███████████████████████████▉     | 103492/122310 [3:16:02<36:33,  8.58it/s][A
 85%|███████████████████████████▉     | 103501/122310 [3:16:03<34:35,  9.06it/s][A
 85%|███████████████████████████▉     | 103506/122310 [3:16:04<39:51,  7.86

step: 27400, loss: 81.06652688167902, epoch: 1



 85%|███████████████████████████▉     | 103572/122310 [3:16:11<31:02, 10.06it/s][A
 85%|███████████████████████████▉     | 103579/122310 [3:16:12<33:00,  9.46it/s][A
 85%|███████████████████████████▉     | 103592/122310 [3:16:13<28:12, 11.06it/s][A
 85%|███████████████████████████▉     | 103601/122310 [3:16:14<28:53, 10.79it/s][A
 85%|███████████████████████████▉     | 103606/122310 [3:16:15<33:34,  9.28it/s][A
 85%|███████████████████████████▉     | 103615/122310 [3:16:15<32:36,  9.55it/s][A
 85%|███████████████████████████▉     | 103618/122310 [3:16:16<40:15,  7.74it/s][A
 85%|███████████████████████████▉     | 103631/122310 [3:16:17<31:28,  9.89it/s][A
 85%|███████████████████████████▉     | 103639/122310 [3:16:18<32:35,  9.55it/s][A
 85%|███████████████████████████▉     | 103651/122310 [3:16:19<28:51, 10.77it/s][A
 85%|███████████████████████████▉     | 103660/122310 [3:16:20<30:15, 10.27it/s][A
 85%|███████████████████████████▉     | 103665/122310 [3:16:21<35:33,  8.74

step: 27420, loss: 83.06111169832431, epoch: 1



 85%|███████████████████████████▉     | 103747/122310 [3:16:30<30:34, 10.12it/s][A
 85%|███████████████████████████▉     | 103750/122310 [3:16:32<50:27,  6.13it/s][A
 85%|███████████████████████████▉     | 103763/122310 [3:16:33<38:40,  7.99it/s][A
 85%|███████████████████████████▉     | 103768/122310 [3:16:34<42:37,  7.25it/s][A
 85%|███████████████████████████▉     | 103772/122310 [3:16:35<48:30,  6.37it/s][A
 85%|████████████████████████████     | 103781/122310 [3:16:36<43:22,  7.12it/s][A
 85%|████████████████████████████     | 103794/122310 [3:16:36<34:25,  8.96it/s][A
 85%|████████████████████████████     | 103802/122310 [3:16:37<34:46,  8.87it/s][A
 85%|████████████████████████████     | 103814/122310 [3:16:38<30:54,  9.97it/s][A
 85%|████████████████████████████     | 103822/122310 [3:16:39<32:20,  9.53it/s][A
 85%|████████████████████████████     | 103829/122310 [3:16:40<33:54,  9.08it/s][A
 85%|████████████████████████████     | 103836/122310 [3:16:41<35:05,  8.77

step: 27440, loss: 79.89080233264853, epoch: 1



 85%|████████████████████████████     | 103884/122310 [3:16:48<49:30,  6.20it/s][A
 85%|████████████████████████████     | 103891/122310 [3:16:49<45:35,  6.73it/s][A
 85%|████████████████████████████     | 103898/122310 [3:16:50<43:08,  7.11it/s][A
 85%|████████████████████████████     | 103907/122310 [3:16:51<38:09,  8.04it/s][A
 85%|████████████████████████████     | 103914/122310 [3:16:52<38:15,  8.01it/s][A
 85%|████████████████████████████     | 103920/122310 [3:16:53<40:04,  7.65it/s][A
 85%|████████████████████████████     | 103937/122310 [3:16:53<27:26, 11.16it/s][A
 85%|████████████████████████████     | 103941/122310 [3:16:54<33:23,  9.17it/s][A
 85%|████████████████████████████     | 103948/122310 [3:16:55<34:51,  8.78it/s][A
 85%|████████████████████████████     | 103952/122310 [3:16:56<40:34,  7.54it/s][A
 85%|████████████████████████████     | 103958/122310 [3:16:57<41:53,  7.30it/s][A
 85%|████████████████████████████     | 103960/122310 [3:16:58<52:39,  5.81

step: 27460, loss: 85.409730438519, epoch: 1



 85%|████████████████████████████     | 104013/122310 [3:17:06<44:23,  6.87it/s][A
 85%|████████████████████████████     | 104021/122310 [3:17:07<40:19,  7.56it/s][A
 85%|████████████████████████████     | 104030/122310 [3:17:08<36:26,  8.36it/s][A
 85%|████████████████████████████     | 104036/122310 [3:17:08<38:31,  7.91it/s][A
 85%|████████████████████████████     | 104044/122310 [3:17:09<36:50,  8.26it/s][A
 85%|████████████████████████████     | 104064/122310 [3:17:10<24:14, 12.55it/s][A
 85%|████████████████████████████     | 104071/122310 [3:17:11<27:05, 11.22it/s][A
 85%|████████████████████████████     | 104080/122310 [3:17:12<27:51, 10.90it/s][A
 85%|████████████████████████████     | 104091/122310 [3:17:13<26:46, 11.34it/s][A
 85%|████████████████████████████     | 104097/122310 [3:17:14<30:22, 10.00it/s][A
 85%|████████████████████████████     | 104104/122310 [3:17:15<32:51,  9.24it/s][A
 85%|████████████████████████████     | 104108/122310 [3:17:16<40:30,  7.49

step: 27480, loss: 75.75427346465483, epoch: 1



 85%|████████████████████████████     | 104189/122310 [3:17:24<38:21,  7.87it/s][A
 85%|████████████████████████████     | 104192/122310 [3:17:25<45:49,  6.59it/s][A
 85%|████████████████████████████     | 104201/122310 [3:17:26<39:43,  7.60it/s][A
 85%|████████████████████████████     | 104216/122310 [3:17:27<30:00, 10.05it/s][A
 85%|████████████████████████████     | 104225/122310 [3:17:28<29:55, 10.07it/s][A
 85%|████████████████████████████     | 104233/122310 [3:17:29<31:01,  9.71it/s][A
 85%|████████████████████████████     | 104238/122310 [3:17:30<35:21,  8.52it/s][A
 85%|████████████████████████████▏    | 104251/122310 [3:17:31<29:21, 10.25it/s][A
 85%|████████████████████████████▏    | 104257/122310 [3:17:32<32:59,  9.12it/s][A
 85%|████████████████████████████▏    | 104264/122310 [3:17:33<35:32,  8.46it/s][A
 85%|████████████████████████████▏    | 104273/122310 [3:17:34<34:24,  8.74it/s][A
 85%|████████████████████████████▏    | 104282/122310 [3:17:35<33:41,  8.92

step: 27500, loss: 84.50762840692539, epoch: 1



 85%|████████████████████████████▏    | 104363/122310 [3:17:43<28:45, 10.40it/s][A
 85%|████████████████████████████▏    | 104378/122310 [3:17:44<27:01, 11.06it/s][A
 85%|████████████████████████████▏    | 104385/122310 [3:17:46<31:36,  9.45it/s][A
 85%|████████████████████████████▏    | 104395/122310 [3:17:47<33:54,  8.80it/s][A
 85%|████████████████████████████▏    | 104410/122310 [3:17:48<30:07,  9.90it/s][A
 85%|████████████████████████████▏    | 104420/122310 [3:17:49<32:18,  9.23it/s][A
 85%|████████████████████████████▏    | 104421/122310 [3:17:51<44:30,  6.70it/s][A
 85%|████████████████████████████▏    | 104427/122310 [3:17:52<47:44,  6.24it/s][A
 85%|████████████████████████████▏    | 104432/122310 [3:17:53<52:39,  5.66it/s][A
 85%|████████████████████████████▏    | 104440/122310 [3:17:54<48:30,  6.14it/s][A
 85%|████████████████████████████▏    | 104450/122310 [3:17:55<43:23,  6.86it/s][A
 85%|████████████████████████████▏    | 104451/122310 [3:17:56<57:22,  5.19

step: 27520, loss: 87.58791594341488, epoch: 1



 85%|████████████████████████████▏    | 104512/122310 [3:18:06<46:45,  6.34it/s][A
 85%|████████████████████████████▏    | 104518/122310 [3:18:07<45:48,  6.47it/s][A
 85%|████████████████████████████▏    | 104524/122310 [3:18:07<44:51,  6.61it/s][A
 85%|████████████████████████████▏    | 104530/122310 [3:18:08<44:27,  6.67it/s][A
 85%|████████████████████████████▏    | 104536/122310 [3:18:09<43:58,  6.74it/s][A
 85%|████████████████████████████▏    | 104542/122310 [3:18:10<45:39,  6.49it/s][A
 85%|████████████████████████████▏    | 104550/122310 [3:18:11<40:53,  7.24it/s][A
 85%|████████████████████████████▏    | 104565/122310 [3:18:12<29:11, 10.13it/s][A
 85%|████████████████████████████▏    | 104572/122310 [3:18:13<31:06,  9.50it/s][A
 86%|████████████████████████████▏    | 104577/122310 [3:18:14<35:28,  8.33it/s][A
 86%|████████████████████████████▏    | 104584/122310 [3:18:15<35:53,  8.23it/s][A
 86%|████████████████████████████▏    | 104593/122310 [3:18:15<33:26,  8.83

step: 27540, loss: 91.07499635695916, epoch: 1



 86%|████████████████████████████▏    | 104663/122310 [3:18:24<33:58,  8.66it/s][A
 86%|████████████████████████████▏    | 104676/122310 [3:18:24<27:57, 10.51it/s][A
 86%|████████████████████████████▏    | 104698/122310 [3:18:25<19:43, 14.88it/s][A
 86%|████████████████████████████▎    | 104708/122310 [3:18:26<21:10, 13.85it/s][A
 86%|████████████████████████████▎    | 104717/122310 [3:18:27<22:57, 12.77it/s][A
 86%|████████████████████████████▎    | 104722/122310 [3:18:28<27:31, 10.65it/s][A
 86%|████████████████████████████▎    | 104729/122310 [3:18:29<29:39,  9.88it/s][A
 86%|████████████████████████████▎    | 104734/122310 [3:18:30<34:13,  8.56it/s][A
 86%|████████████████████████████▎    | 104744/122310 [3:18:31<31:22,  9.33it/s][A
 86%|████████████████████████████▎    | 104750/122310 [3:18:31<34:00,  8.60it/s][A
 86%|████████████████████████████▎    | 104759/122310 [3:18:32<32:02,  9.13it/s][A
 86%|████████████████████████████▎    | 104767/122310 [3:18:33<31:56,  9.16

step: 27560, loss: 75.00340223142892, epoch: 1



 86%|████████████████████████████▎    | 104849/122310 [3:18:41<25:41, 11.32it/s][A
 86%|████████████████████████████▎    | 104860/122310 [3:18:42<24:48, 11.72it/s][A
 86%|████████████████████████████▎    | 104868/122310 [3:18:43<26:28, 10.98it/s][A
 86%|████████████████████████████▎    | 104876/122310 [3:18:44<27:45, 10.47it/s][A
 86%|████████████████████████████▎    | 104884/122310 [3:18:45<28:46, 10.09it/s][A
 86%|████████████████████████████▎    | 104889/122310 [3:18:46<33:04,  8.78it/s][A
 86%|████████████████████████████▎    | 104899/122310 [3:18:46<30:14,  9.60it/s][A
 86%|████████████████████████████▎    | 104910/122310 [3:18:47<27:31, 10.54it/s][A
 86%|████████████████████████████▎    | 104922/122310 [3:18:48<25:10, 11.51it/s][A
 86%|████████████████████████████▎    | 104931/122310 [3:18:49<25:59, 11.15it/s][A
 86%|████████████████████████████▎    | 104941/122310 [3:18:50<25:47, 11.22it/s][A
 86%|████████████████████████████▎    | 104945/122310 [3:18:51<31:15,  9.26

step: 27580, loss: 83.06072542623762, epoch: 1



 86%|████████████████████████████▎    | 105013/122310 [3:18:59<34:33,  8.34it/s][A
 86%|████████████████████████████▎    | 105027/122310 [3:18:59<27:00, 10.66it/s][A
 86%|████████████████████████████▎    | 105030/122310 [3:19:00<33:59,  8.47it/s][A
 86%|████████████████████████████▎    | 105038/122310 [3:19:01<32:58,  8.73it/s][A
 86%|████████████████████████████▎    | 105046/122310 [3:19:02<32:23,  8.88it/s][A
 86%|████████████████████████████▎    | 105069/122310 [3:19:03<20:18, 14.15it/s][A
 86%|████████████████████████████▎    | 105078/122310 [3:19:04<22:03, 13.02it/s][A
 86%|████████████████████████████▎    | 105087/122310 [3:19:05<23:24, 12.26it/s][A
 86%|████████████████████████████▎    | 105092/122310 [3:19:06<27:51, 10.30it/s][A
 86%|████████████████████████████▎    | 105100/122310 [3:19:06<28:49,  9.95it/s][A
 86%|████████████████████████████▎    | 105110/122310 [3:19:07<27:30, 10.42it/s][A
 86%|████████████████████████████▎    | 105116/122310 [3:19:08<30:39,  9.34

step: 27600, loss: 84.78950457072085, epoch: 1
sim1 and sim2 are 0.46911517845043565, 0.22919653664351605
cosine of pred and queen: 0.21304585333671688
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: pacific
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: middle
Actual: kenya:africa::netherlands:europe, pred: middle
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: m

Actual: cairo:egypt::manila:philippines, pred: meditated
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: sister
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: thaws
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: pradesh
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: pradesh
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 86%|█████████████████████████▊    | 105181/122310 [3:20:48<18:54:05,  3.97s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 86%|█████████████████████████▊    | 105184/122310 [3:20:49<16:16:20,  3.42s/it][A
 86%|█████████████████████████▊    | 105191/122310 [3:20:50<11:02:01,  2.32s/it][A
 86%|██████████████████████████▋    | 105203/122310 [3:20:51<6:13:27,  1.31s/it][A
 86%|██████████████████████████▋    | 105219/122310 [3:20:52<3:28:09,  1.37it/s][A
 86%|██████████████████████████▋    | 105228/122310 [3:20:53<2:39:19,  1.79it/s][A
 86%|██████████████████████████▋    | 105236/122310 [3:20:54<2:06:34,  2.25it/s][A
 86%|██████████████████████████▋    | 105248/122310 [3:20:55<1:29:06,  3.19it/s][A
 86%|██████████████████████████▋    | 105254/122310 [3:20:55<1:19:30,  3.58it/s][A
 86%|██████████████████████████▋    | 105261/122310 [3:20:56<1:08:26,  4.15it/s][A
 86%|████████████████████████████▍    | 105269/122310 [3:20:57<57:38,  4.93it/s][A
 86%|████████████████████████████▍    | 105278/122310 [3:20:58<48:05,  5.90it/s][A
 86%|████████████████████████████▍    | 105289/122310 [3:20:59<39:03,  7.26

step: 27620, loss: 91.80941486213734, epoch: 1



 86%|████████████████████████████▍    | 105383/122310 [3:21:07<19:18, 14.62it/s][A
 86%|████████████████████████████▍    | 105394/122310 [3:21:08<20:09, 13.99it/s][A
 86%|████████████████████████████▍    | 105403/122310 [3:21:09<21:53, 12.87it/s][A
 86%|████████████████████████████▍    | 105406/122310 [3:21:10<28:10, 10.00it/s][A
 86%|████████████████████████████▍    | 105411/122310 [3:21:10<32:17,  8.72it/s][A
 86%|████████████████████████████▍    | 105417/122310 [3:21:11<34:36,  8.13it/s][A
 86%|████████████████████████████▍    | 105420/122310 [3:21:12<41:50,  6.73it/s][A
 86%|████████████████████████████▍    | 105432/122310 [3:21:13<32:00,  8.79it/s][A
 86%|████████████████████████████▍    | 105437/122310 [3:21:14<35:56,  7.83it/s][A
 86%|████████████████████████████▍    | 105447/122310 [3:21:15<31:33,  8.91it/s][A
 86%|████████████████████████████▍    | 105453/122310 [3:21:16<33:53,  8.29it/s][A
 86%|████████████████████████████▍    | 105462/122310 [3:21:17<32:00,  8.77

step: 27640, loss: 91.67270836978395, epoch: 1



 86%|████████████████████████████▍    | 105556/122310 [3:21:25<24:31, 11.38it/s][A
 86%|████████████████████████████▍    | 105563/122310 [3:21:26<26:58, 10.35it/s][A
 86%|████████████████████████████▍    | 105575/122310 [3:21:26<24:34, 11.35it/s][A
 86%|████████████████████████████▍    | 105579/122310 [3:21:27<29:58,  9.30it/s][A
 86%|████████████████████████████▍    | 105586/122310 [3:21:28<31:19,  8.90it/s][A
 86%|████████████████████████████▍    | 105598/122310 [3:21:29<27:01, 10.31it/s][A
 86%|████████████████████████████▍    | 105608/122310 [3:21:30<26:10, 10.63it/s][A
 86%|████████████████████████████▍    | 105614/122310 [3:21:31<29:54,  9.30it/s][A
 86%|████████████████████████████▍    | 105619/122310 [3:21:32<34:59,  7.95it/s][A
 86%|████████████████████████████▍    | 105625/122310 [3:21:33<37:23,  7.44it/s][A
 86%|████████████████████████████▌    | 105632/122310 [3:21:34<36:58,  7.52it/s][A
 86%|████████████████████████████▌    | 105639/122310 [3:21:35<36:26,  7.63

step: 27660, loss: 80.37199282628906, epoch: 1



 86%|████████████████████████████▌    | 105706/122310 [3:21:43<38:18,  7.23it/s][A
 86%|████████████████████████████▌    | 105715/122310 [3:21:43<34:01,  8.13it/s][A
 86%|████████████████████████████▌    | 105720/122310 [3:21:44<37:27,  7.38it/s][A
 86%|████████████████████████████▌    | 105725/122310 [3:21:45<40:03,  6.90it/s][A
 86%|████████████████████████████▌    | 105735/122310 [3:21:46<33:36,  8.22it/s][A
 86%|████████████████████████████▌    | 105742/122310 [3:21:47<33:58,  8.13it/s][A
 86%|████████████████████████████▌    | 105746/122310 [3:21:48<39:03,  7.07it/s][A
 86%|████████████████████████████▌    | 105755/122310 [3:21:49<34:26,  8.01it/s][A
 86%|████████████████████████████▌    | 105769/122310 [3:21:50<26:33, 10.38it/s][A
 86%|████████████████████████████▌    | 105786/122310 [3:21:51<21:21, 12.89it/s][A
 86%|████████████████████████████▌    | 105789/122310 [3:21:51<27:27, 10.03it/s][A
 86%|████████████████████████████▌    | 105790/122310 [3:21:52<37:12,  7.40

step: 27680, loss: 75.12843919967945, epoch: 1



 87%|████████████████████████████▌    | 105880/122310 [3:22:00<20:23, 13.43it/s][A
 87%|████████████████████████████▌    | 105895/122310 [3:22:01<19:02, 14.37it/s][A
 87%|████████████████████████████▌    | 105909/122310 [3:22:02<18:34, 14.72it/s][A
 87%|████████████████████████████▌    | 105917/122310 [3:22:03<20:55, 13.05it/s][A
 87%|████████████████████████████▌    | 105934/122310 [3:22:04<18:20, 14.89it/s][A
 87%|████████████████████████████▌    | 105940/122310 [3:22:05<22:15, 12.26it/s][A
 87%|████████████████████████████▌    | 105947/122310 [3:22:06<24:46, 11.01it/s][A
 87%|████████████████████████████▌    | 105955/122310 [3:22:07<26:19, 10.35it/s][A
 87%|████████████████████████████▌    | 105971/122310 [3:22:07<21:29, 12.67it/s][A
 87%|████████████████████████████▌    | 105981/122310 [3:22:08<22:07, 12.30it/s][A
 87%|████████████████████████████▌    | 105986/122310 [3:22:09<26:28, 10.28it/s][A
 87%|████████████████████████████▌    | 105992/122310 [3:22:10<29:19,  9.27

step: 27700, loss: 116.17659517580817, epoch: 1



 87%|████████████████████████████▌    | 106064/122310 [3:22:18<31:59,  8.46it/s][A
 87%|████████████████████████████▌    | 106066/122310 [3:22:19<39:46,  6.81it/s][A
 87%|████████████████████████████▌    | 106075/122310 [3:22:20<34:49,  7.77it/s][A
 87%|████████████████████████████▌    | 106082/122310 [3:22:21<34:34,  7.82it/s][A
 87%|████████████████████████████▌    | 106093/122310 [3:22:22<29:32,  9.15it/s][A
 87%|████████████████████████████▋    | 106101/122310 [3:22:23<29:31,  9.15it/s][A
 87%|████████████████████████████▋    | 106112/122310 [3:22:23<26:38, 10.13it/s][A
 87%|████████████████████████████▋    | 106122/122310 [3:22:24<25:41, 10.50it/s][A
 87%|████████████████████████████▋    | 106137/122310 [3:22:25<21:39, 12.45it/s][A
 87%|████████████████████████████▋    | 106142/122310 [3:22:26<25:54, 10.40it/s][A
 87%|████████████████████████████▋    | 106144/122310 [3:22:27<33:40,  8.00it/s][A
 87%|████████████████████████████▋    | 106146/122310 [3:22:28<42:49,  6.29

step: 27720, loss: 77.90473629656415, epoch: 1



 87%|████████████████████████████▋    | 106226/122310 [3:22:36<23:58, 11.18it/s][A
 87%|████████████████████████████▋    | 106240/122310 [3:22:37<21:20, 12.55it/s][A
 87%|████████████████████████████▋    | 106242/122310 [3:22:38<28:15,  9.48it/s][A
 87%|████████████████████████████▋    | 106253/122310 [3:22:38<25:51, 10.35it/s][A
 87%|████████████████████████████▋    | 106263/122310 [3:22:39<25:02, 10.68it/s][A
 87%|████████████████████████████▋    | 106280/122310 [3:22:40<20:08, 13.27it/s][A
 87%|████████████████████████████▋    | 106291/122310 [3:22:41<20:28, 13.03it/s][A
 87%|████████████████████████████▋    | 106299/122310 [3:22:43<29:46,  8.96it/s][A
 87%|████████████████████████████▋    | 106308/122310 [3:22:44<28:38,  9.31it/s][A
 87%|████████████████████████████▋    | 106321/122310 [3:22:45<24:52, 10.71it/s][A
 87%|████████████████████████████▋    | 106327/122310 [3:22:46<27:44,  9.60it/s][A
 87%|████████████████████████████▋    | 106335/122310 [3:22:46<28:12,  9.44

step: 27740, loss: 76.50728810179895, epoch: 1



 87%|████████████████████████████▋    | 106397/122310 [3:22:53<28:33,  9.29it/s][A
 87%|████████████████████████████▋    | 106406/122310 [3:22:54<27:55,  9.49it/s][A
 87%|████████████████████████████▋    | 106412/122310 [3:22:55<30:23,  8.72it/s][A
 87%|████████████████████████████▋    | 106422/122310 [3:22:56<27:47,  9.53it/s][A
 87%|████████████████████████████▋    | 106430/122310 [3:22:59<45:20,  5.84it/s][A
 87%|████████████████████████████▋    | 106434/122310 [3:23:00<47:32,  5.57it/s][A
 87%|████████████████████████████▋    | 106446/122310 [3:23:01<35:37,  7.42it/s][A
 87%|████████████████████████████▋    | 106455/122310 [3:23:01<32:31,  8.12it/s][A
 87%|████████████████████████████▋    | 106462/122310 [3:23:02<32:42,  8.08it/s][A
 87%|████████████████████████████▋    | 106474/122310 [3:23:03<27:33,  9.58it/s][A
 87%|████████████████████████████▋    | 106488/122310 [3:23:04<23:17, 11.32it/s][A
 87%|████████████████████████████▋    | 106496/122310 [3:23:05<24:40, 10.68

step: 27760, loss: 73.52135572800086, epoch: 1



 87%|███████████████████████████    | 106543/122310 [3:23:14<1:04:58,  4.04it/s][A
 87%|████████████████████████████▋    | 106552/122310 [3:23:15<50:41,  5.18it/s][A
 87%|████████████████████████████▊    | 106560/122310 [3:23:16<43:47,  5.99it/s][A
 87%|████████████████████████████▊    | 106567/122310 [3:23:16<40:43,  6.44it/s][A
 87%|████████████████████████████▊    | 106577/122310 [3:23:17<34:14,  7.66it/s][A
 87%|████████████████████████████▊    | 106591/122310 [3:23:18<26:43,  9.80it/s][A
 87%|████████████████████████████▊    | 106603/122310 [3:23:19<24:13, 10.80it/s][A
 87%|████████████████████████████▊    | 106616/122310 [3:23:20<21:56, 11.92it/s][A
 87%|████████████████████████████▊    | 106626/122310 [3:23:21<22:13, 11.76it/s][A
 87%|████████████████████████████▊    | 106634/122310 [3:23:22<23:46, 10.99it/s][A
 87%|████████████████████████████▊    | 106640/122310 [3:23:23<26:51,  9.72it/s][A
 87%|████████████████████████████▊    | 106645/122310 [3:23:24<30:36,  8.53

step: 27780, loss: 79.00403228484552, epoch: 1



 87%|████████████████████████████▊    | 106711/122310 [3:23:29<19:17, 13.47it/s][A
 87%|████████████████████████████▊    | 106716/122310 [3:23:30<23:16, 11.17it/s][A
 87%|████████████████████████████▊    | 106726/122310 [3:23:31<23:07, 11.23it/s][A
 87%|████████████████████████████▊    | 106728/122310 [3:23:31<30:26,  8.53it/s][A
 87%|████████████████████████████▊    | 106738/122310 [3:23:32<27:39,  9.38it/s][A
 87%|████████████████████████████▊    | 106748/122310 [3:23:33<26:00,  9.97it/s][A
 87%|████████████████████████████▊    | 106759/122310 [3:23:34<24:17, 10.67it/s][A
 87%|████████████████████████████▊    | 106768/122310 [3:23:35<24:31, 10.56it/s][A
 87%|████████████████████████████▊    | 106784/122310 [3:23:36<20:11, 12.82it/s][A
 87%|████████████████████████████▊    | 106791/122310 [3:23:37<22:43, 11.38it/s][A
 87%|████████████████████████████▊    | 106796/122310 [3:23:38<26:47,  9.65it/s][A
 87%|████████████████████████████▊    | 106805/122310 [3:23:39<26:16,  9.83

step: 27800, loss: 80.14869128860275, epoch: 1



 87%|████████████████████████████▊    | 106895/122310 [3:23:47<23:10, 11.09it/s][A
 87%|████████████████████████████▊    | 106908/122310 [3:23:47<21:03, 12.19it/s][A
 87%|████████████████████████████▊    | 106927/122310 [3:23:48<17:06, 14.99it/s][A
 87%|████████████████████████████▊    | 106934/122310 [3:23:49<20:02, 12.79it/s][A
 87%|████████████████████████████▊    | 106936/122310 [3:23:50<26:28,  9.68it/s][A
 87%|████████████████████████████▊    | 106939/122310 [3:23:51<32:47,  7.81it/s][A
 87%|████████████████████████████▊    | 106945/122310 [3:23:52<34:04,  7.52it/s][A
 87%|████████████████████████████▊    | 106950/122310 [3:23:53<36:47,  6.96it/s][A
 87%|████████████████████████████▊    | 106959/122310 [3:23:54<32:18,  7.92it/s][A
 87%|████████████████████████████▊    | 106963/122310 [3:23:54<37:06,  6.89it/s][A
 87%|████████████████████████████▊    | 106972/122310 [3:23:55<32:24,  7.89it/s][A
 87%|████████████████████████████▊    | 106979/122310 [3:23:56<32:20,  7.90

step: 27820, loss: 86.88717140270616, epoch: 1



 88%|████████████████████████████▉    | 107064/122310 [3:24:04<26:08,  9.72it/s][A
 88%|████████████████████████████▉    | 107072/122310 [3:24:05<26:55,  9.43it/s][A
 88%|████████████████████████████▉    | 107084/122310 [3:24:06<23:50, 10.64it/s][A
 88%|████████████████████████████▉    | 107094/122310 [3:24:07<23:30, 10.78it/s][A
 88%|████████████████████████████▉    | 107107/122310 [3:24:08<21:47, 11.63it/s][A
 88%|████████████████████████████▉    | 107115/122310 [3:24:09<23:46, 10.65it/s][A
 88%|████████████████████████████▉    | 107120/122310 [3:24:10<28:05,  9.01it/s][A
 88%|████████████████████████████▉    | 107124/122310 [3:24:11<33:19,  7.59it/s][A
 88%|████████████████████████████▉    | 107131/122310 [3:24:12<33:39,  7.52it/s][A
 88%|████████████████████████████▉    | 107144/122310 [3:24:13<27:13,  9.28it/s][A
 88%|████████████████████████████▉    | 107155/122310 [3:24:14<25:32,  9.89it/s][A
 88%|████████████████████████████▉    | 107163/122310 [3:24:17<43:07,  5.85

step: 27840, loss: 91.58633926847669, epoch: 1



 88%|████████████████████████████▉    | 107217/122310 [3:24:23<27:51,  9.03it/s][A
 88%|████████████████████████████▉    | 107226/122310 [3:24:24<26:51,  9.36it/s][A
 88%|████████████████████████████▉    | 107233/122310 [3:24:25<28:02,  8.96it/s][A
 88%|████████████████████████████▉    | 107244/122310 [3:24:26<25:11,  9.97it/s][A
 88%|████████████████████████████▉    | 107260/122310 [3:24:27<20:17, 12.36it/s][A
 88%|████████████████████████████▉    | 107271/122310 [3:24:28<20:22, 12.30it/s][A
 88%|████████████████████████████▉    | 107286/122310 [3:24:29<18:16, 13.70it/s][A
 88%|████████████████████████████▉    | 107290/122310 [3:24:29<22:53, 10.94it/s][A
 88%|████████████████████████████▉    | 107294/122310 [3:24:30<27:47,  9.00it/s][A
 88%|████████████████████████████▉    | 107298/122310 [3:24:31<32:30,  7.70it/s][A
 88%|████████████████████████████▉    | 107307/122310 [3:24:32<29:36,  8.44it/s][A
 88%|████████████████████████████▉    | 107312/122310 [3:24:33<32:48,  7.62

step: 27860, loss: 73.29890315535368, epoch: 1



 88%|████████████████████████████▉    | 107383/122310 [3:24:41<27:52,  8.93it/s][A
 88%|████████████████████████████▉    | 107385/122310 [3:24:42<35:36,  6.99it/s][A
 88%|████████████████████████████▉    | 107389/122310 [3:24:43<39:52,  6.24it/s][A
 88%|████████████████████████████▉    | 107402/122310 [3:24:44<28:28,  8.73it/s][A
 88%|████████████████████████████▉    | 107418/122310 [3:24:45<21:34, 11.50it/s][A
 88%|████████████████████████████▉    | 107431/122310 [3:24:46<19:58, 12.42it/s][A
 88%|████████████████████████████▉    | 107437/122310 [3:24:46<23:05, 10.73it/s][A
 88%|████████████████████████████▉    | 107452/122310 [3:24:47<19:46, 12.52it/s][A
 88%|████████████████████████████▉    | 107458/122310 [3:24:48<22:55, 10.80it/s][A
 88%|████████████████████████████▉    | 107469/122310 [3:24:49<21:55, 11.28it/s][A
 88%|████████████████████████████▉    | 107471/122310 [3:24:50<29:00,  8.53it/s][A
 88%|████████████████████████████▉    | 107472/122310 [3:24:51<38:50,  6.37

step: 27880, loss: 82.24901599891982, epoch: 1



 88%|█████████████████████████████    | 107526/122310 [3:24:59<36:55,  6.67it/s][A
 88%|█████████████████████████████    | 107535/122310 [3:25:00<31:50,  7.73it/s][A
 88%|█████████████████████████████    | 107545/122310 [3:25:01<27:56,  8.81it/s][A
 88%|█████████████████████████████    | 107563/122310 [3:25:02<20:01, 12.27it/s][A
 88%|█████████████████████████████    | 107573/122310 [3:25:02<20:29, 11.98it/s][A
 88%|█████████████████████████████    | 107583/122310 [3:25:03<20:52, 11.76it/s][A
 88%|█████████████████████████████    | 107588/122310 [3:25:04<24:48,  9.89it/s][A
 88%|█████████████████████████████    | 107590/122310 [3:25:05<32:21,  7.58it/s][A
 88%|█████████████████████████████    | 107593/122310 [3:25:06<38:35,  6.36it/s][A
 88%|█████████████████████████████    | 107605/122310 [3:25:07<28:44,  8.53it/s][A
 88%|█████████████████████████████    | 107615/122310 [3:25:08<26:18,  9.31it/s][A
 88%|█████████████████████████████    | 107623/122310 [3:25:09<26:35,  9.21

step: 27900, loss: 82.7972135625733, epoch: 1



 88%|█████████████████████████████    | 107705/122310 [3:25:17<24:27,  9.95it/s][A
 88%|█████████████████████████████    | 107717/122310 [3:25:18<22:03, 11.03it/s][A
 88%|█████████████████████████████    | 107726/122310 [3:25:18<22:39, 10.73it/s][A
 88%|█████████████████████████████    | 107734/122310 [3:25:19<23:52, 10.18it/s][A
 88%|█████████████████████████████    | 107740/122310 [3:25:20<26:26,  9.18it/s][A
 88%|█████████████████████████████    | 107746/122310 [3:25:21<28:40,  8.47it/s][A
 88%|█████████████████████████████    | 107762/122310 [3:25:22<21:26, 11.31it/s][A
 88%|█████████████████████████████    | 107767/122310 [3:25:23<25:15,  9.60it/s][A
 88%|█████████████████████████████    | 107771/122310 [3:25:24<29:59,  8.08it/s][A
 88%|█████████████████████████████    | 107781/122310 [3:25:25<26:53,  9.01it/s][A
 88%|█████████████████████████████    | 107784/122310 [3:25:26<33:02,  7.33it/s][A
 88%|█████████████████████████████    | 107787/122310 [3:25:26<39:19,  6.16

step: 27920, loss: 94.0226404388561, epoch: 1



 88%|█████████████████████████████    | 107857/122310 [3:25:34<25:41,  9.38it/s][A
 88%|█████████████████████████████    | 107871/122310 [3:25:35<21:14, 11.33it/s][A
 88%|█████████████████████████████    | 107880/122310 [3:25:36<21:53, 10.99it/s][A
 88%|█████████████████████████████    | 107886/122310 [3:25:37<24:37,  9.76it/s][A
 88%|█████████████████████████████    | 107888/122310 [3:25:38<32:07,  7.48it/s][A
 88%|█████████████████████████████    | 107901/122310 [3:25:39<25:04,  9.57it/s][A
 88%|█████████████████████████████    | 107908/122310 [3:25:40<26:23,  9.09it/s][A
 88%|█████████████████████████████    | 107915/122310 [3:25:41<27:30,  8.72it/s][A
 88%|█████████████████████████████    | 107919/122310 [3:25:41<32:07,  7.47it/s][A
 88%|█████████████████████████████    | 107924/122310 [3:25:42<34:34,  6.94it/s][A
 88%|█████████████████████████████    | 107928/122310 [3:25:43<38:37,  6.21it/s][A
 88%|█████████████████████████████    | 107935/122310 [3:25:44<35:44,  6.70

step: 27940, loss: 96.26239814427358, epoch: 1



 88%|█████████████████████████████▏   | 108019/122310 [3:25:52<22:48, 10.45it/s][A
 88%|█████████████████████████████▏   | 108028/122310 [3:25:53<22:53, 10.40it/s][A
 88%|█████████████████████████████▏   | 108033/122310 [3:25:54<26:29,  8.98it/s][A
 88%|█████████████████████████████▏   | 108038/122310 [3:25:55<29:54,  7.95it/s][A
 88%|█████████████████████████████▏   | 108045/122310 [3:25:56<29:48,  7.98it/s][A
 88%|█████████████████████████████▏   | 108051/122310 [3:25:57<31:11,  7.62it/s][A
 88%|█████████████████████████████▏   | 108053/122310 [3:25:57<39:26,  6.03it/s][A
 88%|█████████████████████████████▏   | 108063/122310 [3:25:58<31:10,  7.62it/s][A
 88%|█████████████████████████████▏   | 108068/122310 [3:25:59<33:47,  7.03it/s][A
 88%|█████████████████████████████▏   | 108080/122310 [3:26:00<26:25,  8.98it/s][A
 88%|█████████████████████████████▏   | 108087/122310 [3:26:01<27:21,  8.66it/s][A
 88%|█████████████████████████████▏   | 108096/122310 [3:26:02<25:56,  9.13

step: 27960, loss: 96.93299829819327, epoch: 1



 88%|█████████████████████████████▏   | 108180/122310 [3:26:10<26:11,  8.99it/s][A
 88%|█████████████████████████████▏   | 108192/122310 [3:26:11<22:46, 10.33it/s][A
 88%|█████████████████████████████▏   | 108195/122310 [3:26:12<28:23,  8.29it/s][A
 88%|█████████████████████████████▏   | 108204/122310 [3:26:12<26:29,  8.87it/s][A
 88%|█████████████████████████████▏   | 108206/122310 [3:26:13<34:04,  6.90it/s][A
 88%|█████████████████████████████▏   | 108218/122310 [3:26:14<26:28,  8.87it/s][A
 88%|█████████████████████████████▏   | 108221/122310 [3:26:15<32:22,  7.25it/s][A
 88%|█████████████████████████████▏   | 108236/122310 [3:26:16<23:03, 10.17it/s][A
 89%|█████████████████████████████▏   | 108250/122310 [3:26:17<19:42, 11.89it/s][A
 89%|█████████████████████████████▏   | 108258/122310 [3:26:18<21:12, 11.04it/s][A
 89%|█████████████████████████████▏   | 108263/122310 [3:26:19<24:51,  9.42it/s][A
 89%|█████████████████████████████▏   | 108271/122310 [3:26:20<25:13,  9.28

step: 27980, loss: 78.66599801168104, epoch: 1



 89%|█████████████████████████████▏   | 108335/122310 [3:26:28<32:51,  7.09it/s][A
 89%|█████████████████████████████▏   | 108348/122310 [3:26:29<26:24,  8.81it/s][A
 89%|█████████████████████████████▏   | 108351/122310 [3:26:30<33:04,  7.03it/s][A
 89%|█████████████████████████████▏   | 108354/122310 [3:26:31<39:02,  5.96it/s][A
 89%|█████████████████████████████▏   | 108359/122310 [3:26:32<40:25,  5.75it/s][A
 89%|█████████████████████████████▏   | 108367/122310 [3:26:33<35:10,  6.61it/s][A
 89%|█████████████████████████████▏   | 108374/122310 [3:26:34<34:51,  6.66it/s][A
 89%|█████████████████████████████▏   | 108378/122310 [3:26:35<39:44,  5.84it/s][A
 89%|█████████████████████████████▏   | 108385/122310 [3:26:36<37:35,  6.17it/s][A
 89%|█████████████████████████████▏   | 108397/122310 [3:26:37<28:58,  8.00it/s][A
 89%|█████████████████████████████▏   | 108406/122310 [3:26:38<27:32,  8.41it/s][A
 89%|█████████████████████████████▎   | 108421/122310 [3:26:39<22:09, 10.45

step: 28000, loss: 78.20684357798295, epoch: 1
sim1 and sim2 are 0.4999054259927659, 0.20905482144940804
cosine of pred and queen: 0.20026048796606227
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: italy
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: ma

Actual: cairo:egypt::manila:philippines, pred: hawser
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: sister
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: thaws
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: state
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: state
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar
Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11834319526627218
saving weight


 89%|██████████████████████████▌   | 108472/122310 [3:28:28<15:17:33,  3.98s/it][A
 89%|██████████████████████████▌   | 108479/122310 [3:28:29<11:12:38,  2.92s/it][A
 89%|███████████████████████████▍   | 108484/122310 [3:28:30<8:54:29,  2.32s/it][A
 89%|███████████████████████████▍   | 108500/122310 [3:28:31<4:33:09,  1.19s/it][A
 89%|███████████████████████████▌   | 108508/122310 [3:28:32<3:28:19,  1.10it/s][A
 89%|███████████████████████████▌   | 108518/122310 [3:28:33<2:28:58,  1.54it/s][A
 89%|███████████████████████████▌   | 108527/122310 [3:28:34<1:53:17,  2.03it/s][A
 89%|███████████████████████████▌   | 108537/122310 [3:28:35<1:24:38,  2.71it/s][A
 89%|███████████████████████████▌   | 108547/122310 [3:28:36<1:05:36,  3.50it/s][A
 89%|█████████████████████████████▎   | 108562/122310 [3:28:37<45:40,  5.02it/s][A
 89%|█████████████████████████████▎   | 108568/122310 [3:28:38<44:16,  5.17it/s][A
 89%|█████████████████████████████▎   | 108581/122310 [3:28:39<34:17,  6.67

step: 28020, loss: 62.458524010713376, epoch: 1



 89%|█████████████████████████████▎   | 108662/122310 [3:28:50<32:48,  6.93it/s][A
 89%|█████████████████████████████▎   | 108667/122310 [3:28:51<35:07,  6.47it/s][A
 89%|█████████████████████████████▎   | 108673/122310 [3:28:52<35:02,  6.49it/s][A
 89%|█████████████████████████████▎   | 108682/122310 [3:28:53<30:28,  7.45it/s][A
 89%|█████████████████████████████▎   | 108685/122310 [3:28:54<35:50,  6.34it/s][A
 89%|█████████████████████████████▎   | 108690/122310 [3:28:54<37:01,  6.13it/s][A
 89%|█████████████████████████████▎   | 108700/122310 [3:28:55<29:57,  7.57it/s][A
 89%|█████████████████████████████▎   | 108712/122310 [3:28:56<24:17,  9.33it/s][A
 89%|█████████████████████████████▎   | 108725/122310 [3:28:57<20:51, 10.85it/s][A
 89%|█████████████████████████████▎   | 108733/122310 [3:28:58<21:53, 10.34it/s][A
 89%|█████████████████████████████▎   | 108747/122310 [3:28:59<18:56, 11.93it/s][A
 89%|█████████████████████████████▎   | 108757/122310 [3:29:00<19:14, 11.74

step: 28040, loss: 83.05146554199935, epoch: 1



 89%|█████████████████████████████▎   | 108831/122310 [3:29:08<20:29, 10.96it/s][A
 89%|█████████████████████████████▎   | 108844/122310 [3:29:09<18:34, 12.08it/s][A
 89%|█████████████████████████████▎   | 108852/122310 [3:29:10<20:11, 11.11it/s][A
 89%|█████████████████████████████▎   | 108861/122310 [3:29:10<20:34, 10.89it/s][A
 89%|█████████████████████████████▎   | 108869/122310 [3:29:11<22:09, 10.11it/s][A
 89%|█████████████████████████████▍   | 108883/122310 [3:29:12<19:14, 11.63it/s][A
 89%|█████████████████████████████▍   | 108890/122310 [3:29:13<22:00, 10.16it/s][A
 89%|█████████████████████████████▍   | 108896/122310 [3:29:14<25:08,  8.89it/s][A
 89%|█████████████████████████████▍   | 108902/122310 [3:29:15<28:00,  7.98it/s][A
 89%|█████████████████████████████▍   | 108920/122310 [3:29:16<19:41, 11.34it/s][A
 89%|█████████████████████████████▍   | 108927/122310 [3:29:18<28:01,  7.96it/s][A
 89%|█████████████████████████████▍   | 108934/122310 [3:29:19<28:16,  7.89

step: 28060, loss: 69.85050611613133, epoch: 1



 89%|█████████████████████████████▍   | 108997/122310 [3:29:27<35:42,  6.21it/s][A
 89%|█████████████████████████████▍   | 109003/122310 [3:29:28<37:28,  5.92it/s][A
 89%|█████████████████████████████▍   | 109013/122310 [3:29:29<32:50,  6.75it/s][A
 89%|█████████████████████████████▍   | 109025/122310 [3:29:30<28:47,  7.69it/s][A
 89%|█████████████████████████████▍   | 109028/122310 [3:29:33<46:04,  4.81it/s][A
 89%|█████████████████████████████▍   | 109045/122310 [3:29:34<28:21,  7.79it/s][A
 89%|█████████████████████████████▍   | 109051/122310 [3:29:34<29:08,  7.58it/s][A
 89%|█████████████████████████████▍   | 109052/122310 [3:29:35<36:34,  6.04it/s][A
 89%|█████████████████████████████▍   | 109061/122310 [3:29:36<30:51,  7.15it/s][A
 89%|█████████████████████████████▍   | 109074/122310 [3:29:37<23:51,  9.25it/s][A
 89%|█████████████████████████████▍   | 109084/122310 [3:29:38<22:21,  9.86it/s][A
 89%|█████████████████████████████▍   | 109094/122310 [3:29:39<21:23, 10.30

step: 28080, loss: 72.96339560871445, epoch: 1



 89%|█████████████████████████████▍   | 109168/122310 [3:29:46<20:28, 10.70it/s][A
 89%|█████████████████████████████▍   | 109176/122310 [3:29:47<21:25, 10.22it/s][A
 89%|█████████████████████████████▍   | 109184/122310 [3:29:47<22:03,  9.92it/s][A
 89%|█████████████████████████████▍   | 109192/122310 [3:29:48<22:30,  9.71it/s][A
 89%|█████████████████████████████▍   | 109205/122310 [3:29:49<19:20, 11.29it/s][A
 89%|█████████████████████████████▍   | 109207/122310 [3:29:50<25:27,  8.58it/s][A
 89%|█████████████████████████████▍   | 109212/122310 [3:29:52<37:00,  5.90it/s][A
 89%|█████████████████████████████▍   | 109219/122310 [3:29:53<33:52,  6.44it/s][A
 89%|█████████████████████████████▍   | 109227/122310 [3:29:54<30:30,  7.15it/s][A
 89%|█████████████████████████████▍   | 109228/122310 [3:29:54<39:35,  5.51it/s][A
 89%|█████████████████████████████▍   | 109238/122310 [3:29:55<30:25,  7.16it/s][A
 89%|█████████████████████████████▍   | 109250/122310 [3:29:56<24:01,  9.06

step: 28100, loss: 104.41817932947589, epoch: 1



 89%|█████████████████████████████▍   | 109299/122310 [3:30:03<48:10,  4.50it/s][A
 89%|█████████████████████████████▍   | 109316/122310 [3:30:04<25:45,  8.41it/s][A
 89%|█████████████████████████████▍   | 109320/122310 [3:30:05<29:20,  7.38it/s][A
 89%|█████████████████████████████▍   | 109329/122310 [3:30:06<26:55,  8.04it/s][A
 89%|█████████████████████████████▌   | 109339/122310 [3:30:07<24:00,  9.01it/s][A
 89%|█████████████████████████████▌   | 109348/122310 [3:30:08<23:01,  9.38it/s][A
 89%|█████████████████████████████▌   | 109357/122310 [3:30:08<22:30,  9.59it/s][A
 89%|█████████████████████████████▌   | 109362/122310 [3:30:09<25:23,  8.50it/s][A
 89%|█████████████████████████████▌   | 109372/122310 [3:30:10<23:11,  9.30it/s][A
 89%|█████████████████████████████▌   | 109379/122310 [3:30:11<24:33,  8.77it/s][A
 89%|█████████████████████████████▌   | 109393/122310 [3:30:12<20:04, 10.72it/s][A
 89%|█████████████████████████████▌   | 109402/122310 [3:30:13<20:29, 10.50

step: 28120, loss: 94.71483698973032, epoch: 1



 90%|█████████████████████████████▌   | 109485/122310 [3:30:21<21:17, 10.04it/s][A
 90%|█████████████████████████████▌   | 109490/122310 [3:30:22<24:27,  8.74it/s][A
 90%|█████████████████████████████▌   | 109497/122310 [3:30:23<24:59,  8.54it/s][A
 90%|█████████████████████████████▌   | 109508/122310 [3:30:24<22:02,  9.68it/s][A
 90%|█████████████████████████████▌   | 109516/122310 [3:30:24<22:25,  9.51it/s][A
 90%|█████████████████████████████▌   | 109528/122310 [3:30:25<19:51, 10.73it/s][A
 90%|█████████████████████████████▌   | 109535/122310 [3:30:26<21:32,  9.88it/s][A
 90%|█████████████████████████████▌   | 109542/122310 [3:30:27<22:53,  9.30it/s][A
 90%|█████████████████████████████▌   | 109548/122310 [3:30:28<24:51,  8.56it/s][A
 90%|█████████████████████████████▌   | 109555/122310 [3:30:29<25:13,  8.43it/s][A
 90%|█████████████████████████████▌   | 109565/122310 [3:30:30<22:43,  9.35it/s][A
 90%|█████████████████████████████▌   | 109577/122310 [3:30:31<20:35, 10.30

step: 28140, loss: 82.83433506250793, epoch: 1



 90%|█████████████████████████████▌   | 109666/122310 [3:30:39<17:04, 12.34it/s][A
 90%|█████████████████████████████▌   | 109668/122310 [3:30:40<22:23,  9.41it/s][A
 90%|█████████████████████████████▌   | 109682/122310 [3:30:41<18:40, 11.27it/s][A
 90%|█████████████████████████████▌   | 109692/122310 [3:30:42<19:10, 10.97it/s][A
 90%|█████████████████████████████▌   | 109708/122310 [3:30:43<16:31, 12.70it/s][A
 90%|█████████████████████████████▌   | 109719/122310 [3:30:46<27:39,  7.59it/s][A
 90%|█████████████████████████████▌   | 109731/122310 [3:30:47<24:18,  8.63it/s][A
 90%|█████████████████████████████▌   | 109740/122310 [3:30:48<24:11,  8.66it/s][A
 90%|█████████████████████████████▌   | 109748/122310 [3:30:49<24:47,  8.45it/s][A
 90%|█████████████████████████████▌   | 109759/122310 [3:30:50<22:29,  9.30it/s][A
 90%|█████████████████████████████▌   | 109765/122310 [3:30:50<24:22,  8.58it/s][A
 90%|█████████████████████████████▌   | 109773/122310 [3:30:51<24:13,  8.62

step: 28160, loss: 73.05370742732262, epoch: 1



 90%|█████████████████████████████▋   | 109813/122310 [3:30:58<37:01,  5.63it/s][A
 90%|█████████████████████████████▋   | 109823/122310 [3:30:59<29:04,  7.16it/s][A
 90%|█████████████████████████████▋   | 109834/122310 [3:31:00<24:29,  8.49it/s][A
 90%|█████████████████████████████▋   | 109838/122310 [3:31:01<28:34,  7.27it/s][A
 90%|█████████████████████████████▋   | 109842/122310 [3:31:02<32:35,  6.38it/s][A
 90%|█████████████████████████████▋   | 109848/122310 [3:31:03<33:08,  6.27it/s][A
 90%|█████████████████████████████▋   | 109855/122310 [3:31:04<31:15,  6.64it/s][A
 90%|█████████████████████████████▋   | 109861/122310 [3:31:04<31:21,  6.62it/s][A
 90%|█████████████████████████████▋   | 109874/122310 [3:31:05<23:24,  8.86it/s][A
 90%|█████████████████████████████▋   | 109880/122310 [3:31:06<25:42,  8.06it/s][A
 90%|█████████████████████████████▋   | 109894/122310 [3:31:07<20:28, 10.11it/s][A
 90%|█████████████████████████████▋   | 109901/122310 [3:31:08<22:13,  9.31

step: 28180, loss: 92.0045300474335, epoch: 1



 90%|█████████████████████████████▋   | 109950/122310 [3:31:17<33:25,  6.16it/s][A
 90%|█████████████████████████████▋   | 109955/122310 [3:31:17<34:33,  5.96it/s][A
 90%|█████████████████████████████▋   | 109962/122310 [3:31:18<31:47,  6.47it/s][A
 90%|█████████████████████████████▋   | 109976/122310 [3:31:19<22:35,  9.10it/s][A
 90%|█████████████████████████████▋   | 109981/122310 [3:31:20<25:34,  8.04it/s][A
 90%|█████████████████████████████▋   | 109993/122310 [3:31:21<21:33,  9.52it/s][A
 90%|█████████████████████████████▋   | 109997/122310 [3:31:22<25:33,  8.03it/s][A
 90%|█████████████████████████████▋   | 110006/122310 [3:31:23<24:05,  8.51it/s][A
 90%|█████████████████████████████▋   | 110012/122310 [3:31:25<34:29,  5.94it/s][A
 90%|█████████████████████████████▋   | 110015/122310 [3:31:26<39:04,  5.24it/s][A
 90%|█████████████████████████████▋   | 110021/122310 [3:31:27<37:34,  5.45it/s][A
 90%|█████████████████████████████▋   | 110028/122310 [3:31:28<33:47,  6.06

step: 28200, loss: 93.43385589589296, epoch: 1



 90%|█████████████████████████████▋   | 110102/122310 [3:31:35<23:13,  8.76it/s][A
 90%|█████████████████████████████▋   | 110106/122310 [3:31:36<27:16,  7.46it/s][A
 90%|█████████████████████████████▋   | 110117/122310 [3:31:37<23:23,  8.68it/s][A
 90%|█████████████████████████████▋   | 110129/122310 [3:31:38<20:50,  9.74it/s][A
 90%|█████████████████████████████▋   | 110131/122310 [3:31:39<27:44,  7.32it/s][A
 90%|█████████████████████████████▋   | 110147/122310 [3:31:40<20:28,  9.90it/s][A
 90%|█████████████████████████████▋   | 110152/122310 [3:31:41<24:07,  8.40it/s][A
 90%|█████████████████████████████▋   | 110167/122310 [3:31:42<19:19, 10.47it/s][A
 90%|█████████████████████████████▋   | 110174/122310 [3:31:43<21:34,  9.38it/s][A
 90%|█████████████████████████████▋   | 110191/122310 [3:31:44<17:13, 11.73it/s][A
 90%|█████████████████████████████▋   | 110198/122310 [3:31:45<19:14, 10.49it/s][A
 90%|█████████████████████████████▋   | 110207/122310 [3:31:46<20:00, 10.08

step: 28220, loss: 79.27235013094688, epoch: 1



 90%|█████████████████████████████▋   | 110263/122310 [3:31:55<25:36,  7.84it/s][A
 90%|█████████████████████████████▊   | 110278/122310 [3:31:56<19:33, 10.25it/s][A
 90%|█████████████████████████████▊   | 110284/122310 [3:31:57<22:13,  9.02it/s][A
 90%|█████████████████████████████▊   | 110292/122310 [3:31:58<22:51,  8.76it/s][A
 90%|█████████████████████████████▊   | 110301/122310 [3:31:59<22:28,  8.91it/s][A
 90%|█████████████████████████████▊   | 110306/122310 [3:32:00<25:27,  7.86it/s][A
 90%|█████████████████████████████▊   | 110307/122310 [3:32:01<34:21,  5.82it/s][A
 90%|█████████████████████████████▊   | 110308/122310 [3:32:02<45:10,  4.43it/s][A
 90%|█████████████████████████████▊   | 110313/122310 [3:32:03<42:45,  4.68it/s][A
 90%|█████████████████████████████▊   | 110318/122310 [3:32:03<41:03,  4.87it/s][A
 90%|█████████████████████████████▊   | 110326/122310 [3:32:04<33:39,  5.93it/s][A
 90%|███████████████████████████▉   | 110332/122310 [3:32:10<1:22:37,  2.42

step: 28240, loss: 86.82035162545559, epoch: 1



 90%|█████████████████████████████▊   | 110373/122310 [3:32:14<27:09,  7.33it/s][A
 90%|█████████████████████████████▊   | 110381/122310 [3:32:15<26:08,  7.60it/s][A
 90%|█████████████████████████████▊   | 110390/122310 [3:32:16<24:26,  8.13it/s][A
 90%|█████████████████████████████▊   | 110397/122310 [3:32:17<24:58,  7.95it/s][A
 90%|█████████████████████████████▊   | 110405/122310 [3:32:18<24:22,  8.14it/s][A
 90%|█████████████████████████████▊   | 110412/122310 [3:32:19<24:55,  7.96it/s][A
 90%|█████████████████████████████▊   | 110421/122310 [3:32:19<23:28,  8.44it/s][A
 90%|█████████████████████████████▊   | 110428/122310 [3:32:20<24:17,  8.15it/s][A
 90%|█████████████████████████████▊   | 110436/122310 [3:32:21<23:52,  8.29it/s][A
 90%|█████████████████████████████▊   | 110444/122310 [3:32:22<23:38,  8.37it/s][A
 90%|█████████████████████████████▊   | 110451/122310 [3:32:23<24:28,  8.07it/s][A
 90%|█████████████████████████████▊   | 110454/122310 [3:32:24<29:42,  6.65

step: 28260, loss: 92.59953997327433, epoch: 1



 90%|█████████████████████████████▊   | 110507/122310 [3:32:33<28:47,  6.83it/s][A
 90%|█████████████████████████████▊   | 110510/122310 [3:32:34<34:11,  5.75it/s][A
 90%|█████████████████████████████▊   | 110525/122310 [3:32:35<22:19,  8.80it/s][A
 90%|█████████████████████████████▊   | 110529/122310 [3:32:36<34:12,  5.74it/s][A
 90%|█████████████████████████████▊   | 110530/122310 [3:32:37<43:02,  4.56it/s][A
 90%|█████████████████████████████▊   | 110537/122310 [3:32:38<36:44,  5.34it/s][A
 90%|█████████████████████████████▊   | 110553/122310 [3:32:39<23:00,  8.52it/s][A
 90%|█████████████████████████████▊   | 110561/122310 [3:32:40<23:00,  8.51it/s][A
 90%|█████████████████████████████▊   | 110566/122310 [3:32:41<26:05,  7.50it/s][A
 90%|█████████████████████████████▊   | 110577/122310 [3:32:42<22:34,  8.66it/s][A
 90%|█████████████████████████████▊   | 110579/122310 [3:32:43<29:47,  6.56it/s][A
 90%|█████████████████████████████▊   | 110601/122310 [3:32:44<18:45, 10.41

step: 28280, loss: 89.04450869424764, epoch: 1



 90%|█████████████████████████████▊   | 110676/122310 [3:32:52<26:05,  7.43it/s][A
 90%|█████████████████████████████▊   | 110683/122310 [3:32:53<26:16,  7.38it/s][A
 91%|█████████████████████████████▊   | 110693/122310 [3:32:54<23:30,  8.24it/s][A
 91%|█████████████████████████████▊   | 110702/122310 [3:32:55<22:47,  8.49it/s][A
 91%|█████████████████████████████▊   | 110705/122310 [3:32:56<28:28,  6.79it/s][A
 91%|█████████████████████████████▊   | 110714/122310 [3:32:57<26:18,  7.35it/s][A
 91%|█████████████████████████████▊   | 110721/122310 [3:32:59<27:02,  7.14it/s][A
 91%|█████████████████████████████▉   | 110729/122310 [3:33:00<26:27,  7.30it/s][A
 91%|█████████████████████████████▉   | 110735/122310 [3:33:01<28:08,  6.86it/s][A
 91%|█████████████████████████████▉   | 110742/122310 [3:33:02<27:48,  6.93it/s][A
 91%|█████████████████████████████▉   | 110749/122310 [3:33:03<27:43,  6.95it/s][A
 91%|█████████████████████████████▉   | 110757/122310 [3:33:04<26:21,  7.30

step: 28300, loss: 78.79343692405578, epoch: 1



 91%|█████████████████████████████▉   | 110855/122310 [3:33:13<16:43, 11.41it/s][A
 91%|█████████████████████████████▉   | 110863/122310 [3:33:14<18:23, 10.37it/s][A
 91%|█████████████████████████████▉   | 110868/122310 [3:33:15<21:51,  8.72it/s][A
 91%|█████████████████████████████▉   | 110884/122310 [3:33:16<17:40, 10.78it/s][A
 91%|█████████████████████████████▉   | 110895/122310 [3:33:17<17:27, 10.90it/s][A
 91%|█████████████████████████████▉   | 110901/122310 [3:33:18<20:12,  9.41it/s][A
 91%|█████████████████████████████▉   | 110913/122310 [3:33:19<18:31, 10.25it/s][A
 91%|█████████████████████████████▉   | 110928/122310 [3:33:20<16:06, 11.78it/s][A
 91%|█████████████████████████████▉   | 110934/122310 [3:33:21<18:47, 10.09it/s][A
 91%|█████████████████████████████▉   | 110943/122310 [3:33:22<19:18,  9.81it/s][A
 91%|█████████████████████████████▉   | 110956/122310 [3:33:23<17:32, 10.79it/s][A
 91%|█████████████████████████████▉   | 110966/122310 [3:33:24<17:48, 10.62

step: 28320, loss: 74.0016002420181, epoch: 1



 91%|█████████████████████████████▉   | 111044/122310 [3:33:33<18:32, 10.13it/s][A
 91%|█████████████████████████████▉   | 111052/122310 [3:33:34<19:31,  9.61it/s][A
 91%|█████████████████████████████▉   | 111057/122310 [3:33:35<22:36,  8.29it/s][A
 91%|█████████████████████████████▉   | 111063/122310 [3:33:36<24:18,  7.71it/s][A
 91%|█████████████████████████████▉   | 111069/122310 [3:33:38<33:36,  5.57it/s][A
 91%|█████████████████████████████▉   | 111074/122310 [3:33:39<34:05,  5.49it/s][A
 91%|█████████████████████████████▉   | 111078/122310 [3:33:40<36:26,  5.14it/s][A
 91%|█████████████████████████████▉   | 111084/122310 [3:33:40<34:15,  5.46it/s][A
 91%|█████████████████████████████▉   | 111097/122310 [3:33:41<24:05,  7.76it/s][A
 91%|█████████████████████████████▉   | 111110/122310 [3:33:42<19:46,  9.44it/s][A
 91%|█████████████████████████████▉   | 111116/122310 [3:33:43<21:50,  8.54it/s][A
 91%|█████████████████████████████▉   | 111130/122310 [3:33:44<18:00, 10.34

step: 28340, loss: 81.67475996607097, epoch: 1



 91%|█████████████████████████████▉   | 111190/122310 [3:33:52<23:51,  7.77it/s][A
 91%|██████████████████████████████   | 111194/122310 [3:33:53<27:27,  6.75it/s][A
 91%|██████████████████████████████   | 111203/122310 [3:33:54<24:35,  7.53it/s][A
 91%|██████████████████████████████   | 111208/122310 [3:33:55<26:49,  6.90it/s][A
 91%|██████████████████████████████   | 111215/122310 [3:33:58<42:15,  4.38it/s][A
 91%|██████████████████████████████   | 111223/122310 [3:33:59<35:26,  5.21it/s][A
 91%|██████████████████████████████   | 111229/122310 [3:34:00<33:46,  5.47it/s][A
 91%|██████████████████████████████   | 111240/122310 [3:34:01<26:41,  6.91it/s][A
 91%|██████████████████████████████   | 111250/122310 [3:34:02<23:37,  7.80it/s][A
 91%|██████████████████████████████   | 111261/122310 [3:34:02<21:03,  8.75it/s][A
 91%|██████████████████████████████   | 111267/122310 [3:34:03<22:47,  8.07it/s][A
 91%|██████████████████████████████   | 111277/122310 [3:34:04<21:09,  8.69

step: 28360, loss: 70.50028636939817, epoch: 1



 91%|██████████████████████████████   | 111316/122310 [3:34:11<27:13,  6.73it/s][A
 91%|██████████████████████████████   | 111325/122310 [3:34:12<24:21,  7.52it/s][A
 91%|██████████████████████████████   | 111329/122310 [3:34:13<28:04,  6.52it/s][A
 91%|██████████████████████████████   | 111340/122310 [3:34:14<22:51,  8.00it/s][A
 91%|██████████████████████████████   | 111354/122310 [3:34:15<18:19,  9.97it/s][A
 91%|██████████████████████████████   | 111360/122310 [3:34:16<20:36,  8.86it/s][A
 91%|██████████████████████████████   | 111371/122310 [3:34:17<18:49,  9.69it/s][A
 91%|██████████████████████████████   | 111378/122310 [3:34:18<20:19,  8.96it/s][A
 91%|██████████████████████████████   | 111382/122310 [3:34:19<24:11,  7.53it/s][A
 91%|██████████████████████████████   | 111390/122310 [3:34:20<23:24,  7.78it/s][A
 91%|██████████████████████████████   | 111398/122310 [3:34:21<23:02,  7.89it/s][A
 91%|██████████████████████████████   | 111403/122310 [3:34:22<25:40,  7.08

step: 28380, loss: 81.37976785763593, epoch: 1



 91%|██████████████████████████████   | 111493/122310 [3:34:31<16:22, 11.01it/s][A
 91%|██████████████████████████████   | 111495/122310 [3:34:32<21:44,  8.29it/s][A
 91%|██████████████████████████████   | 111501/122310 [3:34:33<23:34,  7.64it/s][A
 91%|██████████████████████████████   | 111507/122310 [3:34:34<25:00,  7.20it/s][A
 91%|██████████████████████████████   | 111525/122310 [3:34:35<16:48, 10.70it/s][A
 91%|██████████████████████████████   | 111539/122310 [3:34:35<14:49, 12.11it/s][A
 91%|██████████████████████████████   | 111543/122310 [3:34:36<18:11,  9.86it/s][A
 91%|██████████████████████████████   | 111552/122310 [3:34:37<18:04,  9.92it/s][A
 91%|██████████████████████████████   | 111562/122310 [3:34:38<17:29, 10.24it/s][A
 91%|██████████████████████████████   | 111570/122310 [3:34:39<18:12,  9.83it/s][A
 91%|██████████████████████████████   | 111580/122310 [3:34:40<17:36, 10.16it/s][A
 91%|██████████████████████████████   | 111589/122310 [3:34:41<17:38, 10.12

step: 28400, loss: 77.63995794209696, epoch: 1
sim1 and sim2 are 0.4696107170482813, 0.23211455775243053
cosine of pred and queen: 0.24304519442548392
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: benefits
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: members
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: italy
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual:

Actual: cairo:egypt::manila:philippines, pred: hawser
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: sister
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: htay
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: bihar
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: bihar
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 91%|███████████████████████████▍  | 111661/122310 [3:36:25<10:22:38,  3.51s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 91%|████████████████████████████▎  | 111670/122310 [3:36:26<7:15:41,  2.46s/it][A
 91%|████████████████████████████▎  | 111683/122310 [3:36:28<4:31:36,  1.53s/it][A
 91%|████████████████████████████▎  | 111692/122310 [3:36:29<3:21:59,  1.14s/it][A
 91%|████████████████████████████▎  | 111702/122310 [3:36:30<2:25:02,  1.22it/s][A
 91%|████████████████████████████▎  | 111712/122310 [3:36:31<1:45:36,  1.67it/s][A
 91%|████████████████████████████▎  | 111714/122310 [3:36:31<1:43:14,  1.71it/s][A
 91%|████████████████████████████▎  | 111722/122310 [3:36:32<1:16:50,  2.30it/s][A
 91%|██████████████████████████████▏  | 111730/122310 [3:36:33<58:58,  2.99it/s][A
 91%|██████████████████████████████▏  | 111735/122310 [3:36:34<53:12,  3.31it/s][A
 91%|██████████████████████████████▏  | 111745/122310 [3:36:35<38:43,  4.55it/s][A
 91%|██████████████████████████████▏  | 111749/122310 [3:36:36<38:42,  4.55it/s][A
 91%|██████████████████████████████▏  | 111755/122310 [3:36:37<35:10,  5.00

step: 28420, loss: 102.21213821864956, epoch: 1



 91%|██████████████████████████████▏  | 111829/122310 [3:36:45<19:29,  8.96it/s][A
 91%|██████████████████████████████▏  | 111843/122310 [3:36:46<15:52, 10.99it/s][A
 91%|██████████████████████████████▏  | 111847/122310 [3:36:46<19:10,  9.09it/s][A
 91%|██████████████████████████████▏  | 111853/122310 [3:36:47<20:35,  8.46it/s][A
 91%|██████████████████████████████▏  | 111865/122310 [3:36:48<17:22, 10.02it/s][A
 91%|██████████████████████████████▏  | 111873/122310 [3:36:49<17:52,  9.73it/s][A
 91%|██████████████████████████████▏  | 111881/122310 [3:36:50<18:09,  9.58it/s][A
 91%|██████████████████████████████▏  | 111893/122310 [3:36:51<16:03, 10.82it/s][A
 91%|██████████████████████████████▏  | 111905/122310 [3:36:52<14:51, 11.68it/s][A
 91%|██████████████████████████████▏  | 111913/122310 [3:36:53<15:52, 10.91it/s][A
 92%|██████████████████████████████▏  | 111930/122310 [3:36:53<12:54, 13.41it/s][A
 92%|██████████████████████████████▏  | 111937/122310 [3:36:54<14:42, 11.76

step: 28440, loss: 79.17332753791295, epoch: 1



 92%|██████████████████████████████▏  | 112014/122310 [3:37:02<17:31,  9.79it/s][A
 92%|██████████████████████████████▏  | 112026/122310 [3:37:03<15:39, 10.94it/s][A
 92%|██████████████████████████████▏  | 112040/122310 [3:37:04<13:44, 12.46it/s][A
 92%|██████████████████████████████▏  | 112046/122310 [3:37:05<15:51, 10.79it/s][A
 92%|██████████████████████████████▏  | 112051/122310 [3:37:06<18:26,  9.27it/s][A
 92%|██████████████████████████████▏  | 112059/122310 [3:37:07<18:30,  9.23it/s][A
 92%|██████████████████████████████▏  | 112077/122310 [3:37:08<13:31, 12.61it/s][A
 92%|██████████████████████████████▏  | 112090/122310 [3:37:08<12:48, 13.30it/s][A
 92%|██████████████████████████████▏  | 112102/122310 [3:37:09<12:40, 13.42it/s][A
 92%|██████████████████████████████▏  | 112108/122310 [3:37:10<14:49, 11.47it/s][A
 92%|██████████████████████████████▎  | 112120/122310 [3:37:11<14:04, 12.07it/s][A
 92%|██████████████████████████████▎  | 112128/122310 [3:37:12<15:14, 11.14

step: 28460, loss: 91.46995793273254, epoch: 1



 92%|██████████████████████████████▎  | 112198/122310 [3:37:20<20:27,  8.24it/s][A
 92%|██████████████████████████████▎  | 112207/122310 [3:37:21<19:01,  8.85it/s][A
 92%|██████████████████████████████▎  | 112218/122310 [3:37:22<16:55,  9.94it/s][A
 92%|██████████████████████████████▎  | 112227/122310 [3:37:23<16:38, 10.10it/s][A
 92%|██████████████████████████████▎  | 112237/122310 [3:37:23<15:59, 10.49it/s][A
 92%|██████████████████████████████▎  | 112242/122310 [3:37:24<18:31,  9.06it/s][A
 92%|██████████████████████████████▎  | 112248/122310 [3:37:25<19:58,  8.40it/s][A
 92%|██████████████████████████████▎  | 112257/122310 [3:37:26<18:45,  8.93it/s][A
 92%|██████████████████████████████▎  | 112269/122310 [3:37:27<16:17, 10.28it/s][A
 92%|██████████████████████████████▎  | 112280/122310 [3:37:28<15:21, 10.89it/s][A
 92%|██████████████████████████████▎  | 112293/122310 [3:37:29<13:50, 12.06it/s][A
 92%|██████████████████████████████▎  | 112300/122310 [3:37:30<15:24, 10.83

step: 28480, loss: 79.07170331843591, epoch: 1



 92%|██████████████████████████████▎  | 112374/122310 [3:37:38<15:21, 10.78it/s][A
 92%|██████████████████████████████▎  | 112378/122310 [3:37:38<18:33,  8.92it/s][A
 92%|██████████████████████████████▎  | 112382/122310 [3:37:39<21:49,  7.58it/s][A
 92%|██████████████████████████████▎  | 112384/122310 [3:37:40<27:27,  6.03it/s][A
 92%|██████████████████████████████▎  | 112393/122310 [3:37:41<22:42,  7.28it/s][A
 92%|██████████████████████████████▎  | 112394/122310 [3:37:42<30:35,  5.40it/s][A
 92%|██████████████████████████████▎  | 112403/122310 [3:37:43<24:14,  6.81it/s][A
 92%|██████████████████████████████▎  | 112416/122310 [3:37:44<18:01,  9.15it/s][A
 92%|██████████████████████████████▎  | 112432/122310 [3:37:45<13:53, 11.85it/s][A
 92%|██████████████████████████████▎  | 112444/122310 [3:37:45<13:16, 12.39it/s][A
 92%|██████████████████████████████▎  | 112451/122310 [3:37:46<14:51, 11.06it/s][A
 92%|██████████████████████████████▎  | 112456/122310 [3:37:47<17:22,  9.45

step: 28500, loss: 76.57663079842146, epoch: 1



 92%|██████████████████████████████▎  | 112544/122310 [3:37:55<15:52, 10.26it/s][A
 92%|██████████████████████████████▎  | 112551/122310 [3:37:56<17:03,  9.54it/s][A
 92%|██████████████████████████████▎  | 112559/122310 [3:37:57<17:20,  9.37it/s][A
 92%|██████████████████████████████▎  | 112563/122310 [3:37:58<20:35,  7.89it/s][A
 92%|██████████████████████████████▎  | 112570/122310 [3:37:59<20:32,  7.90it/s][A
 92%|██████████████████████████████▍  | 112583/122310 [3:38:00<16:22,  9.90it/s][A
 92%|██████████████████████████████▍  | 112589/122310 [3:38:00<17:56,  9.03it/s][A
 92%|██████████████████████████████▍  | 112599/122310 [3:38:01<16:37,  9.73it/s][A
 92%|██████████████████████████████▍  | 112608/122310 [3:38:02<16:28,  9.82it/s][A
 92%|██████████████████████████████▍  | 112616/122310 [3:38:03<17:50,  9.06it/s][A
 92%|██████████████████████████████▍  | 112625/122310 [3:38:04<17:38,  9.15it/s][A
 92%|██████████████████████████████▍  | 112640/122310 [3:38:05<14:30, 11.10

step: 28520, loss: 76.86759482013167, epoch: 1



 92%|██████████████████████████████▍  | 112709/122310 [3:38:14<22:00,  7.27it/s][A
 92%|██████████████████████████████▍  | 112721/122310 [3:38:15<17:38,  9.06it/s][A
 92%|██████████████████████████████▍  | 112722/122310 [3:38:16<23:41,  6.75it/s][A
 92%|██████████████████████████████▍  | 112731/122310 [3:38:17<20:32,  7.77it/s][A
 92%|██████████████████████████████▍  | 112739/122310 [3:38:18<19:37,  8.13it/s][A
 92%|██████████████████████████████▍  | 112750/122310 [3:38:19<16:51,  9.45it/s][A
 92%|██████████████████████████████▍  | 112763/122310 [3:38:19<14:25, 11.03it/s][A
 92%|██████████████████████████████▍  | 112773/122310 [3:38:20<14:19, 11.10it/s][A
 92%|██████████████████████████████▍  | 112780/122310 [3:38:21<15:35, 10.18it/s][A
 92%|██████████████████████████████▍  | 112795/122310 [3:38:22<12:56, 12.25it/s][A
 92%|██████████████████████████████▍  | 112808/122310 [3:38:23<12:11, 12.98it/s][A
 92%|██████████████████████████████▍  | 112815/122310 [3:38:24<13:46, 11.49

step: 28540, loss: 83.78531709461913, epoch: 1



 92%|██████████████████████████████▍  | 112906/122310 [3:38:32<18:25,  8.51it/s][A
 92%|██████████████████████████████▍  | 112913/122310 [3:38:34<24:21,  6.43it/s][A
 92%|██████████████████████████████▍  | 112923/122310 [3:38:34<20:26,  7.65it/s][A
 92%|██████████████████████████████▍  | 112929/122310 [3:38:35<21:01,  7.43it/s][A
 92%|██████████████████████████████▍  | 112935/122310 [3:38:36<21:25,  7.29it/s][A
 92%|██████████████████████████████▍  | 112954/122310 [3:38:37<13:47, 11.31it/s][A
 92%|██████████████████████████████▍  | 112958/122310 [3:38:38<16:36,  9.38it/s][A
 92%|██████████████████████████████▍  | 112964/122310 [3:38:39<17:58,  8.66it/s][A
 92%|██████████████████████████████▍  | 112971/122310 [3:38:40<18:21,  8.48it/s][A
 92%|██████████████████████████████▍  | 112975/122310 [3:38:41<21:15,  7.32it/s][A
 92%|██████████████████████████████▍  | 112982/122310 [3:38:41<20:44,  7.49it/s][A
 92%|██████████████████████████████▍  | 112995/122310 [3:38:42<16:09,  9.61

step: 28560, loss: 72.52442433994577, epoch: 1



 92%|██████████████████████████████▌  | 113061/122310 [3:38:49<15:49,  9.74it/s][A
 92%|██████████████████████████████▌  | 113075/122310 [3:38:50<13:16, 11.60it/s][A
 92%|██████████████████████████████▌  | 113094/122310 [3:38:51<10:30, 14.61it/s][A
 92%|██████████████████████████████▌  | 113099/122310 [3:38:52<12:49, 11.97it/s][A
 92%|██████████████████████████████▌  | 113104/122310 [3:38:53<15:14, 10.06it/s][A
 92%|██████████████████████████████▌  | 113114/122310 [3:38:54<14:36, 10.49it/s][A
 92%|██████████████████████████████▌  | 113127/122310 [3:38:55<12:58, 11.80it/s][A
 92%|██████████████████████████████▌  | 113136/122310 [3:38:55<13:27, 11.36it/s][A
 93%|██████████████████████████████▌  | 113146/122310 [3:38:56<13:28, 11.34it/s][A
 93%|██████████████████████████████▌  | 113150/122310 [3:38:57<16:29,  9.26it/s][A
 93%|██████████████████████████████▌  | 113157/122310 [3:38:58<17:15,  8.84it/s][A
 93%|██████████████████████████████▌  | 113161/122310 [3:38:59<20:16,  7.52

step: 28580, loss: 81.48257040993961, epoch: 1



 93%|██████████████████████████████▌  | 113227/122310 [3:39:07<19:31,  7.75it/s][A
 93%|██████████████████████████████▌  | 113236/122310 [3:39:08<17:47,  8.50it/s][A
 93%|██████████████████████████████▌  | 113245/122310 [3:39:09<16:44,  9.02it/s][A
 93%|██████████████████████████████▌  | 113251/122310 [3:39:10<18:02,  8.37it/s][A
 93%|██████████████████████████████▌  | 113261/122310 [3:39:11<22:07,  6.82it/s][A
 93%|██████████████████████████████▌  | 113270/122310 [3:39:12<19:47,  7.62it/s][A
 93%|██████████████████████████████▌  | 113280/122310 [3:39:13<17:37,  8.54it/s][A
 93%|██████████████████████████████▌  | 113294/122310 [3:39:14<14:29, 10.37it/s][A
 93%|██████████████████████████████▌  | 113303/122310 [3:39:15<14:29, 10.35it/s][A
 93%|██████████████████████████████▌  | 113314/122310 [3:39:16<13:40, 10.96it/s][A
 93%|██████████████████████████████▌  | 113326/122310 [3:39:17<12:44, 11.76it/s][A
 93%|██████████████████████████████▌  | 113328/122310 [3:39:18<16:39,  8.99

step: 28600, loss: 83.69384836300026, epoch: 1



 93%|██████████████████████████████▌  | 113407/122310 [3:39:26<15:37,  9.50it/s][A
 93%|██████████████████████████████▌  | 113415/122310 [3:39:27<15:52,  9.34it/s][A
 93%|██████████████████████████████▌  | 113425/122310 [3:39:27<14:55,  9.92it/s][A
 93%|██████████████████████████████▌  | 113434/122310 [3:39:28<14:48,  9.99it/s][A
 93%|██████████████████████████████▌  | 113441/122310 [3:39:29<15:49,  9.34it/s][A
 93%|██████████████████████████████▌  | 113456/122310 [3:39:30<12:52, 11.46it/s][A
 93%|██████████████████████████████▌  | 113466/122310 [3:39:31<12:52, 11.45it/s][A
 93%|██████████████████████████████▌  | 113471/122310 [3:39:32<15:08,  9.73it/s][A
 93%|██████████████████████████████▌  | 113478/122310 [3:39:33<15:58,  9.22it/s][A
 93%|██████████████████████████████▌  | 113488/122310 [3:39:34<14:54,  9.87it/s][A
 93%|██████████████████████████████▌  | 113491/122310 [3:39:34<18:32,  7.93it/s][A
 93%|██████████████████████████████▌  | 113499/122310 [3:39:35<17:40,  8.31

step: 28620, loss: 76.28812592376711, epoch: 1



 93%|██████████████████████████████▋  | 113569/122310 [3:39:43<14:52,  9.80it/s][A
 93%|██████████████████████████████▋  | 113579/122310 [3:39:44<14:12, 10.24it/s][A
 93%|██████████████████████████████▋  | 113585/122310 [3:39:45<15:42,  9.26it/s][A
 93%|██████████████████████████████▋  | 113590/122310 [3:39:46<17:42,  8.21it/s][A
 93%|██████████████████████████████▋  | 113594/122310 [3:39:47<20:22,  7.13it/s][A
 93%|██████████████████████████████▋  | 113604/122310 [3:39:48<17:15,  8.41it/s][A
 93%|██████████████████████████████▋  | 113611/122310 [3:39:49<17:30,  8.28it/s][A
 93%|██████████████████████████████▋  | 113617/122310 [3:39:49<18:24,  7.87it/s][A
 93%|██████████████████████████████▋  | 113631/122310 [3:39:50<14:04, 10.28it/s][A
 93%|██████████████████████████████▋  | 113647/122310 [3:39:51<11:23, 12.68it/s][A
 93%|██████████████████████████████▋  | 113652/122310 [3:39:52<13:38, 10.58it/s][A
 93%|██████████████████████████████▋  | 113658/122310 [3:39:53<15:15,  9.45

step: 28640, loss: 70.57790246136672, epoch: 1



 93%|██████████████████████████████▋  | 113719/122310 [3:40:01<16:45,  8.54it/s][A
 93%|██████████████████████████████▋  | 113733/122310 [3:40:02<13:21, 10.70it/s][A
 93%|██████████████████████████████▋  | 113737/122310 [3:40:03<16:04,  8.89it/s][A
 93%|██████████████████████████████▋  | 113748/122310 [3:40:04<14:16,  9.99it/s][A
 93%|██████████████████████████████▋  | 113757/122310 [3:40:04<14:07, 10.09it/s][A
 93%|██████████████████████████████▋  | 113767/122310 [3:40:05<13:35, 10.48it/s][A
 93%|██████████████████████████████▋  | 113778/122310 [3:40:06<12:50, 11.08it/s][A
 93%|██████████████████████████████▋  | 113785/122310 [3:40:07<14:08, 10.05it/s][A
 93%|██████████████████████████████▋  | 113797/122310 [3:40:08<12:43, 11.16it/s][A
 93%|██████████████████████████████▋  | 113810/122310 [3:40:09<11:34, 12.23it/s][A
 93%|██████████████████████████████▋  | 113817/122310 [3:40:10<12:54, 10.97it/s][A
 93%|██████████████████████████████▋  | 113825/122310 [3:40:11<13:34, 10.42

step: 28660, loss: 75.23266983060635, epoch: 1



 93%|██████████████████████████████▋  | 113906/122310 [3:40:19<14:58,  9.36it/s][A
 93%|██████████████████████████████▋  | 113918/122310 [3:40:19<13:21, 10.47it/s][A
 93%|██████████████████████████████▋  | 113926/122310 [3:40:20<13:51, 10.08it/s][A
 93%|██████████████████████████████▋  | 113941/122310 [3:40:21<11:30, 12.13it/s][A
 93%|██████████████████████████████▋  | 113954/122310 [3:40:22<10:46, 12.92it/s][A
 93%|██████████████████████████████▋  | 113956/122310 [3:40:23<14:16,  9.76it/s][A
 93%|██████████████████████████████▋  | 113967/122310 [3:40:24<13:09, 10.57it/s][A
 93%|██████████████████████████████▊  | 113971/122310 [3:40:25<15:49,  8.79it/s][A
 93%|██████████████████████████████▊  | 113977/122310 [3:40:26<17:02,  8.15it/s][A
 93%|██████████████████████████████▊  | 113981/122310 [3:40:26<19:28,  7.13it/s][A
 93%|██████████████████████████████▊  | 113994/122310 [3:40:27<14:47,  9.37it/s][A
 93%|██████████████████████████████▊  | 114006/122310 [3:40:28<13:02, 10.61

step: 28680, loss: 74.10099329165666, epoch: 1



 93%|██████████████████████████████▊  | 114100/122310 [3:40:36<14:06,  9.70it/s][A
 93%|██████████████████████████████▊  | 114107/122310 [3:40:37<15:20,  8.91it/s][A
 93%|██████████████████████████████▊  | 114108/122310 [3:40:39<26:48,  5.10it/s][A
 93%|██████████████████████████████▊  | 114115/122310 [3:40:40<24:14,  5.63it/s][A
 93%|██████████████████████████████▊  | 114124/122310 [3:40:41<20:31,  6.65it/s][A
 93%|██████████████████████████████▊  | 114141/122310 [3:40:42<14:15,  9.55it/s][A
 93%|██████████████████████████████▊  | 114150/122310 [3:40:43<14:22,  9.46it/s][A
 93%|██████████████████████████████▊  | 114156/122310 [3:40:44<15:48,  8.60it/s][A
 93%|██████████████████████████████▊  | 114165/122310 [3:40:45<15:02,  9.02it/s][A
 93%|██████████████████████████████▊  | 114172/122310 [3:40:46<15:32,  8.73it/s][A
 93%|██████████████████████████████▊  | 114183/122310 [3:40:47<13:47,  9.82it/s][A
 93%|██████████████████████████████▊  | 114194/122310 [3:40:48<13:01, 10.38

step: 28700, loss: 64.86649301124879, epoch: 1



 93%|██████████████████████████████▊  | 114250/122310 [3:40:55<20:56,  6.42it/s][A
 93%|██████████████████████████████▊  | 114261/122310 [3:40:56<16:33,  8.11it/s][A
 93%|██████████████████████████████▊  | 114277/122310 [3:40:57<12:22, 10.81it/s][A
 93%|██████████████████████████████▊  | 114295/122310 [3:40:58<10:10, 13.14it/s][A
 93%|██████████████████████████████▊  | 114311/122310 [3:40:59<09:34, 13.92it/s][A
 93%|██████████████████████████████▊  | 114319/122310 [3:41:00<11:21, 11.72it/s][A
 93%|██████████████████████████████▊  | 114328/122310 [3:41:01<13:00, 10.23it/s][A
 93%|██████████████████████████████▊  | 114332/122310 [3:41:03<17:21,  7.66it/s][A
 93%|██████████████████████████████▊  | 114343/122310 [3:41:04<15:51,  8.38it/s][A
 93%|██████████████████████████████▊  | 114353/122310 [3:41:05<15:06,  8.78it/s][A
 94%|██████████████████████████████▊  | 114363/122310 [3:41:06<14:07,  9.38it/s][A
 94%|██████████████████████████████▊  | 114375/122310 [3:41:07<12:43, 10.39

step: 28720, loss: 92.63261260250879, epoch: 1



 94%|██████████████████████████████▊  | 114422/122310 [3:41:14<21:19,  6.17it/s][A
 94%|██████████████████████████████▊  | 114431/122310 [3:41:15<18:00,  7.29it/s][A
 94%|██████████████████████████████▉  | 114438/122310 [3:41:16<17:36,  7.45it/s][A
 94%|██████████████████████████████▉  | 114439/122310 [3:41:17<22:57,  5.71it/s][A
 94%|██████████████████████████████▉  | 114446/122310 [3:41:18<20:28,  6.40it/s][A
 94%|██████████████████████████████▉  | 114457/122310 [3:41:19<15:55,  8.22it/s][A
 94%|██████████████████████████████▉  | 114460/122310 [3:41:20<19:16,  6.78it/s][A
 94%|██████████████████████████████▉  | 114464/122310 [3:41:21<21:11,  6.17it/s][A
 94%|██████████████████████████████▉  | 114474/122310 [3:41:21<16:49,  7.76it/s][A
 94%|██████████████████████████████▉  | 114481/122310 [3:41:22<16:36,  7.86it/s][A
 94%|██████████████████████████████▉  | 114492/122310 [3:41:23<14:02,  9.28it/s][A
 94%|██████████████████████████████▉  | 114502/122310 [3:41:24<13:05,  9.94

step: 28740, loss: 86.05731640805975, epoch: 1



 94%|██████████████████████████████▉  | 114598/122310 [3:41:32<09:47, 13.12it/s][A
 94%|██████████████████████████████▉  | 114606/122310 [3:41:33<10:45, 11.94it/s][A
 94%|██████████████████████████████▉  | 114612/122310 [3:41:34<12:16, 10.45it/s][A
 94%|██████████████████████████████▉  | 114623/122310 [3:41:34<11:31, 11.11it/s][A
 94%|██████████████████████████████▉  | 114638/122310 [3:41:35<09:52, 12.95it/s][A
 94%|██████████████████████████████▉  | 114641/122310 [3:41:36<12:36, 10.14it/s][A
 94%|██████████████████████████████▉  | 114648/122310 [3:41:37<13:25,  9.51it/s][A
 94%|██████████████████████████████▉  | 114661/122310 [3:41:38<11:24, 11.18it/s][A
 94%|██████████████████████████████▉  | 114670/122310 [3:41:39<11:42, 10.88it/s][A
 94%|██████████████████████████████▉  | 114672/122310 [3:41:40<15:15,  8.34it/s][A
 94%|██████████████████████████████▉  | 114676/122310 [3:41:41<17:37,  7.22it/s][A
 94%|██████████████████████████████▉  | 114686/122310 [3:41:41<15:00,  8.47

step: 28760, loss: 80.58155637576652, epoch: 1



 94%|██████████████████████████████▉  | 114751/122310 [3:41:49<15:10,  8.30it/s][A
 94%|██████████████████████████████▉  | 114759/122310 [3:41:50<14:41,  8.56it/s][A
 94%|██████████████████████████████▉  | 114768/122310 [3:41:51<13:47,  9.12it/s][A
 94%|██████████████████████████████▉  | 114778/122310 [3:41:52<12:44,  9.85it/s][A
 94%|██████████████████████████████▉  | 114794/122310 [3:41:53<10:05, 12.41it/s][A
 94%|██████████████████████████████▉  | 114804/122310 [3:41:54<10:18, 12.13it/s][A
 94%|██████████████████████████████▉  | 114810/122310 [3:41:55<11:49, 10.57it/s][A
 94%|██████████████████████████████▉  | 114815/122310 [3:41:55<13:41,  9.13it/s][A
 94%|██████████████████████████████▉  | 114822/122310 [3:41:56<14:11,  8.80it/s][A
 94%|██████████████████████████████▉  | 114831/122310 [3:41:57<13:45,  9.06it/s][A
 94%|██████████████████████████████▉  | 114839/122310 [3:41:58<14:44,  8.45it/s][A
 94%|██████████████████████████████▉  | 114848/122310 [3:41:59<14:17,  8.70

step: 28780, loss: 69.95783089716726, epoch: 1



 94%|███████████████████████████████  | 114913/122310 [3:42:08<18:24,  6.70it/s][A
 94%|███████████████████████████████  | 114930/122310 [3:42:09<12:32,  9.80it/s][A
 94%|███████████████████████████████  | 114941/122310 [3:42:10<11:39, 10.53it/s][A
 94%|███████████████████████████████  | 114952/122310 [3:42:11<11:24, 10.75it/s][A
 94%|███████████████████████████████  | 114962/122310 [3:42:12<11:17, 10.85it/s][A
 94%|███████████████████████████████  | 114975/122310 [3:42:13<10:21, 11.81it/s][A
 94%|███████████████████████████████  | 114987/122310 [3:42:14<10:26, 11.68it/s][A
 94%|███████████████████████████████  | 114993/122310 [3:42:15<13:04,  9.32it/s][A
 94%|███████████████████████████████  | 115002/122310 [3:42:16<13:25,  9.07it/s][A
 94%|███████████████████████████████  | 115009/122310 [3:42:17<14:16,  8.52it/s][A
 94%|███████████████████████████████  | 115015/122310 [3:42:18<15:29,  7.85it/s][A
 94%|███████████████████████████████  | 115030/122310 [3:42:19<12:28,  9.73

step: 28800, loss: 80.9095375879357, epoch: 1
sim1 and sim2 are 0.5240039813791316, 0.23755078256005738
cosine of pred and queen: 0.21519706550669962
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: pacific
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: italy
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maharas


 94%|███████████████████████████████  | 115091/122310 [3:42:40<12:31,  9.60it/s][A

Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: scrubs
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: sales
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: would
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: cent
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: staffers
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: deceitfull
Actual: poland:polish::italy:italian, pred: media
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Act


 94%|█████████████████████████████▏ | 115099/122310 [3:43:56<6:01:05,  3.00s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 94%|█████████████████████████████▏ | 115107/122310 [3:43:57<4:26:43,  2.22s/it][A
 94%|█████████████████████████████▏ | 115116/122310 [3:43:58<3:09:18,  1.58s/it][A
 94%|█████████████████████████████▏ | 115126/122310 [3:43:58<2:11:03,  1.09s/it][A
 94%|█████████████████████████████▏ | 115135/122310 [3:43:59<1:35:47,  1.25it/s][A
 94%|█████████████████████████████▏ | 115144/122310 [3:44:00<1:10:44,  1.69it/s][A
 94%|███████████████████████████████  | 115156/122310 [3:44:01<48:35,  2.45it/s][A
 94%|███████████████████████████████  | 115160/122310 [3:44:02<45:42,  2.61it/s][A
 94%|███████████████████████████████  | 115162/122310 [3:44:03<47:18,  2.52it/s][A
 94%|███████████████████████████████  | 115169/122310 [3:44:04<38:11,  3.12it/s][A
 94%|███████████████████████████████  | 115176/122310 [3:44:05<31:26,  3.78it/s][A
 94%|███████████████████████████████  | 115184/122310 [3:44:06<25:50,  4.60it/s][A
 94%|███████████████████████████████  | 115190/122310 [3:44:07<24:06,  4.92

step: 28820, loss: 78.77495740785383, epoch: 1



 94%|███████████████████████████████  | 115264/122310 [3:44:16<13:46,  8.53it/s][A
 94%|███████████████████████████████  | 115269/122310 [3:44:17<15:14,  7.70it/s][A
 94%|███████████████████████████████  | 115280/122310 [3:44:18<13:21,  8.77it/s][A
 94%|███████████████████████████████  | 115282/122310 [3:44:19<16:55,  6.92it/s][A
 94%|███████████████████████████████  | 115283/122310 [3:44:20<22:45,  5.15it/s][A
 94%|███████████████████████████████  | 115292/122310 [3:44:21<18:33,  6.30it/s][A
 94%|███████████████████████████████  | 115298/122310 [3:44:22<18:44,  6.23it/s][A
 94%|███████████████████████████████  | 115303/122310 [3:44:23<19:44,  5.92it/s][A
 94%|███████████████████████████████  | 115319/122310 [3:44:24<12:51,  9.06it/s][A
 94%|███████████████████████████████  | 115326/122310 [3:44:25<14:10,  8.22it/s][A
 94%|███████████████████████████████  | 115334/122310 [3:44:26<14:34,  7.97it/s][A
 94%|███████████████████████████████  | 115340/122310 [3:44:27<15:49,  7.34

step: 28840, loss: 74.09049263721414, epoch: 1



 94%|███████████████████████████████▏ | 115407/122310 [3:44:36<16:19,  7.05it/s][A
 94%|███████████████████████████████▏ | 115416/122310 [3:44:37<15:13,  7.55it/s][A
 94%|███████████████████████████████▏ | 115422/122310 [3:44:38<16:13,  7.08it/s][A
 94%|███████████████████████████████▏ | 115427/122310 [3:44:39<17:45,  6.46it/s][A
 94%|███████████████████████████████▏ | 115433/122310 [3:44:40<17:58,  6.38it/s][A
 94%|███████████████████████████████▏ | 115448/122310 [3:44:41<12:37,  9.06it/s][A
 94%|███████████████████████████████▏ | 115452/122310 [3:44:42<15:04,  7.58it/s][A
 94%|███████████████████████████████▏ | 115462/122310 [3:44:43<13:37,  8.37it/s][A
 94%|███████████████████████████████▏ | 115478/122310 [3:44:44<10:34, 10.77it/s][A
 94%|███████████████████████████████▏ | 115487/122310 [3:44:45<10:58, 10.36it/s][A
 94%|███████████████████████████████▏ | 115494/122310 [3:44:46<12:01,  9.45it/s][A
 94%|███████████████████████████████▏ | 115508/122310 [3:44:47<10:20, 10.96

step: 28860, loss: 78.45701628875656, epoch: 1



 95%|███████████████████████████████▏ | 115594/122310 [3:44:56<12:40,  8.83it/s][A
 95%|███████████████████████████████▏ | 115600/122310 [3:44:57<13:59,  7.99it/s][A
 95%|███████████████████████████████▏ | 115601/122310 [3:44:58<18:49,  5.94it/s][A
 95%|███████████████████████████████▏ | 115607/122310 [3:44:59<18:41,  5.98it/s][A
 95%|███████████████████████████████▏ | 115613/122310 [3:45:00<18:21,  6.08it/s][A
 95%|███████████████████████████████▏ | 115621/122310 [3:45:01<16:34,  6.73it/s][A
 95%|███████████████████████████████▏ | 115625/122310 [3:45:02<18:38,  5.97it/s][A
 95%|███████████████████████████████▏ | 115630/122310 [3:45:03<19:22,  5.74it/s][A
 95%|███████████████████████████████▏ | 115636/122310 [3:45:04<18:51,  5.90it/s][A
 95%|███████████████████████████████▏ | 115646/122310 [3:45:05<15:16,  7.27it/s][A
 95%|███████████████████████████████▏ | 115653/122310 [3:45:06<15:11,  7.30it/s][A
 95%|███████████████████████████████▏ | 115660/122310 [3:45:07<15:12,  7.29

step: 28880, loss: 71.29310121340136, epoch: 1



 95%|███████████████████████████████▏ | 115723/122310 [3:45:15<12:00,  9.14it/s][A
 95%|███████████████████████████████▏ | 115726/122310 [3:45:16<14:35,  7.52it/s][A
 95%|███████████████████████████████▏ | 115737/122310 [3:45:17<12:10,  9.00it/s][A
 95%|███████████████████████████████▏ | 115740/122310 [3:45:18<14:51,  7.37it/s][A
 95%|███████████████████████████████▏ | 115752/122310 [3:45:18<11:47,  9.26it/s][A
 95%|███████████████████████████████▏ | 115761/122310 [3:45:19<11:25,  9.55it/s][A
 95%|███████████████████████████████▏ | 115772/122310 [3:45:20<10:23, 10.48it/s][A
 95%|███████████████████████████████▏ | 115778/122310 [3:45:21<11:33,  9.42it/s][A
 95%|███████████████████████████████▏ | 115786/122310 [3:45:22<11:39,  9.33it/s][A
 95%|███████████████████████████████▏ | 115791/122310 [3:45:23<13:08,  8.27it/s][A
 95%|███████████████████████████████▏ | 115800/122310 [3:45:24<12:12,  8.89it/s][A
 95%|███████████████████████████████▏ | 115808/122310 [3:45:25<12:06,  8.94

step: 28900, loss: 89.31565874961154, epoch: 1



 95%|███████████████████████████████▎ | 115884/122310 [3:45:32<09:57, 10.76it/s][A
 95%|███████████████████████████████▎ | 115895/122310 [3:45:33<09:30, 11.25it/s][A
 95%|███████████████████████████████▎ | 115897/122310 [3:45:34<12:23,  8.62it/s][A
 95%|███████████████████████████████▎ | 115901/122310 [3:45:35<14:23,  7.42it/s][A
 95%|███████████████████████████████▎ | 115913/122310 [3:45:36<11:27,  9.30it/s][A
 95%|███████████████████████████████▎ | 115918/122310 [3:45:37<12:52,  8.27it/s][A
 95%|███████████████████████████████▎ | 115931/122310 [3:45:38<10:19, 10.29it/s][A
 95%|███████████████████████████████▎ | 115936/122310 [3:45:39<11:58,  8.87it/s][A
 95%|███████████████████████████████▎ | 115940/122310 [3:45:39<14:05,  7.53it/s][A
 95%|███████████████████████████████▎ | 115946/122310 [3:45:40<14:25,  7.35it/s][A
 95%|███████████████████████████████▎ | 115957/122310 [3:45:41<11:56,  8.87it/s][A
 95%|███████████████████████████████▎ | 115962/122310 [3:45:43<17:21,  6.10

step: 28920, loss: 71.06588140889096, epoch: 1



 95%|███████████████████████████████▎ | 116032/122310 [3:45:50<09:09, 11.43it/s][A
 95%|███████████████████████████████▎ | 116042/122310 [3:45:51<09:09, 11.41it/s][A
 95%|███████████████████████████████▎ | 116053/122310 [3:45:52<08:51, 11.78it/s][A
 95%|███████████████████████████████▎ | 116065/122310 [3:45:53<08:24, 12.37it/s][A
 95%|███████████████████████████████▎ | 116071/122310 [3:45:54<09:41, 10.74it/s][A
 95%|███████████████████████████████▎ | 116079/122310 [3:45:54<10:08, 10.23it/s][A
 95%|███████████████████████████████▎ | 116088/122310 [3:45:55<10:05, 10.27it/s][A
 95%|███████████████████████████████▎ | 116101/122310 [3:45:56<08:52, 11.65it/s][A
 95%|███████████████████████████████▎ | 116104/122310 [3:45:57<11:19,  9.13it/s][A
 95%|███████████████████████████████▎ | 116108/122310 [3:45:58<13:16,  7.79it/s][A
 95%|███████████████████████████████▎ | 116119/122310 [3:45:59<11:15,  9.16it/s][A
 95%|███████████████████████████████▎ | 116125/122310 [3:46:00<12:09,  8.48

step: 28940, loss: 85.29436442387095, epoch: 1



 95%|███████████████████████████████▎ | 116207/122310 [3:46:08<11:04,  9.19it/s][A
 95%|███████████████████████████████▎ | 116226/122310 [3:46:08<07:54, 12.83it/s][A
 95%|███████████████████████████████▎ | 116233/122310 [3:46:09<08:57, 11.31it/s][A
 95%|███████████████████████████████▎ | 116235/122310 [3:46:10<11:41,  8.66it/s][A
 95%|███████████████████████████████▎ | 116241/122310 [3:46:11<12:25,  8.14it/s][A
 95%|███████████████████████████████▎ | 116244/122310 [3:46:12<15:10,  6.67it/s][A
 95%|███████████████████████████████▎ | 116250/122310 [3:46:13<15:00,  6.73it/s][A
 95%|███████████████████████████████▎ | 116259/122310 [3:46:14<13:00,  7.75it/s][A
 95%|███████████████████████████████▎ | 116267/122310 [3:46:15<12:21,  8.15it/s][A
 95%|███████████████████████████████▎ | 116273/122310 [3:46:15<12:55,  7.78it/s][A
 95%|███████████████████████████████▎ | 116287/122310 [3:46:16<09:47, 10.25it/s][A
 95%|███████████████████████████████▍ | 116297/122310 [3:46:17<09:25, 10.63

step: 28960, loss: 84.92581811579977, epoch: 1



 95%|███████████████████████████████▍ | 116331/122310 [3:46:28<37:03,  2.69it/s][A
 95%|███████████████████████████████▍ | 116338/122310 [3:46:29<30:17,  3.28it/s][A
 95%|███████████████████████████████▍ | 116344/122310 [3:46:29<26:19,  3.78it/s][A
 95%|███████████████████████████████▍ | 116355/122310 [3:46:30<18:58,  5.23it/s][A
 95%|███████████████████████████████▍ | 116369/122310 [3:46:31<13:35,  7.28it/s][A
 95%|███████████████████████████████▍ | 116388/122310 [3:46:32<09:30, 10.38it/s][A
 95%|███████████████████████████████▍ | 116390/122310 [3:46:33<11:35,  8.51it/s][A
 95%|███████████████████████████████▍ | 116400/122310 [3:46:34<10:38,  9.25it/s][A
 95%|███████████████████████████████▍ | 116406/122310 [3:46:35<11:25,  8.62it/s][A
 95%|███████████████████████████████▍ | 116412/122310 [3:46:36<12:02,  8.16it/s][A
 95%|███████████████████████████████▍ | 116420/122310 [3:46:36<11:37,  8.44it/s][A
 95%|███████████████████████████████▍ | 116425/122310 [3:46:37<12:46,  7.67

step: 28980, loss: 94.9085474814133, epoch: 1



 95%|███████████████████████████████▍ | 116490/122310 [3:46:43<08:55, 10.87it/s][A
 95%|███████████████████████████████▍ | 116500/122310 [3:46:43<08:48, 10.99it/s][A
 95%|███████████████████████████████▍ | 116509/122310 [3:46:44<08:59, 10.74it/s][A
 95%|███████████████████████████████▍ | 116522/122310 [3:46:45<08:01, 12.02it/s][A
 95%|███████████████████████████████▍ | 116533/122310 [3:46:46<07:52, 12.23it/s][A
 95%|███████████████████████████████▍ | 116547/122310 [3:46:47<07:11, 13.36it/s][A
 95%|███████████████████████████████▍ | 116555/122310 [3:46:48<07:54, 12.13it/s][A
 95%|███████████████████████████████▍ | 116561/122310 [3:46:49<09:03, 10.57it/s][A
 95%|███████████████████████████████▍ | 116569/122310 [3:46:49<09:25, 10.15it/s][A
 95%|███████████████████████████████▍ | 116581/122310 [3:46:50<08:29, 11.23it/s][A
 95%|███████████████████████████████▍ | 116587/122310 [3:46:51<09:35,  9.94it/s][A
 95%|███████████████████████████████▍ | 116602/122310 [3:46:52<07:50, 12.12

step: 29000, loss: 78.02437651184351, epoch: 1
saving weights



 95%|███████████████████████████████▍ | 116659/122310 [3:47:00<14:30,  6.49it/s][A
 95%|███████████████████████████████▍ | 116666/122310 [3:47:01<13:41,  6.87it/s][A
 95%|███████████████████████████████▍ | 116677/122310 [3:47:02<11:18,  8.31it/s][A
 95%|███████████████████████████████▍ | 116680/122310 [3:47:03<13:21,  7.03it/s][A
 95%|███████████████████████████████▍ | 116695/122310 [3:47:04<09:29,  9.85it/s][A
 95%|███████████████████████████████▍ | 116700/122310 [3:47:04<10:46,  8.68it/s][A
 95%|███████████████████████████████▍ | 116710/122310 [3:47:05<09:51,  9.47it/s][A
 95%|███████████████████████████████▍ | 116724/122310 [3:47:06<08:09, 11.41it/s][A
 95%|███████████████████████████████▍ | 116728/122310 [3:47:07<09:53,  9.40it/s][A
 95%|███████████████████████████████▍ | 116735/122310 [3:47:08<10:17,  9.02it/s][A
 95%|███████████████████████████████▍ | 116747/122310 [3:47:09<08:51, 10.46it/s][A
 95%|███████████████████████████████▌ | 116756/122310 [3:47:10<08:56, 10.34

step: 29020, loss: 90.76682529421898, epoch: 1



 96%|███████████████████████████████▌ | 116846/122310 [3:47:18<07:59, 11.40it/s][A
 96%|███████████████████████████████▌ | 116850/122310 [3:47:19<09:41,  9.39it/s][A
 96%|███████████████████████████████▌ | 116860/122310 [3:47:20<09:03, 10.03it/s][A
 96%|███████████████████████████████▌ | 116872/122310 [3:47:21<08:07, 11.15it/s][A
 96%|███████████████████████████████▌ | 116886/122310 [3:47:22<07:08, 12.66it/s][A
 96%|███████████████████████████████▌ | 116896/122310 [3:47:23<07:20, 12.29it/s][A
 96%|███████████████████████████████▌ | 116902/122310 [3:47:24<08:26, 10.68it/s][A
 96%|███████████████████████████████▌ | 116912/122310 [3:47:25<08:15, 10.88it/s][A
 96%|███████████████████████████████▌ | 116926/122310 [3:47:25<07:12, 12.43it/s][A
 96%|███████████████████████████████▌ | 116937/122310 [3:47:26<07:11, 12.46it/s][A
 96%|███████████████████████████████▌ | 116951/122310 [3:47:27<06:37, 13.49it/s][A
 96%|███████████████████████████████▌ | 116960/122310 [3:47:28<07:09, 12.45

step: 29040, loss: 89.74436957376842, epoch: 1



 96%|███████████████████████████████▌ | 117022/122310 [3:47:36<08:33, 10.30it/s][A
 96%|███████████████████████████████▌ | 117029/122310 [3:47:37<09:08,  9.64it/s][A
 96%|███████████████████████████████▌ | 117041/122310 [3:47:38<08:04, 10.87it/s][A
 96%|███████████████████████████████▌ | 117054/122310 [3:47:39<07:15, 12.07it/s][A
 96%|███████████████████████████████▌ | 117067/122310 [3:47:39<06:47, 12.86it/s][A
 96%|███████████████████████████████▌ | 117074/122310 [3:47:40<07:37, 11.43it/s][A
 96%|███████████████████████████████▌ | 117083/122310 [3:47:41<07:50, 11.11it/s][A
 96%|███████████████████████████████▌ | 117094/122310 [3:47:42<07:32, 11.53it/s][A
 96%|███████████████████████████████▌ | 117102/122310 [3:47:43<08:04, 10.75it/s][A
 96%|███████████████████████████████▌ | 117109/122310 [3:47:44<08:44,  9.92it/s][A
 96%|███████████████████████████████▌ | 117115/122310 [3:47:45<09:40,  8.95it/s][A
 96%|███████████████████████████████▌ | 117117/122310 [3:47:46<12:18,  7.03

step: 29060, loss: 78.26298254937785, epoch: 1



 96%|███████████████████████████████▋ | 117215/122310 [3:47:53<08:24, 10.09it/s][A
 96%|███████████████████████████████▋ | 117220/122310 [3:47:54<09:41,  8.76it/s][A

step: 29080, loss: 166.97852914926025, epoch: 1



 96%|███████████████████████████████▋ | 117234/122310 [3:48:11<49:25,  1.71it/s][A
 96%|███████████████████████████████▋ | 117240/122310 [3:48:12<41:30,  2.04it/s][A
 96%|███████████████████████████████▋ | 117251/122310 [3:48:13<29:06,  2.90it/s][A
 96%|███████████████████████████████▋ | 117252/122310 [3:48:14<31:06,  2.71it/s][A
 96%|███████████████████████████████▋ | 117258/122310 [3:48:14<25:57,  3.24it/s][A
 96%|███████████████████████████████▋ | 117263/122310 [3:48:15<23:11,  3.63it/s][A
 96%|███████████████████████████████▋ | 117271/122310 [3:48:16<18:06,  4.64it/s][A
 96%|███████████████████████████████▋ | 117281/122310 [3:48:17<13:50,  6.05it/s][A
 96%|███████████████████████████████▋ | 117285/122310 [3:48:18<14:40,  5.71it/s][A
 96%|███████████████████████████████▋ | 117300/122310 [3:48:19<09:49,  8.50it/s][A
 96%|███████████████████████████████▋ | 117302/122310 [3:48:20<12:03,  6.92it/s][A
 96%|███████████████████████████████▋ | 117313/122310 [3:48:21<09:53,  8.42

step: 29100, loss: 73.58527980179387, epoch: 1



 96%|███████████████████████████████▋ | 117374/122310 [3:48:28<10:56,  7.52it/s][A
 96%|███████████████████████████████▋ | 117381/122310 [3:48:29<10:44,  7.65it/s][A
 96%|███████████████████████████████▋ | 117392/122310 [3:48:30<08:58,  9.14it/s][A
 96%|███████████████████████████████▋ | 117395/122310 [3:48:31<10:58,  7.46it/s][A
 96%|███████████████████████████████▋ | 117405/122310 [3:48:32<09:25,  8.67it/s][A
 96%|███████████████████████████████▋ | 117412/122310 [3:48:33<09:38,  8.47it/s][A
 96%|███████████████████████████████▋ | 117430/122310 [3:48:34<06:42, 12.13it/s][A
 96%|███████████████████████████████▋ | 117443/122310 [3:48:35<06:14, 12.99it/s][A
 96%|███████████████████████████████▋ | 117456/122310 [3:48:35<05:57, 13.56it/s][A
 96%|███████████████████████████████▋ | 117459/122310 [3:48:36<07:41, 10.51it/s][A
 96%|███████████████████████████████▋ | 117477/122310 [3:48:37<05:56, 13.54it/s][A
 96%|███████████████████████████████▋ | 117483/122310 [3:48:38<06:57, 11.57

step: 29120, loss: 80.28978887195542, epoch: 1



 96%|███████████████████████████████▋ | 117552/122310 [3:48:46<10:16,  7.71it/s][A
 96%|███████████████████████████████▋ | 117559/122310 [3:48:47<10:08,  7.80it/s][A
 96%|███████████████████████████████▋ | 117567/122310 [3:48:48<09:37,  8.22it/s][A
 96%|███████████████████████████████▋ | 117581/122310 [3:48:49<07:29, 10.53it/s][A
 96%|███████████████████████████████▋ | 117593/122310 [3:48:49<06:50, 11.50it/s][A
 96%|███████████████████████████████▋ | 117598/122310 [3:48:50<08:02,  9.77it/s][A
 96%|███████████████████████████████▋ | 117606/122310 [3:48:51<08:12,  9.55it/s][A
 96%|███████████████████████████████▋ | 117610/122310 [3:48:52<09:37,  8.13it/s][A
 96%|███████████████████████████████▋ | 117621/122310 [3:48:53<08:15,  9.47it/s][A
 96%|███████████████████████████████▋ | 117629/122310 [3:48:54<08:19,  9.36it/s][A
 96%|███████████████████████████████▋ | 117639/122310 [3:48:55<07:49,  9.96it/s][A
 96%|███████████████████████████████▋ | 117653/122310 [3:48:56<06:35, 11.77

step: 29140, loss: 95.36438620202698, epoch: 1



 96%|███████████████████████████████▊ | 117712/122310 [3:49:03<10:28,  7.32it/s][A
 96%|███████████████████████████████▊ | 117716/122310 [3:49:04<11:44,  6.52it/s][A
 96%|███████████████████████████████▊ | 117723/122310 [3:49:05<10:56,  6.99it/s][A
 96%|███████████████████████████████▊ | 117733/122310 [3:49:06<09:08,  8.34it/s][A
 96%|███████████████████████████████▊ | 117742/122310 [3:49:07<08:31,  8.93it/s][A
 96%|███████████████████████████████▊ | 117753/122310 [3:49:08<07:34, 10.03it/s][A
 96%|███████████████████████████████▊ | 117762/122310 [3:49:09<07:29, 10.13it/s][A
 96%|███████████████████████████████▊ | 117775/122310 [3:49:10<06:33, 11.53it/s][A
 96%|███████████████████████████████▊ | 117790/122310 [3:49:10<05:42, 13.21it/s][A
 96%|███████████████████████████████▊ | 117801/122310 [3:49:11<05:47, 12.99it/s][A
 96%|███████████████████████████████▊ | 117810/122310 [3:49:12<06:10, 12.15it/s][A
 96%|███████████████████████████████▊ | 117816/122310 [3:49:17<17:41,  4.24

step: 29160, loss: 98.49688586249296, epoch: 1



 96%|███████████████████████████████▊ | 117850/122310 [3:49:21<10:07,  7.34it/s][A
 96%|███████████████████████████████▊ | 117853/122310 [3:49:22<11:35,  6.41it/s][A
 96%|███████████████████████████████▊ | 117856/122310 [3:49:23<13:10,  5.63it/s][A
 96%|███████████████████████████████▊ | 117864/122310 [3:49:24<11:14,  6.59it/s][A
 96%|███████████████████████████████▊ | 117869/122310 [3:49:24<11:40,  6.34it/s][A
 96%|███████████████████████████████▊ | 117876/122310 [3:49:25<10:51,  6.81it/s][A
 96%|███████████████████████████████▊ | 117884/122310 [3:49:26<09:51,  7.49it/s][A
 96%|███████████████████████████████▊ | 117897/122310 [3:49:27<07:38,  9.62it/s][A
 96%|███████████████████████████████▊ | 117905/122310 [3:49:28<07:51,  9.34it/s][A
 96%|███████████████████████████████▊ | 117911/122310 [3:49:29<08:48,  8.32it/s][A
 96%|███████████████████████████████▊ | 117918/122310 [3:49:30<09:05,  8.06it/s][A
 96%|███████████████████████████████▊ | 117925/122310 [3:49:31<09:18,  7.86

step: 29180, loss: 69.37202497159193, epoch: 1



 96%|███████████████████████████████▊ | 117982/122310 [3:49:39<09:23,  7.68it/s][A
 96%|███████████████████████████████▊ | 117992/122310 [3:49:40<08:11,  8.78it/s][A
 96%|███████████████████████████████▊ | 118001/122310 [3:49:41<07:47,  9.22it/s][A
 96%|███████████████████████████████▊ | 118007/122310 [3:49:42<08:25,  8.51it/s][A
 96%|███████████████████████████████▊ | 118013/122310 [3:49:42<08:57,  8.00it/s][A
 96%|███████████████████████████████▊ | 118022/122310 [3:49:43<08:17,  8.61it/s][A
 97%|███████████████████████████████▊ | 118032/122310 [3:49:44<07:34,  9.41it/s][A
 97%|███████████████████████████████▊ | 118038/122310 [3:49:45<08:14,  8.64it/s][A
 97%|███████████████████████████████▊ | 118045/122310 [3:49:46<08:24,  8.46it/s][A
 97%|███████████████████████████████▊ | 118063/122310 [3:49:47<05:52, 12.05it/s][A
 97%|███████████████████████████████▊ | 118067/122310 [3:49:48<07:10,  9.85it/s][A
 97%|███████████████████████████████▊ | 118072/122310 [3:49:49<08:15,  8.55

step: 29200, loss: 86.78471951341973, epoch: 1
sim1 and sim2 are 0.5429418150445082, 0.2332433216988948
cosine of pred and queen: 0.20819318728528935
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: middle
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: ma

Actual: cairo:egypt::manila:philippines, pred: hawser
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: daughter
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: htay
Actual: man:woman::brothers:sisters, pred: multiplying
Actual: stepson:stepdaughter::stepfather:stepmother, pred: emblem
Actual: uncle:aunt::grandson:granddaughter, pred: emblem
Actual: fortunate:fortunately::efficient:efficiently, pred: emblem
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: business
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: bihar
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 97%|█████████████████████████████▉ | 118139/122310 [3:51:33<4:23:48,  3.79s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 97%|█████████████████████████████▉ | 118156/122310 [3:51:34<2:18:11,  2.00s/it][A
 97%|█████████████████████████████▉ | 118161/122310 [3:51:34<1:56:52,  1.69s/it][A
 97%|█████████████████████████████▉ | 118180/122310 [3:51:35<1:02:21,  1.10it/s][A
 97%|███████████████████████████████▉ | 118188/122310 [3:51:36<50:03,  1.37it/s][A
 97%|███████████████████████████████▉ | 118198/122310 [3:51:37<37:23,  1.83it/s][A
 97%|███████████████████████████████▉ | 118214/122310 [3:51:38<24:03,  2.84it/s][A
 97%|███████████████████████████████▉ | 118219/122310 [3:51:39<22:12,  3.07it/s][A
 97%|███████████████████████████████▉ | 118238/122310 [3:51:40<13:35,  4.99it/s][A
 97%|███████████████████████████████▉ | 118243/122310 [3:51:41<13:19,  5.08it/s][A
 97%|███████████████████████████████▉ | 118247/122310 [3:51:41<13:35,  4.98it/s][A
 97%|███████████████████████████████▉ | 118249/122310 [3:51:43<17:53,  3.78it/s][A
 97%|███████████████████████████████▉ | 118258/122310 [3:51:44<13:40,  4.94

step: 29220, loss: 99.41941427940797, epoch: 1



 97%|███████████████████████████████▉ | 118306/122310 [3:51:51<10:14,  6.52it/s][A
 97%|███████████████████████████████▉ | 118323/122310 [3:51:52<06:29, 10.24it/s][A
 97%|███████████████████████████████▉ | 118328/122310 [3:51:53<07:26,  8.92it/s][A
 97%|███████████████████████████████▉ | 118336/122310 [3:51:54<07:21,  9.00it/s][A
 97%|███████████████████████████████▉ | 118345/122310 [3:51:55<07:05,  9.32it/s][A
 97%|███████████████████████████████▉ | 118353/122310 [3:51:56<07:08,  9.24it/s][A
 97%|███████████████████████████████▉ | 118363/122310 [3:51:56<06:39,  9.89it/s][A
 97%|███████████████████████████████▉ | 118373/122310 [3:51:57<06:21, 10.33it/s][A
 97%|███████████████████████████████▉ | 118387/122310 [3:51:58<05:29, 11.92it/s][A
 97%|███████████████████████████████▉ | 118390/122310 [3:51:59<06:56,  9.42it/s][A
 97%|███████████████████████████████▉ | 118402/122310 [3:52:00<06:05, 10.70it/s][A
 97%|███████████████████████████████▉ | 118413/122310 [3:52:01<05:45, 11.28

step: 29240, loss: 73.50387641117096, epoch: 1



 97%|███████████████████████████████▉ | 118499/122310 [3:52:09<06:20, 10.01it/s][A
 97%|███████████████████████████████▉ | 118509/122310 [3:52:10<06:24,  9.88it/s][A
 97%|███████████████████████████████▉ | 118522/122310 [3:52:11<05:34, 11.31it/s][A
 97%|███████████████████████████████▉ | 118535/122310 [3:52:11<05:06, 12.31it/s][A
 97%|███████████████████████████████▉ | 118537/122310 [3:52:12<06:44,  9.34it/s][A
 97%|███████████████████████████████▉ | 118551/122310 [3:52:13<05:35, 11.20it/s][A
 97%|███████████████████████████████▉ | 118559/122310 [3:52:14<05:56, 10.52it/s][A
 97%|███████████████████████████████▉ | 118568/122310 [3:52:15<05:57, 10.46it/s][A
 97%|███████████████████████████████▉ | 118576/122310 [3:52:16<06:10, 10.08it/s][A
 97%|███████████████████████████████▉ | 118584/122310 [3:52:17<06:20,  9.78it/s][A
 97%|███████████████████████████████▉ | 118585/122310 [3:52:18<08:36,  7.21it/s][A
 97%|███████████████████████████████▉ | 118595/122310 [3:52:19<07:18,  8.48

step: 29260, loss: 97.46890612693183, epoch: 1



 97%|████████████████████████████████ | 118644/122310 [3:52:26<11:29,  5.32it/s][A
 97%|████████████████████████████████ | 118649/122310 [3:52:27<11:15,  5.42it/s][A
 97%|████████████████████████████████ | 118656/122310 [3:52:28<10:01,  6.08it/s][A
 97%|████████████████████████████████ | 118665/122310 [3:52:29<08:41,  6.99it/s][A
 97%|████████████████████████████████ | 118666/122310 [3:52:30<12:02,  5.05it/s][A
 97%|████████████████████████████████ | 118668/122310 [3:52:31<14:17,  4.25it/s][A
 97%|████████████████████████████████ | 118676/122310 [3:52:32<10:54,  5.55it/s][A
 97%|████████████████████████████████ | 118690/122310 [3:52:35<11:14,  5.37it/s][A
 97%|████████████████████████████████ | 118695/122310 [3:52:36<11:04,  5.44it/s][A
 97%|████████████████████████████████ | 118705/122310 [3:52:37<08:51,  6.79it/s][A
 97%|████████████████████████████████ | 118713/122310 [3:52:37<08:08,  7.37it/s][A
 97%|████████████████████████████████ | 118720/122310 [3:52:38<07:55,  7.54

step: 29280, loss: 80.71166993675725, epoch: 1



 97%|████████████████████████████████ | 118777/122310 [3:52:45<06:03,  9.72it/s][A
 97%|████████████████████████████████ | 118785/122310 [3:52:46<06:18,  9.32it/s][A
 97%|████████████████████████████████ | 118795/122310 [3:52:47<06:12,  9.44it/s][A
 97%|████████████████████████████████ | 118807/122310 [3:52:48<05:35, 10.45it/s][A
 97%|████████████████████████████████ | 118819/122310 [3:52:49<05:20, 10.91it/s][A
 97%|████████████████████████████████ | 118827/122310 [3:52:50<05:38, 10.30it/s][A
 97%|████████████████████████████████ | 118836/122310 [3:52:51<05:43, 10.11it/s][A
 97%|████████████████████████████████ | 118849/122310 [3:52:52<05:07, 11.26it/s][A
 97%|████████████████████████████████ | 118860/122310 [3:52:52<05:00, 11.48it/s][A
 97%|████████████████████████████████ | 118864/122310 [3:52:53<06:13,  9.24it/s][A
 97%|████████████████████████████████ | 118876/122310 [3:52:54<05:32, 10.34it/s][A
 97%|████████████████████████████████ | 118891/122310 [3:52:55<04:44, 12.00

step: 29300, loss: 77.36971463046557, epoch: 1



 97%|████████████████████████████████ | 118962/122310 [3:53:04<08:16,  6.74it/s][A
 97%|████████████████████████████████ | 118968/122310 [3:53:05<08:52,  6.27it/s][A
 97%|████████████████████████████████ | 118982/122310 [3:53:06<06:29,  8.55it/s][A
 97%|████████████████████████████████ | 118988/122310 [3:53:07<07:13,  7.67it/s][A
 97%|████████████████████████████████ | 118998/122310 [3:53:08<06:37,  8.34it/s][A
 97%|████████████████████████████████ | 119002/122310 [3:53:09<07:48,  7.05it/s][A
 97%|████████████████████████████████ | 119012/122310 [3:53:10<06:59,  7.86it/s][A
 97%|████████████████████████████████ | 119017/122310 [3:53:12<10:06,  5.43it/s][A
 97%|████████████████████████████████ | 119022/122310 [3:53:13<10:12,  5.37it/s][A
 97%|████████████████████████████████ | 119025/122310 [3:53:14<11:20,  4.83it/s][A
 97%|████████████████████████████████ | 119035/122310 [3:53:15<08:42,  6.27it/s][A
 97%|████████████████████████████████ | 119038/122310 [3:53:16<10:02,  5.43

step: 29320, loss: 74.06208884600728, epoch: 1



 97%|████████████████████████████████▏| 119106/122310 [3:53:24<08:12,  6.50it/s][A
 97%|████████████████████████████████▏| 119123/122310 [3:53:25<05:27,  9.74it/s][A
 97%|████████████████████████████████▏| 119133/122310 [3:53:26<05:29,  9.64it/s][A
 97%|████████████████████████████████▏| 119143/122310 [3:53:27<05:27,  9.66it/s][A
 97%|████████████████████████████████▏| 119156/122310 [3:53:28<04:59, 10.53it/s][A
 97%|████████████████████████████████▏| 119163/122310 [3:53:29<05:35,  9.39it/s][A
 97%|████████████████████████████████▏| 119177/122310 [3:53:30<04:50, 10.78it/s][A
 97%|████████████████████████████████▏| 119186/122310 [3:53:31<05:04, 10.27it/s][A
 97%|████████████████████████████████▏| 119196/122310 [3:53:32<05:05, 10.19it/s][A
 97%|████████████████████████████████▏| 119203/122310 [3:53:33<05:33,  9.31it/s][A
 97%|████████████████████████████████▏| 119220/122310 [3:53:34<04:22, 11.78it/s][A
 97%|████████████████████████████████▏| 119227/122310 [3:53:35<04:54, 10.47

step: 29340, loss: 104.28262268736638, epoch: 1



 98%|████████████████████████████████▏| 119314/122310 [3:53:44<05:31,  9.04it/s][A
 98%|████████████████████████████████▏| 119322/122310 [3:53:45<05:37,  8.84it/s][A
 98%|████████████████████████████████▏| 119338/122310 [3:53:46<04:30, 10.99it/s][A
 98%|████████████████████████████████▏| 119346/122310 [3:53:47<04:55, 10.03it/s][A
 98%|████████████████████████████████▏| 119355/122310 [3:53:48<05:02,  9.75it/s][A
 98%|████████████████████████████████▏| 119378/122310 [3:53:49<03:33, 13.74it/s][A
 98%|████████████████████████████████▏| 119390/122310 [3:53:50<03:38, 13.34it/s][A
 98%|████████████████████████████████▏| 119394/122310 [3:53:51<04:33, 10.65it/s][A
 98%|████████████████████████████████▏| 119400/122310 [3:53:52<05:20,  9.08it/s][A
 98%|████████████████████████████████▏| 119408/122310 [3:53:53<05:56,  8.13it/s][A
 98%|████████████████████████████████▏| 119410/122310 [3:53:54<07:37,  6.34it/s][A
 98%|████████████████████████████████▏| 119417/122310 [3:53:55<07:26,  6.48

step: 29360, loss: 74.17402875567095, epoch: 1



 98%|████████████████████████████████▏| 119492/122310 [3:54:04<05:47,  8.10it/s][A
 98%|████████████████████████████████▏| 119503/122310 [3:54:05<04:57,  9.44it/s][A
 98%|████████████████████████████████▏| 119514/122310 [3:54:06<05:47,  8.04it/s][A
 98%|████████████████████████████████▏| 119523/122310 [3:54:08<06:43,  6.91it/s][A
 98%|████████████████████████████████▏| 119528/122310 [3:54:09<06:57,  6.67it/s][A
 98%|████████████████████████████████▎| 119538/122310 [3:54:10<05:57,  7.76it/s][A
 98%|████████████████████████████████▎| 119544/122310 [3:54:11<06:07,  7.53it/s][A
 98%|████████████████████████████████▎| 119547/122310 [3:54:12<07:08,  6.44it/s][A
 98%|████████████████████████████████▎| 119552/122310 [3:54:13<07:23,  6.22it/s][A
 98%|████████████████████████████████▎| 119562/122310 [3:54:14<05:58,  7.66it/s][A
 98%|████████████████████████████████▎| 119571/122310 [3:54:14<05:26,  8.40it/s][A
 98%|████████████████████████████████▎| 119582/122310 [3:54:15<04:43,  9.62

step: 29380, loss: 94.00925288543385, epoch: 1



 98%|████████████████████████████████▎| 119629/122310 [3:54:21<06:09,  7.26it/s][A
 98%|████████████████████████████████▎| 119638/122310 [3:54:22<05:26,  8.18it/s][A
 98%|████████████████████████████████▎| 119649/122310 [3:54:23<04:40,  9.49it/s][A
 98%|████████████████████████████████▎| 119655/122310 [3:54:24<05:04,  8.73it/s][A
 98%|████████████████████████████████▎| 119665/122310 [3:54:25<04:36,  9.55it/s][A
 98%|████████████████████████████████▎| 119673/122310 [3:54:26<04:39,  9.44it/s][A
 98%|████████████████████████████████▎| 119679/122310 [3:54:27<05:03,  8.68it/s][A
 98%|████████████████████████████████▎| 119686/122310 [3:54:28<05:10,  8.45it/s][A
 98%|████████████████████████████████▎| 119702/122310 [3:54:28<03:48, 11.39it/s][A
 98%|████████████████████████████████▎| 119709/122310 [3:54:29<04:12, 10.30it/s][A
 98%|████████████████████████████████▎| 119716/122310 [3:54:30<04:28,  9.65it/s][A
 98%|████████████████████████████████▎| 119722/122310 [3:54:31<04:53,  8.83

step: 29400, loss: 74.11441287519933, epoch: 1



 98%|████████████████████████████████▎| 119788/122310 [3:54:39<04:43,  8.90it/s][A
 98%|████████████████████████████████▎| 119804/122310 [3:54:40<03:33, 11.76it/s][A
 98%|████████████████████████████████▎| 119808/122310 [3:54:41<04:20,  9.61it/s][A
 98%|████████████████████████████████▎| 119823/122310 [3:54:41<03:29, 11.87it/s][A
 98%|████████████████████████████████▎| 119835/122310 [3:54:42<03:19, 12.42it/s][A
 98%|████████████████████████████████▎| 119841/122310 [3:54:43<03:50, 10.70it/s][A
 98%|████████████████████████████████▎| 119845/122310 [3:54:44<04:38,  8.87it/s][A
 98%|████████████████████████████████▎| 119852/122310 [3:54:45<04:45,  8.62it/s][A
 98%|████████████████████████████████▎| 119862/122310 [3:54:46<04:17,  9.50it/s][A
 98%|████████████████████████████████▎| 119873/122310 [3:54:47<03:53, 10.44it/s][A
 98%|████████████████████████████████▎| 119885/122310 [3:54:48<03:31, 11.44it/s][A
 98%|████████████████████████████████▎| 119893/122310 [3:54:48<03:44, 10.78

step: 29420, loss: 71.59471154790216, epoch: 1



 98%|████████████████████████████████▎| 119963/122310 [3:54:57<05:46,  6.77it/s][A
 98%|████████████████████████████████▎| 119975/122310 [3:54:59<04:52,  7.98it/s][A
 98%|████████████████████████████████▎| 119979/122310 [3:55:01<07:33,  5.14it/s][A
 98%|████████████████████████████████▎| 119989/122310 [3:55:02<06:08,  6.29it/s][A
 98%|████████████████████████████████▎| 119993/122310 [3:55:03<06:48,  5.67it/s][A
 98%|████████████████████████████████▍| 120004/122310 [3:55:04<05:26,  7.07it/s][A
 98%|████████████████████████████████▍| 120008/122310 [3:55:05<06:14,  6.15it/s][A
 98%|████████████████████████████████▍| 120019/122310 [3:55:06<05:03,  7.56it/s][A
 98%|████████████████████████████████▍| 120029/122310 [3:55:07<04:29,  8.47it/s][A
 98%|████████████████████████████████▍| 120038/122310 [3:55:08<04:18,  8.80it/s][A
 98%|████████████████████████████████▍| 120048/122310 [3:55:09<04:05,  9.23it/s][A
 98%|████████████████████████████████▍| 120061/122310 [3:55:10<03:35, 10.45

step: 29440, loss: 78.53769153820453, epoch: 1



 98%|████████████████████████████████▍| 120127/122310 [3:55:17<05:14,  6.94it/s][A
 98%|████████████████████████████████▍| 120140/122310 [3:55:18<04:03,  8.92it/s][A
 98%|████████████████████████████████▍| 120151/122310 [3:55:19<03:42,  9.72it/s][A
 98%|████████████████████████████████▍| 120154/122310 [3:55:20<04:40,  7.68it/s][A
 98%|████████████████████████████████▍| 120170/122310 [3:55:21<03:26, 10.35it/s][A
 98%|████████████████████████████████▍| 120173/122310 [3:55:22<04:24,  8.08it/s][A
 98%|████████████████████████████████▍| 120175/122310 [3:55:23<05:37,  6.32it/s][A
 98%|████████████████████████████████▍| 120180/122310 [3:55:24<05:55,  5.99it/s][A
 98%|████████████████████████████████▍| 120186/122310 [3:55:25<05:52,  6.03it/s][A
 98%|████████████████████████████████▍| 120193/122310 [3:55:26<05:31,  6.39it/s][A
 98%|████████████████████████████████▍| 120199/122310 [3:55:27<05:41,  6.19it/s][A
 98%|████████████████████████████████▍| 120207/122310 [3:55:28<05:16,  6.63

step: 29460, loss: 100.86486518223828, epoch: 1



 98%|████████████████████████████████▍| 120277/122310 [3:55:37<05:21,  6.32it/s][A
 98%|████████████████████████████████▍| 120288/122310 [3:55:38<04:10,  8.08it/s][A
 98%|████████████████████████████████▍| 120292/122310 [3:55:39<04:47,  7.01it/s][A
 98%|████████████████████████████████▍| 120302/122310 [3:55:40<04:12,  7.96it/s][A
 98%|████████████████████████████████▍| 120309/122310 [3:55:41<04:18,  7.75it/s][A
 98%|████████████████████████████████▍| 120316/122310 [3:55:42<04:28,  7.42it/s][A
 98%|████████████████████████████████▍| 120321/122310 [3:55:43<05:01,  6.61it/s][A
 98%|████████████████████████████████▍| 120337/122310 [3:55:44<03:34,  9.21it/s][A
 98%|████████████████████████████████▍| 120347/122310 [3:55:45<03:34,  9.16it/s][A
 98%|████████████████████████████████▍| 120352/122310 [3:55:46<04:11,  7.77it/s][A
 98%|████████████████████████████████▍| 120361/122310 [3:55:47<04:09,  7.80it/s][A
 98%|████████████████████████████████▍| 120367/122310 [3:55:48<04:34,  7.07

step: 29480, loss: 78.78806659833987, epoch: 1



 98%|████████████████████████████████▌| 120458/122310 [3:55:56<03:05, 10.00it/s][A
 98%|████████████████████████████████▌| 120465/122310 [3:55:57<03:24,  9.02it/s][A
 98%|████████████████████████████████▌| 120473/122310 [3:55:59<03:40,  8.33it/s][A
 99%|████████████████████████████████▌| 120484/122310 [3:56:00<03:32,  8.60it/s][A
 99%|████████████████████████████████▌| 120491/122310 [3:56:01<04:03,  7.46it/s][A
 99%|████████████████████████████████▌| 120503/122310 [3:56:02<03:40,  8.18it/s][A
 99%|████████████████████████████████▌| 120512/122310 [3:56:03<03:40,  8.14it/s][A
 99%|████████████████████████████████▌| 120521/122310 [3:56:05<03:43,  8.01it/s][A
 99%|████████████████████████████████▌| 120530/122310 [3:56:06<03:49,  7.76it/s][A
 99%|████████████████████████████████▌| 120542/122310 [3:56:07<03:26,  8.55it/s][A
 99%|████████████████████████████████▌| 120553/122310 [3:56:08<03:15,  8.97it/s][A
 99%|████████████████████████████████▌| 120556/122310 [3:56:09<04:16,  6.83

step: 29500, loss: 71.21968089004498, epoch: 1



 99%|████████████████████████████████▌| 120640/122310 [3:56:18<02:33, 10.87it/s][A
 99%|████████████████████████████████▌| 120649/122310 [3:56:19<02:35, 10.65it/s][A
 99%|████████████████████████████████▌| 120656/122310 [3:56:20<02:47,  9.87it/s][A
 99%|████████████████████████████████▌| 120664/122310 [3:56:21<02:51,  9.60it/s][A
 99%|████████████████████████████████▌| 120682/122310 [3:56:22<02:10, 12.48it/s][A
 99%|████████████████████████████████▌| 120699/122310 [3:56:22<01:53, 14.21it/s][A
 99%|████████████████████████████████▌| 120709/122310 [3:56:23<02:00, 13.31it/s][A
 99%|████████████████████████████████▌| 120716/122310 [3:56:24<02:17, 11.63it/s][A
 99%|████████████████████████████████▌| 120735/122310 [3:56:25<01:50, 14.25it/s][A
 99%|████████████████████████████████▌| 120745/122310 [3:56:26<01:59, 13.14it/s][A
 99%|████████████████████████████████▌| 120755/122310 [3:56:27<02:05, 12.36it/s][A
 99%|████████████████████████████████▌| 120768/122310 [3:56:28<01:59, 12.89

step: 29520, loss: 87.4952526229154, epoch: 1



 99%|████████████████████████████████▌| 120839/122310 [3:56:36<02:37,  9.33it/s][A
 99%|████████████████████████████████▌| 120852/122310 [3:56:37<02:15, 10.76it/s][A
 99%|████████████████████████████████▌| 120862/122310 [3:56:38<02:13, 10.87it/s][A
 99%|████████████████████████████████▌| 120868/122310 [3:56:39<02:30,  9.55it/s][A
 99%|████████████████████████████████▌| 120875/122310 [3:56:40<02:39,  8.99it/s][A
 99%|████████████████████████████████▌| 120880/122310 [3:56:41<02:59,  7.96it/s][A
 99%|████████████████████████████████▌| 120894/122310 [3:56:42<02:19, 10.16it/s][A
 99%|████████████████████████████████▌| 120902/122310 [3:56:43<02:24,  9.73it/s][A
 99%|████████████████████████████████▌| 120917/122310 [3:56:44<01:58, 11.71it/s][A
 99%|████████████████████████████████▋| 120923/122310 [3:56:45<02:14, 10.30it/s][A
 99%|████████████████████████████████▋| 120927/122310 [3:56:45<02:41,  8.54it/s][A
 99%|████████████████████████████████▋| 120929/122310 [3:56:46<03:24,  6.74

step: 29540, loss: 81.99408521970909, epoch: 1



 99%|████████████████████████████████▋| 121042/122310 [3:56:54<01:30, 14.07it/s][A
 99%|████████████████████████████████▋| 121050/122310 [3:56:55<01:40, 12.58it/s][A
 99%|████████████████████████████████▋| 121053/122310 [3:56:56<02:07,  9.85it/s][A
 99%|████████████████████████████████▋| 121056/122310 [3:56:57<02:38,  7.90it/s][A
 99%|████████████████████████████████▋| 121061/122310 [3:56:58<02:53,  7.21it/s][A
 99%|████████████████████████████████▋| 121065/122310 [3:56:59<03:14,  6.41it/s][A
 99%|████████████████████████████████▋| 121072/122310 [3:56:59<03:00,  6.84it/s][A
 99%|████████████████████████████████▋| 121082/122310 [3:57:00<02:30,  8.18it/s][A
 99%|████████████████████████████████▋| 121092/122310 [3:57:01<02:13,  9.13it/s][A
 99%|████████████████████████████████▋| 121101/122310 [3:57:02<02:07,  9.45it/s][A
 99%|████████████████████████████████▋| 121114/122310 [3:57:03<01:48, 11.06it/s][A
 99%|████████████████████████████████▋| 121118/122310 [3:57:04<02:10,  9.14

step: 29560, loss: 83.3092464088936, epoch: 1



 99%|████████████████████████████████▋| 121165/122310 [3:57:12<02:51,  6.66it/s][A
 99%|████████████████████████████████▋| 121172/122310 [3:57:13<02:41,  7.04it/s][A
 99%|████████████████████████████████▋| 121181/122310 [3:57:14<02:21,  7.96it/s][A
 99%|████████████████████████████████▋| 121189/122310 [3:57:14<02:15,  8.30it/s][A
 99%|████████████████████████████████▋| 121194/122310 [3:57:15<02:27,  7.54it/s][A
 99%|████████████████████████████████▋| 121201/122310 [3:57:16<02:25,  7.64it/s][A
 99%|████████████████████████████████▋| 121215/122310 [3:57:18<02:20,  7.79it/s][A
 99%|████████████████████████████████▋| 121224/122310 [3:57:19<02:09,  8.39it/s][A
 99%|████████████████████████████████▋| 121231/122310 [3:57:20<02:09,  8.31it/s][A
 99%|████████████████████████████████▋| 121237/122310 [3:57:21<02:15,  7.90it/s][A
 99%|████████████████████████████████▋| 121245/122310 [3:57:21<02:09,  8.25it/s][A
 99%|████████████████████████████████▋| 121253/122310 [3:57:22<02:04,  8.52

step: 29580, loss: 88.23978070630983, epoch: 1



 99%|████████████████████████████████▋| 121325/122310 [3:57:30<02:43,  6.02it/s][A
 99%|████████████████████████████████▋| 121339/122310 [3:57:31<02:02,  7.91it/s][A
 99%|████████████████████████████████▋| 121353/122310 [3:57:32<01:44,  9.16it/s][A
 99%|████████████████████████████████▋| 121360/122310 [3:57:34<01:55,  8.24it/s][A
 99%|████████████████████████████████▋| 121370/122310 [3:57:35<01:55,  8.14it/s][A
 99%|████████████████████████████████▋| 121381/122310 [3:57:36<01:53,  8.21it/s][A
 99%|████████████████████████████████▊| 121388/122310 [3:57:37<01:59,  7.73it/s][A
 99%|████████████████████████████████▊| 121399/122310 [3:57:38<01:44,  8.72it/s][A
 99%|████████████████████████████████▊| 121404/122310 [3:57:39<01:55,  7.86it/s][A
 99%|████████████████████████████████▊| 121419/122310 [3:57:40<01:28, 10.08it/s][A
 99%|████████████████████████████████▊| 121424/122310 [3:57:41<01:45,  8.39it/s][A
 99%|████████████████████████████████▊| 121434/122310 [3:57:42<01:37,  8.98

step: 29600, loss: 72.76828901085861, epoch: 1
sim1 and sim2 are 0.5295280349384783, 0.24484085226843474
cosine of pred and queen: 0.16691737759503764
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: benefits
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tickling
Act


 99%|████████████████████████████████▊| 121492/122310 [3:58:00<01:10, 11.66it/s][A

Actual: telengana:hyderabad::odisha:bhubaneswar, pred: maharashtra
Actual: gujrat:gandhinagar::bihar:patna, pred: patna
Actual: chhattisgarh:raipur::assam:dispur, pred: mohamed
Actual: goa:panaji::rajasthan:jaipur, pred: tempura
Actual: jharkhand:ranchi::punjab:chandigarh, pred: bihar
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: thiruvananthapuram
Actual: india:delhi::serbia:belgrade, pred: returne
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: cricket
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: construction
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:th


 99%|████████████████████████████████▊| 121501/122310 [3:59:34<41:46,  3.10s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 99%|████████████████████████████████▊| 121513/122310 [3:59:35<27:55,  2.10s/it][A
 99%|████████████████████████████████▊| 121525/122310 [3:59:36<19:01,  1.45s/it][A
 99%|████████████████████████████████▊| 121528/122310 [3:59:36<17:23,  1.33s/it][A
 99%|████████████████████████████████▊| 121538/122310 [3:59:37<11:54,  1.08it/s][A
 99%|████████████████████████████████▊| 121548/122310 [3:59:38<08:21,  1.52it/s][A
 99%|████████████████████████████████▊| 121558/122310 [3:59:39<06:00,  2.08it/s][A
 99%|████████████████████████████████▊| 121563/122310 [3:59:40<05:16,  2.36it/s][A
 99%|████████████████████████████████▊| 121577/122310 [3:59:41<03:19,  3.68it/s][A
 99%|████████████████████████████████▊| 121593/122310 [3:59:42<02:11,  5.44it/s][A
 99%|████████████████████████████████▊| 121599/122310 [3:59:43<02:06,  5.63it/s][A
 99%|████████████████████████████████▊| 121606/122310 [3:59:44<01:59,  5.89it/s][A
 99%|████████████████████████████████▊| 121609/122310 [3:59:45<02:11,  5.32

step: 29620, loss: 81.19500884451435, epoch: 1



 99%|████████████████████████████████▊| 121673/122310 [3:59:54<01:20,  7.88it/s][A
 99%|████████████████████████████████▊| 121675/122310 [3:59:55<01:41,  6.24it/s][A
 99%|████████████████████████████████▊| 121687/122310 [3:59:56<01:15,  8.20it/s][A
100%|████████████████████████████████▊| 121699/122310 [3:59:57<01:05,  9.34it/s][A
100%|████████████████████████████████▊| 121705/122310 [3:59:58<01:13,  8.19it/s][A
100%|████████████████████████████████▊| 121712/122310 [3:59:59<01:20,  7.44it/s][A
100%|████████████████████████████████▊| 121723/122310 [4:00:00<01:13,  8.03it/s][A
100%|████████████████████████████████▊| 121737/122310 [4:00:02<01:01,  9.30it/s][A
100%|████████████████████████████████▊| 121749/122310 [4:00:03<00:57,  9.74it/s][A
100%|████████████████████████████████▊| 121754/122310 [4:00:04<01:11,  7.79it/s][A
100%|████████████████████████████████▊| 121759/122310 [4:00:05<01:19,  6.94it/s][A
100%|████████████████████████████████▊| 121769/122310 [4:00:06<01:10,  7.67

step: 29640, loss: 87.26518663753639, epoch: 1



100%|████████████████████████████████▊| 121836/122310 [4:00:15<00:56,  8.33it/s][A
100%|████████████████████████████████▊| 121844/122310 [4:00:16<00:54,  8.49it/s][A
100%|████████████████████████████████▉| 121854/122310 [4:00:17<00:49,  9.29it/s][A
100%|████████████████████████████████▉| 121863/122310 [4:00:18<00:46,  9.55it/s][A
100%|████████████████████████████████▉| 121884/122310 [4:00:19<00:32, 13.30it/s][A
100%|████████████████████████████████▉| 121891/122310 [4:00:20<00:36, 11.44it/s][A
100%|████████████████████████████████▉| 121893/122310 [4:00:21<00:50,  8.32it/s][A
100%|████████████████████████████████▉| 121898/122310 [4:00:22<00:57,  7.14it/s][A
100%|████████████████████████████████▉| 121903/122310 [4:00:24<01:18,  5.16it/s][A
100%|████████████████████████████████▉| 121915/122310 [4:00:25<00:55,  7.06it/s][A
100%|████████████████████████████████▉| 121927/122310 [4:00:26<00:46,  8.24it/s][A
100%|████████████████████████████████▉| 121934/122310 [4:00:27<00:48,  7.73

step: 29660, loss: 70.24761756617117, epoch: 1



100%|████████████████████████████████▉| 121999/122310 [4:00:35<00:39,  7.80it/s][A
100%|████████████████████████████████▉| 122006/122310 [4:00:36<00:39,  7.69it/s][A
100%|████████████████████████████████▉| 122007/122310 [4:00:37<00:51,  5.85it/s][A
100%|████████████████████████████████▉| 122012/122310 [4:00:38<00:52,  5.73it/s][A
100%|████████████████████████████████▉| 122019/122310 [4:00:39<00:46,  6.26it/s][A
100%|████████████████████████████████▉| 122033/122310 [4:00:40<00:32,  8.45it/s][A
100%|████████████████████████████████▉| 122036/122310 [4:00:41<00:40,  6.80it/s][A
100%|████████████████████████████████▉| 122041/122310 [4:00:42<00:41,  6.45it/s][A
100%|████████████████████████████████▉| 122044/122310 [4:00:43<00:48,  5.51it/s][A
100%|████████████████████████████████▉| 122054/122310 [4:00:43<00:36,  7.09it/s][A
100%|████████████████████████████████▉| 122068/122310 [4:00:44<00:26,  9.27it/s][A
100%|████████████████████████████████▉| 122077/122310 [4:00:46<00:25,  8.98

step: 29680, loss: 128.5327468376051, epoch: 1



100%|████████████████████████████████▉| 122147/122310 [4:00:55<00:24,  6.78it/s][A
100%|████████████████████████████████▉| 122152/122310 [4:00:56<00:25,  6.12it/s][A
100%|████████████████████████████████▉| 122156/122310 [4:00:57<00:30,  5.13it/s][A
100%|████████████████████████████████▉| 122167/122310 [4:00:58<00:23,  6.16it/s][A
100%|████████████████████████████████▉| 122174/122310 [4:00:59<00:22,  5.97it/s][A
100%|████████████████████████████████▉| 122191/122310 [4:01:01<00:14,  8.08it/s][A
100%|████████████████████████████████▉| 122205/122310 [4:01:02<00:11,  8.79it/s][A
100%|████████████████████████████████▉| 122216/122310 [4:01:03<00:10,  8.65it/s][A
100%|████████████████████████████████▉| 122220/122310 [4:01:05<00:12,  6.99it/s][A
100%|████████████████████████████████▉| 122232/122310 [4:01:06<00:10,  7.59it/s][A
100%|████████████████████████████████▉| 122237/122310 [4:01:08<00:11,  6.37it/s][A
100%|████████████████████████████████▉| 122242/122310 [4:01:09<00:11,  5.76

step: 29700, loss: 84.93993109133967, epoch: 1



  0%|                                                | 0/122310 [00:00<?, ?it/s][A
  0%|                                     | 2/122310 [00:00<14:56:48,  2.27it/s][A
  0%|                                     | 5/122310 [00:01<11:37:16,  2.92it/s][A
  0%|                                     | 12/122310 [00:02<6:29:56,  5.23it/s][A
  0%|                                     | 22/122310 [00:03<4:26:58,  7.63it/s][A
  0%|                                     | 32/122310 [00:04<3:53:15,  8.74it/s][A
  0%|                                     | 42/122310 [00:05<3:33:44,  9.53it/s][A
  0%|                                     | 47/122310 [00:06<4:08:37,  8.20it/s][A
  0%|                                     | 52/122310 [00:07<4:40:55,  7.25it/s][A
  0%|                                     | 57/122310 [00:08<4:59:05,  6.81it/s][A
  0%|                                     | 76/122310 [00:08<3:01:04, 11.25it/s][A
  0%|                                     | 78/122310 [00:09<3:56:04,  8.63

step: 29720, loss: 68.62835431071908, epoch: 2



  0%|                                    | 175/122310 [00:18<2:40:30, 12.68it/s][A
  0%|                                    | 182/122310 [00:19<2:53:26, 11.74it/s][A
  0%|                                    | 191/122310 [00:20<2:53:02, 11.76it/s][A
  0%|                                    | 199/122310 [00:21<3:01:05, 11.24it/s][A
  0%|                                    | 201/122310 [00:21<3:56:48,  8.59it/s][A
  0%|                                    | 212/122310 [00:22<3:20:29, 10.15it/s][A
  0%|                                    | 225/122310 [00:23<2:50:13, 11.95it/s][A
  0%|                                    | 234/122310 [00:24<2:51:02, 11.90it/s][A
  0%|                                    | 247/122310 [00:24<2:32:15, 13.36it/s][A
  0%|                                    | 251/122310 [00:25<3:05:26, 10.97it/s][A
  0%|                                    | 262/122310 [00:26<2:52:53, 11.77it/s][A
  0%|                                    | 275/122310 [00:27<2:33:28, 13.25

step: 29740, loss: 77.96453443413947, epoch: 2



  0%|                                    | 354/122310 [00:34<2:41:15, 12.61it/s][A
  0%|                                    | 361/122310 [00:35<2:56:14, 11.53it/s][A
  0%|                                    | 369/122310 [00:35<3:03:04, 11.10it/s][A
  0%|                                    | 382/122310 [00:36<2:38:48, 12.80it/s][A
  0%|                                    | 391/122310 [00:37<2:43:43, 12.41it/s][A
  0%|                                    | 396/122310 [00:39<4:19:34,  7.83it/s][A
  0%|                                    | 401/122310 [00:40<4:44:55,  7.13it/s][A
  0%|                                    | 406/122310 [00:41<5:07:19,  6.61it/s][A
  0%|                                    | 414/122310 [00:42<4:44:07,  7.15it/s][A
  0%|                                    | 423/122310 [00:43<4:21:32,  7.77it/s][A
  0%|▏                                   | 429/122310 [00:43<4:36:20,  7.35it/s][A
  0%|▏                                   | 440/122310 [00:44<3:54:06,  8.68

step: 29760, loss: 70.34835923289116, epoch: 2



  0%|▏                                   | 523/122310 [00:52<3:15:22, 10.39it/s][A
  0%|▏                                   | 527/122310 [00:53<3:57:53,  8.53it/s][A
  0%|▏                                   | 533/122310 [00:54<4:13:10,  8.02it/s][A
  0%|▏                                   | 554/122310 [00:55<2:44:34, 12.33it/s][A
  0%|▏                                   | 564/122310 [00:56<2:52:30, 11.76it/s][A
  0%|▏                                   | 584/122310 [00:57<2:18:36, 14.64it/s][A
  0%|▏                                   | 594/122310 [00:58<2:30:00, 13.52it/s][A
  0%|▏                                   | 598/122310 [00:59<3:04:13, 11.01it/s][A
  0%|▏                                   | 606/122310 [01:00<3:15:32, 10.37it/s][A
  1%|▏                                   | 613/122310 [01:00<3:33:15,  9.51it/s][A
  1%|▏                                   | 623/122310 [01:01<3:23:47,  9.95it/s][A
  1%|▏                                   | 641/122310 [01:02<2:36:13, 12.98

step: 29780, loss: 74.8194256965407, epoch: 2



  1%|▏                                   | 712/122310 [01:10<2:42:55, 12.44it/s][A
  1%|▏                                   | 719/122310 [01:11<3:01:32, 11.16it/s][A
  1%|▏                                   | 728/122310 [01:12<3:06:06, 10.89it/s][A
  1%|▏                                   | 734/122310 [01:13<3:27:04,  9.79it/s][A
  1%|▏                                   | 751/122310 [01:13<2:38:19, 12.80it/s][A
  1%|▏                                   | 759/122310 [01:14<2:51:54, 11.78it/s][A
  1%|▏                                   | 763/122310 [01:15<3:29:24,  9.67it/s][A
  1%|▏                                   | 777/122310 [01:16<2:52:45, 11.72it/s][A
  1%|▏                                   | 791/122310 [01:17<2:34:42, 13.09it/s][A
  1%|▏                                   | 811/122310 [01:18<2:04:47, 16.23it/s][A
  1%|▏                                   | 819/122310 [01:19<2:24:55, 13.97it/s][A
  1%|▏                                   | 821/122310 [01:19<3:11:34, 10.57

step: 29800, loss: 70.52392367981462, epoch: 2



  1%|▎                                   | 889/122310 [01:27<4:56:31,  6.82it/s][A
  1%|▎                                   | 899/122310 [01:28<4:06:39,  8.20it/s][A
  1%|▎                                   | 908/122310 [01:29<3:47:11,  8.91it/s][A
  1%|▎                                   | 914/122310 [01:30<4:05:06,  8.25it/s][A
  1%|▎                                   | 919/122310 [01:31<4:28:28,  7.54it/s][A
  1%|▎                                   | 923/122310 [01:32<5:02:11,  6.69it/s][A
  1%|▎                                   | 939/122310 [01:33<3:16:58, 10.27it/s][A
  1%|▎                                   | 951/122310 [01:33<2:56:59, 11.43it/s][A
  1%|▎                                   | 962/122310 [01:34<2:50:25, 11.87it/s][A
  1%|▎                                   | 975/122310 [01:35<2:36:27, 12.93it/s][A
  1%|▎                                   | 987/122310 [01:36<2:32:18, 13.28it/s][A
  1%|▎                                   | 995/122310 [01:37<2:46:46, 12.12

step: 29820, loss: 79.2375986194301, epoch: 2



  1%|▎                                  | 1047/122310 [01:45<3:38:08,  9.27it/s][A
  1%|▎                                  | 1052/122310 [01:45<4:04:06,  8.28it/s][A
  1%|▎                                  | 1059/122310 [01:46<4:04:06,  8.28it/s][A
  1%|▎                                  | 1070/122310 [01:47<3:30:00,  9.62it/s][A
  1%|▎                                  | 1074/122310 [01:48<4:07:36,  8.16it/s][A
  1%|▎                                  | 1094/122310 [01:49<2:38:16, 12.76it/s][A
  1%|▎                                  | 1099/122310 [01:50<3:07:43, 10.76it/s][A
  1%|▎                                  | 1106/122310 [01:51<3:21:56, 10.00it/s][A
  1%|▎                                  | 1112/122310 [01:51<3:41:24,  9.12it/s][A
  1%|▎                                  | 1117/122310 [01:52<4:09:13,  8.10it/s][A
  1%|▎                                  | 1120/122310 [01:53<5:11:08,  6.49it/s][A
  1%|▎                                  | 1126/122310 [01:55<6:49:45,  4.93

step: 29840, loss: 70.30810159703731, epoch: 2



  1%|▎                                  | 1199/122310 [02:02<4:05:22,  8.23it/s][A
  1%|▎                                  | 1211/122310 [02:03<3:29:48,  9.62it/s][A
  1%|▎                                  | 1218/122310 [02:04<3:51:06,  8.73it/s][A
  1%|▎                                  | 1226/122310 [02:05<3:53:30,  8.64it/s][A
  1%|▎                                  | 1236/122310 [02:06<3:39:42,  9.18it/s][A
  1%|▎                                  | 1244/122310 [02:07<3:49:21,  8.80it/s][A
  1%|▎                                  | 1248/122310 [02:08<4:31:41,  7.43it/s][A
  1%|▎                                  | 1257/122310 [02:09<4:05:47,  8.21it/s][A
  1%|▎                                  | 1268/122310 [02:10<3:36:03,  9.34it/s][A
  1%|▎                                  | 1280/122310 [02:11<3:11:28, 10.54it/s][A
  1%|▎                                  | 1288/122310 [02:12<3:20:28, 10.06it/s][A
  1%|▎                                  | 1300/122310 [02:13<3:00:45, 11.16

step: 29860, loss: 82.30542455029634, epoch: 2



  1%|▍                                  | 1364/122310 [02:20<4:19:12,  7.78it/s][A
  1%|▍                                  | 1369/122310 [02:21<4:39:31,  7.21it/s][A
  1%|▍                                  | 1373/122310 [02:22<5:15:12,  6.39it/s][A
  1%|▍                                  | 1383/122310 [02:23<4:13:12,  7.96it/s][A
  1%|▍                                  | 1391/122310 [02:24<4:01:16,  8.35it/s][A
  1%|▍                                  | 1407/122310 [02:25<2:56:31, 11.42it/s][A
  1%|▍                                  | 1417/122310 [02:26<2:54:50, 11.52it/s][A
  1%|▍                                  | 1427/122310 [02:26<2:53:51, 11.59it/s][A
  1%|▍                                  | 1436/122310 [02:27<2:58:11, 11.31it/s][A
  1%|▍                                  | 1442/122310 [02:28<3:20:58, 10.02it/s][A
  1%|▍                                  | 1447/122310 [02:29<3:49:59,  8.76it/s][A
  1%|▍                                  | 1454/122310 [02:30<3:53:59,  8.61

step: 29880, loss: 69.02662754746405, epoch: 2



  1%|▍                                  | 1519/122310 [02:37<3:48:16,  8.82it/s][A
  1%|▍                                  | 1533/122310 [02:38<3:01:48, 11.07it/s][A
  1%|▍                                  | 1545/122310 [02:39<2:47:31, 12.01it/s][A
  1%|▍                                  | 1550/122310 [02:40<3:17:11, 10.21it/s][A
  1%|▍                                  | 1559/122310 [02:41<3:14:52, 10.33it/s][A
  1%|▍                                  | 1572/122310 [02:42<2:50:33, 11.80it/s][A
  1%|▍                                  | 1588/122310 [02:43<2:24:43, 13.90it/s][A
  1%|▍                                  | 1599/122310 [02:43<2:28:58, 13.50it/s][A
  1%|▍                                  | 1601/122310 [02:45<4:14:38,  7.90it/s][A
  1%|▍                                  | 1619/122310 [02:46<2:59:48, 11.19it/s][A
  1%|▍                                  | 1630/122310 [02:47<2:52:35, 11.65it/s][A
  1%|▍                                  | 1634/122310 [02:48<3:26:45,  9.73

step: 29900, loss: 94.56125387465377, epoch: 2



  1%|▍                                  | 1688/122310 [02:54<3:49:20,  8.77it/s][A
  1%|▍                                  | 1699/122310 [02:55<3:23:24,  9.88it/s][A
  1%|▍                                  | 1711/122310 [02:56<3:00:36, 11.13it/s][A
  1%|▍                                  | 1720/122310 [02:57<3:04:21, 10.90it/s][A
  1%|▍                                  | 1733/122310 [02:58<2:45:38, 12.13it/s][A
  1%|▍                                  | 1738/122310 [02:59<3:14:47, 10.32it/s][A
  1%|▌                                  | 1749/122310 [03:00<3:01:58, 11.04it/s][A
  1%|▌                                  | 1751/122310 [03:01<3:59:16,  8.40it/s][A
  1%|▌                                  | 1761/122310 [03:01<3:34:58,  9.35it/s][A
  1%|▌                                  | 1764/122310 [03:02<4:26:14,  7.55it/s][A
  1%|▌                                  | 1772/122310 [03:03<4:06:53,  8.14it/s][A
  1%|▌                                  | 1782/122310 [03:04<3:39:09,  9.17

step: 29920, loss: 64.27271012708988, epoch: 2



  2%|▌                                  | 1864/122310 [03:12<2:32:07, 13.20it/s][A
  2%|▌                                  | 1870/122310 [03:13<2:59:16, 11.20it/s][A
  2%|▌                                  | 1880/122310 [03:13<2:57:09, 11.33it/s][A
  2%|▌                                  | 1888/122310 [03:14<3:06:17, 10.77it/s][A
  2%|▌                                  | 1892/122310 [03:15<3:45:44,  8.89it/s][A
  2%|▌                                  | 1903/122310 [03:16<3:23:35,  9.86it/s][A
  2%|▌                                  | 1910/122310 [03:17<3:36:03,  9.29it/s][A
  2%|▌                                  | 1922/122310 [03:18<3:09:36, 10.58it/s][A
  2%|▌                                  | 1926/122310 [03:19<3:46:48,  8.85it/s][A
  2%|▌                                  | 1933/122310 [03:20<3:53:11,  8.60it/s][A
  2%|▌                                  | 1942/122310 [03:20<3:37:56,  9.20it/s][A
  2%|▌                                  | 1953/122310 [03:21<3:14:19, 10.32

step: 29940, loss: 90.04203353187619, epoch: 2



  2%|▌                                  | 2017/122310 [03:29<3:55:39,  8.51it/s][A
  2%|▌                                  | 2019/122310 [03:30<5:01:09,  6.66it/s][A
  2%|▌                                  | 2023/122310 [03:31<5:27:42,  6.12it/s][A
  2%|▌                                  | 2027/122310 [03:32<5:53:31,  5.67it/s][A
  2%|▌                                  | 2028/122310 [03:32<7:45:08,  4.31it/s][A
  2%|▌                                  | 2035/122310 [03:33<6:04:40,  5.50it/s][A
  2%|▌                                  | 2051/122310 [03:34<3:32:21,  9.44it/s][A
  2%|▌                                  | 2056/122310 [03:35<3:58:16,  8.41it/s][A
  2%|▌                                  | 2069/122310 [03:36<3:14:20, 10.31it/s][A
  2%|▌                                  | 2076/122310 [03:37<3:27:10,  9.67it/s][A
  2%|▌                                  | 2081/122310 [03:38<3:56:54,  8.46it/s][A
  2%|▌                                  | 2090/122310 [03:38<3:41:49,  9.03

step: 29960, loss: 81.00810313390787, epoch: 2



  2%|▌                                  | 2184/122310 [03:46<3:13:09, 10.37it/s][A
  2%|▋                                  | 2199/122310 [03:47<2:39:53, 12.52it/s][A
  2%|▋                                  | 2204/122310 [03:48<3:11:31, 10.45it/s][A
  2%|▋                                  | 2211/122310 [03:49<3:24:25,  9.79it/s][A
  2%|▋                                  | 2219/122310 [03:50<3:27:01,  9.67it/s][A
  2%|▋                                  | 2225/122310 [03:51<3:46:06,  8.85it/s][A
  2%|▋                                  | 2231/122310 [03:51<3:58:17,  8.40it/s][A
  2%|▋                                  | 2242/122310 [03:52<3:22:19,  9.89it/s][A
  2%|▋                                  | 2249/122310 [03:53<3:31:33,  9.46it/s][A
  2%|▋                                  | 2253/122310 [03:54<4:09:36,  8.02it/s][A
  2%|▋                                  | 2267/122310 [03:55<3:08:53, 10.59it/s][A
  2%|▋                                  | 2273/122310 [03:56<3:29:38,  9.54

step: 29980, loss: 72.87105289313644, epoch: 2



  2%|▋                                  | 2347/122310 [04:03<3:25:29,  9.73it/s][A
  2%|▋                                  | 2350/122310 [04:04<4:14:03,  7.87it/s][A
  2%|▋                                  | 2360/122310 [04:05<3:41:31,  9.02it/s][A
  2%|▋                                  | 2378/122310 [04:06<2:37:25, 12.70it/s][A
  2%|▋                                  | 2384/122310 [04:07<3:01:49, 10.99it/s][A
  2%|▋                                  | 2395/122310 [04:07<2:51:40, 11.64it/s][A
  2%|▋                                  | 2402/122310 [04:08<3:08:11, 10.62it/s][A
  2%|▋                                  | 2410/122310 [04:09<3:13:35, 10.32it/s][A
  2%|▋                                  | 2416/122310 [04:10<3:33:40,  9.35it/s][A
  2%|▋                                  | 2428/122310 [04:11<3:05:54, 10.75it/s][A
  2%|▋                                  | 2440/122310 [04:12<2:50:00, 11.75it/s][A
  2%|▋                                  | 2449/122310 [04:13<2:54:59, 11.42

step: 30000, loss: 79.15669415577977, epoch: 2
sim1 and sim2 are 0.4738462012904692, 0.26146399433346895
cosine of pred and queen: 0.17360338354567312
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: germany
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: europe
Actual: mah

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: htay
Actual: man:woman::brothers:sisters, pred: multiplying
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: mumbai
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: karnataka
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar
Actual: india:rupee


  2%|▋                                 | 2509/122310 [05:42<91:33:18,  2.75s/it][A
  2%|▋                                 | 2518/122310 [05:43<65:44:25,  1.98s/it][A
  2%|▋                                 | 2530/122310 [05:44<43:03:13,  1.29s/it][A
  2%|▋                                 | 2541/122310 [05:45<30:06:57,  1.10it/s][A
  2%|▋                                 | 2548/122310 [05:46<24:13:29,  1.37it/s][A
  2%|▋                                 | 2556/122310 [05:46<18:35:50,  1.79it/s][A
  2%|▋                                 | 2564/122310 [05:47<14:26:43,  2.30it/s][A
  2%|▋                                  | 2577/122310 [05:48<9:39:46,  3.44it/s][A
  2%|▋                                  | 2589/122310 [05:49<7:16:00,  4.58it/s][A
  2%|▋                                  | 2600/122310 [05:50<5:53:24,  5.65it/s][A
  2%|▋                                  | 2610/122310 [05:51<5:03:29,  6.57it/s][A
  2%|▋                                  | 2619/122310 [05:52<4:35:24,  7.24

step: 30020, loss: 81.68701933075401, epoch: 2



  2%|▊                                  | 2688/122310 [06:00<4:36:05,  7.22it/s][A
  2%|▊                                  | 2695/122310 [06:01<4:17:34,  7.74it/s][A
  2%|▊                                  | 2702/122310 [06:02<4:07:22,  8.06it/s][A
  2%|▊                                  | 2710/122310 [06:03<3:57:58,  8.38it/s][A
  2%|▊                                  | 2714/122310 [06:04<4:40:34,  7.10it/s][A
  2%|▊                                  | 2724/122310 [06:05<3:59:34,  8.32it/s][A
  2%|▊                                  | 2733/122310 [06:05<3:43:38,  8.91it/s][A
  2%|▊                                  | 2741/122310 [06:06<3:43:34,  8.91it/s][A
  2%|▊                                  | 2754/122310 [06:07<3:05:57, 10.72it/s][A
  2%|▊                                  | 2771/122310 [06:08<2:30:34, 13.23it/s][A
  2%|▊                                  | 2776/122310 [06:09<3:01:14, 10.99it/s][A
  2%|▊                                  | 2783/122310 [06:10<3:18:08, 10.05

step: 30040, loss: 78.41805993937014, epoch: 2



  2%|▊                                  | 2865/122310 [06:18<2:47:30, 11.88it/s][A
  2%|▊                                  | 2876/122310 [06:19<2:47:20, 11.90it/s][A
  2%|▊                                  | 2889/122310 [06:20<2:36:51, 12.69it/s][A
  2%|▊                                  | 2894/122310 [06:21<3:11:14, 10.41it/s][A
  2%|▊                                  | 2900/122310 [06:22<3:36:39,  9.19it/s][A
  2%|▊                                  | 2901/122310 [06:23<4:56:26,  6.71it/s][A
  2%|▊                                  | 2909/122310 [06:24<4:39:43,  7.11it/s][A
  2%|▊                                  | 2916/122310 [06:25<4:33:23,  7.28it/s][A
  2%|▊                                  | 2923/122310 [06:25<4:31:36,  7.33it/s][A
  2%|▊                                  | 2932/122310 [06:26<4:07:43,  8.03it/s][A
  2%|▊                                  | 2942/122310 [06:27<3:44:09,  8.88it/s][A
  2%|▊                                  | 2943/122310 [06:28<5:04:55,  6.52

step: 30060, loss: 62.5290034701452, epoch: 2



  2%|▊                                  | 3012/122310 [06:36<3:14:12, 10.24it/s][A
  2%|▊                                  | 3018/122310 [06:37<3:34:50,  9.25it/s][A
  2%|▊                                  | 3029/122310 [06:38<3:16:16, 10.13it/s][A
  2%|▊                                  | 3034/122310 [06:39<3:43:22,  8.90it/s][A
  2%|▊                                  | 3046/122310 [06:40<3:12:26, 10.33it/s][A
  3%|▉                                  | 3060/122310 [06:40<2:45:44, 11.99it/s][A
  3%|▉                                  | 3067/122310 [06:41<3:05:18, 10.72it/s][A
  3%|▉                                  | 3075/122310 [06:42<3:12:35, 10.32it/s][A
  3%|▉                                  | 3086/122310 [06:43<3:01:16, 10.96it/s][A
  3%|▉                                  | 3095/122310 [06:44<3:04:17, 10.78it/s][A
  3%|▉                                  | 3102/122310 [06:45<3:19:26,  9.96it/s][A
  3%|▉                                  | 3107/122310 [06:46<3:48:26,  8.70

step: 30080, loss: 75.27892771427632, epoch: 2



  3%|▉                                  | 3195/122310 [06:53<2:42:31, 12.22it/s][A
  3%|▉                                  | 3208/122310 [06:54<2:29:43, 13.26it/s][A
  3%|▉                                  | 3215/122310 [06:55<2:48:51, 11.75it/s][A
  3%|▉                                  | 3222/122310 [06:56<3:08:44, 10.52it/s][A
  3%|▉                                  | 3239/122310 [06:57<2:30:36, 13.18it/s][A
  3%|▉                                  | 3251/122310 [06:58<2:29:00, 13.32it/s][A
  3%|▉                                  | 3262/122310 [06:59<2:32:48, 12.98it/s][A
  3%|▉                                  | 3267/122310 [06:59<3:03:32, 10.81it/s][A
  3%|▉                                  | 3270/122310 [07:00<3:49:46,  8.63it/s][A
  3%|▉                                  | 3292/122310 [07:01<2:25:46, 13.61it/s][A
  3%|▉                                  | 3297/122310 [07:02<2:53:47, 11.41it/s][A
  3%|▉                                  | 3304/122310 [07:03<3:08:16, 10.53

step: 30100, loss: 76.44504336843357, epoch: 2



  3%|▉                                  | 3391/122310 [07:10<3:39:26,  9.03it/s][A
  3%|▉                                  | 3398/122310 [07:11<3:43:08,  8.88it/s][A
  3%|▉                                 | 3400/122310 [07:19<17:28:20,  1.89it/s][A
  3%|▉                                 | 3404/122310 [07:20<15:01:28,  2.20it/s][A
  3%|▉                                  | 3413/122310 [07:21<9:55:35,  3.33it/s][A
  3%|▉                                  | 3424/122310 [07:22<6:45:31,  4.89it/s][A
  3%|▉                                  | 3427/122310 [07:23<7:05:14,  4.66it/s][A
  3%|▉                                  | 3430/122310 [07:24<7:27:48,  4.42it/s][A
  3%|▉                                  | 3431/122310 [07:24<8:55:56,  3.70it/s][A
  3%|▉                                  | 3435/122310 [07:25<8:21:50,  3.95it/s][A
  3%|▉                                  | 3452/122310 [07:26<4:02:57,  8.15it/s][A

step: 30120, loss: 70.30311925076789, epoch: 2



  3%|▉                                  | 3457/122310 [07:27<4:21:15,  7.58it/s][A
  3%|▉                                  | 3463/122310 [07:28<4:25:07,  7.47it/s][A
  3%|▉                                  | 3476/122310 [07:29<3:23:56,  9.71it/s][A
  3%|▉                                  | 3484/122310 [07:29<3:24:38,  9.68it/s][A
  3%|▉                                  | 3493/122310 [07:30<3:19:02,  9.95it/s][A
  3%|█                                  | 3500/122310 [07:31<3:29:23,  9.46it/s][A
  3%|█                                  | 3509/122310 [07:32<3:20:46,  9.86it/s][A
  3%|█                                  | 3520/122310 [07:34<3:56:06,  8.39it/s][A
  3%|█                                  | 3535/122310 [07:34<3:04:19, 10.74it/s][A
  3%|█                                  | 3545/122310 [07:35<2:58:48, 11.07it/s][A
  3%|█                                  | 3559/122310 [07:36<2:36:51, 12.62it/s][A
  3%|█                                  | 3565/122310 [07:37<2:57:46, 11.13

step: 30140, loss: 72.53428143961101, epoch: 2



  3%|█                                  | 3632/122310 [07:44<2:39:19, 12.41it/s][A
  3%|█                                  | 3643/122310 [07:45<2:37:07, 12.59it/s][A
  3%|█                                  | 3652/122310 [07:45<2:43:40, 12.08it/s][A
  3%|█                                  | 3660/122310 [07:46<2:53:06, 11.42it/s][A
  3%|█                                  | 3665/122310 [07:47<3:21:28,  9.81it/s][A
  3%|█                                  | 3678/122310 [07:48<2:50:34, 11.59it/s][A
  3%|█                                  | 3691/122310 [07:49<2:36:05, 12.67it/s][A
  3%|█                                  | 3697/122310 [07:50<3:02:35, 10.83it/s][A
  3%|█                                  | 3712/122310 [07:50<2:34:39, 12.78it/s][A
  3%|█                                  | 3717/122310 [07:51<3:04:52, 10.69it/s][A
  3%|█                                  | 3719/122310 [07:52<4:02:22,  8.15it/s][A
  3%|█                                  | 3729/122310 [07:53<3:36:23,  9.13

step: 30160, loss: 44.92480919463951, epoch: 2



  3%|█                                 | 3747/122310 [08:16<27:09:32,  1.21it/s][A
  3%|█                                 | 3757/122310 [08:17<19:06:31,  1.72it/s][A

step: 30180, loss: 92.28637544576311, epoch: 2



  3%|█                                 | 3771/122310 [08:18<13:04:53,  2.52it/s][A
  3%|█                                 | 3778/122310 [08:19<11:04:01,  2.98it/s][A
  3%|█                                  | 3785/122310 [08:20<9:21:36,  3.52it/s][A
  3%|█                                 | 3790/122310 [08:23<10:46:24,  3.06it/s][A
  3%|█                                 | 3792/122310 [08:23<11:02:50,  2.98it/s][A
  3%|█                                  | 3798/122310 [08:24<9:05:22,  3.62it/s][A
  3%|█                                  | 3809/122310 [08:25<6:10:22,  5.33it/s][A
  3%|█                                  | 3816/122310 [08:26<5:32:26,  5.94it/s][A
  3%|█                                  | 3829/122310 [08:27<4:03:16,  8.12it/s][A
  3%|█                                  | 3844/122310 [08:28<3:07:42, 10.52it/s][A
  3%|█                                  | 3847/122310 [08:28<3:46:57,  8.70it/s][A
  3%|█                                  | 3857/122310 [08:29<3:27:01,  9.54

step: 30200, loss: 73.09133662439916, epoch: 2



  3%|█                                  | 3897/122310 [08:34<3:39:18,  9.00it/s][A
  3%|█                                  | 3902/122310 [08:35<4:03:48,  8.09it/s][A
  3%|█                                  | 3913/122310 [08:36<3:23:36,  9.69it/s][A
  3%|█                                  | 3923/122310 [08:37<3:08:21, 10.48it/s][A
  3%|█▏                                 | 3938/122310 [08:38<2:33:53, 12.82it/s][A
  3%|█▏                                 | 3953/122310 [08:38<2:16:22, 14.47it/s][A
  3%|█▏                                 | 3958/122310 [08:39<2:45:08, 11.94it/s][A
  3%|█▏                                 | 3970/122310 [08:40<2:35:24, 12.69it/s][A
  3%|█▏                                 | 3983/122310 [08:41<2:26:47, 13.43it/s][A
  3%|█▏                                 | 3992/122310 [08:42<2:35:13, 12.70it/s][A
  3%|█▏                                 | 3998/122310 [08:43<2:57:24, 11.11it/s][A
  3%|█▏                                 | 4005/122310 [08:43<3:11:16, 10.31

step: 30220, loss: 67.4016434140928, epoch: 2



  3%|█▏                                 | 4063/122310 [08:51<5:22:40,  6.11it/s][A
  3%|█▏                                 | 4064/122310 [08:52<7:01:03,  4.68it/s][A
  3%|█▏                                 | 4074/122310 [08:52<4:45:05,  6.91it/s][A
  3%|█▏                                 | 4084/122310 [08:53<3:51:57,  8.49it/s][A
  3%|█▏                                 | 4094/122310 [08:54<3:24:59,  9.61it/s][A
  3%|█▏                                 | 4100/122310 [08:55<3:41:05,  8.91it/s][A
  3%|█▏                                 | 4109/122310 [08:56<3:28:41,  9.44it/s][A
  3%|█▏                                 | 4114/122310 [08:57<3:53:18,  8.44it/s][A
  3%|█▏                                 | 4124/122310 [08:57<3:30:21,  9.36it/s][A
  3%|█▏                                 | 4128/122310 [08:58<4:05:40,  8.02it/s][A
  3%|█▏                                 | 4133/122310 [08:59<4:25:36,  7.42it/s][A
  3%|█▏                                 | 4145/122310 [09:00<3:27:24,  9.50

step: 30240, loss: 71.03088657040011, epoch: 2



  3%|█▏                                 | 4214/122310 [09:07<3:32:57,  9.24it/s][A
  3%|█▏                                 | 4222/122310 [09:08<3:32:14,  9.27it/s][A
  3%|█▏                                 | 4229/122310 [09:09<3:38:36,  9.00it/s][A
  3%|█▏                                 | 4240/122310 [09:10<3:12:35, 10.22it/s][A
  3%|█▏                                 | 4249/122310 [09:11<3:09:48, 10.37it/s][A
  3%|█▏                                 | 4258/122310 [09:12<3:07:32, 10.49it/s][A
  3%|█▏                                 | 4264/122310 [09:13<3:27:37,  9.48it/s][A
  4%|█▏                                 | 4281/122310 [09:13<2:36:17, 12.59it/s][A
  4%|█▏                                 | 4291/122310 [09:14<2:39:51, 12.30it/s][A
  4%|█▏                                 | 4299/122310 [09:15<2:51:01, 11.50it/s][A
  4%|█▏                                 | 4307/122310 [09:16<2:59:49, 10.94it/s][A
  4%|█▏                                 | 4311/122310 [09:18<4:39:32,  7.04

step: 30260, loss: 83.33270952089042, epoch: 2



  4%|█▏                                 | 4363/122310 [09:25<3:38:32,  8.99it/s][A
  4%|█▏                                 | 4366/122310 [09:25<4:21:06,  7.53it/s][A
  4%|█▎                                 | 4378/122310 [09:26<3:28:27,  9.43it/s][A
  4%|█▎                                 | 4383/122310 [09:27<3:52:10,  8.47it/s][A
  4%|█▎                                 | 4389/122310 [09:28<4:01:27,  8.14it/s][A
  4%|█▎                                 | 4396/122310 [09:29<3:58:33,  8.24it/s][A
  4%|█▎                                 | 4411/122310 [09:30<2:56:40, 11.12it/s][A
  4%|█▎                                 | 4419/122310 [09:30<3:04:12, 10.67it/s][A
  4%|█▎                                 | 4434/122310 [09:31<2:32:45, 12.86it/s][A
  4%|█▎                                 | 4443/122310 [09:32<2:40:13, 12.26it/s][A
  4%|█▎                                 | 4447/122310 [09:33<3:15:33, 10.04it/s][A
  4%|█▎                                 | 4457/122310 [09:34<3:03:39, 10.69

step: 30280, loss: 73.023737683334, epoch: 2



  4%|█▎                                 | 4535/122310 [09:41<2:55:24, 11.19it/s][A
  4%|█▎                                 | 4539/122310 [09:42<3:32:57,  9.22it/s][A
  4%|█▎                                 | 4550/122310 [09:43<3:09:44, 10.34it/s][A
  4%|█▎                                 | 4560/122310 [09:44<3:02:07, 10.78it/s][A
  4%|█▎                                 | 4575/122310 [09:45<2:31:52, 12.92it/s][A
  4%|█▎                                 | 4588/122310 [09:45<2:23:45, 13.65it/s][A
  4%|█▎                                 | 4593/122310 [09:46<2:52:29, 11.37it/s][A
  4%|█▎                                 | 4607/122310 [09:47<2:32:23, 12.87it/s][A
  4%|█▎                                 | 4610/122310 [09:48<3:17:10,  9.95it/s][A
  4%|█▎                                 | 4619/122310 [09:49<3:11:53, 10.22it/s][A
  4%|█▎                                 | 4635/122310 [09:50<2:32:48, 12.83it/s][A
  4%|█▎                                 | 4646/122310 [09:51<2:31:58, 12.90

step: 30300, loss: 81.86912229669771, epoch: 2



  4%|█▎                                 | 4720/122310 [09:58<3:51:04,  8.48it/s][A
  4%|█▎                                 | 4734/122310 [09:59<2:59:32, 10.91it/s][A
  4%|█▎                                 | 4747/122310 [10:00<2:40:58, 12.17it/s][A
  4%|█▎                                 | 4751/122310 [10:01<3:17:20,  9.93it/s][A
  4%|█▎                                 | 4755/122310 [10:02<3:56:23,  8.29it/s][A
  4%|█▎                                 | 4761/122310 [10:03<4:10:33,  7.82it/s][A
  4%|█▎                                 | 4764/122310 [10:03<4:58:23,  6.57it/s][A
  4%|█▎                                 | 4770/122310 [10:04<4:52:08,  6.71it/s][A
  4%|█▎                                 | 4774/122310 [10:05<5:20:26,  6.11it/s][A
  4%|█▎                                 | 4779/122310 [10:06<5:22:33,  6.07it/s][A
  4%|█▎                                 | 4787/122310 [10:07<4:36:45,  7.08it/s][A
  4%|█▎                                 | 4804/122310 [10:08<2:58:23, 10.98

step: 30320, loss: 68.33360265062878, epoch: 2



  4%|█▍                                 | 4867/122310 [10:15<3:24:17,  9.58it/s][A
  4%|█▍                                 | 4882/122310 [10:16<2:43:01, 12.00it/s][A
  4%|█▍                                 | 4895/122310 [10:17<2:30:21, 13.02it/s][A
  4%|█▍                                 | 4904/122310 [10:18<2:38:55, 12.31it/s][A
  4%|█▍                                 | 4912/122310 [10:18<2:50:21, 11.49it/s][A
  4%|█▍                                 | 4924/122310 [10:19<2:37:24, 12.43it/s][A
  4%|█▍                                 | 4932/122310 [10:20<2:49:36, 11.53it/s][A
  4%|█▍                                 | 4944/122310 [10:21<2:37:55, 12.39it/s][A
  4%|█▍                                 | 4949/122310 [10:22<3:06:00, 10.52it/s][A
  4%|█▍                                 | 4957/122310 [10:23<3:10:44, 10.25it/s][A
  4%|█▍                                 | 4964/122310 [10:23<3:21:08,  9.72it/s][A
  4%|█▍                                 | 4969/122310 [10:24<3:48:48,  8.55

step: 30340, loss: 71.1197979556075, epoch: 2



  4%|█▍                                 | 5069/122310 [10:32<2:36:48, 12.46it/s][A
  4%|█▍                                 | 5084/122310 [10:33<2:17:27, 14.21it/s][A
  4%|█▍                                 | 5094/122310 [10:33<2:24:13, 13.54it/s][A
  4%|█▍                                 | 5102/122310 [10:34<2:37:27, 12.41it/s][A
  4%|█▍                                 | 5114/122310 [10:35<2:31:03, 12.93it/s][A
  4%|█▍                                 | 5124/122310 [10:36<2:34:06, 12.67it/s][A
  4%|█▍                                 | 5130/122310 [10:37<2:56:49, 11.05it/s][A
  4%|█▍                                 | 5137/122310 [10:38<3:10:42, 10.24it/s][A
  4%|█▍                                 | 5144/122310 [10:38<3:21:37,  9.69it/s][A
  4%|█▍                                 | 5152/122310 [10:39<3:22:35,  9.64it/s][A
  4%|█▍                                 | 5161/122310 [10:40<3:17:17,  9.90it/s][A
  4%|█▍                                 | 5165/122310 [10:41<3:53:21,  8.37

step: 30360, loss: 80.91903359495672, epoch: 2



  4%|█▍                                 | 5237/122310 [10:48<3:05:59, 10.49it/s][A
  4%|█▌                                 | 5251/122310 [10:49<2:38:21, 12.32it/s][A
  4%|█▌                                 | 5253/122310 [10:50<3:27:04,  9.42it/s][A
  4%|█▌                                 | 5269/122310 [10:51<2:36:52, 12.43it/s][A
  4%|█▌                                 | 5281/122310 [10:52<2:30:13, 12.98it/s][A
  4%|█▌                                 | 5284/122310 [10:53<3:10:55, 10.22it/s][A
  4%|█▌                                 | 5294/122310 [10:53<3:00:21, 10.81it/s][A
  4%|█▌                                 | 5299/122310 [10:54<3:31:22,  9.23it/s][A
  4%|█▌                                 | 5312/122310 [10:55<2:56:31, 11.05it/s][A
  4%|█▌                                 | 5323/122310 [10:56<2:51:13, 11.39it/s][A
  4%|█▌                                 | 5328/122310 [10:57<3:27:06,  9.41it/s][A
  4%|█▌                                 | 5334/122310 [10:58<4:01:54,  8.06

step: 30380, loss: 103.10673852048491, epoch: 2



  4%|█▌                                 | 5388/122310 [11:06<4:50:22,  6.71it/s][A
  4%|█▌                                 | 5399/122310 [11:07<3:55:37,  8.27it/s][A
  4%|█▌                                 | 5402/122310 [11:08<4:36:21,  7.05it/s][A
  4%|█▌                                 | 5408/122310 [11:09<4:33:08,  7.13it/s][A
  4%|█▌                                 | 5421/122310 [11:10<3:25:43,  9.47it/s][A
  4%|█▌                                 | 5427/122310 [11:10<3:41:46,  8.78it/s][A
  4%|█▌                                 | 5435/122310 [11:11<3:36:11,  9.01it/s][A
  4%|█▌                                 | 5447/122310 [11:12<3:04:56, 10.53it/s][A
  4%|█▌                                 | 5460/122310 [11:13<2:42:36, 11.98it/s][A
  4%|█▌                                 | 5469/122310 [11:14<2:47:27, 11.63it/s][A
  4%|█▌                                 | 5474/122310 [11:15<3:16:28,  9.91it/s][A
  4%|█▌                                 | 5482/122310 [11:15<3:18:48,  9.79

step: 30400, loss: 100.93154534598997, epoch: 2
sim1 and sim2 are 0.448317731643995, 0.20160374824720662
cosine of pred and queen: 0.16808044119632726
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: jackasses
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: asia
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: europe
Actual: maharastr


  5%|█▌                                 | 5540/122310 [11:32<2:54:35, 11.15it/s][A

Actual: chhattisgarh:raipur::assam:dispur, pred: mohamed
Actual: goa:panaji::rajasthan:jaipur, pred: tempura
Actual: jharkhand:ranchi::punjab:chandigarh, pred: bihar
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: thiruvananthapuram
Actual: india:delhi::serbia:belgrade, pred: returne
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: scrubs
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: july
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, p


  5%|█▌                                | 5552/122310 [12:44<81:19:52,  2.51s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11834319526627218



  5%|█▌                                | 5558/122310 [12:45<65:42:43,  2.03s/it][A
  5%|█▌                                | 5562/122310 [12:45<56:08:24,  1.73s/it][A
  5%|█▌                                | 5570/122310 [12:46<39:25:12,  1.22s/it][A
  5%|█▌                                | 5584/122310 [12:47<22:52:51,  1.42it/s][A
  5%|█▌                                | 5596/122310 [12:48<15:43:43,  2.06it/s][A
  5%|█▌                                | 5606/122310 [12:49<11:57:58,  2.71it/s][A
  5%|█▌                                 | 5616/122310 [12:50<9:17:41,  3.49it/s][A
  5%|█▌                                 | 5628/122310 [12:51<6:55:50,  4.68it/s][A
  5%|█▌                                 | 5635/122310 [12:51<6:18:49,  5.13it/s][A
  5%|█▌                                 | 5642/122310 [12:52<5:44:00,  5.65it/s][A
  5%|█▌                                 | 5650/122310 [12:53<5:04:18,  6.39it/s][A
  5%|█▌                                 | 5655/122310 [12:54<5:06:15,  6.35

step: 30420, loss: 100.93408127136655, epoch: 2



  5%|█▋                                 | 5745/122310 [13:01<2:39:42, 12.16it/s][A
  5%|█▋                                 | 5755/122310 [13:02<2:39:45, 12.16it/s][A
  5%|█▋                                 | 5762/122310 [13:03<2:55:44, 11.05it/s][A
  5%|█▋                                 | 5767/122310 [13:04<3:22:28,  9.59it/s][A
  5%|█▋                                 | 5779/122310 [13:05<2:56:28, 11.01it/s][A
  5%|█▋                                 | 5783/122310 [13:06<3:31:33,  9.18it/s][A
  5%|█▋                                 | 5791/122310 [13:06<3:28:57,  9.29it/s][A
  5%|█▋                                 | 5798/122310 [13:07<3:34:18,  9.06it/s][A
  5%|█▋                                 | 5803/122310 [13:08<3:58:45,  8.13it/s][A
  5%|█▋                                 | 5807/122310 [13:09<4:31:49,  7.14it/s][A
  5%|█▋                                 | 5816/122310 [13:10<3:55:59,  8.23it/s][A
  5%|█▋                                 | 5826/122310 [13:11<3:27:41,  9.35

step: 30440, loss: 71.57715916041336, epoch: 2



  5%|█▋                                 | 5896/122310 [13:18<2:46:45, 11.64it/s][A
  5%|█▋                                 | 5905/122310 [13:19<2:50:18, 11.39it/s][A
  5%|█▋                                 | 5922/122310 [13:20<2:17:22, 14.12it/s][A
  5%|█▋                                 | 5932/122310 [13:20<2:22:57, 13.57it/s][A
  5%|█▋                                 | 5935/122310 [13:22<3:57:13,  8.18it/s][A
  5%|█▋                                 | 5943/122310 [13:23<3:46:04,  8.58it/s][A
  5%|█▋                                 | 5954/122310 [13:24<3:18:43,  9.76it/s][A
  5%|█▋                                 | 5959/122310 [13:25<3:42:04,  8.73it/s][A
  5%|█▋                                 | 5972/122310 [13:25<3:01:38, 10.68it/s][A
  5%|█▋                                 | 5988/122310 [13:26<2:27:57, 13.10it/s][A
  5%|█▋                                 | 5995/122310 [13:27<2:43:30, 11.86it/s][A
  5%|█▋                                 | 6000/122310 [13:29<4:08:46,  7.79

step: 30460, loss: 78.8909656097553, epoch: 2



  5%|█▋                                 | 6053/122310 [13:35<3:12:33, 10.06it/s][A
  5%|█▋                                 | 6062/122310 [13:35<3:10:17, 10.18it/s][A
  5%|█▋                                 | 6064/122310 [13:36<4:06:36,  7.86it/s][A
  5%|█▋                                 | 6072/122310 [13:37<3:51:02,  8.39it/s][A
  5%|█▋                                 | 6084/122310 [13:38<3:12:29, 10.06it/s][A
  5%|█▋                                 | 6087/122310 [13:39<3:59:46,  8.08it/s][A
  5%|█▋                                 | 6093/122310 [13:40<4:06:59,  7.84it/s][A
  5%|█▋                                 | 6099/122310 [13:41<4:13:26,  7.64it/s][A
  5%|█▊                                 | 6119/122310 [13:41<2:34:42, 12.52it/s][A
  5%|█▊                                 | 6127/122310 [13:42<2:45:49, 11.68it/s][A
  5%|█▊                                 | 6131/122310 [13:43<3:20:50,  9.64it/s][A
  5%|█▊                                 | 6136/122310 [13:44<3:46:44,  8.54

step: 30480, loss: 74.81846045664675, epoch: 2



  5%|█▊                                 | 6215/122310 [13:52<3:16:19,  9.86it/s][A
  5%|█▊                                 | 6222/122310 [13:53<3:25:47,  9.40it/s][A
  5%|█▊                                 | 6239/122310 [13:54<2:33:24, 12.61it/s][A
  5%|█▊                                 | 6246/122310 [13:54<2:48:19, 11.49it/s][A
  5%|█▊                                 | 6265/122310 [13:55<2:08:00, 15.11it/s][A
  5%|█▊                                 | 6275/122310 [13:56<2:14:24, 14.39it/s][A
  5%|█▊                                 | 6290/122310 [13:57<2:02:29, 15.79it/s][A
  5%|█▊                                 | 6297/122310 [13:57<2:20:22, 13.77it/s][A
  5%|█▊                                 | 6312/122310 [13:58<2:06:03, 15.34it/s][A
  5%|█▊                                 | 6319/122310 [13:59<2:23:14, 13.50it/s][A
  5%|█▊                                 | 6330/122310 [14:00<2:21:28, 13.66it/s][A
  5%|█▊                                 | 6335/122310 [14:01<2:48:22, 11.48

step: 30500, loss: 82.0908676017987, epoch: 2



  5%|█▊                                 | 6402/122310 [14:08<3:57:34,  8.13it/s][A
  5%|█▊                                 | 6412/122310 [14:08<3:22:42,  9.53it/s][A
  5%|█▊                                 | 6422/122310 [14:09<3:04:07, 10.49it/s][A
  5%|█▊                                 | 6434/122310 [14:10<2:41:39, 11.95it/s][A
  5%|█▊                                 | 6444/122310 [14:11<2:38:21, 12.19it/s][A
  5%|█▊                                 | 6447/122310 [14:12<3:19:21,  9.69it/s][A
  5%|█▊                                 | 6449/122310 [14:12<4:12:08,  7.66it/s][A
  5%|█▊                                 | 6464/122310 [14:13<2:51:40, 11.25it/s][A
  5%|█▊                                 | 6480/122310 [14:14<2:16:20, 14.16it/s][A
  5%|█▊                                 | 6490/122310 [14:15<2:18:49, 13.91it/s][A
  5%|█▊                                 | 6499/122310 [14:15<2:24:49, 13.33it/s][A
  5%|█▊                                 | 6518/122310 [14:16<1:54:24, 16.87

step: 30520, loss: 79.82696617083013, epoch: 2



  5%|█▉                                 | 6616/122310 [14:23<2:15:43, 14.21it/s][A
  5%|█▉                                 | 6623/122310 [14:24<2:31:51, 12.70it/s][A
  5%|█▉                                 | 6630/122310 [14:24<2:44:41, 11.71it/s][A
  5%|█▉                                 | 6637/122310 [14:25<2:55:48, 10.97it/s][A
  5%|█▉                                 | 6644/122310 [14:26<3:03:56, 10.48it/s][A
  5%|█▉                                 | 6654/122310 [14:27<2:50:19, 11.32it/s][A
  5%|█▉                                 | 6659/122310 [14:27<3:13:51,  9.94it/s][A
  5%|█▉                                 | 6664/122310 [14:28<3:35:18,  8.95it/s][A
  5%|█▉                                 | 6675/122310 [14:29<3:01:53, 10.60it/s][A
  5%|█▉                                 | 6678/122310 [14:30<3:42:30,  8.66it/s][A
  5%|█▉                                 | 6686/122310 [14:30<3:27:52,  9.27it/s][A
  5%|█▉                                 | 6692/122310 [14:31<3:37:25,  8.86

step: 30540, loss: 85.7037311807423, epoch: 2



  6%|█▉                                 | 6752/122310 [14:38<3:43:11,  8.63it/s][A
  6%|█▉                                 | 6762/122310 [14:39<3:15:17,  9.86it/s][A
  6%|█▉                                 | 6771/122310 [14:39<3:03:59, 10.47it/s][A
  6%|█▉                                 | 6783/122310 [14:40<2:40:18, 12.01it/s][A
  6%|█▉                                 | 6787/122310 [14:41<3:12:13, 10.02it/s][A
  6%|█▉                                 | 6789/122310 [14:42<4:03:40,  7.90it/s][A
  6%|█▉                                 | 6795/122310 [14:42<4:03:19,  7.91it/s][A
  6%|█▉                                 | 6799/122310 [14:43<4:29:57,  7.13it/s][A
  6%|█▉                                 | 6810/122310 [14:46<5:28:59,  5.85it/s][A
  6%|█▉                                 | 6811/122310 [14:46<6:31:46,  4.91it/s][A
  6%|█▉                                 | 6820/122310 [14:47<4:54:14,  6.54it/s][A
  6%|█▉                                 | 6825/122310 [14:48<4:52:53,  6.57

step: 30560, loss: 91.37206629711447, epoch: 2



  6%|█▉                                 | 6879/122310 [14:54<3:48:46,  8.41it/s][A
  6%|█▉                                 | 6886/122310 [14:55<4:37:47,  6.93it/s][A
  6%|█▉                                 | 6894/122310 [14:56<4:08:42,  7.73it/s][A
  6%|█▉                                 | 6903/122310 [14:57<3:40:23,  8.73it/s][A
  6%|█▉                                 | 6918/122310 [14:58<2:46:27, 11.55it/s][A
  6%|█▉                                 | 6929/122310 [14:58<2:35:48, 12.34it/s][A
  6%|█▉                                 | 6934/122310 [14:59<2:58:43, 10.76it/s][A
  6%|█▉                                 | 6942/122310 [15:00<2:59:29, 10.71it/s][A
  6%|█▉                                 | 6949/122310 [15:01<3:07:12, 10.27it/s][A
  6%|█▉                                 | 6955/122310 [15:01<3:21:24,  9.55it/s][A
  6%|█▉                                 | 6963/122310 [15:02<3:16:44,  9.77it/s][A
  6%|█▉                                 | 6967/122310 [15:03<3:55:56,  8.15

step: 30580, loss: 110.54647581418708, epoch: 2



  6%|██                                 | 7022/122310 [15:09<4:19:09,  7.41it/s][A
  6%|██                                 | 7033/122310 [15:09<3:25:53,  9.33it/s][A
  6%|██                                 | 7044/122310 [15:10<2:59:23, 10.71it/s][A
  6%|██                                 | 7050/122310 [15:11<3:16:20,  9.78it/s][A
  6%|██                                 | 7059/122310 [15:12<3:06:43, 10.29it/s][A
  6%|██                                 | 7067/122310 [15:13<3:07:24, 10.25it/s][A
  6%|██                                 | 7074/122310 [15:13<3:16:07,  9.79it/s][A
  6%|██                                 | 7078/122310 [15:14<3:46:45,  8.47it/s][A
  6%|██                                 | 7086/122310 [15:15<3:32:40,  9.03it/s][A
  6%|██                                 | 7092/122310 [15:16<3:41:38,  8.66it/s][A
  6%|██                                 | 7101/122310 [15:16<3:20:26,  9.58it/s][A
  6%|██                                 | 7110/122310 [15:17<3:07:57, 10.21

step: 30600, loss: 68.93059968896782, epoch: 2



  6%|██                                 | 7193/122310 [15:24<2:33:52, 12.47it/s][A
  6%|██                                 | 7202/122310 [15:25<2:34:33, 12.41it/s][A
  6%|██                                 | 7217/122310 [15:25<2:09:18, 14.83it/s][A
  6%|██                                 | 7226/122310 [15:26<2:17:03, 14.00it/s][A
  6%|██                                 | 7229/122310 [15:27<2:53:35, 11.05it/s][A
  6%|██                                 | 7243/122310 [15:28<2:22:20, 13.47it/s][A
  6%|██                                 | 7247/122310 [15:28<2:53:56, 11.02it/s][A
  6%|██                                 | 7253/122310 [15:29<3:08:23, 10.18it/s][A
  6%|██                                 | 7259/122310 [15:30<3:20:24,  9.57it/s][A
  6%|██                                 | 7265/122310 [15:31<3:29:11,  9.17it/s][A
  6%|██                                 | 7271/122310 [15:31<3:37:52,  8.80it/s][A
  6%|██                                 | 7284/122310 [15:32<2:49:01, 11.34

step: 30620, loss: 76.10465518538618, epoch: 2



  6%|██                                 | 7344/122310 [15:40<4:44:05,  6.74it/s][A
  6%|██                                 | 7349/122310 [15:40<4:41:40,  6.80it/s][A
  6%|██                                 | 7361/122310 [15:41<3:32:03,  9.03it/s][A
  6%|██                                 | 7367/122310 [15:42<3:37:52,  8.79it/s][A
  6%|██                                 | 7380/122310 [15:43<2:53:13, 11.06it/s][A
  6%|██                                 | 7387/122310 [15:43<3:01:47, 10.54it/s][A
  6%|██                                 | 7396/122310 [15:44<2:54:12, 10.99it/s][A
  6%|██                                 | 7402/122310 [15:45<3:10:00, 10.08it/s][A
  6%|██                                 | 7410/122310 [15:46<3:07:22, 10.22it/s][A
  6%|██                                 | 7419/122310 [15:46<2:59:43, 10.65it/s][A
  6%|██                                 | 7422/122310 [15:47<3:41:18,  8.65it/s][A
  6%|██▏                                | 7430/122310 [15:48<3:28:37,  9.18

step: 30640, loss: 80.97405504197839, epoch: 2



  6%|██▏                                | 7511/122310 [15:54<2:16:29, 14.02it/s][A
  6%|██▏                                | 7513/122310 [15:55<3:00:12, 10.62it/s][A
  6%|██▏                                | 7520/122310 [15:55<3:05:58, 10.29it/s][A
  6%|██▏                                | 7533/122310 [15:56<2:33:51, 12.43it/s][A
  6%|██▏                                | 7542/122310 [15:57<2:34:40, 12.37it/s][A
  6%|██▏                                | 7553/122310 [15:58<2:25:43, 13.13it/s][A
  6%|██▏                                | 7559/122310 [15:58<2:44:38, 11.62it/s][A
  6%|██▏                                | 7572/122310 [15:59<2:22:31, 13.42it/s][A
  6%|██▏                                | 7578/122310 [16:00<2:41:35, 11.83it/s][A
  6%|██▏                                | 7590/122310 [16:01<2:26:17, 13.07it/s][A
  6%|██▏                                | 7593/122310 [16:01<3:04:02, 10.39it/s][A
  6%|██▏                                | 7599/122310 [16:02<3:16:13,  9.74

step: 30660, loss: 75.74654632931775, epoch: 2



  6%|██▏                                | 7674/122310 [16:09<2:18:25, 13.80it/s][A
  6%|██▏                                | 7685/122310 [16:10<2:15:26, 14.10it/s][A
  6%|██▏                                | 7699/122310 [16:10<2:02:53, 15.54it/s][A
  6%|██▏                                | 7709/122310 [16:11<2:07:54, 14.93it/s][A
  6%|██▏                                | 7711/122310 [16:12<2:49:35, 11.26it/s][A
  6%|██▏                                | 7720/122310 [16:12<2:47:23, 11.41it/s][A
  6%|██▏                                | 7730/122310 [16:13<2:40:49, 11.87it/s][A
  6%|██▏                                | 7735/122310 [16:14<3:03:34, 10.40it/s][A
  6%|██▏                                | 7737/122310 [16:15<3:56:21,  8.08it/s][A
  6%|██▏                                | 7741/122310 [16:16<4:23:30,  7.25it/s][A
  6%|██▏                                | 7746/122310 [16:16<4:31:22,  7.04it/s][A
  6%|██▏                                | 7749/122310 [16:17<5:10:26,  6.15

step: 30680, loss: 87.77240339121767, epoch: 2



  6%|██▏                                | 7808/122310 [16:24<4:27:24,  7.14it/s][A
  6%|██▏                                | 7819/122310 [16:24<3:22:13,  9.44it/s][A
  6%|██▏                                | 7825/122310 [16:25<3:30:52,  9.05it/s][A
  6%|██▏                                | 7838/122310 [16:26<2:45:00, 11.56it/s][A
  6%|██▏                                | 7852/122310 [16:27<2:20:32, 13.57it/s][A
  6%|██▏                                | 7855/122310 [16:27<2:57:09, 10.77it/s][A
  6%|██▎                                | 7863/122310 [16:28<2:57:37, 10.74it/s][A
  6%|██▎                                | 7874/122310 [16:29<2:41:54, 11.78it/s][A
  6%|██▎                                | 7886/122310 [16:30<2:27:05, 12.96it/s][A
  6%|██▎                                | 7889/122310 [16:30<3:05:06, 10.30it/s][A
  6%|██▎                                | 7896/122310 [16:31<3:10:29, 10.01it/s][A
  6%|██▎                                | 7904/122310 [16:32<3:06:14, 10.24

step: 30700, loss: 79.01193986766509, epoch: 2



  7%|██▎                                | 7992/122310 [16:39<2:59:04, 10.64it/s][A
  7%|██▎                                | 8005/122310 [16:39<2:29:37, 12.73it/s][A
  7%|██▎                                | 8019/122310 [16:40<2:10:32, 14.59it/s][A
  7%|██▎                                | 8021/122310 [16:41<2:52:57, 11.01it/s][A
  7%|██▎                                | 8035/122310 [16:42<2:22:41, 13.35it/s][A
  7%|██▎                                | 8045/122310 [16:42<2:22:15, 13.39it/s][A
  7%|██▎                                | 8050/122310 [16:43<2:47:05, 11.40it/s][A
  7%|██▎                                | 8057/122310 [16:44<2:56:54, 10.76it/s][A
  7%|██▎                                | 8069/122310 [16:45<2:33:46, 12.38it/s][A
  7%|██▎                                | 8080/122310 [16:45<2:25:23, 13.09it/s][A
  7%|██▎                                | 8091/122310 [16:46<2:20:19, 13.57it/s][A
  7%|██▎                                | 8104/122310 [16:47<2:09:03, 14.75

step: 30720, loss: 70.06841399361453, epoch: 2



  7%|██▎                                | 8202/122310 [16:54<3:17:20,  9.64it/s][A
  7%|██▎                                | 8209/122310 [16:55<3:17:48,  9.61it/s][A
  7%|██▎                                | 8217/122310 [16:56<3:11:32,  9.93it/s][A
  7%|██▎                                | 8229/122310 [16:57<2:44:09, 11.58it/s][A
  7%|██▎                                | 8234/122310 [16:57<3:07:05, 10.16it/s][A
  7%|██▎                                | 8238/122310 [16:58<3:42:37,  8.54it/s][A
  7%|██▎                                | 8244/122310 [16:59<3:48:26,  8.32it/s][A
  7%|██▎                                | 8250/122310 [17:00<3:54:28,  8.11it/s][A
  7%|██▎                                | 8258/122310 [17:01<3:41:09,  8.60it/s][A
  7%|██▎                                | 8265/122310 [17:01<3:50:29,  8.25it/s][A
  7%|██▎                                | 8268/122310 [17:02<4:39:55,  6.79it/s][A
  7%|██▎                                | 8277/122310 [17:03<3:56:04,  8.05

step: 30740, loss: 71.80308457451855, epoch: 2



  7%|██▍                                | 8347/122310 [17:09<2:42:26, 11.69it/s][A
  7%|██▍                                | 8357/122310 [17:10<2:39:33, 11.90it/s][A
  7%|██▍                                | 8367/122310 [17:11<2:39:18, 11.92it/s][A
  7%|██▍                                | 8377/122310 [17:12<2:34:58, 12.25it/s][A
  7%|██▍                                | 8390/122310 [17:13<2:22:09, 13.36it/s][A
  7%|██▍                                | 8393/122310 [17:13<2:57:55, 10.67it/s][A
  7%|██▍                                | 8399/122310 [17:14<3:12:19,  9.87it/s][A
  7%|██▍                                | 8411/122310 [17:15<2:43:13, 11.63it/s][A
  7%|██▍                                | 8420/122310 [17:16<2:44:30, 11.54it/s][A
  7%|██▍                                | 8433/122310 [17:16<2:23:57, 13.18it/s][A
  7%|██▍                                | 8442/122310 [17:17<2:28:43, 12.76it/s][A
  7%|██▍                                | 8453/122310 [17:18<2:22:01, 13.36

step: 30760, loss: 73.88071304030247, epoch: 2



  7%|██▍                                | 8519/122310 [17:25<2:33:33, 12.35it/s][A
  7%|██▍                                | 8530/122310 [17:25<2:26:02, 12.98it/s][A
  7%|██▍                                | 8532/122310 [17:26<3:09:19, 10.02it/s][A
  7%|██▍                                | 8535/122310 [17:27<3:47:03,  8.35it/s][A
  7%|██▍                                | 8545/122310 [17:28<3:13:39,  9.79it/s][A
  7%|██▍                                | 8554/122310 [17:28<3:01:44, 10.43it/s][A
  7%|██▍                                | 8566/122310 [17:29<2:36:47, 12.09it/s][A
  7%|██▍                                | 8573/122310 [17:30<2:48:13, 11.27it/s][A
  7%|██▍                                | 8577/122310 [17:31<3:18:55,  9.53it/s][A
  7%|██▍                                | 8588/122310 [17:31<2:51:02, 11.08it/s][A
  7%|██▍                                | 8595/122310 [17:32<2:59:24, 10.56it/s][A
  7%|██▍                                | 8614/122310 [17:33<2:06:29, 14.98

step: 30780, loss: 73.84368066468419, epoch: 2



  7%|██▍                                | 8678/122310 [17:40<3:08:48, 10.03it/s][A
  7%|██▍                                | 8688/122310 [17:40<2:51:56, 11.01it/s][A
  7%|██▍                                | 8695/122310 [17:41<2:59:24, 10.55it/s][A
  7%|██▍                                | 8706/122310 [17:42<2:40:47, 11.78it/s][A
  7%|██▍                                | 8725/122310 [17:43<1:59:56, 15.78it/s][A
  7%|██▍                                | 8731/122310 [17:43<2:20:01, 13.52it/s][A
  7%|██▌                                | 8738/122310 [17:44<2:33:55, 12.30it/s][A
  7%|██▌                                | 8749/122310 [17:45<2:25:25, 13.01it/s][A
  7%|██▌                                | 8760/122310 [17:46<2:19:25, 13.57it/s][A
  7%|██▌                                | 8766/122310 [17:46<2:38:29, 11.94it/s][A
  7%|██▌                                | 8779/122310 [17:47<2:19:16, 13.59it/s][A
  7%|██▌                                | 8795/122310 [17:48<1:58:39, 15.94

step: 30800, loss: 96.1566906368066, epoch: 2
sim1 and sim2 are 0.4785993321465935, 0.21113553504734178
cosine of pred and queen: 0.17474488834144344
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: countries
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tickling
Actual: mahara


  7%|██▌                                | 8849/122310 [18:12<2:48:48, 11.20it/s][A

Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: efforts
Actual: denmark:danish::germany:german, pred: greek
Accuracy is 0.37777777777777777
Actual: walk:walks::vanish:vanishes, pred: chihiro
Actual: work:works::generate:generates, pred: pleaseth
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: placed
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: combating
Actual: scream:screams::sing:sings, pred: shebna
Actual: play:plays::listen:listens, pred: discarded
Actual: predict:predicts::search:searches, pred: pebbles
Actual: machine:machines::lion:lions, pred: holds
Actual: mango:mangoes::onion:onions, pred: mier
Actual: man:men::mango:mangoes, 


  7%|██▍                              | 8854/122310 [19:10<100:49:39,  3.20s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.1242603550295858



  7%|██▍                               | 8865/122310 [19:11<62:22:17,  1.98s/it][A
  7%|██▍                               | 8874/122310 [19:12<43:39:45,  1.39s/it][A
  7%|██▍                               | 8880/122310 [19:12<34:31:23,  1.10s/it][A
  7%|██▍                               | 8893/122310 [19:13<20:55:35,  1.51it/s][A
  7%|██▍                               | 8899/122310 [19:14<17:15:16,  1.83it/s][A
  7%|██▍                               | 8904/122310 [19:14<14:40:52,  2.15it/s][A
  7%|██▍                               | 8910/122310 [19:15<11:53:20,  2.65it/s][A
  7%|██▍                               | 8915/122310 [19:16<10:10:08,  3.10it/s][A
  7%|██▌                                | 8929/122310 [19:17<6:05:41,  5.17it/s][A
  7%|██▌                                | 8935/122310 [19:18<5:38:03,  5.59it/s][A
  7%|██▌                                | 8946/122310 [19:18<4:20:00,  7.27it/s][A
  7%|██▌                                | 8952/122310 [19:19<4:14:30,  7.42

step: 30820, loss: 96.78727199244375, epoch: 2



  7%|██▌                                | 9023/122310 [19:26<3:06:17, 10.14it/s][A
  7%|██▌                                | 9029/122310 [19:26<3:18:34,  9.51it/s][A
  7%|██▌                                | 9042/122310 [19:27<2:38:33, 11.91it/s][A
  7%|██▌                                | 9046/122310 [19:28<3:08:46, 10.00it/s][A
  7%|██▌                                | 9052/122310 [19:29<3:20:16,  9.43it/s][A
  7%|██▌                                | 9057/122310 [19:29<3:38:39,  8.63it/s][A
  7%|██▌                                | 9068/122310 [19:30<2:59:18, 10.53it/s][A
  7%|██▌                                | 9076/122310 [19:31<2:57:31, 10.63it/s][A
  7%|██▌                                | 9090/122310 [19:32<2:23:51, 13.12it/s][A
  7%|██▌                                | 9097/122310 [19:32<2:36:59, 12.02it/s][A
  7%|██▌                                | 9115/122310 [19:33<2:00:12, 15.69it/s][A
  7%|██▌                                | 9123/122310 [19:34<2:12:17, 14.26

step: 30840, loss: 72.47617825572627, epoch: 2



  8%|██▋                                | 9223/122310 [19:40<1:55:20, 16.34it/s][A
  8%|██▋                                | 9237/122310 [19:41<1:50:14, 17.10it/s][A
  8%|██▋                                | 9252/122310 [19:42<1:44:29, 18.03it/s][A
  8%|██▋                                | 9264/122310 [19:43<1:49:56, 17.14it/s][A
  8%|██▋                                | 9270/122310 [19:44<2:11:22, 14.34it/s][A
  8%|██▋                                | 9275/122310 [19:44<2:37:50, 11.93it/s][A
  8%|██▋                                | 9283/122310 [19:45<2:43:33, 11.52it/s][A
  8%|██▋                                | 9292/122310 [19:46<2:42:24, 11.60it/s][A
  8%|██▋                                | 9297/122310 [19:48<5:01:29,  6.25it/s][A
  8%|██▋                                | 9303/122310 [19:49<4:45:45,  6.59it/s][A
  8%|██▋                                | 9308/122310 [19:50<4:46:12,  6.58it/s][A
  8%|██▋                                | 9318/122310 [19:50<3:49:53,  8.19

step: 30860, loss: 70.51054917392787, epoch: 2



  8%|██▋                                | 9371/122310 [19:56<2:45:52, 11.35it/s][A
  8%|██▋                                | 9376/122310 [19:57<3:09:20,  9.94it/s][A
  8%|██▋                                | 9385/122310 [19:57<2:59:06, 10.51it/s][A
  8%|██▋                                | 9390/122310 [19:58<3:20:17,  9.40it/s][A
  8%|██▋                                | 9400/122310 [19:59<2:58:29, 10.54it/s][A
  8%|██▋                                | 9403/122310 [20:00<3:39:19,  8.58it/s][A
  8%|██▋                                | 9411/122310 [20:00<3:24:54,  9.18it/s][A
  8%|██▋                                | 9424/122310 [20:01<2:43:46, 11.49it/s][A
  8%|██▋                                | 9430/122310 [20:02<3:00:11, 10.44it/s][A
  8%|██▋                                | 9439/122310 [20:03<2:52:28, 10.91it/s][A
  8%|██▋                                | 9445/122310 [20:03<3:07:46, 10.02it/s][A
  8%|██▋                                | 9449/122310 [20:04<3:39:00,  8.59

step: 30880, loss: 67.2748674595814, epoch: 2



  8%|██▋                                | 9521/122310 [20:11<3:01:33, 10.35it/s][A
  8%|██▋                                | 9532/122310 [20:12<2:41:59, 11.60it/s][A
  8%|██▋                                | 9542/122310 [20:12<2:35:19, 12.10it/s][A
  8%|██▋                                | 9550/122310 [20:13<2:40:46, 11.69it/s][A
  8%|██▋                                | 9565/122310 [20:14<2:12:56, 14.14it/s][A
  8%|██▋                                | 9574/122310 [20:15<2:19:29, 13.47it/s][A
  8%|██▋                                | 9580/122310 [20:15<2:38:55, 11.82it/s][A
  8%|██▋                                | 9586/122310 [20:16<2:56:04, 10.67it/s][A
  8%|██▋                                | 9595/122310 [20:17<2:49:50, 11.06it/s][A
  8%|██▋                                | 9606/122310 [20:18<2:34:49, 12.13it/s][A
  8%|██▊                                | 9617/122310 [20:18<2:26:19, 12.84it/s][A
  8%|██▊                                | 9629/122310 [20:19<2:16:19, 13.78

step: 30900, loss: 203.92849729277847, epoch: 2



  8%|██▊                                | 9695/122310 [20:26<4:45:08,  6.58it/s][A
  8%|██▊                                | 9702/122310 [20:27<4:21:31,  7.18it/s][A
  8%|██▊                                | 9710/122310 [20:27<3:53:31,  8.04it/s][A
  8%|██▊                                | 9718/122310 [20:28<3:35:43,  8.70it/s][A
  8%|██▊                                | 9725/122310 [20:29<3:32:00,  8.85it/s][A
  8%|██▊                                | 9728/122310 [20:30<4:10:14,  7.50it/s][A
  8%|██▊                                | 9738/122310 [20:31<3:25:05,  9.15it/s][A
  8%|██▊                                | 9749/122310 [20:31<2:56:52, 10.61it/s][A
  8%|██▊                                | 9760/122310 [20:32<2:39:12, 11.78it/s][A
  8%|██▊                                | 9774/122310 [20:33<2:16:37, 13.73it/s][A
  8%|██▊                                | 9779/122310 [20:34<2:41:08, 11.64it/s][A
  8%|██▊                                | 9786/122310 [20:34<2:51:41, 10.92

step: 30920, loss: 69.66399637738617, epoch: 2



  8%|██▊                                | 9861/122310 [20:41<3:20:40,  9.34it/s][A
  8%|██▊                                | 9870/122310 [20:42<3:05:32, 10.10it/s][A
  8%|██▊                                | 9875/122310 [20:43<3:26:23,  9.08it/s][A
  8%|██▊                                | 9885/122310 [20:43<3:01:31, 10.32it/s][A
  8%|██▊                                | 9888/122310 [20:44<3:44:32,  8.34it/s][A
  8%|██▊                                | 9894/122310 [20:45<3:47:30,  8.24it/s][A
  8%|██▊                                | 9901/122310 [20:46<3:39:11,  8.55it/s][A
  8%|██▊                                | 9907/122310 [20:46<3:45:24,  8.31it/s][A
  8%|██▊                                | 9912/122310 [20:47<4:00:09,  7.80it/s][A
  8%|██▊                                | 9920/122310 [20:48<3:38:11,  8.58it/s][A
  8%|██▊                                | 9929/122310 [20:49<3:15:38,  9.57it/s][A
  8%|██▊                                | 9936/122310 [20:50<3:17:19,  9.49

step: 30940, loss: 74.32555387038384, epoch: 2



  8%|██▊                               | 10019/122310 [20:56<1:59:55, 15.61it/s][A
  8%|██▊                               | 10032/122310 [20:57<1:56:28, 16.07it/s][A
  8%|██▊                               | 10043/122310 [20:58<2:00:09, 15.57it/s][A
  8%|██▊                               | 10050/122310 [20:59<2:16:51, 13.67it/s][A
  8%|██▊                               | 10058/122310 [20:59<2:27:10, 12.71it/s][A
  8%|██▊                               | 10068/122310 [21:00<2:25:32, 12.85it/s][A
  8%|██▊                               | 10078/122310 [21:01<2:24:45, 12.92it/s][A
  8%|██▊                               | 10082/122310 [21:02<2:55:58, 10.63it/s][A
  8%|██▊                               | 10088/122310 [21:02<3:10:30,  9.82it/s][A
  8%|██▊                               | 10103/122310 [21:03<2:25:58, 12.81it/s][A
  8%|██▊                               | 10115/122310 [21:04<2:16:14, 13.72it/s][A
  8%|██▊                               | 10128/122310 [21:05<2:07:22, 14.68

step: 30960, loss: 74.30050353142953, epoch: 2



  8%|██▊                               | 10210/122310 [21:12<2:09:54, 14.38it/s][A
  8%|██▊                               | 10216/122310 [21:12<2:30:04, 12.45it/s][A
  8%|██▊                               | 10223/122310 [21:13<2:43:31, 11.42it/s][A
  8%|██▊                               | 10243/122310 [21:14<1:57:17, 15.92it/s][A
  8%|██▊                               | 10252/122310 [21:15<2:06:46, 14.73it/s][A
  8%|██▊                               | 10258/122310 [21:15<2:28:17, 12.59it/s][A
  8%|██▊                               | 10265/122310 [21:16<2:41:26, 11.57it/s][A
  8%|██▊                               | 10280/122310 [21:17<2:13:26, 13.99it/s][A
  8%|██▊                               | 10287/122310 [21:18<2:29:23, 12.50it/s][A
  8%|██▊                               | 10295/122310 [21:18<2:36:33, 11.92it/s][A
  8%|██▊                               | 10301/122310 [21:19<2:54:08, 10.72it/s][A
  8%|██▊                               | 10314/122310 [21:20<2:27:40, 12.64

step: 30980, loss: 86.36561248761492, epoch: 2



  8%|██▉                               | 10372/122310 [21:27<3:49:59,  8.11it/s][A
  8%|██▉                               | 10384/122310 [21:28<2:58:54, 10.43it/s][A
  8%|██▉                               | 10389/122310 [21:28<3:21:07,  9.27it/s][A
  9%|██▉                               | 10398/122310 [21:29<3:05:37, 10.05it/s][A
  9%|██▉                               | 10411/122310 [21:30<2:32:50, 12.20it/s][A
  9%|██▉                               | 10416/122310 [21:31<2:59:07, 10.41it/s][A
  9%|██▉                               | 10418/122310 [21:31<3:50:05,  8.10it/s][A
  9%|██▉                               | 10423/122310 [21:32<4:05:11,  7.61it/s][A
  9%|██▉                               | 10429/122310 [21:33<4:04:16,  7.63it/s][A
  9%|██▉                               | 10436/122310 [21:34<3:49:09,  8.14it/s][A
  9%|██▉                               | 10442/122310 [21:34<3:51:35,  8.05it/s][A
  9%|██▉                               | 10450/122310 [21:35<3:31:45,  8.80

step: 31000, loss: 77.86258343749542, epoch: 2
saving weights



  9%|██▉                               | 10529/122310 [21:42<3:24:26,  9.11it/s][A
  9%|██▉                               | 10536/122310 [21:43<3:23:17,  9.16it/s][A
  9%|██▉                               | 10555/122310 [21:44<2:21:27, 13.17it/s][A
  9%|██▉                               | 10562/122310 [21:45<2:33:23, 12.14it/s][A
  9%|██▉                               | 10569/122310 [21:45<2:45:03, 11.28it/s][A
  9%|██▉                               | 10576/122310 [21:46<2:53:28, 10.74it/s][A
  9%|██▉                               | 10586/122310 [21:47<2:43:09, 11.41it/s][A
  9%|██▉                               | 10595/122310 [21:48<2:41:00, 11.56it/s][A
  9%|██▉                               | 10604/122310 [21:48<2:42:18, 11.47it/s][A
  9%|██▉                               | 10619/122310 [21:49<2:15:20, 13.76it/s][A
  9%|██▉                               | 10628/122310 [21:50<2:21:43, 13.13it/s][A
  9%|██▉                               | 10632/122310 [21:51<2:53:57, 10.70

step: 31020, loss: 98.72723762065205, epoch: 2



  9%|██▉                               | 10698/122310 [21:59<4:02:04,  7.68it/s][A
  9%|██▉                               | 10707/122310 [21:59<3:30:21,  8.84it/s][A
  9%|██▉                               | 10721/122310 [22:05<7:25:41,  4.17it/s][A
  9%|██▉                               | 10731/122310 [22:05<5:48:37,  5.33it/s][A
  9%|██▉                               | 10749/122310 [22:06<3:47:11,  8.18it/s][A
  9%|██▉                               | 10757/122310 [22:07<3:37:11,  8.56it/s][A
  9%|██▉                               | 10764/122310 [22:08<3:34:46,  8.66it/s][A
  9%|██▉                               | 10770/122310 [22:09<3:40:26,  8.43it/s][A
  9%|██▉                               | 10778/122310 [22:09<3:29:20,  8.88it/s][A
  9%|██▉                               | 10786/122310 [22:10<3:20:37,  9.26it/s][A
  9%|███                               | 10795/122310 [22:11<3:07:50,  9.89it/s][A
  9%|███                               | 10809/122310 [22:12<2:34:42, 12.01

step: 31040, loss: 89.38752431708279, epoch: 2



  9%|███                               | 10842/122310 [22:14<2:33:10, 12.13it/s][A
  9%|███                               | 10854/122310 [22:15<2:24:38, 12.84it/s][A
  9%|███                               | 10864/122310 [22:16<2:25:08, 12.80it/s][A
  9%|███                               | 10875/122310 [22:17<2:21:53, 13.09it/s][A
  9%|███                               | 10887/122310 [22:17<2:15:16, 13.73it/s][A
  9%|███                               | 10892/122310 [22:18<2:40:51, 11.54it/s][A
  9%|███                               | 10900/122310 [22:19<2:46:48, 11.13it/s][A
  9%|███                               | 10904/122310 [22:20<3:17:52,  9.38it/s][A
  9%|███                               | 10922/122310 [22:21<2:18:14, 13.43it/s][A
  9%|███                               | 10926/122310 [22:21<2:50:21, 10.90it/s][A
  9%|███                               | 10929/122310 [22:22<3:30:00,  8.84it/s][A
  9%|███                               | 10932/122310 [22:23<4:12:59,  7.34

step: 31060, loss: 70.94849147450049, epoch: 2



  9%|███                               | 11019/122310 [22:30<3:04:32, 10.05it/s][A
  9%|███                               | 11027/122310 [22:31<3:02:26, 10.17it/s][A
  9%|███                               | 11037/122310 [22:31<2:48:19, 11.02it/s][A
  9%|███                               | 11043/122310 [22:32<3:04:20, 10.06it/s][A
  9%|███                               | 11047/122310 [22:33<3:36:09,  8.58it/s][A
  9%|███                               | 11055/122310 [22:34<3:24:21,  9.07it/s][A
  9%|███                               | 11057/122310 [22:34<4:18:59,  7.16it/s][A
  9%|███                               | 11065/122310 [22:35<3:48:00,  8.13it/s][A
  9%|███                               | 11070/122310 [22:36<4:02:31,  7.64it/s][A
  9%|███                               | 11076/122310 [22:37<4:01:37,  7.67it/s][A
  9%|███                               | 11091/122310 [22:38<2:45:25, 11.21it/s][A
  9%|███                               | 11094/122310 [22:38<3:26:12,  8.99

step: 31080, loss: 77.87190161028894, epoch: 2



  9%|███                               | 11173/122310 [22:45<2:12:52, 13.94it/s][A
  9%|███                               | 11175/122310 [22:46<2:55:52, 10.53it/s][A
  9%|███                               | 11185/122310 [22:47<2:44:54, 11.23it/s][A
  9%|███                               | 11195/122310 [22:48<2:37:03, 11.79it/s][A
  9%|███                               | 11212/122310 [22:48<2:04:39, 14.85it/s][A
  9%|███                               | 11215/122310 [22:49<2:40:46, 11.52it/s][A
  9%|███                               | 11225/122310 [22:50<2:34:38, 11.97it/s][A
  9%|███                               | 11229/122310 [22:51<3:05:58,  9.96it/s][A
  9%|███                               | 11239/122310 [22:51<2:50:28, 10.86it/s][A
  9%|███▏                              | 11256/122310 [22:52<2:10:14, 14.21it/s][A
  9%|███▏                              | 11267/122310 [22:53<2:09:57, 14.24it/s][A
  9%|███▏                              | 11277/122310 [22:54<2:14:00, 13.81

step: 31100, loss: 75.03255603633508, epoch: 2



  9%|███▏                              | 11343/122310 [23:01<3:36:42,  8.53it/s][A
  9%|███▏                              | 11358/122310 [23:02<2:38:09, 11.69it/s][A
  9%|███▏                              | 11367/122310 [23:03<3:25:04,  9.02it/s][A
  9%|███▏                              | 11372/122310 [23:04<3:40:22,  8.39it/s][A
  9%|███▏                              | 11375/122310 [23:05<4:15:31,  7.24it/s][A
  9%|███▏                              | 11386/122310 [23:05<3:22:40,  9.12it/s][A
  9%|███▏                              | 11393/122310 [23:06<3:22:20,  9.14it/s][A
  9%|███▏                              | 11407/122310 [23:07<2:38:08, 11.69it/s][A
  9%|███▏                              | 11410/122310 [23:08<3:17:01,  9.38it/s][A
  9%|███▏                              | 11411/122310 [23:09<4:21:04,  7.08it/s][A
  9%|███▏                              | 11425/122310 [23:09<2:58:42, 10.34it/s][A
  9%|███▏                              | 11429/122310 [23:10<3:29:34,  8.82

step: 31120, loss: 98.61054082081642, epoch: 2



  9%|███▏                              | 11486/122310 [23:16<2:48:24, 10.97it/s][A
  9%|███▏                              | 11499/122310 [23:17<2:25:07, 12.73it/s][A
  9%|███▏                              | 11512/122310 [23:18<2:11:53, 14.00it/s][A
  9%|███▏                              | 11517/122310 [23:19<2:37:21, 11.73it/s][A
  9%|███▏                              | 11526/122310 [23:19<2:37:19, 11.74it/s][A
  9%|███▏                              | 11538/122310 [23:20<2:24:09, 12.81it/s][A
  9%|███▏                              | 11550/122310 [23:21<2:15:41, 13.60it/s][A
  9%|███▏                              | 11562/122310 [23:22<2:10:44, 14.12it/s][A
  9%|███▏                              | 11575/122310 [23:22<2:03:51, 14.90it/s][A
  9%|███▏                              | 11579/122310 [23:23<2:33:27, 12.03it/s][A
  9%|███▏                              | 11596/122310 [23:24<2:02:32, 15.06it/s][A
  9%|███▏                              | 11610/122310 [23:25<1:55:41, 15.95

step: 31140, loss: 68.1964789777371, epoch: 2



 10%|███▏                              | 11678/122310 [23:32<2:41:32, 11.41it/s][A
 10%|███▏                              | 11683/122310 [23:32<3:03:47, 10.03it/s][A
 10%|███▏                              | 11691/122310 [23:33<3:01:30, 10.16it/s][A
 10%|███▎                              | 11704/122310 [23:34<2:31:42, 12.15it/s][A
 10%|███▎                              | 11715/122310 [23:35<2:24:08, 12.79it/s][A
 10%|███▎                              | 11720/122310 [23:36<2:49:51, 10.85it/s][A
 10%|███▎                              | 11729/122310 [23:36<2:45:38, 11.13it/s][A
 10%|███▎                              | 11738/122310 [23:37<2:42:45, 11.32it/s][A
 10%|███▎                              | 11756/122310 [23:38<2:03:23, 14.93it/s][A
 10%|███▎                              | 11762/122310 [23:39<2:24:06, 12.78it/s][A
 10%|███▎                              | 11766/122310 [23:39<2:54:53, 10.53it/s][A
 10%|███▎                              | 11772/122310 [23:40<3:09:59,  9.70

step: 31160, loss: 82.24485881901045, epoch: 2



 10%|███▎                              | 11818/122310 [23:47<4:28:00,  6.87it/s][A
 10%|███▎                              | 11830/122310 [23:48<3:10:03,  9.69it/s][A
 10%|███▎                              | 11835/122310 [23:48<3:28:52,  8.82it/s][A
 10%|███▎                              | 11837/122310 [23:49<4:23:50,  6.98it/s][A
 10%|███▎                              | 11845/122310 [23:50<3:46:01,  8.15it/s][A
 10%|███▎                              | 11856/122310 [23:51<3:00:59, 10.17it/s][A
 10%|███▎                              | 11861/122310 [23:51<3:21:42,  9.13it/s][A
 10%|███▎                              | 11869/122310 [23:52<3:11:02,  9.64it/s][A
 10%|███▎                              | 11882/122310 [23:53<2:33:23, 12.00it/s][A
 10%|███▎                              | 11888/122310 [23:54<2:51:05, 10.76it/s][A
 10%|███▎                              | 11893/122310 [23:54<3:14:23,  9.47it/s][A
 10%|███▎                              | 11898/122310 [23:55<3:34:28,  8.58

step: 31180, loss: 77.61243348616613, epoch: 2



 10%|███▎                              | 11967/122310 [24:02<3:05:51,  9.90it/s][A
 10%|███▎                              | 11976/122310 [24:03<2:55:41, 10.47it/s][A
 10%|███▎                              | 11989/122310 [24:04<2:27:54, 12.43it/s][A
 10%|███▎                              | 11995/122310 [24:04<2:46:50, 11.02it/s][A
 10%|███▎                              | 12002/122310 [24:05<2:55:27, 10.48it/s][A
 10%|███▎                              | 12011/122310 [24:06<2:49:56, 10.82it/s][A
 10%|███▎                              | 12014/122310 [24:07<3:29:57,  8.76it/s][A
 10%|███▎                              | 12025/122310 [24:07<2:55:42, 10.46it/s][A
 10%|███▎                              | 12036/122310 [24:08<2:37:46, 11.65it/s][A
 10%|███▎                              | 12039/122310 [24:09<3:16:37,  9.35it/s][A
 10%|███▎                              | 12050/122310 [24:10<2:48:06, 10.93it/s][A
 10%|███▎                              | 12061/122310 [24:10<2:31:34, 12.12

step: 31200, loss: 71.76234635637702, epoch: 2
sim1 and sim2 are 0.49483858842632245, 0.17942031042776754
cosine of pred and queen: 0.15274017057799744
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mah


 10%|███▎                              | 12128/122310 [24:32<2:04:56, 14.70it/s][A

Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: july
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: deceitfull
Actual: poland:polish::italy:italian, pred: start
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: efforts
Actual: denmark:danish::germany:german, pred: europe
Accuracy is 0.3333333333333333
Actual: walk:walks::vanish:vanishes, pred: chihiro
Actual: work:works::generate:generates, pred: pleaseth
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: placed
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: challenge
Actual: scre


 10%|███▎                             | 12136/122310 [25:32<70:54:17,  2.32s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 10%|███▎                             | 12146/122310 [25:33<50:30:48,  1.65s/it][A
 10%|███▎                             | 12158/122310 [25:34<34:06:12,  1.11s/it][A
 10%|███▎                             | 12165/122310 [25:35<27:18:33,  1.12it/s][A
 10%|███▎                             | 12183/122310 [25:35<15:40:29,  1.95it/s][A
 10%|███▎                             | 12189/122310 [25:36<13:34:18,  2.25it/s][A
 10%|███▎                             | 12196/122310 [25:37<11:13:37,  2.72it/s][A
 10%|███▍                              | 12204/122310 [25:37<8:56:54,  3.42it/s][A
 10%|███▍                              | 12214/122310 [25:38<6:45:42,  4.52it/s][A
 10%|███▍                              | 12224/122310 [25:39<5:19:30,  5.74it/s][A
 10%|███▍                              | 12232/122310 [25:40<4:39:35,  6.56it/s][A
 10%|███▍                              | 12237/122310 [25:40<4:37:32,  6.61it/s][A
 10%|███▍                              | 12257/122310 [25:41<2:47:59, 10.92

step: 31220, loss: 89.67980949135239, epoch: 2



 10%|███▍                              | 12353/122310 [25:48<1:58:54, 15.41it/s][A
 10%|███▍                              | 12360/122310 [25:48<2:13:58, 13.68it/s][A
 10%|███▍                              | 12368/122310 [25:49<2:22:06, 12.89it/s][A
 10%|███▍                              | 12377/122310 [25:50<2:24:04, 12.72it/s][A
 10%|███▍                              | 12383/122310 [25:51<2:40:36, 11.41it/s][A
 10%|███▍                              | 12387/122310 [25:54<6:56:52,  4.39it/s][A
 10%|███▍                              | 12395/122310 [25:55<5:36:09,  5.45it/s][A
 10%|███▍                              | 12406/122310 [25:56<4:13:12,  7.23it/s][A
 10%|███▍                              | 12411/122310 [25:56<4:16:01,  7.15it/s][A
 10%|███▍                              | 12423/122310 [25:57<3:16:50,  9.30it/s][A
 10%|███▍                              | 12441/122310 [25:58<2:20:23, 13.04it/s][A
 10%|███▍                              | 12448/122310 [25:59<2:31:33, 12.08

step: 31240, loss: 78.62030820093382, epoch: 2



 10%|███▍                              | 12480/122310 [26:02<3:19:54,  9.16it/s][A
 10%|███▍                              | 12496/122310 [26:03<2:27:11, 12.43it/s][A
 10%|███▍                              | 12512/122310 [26:04<2:02:47, 14.90it/s][A
 10%|███▍                              | 12518/122310 [26:05<2:20:13, 13.05it/s][A
 10%|███▍                              | 12522/122310 [26:05<2:47:47, 10.91it/s][A
 10%|███▍                              | 12535/122310 [26:06<2:22:46, 12.81it/s][A
 10%|███▍                              | 12544/122310 [26:07<2:24:30, 12.66it/s][A
 10%|███▍                              | 12551/122310 [26:08<3:22:10,  9.05it/s][A
 10%|███▍                              | 12556/122310 [26:09<3:34:21,  8.53it/s][A
 10%|███▍                              | 12568/122310 [26:10<2:53:38, 10.53it/s][A
 10%|███▍                              | 12578/122310 [26:10<2:41:39, 11.31it/s][A
 10%|███▌                              | 12591/122310 [26:11<2:20:32, 13.01

step: 31260, loss: 72.21419989574947, epoch: 2



 10%|███▌                              | 12659/122310 [26:17<2:05:41, 14.54it/s][A
 10%|███▌                              | 12666/122310 [26:18<2:19:51, 13.07it/s][A
 10%|███▌                              | 12676/122310 [26:19<2:18:05, 13.23it/s][A
 10%|███▌                              | 12683/122310 [26:19<2:30:38, 12.13it/s][A
 10%|███▌                              | 12691/122310 [26:20<2:35:31, 11.75it/s][A
 10%|███▌                              | 12703/122310 [26:21<2:19:37, 13.08it/s][A
 10%|███▌                              | 12710/122310 [26:21<2:31:36, 12.05it/s][A
 10%|███▌                              | 12717/122310 [26:22<2:41:37, 11.30it/s][A
 10%|███▌                              | 12723/122310 [26:23<2:57:20, 10.30it/s][A
 10%|███▌                              | 12729/122310 [26:24<3:09:07,  9.66it/s][A
 10%|███▌                              | 12733/122310 [26:24<3:37:21,  8.40it/s][A
 10%|███▌                              | 12740/122310 [26:25<3:29:41,  8.71

step: 31280, loss: 69.5981681407256, epoch: 2



 10%|███▌                              | 12825/122310 [26:32<3:06:04,  9.81it/s][A
 10%|███▌                              | 12827/122310 [26:33<3:55:55,  7.73it/s][A
 10%|███▌                              | 12835/122310 [26:33<3:30:56,  8.65it/s][A
 10%|███▌                              | 12841/122310 [26:34<3:33:51,  8.53it/s][A
 11%|███▌                              | 12848/122310 [26:35<3:26:52,  8.82it/s][A
 11%|███▌                              | 12858/122310 [26:35<2:58:09, 10.24it/s][A
 11%|███▌                              | 12866/122310 [26:36<2:54:24, 10.46it/s][A
 11%|███▌                              | 12880/122310 [26:37<2:20:45, 12.96it/s][A
 11%|███▌                              | 12890/122310 [26:38<2:19:00, 13.12it/s][A
 11%|███▌                              | 12899/122310 [26:38<2:22:18, 12.81it/s][A
 11%|███▌                              | 12907/122310 [26:39<2:29:24, 12.20it/s][A
 11%|███▌                              | 12916/122310 [26:40<2:29:51, 12.17

step: 31300, loss: 73.71734869403186, epoch: 2



 11%|███▌                              | 12985/122310 [26:47<3:26:41,  8.82it/s][A
 11%|███▌                              | 12994/122310 [26:48<3:08:40,  9.66it/s][A
 11%|███▌                              | 13007/122310 [26:49<2:35:38, 11.70it/s][A
 11%|███▌                              | 13017/122310 [26:49<2:29:12, 12.21it/s][A
 11%|███▌                              | 13021/122310 [26:50<2:56:22, 10.33it/s][A
 11%|███▌                              | 13026/122310 [26:51<3:15:39,  9.31it/s][A
 11%|███▌                              | 13034/122310 [26:52<3:07:26,  9.72it/s][A
 11%|███▌                              | 13038/122310 [26:52<3:34:07,  8.51it/s][A
 11%|███▋                              | 13048/122310 [26:53<3:02:31,  9.98it/s][A
 11%|███▋                              | 13053/122310 [26:54<3:21:15,  9.05it/s][A
 11%|███▋                              | 13057/122310 [26:55<3:48:39,  7.96it/s][A
 11%|███▋                              | 13059/122310 [26:57<7:32:29,  4.02

step: 31320, loss: 91.69256629283737, epoch: 2



 11%|███▋                              | 13111/122310 [27:01<2:59:54, 10.12it/s][A
 11%|███▋                              | 13113/122310 [27:02<3:46:23,  8.04it/s][A
 11%|███▋                              | 13119/122310 [27:03<3:44:43,  8.10it/s][A
 11%|███▋                              | 13126/122310 [27:03<3:33:40,  8.52it/s][A
 11%|███▋                              | 13139/122310 [27:04<2:42:53, 11.17it/s][A
 11%|███▋                              | 13148/122310 [27:05<2:38:17, 11.49it/s][A
 11%|███▋                              | 13154/122310 [27:08<5:26:47,  5.57it/s][A
 11%|███▋                              | 13163/122310 [27:09<4:29:50,  6.74it/s][A
 11%|███▋                              | 13165/122310 [27:09<5:07:04,  5.92it/s][A
 11%|███▋                              | 13171/122310 [27:10<4:42:53,  6.43it/s][A
 11%|███▋                              | 13183/122310 [27:11<3:25:41,  8.84it/s][A
 11%|███▋                              | 13188/122310 [27:12<3:39:27,  8.29

step: 31340, loss: 74.8212698503838, epoch: 2



 11%|███▋                              | 13237/122310 [27:16<3:08:21,  9.65it/s][A
 11%|███▋                              | 13247/122310 [27:17<2:48:01, 10.82it/s][A
 11%|███▋                              | 13256/122310 [27:18<2:41:40, 11.24it/s][A
 11%|███▋                              | 13265/122310 [27:18<2:37:22, 11.55it/s][A
 11%|███▋                              | 13275/122310 [27:19<2:29:34, 12.15it/s][A
 11%|███▋                              | 13284/122310 [27:20<2:29:34, 12.15it/s][A
 11%|███▋                              | 13291/122310 [27:20<2:39:34, 11.39it/s][A
 11%|███▋                              | 13299/122310 [27:21<2:41:44, 11.23it/s][A
 11%|███▋                              | 13318/122310 [27:22<1:56:48, 15.55it/s][A
 11%|███▋                              | 13332/122310 [27:23<1:49:25, 16.60it/s][A
 11%|███▋                              | 13342/122310 [27:23<1:55:55, 15.67it/s][A
 11%|███▋                              | 13354/122310 [27:24<1:54:53, 15.81

step: 31360, loss: 86.99769616433674, epoch: 2



 11%|███▋                              | 13418/122310 [27:31<3:22:01,  8.98it/s][A
 11%|███▋                              | 13419/122310 [27:32<4:26:58,  6.80it/s][A
 11%|███▋                              | 13428/122310 [27:32<3:36:55,  8.37it/s][A
 11%|███▋                              | 13444/122310 [27:34<3:10:35,  9.52it/s][A
 11%|███▋                              | 13462/122310 [27:35<2:17:39, 13.18it/s][A
 11%|███▋                              | 13469/122310 [27:35<2:28:25, 12.22it/s][A
 11%|███▋                              | 13474/122310 [27:36<2:48:45, 10.75it/s][A
 11%|███▋                              | 13481/122310 [27:37<2:55:01, 10.36it/s][A
 11%|███▋                              | 13490/122310 [27:37<2:46:29, 10.89it/s][A
 11%|███▊                              | 13500/122310 [27:38<2:35:29, 11.66it/s][A
 11%|███▊                              | 13507/122310 [27:39<2:44:59, 10.99it/s][A
 11%|███▊                              | 13510/122310 [27:40<3:22:13,  8.97

step: 31380, loss: 77.38900267367379, epoch: 2



 11%|███▊                              | 13567/122310 [27:46<3:19:46,  9.07it/s][A
 11%|███▊                              | 13573/122310 [27:46<3:25:25,  8.82it/s][A
 11%|███▊                              | 13583/122310 [27:47<2:57:14, 10.22it/s][A
 11%|███▊                              | 13592/122310 [27:48<2:47:02, 10.85it/s][A
 11%|███▊                              | 13605/122310 [27:49<2:20:48, 12.87it/s][A
 11%|███▊                              | 13614/122310 [27:49<2:23:06, 12.66it/s][A
 11%|███▊                              | 13625/122310 [27:50<2:15:49, 13.34it/s][A
 11%|███▊                              | 13636/122310 [27:51<2:11:24, 13.78it/s][A
 11%|███▊                              | 13642/122310 [27:51<2:29:36, 12.11it/s][A
 11%|███▊                              | 13647/122310 [27:52<2:52:21, 10.51it/s][A
 11%|███▊                              | 13655/122310 [27:53<2:51:10, 10.58it/s][A
 11%|███▊                              | 13660/122310 [27:54<3:13:12,  9.37

step: 31400, loss: 81.9778514106411, epoch: 2



 11%|███▊                              | 13716/122310 [28:00<3:45:41,  8.02it/s][A
 11%|███▊                              | 13719/122310 [28:01<4:25:06,  6.83it/s][A
 11%|███▊                              | 13727/122310 [28:02<3:45:13,  8.04it/s][A
 11%|███▊                              | 13741/122310 [28:03<2:40:22, 11.28it/s][A
 11%|███▊                              | 13751/122310 [28:03<2:31:17, 11.96it/s][A
 11%|███▊                              | 13758/122310 [28:04<2:41:26, 11.21it/s][A
 11%|███▊                              | 13766/122310 [28:05<2:42:51, 11.11it/s][A
 11%|███▊                              | 13778/122310 [28:06<2:23:04, 12.64it/s][A
 11%|███▊                              | 13791/122310 [28:06<2:07:58, 14.13it/s][A
 11%|███▊                              | 13804/122310 [28:07<1:59:21, 15.15it/s][A
 11%|███▊                              | 13813/122310 [28:08<2:06:48, 14.26it/s][A
 11%|███▊                              | 13819/122310 [28:09<2:25:30, 12.43

step: 31420, loss: 90.7164285748163, epoch: 2



 11%|███▊                              | 13895/122310 [28:15<2:49:54, 10.63it/s][A
 11%|███▊                              | 13904/122310 [28:16<2:42:45, 11.10it/s][A
 11%|███▊                              | 13915/122310 [28:17<2:28:07, 12.20it/s][A
 11%|███▊                              | 13920/122310 [28:17<2:50:21, 10.60it/s][A
 11%|███▊                              | 13925/122310 [28:18<3:10:22,  9.49it/s][A
 11%|███▊                              | 13928/122310 [28:19<3:51:11,  7.81it/s][A
 11%|███▉                              | 13940/122310 [28:20<2:54:54, 10.33it/s][A
 11%|███▉                              | 13948/122310 [28:20<2:53:08, 10.43it/s][A
 11%|███▉                              | 13957/122310 [28:21<2:44:41, 10.97it/s][A
 11%|███▉                              | 13967/122310 [28:22<2:34:31, 11.69it/s][A
 11%|███▉                              | 13973/122310 [28:23<2:49:47, 10.63it/s][A
 11%|███▉                              | 13981/122310 [28:23<2:49:25, 10.66

step: 31440, loss: 86.4687572794798, epoch: 2



 11%|███▉                              | 14041/122310 [28:30<3:41:03,  8.16it/s][A
 11%|███▉                              | 14047/122310 [28:31<3:39:42,  8.21it/s][A
 11%|███▉                              | 14052/122310 [28:31<3:50:52,  7.82it/s][A
 11%|███▉                              | 14062/122310 [28:32<3:07:30,  9.62it/s][A
 12%|███▉                              | 14072/122310 [28:33<2:45:36, 10.89it/s][A
 12%|███▉                              | 14085/122310 [28:34<2:18:46, 13.00it/s][A
 12%|███▉                              | 14093/122310 [28:34<2:25:03, 12.43it/s][A
 12%|███▉                              | 14103/122310 [28:35<2:20:35, 12.83it/s][A
 12%|███▉                              | 14111/122310 [28:36<2:26:24, 12.32it/s][A
 12%|███▉                              | 14120/122310 [28:37<2:25:59, 12.35it/s][A
 12%|███▉                              | 14130/122310 [28:37<2:21:15, 12.76it/s][A
 12%|███▉                              | 14135/122310 [28:38<2:44:14, 10.98

step: 31460, loss: 129.5702169316609, epoch: 2



 12%|███▉                              | 14199/122310 [28:44<2:48:10, 10.71it/s][A
 12%|███▉                              | 14203/122310 [28:45<3:16:12,  9.18it/s][A
 12%|███▉                              | 14213/122310 [28:46<2:50:11, 10.59it/s][A
 12%|███▉                              | 14219/122310 [28:47<3:01:21,  9.93it/s][A
 12%|███▉                              | 14225/122310 [28:47<3:14:24,  9.27it/s][A
 12%|███▉                              | 14230/122310 [28:48<3:33:06,  8.45it/s][A
 12%|███▉                              | 14234/122310 [28:49<4:01:02,  7.47it/s][A
 12%|███▉                              | 14240/122310 [28:50<3:55:57,  7.63it/s][A
 12%|███▉                              | 14247/122310 [28:50<3:44:24,  8.03it/s][A
 12%|███▉                              | 14259/122310 [28:51<2:54:08, 10.34it/s][A
 12%|███▉                              | 14267/122310 [28:52<2:54:33, 10.32it/s][A
 12%|███▉                              | 14276/122310 [28:53<2:46:37, 10.81

step: 31480, loss: 90.39341568249009, epoch: 2



 12%|███▉                              | 14346/122310 [29:00<2:59:17, 10.04it/s][A
 12%|███▉                              | 14352/122310 [29:01<3:11:46,  9.38it/s][A
 12%|███▉                              | 14361/122310 [29:01<2:57:39, 10.13it/s][A
 12%|███▉                              | 14378/122310 [29:02<2:10:18, 13.80it/s][A
 12%|███▉                              | 14384/122310 [29:03<2:29:44, 12.01it/s][A
 12%|████                              | 14390/122310 [29:04<2:46:03, 10.83it/s][A
 12%|████                              | 14405/122310 [29:04<2:13:17, 13.49it/s][A
 12%|████                              | 14413/122310 [29:05<2:22:47, 12.59it/s][A
 12%|████                              | 14426/122310 [29:06<2:08:46, 13.96it/s][A
 12%|████                              | 14436/122310 [29:07<2:11:07, 13.71it/s][A
 12%|████                              | 14452/122310 [29:07<1:52:47, 15.94it/s][A
 12%|████                              | 14464/122310 [29:08<1:53:02, 15.90

step: 31500, loss: 78.27491691846608, epoch: 2



 12%|████                              | 14551/122310 [29:15<2:10:37, 13.75it/s][A
 12%|████                              | 14561/122310 [29:16<2:12:10, 13.59it/s][A
 12%|████                              | 14571/122310 [29:16<2:13:04, 13.49it/s][A
 12%|████                              | 14585/122310 [29:17<1:59:47, 14.99it/s][A
 12%|████                              | 14587/122310 [29:18<2:38:55, 11.30it/s][A
 12%|████                              | 14592/122310 [29:19<3:01:04,  9.91it/s][A
 12%|████                              | 14601/122310 [29:19<2:50:34, 10.52it/s][A
 12%|████                              | 14611/122310 [29:20<2:38:33, 11.32it/s][A
 12%|████                              | 14622/122310 [29:21<2:26:10, 12.28it/s][A
 12%|████                              | 14626/122310 [29:22<2:56:45, 10.15it/s][A
 12%|████                              | 14631/122310 [29:22<3:16:44,  9.12it/s][A
 12%|████                              | 14636/122310 [29:23<3:36:26,  8.29

step: 31520, loss: 72.17514553488499, epoch: 2



 12%|████                              | 14720/122310 [29:30<2:07:27, 14.07it/s][A
 12%|████                              | 14728/122310 [29:31<2:17:13, 13.07it/s][A
 12%|████                              | 14731/122310 [29:32<2:54:13, 10.29it/s][A
 12%|████                              | 14741/122310 [29:32<2:39:33, 11.24it/s][A
 12%|████                              | 14754/122310 [29:33<2:17:34, 13.03it/s][A
 12%|████                              | 14762/122310 [29:34<2:26:05, 12.27it/s][A
 12%|████                              | 14773/122310 [29:35<2:18:56, 12.90it/s][A
 12%|████                              | 14780/122310 [29:35<2:30:31, 11.91it/s][A
 12%|████                              | 14791/122310 [29:36<2:21:05, 12.70it/s][A
 12%|████                              | 14794/122310 [29:37<2:58:13, 10.05it/s][A
 12%|████                              | 14805/122310 [29:38<2:36:55, 11.42it/s][A
 12%|████                              | 14818/122310 [29:38<2:16:25, 13.13

step: 31540, loss: 67.52755388034457, epoch: 2



 12%|████▏                             | 14877/122310 [29:45<3:07:44,  9.54it/s][A
 12%|████▏                             | 14887/122310 [29:46<2:47:53, 10.66it/s][A
 12%|████▏                             | 14894/122310 [29:47<2:55:15, 10.21it/s][A
 12%|████▏                             | 14901/122310 [29:47<3:00:48,  9.90it/s][A
 12%|████▏                             | 14908/122310 [29:48<3:05:17,  9.66it/s][A
 12%|████▏                             | 14922/122310 [29:49<2:26:14, 12.24it/s][A
 12%|████▏                             | 14925/122310 [29:50<3:02:52,  9.79it/s][A
 12%|████▏                             | 14940/122310 [29:51<2:20:00, 12.78it/s][A
 12%|████▏                             | 14949/122310 [29:51<2:22:53, 12.52it/s][A
 12%|████▏                             | 14958/122310 [29:52<2:24:39, 12.37it/s][A
 12%|████▏                             | 14966/122310 [29:53<2:31:03, 11.84it/s][A
 12%|████▏                             | 14975/122310 [29:54<2:31:16, 11.83

step: 31560, loss: 79.83568670076522, epoch: 2



 12%|████▏                             | 15045/122310 [30:00<3:29:27,  8.54it/s][A
 12%|████▏                             | 15061/122310 [30:01<2:24:53, 12.34it/s][A
 12%|████▏                             | 15072/122310 [30:02<2:17:22, 13.01it/s][A
 12%|████▏                             | 15080/122310 [30:03<2:25:22, 12.29it/s][A
 12%|████▏                             | 15089/122310 [30:03<2:27:17, 12.13it/s][A
 12%|████▏                             | 15099/122310 [30:04<2:22:48, 12.51it/s][A
 12%|████▏                             | 15109/122310 [30:05<2:20:16, 12.74it/s][A
 12%|████▏                             | 15117/122310 [30:06<2:27:47, 12.09it/s][A
 12%|████▏                             | 15126/122310 [30:06<2:28:15, 12.05it/s][A
 12%|████▏                             | 15139/122310 [30:07<2:11:23, 13.59it/s][A
 12%|████▏                             | 15147/122310 [30:08<2:20:22, 12.72it/s][A
 12%|████▏                             | 15155/122310 [30:09<2:29:12, 11.97

step: 31580, loss: 85.02958953190421, epoch: 2



 12%|████▏                             | 15248/122310 [30:16<2:08:18, 13.91it/s][A
 12%|████▏                             | 15255/122310 [30:16<2:21:57, 12.57it/s][A
 12%|████▏                             | 15258/122310 [30:17<2:58:10, 10.01it/s][A
 12%|████▏                             | 15270/122310 [30:18<2:31:24, 11.78it/s][A
 12%|████▏                             | 15286/122310 [30:19<2:02:15, 14.59it/s][A
 13%|████▎                             | 15292/122310 [30:19<2:21:50, 12.57it/s][A
 13%|████▎                             | 15299/122310 [30:20<2:33:46, 11.60it/s][A
 13%|████▎                             | 15302/122310 [30:21<3:11:20,  9.32it/s][A
 13%|████▎                             | 15310/122310 [30:22<3:03:54,  9.70it/s][A
 13%|████▎                             | 15317/122310 [30:22<3:06:22,  9.57it/s][A
 13%|████▎                             | 15327/122310 [30:23<2:46:47, 10.69it/s][A
 13%|████▎                             | 15331/122310 [30:24<3:16:32,  9.07

step: 31600, loss: 77.96635954785015, epoch: 2
sim1 and sim2 are 0.46974772538124937, 0.2017070625911306
cosine of pred and queen: 0.20311701187016004
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: second
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: euro
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tickling
Actual: maharast

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: hawser
Actual: canberra:australia::dushanbe:tajikistan, pred: barricaded
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: neselius
Actual: man:woman::brothers:sisters, pred: multiplying
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: indian
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 13%|████▏                            | 15386/122310 [31:47<97:21:39,  3.28s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.11242603550295859



 13%|████▏                            | 15394/122310 [31:48<67:14:55,  2.26s/it][A
 13%|████▏                            | 15402/122310 [31:49<47:06:18,  1.59s/it][A
 13%|████▏                            | 15407/122310 [31:49<37:46:47,  1.27s/it][A
 13%|████▏                            | 15415/122310 [31:50<26:07:20,  1.14it/s][A
 13%|████▏                            | 15429/122310 [31:51<14:58:48,  1.98it/s][A
 13%|████▏                            | 15437/122310 [31:52<11:40:45,  2.54it/s][A
 13%|████▎                             | 15444/122310 [31:52<9:31:21,  3.12it/s][A
 13%|████▎                             | 15448/122310 [31:53<8:51:27,  3.35it/s][A
 13%|████▎                             | 15455/122310 [31:54<7:10:20,  4.14it/s][A
 13%|████▎                             | 15469/122310 [31:55<4:35:31,  6.46it/s][A
 13%|████▎                             | 15483/122310 [31:55<3:23:51,  8.73it/s][A
 13%|████▎                             | 15491/122310 [31:56<3:14:43,  9.14

step: 31620, loss: 79.11819708994972, epoch: 2



 13%|████▎                             | 15562/122310 [32:03<2:37:38, 11.29it/s][A
 13%|████▎                             | 15586/122310 [32:04<1:46:36, 16.69it/s][A
 13%|████▎                             | 15597/122310 [32:04<1:50:00, 16.17it/s][A
 13%|████▎                             | 15612/122310 [32:05<1:42:56, 17.27it/s][A
 13%|████▎                             | 15624/122310 [32:06<1:45:05, 16.92it/s][A
 13%|████▎                             | 15633/122310 [32:07<1:55:04, 15.45it/s][A
 13%|████▎                             | 15636/122310 [32:07<2:26:01, 12.18it/s][A
 13%|████▎                             | 15641/122310 [32:08<2:49:28, 10.49it/s][A
 13%|████▎                             | 15650/122310 [32:09<2:42:38, 10.93it/s][A
 13%|████▎                             | 15657/122310 [32:10<2:49:49, 10.47it/s][A
 13%|████▎                             | 15662/122310 [32:10<3:09:48,  9.36it/s][A
 13%|████▎                             | 15668/122310 [32:11<3:19:04,  8.93

step: 31640, loss: 74.39744301850307, epoch: 2



 13%|████▎                             | 15735/122310 [32:18<2:40:34, 11.06it/s][A
 13%|████▍                             | 15743/122310 [32:19<2:41:48, 10.98it/s][A
 13%|████▍                             | 15752/122310 [32:19<2:37:21, 11.29it/s][A
 13%|████▍                             | 15763/122310 [32:20<2:24:43, 12.27it/s][A
 13%|████▍                             | 15768/122310 [32:21<2:47:01, 10.63it/s][A
 13%|████▍                             | 15783/122310 [32:22<2:12:24, 13.41it/s][A
 13%|████▍                             | 15796/122310 [32:22<2:01:12, 14.65it/s][A
 13%|████▍                             | 15807/122310 [32:23<2:01:21, 14.63it/s][A
 13%|████▍                             | 15814/122310 [32:24<2:16:08, 13.04it/s][A
 13%|████▍                             | 15821/122310 [32:25<2:28:35, 11.94it/s][A
 13%|████▍                             | 15833/122310 [32:25<2:14:48, 13.16it/s][A
 13%|████▍                             | 15838/122310 [32:26<2:38:18, 11.21

step: 31660, loss: 93.64232583661163, epoch: 2



 13%|████▍                             | 15893/122310 [32:33<3:45:04,  7.88it/s][A
 13%|████▍                             | 15901/122310 [32:34<3:24:40,  8.66it/s][A
 13%|████▍                             | 15909/122310 [32:34<3:11:56,  9.24it/s][A
 13%|████▍                             | 15928/122310 [32:35<2:08:30, 13.80it/s][A
 13%|████▍                             | 15935/122310 [32:36<2:21:17, 12.55it/s][A
 13%|████▍                             | 15946/122310 [32:37<2:15:06, 13.12it/s][A
 13%|████▍                             | 15950/122310 [32:37<2:43:40, 10.83it/s][A
 13%|████▍                             | 15954/122310 [32:38<3:12:47,  9.19it/s][A
 13%|████▍                             | 15960/122310 [32:39<3:21:10,  8.81it/s][A
 13%|████▍                             | 15964/122310 [32:40<3:47:59,  7.77it/s][A
 13%|████▍                             | 15981/122310 [32:40<2:25:17, 12.20it/s][A
 13%|████▍                             | 15985/122310 [32:41<2:55:48, 10.08

step: 31680, loss: 125.68627392581453, epoch: 2



 13%|████▍                             | 16047/122310 [32:48<3:12:43,  9.19it/s][A
 13%|████▍                             | 16060/122310 [32:49<2:37:28, 11.25it/s][A
 13%|████▍                             | 16062/122310 [32:49<3:17:31,  8.97it/s][A
 13%|████▍                             | 16071/122310 [32:50<3:00:36,  9.80it/s][A
 13%|████▍                             | 16076/122310 [32:51<3:18:13,  8.93it/s][A
 13%|████▍                             | 16093/122310 [32:52<2:17:17, 12.89it/s][A
 13%|████▍                             | 16101/122310 [32:52<2:24:31, 12.25it/s][A
 13%|████▍                             | 16107/122310 [32:53<2:40:54, 11.00it/s][A
 13%|████▍                             | 16111/122310 [32:54<3:09:57,  9.32it/s][A
 13%|████▍                             | 16115/122310 [32:55<3:38:12,  8.11it/s][A
 13%|████▍                             | 16116/122310 [32:55<4:49:49,  6.11it/s][A
 13%|████▍                             | 16125/122310 [32:56<3:45:36,  7.84

step: 31700, loss: 82.60751152485314, epoch: 2



 13%|████▌                             | 16191/122310 [33:04<4:05:10,  7.21it/s][A
 13%|████▌                             | 16199/122310 [33:04<3:38:42,  8.09it/s][A
 13%|████▌                             | 16204/122310 [33:05<3:49:18,  7.71it/s][A
 13%|████▌                             | 16215/122310 [33:06<3:04:21,  9.59it/s][A
 13%|████▌                             | 16219/122310 [33:07<4:30:54,  6.53it/s][A
 13%|████▌                             | 16221/122310 [33:08<5:12:59,  5.65it/s][A
 13%|████▌                             | 16226/122310 [33:09<4:59:57,  5.89it/s][A
 13%|████▌                             | 16234/122310 [33:10<4:07:05,  7.15it/s][A
 13%|████▌                             | 16244/122310 [33:10<3:19:45,  8.85it/s][A
 13%|████▌                             | 16251/122310 [33:11<3:17:13,  8.96it/s][A
 13%|████▌                             | 16266/122310 [33:13<3:08:10,  9.39it/s][A
 13%|████▌                             | 16276/122310 [33:13<2:50:16, 10.38

step: 31720, loss: 78.80386598708316, epoch: 2



 13%|████▌                             | 16323/122310 [33:18<2:33:54, 11.48it/s][A
 13%|████▌                             | 16331/122310 [33:19<2:36:26, 11.29it/s][A
 13%|████▌                             | 16343/122310 [33:20<2:21:47, 12.46it/s][A
 13%|████▌                             | 16347/122310 [33:20<2:46:49, 10.59it/s][A
 13%|████▌                             | 16356/122310 [33:21<2:41:11, 10.96it/s][A
 13%|████▌                             | 16363/122310 [33:22<2:48:41, 10.47it/s][A
 13%|████▌                             | 16371/122310 [33:23<2:48:14, 10.49it/s][A
 13%|████▌                             | 16379/122310 [33:23<2:47:50, 10.52it/s][A
 13%|████▌                             | 16388/122310 [33:24<2:42:08, 10.89it/s][A
 13%|████▌                             | 16395/122310 [33:25<2:50:00, 10.38it/s][A
 13%|████▌                             | 16413/122310 [33:26<2:02:44, 14.38it/s][A
 13%|████▌                             | 16423/122310 [33:26<2:05:42, 14.04

step: 31740, loss: 64.95052664846109, epoch: 2



 13%|████▌                             | 16510/122310 [33:33<2:41:33, 10.91it/s][A
 14%|████▌                             | 16514/122310 [33:34<3:09:48,  9.29it/s][A
 14%|████▌                             | 16521/122310 [33:35<3:09:24,  9.31it/s][A
 14%|████▌                             | 16534/122310 [33:35<2:30:57, 11.68it/s][A
 14%|████▌                             | 16539/122310 [33:36<2:53:12, 10.18it/s][A
 14%|████▌                             | 16541/122310 [33:37<3:44:02,  7.87it/s][A
 14%|████▌                             | 16543/122310 [33:38<4:37:36,  6.35it/s][A
 14%|████▌                             | 16546/122310 [33:38<5:12:07,  5.65it/s][A
 14%|████▌                             | 16557/122310 [33:39<3:32:20,  8.30it/s][A
 14%|████▌                             | 16568/122310 [33:40<2:52:52, 10.19it/s][A
 14%|████▌                             | 16582/122310 [33:41<2:18:45, 12.70it/s][A
 14%|████▌                             | 16593/122310 [33:41<2:13:23, 13.21

step: 31760, loss: 77.10927925633678, epoch: 2



 14%|████▋                             | 16669/122310 [33:48<2:43:18, 10.78it/s][A
 14%|████▋                             | 16675/122310 [33:49<2:57:06,  9.94it/s][A
 14%|████▋                             | 16682/122310 [33:51<3:56:37,  7.44it/s][A
 14%|████▋                             | 16691/122310 [33:51<3:26:08,  8.54it/s][A
 14%|████▋                             | 16708/122310 [33:52<2:24:50, 12.15it/s][A
 14%|████▋                             | 16716/122310 [33:53<2:29:52, 11.74it/s][A
 14%|████▋                             | 16730/122310 [33:54<2:08:55, 13.65it/s][A
 14%|████▋                             | 16738/122310 [33:54<2:18:03, 12.75it/s][A
 14%|████▋                             | 16748/122310 [33:55<2:16:41, 12.87it/s][A
 14%|████▋                             | 16755/122310 [33:56<2:28:55, 11.81it/s][A
 14%|████▋                             | 16761/122310 [33:57<2:44:42, 10.68it/s][A
 14%|████▋                             | 16776/122310 [33:57<2:12:33, 13.27

step: 31780, loss: 82.96097387903515, epoch: 2



 14%|████▋                             | 16839/122310 [34:03<2:25:19, 12.10it/s][A
 14%|████▋                             | 16858/122310 [34:04<1:49:24, 16.06it/s][A
 14%|████▋                             | 16865/122310 [34:05<2:05:30, 14.00it/s][A
 14%|████▋                             | 16871/122310 [34:06<2:24:19, 12.18it/s][A
 14%|████▋                             | 16875/122310 [34:06<2:53:50, 10.11it/s][A
 14%|████▋                             | 16887/122310 [34:07<2:27:58, 11.87it/s][A
 14%|████▋                             | 16890/122310 [34:08<3:04:36,  9.52it/s][A
 14%|████▋                             | 16897/122310 [34:09<3:06:44,  9.41it/s][A
 14%|████▋                             | 16902/122310 [34:09<3:24:09,  8.61it/s][A
 14%|████▋                             | 16910/122310 [34:10<3:10:56,  9.20it/s][A
 14%|████▋                             | 16915/122310 [34:11<3:29:42,  8.38it/s][A
 14%|████▋                             | 16923/122310 [34:12<3:13:57,  9.06

step: 31800, loss: 76.31640529167805, epoch: 2



 14%|████▋                             | 17023/122310 [34:19<2:40:43, 10.92it/s][A
 14%|████▋                             | 17030/122310 [34:19<2:47:18, 10.49it/s][A
 14%|████▋                             | 17035/122310 [34:20<3:07:54,  9.34it/s][A
 14%|████▋                             | 17044/122310 [34:21<2:53:09, 10.13it/s][A
 14%|████▋                             | 17050/122310 [34:22<3:05:14,  9.47it/s][A
 14%|████▋                             | 17063/122310 [34:22<2:28:12, 11.84it/s][A
 14%|████▋                             | 17072/122310 [34:23<2:27:39, 11.88it/s][A
 14%|████▋                             | 17082/122310 [34:24<2:22:33, 12.30it/s][A
 14%|████▊                             | 17092/122310 [34:25<2:19:12, 12.60it/s][A
 14%|████▊                             | 17105/122310 [34:25<2:05:44, 13.94it/s][A
 14%|████▊                             | 17123/122310 [34:26<1:44:16, 16.81it/s][A
 14%|████▊                             | 17129/122310 [34:27<2:03:42, 14.17

step: 31820, loss: 79.99641233396578, epoch: 2



 14%|████▊                             | 17198/122310 [34:34<2:40:36, 10.91it/s][A
 14%|████▊                             | 17212/122310 [34:35<2:52:40, 10.14it/s][A
 14%|████▊                             | 17219/122310 [34:36<2:55:31,  9.98it/s][A
 14%|████▊                             | 17232/122310 [34:37<2:27:32, 11.87it/s][A
 14%|████▊                             | 17240/122310 [34:37<2:32:16, 11.50it/s][A
 14%|████▊                             | 17252/122310 [34:38<2:18:00, 12.69it/s][A
 14%|████▊                             | 17268/122310 [34:39<1:55:54, 15.11it/s][A
 14%|████▊                             | 17276/122310 [34:40<2:07:02, 13.78it/s][A
 14%|████▊                             | 17287/122310 [34:40<2:04:50, 14.02it/s][A
 14%|████▊                             | 17294/122310 [34:41<2:19:11, 12.57it/s][A
 14%|████▊                             | 17297/122310 [34:42<2:54:23, 10.04it/s][A
 14%|████▊                             | 17304/122310 [34:43<2:58:03,  9.83

step: 31840, loss: 93.85611648987481, epoch: 2



 14%|████▊                             | 17363/122310 [34:49<2:51:28, 10.20it/s][A
 14%|████▊                             | 17373/122310 [34:50<2:37:25, 11.11it/s][A
 14%|████▊                             | 17380/122310 [34:50<2:45:45, 10.55it/s][A
 14%|████▊                             | 17386/122310 [34:51<2:58:26,  9.80it/s][A
 14%|████▊                             | 17396/122310 [34:52<2:41:40, 10.82it/s][A
 14%|████▊                             | 17404/122310 [34:53<2:42:31, 10.76it/s][A
 14%|████▊                             | 17415/122310 [34:53<2:26:57, 11.90it/s][A
 14%|████▊                             | 17423/122310 [34:54<2:32:06, 11.49it/s][A
 14%|████▊                             | 17427/122310 [34:55<3:01:08,  9.65it/s][A
 14%|████▊                             | 17431/122310 [34:56<3:29:48,  8.33it/s][A
 14%|████▊                             | 17439/122310 [34:56<3:13:54,  9.01it/s][A
 14%|████▊                             | 17443/122310 [34:57<3:42:17,  7.86

step: 31860, loss: 67.6780616389373, epoch: 2



 14%|████▊                             | 17509/122310 [35:04<3:55:31,  7.42it/s][A
 14%|████▊                             | 17518/122310 [35:05<3:19:09,  8.77it/s][A
 14%|████▊                             | 17521/122310 [35:05<3:57:30,  7.35it/s][A
 14%|████▊                             | 17530/122310 [35:06<3:20:06,  8.73it/s][A
 14%|████▉                             | 17538/122310 [35:07<3:08:37,  9.26it/s][A
 14%|████▉                             | 17542/122310 [35:08<3:36:00,  8.08it/s][A
 14%|████▉                             | 17551/122310 [35:09<4:07:37,  7.05it/s][A
 14%|████▉                             | 17554/122310 [35:10<4:35:48,  6.33it/s][A
 14%|████▉                             | 17560/122310 [35:11<4:19:59,  6.71it/s][A
 14%|████▉                             | 17569/122310 [35:12<4:32:58,  6.40it/s][A
 14%|████▉                             | 17580/122310 [35:13<3:29:40,  8.33it/s][A
 14%|████▉                             | 17589/122310 [35:14<3:09:07,  9.23

step: 31880, loss: 78.78106705451377, epoch: 2



 14%|████▉                             | 17628/122310 [35:19<3:25:12,  8.50it/s][A
 14%|████▉                             | 17636/122310 [35:20<3:11:14,  9.12it/s][A
 14%|████▉                             | 17649/122310 [35:21<2:31:29, 11.51it/s][A
 14%|████▉                             | 17656/122310 [35:21<2:40:26, 10.87it/s][A
 14%|████▉                             | 17663/122310 [35:22<2:48:06, 10.37it/s][A
 14%|████▉                             | 17667/122310 [35:23<3:16:42,  8.87it/s][A
 14%|████▉                             | 17686/122310 [35:24<2:07:45, 13.65it/s][A
 14%|████▉                             | 17693/122310 [35:24<2:21:43, 12.30it/s][A
 14%|████▉                             | 17703/122310 [35:25<2:18:39, 12.57it/s][A
 14%|████▉                             | 17710/122310 [35:26<2:30:37, 11.57it/s][A
 14%|████▉                             | 17716/122310 [35:27<2:46:39, 10.46it/s][A
 14%|████▉                             | 17724/122310 [35:27<2:47:01, 10.44

step: 31900, loss: 80.67473921425983, epoch: 2



 15%|████▉                             | 17807/122310 [35:34<2:31:52, 11.47it/s][A
 15%|████▉                             | 17815/122310 [35:35<2:34:10, 11.30it/s][A
 15%|████▉                             | 17823/122310 [35:36<2:35:55, 11.17it/s][A
 15%|████▉                             | 17833/122310 [35:36<2:26:36, 11.88it/s][A
 15%|████▉                             | 17841/122310 [35:37<2:30:38, 11.56it/s][A
 15%|████▉                             | 17846/122310 [35:38<2:52:27, 10.10it/s][A
 15%|████▉                             | 17860/122310 [35:39<2:16:52, 12.72it/s][A
 15%|████▉                             | 17869/122310 [35:39<2:18:36, 12.56it/s][A
 15%|████▉                             | 17881/122310 [35:40<2:07:43, 13.63it/s][A
 15%|████▉                             | 17896/122310 [35:41<1:51:24, 15.62it/s][A
 15%|████▉                             | 17898/122310 [35:42<2:28:38, 11.71it/s][A
 15%|████▉                             | 17904/122310 [35:42<2:43:14, 10.66

step: 31920, loss: 76.79596952741052, epoch: 2



 15%|█████                             | 18009/122310 [35:49<1:40:50, 17.24it/s][A
 15%|█████                             | 18021/122310 [35:50<1:42:35, 16.94it/s][A
 15%|█████                             | 18030/122310 [35:51<1:52:04, 15.51it/s][A
 15%|█████                             | 18037/122310 [35:51<2:06:39, 13.72it/s][A
 15%|█████                             | 18039/122310 [35:52<2:46:48, 10.42it/s][A
 15%|█████                             | 18047/122310 [35:53<2:45:19, 10.51it/s][A
 15%|█████                             | 18049/122310 [35:54<4:35:08,  6.32it/s][A
 15%|█████                             | 18058/122310 [35:55<3:43:21,  7.78it/s][A
 15%|█████                             | 18068/122310 [35:56<3:07:19,  9.27it/s][A
 15%|█████                             | 18077/122310 [35:56<2:53:30, 10.01it/s][A
 15%|█████                             | 18083/122310 [35:57<3:03:07,  9.49it/s][A
 15%|█████                             | 18091/122310 [35:58<2:56:04,  9.87

step: 31940, loss: 83.25452324917264, epoch: 2



 15%|█████                             | 18144/122310 [36:04<3:18:02,  8.77it/s][A
 15%|█████                             | 18157/122310 [36:05<2:35:23, 11.17it/s][A
 15%|█████                             | 18168/122310 [36:05<2:23:45, 12.07it/s][A
 15%|█████                             | 18174/122310 [36:06<2:41:22, 10.75it/s][A
 15%|█████                             | 18191/122310 [36:07<2:03:23, 14.06it/s][A
 15%|█████                             | 18199/122310 [36:08<2:14:19, 12.92it/s][A
 15%|█████                             | 18211/122310 [36:09<2:06:17, 13.74it/s][A
 15%|█████                             | 18225/122310 [36:09<1:54:17, 15.18it/s][A
 15%|█████                             | 18242/122310 [36:10<1:41:05, 17.16it/s][A
 15%|█████                             | 18248/122310 [36:11<2:00:01, 14.45it/s][A
 15%|█████                             | 18258/122310 [36:12<2:04:05, 13.97it/s][A
 15%|█████                             | 18266/122310 [36:12<2:14:29, 12.89

step: 31960, loss: 75.98443202739436, epoch: 2



 15%|█████                             | 18336/122310 [36:19<2:11:29, 13.18it/s][A
 15%|█████                             | 18342/122310 [36:20<2:27:41, 11.73it/s][A
 15%|█████                             | 18351/122310 [36:21<2:26:12, 11.85it/s][A
 15%|█████                             | 18355/122310 [36:21<2:53:35,  9.98it/s][A
 15%|█████                             | 18366/122310 [36:23<3:17:00,  8.79it/s][A
 15%|█████                             | 18378/122310 [36:24<2:43:22, 10.60it/s][A
 15%|█████                             | 18386/122310 [36:24<2:42:48, 10.64it/s][A
 15%|█████                             | 18395/122310 [36:25<2:37:29, 11.00it/s][A
 15%|█████                             | 18404/122310 [36:26<2:33:33, 11.28it/s][A
 15%|█████                             | 18414/122310 [36:27<2:25:34, 11.90it/s][A
 15%|█████                             | 18420/122310 [36:27<2:40:08, 10.81it/s][A
 15%|█████                             | 18430/122310 [36:28<2:29:25, 11.59

step: 31980, loss: 91.63937228673454, epoch: 2



 15%|█████▏                            | 18500/122310 [36:34<2:40:07, 10.80it/s][A
 15%|█████▏                            | 18505/122310 [36:35<2:59:56,  9.61it/s][A
 15%|█████▏                            | 18522/122310 [36:35<2:07:37, 13.55it/s][A
 15%|█████▏                            | 18525/122310 [36:36<2:42:17, 10.66it/s][A
 15%|█████▏                            | 18533/122310 [36:37<2:41:51, 10.69it/s][A
 15%|█████▏                            | 18535/122310 [36:38<3:29:47,  8.24it/s][A
 15%|█████▏                            | 18550/122310 [36:38<2:26:52, 11.77it/s][A
 15%|█████▏                            | 18567/122310 [36:39<1:54:29, 15.10it/s][A
 15%|█████▏                            | 18581/122310 [36:40<1:46:44, 16.20it/s][A
 15%|█████▏                            | 18593/122310 [36:41<1:46:43, 16.20it/s][A
 15%|█████▏                            | 18603/122310 [36:41<1:53:04, 15.29it/s][A
 15%|█████▏                            | 18606/122310 [36:42<2:24:55, 11.93

step: 32000, loss: 75.9575946722352, epoch: 2
sim1 and sim2 are 0.4440980110778267, 0.20817376835379692
cosine of pred and queen: 0.2578851023012275
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tickling
Actual: maharastr


 15%|█████▏                            | 18686/122310 [37:02<1:35:17, 18.12it/s][A

Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: spain
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: commission
Actual: sweden:swedish::netherlands:dutch, pred: germany
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: europe
Actual: denmark:danish::germany:german, pred: europe
Accuracy is 0.37777777777777777
Actual: walk:walks::vanish:vanishes, pred: chihiro
Actual: work:w


 15%|█████                            | 18690/122310 [38:10<67:24:05,  2.34s/it][A
 15%|█████                            | 18702/122310 [38:11<46:13:54,  1.61s/it][A
 15%|█████                            | 18704/122310 [38:11<43:41:55,  1.52s/it][A
 15%|█████                            | 18711/122310 [38:12<33:00:26,  1.15s/it][A
 15%|█████                            | 18719/122310 [38:13<23:53:45,  1.20it/s][A
 15%|█████                            | 18728/122310 [38:14<16:54:07,  1.70it/s][A
 15%|█████                            | 18738/122310 [38:14<11:52:37,  2.42it/s][A
 15%|█████▏                            | 18749/122310 [38:15<8:25:21,  3.42it/s][A
 15%|█████▏                            | 18755/122310 [38:16<7:23:19,  3.89it/s][A
 15%|█████▏                            | 18761/122310 [38:17<6:30:19,  4.42it/s][A
 15%|█████▏                            | 18778/122310 [38:17<3:57:57,  7.25it/s][A
 15%|█████▏                            | 18790/122310 [38:18<3:14:47,  8.86

step: 32020, loss: 76.80047103830034, epoch: 2



 15%|█████▎                            | 18894/122310 [38:26<2:15:17, 12.74it/s][A
 15%|█████▎                            | 18904/122310 [38:26<2:13:31, 12.91it/s][A
 15%|█████▎                            | 18908/122310 [38:27<2:42:00, 10.64it/s][A
 15%|█████▎                            | 18919/122310 [38:28<2:26:12, 11.79it/s][A
 15%|█████▎                            | 18940/122310 [38:29<1:43:53, 16.58it/s][A
 15%|█████▎                            | 18947/122310 [38:29<1:59:56, 14.36it/s][A
 16%|█████▎                            | 18962/122310 [38:30<1:47:29, 16.02it/s][A
 16%|█████▎                            | 18967/122310 [38:31<2:11:10, 13.13it/s][A
 16%|█████▎                            | 18973/122310 [38:32<2:28:43, 11.58it/s][A
 16%|█████▎                            | 18980/122310 [38:32<2:38:18, 10.88it/s][A
 16%|█████▎                            | 18988/122310 [38:33<2:40:18, 10.74it/s][A
 16%|█████▎                            | 18994/122310 [38:34<2:53:29,  9.93

step: 32040, loss: 75.71441753052433, epoch: 2



 16%|█████▎                            | 19076/122310 [38:41<2:54:29,  9.86it/s][A
 16%|█████▎                            | 19083/122310 [38:42<2:57:40,  9.68it/s][A
 16%|█████▎                            | 19089/122310 [38:42<3:08:11,  9.14it/s][A
 16%|█████▎                            | 19095/122310 [38:43<3:15:32,  8.80it/s][A
 16%|█████▎                            | 19112/122310 [38:44<2:14:08, 12.82it/s][A
 16%|█████▎                            | 19120/122310 [38:45<2:21:29, 12.15it/s][A
 16%|█████▎                            | 19129/122310 [38:45<2:22:56, 12.03it/s][A
 16%|█████▎                            | 19135/122310 [38:46<2:38:48, 10.83it/s][A
 16%|█████▎                            | 19145/122310 [38:47<2:29:13, 11.52it/s][A
 16%|█████▎                            | 19154/122310 [38:48<2:27:56, 11.62it/s][A
 16%|█████▎                            | 19158/122310 [38:48<2:56:48,  9.72it/s][A
 16%|█████▎                            | 19172/122310 [38:49<2:19:21, 12.34

step: 32060, loss: 93.3748952325404, epoch: 2



 16%|█████▎                            | 19278/122310 [38:56<2:04:11, 13.83it/s][A
 16%|█████▎                            | 19282/122310 [38:57<2:32:11, 11.28it/s][A
 16%|█████▎                            | 19296/122310 [38:58<2:08:32, 13.36it/s][A
 16%|█████▎                            | 19302/122310 [38:58<2:26:11, 11.74it/s][A
 16%|█████▎                            | 19315/122310 [38:59<2:08:34, 13.35it/s][A
 16%|█████▎                            | 19326/122310 [39:00<2:05:39, 13.66it/s][A
 16%|█████▎                            | 19332/122310 [39:01<2:23:21, 11.97it/s][A
 16%|█████▍                            | 19343/122310 [39:03<3:35:47,  7.95it/s][A
 16%|█████▍                            | 19352/122310 [39:04<3:15:21,  8.78it/s][A
 16%|█████▍                            | 19360/122310 [39:04<3:06:35,  9.20it/s][A
 16%|█████▍                            | 19372/122310 [39:05<2:38:16, 10.84it/s][A
 16%|█████▍                            | 19384/122310 [39:06<2:20:58, 12.17

step: 32080, loss: 109.23118479067962, epoch: 2



 16%|█████▍                            | 19433/122310 [39:11<3:55:56,  7.27it/s][A
 16%|█████▍                            | 19442/122310 [39:12<3:18:29,  8.64it/s][A
 16%|█████▍                            | 19455/122310 [39:13<2:33:55, 11.14it/s][A
 16%|█████▍                            | 19464/122310 [39:14<2:30:39, 11.38it/s][A
 16%|█████▍                            | 19477/122310 [39:14<2:11:33, 13.03it/s][A
 16%|█████▍                            | 19480/122310 [39:15<2:45:41, 10.34it/s][A
 16%|█████▍                            | 19487/122310 [39:16<2:51:13, 10.01it/s][A
 16%|█████▍                            | 19504/122310 [39:17<2:04:39, 13.74it/s][A
 16%|█████▍                            | 19516/122310 [39:17<1:59:33, 14.33it/s][A
 16%|█████▍                            | 19522/122310 [39:18<2:17:39, 12.45it/s][A
 16%|█████▍                            | 19530/122310 [39:19<2:24:29, 11.86it/s][A
 16%|█████▍                            | 19543/122310 [39:20<2:07:35, 13.42

step: 32100, loss: 75.9944362123483, epoch: 2



 16%|█████▍                            | 19635/122310 [39:26<2:16:43, 12.52it/s][A
 16%|█████▍                            | 19638/122310 [39:27<2:50:37, 10.03it/s][A
 16%|█████▍                            | 19646/122310 [39:28<2:45:45, 10.32it/s][A
 16%|█████▍                            | 19650/122310 [39:28<3:11:42,  8.92it/s][A
 16%|█████▍                            | 19655/122310 [39:29<3:25:55,  8.31it/s][A
 16%|█████▍                            | 19660/122310 [39:30<3:36:07,  7.92it/s][A
 16%|█████▍                            | 19668/122310 [39:31<3:12:54,  8.87it/s][A
 16%|█████▍                            | 19677/122310 [39:31<2:52:09,  9.94it/s][A
 16%|█████▍                            | 19681/122310 [39:32<3:18:29,  8.62it/s][A
 16%|█████▍                            | 19688/122310 [39:33<3:10:39,  8.97it/s][A
 16%|█████▍                            | 19697/122310 [39:34<2:51:15,  9.99it/s][A
 16%|█████▍                            | 19705/122310 [39:34<2:46:58, 10.24

step: 32120, loss: 70.91094177183636, epoch: 2



 16%|█████▍                            | 19777/122310 [39:41<2:13:50, 12.77it/s][A
 16%|█████▌                            | 19786/122310 [39:42<2:14:55, 12.66it/s][A
 16%|█████▌                            | 19805/122310 [39:42<1:42:04, 16.74it/s][A
 16%|█████▌                            | 19814/122310 [39:43<1:50:44, 15.42it/s][A
 16%|█████▌                            | 19821/122310 [39:44<2:04:26, 13.73it/s][A
 16%|█████▌                            | 19831/122310 [39:44<2:04:58, 13.67it/s][A
 16%|█████▌                            | 19853/122310 [39:45<1:31:42, 18.62it/s][A
 16%|█████▌                            | 19859/122310 [39:46<1:50:38, 15.43it/s][A
 16%|█████▌                            | 19869/122310 [39:47<1:54:16, 14.94it/s][A
 16%|█████▌                            | 19879/122310 [39:47<1:56:45, 14.62it/s][A
 16%|█████▌                            | 19885/122310 [39:48<2:14:10, 12.72it/s][A
 16%|█████▌                            | 19895/122310 [39:49<2:10:53, 13.04

step: 32140, loss: 74.73293314478884, epoch: 2



 16%|█████▌                            | 19975/122310 [39:55<2:10:14, 13.09it/s][A
 16%|█████▌                            | 19980/122310 [39:56<2:31:18, 11.27it/s][A
 16%|█████▌                            | 19988/122310 [39:57<2:32:04, 11.21it/s][A
 16%|█████▌                            | 19994/122310 [39:58<2:44:56, 10.34it/s][A
 16%|█████▌                            | 20000/122310 [39:58<2:55:27,  9.72it/s][A
 16%|█████▌                            | 20008/122310 [39:59<2:48:36, 10.11it/s][A
 16%|█████▌                            | 20024/122310 [40:00<2:04:29, 13.69it/s][A
 16%|█████▌                            | 20036/122310 [40:00<1:57:00, 14.57it/s][A
 16%|█████▌                            | 20044/122310 [40:01<2:06:12, 13.51it/s][A
 16%|█████▌                            | 20059/122310 [40:02<1:48:50, 15.66it/s][A
 16%|█████▌                            | 20064/122310 [40:03<2:11:36, 12.95it/s][A
 16%|█████▌                            | 20068/122310 [40:03<2:38:39, 10.74

step: 32160, loss: 116.92761945197792, epoch: 2



 16%|█████▍                           | 20079/122310 [40:14<15:12:58,  1.87it/s][A
 16%|█████▍                           | 20085/122310 [40:15<11:54:50,  2.38it/s][A
 16%|█████▌                            | 20090/122310 [40:16<9:57:39,  2.85it/s][A
 16%|█████▌                            | 20098/122310 [40:16<7:15:49,  3.91it/s][A
 16%|█████▌                            | 20114/122310 [40:17<4:12:55,  6.73it/s][A
 16%|█████▌                            | 20129/122310 [40:18<3:03:55,  9.26it/s][A
 16%|█████▌                            | 20138/122310 [40:19<2:51:46,  9.91it/s][A
 16%|█████▌                            | 20143/122310 [40:19<3:04:13,  9.24it/s][A
 16%|█████▌                            | 20151/122310 [40:20<2:55:52,  9.68it/s][A
 16%|█████▌                            | 20164/122310 [40:21<2:24:00, 11.82it/s][A
 16%|█████▌                            | 20174/122310 [40:21<2:18:08, 12.32it/s][A
 16%|█████▌                            | 20179/122310 [40:22<2:36:54, 10.85

step: 32180, loss: 72.22121637538058, epoch: 2



 17%|█████▌                            | 20206/122310 [40:24<2:27:06, 11.57it/s][A
 17%|█████▌                            | 20214/122310 [40:26<3:13:15,  8.81it/s][A
 17%|█████▌                            | 20218/122310 [40:27<3:32:32,  8.01it/s][A
 17%|█████▌                            | 20228/122310 [40:27<2:59:18,  9.49it/s][A
 17%|█████▋                            | 20237/122310 [40:28<2:45:56, 10.25it/s][A
 17%|█████▋                            | 20240/122310 [40:29<3:19:03,  8.55it/s][A
 17%|█████▋                            | 20250/122310 [40:29<2:49:38, 10.03it/s][A
 17%|█████▋                            | 20254/122310 [40:30<3:15:20,  8.71it/s][A
 17%|█████▋                            | 20268/122310 [40:31<2:24:36, 11.76it/s][A
 17%|█████▋                            | 20277/122310 [40:32<2:22:41, 11.92it/s][A
 17%|█████▋                            | 20279/122310 [40:32<3:03:42,  9.26it/s][A
 17%|█████▋                            | 20281/122310 [40:33<3:52:42,  7.31

step: 32200, loss: 90.91844549204073, epoch: 2



 17%|█████▋                            | 20345/122310 [40:39<2:42:40, 10.45it/s][A
 17%|█████▋                            | 20353/122310 [40:40<2:39:45, 10.64it/s][A
 17%|█████▋                            | 20363/122310 [40:40<2:27:12, 11.54it/s][A
 17%|█████▋                            | 20373/122310 [40:41<2:19:21, 12.19it/s][A
 17%|█████▋                            | 20381/122310 [40:42<2:23:37, 11.83it/s][A
 17%|█████▋                            | 20390/122310 [40:43<2:21:21, 12.02it/s][A
 17%|█████▋                            | 20395/122310 [40:43<2:42:13, 10.47it/s][A
 17%|█████▋                            | 20403/122310 [40:44<2:39:54, 10.62it/s][A
 17%|█████▋                            | 20423/122310 [40:45<1:48:38, 15.63it/s][A
 17%|█████▋                            | 20429/122310 [40:45<2:06:31, 13.42it/s][A
 17%|█████▋                            | 20440/122310 [40:46<2:02:05, 13.91it/s][A
 17%|█████▋                            | 20450/122310 [40:47<2:02:35, 13.85

step: 32220, loss: 73.01906495894389, epoch: 2



 17%|█████▋                            | 20555/122310 [40:53<1:34:05, 18.02it/s][A
 17%|█████▋                            | 20564/122310 [40:54<1:43:45, 16.34it/s][A
 17%|█████▋                            | 20572/122310 [40:55<1:54:52, 14.76it/s][A
 17%|█████▋                            | 20579/122310 [40:56<2:09:05, 13.13it/s][A
 17%|█████▋                            | 20582/122310 [40:56<2:42:25, 10.44it/s][A
 17%|█████▋                            | 20589/122310 [40:57<2:47:23, 10.13it/s][A
 17%|█████▋                            | 20598/122310 [40:58<2:36:20, 10.84it/s][A
 17%|█████▋                            | 20605/122310 [40:59<2:41:44, 10.48it/s][A
 17%|█████▋                            | 20611/122310 [40:59<2:53:33,  9.77it/s][A
 17%|█████▋                            | 20619/122310 [41:00<2:46:47, 10.16it/s][A
 17%|█████▋                            | 20625/122310 [41:01<2:56:43,  9.59it/s][A
 17%|█████▋                            | 20631/122310 [41:02<3:05:55,  9.11

step: 32240, loss: 95.18836104600716, epoch: 2



 17%|█████▊                            | 20708/122310 [41:08<2:18:15, 12.25it/s][A
 17%|█████▊                            | 20719/122310 [41:09<2:08:53, 13.14it/s][A
 17%|█████▊                            | 20729/122310 [41:09<2:06:47, 13.35it/s][A
 17%|█████▊                            | 20745/122310 [41:10<1:46:28, 15.90it/s][A
 17%|█████▊                            | 20756/122310 [41:11<1:47:53, 15.69it/s][A
 17%|█████▊                            | 20768/122310 [41:12<1:46:21, 15.91it/s][A
 17%|█████▊                            | 20778/122310 [41:12<1:51:07, 15.23it/s][A
 17%|█████▊                            | 20780/122310 [41:13<2:26:16, 11.57it/s][A
 17%|█████▊                            | 20789/122310 [41:14<2:23:25, 11.80it/s][A
 17%|█████▊                            | 20804/122310 [41:15<1:56:41, 14.50it/s][A
 17%|█████▊                            | 20809/122310 [41:15<2:18:34, 12.21it/s][A
 17%|█████▊                            | 20818/122310 [41:16<2:17:36, 12.29

step: 32260, loss: 87.1265972482703, epoch: 2



 17%|█████▊                            | 20904/122310 [41:23<2:07:17, 13.28it/s][A
 17%|█████▊                            | 20908/122310 [41:23<2:34:01, 10.97it/s][A
 17%|█████▊                            | 20916/122310 [41:24<2:33:38, 11.00it/s][A
 17%|█████▊                            | 20929/122310 [41:25<2:09:05, 13.09it/s][A
 17%|█████▊                            | 20940/122310 [41:25<2:03:34, 13.67it/s][A
 17%|█████▊                            | 20942/122310 [41:26<2:41:50, 10.44it/s][A
 17%|█████▊                            | 20947/122310 [41:27<3:00:13,  9.37it/s][A
 17%|█████▊                            | 20956/122310 [41:28<2:44:25, 10.27it/s][A
 17%|█████▊                            | 20970/122310 [41:28<2:10:01, 12.99it/s][A
 17%|█████▊                            | 20978/122310 [41:29<2:16:10, 12.40it/s][A
 17%|█████▊                            | 20987/122310 [41:30<2:15:53, 12.43it/s][A
 17%|█████▊                            | 20993/122310 [41:31<2:31:00, 11.18

step: 32280, loss: 80.37166545701584, epoch: 2



 17%|█████▊                            | 21040/122310 [41:37<3:07:15,  9.01it/s][A
 17%|█████▊                            | 21046/122310 [41:38<3:11:34,  8.81it/s][A
 17%|█████▊                            | 21053/122310 [41:38<3:06:34,  9.05it/s][A
 17%|█████▊                            | 21061/122310 [41:39<2:55:14,  9.63it/s][A
 17%|█████▊                            | 21068/122310 [41:40<2:54:55,  9.65it/s][A
 17%|█████▊                            | 21076/122310 [41:41<2:48:03, 10.04it/s][A
 17%|█████▊                            | 21091/122310 [41:41<2:07:23, 13.24it/s][A
 17%|█████▊                            | 21099/122310 [41:42<2:14:00, 12.59it/s][A
 17%|█████▊                            | 21106/122310 [41:43<2:24:31, 11.67it/s][A
 17%|█████▊                            | 21114/122310 [41:44<2:26:20, 11.53it/s][A
 17%|█████▊                            | 21122/122310 [41:44<2:28:28, 11.36it/s][A
 17%|█████▊                            | 21127/122310 [41:45<2:48:12, 10.03

step: 32300, loss: 83.96129875074534, epoch: 2



 17%|█████▉                            | 21213/122310 [41:52<2:23:31, 11.74it/s][A
 17%|█████▉                            | 21220/122310 [41:52<2:31:33, 11.12it/s][A
 17%|█████▉                            | 21225/122310 [41:53<2:51:37,  9.82it/s][A
 17%|█████▉                            | 21230/122310 [41:54<3:07:39,  8.98it/s][A
 17%|█████▉                            | 21236/122310 [41:54<3:12:18,  8.76it/s][A
 17%|█████▉                            | 21243/122310 [41:55<3:05:51,  9.06it/s][A
 17%|█████▉                            | 21249/122310 [41:56<3:12:00,  8.77it/s][A
 17%|█████▉                            | 21263/122310 [41:57<2:21:06, 11.94it/s][A
 17%|█████▉                            | 21274/122310 [41:57<2:10:47, 12.87it/s][A
 17%|█████▉                            | 21278/122310 [41:58<2:37:55, 10.66it/s][A
 17%|█████▉                            | 21282/122310 [41:59<3:04:11,  9.14it/s][A
 17%|█████▉                            | 21292/122310 [42:00<2:39:48, 10.54

step: 32320, loss: 82.25185634462825, epoch: 2



 17%|█████▉                            | 21379/122310 [42:06<2:02:56, 13.68it/s][A
 17%|█████▉                            | 21386/122310 [42:07<2:14:42, 12.49it/s][A
 17%|█████▉                            | 21393/122310 [42:08<2:24:19, 11.65it/s][A
 18%|█████▉                            | 21408/122310 [42:08<1:57:08, 14.36it/s][A
 18%|█████▉                            | 21415/122310 [42:09<2:09:56, 12.94it/s][A
 18%|█████▉                            | 21424/122310 [42:10<2:11:42, 12.77it/s][A
 18%|█████▉                            | 21439/122310 [42:10<1:51:13, 15.11it/s][A
 18%|█████▉                            | 21451/122310 [42:12<2:20:29, 11.97it/s][A
 18%|█████▉                            | 21455/122310 [42:13<2:41:48, 10.39it/s][A
 18%|█████▉                            | 21462/122310 [42:13<2:44:49, 10.20it/s][A
 18%|█████▉                            | 21470/122310 [42:14<2:41:09, 10.43it/s][A
 18%|█████▉                            | 21490/122310 [42:15<1:50:30, 15.21

step: 32340, loss: 67.80517378530749, epoch: 2



 18%|█████▉                            | 21546/122310 [42:21<2:52:05,  9.76it/s][A
 18%|█████▉                            | 21551/122310 [42:22<4:04:11,  6.88it/s][A
 18%|█████▉                            | 21555/122310 [42:23<4:15:46,  6.57it/s][A
 18%|█████▉                            | 21572/122310 [42:23<2:32:54, 10.98it/s][A
 18%|█████▉                            | 21582/122310 [42:24<2:23:34, 11.69it/s][A
 18%|██████                            | 21591/122310 [42:25<2:20:55, 11.91it/s][A
 18%|██████                            | 21601/122310 [42:26<2:15:09, 12.42it/s][A
 18%|██████                            | 21613/122310 [42:26<2:03:15, 13.62it/s][A
 18%|██████                            | 21621/122310 [42:27<2:10:13, 12.89it/s][A
 18%|██████                            | 21632/122310 [42:28<2:03:40, 13.57it/s][A
 18%|██████                            | 21634/122310 [42:31<5:04:20,  5.51it/s][A
 18%|██████                            | 21638/122310 [42:31<5:03:50,  5.52

step: 32360, loss: 69.71440547677055, epoch: 2



 18%|██████                            | 21672/122310 [42:35<3:30:39,  7.96it/s][A
 18%|██████                            | 21679/122310 [42:36<3:18:16,  8.46it/s][A
 18%|██████                            | 21692/122310 [42:37<3:12:35,  8.71it/s][A
 18%|██████                            | 21700/122310 [42:38<3:00:29,  9.29it/s][A
 18%|██████                            | 21709/122310 [42:39<2:46:21, 10.08it/s][A
 18%|██████                            | 21721/122310 [42:39<2:22:07, 11.80it/s][A
 18%|██████                            | 21734/122310 [42:40<2:04:25, 13.47it/s][A
 18%|██████                            | 21739/122310 [42:41<2:24:29, 11.60it/s][A
 18%|██████                            | 21754/122310 [42:42<1:57:47, 14.23it/s][A
 18%|██████                            | 21761/122310 [42:42<2:10:05, 12.88it/s][A
 18%|██████                            | 21772/122310 [42:43<2:03:37, 13.56it/s][A
 18%|██████                            | 21783/122310 [42:44<1:59:54, 13.97

step: 32380, loss: 74.67025749413948, epoch: 2



 18%|██████                            | 21830/122310 [42:50<3:32:07,  7.89it/s][A
 18%|██████                            | 21836/122310 [42:50<3:28:33,  8.03it/s][A
 18%|██████                            | 21853/122310 [42:51<2:12:33, 12.63it/s][A
 18%|██████                            | 21862/122310 [42:52<2:12:36, 12.62it/s][A
 18%|██████                            | 21867/122310 [42:52<2:33:28, 10.91it/s][A
 18%|██████                            | 21871/122310 [42:53<2:59:44,  9.31it/s][A
 18%|██████                            | 21881/122310 [42:54<2:36:53, 10.67it/s][A
 18%|██████                            | 21890/122310 [42:55<2:29:22, 11.20it/s][A
 18%|██████                            | 21908/122310 [42:55<1:49:19, 15.31it/s][A
 18%|██████                            | 21925/122310 [42:56<1:34:05, 17.78it/s][A
 18%|██████                            | 21930/122310 [42:57<1:55:56, 14.43it/s][A
 18%|██████                            | 21937/122310 [42:57<2:08:20, 13.03

step: 32400, loss: 65.20148462754864, epoch: 2
sim1 and sim2 are 0.33658994075275966, 0.21886783971747825
cosine of pred and queen: 0.0814182395006286
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: germany
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: 

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: respect
Actual: man:woman::brothers:sisters, pred: paradis
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: pradesh
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 18%|█████▊                          | 21981/122310 [44:18<134:58:14,  4.84s/it][A

Actual: india:rupee::denmark:krone, pred: netherlands
Accuracy is 0.1242603550295858



 18%|█████▊                          | 21984/122310 [44:19<107:53:08,  3.87s/it][A
 18%|█████▉                           | 21994/122310 [44:20<54:46:33,  1.97s/it][A
 18%|█████▉                           | 21995/122310 [44:21<52:26:14,  1.88s/it][A
 18%|█████▉                           | 22001/122310 [44:21<34:32:52,  1.24s/it][A
 18%|█████▉                           | 22005/122310 [44:22<26:56:23,  1.03it/s][A
 18%|█████▉                           | 22010/122310 [44:23<19:42:33,  1.41it/s][A
 18%|█████▉                           | 22014/122310 [44:23<15:48:33,  1.76it/s][A
 18%|█████▉                           | 22020/122310 [44:24<11:16:54,  2.47it/s][A
 18%|██████                            | 22028/122310 [44:25<7:42:14,  3.62it/s][A
 18%|██████▏                           | 22037/122310 [44:26<5:31:38,  5.04it/s][A
 18%|██████▏                           | 22045/122310 [44:26<4:30:43,  6.17it/s][A
 18%|██████▏                           | 22051/122310 [44:27<4:11:56,  6.63

step: 32420, loss: 74.22751865044778, epoch: 2



 18%|██████▏                           | 22121/122310 [44:33<2:22:21, 11.73it/s][A
 18%|██████▏                           | 22131/122310 [44:34<2:14:28, 12.42it/s][A
 18%|██████▏                           | 22141/122310 [44:35<2:09:43, 12.87it/s][A
 18%|██████▏                           | 22155/122310 [44:36<1:52:36, 14.82it/s][A
 18%|██████▏                           | 22163/122310 [44:36<2:01:25, 13.75it/s][A
 18%|██████▏                           | 22173/122310 [44:37<2:01:51, 13.70it/s][A
 18%|██████▏                           | 22177/122310 [44:38<2:27:19, 11.33it/s][A
 18%|██████▏                           | 22187/122310 [44:38<2:17:42, 12.12it/s][A
 18%|██████▏                           | 22202/122310 [44:39<1:53:21, 14.72it/s][A
 18%|██████▏                           | 22218/122310 [44:40<1:38:16, 16.98it/s][A
 18%|██████▏                           | 22234/122310 [44:41<1:29:57, 18.54it/s][A
 18%|██████▏                           | 22244/122310 [44:41<1:37:20, 17.13

step: 32440, loss: 65.25104980421533, epoch: 2



 18%|██████▏                           | 22326/122310 [44:48<2:10:23, 12.78it/s][A
 18%|██████▏                           | 22331/122310 [44:49<2:30:41, 11.06it/s][A
 18%|██████▏                           | 22342/122310 [44:49<2:15:29, 12.30it/s][A
 18%|██████▏                           | 22349/122310 [44:50<2:23:56, 11.57it/s][A
 18%|██████▏                           | 22363/122310 [44:51<1:59:36, 13.93it/s][A
 18%|██████▏                           | 22367/122310 [44:51<2:25:59, 11.41it/s][A
 18%|██████▏                           | 22372/122310 [44:52<2:45:25, 10.07it/s][A
 18%|██████▏                           | 22380/122310 [44:53<2:41:06, 10.34it/s][A
 18%|██████▏                           | 22386/122310 [44:54<2:51:45,  9.70it/s][A
 18%|██████▏                           | 22399/122310 [44:54<2:16:12, 12.22it/s][A
 18%|██████▏                           | 22411/122310 [44:55<2:03:48, 13.45it/s][A
 18%|██████▏                           | 22416/122310 [44:56<2:26:41, 11.35

step: 32460, loss: 76.80013191617178, epoch: 2



 18%|██████▎                           | 22499/122310 [45:03<2:01:14, 13.72it/s][A
 18%|██████▎                           | 22509/122310 [45:03<2:02:20, 13.60it/s][A
 18%|██████▎                           | 22520/122310 [45:04<1:59:32, 13.91it/s][A
 18%|██████▎                           | 22529/122310 [45:05<2:04:47, 13.33it/s][A
 18%|██████▎                           | 22535/122310 [45:06<2:21:58, 11.71it/s][A
 18%|██████▎                           | 22549/122310 [45:06<2:01:21, 13.70it/s][A
 18%|██████▎                           | 22552/122310 [45:07<2:33:37, 10.82it/s][A
 18%|██████▎                           | 22571/122310 [45:08<1:49:49, 15.14it/s][A
 18%|██████▎                           | 22580/122310 [45:09<1:57:41, 14.12it/s][A
 18%|██████▎                           | 22584/122310 [45:09<2:24:18, 11.52it/s][A
 18%|██████▎                           | 22598/122310 [45:10<2:02:09, 13.60it/s][A
 18%|██████▎                           | 22605/122310 [45:11<2:15:51, 12.23

step: 32480, loss: 102.65141814703514, epoch: 2



 19%|██████▎                           | 22679/122310 [45:18<2:24:17, 11.51it/s][A
 19%|██████▎                           | 22686/122310 [45:19<2:33:01, 10.85it/s][A
 19%|██████▎                           | 22700/122310 [45:19<2:06:03, 13.17it/s][A
 19%|██████▎                           | 22708/122310 [45:20<2:13:34, 12.43it/s][A
 19%|██████▎                           | 22720/122310 [45:21<2:03:12, 13.47it/s][A
 19%|██████▎                           | 22731/122310 [45:22<2:00:16, 13.80it/s][A
 19%|██████▎                           | 22745/122310 [45:22<1:49:00, 15.22it/s][A
 19%|██████▎                           | 22750/122310 [45:23<2:11:10, 12.65it/s][A
 19%|██████▎                           | 22753/122310 [45:24<2:45:29, 10.03it/s][A
 19%|██████▎                           | 22762/122310 [45:25<2:38:47, 10.45it/s][A
 19%|██████▎                           | 22773/122310 [45:25<2:22:04, 11.68it/s][A
 19%|██████▎                           | 22781/122310 [45:26<2:26:22, 11.33

step: 32500, loss: 79.31528910777557, epoch: 2



 19%|██████▎                           | 22864/122310 [45:33<1:53:51, 14.56it/s][A
 19%|██████▎                           | 22870/122310 [45:34<2:11:02, 12.65it/s][A
 19%|██████▎                           | 22879/122310 [45:34<2:13:29, 12.41it/s][A
 19%|██████▎                           | 22893/122310 [45:35<1:56:47, 14.19it/s][A
 19%|██████▎                           | 22900/122310 [45:36<2:09:49, 12.76it/s][A
 19%|██████▎                           | 22914/122310 [45:37<1:54:23, 14.48it/s][A
 19%|██████▎                           | 22930/122310 [45:37<1:40:24, 16.49it/s][A
 19%|██████▍                           | 22937/122310 [45:38<1:55:42, 14.31it/s][A
 19%|██████▍                           | 22947/122310 [45:39<1:58:54, 13.93it/s][A
 19%|██████▍                           | 22952/122310 [45:40<2:21:18, 11.72it/s][A
 19%|██████▍                           | 22961/122310 [45:41<2:21:20, 11.71it/s][A
 19%|██████▍                           | 22973/122310 [45:41<2:08:23, 12.89

step: 32520, loss: 67.06981060146637, epoch: 2



 19%|██████▍                           | 23033/122310 [45:48<2:56:47,  9.36it/s][A
 19%|██████▍                           | 23046/122310 [45:49<2:21:28, 11.69it/s][A
 19%|██████▍                           | 23055/122310 [45:50<2:20:15, 11.79it/s][A
 19%|██████▍                           | 23060/122310 [45:50<2:41:40, 10.23it/s][A
 19%|██████▍                           | 23070/122310 [45:51<2:28:21, 11.15it/s][A
 19%|██████▍                           | 23077/122310 [45:52<2:36:57, 10.54it/s][A
 19%|██████▍                           | 23087/122310 [45:53<2:25:29, 11.37it/s][A
 19%|██████▍                           | 23090/122310 [45:53<3:01:16,  9.12it/s][A
 19%|██████▍                           | 23106/122310 [45:54<2:09:51, 12.73it/s][A
 19%|██████▍                           | 23108/122310 [45:56<3:41:07,  7.48it/s][A
 19%|██████▍                           | 23122/122310 [45:56<2:41:55, 10.21it/s][A
 19%|██████▍                           | 23127/122310 [45:57<2:58:02,  9.28

step: 32540, loss: 85.81573877684085, epoch: 2



 19%|██████▍                           | 23189/122310 [46:03<2:48:34,  9.80it/s][A
 19%|██████▍                           | 23197/122310 [46:04<2:44:56, 10.01it/s][A
 19%|██████▍                           | 23213/122310 [46:05<2:03:14, 13.40it/s][A
 19%|██████▍                           | 23223/122310 [46:06<2:04:02, 13.31it/s][A
 19%|██████▍                           | 23229/122310 [46:06<2:20:49, 11.73it/s][A
 19%|██████▍                           | 23246/122310 [46:07<1:50:40, 14.92it/s][A
 19%|██████▍                           | 23251/122310 [46:08<2:12:57, 12.42it/s][A
 19%|██████▍                           | 23254/122310 [46:09<2:46:29,  9.92it/s][A
 19%|██████▍                           | 23262/122310 [46:09<2:43:50, 10.08it/s][A
 19%|██████▍                           | 23266/122310 [46:10<3:10:47,  8.65it/s][A
 19%|██████▍                           | 23276/122310 [46:11<2:45:18,  9.98it/s][A
 19%|██████▍                           | 23281/122310 [46:12<3:03:43,  8.98

step: 32560, loss: 72.68479637641558, epoch: 2



 19%|██████▍                           | 23360/122310 [46:18<2:43:32, 10.08it/s][A
 19%|██████▍                           | 23374/122310 [46:19<2:10:16, 12.66it/s][A
 19%|██████▍                           | 23381/122310 [46:20<2:21:48, 11.63it/s][A
 19%|██████▌                           | 23388/122310 [46:21<2:30:49, 10.93it/s][A
 19%|██████▌                           | 23392/122310 [46:21<2:58:29,  9.24it/s][A
 19%|██████▌                           | 23395/122310 [46:22<3:37:22,  7.58it/s][A
 19%|██████▌                           | 23402/122310 [46:23<3:22:26,  8.14it/s][A
 19%|██████▌                           | 23412/122310 [46:24<2:50:42,  9.66it/s][A
 19%|██████▌                           | 23418/122310 [46:24<3:00:09,  9.15it/s][A
 19%|██████▌                           | 23427/122310 [46:25<2:45:29,  9.96it/s][A
 19%|██████▌                           | 23434/122310 [46:26<2:49:31,  9.72it/s][A
 19%|██████▌                           | 23442/122310 [46:27<2:45:31,  9.95

step: 32580, loss: 68.7249860145411, epoch: 2



 19%|██████▌                           | 23507/122310 [46:34<2:52:44,  9.53it/s][A
 19%|██████▌                           | 23516/122310 [46:34<2:40:02, 10.29it/s][A
 19%|██████▌                           | 23519/122310 [46:35<3:15:51,  8.41it/s][A
 19%|██████▌                           | 23530/122310 [46:36<2:40:25, 10.26it/s][A
 19%|██████▌                           | 23534/122310 [46:37<3:07:27,  8.78it/s][A
 19%|██████▌                           | 23542/122310 [46:37<2:56:20,  9.33it/s][A
 19%|██████▌                           | 23555/122310 [46:38<2:21:06, 11.66it/s][A
 19%|██████▌                           | 23566/122310 [46:39<2:10:59, 12.56it/s][A
 19%|██████▌                           | 23578/122310 [46:40<2:01:35, 13.53it/s][A
 19%|██████▌                           | 23586/122310 [46:40<2:09:54, 12.67it/s][A
 19%|██████▌                           | 23590/122310 [46:41<2:37:42, 10.43it/s][A
 19%|██████▌                           | 23609/122310 [46:42<1:50:48, 14.85

step: 32600, loss: 83.747519441533, epoch: 2



 19%|██████▌                           | 23692/122310 [46:49<2:17:22, 11.96it/s][A
 19%|██████▌                           | 23696/122310 [46:49<2:44:37,  9.98it/s][A
 19%|██████▌                           | 23711/122310 [46:50<2:07:10, 12.92it/s][A
 19%|██████▌                           | 23724/122310 [46:51<1:55:31, 14.22it/s][A
 19%|██████▌                           | 23731/122310 [46:52<2:08:59, 12.74it/s][A
 19%|██████▌                           | 23735/122310 [46:52<2:36:40, 10.49it/s][A
 19%|██████▌                           | 23741/122310 [46:53<2:49:23,  9.70it/s][A
 19%|██████▌                           | 23756/122310 [46:54<2:08:49, 12.75it/s][A
 19%|██████▌                           | 23767/122310 [46:55<2:03:47, 13.27it/s][A
 19%|██████▌                           | 23785/122310 [46:55<1:40:26, 16.35it/s][A
 19%|██████▌                           | 23790/122310 [46:56<2:01:41, 13.49it/s][A
 19%|██████▌                           | 23794/122310 [46:57<2:29:27, 10.99

step: 32620, loss: 83.81275449200854, epoch: 2



 20%|██████▋                           | 23852/122310 [47:04<2:56:56,  9.27it/s][A
 20%|██████▋                           | 23857/122310 [47:05<3:19:31,  8.22it/s][A
 20%|██████▋                           | 23874/122310 [47:06<2:22:50, 11.48it/s][A
 20%|██████▋                           | 23883/122310 [47:06<2:24:43, 11.33it/s][A
 20%|██████▋                           | 23885/122310 [47:07<3:05:10,  8.86it/s][A
 20%|██████▋                           | 23894/122310 [47:08<2:51:19,  9.57it/s][A
 20%|██████▋                           | 23899/122310 [47:09<3:12:12,  8.53it/s][A
 20%|██████▋                           | 23915/122310 [47:10<2:18:00, 11.88it/s][A
 20%|██████▋                           | 23918/122310 [47:11<2:54:32,  9.40it/s][A
 20%|██████▋                           | 23933/122310 [47:11<2:16:13, 12.04it/s][A
 20%|██████▋                           | 23939/122310 [47:12<2:34:44, 10.59it/s][A
 20%|██████▋                           | 23950/122310 [47:13<2:22:33, 11.50

step: 32640, loss: 79.6604741265298, epoch: 2



 20%|██████▋                           | 24020/122310 [47:20<3:04:31,  8.88it/s][A
 20%|██████▋                           | 24021/122310 [47:21<4:02:34,  6.75it/s][A
 20%|██████▋                           | 24035/122310 [47:22<2:41:24, 10.15it/s][A
 20%|██████▋                           | 24042/122310 [47:23<2:45:35,  9.89it/s][A
 20%|██████▋                           | 24046/122310 [47:23<3:11:51,  8.54it/s][A
 20%|██████▋                           | 24054/122310 [47:24<2:59:41,  9.11it/s][A
 20%|██████▋                           | 24071/122310 [47:25<2:06:21, 12.96it/s][A
 20%|██████▋                           | 24073/122310 [47:26<2:44:38,  9.94it/s][A
 20%|██████▋                           | 24083/122310 [47:26<2:31:20, 10.82it/s][A
 20%|██████▋                           | 24093/122310 [47:27<2:22:20, 11.50it/s][A
 20%|██████▋                           | 24101/122310 [47:28<2:25:59, 11.21it/s][A
 20%|██████▋                           | 24107/122310 [47:29<2:40:17, 10.21

step: 32660, loss: 68.69867787096337, epoch: 2



 20%|██████▋                           | 24198/122310 [47:36<2:24:32, 11.31it/s][A
 20%|██████▋                           | 24205/122310 [47:36<2:32:21, 10.73it/s][A
 20%|██████▋                           | 24211/122310 [47:37<2:45:28,  9.88it/s][A
 20%|██████▋                           | 24214/122310 [47:38<3:21:55,  8.10it/s][A
 20%|██████▋                           | 24224/122310 [47:39<2:50:14,  9.60it/s][A
 20%|██████▋                           | 24232/122310 [47:39<2:45:13,  9.89it/s][A
 20%|██████▋                           | 24242/122310 [47:40<2:30:32, 10.86it/s][A
 20%|██████▋                           | 24245/122310 [47:41<3:07:06,  8.73it/s][A
 20%|██████▋                           | 24250/122310 [47:42<3:22:53,  8.06it/s][A
 20%|██████▋                           | 24260/122310 [47:43<3:42:02,  7.36it/s][A
 20%|██████▋                           | 24265/122310 [47:44<3:47:07,  7.19it/s][A
 20%|██████▋                           | 24273/122310 [47:45<3:23:07,  8.04

step: 32680, loss: 80.0557474650117, epoch: 2



 20%|██████▊                           | 24345/122310 [47:51<2:09:03, 12.65it/s][A
 20%|██████▊                           | 24355/122310 [47:52<2:07:09, 12.84it/s][A
 20%|██████▊                           | 24367/122310 [47:52<1:58:50, 13.74it/s][A
 20%|██████▊                           | 24381/122310 [47:53<1:47:51, 15.13it/s][A
 20%|██████▊                           | 24386/122310 [47:54<2:09:57, 12.56it/s][A
 20%|██████▊                           | 24397/122310 [47:55<2:04:35, 13.10it/s][A
 20%|██████▊                           | 24399/122310 [47:55<2:43:27,  9.98it/s][A
 20%|██████▊                           | 24407/122310 [47:56<2:41:00, 10.13it/s][A
 20%|██████▊                           | 24416/122310 [47:57<2:32:46, 10.68it/s][A
 20%|██████▊                           | 24421/122310 [47:58<2:52:47,  9.44it/s][A
 20%|██████▊                           | 24427/122310 [47:58<3:01:41,  8.98it/s][A
 20%|██████▊                           | 24439/122310 [47:59<2:28:07, 11.01

step: 32700, loss: 70.04578428912225, epoch: 2



 20%|██████▌                          | 24491/122310 [48:15<17:54:24,  1.52it/s][A
 20%|██████▌                          | 24497/122310 [48:16<13:41:08,  1.99it/s][A
 20%|██████▊                           | 24505/122310 [48:16<9:43:02,  2.80it/s][A
 20%|██████▊                           | 24509/122310 [48:17<8:48:26,  3.08it/s][A
 20%|██████▊                           | 24518/122310 [48:18<6:13:35,  4.36it/s][A
 20%|██████▊                           | 24523/122310 [48:19<5:43:47,  4.74it/s][A
 20%|██████▊                           | 24541/122310 [48:19<3:12:19,  8.47it/s][A
 20%|██████▊                           | 24557/122310 [48:20<2:25:16, 11.21it/s][A

step: 32720, loss: 71.88839123739577, epoch: 2



 20%|██████▊                           | 24562/122310 [48:21<2:41:46, 10.07it/s][A
 20%|██████▊                           | 24575/122310 [48:22<2:16:42, 11.92it/s][A
 20%|██████▊                           | 24577/122310 [48:22<2:51:44,  9.48it/s][A
 20%|██████▊                           | 24584/122310 [48:23<2:52:20,  9.45it/s][A
 20%|██████▊                           | 24595/122310 [48:24<2:28:57, 10.93it/s][A
 20%|██████▊                           | 24603/122310 [48:25<2:30:01, 10.85it/s][A
 20%|██████▊                           | 24616/122310 [48:25<2:07:57, 12.72it/s][A
 20%|██████▊                           | 24623/122310 [48:26<2:18:19, 11.77it/s][A
 20%|██████▊                           | 24633/122310 [48:27<2:13:11, 12.22it/s][A
 20%|██████▊                           | 24638/122310 [48:28<2:34:20, 10.55it/s][A
 20%|██████▊                           | 24645/122310 [48:28<2:39:05, 10.23it/s][A
 20%|██████▊                           | 24650/122310 [48:31<4:42:53,  5.75

step: 32740, loss: 96.88381567862118, epoch: 2



 20%|██████▊                           | 24715/122310 [48:36<3:01:54,  8.94it/s][A
 20%|██████▊                           | 24723/122310 [48:37<2:51:42,  9.47it/s][A
 20%|██████▊                           | 24729/122310 [48:37<2:59:37,  9.05it/s][A
 20%|██████▉                           | 24737/122310 [48:38<2:51:22,  9.49it/s][A
 20%|██████▉                           | 24740/122310 [48:39<3:26:13,  7.89it/s][A
 20%|██████▉                           | 24749/122310 [48:40<2:58:17,  9.12it/s][A
 20%|██████▉                           | 24764/122310 [48:40<2:11:12, 12.39it/s][A
 20%|██████▉                           | 24777/122310 [48:41<1:57:06, 13.88it/s][A
 20%|██████▉                           | 24786/122310 [48:42<2:01:59, 13.32it/s][A
 20%|██████▉                           | 24800/122310 [48:43<1:48:37, 14.96it/s][A
 20%|██████▉                           | 24806/122310 [48:43<2:06:03, 12.89it/s][A
 20%|██████▉                           | 24816/122310 [48:44<2:04:54, 13.01

step: 32760, loss: 88.26846795156503, epoch: 2



 20%|██████▉                           | 24888/122310 [48:51<2:10:50, 12.41it/s][A
 20%|██████▉                           | 24894/122310 [48:52<2:25:26, 11.16it/s][A
 20%|██████▉                           | 24907/122310 [48:52<2:04:20, 13.06it/s][A
 20%|██████▉                           | 24915/122310 [48:53<2:11:12, 12.37it/s][A
 20%|██████▉                           | 24920/122310 [48:54<2:32:00, 10.68it/s][A
 20%|██████▉                           | 24930/122310 [48:55<2:21:46, 11.45it/s][A
 20%|██████▉                           | 24933/122310 [48:55<2:55:58,  9.22it/s][A
 20%|██████▉                           | 24941/122310 [48:56<2:48:23,  9.64it/s][A
 20%|██████▉                           | 24944/122310 [48:57<3:23:43,  7.97it/s][A
 20%|██████▉                           | 24949/122310 [48:58<3:34:41,  7.56it/s][A
 20%|██████▉                           | 24955/122310 [48:58<3:31:10,  7.68it/s][A
 20%|██████▉                           | 24960/122310 [48:59<3:42:39,  7.29

step: 32780, loss: 75.2089392238306, epoch: 2



 20%|██████▉                           | 25033/122310 [49:06<2:17:37, 11.78it/s][A
 20%|██████▉                           | 25036/122310 [49:07<2:50:37,  9.50it/s][A
 20%|██████▉                           | 25044/122310 [49:07<2:44:19,  9.86it/s][A
 20%|██████▉                           | 25055/122310 [49:08<2:23:07, 11.32it/s][A
 20%|██████▉                           | 25064/122310 [49:09<2:20:28, 11.54it/s][A
 20%|██████▉                           | 25068/122310 [49:11<4:26:48,  6.07it/s][A
 21%|██████▉                           | 25076/122310 [49:12<3:49:34,  7.06it/s][A
 21%|██████▉                           | 25088/122310 [49:13<2:57:12,  9.14it/s][A
 21%|██████▉                           | 25094/122310 [49:13<3:02:53,  8.86it/s][A
 21%|██████▉                           | 25100/122310 [49:14<3:07:37,  8.64it/s][A
 21%|██████▉                           | 25111/122310 [49:15<2:37:33, 10.28it/s][A
 21%|██████▉                           | 25117/122310 [49:16<2:47:26,  9.67

step: 32800, loss: 86.12012713559976, epoch: 2
sim1 and sim2 are 0.358987463103431, 0.21686469142406828
cosine of pred and queen: 0.18248923620580784
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: germany
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahar


 21%|██████▉                           | 25164/122310 [49:33<2:37:20, 10.29it/s][A

Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: spain
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: commission
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: ukrainian
Actual: denmark:danish::germany:german, pred


 21%|██████▊                          | 25176/122310 [50:37<70:33:52,  2.62s/it][A

Actual: india:rupee::denmark:krone, pred: spain
Accuracy is 0.13609467455621302



 21%|██████▊                          | 25182/122310 [50:37<55:39:23,  2.06s/it][A
 21%|██████▊                          | 25192/122310 [50:38<37:07:56,  1.38s/it][A
 21%|██████▊                          | 25197/122310 [50:39<30:32:44,  1.13s/it][A
 21%|██████▊                          | 25208/122310 [50:39<19:30:54,  1.38it/s][A
 21%|██████▊                          | 25223/122310 [50:40<11:43:29,  2.30it/s][A
 21%|███████                           | 25234/122310 [50:41<8:38:56,  3.12it/s][A
 21%|███████                           | 25238/122310 [50:42<8:07:38,  3.32it/s][A
 21%|███████                           | 25246/122310 [50:42<6:31:46,  4.13it/s][A
 21%|███████                           | 25250/122310 [50:43<6:14:19,  4.32it/s][A
 21%|███████                           | 25259/122310 [50:44<4:48:31,  5.61it/s][A
 21%|███████                           | 25263/122310 [50:44<4:49:05,  5.60it/s][A
 21%|███████                           | 25272/122310 [50:45<3:50:31,  7.02

step: 32820, loss: 74.32678660866732, epoch: 2



 21%|███████                           | 25326/122310 [50:52<2:55:50,  9.19it/s][A
 21%|███████                           | 25339/122310 [50:52<2:17:28, 11.76it/s][A
 21%|███████                           | 25348/122310 [50:53<2:15:22, 11.94it/s][A
 21%|███████                           | 25357/122310 [50:54<2:14:02, 12.06it/s][A
 21%|███████                           | 25364/122310 [50:55<2:22:56, 11.30it/s][A
 21%|███████                           | 25386/122310 [50:55<1:35:44, 16.87it/s][A
 21%|███████                           | 25400/122310 [50:56<1:32:01, 17.55it/s][A
 21%|███████                           | 25409/122310 [50:57<1:40:49, 16.02it/s][A
 21%|███████                           | 25422/122310 [50:58<1:37:54, 16.49it/s][A
 21%|███████                           | 25430/122310 [50:58<1:48:48, 14.84it/s][A
 21%|███████                           | 25436/122310 [50:59<2:05:09, 12.90it/s][A
 21%|███████                           | 25446/122310 [51:00<2:02:55, 13.13

step: 32840, loss: 92.03833173737411, epoch: 2



 21%|███████                           | 25515/122310 [51:06<2:40:04, 10.08it/s][A
 21%|███████                           | 25524/122310 [51:07<2:30:05, 10.75it/s][A
 21%|███████                           | 25531/122310 [51:08<2:35:01, 10.40it/s][A
 21%|███████                           | 25542/122310 [51:09<2:17:18, 11.75it/s][A
 21%|███████                           | 25555/122310 [51:09<1:59:20, 13.51it/s][A
 21%|███████                           | 25569/122310 [51:10<1:46:11, 15.18it/s][A
 21%|███████                           | 25576/122310 [51:11<1:59:42, 13.47it/s][A
 21%|███████                           | 25581/122310 [51:11<2:20:16, 11.49it/s][A
 21%|███████                           | 25592/122310 [51:12<2:08:37, 12.53it/s][A
 21%|███████                           | 25594/122310 [51:13<2:47:34,  9.62it/s][A
 21%|███████                           | 25604/122310 [51:14<2:28:41, 10.84it/s][A
 21%|███████                           | 25612/122310 [51:14<2:28:16, 10.87

step: 32860, loss: 88.64901641271085, epoch: 2



 21%|███████▏                          | 25681/122310 [51:21<2:35:43, 10.34it/s][A
 21%|███████▏                          | 25691/122310 [51:22<2:21:44, 11.36it/s][A
 21%|███████▏                          | 25701/122310 [51:22<2:13:27, 12.07it/s][A
 21%|███████▏                          | 25713/122310 [51:23<2:00:22, 13.37it/s][A
 21%|███████▏                          | 25719/122310 [51:24<2:16:21, 11.81it/s][A
 21%|███████▏                          | 25734/122310 [51:25<1:51:55, 14.38it/s][A
 21%|███████▏                          | 25755/122310 [51:25<1:26:23, 18.63it/s][A
 21%|███████▏                          | 25768/122310 [51:26<1:27:59, 18.29it/s][A
 21%|███████▏                          | 25772/122310 [51:27<1:52:28, 14.30it/s][A
 21%|███████▏                          | 25777/122310 [51:28<2:13:00, 12.10it/s][A
 21%|███████▏                          | 25795/122310 [51:28<1:41:44, 15.81it/s][A
 21%|███████▏                          | 25797/122310 [51:29<2:15:06, 11.90

step: 32880, loss: 87.75764627653287, epoch: 2



 21%|███████▏                          | 25865/122310 [51:36<2:32:09, 10.56it/s][A
 21%|███████▏                          | 25871/122310 [51:36<2:43:43,  9.82it/s][A
 21%|███████▏                          | 25878/122310 [51:37<2:44:15,  9.79it/s][A
 21%|███████▏                          | 25883/122310 [51:38<3:00:40,  8.89it/s][A
 21%|███████▏                          | 25891/122310 [51:39<2:49:17,  9.49it/s][A
 21%|███████▏                          | 25902/122310 [51:39<2:24:31, 11.12it/s][A
 21%|███████▏                          | 25909/122310 [51:40<2:30:33, 10.67it/s][A
 21%|███████▏                          | 25923/122310 [51:41<2:01:38, 13.21it/s][A
 21%|███████▏                          | 25925/122310 [51:42<2:39:46, 10.05it/s][A
 21%|███████▏                          | 25930/122310 [51:42<2:57:47,  9.04it/s][A
 21%|███████▏                          | 25931/122310 [51:43<3:56:48,  6.78it/s][A
 21%|███████▏                          | 25936/122310 [51:44<3:56:18,  6.80

step: 32900, loss: 93.23797738459922, epoch: 2



 21%|███████▏                          | 26014/122310 [51:50<2:33:02, 10.49it/s][A
 21%|███████▏                          | 26020/122310 [51:51<2:43:46,  9.80it/s][A
 21%|███████▏                          | 26027/122310 [51:52<2:44:47,  9.74it/s][A
 21%|███████▏                          | 26042/122310 [51:52<2:03:32, 12.99it/s][A
 21%|███████▏                          | 26055/122310 [51:53<1:51:14, 14.42it/s][A
 21%|███████▏                          | 26064/122310 [51:54<1:56:35, 13.76it/s][A
 21%|███████▏                          | 26069/122310 [51:55<2:17:47, 11.64it/s][A
 21%|███████▎                          | 26082/122310 [51:55<1:58:54, 13.49it/s][A
 21%|███████▎                          | 26099/122310 [51:56<1:37:56, 16.37it/s][A
 21%|███████▎                          | 26117/122310 [51:57<1:25:15, 18.80it/s][A
 21%|███████▎                          | 26125/122310 [51:58<1:37:20, 16.47it/s][A
 21%|███████▎                          | 26135/122310 [51:58<1:42:39, 15.61

step: 32920, loss: 73.46090438622747, epoch: 2



 21%|███████▎                          | 26202/122310 [52:07<3:47:01,  7.06it/s][A
 21%|███████▎                          | 26210/122310 [52:07<3:24:10,  7.84it/s][A
 21%|███████▎                          | 26214/122310 [52:08<3:38:51,  7.32it/s][A
 21%|███████▎                          | 26218/122310 [52:09<3:53:29,  6.86it/s][A
 21%|███████▎                          | 26230/122310 [52:09<2:51:24,  9.34it/s][A
 21%|███████▎                          | 26240/122310 [52:10<2:32:25, 10.50it/s][A
 21%|███████▎                          | 26248/122310 [52:11<2:31:10, 10.59it/s][A
 21%|███████▎                          | 26254/122310 [52:12<2:41:19,  9.92it/s][A
 21%|███████▎                          | 26268/122310 [52:12<2:07:20, 12.57it/s][A
 21%|███████▎                          | 26276/122310 [52:13<2:12:19, 12.10it/s][A
 21%|███████▎                          | 26287/122310 [52:14<2:03:35, 12.95it/s][A
 21%|███████▎                          | 26294/122310 [52:15<2:13:46, 11.96

step: 32940, loss: 86.68868745518317, epoch: 2



 22%|███████▎                          | 26348/122310 [52:20<2:38:29, 10.09it/s][A
 22%|███████▎                          | 26362/122310 [52:20<2:04:55, 12.80it/s][A
 22%|███████▎                          | 26379/122310 [52:21<1:40:37, 15.89it/s][A
 22%|███████▎                          | 26389/122310 [52:22<1:45:01, 15.22it/s][A
 22%|███████▎                          | 26402/122310 [52:23<1:39:59, 15.99it/s][A
 22%|███████▎                          | 26410/122310 [52:23<1:50:42, 14.44it/s][A
 22%|███████▎                          | 26416/122310 [52:24<2:07:19, 12.55it/s][A
 22%|███████▎                          | 26419/122310 [52:25<2:39:15, 10.04it/s][A
 22%|███████▎                          | 26427/122310 [52:26<2:35:21, 10.29it/s][A
 22%|███████▎                          | 26436/122310 [52:26<2:27:04, 10.86it/s][A
 22%|███████▎                          | 26444/122310 [52:27<2:26:55, 10.88it/s][A
 22%|███████▎                          | 26456/122310 [52:28<2:07:51, 12.50

step: 32960, loss: 90.6631342882859, epoch: 2



 22%|███████▍                          | 26535/122310 [52:34<1:59:15, 13.38it/s][A
 22%|███████▍                          | 26545/122310 [52:35<1:58:40, 13.45it/s][A
 22%|███████▍                          | 26551/122310 [52:36<2:13:49, 11.93it/s][A
 22%|███████▍                          | 26560/122310 [52:37<2:12:47, 12.02it/s][A
 22%|███████▍                          | 26564/122310 [52:37<2:37:58, 10.10it/s][A
 22%|███████▍                          | 26577/122310 [52:38<2:09:05, 12.36it/s][A
 22%|███████▍                          | 26582/122310 [52:39<2:30:11, 10.62it/s][A
 22%|███████▍                          | 26591/122310 [52:40<2:23:39, 11.10it/s][A
 22%|███████▍                          | 26597/122310 [52:40<2:35:47, 10.24it/s][A
 22%|███████▍                          | 26609/122310 [52:41<2:12:15, 12.06it/s][A
 22%|███████▍                          | 26611/122310 [52:42<2:52:35,  9.24it/s][A
 22%|███████▍                          | 26622/122310 [52:43<2:25:18, 10.98

step: 32980, loss: 66.92264824008627, epoch: 2



 22%|███████▍                          | 26700/122310 [52:49<2:23:32, 11.10it/s][A
 22%|███████▍                          | 26714/122310 [52:50<1:58:15, 13.47it/s][A
 22%|███████▍                          | 26718/122310 [52:51<2:23:40, 11.09it/s][A
 22%|███████▍                          | 26734/122310 [52:51<1:51:52, 14.24it/s][A
 22%|███████▍                          | 26738/122310 [52:52<2:17:01, 11.62it/s][A
 22%|███████▍                          | 26749/122310 [52:53<2:05:44, 12.67it/s][A
 22%|███████▍                          | 26760/122310 [52:54<1:59:24, 13.34it/s][A
 22%|███████▍                          | 26770/122310 [52:54<1:58:44, 13.41it/s][A
 22%|███████▍                          | 26777/122310 [52:55<2:09:47, 12.27it/s][A
 22%|███████▍                          | 26788/122310 [52:56<2:01:58, 13.05it/s][A
 22%|███████▍                          | 26795/122310 [52:57<2:12:38, 12.00it/s][A
 22%|███████▍                          | 26805/122310 [52:57<2:07:43, 12.46

step: 33000, loss: 78.62869582940976, epoch: 2
saving weights



 22%|███████▍                          | 26878/122310 [53:04<3:08:28,  8.44it/s][A
 22%|███████▍                          | 26885/122310 [53:05<3:04:34,  8.62it/s][A
 22%|███████▍                          | 26893/122310 [53:06<2:53:45,  9.15it/s][A
 22%|███████▍                          | 26907/122310 [53:06<2:15:30, 11.73it/s][A
 22%|███████▍                          | 26915/122310 [53:07<2:18:56, 11.44it/s][A
 22%|███████▍                          | 26922/122310 [53:08<2:26:16, 10.87it/s][A
 22%|███████▍                          | 26933/122310 [53:09<2:12:34, 11.99it/s][A
 22%|███████▍                          | 26944/122310 [53:09<2:03:46, 12.84it/s][A
 22%|███████▍                          | 26959/122310 [53:10<1:45:38, 15.04it/s][A
 22%|███████▍                          | 26964/122310 [53:11<2:06:19, 12.58it/s][A
 22%|███████▍                          | 26970/122310 [53:11<2:20:55, 11.28it/s][A
 22%|███████▌                          | 26981/122310 [53:12<2:08:45, 12.34

step: 33020, loss: 116.46455517535462, epoch: 2



 22%|███████▌                          | 27056/122310 [53:20<2:50:08,  9.33it/s][A
 22%|███████▌                          | 27070/122310 [53:20<2:09:43, 12.24it/s][A
 22%|███████▌                          | 27076/122310 [53:21<2:24:02, 11.02it/s][A
 22%|███████▌                          | 27092/122310 [53:22<1:51:19, 14.25it/s][A
 22%|███████▌                          | 27097/122310 [53:23<2:12:18, 11.99it/s][A
 22%|███████▌                          | 27104/122310 [53:23<2:21:08, 11.24it/s][A
 22%|███████▌                          | 27113/122310 [53:24<2:17:56, 11.50it/s][A
 22%|███████▌                          | 27121/122310 [53:25<2:19:41, 11.36it/s][A
 22%|███████▌                          | 27128/122310 [53:25<2:27:08, 10.78it/s][A
 22%|███████▌                          | 27138/122310 [53:26<2:16:10, 11.65it/s][A
 22%|███████▌                          | 27147/122310 [53:27<2:14:14, 11.81it/s][A
 22%|███████▌                          | 27152/122310 [53:28<2:34:00, 10.30

step: 33040, loss: 88.86162462413456, epoch: 2



 22%|███████▌                          | 27219/122310 [53:34<2:18:38, 11.43it/s][A
 22%|███████▌                          | 27231/122310 [53:35<2:02:52, 12.90it/s][A
 22%|███████▌                          | 27242/122310 [53:36<1:57:32, 13.48it/s][A
 22%|███████▌                          | 27249/122310 [53:36<2:08:56, 12.29it/s][A
 22%|███████▌                          | 27255/122310 [53:37<2:22:43, 11.10it/s][A
 22%|███████▌                          | 27265/122310 [53:38<2:14:01, 11.82it/s][A
 22%|███████▌                          | 27273/122310 [53:39<2:17:15, 11.54it/s][A
 22%|███████▌                          | 27282/122310 [53:39<2:14:42, 11.76it/s][A
 22%|███████▌                          | 27285/122310 [53:40<2:47:42,  9.44it/s][A
 22%|███████▌                          | 27290/122310 [53:42<3:58:44,  6.63it/s][A
 22%|███████▌                          | 27293/122310 [53:42<4:22:25,  6.03it/s][A
 22%|███████▌                          | 27306/122310 [53:43<2:54:15,  9.09

step: 33060, loss: 80.88166738445346, epoch: 2



 22%|███████▌                          | 27371/122310 [53:49<2:31:14, 10.46it/s][A
 22%|███████▌                          | 27378/122310 [53:50<2:35:32, 10.17it/s][A
 22%|███████▌                          | 27385/122310 [53:50<2:38:22,  9.99it/s][A
 22%|███████▌                          | 27394/122310 [53:51<2:28:06, 10.68it/s][A
 22%|███████▌                          | 27403/122310 [53:52<2:22:29, 11.10it/s][A
 22%|███████▌                          | 27405/122310 [53:53<3:03:33,  8.62it/s][A
 22%|███████▌                          | 27414/122310 [53:53<2:42:31,  9.73it/s][A
 22%|███████▌                          | 27429/122310 [53:54<2:02:18, 12.93it/s][A
 22%|███████▋                          | 27435/122310 [53:55<2:17:32, 11.50it/s][A
 22%|███████▋                          | 27451/122310 [53:56<1:49:09, 14.48it/s][A
 22%|███████▋                          | 27458/122310 [53:56<2:00:54, 13.08it/s][A
 22%|███████▋                          | 27468/122310 [53:58<2:35:41, 10.15

step: 33080, loss: 74.82922204548933, epoch: 2



 23%|███████▋                          | 27529/122310 [54:04<2:58:08,  8.87it/s][A
 23%|███████▋                          | 27540/122310 [54:04<2:28:07, 10.66it/s][A
 23%|███████▋                          | 27553/122310 [54:05<2:03:40, 12.77it/s][A
 23%|███████▋                          | 27563/122310 [54:06<2:01:08, 13.03it/s][A
 23%|███████▋                          | 27569/122310 [54:07<2:16:29, 11.57it/s][A
 23%|███████▋                          | 27574/122310 [54:07<2:35:45, 10.14it/s][A
 23%|███████▋                          | 27577/122310 [54:08<3:10:44,  8.28it/s][A
 23%|███████▋                          | 27578/122310 [54:09<4:13:10,  6.24it/s][A
 23%|███████▋                          | 27592/122310 [54:10<2:36:50, 10.06it/s][A
 23%|███████▋                          | 27601/122310 [54:10<2:27:37, 10.69it/s][A
 23%|███████▋                          | 27611/122310 [54:11<2:16:27, 11.57it/s][A
 23%|███████▋                          | 27623/122310 [54:12<2:02:00, 12.93

step: 33100, loss: 71.41049873077314, epoch: 2



 23%|███████▋                          | 27698/122310 [54:18<2:20:35, 11.22it/s][A
 23%|███████▋                          | 27709/122310 [54:19<2:07:52, 12.33it/s][A
 23%|███████▋                          | 27714/122310 [54:20<2:27:30, 10.69it/s][A
 23%|███████▋                          | 27718/122310 [54:21<2:53:12,  9.10it/s][A
 23%|███████▋                          | 27721/122310 [54:21<3:27:10,  7.61it/s][A
 23%|███████▋                          | 27733/122310 [54:22<2:34:50, 10.18it/s][A
 23%|███████▋                          | 27737/122310 [54:23<2:59:16,  8.79it/s][A
 23%|███████▋                          | 27743/122310 [54:24<3:04:34,  8.54it/s][A
 23%|███████▋                          | 27744/122310 [54:24<4:05:37,  6.42it/s][A
 23%|███████▋                          | 27751/122310 [54:27<5:45:01,  4.57it/s][A
 23%|███████▋                          | 27766/122310 [54:27<3:20:17,  7.87it/s][A
 23%|███████▋                          | 27770/122310 [54:28<3:35:13,  7.32

step: 33120, loss: 69.72381515412158, epoch: 2



 23%|███████▋                          | 27843/122310 [54:33<2:09:57, 12.12it/s][A
 23%|███████▋                          | 27850/122310 [54:34<2:18:57, 11.33it/s][A
 23%|███████▋                          | 27861/122310 [54:35<2:07:08, 12.38it/s][A
 23%|███████▋                          | 27867/122310 [54:35<2:21:40, 11.11it/s][A
 23%|███████▋                          | 27873/122310 [54:36<2:34:03, 10.22it/s][A
 23%|███████▋                          | 27877/122310 [54:37<2:58:56,  8.80it/s][A
 23%|███████▊                          | 27889/122310 [54:38<2:22:55, 11.01it/s][A
 23%|███████▊                          | 27895/122310 [54:38<2:34:11, 10.21it/s][A
 23%|███████▊                          | 27907/122310 [54:39<2:11:09, 12.00it/s][A
 23%|███████▊                          | 27914/122310 [54:40<2:19:28, 11.28it/s][A
 23%|███████▊                          | 27919/122310 [54:41<2:38:55,  9.90it/s][A
 23%|███████▊                          | 27931/122310 [54:41<2:13:04, 11.82

step: 33140, loss: 71.34100360057924, epoch: 2



 23%|███████▊                          | 28009/122310 [54:48<1:52:02, 14.03it/s][A
 23%|███████▊                          | 28015/122310 [54:49<2:08:16, 12.25it/s][A
 23%|███████▊                          | 28020/122310 [54:49<2:27:38, 10.64it/s][A
 23%|███████▊                          | 28024/122310 [54:50<2:53:05,  9.08it/s][A
 23%|███████▊                          | 28029/122310 [54:52<4:03:52,  6.44it/s][A
 23%|███████▊                          | 28036/122310 [54:52<3:37:36,  7.22it/s][A
 23%|███████▊                          | 28047/122310 [54:53<2:50:03,  9.24it/s][A
 23%|███████▊                          | 28052/122310 [54:54<3:03:19,  8.57it/s][A
 23%|███████▊                          | 28059/122310 [54:55<2:58:01,  8.82it/s][A
 23%|███████▊                          | 28071/122310 [54:55<2:23:25, 10.95it/s][A
 23%|███████▊                          | 28077/122310 [54:56<2:34:45, 10.15it/s][A
 23%|███████▊                          | 28087/122310 [54:57<2:20:30, 11.18

step: 33160, loss: 82.80723486827875, epoch: 2



 23%|███████▊                          | 28145/122310 [55:03<2:23:54, 10.91it/s][A
 23%|███████▊                          | 28149/122310 [55:03<2:48:46,  9.30it/s][A
 23%|███████▊                          | 28159/122310 [55:04<2:28:05, 10.60it/s][A
 23%|███████▊                          | 28167/122310 [55:05<2:26:56, 10.68it/s][A
 23%|███████▊                          | 28171/122310 [55:06<2:52:05,  9.12it/s][A
 23%|███████▊                          | 28181/122310 [55:06<2:29:48, 10.47it/s][A
 23%|███████▊                          | 28194/122310 [55:07<2:04:28, 12.60it/s][A
 23%|███████▊                          | 28197/122310 [55:08<2:35:37, 10.08it/s][A
 23%|███████▊                          | 28204/122310 [55:09<2:38:30,  9.89it/s][A
 23%|███████▊                          | 28216/122310 [55:09<2:12:47, 11.81it/s][A
 23%|███████▊                          | 28231/122310 [55:10<1:49:00, 14.39it/s][A
 23%|███████▊                          | 28244/122310 [55:11<1:41:55, 15.38

step: 33180, loss: 74.44782712788565, epoch: 2



 23%|███████▊                          | 28326/122310 [55:17<2:30:58, 10.38it/s][A
 23%|███████▉                          | 28337/122310 [55:18<2:13:16, 11.75it/s][A
 23%|███████▉                          | 28347/122310 [55:19<2:07:23, 12.29it/s][A
 23%|███████▉                          | 28349/122310 [55:20<2:45:47,  9.45it/s][A
 23%|███████▉                          | 28362/122310 [55:20<2:11:20, 11.92it/s][A
 23%|███████▉                          | 28371/122310 [55:21<2:10:24, 12.01it/s][A
 23%|███████▉                          | 28385/122310 [55:22<1:50:51, 14.12it/s][A
 23%|███████▉                          | 28389/122310 [55:23<2:16:07, 11.50it/s][A
 23%|███████▉                          | 28396/122310 [55:23<2:23:39, 10.90it/s][A
 23%|███████▉                          | 28402/122310 [55:24<2:36:11, 10.02it/s][A
 23%|███████▉                          | 28407/122310 [55:25<2:53:10,  9.04it/s][A
 23%|███████▉                          | 28414/122310 [55:25<2:50:25,  9.18

step: 33200, loss: 93.38827116758726, epoch: 2
sim1 and sim2 are 0.5113404115032227, 0.2104911974881895
cosine of pred and queen: 0.27697083314930854
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maha


 23%|███████▉                          | 28478/122310 [55:43<2:19:40, 11.20it/s][A

Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: cricket
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: spain
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: complex
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish


 23%|███████▋                         | 28484/122310 [56:48<81:22:57,  3.12s/it][A

Actual: india:rupee::denmark:krone, pred: spain
Accuracy is 0.14201183431952663



 23%|███████▋                         | 28492/122310 [56:48<56:45:23,  2.18s/it][A
 23%|███████▋                         | 28494/122310 [56:49<52:00:32,  2.00s/it][A
 23%|███████▋                         | 28498/122310 [56:50<41:30:04,  1.59s/it][A
 23%|███████▋                         | 28503/122310 [56:51<30:46:30,  1.18s/it][A
 23%|███████▋                         | 28515/122310 [56:51<16:22:01,  1.59it/s][A
 23%|███████▋                         | 28521/122310 [56:52<12:56:00,  2.01it/s][A
 23%|███████▉                          | 28533/122310 [56:53<8:06:02,  3.22it/s][A
 23%|███████▉                          | 28535/122310 [56:54<8:16:56,  3.15it/s][A
 23%|███████▉                          | 28549/122310 [56:54<4:59:33,  5.22it/s][A
 23%|███████▉                          | 28558/122310 [56:55<4:07:46,  6.31it/s][A
 23%|███████▉                          | 28563/122310 [56:56<4:04:30,  6.39it/s][A
 23%|███████▉                          | 28575/122310 [56:57<3:05:21,  8.43

step: 33220, loss: 93.78845225123438, epoch: 2



 23%|███████▉                          | 28654/122310 [57:03<2:27:28, 10.58it/s][A
 23%|███████▉                          | 28660/122310 [57:04<2:38:40,  9.84it/s][A
 23%|███████▉                          | 28672/122310 [57:05<2:12:37, 11.77it/s][A
 23%|███████▉                          | 28680/122310 [57:05<2:15:44, 11.50it/s][A
 23%|███████▉                          | 28692/122310 [57:06<2:01:06, 12.88it/s][A
 23%|███████▉                          | 28698/122310 [57:07<2:16:01, 11.47it/s][A
 23%|███████▉                          | 28707/122310 [57:08<2:13:32, 11.68it/s][A
 23%|███████▉                          | 28718/122310 [57:08<2:03:31, 12.63it/s][A
 23%|███████▉                          | 28725/122310 [57:09<2:13:34, 11.68it/s][A
 23%|███████▉                          | 28735/122310 [57:10<2:07:37, 12.22it/s][A
 23%|███████▉                          | 28741/122310 [57:11<2:22:15, 10.96it/s][A
 24%|███████▉                          | 28744/122310 [57:11<2:55:25,  8.89

step: 33240, loss: 70.64114524025845, epoch: 2



 24%|████████                          | 28815/122310 [57:18<2:42:12,  9.61it/s][A
 24%|████████                          | 28828/122310 [57:19<2:10:10, 11.97it/s][A
 24%|████████                          | 28846/122310 [57:20<1:40:06, 15.56it/s][A
 24%|████████                          | 28854/122310 [57:20<1:50:02, 14.15it/s][A
 24%|████████                          | 28860/122310 [57:21<2:06:15, 12.34it/s][A
 24%|████████                          | 28864/122310 [57:22<2:31:51, 10.26it/s][A
 24%|████████                          | 28873/122310 [57:23<2:23:38, 10.84it/s][A
 24%|████████                          | 28880/122310 [57:23<2:29:21, 10.43it/s][A
 24%|████████                          | 28890/122310 [57:24<2:17:26, 11.33it/s][A
 24%|████████                          | 28902/122310 [57:25<2:03:05, 12.65it/s][A
 24%|████████                          | 28914/122310 [57:26<1:54:04, 13.65it/s][A
 24%|████████                          | 28925/122310 [57:26<1:51:08, 14.00

step: 33260, loss: 64.91083686020283, epoch: 2



 24%|████████                          | 28995/122310 [57:33<3:05:36,  8.38it/s][A
 24%|████████                          | 28999/122310 [57:34<3:30:09,  7.40it/s][A
 24%|████████                          | 29011/122310 [57:35<2:36:18,  9.95it/s][A
 24%|████████                          | 29017/122310 [57:35<2:45:38,  9.39it/s][A
 24%|████████                          | 29018/122310 [57:36<3:42:50,  6.98it/s][A
 24%|████████                          | 29021/122310 [57:37<4:18:45,  6.01it/s][A
 24%|████████                          | 29028/122310 [57:38<3:43:16,  6.96it/s][A
 24%|████████                          | 29036/122310 [57:38<3:12:13,  8.09it/s][A
 24%|████████                          | 29043/122310 [57:39<3:03:16,  8.48it/s][A
 24%|████████                          | 29049/122310 [57:40<3:06:08,  8.35it/s][A
 24%|████████                          | 29051/122310 [57:41<3:53:38,  6.65it/s][A
 24%|████████                          | 29058/122310 [57:41<3:29:07,  7.43

step: 33280, loss: 81.98391883306398, epoch: 2



 24%|████████                          | 29126/122310 [57:48<2:16:45, 11.36it/s][A
 24%|████████                          | 29131/122310 [57:49<2:34:58, 10.02it/s][A
 24%|████████                          | 29142/122310 [57:50<2:15:38, 11.45it/s][A
 24%|████████                          | 29153/122310 [57:50<2:04:31, 12.47it/s][A
 24%|████████                          | 29166/122310 [57:51<1:51:09, 13.97it/s][A
 24%|████████                          | 29171/122310 [57:52<2:11:31, 11.80it/s][A
 24%|████████                          | 29176/122310 [57:53<2:30:43, 10.30it/s][A
 24%|████████                          | 29187/122310 [57:53<2:13:21, 11.64it/s][A
 24%|████████                          | 29189/122310 [57:54<2:53:59,  8.92it/s][A
 24%|████████                          | 29196/122310 [57:55<2:50:56,  9.08it/s][A
 24%|████████                          | 29209/122310 [57:56<2:14:37, 11.53it/s][A
 24%|████████                          | 29213/122310 [57:56<2:40:00,  9.70

step: 33300, loss: 73.9641858120139, epoch: 2



 24%|████████▏                         | 29289/122310 [58:03<2:54:59,  8.86it/s][A
 24%|████████▏                         | 29295/122310 [58:04<2:59:43,  8.63it/s][A
 24%|████████▏                         | 29316/122310 [58:05<1:46:54, 14.50it/s][A
 24%|████████▏                         | 29322/122310 [58:05<2:03:21, 12.56it/s][A
 24%|████████▏                         | 29331/122310 [58:06<2:04:40, 12.43it/s][A
 24%|████████▏                         | 29341/122310 [58:07<2:01:48, 12.72it/s][A
 24%|████████▏                         | 29344/122310 [58:08<2:33:20, 10.10it/s][A
 24%|████████▏                         | 29354/122310 [58:08<2:20:13, 11.05it/s][A
 24%|████████▏                         | 29356/122310 [58:09<3:00:33,  8.58it/s][A
 24%|████████▏                         | 29367/122310 [58:10<2:28:28, 10.43it/s][A
 24%|████████▏                         | 29378/122310 [58:11<2:12:20, 11.70it/s][A
 24%|████████▏                         | 29390/122310 [58:11<1:59:02, 13.01

step: 33320, loss: 70.6929294395792, epoch: 2



 24%|████████▏                         | 29462/122310 [58:18<2:21:06, 10.97it/s][A
 24%|████████▏                         | 29468/122310 [58:19<2:32:27, 10.15it/s][A
 24%|████████▏                         | 29476/122310 [58:20<2:30:30, 10.28it/s][A
 24%|████████▏                         | 29486/122310 [58:20<2:18:00, 11.21it/s][A
 24%|████████▏                         | 29500/122310 [58:21<1:55:17, 13.42it/s][A
 24%|████████▏                         | 29502/122310 [58:22<2:31:00, 10.24it/s][A
 24%|████████▏                         | 29506/122310 [58:23<2:55:31,  8.81it/s][A
 24%|████████▏                         | 29520/122310 [58:24<2:50:19,  9.08it/s][A
 24%|████████▏                         | 29526/122310 [58:25<2:54:56,  8.84it/s][A
 24%|████████▏                         | 29537/122310 [58:26<2:29:46, 10.32it/s][A
 24%|████████▏                         | 29542/122310 [58:26<2:45:46,  9.33it/s][A
 24%|████████▏                         | 29560/122310 [58:27<1:54:54, 13.45

step: 33340, loss: 72.81866142834672, epoch: 2



 24%|████████▏                         | 29637/122310 [58:33<2:13:56, 11.53it/s][A
 24%|████████▏                         | 29650/122310 [58:34<1:55:55, 13.32it/s][A
 24%|████████▏                         | 29657/122310 [58:34<2:07:02, 12.16it/s][A
 24%|████████▏                         | 29671/122310 [58:35<1:49:29, 14.10it/s][A
 24%|████████▎                         | 29680/122310 [58:36<1:54:18, 13.51it/s][A
 24%|████████▎                         | 29687/122310 [58:37<2:05:43, 12.28it/s][A
 24%|████████▎                         | 29700/122310 [58:37<1:51:30, 13.84it/s][A
 24%|████████▎                         | 29709/122310 [58:38<1:55:47, 13.33it/s][A
 24%|████████▎                         | 29716/122310 [58:39<2:07:08, 12.14it/s][A
 24%|████████▎                         | 29723/122310 [58:40<2:17:14, 11.24it/s][A
 24%|████████▎                         | 29737/122310 [58:40<1:54:25, 13.48it/s][A
 24%|████████▎                         | 29749/122310 [58:41<1:49:32, 14.08

step: 33360, loss: 87.54267290472319, epoch: 2



 24%|████████▎                         | 29829/122310 [58:48<1:52:55, 13.65it/s][A
 24%|████████▎                         | 29836/122310 [58:49<2:04:22, 12.39it/s][A
 24%|████████▎                         | 29844/122310 [58:49<2:09:59, 11.85it/s][A
 24%|████████▎                         | 29850/122310 [58:50<2:23:53, 10.71it/s][A
 24%|████████▎                         | 29865/122310 [58:51<1:54:10, 13.50it/s][A
 24%|████████▎                         | 29872/122310 [58:52<2:05:17, 12.30it/s][A
 24%|████████▎                         | 29882/122310 [58:52<2:01:45, 12.65it/s][A
 24%|████████▎                         | 29886/122310 [58:53<2:27:25, 10.45it/s][A
 24%|████████▎                         | 29895/122310 [58:54<2:20:52, 10.93it/s][A
 24%|████████▎                         | 29901/122310 [58:55<2:33:04, 10.06it/s][A
 24%|████████▎                         | 29907/122310 [58:55<2:44:34,  9.36it/s][A
 24%|████████▎                         | 29923/122310 [58:56<1:58:45, 12.97

step: 33380, loss: 79.23336585929137, epoch: 2



 25%|████████▎                         | 29997/122310 [59:03<2:27:37, 10.42it/s][A
 25%|████████▎                         | 30011/122310 [59:04<2:00:43, 12.74it/s][A
 25%|████████▎                         | 30015/122310 [59:05<2:27:54, 10.40it/s][A
 25%|████████▎                         | 30020/122310 [59:05<2:47:59,  9.16it/s][A
 25%|████████▎                         | 30027/122310 [59:06<2:47:48,  9.17it/s][A
 25%|████████▎                         | 30033/122310 [59:07<2:54:32,  8.81it/s][A
 25%|████████▎                         | 30038/122310 [59:08<3:09:40,  8.11it/s][A
 25%|████████▎                         | 30043/122310 [59:08<3:26:21,  7.45it/s][A
 25%|████████▎                         | 30055/122310 [59:09<2:37:47,  9.74it/s][A
 25%|████████▎                         | 30063/122310 [59:10<2:35:20,  9.90it/s][A
 25%|████████▎                         | 30070/122310 [59:11<2:40:06,  9.60it/s][A
 25%|████████▎                         | 30077/122310 [59:12<2:43:42,  9.39

step: 33400, loss: 84.07072488118436, epoch: 2



 25%|████████▍                         | 30166/122310 [59:18<2:01:32, 12.63it/s][A
 25%|████████▍                         | 30171/122310 [59:19<2:21:29, 10.85it/s][A
 25%|████████▍                         | 30174/122310 [59:20<2:54:16,  8.81it/s][A
 25%|████████▍                         | 30183/122310 [59:21<2:36:35,  9.81it/s][A
 25%|████████▍                         | 30191/122310 [59:21<2:32:16, 10.08it/s][A
 25%|████████▍                         | 30200/122310 [59:22<2:23:53, 10.67it/s][A
 25%|████████▍                         | 30208/122310 [59:23<2:24:03, 10.66it/s][A
 25%|████████▍                         | 30222/122310 [59:24<1:57:42, 13.04it/s][A
 25%|████████▍                         | 30237/122310 [59:24<1:41:58, 15.05it/s][A
 25%|████████▍                         | 30243/122310 [59:25<1:59:11, 12.87it/s][A
 25%|████████▍                         | 30245/122310 [59:26<2:36:25,  9.81it/s][A
 25%|████████▍                         | 30255/122310 [59:27<2:21:39, 10.83

step: 33420, loss: 62.09707286640747, epoch: 2



 25%|████████▍                         | 30322/122310 [59:33<2:12:13, 11.60it/s][A
 25%|████████▍                         | 30331/122310 [59:34<2:10:34, 11.74it/s][A
 25%|████████▍                         | 30339/122310 [59:35<2:14:03, 11.43it/s][A
 25%|████████▍                         | 30347/122310 [59:36<2:17:29, 11.15it/s][A
 25%|████████▍                         | 30359/122310 [59:36<2:02:10, 12.54it/s][A
 25%|████████▍                         | 30373/122310 [59:37<1:46:39, 14.37it/s][A
 25%|████████▍                         | 30377/122310 [59:38<2:10:56, 11.70it/s][A
 25%|████████▍                         | 30387/122310 [59:39<2:06:08, 12.14it/s][A
 25%|████████▍                         | 30397/122310 [59:39<2:03:36, 12.39it/s][A
 25%|████████▍                         | 30406/122310 [59:40<2:04:53, 12.26it/s][A
 25%|████████▍                         | 30419/122310 [59:41<1:51:07, 13.78it/s][A
 25%|████████▍                         | 30425/122310 [59:42<2:07:05, 12.05

step: 33440, loss: 75.02134594620969, epoch: 2



 25%|████████▍                         | 30502/122310 [59:48<1:58:56, 12.86it/s][A
 25%|████████▍                         | 30510/122310 [59:49<2:05:11, 12.22it/s][A
 25%|████████▍                         | 30521/122310 [59:50<1:58:27, 12.91it/s][A
 25%|████████▍                         | 30528/122310 [59:51<2:09:15, 11.83it/s][A
 25%|████████▍                         | 30544/122310 [59:51<1:44:18, 14.66it/s][A
 25%|████████▍                         | 30552/122310 [59:52<1:55:30, 13.24it/s][A
 25%|████████▍                         | 30558/122310 [59:53<2:17:08, 11.15it/s][A
 25%|████████▍                         | 30575/122310 [59:54<1:49:01, 14.02it/s][A
 25%|████████▌                         | 30581/122310 [59:55<2:06:05, 12.13it/s][A
 25%|████████▌                         | 30597/122310 [59:56<1:46:21, 14.37it/s][A
 25%|████████▌                         | 30609/122310 [59:56<1:44:41, 14.60it/s][A
 25%|████████▌                         | 30630/122310 [59:57<1:23:55, 18.21

step: 33460, loss: 72.77615416593112, epoch: 2



 25%|████████                        | 30713/122310 [1:00:05<2:39:17,  9.58it/s][A
 25%|████████                        | 30717/122310 [1:00:05<3:02:09,  8.38it/s][A
 25%|████████                        | 30728/122310 [1:00:06<2:34:15,  9.89it/s][A
 25%|████████                        | 30737/122310 [1:00:07<2:26:53, 10.39it/s][A
 25%|████████                        | 30744/122310 [1:00:08<2:30:30, 10.14it/s][A
 25%|████████                        | 30746/122310 [1:00:08<3:11:14,  7.98it/s][A
 25%|████████                        | 30754/122310 [1:00:09<2:53:59,  8.77it/s][A
 25%|████████                        | 30762/122310 [1:00:10<2:43:13,  9.35it/s][A
 25%|████████                        | 30774/122310 [1:00:11<2:16:48, 11.15it/s][A
 25%|████████                        | 30781/122310 [1:00:12<2:24:56, 10.52it/s][A
 25%|████████                        | 30793/122310 [1:00:12<2:05:36, 12.14it/s][A
 25%|████████                        | 30800/122310 [1:00:13<2:14:19, 11.35

step: 33480, loss: 79.04972434334904, epoch: 2



 25%|████████                        | 30867/122310 [1:00:20<2:55:39,  8.68it/s][A
 25%|████████                        | 30872/122310 [1:00:21<3:08:18,  8.09it/s][A
 25%|████████                        | 30877/122310 [1:00:21<3:16:50,  7.74it/s][A
 25%|████████                        | 30886/122310 [1:00:22<2:48:15,  9.06it/s][A
 25%|████████                        | 30894/122310 [1:00:23<2:39:30,  9.55it/s][A
 25%|████████                        | 30903/122310 [1:00:24<2:30:26, 10.13it/s][A
 25%|████████                        | 30919/122310 [1:00:24<1:53:04, 13.47it/s][A
 25%|████████                        | 30926/122310 [1:00:25<2:04:23, 12.24it/s][A
 25%|████████                        | 30940/122310 [1:00:26<1:47:18, 14.19it/s][A
 25%|████████                        | 30950/122310 [1:00:27<1:49:39, 13.88it/s][A
 25%|████████                        | 30959/122310 [1:00:27<1:53:57, 13.36it/s][A
 25%|████████                        | 30970/122310 [1:00:28<1:50:27, 13.78

step: 33500, loss: 86.74040492551336, epoch: 2



 25%|████████▏                       | 31075/122310 [1:00:35<1:45:11, 14.46it/s][A
 25%|████████▏                       | 31079/122310 [1:00:36<2:11:06, 11.60it/s][A
 25%|████████▏                       | 31081/122310 [1:00:36<2:48:49,  9.01it/s][A
 25%|████████▏                       | 31087/122310 [1:00:37<2:53:57,  8.74it/s][A
 25%|████████▏                       | 31097/122310 [1:00:38<2:29:13, 10.19it/s][A
 25%|████████▏                       | 31110/122310 [1:00:38<2:03:01, 12.36it/s][A
 25%|████████▏                       | 31116/122310 [1:00:39<2:17:08, 11.08it/s][A
 25%|████████▏                       | 31125/122310 [1:00:40<2:12:51, 11.44it/s][A
 25%|████████▏                       | 31136/122310 [1:00:41<2:06:00, 12.06it/s][A
 25%|████████▏                       | 31143/122310 [1:00:42<2:14:54, 11.26it/s][A
 25%|████████▏                       | 31158/122310 [1:00:42<1:53:29, 13.39it/s][A
 25%|████████▏                       | 31169/122310 [1:00:43<1:50:17, 13.77

step: 33520, loss: 67.42247121161257, epoch: 2



 26%|████████▏                       | 31216/122310 [1:00:50<3:58:22,  6.37it/s][A
 26%|████████▏                       | 31220/122310 [1:00:51<4:09:27,  6.09it/s][A
 26%|████████▏                       | 31232/122310 [1:00:51<2:46:39,  9.11it/s][A
 26%|████████▏                       | 31250/122310 [1:00:52<1:51:23, 13.62it/s][A
 26%|████████▏                       | 31254/122310 [1:00:53<2:15:36, 11.19it/s][A
 26%|████████▏                       | 31258/122310 [1:00:54<2:40:46,  9.44it/s][A
 26%|████████▏                       | 31261/122310 [1:00:54<3:13:53,  7.83it/s][A
 26%|████████▏                       | 31271/122310 [1:00:55<2:39:36,  9.51it/s][A
 26%|████████▏                       | 31282/122310 [1:00:56<2:17:14, 11.05it/s][A
 26%|████████▏                       | 31284/122310 [1:00:57<2:56:40,  8.59it/s][A
 26%|████████▏                       | 31285/122310 [1:00:57<3:56:31,  6.41it/s][A
 26%|████████▏                       | 31295/122310 [1:00:58<2:57:49,  8.53

step: 33540, loss: 93.66867671979463, epoch: 2



 26%|████████▏                       | 31387/122310 [1:01:05<1:58:07, 12.83it/s][A
 26%|████████▏                       | 31394/122310 [1:01:05<2:07:49, 11.85it/s][A
 26%|████████▏                       | 31407/122310 [1:01:06<1:51:37, 13.57it/s][A
 26%|████████▏                       | 31425/122310 [1:01:07<1:30:05, 16.81it/s][A
 26%|████████▏                       | 31431/122310 [1:01:08<1:46:36, 14.21it/s][A
 26%|████████▏                       | 31440/122310 [1:01:08<1:51:36, 13.57it/s][A
 26%|████████▏                       | 31446/122310 [1:01:09<2:07:07, 11.91it/s][A
 26%|████████▏                       | 31456/122310 [1:01:10<2:02:10, 12.39it/s][A
 26%|████████▏                       | 31468/122310 [1:01:11<1:51:48, 13.54it/s][A
 26%|████████▏                       | 31475/122310 [1:01:11<2:03:08, 12.29it/s][A
 26%|████████▏                       | 31481/122310 [1:01:12<2:18:34, 10.92it/s][A
 26%|████████▏                       | 31493/122310 [1:01:13<2:02:25, 12.36

step: 33560, loss: 82.515338910253, epoch: 2



 26%|████████▎                       | 31589/122310 [1:01:20<1:40:33, 15.04it/s][A
 26%|████████▎                       | 31600/122310 [1:01:21<1:40:54, 14.98it/s][A
 26%|████████▎                       | 31607/122310 [1:01:21<1:54:11, 13.24it/s][A
 26%|████████▎                       | 31619/122310 [1:01:22<1:48:51, 13.89it/s][A
 26%|████████▎                       | 31623/122310 [1:01:23<2:16:02, 11.11it/s][A
 26%|████████▎                       | 31629/122310 [1:01:24<2:33:41,  9.83it/s][A
 26%|████████▎                       | 31634/122310 [1:01:25<2:50:22,  8.87it/s][A
 26%|████████▎                       | 31642/122310 [1:01:25<2:41:32,  9.35it/s][A
 26%|████████▎                       | 31649/122310 [1:01:26<2:43:40,  9.23it/s][A
 26%|████████▎                       | 31650/122310 [1:01:27<3:39:56,  6.87it/s][A
 26%|████████▎                       | 31657/122310 [1:01:28<3:20:28,  7.54it/s][A
 26%|████████▎                       | 31663/122310 [1:01:28<3:17:04,  7.67

step: 33580, loss: 85.2786390213522, epoch: 2



 26%|████████▎                       | 31734/122310 [1:01:35<2:30:59, 10.00it/s][A
 26%|████████▎                       | 31744/122310 [1:01:36<2:19:44, 10.80it/s][A
 26%|████████▎                       | 31758/122310 [1:01:37<1:57:38, 12.83it/s][A
 26%|████████▎                       | 31762/122310 [1:01:38<2:23:51, 10.49it/s][A
 26%|████████▎                       | 31771/122310 [1:01:39<2:20:39, 10.73it/s][A
 26%|████████▎                       | 31782/122310 [1:01:39<2:09:07, 11.68it/s][A
 26%|████████▎                       | 31788/122310 [1:01:40<2:23:57, 10.48it/s][A
 26%|████████▎                       | 31800/122310 [1:01:41<2:07:05, 11.87it/s][A
 26%|████████▎                       | 31808/122310 [1:01:42<2:12:47, 11.36it/s][A
 26%|████████▎                       | 31818/122310 [1:01:43<2:07:27, 11.83it/s][A
 26%|████████▎                       | 31825/122310 [1:01:43<2:15:44, 11.11it/s][A
 26%|████████▎                       | 31836/122310 [1:01:44<2:04:09, 12.14

step: 33600, loss: 92.17120015643624, epoch: 2
sim1 and sim2 are 0.5212279338364668, 0.21108878760656768
cosine of pred and queen: 0.2752879293264117
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: waiting
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: tickling
Actual: mahara


 26%|████████▎                       | 31908/122310 [1:02:03<1:49:36, 13.75it/s][A

Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: spain
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: electricity
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: statement
Actual: denmark:danish::germany:german, pred: europe
Accuracy is 0.4
Actual: walk:walks::vanish:vani


 26%|████████                       | 31927/122310 [1:03:08<46:30:57,  1.85s/it][A

Actual: india:rupee::denmark:krone, pred: spain
Accuracy is 0.15384615384615385



 26%|████████                       | 31933/122310 [1:03:09<39:51:21,  1.59s/it][A
 26%|████████                       | 31944/122310 [1:03:10<28:19:08,  1.13s/it][A
 26%|████████                       | 31953/122310 [1:03:11<21:28:57,  1.17it/s][A
 26%|████████                       | 31958/122310 [1:03:11<18:28:29,  1.36it/s][A
 26%|████████                       | 31963/122310 [1:03:12<15:36:46,  1.61it/s][A
 26%|████████▎                       | 31977/122310 [1:03:13<9:18:47,  2.69it/s][A
 26%|████████▎                       | 31993/122310 [1:03:14<5:53:45,  4.26it/s][A
 26%|████████▎                       | 32006/122310 [1:03:14<4:27:10,  5.63it/s][A
 26%|████████▍                       | 32018/122310 [1:03:15<3:35:13,  6.99it/s][A
 26%|████████▍                       | 32028/122310 [1:03:16<3:08:41,  7.97it/s][A
 26%|████████▍                       | 32035/122310 [1:03:17<3:02:41,  8.24it/s][A
 26%|████████▍                       | 32040/122310 [1:03:17<3:10:03,  7.92

step: 33620, loss: 70.90402558535439, epoch: 2



 26%|████████▍                       | 32113/122310 [1:03:24<3:08:49,  7.96it/s][A
 26%|████████▍                       | 32119/122310 [1:03:25<3:09:03,  7.95it/s][A
 26%|████████▍                       | 32122/122310 [1:03:26<3:36:21,  6.95it/s][A
 26%|████████▍                       | 32131/122310 [1:03:26<3:02:26,  8.24it/s][A
 26%|████████▍                       | 32132/122310 [1:03:27<3:55:16,  6.39it/s][A
 26%|████████▍                       | 32141/122310 [1:03:28<3:09:35,  7.93it/s][A
 26%|████████▍                       | 32149/122310 [1:03:29<2:52:01,  8.74it/s][A
 26%|████████▍                       | 32163/122310 [1:03:29<2:09:02, 11.64it/s][A
 26%|████████▍                       | 32177/122310 [1:03:30<1:49:01, 13.78it/s][A
 26%|████████▍                       | 32185/122310 [1:03:31<1:57:42, 12.76it/s][A
 26%|████████▍                       | 32194/122310 [1:03:32<2:00:14, 12.49it/s][A
 26%|████████▍                       | 32209/122310 [1:03:32<1:41:41, 14.77

step: 33640, loss: 72.09040513179654, epoch: 2



 26%|████████▍                       | 32273/122310 [1:03:38<2:10:47, 11.47it/s][A
 26%|████████▍                       | 32281/122310 [1:03:39<2:15:16, 11.09it/s][A
 26%|████████▍                       | 32286/122310 [1:03:40<2:33:02,  9.80it/s][A
 26%|████████▍                       | 32301/122310 [1:03:41<1:57:03, 12.81it/s][A
 26%|████████▍                       | 32304/122310 [1:03:41<2:27:33, 10.17it/s][A
 26%|████████▍                       | 32307/122310 [1:03:42<3:03:21,  8.18it/s][A
 26%|████████▍                       | 32316/122310 [1:03:43<2:40:44,  9.33it/s][A
 26%|████████▍                       | 32328/122310 [1:03:44<2:11:35, 11.40it/s][A
 26%|████████▍                       | 32333/122310 [1:03:44<2:31:50,  9.88it/s][A
 26%|████████▍                       | 32349/122310 [1:03:45<1:52:58, 13.27it/s][A
 26%|████████▍                       | 32356/122310 [1:03:46<2:08:47, 11.64it/s][A
 26%|████████▍                       | 32359/122310 [1:03:47<2:39:38,  9.39

step: 33660, loss: 79.90238785539422, epoch: 2



 27%|████████▍                       | 32439/122310 [1:03:53<1:43:25, 14.48it/s][A
 27%|████████▍                       | 32445/122310 [1:03:54<1:59:04, 12.58it/s][A
 27%|████████▍                       | 32450/122310 [1:03:55<2:17:38, 10.88it/s][A
 27%|████████▍                       | 32463/122310 [1:03:56<1:55:39, 12.95it/s][A
 27%|████████▍                       | 32467/122310 [1:03:56<2:19:48, 10.71it/s][A
 27%|████████▍                       | 32469/122310 [1:03:57<3:00:02,  8.32it/s][A
 27%|████████▍                       | 32475/122310 [1:03:58<2:59:49,  8.33it/s][A
 27%|████████▍                       | 32482/122310 [1:03:59<2:52:20,  8.69it/s][A
 27%|████████▌                       | 32490/122310 [1:03:59<2:39:50,  9.37it/s][A
 27%|████████▌                       | 32498/122310 [1:04:00<2:31:38,  9.87it/s][A
 27%|████████▌                       | 32513/122310 [1:04:01<1:54:33, 13.06it/s][A
 27%|████████▌                       | 32522/122310 [1:04:01<1:56:30, 12.84

step: 33680, loss: 49.58000318404054, epoch: 2



 27%|████████▌                       | 32597/122310 [1:04:08<2:33:22,  9.75it/s][A
 27%|████████▌                       | 32602/122310 [1:04:09<2:48:58,  8.85it/s][A
 27%|████████▌                       | 32610/122310 [1:04:09<2:38:09,  9.45it/s][A
 27%|████████▌                       | 32616/122310 [1:04:10<2:52:29,  8.67it/s][A
 27%|████████▌                       | 32619/122310 [1:04:11<3:25:04,  7.29it/s][A
 27%|████████▌                       | 32620/122310 [1:04:12<4:37:25,  5.39it/s][A
 27%|████████▌                       | 32623/122310 [1:04:13<5:00:33,  4.97it/s][A
 27%|████████▌                       | 32634/122310 [1:04:13<3:12:06,  7.78it/s][A
 27%|████████▌                       | 32645/122310 [1:04:14<2:32:17,  9.81it/s][A
 27%|████████▌                       | 32656/122310 [1:04:15<2:12:31, 11.28it/s][A
 27%|████████▌                       | 32663/122310 [1:04:16<2:20:00, 10.67it/s][A
 27%|████████▌                       | 32666/122310 [1:04:16<2:55:06,  8.53

step: 33700, loss: 79.08435413192845, epoch: 2



 27%|████████▌                       | 32731/122310 [1:04:23<2:25:08, 10.29it/s][A
 27%|████████▌                       | 32743/122310 [1:04:24<2:04:25, 12.00it/s][A
 27%|████████▌                       | 32748/122310 [1:04:25<2:23:11, 10.42it/s][A
 27%|████████▌                       | 32754/122310 [1:04:26<2:32:53,  9.76it/s][A
 27%|████████▌                       | 32767/122310 [1:04:26<2:03:53, 12.05it/s][A
 27%|████████▌                       | 32781/122310 [1:04:27<1:45:50, 14.10it/s][A
 27%|████████▌                       | 32789/122310 [1:04:28<1:54:07, 13.07it/s][A
 27%|████████▌                       | 32792/122310 [1:04:29<3:06:11,  8.01it/s][A
 27%|████████▌                       | 32801/122310 [1:04:30<2:45:04,  9.04it/s][A
 27%|████████▌                       | 32814/122310 [1:04:31<2:13:26, 11.18it/s][A
 27%|████████▌                       | 32820/122310 [1:04:32<2:23:57, 10.36it/s][A
 27%|████████▌                       | 32834/122310 [1:04:32<1:57:19, 12.71

step: 33720, loss: 120.6681710595149, epoch: 2



 27%|████████▌                       | 32908/122310 [1:04:38<1:51:52, 13.32it/s][A
 27%|████████▌                       | 32916/122310 [1:04:39<1:58:29, 12.57it/s][A
 27%|████████▌                       | 32923/122310 [1:04:40<2:07:41, 11.67it/s][A
 27%|████████▌                       | 32930/122310 [1:04:40<2:15:22, 11.00it/s][A
 27%|████████▌                       | 32936/122310 [1:04:41<2:27:54, 10.07it/s][A
 27%|████████▌                       | 32941/122310 [1:04:42<2:42:54,  9.14it/s][A
 27%|████████▌                       | 32951/122310 [1:04:43<2:22:29, 10.45it/s][A
 27%|████████▌                       | 32957/122310 [1:04:43<2:32:22,  9.77it/s][A
 27%|████████▌                       | 32964/122310 [1:04:44<2:33:37,  9.69it/s][A
 27%|████████▋                       | 32976/122310 [1:04:45<2:07:35, 11.67it/s][A
 27%|████████▋                       | 32980/122310 [1:04:46<2:32:25,  9.77it/s][A
 27%|████████▋                       | 32991/122310 [1:04:46<2:11:44, 11.30

step: 33740, loss: 94.57716850556258, epoch: 2



 27%|████████▋                       | 33047/122310 [1:04:53<3:40:02,  6.76it/s][A
 27%|████████▋                       | 33054/122310 [1:04:54<3:15:26,  7.61it/s][A
 27%|████████▋                       | 33059/122310 [1:04:54<3:22:23,  7.35it/s][A
 27%|████████▋                       | 33066/122310 [1:04:55<3:05:20,  8.02it/s][A
 27%|████████▋                       | 33074/122310 [1:04:56<2:47:34,  8.88it/s][A
 27%|████████▋                       | 33078/122310 [1:04:57<3:09:29,  7.85it/s][A
 27%|████████▋                       | 33085/122310 [1:04:57<2:58:09,  8.35it/s][A
 27%|████████▋                       | 33094/122310 [1:04:58<2:35:57,  9.53it/s][A
 27%|████████▋                       | 33103/122310 [1:04:59<2:23:42, 10.35it/s][A
 27%|████████▋                       | 33115/122310 [1:05:00<2:02:13, 12.16it/s][A
 27%|████████▋                       | 33131/122310 [1:05:00<1:38:55, 15.03it/s][A
 27%|████████▋                       | 33133/122310 [1:05:01<2:11:09, 11.33

step: 33760, loss: 81.87024746355287, epoch: 2



 27%|████████▋                       | 33200/122310 [1:05:08<2:17:52, 10.77it/s][A
 27%|████████▋                       | 33212/122310 [1:05:08<1:59:41, 12.41it/s][A
 27%|████████▋                       | 33218/122310 [1:05:09<2:12:47, 11.18it/s][A
 27%|████████▋                       | 33222/122310 [1:05:10<2:37:20,  9.44it/s][A
 27%|████████▋                       | 33227/122310 [1:05:11<2:51:19,  8.67it/s][A
 27%|████████▋                       | 33243/122310 [1:05:11<1:58:27, 12.53it/s][A
 27%|████████▋                       | 33250/122310 [1:05:12<2:07:05, 11.68it/s][A
 27%|████████▋                       | 33255/122310 [1:05:13<2:25:20, 10.21it/s][A
 27%|████████▋                       | 33274/122310 [1:05:14<1:39:28, 14.92it/s][A
 27%|████████▋                       | 33294/122310 [1:05:14<1:19:57, 18.55it/s][A
 27%|████████▋                       | 33299/122310 [1:05:15<1:38:22, 15.08it/s][A
 27%|████████▋                       | 33313/122310 [1:05:16<1:31:05, 16.28

step: 33780, loss: 78.18904479087115, epoch: 2



 27%|████████▋                       | 33391/122310 [1:05:22<1:39:58, 14.82it/s][A
 27%|████████▋                       | 33405/122310 [1:05:23<1:32:13, 16.07it/s][A
 27%|████████▋                       | 33414/122310 [1:05:24<1:39:24, 14.90it/s][A
 27%|████████▋                       | 33424/122310 [1:05:25<1:42:00, 14.52it/s][A
 27%|████████▋                       | 33430/122310 [1:05:25<1:57:39, 12.59it/s][A
 27%|████████▋                       | 33435/122310 [1:05:26<2:16:17, 10.87it/s][A
 27%|████████▋                       | 33442/122310 [1:05:27<2:21:40, 10.45it/s][A
 27%|████████▊                       | 33452/122310 [1:05:28<2:10:02, 11.39it/s][A
 27%|████████▊                       | 33456/122310 [1:05:28<2:33:47,  9.63it/s][A
 27%|████████▊                       | 33465/122310 [1:05:29<2:22:11, 10.41it/s][A
 27%|████████▊                       | 33471/122310 [1:05:30<2:32:30,  9.71it/s][A
 27%|████████▊                       | 33482/122310 [1:05:31<2:11:18, 11.27

step: 33800, loss: 68.51372490019297, epoch: 2



 27%|████████▊                       | 33571/122310 [1:05:37<2:03:57, 11.93it/s][A
 27%|████████▊                       | 33586/122310 [1:05:38<1:42:44, 14.39it/s][A
 27%|████████▊                       | 33591/122310 [1:05:39<2:01:45, 12.14it/s][A
 27%|████████▊                       | 33610/122310 [1:05:39<1:31:14, 16.20it/s][A
 27%|████████▊                       | 33619/122310 [1:05:42<3:06:40,  7.92it/s][A
 27%|████████▊                       | 33626/122310 [1:05:43<3:00:12,  8.20it/s][A
 28%|████████▊                       | 33636/122310 [1:05:44<2:38:23,  9.33it/s][A
 28%|████████▊                       | 33645/122310 [1:05:44<2:27:36, 10.01it/s][A
 28%|████████▊                       | 33656/122310 [1:05:45<2:12:16, 11.17it/s][A
 28%|████████▊                       | 33658/122310 [1:05:46<2:44:01,  9.01it/s][A
 28%|████████▊                       | 33662/122310 [1:05:47<3:02:59,  8.07it/s][A
 28%|████████▊                       | 33671/122310 [1:05:47<2:39:46,  9.25

step: 33820, loss: 66.49278007351754, epoch: 2



 28%|████████▊                       | 33726/122310 [1:05:52<2:00:19, 12.27it/s][A
 28%|████████▊                       | 33738/122310 [1:05:53<1:49:41, 13.46it/s][A
 28%|████████▊                       | 33754/122310 [1:05:53<1:32:42, 15.92it/s][A
 28%|████████▊                       | 33762/122310 [1:05:54<1:42:59, 14.33it/s][A
 28%|████████▊                       | 33772/122310 [1:05:55<1:44:24, 14.13it/s][A
 28%|████████▊                       | 33775/122310 [1:05:56<2:13:15, 11.07it/s][A
 28%|████████▊                       | 33780/122310 [1:05:56<2:30:24,  9.81it/s][A
 28%|████████▊                       | 33787/122310 [1:05:57<2:31:56,  9.71it/s][A
 28%|████████▊                       | 33798/122310 [1:05:58<2:10:36, 11.30it/s][A
 28%|████████▊                       | 33806/122310 [1:05:58<2:12:30, 11.13it/s][A
 28%|████████▊                       | 33819/122310 [1:05:59<1:52:32, 13.10it/s][A
 28%|████████▊                       | 33824/122310 [1:06:00<2:11:26, 11.22

step: 33840, loss: 68.56167624665937, epoch: 2



 28%|████████▊                       | 33900/122310 [1:06:07<1:52:25, 13.11it/s][A
 28%|████████▊                       | 33908/122310 [1:06:07<1:58:25, 12.44it/s][A
 28%|████████▉                       | 33923/122310 [1:06:08<1:39:21, 14.83it/s][A
 28%|████████▉                       | 33940/122310 [1:06:09<1:24:59, 17.33it/s][A
 28%|████████▉                       | 33951/122310 [1:06:10<1:30:07, 16.34it/s][A
 28%|████████▉                       | 33958/122310 [1:06:10<1:44:33, 14.08it/s][A
 28%|████████▉                       | 33965/122310 [1:06:11<1:56:48, 12.61it/s][A
 28%|████████▉                       | 33971/122310 [1:06:12<2:12:12, 11.14it/s][A
 28%|████████▉                       | 33981/122310 [1:06:13<2:05:32, 11.73it/s][A
 28%|████████▉                       | 33993/122310 [1:06:13<1:54:04, 12.90it/s][A
 28%|████████▉                       | 34000/122310 [1:06:14<2:05:14, 11.75it/s][A
 28%|████████▉                       | 34010/122310 [1:06:15<2:01:07, 12.15

step: 33860, loss: 80.60150361329164, epoch: 2



 28%|████████▉                       | 34094/122310 [1:06:22<1:55:55, 12.68it/s][A
 28%|████████▉                       | 34107/122310 [1:06:23<1:45:44, 13.90it/s][A
 28%|████████▉                       | 34117/122310 [1:06:23<1:47:51, 13.63it/s][A
 28%|████████▉                       | 34133/122310 [1:06:24<1:33:15, 15.76it/s][A
 28%|████████▉                       | 34138/122310 [1:06:25<1:53:13, 12.98it/s][A
 28%|████████▉                       | 34143/122310 [1:06:26<2:12:55, 11.05it/s][A
 28%|████████▉                       | 34151/122310 [1:06:26<2:15:05, 10.88it/s][A
 28%|████████▉                       | 34161/122310 [1:06:27<2:07:47, 11.50it/s][A
 28%|████████▉                       | 34166/122310 [1:06:28<2:27:26,  9.96it/s][A
 28%|████████▉                       | 34169/122310 [1:06:29<2:58:45,  8.22it/s][A
 28%|████████▉                       | 34188/122310 [1:06:30<1:51:24, 13.18it/s][A
 28%|████████▉                       | 34198/122310 [1:06:30<1:51:50, 13.13

step: 33880, loss: 77.21390446832233, epoch: 2



 28%|████████▉                       | 34275/122310 [1:06:37<2:11:14, 11.18it/s][A
 28%|████████▉                       | 34279/122310 [1:06:39<3:20:31,  7.32it/s][A
 28%|████████▉                       | 34280/122310 [1:06:39<4:09:24,  5.88it/s][A
 28%|████████▉                       | 34286/122310 [1:06:40<3:46:43,  6.47it/s][A
 28%|████████▉                       | 34296/122310 [1:06:41<2:55:05,  8.38it/s][A
 28%|████████▉                       | 34307/122310 [1:06:42<2:23:52, 10.19it/s][A
 28%|████████▉                       | 34318/122310 [1:06:42<2:06:39, 11.58it/s][A
 28%|████████▉                       | 34322/122310 [1:06:43<2:29:30,  9.81it/s][A
 28%|████████▉                       | 34329/122310 [1:06:44<2:30:35,  9.74it/s][A
 28%|████████▉                       | 34341/122310 [1:06:45<2:05:38, 11.67it/s][A
 28%|████████▉                       | 34356/122310 [1:06:45<1:42:55, 14.24it/s][A
 28%|████████▉                       | 34361/122310 [1:06:46<2:01:48, 12.03

step: 33900, loss: 65.82284012418982, epoch: 2



 28%|█████████                       | 34420/122310 [1:06:52<2:09:59, 11.27it/s][A
 28%|█████████                       | 34431/122310 [1:06:53<1:58:45, 12.33it/s][A
 28%|█████████                       | 34436/122310 [1:06:53<2:16:17, 10.75it/s][A
 28%|█████████                       | 34447/122310 [1:06:54<2:02:04, 12.00it/s][A
 28%|█████████                       | 34453/122310 [1:06:56<2:54:59,  8.37it/s][A
 28%|█████████                       | 34456/122310 [1:06:56<3:19:29,  7.34it/s][A
 28%|█████████                       | 34477/122310 [1:06:57<1:53:49, 12.86it/s][A
 28%|█████████                       | 34485/122310 [1:06:58<1:59:28, 12.25it/s][A
 28%|█████████                       | 34496/122310 [1:06:59<1:52:32, 13.01it/s][A
 28%|█████████                       | 34504/122310 [1:06:59<1:58:04, 12.39it/s][A
 28%|█████████                       | 34508/122310 [1:07:00<2:20:42, 10.40it/s][A
 28%|█████████                       | 34515/122310 [1:07:01<2:24:25, 10.13

step: 33920, loss: 75.34883345796959, epoch: 2



 28%|█████████                       | 34567/122310 [1:07:07<2:47:05,  8.75it/s][A
 28%|█████████                       | 34571/122310 [1:07:07<3:08:12,  7.77it/s][A
 28%|█████████                       | 34581/122310 [1:07:08<2:33:38,  9.52it/s][A
 28%|█████████                       | 34584/122310 [1:07:09<3:05:48,  7.87it/s][A
 28%|█████████                       | 34590/122310 [1:07:10<3:03:26,  7.97it/s][A
 28%|█████████                       | 34598/122310 [1:07:10<2:45:09,  8.85it/s][A
 28%|█████████                       | 34610/122310 [1:07:11<2:11:53, 11.08it/s][A
 28%|█████████                       | 34616/122310 [1:07:12<2:22:45, 10.24it/s][A
 28%|█████████                       | 34623/122310 [1:07:13<2:26:08, 10.00it/s][A
 28%|█████████                       | 34630/122310 [1:07:13<2:28:03,  9.87it/s][A
 28%|█████████                       | 34636/122310 [1:07:14<2:36:25,  9.34it/s][A
 28%|█████████                       | 34641/122310 [1:07:15<2:49:34,  8.62

step: 33940, loss: 84.63308491980031, epoch: 2



 28%|█████████                       | 34703/122310 [1:07:21<2:14:04, 10.89it/s][A
 28%|█████████                       | 34716/122310 [1:07:22<1:52:37, 12.96it/s][A
 28%|█████████                       | 34723/122310 [1:07:23<2:02:21, 11.93it/s][A
 28%|█████████                       | 34730/122310 [1:07:24<2:10:03, 11.22it/s][A
 28%|█████████                       | 34744/122310 [1:07:24<1:47:53, 13.53it/s][A
 28%|█████████                       | 34750/122310 [1:07:25<2:04:44, 11.70it/s][A
 28%|█████████                       | 34754/122310 [1:07:26<2:30:57,  9.67it/s][A
 28%|█████████                       | 34761/122310 [1:07:27<2:33:23,  9.51it/s][A
 28%|█████████                       | 34766/122310 [1:07:27<2:49:37,  8.60it/s][A
 28%|█████████                       | 34780/122310 [1:07:28<2:06:47, 11.51it/s][A
 28%|█████████                       | 34787/122310 [1:07:29<2:15:17, 10.78it/s][A
 28%|█████████                       | 34796/122310 [1:07:30<2:11:47, 11.07

step: 33960, loss: 107.74154516902556, epoch: 2



 29%|█████████▏                      | 34888/122310 [1:07:37<2:24:25, 10.09it/s][A
 29%|█████████▏                      | 34901/122310 [1:07:37<2:01:13, 12.02it/s][A
 29%|█████████▏                      | 34921/122310 [1:07:39<1:57:23, 12.41it/s][A
 29%|█████████▏                      | 34924/122310 [1:07:40<2:20:01, 10.40it/s][A
 29%|█████████▏                      | 34931/122310 [1:07:40<2:25:00, 10.04it/s][A
 29%|█████████▏                      | 34934/122310 [1:07:41<2:52:52,  8.42it/s][A
 29%|█████████▏                      | 34942/122310 [1:07:42<2:41:59,  8.99it/s][A
 29%|█████████▏                      | 34952/122310 [1:07:43<2:23:28, 10.15it/s][A
 29%|█████████▏                      | 34957/122310 [1:07:43<2:39:44,  9.11it/s][A
 29%|█████████▏                      | 34969/122310 [1:07:44<2:12:15, 11.01it/s][A
 29%|█████████▏                      | 34982/122310 [1:07:45<1:54:03, 12.76it/s][A
 29%|█████████▏                      | 34986/122310 [1:07:46<2:18:20, 10.52

step: 33980, loss: 80.90113363170542, epoch: 2



 29%|█████████▏                      | 35047/122310 [1:07:52<2:42:02,  8.98it/s][A
 29%|█████████▏                      | 35052/122310 [1:07:53<2:57:07,  8.21it/s][A
 29%|█████████▏                      | 35058/122310 [1:07:53<2:59:24,  8.11it/s][A
 29%|█████████▏                      | 35063/122310 [1:07:54<3:10:24,  7.64it/s][A
 29%|█████████▏                      | 35071/122310 [1:07:55<2:51:42,  8.47it/s][A
 29%|█████████▏                      | 35078/122310 [1:07:56<2:47:44,  8.67it/s][A
 29%|█████████▏                      | 35091/122310 [1:07:57<2:10:04, 11.18it/s][A
 29%|█████████▏                      | 35108/122310 [1:07:57<1:39:25, 14.62it/s][A
 29%|█████████▏                      | 35117/122310 [1:07:58<1:44:28, 13.91it/s][A
 29%|█████████▏                      | 35121/122310 [1:07:59<2:06:46, 11.46it/s][A
 29%|█████████▏                      | 35129/122310 [1:07:59<2:08:28, 11.31it/s][A
 29%|█████████▏                      | 35135/122310 [1:08:00<2:19:55, 10.38

step: 34000, loss: 67.67600800435561, epoch: 2
sim1 and sim2 are 0.5031800028516461, 0.203698859860098
cosine of pred and queen: 0.26842332921366135
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: media
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahar

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: thaws
Actual: man:woman::brothers:sisters, pred: paradis
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: karnataka
Actual: tripura:agartala::odisha:bhubaneswar, pred: shri
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar
Actual: india:rupee::d


 29%|████████▉                      | 35195/122310 [1:09:22<64:15:33,  2.66s/it][A
 29%|████████▉                      | 35206/122310 [1:09:23<42:40:31,  1.76s/it][A
 29%|████████▉                      | 35210/122310 [1:09:24<36:52:16,  1.52s/it][A
 29%|████████▉                      | 35215/122310 [1:09:25<29:46:02,  1.23s/it][A
 29%|████████▉                      | 35225/122310 [1:09:25<19:10:01,  1.26it/s][A
 29%|████████▉                      | 35231/122310 [1:09:26<15:13:20,  1.59it/s][A
 29%|████████▉                      | 35237/122310 [1:09:27<12:02:16,  2.01it/s][A
 29%|█████████▏                      | 35246/122310 [1:09:28<8:25:56,  2.87it/s][A
 29%|█████████▏                      | 35251/122310 [1:09:28<7:20:54,  3.29it/s][A
 29%|█████████▏                      | 35256/122310 [1:09:29<6:25:34,  3.76it/s][A
 29%|█████████▏                      | 35266/122310 [1:09:30<4:31:24,  5.35it/s][A
 29%|█████████▏                      | 35279/122310 [1:09:31<3:44:39,  6.46

step: 34020, loss: 106.54563733982258, epoch: 2



 29%|█████████▏                      | 35351/122310 [1:09:38<2:22:01, 10.20it/s][A
 29%|█████████▎                      | 35358/122310 [1:09:39<2:24:11, 10.05it/s][A
 29%|█████████▎                      | 35367/122310 [1:09:39<2:14:39, 10.76it/s][A
 29%|█████████▎                      | 35384/122310 [1:09:40<1:39:39, 14.54it/s][A
 29%|█████████▎                      | 35392/122310 [1:09:41<1:47:28, 13.48it/s][A
 29%|█████████▎                      | 35396/122310 [1:09:41<2:10:18, 11.12it/s][A
 29%|█████████▎                      | 35406/122310 [1:09:42<2:01:26, 11.93it/s][A
 29%|█████████▎                      | 35416/122310 [1:09:43<1:55:59, 12.49it/s][A
 29%|█████████▎                      | 35425/122310 [1:09:44<1:56:07, 12.47it/s][A
 29%|█████████▎                      | 35430/122310 [1:09:44<2:14:12, 10.79it/s][A
 29%|█████████▎                      | 35442/122310 [1:09:45<1:56:02, 12.48it/s][A
 29%|█████████▎                      | 35446/122310 [1:09:46<2:19:28, 10.38

step: 34040, loss: 101.02050038907622, epoch: 2



 29%|█████████▎                      | 35499/122310 [1:09:52<3:09:14,  7.65it/s][A
 29%|█████████▎                      | 35509/122310 [1:09:53<2:39:35,  9.07it/s][A
 29%|█████████▎                      | 35517/122310 [1:09:54<2:31:51,  9.53it/s][A
 29%|█████████▎                      | 35527/122310 [1:09:55<2:15:45, 10.65it/s][A
 29%|█████████▎                      | 35536/122310 [1:09:55<2:10:10, 11.11it/s][A
 29%|█████████▎                      | 35546/122310 [1:09:56<2:02:06, 11.84it/s][A
 29%|█████████▎                      | 35568/122310 [1:09:57<1:24:28, 17.11it/s][A
 29%|█████████▎                      | 35573/122310 [1:09:57<1:43:00, 14.03it/s][A
 29%|█████████▎                      | 35579/122310 [1:09:58<1:57:02, 12.35it/s][A
 29%|█████████▎                      | 35586/122310 [1:09:59<2:05:03, 11.56it/s][A
 29%|█████████▎                      | 35600/122310 [1:10:00<1:44:29, 13.83it/s][A
 29%|█████████▎                      | 35602/122310 [1:10:00<2:17:13, 10.53

step: 34060, loss: 72.29635674806475, epoch: 2



 29%|█████████▎                      | 35667/122310 [1:10:07<1:51:35, 12.94it/s][A
 29%|█████████▎                      | 35673/122310 [1:10:08<2:07:15, 11.35it/s][A
 29%|█████████▎                      | 35681/122310 [1:10:09<2:09:58, 11.11it/s][A
 29%|█████████▎                      | 35698/122310 [1:10:10<1:40:26, 14.37it/s][A
 29%|█████████▎                      | 35712/122310 [1:10:10<1:33:59, 15.36it/s][A
 29%|█████████▎                      | 35720/122310 [1:10:11<1:44:40, 13.79it/s][A
 29%|█████████▎                      | 35725/122310 [1:10:12<2:06:18, 11.42it/s][A
 29%|█████████▎                      | 35740/122310 [1:10:13<1:44:30, 13.81it/s][A
 29%|█████████▎                      | 35746/122310 [1:10:14<1:59:41, 12.05it/s][A
 29%|█████████▎                      | 35755/122310 [1:10:14<1:59:59, 12.02it/s][A
 29%|█████████▎                      | 35768/122310 [1:10:15<1:46:34, 13.53it/s][A
 29%|█████████▎                      | 35777/122310 [1:10:16<1:51:33, 12.93

step: 34080, loss: 72.2216260062279, epoch: 2



 29%|█████████▍                      | 35873/122310 [1:10:23<1:33:59, 15.33it/s][A
 29%|█████████▍                      | 35880/122310 [1:10:24<1:46:52, 13.48it/s][A
 29%|█████████▍                      | 35888/122310 [1:10:24<1:53:57, 12.64it/s][A
 29%|█████████▍                      | 35900/122310 [1:10:25<1:45:57, 13.59it/s][A
 29%|█████████▍                      | 35905/122310 [1:10:26<2:05:24, 11.48it/s][A
 29%|█████████▍                      | 35911/122310 [1:10:27<2:20:34, 10.24it/s][A
 29%|█████████▍                      | 35920/122310 [1:10:27<2:14:21, 10.72it/s][A
 29%|█████████▍                      | 35929/122310 [1:10:29<2:52:04,  8.37it/s][A
 29%|█████████▍                      | 35930/122310 [1:10:30<3:35:43,  6.67it/s][A
 29%|█████████▍                      | 35940/122310 [1:10:31<2:53:02,  8.32it/s][A
 29%|█████████▍                      | 35946/122310 [1:10:31<3:01:03,  7.95it/s][A
 29%|█████████▍                      | 35955/122310 [1:10:32<2:39:36,  9.02

step: 34100, loss: 67.27337125312752, epoch: 2



 29%|█████████▍                      | 36031/122310 [1:10:38<1:51:00, 12.95it/s][A
 29%|█████████▍                      | 36039/122310 [1:10:39<1:57:48, 12.20it/s][A
 29%|█████████▍                      | 36049/122310 [1:10:40<1:55:13, 12.48it/s][A
 29%|█████████▍                      | 36054/122310 [1:10:41<2:14:37, 10.68it/s][A
 29%|█████████▍                      | 36060/122310 [1:10:41<2:27:07,  9.77it/s][A
 29%|█████████▍                      | 36070/122310 [1:10:42<2:14:09, 10.71it/s][A
 30%|█████████▍                      | 36082/122310 [1:10:43<1:58:11, 12.16it/s][A
 30%|█████████▍                      | 36088/122310 [1:10:44<2:11:40, 10.91it/s][A
 30%|█████████▍                      | 36096/122310 [1:10:44<2:13:43, 10.74it/s][A
 30%|█████████▍                      | 36101/122310 [1:10:45<2:32:05,  9.45it/s][A
 30%|█████████▍                      | 36106/122310 [1:10:46<2:47:54,  8.56it/s][A
 30%|█████████▍                      | 36113/122310 [1:10:47<2:44:36,  8.73

step: 34120, loss: 74.82898149330777, epoch: 2



 30%|█████████▍                      | 36166/122310 [1:10:54<2:50:26,  8.42it/s][A
 30%|█████████▍                      | 36176/122310 [1:10:55<2:28:08,  9.69it/s][A
 30%|█████████▍                      | 36177/122310 [1:10:55<3:17:28,  7.27it/s][A
 30%|█████████▍                      | 36183/122310 [1:10:56<3:13:46,  7.41it/s][A
 30%|█████████▍                      | 36199/122310 [1:10:57<2:05:50, 11.40it/s][A
 30%|█████████▍                      | 36209/122310 [1:10:58<2:00:35, 11.90it/s][A
 30%|█████████▍                      | 36220/122310 [1:10:58<1:53:46, 12.61it/s][A
 30%|█████████▍                      | 36225/122310 [1:10:59<2:13:49, 10.72it/s][A
 30%|█████████▍                      | 36229/122310 [1:11:00<2:37:04,  9.13it/s][A
 30%|█████████▍                      | 36238/122310 [1:11:01<2:25:18,  9.87it/s][A
 30%|█████████▍                      | 36248/122310 [1:11:01<2:12:40, 10.81it/s][A
 30%|█████████▍                      | 36255/122310 [1:11:02<2:20:53, 10.18

step: 34140, loss: 94.56839940052076, epoch: 2



 30%|█████████▌                      | 36359/122310 [1:11:09<1:34:53, 15.10it/s][A
 30%|█████████▌                      | 36362/122310 [1:11:10<2:02:04, 11.74it/s][A
 30%|█████████▌                      | 36376/122310 [1:11:11<1:45:02, 13.64it/s][A
 30%|█████████▌                      | 36387/122310 [1:11:12<1:43:54, 13.78it/s][A
 30%|█████████▌                      | 36398/122310 [1:11:12<1:44:48, 13.66it/s][A
 30%|█████████▌                      | 36404/122310 [1:11:13<1:59:35, 11.97it/s][A
 30%|█████████▌                      | 36408/122310 [1:11:14<2:23:18,  9.99it/s][A
 30%|█████████▌                      | 36415/122310 [1:11:15<2:26:50,  9.75it/s][A
 30%|█████████▌                      | 36419/122310 [1:11:16<2:52:58,  8.28it/s][A
 30%|█████████▌                      | 36427/122310 [1:11:16<2:41:39,  8.85it/s][A
 30%|█████████▌                      | 36435/122310 [1:11:17<2:33:55,  9.30it/s][A
 30%|█████████▌                      | 36438/122310 [1:11:18<3:09:08,  7.57

step: 34160, loss: 80.5073396435495, epoch: 2



 30%|█████████▌                      | 36512/122310 [1:11:25<2:23:43,  9.95it/s][A
 30%|█████████▌                      | 36528/122310 [1:11:26<1:49:20, 13.07it/s][A
 30%|█████████▌                      | 36530/122310 [1:11:27<2:25:10,  9.85it/s][A
 30%|█████████▌                      | 36540/122310 [1:11:27<2:12:01, 10.83it/s][A
 30%|█████████▌                      | 36549/122310 [1:11:28<2:09:29, 11.04it/s][A
 30%|█████████▌                      | 36565/122310 [1:11:29<1:42:32, 13.94it/s][A
 30%|█████████▌                      | 36569/122310 [1:11:30<2:07:21, 11.22it/s][A
 30%|█████████▌                      | 36576/122310 [1:11:30<2:17:27, 10.40it/s][A
 30%|█████████▌                      | 36588/122310 [1:11:31<2:00:34, 11.85it/s][A
 30%|█████████▌                      | 36593/122310 [1:11:32<2:19:57, 10.21it/s][A
 30%|█████████▌                      | 36601/122310 [1:11:33<2:21:49, 10.07it/s][A
 30%|█████████▌                      | 36607/122310 [1:11:34<2:31:54,  9.40

step: 34180, loss: 76.90355203615185, epoch: 2



 30%|█████████▌                      | 36695/122310 [1:11:41<2:14:03, 10.64it/s][A
 30%|█████████▌                      | 36707/122310 [1:11:41<1:58:19, 12.06it/s][A
 30%|█████████▌                      | 36716/122310 [1:11:42<2:03:22, 11.56it/s][A
 30%|█████████▌                      | 36728/122310 [1:11:44<2:28:42,  9.59it/s][A
 30%|█████████▌                      | 36731/122310 [1:11:45<2:54:16,  8.18it/s][A
 30%|█████████▌                      | 36738/122310 [1:11:46<2:50:51,  8.35it/s][A
 30%|█████████▌                      | 36741/122310 [1:11:46<3:18:32,  7.18it/s][A
 30%|█████████▌                      | 36752/122310 [1:11:47<2:36:46,  9.10it/s][A
 30%|█████████▌                      | 36763/122310 [1:11:48<2:15:49, 10.50it/s][A
 30%|█████████▌                      | 36775/122310 [1:11:49<2:00:14, 11.86it/s][A
 30%|█████████▌                      | 36787/122310 [1:11:49<1:51:17, 12.81it/s][A
 30%|█████████▋                      | 36807/122310 [1:11:50<1:26:05, 16.55

step: 34200, loss: 67.29270936853379, epoch: 2



 30%|█████████▋                      | 36876/122310 [1:11:57<2:42:17,  8.77it/s][A
 30%|█████████▋                      | 36879/122310 [1:11:58<4:10:23,  5.69it/s][A
 30%|█████████▋                      | 36891/122310 [1:11:59<2:55:20,  8.12it/s][A
 30%|█████████▋                      | 36899/122310 [1:12:00<2:45:20,  8.61it/s][A
 30%|█████████▋                      | 36916/122310 [1:12:00<1:56:57, 12.17it/s][A
 30%|█████████▋                      | 36920/122310 [1:12:01<2:21:00, 10.09it/s][A
 30%|█████████▋                      | 36935/122310 [1:12:02<1:52:18, 12.67it/s][A
 30%|█████████▋                      | 36947/122310 [1:12:03<1:45:46, 13.45it/s][A
 30%|█████████▋                      | 36952/122310 [1:12:04<2:04:31, 11.43it/s][A
 30%|█████████▋                      | 36958/122310 [1:12:04<2:17:08, 10.37it/s][A
 30%|█████████▋                      | 36979/122310 [1:12:05<1:33:13, 15.25it/s][A
 30%|█████████▋                      | 36983/122310 [1:12:06<1:56:03, 12.25

step: 34220, loss: 83.08764112396042, epoch: 2



 30%|█████████▋                      | 37057/122310 [1:12:12<2:08:40, 11.04it/s][A
 30%|█████████▋                      | 37076/122310 [1:12:13<1:34:01, 15.11it/s][A
 30%|█████████▋                      | 37085/122310 [1:12:14<1:42:24, 13.87it/s][A
 30%|█████████▋                      | 37092/122310 [1:12:15<1:55:04, 12.34it/s][A
 30%|█████████▋                      | 37094/122310 [1:12:15<2:30:26,  9.44it/s][A
 30%|█████████▋                      | 37103/122310 [1:12:16<2:20:20, 10.12it/s][A
 30%|█████████▋                      | 37114/122310 [1:12:17<2:05:15, 11.34it/s][A
 30%|█████████▋                      | 37128/122310 [1:12:18<1:46:02, 13.39it/s][A
 30%|█████████▋                      | 37133/122310 [1:12:18<2:05:23, 11.32it/s][A
 30%|█████████▋                      | 37139/122310 [1:12:19<2:18:12, 10.27it/s][A
 30%|█████████▋                      | 37145/122310 [1:12:20<2:29:17,  9.51it/s][A
 30%|█████████▋                      | 37161/122310 [1:12:21<1:51:28, 12.73

step: 34240, loss: 81.27978542957011, epoch: 2



 30%|█████████▋                      | 37231/122310 [1:12:29<3:03:13,  7.74it/s][A
 30%|█████████▋                      | 37242/122310 [1:12:30<3:09:15,  7.49it/s][A
 30%|█████████▋                      | 37250/122310 [1:12:31<2:55:41,  8.07it/s][A
 30%|█████████▋                      | 37266/122310 [1:12:32<2:09:30, 10.94it/s][A
 30%|█████████▊                      | 37277/122310 [1:12:32<2:00:46, 11.73it/s][A
 30%|█████████▊                      | 37283/122310 [1:12:33<2:12:14, 10.72it/s][A
 30%|█████████▊                      | 37294/122310 [1:12:34<2:02:10, 11.60it/s][A
 30%|█████████▊                      | 37299/122310 [1:12:35<2:18:48, 10.21it/s][A
 31%|█████████▊                      | 37307/122310 [1:12:36<2:18:01, 10.26it/s][A
 31%|█████████▊                      | 37317/122310 [1:12:36<2:08:29, 11.02it/s][A
 31%|█████████▊                      | 37324/122310 [1:12:37<2:15:25, 10.46it/s][A
 31%|█████████▊                      | 37334/122310 [1:12:38<2:06:39, 11.18

step: 34260, loss: 81.17581722002232, epoch: 2



 31%|█████████▊                      | 37382/122310 [1:12:43<3:09:15,  7.48it/s][A
 31%|█████████▊                      | 37388/122310 [1:12:44<3:06:47,  7.58it/s][A
 31%|█████████▊                      | 37397/122310 [1:12:45<2:41:07,  8.78it/s][A
 31%|█████████▊                      | 37408/122310 [1:12:46<2:15:43, 10.43it/s][A
 31%|█████████▊                      | 37419/122310 [1:12:46<2:02:29, 11.55it/s][A
 31%|█████████▊                      | 37432/122310 [1:12:47<1:47:53, 13.11it/s][A
 31%|█████████▊                      | 37438/122310 [1:12:48<2:02:32, 11.54it/s][A
 31%|█████████▊                      | 37443/122310 [1:12:49<2:23:13,  9.88it/s][A
 31%|█████████▊                      | 37445/122310 [1:12:50<3:02:59,  7.73it/s][A
 31%|█████████▊                      | 37453/122310 [1:12:50<2:49:00,  8.37it/s][A
 31%|█████████▊                      | 37468/122310 [1:12:51<2:03:07, 11.48it/s][A
 31%|█████████▊                      | 37478/122310 [1:12:52<1:59:13, 11.86

step: 34280, loss: 96.8147915017439, epoch: 2



 31%|█████████▊                      | 37550/122310 [1:12:59<2:21:26,  9.99it/s][A
 31%|█████████▊                      | 37559/122310 [1:13:00<2:20:19, 10.07it/s][A
 31%|█████████▊                      | 37566/122310 [1:13:01<2:25:15,  9.72it/s][A
 31%|█████████▊                      | 37570/122310 [1:13:01<2:49:57,  8.31it/s][A
 31%|█████████▊                      | 37579/122310 [1:13:02<2:31:57,  9.29it/s][A
 31%|█████████▊                      | 37590/122310 [1:13:03<2:12:48, 10.63it/s][A
 31%|█████████▊                      | 37612/122310 [1:13:04<1:29:20, 15.80it/s][A
 31%|█████████▊                      | 37616/122310 [1:13:05<1:52:16, 12.57it/s][A
 31%|█████████▊                      | 37627/122310 [1:13:05<1:47:55, 13.08it/s][A
 31%|█████████▊                      | 37640/122310 [1:13:06<1:38:57, 14.26it/s][A
 31%|█████████▊                      | 37653/122310 [1:13:07<1:33:54, 15.02it/s][A
 31%|█████████▊                      | 37659/122310 [1:13:08<1:49:52, 12.84

step: 34300, loss: 76.83009775093218, epoch: 2



 31%|█████████▊                      | 37727/122310 [1:13:15<2:21:28,  9.96it/s][A
 31%|█████████▊                      | 37732/122310 [1:13:15<2:36:59,  8.98it/s][A
 31%|█████████▊                      | 37737/122310 [1:13:16<2:52:04,  8.19it/s][A
 31%|█████████▉                      | 37746/122310 [1:13:17<2:31:30,  9.30it/s][A
 31%|█████████▉                      | 37753/122310 [1:13:18<2:31:35,  9.30it/s][A
 31%|█████████▉                      | 37762/122310 [1:13:18<2:21:24,  9.96it/s][A
 31%|█████████▉                      | 37768/122310 [1:13:19<2:32:49,  9.22it/s][A
 31%|█████████▉                      | 37780/122310 [1:13:20<2:08:27, 10.97it/s][A
 31%|█████████▉                      | 37786/122310 [1:13:22<3:05:36,  7.59it/s][A
 31%|█████████▉                      | 37794/122310 [1:13:22<2:51:04,  8.23it/s][A
 31%|█████████▉                      | 37804/122310 [1:13:23<2:31:29,  9.30it/s][A
 31%|█████████▉                      | 37815/122310 [1:13:24<2:13:45, 10.53

step: 34320, loss: 92.65758847534642, epoch: 2



 31%|█████████▉                      | 37889/122310 [1:13:30<1:59:53, 11.74it/s][A
 31%|█████████▉                      | 37893/122310 [1:13:31<2:23:51,  9.78it/s][A
 31%|█████████▉                      | 37900/122310 [1:13:32<2:28:02,  9.50it/s][A
 31%|█████████▉                      | 37904/122310 [1:13:33<2:52:05,  8.17it/s][A
 31%|█████████▉                      | 37918/122310 [1:13:34<2:07:24, 11.04it/s][A
 31%|█████████▉                      | 37931/122310 [1:13:34<1:51:06, 12.66it/s][A
 31%|█████████▉                      | 37945/122310 [1:13:35<1:39:31, 14.13it/s][A
 31%|█████████▉                      | 37951/122310 [1:13:36<1:54:44, 12.25it/s][A
 31%|█████████▉                      | 37959/122310 [1:13:37<2:01:12, 11.60it/s][A
 31%|█████████▉                      | 37973/122310 [1:13:38<1:44:32, 13.45it/s][A
 31%|█████████▉                      | 37981/122310 [1:13:38<1:53:28, 12.39it/s][A
 31%|█████████▉                      | 37989/122310 [1:13:39<2:00:06, 11.70

step: 34340, loss: 72.5823227431806, epoch: 2



 31%|█████████▉                      | 38069/122310 [1:13:46<2:04:04, 11.32it/s][A
 31%|█████████▉                      | 38078/122310 [1:13:47<2:02:44, 11.44it/s][A
 31%|█████████▉                      | 38087/122310 [1:13:48<2:02:47, 11.43it/s][A
 31%|█████████▉                      | 38089/122310 [1:13:49<2:39:26,  8.80it/s][A
 31%|█████████▉                      | 38099/122310 [1:13:49<2:21:30,  9.92it/s][A
 31%|█████████▉                      | 38103/122310 [1:13:50<2:45:11,  8.50it/s][A
 31%|█████████▉                      | 38112/122310 [1:13:51<2:28:18,  9.46it/s][A
 31%|█████████▉                      | 38122/122310 [1:13:52<2:15:18, 10.37it/s][A
 31%|█████████▉                      | 38127/122310 [1:13:53<2:32:08,  9.22it/s][A
 31%|█████████▉                      | 38139/122310 [1:13:53<2:07:15, 11.02it/s][A
 31%|█████████▉                      | 38144/122310 [1:13:54<2:26:22,  9.58it/s][A
 31%|█████████▉                      | 38154/122310 [1:13:55<2:14:21, 10.44

step: 34360, loss: 84.73875168729529, epoch: 2



 31%|█████████▉                      | 38221/122310 [1:14:02<1:59:21, 11.74it/s][A
 31%|██████████                      | 38232/122310 [1:14:03<1:52:09, 12.49it/s][A
 31%|██████████                      | 38242/122310 [1:14:04<1:50:37, 12.67it/s][A
 31%|██████████                      | 38247/122310 [1:14:04<2:11:07, 10.68it/s][A
 31%|██████████                      | 38263/122310 [1:14:05<1:42:29, 13.67it/s][A
 31%|██████████                      | 38277/122310 [1:14:06<1:34:04, 14.89it/s][A
 31%|██████████                      | 38294/122310 [1:14:07<1:23:17, 16.81it/s][A
 31%|██████████                      | 38300/122310 [1:14:08<1:39:35, 14.06it/s][A
 31%|██████████                      | 38313/122310 [1:14:08<1:33:45, 14.93it/s][A
 31%|██████████                      | 38316/122310 [1:14:09<2:00:22, 11.63it/s][A
 31%|██████████                      | 38322/122310 [1:14:10<2:13:12, 10.51it/s][A
 31%|██████████                      | 38333/122310 [1:14:11<1:59:56, 11.67

step: 34380, loss: 76.16724754174749, epoch: 2



 31%|██████████                      | 38429/122310 [1:14:18<1:46:14, 13.16it/s][A
 31%|██████████                      | 38434/122310 [1:14:19<2:07:31, 10.96it/s][A
 31%|██████████                      | 38440/122310 [1:14:19<2:19:22, 10.03it/s][A
 31%|██████████                      | 38447/122310 [1:14:20<2:22:35,  9.80it/s][A
 31%|██████████                      | 38455/122310 [1:14:21<2:20:09,  9.97it/s][A
 31%|██████████                      | 38457/122310 [1:14:22<2:57:25,  7.88it/s][A
 31%|██████████                      | 38465/122310 [1:14:22<2:42:02,  8.62it/s][A
 31%|██████████                      | 38470/122310 [1:14:23<2:54:39,  8.00it/s][A
 31%|██████████                      | 38472/122310 [1:14:24<3:38:14,  6.40it/s][A
 31%|██████████                      | 38482/122310 [1:14:25<2:46:50,  8.37it/s][A
 31%|██████████                      | 38486/122310 [1:14:26<3:09:10,  7.39it/s][A
 31%|██████████                      | 38494/122310 [1:14:26<2:51:42,  8.14

step: 34400, loss: 98.20831531019729, epoch: 2
sim1 and sim2 are 0.4976637988330223, 0.2207297437092842
cosine of pred and queen: 0.21569906855202567
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: romania
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: ma


 32%|██████████                      | 38558/122310 [1:14:43<1:56:38, 11.97it/s][A

Actual: tripura:agartala::kerala:thiruvananthapuram, pred: thiruvananthapuram
Actual: india:delhi::serbia:belgrade, pred: confuse
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: sberbank
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: netherlands
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: electricity
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::m


 32%|█████████▊                     | 38563/122310 [1:15:48<67:38:08,  2.91s/it][A

Actual: india:rupee::denmark:krone, pred: netherlands
Accuracy is 0.14792899408284024



 32%|█████████▊                     | 38577/122310 [1:15:48<39:12:22,  1.69s/it][A
 32%|█████████▊                     | 38589/122310 [1:15:49<26:12:30,  1.13s/it][A
 32%|█████████▊                     | 38602/122310 [1:15:50<17:33:31,  1.32it/s][A
 32%|█████████▊                     | 38605/122310 [1:15:51<16:21:46,  1.42it/s][A
 32%|█████████▊                     | 38615/122310 [1:15:51<11:33:18,  2.01it/s][A
 32%|██████████                      | 38628/122310 [1:15:52<7:40:58,  3.03it/s][A
 32%|██████████                      | 38641/122310 [1:15:53<5:27:18,  4.26it/s][A
 32%|██████████                      | 38643/122310 [1:15:54<5:41:38,  4.08it/s][A
 32%|██████████                      | 38649/122310 [1:15:54<5:02:28,  4.61it/s][A
 32%|██████████                      | 38666/122310 [1:15:55<3:05:49,  7.50it/s][A
 32%|██████████                      | 38676/122310 [1:15:56<2:42:43,  8.57it/s][A
 32%|██████████                      | 38681/122310 [1:15:58<4:08:37,  5.61

step: 34420, loss: 85.19959813748727, epoch: 2



 32%|██████████▏                     | 38728/122310 [1:16:04<2:47:09,  8.33it/s][A
 32%|██████████▏                     | 38739/122310 [1:16:04<2:18:14, 10.08it/s][A
 32%|██████████▏                     | 38744/122310 [1:16:05<2:32:32,  9.13it/s][A
 32%|██████████▏                     | 38755/122310 [1:16:06<2:09:44, 10.73it/s][A
 32%|██████████▏                     | 38766/122310 [1:16:07<1:57:17, 11.87it/s][A
 32%|██████████▏                     | 38771/122310 [1:16:07<2:14:43, 10.33it/s][A
 32%|██████████▏                     | 38782/122310 [1:16:08<1:59:52, 11.61it/s][A
 32%|██████████▏                     | 38791/122310 [1:16:09<1:58:30, 11.75it/s][A
 32%|██████████▏                     | 38802/122310 [1:16:10<1:50:17, 12.62it/s][A
 32%|██████████▏                     | 38814/122310 [1:16:10<1:42:31, 13.57it/s][A
 32%|██████████▏                     | 38829/122310 [1:16:11<1:30:03, 15.45it/s][A
 32%|██████████▏                     | 38835/122310 [1:16:12<1:45:47, 13.15

step: 34440, loss: 73.48158516010216, epoch: 2



 32%|██████████▏                     | 38935/122310 [1:16:21<3:32:55,  6.53it/s][A
 32%|██████████▏                     | 38947/122310 [1:16:22<2:50:22,  8.16it/s][A
 32%|██████████▏                     | 38949/122310 [1:16:23<3:20:45,  6.92it/s][A
 32%|██████████▏                     | 38954/122310 [1:16:24<3:23:43,  6.82it/s][A
 32%|██████████▏                     | 38969/122310 [1:16:24<2:21:06,  9.84it/s][A
 32%|██████████▏                     | 38982/122310 [1:16:25<2:01:29, 11.43it/s][A
 32%|██████████▏                     | 38993/122310 [1:16:26<1:55:09, 12.06it/s][A
 32%|██████████▏                     | 39001/122310 [1:16:27<2:01:16, 11.45it/s][A
 32%|██████████▏                     | 39006/122310 [1:16:28<2:18:39, 10.01it/s][A
 32%|██████████▏                     | 39013/122310 [1:16:28<2:22:56,  9.71it/s][A
 32%|██████████▏                     | 39021/122310 [1:16:29<2:20:42,  9.87it/s][A
 32%|██████████▏                     | 39025/122310 [1:16:30<2:44:06,  8.46

step: 34460, loss: 85.63593622874134, epoch: 2



 32%|██████████▏                     | 39057/122310 [1:16:35<3:04:26,  7.52it/s][A
 32%|██████████▏                     | 39074/122310 [1:16:35<1:56:32, 11.90it/s][A
 32%|██████████▏                     | 39078/122310 [1:16:36<2:19:02,  9.98it/s][A
 32%|██████████▏                     | 39087/122310 [1:16:37<2:11:23, 10.56it/s][A
 32%|██████████▏                     | 39094/122310 [1:16:38<2:15:32, 10.23it/s][A
 32%|██████████▏                     | 39099/122310 [1:16:38<2:31:51,  9.13it/s][A
 32%|██████████▏                     | 39107/122310 [1:16:39<2:24:32,  9.59it/s][A
 32%|██████████▏                     | 39119/122310 [1:16:40<2:01:18, 11.43it/s][A
 32%|██████████▏                     | 39126/122310 [1:16:41<2:08:25, 10.80it/s][A
 32%|██████████▏                     | 39142/122310 [1:16:41<1:39:52, 13.88it/s][A
 32%|██████████▏                     | 39154/122310 [1:16:42<1:35:47, 14.47it/s][A
 32%|██████████▏                     | 39165/122310 [1:16:43<1:35:31, 14.51

step: 34480, loss: 82.34418703347178, epoch: 2



 32%|██████████▎                     | 39243/122310 [1:16:50<2:14:45, 10.27it/s][A
 32%|██████████▎                     | 39248/122310 [1:16:50<2:30:40,  9.19it/s][A
 32%|██████████▎                     | 39268/122310 [1:16:51<1:35:50, 14.44it/s][A
 32%|██████████▎                     | 39276/122310 [1:16:52<1:43:38, 13.35it/s][A
 32%|██████████▎                     | 39296/122310 [1:16:53<1:19:27, 17.41it/s][A
 32%|██████████▎                     | 39306/122310 [1:16:53<1:25:33, 16.17it/s][A
 32%|██████████▎                     | 39313/122310 [1:16:54<1:37:24, 14.20it/s][A
 32%|██████████▎                     | 39327/122310 [1:16:55<1:28:36, 15.61it/s][A
 32%|██████████▎                     | 39340/122310 [1:16:56<1:25:36, 16.15it/s][A
 32%|██████████▎                     | 39347/122310 [1:16:56<1:38:35, 14.02it/s][A
 32%|██████████▎                     | 39349/122310 [1:16:57<2:09:17, 10.69it/s][A
 32%|██████████▎                     | 39357/122310 [1:16:58<2:09:05, 10.71

step: 34500, loss: 80.0855299380756, epoch: 2



 32%|██████████▎                     | 39445/122310 [1:17:05<1:32:13, 14.98it/s][A
 32%|██████████▎                     | 39454/122310 [1:17:05<1:38:09, 14.07it/s][A
 32%|██████████▎                     | 39463/122310 [1:17:06<1:42:19, 13.49it/s][A
 32%|██████████▎                     | 39474/122310 [1:17:07<1:39:49, 13.83it/s][A
 32%|██████████▎                     | 39482/122310 [1:17:08<1:46:39, 12.94it/s][A
 32%|██████████▎                     | 39489/122310 [1:17:08<1:56:33, 11.84it/s][A
 32%|██████████▎                     | 39499/122310 [1:17:09<1:52:52, 12.23it/s][A
 32%|██████████▎                     | 39508/122310 [1:17:10<1:53:07, 12.20it/s][A
 32%|██████████▎                     | 39515/122310 [1:17:11<2:01:33, 11.35it/s][A
 32%|██████████▎                     | 39520/122310 [1:17:11<2:18:51,  9.94it/s][A
 32%|██████████▎                     | 39531/122310 [1:17:12<2:00:54, 11.41it/s][A
 32%|██████████▎                     | 39538/122310 [1:17:13<2:07:49, 10.79

step: 34520, loss: 77.58424088077955, epoch: 2



 32%|██████████▎                     | 39624/122310 [1:17:19<1:39:27, 13.86it/s][A
 32%|██████████▎                     | 39628/122310 [1:17:20<2:01:52, 11.31it/s][A
 32%|██████████▎                     | 39638/122310 [1:17:21<1:54:37, 12.02it/s][A
 32%|██████████▎                     | 39645/122310 [1:17:22<2:02:51, 11.21it/s][A
 32%|██████████▍                     | 39656/122310 [1:17:22<1:52:06, 12.29it/s][A
 32%|██████████▍                     | 39663/122310 [1:17:23<2:00:50, 11.40it/s][A
 32%|██████████▍                     | 39673/122310 [1:17:24<1:54:27, 12.03it/s][A
 32%|██████████▍                     | 39678/122310 [1:17:25<2:12:19, 10.41it/s][A
 32%|██████████▍                     | 39685/122310 [1:17:25<2:16:51, 10.06it/s][A
 32%|██████████▍                     | 39688/122310 [1:17:26<2:47:09,  8.24it/s][A
 32%|██████████▍                     | 39691/122310 [1:17:27<3:16:10,  7.02it/s][A
 32%|██████████▍                     | 39694/122310 [1:17:28<3:45:57,  6.09

step: 34540, loss: 84.55060193943166, epoch: 2



 33%|██████████▍                     | 39762/122310 [1:17:35<2:46:32,  8.26it/s][A
 33%|██████████▍                     | 39772/122310 [1:17:36<2:29:33,  9.20it/s][A
 33%|██████████▍                     | 39778/122310 [1:17:37<2:37:09,  8.75it/s][A
 33%|██████████▍                     | 39782/122310 [1:17:37<3:00:16,  7.63it/s][A
 33%|██████████▍                     | 39792/122310 [1:17:38<2:30:59,  9.11it/s][A
 33%|██████████▍                     | 39796/122310 [1:17:39<2:52:39,  7.97it/s][A
 33%|██████████▍                     | 39798/122310 [1:17:40<3:33:41,  6.44it/s][A
 33%|██████████▍                     | 39802/122310 [1:17:40<3:45:03,  6.11it/s][A
 33%|██████████▍                     | 39809/122310 [1:17:41<3:15:07,  7.05it/s][A
 33%|██████████▍                     | 39814/122310 [1:17:42<3:18:37,  6.92it/s][A
 33%|██████████▍                     | 39827/122310 [1:17:43<2:17:31, 10.00it/s][A
 33%|██████████▍                     | 39830/122310 [1:17:43<2:47:08,  8.22

step: 34560, loss: 70.92206604612464, epoch: 2



 33%|██████████▍                     | 39920/122310 [1:17:50<2:02:19, 11.23it/s][A
 33%|██████████▍                     | 39925/122310 [1:17:51<2:19:20,  9.85it/s][A
 33%|██████████▍                     | 39932/122310 [1:17:52<2:21:36,  9.70it/s][A
 33%|██████████▍                     | 39940/122310 [1:17:52<2:17:24,  9.99it/s][A
 33%|██████████▍                     | 39942/122310 [1:17:53<2:55:58,  7.80it/s][A
 33%|██████████▍                     | 39948/122310 [1:17:54<2:55:11,  7.84it/s][A
 33%|██████████▍                     | 39956/122310 [1:17:55<2:37:39,  8.71it/s][A
 33%|██████████▍                     | 39964/122310 [1:17:55<2:27:57,  9.28it/s][A
 33%|██████████▍                     | 39970/122310 [1:17:56<2:34:05,  8.91it/s][A
 33%|██████████▍                     | 39975/122310 [1:17:57<2:46:39,  8.23it/s][A
 33%|██████████▍                     | 39979/122310 [1:17:58<3:06:13,  7.37it/s][A
 33%|██████████▍                     | 39986/122310 [1:17:58<2:53:22,  7.91

step: 34580, loss: 75.02533574496343, epoch: 2



 33%|██████████▍                     | 40050/122310 [1:18:05<2:10:07, 10.54it/s][A
 33%|██████████▍                     | 40058/122310 [1:18:06<2:09:21, 10.60it/s][A
 33%|██████████▍                     | 40065/122310 [1:18:07<2:13:58, 10.23it/s][A
 33%|██████████▍                     | 40071/122310 [1:18:07<2:23:17,  9.57it/s][A
 33%|██████████▍                     | 40074/122310 [1:18:08<2:53:40,  7.89it/s][A
 33%|██████████▍                     | 40090/122310 [1:18:09<1:55:38, 11.85it/s][A
 33%|██████████▍                     | 40094/122310 [1:18:10<2:18:04,  9.92it/s][A
 33%|██████████▍                     | 40101/122310 [1:18:10<2:20:38,  9.74it/s][A
 33%|██████████▍                     | 40107/122310 [1:18:11<2:29:12,  9.18it/s][A
 33%|██████████▍                     | 40108/122310 [1:18:12<3:21:29,  6.80it/s][A
 33%|██████████▍                     | 40111/122310 [1:18:13<3:49:34,  5.97it/s][A
 33%|██████████▍                     | 40121/122310 [1:18:13<2:47:00,  8.20

step: 34600, loss: 117.6711362738824, epoch: 2



 33%|██████████▌                     | 40188/122310 [1:18:20<2:08:50, 10.62it/s][A
 33%|██████████▌                     | 40195/122310 [1:18:21<2:13:30, 10.25it/s][A
 33%|██████████▌                     | 40201/122310 [1:18:22<2:22:32,  9.60it/s][A
 33%|██████████▌                     | 40213/122310 [1:18:22<1:58:37, 11.53it/s][A
 33%|██████████▌                     | 40220/122310 [1:18:23<2:05:46, 10.88it/s][A
 33%|██████████▌                     | 40224/122310 [1:18:24<2:28:56,  9.19it/s][A
 33%|██████████▌                     | 40232/122310 [1:18:25<2:22:23,  9.61it/s][A
 33%|██████████▌                     | 40241/122310 [1:18:25<2:12:00, 10.36it/s][A
 33%|██████████▌                     | 40254/122310 [1:18:26<1:49:43, 12.46it/s][A
 33%|██████████▌                     | 40269/122310 [1:18:27<1:33:15, 14.66it/s][A
 33%|██████████▌                     | 40275/122310 [1:18:28<1:47:35, 12.71it/s][A
 33%|██████████▌                     | 40279/122310 [1:18:28<2:10:30, 10.48

step: 34620, loss: 85.90894968032808, epoch: 2



 33%|██████████▌                     | 40344/122310 [1:18:35<1:55:05, 11.87it/s][A
 33%|██████████▌                     | 40357/122310 [1:18:36<1:41:15, 13.49it/s][A
 33%|██████████▌                     | 40362/122310 [1:18:37<1:58:53, 11.49it/s][A
 33%|██████████▌                     | 40374/122310 [1:18:37<1:45:59, 12.88it/s][A
 33%|██████████▌                     | 40384/122310 [1:18:38<1:45:04, 13.00it/s][A
 33%|██████████▌                     | 40392/122310 [1:18:39<1:51:17, 12.27it/s][A
 33%|██████████▌                     | 40395/122310 [1:18:40<2:19:25,  9.79it/s][A
 33%|██████████▌                     | 40404/122310 [1:18:40<2:11:00, 10.42it/s][A
 33%|██████████▌                     | 40408/122310 [1:18:41<2:33:22,  8.90it/s][A
 33%|██████████▌                     | 40430/122310 [1:18:42<1:30:49, 15.03it/s][A
 33%|██████████▌                     | 40437/122310 [1:18:43<1:42:25, 13.32it/s][A
 33%|██████████▌                     | 40449/122310 [1:18:43<1:36:46, 14.10

step: 34640, loss: 74.42976927784905, epoch: 2



 33%|██████████▌                     | 40539/122310 [1:18:50<1:43:26, 13.18it/s][A
 33%|██████████▌                     | 40550/122310 [1:18:51<1:40:05, 13.61it/s][A
 33%|██████████▌                     | 40562/122310 [1:18:52<1:34:50, 14.37it/s][A
 33%|██████████▌                     | 40567/122310 [1:18:52<1:53:26, 12.01it/s][A
 33%|██████████▌                     | 40571/122310 [1:18:53<2:15:35, 10.05it/s][A
 33%|██████████▌                     | 40584/122310 [1:18:54<1:51:17, 12.24it/s][A
 33%|██████████▌                     | 40591/122310 [1:18:55<1:59:50, 11.37it/s][A
 33%|██████████▌                     | 40601/122310 [1:18:55<1:53:46, 11.97it/s][A
 33%|██████████▋                     | 40611/122310 [1:18:56<1:50:04, 12.37it/s][A
 33%|██████████▋                     | 40618/122310 [1:18:57<1:58:29, 11.49it/s][A
 33%|██████████▋                     | 40626/122310 [1:18:58<2:01:12, 11.23it/s][A
 33%|██████████▋                     | 40631/122310 [1:18:58<2:17:55,  9.87

step: 34660, loss: 68.82950011243415, epoch: 2



 33%|██████████▋                     | 40718/122310 [1:19:05<1:50:56, 12.26it/s][A
 33%|██████████▋                     | 40729/122310 [1:19:06<1:44:37, 13.00it/s][A
 33%|██████████▋                     | 40740/122310 [1:19:07<1:40:27, 13.53it/s][A
 33%|██████████▋                     | 40753/122310 [1:19:07<1:33:54, 14.47it/s][A
 33%|██████████▋                     | 40759/122310 [1:19:08<1:49:27, 12.42it/s][A
 33%|██████████▋                     | 40772/122310 [1:19:09<1:39:54, 13.60it/s][A
 33%|██████████▋                     | 40787/122310 [1:19:10<1:28:57, 15.27it/s][A
 33%|██████████▋                     | 40795/122310 [1:19:11<1:38:10, 13.84it/s][A
 33%|██████████▋                     | 40799/122310 [1:19:11<2:00:50, 11.24it/s][A
 33%|██████████▋                     | 40804/122310 [1:19:12<2:17:46,  9.86it/s][A
 33%|██████████▋                     | 40814/122310 [1:19:13<2:05:36, 10.81it/s][A
 33%|██████████▋                     | 40822/122310 [1:19:14<2:06:24, 10.74

step: 34680, loss: 71.28805464242326, epoch: 2



 33%|██████████▋                     | 40912/122310 [1:19:21<1:44:22, 13.00it/s][A
 33%|██████████▋                     | 40918/122310 [1:19:21<1:58:08, 11.48it/s][A
 33%|██████████▋                     | 40927/122310 [1:19:22<1:56:42, 11.62it/s][A
 33%|██████████▋                     | 40933/122310 [1:19:23<2:08:50, 10.53it/s][A
 33%|██████████▋                     | 40948/122310 [1:19:24<1:41:43, 13.33it/s][A
 33%|██████████▋                     | 40953/122310 [1:19:24<2:00:47, 11.23it/s][A
 33%|██████████▋                     | 40956/122310 [1:19:25<2:30:39,  9.00it/s][A
 34%|██████████▋                     | 40975/122310 [1:19:26<1:38:21, 13.78it/s][A
 34%|██████████▋                     | 40981/122310 [1:19:27<1:52:55, 12.00it/s][A
 34%|██████████▋                     | 40986/122310 [1:19:27<2:10:29, 10.39it/s][A
 34%|██████████▋                     | 40996/122310 [1:19:28<2:01:00, 11.20it/s][A
 34%|██████████▋                     | 41003/122310 [1:19:29<2:08:14, 10.57

step: 34700, loss: 79.45238427354485, epoch: 2



 34%|██████████▊                     | 41095/122310 [1:19:37<2:29:15,  9.07it/s][A
 34%|██████████▊                     | 41102/122310 [1:19:38<2:29:13,  9.07it/s][A
 34%|██████████▊                     | 41114/122310 [1:19:39<2:12:29, 10.21it/s][A
 34%|██████████▊                     | 41121/122310 [1:19:40<2:18:35,  9.76it/s][A
 34%|██████████▊                     | 41139/122310 [1:19:41<1:46:19, 12.72it/s][A
 34%|██████████▊                     | 41144/122310 [1:19:42<2:03:36, 10.94it/s][A
 34%|██████████▊                     | 41155/122310 [1:19:42<1:54:35, 11.80it/s][A
 34%|██████████▊                     | 41165/122310 [1:19:43<1:51:02, 12.18it/s][A
 34%|██████████▊                     | 41179/122310 [1:19:44<1:39:29, 13.59it/s][A
 34%|██████████▊                     | 41186/122310 [1:19:45<1:52:09, 12.06it/s][A
 34%|██████████▊                     | 41199/122310 [1:19:46<1:41:18, 13.34it/s][A
 34%|██████████▊                     | 41203/122310 [1:19:46<2:03:20, 10.96

step: 34720, loss: 65.73277631026376, epoch: 2



 34%|██████████▊                     | 41254/122310 [1:19:52<2:53:08,  7.80it/s][A
 34%|██████████▊                     | 41265/122310 [1:19:53<2:17:53,  9.80it/s][A
 34%|██████████▊                     | 41275/122310 [1:19:53<2:07:58, 10.55it/s][A
 34%|██████████▊                     | 41283/122310 [1:19:54<2:10:28, 10.35it/s][A
 34%|██████████▊                     | 41296/122310 [1:19:55<1:49:19, 12.35it/s][A
 34%|██████████▊                     | 41302/122310 [1:19:56<2:03:18, 10.95it/s][A
 34%|██████████▊                     | 41309/122310 [1:19:57<2:08:38, 10.49it/s][A
 34%|██████████▊                     | 41315/122310 [1:19:57<2:18:11,  9.77it/s][A
 34%|██████████▊                     | 41325/122310 [1:19:58<2:08:54, 10.47it/s][A
 34%|██████████▊                     | 41334/122310 [1:19:59<2:05:27, 10.76it/s][A
 34%|██████████▊                     | 41341/122310 [1:20:00<2:10:23, 10.35it/s][A
 34%|██████████▊                     | 41350/122310 [1:20:00<2:05:16, 10.77

step: 34740, loss: 73.59485106733267, epoch: 2



 34%|██████████▊                     | 41431/122310 [1:20:07<1:42:14, 13.19it/s][A
 34%|██████████▊                     | 41445/122310 [1:20:08<1:31:22, 14.75it/s][A
 34%|██████████▊                     | 41451/122310 [1:20:09<1:45:49, 12.73it/s][A
 34%|██████████▊                     | 41459/122310 [1:20:10<1:51:36, 12.07it/s][A
 34%|██████████▊                     | 41467/122310 [1:20:10<1:55:39, 11.65it/s][A
 34%|██████████▊                     | 41482/122310 [1:20:11<1:35:57, 14.04it/s][A
 34%|██████████▊                     | 41485/122310 [1:20:12<2:02:32, 10.99it/s][A
 34%|██████████▊                     | 41492/122310 [1:20:13<2:09:09, 10.43it/s][A
 34%|██████████▊                     | 41503/122310 [1:20:14<2:29:32,  9.01it/s][A
 34%|██████████▊                     | 41508/122310 [1:20:15<2:41:58,  8.31it/s][A
 34%|██████████▊                     | 41516/122310 [1:20:16<2:32:49,  8.81it/s][A
 34%|██████████▊                     | 41525/122310 [1:20:16<2:21:18,  9.53

step: 34760, loss: 78.09147375905759, epoch: 2



 34%|██████████▉                     | 41579/122310 [1:20:23<2:32:49,  8.80it/s][A
 34%|██████████▉                     | 41597/122310 [1:20:23<1:43:03, 13.05it/s][A
 34%|██████████▉                     | 41604/122310 [1:20:24<1:56:22, 11.56it/s][A
 34%|██████████▉                     | 41613/122310 [1:20:25<2:00:12, 11.19it/s][A
 34%|██████████▉                     | 41624/122310 [1:20:26<1:52:26, 11.96it/s][A
 34%|██████████▉                     | 41634/122310 [1:20:27<1:50:38, 12.15it/s][A
 34%|██████████▉                     | 41643/122310 [1:20:28<1:52:43, 11.93it/s][A
 34%|██████████▉                     | 41645/122310 [1:20:28<2:27:09,  9.14it/s][A
 34%|██████████▉                     | 41648/122310 [1:20:29<2:57:12,  7.59it/s][A
 34%|██████████▉                     | 41656/122310 [1:20:30<2:39:43,  8.42it/s][A
 34%|██████████▉                     | 41663/122310 [1:20:31<2:37:33,  8.53it/s][A
 34%|██████████▉                     | 41670/122310 [1:20:31<2:33:48,  8.74

step: 34780, loss: 75.77182755029403, epoch: 2



 34%|██████████▉                     | 41742/122310 [1:20:38<2:24:30,  9.29it/s][A
 34%|██████████▉                     | 41749/122310 [1:20:39<2:25:36,  9.22it/s][A
 34%|██████████▉                     | 41757/122310 [1:20:40<2:20:13,  9.57it/s][A
 34%|██████████▉                     | 41765/122310 [1:20:41<2:16:26,  9.84it/s][A
 34%|██████████▉                     | 41773/122310 [1:20:41<2:12:57, 10.10it/s][A
 34%|██████████▉                     | 41780/122310 [1:20:42<2:17:17,  9.78it/s][A
 34%|██████████▉                     | 41791/122310 [1:20:43<1:58:50, 11.29it/s][A
 34%|██████████▉                     | 41798/122310 [1:20:44<2:04:29, 10.78it/s][A
 34%|██████████▉                     | 41813/122310 [1:20:44<1:38:58, 13.56it/s][A
 34%|██████████▉                     | 41815/122310 [1:20:45<2:08:58, 10.40it/s][A
 34%|██████████▉                     | 41818/122310 [1:20:46<2:37:19,  8.53it/s][A
 34%|██████████▉                     | 41827/122310 [1:20:47<2:20:29,  9.55

step: 34800, loss: 71.84364675651184, epoch: 2
sim1 and sim2 are 0.4950787116217203, 0.23808994228039904
cosine of pred and queen: 0.2407042587067657
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: wages
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: greece
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: mahar


 34%|██████████▉                     | 41886/122310 [1:21:03<2:04:49, 10.74it/s][A

Actual: india:delhi::serbia:belgrade, pred: confuse
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: scrubs
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: swedish
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: netherlands
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: electricity
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred:


 34%|██████████▌                    | 41895/122310 [1:22:08<63:14:09,  2.83s/it][A

Actual: india:rupee::denmark:krone, pred: netherlands
Accuracy is 0.14201183431952663



 34%|██████████▌                    | 41896/122310 [1:22:09<60:55:12,  2.73s/it][A
 34%|██████████▌                    | 41907/122310 [1:22:10<34:58:44,  1.57s/it][A
 34%|██████████▌                    | 41919/122310 [1:22:10<21:21:44,  1.05it/s][A
 34%|██████████▋                    | 41926/122310 [1:22:11<16:33:49,  1.35it/s][A
 34%|██████████▉                     | 41942/122310 [1:22:12<9:32:45,  2.34it/s][A
 34%|██████████▉                     | 41951/122310 [1:22:13<7:29:09,  2.98it/s][A
 34%|██████████▉                     | 41962/122310 [1:22:13<5:34:56,  4.00it/s][A
 34%|██████████▉                     | 41965/122310 [1:22:15<6:09:46,  3.62it/s][A
 34%|██████████▉                     | 41971/122310 [1:22:15<5:19:55,  4.19it/s][A
 34%|██████████▉                     | 41980/122310 [1:22:16<4:08:24,  5.39it/s][A
 34%|██████████▉                     | 41991/122310 [1:22:17<3:08:52,  7.09it/s][A
 34%|██████████▉                     | 42001/122310 [1:22:18<2:39:00,  8.42

step: 34820, loss: 73.36053609411883, epoch: 2



 34%|███████████                     | 42085/122310 [1:22:23<1:55:10, 11.61it/s][A
 34%|███████████                     | 42090/122310 [1:22:24<2:12:00, 10.13it/s][A
 34%|███████████                     | 42098/122310 [1:22:25<2:07:56, 10.45it/s][A
 34%|███████████                     | 42103/122310 [1:22:26<2:22:44,  9.37it/s][A
 34%|███████████                     | 42109/122310 [1:22:26<2:27:57,  9.03it/s][A
 34%|███████████                     | 42125/122310 [1:22:27<1:43:48, 12.87it/s][A
 34%|███████████                     | 42135/122310 [1:22:28<1:41:47, 13.13it/s][A
 34%|███████████                     | 42145/122310 [1:22:29<1:40:23, 13.31it/s][A
 34%|███████████                     | 42157/122310 [1:22:29<1:33:58, 14.22it/s][A
 34%|███████████                     | 42164/122310 [1:22:30<1:43:59, 12.84it/s][A
 34%|███████████                     | 42176/122310 [1:22:31<1:36:08, 13.89it/s][A
 34%|███████████                     | 42184/122310 [1:22:31<1:42:32, 13.02

step: 34840, loss: 83.28287987715288, epoch: 2



 35%|███████████                     | 42259/122310 [1:22:39<2:30:38,  8.86it/s][A
 35%|███████████                     | 42262/122310 [1:22:40<2:52:10,  7.75it/s][A
 35%|███████████                     | 42276/122310 [1:22:40<2:05:04, 10.67it/s][A
 35%|███████████                     | 42286/122310 [1:22:41<1:56:55, 11.41it/s][A
 35%|███████████                     | 42297/122310 [1:22:42<1:47:36, 12.39it/s][A
 35%|███████████                     | 42310/122310 [1:22:42<1:35:47, 13.92it/s][A
 35%|███████████                     | 42326/122310 [1:22:43<1:22:31, 16.15it/s][A
 35%|███████████                     | 42336/122310 [1:22:44<1:26:29, 15.41it/s][A
 35%|███████████                     | 42344/122310 [1:22:45<1:35:00, 14.03it/s][A
 35%|███████████                     | 42347/122310 [1:22:45<1:59:57, 11.11it/s][A
 35%|███████████                     | 42352/122310 [1:22:46<2:15:27,  9.84it/s][A
 35%|███████████                     | 42357/122310 [1:22:47<2:28:43,  8.96

step: 34860, loss: 78.85474193079419, epoch: 2



 35%|███████████                     | 42432/122310 [1:22:53<1:40:38, 13.23it/s][A
 35%|███████████                     | 42439/122310 [1:22:53<1:49:38, 12.14it/s][A
 35%|███████████                     | 42450/122310 [1:22:54<1:42:29, 12.99it/s][A
 35%|███████████                     | 42460/122310 [1:22:55<1:40:43, 13.21it/s][A
 35%|███████████                     | 42472/122310 [1:22:56<1:34:23, 14.10it/s][A
 35%|███████████                     | 42475/122310 [1:22:56<1:59:41, 11.12it/s][A
 35%|███████████                     | 42486/122310 [1:22:57<1:48:37, 12.25it/s][A
 35%|███████████                     | 42497/122310 [1:22:58<1:41:38, 13.09it/s][A
 35%|███████████                     | 42511/122310 [1:22:59<1:29:25, 14.87it/s][A
 35%|███████████                     | 42520/122310 [1:22:59<1:34:28, 14.08it/s][A
 35%|███████████▏                    | 42524/122310 [1:23:00<1:55:17, 11.53it/s][A
 35%|███████████▏                    | 42535/122310 [1:23:01<1:45:40, 12.58

step: 34880, loss: 75.64849891434646, epoch: 2



 35%|███████████▏                    | 42603/122310 [1:23:07<2:00:28, 11.03it/s][A
 35%|███████████▏                    | 42620/122310 [1:23:08<1:31:12, 14.56it/s][A
 35%|███████████▏                    | 42630/122310 [1:23:09<1:33:19, 14.23it/s][A
 35%|███████████▏                    | 42638/122310 [1:23:10<1:40:25, 13.22it/s][A
 35%|███████████▏                    | 42648/122310 [1:23:10<1:41:42, 13.05it/s][A
 35%|███████████▏                    | 42657/122310 [1:23:11<1:44:25, 12.71it/s][A
 35%|███████████▏                    | 42666/122310 [1:23:12<1:45:57, 12.53it/s][A
 35%|███████████▏                    | 42672/122310 [1:23:13<1:59:14, 11.13it/s][A
 35%|███████████▏                    | 42679/122310 [1:23:13<2:07:24, 10.42it/s][A
 35%|███████████▏                    | 42681/122310 [1:23:14<2:43:26,  8.12it/s][A
 35%|███████████▏                    | 42684/122310 [1:23:15<3:10:26,  6.97it/s][A
 35%|███████████▏                    | 42694/122310 [1:23:16<2:30:05,  8.84

step: 34900, loss: 99.2805931203246, epoch: 2



 35%|███████████▏                    | 42763/122310 [1:23:22<2:03:51, 10.70it/s][A
 35%|███████████▏                    | 42773/122310 [1:23:23<1:55:47, 11.45it/s][A
 35%|███████████▏                    | 42783/122310 [1:23:24<1:49:36, 12.09it/s][A
 35%|███████████▏                    | 42789/122310 [1:23:25<2:03:19, 10.75it/s][A
 35%|███████████▏                    | 42798/122310 [1:23:25<2:00:01, 11.04it/s][A
 35%|███████████▏                    | 42803/122310 [1:23:26<2:16:02,  9.74it/s][A
 35%|███████████▏                    | 42809/122310 [1:23:27<2:22:34,  9.29it/s][A
 35%|███████████▏                    | 42818/122310 [1:23:28<2:11:03, 10.11it/s][A
 35%|███████████▏                    | 42824/122310 [1:23:28<2:19:31,  9.50it/s][A
 35%|███████████▏                    | 42833/122310 [1:23:29<2:08:09, 10.34it/s][A
 35%|███████████▏                    | 42842/122310 [1:23:30<2:02:27, 10.82it/s][A
 35%|███████████▏                    | 42845/122310 [1:23:31<2:30:35,  8.79

step: 34920, loss: 79.43121259443113, epoch: 2



 35%|███████████▏                    | 42909/122310 [1:23:37<1:53:46, 11.63it/s][A
 35%|███████████▏                    | 42919/122310 [1:23:38<1:49:01, 12.14it/s][A
 35%|███████████▏                    | 42929/122310 [1:23:39<1:45:54, 12.49it/s][A
 35%|███████████▏                    | 42935/122310 [1:23:40<1:57:26, 11.26it/s][A
 35%|███████████▏                    | 42944/122310 [1:23:40<1:55:23, 11.46it/s][A
 35%|███████████▏                    | 42955/122310 [1:23:41<1:46:51, 12.38it/s][A
 35%|███████████▏                    | 42963/122310 [1:23:42<1:51:43, 11.84it/s][A
 35%|███████████▏                    | 42966/122310 [1:23:43<2:17:46,  9.60it/s][A
 35%|███████████▏                    | 42975/122310 [1:23:43<2:07:31, 10.37it/s][A
 35%|███████████▏                    | 42990/122310 [1:23:44<1:39:32, 13.28it/s][A
 35%|███████████▏                    | 42998/122310 [1:23:45<1:46:19, 12.43it/s][A
 35%|███████████▎                    | 43008/122310 [1:23:46<1:43:20, 12.79

step: 34940, loss: 75.92088049645874, epoch: 2



 35%|███████████▎                    | 43068/122310 [1:23:52<2:32:10,  8.68it/s][A
 35%|███████████▎                    | 43081/122310 [1:23:53<1:58:44, 11.12it/s][A
 35%|███████████▎                    | 43092/122310 [1:23:54<1:49:29, 12.06it/s][A
 35%|███████████▎                    | 43098/122310 [1:23:55<2:01:57, 10.83it/s][A
 35%|███████████▎                    | 43106/122310 [1:23:56<2:02:56, 10.74it/s][A
 35%|███████████▎                    | 43113/122310 [1:23:56<2:07:50, 10.32it/s][A
 35%|███████████▎                    | 43121/122310 [1:23:57<2:06:45, 10.41it/s][A
 35%|███████████▎                    | 43133/122310 [1:23:58<1:51:38, 11.82it/s][A
 35%|███████████▎                    | 43143/122310 [1:23:59<1:47:51, 12.23it/s][A
 35%|███████████▎                    | 43145/122310 [1:23:59<2:20:59,  9.36it/s][A
 35%|███████████▎                    | 43154/122310 [1:24:00<2:10:01, 10.15it/s][A
 35%|███████████▎                    | 43173/122310 [1:24:01<1:29:51, 14.68

step: 34960, loss: 152.8476708345484, epoch: 2



 35%|███████████▎                    | 43239/122310 [1:24:08<1:57:36, 11.21it/s][A
 35%|███████████▎                    | 43245/122310 [1:24:08<2:08:36, 10.25it/s][A
 35%|███████████▎                    | 43253/122310 [1:24:09<2:07:32, 10.33it/s][A
 35%|███████████▎                    | 43264/122310 [1:24:10<1:54:58, 11.46it/s][A
 35%|███████████▎                    | 43267/122310 [1:24:11<2:21:12,  9.33it/s][A
 35%|███████████▎                    | 43276/122310 [1:24:11<2:10:07, 10.12it/s][A
 35%|███████████▎                    | 43284/122310 [1:24:12<2:08:17, 10.27it/s][A
 35%|███████████▎                    | 43288/122310 [1:24:13<2:30:15,  8.77it/s][A
 35%|███████████▎                    | 43298/122310 [1:24:14<2:13:41,  9.85it/s][A
 35%|███████████▎                    | 43308/122310 [1:24:15<2:02:57, 10.71it/s][A
 35%|███████████▎                    | 43310/122310 [1:24:15<2:37:18,  8.37it/s][A
 35%|███████████▎                    | 43322/122310 [1:24:16<2:05:08, 10.52

step: 34980, loss: 76.4996721957576, epoch: 2



 35%|███████████▎                    | 43403/122310 [1:24:23<1:41:50, 12.91it/s][A
 35%|███████████▎                    | 43409/122310 [1:24:24<1:54:53, 11.44it/s][A
 35%|███████████▎                    | 43416/122310 [1:24:24<2:01:42, 10.80it/s][A
 36%|███████████▎                    | 43425/122310 [1:24:25<1:58:48, 11.07it/s][A
 36%|███████████▎                    | 43436/122310 [1:24:26<1:48:30, 12.11it/s][A
 36%|███████████▎                    | 43447/122310 [1:24:27<1:45:14, 12.49it/s][A
 36%|███████████▎                    | 43467/122310 [1:24:28<1:19:25, 16.55it/s][A
 36%|███████████▎                    | 43473/122310 [1:24:28<1:33:56, 13.99it/s][A
 36%|███████████▍                    | 43480/122310 [1:24:29<1:44:34, 12.56it/s][A
 36%|███████████▍                    | 43491/122310 [1:24:30<1:39:28, 13.21it/s][A
 36%|███████████▍                    | 43500/122310 [1:24:31<1:42:08, 12.86it/s][A
 36%|███████████▍                    | 43504/122310 [1:24:31<2:04:04, 10.59

step: 35000, loss: 71.38287375931934, epoch: 2
saving weights



 36%|███████████▍                    | 43563/122310 [1:24:38<2:31:32,  8.66it/s][A
 36%|███████████▍                    | 43581/122310 [1:24:39<1:47:23, 12.22it/s][A
 36%|███████████▍                    | 43585/122310 [1:24:40<2:05:48, 10.43it/s][A
 36%|███████████▍                    | 43592/122310 [1:24:41<2:11:24,  9.98it/s][A
 36%|███████████▍                    | 43601/122310 [1:24:41<2:05:03, 10.49it/s][A
 36%|███████████▍                    | 43604/122310 [1:24:42<2:31:33,  8.65it/s][A
 36%|███████████▍                    | 43610/122310 [1:24:43<2:34:03,  8.51it/s][A
 36%|███████████▍                    | 43617/122310 [1:24:44<2:29:10,  8.79it/s][A
 36%|███████████▍                    | 43625/122310 [1:24:44<2:19:56,  9.37it/s][A
 36%|███████████▍                    | 43636/122310 [1:24:45<1:59:30, 10.97it/s][A
 36%|███████████▍                    | 43646/122310 [1:24:46<1:53:55, 11.51it/s][A
 36%|███████████▍                    | 43651/122310 [1:24:47<2:10:40, 10.03

step: 35020, loss: 74.44743581619015, epoch: 2



 36%|███████████▍                    | 43741/122310 [1:24:54<1:52:41, 11.62it/s][A
 36%|███████████▍                    | 43756/122310 [1:24:55<1:32:17, 14.19it/s][A
 36%|███████████▍                    | 43761/122310 [1:24:56<1:49:34, 11.95it/s][A
 36%|███████████▍                    | 43766/122310 [1:24:56<2:06:21, 10.36it/s][A
 36%|███████████▍                    | 43770/122310 [1:24:57<2:29:48,  8.74it/s][A
 36%|███████████▍                    | 43771/122310 [1:24:58<3:20:36,  6.53it/s][A
 36%|███████████▍                    | 43777/122310 [1:24:59<3:07:55,  6.96it/s][A
 36%|███████████▍                    | 43782/122310 [1:24:59<3:10:49,  6.86it/s][A
 36%|███████████▍                    | 43792/122310 [1:25:00<2:28:43,  8.80it/s][A
 36%|███████████▍                    | 43800/122310 [1:25:01<2:19:52,  9.35it/s][A
 36%|███████████▍                    | 43814/122310 [1:25:02<1:48:21, 12.07it/s][A
 36%|███████████▍                    | 43818/122310 [1:25:02<2:09:29, 10.10

step: 35040, loss: 74.47048143495198, epoch: 2



 36%|███████████▍                    | 43891/122310 [1:25:09<1:59:16, 10.96it/s][A
 36%|███████████▍                    | 43895/122310 [1:25:10<2:19:16,  9.38it/s][A
 36%|███████████▍                    | 43903/122310 [1:25:11<2:13:58,  9.75it/s][A
 36%|███████████▍                    | 43909/122310 [1:25:11<2:20:36,  9.29it/s][A
 36%|███████████▍                    | 43920/122310 [1:25:12<2:00:30, 10.84it/s][A
 36%|███████████▍                    | 43929/122310 [1:25:13<1:56:57, 11.17it/s][A
 36%|███████████▍                    | 43936/122310 [1:25:14<2:03:20, 10.59it/s][A
 36%|███████████▍                    | 43942/122310 [1:25:14<2:16:38,  9.56it/s][A
 36%|███████████▍                    | 43944/122310 [1:25:15<2:54:27,  7.49it/s][A
 36%|███████████▍                    | 43949/122310 [1:25:16<2:59:41,  7.27it/s][A
 36%|███████████▍                    | 43955/122310 [1:25:17<2:56:49,  7.39it/s][A
 36%|███████████▌                    | 43957/122310 [1:25:17<3:36:03,  6.04

step: 35060, loss: 83.74147824664007, epoch: 2



 36%|███████████▌                    | 43994/122310 [1:25:24<6:27:21,  3.37it/s][A
 36%|███████████▌                    | 43995/122310 [1:25:25<7:23:09,  2.95it/s][A
 36%|███████████▌                    | 44011/122310 [1:25:26<3:05:06,  7.05it/s][A
 36%|███████████▌                    | 44025/122310 [1:25:27<2:13:08,  9.80it/s][A
 36%|███████████▌                    | 44033/122310 [1:25:28<2:11:00,  9.96it/s][A
 36%|███████████▌                    | 44040/122310 [1:25:28<2:14:38,  9.69it/s][A
 36%|███████████▌                    | 44061/122310 [1:25:29<1:29:36, 14.55it/s][A
 36%|███████████▌                    | 44065/122310 [1:25:31<2:52:44,  7.55it/s][A
 36%|███████████▌                    | 44069/122310 [1:25:32<3:04:22,  7.07it/s][A
 36%|███████████▌                    | 44079/122310 [1:25:33<2:35:21,  8.39it/s][A
 36%|███████████▌                    | 44093/122310 [1:25:34<2:00:28, 10.82it/s][A
 36%|███████████▌                    | 44104/122310 [1:25:34<1:51:42, 11.67

step: 35080, loss: 74.33292862520219, epoch: 2



 36%|███████████▌                    | 44165/122310 [1:25:40<3:07:32,  6.94it/s][A
 36%|███████████▌                    | 44172/122310 [1:25:41<2:53:04,  7.52it/s][A
 36%|███████████▌                    | 44182/122310 [1:25:42<2:23:35,  9.07it/s][A
 36%|███████████▌                    | 44189/122310 [1:25:43<2:22:29,  9.14it/s][A
 36%|███████████▌                    | 44205/122310 [1:25:43<1:43:37, 12.56it/s][A
 36%|███████████▌                    | 44212/122310 [1:25:44<1:51:49, 11.64it/s][A
 36%|███████████▌                    | 44226/122310 [1:25:45<1:34:56, 13.71it/s][A
 36%|███████████▌                    | 44230/122310 [1:25:46<1:55:37, 11.26it/s][A
 36%|███████████▌                    | 44236/122310 [1:25:46<2:06:49, 10.26it/s][A
 36%|███████████▌                    | 44239/122310 [1:25:47<2:33:28,  8.48it/s][A
 36%|███████████▌                    | 44245/122310 [1:25:48<2:38:30,  8.21it/s][A
 36%|███████████▌                    | 44259/122310 [1:25:49<1:55:42, 11.24

step: 35100, loss: 72.22783028051806, epoch: 2



 36%|███████████▌                    | 44348/122310 [1:25:55<1:37:02, 13.39it/s][A
 36%|███████████▌                    | 44352/122310 [1:25:56<2:00:57, 10.74it/s][A
 36%|███████████▌                    | 44359/122310 [1:25:56<2:05:51, 10.32it/s][A
 36%|███████████▌                    | 44367/122310 [1:25:57<2:04:44, 10.41it/s][A
 36%|███████████▌                    | 44377/122310 [1:25:58<1:54:43, 11.32it/s][A
 36%|███████████▌                    | 44388/122310 [1:25:59<1:45:14, 12.34it/s][A
 36%|███████████▌                    | 44398/122310 [1:25:59<1:42:25, 12.68it/s][A
 36%|███████████▌                    | 44409/122310 [1:26:00<1:37:46, 13.28it/s][A
 36%|███████████▌                    | 44419/122310 [1:26:01<1:37:29, 13.32it/s][A
 36%|███████████▌                    | 44425/122310 [1:26:02<1:51:13, 11.67it/s][A
 36%|███████████▋                    | 44433/122310 [1:26:02<1:53:45, 11.41it/s][A
 36%|███████████▋                    | 44441/122310 [1:26:03<1:55:53, 11.20

step: 35120, loss: 86.5858516473676, epoch: 2



 36%|███████████▋                    | 44501/122310 [1:26:10<2:47:12,  7.76it/s][A
 36%|███████████▋                    | 44507/122310 [1:26:11<2:47:00,  7.76it/s][A
 36%|███████████▋                    | 44515/122310 [1:26:11<2:30:36,  8.61it/s][A
 36%|███████████▋                    | 44525/122310 [1:26:12<2:09:05, 10.04it/s][A
 36%|███████████▋                    | 44532/122310 [1:26:13<2:13:08,  9.74it/s][A
 36%|███████████▋                    | 44546/122310 [1:26:14<1:45:02, 12.34it/s][A
 36%|███████████▋                    | 44553/122310 [1:26:14<1:56:10, 11.16it/s][A
 36%|███████████▋                    | 44570/122310 [1:26:15<1:29:18, 14.51it/s][A
 36%|███████████▋                    | 44574/122310 [1:26:16<1:50:28, 11.73it/s][A
 36%|███████████▋                    | 44580/122310 [1:26:17<2:03:50, 10.46it/s][A
 36%|███████████▋                    | 44590/122310 [1:26:17<1:54:06, 11.35it/s][A
 36%|███████████▋                    | 44597/122310 [1:26:18<2:00:31, 10.75

step: 35140, loss: 77.11085176638372, epoch: 2



 37%|███████████▋                    | 44671/122310 [1:26:25<1:52:29, 11.50it/s][A
 37%|███████████▋                    | 44677/122310 [1:26:26<2:04:14, 10.41it/s][A
 37%|███████████▋                    | 44687/122310 [1:26:27<1:57:07, 11.04it/s][A
 37%|███████████▋                    | 44696/122310 [1:26:27<1:54:58, 11.25it/s][A
 37%|███████████▋                    | 44708/122310 [1:26:28<1:42:29, 12.62it/s][A
 37%|███████████▋                    | 44714/122310 [1:26:29<1:55:26, 11.20it/s][A
 37%|███████████▋                    | 44716/122310 [1:26:30<2:27:44,  8.75it/s][A
 37%|███████████▋                    | 44726/122310 [1:26:30<2:07:27, 10.15it/s][A
 37%|███████████▋                    | 44736/122310 [1:26:31<1:56:30, 11.10it/s][A
 37%|███████████▋                    | 44743/122310 [1:26:32<2:02:22, 10.56it/s][A
 37%|███████████▋                    | 44746/122310 [1:26:33<2:30:06,  8.61it/s][A
 37%|███████████▋                    | 44754/122310 [1:26:33<2:19:56,  9.24

step: 35160, loss: 75.53658113045829, epoch: 2



 37%|███████████▋                    | 44852/122310 [1:26:40<1:23:19, 15.49it/s][A
 37%|███████████▋                    | 44857/122310 [1:26:41<1:40:50, 12.80it/s][A
 37%|███████████▋                    | 44860/122310 [1:26:42<2:07:35, 10.12it/s][A
 37%|███████████▋                    | 44863/122310 [1:26:42<2:35:25,  8.31it/s][A
 37%|███████████▋                    | 44869/122310 [1:26:43<2:36:29,  8.25it/s][A
 37%|███████████▋                    | 44881/122310 [1:26:44<2:02:08, 10.56it/s][A
 37%|███████████▋                    | 44891/122310 [1:26:45<1:52:49, 11.44it/s][A
 37%|███████████▋                    | 44893/122310 [1:26:45<2:27:04,  8.77it/s][A
 37%|███████████▋                    | 44902/122310 [1:26:46<2:11:29,  9.81it/s][A
 37%|███████████▊                    | 44913/122310 [1:26:47<1:54:38, 11.25it/s][A
 37%|███████████▊                    | 44916/122310 [1:26:48<2:21:54,  9.09it/s][A
 37%|███████████▊                    | 44928/122310 [1:26:48<1:55:22, 11.18

step: 35180, loss: 81.77581573921718, epoch: 2



 37%|███████████▊                    | 45025/122310 [1:26:55<1:40:21, 12.83it/s][A
 37%|███████████▊                    | 45030/122310 [1:26:56<1:57:03, 11.00it/s][A
 37%|███████████▊                    | 45047/122310 [1:26:57<1:28:45, 14.51it/s][A
 37%|███████████▊                    | 45060/122310 [1:26:57<1:23:46, 15.37it/s][A
 37%|███████████▊                    | 45070/122310 [1:26:58<1:27:13, 14.76it/s][A
 37%|███████████▊                    | 45082/122310 [1:26:59<1:25:03, 15.13it/s][A
 37%|███████████▊                    | 45091/122310 [1:27:00<1:30:58, 14.15it/s][A
 37%|███████████▊                    | 45100/122310 [1:27:00<1:34:48, 13.57it/s][A
 37%|███████████▊                    | 45110/122310 [1:27:01<1:35:08, 13.52it/s][A
 37%|███████████▊                    | 45113/122310 [1:27:02<2:00:53, 10.64it/s][A
 37%|███████████▊                    | 45115/122310 [1:27:03<2:36:58,  8.20it/s][A
 37%|███████████▊                    | 45118/122310 [1:27:03<3:04:04,  6.99

step: 35200, loss: 76.00327861425448, epoch: 2
sim1 and sim2 are 0.4760055173752718, 0.23708113322189606
cosine of pred and queen: 0.1795396353545413
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: ma




Actual: armenia:dram::iran:rial, pred: footstep


 37%|███████████▊                    | 45171/122310 [1:27:23<2:02:19, 10.51it/s][A

Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: electricity
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: statement
Actual: denmark:danish::germany:german, pred: europe
Accuracy is 0.4
Actual: walk:walks::vanish:vanishes, pred: beholders
Actual: work:works::generate:generates, pred: pleaseth
Actual: think:thinks::talk:talks, pred: oxnard
Actual: vanish:vanishe


 37%|███████████▍                   | 45181/122310 [1:28:26<59:58:14,  2.80s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.14201183431952663



 37%|███████████▍                   | 45186/122310 [1:28:28<49:12:07,  2.30s/it][A
 37%|███████████▍                   | 45192/122310 [1:28:28<37:16:34,  1.74s/it][A
 37%|███████████▍                   | 45200/122310 [1:28:29<25:40:19,  1.20s/it][A
 37%|███████████▍                   | 45206/122310 [1:28:30<19:43:49,  1.09it/s][A
 37%|███████████▍                   | 45212/122310 [1:28:31<15:06:17,  1.42it/s][A
 37%|███████████▍                   | 45218/122310 [1:28:31<11:38:35,  1.84it/s][A
 37%|███████████▊                    | 45224/122310 [1:28:32<9:04:13,  2.36it/s][A
 37%|███████████▊                    | 45233/122310 [1:28:33<6:19:24,  3.39it/s][A
 37%|███████████▊                    | 45236/122310 [1:28:34<6:08:59,  3.48it/s][A
 37%|███████████▊                    | 45254/122310 [1:28:34<3:10:05,  6.76it/s][A
 37%|███████████▊                    | 45265/122310 [1:28:35<2:35:29,  8.26it/s][A
 37%|███████████▊                    | 45270/122310 [1:28:36<2:43:56,  7.83

step: 35220, loss: 71.1644252705629, epoch: 2



 37%|███████████▊                    | 45343/122310 [1:28:42<2:06:28, 10.14it/s][A
 37%|███████████▊                    | 45352/122310 [1:28:43<1:59:42, 10.71it/s][A
 37%|███████████▊                    | 45362/122310 [1:28:43<1:51:08, 11.54it/s][A
 37%|███████████▊                    | 45373/122310 [1:28:44<1:42:25, 12.52it/s][A
 37%|███████████▊                    | 45379/122310 [1:28:45<1:54:16, 11.22it/s][A
 37%|███████████▊                    | 45388/122310 [1:28:46<1:51:24, 11.51it/s][A
 37%|███████████▉                    | 45394/122310 [1:28:46<2:02:04, 10.50it/s][A
 37%|███████████▉                    | 45402/122310 [1:28:47<2:00:46, 10.61it/s][A
 37%|███████████▉                    | 45409/122310 [1:28:48<2:06:42, 10.12it/s][A
 37%|███████████▉                    | 45414/122310 [1:28:49<2:20:53,  9.10it/s][A
 37%|███████████▉                    | 45427/122310 [1:28:50<2:26:08,  8.77it/s][A
 37%|███████████▉                    | 45433/122310 [1:28:51<2:29:13,  8.59

step: 35240, loss: 98.66021989952387, epoch: 2



 37%|███████████▉                    | 45495/122310 [1:28:57<2:23:31,  8.92it/s][A
 37%|███████████▉                    | 45500/122310 [1:28:58<2:34:47,  8.27it/s][A
 37%|███████████▉                    | 45506/122310 [1:28:58<2:35:05,  8.25it/s][A
 37%|███████████▉                    | 45516/122310 [1:28:59<2:10:36,  9.80it/s][A
 37%|███████████▉                    | 45525/122310 [1:29:00<2:03:46, 10.34it/s][A
 37%|███████████▉                    | 45530/122310 [1:29:01<2:17:55,  9.28it/s][A
 37%|███████████▉                    | 45538/122310 [1:29:01<2:11:28,  9.73it/s][A
 37%|███████████▉                    | 45546/122310 [1:29:02<2:07:24, 10.04it/s][A
 37%|███████████▉                    | 45560/122310 [1:29:03<1:40:43, 12.70it/s][A
 37%|███████████▉                    | 45568/122310 [1:29:04<1:45:28, 12.13it/s][A
 37%|███████████▉                    | 45577/122310 [1:29:06<2:51:46,  7.44it/s][A
 37%|███████████▉                    | 45585/122310 [1:29:07<2:36:59,  8.15

step: 35260, loss: 72.68628793746367, epoch: 2



 37%|███████████▉                    | 45634/122310 [1:29:12<2:00:07, 10.64it/s][A
 37%|███████████▉                    | 45642/122310 [1:29:13<1:59:47, 10.67it/s][A
 37%|███████████▉                    | 45656/122310 [1:29:14<1:38:15, 13.00it/s][A
 37%|███████████▉                    | 45659/122310 [1:29:14<2:02:47, 10.40it/s][A
 37%|███████████▉                    | 45664/122310 [1:29:15<2:16:37,  9.35it/s][A
 37%|███████████▉                    | 45666/122310 [1:29:16<2:53:41,  7.35it/s][A
 37%|███████████▉                    | 45671/122310 [1:29:16<2:57:41,  7.19it/s][A
 37%|███████████▉                    | 45675/122310 [1:29:17<3:11:38,  6.66it/s][A
 37%|███████████▉                    | 45686/122310 [1:29:18<2:20:22,  9.10it/s][A
 37%|███████████▉                    | 45696/122310 [1:29:19<2:02:32, 10.42it/s][A
 37%|███████████▉                    | 45703/122310 [1:29:19<2:06:39, 10.08it/s][A
 37%|███████████▉                    | 45714/122310 [1:29:20<1:51:09, 11.48

step: 35280, loss: 102.63738256232129, epoch: 2



 37%|███████████▉                    | 45771/122310 [1:29:27<2:32:31,  8.36it/s][A
 37%|███████████▉                    | 45777/122310 [1:29:28<2:34:33,  8.25it/s][A
 37%|███████████▉                    | 45780/122310 [1:29:28<3:03:24,  6.95it/s][A
 37%|███████████▉                    | 45783/122310 [1:29:29<3:28:26,  6.12it/s][A
 37%|███████████▉                    | 45792/122310 [1:29:30<2:40:51,  7.93it/s][A
 37%|███████████▉                    | 45801/122310 [1:29:31<2:18:58,  9.18it/s][A
 37%|███████████▉                    | 45812/122310 [1:29:31<1:57:24, 10.86it/s][A
 37%|███████████▉                    | 45822/122310 [1:29:32<1:50:06, 11.58it/s][A
 37%|███████████▉                    | 45831/122310 [1:29:33<1:48:42, 11.73it/s][A
 37%|███████████▉                    | 45839/122310 [1:29:34<1:53:17, 11.25it/s][A
 37%|███████████▉                    | 45846/122310 [1:29:34<1:59:20, 10.68it/s][A
 37%|███████████▉                    | 45862/122310 [1:29:35<1:32:31, 13.77

step: 35300, loss: 97.43046445522526, epoch: 2



 38%|████████████                    | 45939/122310 [1:29:42<2:06:16, 10.08it/s][A
 38%|████████████                    | 45953/122310 [1:29:43<1:40:16, 12.69it/s][A
 38%|████████████                    | 45960/122310 [1:29:44<1:48:59, 11.67it/s][A
 38%|████████████                    | 45965/122310 [1:29:44<2:04:21, 10.23it/s][A
 38%|████████████                    | 45969/122310 [1:29:45<2:25:20,  8.75it/s][A
 38%|████████████                    | 45977/122310 [1:29:46<2:16:41,  9.31it/s][A
 38%|████████████                    | 45984/122310 [1:29:47<2:15:57,  9.36it/s][A
 38%|████████████                    | 45996/122310 [1:29:47<1:51:36, 11.40it/s][A
 38%|████████████                    | 46003/122310 [1:29:48<1:57:38, 10.81it/s][A
 38%|████████████                    | 46009/122310 [1:29:49<2:07:33,  9.97it/s][A
 38%|████████████                    | 46012/122310 [1:29:49<2:34:35,  8.23it/s][A
 38%|████████████                    | 46026/122310 [1:29:50<1:51:43, 11.38

step: 35320, loss: 75.07187537078768, epoch: 2



 38%|████████████                    | 46101/122310 [1:29:57<1:59:48, 10.60it/s][A
 38%|████████████                    | 46111/122310 [1:29:58<1:51:00, 11.44it/s][A
 38%|████████████                    | 46118/122310 [1:29:58<1:58:05, 10.75it/s][A
 38%|████████████                    | 46135/122310 [1:29:59<1:28:38, 14.32it/s][A
 38%|████████████                    | 46147/122310 [1:30:00<1:25:59, 14.76it/s][A
 38%|████████████                    | 46153/122310 [1:30:01<1:39:23, 12.77it/s][A
 38%|████████████                    | 46170/122310 [1:30:01<1:21:17, 15.61it/s][A
 38%|████████████                    | 46180/122310 [1:30:02<1:24:32, 15.01it/s][A
 38%|████████████                    | 46195/122310 [1:30:03<1:17:26, 16.38it/s][A
 38%|████████████                    | 46201/122310 [1:30:04<1:32:22, 13.73it/s][A
 38%|████████████                    | 46203/122310 [1:30:05<2:01:51, 10.41it/s][A
 38%|████████████                    | 46208/122310 [1:30:05<2:16:55,  9.26

step: 35340, loss: 99.16032247747208, epoch: 2



 38%|████████████                    | 46268/122310 [1:30:12<2:16:03,  9.31it/s][A
 38%|████████████                    | 46273/122310 [1:30:13<2:28:10,  8.55it/s][A
 38%|████████████                    | 46286/122310 [1:30:14<1:53:45, 11.14it/s][A
 38%|████████████                    | 46291/122310 [1:30:14<2:10:49,  9.69it/s][A
 38%|████████████                    | 46304/122310 [1:30:15<1:47:20, 11.80it/s][A
 38%|████████████                    | 46316/122310 [1:30:16<1:39:17, 12.76it/s][A
 38%|████████████                    | 46334/122310 [1:30:17<1:18:56, 16.04it/s][A
 38%|████████████▏                   | 46346/122310 [1:30:17<1:18:52, 16.05it/s][A
 38%|████████████▏                   | 46358/122310 [1:30:18<1:18:51, 16.05it/s][A
 38%|████████████▏                   | 46368/122310 [1:30:19<1:23:14, 15.20it/s][A
 38%|████████████▏                   | 46375/122310 [1:30:20<1:33:58, 13.47it/s][A
 38%|████████████▏                   | 46390/122310 [1:30:21<1:23:19, 15.19

step: 35360, loss: 76.35965508235793, epoch: 2



 38%|████████████▏                   | 46458/122310 [1:30:28<2:46:12,  7.61it/s][A
 38%|████████████▏                   | 46462/122310 [1:30:29<3:00:11,  7.02it/s][A
 38%|████████████▏                   | 46469/122310 [1:30:30<2:45:29,  7.64it/s][A
 38%|████████████▏                   | 46479/122310 [1:30:30<2:17:30,  9.19it/s][A
 38%|████████████▏                   | 46484/122310 [1:30:31<2:28:49,  8.49it/s][A
 38%|████████████▏                   | 46496/122310 [1:30:32<1:58:36, 10.65it/s][A
 38%|████████████▏                   | 46511/122310 [1:30:33<1:34:09, 13.42it/s][A
 38%|████████████▏                   | 46517/122310 [1:30:33<1:47:45, 11.72it/s][A
 38%|████████████▏                   | 46530/122310 [1:30:34<1:34:14, 13.40it/s][A
 38%|████████████▏                   | 46540/122310 [1:30:35<1:34:36, 13.35it/s][A
 38%|████████████▏                   | 46554/122310 [1:30:36<1:25:17, 14.80it/s][A
 38%|████████████▏                   | 46559/122310 [1:30:36<1:42:57, 12.26

step: 35380, loss: 77.56797573644711, epoch: 2



 38%|████████████▏                   | 46651/122310 [1:30:43<1:23:05, 15.18it/s][A
 38%|████████████▏                   | 46668/122310 [1:30:43<1:12:43, 17.34it/s][A
 38%|████████████▏                   | 46676/122310 [1:30:44<1:22:22, 15.30it/s][A
 38%|████████████▏                   | 46682/122310 [1:30:45<1:36:01, 13.13it/s][A
 38%|████████████▏                   | 46684/122310 [1:30:46<2:05:22, 10.05it/s][A
 38%|████████████▏                   | 46695/122310 [1:30:46<1:51:02, 11.35it/s][A
 38%|████████████▏                   | 46703/122310 [1:30:47<1:53:36, 11.09it/s][A
 38%|████████████▏                   | 46711/122310 [1:30:48<1:54:58, 10.96it/s][A
 38%|████████████▏                   | 46721/122310 [1:30:49<1:50:30, 11.40it/s][A
 38%|████████████▏                   | 46732/122310 [1:30:49<1:42:49, 12.25it/s][A
 38%|████████████▏                   | 46739/122310 [1:30:50<1:51:56, 11.25it/s][A
 38%|████████████▏                   | 46746/122310 [1:30:51<1:58:07, 10.66

step: 35400, loss: 71.1024444929824, epoch: 2



 38%|████████████▎                   | 46828/122310 [1:30:58<2:00:15, 10.46it/s][A
 38%|████████████▎                   | 46854/122310 [1:30:59<1:13:10, 17.19it/s][A
 38%|████████████▎                   | 46864/122310 [1:30:59<1:18:21, 16.05it/s][A
 38%|████████████▎                   | 46869/122310 [1:31:00<1:34:30, 13.30it/s][A
 38%|████████████▎                   | 46877/122310 [1:31:02<2:10:27,  9.64it/s][A
 38%|████████████▎                   | 46889/122310 [1:31:02<1:52:14, 11.20it/s][A
 38%|████████████▎                   | 46894/122310 [1:31:03<2:05:07, 10.05it/s][A
 38%|████████████▎                   | 46910/122310 [1:31:04<1:36:21, 13.04it/s][A
 38%|████████████▎                   | 46912/122310 [1:31:05<2:03:38, 10.16it/s][A
 38%|████████████▎                   | 46923/122310 [1:31:05<1:49:46, 11.45it/s][A
 38%|████████████▎                   | 46932/122310 [1:31:06<1:48:12, 11.61it/s][A
 38%|████████████▎                   | 46936/122310 [1:31:07<2:08:27,  9.78

step: 35420, loss: 80.84910374539726, epoch: 2



 38%|████████████▎                   | 47008/122310 [1:31:13<1:53:05, 11.10it/s][A
 38%|████████████▎                   | 47016/122310 [1:31:14<1:54:46, 10.93it/s][A
 38%|████████████▎                   | 47020/122310 [1:31:15<2:15:10,  9.28it/s][A
 38%|████████████▎                   | 47030/122310 [1:31:15<2:00:18, 10.43it/s][A
 38%|████████████▎                   | 47042/122310 [1:31:16<1:43:39, 12.10it/s][A
 38%|████████████▎                   | 47050/122310 [1:31:17<1:47:28, 11.67it/s][A
 38%|████████████▎                   | 47059/122310 [1:31:18<1:46:56, 11.73it/s][A
 38%|████████████▎                   | 47066/122310 [1:31:18<1:53:59, 11.00it/s][A
 38%|████████████▎                   | 47078/122310 [1:31:19<1:40:06, 12.53it/s][A
 38%|████████████▎                   | 47089/122310 [1:31:20<1:35:39, 13.10it/s][A
 39%|████████████▎                   | 47091/122310 [1:31:21<2:05:16, 10.01it/s][A
 39%|████████████▎                   | 47096/122310 [1:31:21<2:18:52,  9.03

step: 35440, loss: 71.30356693716702, epoch: 2



 39%|████████████▎                   | 47186/122310 [1:31:28<1:41:02, 12.39it/s][A
 39%|████████████▎                   | 47191/122310 [1:31:30<3:07:27,  6.68it/s][A
 39%|████████████▎                   | 47197/122310 [1:31:31<2:59:59,  6.96it/s][A
 39%|████████████▎                   | 47203/122310 [1:31:32<2:55:28,  7.13it/s][A
 39%|████████████▎                   | 47212/122310 [1:31:33<2:31:15,  8.28it/s][A
 39%|████████████▎                   | 47216/122310 [1:31:33<2:45:42,  7.55it/s][A
 39%|████████████▎                   | 47225/122310 [1:31:34<2:23:56,  8.69it/s][A
 39%|████████████▎                   | 47234/122310 [1:31:35<2:09:33,  9.66it/s][A
 39%|████████████▎                   | 47239/122310 [1:31:36<2:25:27,  8.60it/s][A
 39%|████████████▎                   | 47246/122310 [1:31:36<2:22:21,  8.79it/s][A
 39%|████████████▎                   | 47254/122310 [1:31:37<2:15:07,  9.26it/s][A
 39%|████████████▎                   | 47268/122310 [1:31:38<1:44:45, 11.94

step: 35460, loss: 77.90145889908784, epoch: 2



 39%|████████████▍                   | 47346/122310 [1:31:43<1:27:21, 14.30it/s][A
 39%|████████████▍                   | 47351/122310 [1:31:44<1:43:33, 12.06it/s][A
 39%|████████████▍                   | 47359/122310 [1:31:45<1:47:03, 11.67it/s][A
 39%|████████████▍                   | 47366/122310 [1:31:46<1:53:36, 10.99it/s][A
 39%|████████████▍                   | 47374/122310 [1:31:46<1:54:15, 10.93it/s][A
 39%|████████████▍                   | 47383/122310 [1:31:47<1:50:51, 11.26it/s][A
 39%|████████████▍                   | 47391/122310 [1:31:48<1:51:56, 11.15it/s][A
 39%|████████████▍                   | 47397/122310 [1:31:49<2:02:15, 10.21it/s][A
 39%|████████████▍                   | 47408/122310 [1:31:49<1:48:08, 11.54it/s][A
 39%|████████████▍                   | 47416/122310 [1:31:50<1:51:50, 11.16it/s][A
 39%|████████████▍                   | 47426/122310 [1:31:51<1:45:37, 11.82it/s][A
 39%|████████████▍                   | 47443/122310 [1:31:52<1:23:39, 14.92

step: 35480, loss: 104.86457592069462, epoch: 2



 39%|████████████▍                   | 47516/122310 [1:31:58<1:27:50, 14.19it/s][A
 39%|████████████▍                   | 47518/122310 [1:31:59<1:56:19, 10.72it/s][A
 39%|████████████▍                   | 47536/122310 [1:32:00<1:25:22, 14.60it/s][A
 39%|████████████▍                   | 47540/122310 [1:32:01<1:45:12, 11.84it/s][A
 39%|████████████▍                   | 47558/122310 [1:32:02<1:21:07, 15.36it/s][A
 39%|████████████▍                   | 47562/122310 [1:32:02<1:40:30, 12.40it/s][A
 39%|████████████▍                   | 47575/122310 [1:32:03<1:29:25, 13.93it/s][A
 39%|████████████▍                   | 47591/122310 [1:32:04<1:18:08, 15.94it/s][A
 39%|████████████▍                   | 47597/122310 [1:32:05<1:31:47, 13.56it/s][A
 39%|████████████▍                   | 47614/122310 [1:32:05<1:16:48, 16.21it/s][A
 39%|████████████▍                   | 47624/122310 [1:32:07<1:47:31, 11.58it/s][A
 39%|████████████▍                   | 47633/122310 [1:32:08<1:46:47, 11.65

step: 35500, loss: 62.64070187552443, epoch: 2



 39%|████████████▍                   | 47689/122310 [1:32:14<2:25:25,  8.55it/s][A
 39%|████████████▍                   | 47694/122310 [1:32:14<2:35:06,  8.02it/s][A
 39%|████████████▍                   | 47702/122310 [1:32:15<2:21:40,  8.78it/s][A
 39%|████████████▍                   | 47712/122310 [1:32:16<2:02:15, 10.17it/s][A
 39%|████████████▍                   | 47734/122310 [1:32:17<1:18:09, 15.90it/s][A
 39%|████████████▍                   | 47740/122310 [1:32:17<1:31:32, 13.58it/s][A
 39%|████████████▍                   | 47742/122310 [1:32:18<2:00:02, 10.35it/s][A
 39%|████████████▍                   | 47751/122310 [1:32:19<1:55:17, 10.78it/s][A
 39%|████████████▍                   | 47757/122310 [1:32:20<2:05:21,  9.91it/s][A
 39%|████████████▍                   | 47760/122310 [1:32:20<2:33:01,  8.12it/s][A
 39%|████████████▍                   | 47769/122310 [1:32:21<2:13:53,  9.28it/s][A
 39%|████████████▌                   | 47780/122310 [1:32:22<1:54:54, 10.81

step: 35520, loss: 87.1565473184385, epoch: 2



 39%|████████████▌                   | 47868/122310 [1:32:29<1:28:00, 14.10it/s][A
 39%|████████████▌                   | 47874/122310 [1:32:30<1:40:39, 12.33it/s][A
 39%|████████████▌                   | 47884/122310 [1:32:30<1:38:27, 12.60it/s][A
 39%|████████████▌                   | 47888/122310 [1:32:31<1:58:28, 10.47it/s][A
 39%|████████████▌                   | 47899/122310 [1:32:32<1:46:11, 11.68it/s][A
 39%|████████████▌                   | 47907/122310 [1:32:33<1:49:01, 11.37it/s][A
 39%|████████████▌                   | 47916/122310 [1:32:33<1:46:50, 11.61it/s][A
 39%|████████████▌                   | 47930/122310 [1:32:34<1:29:57, 13.78it/s][A
 39%|████████████▌                   | 47937/122310 [1:32:35<1:39:41, 12.43it/s][A
 39%|████████████▌                   | 47946/122310 [1:32:36<1:40:19, 12.35it/s][A
 39%|████████████▌                   | 47952/122310 [1:32:36<1:53:00, 10.97it/s][A
 39%|████████████▌                   | 47960/122310 [1:32:37<1:54:00, 10.87

step: 35540, loss: 79.90224399168808, epoch: 2



 39%|████████████▌                   | 48016/122310 [1:32:44<2:58:43,  6.93it/s][A
 39%|████████████▌                   | 48031/122310 [1:32:45<1:54:03, 10.85it/s][A
 39%|████████████▌                   | 48035/122310 [1:32:46<2:14:09,  9.23it/s][A
 39%|████████████▌                   | 48044/122310 [1:32:46<2:02:42, 10.09it/s][A
 39%|████████████▌                   | 48048/122310 [1:32:47<2:22:37,  8.68it/s][A
 39%|████████████▌                   | 48056/122310 [1:32:48<2:14:43,  9.19it/s][A
 39%|████████████▌                   | 48071/122310 [1:32:49<1:40:10, 12.35it/s][A
 39%|████████████▌                   | 48077/122310 [1:32:49<1:51:20, 11.11it/s][A
 39%|████████████▌                   | 48093/122310 [1:32:50<1:26:58, 14.22it/s][A
 39%|████████████▌                   | 48099/122310 [1:32:51<1:41:51, 12.14it/s][A
 39%|████████████▌                   | 48111/122310 [1:32:52<1:33:02, 13.29it/s][A
 39%|████████████▌                   | 48125/122310 [1:32:52<1:23:55, 14.73

step: 35560, loss: 80.98823144722638, epoch: 2



 39%|████████████▌                   | 48183/122310 [1:32:59<2:16:23,  9.06it/s][A
 39%|████████████▌                   | 48192/122310 [1:33:00<2:06:34,  9.76it/s][A
 39%|████████████▌                   | 48200/122310 [1:33:01<2:02:55, 10.05it/s][A
 39%|████████████▌                   | 48205/122310 [1:33:01<2:15:21,  9.12it/s][A
 39%|████████████▌                   | 48213/122310 [1:33:02<2:10:28,  9.46it/s][A
 39%|████████████▌                   | 48222/122310 [1:33:03<2:01:32, 10.16it/s][A
 39%|████████████▌                   | 48227/122310 [1:33:04<2:14:38,  9.17it/s][A
 39%|████████████▌                   | 48232/122310 [1:33:04<2:26:09,  8.45it/s][A
 39%|████████████▌                   | 48238/122310 [1:33:05<2:28:53,  8.29it/s][A
 39%|████████████▌                   | 48241/122310 [1:33:06<2:55:14,  7.04it/s][A
 39%|████████████▌                   | 48244/122310 [1:33:07<3:20:38,  6.15it/s][A
 39%|████████████▌                   | 48252/122310 [1:33:07<2:44:25,  7.51

step: 35580, loss: 81.14757186196388, epoch: 2



 40%|████████████▋                   | 48330/122310 [1:33:14<1:24:31, 14.59it/s][A
 40%|████████████▋                   | 48348/122310 [1:33:15<1:12:18, 17.05it/s][A
 40%|████████████▋                   | 48359/122310 [1:33:16<1:16:05, 16.20it/s][A
 40%|████████████▋                   | 48363/122310 [1:33:17<1:35:30, 12.90it/s][A
 40%|████████████▋                   | 48373/122310 [1:33:18<1:35:45, 12.87it/s][A
 40%|████████████▋                   | 48381/122310 [1:33:18<1:42:37, 12.01it/s][A
 40%|████████████▋                   | 48397/122310 [1:33:19<1:24:48, 14.53it/s][A
 40%|████████████▋                   | 48406/122310 [1:33:20<1:32:15, 13.35it/s][A
 40%|████████████▋                   | 48412/122310 [1:33:21<1:45:08, 11.71it/s][A
 40%|████████████▋                   | 48420/122310 [1:33:22<1:49:51, 11.21it/s][A
 40%|████████████▋                   | 48426/122310 [1:33:22<2:01:05, 10.17it/s][A
 40%|████████████▋                   | 48436/122310 [1:33:23<1:53:05, 10.89

step: 35600, loss: 68.90481990111311, epoch: 2
sim1 and sim2 are 0.47560986985493353, 0.24185175281319812
cosine of pred and queen: 0.20082871590437051
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: could
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: eluana
Actual: mahara

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: respect
Actual: man:woman::brothers:sisters, pred: sscs
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: pradesh
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: pradesh
Actual: tripura:agartala::odisha:bhubaneswar, pred: bhubaneswar
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 40%|████████████▎                  | 48490/122310 [1:34:50<70:22:41,  3.43s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.13609467455621302



 40%|████████████▎                  | 48501/122310 [1:34:51<43:17:48,  2.11s/it][A
 40%|████████████▎                  | 48509/122310 [1:34:52<31:18:18,  1.53s/it][A
 40%|████████████▎                  | 48518/122310 [1:34:52<21:55:07,  1.07s/it][A
 40%|████████████▎                  | 48524/122310 [1:34:53<17:26:12,  1.18it/s][A
 40%|████████████▎                  | 48531/122310 [1:34:54<13:12:22,  1.55it/s][A
 40%|████████████▎                  | 48534/122310 [1:34:55<12:51:31,  1.59it/s][A
 40%|████████████▎                  | 48539/122310 [1:34:56<10:24:00,  1.97it/s][A
 40%|████████████▋                   | 48542/122310 [1:34:57<9:26:50,  2.17it/s][A
 40%|████████████▋                   | 48557/122310 [1:34:58<4:41:09,  4.37it/s][A
 40%|████████████▋                   | 48561/122310 [1:34:59<4:32:12,  4.52it/s][A
 40%|████████████▋                   | 48568/122310 [1:34:59<3:48:38,  5.38it/s][A
 40%|████████████▋                   | 48571/122310 [1:35:00<4:00:42,  5.11

step: 35620, loss: 86.55435778274489, epoch: 2



 40%|████████████▋                   | 48631/122310 [1:35:06<1:49:17, 11.24it/s][A
 40%|████████████▋                   | 48637/122310 [1:35:07<1:58:19, 10.38it/s][A
 40%|████████████▋                   | 48645/122310 [1:35:08<1:56:56, 10.50it/s][A
 40%|████████████▋                   | 48658/122310 [1:35:08<1:37:31, 12.59it/s][A
 40%|████████████▋                   | 48671/122310 [1:35:09<1:26:51, 14.13it/s][A
 40%|████████████▋                   | 48673/122310 [1:35:10<1:56:53, 10.50it/s][A
 40%|████████████▋                   | 48683/122310 [1:35:11<1:49:38, 11.19it/s][A
 40%|████████████▋                   | 48690/122310 [1:35:12<2:00:36, 10.17it/s][A
 40%|████████████▋                   | 48696/122310 [1:35:12<2:15:25,  9.06it/s][A
 40%|████████████▋                   | 48702/122310 [1:35:13<2:19:36,  8.79it/s][A
 40%|████████████▋                   | 48708/122310 [1:35:14<2:24:45,  8.47it/s][A
 40%|████████████▋                   | 48720/122310 [1:35:15<1:55:41, 10.60

step: 35640, loss: 69.52480106043222, epoch: 2



 40%|████████████▊                   | 48795/122310 [1:35:22<1:48:10, 11.33it/s][A
 40%|████████████▊                   | 48802/122310 [1:35:22<1:55:09, 10.64it/s][A
 40%|████████████▊                   | 48811/122310 [1:35:23<1:53:24, 10.80it/s][A
 40%|████████████▊                   | 48821/122310 [1:35:24<1:47:31, 11.39it/s][A
 40%|████████████▊                   | 48829/122310 [1:35:25<1:49:07, 11.22it/s][A
 40%|████████████▊                   | 48840/122310 [1:35:26<1:39:55, 12.25it/s][A
 40%|████████████▊                   | 48847/122310 [1:35:26<1:47:21, 11.40it/s][A
 40%|████████████▊                   | 48852/122310 [1:35:27<2:01:58, 10.04it/s][A
 40%|████████████▊                   | 48862/122310 [1:35:28<1:51:17, 11.00it/s][A
 40%|████████████▊                   | 48865/122310 [1:35:28<2:16:22,  8.98it/s][A
 40%|████████████▊                   | 48875/122310 [1:35:29<1:58:34, 10.32it/s][A
 40%|████████████▊                   | 48886/122310 [1:35:30<1:44:54, 11.67

step: 35660, loss: 76.19093184666067, epoch: 2



 40%|████████████▊                   | 48957/122310 [1:35:37<1:54:48, 10.65it/s][A
 40%|████████████▊                   | 48979/122310 [1:35:37<1:16:08, 16.05it/s][A
 40%|████████████▊                   | 48982/122310 [1:35:38<1:37:21, 12.55it/s][A
 40%|████████████▊                   | 48985/122310 [1:35:39<2:01:14, 10.08it/s][A
 40%|████████████▊                   | 48993/122310 [1:35:40<1:58:42, 10.29it/s][A
 40%|████████████▊                   | 49001/122310 [1:35:40<1:57:02, 10.44it/s][A
 40%|████████████▊                   | 49012/122310 [1:35:41<1:44:32, 11.69it/s][A
 40%|████████████▊                   | 49016/122310 [1:35:42<2:04:16,  9.83it/s][A
 40%|████████████▊                   | 49021/122310 [1:35:43<2:17:23,  8.89it/s][A
 40%|████████████▊                   | 49028/122310 [1:35:43<2:14:41,  9.07it/s][A
 40%|████████████▊                   | 49040/122310 [1:35:44<1:49:12, 11.18it/s][A
 40%|████████████▊                   | 49050/122310 [1:35:45<1:42:41, 11.89

step: 35680, loss: 78.58755158200172, epoch: 2



 40%|████████████▊                   | 49117/122310 [1:35:52<2:32:05,  8.02it/s][A
 40%|████████████▊                   | 49123/122310 [1:35:52<2:31:33,  8.05it/s][A
 40%|████████████▊                   | 49134/122310 [1:35:53<2:03:58,  9.84it/s][A
 40%|████████████▊                   | 49143/122310 [1:35:54<1:56:35, 10.46it/s][A
 40%|████████████▊                   | 49152/122310 [1:35:54<1:52:23, 10.85it/s][A
 40%|████████████▊                   | 49165/122310 [1:35:55<1:35:38, 12.75it/s][A
 40%|████████████▊                   | 49181/122310 [1:35:56<1:19:33, 15.32it/s][A
 40%|████████████▊                   | 49192/122310 [1:35:57<1:20:38, 15.11it/s][A
 40%|████████████▊                   | 49198/122310 [1:35:57<1:33:31, 13.03it/s][A
 40%|████████████▊                   | 49205/122310 [1:35:58<1:42:06, 11.93it/s][A
 40%|████████████▊                   | 49209/122310 [1:35:59<2:01:59,  9.99it/s][A
 40%|████████████▉                   | 49219/122310 [1:36:00<1:50:14, 11.05

step: 35700, loss: 74.28127967013349, epoch: 2



 40%|████████████▉                   | 49304/122310 [1:36:06<1:33:20, 13.03it/s][A
 40%|████████████▉                   | 49320/122310 [1:36:07<1:18:12, 15.55it/s][A
 40%|████████████▉                   | 49331/122310 [1:36:08<1:19:19, 15.33it/s][A
 40%|████████████▉                   | 49335/122310 [1:36:09<1:38:45, 12.32it/s][A
 40%|████████████▉                   | 49337/122310 [1:36:09<2:08:48,  9.44it/s][A
 40%|████████████▉                   | 49346/122310 [1:36:11<2:34:00,  7.90it/s][A
 40%|████████████▉                   | 49355/122310 [1:36:12<2:16:08,  8.93it/s][A
 40%|████████████▉                   | 49370/122310 [1:36:12<1:42:22, 11.87it/s][A
 40%|████████████▉                   | 49379/122310 [1:36:13<1:42:00, 11.92it/s][A
 40%|████████████▉                   | 49387/122310 [1:36:14<1:44:52, 11.59it/s][A
 40%|████████████▉                   | 49394/122310 [1:36:15<1:51:00, 10.95it/s][A
 40%|████████████▉                   | 49408/122310 [1:36:15<1:31:59, 13.21

step: 35720, loss: 88.34331102808297, epoch: 2



 40%|████████████▉                   | 49462/122310 [1:36:21<2:00:46, 10.05it/s][A
 40%|████████████▉                   | 49473/122310 [1:36:22<1:45:26, 11.51it/s][A
 40%|████████████▉                   | 49481/122310 [1:36:23<1:47:34, 11.28it/s][A
 40%|████████████▉                   | 49489/122310 [1:36:24<1:49:41, 11.07it/s][A
 40%|████████████▉                   | 49497/122310 [1:36:24<1:51:01, 10.93it/s][A
 40%|████████████▉                   | 49506/122310 [1:36:25<1:48:16, 11.21it/s][A
 40%|████████████▉                   | 49515/122310 [1:36:26<1:46:13, 11.42it/s][A
 40%|████████████▉                   | 49517/122310 [1:36:27<2:18:35,  8.75it/s][A
 40%|████████████▉                   | 49526/122310 [1:36:27<2:04:33,  9.74it/s][A
 40%|████████████▉                   | 49535/122310 [1:36:28<1:56:37, 10.40it/s][A
 41%|████████████▉                   | 49544/122310 [1:36:29<1:51:57, 10.83it/s][A
 41%|████████████▉                   | 49552/122310 [1:36:30<1:53:10, 10.71

step: 35740, loss: 84.16956057352964, epoch: 2



 41%|████████████▉                   | 49626/122310 [1:36:36<1:42:03, 11.87it/s][A
 41%|████████████▉                   | 49639/122310 [1:36:37<1:29:41, 13.50it/s][A
 41%|████████████▉                   | 49654/122310 [1:36:38<1:17:52, 15.55it/s][A
 41%|████████████▉                   | 49658/122310 [1:36:39<1:36:57, 12.49it/s][A
 41%|████████████▉                   | 49663/122310 [1:36:39<1:52:13, 10.79it/s][A
 41%|████████████▉                   | 49681/122310 [1:36:41<1:46:20, 11.38it/s][A
 41%|████████████▉                   | 49687/122310 [1:36:42<1:54:47, 10.54it/s][A
 41%|█████████████                   | 49702/122310 [1:36:42<1:32:40, 13.06it/s][A
 41%|█████████████                   | 49710/122310 [1:36:43<1:37:22, 12.43it/s][A
 41%|█████████████                   | 49718/122310 [1:36:44<1:40:52, 11.99it/s][A
 41%|█████████████                   | 49724/122310 [1:36:45<1:51:22, 10.86it/s][A
 41%|█████████████                   | 49730/122310 [1:36:45<2:00:18, 10.05

step: 35760, loss: 70.95995149733521, epoch: 2



 41%|█████████████                   | 49804/122310 [1:36:51<1:25:46, 14.09it/s][A
 41%|█████████████                   | 49806/122310 [1:36:52<1:53:58, 10.60it/s][A
 41%|█████████████                   | 49823/122310 [1:36:53<1:25:58, 14.05it/s][A
 41%|█████████████                   | 49830/122310 [1:36:53<1:35:14, 12.68it/s][A
 41%|█████████████                   | 49835/122310 [1:36:54<1:51:12, 10.86it/s][A
 41%|█████████████                   | 49845/122310 [1:36:55<1:45:32, 11.44it/s][A
 41%|█████████████                   | 49858/122310 [1:36:56<1:31:16, 13.23it/s][A
 41%|█████████████                   | 49866/122310 [1:36:57<1:36:48, 12.47it/s][A
 41%|█████████████                   | 49876/122310 [1:36:57<1:35:37, 12.63it/s][A
 41%|█████████████                   | 49889/122310 [1:36:58<1:26:07, 14.02it/s][A
 41%|█████████████                   | 49895/122310 [1:36:59<1:40:55, 11.96it/s][A
 41%|█████████████                   | 49906/122310 [1:37:00<1:34:51, 12.72

step: 35780, loss: 148.00934402828506, epoch: 2



 41%|█████████████                   | 49962/122310 [1:37:06<2:40:00,  7.54it/s][A
 41%|█████████████                   | 49968/122310 [1:37:07<2:36:07,  7.72it/s][A
 41%|█████████████                   | 49978/122310 [1:37:08<2:07:38,  9.44it/s][A
 41%|█████████████                   | 49989/122310 [1:37:09<1:48:46, 11.08it/s][A
 41%|█████████████                   | 49994/122310 [1:37:09<2:04:05,  9.71it/s][A
 41%|█████████████                   | 50006/122310 [1:37:10<1:43:40, 11.62it/s][A
 41%|█████████████                   | 50016/122310 [1:37:11<1:38:59, 12.17it/s][A
 41%|█████████████                   | 50021/122310 [1:37:12<1:54:05, 10.56it/s][A
 41%|█████████████                   | 50032/122310 [1:37:12<1:42:34, 11.74it/s][A
 41%|█████████████                   | 50045/122310 [1:37:13<1:29:32, 13.45it/s][A
 41%|█████████████                   | 50054/122310 [1:37:14<1:32:47, 12.98it/s][A
 41%|█████████████                   | 50058/122310 [1:37:15<1:53:22, 10.62

step: 35800, loss: 71.64690051490514, epoch: 2



 41%|█████████████                   | 50133/122310 [1:37:21<2:02:20,  9.83it/s][A
 41%|█████████████                   | 50140/122310 [1:37:22<2:04:38,  9.65it/s][A
 41%|█████████████                   | 50152/122310 [1:37:23<1:43:37, 11.61it/s][A
 41%|█████████████                   | 50165/122310 [1:37:24<1:29:42, 13.40it/s][A
 41%|█████████████▏                  | 50175/122310 [1:37:24<1:29:50, 13.38it/s][A
 41%|█████████████▏                  | 50187/122310 [1:37:25<1:25:03, 14.13it/s][A
 41%|█████████████▏                  | 50196/122310 [1:37:26<1:28:51, 13.53it/s][A
 41%|█████████████▏                  | 50205/122310 [1:37:27<1:31:42, 13.10it/s][A
 41%|█████████████▏                  | 50212/122310 [1:37:27<1:39:56, 12.02it/s][A
 41%|█████████████▏                  | 50232/122310 [1:37:28<1:13:05, 16.43it/s][A
 41%|█████████████▏                  | 50242/122310 [1:37:29<1:17:11, 15.56it/s][A
 41%|█████████████▏                  | 50248/122310 [1:37:30<1:30:13, 13.31

step: 35820, loss: 85.04403397814286, epoch: 2



 41%|█████████████▏                  | 50336/122310 [1:37:36<1:23:14, 14.41it/s][A
 41%|█████████████▏                  | 50340/122310 [1:37:37<1:43:03, 11.64it/s][A
 41%|█████████████▏                  | 50347/122310 [1:37:38<1:49:36, 10.94it/s][A
 41%|█████████████▏                  | 50353/122310 [1:37:39<1:59:09, 10.07it/s][A
 41%|█████████████▏                  | 50358/122310 [1:37:39<2:12:00,  9.08it/s][A
 41%|█████████████▏                  | 50372/122310 [1:37:40<1:40:09, 11.97it/s][A
 41%|█████████████▏                  | 50390/122310 [1:37:41<1:16:48, 15.61it/s][A
 41%|█████████████▏                  | 50402/122310 [1:37:42<1:15:59, 15.77it/s][A
 41%|█████████████▏                  | 50412/122310 [1:37:42<1:19:35, 15.06it/s][A
 41%|█████████████▏                  | 50416/122310 [1:37:43<1:39:32, 12.04it/s][A
 41%|█████████████▏                  | 50425/122310 [1:37:44<1:39:26, 12.05it/s][A
 41%|█████████████▏                  | 50431/122310 [1:37:45<1:52:57, 10.61

step: 35840, loss: 71.67319757673286, epoch: 2



 41%|█████████████▏                  | 50500/122310 [1:37:51<2:09:50,  9.22it/s][A
 41%|█████████████▏                  | 50506/122310 [1:37:52<2:15:24,  8.84it/s][A
 41%|█████████████▏                  | 50507/122310 [1:37:53<3:01:38,  6.59it/s][A
 41%|█████████████▏                  | 50519/122310 [1:37:54<2:06:16,  9.48it/s][A
 41%|█████████████▏                  | 50522/122310 [1:37:54<2:34:50,  7.73it/s][A
 41%|█████████████▏                  | 50534/122310 [1:37:55<1:57:05, 10.22it/s][A
 41%|█████████████▏                  | 50546/122310 [1:37:56<1:40:00, 11.96it/s][A
 41%|█████████████▏                  | 50554/122310 [1:37:57<1:43:29, 11.56it/s][A
 41%|█████████████▏                  | 50564/122310 [1:37:57<1:39:06, 12.06it/s][A
 41%|█████████████▏                  | 50576/122310 [1:37:58<1:30:41, 13.18it/s][A
 41%|█████████████▏                  | 50584/122310 [1:37:59<1:36:52, 12.34it/s][A
 41%|█████████████▏                  | 50588/122310 [1:38:00<1:56:41, 10.24

step: 35860, loss: 84.42586768681704, epoch: 2



 41%|█████████████▎                  | 50648/122310 [1:38:06<2:15:11,  8.83it/s][A
 41%|█████████████▎                  | 50651/122310 [1:38:08<3:33:09,  5.60it/s][A
 41%|█████████████▎                  | 50654/122310 [1:38:09<3:48:30,  5.23it/s][A
 41%|█████████████▎                  | 50663/122310 [1:38:10<2:52:33,  6.92it/s][A
 41%|█████████████▎                  | 50674/122310 [1:38:10<2:12:34,  9.01it/s][A
 41%|█████████████▎                  | 50676/122310 [1:38:11<2:46:57,  7.15it/s][A
 41%|█████████████▎                  | 50682/122310 [1:38:13<3:26:50,  5.77it/s][A
 41%|█████████████▎                  | 50690/122310 [1:38:13<2:52:46,  6.91it/s][A
 41%|█████████████▎                  | 50695/122310 [1:38:14<2:55:25,  6.80it/s][A
 41%|█████████████▎                  | 50707/122310 [1:38:15<2:11:36,  9.07it/s][A
 41%|█████████████▎                  | 50715/122310 [1:38:16<2:04:51,  9.56it/s][A
 41%|█████████████▎                  | 50729/122310 [1:38:16<1:38:17, 12.14

step: 35880, loss: 75.05393448075621, epoch: 2



 42%|█████████████▎                  | 50790/122310 [1:38:22<1:36:30, 12.35it/s][A
 42%|█████████████▎                  | 50797/122310 [1:38:22<1:45:13, 11.33it/s][A
 42%|█████████████▎                  | 50807/122310 [1:38:23<1:39:33, 11.97it/s][A
 42%|█████████████▎                  | 50818/122310 [1:38:24<1:33:56, 12.68it/s][A
 42%|█████████████▎                  | 50830/122310 [1:38:25<1:27:03, 13.69it/s][A
 42%|█████████████▎                  | 50840/122310 [1:38:25<1:28:17, 13.49it/s][A
 42%|█████████████▎                  | 50848/122310 [1:38:26<1:33:51, 12.69it/s][A
 42%|█████████████▎                  | 50858/122310 [1:38:27<1:33:09, 12.78it/s][A
 42%|█████████████▎                  | 50861/122310 [1:38:28<1:56:52, 10.19it/s][A
 42%|█████████████▎                  | 50866/122310 [1:38:28<2:10:17,  9.14it/s][A
 42%|█████████████▎                  | 50876/122310 [1:38:29<1:55:14, 10.33it/s][A
 42%|█████████████▎                  | 50879/122310 [1:38:30<2:21:59,  8.38

step: 35900, loss: 75.64486571142254, epoch: 2



 42%|█████████████▎                  | 50966/122310 [1:38:37<1:39:05, 12.00it/s][A
 42%|█████████████▎                  | 50975/122310 [1:38:38<1:39:47, 11.91it/s][A
 42%|█████████████▎                  | 50984/122310 [1:38:38<1:40:23, 11.84it/s][A
 42%|█████████████▎                  | 50989/122310 [1:38:39<1:56:01, 10.25it/s][A
 42%|█████████████▎                  | 50996/122310 [1:38:40<2:00:11,  9.89it/s][A
 42%|█████████████▎                  | 51010/122310 [1:38:41<1:36:34, 12.30it/s][A
 42%|█████████████▎                  | 51023/122310 [1:38:42<1:27:39, 13.55it/s][A
 42%|█████████████▎                  | 51025/122310 [1:38:42<1:55:00, 10.33it/s][A
 42%|█████████████▎                  | 51031/122310 [1:38:43<2:04:52,  9.51it/s][A
 42%|█████████████▎                  | 51032/122310 [1:38:44<2:46:47,  7.12it/s][A
 42%|█████████████▎                  | 51036/122310 [1:38:45<3:01:38,  6.54it/s][A
 42%|█████████████▎                  | 51042/122310 [1:38:45<2:51:18,  6.93

step: 35920, loss: 76.30863190635824, epoch: 2



 42%|█████████████▎                  | 51108/122310 [1:38:52<2:13:15,  8.91it/s][A
 42%|█████████████▎                  | 51114/122310 [1:38:53<2:18:04,  8.59it/s][A
 42%|█████████████▎                  | 51120/122310 [1:38:54<2:22:03,  8.35it/s][A
 42%|█████████████▍                  | 51129/122310 [1:38:55<2:06:41,  9.36it/s][A
 42%|█████████████▍                  | 51140/122310 [1:38:55<1:49:48, 10.80it/s][A
 42%|█████████████▍                  | 51152/122310 [1:38:56<1:36:50, 12.25it/s][A
 42%|█████████████▍                  | 51164/122310 [1:38:57<1:29:05, 13.31it/s][A
 42%|█████████████▍                  | 51179/122310 [1:38:58<1:18:03, 15.19it/s][A
 42%|█████████████▍                  | 51181/122310 [1:38:58<1:43:48, 11.42it/s][A
 42%|█████████████▍                  | 51187/122310 [1:38:59<1:54:27, 10.36it/s][A
 42%|█████████████▍                  | 51192/122310 [1:39:00<2:08:24,  9.23it/s][A
 42%|█████████████▍                  | 51197/122310 [1:39:01<3:03:22,  6.46

step: 35940, loss: 72.45315366673545, epoch: 2



 42%|█████████████▍                  | 51264/122310 [1:39:08<1:47:13, 11.04it/s][A
 42%|█████████████▍                  | 51284/122310 [1:39:08<1:16:30, 15.47it/s][A
 42%|█████████████▍                  | 51296/122310 [1:39:09<1:16:44, 15.42it/s][A
 42%|█████████████▍                  | 51300/122310 [1:39:10<1:37:27, 12.14it/s][A
 42%|█████████████▍                  | 51303/122310 [1:39:11<2:03:01,  9.62it/s][A
 42%|█████████████▍                  | 51311/122310 [1:39:12<2:01:06,  9.77it/s][A
 42%|█████████████▍                  | 51319/122310 [1:39:12<2:02:16,  9.68it/s][A
 42%|█████████████▍                  | 51329/122310 [1:39:13<1:52:12, 10.54it/s][A
 42%|█████████████▍                  | 51337/122310 [1:39:14<1:53:37, 10.41it/s][A
 42%|█████████████▍                  | 51347/122310 [1:39:15<1:48:17, 10.92it/s][A
 42%|█████████████▍                  | 51354/122310 [1:39:16<1:54:17, 10.35it/s][A
 42%|█████████████▍                  | 51363/122310 [1:39:16<1:50:35, 10.69

step: 35960, loss: 71.91990275683642, epoch: 2



 42%|█████████████▍                  | 51428/122310 [1:39:23<2:34:51,  7.63it/s][A
 42%|█████████████▍                  | 51433/122310 [1:39:24<2:42:01,  7.29it/s][A
 42%|█████████████▍                  | 51439/122310 [1:39:25<2:41:40,  7.31it/s][A
 42%|█████████████▍                  | 51443/122310 [1:39:26<2:55:59,  6.71it/s][A
 42%|█████████████▍                  | 51451/122310 [1:39:27<2:33:22,  7.70it/s][A
 42%|█████████████▍                  | 51458/122310 [1:39:27<2:26:50,  8.04it/s][A
 42%|█████████████▍                  | 51463/122310 [1:39:28<2:39:57,  7.38it/s][A
 42%|█████████████▍                  | 51471/122310 [1:39:29<2:23:38,  8.22it/s][A
 42%|█████████████▍                  | 51490/122310 [1:39:30<1:31:14, 12.94it/s][A
 42%|█████████████▍                  | 51502/122310 [1:39:31<1:26:18, 13.67it/s][A
 42%|█████████████▍                  | 51510/122310 [1:39:31<1:33:18, 12.65it/s][A
 42%|█████████████▍                  | 51526/122310 [1:39:32<1:18:45, 14.98

step: 35980, loss: 111.04169703153937, epoch: 2



 42%|█████████████▍                  | 51595/122310 [1:39:39<1:50:13, 10.69it/s][A
 42%|█████████████▌                  | 51607/122310 [1:39:40<1:37:37, 12.07it/s][A
 42%|█████████████▌                  | 51618/122310 [1:39:41<1:34:38, 12.45it/s][A
 42%|█████████████▌                  | 51628/122310 [1:39:42<1:35:53, 12.29it/s][A
 42%|█████████████▌                  | 51632/122310 [1:39:43<1:55:37, 10.19it/s][A
 42%|█████████████▌                  | 51634/122310 [1:39:43<2:26:53,  8.02it/s][A
 42%|█████████████▌                  | 51645/122310 [1:39:44<2:00:46,  9.75it/s][A
 42%|█████████████▌                  | 51656/122310 [1:39:45<1:45:59, 11.11it/s][A
 42%|█████████████▌                  | 51665/122310 [1:39:46<1:45:18, 11.18it/s][A
 42%|█████████████▌                  | 51683/122310 [1:39:47<1:20:15, 14.67it/s][A
 42%|█████████████▌                  | 51694/122310 [1:39:47<1:21:38, 14.42it/s][A
 42%|█████████████▌                  | 51703/122310 [1:39:48<1:26:55, 13.54

step: 36000, loss: 73.6944268453059, epoch: 2
sim1 and sim2 are 0.4329454207839571, 0.24274018953671656
cosine of pred and queen: 0.20781474072193626
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: ma


 42%|█████████████▌                  | 51765/122310 [1:40:14<1:44:51, 11.21it/s][A

Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: statement
Actual: denmark:danish::germany:german, pred: europe
Accuracy is 0.4222222222222222
Actual: walk:walks::vanish:vanishes, pred: beholders
Actual: work:works::generate:generates, pred: pleaseth
Actual: think:thinks::talk:talks, pred: learnings
Actual: vanish:vanishes::eat:eats, pred: placed
Actual: sing:sings::shuffle:shuffles, pred: andujar
Actual: sit:sits::go:goes, pred: heroin
Actual: say:says::provide:provides, pred: revenues
Actual: scream:screams::sing:sings, pred: zephaniah
Actual: play:plays::listen:listens, pred: discarded
Actual: predict:predicts::search:searches, pred: pebbles
Actual: machine:machines::lion:lions, pred: holds
Actual: mango:mangoes::onion:onions, pred: mier
Actual: man:men::mango:mangoes


 42%|█████████████                  | 51776/122310 [1:41:16<52:36:07,  2.68s/it][A
 42%|█████████████                  | 51782/122310 [1:41:17<41:50:10,  2.14s/it][A
 42%|█████████████▏                 | 51789/122310 [1:41:17<31:22:53,  1.60s/it][A
 42%|█████████████▏                 | 51794/122310 [1:41:18<25:22:03,  1.30s/it][A
 42%|█████████████▏                 | 51814/122310 [1:41:19<11:53:13,  1.65it/s][A
 42%|█████████████▏                 | 51818/122310 [1:41:20<10:47:00,  1.82it/s][A
 42%|█████████████▌                  | 51829/122310 [1:41:21<7:30:38,  2.61it/s][A
 42%|█████████████▌                  | 51834/122310 [1:41:21<6:41:59,  2.92it/s][A
 42%|█████████████▌                  | 51836/122310 [1:41:23<7:32:05,  2.60it/s][A
 42%|█████████████▌                  | 51849/122310 [1:41:24<4:32:53,  4.30it/s][A
 42%|█████████████▌                  | 51857/122310 [1:41:24<3:45:57,  5.20it/s][A
 42%|█████████████▌                  | 51868/122310 [1:41:25<2:53:53,  6.75

step: 36020, loss: 84.60344322433531, epoch: 2



 42%|█████████████▌                  | 51954/122310 [1:41:32<1:32:10, 12.72it/s][A
 42%|█████████████▌                  | 51964/122310 [1:41:33<1:31:42, 12.78it/s][A
 42%|█████████████▌                  | 51972/122310 [1:41:34<1:36:42, 12.12it/s][A
 42%|█████████████▌                  | 51979/122310 [1:41:35<1:46:57, 10.96it/s][A
 43%|█████████████▌                  | 51987/122310 [1:41:35<1:48:35, 10.79it/s][A
 43%|█████████████▌                  | 51990/122310 [1:41:37<2:52:41,  6.79it/s][A
 43%|█████████████▌                  | 51999/122310 [1:41:38<2:29:33,  7.84it/s][A
 43%|█████████████▌                  | 52006/122310 [1:41:39<2:26:28,  8.00it/s][A
 43%|█████████████▌                  | 52014/122310 [1:41:39<2:15:51,  8.62it/s][A
 43%|█████████████▌                  | 52021/122310 [1:41:40<2:13:57,  8.75it/s][A
 43%|█████████████▌                  | 52027/122310 [1:41:41<2:19:00,  8.43it/s][A
 43%|█████████████▌                  | 52035/122310 [1:41:42<2:10:46,  8.96

step: 36040, loss: 81.38560138482462, epoch: 2



 43%|█████████████▋                  | 52092/122310 [1:41:48<2:03:04,  9.51it/s][A
 43%|█████████████▋                  | 52103/122310 [1:41:49<1:48:38, 10.77it/s][A
 43%|█████████████▋                  | 52115/122310 [1:41:49<1:37:13, 12.03it/s][A
 43%|█████████████▋                  | 52121/122310 [1:41:50<1:48:14, 10.81it/s][A
 43%|█████████████▋                  | 52126/122310 [1:41:51<2:03:01,  9.51it/s][A
 43%|█████████████▋                  | 52137/122310 [1:41:52<1:47:21, 10.89it/s][A
 43%|█████████████▋                  | 52142/122310 [1:41:53<2:03:16,  9.49it/s][A
 43%|█████████████▋                  | 52148/122310 [1:41:53<2:11:18,  8.91it/s][A
 43%|█████████████▋                  | 52157/122310 [1:41:54<2:00:38,  9.69it/s][A
 43%|█████████████▋                  | 52167/122310 [1:41:55<1:49:38, 10.66it/s][A
 43%|█████████████▋                  | 52185/122310 [1:41:56<1:22:05, 14.24it/s][A
 43%|█████████████▋                  | 52191/122310 [1:41:57<1:35:16, 12.27

step: 36060, loss: 89.78133196533877, epoch: 2



 43%|█████████████▋                  | 52250/122310 [1:42:04<2:15:02,  8.65it/s][A
 43%|█████████████▋                  | 52255/122310 [1:42:04<2:24:57,  8.05it/s][A
 43%|█████████████▋                  | 52267/122310 [1:42:05<1:53:26, 10.29it/s][A
 43%|█████████████▋                  | 52276/122310 [1:42:06<1:49:19, 10.68it/s][A
 43%|█████████████▋                  | 52285/122310 [1:42:07<1:47:11, 10.89it/s][A
 43%|█████████████▋                  | 52287/122310 [1:42:14<8:32:13,  2.28it/s][A
 43%|█████████████▋                  | 52293/122310 [1:42:15<6:51:43,  2.83it/s][A
 43%|█████████████▋                  | 52300/122310 [1:42:16<5:21:08,  3.63it/s][A
 43%|█████████████▋                  | 52311/122310 [1:42:17<3:42:00,  5.25it/s][A
 43%|█████████████▋                  | 52317/122310 [1:42:18<3:24:38,  5.70it/s][A
 43%|█████████████▋                  | 52329/122310 [1:42:18<2:31:48,  7.68it/s][A

step: 36080, loss: 85.37029262116226, epoch: 2



 43%|█████████████▋                  | 52332/122310 [1:42:19<2:52:14,  6.77it/s][A
 43%|█████████████▋                  | 52338/122310 [1:42:20<2:46:05,  7.02it/s][A
 43%|█████████████▋                  | 52342/122310 [1:42:21<2:58:02,  6.55it/s][A
 43%|█████████████▋                  | 52350/122310 [1:42:22<2:36:09,  7.47it/s][A
 43%|█████████████▋                  | 52362/122310 [1:42:22<1:59:56,  9.72it/s][A
 43%|█████████████▋                  | 52369/122310 [1:42:23<2:03:30,  9.44it/s][A
 43%|█████████████▋                  | 52387/122310 [1:42:24<1:27:03, 13.39it/s][A
 43%|█████████████▋                  | 52408/122310 [1:42:25<1:08:23, 17.03it/s][A
 43%|█████████████▋                  | 52424/122310 [1:42:26<1:22:11, 14.17it/s][A
 43%|█████████████▋                  | 52436/122310 [1:42:27<1:20:12, 14.52it/s][A
 43%|█████████████▋                  | 52443/122310 [1:42:28<1:28:43, 13.13it/s][A
 43%|█████████████▋                  | 52447/122310 [1:42:29<1:45:39, 11.02

step: 36100, loss: 72.97285926521174, epoch: 2



 43%|█████████████▋                  | 52493/122310 [1:42:35<3:13:55,  6.00it/s][A
 43%|█████████████▋                  | 52500/122310 [1:42:35<2:50:22,  6.83it/s][A
 43%|█████████████▋                  | 52507/122310 [1:42:36<2:35:45,  7.47it/s][A
 43%|█████████████▋                  | 52520/122310 [1:42:37<1:54:24, 10.17it/s][A
 43%|█████████████▋                  | 52524/122310 [1:42:38<2:13:20,  8.72it/s][A
 43%|█████████████▋                  | 52530/122310 [1:42:39<2:17:40,  8.45it/s][A
 43%|█████████████▋                  | 52539/122310 [1:42:39<2:03:50,  9.39it/s][A
 43%|█████████████▋                  | 52546/122310 [1:42:40<2:04:59,  9.30it/s][A
 43%|█████████████▊                  | 52557/122310 [1:42:41<1:48:28, 10.72it/s][A
 43%|█████████████▊                  | 52572/122310 [1:42:42<1:27:17, 13.31it/s][A
 43%|█████████████▊                  | 52583/122310 [1:42:42<1:25:08, 13.65it/s][A
 43%|█████████████▊                  | 52590/122310 [1:42:43<1:34:30, 12.29

step: 36120, loss: 71.90454052707955, epoch: 2



 43%|█████████████▊                  | 52699/122310 [1:42:50<1:13:32, 15.78it/s][A
 43%|█████████████▊                  | 52717/122310 [1:42:51<1:04:13, 18.06it/s][A
 43%|█████████████▊                  | 52724/122310 [1:42:52<1:15:25, 15.38it/s][A
 43%|█████████████▊                  | 52728/122310 [1:42:52<1:33:31, 12.40it/s][A
 43%|█████████████▊                  | 52742/122310 [1:42:53<1:21:53, 14.16it/s][A
 43%|█████████████▊                  | 52754/122310 [1:42:54<1:19:22, 14.61it/s][A
 43%|█████████████▊                  | 52763/122310 [1:42:55<1:24:46, 13.67it/s][A
 43%|█████████████▊                  | 52770/122310 [1:42:55<1:34:04, 12.32it/s][A
 43%|█████████████▊                  | 52779/122310 [1:42:56<1:35:34, 12.13it/s][A
 43%|█████████████▊                  | 52790/122310 [1:42:57<1:30:27, 12.81it/s][A
 43%|█████████████▊                  | 52799/122310 [1:42:58<1:32:53, 12.47it/s][A
 43%|█████████████▊                  | 52806/122310 [1:42:58<1:40:49, 11.49

step: 36140, loss: 68.37159020929045, epoch: 2



 43%|█████████████▊                  | 52893/122310 [1:43:05<1:14:20, 15.56it/s][A
 43%|█████████████▊                  | 52896/122310 [1:43:06<1:35:46, 12.08it/s][A
 43%|█████████████▊                  | 52910/122310 [1:43:07<1:22:47, 13.97it/s][A
 43%|█████████████▊                  | 52921/122310 [1:43:08<1:22:24, 14.03it/s][A
 43%|█████████████▊                  | 52926/122310 [1:43:08<1:37:56, 11.81it/s][A
 43%|█████████████▊                  | 52936/122310 [1:43:09<1:35:21, 12.13it/s][A
 43%|█████████████▊                  | 52945/122310 [1:43:10<1:36:31, 11.98it/s][A
 43%|█████████████▊                  | 52951/122310 [1:43:11<1:47:39, 10.74it/s][A
 43%|█████████████▊                  | 52963/122310 [1:43:11<1:34:55, 12.18it/s][A
 43%|█████████████▊                  | 52972/122310 [1:43:12<1:35:48, 12.06it/s][A
 43%|█████████████▊                  | 52994/122310 [1:43:13<1:08:03, 16.97it/s][A
 43%|█████████████▊                  | 53010/122310 [1:43:14<1:03:47, 18.10

step: 36160, loss: 69.50926746045974, epoch: 2



 43%|█████████████▉                  | 53082/122310 [1:43:21<1:32:10, 12.52it/s][A
 43%|█████████████▉                  | 53090/122310 [1:43:21<1:36:51, 11.91it/s][A
 43%|█████████████▉                  | 53095/122310 [1:43:22<1:52:01, 10.30it/s][A
 43%|█████████████▉                  | 53103/122310 [1:43:23<1:51:19, 10.36it/s][A
 43%|█████████████▉                  | 53118/122310 [1:43:24<1:27:45, 13.14it/s][A
 43%|█████████████▉                  | 53129/122310 [1:43:24<1:25:28, 13.49it/s][A
 43%|█████████████▉                  | 53135/122310 [1:43:25<1:38:07, 11.75it/s][A
 43%|█████████████▉                  | 53147/122310 [1:43:26<1:29:06, 12.94it/s][A
 43%|█████████████▉                  | 53152/122310 [1:43:27<1:44:33, 11.02it/s][A
 43%|█████████████▉                  | 53156/122310 [1:43:28<2:03:58,  9.30it/s][A
 43%|█████████████▉                  | 53174/122310 [1:43:28<1:24:51, 13.58it/s][A
 43%|█████████████▉                  | 53182/122310 [1:43:29<1:31:19, 12.61

step: 36180, loss: 78.9015759807701, epoch: 2



 44%|█████████████▉                  | 53268/122310 [1:43:36<1:28:37, 12.98it/s][A
 44%|█████████████▉                  | 53274/122310 [1:43:37<1:41:59, 11.28it/s][A
 44%|█████████████▉                  | 53277/122310 [1:43:38<2:06:24,  9.10it/s][A
 44%|█████████████▉                  | 53285/122310 [1:43:38<2:02:08,  9.42it/s][A
 44%|█████████████▉                  | 53293/122310 [1:43:39<1:58:50,  9.68it/s][A
 44%|█████████████▉                  | 53301/122310 [1:43:40<1:56:56,  9.83it/s][A
 44%|█████████████▉                  | 53302/122310 [1:43:41<2:37:02,  7.32it/s][A
 44%|█████████████▉                  | 53318/122310 [1:43:41<1:41:32, 11.32it/s][A
 44%|█████████████▉                  | 53325/122310 [1:43:43<2:19:36,  8.24it/s][A
 44%|█████████████▉                  | 53339/122310 [1:43:44<1:47:20, 10.71it/s][A
 44%|█████████████▉                  | 53353/122310 [1:43:44<1:30:36, 12.68it/s][A
 44%|█████████████▉                  | 53359/122310 [1:43:45<1:40:34, 11.43

step: 36200, loss: 73.25725076336363, epoch: 2



 44%|█████████████▉                  | 53421/122310 [1:43:51<1:58:15,  9.71it/s][A
 44%|█████████████▉                  | 53428/122310 [1:43:52<2:00:50,  9.50it/s][A
 44%|█████████████▉                  | 53438/122310 [1:43:53<1:49:26, 10.49it/s][A
 44%|█████████████▉                  | 53444/122310 [1:43:54<1:59:17,  9.62it/s][A
 44%|█████████████▉                  | 53449/122310 [1:43:55<2:13:10,  8.62it/s][A
 44%|█████████████▉                  | 53458/122310 [1:43:55<2:00:48,  9.50it/s][A
 44%|█████████████▉                  | 53462/122310 [1:43:56<2:19:10,  8.24it/s][A
 44%|█████████████▉                  | 53469/122310 [1:43:57<2:15:29,  8.47it/s][A
 44%|█████████████▉                  | 53471/122310 [1:43:58<2:53:36,  6.61it/s][A
 44%|█████████████▉                  | 53478/122310 [1:43:59<2:38:59,  7.22it/s][A
 44%|█████████████▉                  | 53484/122310 [1:43:59<2:34:17,  7.43it/s][A
 44%|█████████████▉                  | 53491/122310 [1:44:00<2:24:02,  7.96

step: 36220, loss: 70.31949445926425, epoch: 2



 44%|██████████████                  | 53567/122310 [1:44:07<1:25:48, 13.35it/s][A
 44%|██████████████                  | 53575/122310 [1:44:08<1:31:35, 12.51it/s][A
 44%|██████████████                  | 53581/122310 [1:44:08<1:42:51, 11.14it/s][A
 44%|██████████████                  | 53596/122310 [1:44:09<1:23:27, 13.72it/s][A
 44%|██████████████                  | 53602/122310 [1:44:10<1:35:28, 12.00it/s][A
 44%|██████████████                  | 53608/122310 [1:44:11<1:46:04, 10.79it/s][A
 44%|██████████████                  | 53621/122310 [1:44:11<1:30:02, 12.71it/s][A
 44%|██████████████                  | 53631/122310 [1:44:12<1:28:59, 12.86it/s][A
 44%|██████████████                  | 53637/122310 [1:44:13<1:40:18, 11.41it/s][A
 44%|██████████████                  | 53641/122310 [1:44:14<2:00:22,  9.51it/s][A
 44%|██████████████                  | 53643/122310 [1:44:14<2:32:31,  7.50it/s][A
 44%|██████████████                  | 53649/122310 [1:44:15<2:29:42,  7.64

step: 36240, loss: 75.49445761883754, epoch: 2



 44%|██████████████                  | 53728/122310 [1:44:22<1:51:18, 10.27it/s][A
 44%|██████████████                  | 53734/122310 [1:44:23<1:59:32,  9.56it/s][A
 44%|██████████████                  | 53742/122310 [1:44:24<1:55:55,  9.86it/s][A
 44%|██████████████                  | 53747/122310 [1:44:24<2:08:09,  8.92it/s][A
 44%|██████████████                  | 53752/122310 [1:44:25<2:19:12,  8.21it/s][A
 44%|██████████████                  | 53756/122310 [1:44:26<2:35:46,  7.33it/s][A
 44%|██████████████                  | 53767/122310 [1:44:27<2:00:22,  9.49it/s][A
 44%|██████████████                  | 53775/122310 [1:44:27<1:56:50,  9.78it/s][A
 44%|██████████████                  | 53782/122310 [1:44:28<1:58:18,  9.65it/s][A
 44%|██████████████                  | 53793/122310 [1:44:29<1:42:22, 11.15it/s][A
 44%|██████████████                  | 53799/122310 [1:44:30<1:52:13, 10.17it/s][A
 44%|██████████████                  | 53813/122310 [1:44:30<1:30:59, 12.55

step: 36260, loss: 74.12109265747902, epoch: 2



 44%|██████████████                  | 53893/122310 [1:44:38<2:07:59,  8.91it/s][A
 44%|██████████████                  | 53898/122310 [1:44:39<2:16:46,  8.34it/s][A
 44%|██████████████                  | 53900/122310 [1:44:40<2:46:02,  6.87it/s][A
 44%|██████████████                  | 53911/122310 [1:44:40<2:08:07,  8.90it/s][A
 44%|██████████████                  | 53920/122310 [1:44:41<1:57:54,  9.67it/s][A
 44%|██████████████                  | 53925/122310 [1:44:44<3:23:55,  5.59it/s][A
 44%|██████████████                  | 53932/122310 [1:44:44<3:01:11,  6.29it/s][A
 44%|██████████████                  | 53942/122310 [1:44:45<2:26:04,  7.80it/s][A
 44%|██████████████                  | 53955/122310 [1:44:46<1:54:46,  9.93it/s][A
 44%|██████████████                  | 53961/122310 [1:44:47<2:04:36,  9.14it/s][A
 44%|██████████████                  | 53965/122310 [1:44:48<2:21:43,  8.04it/s][A
 44%|██████████████                  | 53976/122310 [1:44:48<1:58:12,  9.63

step: 36280, loss: 82.1779466102706, epoch: 2



 44%|██████████████▏                 | 54029/122310 [1:44:53<1:38:33, 11.55it/s][A
 44%|██████████████▏                 | 54040/122310 [1:44:54<1:33:38, 12.15it/s][A
 44%|██████████████▏                 | 54048/122310 [1:44:55<1:39:17, 11.46it/s][A
 44%|██████████████▏                 | 54062/122310 [1:44:56<1:24:46, 13.42it/s][A
 44%|██████████████▏                 | 54076/122310 [1:44:56<1:17:42, 14.63it/s][A
 44%|██████████████▏                 | 54084/122310 [1:44:57<1:25:41, 13.27it/s][A
 44%|██████████████▏                 | 54091/122310 [1:44:58<1:34:21, 12.05it/s][A
 44%|██████████████▏                 | 54099/122310 [1:44:59<1:39:38, 11.41it/s][A
 44%|██████████████▏                 | 54107/122310 [1:44:59<1:43:24, 10.99it/s][A
 44%|██████████████▏                 | 54117/122310 [1:45:00<1:38:21, 11.55it/s][A
 44%|██████████████▏                 | 54124/122310 [1:45:01<1:44:51, 10.84it/s][A
 44%|██████████████▏                 | 54137/122310 [1:45:02<1:30:00, 12.62

step: 36300, loss: 71.53766782047441, epoch: 2



 44%|██████████████▏                 | 54200/122310 [1:45:09<2:07:07,  8.93it/s][A
 44%|██████████████▏                 | 54211/122310 [1:45:10<1:47:53, 10.52it/s][A
 44%|██████████████▏                 | 54222/122310 [1:45:10<1:37:37, 11.62it/s][A
 44%|██████████████▏                 | 54231/122310 [1:45:11<1:37:26, 11.64it/s][A
 44%|██████████████▏                 | 54240/122310 [1:45:12<1:37:28, 11.64it/s][A
 44%|██████████████▏                 | 54247/122310 [1:45:13<1:44:06, 10.90it/s][A
 44%|██████████████▏                 | 54252/122310 [1:45:13<2:02:43,  9.24it/s][A
 44%|██████████████▏                 | 54254/122310 [1:45:14<2:35:39,  7.29it/s][A
 44%|██████████████▏                 | 54258/122310 [1:45:15<2:51:39,  6.61it/s][A
 44%|██████████████▏                 | 54265/122310 [1:45:16<2:37:47,  7.19it/s][A
 44%|██████████████▏                 | 54277/122310 [1:45:17<1:56:59,  9.69it/s][A
 44%|██████████████▏                 | 54291/122310 [1:45:17<1:32:33, 12.25

step: 36320, loss: 72.45016832192312, epoch: 2



 44%|██████████████▏                 | 54356/122310 [1:45:24<1:52:14, 10.09it/s][A
 44%|██████████████▏                 | 54365/122310 [1:45:25<1:46:02, 10.68it/s][A
 44%|██████████████▏                 | 54368/122310 [1:45:26<2:10:23,  8.68it/s][A
 44%|██████████████▏                 | 54375/122310 [1:45:26<2:06:39,  8.94it/s][A
 44%|██████████████▏                 | 54382/122310 [1:45:27<2:06:05,  8.98it/s][A
 44%|██████████████▏                 | 54388/122310 [1:45:28<2:10:46,  8.66it/s][A
 44%|██████████████▏                 | 54402/122310 [1:45:29<1:37:16, 11.63it/s][A
 44%|██████████████▏                 | 54416/122310 [1:45:30<1:22:49, 13.66it/s][A
 45%|██████████████▏                 | 54428/122310 [1:45:30<1:19:47, 14.18it/s][A
 45%|██████████████▏                 | 54432/122310 [1:45:31<1:37:43, 11.58it/s][A
 45%|██████████████▏                 | 54445/122310 [1:45:32<1:24:58, 13.31it/s][A
 45%|██████████████▏                 | 54452/122310 [1:45:33<1:33:23, 12.11

step: 36340, loss: 75.75186212081906, epoch: 2



 45%|██████████████▎                 | 54526/122310 [1:45:40<1:43:45, 10.89it/s][A
 45%|██████████████▎                 | 54539/122310 [1:45:40<1:29:42, 12.59it/s][A
 45%|██████████████▎                 | 54547/122310 [1:45:41<1:34:52, 11.90it/s][A
 45%|██████████████▎                 | 54558/122310 [1:45:42<1:28:15, 12.79it/s][A
 45%|██████████████▎                 | 54564/122310 [1:45:43<1:38:31, 11.46it/s][A
 45%|██████████████▎                 | 54575/122310 [1:45:43<1:32:01, 12.27it/s][A
 45%|██████████████▎                 | 54583/122310 [1:45:44<1:36:08, 11.74it/s][A
 45%|██████████████▎                 | 54587/122310 [1:45:45<1:56:05,  9.72it/s][A
 45%|██████████████▎                 | 54594/122310 [1:45:46<1:57:24,  9.61it/s][A
 45%|██████████████▎                 | 54605/122310 [1:45:46<1:41:56, 11.07it/s][A
 45%|██████████████▎                 | 54614/122310 [1:45:47<1:40:12, 11.26it/s][A
 45%|██████████████▎                 | 54620/122310 [1:45:48<1:48:58, 10.35

step: 36360, loss: 79.22534036390003, epoch: 2



 45%|███████████████▏                  | 54729/122310 [1:45:55<55:56, 20.13it/s][A
 45%|██████████████▎                 | 54734/122310 [1:45:55<1:10:11, 16.04it/s][A
 45%|██████████████▎                 | 54738/122310 [1:45:56<1:27:23, 12.89it/s][A
 45%|██████████████▎                 | 54741/122310 [1:45:57<1:50:49, 10.16it/s][A
 45%|██████████████▎                 | 54752/122310 [1:45:58<1:38:17, 11.46it/s][A
 45%|██████████████▎                 | 54764/122310 [1:45:58<1:28:27, 12.73it/s][A
 45%|██████████████▎                 | 54777/122310 [1:45:59<1:20:24, 14.00it/s][A
 45%|██████████████▎                 | 54783/122310 [1:46:00<1:31:39, 12.28it/s][A
 45%|██████████████▎                 | 54792/122310 [1:46:01<1:32:04, 12.22it/s][A
 45%|██████████████▎                 | 54799/122310 [1:46:01<1:39:37, 11.30it/s][A
 45%|██████████████▎                 | 54812/122310 [1:46:02<1:25:29, 13.16it/s][A
 45%|██████████████▎                 | 54824/122310 [1:46:03<1:21:13, 13.85

step: 36380, loss: 84.3669896401469, epoch: 2



 45%|██████████████▎                 | 54923/122310 [1:46:10<1:13:00, 15.38it/s][A
 45%|██████████████▎                 | 54926/122310 [1:46:10<1:32:40, 12.12it/s][A
 45%|██████████████▎                 | 54941/122310 [1:46:11<1:16:58, 14.59it/s][A
 45%|██████████████▍                 | 54945/122310 [1:46:12<1:34:28, 11.88it/s][A
 45%|██████████████▍                 | 54960/122310 [1:46:13<1:17:40, 14.45it/s][A
 45%|██████████████▍                 | 54966/122310 [1:46:13<1:28:58, 12.62it/s][A
 45%|██████████████▍                 | 54969/122310 [1:46:14<1:51:37, 10.05it/s][A
 45%|██████████████▍                 | 54973/122310 [1:46:15<2:08:42,  8.72it/s][A
 45%|██████████████▍                 | 54985/122310 [1:46:15<1:41:51, 11.02it/s][A
 45%|██████████████▍                 | 54991/122310 [1:46:16<1:50:07, 10.19it/s][A
 45%|██████████████▍                 | 54999/122310 [1:46:17<1:47:51, 10.40it/s][A
 45%|██████████████▍                 | 55001/122310 [1:46:18<2:18:15,  8.11

step: 36400, loss: 82.69243719357179, epoch: 2
sim1 and sim2 are 0.46686960547193357, 0.2446830619452145
cosine of pred and queen: 0.22466768300490952
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: m


 45%|██████████████▍                 | 55069/122310 [1:46:34<1:33:08, 12.03it/s][A

Actual: jharkhand:ranchi::punjab:chandigarh, pred: chandigarh
Actual: tripura:agartala::kerala:thiruvananthapuram, pred: thiruvananthapuram
Actual: india:delhi::serbia:belgrade, pred: subsequent
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: scrubs
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: peso
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: electricity
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::it


 45%|█████████████▉                 | 55073/122310 [1:47:40<59:51:44,  3.21s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.13609467455621302



 45%|█████████████▉                 | 55082/122310 [1:47:41<39:50:33,  2.13s/it][A
 45%|█████████████▉                 | 55092/122310 [1:47:41<26:16:03,  1.41s/it][A
 45%|█████████████▉                 | 55104/122310 [1:47:42<16:47:26,  1.11it/s][A
 45%|█████████████▉                 | 55116/122310 [1:47:43<11:18:36,  1.65it/s][A
 45%|█████████████▉                 | 55120/122310 [1:47:44<10:10:51,  1.83it/s][A
 45%|██████████████▍                 | 55127/122310 [1:47:44<8:01:00,  2.33it/s][A
 45%|██████████████▍                 | 55138/122310 [1:47:45<5:30:11,  3.39it/s][A
 45%|██████████████▍                 | 55143/122310 [1:47:46<4:57:47,  3.76it/s][A
 45%|██████████████▍                 | 55150/122310 [1:47:47<4:07:16,  4.53it/s][A
 45%|██████████████▍                 | 55156/122310 [1:47:47<3:42:27,  5.03it/s][A
 45%|██████████████▍                 | 55167/122310 [1:47:48<2:43:27,  6.85it/s][A
 45%|██████████████▍                 | 55173/122310 [1:47:49<2:36:50,  7.13

step: 36420, loss: 63.73492211487988, epoch: 2



 45%|██████████████▍                 | 55258/122310 [1:47:55<1:11:01, 15.73it/s][A
 45%|██████████████▍                 | 55265/122310 [1:47:56<1:20:26, 13.89it/s][A
 45%|██████████████▍                 | 55274/122310 [1:47:57<1:23:23, 13.40it/s][A
 45%|██████████████▍                 | 55282/122310 [1:47:58<1:30:40, 12.32it/s][A
 45%|██████████████▍                 | 55291/122310 [1:47:58<1:31:22, 12.22it/s][A
 45%|██████████████▍                 | 55300/122310 [1:47:59<1:31:21, 12.23it/s][A
 45%|██████████████▍                 | 55314/122310 [1:48:00<1:18:26, 14.24it/s][A
 45%|██████████████▍                 | 55320/122310 [1:48:01<1:32:10, 12.11it/s][A
 45%|██████████████▍                 | 55335/122310 [1:48:01<1:18:33, 14.21it/s][A
 45%|██████████████▍                 | 55338/122310 [1:48:02<1:42:07, 10.93it/s][A
 45%|██████████████▍                 | 55347/122310 [1:48:03<1:41:20, 11.01it/s][A
 45%|██████████████▍                 | 55356/122310 [1:48:04<1:39:46, 11.18

step: 36440, loss: 84.29240303701835, epoch: 2



 45%|██████████████▌                 | 55439/122310 [1:48:11<1:29:56, 12.39it/s][A
 45%|██████████████▌                 | 55447/122310 [1:48:12<1:35:02, 11.73it/s][A
 45%|██████████████▌                 | 55453/122310 [1:48:13<1:45:18, 10.58it/s][A
 45%|██████████████▌                 | 55464/122310 [1:48:13<1:36:01, 11.60it/s][A
 45%|██████████████▌                 | 55477/122310 [1:48:14<1:25:30, 13.03it/s][A
 45%|██████████████▌                 | 55486/122310 [1:48:15<1:31:31, 12.17it/s][A
 45%|██████████████▌                 | 55491/122310 [1:48:16<1:46:00, 10.50it/s][A
 45%|██████████████▌                 | 55498/122310 [1:48:17<1:50:37, 10.07it/s][A
 45%|██████████████▌                 | 55504/122310 [1:48:17<1:59:18,  9.33it/s][A
 45%|██████████████▌                 | 55517/122310 [1:48:18<1:36:12, 11.57it/s][A
 45%|██████████████▌                 | 55519/122310 [1:48:19<2:06:05,  8.83it/s][A
 45%|██████████████▌                 | 55538/122310 [1:48:20<1:22:58, 13.41

step: 36460, loss: 84.32712365862571, epoch: 2



 45%|██████████████▌                 | 55618/122310 [1:48:27<1:27:42, 12.67it/s][A
 45%|██████████████▌                 | 55620/122310 [1:48:27<1:53:38,  9.78it/s][A
 45%|██████████████▌                 | 55629/122310 [1:48:28<1:46:14, 10.46it/s][A
 45%|██████████████▌                 | 55643/122310 [1:48:29<1:25:46, 12.95it/s][A
 45%|██████████████▌                 | 55645/122310 [1:48:30<1:52:15,  9.90it/s][A
 46%|██████████████▌                 | 55657/122310 [1:48:30<1:33:56, 11.82it/s][A
 46%|██████████████▌                 | 55665/122310 [1:48:31<1:40:43, 11.03it/s][A
 46%|██████████████▌                 | 55669/122310 [1:48:32<2:00:36,  9.21it/s][A
 46%|██████████████▌                 | 55678/122310 [1:48:33<1:51:24,  9.97it/s][A
 46%|██████████████▌                 | 55690/122310 [1:48:34<1:33:49, 11.83it/s][A
 46%|██████████████▌                 | 55699/122310 [1:48:34<1:33:14, 11.91it/s][A
 46%|██████████████▌                 | 55707/122310 [1:48:35<1:35:27, 11.63

step: 36480, loss: 83.12297013015315, epoch: 2



 46%|██████████████▌                 | 55765/122310 [1:48:43<3:28:35,  5.32it/s][A
 46%|██████████████▌                 | 55775/122310 [1:48:44<2:42:29,  6.82it/s][A
 46%|██████████████▌                 | 55783/122310 [1:48:45<2:27:24,  7.52it/s][A
 46%|██████████████▌                 | 55785/122310 [1:48:45<2:52:13,  6.44it/s][A
 46%|██████████████▌                 | 55792/122310 [1:48:46<2:35:35,  7.13it/s][A
 46%|██████████████▌                 | 55802/122310 [1:48:47<2:05:55,  8.80it/s][A
 46%|██████████████▌                 | 55812/122310 [1:48:48<1:49:39, 10.11it/s][A
 46%|██████████████▌                 | 55820/122310 [1:48:48<1:47:05, 10.35it/s][A
 46%|██████████████▌                 | 55827/122310 [1:48:49<1:49:33, 10.11it/s][A
 46%|██████████████▌                 | 55835/122310 [1:48:50<1:47:48, 10.28it/s][A
 46%|██████████████▌                 | 55851/122310 [1:48:51<1:21:33, 13.58it/s][A
 46%|██████████████▌                 | 55859/122310 [1:48:51<1:26:49, 12.76

step: 36500, loss: 88.07214265811668, epoch: 2



 46%|██████████████▋                 | 55904/122310 [1:48:57<1:54:47,  9.64it/s][A
 46%|██████████████▋                 | 55916/122310 [1:48:57<1:35:08, 11.63it/s][A
 46%|██████████████▋                 | 55920/122310 [1:48:58<1:53:13,  9.77it/s][A
 46%|██████████████▋                 | 55935/122310 [1:48:59<1:25:09, 12.99it/s][A
 46%|██████████████▋                 | 55946/122310 [1:48:59<1:21:36, 13.55it/s][A
 46%|██████████████▋                 | 55952/122310 [1:49:00<1:32:24, 11.97it/s][A
 46%|██████████████▋                 | 55969/122310 [1:49:01<1:12:26, 15.26it/s][A
 46%|██████████████▋                 | 55981/122310 [1:49:02<1:12:25, 15.26it/s][A
 46%|██████████████▋                 | 55989/122310 [1:49:02<1:20:15, 13.77it/s][A
 46%|██████████████▋                 | 55993/122310 [1:49:03<1:38:30, 11.22it/s][A
 46%|██████████████▋                 | 55996/122310 [1:49:04<2:03:02,  8.98it/s][A
 46%|██████████████▋                 | 56005/122310 [1:49:05<1:52:31,  9.82

step: 36520, loss: 98.3899077670927, epoch: 2



 46%|██████████████▋                 | 56090/122310 [1:49:12<1:33:30, 11.80it/s][A
 46%|██████████████▋                 | 56097/122310 [1:49:13<1:41:38, 10.86it/s][A
 46%|██████████████▋                 | 56101/122310 [1:49:13<2:00:26,  9.16it/s][A
 46%|██████████████▋                 | 56109/122310 [1:49:14<1:56:02,  9.51it/s][A
 46%|██████████████▋                 | 56120/122310 [1:49:15<1:42:07, 10.80it/s][A
 46%|██████████████▋                 | 56129/122310 [1:49:16<1:39:42, 11.06it/s][A
 46%|██████████████▋                 | 56136/122310 [1:49:16<1:45:50, 10.42it/s][A
 46%|██████████████▋                 | 56141/122310 [1:49:17<1:58:32,  9.30it/s][A
 46%|██████████████▋                 | 56150/122310 [1:49:18<1:50:49,  9.95it/s][A
 46%|██████████████▋                 | 56159/122310 [1:49:19<1:44:59, 10.50it/s][A
 46%|██████████████▋                 | 56167/122310 [1:49:20<1:46:20, 10.37it/s][A
 46%|██████████████▋                 | 56175/122310 [1:49:20<1:46:20, 10.36

step: 36540, loss: 110.1919293749192, epoch: 2



 46%|██████████████▋                 | 56234/122310 [1:49:28<2:17:08,  8.03it/s][A
 46%|██████████████▋                 | 56252/122310 [1:49:28<1:30:38, 12.15it/s][A
 46%|██████████████▋                 | 56263/122310 [1:49:29<1:26:39, 12.70it/s][A
 46%|██████████████▋                 | 56269/122310 [1:49:30<1:37:42, 11.26it/s][A
 46%|██████████████▋                 | 56274/122310 [1:49:31<1:50:32,  9.96it/s][A
 46%|██████████████▋                 | 56287/122310 [1:49:31<1:30:24, 12.17it/s][A
 46%|██████████████▋                 | 56295/122310 [1:49:32<1:33:18, 11.79it/s][A
 46%|██████████████▋                 | 56304/122310 [1:49:33<1:32:16, 11.92it/s][A
 46%|██████████████▋                 | 56307/122310 [1:49:34<1:54:10,  9.64it/s][A
 46%|██████████████▋                 | 56314/122310 [1:49:34<1:56:30,  9.44it/s][A
 46%|██████████████▋                 | 56316/122310 [1:49:35<2:28:35,  7.40it/s][A
 46%|██████████████▋                 | 56323/122310 [1:49:36<2:17:13,  8.01

step: 36560, loss: 73.41094057995757, epoch: 2



 46%|██████████████▊                 | 56399/122310 [1:49:43<1:39:36, 11.03it/s][A
 46%|██████████████▊                 | 56405/122310 [1:49:43<1:48:03, 10.17it/s][A
 46%|██████████████▊                 | 56409/122310 [1:49:44<2:04:56,  8.79it/s][A
 46%|██████████████▊                 | 56419/122310 [1:49:45<1:46:56, 10.27it/s][A
 46%|██████████████▊                 | 56428/122310 [1:49:45<1:41:03, 10.87it/s][A
 46%|██████████████▊                 | 56440/122310 [1:49:46<1:27:31, 12.54it/s][A
 46%|██████████████▊                 | 56447/122310 [1:49:47<1:34:08, 11.66it/s][A
 46%|██████████████▊                 | 56454/122310 [1:49:48<1:39:19, 11.05it/s][A
 46%|██████████████▊                 | 56459/122310 [1:49:48<1:52:04,  9.79it/s][A
 46%|██████████████▊                 | 56468/122310 [1:49:49<1:44:00, 10.55it/s][A
 46%|██████████████▊                 | 56484/122310 [1:49:50<1:18:25, 13.99it/s][A
 46%|██████████████▊                 | 56487/122310 [1:49:51<1:39:36, 11.01

step: 36580, loss: 97.78355581632626, epoch: 2



 46%|██████████████▊                 | 56526/122310 [1:49:57<2:53:49,  6.31it/s][A
 46%|██████████████▊                 | 56544/122310 [1:49:58<1:55:50,  9.46it/s][A
 46%|██████████████▊                 | 56553/122310 [1:49:59<1:49:24, 10.02it/s][A
 46%|██████████████▊                 | 56556/122310 [1:49:59<2:05:28,  8.73it/s][A
 46%|██████████████▊                 | 56566/122310 [1:50:00<1:50:25,  9.92it/s][A
 46%|██████████████▊                 | 56583/122310 [1:50:01<1:22:33, 13.27it/s][A
 46%|██████████████▊                 | 56590/122310 [1:50:02<1:29:05, 12.29it/s][A
 46%|██████████████▊                 | 56604/122310 [1:50:02<1:17:17, 14.17it/s][A
 46%|██████████████▊                 | 56609/122310 [1:50:03<1:30:16, 12.13it/s][A
 46%|██████████████▊                 | 56615/122310 [1:50:04<1:39:51, 10.96it/s][A
 46%|██████████████▊                 | 56617/122310 [1:50:05<2:07:17,  8.60it/s][A
 46%|██████████████▊                 | 56623/122310 [1:50:05<2:08:53,  8.49

step: 36600, loss: 70.52155460733891, epoch: 2



 46%|██████████████▊                 | 56714/122310 [1:50:12<1:43:57, 10.52it/s][A
 46%|██████████████▊                 | 56725/122310 [1:50:13<1:31:51, 11.90it/s][A
 46%|██████████████▊                 | 56735/122310 [1:50:13<1:27:50, 12.44it/s][A
 46%|██████████████▊                 | 56743/122310 [1:50:14<1:31:32, 11.94it/s][A
 46%|██████████████▊                 | 56748/122310 [1:50:15<1:44:42, 10.44it/s][A
 46%|██████████████▊                 | 56755/122310 [1:50:15<1:47:35, 10.16it/s][A
 46%|██████████████▊                 | 56765/122310 [1:50:16<1:37:09, 11.24it/s][A
 46%|██████████████▊                 | 56774/122310 [1:50:17<1:34:32, 11.55it/s][A
 46%|██████████████▊                 | 56781/122310 [1:50:18<1:40:11, 10.90it/s][A
 46%|██████████████▊                 | 56790/122310 [1:50:18<1:36:05, 11.36it/s][A
 46%|██████████████▊                 | 56798/122310 [1:50:19<1:37:24, 11.21it/s][A
 46%|██████████████▊                 | 56804/122310 [1:50:20<1:45:53, 10.31

step: 36620, loss: 72.17225164512394, epoch: 2



 47%|██████████████▉                 | 56875/122310 [1:50:26<1:54:24,  9.53it/s][A
 47%|██████████████▉                 | 56882/122310 [1:50:27<1:54:23,  9.53it/s][A
 47%|██████████████▉                 | 56900/122310 [1:50:28<1:17:49, 14.01it/s][A
 47%|██████████████▉                 | 56904/122310 [1:50:29<1:35:09, 11.45it/s][A
 47%|██████████████▉                 | 56914/122310 [1:50:29<1:29:54, 12.12it/s][A
 47%|██████████████▉                 | 56918/122310 [1:50:30<1:47:31, 10.14it/s][A
 47%|██████████████▉                 | 56925/122310 [1:50:31<1:49:17,  9.97it/s][A
 47%|██████████████▉                 | 56931/122310 [1:50:32<1:55:28,  9.44it/s][A
 47%|██████████████▉                 | 56936/122310 [1:50:32<2:05:54,  8.65it/s][A
 47%|██████████████▉                 | 56939/122310 [1:50:33<2:29:59,  7.26it/s][A
 47%|██████████████▉                 | 56943/122310 [1:50:34<2:41:44,  6.74it/s][A
 47%|██████████████▉                 | 56952/122310 [1:50:35<2:09:28,  8.41

step: 36640, loss: 86.46351557717902, epoch: 2



 47%|██████████████▉                 | 57024/122310 [1:50:41<1:42:23, 10.63it/s][A
 47%|██████████████▉                 | 57033/122310 [1:50:42<1:37:40, 11.14it/s][A
 47%|██████████████▉                 | 57035/122310 [1:50:43<2:46:55,  6.52it/s][A
 47%|██████████████▉                 | 57040/122310 [1:50:44<2:46:07,  6.55it/s][A
 47%|██████████████▉                 | 57043/122310 [1:50:45<3:04:56,  5.88it/s][A
 47%|██████████████▉                 | 57052/122310 [1:50:46<2:24:03,  7.55it/s][A
 47%|██████████████▉                 | 57062/122310 [1:50:46<1:58:32,  9.17it/s][A
 47%|██████████████▉                 | 57071/122310 [1:50:47<1:48:57,  9.98it/s][A
 47%|██████████████▉                 | 57081/122310 [1:50:48<1:39:28, 10.93it/s][A
 47%|██████████████▉                 | 57093/122310 [1:50:49<1:27:33, 12.41it/s][A
 47%|██████████████▉                 | 57102/122310 [1:50:49<1:28:31, 12.28it/s][A
 47%|██████████████▉                 | 57112/122310 [1:50:50<1:26:13, 12.60

step: 36660, loss: 71.44740443954845, epoch: 2



 47%|██████████████▉                 | 57205/122310 [1:50:56<1:15:48, 14.31it/s][A
 47%|██████████████▉                 | 57222/122310 [1:50:57<1:04:34, 16.80it/s][A
 47%|██████████████▉                 | 57230/122310 [1:50:58<1:12:31, 14.96it/s][A
 47%|██████████████▉                 | 57246/122310 [1:50:58<1:04:26, 16.83it/s][A
 47%|██████████████▉                 | 57249/122310 [1:50:59<1:23:23, 13.00it/s][A
 47%|██████████████▉                 | 57254/122310 [1:51:00<1:37:29, 11.12it/s][A
 47%|██████████████▉                 | 57265/122310 [1:51:01<1:29:05, 12.17it/s][A
 47%|██████████████▉                 | 57280/122310 [1:51:01<1:14:44, 14.50it/s][A
 47%|██████████████▉                 | 57287/122310 [1:51:02<1:23:59, 12.90it/s][A
 47%|██████████████▉                 | 57289/122310 [1:51:03<1:49:23,  9.91it/s][A
 47%|██████████████▉                 | 57303/122310 [1:51:04<1:26:23, 12.54it/s][A
 47%|██████████████▉                 | 57318/122310 [1:51:04<1:13:30, 14.73

step: 36680, loss: 70.78847702511361, epoch: 2



 47%|███████████████                 | 57415/122310 [1:51:11<1:20:20, 13.46it/s][A
 47%|███████████████                 | 57423/122310 [1:51:12<1:26:37, 12.49it/s][A
 47%|███████████████                 | 57436/122310 [1:51:13<1:21:50, 13.21it/s][A
 47%|███████████████                 | 57446/122310 [1:51:14<1:23:20, 12.97it/s][A
 47%|███████████████                 | 57450/122310 [1:51:14<1:41:40, 10.63it/s][A
 47%|███████████████                 | 57453/122310 [1:51:15<2:05:59,  8.58it/s][A
 47%|███████████████                 | 57459/122310 [1:51:16<2:10:21,  8.29it/s][A
 47%|███████████████                 | 57477/122310 [1:51:17<1:25:58, 12.57it/s][A
 47%|███████████████                 | 57482/122310 [1:51:18<1:40:29, 10.75it/s][A
 47%|███████████████                 | 57494/122310 [1:51:18<1:29:01, 12.13it/s][A
 47%|███████████████                 | 57502/122310 [1:51:19<1:33:07, 11.60it/s][A
 47%|███████████████                 | 57507/122310 [1:51:20<1:46:12, 10.17

step: 36700, loss: 65.10590048527087, epoch: 2



 47%|███████████████                 | 57594/122310 [1:51:27<1:29:12, 12.09it/s][A
 47%|███████████████                 | 57603/122310 [1:51:28<1:31:29, 11.79it/s][A
 47%|███████████████                 | 57609/122310 [1:51:28<1:41:25, 10.63it/s][A
 47%|███████████████                 | 57618/122310 [1:51:29<1:38:53, 10.90it/s][A
 47%|███████████████                 | 57623/122310 [1:51:30<1:53:15,  9.52it/s][A
 47%|███████████████                 | 57632/122310 [1:51:31<1:47:47, 10.00it/s][A
 47%|███████████████                 | 57637/122310 [1:51:32<2:00:44,  8.93it/s][A
 47%|███████████████                 | 57656/122310 [1:51:32<1:20:03, 13.46it/s][A
 47%|███████████████                 | 57664/122310 [1:51:34<1:52:06,  9.61it/s][A
 47%|███████████████                 | 57668/122310 [1:51:35<2:06:00,  8.55it/s][A
 47%|███████████████                 | 57680/122310 [1:51:35<1:44:58, 10.26it/s][A
 47%|███████████████                 | 57682/122310 [1:51:36<2:10:55,  8.23

step: 36720, loss: 96.90478945675008, epoch: 2



 47%|███████████████                 | 57744/122310 [1:51:43<2:57:18,  6.07it/s][A
 47%|███████████████                 | 57751/122310 [1:51:43<2:37:27,  6.83it/s][A
 47%|███████████████                 | 57755/122310 [1:51:44<2:46:46,  6.45it/s][A
 47%|███████████████                 | 57770/122310 [1:51:45<1:47:27, 10.01it/s][A
 47%|███████████████                 | 57773/122310 [1:51:46<2:08:47,  8.35it/s][A
 47%|███████████████                 | 57784/122310 [1:51:46<1:46:24, 10.11it/s][A
 47%|███████████████                 | 57789/122310 [1:51:47<1:57:54,  9.12it/s][A
 47%|███████████████                 | 57804/122310 [1:51:48<1:27:48, 12.24it/s][A
 47%|███████████████▏                | 57811/122310 [1:51:49<1:34:24, 11.39it/s][A
 47%|███████████████▏                | 57819/122310 [1:51:49<1:36:09, 11.18it/s][A
 47%|███████████████▏                | 57822/122310 [1:51:50<1:59:09,  9.02it/s][A
 47%|███████████████▏                | 57826/122310 [1:51:51<2:16:02,  7.90

step: 36740, loss: 92.24242200869557, epoch: 2



 47%|███████████████▏                | 57918/122310 [1:51:58<1:08:00, 15.78it/s][A
 47%|███████████████▏                | 57933/122310 [1:51:58<1:03:08, 16.99it/s][A
 47%|███████████████▏                | 57939/122310 [1:51:59<1:15:08, 14.28it/s][A
 47%|███████████████▏                | 57948/122310 [1:52:00<1:19:09, 13.55it/s][A
 47%|███████████████▏                | 57953/122310 [1:52:01<1:33:16, 11.50it/s][A
 47%|███████████████▏                | 57962/122310 [1:52:01<1:32:15, 11.62it/s][A
 47%|███████████████▏                | 57976/122310 [1:52:02<1:18:24, 13.68it/s][A
 47%|███████████████▏                | 57984/122310 [1:52:03<1:23:59, 12.77it/s][A
 47%|███████████████▏                | 57996/122310 [1:52:04<1:18:18, 13.69it/s][A
 47%|███████████████▏                | 57999/122310 [1:52:04<1:39:36, 10.76it/s][A
 47%|███████████████▏                | 58003/122310 [1:52:05<1:57:33,  9.12it/s][A
 47%|███████████████▏                | 58012/122310 [1:52:06<1:47:44,  9.95

step: 36760, loss: 71.7248048980795, epoch: 2



 47%|███████████████▏                | 58095/122310 [1:52:13<1:32:39, 11.55it/s][A
 48%|███████████████▏                | 58100/122310 [1:52:14<1:47:38,  9.94it/s][A
 48%|███████████████▏                | 58104/122310 [1:52:14<2:07:40,  8.38it/s][A
 48%|███████████████▏                | 58113/122310 [1:52:15<1:55:14,  9.28it/s][A
 48%|███████████████▏                | 58118/122310 [1:52:16<2:07:24,  8.40it/s][A
 48%|███████████████▏                | 58125/122310 [1:52:17<2:04:17,  8.61it/s][A
 48%|███████████████▏                | 58134/122310 [1:52:18<1:54:33,  9.34it/s][A
 48%|███████████████▏                | 58145/122310 [1:52:18<1:40:22, 10.65it/s][A
 48%|███████████████▏                | 58149/122310 [1:52:19<1:57:22,  9.11it/s][A
 48%|███████████████▏                | 58162/122310 [1:52:20<1:34:00, 11.37it/s][A
 48%|███████████████▏                | 58167/122310 [1:52:21<1:47:09,  9.98it/s][A
 48%|███████████████▏                | 58172/122310 [1:52:21<1:58:38,  9.01

step: 36780, loss: 69.6965261366697, epoch: 2



 48%|███████████████▏                | 58261/122310 [1:52:28<1:11:03, 15.02it/s][A
 48%|███████████████▏                | 58268/122310 [1:52:29<1:20:05, 13.33it/s][A
 48%|███████████████▏                | 58276/122310 [1:52:30<1:25:34, 12.47it/s][A
 48%|███████████████▎                | 58292/122310 [1:52:31<1:10:56, 15.04it/s][A
 48%|███████████████▎                | 58304/122310 [1:52:31<1:09:53, 15.26it/s][A
 48%|███████████████▎                | 58315/122310 [1:52:32<1:10:39, 15.09it/s][A
 48%|███████████████▎                | 58328/122310 [1:52:34<1:28:45, 12.01it/s][A
 48%|███████████████▎                | 58341/122310 [1:52:35<1:20:26, 13.25it/s][A
 48%|███████████████▎                | 58348/122310 [1:52:35<1:27:09, 12.23it/s][A
 48%|███████████████▎                | 58353/122310 [1:52:36<1:39:13, 10.74it/s][A
 48%|███████████████▎                | 58362/122310 [1:52:37<1:35:58, 11.11it/s][A
 48%|███████████████▎                | 58375/122310 [1:52:38<1:22:36, 12.90

step: 36800, loss: 90.43885580905403, epoch: 2
sim1 and sim2 are 0.43316761840817186, 0.2577557515834831
cosine of pred and queen: 0.23032754749001466
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: hollande
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual:


 48%|███████████████▎                | 58442/122310 [1:52:54<1:29:14, 11.93it/s][A

Actual: india:delhi::serbia:belgrade, pred: crowe
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: scrubs
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: finland
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: reports
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: english
Actu


 48%|██████████████▊                | 58452/122310 [1:54:01<45:33:56,  2.57s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.1242603550295858



 48%|██████████████▊                | 58467/122310 [1:54:02<27:28:04,  1.55s/it][A
 48%|██████████████▊                | 58474/122310 [1:54:03<22:02:37,  1.24s/it][A
 48%|██████████████▊                | 58482/122310 [1:54:03<16:48:37,  1.05it/s][A
 48%|██████████████▊                | 58492/122310 [1:54:04<11:56:59,  1.48it/s][A
 48%|███████████████▎                | 58504/122310 [1:54:05<8:10:04,  2.17it/s][A
 48%|███████████████▎                | 58519/122310 [1:54:06<5:22:45,  3.29it/s][A
 48%|███████████████▎                | 58535/122310 [1:54:07<3:42:42,  4.77it/s][A
 48%|███████████████▎                | 58549/122310 [1:54:07<2:51:19,  6.20it/s][A
 48%|███████████████▎                | 58556/122310 [1:54:08<2:41:45,  6.57it/s][A
 48%|███████████████▎                | 58567/122310 [1:54:09<2:16:58,  7.76it/s][A
 48%|███████████████▎                | 58572/122310 [1:54:10<2:22:11,  7.47it/s][A
 48%|███████████████▎                | 58580/122310 [1:54:11<2:11:52,  8.05

step: 36820, loss: 63.34300805472944, epoch: 2



 48%|███████████████▎                | 58665/122310 [1:54:18<1:37:44, 10.85it/s][A
 48%|███████████████▎                | 58676/122310 [1:54:18<1:31:48, 11.55it/s][A
 48%|███████████████▎                | 58682/122310 [1:54:19<1:42:43, 10.32it/s][A
 48%|███████████████▎                | 58691/122310 [1:54:20<1:40:11, 10.58it/s][A
 48%|███████████████▎                | 58697/122310 [1:54:21<1:48:42,  9.75it/s][A
 48%|███████████████▎                | 58703/122310 [1:54:22<1:54:51,  9.23it/s][A
 48%|███████████████▎                | 58709/122310 [1:54:22<1:59:47,  8.85it/s][A
 48%|███████████████▎                | 58721/122310 [1:54:23<1:37:27, 10.88it/s][A
 48%|███████████████▎                | 58731/122310 [1:54:24<1:31:38, 11.56it/s][A
 48%|███████████████▎                | 58737/122310 [1:54:25<1:41:27, 10.44it/s][A
 48%|███████████████▎                | 58747/122310 [1:54:25<1:34:34, 11.20it/s][A
 48%|███████████████▎                | 58753/122310 [1:54:26<1:44:02, 10.18

step: 36840, loss: 70.90987128137121, epoch: 2



 48%|███████████████▍                | 58849/122310 [1:54:33<1:14:56, 14.11it/s][A
 48%|███████████████▍                | 58867/122310 [1:54:34<1:02:18, 16.97it/s][A
 48%|███████████████▍                | 58871/122310 [1:54:35<1:18:55, 13.40it/s][A
 48%|███████████████▍                | 58873/122310 [1:54:35<1:44:31, 10.12it/s][A
 48%|███████████████▍                | 58879/122310 [1:54:36<1:51:36,  9.47it/s][A
 48%|███████████████▍                | 58888/122310 [1:54:37<1:44:00, 10.16it/s][A
 48%|███████████████▍                | 58899/122310 [1:54:38<1:32:21, 11.44it/s][A
 48%|███████████████▍                | 58906/122310 [1:54:38<1:38:43, 10.70it/s][A
 48%|███████████████▍                | 58913/122310 [1:54:39<1:43:32, 10.21it/s][A
 48%|███████████████▍                | 58925/122310 [1:54:40<1:29:26, 11.81it/s][A
 48%|███████████████▍                | 58933/122310 [1:54:41<1:32:45, 11.39it/s][A
 48%|███████████████▍                | 58947/122310 [1:54:42<1:18:41, 13.42

step: 36860, loss: 71.12075270222307, epoch: 2



 48%|███████████████▍                | 59005/122310 [1:54:48<2:17:24,  7.68it/s][A
 48%|███████████████▍                | 59023/122310 [1:54:49<1:24:42, 12.45it/s][A
 48%|███████████████▍                | 59027/122310 [1:54:50<1:42:45, 10.26it/s][A
 48%|███████████████▍                | 59031/122310 [1:54:51<2:00:40,  8.74it/s][A
 48%|███████████████▍                | 59037/122310 [1:54:52<2:04:52,  8.44it/s][A
 48%|███████████████▍                | 59041/122310 [1:54:52<2:20:28,  7.51it/s][A
 48%|███████████████▍                | 59055/122310 [1:54:53<1:38:07, 10.74it/s][A
 48%|███████████████▍                | 59067/122310 [1:54:54<1:26:22, 12.20it/s][A
 48%|███████████████▍                | 59089/122310 [1:54:55<1:01:06, 17.24it/s][A
 48%|███████████████▍                | 59096/122310 [1:54:55<1:10:17, 14.99it/s][A
 48%|███████████████▍                | 59106/122310 [1:54:56<1:12:17, 14.57it/s][A
 48%|███████████████▍                | 59116/122310 [1:54:57<1:14:01, 14.23

step: 36880, loss: 68.64955784162626, epoch: 2



 48%|███████████████▍                | 59210/122310 [1:55:03<1:07:05, 15.67it/s][A
 48%|███████████████▍                | 59212/122310 [1:55:04<1:29:13, 11.79it/s][A
 48%|███████████████▍                | 59220/122310 [1:55:05<1:31:20, 11.51it/s][A
 48%|███████████████▍                | 59237/122310 [1:55:06<1:10:38, 14.88it/s][A
 48%|███████████████▍                | 59242/122310 [1:55:06<1:23:50, 12.54it/s][A
 48%|███████████████▌                | 59251/122310 [1:55:07<1:24:33, 12.43it/s][A
 48%|███████████████▌                | 59265/122310 [1:55:08<1:13:42, 14.26it/s][A
 48%|███████████████▌                | 59268/122310 [1:55:09<1:33:50, 11.20it/s][A
 48%|███████████████▌                | 59273/122310 [1:55:09<1:46:06,  9.90it/s][A
 48%|███████████████▌                | 59279/122310 [1:55:10<1:52:11,  9.36it/s][A
 48%|███████████████▌                | 59290/122310 [1:55:11<1:35:18, 11.02it/s][A
 48%|███████████████▌                | 59300/122310 [1:55:12<1:29:12, 11.77

step: 36900, loss: 86.62796289352711, epoch: 2



 49%|███████████████▌                | 59366/122310 [1:55:18<1:28:30, 11.85it/s][A
 49%|███████████████▌                | 59385/122310 [1:55:19<1:07:01, 15.65it/s][A
 49%|███████████████▌                | 59388/122310 [1:55:20<1:25:11, 12.31it/s][A
 49%|███████████████▌                | 59391/122310 [1:55:20<1:44:55,  9.99it/s][A
 49%|███████████████▌                | 59401/122310 [1:55:21<1:35:35, 10.97it/s][A
 49%|███████████████▌                | 59403/122310 [1:55:22<2:02:13,  8.58it/s][A
 49%|███████████████▌                | 59412/122310 [1:55:23<1:48:24,  9.67it/s][A
 49%|███████████████▌                | 59415/122310 [1:55:23<2:10:58,  8.00it/s][A
 49%|███████████████▌                | 59422/122310 [1:55:24<2:04:37,  8.41it/s][A
 49%|███████████████▌                | 59428/122310 [1:55:25<2:05:58,  8.32it/s][A
 49%|███████████████▌                | 59438/122310 [1:55:26<1:45:55,  9.89it/s][A
 49%|███████████████▌                | 59444/122310 [1:55:27<2:25:37,  7.20

step: 36920, loss: 85.37527078068167, epoch: 2



 49%|███████████████▌                | 59523/122310 [1:55:33<1:26:42, 12.07it/s][A
 49%|███████████████▌                | 59535/122310 [1:55:34<1:18:29, 13.33it/s][A
 49%|███████████████▌                | 59538/122310 [1:55:35<1:39:48, 10.48it/s][A
 49%|███████████████▌                | 59542/122310 [1:55:35<1:56:00,  9.02it/s][A
 49%|███████████████▌                | 59544/122310 [1:55:36<2:29:16,  7.01it/s][A
 49%|███████████████▌                | 59552/122310 [1:55:37<2:11:03,  7.98it/s][A
 49%|███████████████▌                | 59565/122310 [1:55:38<1:38:12, 10.65it/s][A
 49%|███████████████▌                | 59568/122310 [1:55:38<2:01:05,  8.64it/s][A
 49%|███████████████▌                | 59580/122310 [1:55:39<1:37:54, 10.68it/s][A
 49%|███████████████▌                | 59591/122310 [1:55:40<1:28:58, 11.75it/s][A
 49%|███████████████▌                | 59596/122310 [1:55:41<1:42:44, 10.17it/s][A
 49%|███████████████▌                | 59608/122310 [1:55:41<1:28:29, 11.81

step: 36940, loss: 67.83833001782277, epoch: 2



 49%|███████████████▌                | 59668/122310 [1:55:48<2:31:07,  6.91it/s][A
 49%|███████████████▌                | 59671/122310 [1:55:49<2:53:45,  6.01it/s][A
 49%|███████████████▌                | 59684/122310 [1:55:50<1:53:08,  9.23it/s][A
 49%|███████████████▌                | 59694/122310 [1:55:51<1:41:11, 10.31it/s][A
 49%|███████████████▌                | 59703/122310 [1:55:52<1:38:05, 10.64it/s][A
 49%|███████████████▌                | 59705/122310 [1:55:52<2:06:09,  8.27it/s][A
 49%|███████████████▌                | 59710/122310 [1:55:53<2:15:10,  7.72it/s][A
 49%|███████████████▌                | 59719/122310 [1:55:54<1:57:18,  8.89it/s][A
 49%|███████████████▋                | 59725/122310 [1:55:55<2:01:51,  8.56it/s][A
 49%|███████████████▋                | 59730/122310 [1:55:55<2:11:10,  7.95it/s][A
 49%|███████████████▋                | 59738/122310 [1:55:56<2:00:38,  8.64it/s][A
 49%|███████████████▋                | 59764/122310 [1:55:57<1:04:35, 16.14

step: 36960, loss: 70.73530365029077, epoch: 2



 49%|███████████████▋                | 59854/122310 [1:56:04<1:16:03, 13.69it/s][A
 49%|███████████████▋                | 59862/122310 [1:56:05<1:21:06, 12.83it/s][A
 49%|███████████████▋                | 59874/122310 [1:56:05<1:15:29, 13.78it/s][A
 49%|███████████████▋                | 59881/122310 [1:56:06<1:23:19, 12.49it/s][A
 49%|███████████████▋                | 59885/122310 [1:56:07<1:40:09, 10.39it/s][A
 49%|███████████████▋                | 59898/122310 [1:56:08<1:23:20, 12.48it/s][A
 49%|███████████████▋                | 59911/122310 [1:56:08<1:14:30, 13.96it/s][A
 49%|███████████████▋                | 59918/122310 [1:56:09<1:22:53, 12.55it/s][A
 49%|███████████████▋                | 59929/122310 [1:56:10<1:18:47, 13.20it/s][A
 49%|███████████████▋                | 59933/122310 [1:56:11<1:35:38, 10.87it/s][A
 49%|███████████████▋                | 59941/122310 [1:56:11<1:36:23, 10.78it/s][A
 49%|███████████████▋                | 59949/122310 [1:56:12<1:36:26, 10.78

step: 36980, loss: 75.72216287549563, epoch: 2



 49%|███████████████▋                | 60038/122310 [1:56:19<1:04:51, 16.00it/s][A
 49%|███████████████▋                | 60049/122310 [1:56:19<1:06:09, 15.69it/s][A
 49%|███████████████▋                | 60061/122310 [1:56:20<1:05:46, 15.77it/s][A
 49%|███████████████▋                | 60074/122310 [1:56:21<1:03:31, 16.33it/s][A
 49%|███████████████▋                | 60078/122310 [1:56:22<1:19:18, 13.08it/s][A
 49%|███████████████▋                | 60087/122310 [1:56:22<1:21:18, 12.75it/s][A
 49%|███████████████▋                | 60097/122310 [1:56:23<1:19:59, 12.96it/s][A
 49%|███████████████▋                | 60101/122310 [1:56:24<1:37:37, 10.62it/s][A
 49%|███████████████▋                | 60110/122310 [1:56:25<1:33:42, 11.06it/s][A
 49%|███████████████▋                | 60117/122310 [1:56:25<1:37:52, 10.59it/s][A
 49%|███████████████▋                | 60134/122310 [1:56:26<1:12:45, 14.24it/s][A
 49%|███████████████▋                | 60142/122310 [1:56:27<1:18:31, 13.20

step: 37000, loss: 73.47460982179037, epoch: 2
saving weights



 49%|███████████████▊                | 60208/122310 [1:56:34<2:44:17,  6.30it/s][A
 49%|███████████████▊                | 60215/122310 [1:56:35<2:26:51,  7.05it/s][A
 49%|███████████████▊                | 60224/122310 [1:56:35<2:04:10,  8.33it/s][A
 49%|███████████████▊                | 60234/122310 [1:56:36<1:46:36,  9.70it/s][A
 49%|███████████████▊                | 60241/122310 [1:56:37<1:47:37,  9.61it/s][A
 49%|███████████████▊                | 60248/122310 [1:56:38<1:48:10,  9.56it/s][A
 49%|███████████████▊                | 60254/122310 [1:56:38<1:53:23,  9.12it/s][A
 49%|███████████████▊                | 60261/122310 [1:56:39<1:53:09,  9.14it/s][A
 49%|███████████████▊                | 60268/122310 [1:56:40<1:52:09,  9.22it/s][A
 49%|███████████████▊                | 60279/122310 [1:56:41<1:35:14, 10.86it/s][A
 49%|███████████████▊                | 60289/122310 [1:56:41<1:29:01, 11.61it/s][A
 49%|███████████████▊                | 60294/122310 [1:56:42<1:42:07, 10.12

step: 37020, loss: 75.6334288205612, epoch: 2



 49%|███████████████▊                | 60384/122310 [1:56:49<1:19:20, 13.01it/s][A
 49%|███████████████▊                | 60390/122310 [1:56:50<1:30:24, 11.42it/s][A
 49%|███████████████▊                | 60393/122310 [1:56:51<1:52:32,  9.17it/s][A
 49%|███████████████▊                | 60398/122310 [1:56:52<2:02:12,  8.44it/s][A
 49%|███████████████▊                | 60415/122310 [1:56:53<1:20:50, 12.76it/s][A
 49%|███████████████▊                | 60427/122310 [1:56:53<1:15:25, 13.67it/s][A
 49%|███████████████▊                | 60439/122310 [1:56:54<1:11:40, 14.39it/s][A
 49%|███████████████▊                | 60443/122310 [1:56:55<1:28:08, 11.70it/s][A
 49%|███████████████▊                | 60454/122310 [1:56:56<1:21:55, 12.58it/s][A
 49%|███████████████▊                | 60469/122310 [1:56:56<1:09:52, 14.75it/s][A
 49%|███████████████▊                | 60473/122310 [1:56:57<1:26:28, 11.92it/s][A
 49%|███████████████▊                | 60476/122310 [1:56:58<1:47:20,  9.60

step: 37040, loss: 82.32113682256978, epoch: 2



 50%|███████████████▊                | 60567/122310 [1:57:05<2:12:08,  7.79it/s][A
 50%|███████████████▊                | 60578/122310 [1:57:06<1:47:42,  9.55it/s][A
 50%|███████████████▊                | 60584/122310 [1:57:07<1:52:22,  9.15it/s][A
 50%|███████████████▊                | 60598/122310 [1:57:07<1:27:23, 11.77it/s][A
 50%|███████████████▊                | 60603/122310 [1:57:08<1:39:20, 10.35it/s][A
 50%|███████████████▊                | 60610/122310 [1:57:09<1:42:07, 10.07it/s][A
 50%|███████████████▊                | 60616/122310 [1:57:10<1:49:33,  9.39it/s][A
 50%|███████████████▊                | 60626/122310 [1:57:10<1:38:35, 10.43it/s][A
 50%|███████████████▊                | 60633/122310 [1:57:11<1:42:36, 10.02it/s][A
 50%|███████████████▊                | 60640/122310 [1:57:12<1:45:24,  9.75it/s][A
 50%|███████████████▊                | 60646/122310 [1:57:13<1:52:22,  9.15it/s][A
 50%|███████████████▊                | 60659/122310 [1:57:14<1:29:42, 11.45

step: 37060, loss: 70.81560806998074, epoch: 2



 50%|███████████████▉                | 60737/122310 [1:57:20<1:30:16, 11.37it/s][A
 50%|███████████████▉                | 60746/122310 [1:57:20<1:27:58, 11.66it/s][A
 50%|███████████████▉                | 60752/122310 [1:57:21<1:36:48, 10.60it/s][A
 50%|███████████████▉                | 60763/122310 [1:57:22<1:26:22, 11.88it/s][A
 50%|███████████████▉                | 60771/122310 [1:57:23<1:28:42, 11.56it/s][A
 50%|███████████████▉                | 60775/122310 [1:57:23<1:46:19,  9.65it/s][A
 50%|███████████████▉                | 60780/122310 [1:57:24<1:56:41,  8.79it/s][A
 50%|███████████████▉                | 60792/122310 [1:57:25<1:33:17, 10.99it/s][A
 50%|███████████████▉                | 60801/122310 [1:57:26<1:30:28, 11.33it/s][A
 50%|███████████████▉                | 60813/122310 [1:57:26<1:20:30, 12.73it/s][A
 50%|███████████████▉                | 60821/122310 [1:57:27<1:24:48, 12.08it/s][A
 50%|███████████████▉                | 60828/122310 [1:57:28<1:30:43, 11.29

step: 37080, loss: 92.61681110259204, epoch: 2



 50%|███████████████▉                | 60913/122310 [1:57:34<1:35:08, 10.76it/s][A
 50%|███████████████▉                | 60918/122310 [1:57:35<1:46:51,  9.58it/s][A
 50%|███████████████▉                | 60925/122310 [1:57:36<1:46:52,  9.57it/s][A
 50%|███████████████▉                | 60934/122310 [1:57:37<1:38:47, 10.35it/s][A
 50%|███████████████▉                | 60939/122310 [1:57:37<1:50:14,  9.28it/s][A
 50%|███████████████▉                | 60949/122310 [1:57:38<1:37:09, 10.53it/s][A
 50%|███████████████▉                | 60962/122310 [1:57:39<1:20:59, 12.62it/s][A
 50%|███████████████▉                | 60974/122310 [1:57:40<1:14:58, 13.64it/s][A
 50%|███████████████▉                | 60982/122310 [1:57:40<1:20:04, 12.76it/s][A
 50%|███████████████▉                | 60988/122310 [1:57:41<1:29:57, 11.36it/s][A
 50%|███████████████▉                | 61001/122310 [1:57:42<1:17:18, 13.22it/s][A
 50%|███████████████▉                | 61007/122310 [1:57:43<1:27:32, 11.67

step: 37100, loss: 88.77668323624589, epoch: 2



 50%|███████████████▉                | 61080/122310 [1:57:49<1:39:22, 10.27it/s][A
 50%|███████████████▉                | 61092/122310 [1:57:50<1:26:00, 11.86it/s][A
 50%|███████████████▉                | 61103/122310 [1:57:51<1:20:32, 12.67it/s][A
 50%|███████████████▉                | 61115/122310 [1:57:52<1:14:35, 13.67it/s][A
 50%|███████████████▉                | 61123/122310 [1:57:52<1:19:27, 12.83it/s][A
 50%|███████████████▉                | 61135/122310 [1:57:53<1:13:41, 13.84it/s][A
 50%|███████████████▉                | 61143/122310 [1:57:54<1:18:55, 12.92it/s][A
 50%|███████████████▉                | 61149/122310 [1:57:55<1:28:55, 11.46it/s][A
 50%|███████████████▉                | 61154/122310 [1:57:55<1:41:49, 10.01it/s][A
 50%|████████████████                | 61177/122310 [1:57:56<1:02:44, 16.24it/s][A
 50%|████████████████                | 61185/122310 [1:57:57<1:10:01, 14.55it/s][A
 50%|████████████████                | 61187/122310 [1:57:58<1:31:44, 11.10

step: 37120, loss: 94.54877267391844, epoch: 2



 50%|████████████████                | 61252/122310 [1:58:04<1:15:44, 13.44it/s][A
 50%|████████████████                | 61257/122310 [1:58:05<1:27:27, 11.63it/s][A
 50%|████████████████                | 61263/122310 [1:58:06<1:35:51, 10.61it/s][A
 50%|████████████████                | 61273/122310 [1:58:06<1:28:54, 11.44it/s][A
 50%|████████████████                | 61275/122310 [1:58:07<1:53:59,  8.92it/s][A
 50%|████████████████                | 61282/122310 [1:58:08<1:52:22,  9.05it/s][A
 50%|████████████████                | 61293/122310 [1:58:09<1:35:18, 10.67it/s][A
 50%|████████████████                | 61297/122310 [1:58:09<1:51:15,  9.14it/s][A
 50%|████████████████                | 61304/122310 [1:58:10<1:50:16,  9.22it/s][A
 50%|████████████████                | 61316/122310 [1:58:11<1:29:56, 11.30it/s][A
 50%|████████████████                | 61321/122310 [1:58:12<1:42:32,  9.91it/s][A
 50%|████████████████                | 61332/122310 [1:58:12<1:29:21, 11.37

step: 37140, loss: 74.48289188242885, epoch: 2



 50%|████████████████                | 61416/122310 [1:58:19<1:20:14, 12.65it/s][A
 50%|████████████████                | 61424/122310 [1:58:20<1:23:41, 12.13it/s][A
 50%|████████████████                | 61440/122310 [1:58:21<1:08:12, 14.87it/s][A
 50%|████████████████                | 61451/122310 [1:58:21<1:08:22, 14.84it/s][A
 50%|████████████████                | 61461/122310 [1:58:22<1:10:27, 14.39it/s][A
 50%|████████████████                | 61464/122310 [1:58:23<1:29:51, 11.29it/s][A
 50%|████████████████                | 61475/122310 [1:58:24<1:22:31, 12.29it/s][A
 50%|████████████████                | 61483/122310 [1:58:24<1:25:50, 11.81it/s][A
 50%|████████████████                | 61488/122310 [1:58:25<1:38:50, 10.26it/s][A
 50%|████████████████                | 61503/122310 [1:58:26<1:16:39, 13.22it/s][A
 50%|████████████████                | 61510/122310 [1:58:27<1:24:01, 12.06it/s][A
 50%|████████████████                | 61515/122310 [1:58:27<1:36:54, 10.46

step: 37160, loss: 76.87717631436536, epoch: 2



 50%|████████████████                | 61605/122310 [1:58:34<1:22:10, 12.31it/s][A
 50%|████████████████                | 61611/122310 [1:58:35<1:31:32, 11.05it/s][A
 50%|████████████████                | 61613/122310 [1:58:36<1:58:48,  8.51it/s][A
 50%|████████████████                | 61621/122310 [1:58:36<1:50:08,  9.18it/s][A
 50%|████████████████                | 61625/122310 [1:58:37<2:05:41,  8.05it/s][A
 50%|████████████████                | 61632/122310 [1:58:38<1:59:46,  8.44it/s][A
 50%|████████████████▏               | 61638/122310 [1:58:39<2:02:24,  8.26it/s][A
 50%|████████████████▏               | 61641/122310 [1:58:39<2:23:27,  7.05it/s][A
 50%|████████████████▏               | 61643/122310 [1:58:40<2:56:44,  5.72it/s][A
 50%|████████████████▏               | 61656/122310 [1:58:41<1:49:38,  9.22it/s][A
 50%|████████████████▏               | 61669/122310 [1:58:42<1:26:37, 11.67it/s][A
 50%|████████████████▏               | 61676/122310 [1:58:42<1:31:52, 11.00

step: 37180, loss: 81.53015712825145, epoch: 2



 50%|████████████████▏               | 61748/122310 [1:58:49<1:33:27, 10.80it/s][A
 50%|████████████████▏               | 61755/122310 [1:58:50<1:37:26, 10.36it/s][A
 50%|████████████████▏               | 61758/122310 [1:58:51<1:58:47,  8.50it/s][A
 51%|████████████████▏               | 61770/122310 [1:58:51<1:33:39, 10.77it/s][A
 51%|████████████████▏               | 61778/122310 [1:58:52<1:34:01, 10.73it/s][A
 51%|████████████████▏               | 61783/122310 [1:58:53<1:45:41,  9.55it/s][A
 51%|████████████████▏               | 61799/122310 [1:58:54<1:17:01, 13.09it/s][A
 51%|████████████████▏               | 61815/122310 [1:58:54<1:04:29, 15.63it/s][A
 51%|████████████████▏               | 61825/122310 [1:58:55<1:07:24, 14.95it/s][A
 51%|████████████████▏               | 61829/122310 [1:58:56<1:23:32, 12.07it/s][A
 51%|████████████████▏               | 61832/122310 [1:58:57<2:15:57,  7.41it/s][A
 51%|████████████████▏               | 61845/122310 [1:58:58<1:41:41,  9.91

step: 37200, loss: 78.19473198321418, epoch: 2
sim1 and sim2 are 0.3642830515174635, 0.26673782404060004
cosine of pred and queen: 0.20049387277111871
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: ireland
Actual: mumbai:asia::nairobi:africa, pred: population
Actual:


 51%|████████████████▏               | 61893/122310 [1:59:14<1:31:24, 11.02it/s][A

Actual: tripura:agartala::kerala:thiruvananthapuram, pred: sessions
Actual: india:delhi::serbia:belgrade, pred: crowe
Actual: spain:spanish::korea:korean, pred: korean
Actual: syria:arabic::australia:english, pred: scrubs
Actual: mouse:squeak::elephant:trumpet, pred: prescribed
Actual: algeria:dinar::usa:dollar, pred: nipple
Actual: argentina:peso::russia:ruble, pred: dollar
Actual: armenia:dram::iran:rial, pred: footstep
Actual: brazil:real::sweden:krona, pred: london
Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: netherlands
Actual: usa:dollar::nigeria:naira, pred: market
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: electricity
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pr


 51%|███████████████▋               | 61899/122310 [2:00:19<51:15:22,  3.05s/it][A

Actual: india:rupee::denmark:krone, pred: netherlands
Accuracy is 0.14201183431952663



 51%|███████████████▋               | 61908/122310 [2:00:20<34:37:49,  2.06s/it][A
 51%|███████████████▋               | 61916/122310 [2:00:21<24:47:10,  1.48s/it][A
 51%|███████████████▋               | 61926/122310 [2:00:22<16:36:45,  1.01it/s][A
 51%|███████████████▋               | 61930/122310 [2:00:22<14:21:51,  1.17it/s][A
 51%|████████████████▏               | 61946/122310 [2:00:23<7:45:26,  2.16it/s][A
 51%|████████████████▏               | 61965/122310 [2:00:24<4:32:27,  3.69it/s][A
 51%|████████████████▏               | 61976/122310 [2:00:24<3:37:00,  4.63it/s][A
 51%|████████████████▏               | 61986/122310 [2:00:25<3:00:25,  5.57it/s][A
 51%|████████████████▏               | 61989/122310 [2:00:26<3:07:52,  5.35it/s][A
 51%|████████████████▏               | 61993/122310 [2:00:27<3:07:16,  5.37it/s][A
 51%|████████████████▏               | 61997/122310 [2:00:27<3:06:48,  5.38it/s][A
 51%|████████████████▏               | 62006/122310 [2:00:28<2:28:17,  6.78

step: 37220, loss: 83.7097615122013, epoch: 2



 51%|████████████████▏               | 62091/122310 [2:00:35<1:08:49, 14.58it/s][A
 51%|████████████████▏               | 62098/122310 [2:00:36<1:16:15, 13.16it/s][A
 51%|████████████████▏               | 62104/122310 [2:00:36<1:25:04, 11.79it/s][A
 51%|████████████████▎               | 62111/122310 [2:00:37<1:29:57, 11.15it/s][A
 51%|████████████████▎               | 62119/122310 [2:00:38<1:30:20, 11.10it/s][A
 51%|████████████████▎               | 62128/122310 [2:00:38<1:27:43, 11.43it/s][A
 51%|████████████████▎               | 62133/122310 [2:00:39<1:39:53, 10.04it/s][A
 51%|████████████████▎               | 62145/122310 [2:00:40<1:24:02, 11.93it/s][A
 51%|████████████████▎               | 62155/122310 [2:00:41<1:20:53, 12.39it/s][A
 51%|████████████████▎               | 62162/122310 [2:00:41<1:26:52, 11.54it/s][A
 51%|████████████████▎               | 62164/122310 [2:00:42<1:52:50,  8.88it/s][A
 51%|████████████████▎               | 62173/122310 [2:00:43<1:41:12,  9.90

step: 37240, loss: 76.99004194197175, epoch: 2



 51%|████████████████▎               | 62244/122310 [2:00:50<1:50:42,  9.04it/s][A
 51%|████████████████▎               | 62252/122310 [2:00:50<1:45:06,  9.52it/s][A
 51%|████████████████▎               | 62259/122310 [2:00:51<1:45:06,  9.52it/s][A
 51%|████████████████▎               | 62267/122310 [2:00:52<1:40:59,  9.91it/s][A
 51%|████████████████▎               | 62281/122310 [2:00:52<1:19:52, 12.53it/s][A
 51%|████████████████▎               | 62289/122310 [2:00:53<1:23:35, 11.97it/s][A
 51%|████████████████▎               | 62296/122310 [2:00:54<1:29:01, 11.23it/s][A
 51%|████████████████▎               | 62303/122310 [2:00:55<1:33:18, 10.72it/s][A
 51%|████████████████▎               | 62310/122310 [2:00:55<1:36:50, 10.33it/s][A
 51%|████████████████▎               | 62322/122310 [2:00:56<1:22:39, 12.10it/s][A
 51%|████████████████▎               | 62329/122310 [2:00:57<1:28:16, 11.32it/s][A
 51%|████████████████▎               | 62340/122310 [2:00:58<1:20:39, 12.39

step: 37260, loss: 69.21033912142622, epoch: 2



 51%|████████████████▎               | 62410/122310 [2:01:05<1:24:45, 11.78it/s][A
 51%|████████████████▎               | 62416/122310 [2:01:05<1:32:59, 10.73it/s][A
 51%|████████████████▎               | 62424/122310 [2:01:06<1:35:02, 10.50it/s][A
 51%|████████████████▎               | 62439/122310 [2:01:08<1:37:34, 10.23it/s][A
 51%|████████████████▎               | 62450/122310 [2:01:08<1:28:47, 11.24it/s][A
 51%|████████████████▎               | 62457/122310 [2:01:09<1:33:08, 10.71it/s][A
 51%|████████████████▎               | 62464/122310 [2:01:10<1:36:58, 10.28it/s][A
 51%|████████████████▎               | 62472/122310 [2:01:11<1:36:50, 10.30it/s][A
 51%|████████████████▎               | 62477/122310 [2:01:12<1:46:58,  9.32it/s][A
 51%|████████████████▎               | 62481/122310 [2:01:12<2:01:55,  8.18it/s][A
 51%|████████████████▎               | 62487/122310 [2:01:13<2:02:44,  8.12it/s][A
 51%|████████████████▎               | 62491/122310 [2:01:14<2:15:55,  7.34

step: 37280, loss: 87.47094018180294, epoch: 2



 51%|████████████████▎               | 62561/122310 [2:01:20<1:17:34, 12.84it/s][A
 51%|████████████████▎               | 62578/122310 [2:01:20<1:02:47, 15.85it/s][A
 51%|████████████████▎               | 62583/122310 [2:01:21<1:15:37, 13.16it/s][A
 51%|████████████████▍               | 62599/122310 [2:01:22<1:03:36, 15.65it/s][A
 51%|████████████████▍               | 62608/122310 [2:01:23<1:08:08, 14.60it/s][A
 51%|████████████████▍               | 62610/122310 [2:01:23<1:30:26, 11.00it/s][A
 51%|████████████████▍               | 62617/122310 [2:01:24<1:34:24, 10.54it/s][A
 51%|████████████████▍               | 62629/122310 [2:01:25<1:21:49, 12.16it/s][A
 51%|████████████████▍               | 62633/122310 [2:01:26<1:38:03, 10.14it/s][A
 51%|████████████████▍               | 62641/122310 [2:01:26<1:36:21, 10.32it/s][A
 51%|████████████████▍               | 62651/122310 [2:01:27<1:28:41, 11.21it/s][A
 51%|████████████████▍               | 62663/122310 [2:01:28<1:18:46, 12.62

step: 37300, loss: 87.1452831521754, epoch: 2



 51%|████████████████▍               | 62783/122310 [2:01:35<1:10:16, 14.12it/s][A
 51%|████████████████▍               | 62792/122310 [2:01:35<1:13:30, 13.50it/s][A
 51%|████████████████▍               | 62803/122310 [2:01:36<1:11:35, 13.85it/s][A
 51%|████████████████▍               | 62814/122310 [2:01:37<1:10:41, 14.03it/s][A
 51%|████████████████▍               | 62818/122310 [2:01:38<1:26:23, 11.48it/s][A
 51%|████████████████▍               | 62825/122310 [2:01:38<1:31:04, 10.89it/s][A
 51%|████████████████▍               | 62832/122310 [2:01:39<1:35:36, 10.37it/s][A
 51%|████████████████▍               | 62849/122310 [2:01:40<1:10:21, 14.09it/s][A
 51%|████████████████▍               | 62858/122310 [2:01:41<1:13:24, 13.50it/s][A
 51%|████████████████▍               | 62865/122310 [2:01:41<1:20:53, 12.25it/s][A
 51%|████████████████▍               | 62875/122310 [2:01:42<1:19:28, 12.46it/s][A
 51%|████████████████▍               | 62882/122310 [2:01:43<1:25:39, 11.56

step: 37320, loss: 72.74438477683725, epoch: 2



 51%|████████████████▍               | 62952/122310 [2:01:50<1:28:26, 11.19it/s][A
 51%|████████████████▍               | 62962/122310 [2:01:50<1:23:24, 11.86it/s][A
 51%|████████████████▍               | 62968/122310 [2:01:51<1:32:43, 10.67it/s][A
 51%|████████████████▍               | 62971/122310 [2:01:52<1:53:57,  8.68it/s][A
 51%|████████████████▍               | 62972/122310 [2:01:53<2:32:00,  6.51it/s][A
 51%|████████████████▍               | 62979/122310 [2:01:53<2:15:09,  7.32it/s][A
 51%|████████████████▍               | 62984/122310 [2:01:54<2:18:28,  7.14it/s][A
 52%|████████████████▍               | 62992/122310 [2:01:55<2:00:26,  8.21it/s][A
 52%|████████████████▍               | 62998/122310 [2:01:56<2:01:07,  8.16it/s][A
 52%|████████████████▍               | 63010/122310 [2:01:56<1:33:44, 10.54it/s][A
 52%|████████████████▍               | 63020/122310 [2:01:57<1:26:34, 11.41it/s][A
 52%|████████████████▍               | 63032/122310 [2:01:58<1:17:03, 12.82

step: 37340, loss: 74.98681001697707, epoch: 2



 52%|████████████████▌               | 63110/122310 [2:02:05<1:26:43, 11.38it/s][A
 52%|████████████████▌               | 63122/122310 [2:02:05<1:16:59, 12.81it/s][A
 52%|████████████████▌               | 63130/122310 [2:02:06<1:20:44, 12.22it/s][A
 52%|████████████████▌               | 63146/122310 [2:02:07<1:05:44, 15.00it/s][A
 52%|████████████████▌               | 63151/122310 [2:02:08<1:18:38, 12.54it/s][A
 52%|████████████████▌               | 63161/122310 [2:02:08<1:17:12, 12.77it/s][A
 52%|████████████████▌               | 63177/122310 [2:02:09<1:03:57, 15.41it/s][A
 52%|████████████████▌               | 63186/122310 [2:02:10<1:08:26, 14.40it/s][A
 52%|████████████████▌               | 63195/122310 [2:02:11<1:11:57, 13.69it/s][A
 52%|████████████████▌               | 63202/122310 [2:02:11<1:19:20, 12.42it/s][A
 52%|████████████████▌               | 63208/122310 [2:02:12<1:28:37, 11.11it/s][A
 52%|████████████████▌               | 63214/122310 [2:02:13<1:36:39, 10.19

step: 37360, loss: 68.83705484563532, epoch: 2



 52%|████████████████▌               | 63270/122310 [2:02:20<2:04:08,  7.93it/s][A
 52%|████████████████▌               | 63274/122310 [2:02:20<2:15:43,  7.25it/s][A
 52%|████████████████▌               | 63288/122310 [2:02:21<1:33:16, 10.55it/s][A
 52%|████████████████▌               | 63293/122310 [2:02:22<1:44:19,  9.43it/s][A
 52%|████████████████▌               | 63304/122310 [2:02:23<1:29:30, 10.99it/s][A
 52%|████████████████▌               | 63314/122310 [2:02:23<1:23:56, 11.71it/s][A
 52%|████████████████▌               | 63321/122310 [2:02:24<1:29:07, 11.03it/s][A
 52%|████████████████▌               | 63331/122310 [2:02:25<1:24:01, 11.70it/s][A
 52%|████████████████▌               | 63333/122310 [2:02:26<1:49:18,  8.99it/s][A
 52%|████████████████▌               | 63336/122310 [2:02:26<2:10:43,  7.52it/s][A
 52%|████████████████▌               | 63345/122310 [2:02:27<1:50:41,  8.88it/s][A
 52%|████████████████▌               | 63355/122310 [2:02:28<1:36:09, 10.22

step: 37380, loss: 97.47909329699199, epoch: 2



 52%|████████████████▌               | 63404/122310 [2:02:34<2:32:25,  6.44it/s][A
 52%|████████████████▌               | 63410/122310 [2:02:35<2:21:17,  6.95it/s][A
 52%|████████████████▌               | 63415/122310 [2:02:36<2:22:51,  6.87it/s][A
 52%|████████████████▌               | 63425/122310 [2:02:37<1:51:01,  8.84it/s][A
 52%|████████████████▌               | 63427/122310 [2:02:38<3:02:20,  5.38it/s][A
 52%|████████████████▌               | 63441/122310 [2:02:39<1:52:52,  8.69it/s][A
 52%|████████████████▌               | 63449/122310 [2:02:40<1:46:30,  9.21it/s][A
 52%|████████████████▌               | 63456/122310 [2:02:40<1:45:50,  9.27it/s][A
 52%|████████████████▌               | 63461/122310 [2:02:41<1:55:21,  8.50it/s][A
 52%|████████████████▌               | 63471/122310 [2:02:43<2:08:42,  7.62it/s][A
 52%|████████████████▌               | 63474/122310 [2:02:43<2:24:29,  6.79it/s][A
 52%|████████████████▌               | 63485/122310 [2:02:44<1:57:02,  8.38

step: 37400, loss: 85.073342102495, epoch: 2



 52%|████████████████▋               | 63553/122310 [2:02:50<1:26:07, 11.37it/s][A
 52%|████████████████▋               | 63555/122310 [2:02:50<1:51:10,  8.81it/s][A
 52%|████████████████▋               | 63561/122310 [2:02:51<1:53:44,  8.61it/s][A
 52%|████████████████▋               | 63569/122310 [2:02:52<1:46:12,  9.22it/s][A
 52%|████████████████▋               | 63577/122310 [2:02:53<1:40:56,  9.70it/s][A
 52%|████████████████▋               | 63592/122310 [2:02:53<1:16:10, 12.85it/s][A
 52%|████████████████▋               | 63601/122310 [2:02:54<1:18:05, 12.53it/s][A
 52%|████████████████▋               | 63605/122310 [2:02:55<1:33:55, 10.42it/s][A
 52%|████████████████▋               | 63620/122310 [2:02:56<1:14:10, 13.19it/s][A
 52%|████████████████▋               | 63628/122310 [2:02:56<1:18:27, 12.47it/s][A
 52%|████████████████▋               | 63638/122310 [2:02:57<1:19:13, 12.34it/s][A
 52%|████████████████▋               | 63644/122310 [2:02:58<1:30:07, 10.85

step: 37420, loss: 69.57095817261492, epoch: 2



 52%|████████████████▋               | 63721/122310 [2:03:05<1:32:15, 10.58it/s][A
 52%|████████████████▋               | 63730/122310 [2:03:06<1:29:57, 10.85it/s][A
 52%|████████████████▋               | 63737/122310 [2:03:07<1:35:03, 10.27it/s][A
 52%|████████████████▋               | 63750/122310 [2:03:07<1:19:59, 12.20it/s][A
 52%|████████████████▋               | 63758/122310 [2:03:08<1:23:55, 11.63it/s][A
 52%|████████████████▋               | 63763/122310 [2:03:09<1:37:37,  9.99it/s][A
 52%|████████████████▋               | 63765/122310 [2:03:10<2:04:25,  7.84it/s][A
 52%|████████████████▋               | 63770/122310 [2:03:10<2:11:58,  7.39it/s][A
 52%|████████████████▋               | 63777/122310 [2:03:11<2:03:31,  7.90it/s][A
 52%|████████████████▋               | 63782/122310 [2:03:12<2:11:27,  7.42it/s][A
 52%|████████████████▋               | 63784/122310 [2:03:13<2:42:20,  6.01it/s][A
 52%|████████████████▋               | 63790/122310 [2:03:14<2:30:41,  6.47

step: 37440, loss: 69.42634451854433, epoch: 2



 52%|████████████████▋               | 63881/122310 [2:03:21<1:12:33, 13.42it/s][A
 52%|████████████████▋               | 63893/122310 [2:03:21<1:08:01, 14.31it/s][A
 52%|████████████████▋               | 63901/122310 [2:03:22<1:13:16, 13.29it/s][A
 52%|████████████████▋               | 63915/122310 [2:03:23<1:04:59, 14.97it/s][A
 52%|████████████████▋               | 63922/122310 [2:03:23<1:12:58, 13.34it/s][A
 52%|████████████████▋               | 63928/122310 [2:03:24<1:22:41, 11.77it/s][A
 52%|████████████████▋               | 63937/122310 [2:03:25<1:21:38, 11.92it/s][A
 52%|████████████████▋               | 63946/122310 [2:03:26<1:21:27, 11.94it/s][A
 52%|████████████████▋               | 63953/122310 [2:03:26<1:27:10, 11.16it/s][A
 52%|████████████████▋               | 63964/122310 [2:03:27<1:19:25, 12.24it/s][A
 52%|████████████████▋               | 63975/122310 [2:03:28<1:14:47, 13.00it/s][A
 52%|████████████████▋               | 63981/122310 [2:03:29<1:25:56, 11.31

step: 37460, loss: 107.43018575047263, epoch: 2



 52%|████████████████▊               | 64043/122310 [2:03:35<2:03:12,  7.88it/s][A
 52%|████████████████▊               | 64054/122310 [2:03:36<1:38:38,  9.84it/s][A
 52%|████████████████▊               | 64061/122310 [2:03:37<1:39:47,  9.73it/s][A
 52%|████████████████▊               | 64064/122310 [2:03:38<2:00:08,  8.08it/s][A
 52%|████████████████▊               | 64076/122310 [2:03:38<1:33:04, 10.43it/s][A
 52%|████████████████▊               | 64082/122310 [2:03:39<1:39:52,  9.72it/s][A
 52%|████████████████▊               | 64092/122310 [2:03:40<1:31:05, 10.65it/s][A
 52%|████████████████▊               | 64096/122310 [2:03:41<1:49:04,  8.90it/s][A
 52%|████████████████▊               | 64107/122310 [2:03:42<1:36:03, 10.10it/s][A
 52%|████████████████▊               | 64113/122310 [2:03:42<1:43:28,  9.37it/s][A
 52%|████████████████▊               | 64120/122310 [2:03:43<1:47:53,  8.99it/s][A
 52%|████████████████▊               | 64124/122310 [2:03:44<2:06:14,  7.68

step: 37480, loss: 68.65714612644202, epoch: 2



 52%|████████████████▊               | 64204/122310 [2:03:51<1:25:34, 11.32it/s][A
 52%|████████████████▊               | 64207/122310 [2:03:52<1:46:16,  9.11it/s][A
 52%|████████████████▊               | 64210/122310 [2:03:53<2:07:53,  7.57it/s][A
 53%|████████████████▊               | 64220/122310 [2:03:53<1:45:18,  9.19it/s][A
 53%|████████████████▊               | 64231/122310 [2:03:54<1:30:13, 10.73it/s][A
 53%|████████████████▊               | 64245/122310 [2:03:55<1:14:22, 13.01it/s][A
 53%|████████████████▊               | 64249/122310 [2:03:56<1:30:26, 10.70it/s][A
 53%|████████████████▊               | 64258/122310 [2:03:56<1:27:44, 11.03it/s][A
 53%|████████████████▊               | 64269/122310 [2:03:57<1:20:31, 12.01it/s][A
 53%|████████████████▊               | 64276/122310 [2:03:58<1:26:25, 11.19it/s][A
 53%|████████████████▊               | 64288/122310 [2:03:59<1:17:00, 12.56it/s][A
 53%|████████████████▊               | 64303/122310 [2:04:00<1:05:40, 14.72

step: 37500, loss: 77.86045840144256, epoch: 2



 53%|████████████████▊               | 64381/122310 [2:04:06<1:08:52, 14.02it/s][A
 53%|████████████████▊               | 64387/122310 [2:04:07<1:18:35, 12.28it/s][A
 53%|████████████████▊               | 64396/122310 [2:04:08<1:19:25, 12.15it/s][A
 53%|████████████████▊               | 64405/122310 [2:04:09<1:19:35, 12.13it/s][A
 53%|████████████████▊               | 64411/122310 [2:04:09<1:28:24, 10.92it/s][A
 53%|████████████████▊               | 64420/122310 [2:04:10<1:25:42, 11.26it/s][A
 53%|████████████████▊               | 64434/122310 [2:04:11<1:11:07, 13.56it/s][A
 53%|████████████████▊               | 64439/122310 [2:04:12<1:23:39, 11.53it/s][A
 53%|████████████████▊               | 64441/122310 [2:04:12<1:48:17,  8.91it/s][A
 53%|████████████████▊               | 64448/122310 [2:04:13<1:47:45,  8.95it/s][A
 53%|████████████████▊               | 64458/122310 [2:04:14<1:34:10, 10.24it/s][A
 53%|████████████████▊               | 64461/122310 [2:04:15<1:56:20,  8.29

step: 37520, loss: 69.99656032483476, epoch: 2



 53%|████████████████▉               | 64530/122310 [2:04:21<1:13:01, 13.19it/s][A
 53%|████████████████▉               | 64535/122310 [2:04:22<1:25:13, 11.30it/s][A
 53%|████████████████▉               | 64543/122310 [2:04:23<1:26:19, 11.15it/s][A
 53%|████████████████▉               | 64553/122310 [2:04:24<1:20:46, 11.92it/s][A
 53%|████████████████▉               | 64561/122310 [2:04:24<1:22:47, 11.62it/s][A
 53%|████████████████▉               | 64565/122310 [2:04:25<1:38:19,  9.79it/s][A
 53%|████████████████▉               | 64575/122310 [2:04:26<1:27:56, 10.94it/s][A
 53%|████████████████▉               | 64580/122310 [2:04:26<1:39:04,  9.71it/s][A
 53%|████████████████▉               | 64591/122310 [2:04:27<1:25:07, 11.30it/s][A
 53%|████████████████▉               | 64598/122310 [2:04:28<1:29:37, 10.73it/s][A
 53%|████████████████▉               | 64607/122310 [2:04:29<1:25:48, 11.21it/s][A
 53%|████████████████▉               | 64615/122310 [2:04:29<1:26:29, 11.12

step: 37540, loss: 93.38564426594967, epoch: 2



 53%|████████████████▉               | 64692/122310 [2:04:36<1:10:23, 13.64it/s][A
 53%|████████████████▉               | 64698/122310 [2:04:37<1:19:46, 12.04it/s][A
 53%|████████████████▉               | 64709/122310 [2:04:37<1:14:14, 12.93it/s][A
 53%|████████████████▉               | 64711/122310 [2:04:38<1:37:29,  9.85it/s][A
 53%|████████████████▉               | 64715/122310 [2:04:39<1:52:08,  8.56it/s][A
 53%|████████████████▉               | 64724/122310 [2:04:40<1:39:09,  9.68it/s][A
 53%|████████████████▉               | 64733/122310 [2:04:40<1:31:35, 10.48it/s][A
 53%|████████████████▉               | 64744/122310 [2:04:41<1:20:59, 11.85it/s][A
 53%|████████████████▉               | 64753/122310 [2:04:42<1:20:14, 11.96it/s][A
 53%|████████████████▉               | 64765/122310 [2:04:43<1:12:23, 13.25it/s][A
 53%|████████████████▉               | 64773/122310 [2:04:43<1:16:11, 12.59it/s][A
 53%|████████████████▉               | 64776/122310 [2:04:44<1:35:27, 10.05

step: 37560, loss: 82.83953394803589, epoch: 2



 53%|████████████████▉               | 64866/122310 [2:04:51<1:11:55, 13.31it/s][A
 53%|████████████████▉               | 64871/122310 [2:04:51<1:24:21, 11.35it/s][A
 53%|████████████████▉               | 64876/122310 [2:04:52<1:35:16, 10.05it/s][A
 53%|████████████████▉               | 64896/122310 [2:04:53<1:02:44, 15.25it/s][A
 53%|████████████████▉               | 64905/122310 [2:04:54<1:06:44, 14.34it/s][A
 53%|████████████████▉               | 64917/122310 [2:04:54<1:04:04, 14.93it/s][A
 53%|████████████████▉               | 64923/122310 [2:04:55<1:14:15, 12.88it/s][A
 53%|████████████████▉               | 64929/122310 [2:04:56<1:23:16, 11.48it/s][A
 53%|████████████████▉               | 64935/122310 [2:04:57<1:31:04, 10.50it/s][A
 53%|████████████████▉               | 64948/122310 [2:04:57<1:15:38, 12.64it/s][A
 53%|████████████████▉               | 64953/122310 [2:04:58<1:27:45, 10.89it/s][A
 53%|████████████████▉               | 64960/122310 [2:04:59<1:31:21, 10.46

step: 37580, loss: 79.73896758112522, epoch: 2



 53%|█████████████████               | 65034/122310 [2:05:05<1:28:08, 10.83it/s][A
 53%|█████████████████               | 65041/122310 [2:05:06<1:31:18, 10.45it/s][A
 53%|█████████████████               | 65053/122310 [2:05:07<1:17:59, 12.24it/s][A
 53%|█████████████████               | 65061/122310 [2:05:07<1:20:21, 11.87it/s][A
 53%|█████████████████               | 65066/122310 [2:05:08<1:32:11, 10.35it/s][A
 53%|█████████████████               | 65070/122310 [2:05:09<1:47:29,  8.87it/s][A
 53%|█████████████████               | 65086/122310 [2:05:10<1:14:47, 12.75it/s][A
 53%|█████████████████               | 65099/122310 [2:05:10<1:07:00, 14.23it/s][A
 53%|█████████████████               | 65109/122310 [2:05:11<1:07:51, 14.05it/s][A
 53%|█████████████████               | 65116/122310 [2:05:12<1:15:00, 12.71it/s][A
 53%|█████████████████               | 65124/122310 [2:05:13<1:18:22, 12.16it/s][A
 53%|█████████████████               | 65139/122310 [2:05:13<1:05:12, 14.61

step: 37600, loss: 92.87362656347155, epoch: 2
sim1 and sim2 are 0.4158680131251884, 0.26183562487780526
cosine of pred and queen: 0.2360054013553689
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: ireland
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: 


 53%|█████████████████               | 65197/122310 [2:05:34<1:10:54, 13.43it/s][A

Actual: europe:euro::japan:yen, pred: japanese
Actual: india:rupee::denmark:krone, pred: austria
Actual: usa:dollar::nigeria:naira, pred: currency
Actual: switzerland:swiss::spain:spanish, pred: spanish
Actual: thailand:thai::india:indian, pred: commission
Actual: sweden:swedish::netherlands:dutch, pred: dutch
Actual: russia:russian::germany:german, pred: german
Actual: portugal:portuguese::slovakia:slovakian, pred: marins
Actual: poland:polish::italy:italian, pred: italian
Actual: norway:norwegian::mexico:mexican, pred: mexican
Actual: japan:japanese::australia:australian, pred: australian
Actual: italy:italian::ireland:irish, pred: irish
Actual: croatia:croatian::france:french, pred: socialist
Actual: denmark:danish::germany:german, pred: europe
Accuracy is 0.4
Actual: walk:walks::vanish:vanishes, pred: beholders
Actual: work:works::generate:generates, pred: pleaseth
Actual: think:thinks::talk:talks, pred: somnambulisms
Actual: vanish:vanishes::eat:eats, pred: basket
Actual: sing:sin


 53%|████████████████▌              | 65205/122310 [2:06:35<40:11:43,  2.53s/it][A

Actual: india:rupee::denmark:krone, pred: austria
Accuracy is 0.14201183431952663



 53%|████████████████▌              | 65215/122310 [2:06:36<27:52:25,  1.76s/it][A
 53%|████████████████▌              | 65219/122310 [2:06:38<24:29:10,  1.54s/it][A
 53%|████████████████▌              | 65227/122310 [2:06:38<17:28:22,  1.10s/it][A
 53%|████████████████▌              | 65237/122310 [2:06:39<11:43:40,  1.35it/s][A
 53%|█████████████████               | 65250/122310 [2:06:40<7:26:33,  2.13it/s][A
 53%|█████████████████               | 65255/122310 [2:06:40<6:31:25,  2.43it/s][A
 53%|█████████████████               | 65265/122310 [2:06:41<4:42:31,  3.37it/s][A
 53%|█████████████████               | 65277/122310 [2:06:42<3:20:30,  4.74it/s][A
 53%|█████████████████               | 65279/122310 [2:06:43<3:31:53,  4.49it/s][A
 53%|█████████████████               | 65281/122310 [2:06:43<3:44:44,  4.23it/s][A
 53%|█████████████████               | 65291/122310 [2:06:44<2:38:29,  6.00it/s][A
 53%|█████████████████               | 65302/122310 [2:06:45<2:00:01,  7.92

step: 37620, loss: 73.53345193263478, epoch: 2



 53%|█████████████████               | 65366/122310 [2:06:51<1:50:35,  8.58it/s][A
 53%|█████████████████               | 65375/122310 [2:06:51<1:37:36,  9.72it/s][A
 53%|█████████████████               | 65387/122310 [2:06:52<1:20:41, 11.76it/s][A
 53%|█████████████████               | 65393/122310 [2:06:53<1:28:25, 10.73it/s][A
 53%|█████████████████               | 65401/122310 [2:06:53<1:27:28, 10.84it/s][A
 53%|█████████████████               | 65409/122310 [2:06:54<1:27:01, 10.90it/s][A
 53%|█████████████████               | 65414/122310 [2:06:55<1:37:46,  9.70it/s][A
 53%|█████████████████               | 65424/122310 [2:06:56<1:26:42, 10.93it/s][A
 53%|█████████████████               | 65432/122310 [2:06:58<2:44:36,  5.76it/s][A
 54%|█████████████████               | 65437/122310 [2:06:59<2:38:28,  5.98it/s][A
 54%|█████████████████               | 65448/122310 [2:07:00<1:59:59,  7.90it/s][A
 54%|█████████████████▏              | 65459/122310 [2:07:01<1:39:08,  9.56

step: 37640, loss: 91.97516696211667, epoch: 2



 54%|█████████████████▏              | 65513/122310 [2:07:05<1:15:31, 12.53it/s][A
 54%|█████████████████▏              | 65527/122310 [2:07:06<1:05:10, 14.52it/s][A
 54%|█████████████████▏              | 65532/122310 [2:07:06<1:17:09, 12.26it/s][A
 54%|█████████████████▏              | 65540/122310 [2:07:07<1:19:20, 11.92it/s][A
 54%|█████████████████▏              | 65543/122310 [2:07:08<1:38:25,  9.61it/s][A
 54%|█████████████████▏              | 65548/122310 [2:07:09<1:47:37,  8.79it/s][A
 54%|█████████████████▏              | 65552/122310 [2:07:09<2:01:00,  7.82it/s][A
 54%|██████████████████▏               | 65577/122310 [2:07:10<59:48, 15.81it/s][A
 54%|██████████████████▏               | 65595/122310 [2:07:11<51:10, 18.47it/s][A
 54%|█████████████████▏              | 65597/122310 [2:07:12<1:29:41, 10.54it/s][A
 54%|█████████████████▏              | 65601/122310 [2:07:13<1:41:38,  9.30it/s][A
 54%|█████████████████▏              | 65609/122310 [2:07:14<1:36:53,  9.75

step: 37660, loss: 70.08236625613984, epoch: 2



 54%|█████████████████▏              | 65671/122310 [2:07:20<1:27:14, 10.82it/s][A
 54%|█████████████████▏              | 65673/122310 [2:07:29<8:08:23,  1.93it/s][A
 54%|█████████████████▏              | 65676/122310 [2:07:29<7:25:22,  2.12it/s][A
 54%|█████████████████▏              | 65681/122310 [2:07:30<5:58:19,  2.63it/s][A
 54%|█████████████████▏              | 65688/122310 [2:07:31<4:24:20,  3.57it/s][A
 54%|█████████████████▏              | 65698/122310 [2:07:32<3:01:59,  5.18it/s][A
 54%|█████████████████▏              | 65701/122310 [2:07:32<3:10:27,  4.95it/s][A
 54%|█████████████████▏              | 65712/122310 [2:07:33<2:14:05,  7.03it/s][A
 54%|█████████████████▏              | 65715/122310 [2:07:34<2:29:34,  6.31it/s][A

step: 37680, loss: 69.66978188119552, epoch: 2



 54%|█████████████████▏              | 65727/122310 [2:07:35<1:48:18,  8.71it/s][A
 54%|█████████████████▏              | 65745/122310 [2:07:35<1:14:12, 12.71it/s][A
 54%|█████████████████▏              | 65752/122310 [2:07:36<1:20:02, 11.78it/s][A
 54%|█████████████████▏              | 65758/122310 [2:07:37<1:28:08, 10.69it/s][A
 54%|█████████████████▏              | 65769/122310 [2:07:38<1:19:55, 11.79it/s][A
 54%|█████████████████▏              | 65781/122310 [2:07:39<1:12:49, 12.94it/s][A
 54%|█████████████████▏              | 65783/122310 [2:07:39<1:34:33,  9.96it/s][A
 54%|██████████████████▎               | 65806/122310 [2:07:40<58:53, 15.99it/s][A
 54%|█████████████████▏              | 65811/122310 [2:07:41<1:11:37, 13.15it/s][A
 54%|█████████████████▏              | 65816/122310 [2:07:42<1:48:48,  8.65it/s][A
 54%|█████████████████▏              | 65819/122310 [2:07:43<2:05:27,  7.50it/s][A
 54%|█████████████████▏              | 65831/122310 [2:07:44<1:37:25,  9.66

step: 37700, loss: 82.31318167279431, epoch: 2



 54%|█████████████████▏              | 65896/122310 [2:07:50<1:15:14, 12.50it/s][A
 54%|█████████████████▏              | 65906/122310 [2:07:51<1:14:05, 12.69it/s][A
 54%|█████████████████▏              | 65917/122310 [2:07:51<1:10:56, 13.25it/s][A
 54%|█████████████████▏              | 65930/122310 [2:07:52<1:05:14, 14.40it/s][A
 54%|█████████████████▎              | 65939/122310 [2:07:53<1:08:41, 13.68it/s][A
 54%|█████████████████▎              | 65949/122310 [2:07:54<1:09:32, 13.51it/s][A
 54%|██████████████████▎               | 65965/122310 [2:07:54<59:25, 15.80it/s][A
 54%|█████████████████▎              | 65976/122310 [2:07:55<1:00:49, 15.44it/s][A
 54%|█████████████████▎              | 65980/122310 [2:07:56<1:16:11, 12.32it/s][A
 54%|█████████████████▎              | 65985/122310 [2:07:57<1:28:29, 10.61it/s][A
 54%|█████████████████▎              | 65992/122310 [2:07:57<1:31:41, 10.24it/s][A
 54%|█████████████████▎              | 66005/122310 [2:07:58<1:16:17, 12.30

step: 37720, loss: 70.50686899752124, epoch: 2



 54%|█████████████████▎              | 66083/122310 [2:08:05<1:18:25, 11.95it/s][A
 54%|█████████████████▎              | 66088/122310 [2:08:06<1:30:26, 10.36it/s][A
 54%|█████████████████▎              | 66100/122310 [2:08:07<1:18:01, 12.01it/s][A
 54%|█████████████████▎              | 66112/122310 [2:08:07<1:11:10, 13.16it/s][A
 54%|█████████████████▎              | 66115/122310 [2:08:08<1:30:06, 10.39it/s][A
 54%|█████████████████▎              | 66118/122310 [2:08:10<2:23:35,  6.52it/s][A
 54%|█████████████████▎              | 66121/122310 [2:08:10<2:39:12,  5.88it/s][A
 54%|█████████████████▎              | 66138/122310 [2:08:11<1:31:49, 10.20it/s][A
 54%|█████████████████▎              | 66142/122310 [2:08:12<1:45:38,  8.86it/s][A
 54%|█████████████████▎              | 66146/122310 [2:08:13<2:32:51,  6.12it/s][A
 54%|█████████████████▎              | 66148/122310 [2:08:14<2:55:55,  5.32it/s][A
 54%|█████████████████▎              | 66162/122310 [2:08:15<1:47:55,  8.67

step: 37740, loss: 78.04109565503558, epoch: 2



 54%|█████████████████▎              | 66217/122310 [2:08:20<1:31:04, 10.27it/s][A
 54%|█████████████████▎              | 66223/122310 [2:08:21<1:37:31,  9.59it/s][A
 54%|█████████████████▎              | 66228/122310 [2:08:22<2:19:23,  6.71it/s][A
 54%|█████████████████▎              | 66235/122310 [2:08:23<2:08:01,  7.30it/s][A
 54%|█████████████████▎              | 66237/122310 [2:08:24<2:33:00,  6.11it/s][A
 54%|█████████████████▎              | 66244/122310 [2:08:25<2:14:17,  6.96it/s][A
 54%|█████████████████▎              | 66248/122310 [2:08:25<2:23:58,  6.49it/s][A
 54%|█████████████████▎              | 66261/122310 [2:08:26<1:38:16,  9.51it/s][A
 54%|█████████████████▎              | 66267/122310 [2:08:27<1:43:14,  9.05it/s][A
 54%|█████████████████▎              | 66275/122310 [2:08:28<1:38:27,  9.49it/s][A
 54%|█████████████████▎              | 66284/122310 [2:08:28<1:31:44, 10.18it/s][A
 54%|█████████████████▎              | 66292/122310 [2:08:29<1:30:51, 10.28

step: 37760, loss: 91.62649947117781, epoch: 2



 54%|█████████████████▎              | 66346/122310 [2:08:37<1:48:16,  8.61it/s][A
 54%|█████████████████▎              | 66352/122310 [2:08:38<1:54:17,  8.16it/s][A
 54%|█████████████████▎              | 66361/122310 [2:08:39<1:47:16,  8.69it/s][A
 54%|█████████████████▎              | 66373/122310 [2:08:40<1:32:51, 10.04it/s][A
 54%|█████████████████▎              | 66387/122310 [2:08:41<1:20:19, 11.60it/s][A
 54%|█████████████████▎              | 66392/122310 [2:08:42<1:33:59,  9.92it/s][A
 54%|█████████████████▎              | 66396/122310 [2:08:43<1:51:25,  8.36it/s][A
 54%|█████████████████▎              | 66403/122310 [2:08:44<1:52:21,  8.29it/s][A
 54%|█████████████████▍              | 66415/122310 [2:08:44<1:34:15,  9.88it/s][A
 54%|█████████████████▍              | 66420/122310 [2:08:45<1:47:20,  8.68it/s][A
 54%|█████████████████▍              | 66429/122310 [2:08:46<1:41:48,  9.15it/s][A
 54%|█████████████████▍              | 66441/122310 [2:08:47<1:28:34, 10.51

step: 37780, loss: 82.20033754786346, epoch: 2



 54%|█████████████████▍              | 66530/122310 [2:08:55<1:24:09, 11.05it/s][A
 54%|█████████████████▍              | 66535/122310 [2:08:56<1:38:57,  9.39it/s][A
 54%|█████████████████▍              | 66550/122310 [2:08:57<1:19:55, 11.63it/s][A
 54%|█████████████████▍              | 66561/122310 [2:08:58<1:18:14, 11.88it/s][A
 54%|█████████████████▍              | 66576/122310 [2:08:59<1:08:50, 13.49it/s][A
 54%|█████████████████▍              | 66584/122310 [2:08:59<1:16:11, 12.19it/s][A
 54%|█████████████████▍              | 66594/122310 [2:09:00<1:17:26, 11.99it/s][A
 54%|█████████████████▍              | 66602/122310 [2:09:01<1:23:15, 11.15it/s][A
 54%|█████████████████▍              | 66612/122310 [2:09:02<1:22:35, 11.24it/s][A
 54%|█████████████████▍              | 66623/122310 [2:09:03<1:19:57, 11.61it/s][A
 54%|█████████████████▍              | 66633/122310 [2:09:04<1:20:11, 11.57it/s][A
 54%|█████████████████▍              | 66639/122310 [2:09:05<1:31:21, 10.16

step: 37800, loss: 80.10604551852224, epoch: 2



 55%|█████████████████▍              | 66739/122310 [2:09:13<1:20:01, 11.57it/s][A
 55%|█████████████████▍              | 66756/122310 [2:09:13<1:06:45, 13.87it/s][A
 55%|█████████████████▍              | 66766/122310 [2:09:14<1:10:02, 13.22it/s][A
 55%|█████████████████▍              | 66781/122310 [2:09:15<1:04:11, 14.42it/s][A
 55%|█████████████████▍              | 66786/122310 [2:09:16<1:18:27, 11.79it/s][A
 55%|█████████████████▍              | 66795/122310 [2:09:17<1:22:42, 11.19it/s][A
 55%|█████████████████▍              | 66797/122310 [2:09:18<1:51:43,  8.28it/s][A
 55%|█████████████████▍              | 66804/122310 [2:09:19<1:53:48,  8.13it/s][A
 55%|█████████████████▍              | 66806/122310 [2:09:21<3:11:36,  4.83it/s][A
 55%|█████████████████▍              | 66815/122310 [2:09:22<2:35:41,  5.94it/s][A
 55%|█████████████████▍              | 66822/122310 [2:09:23<2:27:57,  6.25it/s][A
 55%|█████████████████▍              | 66833/122310 [2:09:24<2:04:01,  7.45

step: 37820, loss: 83.87228973311544, epoch: 2



 55%|█████████████████▌              | 66920/122310 [2:09:32<1:12:44, 12.69it/s][A
 55%|█████████████████▌              | 66935/122310 [2:09:33<1:07:17, 13.72it/s][A
 55%|█████████████████▌              | 66950/122310 [2:09:33<1:04:06, 14.39it/s][A
 55%|█████████████████▌              | 66956/122310 [2:09:34<1:17:19, 11.93it/s][A
 55%|█████████████████▌              | 66967/122310 [2:09:35<1:19:41, 11.58it/s][A
 55%|█████████████████▌              | 66972/122310 [2:09:36<1:36:03,  9.60it/s][A
 55%|█████████████████▌              | 66986/122310 [2:09:37<1:22:45, 11.14it/s][A
 55%|█████████████████▌              | 66991/122310 [2:09:38<1:38:46,  9.33it/s][A
 55%|█████████████████▌              | 66999/122310 [2:09:39<1:41:40,  9.07it/s][A
 55%|█████████████████▌              | 67011/122310 [2:09:40<1:31:32, 10.07it/s][A
 55%|█████████████████▌              | 67023/122310 [2:09:41<1:27:50, 10.49it/s][A
 55%|█████████████████▌              | 67029/122310 [2:09:42<1:40:52,  9.13

step: 37840, loss: 72.75462680039469, epoch: 2



 55%|█████████████████▌              | 67106/122310 [2:09:51<1:25:21, 10.78it/s][A
 55%|█████████████████▌              | 67125/122310 [2:09:52<1:07:01, 13.72it/s][A
 55%|█████████████████▌              | 67141/122310 [2:09:53<1:02:34, 14.70it/s][A
 55%|█████████████████▌              | 67154/122310 [2:09:54<1:03:18, 14.52it/s][A
 55%|█████████████████▌              | 67162/122310 [2:09:55<1:11:49, 12.80it/s][A
 55%|█████████████████▌              | 67175/122310 [2:09:56<1:09:53, 13.15it/s][A
 55%|█████████████████▌              | 67190/122310 [2:09:57<1:05:11, 14.09it/s][A
 55%|█████████████████▌              | 67197/122310 [2:09:57<1:15:17, 12.20it/s][A
 55%|█████████████████▌              | 67199/122310 [2:09:58<1:40:11,  9.17it/s][A
 55%|█████████████████▌              | 67212/122310 [2:09:59<1:25:49, 10.70it/s][A
 55%|█████████████████▌              | 67222/122310 [2:10:00<1:25:15, 10.77it/s][A
 55%|█████████████████▌              | 67237/122310 [2:10:01<1:13:40, 12.46

step: 37860, loss: 79.17828347628223, epoch: 2



 55%|█████████████████▌              | 67311/122310 [2:10:10<1:25:46, 10.69it/s][A
 55%|█████████████████▌              | 67323/122310 [2:10:11<1:20:56, 11.32it/s][A
 55%|█████████████████▌              | 67336/122310 [2:10:12<1:15:46, 12.09it/s][A
 55%|█████████████████▌              | 67343/122310 [2:10:13<1:24:57, 10.78it/s][A
 55%|█████████████████▌              | 67347/122310 [2:10:13<1:42:49,  8.91it/s][A
 55%|█████████████████▌              | 67356/122310 [2:10:14<1:39:45,  9.18it/s][A
 55%|█████████████████▌              | 67364/122310 [2:10:15<1:40:52,  9.08it/s][A
 55%|█████████████████▋              | 67376/122310 [2:10:16<1:29:05, 10.28it/s][A
 55%|█████████████████▋              | 67383/122310 [2:10:17<1:36:24,  9.50it/s][A
 55%|█████████████████▋              | 67393/122310 [2:10:18<1:32:28,  9.90it/s][A
 55%|█████████████████▋              | 67406/122310 [2:10:19<1:21:55, 11.17it/s][A
 55%|█████████████████▋              | 67413/122310 [2:10:20<1:30:06, 10.15

step: 37880, loss: 88.13684411378821, epoch: 2



 55%|█████████████████▋              | 67476/122310 [2:10:28<1:31:44,  9.96it/s][A
 55%|█████████████████▋              | 67479/122310 [2:10:29<1:51:40,  8.18it/s][A
 55%|█████████████████▋              | 67489/122310 [2:10:30<1:40:32,  9.09it/s][A
 55%|█████████████████▋              | 67501/122310 [2:10:31<1:28:37, 10.31it/s][A
 55%|█████████████████▋              | 67510/122310 [2:10:32<1:28:47, 10.29it/s][A
 55%|█████████████████▋              | 67518/122310 [2:10:33<1:33:07,  9.81it/s][A
 55%|█████████████████▋              | 67523/122310 [2:10:34<1:45:45,  8.63it/s][A
 55%|█████████████████▋              | 67524/122310 [2:10:35<2:22:31,  6.41it/s][A
 55%|█████████████████▋              | 67529/122310 [2:10:35<2:28:09,  6.16it/s][A
 55%|█████████████████▋              | 67536/122310 [2:10:36<2:16:59,  6.66it/s][A
 55%|█████████████████▋              | 67544/122310 [2:10:37<2:04:00,  7.36it/s][A
 55%|█████████████████▋              | 67549/122310 [2:10:38<2:13:13,  6.85

step: 37900, loss: 70.22200557828742, epoch: 2



 55%|█████████████████▋              | 67649/122310 [2:10:46<1:09:42, 13.07it/s][A
 55%|█████████████████▋              | 67654/122310 [2:10:47<1:24:04, 10.83it/s][A
 55%|█████████████████▋              | 67660/122310 [2:10:48<1:35:17,  9.56it/s][A
 55%|█████████████████▋              | 67663/122310 [2:10:49<1:56:59,  7.78it/s][A
 55%|█████████████████▋              | 67672/122310 [2:10:50<1:47:11,  8.50it/s][A
 55%|█████████████████▋              | 67677/122310 [2:10:51<2:02:56,  7.41it/s][A
 55%|█████████████████▋              | 67686/122310 [2:10:52<1:56:31,  7.81it/s][A
 55%|█████████████████▋              | 67694/122310 [2:10:53<1:56:08,  7.84it/s][A
 55%|█████████████████▋              | 67711/122310 [2:10:54<1:24:56, 10.71it/s][A
 55%|█████████████████▋              | 67719/122310 [2:10:55<1:30:16, 10.08it/s][A
 55%|█████████████████▋              | 67732/122310 [2:10:56<1:21:18, 11.19it/s][A
 55%|█████████████████▋              | 67743/122310 [2:10:57<1:22:11, 11.06

step: 37920, loss: 101.3239411497588, epoch: 2



 55%|█████████████████▋              | 67812/122310 [2:11:05<1:57:06,  7.76it/s][A
 55%|█████████████████▋              | 67818/122310 [2:11:06<2:01:11,  7.49it/s][A
 55%|█████████████████▋              | 67829/122310 [2:11:07<1:43:32,  8.77it/s][A
 55%|█████████████████▋              | 67842/122310 [2:11:08<1:28:19, 10.28it/s][A
 55%|█████████████████▊              | 67849/122310 [2:11:09<1:37:04,  9.35it/s][A
 55%|█████████████████▊              | 67858/122310 [2:11:10<1:35:58,  9.46it/s][A
 55%|█████████████████▊              | 67872/122310 [2:11:11<1:22:40, 10.97it/s][A
 55%|█████████████████▊              | 67881/122310 [2:11:12<1:26:52, 10.44it/s][A
 56%|█████████████████▊              | 67890/122310 [2:11:13<1:28:53, 10.20it/s][A
 56%|█████████████████▊              | 67904/122310 [2:11:14<1:18:41, 11.52it/s][A
 56%|█████████████████▊              | 67909/122310 [2:11:15<1:35:08,  9.53it/s][A
 56%|█████████████████▊              | 67916/122310 [2:11:15<1:42:36,  8.83

step: 37940, loss: 83.05788893982943, epoch: 2



 56%|█████████████████▊              | 67978/122310 [2:11:24<1:58:28,  7.64it/s][A
 56%|█████████████████▊              | 67984/122310 [2:11:25<2:04:50,  7.25it/s][A
 56%|█████████████████▊              | 67995/122310 [2:11:26<1:45:16,  8.60it/s][A
 56%|█████████████████▊              | 67996/122310 [2:11:27<2:22:57,  6.33it/s][A
 56%|█████████████████▊              | 68000/122310 [2:11:28<2:40:09,  5.65it/s][A
 56%|█████████████████▊              | 68010/122310 [2:11:29<2:05:55,  7.19it/s][A
 56%|█████████████████▊              | 68019/122310 [2:11:30<1:52:48,  8.02it/s][A
 56%|█████████████████▊              | 68029/122310 [2:11:31<1:41:30,  8.91it/s][A
 56%|█████████████████▊              | 68048/122310 [2:11:32<1:12:25, 12.49it/s][A
 56%|█████████████████▊              | 68056/122310 [2:11:34<1:47:21,  8.42it/s][A
 56%|█████████████████▊              | 68058/122310 [2:11:35<2:10:37,  6.92it/s][A
 56%|█████████████████▊              | 68069/122310 [2:11:36<1:49:39,  8.24

step: 37960, loss: 69.489491948508, epoch: 2



 56%|█████████████████▊              | 68144/122310 [2:11:43<1:30:43,  9.95it/s][A
 56%|█████████████████▊              | 68152/122310 [2:11:44<1:34:19,  9.57it/s][A
 56%|█████████████████▊              | 68163/122310 [2:11:44<1:27:28, 10.32it/s][A
 56%|█████████████████▊              | 68166/122310 [2:11:45<1:51:53,  8.07it/s][A
 56%|█████████████████▊              | 68176/122310 [2:11:46<1:43:43,  8.70it/s][A
 56%|█████████████████▊              | 68189/122310 [2:11:47<1:29:01, 10.13it/s][A
 56%|█████████████████▊              | 68197/122310 [2:11:48<1:32:36,  9.74it/s][A
 56%|█████████████████▊              | 68209/122310 [2:11:49<1:25:21, 10.56it/s][A
 56%|█████████████████▊              | 68218/122310 [2:11:50<1:27:32, 10.30it/s][A
 56%|█████████████████▊              | 68222/122310 [2:11:51<1:45:06,  8.58it/s][A
 56%|█████████████████▊              | 68226/122310 [2:11:52<2:03:28,  7.30it/s][A
 56%|█████████████████▊              | 68238/122310 [2:11:53<1:40:08,  9.00

step: 37980, loss: 71.9638771174396, epoch: 2



 56%|█████████████████▉              | 68325/122310 [2:12:01<1:16:19, 11.79it/s][A
 56%|█████████████████▉              | 68338/122310 [2:12:02<1:12:11, 12.46it/s][A
 56%|█████████████████▉              | 68350/122310 [2:12:03<1:11:45, 12.53it/s][A
 56%|█████████████████▉              | 68356/122310 [2:12:04<1:23:53, 10.72it/s][A
 56%|█████████████████▉              | 68364/122310 [2:12:05<1:29:35, 10.04it/s][A
 56%|█████████████████▉              | 68380/122310 [2:12:06<1:13:37, 12.21it/s][A
 56%|█████████████████▉              | 68385/122310 [2:12:07<1:27:56, 10.22it/s][A
 56%|█████████████████▉              | 68398/122310 [2:12:08<1:20:10, 11.21it/s][A
 56%|█████████████████▉              | 68408/122310 [2:12:09<1:21:07, 11.07it/s][A
 56%|█████████████████▉              | 68418/122310 [2:12:10<1:22:34, 10.88it/s][A
 56%|█████████████████▉              | 68426/122310 [2:12:11<1:28:14, 10.18it/s][A
 56%|█████████████████▉              | 68433/122310 [2:12:12<1:35:16,  9.43

step: 38000, loss: 84.00773942098444, epoch: 2
sim1 and sim2 are 0.4015797302818165, 0.22151131479980607
cosine of pred and queen: 0.21147376015909747
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: europe
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: 

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: sister
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: equally
Actual: man:woman::brothers:sisters, pred: multiplying
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: rajasthan
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: rajasthan
Actual: tripura:agartala::odisha:bhubaneswar, pred: bhubaneswar
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar
Actual:


 56%|█████████████████▎             | 68490/122310 [2:14:05<65:24:12,  4.37s/it][A
 56%|█████████████████▎             | 68500/122310 [2:14:06<42:09:37,  2.82s/it][A
 56%|█████████████████▎             | 68505/122310 [2:14:07<34:13:09,  2.29s/it][A
 56%|█████████████████▎             | 68508/122310 [2:14:08<29:57:45,  2.00s/it][A
 56%|█████████████████▎             | 68518/122310 [2:14:09<18:06:31,  1.21s/it][A
 56%|█████████████████▎             | 68524/122310 [2:14:11<14:04:32,  1.06it/s][A
 56%|█████████████████▎             | 68530/122310 [2:14:12<10:54:25,  1.37it/s][A
 56%|█████████████████▉              | 68536/122310 [2:14:13<8:30:55,  1.75it/s][A
 56%|█████████████████▉              | 68545/122310 [2:14:14<5:56:05,  2.52it/s][A
 56%|█████████████████▉              | 68556/122310 [2:14:15<4:14:34,  3.52it/s][A
 56%|█████████████████▉              | 68561/122310 [2:14:16<4:06:43,  3.63it/s][A
 56%|█████████████████▉              | 68565/122310 [2:14:17<4:11:02,  3.57

step: 38020, loss: 77.59037391001799, epoch: 2



 56%|█████████████████▉              | 68640/122310 [2:14:29<2:11:35,  6.80it/s][A
 56%|█████████████████▉              | 68652/122310 [2:14:30<1:55:18,  7.76it/s][A
 56%|█████████████████▉              | 68659/122310 [2:14:32<2:06:59,  7.04it/s][A
 56%|█████████████████▉              | 68670/122310 [2:14:33<1:59:35,  7.48it/s][A
 56%|█████████████████▉              | 68674/122310 [2:14:34<2:21:49,  6.30it/s][A
 56%|█████████████████▉              | 68680/122310 [2:14:35<2:32:22,  5.87it/s][A
 56%|█████████████████▉              | 68690/122310 [2:14:37<2:14:47,  6.63it/s][A
 56%|█████████████████▉              | 68697/122310 [2:14:38<2:18:38,  6.45it/s][A
 56%|█████████████████▉              | 68698/122310 [2:14:39<3:04:29,  4.84it/s][A
 56%|█████████████████▉              | 68708/122310 [2:14:40<2:28:16,  6.03it/s][A
 56%|█████████████████▉              | 68717/122310 [2:14:41<2:14:34,  6.64it/s][A
 56%|█████████████████▉              | 68727/122310 [2:14:42<2:02:01,  7.32

step: 38040, loss: 78.38980165323436, epoch: 2



 56%|██████████████████              | 68819/122310 [2:14:53<1:32:14,  9.66it/s][A
 56%|██████████████████              | 68833/122310 [2:14:54<1:21:19, 10.96it/s][A
 56%|██████████████████              | 68837/122310 [2:14:55<1:39:50,  8.93it/s][A
 56%|██████████████████              | 68846/122310 [2:14:55<1:37:48,  9.11it/s][A
 56%|██████████████████              | 68852/122310 [2:14:56<1:46:27,  8.37it/s][A
 56%|██████████████████              | 68857/122310 [2:14:57<2:02:06,  7.30it/s][A
 56%|██████████████████              | 68860/122310 [2:14:58<2:26:06,  6.10it/s][A
 56%|██████████████████              | 68863/122310 [2:14:59<2:52:26,  5.17it/s][A
 56%|██████████████████              | 68870/122310 [2:15:00<2:33:42,  5.79it/s][A
 56%|██████████████████              | 68879/122310 [2:15:01<2:09:46,  6.86it/s][A
 56%|██████████████████              | 68890/122310 [2:15:02<1:46:38,  8.35it/s][A
 56%|██████████████████              | 68894/122310 [2:15:03<2:03:41,  7.20

step: 38060, loss: 83.65364886954241, epoch: 2



 56%|██████████████████              | 68973/122310 [2:15:13<1:43:58,  8.55it/s][A
 56%|██████████████████              | 68978/122310 [2:15:14<1:57:12,  7.58it/s][A
 56%|██████████████████              | 68986/122310 [2:15:15<1:52:27,  7.90it/s][A
 56%|██████████████████              | 69008/122310 [2:15:16<1:13:57, 12.01it/s][A
 56%|██████████████████              | 69011/122310 [2:15:17<1:37:40,  9.09it/s][A
 56%|██████████████████              | 69021/122310 [2:15:18<1:36:02,  9.25it/s][A
 56%|██████████████████              | 69029/122310 [2:15:19<1:38:13,  9.04it/s][A
 56%|██████████████████              | 69048/122310 [2:15:20<1:12:23, 12.26it/s][A
 56%|██████████████████              | 69051/122310 [2:15:21<1:31:49,  9.67it/s][A
 56%|██████████████████              | 69058/122310 [2:15:21<1:38:08,  9.04it/s][A
 56%|██████████████████              | 69061/122310 [2:15:22<2:00:43,  7.35it/s][A
 56%|██████████████████              | 69072/122310 [2:15:23<1:46:02,  8.37

step: 38080, loss: 71.60527802274854, epoch: 2



 57%|██████████████████              | 69142/122310 [2:15:32<1:34:12,  9.41it/s][A
 57%|██████████████████              | 69151/122310 [2:15:33<1:33:44,  9.45it/s][A
 57%|██████████████████              | 69156/122310 [2:15:34<1:47:42,  8.23it/s][A
 57%|██████████████████              | 69170/122310 [2:15:35<1:29:14,  9.92it/s][A
 57%|██████████████████              | 69176/122310 [2:15:36<1:43:07,  8.59it/s][A
 57%|██████████████████              | 69179/122310 [2:15:37<2:07:17,  6.96it/s][A
 57%|██████████████████              | 69186/122310 [2:15:38<2:06:49,  6.98it/s][A
 57%|██████████████████              | 69202/122310 [2:15:39<1:30:09,  9.82it/s][A
 57%|██████████████████              | 69218/122310 [2:15:40<1:14:53, 11.81it/s][A
 57%|██████████████████              | 69223/122310 [2:15:41<1:36:47,  9.14it/s][A
 57%|██████████████████              | 69226/122310 [2:15:42<2:02:34,  7.22it/s][A
 57%|██████████████████              | 69239/122310 [2:15:43<1:37:34,  9.06

step: 38100, loss: 72.82748699877207, epoch: 2



 57%|██████████████████▏             | 69312/122310 [2:15:52<1:40:04,  8.83it/s][A
 57%|██████████████████▏             | 69326/122310 [2:15:53<1:24:17, 10.48it/s][A
 57%|██████████████████▏             | 69334/122310 [2:15:54<1:30:54,  9.71it/s][A
 57%|██████████████████▏             | 69342/122310 [2:15:55<1:35:08,  9.28it/s][A
 57%|██████████████████▏             | 69352/122310 [2:15:56<1:34:50,  9.31it/s][A
 57%|██████████████████▏             | 69368/122310 [2:15:57<1:16:26, 11.54it/s][A
 57%|██████████████████▏             | 69376/122310 [2:15:58<1:22:17, 10.72it/s][A
 57%|██████████████████▏             | 69383/122310 [2:15:58<1:30:13,  9.78it/s][A
 57%|██████████████████▏             | 69396/122310 [2:15:59<1:19:21, 11.11it/s][A
 57%|██████████████████▏             | 69407/122310 [2:16:00<1:16:35, 11.51it/s][A
 57%|███████████████████▎              | 69427/122310 [2:16:01<59:40, 14.77it/s][A
 57%|███████████████████▎              | 69442/122310 [2:16:02<57:23, 15.35

step: 38120, loss: 87.14851860732767, epoch: 2



 57%|██████████████████▏             | 69518/122310 [2:16:10<1:26:00, 10.23it/s][A
 57%|██████████████████▏             | 69538/122310 [2:16:11<1:03:25, 13.87it/s][A
 57%|██████████████████▏             | 69552/122310 [2:16:12<1:01:16, 14.35it/s][A
 57%|██████████████████▏             | 69562/122310 [2:16:13<1:05:20, 13.46it/s][A
 57%|██████████████████▏             | 69571/122310 [2:16:14<1:10:35, 12.45it/s][A
 57%|██████████████████▏             | 69576/122310 [2:16:14<1:24:22, 10.42it/s][A
 57%|██████████████████▏             | 69584/122310 [2:16:15<1:27:45, 10.01it/s][A
 57%|██████████████████▏             | 69593/122310 [2:16:16<1:27:16, 10.07it/s][A
 57%|██████████████████▏             | 69610/122310 [2:16:17<1:08:29, 12.82it/s][A
 57%|██████████████████▏             | 69614/122310 [2:16:18<1:25:18, 10.29it/s][A
 57%|██████████████████▏             | 69624/122310 [2:16:19<1:22:39, 10.62it/s][A
 57%|██████████████████▏             | 69638/122310 [2:16:20<1:11:59, 12.19

step: 38140, loss: 82.85928522812038, epoch: 2



 57%|██████████████████▏             | 69729/122310 [2:16:28<1:40:14,  8.74it/s][A
 57%|██████████████████▏             | 69742/122310 [2:16:29<1:24:29, 10.37it/s][A
 57%|██████████████████▎             | 69757/122310 [2:16:30<1:11:08, 12.31it/s][A
 57%|██████████████████▎             | 69767/122310 [2:16:31<1:12:42, 12.04it/s][A
 57%|██████████████████▎             | 69783/122310 [2:16:31<1:03:12, 13.85it/s][A
 57%|██████████████████▎             | 69791/122310 [2:16:32<1:10:25, 12.43it/s][A
 57%|██████████████████▎             | 69804/122310 [2:16:33<1:06:46, 13.11it/s][A
 57%|██████████████████▎             | 69813/122310 [2:16:34<1:12:00, 12.15it/s][A
 57%|██████████████████▎             | 69826/122310 [2:16:35<1:07:46, 12.91it/s][A
 57%|██████████████████▎             | 69828/122310 [2:16:36<1:29:49,  9.74it/s][A
 57%|██████████████████▎             | 69838/122310 [2:16:37<1:25:34, 10.22it/s][A
 57%|██████████████████▎             | 69853/122310 [2:16:38<1:11:24, 12.24

step: 38160, loss: 76.79584080393244, epoch: 2



 57%|██████████████████▎             | 69895/122310 [2:16:46<2:43:53,  5.33it/s][A
 57%|██████████████████▎             | 69909/122310 [2:16:47<1:47:34,  8.12it/s][A
 57%|██████████████████▎             | 69916/122310 [2:16:47<1:48:04,  8.08it/s][A
 57%|██████████████████▎             | 69923/122310 [2:16:48<1:49:03,  8.01it/s][A
 57%|██████████████████▎             | 69927/122310 [2:16:49<2:03:59,  7.04it/s][A
 57%|██████████████████▎             | 69943/122310 [2:16:50<1:25:45, 10.18it/s][A
 57%|██████████████████▎             | 69951/122310 [2:16:51<1:29:22,  9.76it/s][A
 57%|██████████████████▎             | 69956/122310 [2:16:52<1:41:37,  8.59it/s][A
 57%|██████████████████▎             | 69966/122310 [2:16:53<1:33:23,  9.34it/s][A
 57%|██████████████████▎             | 69973/122310 [2:16:54<1:38:17,  8.87it/s][A
 57%|██████████████████▎             | 69982/122310 [2:16:55<1:34:50,  9.20it/s][A
 57%|██████████████████▎             | 69993/122310 [2:16:56<1:26:19, 10.10

step: 38180, loss: 73.5992020931761, epoch: 2



 57%|██████████████████▎             | 70070/122310 [2:17:04<1:16:35, 11.37it/s][A
 57%|███████████████████▍              | 70090/122310 [2:17:04<59:04, 14.73it/s][A
 57%|███████████████████▍              | 70103/122310 [2:17:05<59:03, 14.73it/s][A
 57%|██████████████████▎             | 70112/122310 [2:17:07<1:26:07, 10.10it/s][A
 57%|██████████████████▎             | 70116/122310 [2:17:08<1:40:18,  8.67it/s][A
 57%|██████████████████▎             | 70121/122310 [2:17:09<1:50:34,  7.87it/s][A
 57%|██████████████████▎             | 70122/122310 [2:17:10<2:24:13,  6.03it/s][A
 57%|██████████████████▎             | 70128/122310 [2:17:11<2:19:55,  6.22it/s][A
 57%|██████████████████▎             | 70137/122310 [2:17:12<1:59:21,  7.29it/s][A
 57%|██████████████████▎             | 70153/122310 [2:17:13<1:24:01, 10.35it/s][A
 57%|██████████████████▎             | 70159/122310 [2:17:13<1:33:07,  9.33it/s][A
 57%|██████████████████▎             | 70166/122310 [2:17:14<1:37:32,  8.91

step: 38200, loss: 67.04041118973969, epoch: 2



 57%|██████████████████▍             | 70245/122310 [2:17:22<1:08:39, 12.64it/s][A
 57%|██████████████████▍             | 70247/122310 [2:17:22<1:30:43,  9.56it/s][A
 57%|██████████████████▍             | 70261/122310 [2:17:23<1:16:06, 11.40it/s][A
 57%|██████████████████▍             | 70270/122310 [2:17:24<1:18:52, 11.00it/s][A
 57%|██████████████████▍             | 70272/122310 [2:17:25<1:44:54,  8.27it/s][A
 57%|██████████████████▍             | 70276/122310 [2:17:26<2:02:20,  7.09it/s][A
 57%|██████████████████▍             | 70287/122310 [2:17:27<1:41:12,  8.57it/s][A
 57%|██████████████████▍             | 70295/122310 [2:17:28<1:40:15,  8.65it/s][A
 57%|██████████████████▍             | 70305/122310 [2:17:29<1:32:31,  9.37it/s][A
 57%|██████████████████▍             | 70310/122310 [2:17:30<1:44:43,  8.28it/s][A
 57%|██████████████████▍             | 70316/122310 [2:17:31<1:50:49,  7.82it/s][A
 57%|██████████████████▍             | 70328/122310 [2:17:31<1:30:58,  9.52

step: 38220, loss: 73.99473962210504, epoch: 2



 58%|██████████████████▍             | 70419/122310 [2:17:40<1:12:22, 11.95it/s][A
 58%|██████████████████▍             | 70422/122310 [2:17:40<1:31:59,  9.40it/s][A
 58%|██████████████████▍             | 70432/122310 [2:17:41<1:28:28,  9.77it/s][A
 58%|██████████████████▍             | 70441/122310 [2:17:42<1:27:43,  9.86it/s][A
 58%|██████████████████▍             | 70449/122310 [2:17:48<3:42:56,  3.88it/s][A
 58%|██████████████████▍             | 70455/122310 [2:17:48<3:20:56,  4.30it/s][A
 58%|██████████████████▍             | 70460/122310 [2:17:49<3:10:44,  4.53it/s][A
 58%|██████████████████▍             | 70479/122310 [2:17:50<1:51:02,  7.78it/s][A
 58%|██████████████████▍             | 70494/122310 [2:17:51<1:28:40,  9.74it/s][A
 58%|██████████████████▍             | 70508/122310 [2:17:52<1:17:23, 11.16it/s][A
 58%|██████████████████▍             | 70518/122310 [2:17:53<1:17:17, 11.17it/s][A
 58%|██████████████████▍             | 70530/122310 [2:17:54<1:13:19, 11.77

step: 38240, loss: 77.79857062254763, epoch: 2



 58%|██████████████████▍             | 70563/122310 [2:17:57<1:26:22,  9.98it/s][A
 58%|██████████████████▍             | 70567/122310 [2:17:58<1:42:32,  8.41it/s][A
 58%|██████████████████▍             | 70573/122310 [2:17:59<1:49:21,  7.88it/s][A
 58%|██████████████████▍             | 70581/122310 [2:18:00<1:44:47,  8.23it/s][A
 58%|██████████████████▍             | 70588/122310 [2:18:01<1:46:27,  8.10it/s][A
 58%|██████████████████▍             | 70593/122310 [2:18:02<1:56:43,  7.38it/s][A
 58%|██████████████████▍             | 70600/122310 [2:18:03<1:54:09,  7.55it/s][A
 58%|██████████████████▍             | 70614/122310 [2:18:04<1:26:01, 10.02it/s][A
 58%|██████████████████▍             | 70623/122310 [2:18:05<1:25:46, 10.04it/s][A
 58%|██████████████████▍             | 70636/122310 [2:18:05<1:15:16, 11.44it/s][A
 58%|██████████████████▍             | 70651/122310 [2:18:06<1:05:59, 13.05it/s][A
 58%|██████████████████▍             | 70660/122310 [2:18:07<1:10:42, 12.17

step: 38260, loss: 68.67521931458622, epoch: 2



 58%|██████████████████▌             | 70746/122310 [2:18:15<1:02:12, 13.82it/s][A
 58%|██████████████████▌             | 70749/122310 [2:18:16<1:20:10, 10.72it/s][A
 58%|██████████████████▌             | 70767/122310 [2:18:17<1:03:21, 13.56it/s][A
 58%|██████████████████▌             | 70773/122310 [2:18:18<1:14:53, 11.47it/s][A
 58%|██████████████████▌             | 70786/122310 [2:18:19<1:09:09, 12.42it/s][A
 58%|██████████████████▌             | 70798/122310 [2:18:20<1:07:19, 12.75it/s][A
 58%|██████████████████▌             | 70811/122310 [2:18:21<1:04:25, 13.32it/s][A
 58%|██████████████████▌             | 70823/122310 [2:18:22<1:04:10, 13.37it/s][A
 58%|██████████████████▌             | 70837/122310 [2:18:22<1:00:44, 14.12it/s][A
 58%|██████████████████▌             | 70846/122310 [2:18:23<1:06:32, 12.89it/s][A
 58%|██████████████████▌             | 70856/122310 [2:18:24<1:09:43, 12.30it/s][A
 58%|██████████████████▌             | 70859/122310 [2:18:25<1:28:41,  9.67

step: 38280, loss: 82.16933702475042, epoch: 2



 58%|██████████████████▌             | 70946/122310 [2:18:33<1:14:38, 11.47it/s][A
 58%|██████████████████▌             | 70953/122310 [2:18:34<1:22:18, 10.40it/s][A
 58%|██████████████████▌             | 70961/122310 [2:18:35<1:25:28, 10.01it/s][A
 58%|██████████████████▌             | 70973/122310 [2:18:36<1:17:28, 11.04it/s][A
 58%|██████████████████▌             | 70978/122310 [2:18:37<1:30:31,  9.45it/s][A
 58%|██████████████████▌             | 70983/122310 [2:18:38<1:42:52,  8.32it/s][A
 58%|██████████████████▌             | 70991/122310 [2:18:38<1:40:37,  8.50it/s][A
 58%|██████████████████▌             | 70997/122310 [2:18:39<1:48:43,  7.87it/s][A
 58%|██████████████████▌             | 71005/122310 [2:18:42<2:47:19,  5.11it/s][A
 58%|██████████████████▌             | 71012/122310 [2:18:43<2:30:48,  5.67it/s][A
 58%|██████████████████▌             | 71016/122310 [2:18:44<2:38:07,  5.41it/s][A
 58%|██████████████████▌             | 71024/122310 [2:18:45<2:15:34,  6.31

step: 38300, loss: 79.12581417621396, epoch: 2



 58%|██████████████████▌             | 71110/122310 [2:18:51<1:07:57, 12.56it/s][A
 58%|██████████████████▌             | 71113/122310 [2:18:52<1:26:10,  9.90it/s][A
 58%|██████████████████▌             | 71126/122310 [2:18:53<1:15:27, 11.31it/s][A
 58%|██████████████████▌             | 71140/122310 [2:18:54<1:07:47, 12.58it/s][A
 58%|██████████████████▌             | 71144/122310 [2:18:55<1:24:01, 10.15it/s][A
 58%|██████████████████▌             | 71157/122310 [2:18:55<1:14:52, 11.39it/s][A
 58%|██████████████████▌             | 71174/122310 [2:18:56<1:02:19, 13.67it/s][A
 58%|██████████████████▌             | 71179/122310 [2:18:57<1:15:42, 11.26it/s][A
 58%|██████████████████▌             | 71188/122310 [2:18:58<1:18:31, 10.85it/s][A
 58%|██████████████████▋             | 71197/122310 [2:18:59<1:20:38, 10.56it/s][A
 58%|██████████████████▋             | 71201/122310 [2:19:00<1:37:07,  8.77it/s][A
 58%|██████████████████▋             | 71213/122310 [2:19:01<1:23:32, 10.19

step: 38320, loss: 96.22456434481629, epoch: 2



 58%|██████████████████▋             | 71291/122310 [2:19:09<1:30:42,  9.37it/s][A
 58%|██████████████████▋             | 71298/122310 [2:19:10<1:35:29,  8.90it/s][A
 58%|██████████████████▋             | 71302/122310 [2:19:11<1:52:07,  7.58it/s][A
 58%|██████████████████▋             | 71314/122310 [2:19:12<1:31:14,  9.32it/s][A
 58%|██████████████████▋             | 71323/122310 [2:19:12<1:29:28,  9.50it/s][A
 58%|██████████████████▋             | 71338/122310 [2:19:13<1:12:46, 11.67it/s][A
 58%|██████████████████▋             | 71351/122310 [2:19:14<1:07:49, 12.52it/s][A
 58%|██████████████████▋             | 71353/122310 [2:19:15<1:29:52,  9.45it/s][A
 58%|██████████████████▋             | 71360/122310 [2:19:16<1:34:36,  8.98it/s][A
 58%|██████████████████▋             | 71373/122310 [2:19:17<1:21:26, 10.42it/s][A
 58%|██████████████████▋             | 71379/122310 [2:19:18<1:31:06,  9.32it/s][A
 58%|██████████████████▋             | 71384/122310 [2:19:19<1:43:24,  8.21

step: 38340, loss: 67.36564273689885, epoch: 2



 58%|██████████████████▋             | 71454/122310 [2:19:27<1:46:27,  7.96it/s][A
 58%|██████████████████▋             | 71467/122310 [2:19:28<1:25:24,  9.92it/s][A
 58%|██████████████████▋             | 71471/122310 [2:19:29<1:42:28,  8.27it/s][A
 58%|██████████████████▋             | 71477/122310 [2:19:30<1:48:16,  7.83it/s][A
 58%|██████████████████▋             | 71486/122310 [2:19:30<1:39:59,  8.47it/s][A
 58%|██████████████████▋             | 71491/122310 [2:19:31<1:50:49,  7.64it/s][A
 58%|██████████████████▋             | 71494/122310 [2:19:32<2:13:07,  6.36it/s][A
 58%|██████████████████▋             | 71505/122310 [2:19:33<1:43:38,  8.17it/s][A
 58%|██████████████████▋             | 71515/122310 [2:19:34<1:33:17,  9.07it/s][A
 58%|██████████████████▋             | 71517/122310 [2:19:35<1:59:54,  7.06it/s][A
 58%|██████████████████▋             | 71524/122310 [2:19:36<1:55:36,  7.32it/s][A
 58%|██████████████████▋             | 71530/122310 [2:19:37<1:58:20,  7.15

step: 38360, loss: 71.31656715542907, epoch: 2



 59%|██████████████████▋             | 71626/122310 [2:19:45<1:23:06, 10.16it/s][A
 59%|██████████████████▋             | 71637/122310 [2:19:46<1:17:57, 10.83it/s][A
 59%|██████████████████▋             | 71645/122310 [2:19:47<1:22:01, 10.29it/s][A
 59%|██████████████████▋             | 71651/122310 [2:19:47<1:31:07,  9.27it/s][A
 59%|██████████████████▋             | 71661/122310 [2:19:48<1:25:38,  9.86it/s][A
 59%|██████████████████▊             | 71668/122310 [2:19:49<1:31:54,  9.18it/s][A
 59%|██████████████████▊             | 71669/122310 [2:19:50<2:03:31,  6.83it/s][A
 59%|██████████████████▊             | 71676/122310 [2:19:51<1:59:01,  7.09it/s][A
 59%|██████████████████▊             | 71681/122310 [2:19:52<2:07:35,  6.61it/s][A
 59%|██████████████████▊             | 71689/122310 [2:19:53<1:55:18,  7.32it/s][A
 59%|██████████████████▊             | 71698/122310 [2:19:54<1:44:10,  8.10it/s][A
 59%|██████████████████▊             | 71706/122310 [2:19:55<1:41:26,  8.31

step: 38380, loss: 74.40918652900298, epoch: 2



 59%|██████████████████▊             | 71780/122310 [2:20:03<1:35:27,  8.82it/s][A
 59%|██████████████████▊             | 71797/122310 [2:20:04<1:10:43, 11.90it/s][A
 59%|██████████████████▊             | 71804/122310 [2:20:05<1:19:07, 10.64it/s][A
 59%|██████████████████▊             | 71808/122310 [2:20:05<1:35:18,  8.83it/s][A
 59%|██████████████████▊             | 71812/122310 [2:20:06<1:51:28,  7.55it/s][A
 59%|██████████████████▊             | 71820/122310 [2:20:07<1:45:53,  7.95it/s][A
 59%|██████████████████▊             | 71824/122310 [2:20:08<2:02:05,  6.89it/s][A
 59%|██████████████████▊             | 71831/122310 [2:20:09<1:57:21,  7.17it/s][A
 59%|██████████████████▊             | 71835/122310 [2:20:10<2:13:10,  6.32it/s][A
 59%|██████████████████▊             | 71842/122310 [2:20:11<2:05:18,  6.71it/s][A
 59%|██████████████████▊             | 71847/122310 [2:20:12<2:11:43,  6.39it/s][A
 59%|██████████████████▊             | 71856/122310 [2:20:13<1:51:56,  7.51

step: 38400, loss: 89.88661756646168, epoch: 2
sim1 and sim2 are 0.43476700197827983, 0.1981994210012501
cosine of pred and queen: 0.16022818630309563
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: spans
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: europe
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: 

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: thaws
Actual: man:woman::brothers:sisters, pred: multiplying
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: mumbai
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: state
Actual: tripura:agartala::odisha:bhubaneswar, pred: bhubaneswar
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 59%|██████████████████▏            | 71928/122310 [2:21:50<37:36:50,  2.69s/it][A

Actual: india:rupee::denmark:krone, pred: netherlands
Accuracy is 0.15976331360946747



 59%|██████████████████▏            | 71938/122310 [2:21:51<27:01:18,  1.93s/it][A
 59%|██████████████████▏            | 71945/122310 [2:21:52<21:21:19,  1.53s/it][A
 59%|██████████████████▏            | 71958/122310 [2:21:53<13:43:30,  1.02it/s][A
 59%|██████████████████▏            | 71966/122310 [2:21:54<10:43:19,  1.30it/s][A
 59%|██████████████████▊             | 71974/122310 [2:21:55<8:20:29,  1.68it/s][A
 59%|██████████████████▊             | 71981/122310 [2:21:56<6:55:52,  2.02it/s][A
 59%|██████████████████▊             | 71990/122310 [2:21:57<5:22:21,  2.60it/s][A
 59%|██████████████████▊             | 71995/122310 [2:21:59<4:55:14,  2.84it/s][A
 59%|██████████████████▊             | 72000/122310 [2:22:00<4:30:42,  3.10it/s][A
 59%|██████████████████▊             | 72009/122310 [2:22:01<3:30:37,  3.98it/s][A
 59%|██████████████████▊             | 72017/122310 [2:22:02<2:58:50,  4.69it/s][A
 59%|██████████████████▊             | 72020/122310 [2:22:03<3:12:01,  4.36

step: 38420, loss: 67.79864243828975, epoch: 2



 59%|██████████████████▊             | 72091/122310 [2:22:11<1:40:39,  8.31it/s][A
 59%|██████████████████▊             | 72102/122310 [2:22:12<1:28:23,  9.47it/s][A
 59%|██████████████████▊             | 72113/122310 [2:22:13<1:21:11, 10.30it/s][A
 59%|██████████████████▊             | 72120/122310 [2:22:14<1:27:14,  9.59it/s][A
 59%|██████████████████▊             | 72124/122310 [2:22:14<1:43:38,  8.07it/s][A
 59%|██████████████████▊             | 72128/122310 [2:22:15<1:59:34,  6.99it/s][A
 59%|██████████████████▊             | 72139/122310 [2:22:16<1:37:08,  8.61it/s][A
 59%|██████████████████▉             | 72148/122310 [2:22:17<1:32:06,  9.08it/s][A
 59%|██████████████████▉             | 72156/122310 [2:22:18<1:32:26,  9.04it/s][A
 59%|██████████████████▉             | 72174/122310 [2:22:19<1:07:13, 12.43it/s][A
 59%|██████████████████▉             | 72186/122310 [2:22:20<1:05:23, 12.78it/s][A
 59%|██████████████████▉             | 72198/122310 [2:22:21<1:04:09, 13.02

step: 38440, loss: 70.04261950380231, epoch: 2



 59%|██████████████████▉             | 72280/122310 [2:22:29<1:14:42, 11.16it/s][A
 59%|██████████████████▉             | 72286/122310 [2:22:30<1:24:39,  9.85it/s][A
 59%|██████████████████▉             | 72298/122310 [2:22:30<1:15:52, 10.99it/s][A
 59%|██████████████████▉             | 72306/122310 [2:22:31<1:20:02, 10.41it/s][A
 59%|██████████████████▉             | 72312/122310 [2:22:32<1:29:24,  9.32it/s][A
 59%|██████████████████▉             | 72320/122310 [2:22:33<1:30:09,  9.24it/s][A
 59%|██████████████████▉             | 72330/122310 [2:22:34<1:24:22,  9.87it/s][A
 59%|██████████████████▉             | 72340/122310 [2:22:35<1:20:46, 10.31it/s][A
 59%|██████████████████▉             | 72358/122310 [2:22:36<1:02:29, 13.32it/s][A
 59%|██████████████████▉             | 72369/122310 [2:22:37<1:03:45, 13.05it/s][A
 59%|██████████████████▉             | 72378/122310 [2:22:38<1:08:16, 12.19it/s][A
 59%|██████████████████▉             | 72386/122310 [2:22:38<1:14:04, 11.23

step: 38460, loss: 85.68851254603742, epoch: 2



 59%|██████████████████▉             | 72474/122310 [2:22:46<1:13:57, 11.23it/s][A
 59%|██████████████████▉             | 72481/122310 [2:22:47<1:21:10, 10.23it/s][A
 59%|██████████████████▉             | 72488/122310 [2:22:48<1:27:10,  9.53it/s][A
 59%|██████████████████▉             | 72498/122310 [2:22:49<1:22:41, 10.04it/s][A
 59%|██████████████████▉             | 72504/122310 [2:22:50<1:32:00,  9.02it/s][A
 59%|██████████████████▉             | 72512/122310 [2:22:51<1:32:00,  9.02it/s][A
 59%|██████████████████▉             | 72520/122310 [2:22:52<1:31:47,  9.04it/s][A
 59%|██████████████████▉             | 72529/122310 [2:22:53<1:28:56,  9.33it/s][A
 59%|██████████████████▉             | 72541/122310 [2:22:54<1:18:36, 10.55it/s][A
 59%|██████████████████▉             | 72554/122310 [2:22:55<1:10:55, 11.69it/s][A
 59%|██████████████████▉             | 72559/122310 [2:22:55<1:24:13,  9.85it/s][A
 59%|██████████████████▉             | 72564/122310 [2:22:56<1:36:42,  8.57

step: 38480, loss: 64.84154680458923, epoch: 2



 59%|███████████████████             | 72640/122310 [2:23:04<1:04:38, 12.81it/s][A
 59%|███████████████████             | 72650/122310 [2:23:05<1:06:34, 12.43it/s][A
 59%|███████████████████             | 72658/122310 [2:23:06<1:11:52, 11.51it/s][A
 59%|███████████████████             | 72671/122310 [2:23:07<1:05:56, 12.55it/s][A
 59%|███████████████████             | 72677/122310 [2:23:08<1:16:18, 10.84it/s][A
 59%|███████████████████             | 72690/122310 [2:23:09<1:09:14, 11.94it/s][A
 59%|███████████████████             | 72692/122310 [2:23:10<1:58:06,  7.00it/s][A
 59%|███████████████████             | 72697/122310 [2:23:11<2:06:04,  6.56it/s][A
 59%|███████████████████             | 72708/122310 [2:23:12<1:42:22,  8.08it/s][A
 59%|███████████████████             | 72714/122310 [2:23:13<1:46:31,  7.76it/s][A
 59%|███████████████████             | 72720/122310 [2:23:14<1:50:26,  7.48it/s][A
 59%|███████████████████             | 72724/122310 [2:23:15<2:04:53,  6.62

step: 38500, loss: 78.42933736854334, epoch: 2



 60%|███████████████████             | 72784/122310 [2:23:22<1:57:27,  7.03it/s][A
 60%|███████████████████             | 72785/122310 [2:23:24<3:31:02,  3.91it/s][A
 60%|███████████████████             | 72791/122310 [2:23:25<3:04:37,  4.47it/s][A
 60%|███████████████████             | 72804/122310 [2:23:26<2:00:57,  6.82it/s][A
 60%|███████████████████             | 72814/122310 [2:23:27<1:46:09,  7.77it/s][A
 60%|███████████████████             | 72829/122310 [2:23:29<1:45:14,  7.84it/s][A
 60%|███████████████████             | 72835/122310 [2:23:30<1:50:21,  7.47it/s][A
 60%|███████████████████             | 72838/122310 [2:23:31<2:09:05,  6.39it/s][A
 60%|███████████████████             | 72848/122310 [2:23:32<1:50:27,  7.46it/s][A
 60%|███████████████████             | 72853/122310 [2:23:33<2:00:15,  6.85it/s][A
 60%|███████████████████             | 72861/122310 [2:23:34<1:52:42,  7.31it/s][A
 60%|███████████████████             | 72877/122310 [2:23:35<1:22:36,  9.97

step: 38520, loss: 82.61770935680649, epoch: 2



 60%|███████████████████             | 72951/122310 [2:23:41<1:09:17, 11.87it/s][A
 60%|███████████████████             | 72963/122310 [2:23:42<1:07:56, 12.11it/s][A
 60%|███████████████████             | 72973/122310 [2:23:43<1:10:56, 11.59it/s][A
 60%|███████████████████             | 72978/122310 [2:23:44<1:24:19,  9.75it/s][A
 60%|███████████████████             | 72985/122310 [2:23:45<1:30:39,  9.07it/s][A
 60%|███████████████████             | 72999/122310 [2:23:46<1:16:05, 10.80it/s][A
 60%|███████████████████             | 73003/122310 [2:23:47<1:32:58,  8.84it/s][A
 60%|███████████████████             | 73007/122310 [2:23:48<1:50:57,  7.41it/s][A
 60%|███████████████████             | 73008/122310 [2:23:49<2:30:03,  5.48it/s][A
 60%|███████████████████             | 73017/122310 [2:23:50<2:02:58,  6.68it/s][A
 60%|███████████████████             | 73026/122310 [2:23:51<1:49:06,  7.53it/s][A
 60%|███████████████████             | 73032/122310 [2:23:52<1:54:39,  7.16

step: 38540, loss: 87.16635051876092, epoch: 2



 60%|███████████████████▏            | 73114/122310 [2:24:01<1:40:13,  8.18it/s][A
 60%|███████████████████▏            | 73121/122310 [2:24:02<1:44:43,  7.83it/s][A
 60%|███████████████████▏            | 73126/122310 [2:24:03<1:55:41,  7.09it/s][A
 60%|███████████████████▏            | 73139/122310 [2:24:04<1:31:20,  8.97it/s][A
 60%|███████████████████▏            | 73145/122310 [2:24:05<1:41:27,  8.08it/s][A
 60%|███████████████████▏            | 73155/122310 [2:24:07<2:23:01,  5.73it/s][A
 60%|███████████████████▏            | 73162/122310 [2:24:08<2:17:13,  5.97it/s][A
 60%|███████████████████▏            | 73178/122310 [2:24:09<1:39:48,  8.20it/s][A
 60%|███████████████████▏            | 73186/122310 [2:24:10<1:41:51,  8.04it/s][A
 60%|███████████████████▏            | 73197/122310 [2:24:11<1:34:00,  8.71it/s][A
 60%|███████████████████▏            | 73205/122310 [2:24:12<1:36:09,  8.51it/s][A
 60%|███████████████████▏            | 73213/122310 [2:24:13<1:35:13,  8.59

step: 38560, loss: 71.62872716501955, epoch: 2



 60%|███████████████████▏            | 73294/122310 [2:24:22<1:59:57,  6.81it/s][A
 60%|███████████████████▏            | 73296/122310 [2:24:24<3:25:40,  3.97it/s][A
 60%|███████████████████▏            | 73304/122310 [2:24:26<2:59:34,  4.55it/s][A
 60%|███████████████████▏            | 73314/122310 [2:24:27<2:30:17,  5.43it/s][A
 60%|███████████████████▏            | 73326/122310 [2:24:28<2:07:09,  6.42it/s][A
 60%|███████████████████▏            | 73336/122310 [2:24:29<1:58:02,  6.92it/s][A
 60%|███████████████████▏            | 73344/122310 [2:24:31<1:59:21,  6.84it/s][A
 60%|███████████████████▏            | 73358/122310 [2:24:32<1:40:41,  8.10it/s][A
 60%|███████████████████▏            | 73367/122310 [2:24:33<1:42:08,  7.99it/s][A
 60%|███████████████████▏            | 73371/122310 [2:24:34<2:01:17,  6.72it/s][A
 60%|███████████████████▏            | 73384/122310 [2:24:35<1:38:21,  8.29it/s][A
 60%|███████████████████▏            | 73396/122310 [2:24:36<1:26:25,  9.43

step: 38580, loss: 74.27291013888757, epoch: 2



 60%|███████████████████▏            | 73464/122310 [2:24:44<1:21:20, 10.01it/s][A
 60%|███████████████████▏            | 73475/122310 [2:24:45<1:17:49, 10.46it/s][A
 60%|███████████████████▏            | 73481/122310 [2:24:46<1:28:50,  9.16it/s][A
 60%|███████████████████▏            | 73486/122310 [2:24:47<1:42:28,  7.94it/s][A
 60%|███████████████████▏            | 73493/122310 [2:24:48<1:43:55,  7.83it/s][A
 60%|███████████████████▏            | 73511/122310 [2:24:49<1:14:20, 10.94it/s][A
 60%|███████████████████▏            | 73518/122310 [2:24:50<1:22:23,  9.87it/s][A
 60%|███████████████████▏            | 73524/122310 [2:24:51<1:30:35,  8.98it/s][A
 60%|███████████████████▏            | 73531/122310 [2:24:52<1:37:15,  8.36it/s][A
 60%|███████████████████▏            | 73537/122310 [2:24:53<1:45:21,  7.72it/s][A
 60%|███████████████████▏            | 73551/122310 [2:24:54<1:23:10,  9.77it/s][A
 60%|███████████████████▏            | 73558/122310 [2:24:54<1:28:38,  9.17

step: 38600, loss: 86.72419372943746, epoch: 2



 60%|███████████████████▎            | 73630/122310 [2:25:03<1:57:20,  6.91it/s][A
 60%|███████████████████▎            | 73645/122310 [2:25:04<1:25:35,  9.48it/s][A
 60%|███████████████████▎            | 73649/122310 [2:25:05<1:40:59,  8.03it/s][A
 60%|███████████████████▎            | 73658/122310 [2:25:06<1:37:44,  8.30it/s][A
 60%|███████████████████▎            | 73665/122310 [2:25:07<1:38:37,  8.22it/s][A
 60%|███████████████████▎            | 73685/122310 [2:25:08<1:07:39, 11.98it/s][A
 60%|███████████████████▎            | 73697/122310 [2:25:09<1:05:09, 12.44it/s][A
 60%|████████████████████▍             | 73718/122310 [2:25:10<52:27, 15.44it/s][A
 60%|████████████████████▍             | 73726/122310 [2:25:11<59:49, 13.54it/s][A
 60%|███████████████████▎            | 73733/122310 [2:25:12<1:09:35, 11.63it/s][A
 60%|███████████████████▎            | 73744/122310 [2:25:13<1:10:13, 11.53it/s][A
 60%|███████████████████▎            | 73755/122310 [2:25:15<1:48:31,  7.46

step: 38620, loss: 73.3885827043224, epoch: 2



 60%|███████████████████▎            | 73816/122310 [2:25:22<1:41:17,  7.98it/s][A
 60%|███████████████████▎            | 73829/122310 [2:25:23<1:21:22,  9.93it/s][A
 60%|███████████████████▎            | 73837/122310 [2:25:23<1:25:01,  9.50it/s][A
 60%|███████████████████▎            | 73844/122310 [2:25:24<1:29:32,  9.02it/s][A
 60%|███████████████████▎            | 73854/122310 [2:25:25<1:25:19,  9.46it/s][A
 60%|███████████████████▎            | 73863/122310 [2:25:26<1:23:48,  9.63it/s][A
 60%|███████████████████▎            | 73867/122310 [2:25:27<1:41:37,  7.94it/s][A
 60%|███████████████████▎            | 73868/122310 [2:25:28<2:13:55,  6.03it/s][A
 60%|███████████████████▎            | 73881/122310 [2:25:29<1:36:38,  8.35it/s][A
 60%|███████████████████▎            | 73894/122310 [2:25:30<1:18:59, 10.22it/s][A
 60%|███████████████████▎            | 73898/122310 [2:25:31<1:36:43,  8.34it/s][A
 60%|███████████████████▎            | 73902/122310 [2:25:32<1:51:27,  7.24

step: 38640, loss: 73.58344229818107, epoch: 2



 60%|███████████████████▎            | 73969/122310 [2:25:40<1:24:08,  9.57it/s][A
 60%|███████████████████▎            | 73972/122310 [2:25:41<1:45:27,  7.64it/s][A
 60%|███████████████████▎            | 73977/122310 [2:25:42<1:53:53,  7.07it/s][A
 60%|███████████████████▎            | 73985/122310 [2:25:43<1:47:06,  7.52it/s][A
 61%|███████████████████▎            | 74001/122310 [2:25:44<1:18:06, 10.31it/s][A
 61%|███████████████████▎            | 74011/122310 [2:25:45<1:17:38, 10.37it/s][A
 61%|███████████████████▎            | 74022/122310 [2:25:46<1:13:44, 10.91it/s][A
 61%|███████████████████▎            | 74032/122310 [2:25:47<1:13:56, 10.88it/s][A
 61%|███████████████████▎            | 74041/122310 [2:25:47<1:15:58, 10.59it/s][A
 61%|███████████████████▍            | 74055/122310 [2:25:48<1:07:32, 11.91it/s][A
 61%|███████████████████▍            | 74067/122310 [2:25:49<1:05:37, 12.25it/s][A
 61%|███████████████████▍            | 74075/122310 [2:25:50<1:12:09, 11.14

step: 38660, loss: 87.42361220979733, epoch: 2



 61%|███████████████████▍            | 74163/122310 [2:25:59<1:10:57, 11.31it/s][A
 61%|███████████████████▍            | 74166/122310 [2:25:59<1:28:32,  9.06it/s][A
 61%|███████████████████▍            | 74173/122310 [2:26:00<1:32:52,  8.64it/s][A
 61%|███████████████████▍            | 74183/122310 [2:26:01<1:25:18,  9.40it/s][A
 61%|███████████████████▍            | 74191/122310 [2:26:02<1:29:17,  8.98it/s][A
 61%|███████████████████▍            | 74195/122310 [2:26:03<1:44:01,  7.71it/s][A
 61%|███████████████████▍            | 74202/122310 [2:26:05<2:15:40,  5.91it/s][A
 61%|███████████████████▍            | 74211/122310 [2:26:06<1:56:51,  6.86it/s][A
 61%|███████████████████▍            | 74218/122310 [2:26:07<1:52:53,  7.10it/s][A
 61%|███████████████████▍            | 74231/122310 [2:26:08<1:29:13,  8.98it/s][A
 61%|███████████████████▍            | 74243/122310 [2:26:09<1:19:29, 10.08it/s][A
 61%|███████████████████▍            | 74248/122310 [2:26:09<1:30:40,  8.83

step: 38680, loss: 77.36759065318925, epoch: 2



 61%|███████████████████▍            | 74289/122310 [2:26:17<1:38:16,  8.14it/s][A
 61%|███████████████████▍            | 74301/122310 [2:26:18<1:24:57,  9.42it/s][A
 61%|███████████████████▍            | 74306/122310 [2:26:19<1:36:06,  8.32it/s][A
 61%|███████████████████▍            | 74310/122310 [2:26:20<1:52:59,  7.08it/s][A
 61%|███████████████████▍            | 74316/122310 [2:26:21<1:58:10,  6.77it/s][A
 61%|███████████████████▍            | 74326/122310 [2:26:22<1:41:57,  7.84it/s][A
 61%|███████████████████▍            | 74336/122310 [2:26:22<1:31:58,  8.69it/s][A
 61%|███████████████████▍            | 74342/122310 [2:26:23<1:40:17,  7.97it/s][A
 61%|███████████████████▍            | 74348/122310 [2:26:24<1:45:50,  7.55it/s][A
 61%|███████████████████▍            | 74359/122310 [2:26:25<1:31:25,  8.74it/s][A
 61%|███████████████████▍            | 74367/122310 [2:26:26<1:30:42,  8.81it/s][A
 61%|███████████████████▍            | 74377/122310 [2:26:27<1:26:23,  9.25

step: 38700, loss: 82.29041560261281, epoch: 2



 61%|███████████████████▍            | 74439/122310 [2:26:36<1:55:29,  6.91it/s][A
 61%|███████████████████▍            | 74447/122310 [2:26:37<1:48:12,  7.37it/s][A
 61%|███████████████████▍            | 74462/122310 [2:26:37<1:18:58, 10.10it/s][A
 61%|███████████████████▍            | 74467/122310 [2:26:38<1:32:26,  8.63it/s][A
 61%|███████████████████▍            | 74483/122310 [2:26:39<1:11:06, 11.21it/s][A
 61%|███████████████████▍            | 74487/122310 [2:26:40<1:25:55,  9.28it/s][A
 61%|███████████████████▍            | 74497/122310 [2:26:41<1:23:17,  9.57it/s][A
 61%|███████████████████▍            | 74506/122310 [2:26:42<1:23:06,  9.59it/s][A
 61%|███████████████████▍            | 74515/122310 [2:26:43<1:21:32,  9.77it/s][A
 61%|███████████████████▍            | 74525/122310 [2:26:44<1:17:19, 10.30it/s][A
 61%|███████████████████▍            | 74531/122310 [2:26:45<1:28:42,  8.98it/s][A
 61%|███████████████████▌            | 74541/122310 [2:26:46<1:23:05,  9.58

step: 38720, loss: 96.73706178142481, epoch: 2



 61%|███████████████████▌            | 74634/122310 [2:26:54<1:05:15, 12.17it/s][A
 61%|███████████████████▌            | 74647/122310 [2:26:55<1:01:55, 12.83it/s][A
 61%|███████████████████▌            | 74651/122310 [2:26:56<1:18:13, 10.15it/s][A
 61%|███████████████████▌            | 74669/122310 [2:26:57<1:00:49, 13.06it/s][A
 61%|███████████████████▌            | 74676/122310 [2:26:58<1:10:36, 11.24it/s][A
 61%|███████████████████▌            | 74684/122310 [2:26:59<1:15:04, 10.57it/s][A
 61%|███████████████████▌            | 74686/122310 [2:27:00<1:39:48,  7.95it/s][A
 61%|███████████████████▌            | 74688/122310 [2:27:01<2:06:33,  6.27it/s][A
 61%|███████████████████▌            | 74693/122310 [2:27:02<2:13:03,  5.96it/s][A
 61%|███████████████████▌            | 74698/122310 [2:27:03<2:16:37,  5.81it/s][A
 61%|███████████████████▌            | 74705/122310 [2:27:03<2:04:13,  6.39it/s][A
 61%|███████████████████▌            | 74716/122310 [2:27:04<1:36:47,  8.20

step: 38740, loss: 80.58558720514695, epoch: 2



 61%|███████████████████▌            | 74820/122310 [2:27:13<1:15:23, 10.50it/s][A
 61%|███████████████████▌            | 74826/122310 [2:27:14<1:24:10,  9.40it/s][A
 61%|███████████████████▌            | 74833/122310 [2:27:14<1:27:59,  8.99it/s][A
 61%|███████████████████▌            | 74843/122310 [2:27:15<1:21:29,  9.71it/s][A
 61%|████████████████████▊             | 74864/122310 [2:27:16<57:00, 13.87it/s][A
 61%|███████████████████▌            | 74872/122310 [2:27:17<1:03:32, 12.44it/s][A
 61%|███████████████████▌            | 74877/122310 [2:27:18<1:15:45, 10.44it/s][A
 61%|███████████████████▌            | 74881/122310 [2:27:19<1:34:26,  8.37it/s][A
 61%|███████████████████▌            | 74891/122310 [2:27:20<1:25:11,  9.28it/s][A
 61%|███████████████████▌            | 74898/122310 [2:27:21<1:29:19,  8.85it/s][A
 61%|███████████████████▌            | 74904/122310 [2:27:22<1:35:48,  8.25it/s][A
 61%|███████████████████▌            | 74914/122310 [2:27:22<1:25:56,  9.19

step: 38760, loss: 78.69119539817795, epoch: 2



 61%|███████████████████▌            | 75004/122310 [2:27:30<1:24:33,  9.32it/s][A
 61%|███████████████████▋            | 75019/122310 [2:27:32<1:11:46, 10.98it/s][A
 61%|███████████████████▋            | 75028/122310 [2:27:33<1:15:55, 10.38it/s][A
 61%|███████████████████▋            | 75041/122310 [2:27:33<1:07:44, 11.63it/s][A
 61%|███████████████████▋            | 75052/122310 [2:27:34<1:06:34, 11.83it/s][A
 61%|███████████████████▋            | 75060/122310 [2:27:35<1:15:27, 10.44it/s][A
 61%|███████████████████▋            | 75065/122310 [2:27:36<1:30:47,  8.67it/s][A
 61%|███████████████████▋            | 75077/122310 [2:27:37<1:21:10,  9.70it/s][A
 61%|███████████████████▋            | 75086/122310 [2:27:38<1:21:42,  9.63it/s][A
 61%|███████████████████▋            | 75090/122310 [2:27:39<1:38:30,  7.99it/s][A
 61%|███████████████████▋            | 75098/122310 [2:27:40<1:37:01,  8.11it/s][A
 61%|███████████████████▋            | 75105/122310 [2:27:41<1:40:18,  7.84

step: 38780, loss: 81.07920151063256, epoch: 2



 61%|███████████████████▋            | 75165/122310 [2:27:50<2:31:34,  5.18it/s][A
 61%|███████████████████▋            | 75176/122310 [2:27:52<2:03:10,  6.38it/s][A
 61%|███████████████████▋            | 75193/122310 [2:27:53<1:31:34,  8.58it/s][A
 61%|███████████████████▋            | 75199/122310 [2:27:54<1:42:08,  7.69it/s][A
 61%|███████████████████▋            | 75200/122310 [2:27:56<2:44:44,  4.77it/s][A
 61%|███████████████████▋            | 75206/122310 [2:27:57<2:33:48,  5.10it/s][A
 61%|███████████████████▋            | 75216/122310 [2:27:58<2:02:41,  6.40it/s][A
 62%|███████████████████▋            | 75221/122310 [2:27:59<2:08:31,  6.11it/s][A
 62%|███████████████████▋            | 75232/122310 [2:28:00<1:43:38,  7.57it/s][A
 62%|███████████████████▋            | 75237/122310 [2:28:01<1:51:50,  7.01it/s][A
 62%|███████████████████▋            | 75245/122310 [2:28:02<1:44:52,  7.48it/s][A
 62%|███████████████████▋            | 75254/122310 [2:28:03<1:38:25,  7.97

step: 38800, loss: 79.34871486934357, epoch: 2
sim1 and sim2 are 0.48675493063266007, 0.21833744822746703
cosine of pred and queen: 0.2020124055032845
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: scooper
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: europe
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: europe
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: 

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: son
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: neselius
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: bihar
Actual: tripura:agartala::odisha:bhubaneswar, pred: bhubaneswar
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 62%|███████████████████            | 75318/122310 [2:30:07<43:39:19,  3.34s/it][A

Actual: india:rupee::denmark:krone, pred: currency
Accuracy is 0.15976331360946747



 62%|███████████████████            | 75325/122310 [2:30:08<34:35:41,  2.65s/it][A
 62%|███████████████████            | 75342/122310 [2:30:09<20:05:09,  1.54s/it][A
 62%|███████████████████            | 75357/122310 [2:30:10<13:20:13,  1.02s/it][A
 62%|███████████████████            | 75367/122310 [2:30:11<10:19:24,  1.26it/s][A
 62%|███████████████████▋            | 75375/122310 [2:30:12<8:23:01,  1.56it/s][A
 62%|███████████████████▋            | 75381/122310 [2:30:13<7:11:34,  1.81it/s][A
 62%|███████████████████▋            | 75383/122310 [2:30:14<7:07:11,  1.83it/s][A
 62%|███████████████████▋            | 75397/122310 [2:30:15<4:15:54,  3.06it/s][A
 62%|███████████████████▋            | 75402/122310 [2:30:16<3:56:49,  3.30it/s][A
 62%|███████████████████▋            | 75413/122310 [2:30:17<2:54:15,  4.49it/s][A
 62%|███████████████████▋            | 75425/122310 [2:30:18<2:12:21,  5.90it/s][A
 62%|███████████████████▋            | 75429/122310 [2:30:19<2:20:15,  5.57

step: 38820, loss: 100.13398144494411, epoch: 2



 62%|███████████████████▋            | 75478/122310 [2:30:29<2:50:38,  4.57it/s][A
 62%|███████████████████▋            | 75483/122310 [2:30:30<2:56:04,  4.43it/s][A
 62%|███████████████████▊            | 75489/122310 [2:30:31<2:51:25,  4.55it/s][A
 62%|███████████████████▊            | 75494/122310 [2:30:32<2:54:26,  4.47it/s][A
 62%|███████████████████▊            | 75497/122310 [2:30:33<3:17:46,  3.94it/s][A
 62%|███████████████████▊            | 75505/122310 [2:30:35<2:42:22,  4.80it/s][A
 62%|███████████████████▊            | 75512/122310 [2:30:36<2:31:17,  5.16it/s][A
 62%|███████████████████▊            | 75525/122310 [2:30:37<1:53:20,  6.88it/s][A
 62%|███████████████████▊            | 75534/122310 [2:30:38<1:51:47,  6.97it/s][A
 62%|███████████████████▊            | 75537/122310 [2:30:39<2:18:49,  5.62it/s][A
 62%|███████████████████▊            | 75546/122310 [2:30:41<2:08:26,  6.07it/s][A
 62%|███████████████████▊            | 75552/122310 [2:30:42<2:15:16,  5.76

step: 38840, loss: 82.02771698146411, epoch: 2



 62%|███████████████████▊            | 75620/122310 [2:30:52<2:23:18,  5.43it/s][A
 62%|███████████████████▊            | 75628/122310 [2:30:54<2:14:18,  5.79it/s][A
 62%|███████████████████▊            | 75634/122310 [2:30:55<2:19:20,  5.58it/s][A
 62%|███████████████████▊            | 75643/122310 [2:30:56<2:06:55,  6.13it/s][A
 62%|███████████████████▊            | 75652/122310 [2:30:57<1:58:21,  6.57it/s][A
 62%|███████████████████▊            | 75661/122310 [2:30:58<1:52:21,  6.92it/s][A
 62%|███████████████████▊            | 75669/122310 [2:31:00<1:52:45,  6.89it/s][A
 62%|███████████████████▊            | 75687/122310 [2:31:01<1:22:09,  9.46it/s][A
 62%|███████████████████▊            | 75694/122310 [2:31:02<1:33:03,  8.35it/s][A
 62%|███████████████████▊            | 75703/122310 [2:31:03<1:36:15,  8.07it/s][A
 62%|███████████████████▊            | 75704/122310 [2:31:04<2:11:27,  5.91it/s][A
 62%|███████████████████▊            | 75711/122310 [2:31:06<2:12:01,  5.88

step: 38860, loss: 78.1207913941153, epoch: 2



 62%|███████████████████▊            | 75792/122310 [2:31:16<1:24:49,  9.14it/s][A
 62%|███████████████████▊            | 75801/122310 [2:31:17<1:30:41,  8.55it/s][A
 62%|███████████████████▊            | 75806/122310 [2:31:18<1:47:47,  7.19it/s][A
 62%|███████████████████▊            | 75814/122310 [2:31:20<1:51:12,  6.97it/s][A
 62%|███████████████████▊            | 75832/122310 [2:31:21<1:23:10,  9.31it/s][A
 62%|███████████████████▊            | 75839/122310 [2:31:22<1:33:54,  8.25it/s][A
 62%|███████████████████▊            | 75845/122310 [2:31:23<1:46:41,  7.26it/s][A
 62%|███████████████████▊            | 75855/122310 [2:31:24<1:43:06,  7.51it/s][A
 62%|███████████████████▊            | 75863/122310 [2:31:26<1:47:14,  7.22it/s][A
 62%|███████████████████▊            | 75879/122310 [2:31:27<1:26:14,  8.97it/s][A
 62%|███████████████████▊            | 75885/122310 [2:31:28<1:40:17,  7.71it/s][A
 62%|███████████████████▊            | 75887/122310 [2:31:29<2:09:44,  5.96

step: 38880, loss: 87.46621874002005, epoch: 2



 62%|███████████████████▊            | 75960/122310 [2:31:40<2:29:37,  5.16it/s][A
 62%|███████████████████▉            | 75970/122310 [2:31:42<2:06:10,  6.12it/s][A
 62%|███████████████████▉            | 75979/122310 [2:31:43<1:57:50,  6.55it/s][A
 62%|███████████████████▉            | 75986/122310 [2:31:44<2:00:35,  6.40it/s][A
 62%|███████████████████▉            | 75992/122310 [2:31:45<2:09:01,  5.98it/s][A
 62%|███████████████████▉            | 75999/122310 [2:31:46<2:09:21,  5.97it/s][A
 62%|███████████████████▉            | 76002/122310 [2:31:48<2:37:39,  4.90it/s][A
 62%|███████████████████▉            | 76011/122310 [2:31:49<2:17:26,  5.61it/s][A
 62%|███████████████████▉            | 76017/122310 [2:31:50<2:21:54,  5.44it/s][A
 62%|███████████████████▉            | 76023/122310 [2:31:51<2:25:20,  5.31it/s][A
 62%|███████████████████▉            | 76026/122310 [2:31:52<2:53:22,  4.45it/s][A
 62%|███████████████████▉            | 76035/122310 [2:31:54<2:23:29,  5.37

step: 38900, loss: 69.5173703027885, epoch: 2



 62%|███████████████████▉            | 76104/122310 [2:32:04<2:09:42,  5.94it/s][A
 62%|███████████████████▉            | 76123/122310 [2:32:05<1:26:12,  8.93it/s][A
 62%|███████████████████▉            | 76130/122310 [2:32:07<1:36:19,  7.99it/s][A
 62%|███████████████████▉            | 76135/122310 [2:32:08<1:52:07,  6.86it/s][A
 62%|███████████████████▉            | 76140/122310 [2:32:09<2:09:42,  5.93it/s][A
 62%|███████████████████▉            | 76152/122310 [2:32:10<1:49:45,  7.01it/s][A
 62%|███████████████████▉            | 76160/122310 [2:32:12<1:52:42,  6.82it/s][A
 62%|███████████████████▉            | 76178/122310 [2:32:13<1:22:57,  9.27it/s][A
 62%|███████████████████▉            | 76185/122310 [2:32:14<1:33:09,  8.25it/s][A
 62%|███████████████████▉            | 76192/122310 [2:32:15<1:41:32,  7.57it/s][A
 62%|███████████████████▉            | 76203/122310 [2:32:16<1:34:44,  8.11it/s][A
 62%|███████████████████▉            | 76206/122310 [2:32:18<1:59:19,  6.44

step: 38920, loss: 83.98343258143487, epoch: 2



 62%|███████████████████▉            | 76291/122310 [2:32:28<1:24:30,  9.08it/s][A
 62%|███████████████████▉            | 76300/122310 [2:32:29<1:28:17,  8.69it/s][A
 62%|███████████████████▉            | 76309/122310 [2:32:31<1:31:08,  8.41it/s][A
 62%|███████████████████▉            | 76317/122310 [2:32:32<1:36:23,  7.95it/s][A
 62%|███████████████████▉            | 76328/122310 [2:32:33<1:32:08,  8.32it/s][A
 62%|███████████████████▉            | 76336/122310 [2:32:34<1:38:21,  7.79it/s][A
 62%|███████████████████▉            | 76341/122310 [2:32:35<1:52:57,  6.78it/s][A
 62%|███████████████████▉            | 76347/122310 [2:32:37<2:02:25,  6.26it/s][A
 62%|███████████████████▉            | 76353/122310 [2:32:38<2:09:36,  5.91it/s][A
 62%|███████████████████▉            | 76364/122310 [2:32:39<1:50:59,  6.90it/s][A
 62%|███████████████████▉            | 76371/122310 [2:32:40<1:55:53,  6.61it/s][A
 62%|███████████████████▉            | 76383/122310 [2:32:41<1:40:15,  7.63

step: 38940, loss: 71.51220083205811, epoch: 2



 63%|████████████████████            | 76445/122310 [2:32:52<2:04:46,  6.13it/s][A
 63%|████████████████████            | 76455/122310 [2:32:54<1:51:30,  6.85it/s][A
 63%|████████████████████            | 76459/122310 [2:32:55<2:07:16,  6.00it/s][A
 63%|████████████████████            | 76466/122310 [2:32:56<2:06:22,  6.05it/s][A
 63%|████████████████████            | 76475/122310 [2:32:57<1:57:53,  6.48it/s][A
 63%|████████████████████            | 76478/122310 [2:32:58<2:22:54,  5.35it/s][A
 63%|████████████████████            | 76485/122310 [2:32:59<2:14:39,  5.67it/s][A
 63%|████████████████████            | 76497/122310 [2:33:00<1:44:13,  7.33it/s][A
 63%|████████████████████            | 76503/122310 [2:33:01<1:52:45,  6.77it/s][A
 63%|████████████████████            | 76515/122310 [2:33:02<1:35:20,  8.01it/s][A
 63%|████████████████████            | 76525/122310 [2:33:04<1:30:21,  8.45it/s][A
 63%|████████████████████            | 76533/122310 [2:33:04<1:31:27,  8.34

step: 38960, loss: 69.80493292863638, epoch: 2



 63%|████████████████████            | 76606/122310 [2:33:13<1:33:33,  8.14it/s][A
 63%|████████████████████            | 76617/122310 [2:33:14<1:23:22,  9.13it/s][A
 63%|████████████████████            | 76631/122310 [2:33:15<1:10:44, 10.76it/s][A
 63%|████████████████████            | 76637/122310 [2:33:16<1:21:41,  9.32it/s][A
 63%|████████████████████            | 76649/122310 [2:33:17<1:15:01, 10.14it/s][A
 63%|████████████████████            | 76660/122310 [2:33:18<1:12:52, 10.44it/s][A
 63%|████████████████████            | 76667/122310 [2:33:19<1:24:10,  9.04it/s][A
 63%|████████████████████            | 76677/122310 [2:33:21<1:28:26,  8.60it/s][A
 63%|████████████████████            | 76683/122310 [2:33:22<1:42:41,  7.40it/s][A
 63%|████████████████████            | 76696/122310 [2:33:23<1:31:08,  8.34it/s][A
 63%|████████████████████            | 76703/122310 [2:33:24<1:42:37,  7.41it/s][A
 63%|████████████████████            | 76710/122310 [2:33:26<1:50:43,  6.86

step: 38980, loss: 97.19077566987256, epoch: 2



 63%|████████████████████            | 76764/122310 [2:33:37<2:30:10,  5.05it/s][A
 63%|████████████████████            | 76774/122310 [2:33:38<2:08:19,  5.91it/s][A
 63%|████████████████████            | 76786/122310 [2:33:39<1:47:52,  7.03it/s][A
 63%|████████████████████            | 76795/122310 [2:33:40<1:45:37,  7.18it/s][A
 63%|████████████████████            | 76806/122310 [2:33:41<1:36:52,  7.83it/s][A
 63%|████████████████████            | 76813/122310 [2:33:43<1:44:01,  7.29it/s][A
 63%|████████████████████            | 76829/122310 [2:33:44<1:24:13,  9.00it/s][A
 63%|████████████████████            | 76831/122310 [2:33:45<1:50:11,  6.88it/s][A
 63%|████████████████████            | 76841/122310 [2:33:46<1:44:01,  7.28it/s][A
 63%|████████████████████            | 76851/122310 [2:33:47<1:39:08,  7.64it/s][A
 63%|████████████████████            | 76855/122310 [2:33:49<2:00:37,  6.28it/s][A
 63%|████████████████████            | 76863/122310 [2:33:50<1:59:50,  6.32

step: 39000, loss: 71.53496435753725, epoch: 2
saving weights



 63%|████████████████████▏           | 76942/122310 [2:34:01<1:53:37,  6.65it/s][A
 63%|████████████████████▏           | 76950/122310 [2:34:02<1:54:09,  6.62it/s][A
 63%|████████████████████▏           | 76958/122310 [2:34:04<1:55:04,  6.57it/s][A
 63%|████████████████████▏           | 76963/122310 [2:34:05<2:07:51,  5.91it/s][A
 63%|████████████████████▏           | 76974/122310 [2:34:06<1:52:13,  6.73it/s][A
 63%|████████████████████▏           | 76978/122310 [2:34:07<2:10:46,  5.78it/s][A
 63%|████████████████████▏           | 76989/122310 [2:34:09<1:51:46,  6.76it/s][A
 63%|████████████████████▏           | 76993/122310 [2:34:10<2:10:29,  5.79it/s][A
 63%|████████████████████▏           | 77010/122310 [2:34:11<1:32:00,  8.21it/s][A
 63%|████████████████████▏           | 77015/122310 [2:34:12<1:47:58,  6.99it/s][A
 63%|████████████████████▏           | 77027/122310 [2:34:13<1:36:24,  7.83it/s][A
 63%|████████████████████▏           | 77037/122310 [2:34:15<1:33:57,  8.03

step: 39020, loss: 67.6904537227366, epoch: 2



 63%|████████████████████▏           | 77131/122310 [2:34:27<1:46:43,  7.06it/s][A
 63%|████████████████████▏           | 77140/122310 [2:34:28<1:45:00,  7.17it/s][A
 63%|████████████████████▏           | 77145/122310 [2:34:29<2:01:29,  6.20it/s][A
 63%|████████████████████▏           | 77152/122310 [2:34:30<2:04:08,  6.06it/s][A
 63%|████████████████████▏           | 77159/122310 [2:34:31<2:06:31,  5.95it/s][A
 63%|████████████████████▏           | 77168/122310 [2:34:33<1:57:54,  6.38it/s][A
 63%|████████████████████▏           | 77171/122310 [2:34:34<2:22:34,  5.28it/s][A
 63%|████████████████████▏           | 77183/122310 [2:34:35<1:51:31,  6.74it/s][A
 63%|████████████████████▏           | 77188/122310 [2:34:36<2:05:45,  5.98it/s][A
 63%|████████████████████▏           | 77189/122310 [2:34:37<2:50:00,  4.42it/s][A
 63%|████████████████████▏           | 77191/122310 [2:34:39<3:29:07,  3.60it/s][A
 63%|████████████████████▏           | 77193/122310 [2:34:40<4:07:04,  3.04

step: 39040, loss: 75.26581293902935, epoch: 2



 63%|████████████████████▏           | 77249/122310 [2:34:49<2:11:00,  5.73it/s][A
 63%|████████████████████▏           | 77257/122310 [2:34:50<2:03:14,  6.09it/s][A
 63%|████████████████████▏           | 77260/122310 [2:34:52<2:34:05,  4.87it/s][A
 63%|████████████████████▏           | 77264/122310 [2:34:53<2:44:44,  4.56it/s][A
 63%|████████████████████▏           | 77271/122310 [2:34:54<2:24:30,  5.19it/s][A
 63%|████████████████████▏           | 77280/122310 [2:34:55<2:00:19,  6.24it/s][A
 63%|████████████████████▏           | 77290/122310 [2:34:56<1:43:07,  7.28it/s][A
 63%|████████████████████▏           | 77299/122310 [2:34:57<1:36:35,  7.77it/s][A
 63%|████████████████████▏           | 77305/122310 [2:34:58<1:52:16,  6.68it/s][A
 63%|████████████████████▏           | 77310/122310 [2:34:59<2:09:05,  5.81it/s][A
 63%|████████████████████▏           | 77317/122310 [2:35:00<2:09:51,  5.77it/s][A
 63%|████████████████████▏           | 77325/122310 [2:35:02<2:06:50,  5.91

step: 39060, loss: 80.2788096322825, epoch: 2



 63%|████████████████████▎           | 77412/122310 [2:35:12<1:40:21,  7.46it/s][A
 63%|████████████████████▎           | 77423/122310 [2:35:13<1:29:40,  8.34it/s][A
 63%|████████████████████▎           | 77429/122310 [2:35:14<1:37:57,  7.64it/s][A
 63%|████████████████████▎           | 77441/122310 [2:35:15<1:25:52,  8.71it/s][A
 63%|████████████████████▎           | 77446/122310 [2:35:16<1:36:40,  7.73it/s][A
 63%|████████████████████▎           | 77455/122310 [2:35:17<1:34:47,  7.89it/s][A
 63%|████████████████████▎           | 77465/122310 [2:35:18<1:29:12,  8.38it/s][A
 63%|████████████████████▎           | 77478/122310 [2:35:20<1:43:09,  7.24it/s][A
 63%|████████████████████▎           | 77488/122310 [2:35:21<1:33:25,  8.00it/s][A
 63%|████████████████████▎           | 77499/122310 [2:35:22<1:32:58,  8.03it/s][A
 63%|████████████████████▎           | 77507/122310 [2:35:23<1:35:27,  7.82it/s][A
 63%|████████████████████▎           | 77519/122310 [2:35:24<1:25:59,  8.68

step: 39080, loss: 66.25640781832303, epoch: 2



 63%|████████████████████▎           | 77576/122310 [2:35:34<2:17:38,  5.42it/s][A
 63%|████████████████████▎           | 77595/122310 [2:35:36<1:28:11,  8.45it/s][A
 63%|████████████████████▎           | 77602/122310 [2:35:37<1:33:36,  7.96it/s][A
 63%|████████████████████▎           | 77608/122310 [2:35:38<1:40:32,  7.41it/s][A
 63%|████████████████████▎           | 77618/122310 [2:35:39<1:34:17,  7.90it/s][A
 63%|████████████████████▎           | 77621/122310 [2:35:40<1:52:31,  6.62it/s][A
 63%|████████████████████▎           | 77633/122310 [2:35:41<1:30:39,  8.21it/s][A
 63%|████████████████████▎           | 77637/122310 [2:35:42<1:44:31,  7.12it/s][A
 63%|████████████████████▎           | 77643/122310 [2:35:43<1:46:56,  6.96it/s][A
 63%|████████████████████▎           | 77650/122310 [2:35:43<1:43:40,  7.18it/s][A
 63%|████████████████████▎           | 77656/122310 [2:35:44<1:46:07,  7.01it/s][A
 63%|████████████████████▎           | 77659/122310 [2:35:45<2:06:44,  5.87

step: 39100, loss: 121.63139381325539, epoch: 2



 64%|████████████████████▎           | 77761/122310 [2:35:54<1:02:25, 11.89it/s][A
 64%|████████████████████▎           | 77767/122310 [2:35:55<1:15:38,  9.81it/s][A
 64%|████████████████████▎           | 77773/122310 [2:36:10<7:17:57,  1.69it/s][A
 64%|████████████████████▎           | 77776/122310 [2:36:11<7:01:33,  1.76it/s][A
 64%|████████████████████▎           | 77783/122310 [2:36:12<5:34:57,  2.22it/s][A
 64%|████████████████████▎           | 77790/122310 [2:36:14<4:36:13,  2.69it/s][A

step: 39120, loss: 66.26458329314966, epoch: 2



 64%|████████████████████▎           | 77794/122310 [2:36:15<4:27:13,  2.78it/s][A
 64%|████████████████████▎           | 77805/122310 [2:36:16<3:07:15,  3.96it/s][A
 64%|████████████████████▎           | 77812/122310 [2:36:18<2:52:11,  4.31it/s][A
 64%|████████████████████▎           | 77829/122310 [2:36:19<1:54:40,  6.46it/s][A
 64%|████████████████████▎           | 77840/122310 [2:36:20<1:44:47,  7.07it/s][A
 64%|████████████████████▎           | 77854/122310 [2:36:21<1:31:06,  8.13it/s][A
 64%|████████████████████▎           | 77863/122310 [2:36:23<1:34:14,  7.86it/s][A
 64%|████████████████████▎           | 77867/122310 [2:36:24<1:53:11,  6.54it/s][A
 64%|████████████████████▍           | 77887/122310 [2:36:25<1:19:35,  9.30it/s][A
 64%|████████████████████▍           | 77897/122310 [2:36:26<1:22:48,  8.94it/s][A
 64%|████████████████████▍           | 77905/122310 [2:36:28<1:30:33,  8.17it/s][A
 64%|████████████████████▍           | 77915/122310 [2:36:29<1:31:25,  8.09

step: 39140, loss: 86.17752246458058, epoch: 2



 64%|████████████████████▍           | 78010/122310 [2:36:40<1:51:16,  6.64it/s][A
 64%|████████████████████▍           | 78021/122310 [2:36:41<1:42:00,  7.24it/s][A
 64%|████████████████████▍           | 78030/122310 [2:36:42<1:44:37,  7.05it/s][A
 64%|████████████████████▍           | 78033/122310 [2:36:44<2:07:59,  5.77it/s][A
 64%|████████████████████▍           | 78044/122310 [2:36:45<1:48:54,  6.77it/s][A
 64%|████████████████████▍           | 78048/122310 [2:36:46<2:08:52,  5.72it/s][A
 64%|████████████████████▍           | 78054/122310 [2:36:47<2:14:46,  5.47it/s][A
 64%|████████████████████▍           | 78069/122310 [2:36:48<1:38:11,  7.51it/s][A
 64%|████████████████████▍           | 78072/122310 [2:36:50<2:02:59,  5.99it/s][A
 64%|████████████████████▍           | 78075/122310 [2:36:51<2:28:00,  4.98it/s][A
 64%|████████████████████▍           | 78080/122310 [2:36:52<2:35:00,  4.76it/s][A
 64%|████████████████████▍           | 78085/122310 [2:36:53<2:41:52,  4.55

step: 39160, loss: 67.99359994566294, epoch: 2



 64%|████████████████████▍           | 78145/122310 [2:37:04<2:08:39,  5.72it/s][A
 64%|████████████████████▍           | 78156/122310 [2:37:06<1:49:53,  6.70it/s][A
 64%|████████████████████▍           | 78169/122310 [2:37:07<1:32:40,  7.94it/s][A
 64%|████████████████████▍           | 78175/122310 [2:37:08<1:43:54,  7.08it/s][A
 64%|████████████████████▍           | 78186/122310 [2:37:09<1:34:45,  7.76it/s][A
 64%|████████████████████▍           | 78194/122310 [2:37:10<1:38:35,  7.46it/s][A
 64%|████████████████████▍           | 78209/122310 [2:37:12<1:21:24,  9.03it/s][A
 64%|████████████████████▍           | 78217/122310 [2:37:13<1:27:57,  8.36it/s][A
 64%|████████████████████▍           | 78233/122310 [2:37:14<1:15:33,  9.72it/s][A
 64%|████████████████████▍           | 78235/122310 [2:37:15<1:39:55,  7.35it/s][A
 64%|████████████████████▍           | 78243/122310 [2:37:16<1:42:00,  7.20it/s][A
 64%|████████████████████▍           | 78257/122310 [2:37:18<1:25:27,  8.59

step: 39180, loss: 74.49638027755583, epoch: 2



 64%|████████████████████▍           | 78352/122310 [2:37:29<1:27:36,  8.36it/s][A
 64%|████████████████████▌           | 78357/122310 [2:37:30<1:44:05,  7.04it/s][A
 64%|████████████████████▌           | 78366/122310 [2:37:31<1:43:39,  7.07it/s][A
 64%|████████████████████▌           | 78372/122310 [2:37:32<1:55:06,  6.36it/s][A
 64%|████████████████████▌           | 78382/122310 [2:37:34<1:46:11,  6.89it/s][A
 64%|████████████████████▌           | 78391/122310 [2:37:35<1:43:10,  7.09it/s][A
 64%|████████████████████▌           | 78400/122310 [2:37:36<1:41:27,  7.21it/s][A
 64%|████████████████████▌           | 78403/122310 [2:37:37<2:05:41,  5.82it/s][A
 64%|████████████████████▌           | 78416/122310 [2:37:38<1:41:09,  7.23it/s][A
 64%|████████████████████▌           | 78421/122310 [2:37:40<1:56:16,  6.29it/s][A
 64%|████████████████████▌           | 78424/122310 [2:37:41<2:22:48,  5.12it/s][A
 64%|████████████████████▌           | 78427/122310 [2:37:42<2:48:54,  4.33

step: 39200, loss: 78.11541109928119, epoch: 2
sim1 and sim2 are 0.4610722882797997, 0.2192737828292647
cosine of pred and queen: 0.19319285250793197
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: jimnah
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: charts
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: shows
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: italy
Actual: mumbai:asia::nairobi:africa, pred: population
Actual: maha

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: son
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: neselius
Actual: man:woman::brothers:sisters, pred: soused
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: pradesh
Actual: tripura:agartala::odisha:bhubaneswar, pred: bhubaneswar
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 64%|███████████████████▉           | 78476/122310 [2:40:00<78:32:11,  6.45s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.14792899408284024



 64%|███████████████████▉           | 78488/122310 [2:40:01<43:20:57,  3.56s/it][A
 64%|███████████████████▉           | 78493/122310 [2:40:02<34:43:16,  2.85s/it][A
 64%|███████████████████▉           | 78502/122310 [2:40:04<23:01:46,  1.89s/it][A
 64%|███████████████████▉           | 78509/122310 [2:40:05<17:10:01,  1.41s/it][A
 64%|███████████████████▉           | 78512/122310 [2:40:06<15:22:29,  1.26s/it][A
 64%|████████████████████▌           | 78522/122310 [2:40:07<9:38:59,  1.26it/s][A
 64%|████████████████████▌           | 78528/122310 [2:40:09<7:46:18,  1.56it/s][A
 64%|████████████████████▌           | 78538/122310 [2:40:10<5:21:50,  2.27it/s][A
 64%|████████████████████▌           | 78549/122310 [2:40:11<3:51:16,  3.15it/s][A
 64%|████████████████████▌           | 78561/122310 [2:40:12<2:52:28,  4.23it/s][A
 64%|████████████████████▌           | 78567/122310 [2:40:13<2:46:26,  4.38it/s][A
 64%|████████████████████▌           | 78578/122310 [2:40:15<2:16:47,  5.33

step: 39220, loss: 83.64743395386041, epoch: 2



 64%|████████████████████▌           | 78651/122310 [2:40:26<1:54:42,  6.34it/s][A
 64%|████████████████████▌           | 78666/122310 [2:40:27<1:32:11,  7.89it/s][A
 64%|████████████████████▌           | 78682/122310 [2:40:29<1:17:20,  9.40it/s][A
 64%|████████████████████▌           | 78687/122310 [2:40:30<1:32:07,  7.89it/s][A
 64%|████████████████████▌           | 78704/122310 [2:40:31<1:14:51,  9.71it/s][A
 64%|████████████████████▌           | 78717/122310 [2:40:32<1:12:26, 10.03it/s][A
 64%|████████████████████▌           | 78727/122310 [2:40:33<1:16:50,  9.45it/s][A
 64%|████████████████████▌           | 78736/122310 [2:40:35<1:21:29,  8.91it/s][A
 64%|████████████████████▌           | 78740/122310 [2:40:36<1:40:31,  7.22it/s][A
 64%|████████████████████▌           | 78745/122310 [2:40:37<1:57:11,  6.20it/s][A
 64%|████████████████████▌           | 78750/122310 [2:40:38<2:11:25,  5.52it/s][A
 64%|████████████████████▌           | 78764/122310 [2:40:40<1:40:14,  7.24

step: 39240, loss: 82.12895601136442, epoch: 2



 64%|████████████████████▋           | 78843/122310 [2:40:50<1:31:09,  7.95it/s][A
 64%|████████████████████▋           | 78850/122310 [2:40:52<1:39:18,  7.29it/s][A
 64%|████████████████████▋           | 78861/122310 [2:40:53<1:31:32,  7.91it/s][A
 64%|████████████████████▋           | 78877/122310 [2:40:54<1:15:53,  9.54it/s][A
 64%|████████████████████▋           | 78881/122310 [2:40:55<1:33:31,  7.74it/s][A
 65%|████████████████████▋           | 78891/122310 [2:40:56<1:31:52,  7.88it/s][A
 65%|████████████████████▋           | 78898/122310 [2:40:58<1:40:01,  7.23it/s][A
 65%|████████████████████▋           | 78903/122310 [2:40:59<1:54:55,  6.30it/s][A
 65%|████████████████████▋           | 78907/122310 [2:41:00<2:13:30,  5.42it/s][A
 65%|████████████████████▋           | 78913/122310 [2:41:01<2:18:02,  5.24it/s][A
 65%|████████████████████▋           | 78919/122310 [2:41:03<2:21:41,  5.10it/s][A
 65%|████████████████████▋           | 78928/122310 [2:41:04<2:05:44,  5.75

step: 39260, loss: 71.98602134161632, epoch: 2



 65%|████████████████████▋           | 79002/122310 [2:41:15<1:57:46,  6.13it/s][A
 65%|████████████████████▋           | 79005/122310 [2:41:16<2:22:47,  5.05it/s][A
 65%|████████████████████▋           | 79014/122310 [2:41:17<2:06:00,  5.73it/s][A
 65%|████████████████████▋           | 79025/122310 [2:41:19<1:47:47,  6.69it/s][A
 65%|████████████████████▋           | 79034/122310 [2:41:20<1:45:49,  6.82it/s][A
 65%|████████████████████▋           | 79043/122310 [2:41:21<1:44:53,  6.87it/s][A
 65%|████████████████████▋           | 79045/122310 [2:41:22<2:13:53,  5.39it/s][A
 65%|████████████████████▋           | 79049/122310 [2:41:24<2:34:34,  4.66it/s][A
 65%|████████████████████▋           | 79059/122310 [2:41:25<2:08:50,  5.60it/s][A
 65%|████████████████████▋           | 79065/122310 [2:41:26<2:13:29,  5.40it/s][A
 65%|████████████████████▋           | 79074/122310 [2:41:27<1:59:50,  6.01it/s][A
 65%|████████████████████▋           | 79082/122310 [2:41:29<1:56:32,  6.18

step: 39280, loss: 82.55047262407851, epoch: 2



 65%|████████████████████▋           | 79144/122310 [2:41:40<2:11:32,  5.47it/s][A
 65%|████████████████████▋           | 79152/122310 [2:41:41<2:02:35,  5.87it/s][A
 65%|████████████████████▋           | 79161/122310 [2:41:42<1:54:14,  6.30it/s][A
 65%|████████████████████▋           | 79163/122310 [2:41:43<2:25:47,  4.93it/s][A
 65%|████████████████████▋           | 79181/122310 [2:41:44<1:30:48,  7.92it/s][A
 65%|████████████████████▋           | 79187/122310 [2:41:45<1:40:26,  7.16it/s][A
 65%|████████████████████▋           | 79198/122310 [2:41:47<1:32:19,  7.78it/s][A
 65%|████████████████████▋           | 79217/122310 [2:41:48<1:11:22, 10.06it/s][A
 65%|████████████████████▋           | 79219/122310 [2:41:49<1:34:39,  7.59it/s][A
 65%|████████████████████▋           | 79226/122310 [2:41:50<1:41:50,  7.05it/s][A
 65%|████████████████████▋           | 79234/122310 [2:41:52<1:45:51,  6.78it/s][A
 65%|████████████████████▋           | 79238/122310 [2:41:53<2:05:08,  5.74

step: 39300, loss: 95.12890542231355, epoch: 2



 65%|████████████████████▋           | 79306/122310 [2:42:04<1:57:03,  6.12it/s][A
 65%|████████████████████▋           | 79310/122310 [2:42:05<2:15:03,  5.31it/s][A
 65%|████████████████████▊           | 79314/122310 [2:42:06<2:33:26,  4.67it/s][A
 65%|████████████████████▊           | 79320/122310 [2:42:07<2:28:25,  4.83it/s][A
 65%|████████████████████▊           | 79327/122310 [2:42:08<2:19:14,  5.14it/s][A
 65%|████████████████████▊           | 79339/122310 [2:42:10<1:48:06,  6.62it/s][A
 65%|████████████████████▊           | 79348/122310 [2:42:13<2:53:32,  4.13it/s][A
 65%|████████████████████▊           | 79352/122310 [2:42:15<3:01:41,  3.94it/s][A
 65%|████████████████████▊           | 79359/122310 [2:42:16<2:45:45,  4.32it/s][A
 65%|████████████████████▊           | 79366/122310 [2:42:17<2:34:03,  4.65it/s][A
 65%|████████████████████▊           | 79372/122310 [2:42:18<2:31:59,  4.71it/s][A
 65%|████████████████████▊           | 79379/122310 [2:42:20<2:24:18,  4.96

step: 39320, loss: 93.93453753576127, epoch: 2



 65%|████████████████████▊           | 79431/122310 [2:42:28<1:44:50,  6.82it/s][A
 65%|████████████████████▊           | 79436/122310 [2:42:29<1:58:18,  6.04it/s][A
 65%|████████████████████▊           | 79439/122310 [2:42:31<2:22:39,  5.01it/s][A
 65%|████████████████████▊           | 79448/122310 [2:42:32<2:04:41,  5.73it/s][A
 65%|████████████████████▊           | 79455/122310 [2:42:33<2:03:37,  5.78it/s][A
 65%|████████████████████▊           | 79464/122310 [2:42:34<1:53:48,  6.27it/s][A
 65%|████████████████████▊           | 79467/122310 [2:42:35<2:18:52,  5.14it/s][A
 65%|████████████████████▊           | 79475/122310 [2:42:37<2:07:00,  5.62it/s][A
 65%|████████████████████▊           | 79476/122310 [2:42:40<4:34:07,  2.60it/s][A
 65%|████████████████████▊           | 79479/122310 [2:42:41<4:36:02,  2.59it/s][A
 65%|████████████████████▊           | 79487/122310 [2:42:43<3:22:11,  3.53it/s][A
 65%|████████████████████▊           | 79494/122310 [2:42:44<2:53:48,  4.11

step: 39340, loss: 67.4131921942475, epoch: 2



 65%|████████████████████▊           | 79576/122310 [2:42:52<1:15:03,  9.49it/s][A
 65%|████████████████████▊           | 79579/122310 [2:42:53<1:36:13,  7.40it/s][A
 65%|████████████████████▊           | 79586/122310 [2:42:55<1:42:57,  6.92it/s][A
 65%|████████████████████▊           | 79598/122310 [2:42:57<1:58:17,  6.02it/s][A
 65%|████████████████████▊           | 79608/122310 [2:42:58<1:50:32,  6.44it/s][A
 65%|████████████████████▊           | 79614/122310 [2:43:00<1:59:44,  5.94it/s][A
 65%|████████████████████▊           | 79617/122310 [2:43:01<2:26:07,  4.87it/s][A
 65%|████████████████████▊           | 79629/122310 [2:43:02<1:48:39,  6.55it/s][A
 65%|████████████████████▊           | 79632/122310 [2:43:03<2:04:41,  5.70it/s][A
 65%|████████████████████▊           | 79636/122310 [2:43:04<2:14:31,  5.29it/s][A
 65%|████████████████████▊           | 79642/122310 [2:43:05<2:14:16,  5.30it/s][A
 65%|████████████████████▊           | 79649/122310 [2:43:06<2:10:59,  5.43

step: 39360, loss: 79.6338323226148, epoch: 2



 65%|████████████████████▊           | 79733/122310 [2:43:15<1:18:30,  9.04it/s][A
 65%|████████████████████▊           | 79746/122310 [2:43:16<1:11:14,  9.96it/s][A
 65%|████████████████████▊           | 79755/122310 [2:43:17<1:13:07,  9.70it/s][A
 65%|████████████████████▊           | 79756/122310 [2:43:18<1:39:20,  7.14it/s][A
 65%|████████████████████▊           | 79761/122310 [2:43:19<1:50:56,  6.39it/s][A
 65%|████████████████████▊           | 79767/122310 [2:43:20<1:51:57,  6.33it/s][A
 65%|████████████████████▊           | 79773/122310 [2:43:21<1:52:59,  6.27it/s][A
 65%|████████████████████▊           | 79779/122310 [2:43:22<1:55:11,  6.15it/s][A
 65%|████████████████████▊           | 79784/122310 [2:43:23<2:03:19,  5.75it/s][A
 65%|████████████████████▉           | 79793/122310 [2:43:24<1:46:25,  6.66it/s][A
 65%|████████████████████▉           | 79799/122310 [2:43:25<1:48:37,  6.52it/s][A
 65%|████████████████████▉           | 79809/122310 [2:43:26<1:32:44,  7.64

step: 39380, loss: 73.75358244169907, epoch: 2



 65%|████████████████████▉           | 79878/122310 [2:43:35<1:26:51,  8.14it/s][A
 65%|████████████████████▉           | 79885/122310 [2:43:36<1:34:08,  7.51it/s][A
 65%|████████████████████▉           | 79890/122310 [2:43:37<1:51:34,  6.34it/s][A
 65%|████████████████████▉           | 79891/122310 [2:43:38<2:28:06,  4.77it/s][A
 65%|████████████████████▉           | 79895/122310 [2:43:39<2:35:16,  4.55it/s][A
 65%|████████████████████▉           | 79902/122310 [2:43:40<2:14:59,  5.24it/s][A
 65%|████████████████████▉           | 79916/122310 [2:43:42<1:34:13,  7.50it/s][A
 65%|████████████████████▉           | 79927/122310 [2:43:43<1:27:33,  8.07it/s][A
 65%|████████████████████▉           | 79937/122310 [2:43:44<1:27:38,  8.06it/s][A
 65%|████████████████████▉           | 79947/122310 [2:43:45<1:25:51,  8.22it/s][A
 65%|████████████████████▉           | 79953/122310 [2:43:46<1:34:32,  7.47it/s][A
 65%|████████████████████▉           | 79959/122310 [2:43:47<1:36:51,  7.29

step: 39400, loss: 68.49589002676412, epoch: 2



 65%|████████████████████▉           | 80023/122310 [2:43:55<1:45:09,  6.70it/s][A
 65%|████████████████████▉           | 80031/122310 [2:43:56<1:35:33,  7.37it/s][A
 65%|████████████████████▉           | 80041/122310 [2:43:57<1:22:26,  8.54it/s][A
 65%|████████████████████▉           | 80050/122310 [2:43:58<1:17:54,  9.04it/s][A
 65%|████████████████████▉           | 80055/122310 [2:43:59<1:27:44,  8.03it/s][A
 65%|████████████████████▉           | 80065/122310 [2:44:00<1:18:07,  9.01it/s][A
 65%|████████████████████▉           | 80071/122310 [2:44:00<1:24:05,  8.37it/s][A
 65%|████████████████████▉           | 80078/122310 [2:44:01<1:25:12,  8.26it/s][A
 65%|████████████████████▉           | 80087/122310 [2:44:02<1:19:42,  8.83it/s][A
 65%|████████████████████▉           | 80096/122310 [2:44:03<1:15:55,  9.27it/s][A
 65%|████████████████████▉           | 80101/122310 [2:44:04<1:26:00,  8.18it/s][A
 65%|████████████████████▉           | 80107/122310 [2:44:05<1:30:42,  7.75

step: 39420, loss: 74.34505986103679, epoch: 2



 66%|████████████████████▉           | 80189/122310 [2:44:13<1:35:56,  7.32it/s][A
 66%|████████████████████▉           | 80200/122310 [2:44:14<1:19:05,  8.87it/s][A
 66%|████████████████████▉           | 80208/122310 [2:44:15<1:18:38,  8.92it/s][A
 66%|████████████████████▉           | 80213/122310 [2:44:15<1:28:18,  7.95it/s][A
 66%|████████████████████▉           | 80219/122310 [2:44:16<1:32:33,  7.58it/s][A
 66%|████████████████████▉           | 80225/122310 [2:44:17<1:35:18,  7.36it/s][A
 66%|████████████████████▉           | 80232/122310 [2:44:18<1:33:24,  7.51it/s][A
 66%|████████████████████▉           | 80247/122310 [2:44:19<1:07:39, 10.36it/s][A
 66%|████████████████████▉           | 80256/122310 [2:44:20<1:07:51, 10.33it/s][A
 66%|█████████████████████           | 80266/122310 [2:44:21<1:05:48, 10.65it/s][A
 66%|██████████████████████▎           | 80280/122310 [2:44:22<57:29, 12.19it/s][A
 66%|█████████████████████           | 80289/122310 [2:44:23<1:00:26, 11.59

step: 39440, loss: 79.89743252912376, epoch: 2



 66%|█████████████████████           | 80368/122310 [2:44:31<1:19:17,  8.82it/s][A
 66%|█████████████████████           | 80370/122310 [2:44:32<1:44:21,  6.70it/s][A
 66%|█████████████████████           | 80382/122310 [2:44:32<1:22:22,  8.48it/s][A
 66%|█████████████████████           | 80391/122310 [2:44:34<1:21:38,  8.56it/s][A
 66%|█████████████████████           | 80393/122310 [2:44:35<1:46:30,  6.56it/s][A
 66%|█████████████████████           | 80403/122310 [2:44:35<1:30:15,  7.74it/s][A
 66%|█████████████████████           | 80414/122310 [2:44:36<1:19:13,  8.81it/s][A
 66%|█████████████████████           | 80424/122310 [2:44:37<1:15:34,  9.24it/s][A
 66%|██████████████████████▎           | 80447/122310 [2:44:38<51:38, 13.51it/s][A
 66%|██████████████████████▎           | 80458/122310 [2:44:39<53:41, 12.99it/s][A
 66%|██████████████████████▎           | 80467/122310 [2:44:40<59:12, 11.78it/s][A
 66%|█████████████████████           | 80470/122310 [2:44:41<1:16:06,  9.16

step: 39460, loss: 95.37868030113987, epoch: 2



 66%|█████████████████████           | 80540/122310 [2:44:50<1:32:40,  7.51it/s][A
 66%|█████████████████████           | 80553/122310 [2:44:51<1:14:10,  9.38it/s][A
 66%|█████████████████████           | 80563/122310 [2:44:52<1:12:16,  9.63it/s][A
 66%|█████████████████████           | 80569/122310 [2:44:53<1:20:56,  8.59it/s][A
 66%|█████████████████████           | 80576/122310 [2:44:54<1:24:48,  8.20it/s][A
 66%|█████████████████████           | 80578/122310 [2:44:56<2:23:02,  4.86it/s][A
 66%|█████████████████████           | 80581/122310 [2:44:57<2:38:14,  4.39it/s][A
 66%|█████████████████████           | 80597/122310 [2:44:58<1:34:27,  7.36it/s][A
 66%|█████████████████████           | 80607/122310 [2:44:59<1:24:48,  8.20it/s][A
 66%|█████████████████████           | 80623/122310 [2:45:00<1:05:52, 10.55it/s][A
 66%|█████████████████████           | 80635/122310 [2:45:01<1:02:45, 11.07it/s][A
 66%|█████████████████████           | 80644/122310 [2:45:02<1:05:39, 10.58

step: 39480, loss: 69.89037586845487, epoch: 2



 66%|█████████████████████           | 80700/122310 [2:45:10<1:46:14,  6.53it/s][A
 66%|█████████████████████           | 80714/122310 [2:45:11<1:21:59,  8.45it/s][A
 66%|█████████████████████           | 80717/122310 [2:45:12<1:40:36,  6.89it/s][A
 66%|█████████████████████           | 80722/122310 [2:45:13<1:46:58,  6.48it/s][A
 66%|█████████████████████           | 80725/122310 [2:45:14<2:04:42,  5.56it/s][A
 66%|█████████████████████           | 80734/122310 [2:45:15<1:47:38,  6.44it/s][A
 66%|█████████████████████           | 80739/122310 [2:45:16<1:54:15,  6.06it/s][A
 66%|█████████████████████▏          | 80744/122310 [2:45:17<1:58:30,  5.85it/s][A
 66%|█████████████████████▏          | 80753/122310 [2:45:18<1:41:37,  6.82it/s][A
 66%|█████████████████████▏          | 80764/122310 [2:45:19<1:26:56,  7.96it/s][A
 66%|█████████████████████▏          | 80774/122310 [2:45:20<1:20:02,  8.65it/s][A
 66%|█████████████████████▏          | 80776/122310 [2:45:21<1:41:53,  6.79

step: 39500, loss: 74.44478206370826, epoch: 2



 66%|█████████████████████▏          | 80852/122310 [2:45:29<1:09:48,  9.90it/s][A
 66%|█████████████████████▏          | 80861/122310 [2:45:30<1:09:14,  9.98it/s][A
 66%|█████████████████████▏          | 80868/122310 [2:45:31<1:13:45,  9.36it/s][A
 66%|█████████████████████▏          | 80875/122310 [2:45:32<1:17:17,  8.93it/s][A
 66%|█████████████████████▏          | 80888/122310 [2:45:33<1:05:01, 10.62it/s][A
 66%|█████████████████████▏          | 80891/122310 [2:45:34<1:21:19,  8.49it/s][A
 66%|█████████████████████▏          | 80904/122310 [2:45:35<1:06:38, 10.36it/s][A
 66%|██████████████████████▍           | 80921/122310 [2:45:36<52:59, 13.02it/s][A
 66%|██████████████████████▍           | 80933/122310 [2:45:37<52:18, 13.18it/s][A
 66%|█████████████████████▏          | 80937/122310 [2:45:37<1:05:06, 10.59it/s][A
 66%|█████████████████████▏          | 80949/122310 [2:45:38<1:00:11, 11.45it/s][A
 66%|█████████████████████▏          | 80953/122310 [2:45:39<1:13:59,  9.32

step: 39520, loss: 81.00277531844415, epoch: 2



 66%|█████████████████████▏          | 81035/122310 [2:45:48<1:20:47,  8.52it/s][A
 66%|█████████████████████▏          | 81044/122310 [2:45:50<1:24:59,  8.09it/s][A
 66%|█████████████████████▏          | 81057/122310 [2:45:51<1:19:44,  8.62it/s][A
 66%|█████████████████████▏          | 81068/122310 [2:45:52<1:20:38,  8.52it/s][A
 66%|█████████████████████▏          | 81072/122310 [2:45:53<1:34:39,  7.26it/s][A
 66%|█████████████████████▏          | 81075/122310 [2:45:54<1:51:13,  6.18it/s][A
 66%|█████████████████████▏          | 81087/122310 [2:45:55<1:28:29,  7.76it/s][A
 66%|█████████████████████▏          | 81093/122310 [2:45:56<1:34:47,  7.25it/s][A
 66%|█████████████████████▏          | 81100/122310 [2:45:57<1:35:30,  7.19it/s][A
 66%|█████████████████████▏          | 81112/122310 [2:45:58<1:20:35,  8.52it/s][A
 66%|█████████████████████▏          | 81124/122310 [2:45:59<1:12:23,  9.48it/s][A
 66%|█████████████████████▏          | 81138/122310 [2:46:00<1:03:06, 10.87

step: 39540, loss: 88.27008721503178, epoch: 2



 66%|█████████████████████▏          | 81195/122310 [2:46:11<2:49:43,  4.04it/s][A
 66%|█████████████████████▏          | 81203/122310 [2:46:12<2:18:00,  4.96it/s][A
 66%|█████████████████████▏          | 81211/122310 [2:46:13<1:58:04,  5.80it/s][A
 66%|█████████████████████▏          | 81219/122310 [2:46:14<1:45:23,  6.50it/s][A
 66%|█████████████████████▎          | 81230/122310 [2:46:15<1:26:08,  7.95it/s][A
 66%|█████████████████████▎          | 81235/122310 [2:46:16<1:41:20,  6.76it/s][A
 66%|█████████████████████▎          | 81243/122310 [2:46:17<1:35:15,  7.19it/s][A
 66%|█████████████████████▎          | 81251/122310 [2:46:18<1:32:47,  7.37it/s][A
 66%|█████████████████████▎          | 81254/122310 [2:46:19<1:48:31,  6.31it/s][A
 66%|█████████████████████▎          | 81263/122310 [2:46:20<1:33:28,  7.32it/s][A
 66%|█████████████████████▎          | 81271/122310 [2:46:21<1:28:16,  7.75it/s][A
 66%|█████████████████████▎          | 81276/122310 [2:46:22<1:36:18,  7.10

step: 39560, loss: 88.3098635501704, epoch: 2



 66%|█████████████████████▎          | 81315/122310 [2:46:29<3:01:06,  3.77it/s][A
 66%|█████████████████████▎          | 81325/122310 [2:46:30<2:09:36,  5.27it/s][A
 66%|█████████████████████▎          | 81334/122310 [2:46:31<1:47:59,  6.32it/s][A
 67%|█████████████████████▎          | 81344/122310 [2:46:32<1:30:27,  7.55it/s][A
 67%|█████████████████████▎          | 81355/122310 [2:46:33<1:17:16,  8.83it/s][A
 67%|█████████████████████▎          | 81370/122310 [2:46:34<1:01:43, 11.05it/s][A
 67%|██████████████████████▌           | 81381/122310 [2:46:35<59:19, 11.50it/s][A
 67%|██████████████████████▋           | 81391/122310 [2:46:36<59:33, 11.45it/s][A
 67%|█████████████████████▎          | 81394/122310 [2:46:36<1:14:29,  9.15it/s][A
 67%|█████████████████████▎          | 81402/122310 [2:46:37<1:14:33,  9.14it/s][A
 67%|█████████████████████▎          | 81411/122310 [2:46:38<1:11:48,  9.49it/s][A
 67%|██████████████████████▋           | 81428/122310 [2:46:39<55:01, 12.38

step: 39580, loss: 82.87588016842626, epoch: 2



 67%|█████████████████████▎          | 81477/122310 [2:46:46<1:59:04,  5.72it/s][A
 67%|█████████████████████▎          | 81488/122310 [2:46:47<1:31:46,  7.41it/s][A
 67%|█████████████████████▎          | 81498/122310 [2:46:48<1:20:16,  8.47it/s][A
 67%|█████████████████████▎          | 81509/122310 [2:46:49<1:10:42,  9.62it/s][A
 67%|█████████████████████▎          | 81518/122310 [2:46:50<1:09:21,  9.80it/s][A
 67%|█████████████████████▎          | 81526/122310 [2:46:50<1:10:43,  9.61it/s][A
 67%|█████████████████████▎          | 81536/122310 [2:46:51<1:06:58, 10.15it/s][A
 67%|█████████████████████▎          | 81543/122310 [2:46:52<1:11:25,  9.51it/s][A
 67%|█████████████████████▎          | 81548/122310 [2:46:53<1:21:11,  8.37it/s][A
 67%|█████████████████████▎          | 81555/122310 [2:46:54<1:22:26,  8.24it/s][A
 67%|█████████████████████▎          | 81566/122310 [2:46:55<1:11:33,  9.49it/s][A
 67%|█████████████████████▎          | 81571/122310 [2:46:56<1:21:30,  8.33

step: 39600, loss: 82.76125642724809, epoch: 2
sim1 and sim2 are 0.5029375406037273, 0.2180055346736538
cosine of pred and queen: 0.22968053692322551
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: maladie
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: analysing
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: hollande
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: france
Actual: mumbai:asia::nairobi:africa, pred: population
Actu

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: hawser
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: another
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: neselius
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: karnataka
Actual: tripura:agartala::odisha:bhubaneswar, pred: bhubaneswar
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 67%|████████████████████▋          | 81625/122310 [2:48:35<44:05:56,  3.90s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.14792899408284024



 67%|████████████████████▋          | 81637/122310 [2:48:36<26:14:54,  2.32s/it][A
 67%|████████████████████▋          | 81648/122310 [2:48:37<17:22:39,  1.54s/it][A
 67%|████████████████████▋          | 81655/122310 [2:48:38<13:32:22,  1.20s/it][A
 67%|█████████████████████▎          | 81665/122310 [2:48:39<9:26:48,  1.20it/s][A
 67%|█████████████████████▎          | 81669/122310 [2:48:40<8:21:39,  1.35it/s][A
 67%|█████████████████████▎          | 81677/122310 [2:48:41<6:10:18,  1.83it/s][A
 67%|█████████████████████▎          | 81685/122310 [2:48:42<4:40:50,  2.41it/s][A
 67%|█████████████████████▎          | 81695/122310 [2:48:42<3:23:48,  3.32it/s][A
 67%|█████████████████████▍          | 81701/122310 [2:48:43<3:02:04,  3.72it/s][A
 67%|█████████████████████▍          | 81711/122310 [2:48:44<2:20:26,  4.82it/s][A
 67%|█████████████████████▍          | 81722/122310 [2:48:45<1:51:40,  6.06it/s][A
 67%|█████████████████████▍          | 81729/122310 [2:48:46<1:47:08,  6.31

step: 39620, loss: 70.38621714792659, epoch: 2



 67%|█████████████████████▍          | 81793/122310 [2:48:55<1:36:45,  6.98it/s][A
 67%|█████████████████████▍          | 81802/122310 [2:48:56<1:28:56,  7.59it/s][A
 67%|█████████████████████▍          | 81808/122310 [2:48:57<1:31:39,  7.36it/s][A
 67%|█████████████████████▍          | 81819/122310 [2:48:58<1:17:33,  8.70it/s][A
 67%|█████████████████████▍          | 81829/122310 [2:48:59<1:11:25,  9.45it/s][A
 67%|█████████████████████▍          | 81834/122310 [2:49:00<1:20:40,  8.36it/s][A
 67%|█████████████████████▍          | 81837/122310 [2:49:01<1:36:19,  7.00it/s][A
 67%|█████████████████████▍          | 81842/122310 [2:49:01<1:41:35,  6.64it/s][A
 67%|█████████████████████▍          | 81852/122310 [2:49:02<1:23:35,  8.07it/s][A
 67%|█████████████████████▍          | 81864/122310 [2:49:03<1:09:27,  9.71it/s][A
 67%|█████████████████████▍          | 81872/122310 [2:49:04<1:11:11,  9.47it/s][A
 67%|█████████████████████▍          | 81881/122310 [2:49:05<1:09:09,  9.74

step: 39640, loss: 75.63326947296297, epoch: 2



 67%|█████████████████████▍          | 81946/122310 [2:49:13<1:11:16,  9.44it/s][A
 67%|█████████████████████▍          | 81956/122310 [2:49:14<1:06:51, 10.06it/s][A
 67%|█████████████████████▍          | 81965/122310 [2:49:15<1:06:21, 10.13it/s][A
 67%|██████████████████████▊           | 81978/122310 [2:49:15<58:06, 11.57it/s][A
 67%|██████████████████████▊           | 81991/122310 [2:49:16<53:38, 12.53it/s][A
 67%|█████████████████████▍          | 82003/122310 [2:49:18<1:07:32,  9.95it/s][A
 67%|██████████████████████▊           | 82020/122310 [2:49:19<54:26, 12.33it/s][A
 67%|██████████████████████▊           | 82030/122310 [2:49:20<55:31, 12.09it/s][A
 67%|█████████████████████▍          | 82034/122310 [2:49:21<1:06:47, 10.05it/s][A
 67%|█████████████████████▍          | 82043/122310 [2:49:22<1:06:21, 10.11it/s][A
 67%|█████████████████████▍          | 82049/122310 [2:49:22<1:12:55,  9.20it/s][A
 67%|█████████████████████▍          | 82061/122310 [2:49:23<1:04:11, 10.45

step: 39660, loss: 75.73417079687727, epoch: 2



 67%|█████████████████████▍          | 82116/122310 [2:49:30<1:20:59,  8.27it/s][A
 67%|█████████████████████▍          | 82123/122310 [2:49:31<1:21:49,  8.19it/s][A
 67%|█████████████████████▍          | 82131/122310 [2:49:32<1:18:54,  8.49it/s][A
 67%|█████████████████████▍          | 82146/122310 [2:49:33<1:00:52, 11.00it/s][A
 67%|██████████████████████▊           | 82158/122310 [2:49:34<56:29, 11.85it/s][A
 67%|██████████████████████▊           | 82169/122310 [2:49:35<55:22, 12.08it/s][A
 67%|██████████████████████▊           | 82184/122310 [2:49:36<49:20, 13.56it/s][A
 67%|██████████████████████▊           | 82191/122310 [2:49:37<55:53, 11.96it/s][A
 67%|██████████████████████▊           | 82199/122310 [2:49:37<59:58, 11.15it/s][A
 67%|█████████████████████▌          | 82207/122310 [2:49:38<1:03:38, 10.50it/s][A
 67%|█████████████████████▌          | 82213/122310 [2:49:39<1:11:14,  9.38it/s][A
 67%|█████████████████████▌          | 82223/122310 [2:49:40<1:07:06,  9.96

step: 39680, loss: 79.78409087170071, epoch: 2



 67%|█████████████████████▌          | 82283/122310 [2:49:48<1:56:37,  5.72it/s][A
 67%|█████████████████████▌          | 82296/122310 [2:49:49<1:22:08,  8.12it/s][A
 67%|█████████████████████▌          | 82302/122310 [2:49:50<1:25:55,  7.76it/s][A
 67%|█████████████████████▌          | 82308/122310 [2:49:51<1:28:46,  7.51it/s][A
 67%|█████████████████████▌          | 82314/122310 [2:49:51<1:30:58,  7.33it/s][A
 67%|█████████████████████▌          | 82323/122310 [2:49:52<1:21:27,  8.18it/s][A
 67%|█████████████████████▌          | 82336/122310 [2:49:53<1:05:51, 10.12it/s][A
 67%|█████████████████████▌          | 82342/122310 [2:49:54<1:12:58,  9.13it/s][A
 67%|██████████████████████▉           | 82360/122310 [2:49:55<53:28, 12.45it/s][A
 67%|██████████████████████▉           | 82372/122310 [2:49:56<52:11, 12.75it/s][A
 67%|█████████████████████▌          | 82378/122310 [2:49:57<1:00:12, 11.05it/s][A
 67%|██████████████████████▉           | 82394/122310 [2:49:58<50:19, 13.22

step: 39700, loss: 71.06404210084736, epoch: 2



 67%|█████████████████████▌          | 82457/122310 [2:50:06<1:27:03,  7.63it/s][A
 67%|█████████████████████▌          | 82472/122310 [2:50:06<1:03:11, 10.51it/s][A
 67%|██████████████████████▉           | 82486/122310 [2:50:07<54:33, 12.16it/s][A
 67%|██████████████████████▉           | 82496/122310 [2:50:08<55:43, 11.91it/s][A
 67%|█████████████████████▌          | 82502/122310 [2:50:09<1:04:03, 10.36it/s][A
 67%|█████████████████████▌          | 82511/122310 [2:50:10<1:04:23, 10.30it/s][A
 67%|██████████████████████▉           | 82525/122310 [2:50:11<55:18, 11.99it/s][A
 67%|█████████████████████▌          | 82532/122310 [2:50:12<1:01:41, 10.75it/s][A
 67%|█████████████████████▌          | 82542/122310 [2:50:13<1:00:28, 10.96it/s][A
 67%|█████████████████████▌          | 82550/122310 [2:50:13<1:03:30, 10.43it/s][A
 68%|██████████████████████▉           | 82567/122310 [2:50:14<50:27, 13.13it/s][A
 68%|██████████████████████▉           | 82578/122310 [2:50:15<51:00, 12.98

step: 39720, loss: 80.21509324016507, epoch: 2



 68%|██████████████████████▉           | 82672/122310 [2:50:23<56:18, 11.73it/s][A
 68%|██████████████████████▉           | 82681/122310 [2:50:24<58:47, 11.23it/s][A
 68%|█████████████████████▋          | 82686/122310 [2:50:25<1:08:51,  9.59it/s][A
 68%|█████████████████████▋          | 82690/122310 [2:50:26<1:21:58,  8.06it/s][A
 68%|█████████████████████▋          | 82701/122310 [2:50:27<1:10:18,  9.39it/s][A
 68%|█████████████████████▋          | 82707/122310 [2:50:27<1:16:42,  8.60it/s][A
 68%|█████████████████████▋          | 82710/122310 [2:50:32<3:24:44,  3.22it/s][A
 68%|█████████████████████▋          | 82711/122310 [2:50:33<3:48:30,  2.89it/s][A
 68%|█████████████████████▋          | 82721/122310 [2:50:34<2:26:35,  4.50it/s][A
 68%|█████████████████████▋          | 82732/122310 [2:50:34<1:46:09,  6.21it/s][A
 68%|█████████████████████▋          | 82735/122310 [2:50:35<1:58:12,  5.58it/s][A
 68%|█████████████████████▋          | 82746/122310 [2:50:36<1:29:48,  7.34

step: 39740, loss: 71.26324034145935, epoch: 2



 68%|█████████████████████▋          | 82802/122310 [2:50:41<1:01:43, 10.67it/s][A
 68%|█████████████████████▋          | 82806/122310 [2:50:41<1:14:08,  8.88it/s][A
 68%|█████████████████████▋          | 82812/122310 [2:50:42<1:19:33,  8.27it/s][A
 68%|█████████████████████▋          | 82824/122310 [2:50:43<1:06:49,  9.85it/s][A
 68%|█████████████████████▋          | 82833/122310 [2:50:44<1:06:05,  9.95it/s][A
 68%|█████████████████████▋          | 82837/122310 [2:50:45<1:18:27,  8.39it/s][A
 68%|█████████████████████▋          | 82845/122310 [2:50:46<1:16:29,  8.60it/s][A
 68%|█████████████████████▋          | 82857/122310 [2:50:47<1:04:52, 10.14it/s][A
 68%|███████████████████████           | 82875/122310 [2:50:48<49:34, 13.26it/s][A
 68%|███████████████████████           | 82882/122310 [2:50:48<56:11, 11.70it/s][A
 68%|█████████████████████▋          | 82889/122310 [2:50:49<1:01:58, 10.60it/s][A
 68%|█████████████████████▋          | 82891/122310 [2:50:50<1:21:24,  8.07

step: 39760, loss: 81.4676484807473, epoch: 2



 68%|█████████████████████▋          | 82967/122310 [2:50:58<1:17:33,  8.45it/s][A
 68%|█████████████████████▋          | 82970/122310 [2:50:59<1:34:21,  6.95it/s][A
 68%|█████████████████████▋          | 82975/122310 [2:51:00<1:39:16,  6.60it/s][A
 68%|█████████████████████▋          | 82985/122310 [2:51:01<1:21:28,  8.04it/s][A
 68%|█████████████████████▋          | 82992/122310 [2:51:02<1:21:22,  8.05it/s][A
 68%|███████████████████████           | 83017/122310 [2:51:03<46:02, 14.22it/s][A
 68%|███████████████████████           | 83022/122310 [2:51:03<56:04, 11.68it/s][A
 68%|█████████████████████▋          | 83025/122310 [2:51:04<1:11:14,  9.19it/s][A
 68%|█████████████████████▋          | 83036/122310 [2:51:05<1:04:14, 10.19it/s][A
 68%|█████████████████████▋          | 83040/122310 [2:51:06<1:16:40,  8.54it/s][A
 68%|█████████████████████▋          | 83048/122310 [2:51:07<1:15:10,  8.71it/s][A
 68%|█████████████████████▋          | 83052/122310 [2:51:08<1:28:08,  7.42

step: 39780, loss: 69.29882877627216, epoch: 2



 68%|█████████████████████▋          | 83123/122310 [2:51:16<1:06:19,  9.85it/s][A
 68%|█████████████████████▋          | 83129/122310 [2:51:17<1:12:46,  8.97it/s][A
 68%|█████████████████████▊          | 83133/122310 [2:51:18<1:26:07,  7.58it/s][A
 68%|█████████████████████▊          | 83136/122310 [2:51:18<1:42:45,  6.35it/s][A
 68%|█████████████████████▊          | 83139/122310 [2:51:19<1:58:56,  5.49it/s][A
 68%|█████████████████████▊          | 83145/122310 [2:51:20<1:50:54,  5.89it/s][A
 68%|█████████████████████▊          | 83151/122310 [2:51:21<1:46:05,  6.15it/s][A
 68%|█████████████████████▊          | 83159/122310 [2:51:22<1:32:39,  7.04it/s][A
 68%|█████████████████████▊          | 83171/122310 [2:51:23<1:12:05,  9.05it/s][A
 68%|█████████████████████▊          | 83181/122310 [2:51:24<1:07:48,  9.62it/s][A
 68%|█████████████████████▊          | 83187/122310 [2:51:25<1:14:43,  8.73it/s][A
 68%|█████████████████████▊          | 83191/122310 [2:51:25<1:26:50,  7.51

step: 39800, loss: 81.49293927400784, epoch: 2



 68%|█████████████████████▊          | 83277/122310 [2:51:34<1:20:48,  8.05it/s][A
 68%|█████████████████████▊          | 83287/122310 [2:51:35<1:13:17,  8.87it/s][A
 68%|█████████████████████▊          | 83295/122310 [2:51:36<1:12:50,  8.93it/s][A
 68%|█████████████████████▊          | 83301/122310 [2:51:37<1:17:41,  8.37it/s][A
 68%|█████████████████████▊          | 83315/122310 [2:51:38<1:01:52, 10.50it/s][A
 68%|███████████████████████▏          | 83329/122310 [2:51:39<54:04, 12.01it/s][A
 68%|███████████████████████▏          | 83344/122310 [2:51:40<48:42, 13.33it/s][A
 68%|███████████████████████▏          | 83354/122310 [2:51:41<51:14, 12.67it/s][A
 68%|███████████████████████▏          | 83360/122310 [2:51:41<59:20, 10.94it/s][A
 68%|█████████████████████▊          | 83368/122310 [2:51:42<1:02:38, 10.36it/s][A
 68%|███████████████████████▏          | 83379/122310 [2:51:43<58:54, 11.01it/s][A
 68%|█████████████████████▊          | 83383/122310 [2:51:44<1:11:09,  9.12

step: 39820, loss: 66.10406603286108, epoch: 2



 68%|█████████████████████▊          | 83429/122310 [2:51:51<1:34:20,  6.87it/s][A
 68%|█████████████████████▊          | 83439/122310 [2:51:52<1:19:04,  8.19it/s][A
 68%|█████████████████████▊          | 83443/122310 [2:51:53<1:30:46,  7.14it/s][A
 68%|█████████████████████▊          | 83453/122310 [2:51:54<1:17:12,  8.39it/s][A
 68%|█████████████████████▊          | 83466/122310 [2:51:55<1:03:01, 10.27it/s][A
 68%|███████████████████████▏          | 83479/122310 [2:51:55<55:46, 11.60it/s][A
 68%|███████████████████████▏          | 83488/122310 [2:51:56<57:48, 11.19it/s][A
 68%|███████████████████████▏          | 83505/122310 [2:51:57<47:30, 13.62it/s][A
 68%|█████████████████████▊          | 83507/122310 [2:51:58<1:03:23, 10.20it/s][A
 68%|█████████████████████▊          | 83515/122310 [2:51:59<1:05:23,  9.89it/s][A
 68%|█████████████████████▊          | 83521/122310 [2:52:00<1:11:58,  8.98it/s][A
 68%|█████████████████████▊          | 83527/122310 [2:52:01<1:17:36,  8.33

step: 39840, loss: 67.56146291543308, epoch: 2



 68%|███████████████████████▏          | 83599/122310 [2:52:09<59:32, 10.83it/s][A
 68%|███████████████████████▏          | 83613/122310 [2:52:10<52:09, 12.36it/s][A
 68%|███████████████████████▏          | 83621/122310 [2:52:10<57:03, 11.30it/s][A
 68%|███████████████████████▏          | 83631/122310 [2:52:11<56:44, 11.36it/s][A
 68%|█████████████████████▉          | 83636/122310 [2:52:12<1:06:37,  9.68it/s][A
 68%|█████████████████████▉          | 83647/122310 [2:52:13<1:01:00, 10.56it/s][A
 68%|█████████████████████▉          | 83653/122310 [2:52:14<1:08:29,  9.41it/s][A
 68%|█████████████████████▉          | 83662/122310 [2:52:15<1:06:38,  9.67it/s][A
 68%|█████████████████████▉          | 83666/122310 [2:52:16<1:19:08,  8.14it/s][A
 68%|█████████████████████▉          | 83671/122310 [2:52:17<1:26:31,  7.44it/s][A
 68%|█████████████████████▉          | 83682/122310 [2:52:17<1:11:46,  8.97it/s][A
 68%|███████████████████████▎          | 83695/122310 [2:52:18<59:59, 10.73

step: 39860, loss: 75.96530223504884, epoch: 2



 69%|███████████████████████▎          | 83787/122310 [2:52:26<48:30, 13.24it/s][A
 69%|███████████████████████▎          | 83795/122310 [2:52:27<53:05, 12.09it/s][A
 69%|███████████████████████▎          | 83808/122310 [2:52:28<49:48, 12.88it/s][A
 69%|███████████████████████▎          | 83815/122310 [2:52:29<56:14, 11.41it/s][A
 69%|███████████████████████▎          | 83823/122310 [2:52:30<59:52, 10.71it/s][A
 69%|█████████████████████▉          | 83830/122310 [2:52:31<1:04:37,  9.92it/s][A
 69%|█████████████████████▉          | 83832/122310 [2:52:32<1:24:04,  7.63it/s][A
 69%|█████████████████████▉          | 83837/122310 [2:52:32<1:30:54,  7.05it/s][A
 69%|█████████████████████▉          | 83843/122310 [2:52:33<1:31:22,  7.02it/s][A
 69%|█████████████████████▉          | 83857/122310 [2:52:34<1:05:57,  9.72it/s][A
 69%|█████████████████████▉          | 83867/122310 [2:52:35<1:02:47, 10.20it/s][A
 69%|███████████████████████▎          | 83879/122310 [2:52:36<57:06, 11.21

step: 39880, loss: 72.02591647973448, epoch: 2



 69%|███████████████████████▎          | 83965/122310 [2:52:44<56:18, 11.35it/s][A
 69%|█████████████████████▉          | 83968/122310 [2:52:45<1:11:03,  8.99it/s][A
 69%|█████████████████████▉          | 83975/122310 [2:52:46<1:13:46,  8.66it/s][A
 69%|█████████████████████▉          | 83982/122310 [2:52:47<1:15:50,  8.42it/s][A
 69%|█████████████████████▉          | 83995/122310 [2:52:47<1:01:38, 10.36it/s][A
 69%|███████████████████████▎          | 84006/122310 [2:52:48<57:48, 11.04it/s][A
 69%|█████████████████████▉          | 84012/122310 [2:52:49<1:05:22,  9.76it/s][A
 69%|█████████████████████▉          | 84018/122310 [2:52:50<1:11:42,  8.90it/s][A
 69%|█████████████████████▉          | 84027/122310 [2:52:51<1:08:28,  9.32it/s][A
 69%|█████████████████████▉          | 84037/122310 [2:52:52<1:04:00,  9.97it/s][A
 69%|█████████████████████▉          | 84046/122310 [2:52:53<1:03:23, 10.06it/s][A
 69%|███████████████████████▎          | 84060/122310 [2:52:54<54:05, 11.79

step: 39900, loss: 97.82742542689164, epoch: 2



 69%|██████████████████████          | 84122/122310 [2:53:01<1:17:28,  8.22it/s][A
 69%|██████████████████████          | 84130/122310 [2:53:02<1:14:46,  8.51it/s][A
 69%|██████████████████████          | 84136/122310 [2:53:03<1:19:29,  8.00it/s][A
 69%|██████████████████████          | 84145/122310 [2:53:04<1:13:10,  8.69it/s][A
 69%|██████████████████████          | 84154/122310 [2:53:05<1:09:20,  9.17it/s][A
 69%|██████████████████████          | 84165/122310 [2:53:06<1:02:18, 10.20it/s][A
 69%|██████████████████████          | 84170/122310 [2:53:07<1:11:41,  8.87it/s][A
 69%|██████████████████████          | 84180/122310 [2:53:08<1:05:56,  9.64it/s][A
 69%|██████████████████████          | 84185/122310 [2:53:08<1:15:20,  8.43it/s][A
 69%|██████████████████████          | 84196/122310 [2:53:09<1:05:48,  9.65it/s][A
 69%|███████████████████████▍          | 84208/122310 [2:53:10<58:35, 10.84it/s][A
 69%|███████████████████████▍          | 84222/122310 [2:53:11<51:52, 12.24

step: 39920, loss: 73.15615184447375, epoch: 2



 69%|███████████████████████▍          | 84310/122310 [2:53:19<55:08, 11.48it/s][A
 69%|██████████████████████          | 84317/122310 [2:53:20<1:00:28, 10.47it/s][A
 69%|███████████████████████▍          | 84332/122310 [2:53:21<51:01, 12.40it/s][A
 69%|██████████████████████          | 84337/122310 [2:53:22<1:00:39, 10.43it/s][A
 69%|██████████████████████          | 84341/122310 [2:53:23<1:13:39,  8.59it/s][A
 69%|██████████████████████          | 84348/122310 [2:53:23<1:15:08,  8.42it/s][A
 69%|██████████████████████          | 84360/122310 [2:53:24<1:03:36,  9.94it/s][A
 69%|███████████████████████▍          | 84373/122310 [2:53:25<55:39, 11.36it/s][A
 69%|██████████████████████          | 84379/122310 [2:53:26<1:03:31,  9.95it/s][A
 69%|██████████████████████          | 84389/122310 [2:53:27<1:02:03, 10.19it/s][A
 69%|███████████████████████▍          | 84405/122310 [2:53:28<50:26, 12.52it/s][A
 69%|███████████████████████▍          | 84415/122310 [2:53:29<51:43, 12.21

step: 39940, loss: 79.31986137087395, epoch: 2



 69%|██████████████████████          | 84478/122310 [2:53:37<1:19:26,  7.94it/s][A
 69%|██████████████████████          | 84487/122310 [2:53:38<1:12:52,  8.65it/s][A
 69%|██████████████████████          | 84500/122310 [2:53:38<1:00:07, 10.48it/s][A
 69%|██████████████████████          | 84508/122310 [2:53:39<1:02:55, 10.01it/s][A
 69%|██████████████████████          | 84513/122310 [2:53:40<1:12:23,  8.70it/s][A
 69%|██████████████████████          | 84524/122310 [2:53:41<1:04:14,  9.80it/s][A
 69%|██████████████████████          | 84533/122310 [2:53:42<1:03:22,  9.94it/s][A
 69%|██████████████████████          | 84541/122310 [2:53:43<1:04:58,  9.69it/s][A
 69%|███████████████████████▌          | 84552/122310 [2:53:44<59:32, 10.57it/s][A
 69%|███████████████████████▌          | 84562/122310 [2:53:45<58:09, 10.82it/s][A
 69%|███████████████████████▌          | 84572/122310 [2:53:45<57:12, 11.00it/s][A
 69%|███████████████████████▌          | 84588/122310 [2:53:46<47:41, 13.18

step: 39960, loss: 88.45154771894384, epoch: 2



 69%|██████████████████████▏         | 84663/122310 [2:53:54<1:20:10,  7.83it/s][A
 69%|██████████████████████▏         | 84677/122310 [2:53:55<1:01:21, 10.22it/s][A
 69%|███████████████████████▌          | 84691/122310 [2:53:56<53:41, 11.68it/s][A
 69%|███████████████████████▌          | 84705/122310 [2:53:57<49:35, 12.64it/s][A
 69%|███████████████████████▌          | 84715/122310 [2:53:58<52:19, 11.97it/s][A
 69%|███████████████████████▌          | 84722/122310 [2:53:59<58:58, 10.62it/s][A
 69%|██████████████████████▏         | 84724/122310 [2:54:00<1:17:25,  8.09it/s][A
 69%|██████████████████████▏         | 84732/122310 [2:54:01<1:15:59,  8.24it/s][A
 69%|██████████████████████▏         | 84738/122310 [2:54:02<1:20:35,  7.77it/s][A
 69%|██████████████████████▏         | 84747/122310 [2:54:03<1:15:05,  8.34it/s][A
 69%|██████████████████████▏         | 84754/122310 [2:54:04<1:16:23,  8.19it/s][A
 69%|██████████████████████▏         | 84759/122310 [2:54:04<1:24:16,  7.43

step: 39980, loss: 77.40177808561566, epoch: 2



 69%|██████████████████████▏         | 84836/122310 [2:54:12<1:15:48,  8.24it/s][A
 69%|██████████████████████▏         | 84848/122310 [2:54:13<1:03:25,  9.84it/s][A
 69%|██████████████████████▏         | 84850/122310 [2:54:14<1:21:48,  7.63it/s][A
 69%|██████████████████████▏         | 84861/122310 [2:54:15<1:08:55,  9.05it/s][A
 69%|██████████████████████▏         | 84865/122310 [2:54:16<1:20:39,  7.74it/s][A
 69%|██████████████████████▏         | 84871/122310 [2:54:17<1:23:20,  7.49it/s][A
 69%|███████████████████████▌          | 84887/122310 [2:54:18<58:03, 10.74it/s][A
 69%|███████████████████████▌          | 84899/122310 [2:54:19<53:35, 11.63it/s][A
 69%|██████████████████████▏         | 84905/122310 [2:54:19<1:01:03, 10.21it/s][A
 69%|██████████████████████▏         | 84911/122310 [2:54:20<1:07:27,  9.24it/s][A
 69%|██████████████████████▏         | 84918/122310 [2:54:21<1:10:25,  8.85it/s][A
 69%|██████████████████████▏         | 84926/122310 [2:54:23<1:32:48,  6.71

step: 40000, loss: 74.14210804682803, epoch: 2
sim1 and sim2 are 0.448061078896882, 0.2014848950997317
cosine of pred and queen: 0.19445169982504684
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: maladie
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: scooper
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: hollande
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: ireland
Actual: mumbai:asia::nairobi:africa, pred: population
Actual

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: meditated
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: respect
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: karnataka
Actual: tripura:agartala::odisha:bhubaneswar, pred: bhubaneswar
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar
Actual: 


 69%|█████████████████████▌         | 84983/122310 [2:56:01<33:26:28,  3.23s/it][A
 69%|█████████████████████▌         | 84993/122310 [2:56:01<22:47:15,  2.20s/it][A
 69%|█████████████████████▌         | 85003/122310 [2:56:02<15:50:09,  1.53s/it][A
 70%|█████████████████████▌         | 85010/122310 [2:56:04<12:23:46,  1.20s/it][A
 70%|█████████████████████▌         | 85015/122310 [2:56:05<10:21:36,  1.00s/it][A
 70%|██████████████████████▏         | 85019/122310 [2:56:06<8:57:30,  1.16it/s][A
 70%|██████████████████████▏         | 85028/122310 [2:56:07<6:03:58,  1.71it/s][A
 70%|██████████████████████▏         | 85040/122310 [2:56:08<3:56:50,  2.62it/s][A
 70%|██████████████████████▎         | 85047/122310 [2:56:09<3:20:02,  3.10it/s][A
 70%|██████████████████████▎         | 85061/122310 [2:56:10<2:16:40,  4.54it/s][A
 70%|██████████████████████▎         | 85068/122310 [2:56:11<2:06:43,  4.90it/s][A
 70%|██████████████████████▎         | 85074/122310 [2:56:12<2:02:27,  5.07

step: 40020, loss: 72.6304626343293, epoch: 2



 70%|██████████████████████▎         | 85167/122310 [2:56:22<1:08:41,  9.01it/s][A
 70%|██████████████████████▎         | 85178/122310 [2:56:23<1:02:47,  9.85it/s][A
 70%|██████████████████████▎         | 85180/122310 [2:56:24<1:21:10,  7.62it/s][A
 70%|██████████████████████▎         | 85182/122310 [2:56:25<1:43:12,  6.00it/s][A
 70%|██████████████████████▎         | 85192/122310 [2:56:25<1:23:30,  7.41it/s][A
 70%|███████████████████████▋          | 85209/122310 [2:56:26<58:03, 10.65it/s][A
 70%|███████████████████████▋          | 85221/122310 [2:56:27<54:12, 11.40it/s][A
 70%|██████████████████████▎         | 85226/122310 [2:56:28<1:03:53,  9.67it/s][A
 70%|███████████████████████▋          | 85238/122310 [2:56:29<57:34, 10.73it/s][A
 70%|██████████████████████▎         | 85246/122310 [2:56:30<1:00:40, 10.18it/s][A
 70%|███████████████████████▋          | 85260/122310 [2:56:31<53:14, 11.60it/s][A
 70%|███████████████████████▋          | 85268/122310 [2:56:32<57:47, 10.68

step: 40040, loss: 73.49317575618778, epoch: 2



 70%|██████████████████████▎         | 85318/122310 [2:56:40<1:43:08,  5.98it/s][A
 70%|██████████████████████▎         | 85324/122310 [2:56:41<1:40:10,  6.15it/s][A
 70%|██████████████████████▎         | 85336/122310 [2:56:42<1:18:14,  7.88it/s][A
 70%|██████████████████████▎         | 85349/122310 [2:56:43<1:07:31,  9.12it/s][A
 70%|███████████████████████▋          | 85362/122310 [2:56:44<59:59, 10.27it/s][A
 70%|██████████████████████▎         | 85368/122310 [2:56:45<1:08:47,  8.95it/s][A
 70%|██████████████████████▎         | 85375/122310 [2:56:46<1:14:03,  8.31it/s][A
 70%|██████████████████████▎         | 85380/122310 [2:56:47<1:22:28,  7.46it/s][A
 70%|██████████████████████▎         | 85391/122310 [2:56:48<1:11:02,  8.66it/s][A
 70%|██████████████████████▎         | 85399/122310 [2:56:49<1:11:43,  8.58it/s][A
 70%|██████████████████████▎         | 85408/122310 [2:56:50<1:09:20,  8.87it/s][A
 70%|███████████████████████▋          | 85424/122310 [2:56:51<54:37, 11.25

step: 40060, loss: 77.56441890999282, epoch: 2



 70%|███████████████████████▊          | 85510/122310 [2:56:59<56:11, 10.92it/s][A
 70%|██████████████████████▎         | 85512/122310 [2:57:00<1:13:21,  8.36it/s][A
 70%|██████████████████████▎         | 85516/122310 [2:57:01<1:25:54,  7.14it/s][A
 70%|███████████████████████▊          | 85533/122310 [2:57:02<58:52, 10.41it/s][A
 70%|██████████████████████▍         | 85540/122310 [2:57:03<1:04:19,  9.53it/s][A
 70%|██████████████████████▍         | 85549/122310 [2:57:04<1:04:11,  9.55it/s][A
 70%|██████████████████████▍         | 85557/122310 [2:57:05<1:06:04,  9.27it/s][A
 70%|██████████████████████▍         | 85565/122310 [2:57:06<1:08:01,  9.00it/s][A
 70%|██████████████████████▍         | 85577/122310 [2:57:07<1:00:25, 10.13it/s][A
 70%|███████████████████████▊          | 85588/122310 [2:57:08<57:47, 10.59it/s][A
 70%|███████████████████████▊          | 85598/122310 [2:57:09<57:50, 10.58it/s][A
 70%|███████████████████████▊          | 85610/122310 [2:57:10<54:40, 11.19

step: 40080, loss: 73.40037726421934, epoch: 2



 70%|██████████████████████▍         | 85685/122310 [2:57:18<1:05:33,  9.31it/s][A
 70%|██████████████████████▍         | 85689/122310 [2:57:19<1:17:22,  7.89it/s][A
 70%|██████████████████████▍         | 85701/122310 [2:57:20<1:03:41,  9.58it/s][A
 70%|██████████████████████▍         | 85711/122310 [2:57:21<1:00:23, 10.10it/s][A
 70%|██████████████████████▍         | 85719/122310 [2:57:21<1:02:24,  9.77it/s][A
 70%|███████████████████████▊          | 85730/122310 [2:57:22<57:39, 10.57it/s][A
 70%|███████████████████████▊          | 85740/122310 [2:57:23<56:42, 10.75it/s][A
 70%|███████████████████████▊          | 85749/122310 [2:57:24<57:51, 10.53it/s][A
 70%|██████████████████████▍         | 85755/122310 [2:57:25<1:04:56,  9.38it/s][A
 70%|██████████████████████▍         | 85765/122310 [2:57:26<1:01:35,  9.89it/s][A
 70%|██████████████████████▍         | 85770/122310 [2:57:27<1:10:32,  8.63it/s][A
 70%|██████████████████████▍         | 85778/122310 [2:57:28<1:10:49,  8.60

step: 40100, loss: 78.43102338624561, epoch: 2



 70%|███████████████████████▊          | 85856/122310 [2:57:36<57:50, 10.50it/s][A
 70%|███████████████████████▊          | 85867/122310 [2:57:37<55:26, 10.96it/s][A
 70%|███████████████████████▊          | 85878/122310 [2:57:38<53:26, 11.36it/s][A
 70%|███████████████████████▊          | 85885/122310 [2:57:38<58:51, 10.31it/s][A
 70%|███████████████████████▉          | 85896/122310 [2:57:39<55:40, 10.90it/s][A
 70%|███████████████████████▉          | 85906/122310 [2:57:40<55:16, 10.98it/s][A
 70%|██████████████████████▍         | 85910/122310 [2:57:41<1:07:50,  8.94it/s][A
 70%|██████████████████████▍         | 85916/122310 [2:57:42<1:13:23,  8.26it/s][A
 70%|██████████████████████▍         | 85925/122310 [2:57:43<1:08:43,  8.82it/s][A
 70%|██████████████████████▍         | 85931/122310 [2:57:44<1:13:35,  8.24it/s][A
 70%|██████████████████████▍         | 85938/122310 [2:57:45<1:14:35,  8.13it/s][A
 70%|██████████████████████▍         | 85941/122310 [2:57:46<1:30:42,  6.68

step: 40120, loss: 88.60388066691907, epoch: 2



 70%|██████████████████████▌         | 86018/122310 [2:57:54<1:12:15,  8.37it/s][A
 70%|██████████████████████▌         | 86024/122310 [2:57:54<1:17:00,  7.85it/s][A
 70%|██████████████████████▌         | 86033/122310 [2:57:55<1:11:13,  8.49it/s][A
 70%|██████████████████████▌         | 86045/122310 [2:57:56<1:00:44,  9.95it/s][A
 70%|██████████████████████▌         | 86053/122310 [2:57:57<1:02:25,  9.68it/s][A
 70%|███████████████████████▉          | 86071/122310 [2:57:58<47:00, 12.85it/s][A
 70%|███████████████████████▉          | 86082/122310 [2:57:59<47:19, 12.76it/s][A
 70%|███████████████████████▉          | 86093/122310 [2:58:00<47:53, 12.60it/s][A
 70%|███████████████████████▉          | 86107/122310 [2:58:01<44:25, 13.58it/s][A
 70%|███████████████████████▉          | 86114/122310 [2:58:02<50:44, 11.89it/s][A
 70%|██████████████████████▌         | 86117/122310 [2:58:02<1:04:40,  9.33it/s][A
 70%|██████████████████████▌         | 86123/122310 [2:58:03<1:10:19,  8.58

step: 40140, loss: 68.29080817602237, epoch: 2



 70%|███████████████████████▉          | 86224/122310 [2:58:11<58:48, 10.23it/s][A
 71%|███████████████████████▉          | 86233/122310 [2:58:12<59:36, 10.09it/s][A
 71%|██████████████████████▌         | 86241/122310 [2:58:13<1:02:53,  9.56it/s][A
 71%|██████████████████████▌         | 86249/122310 [2:58:14<1:07:48,  8.86it/s][A
 71%|██████████████████████▌         | 86257/122310 [2:58:15<1:10:50,  8.48it/s][A
 71%|██████████████████████▌         | 86264/122310 [2:58:16<1:15:06,  8.00it/s][A
 71%|██████████████████████▌         | 86265/122310 [2:58:17<1:41:07,  5.94it/s][A
 71%|██████████████████████▌         | 86266/122310 [2:58:18<2:14:45,  4.46it/s][A
 71%|██████████████████████▌         | 86271/122310 [2:58:21<3:27:01,  2.90it/s][A
 71%|██████████████████████▌         | 86273/122310 [2:58:22<3:42:29,  2.70it/s][A
 71%|██████████████████████▌         | 86279/122310 [2:58:23<2:53:11,  3.47it/s][A
 71%|██████████████████████▌         | 86286/122310 [2:58:24<2:16:36,  4.39

step: 40160, loss: 72.47178399056502, epoch: 2



 71%|██████████████████████▌         | 86343/122310 [2:58:31<1:14:02,  8.10it/s][A
 71%|██████████████████████▌         | 86346/122310 [2:58:32<1:28:17,  6.79it/s][A
 71%|██████████████████████▌         | 86362/122310 [2:58:33<1:00:06,  9.97it/s][A
 71%|████████████████████████          | 86375/122310 [2:58:34<52:26, 11.42it/s][A
 71%|██████████████████████▌         | 86383/122310 [2:58:35<1:12:03,  8.31it/s][A
 71%|██████████████████████▌         | 86387/122310 [2:58:36<1:21:07,  7.38it/s][A
 71%|██████████████████████▌         | 86400/122310 [2:58:37<1:04:17,  9.31it/s][A
 71%|██████████████████████▌         | 86405/122310 [2:58:38<1:12:07,  8.30it/s][A
 71%|██████████████████████▌         | 86417/122310 [2:58:39<1:01:42,  9.69it/s][A
 71%|██████████████████████▌         | 86423/122310 [2:58:40<1:08:00,  8.79it/s][A
 71%|████████████████████████          | 86444/122310 [2:58:41<47:10, 12.67it/s][A
 71%|██████████████████████▌         | 86446/122310 [2:58:42<1:05:40,  9.10

step: 40180, loss: 78.82825806957065, epoch: 2



 71%|██████████████████████▋         | 86521/122310 [2:58:50<1:19:39,  7.49it/s][A
 71%|██████████████████████▋         | 86533/122310 [2:58:51<1:09:17,  8.61it/s][A
 71%|██████████████████████▋         | 86535/122310 [2:58:52<1:30:27,  6.59it/s][A
 71%|██████████████████████▋         | 86544/122310 [2:58:53<1:19:56,  7.46it/s][A
 71%|██████████████████████▋         | 86557/122310 [2:58:54<1:05:39,  9.08it/s][A
 71%|██████████████████████▋         | 86563/122310 [2:58:55<1:11:31,  8.33it/s][A
 71%|██████████████████████▋         | 86574/122310 [2:58:56<1:04:56,  9.17it/s][A
 71%|██████████████████████▋         | 86582/122310 [2:58:57<1:06:25,  8.96it/s][A
 71%|██████████████████████▋         | 86589/122310 [2:58:58<1:11:08,  8.37it/s][A
 71%|██████████████████████▋         | 86597/122310 [2:58:59<1:10:14,  8.47it/s][A
 71%|██████████████████████▋         | 86610/122310 [2:59:00<1:00:05,  9.90it/s][A
 71%|██████████████████████▋         | 86618/122310 [2:59:01<1:02:08,  9.57

step: 40200, loss: 70.914503592118, epoch: 2



 71%|██████████████████████▋         | 86688/122310 [2:59:09<1:22:12,  7.22it/s][A
 71%|██████████████████████▋         | 86701/122310 [2:59:10<1:04:40,  9.18it/s][A
 71%|██████████████████████▋         | 86707/122310 [2:59:11<1:13:17,  8.10it/s][A
 71%|██████████████████████▋         | 86717/122310 [2:59:12<1:07:39,  8.77it/s][A
 71%|████████████████████████          | 86733/122310 [2:59:13<53:20, 11.12it/s][A
 71%|████████████████████████          | 86743/122310 [2:59:14<53:41, 11.04it/s][A
 71%|██████████████████████▋         | 86747/122310 [2:59:15<1:05:07,  9.10it/s][A
 71%|██████████████████████▋         | 86752/122310 [2:59:16<1:13:24,  8.07it/s][A
 71%|██████████████████████▋         | 86760/122310 [2:59:17<1:12:34,  8.16it/s][A
 71%|████████████████████████          | 86781/122310 [2:59:18<47:29, 12.47it/s][A
 71%|████████████████████████▏         | 86789/122310 [2:59:19<52:32, 11.27it/s][A
 71%|████████████████████████▏         | 86803/122310 [2:59:20<48:58, 12.08

step: 40220, loss: 79.37622882350111, epoch: 2



 71%|████████████████████████▏         | 86903/122310 [2:59:29<59:12,  9.97it/s][A
 71%|██████████████████████▋         | 86908/122310 [2:59:30<1:10:20,  8.39it/s][A
 71%|██████████████████████▋         | 86916/122310 [2:59:31<1:12:32,  8.13it/s][A
 71%|████████████████████████▏         | 86930/122310 [2:59:32<58:45, 10.04it/s][A
 71%|██████████████████████▋         | 86938/122310 [2:59:33<1:01:57,  9.51it/s][A
 71%|██████████████████████▋         | 86944/122310 [2:59:34<1:09:16,  8.51it/s][A
 71%|██████████████████████▊         | 86956/122310 [2:59:35<1:00:45,  9.70it/s][A
 71%|██████████████████████▊         | 86961/122310 [2:59:36<1:11:40,  8.22it/s][A
 71%|██████████████████████▊         | 86970/122310 [2:59:37<1:10:40,  8.33it/s][A
 71%|██████████████████████▊         | 86982/122310 [2:59:38<1:02:48,  9.37it/s][A
 71%|██████████████████████▊         | 86989/122310 [2:59:39<1:06:58,  8.79it/s][A
 71%|████████████████████████▏         | 87003/122310 [2:59:40<56:18, 10.45

step: 40240, loss: 74.78246490016537, epoch: 2



 71%|██████████████████████▊         | 87087/122310 [2:59:49<1:01:21,  9.57it/s][A
 71%|██████████████████████▊         | 87092/122310 [2:59:50<1:10:18,  8.35it/s][A
 71%|██████████████████████▊         | 87099/122310 [2:59:51<1:12:41,  8.07it/s][A
 71%|██████████████████████▊         | 87110/122310 [2:59:52<1:04:25,  9.11it/s][A
 71%|██████████████████████▊         | 87113/122310 [2:59:53<1:19:03,  7.42it/s][A
 71%|██████████████████████▊         | 87118/122310 [2:59:54<1:26:10,  6.81it/s][A
 71%|██████████████████████▊         | 87123/122310 [2:59:54<1:31:51,  6.38it/s][A
 71%|████████████████████████▏         | 87140/122310 [2:59:55<59:27,  9.86it/s][A
 71%|████████████████████████▏         | 87150/122310 [2:59:56<57:56, 10.11it/s][A
 71%|████████████████████████▏         | 87159/122310 [2:59:57<58:29, 10.02it/s][A
 71%|████████████████████████▏         | 87170/122310 [2:59:58<56:28, 10.37it/s][A
 71%|██████████████████████▊         | 87174/122310 [2:59:59<1:07:46,  8.64

step: 40260, loss: 89.00672029978213, epoch: 2



 71%|██████████████████████▊         | 87224/122310 [3:00:07<1:25:52,  6.81it/s][A
 71%|██████████████████████▊         | 87232/122310 [3:00:08<1:19:56,  7.31it/s][A
 71%|██████████████████████▊         | 87242/122310 [3:00:09<1:09:57,  8.36it/s][A
 71%|██████████████████████▊         | 87249/122310 [3:00:10<1:12:30,  8.06it/s][A
 71%|████████████████████████▎         | 87262/122310 [3:00:11<59:52,  9.76it/s][A
 71%|████████████████████████▎         | 87272/122310 [3:00:12<58:28,  9.99it/s][A
 71%|██████████████████████▊         | 87273/122310 [3:00:13<1:19:26,  7.35it/s][A
 71%|████████████████████████▎         | 87288/122310 [3:00:14<58:56,  9.90it/s][A
 71%|████████████████████████▎         | 87298/122310 [3:00:15<58:04, 10.05it/s][A
 71%|██████████████████████▊         | 87304/122310 [3:00:16<1:06:15,  8.80it/s][A
 71%|██████████████████████▊         | 87307/122310 [3:00:17<1:24:22,  6.91it/s][A
 71%|██████████████████████▊         | 87314/122310 [3:00:18<1:24:04,  6.94

step: 40280, loss: 78.59002766853432, epoch: 2



 71%|██████████████████████▊         | 87383/122310 [3:00:27<1:02:58,  9.24it/s][A
 71%|████████████████████████▎         | 87394/122310 [3:00:28<59:12,  9.83it/s][A
 71%|██████████████████████▊         | 87403/122310 [3:00:29<1:00:35,  9.60it/s][A
 71%|██████████████████████▊         | 87409/122310 [3:00:30<1:07:00,  8.68it/s][A
 71%|██████████████████████▊         | 87420/122310 [3:00:30<1:00:46,  9.57it/s][A
 71%|████████████████████████▎         | 87434/122310 [3:00:31<52:11, 11.14it/s][A
 71%|██████████████████████▉         | 87438/122310 [3:00:32<1:02:29,  9.30it/s][A
 72%|████████████████████████▎         | 87454/122310 [3:00:33<49:05, 11.83it/s][A
 72%|████████████████████████▎         | 87469/122310 [3:00:34<43:37, 13.31it/s][A
 72%|████████████████████████▎         | 87477/122310 [3:00:35<48:42, 11.92it/s][A
 72%|████████████████████████▎         | 87487/122310 [3:00:36<50:00, 11.60it/s][A
 72%|████████████████████████▎         | 87497/122310 [3:00:37<50:59, 11.38

step: 40300, loss: 70.97072554178818, epoch: 2



 72%|████████████████████████▎         | 87577/122310 [3:00:45<58:29,  9.90it/s][A
 72%|██████████████████████▉         | 87585/122310 [3:00:46<1:01:47,  9.37it/s][A
 72%|████████████████████████▎         | 87599/122310 [3:00:47<53:34, 10.80it/s][A
 72%|██████████████████████▉         | 87604/122310 [3:00:48<1:03:22,  9.13it/s][A
 72%|██████████████████████▉         | 87613/122310 [3:00:49<1:02:07,  9.31it/s][A
 72%|██████████████████████▉         | 87620/122310 [3:00:50<1:05:27,  8.83it/s][A
 72%|██████████████████████▉         | 87629/122310 [3:00:51<1:03:18,  9.13it/s][A
 72%|██████████████████████▉         | 87637/122310 [3:00:52<1:04:13,  9.00it/s][A
 72%|████████████████████████▎         | 87650/122310 [3:00:53<54:41, 10.56it/s][A
 72%|██████████████████████▉         | 87654/122310 [3:00:54<1:06:13,  8.72it/s][A
 72%|████████████████████████▎         | 87670/122310 [3:00:55<51:38, 11.18it/s][A
 72%|████████████████████████▎         | 87678/122310 [3:00:56<55:29, 10.40

step: 40320, loss: 85.40521975313321, epoch: 2



 72%|██████████████████████▉         | 87727/122310 [3:01:04<1:38:49,  5.83it/s][A
 72%|██████████████████████▉         | 87732/122310 [3:01:05<1:42:36,  5.62it/s][A
 72%|██████████████████████▉         | 87743/122310 [3:01:06<1:19:43,  7.23it/s][A
 72%|██████████████████████▉         | 87754/122310 [3:01:07<1:07:57,  8.47it/s][A
 72%|████████████████████████▍         | 87768/122310 [3:01:08<56:59, 10.10it/s][A
 72%|██████████████████████▉         | 87770/122310 [3:01:09<1:13:23,  7.84it/s][A
 72%|██████████████████████▉         | 87776/122310 [3:01:10<1:17:34,  7.42it/s][A
 72%|██████████████████████▉         | 87787/122310 [3:01:11<1:07:46,  8.49it/s][A
 72%|██████████████████████▉         | 87796/122310 [3:01:12<1:05:09,  8.83it/s][A
 72%|██████████████████████▉         | 87801/122310 [3:01:13<1:13:25,  7.83it/s][A
 72%|██████████████████████▉         | 87812/122310 [3:01:14<1:04:36,  8.90it/s][A
 72%|██████████████████████▉         | 87814/122310 [3:01:15<1:25:31,  6.72

step: 40340, loss: 85.51001826422784, epoch: 2



 72%|██████████████████████▉         | 87881/122310 [3:01:23<1:18:38,  7.30it/s][A
 72%|██████████████████████▉         | 87890/122310 [3:01:24<1:13:10,  7.84it/s][A
 72%|██████████████████████▉         | 87899/122310 [3:01:25<1:08:43,  8.34it/s][A
 72%|██████████████████████▉         | 87905/122310 [3:01:26<1:13:53,  7.76it/s][A
 72%|███████████████████████         | 87912/122310 [3:01:27<1:14:29,  7.70it/s][A
 72%|███████████████████████         | 87915/122310 [3:01:28<1:33:06,  6.16it/s][A
 72%|███████████████████████         | 87920/122310 [3:01:30<2:09:17,  4.43it/s][A
 72%|███████████████████████         | 87930/122310 [3:01:31<1:36:56,  5.91it/s][A
 72%|███████████████████████         | 87936/122310 [3:01:32<1:34:44,  6.05it/s][A
 72%|███████████████████████         | 87947/122310 [3:01:33<1:15:54,  7.55it/s][A
 72%|███████████████████████         | 87952/122310 [3:01:34<1:22:24,  6.95it/s][A
 72%|███████████████████████         | 87958/122310 [3:01:35<1:23:36,  6.85

step: 40360, loss: 113.26201067813389, epoch: 2



 72%|███████████████████████         | 88021/122310 [3:01:43<1:20:59,  7.06it/s][A
 72%|███████████████████████         | 88032/122310 [3:01:44<1:07:49,  8.42it/s][A
 72%|███████████████████████         | 88040/122310 [3:01:45<1:09:06,  8.26it/s][A
 72%|███████████████████████         | 88041/122310 [3:01:46<1:33:19,  6.12it/s][A
 72%|███████████████████████         | 88043/122310 [3:01:46<1:55:03,  4.96it/s][A
 72%|███████████████████████         | 88045/122310 [3:01:47<2:18:24,  4.13it/s][A
 72%|███████████████████████         | 88054/122310 [3:01:48<1:40:43,  5.67it/s][A
 72%|███████████████████████         | 88059/122310 [3:01:49<1:42:31,  5.57it/s][A
 72%|███████████████████████         | 88071/122310 [3:01:50<1:14:05,  7.70it/s][A
 72%|███████████████████████         | 88076/122310 [3:01:51<1:21:22,  7.01it/s][A
 72%|███████████████████████         | 88080/122310 [3:01:52<1:32:14,  6.19it/s][A
 72%|███████████████████████         | 88090/122310 [3:01:53<1:15:57,  7.51

step: 40380, loss: 68.89339876309751, epoch: 2



 72%|███████████████████████         | 88161/122310 [3:02:02<1:04:21,  8.84it/s][A
 72%|████████████████████████▌         | 88173/122310 [3:02:03<56:17, 10.11it/s][A
 72%|████████████████████████▌         | 88188/122310 [3:02:03<47:24, 12.00it/s][A
 72%|████████████████████████▌         | 88195/122310 [3:02:04<53:17, 10.67it/s][A
 72%|████████████████████████▌         | 88211/122310 [3:02:05<44:39, 12.72it/s][A
 72%|████████████████████████▌         | 88223/122310 [3:02:06<44:15, 12.84it/s][A
 72%|████████████████████████▌         | 88234/122310 [3:02:07<45:03, 12.60it/s][A
 72%|████████████████████████▌         | 88238/122310 [3:02:08<57:21,  9.90it/s][A
 72%|███████████████████████         | 88245/122310 [3:02:09<1:02:17,  9.11it/s][A
 72%|████████████████████████▌         | 88257/122310 [3:02:10<56:15, 10.09it/s][A
 72%|███████████████████████         | 88261/122310 [3:02:11<1:07:12,  8.44it/s][A
 72%|████████████████████████▌         | 88272/122310 [3:02:12<59:39,  9.51

step: 40400, loss: 78.43357458795349, epoch: 2
sim1 and sim2 are 0.4475030717111919, 0.19945724138292417
cosine of pred and queen: 0.19580946049999673
Actual: athens:greece::madrid:spain, pred: spain
Actual: bangkok:thailand::islamabad:pakistan, pred: pakistan
Actual: beijing:china::tokyo:japan, pred: japan
Actual: berlin:germany::rome:italy, pred: italy
Actual: cairo:egypt::ottawa:canada, pred: withs
Actual: kabul:afghanistan::hanoi:vietnam, pred: maladie
Actual: canberra:australia::doha:qatar, pred: qatar
Actual: stockholm:sweden::hanoi:vietnam, pred: henning
Actual: budapest:hungary::kathmandu:nepal, pred: attractiue
Actual: dhaka:bangladesh::kiev:ukraine, pred: ukraine
Actual: lisbon:portugal::riga:latvia, pred: aesthetes
Actual: india:asia::paris:europe, pred: hollande
Actual: china:asia::greece:europe, pred: athens
Actual: nigeria:africa::france:europe, pred: germany
Actual: kenya:africa::netherlands:europe, pred: ireland
Actual: mumbai:asia::nairobi:africa, pred: tickling
Actual

Actual: decided:undecided::pleasant:unpleasant, pred: immobility
Actual: cairo:egypt::manila:philippines, pred: lekakou
Actual: canberra:australia::dushanbe:tajikistan, pred: freighter
Actual: islamabad:pakistan::oslo:norway, pred: norway
Actual: grandfather:grandmother::father:mother, pred: brother
Actual: grandpa:grandma::sons:daughters, pred: brethren
Actual: king:queen::husband:wife, pred: respect
Actual: man:woman::brothers:sisters, pred: vicegerent
Actual: stepson:stepdaughter::stepfather:stepmother, pred: ravinder
Actual: uncle:aunt::grandson:granddaughter, pred: dumbness
Actual: fortunate:fortunately::efficient:efficiently, pred: shafik
Actual: free:freely::most:mostly, pred: acquitting
Actual: maharastra:india::kerala:india, pred: indian
Actual: maharastra:mumbai::kerala:thiruvananthapuram, pred: karnataka
Actual: tripura:agartala::odisha:bhubaneswar, pred: bhubaneswar
Actual: algeria:dinar::japan:yen, pred: assistants
Actual: argentina:peso::japan:yen, pred: dollar



 72%|██████████████████████▍        | 88338/122310 [3:03:57<36:53:34,  3.91s/it][A

Actual: india:rupee::denmark:krone, pred: finland
Accuracy is 0.14201183431952663



 72%|██████████████████████▍        | 88345/122310 [3:03:58<26:55:18,  2.85s/it][A
 72%|██████████████████████▍        | 88363/122310 [3:03:59<13:31:06,  1.43s/it][A
 72%|██████████████████████▍        | 88371/122310 [3:04:00<10:30:45,  1.12s/it][A
 72%|███████████████████████         | 88376/122310 [3:04:01<8:58:02,  1.05it/s][A
 72%|███████████████████████▏        | 88392/122310 [3:04:02<5:11:55,  1.81it/s][A
 72%|███████████████████████▏        | 88397/122310 [3:04:03<4:38:12,  2.03it/s][A
 72%|███████████████████████▏        | 88408/122310 [3:04:04<3:19:50,  2.83it/s][A
 72%|███████████████████████▏        | 88421/122310 [3:04:05<2:22:01,  3.98it/s][A
 72%|███████████████████████▏        | 88435/122310 [3:04:06<1:44:54,  5.38it/s][A
 72%|███████████████████████▏        | 88441/122310 [3:04:07<1:43:03,  5.48it/s][A
 72%|███████████████████████▏        | 88446/122310 [3:04:08<1:44:34,  5.40it/s][A
 72%|███████████████████████▏        | 88457/122310 [3:04:09<1:25:28,  6.60

step: 40420, loss: 63.595077266970804, epoch: 2



 72%|███████████████████████▏        | 88526/122310 [3:04:18<1:15:04,  7.50it/s][A
 72%|███████████████████████▏        | 88539/122310 [3:04:19<1:02:30,  9.00it/s][A
 72%|███████████████████████▏        | 88543/122310 [3:04:20<1:15:46,  7.43it/s][A
 72%|███████████████████████▏        | 88545/122310 [3:04:22<2:04:52,  4.51it/s][A
 72%|███████████████████████▏        | 88555/122310 [3:04:23<1:35:39,  5.88it/s][A
 72%|███████████████████████▏        | 88561/122310 [3:04:24<1:35:18,  5.90it/s][A
 72%|███████████████████████▏        | 88570/122310 [3:04:25<1:24:17,  6.67it/s][A
 72%|███████████████████████▏        | 88582/122310 [3:04:26<1:10:29,  7.97it/s][A
 72%|███████████████████████▏        | 88587/122310 [3:04:28<1:41:04,  5.56it/s][A
 72%|███████████████████████▏        | 88590/122310 [3:04:29<1:52:43,  4.99it/s][A
 72%|███████████████████████▏        | 88598/122310 [3:04:30<1:35:58,  5.85it/s][A
 72%|███████████████████████▏        | 88605/122310 [3:04:31<1:30:15,  6.22

step: 40440, loss: 73.92833585525645, epoch: 2



 73%|███████████████████████▏        | 88676/122310 [3:04:38<1:01:14,  9.15it/s][A
 73%|███████████████████████▏        | 88681/122310 [3:04:39<1:10:02,  8.00it/s][A
 73%|███████████████████████▏        | 88691/122310 [3:04:40<1:04:58,  8.62it/s][A
 73%|███████████████████████▏        | 88700/122310 [3:04:41<1:03:55,  8.76it/s][A
 73%|████████████████████████▋         | 88714/122310 [3:04:42<54:31, 10.27it/s][A
 73%|████████████████████████▋         | 88725/122310 [3:04:43<53:06, 10.54it/s][A
 73%|████████████████████████▋         | 88736/122310 [3:04:44<52:09, 10.73it/s][A
 73%|███████████████████████▏        | 88739/122310 [3:04:44<1:05:23,  8.56it/s][A
 73%|████████████████████████▋         | 88752/122310 [3:04:45<56:07,  9.96it/s][A
 73%|███████████████████████▏        | 88757/122310 [3:04:47<1:07:09,  8.33it/s][A
 73%|███████████████████████▏        | 88764/122310 [3:04:48<1:10:23,  7.94it/s][A
 73%|███████████████████████▏        | 88768/122310 [3:04:48<1:21:48,  6.83

step: 40460, loss: 72.23323404428186, epoch: 2



 73%|███████████████████████▏        | 88860/122310 [3:04:57<1:04:07,  8.69it/s][A
 73%|███████████████████████▎        | 88866/122310 [3:04:58<1:09:24,  8.03it/s][A
 73%|████████████████████████▋         | 88883/122310 [3:04:59<51:02, 10.91it/s][A
 73%|████████████████████████▋         | 88893/122310 [3:05:00<51:35, 10.80it/s][A
 73%|████████████████████████▋         | 88915/122310 [3:05:01<38:28, 14.47it/s][A
 73%|████████████████████████▋         | 88922/122310 [3:05:02<45:11, 12.31it/s][A
 73%|███████████████████████▎        | 88927/122310 [3:05:04<1:10:26,  7.90it/s][A
 73%|███████████████████████▎        | 88935/122310 [3:05:05<1:09:44,  7.98it/s][A
 73%|███████████████████████▎        | 88943/122310 [3:05:06<1:09:35,  7.99it/s][A
 73%|███████████████████████▎        | 88952/122310 [3:05:07<1:06:19,  8.38it/s][A
 73%|████████████████████████▋         | 88964/122310 [3:05:08<58:22,  9.52it/s][A
 73%|████████████████████████▋         | 88974/122310 [3:05:09<56:32,  9.83

step: 40480, loss: 80.08680085950014, epoch: 2



 73%|████████████████████████▊         | 89046/122310 [3:05:17<46:49, 11.84it/s][A
 73%|████████████████████████▊         | 89057/122310 [3:05:18<46:21, 11.96it/s][A
 73%|████████████████████████▊         | 89070/122310 [3:05:19<45:16, 12.24it/s][A
 73%|████████████████████████▊         | 89082/122310 [3:05:20<45:21, 12.21it/s][A
 73%|████████████████████████▊         | 89090/122310 [3:05:21<50:29, 10.97it/s][A
 73%|████████████████████████▊         | 89103/122310 [3:05:22<47:27, 11.66it/s][A
 73%|████████████████████████▊         | 89123/122310 [3:05:23<38:34, 14.34it/s][A
 73%|████████████████████████▊         | 89130/122310 [3:05:24<46:29, 11.89it/s][A
 73%|████████████████████████▊         | 89141/122310 [3:05:25<47:46, 11.57it/s][A
 73%|████████████████████████▊         | 89153/122310 [3:05:26<46:30, 11.88it/s][A
 73%|████████████████████████▊         | 89159/122310 [3:05:27<53:37, 10.30it/s][A
 73%|███████████████████████▎        | 89163/122310 [3:05:28<1:04:03,  8.62

step: 40500, loss: 99.27689447904763, epoch: 2



 73%|███████████████████████▎        | 89223/122310 [3:05:36<1:04:53,  8.50it/s][A
 73%|████████████████████████▊         | 89235/122310 [3:05:37<58:02,  9.50it/s][A
 73%|████████████████████████▊         | 89249/122310 [3:05:38<50:45, 10.86it/s][A
 73%|████████████████████████▊         | 89256/122310 [3:05:39<57:06,  9.65it/s][A
 73%|████████████████████████▊         | 89266/122310 [3:05:40<56:17,  9.78it/s][A
 73%|████████████████████████▊         | 89284/122310 [3:05:41<45:01, 12.22it/s][A
 73%|████████████████████████▊         | 89287/122310 [3:05:42<57:34,  9.56it/s][A
 73%|████████████████████████▊         | 89299/122310 [3:05:43<51:53, 10.60it/s][A
 73%|████████████████████████▊         | 89313/122310 [3:05:44<46:56, 11.71it/s][A
 73%|████████████████████████▊         | 89328/122310 [3:05:45<43:58, 12.50it/s][A
 73%|████████████████████████▊         | 89337/122310 [3:05:46<49:08, 11.18it/s][A
 73%|███████████████████████▎        | 89339/122310 [3:05:47<1:05:52,  8.34

step: 40520, loss: 95.23860885674287, epoch: 2



 73%|███████████████████████▍        | 89394/122310 [3:05:56<1:25:34,  6.41it/s][A
 73%|███████████████████████▍        | 89399/122310 [3:05:57<1:29:41,  6.12it/s][A
 73%|███████████████████████▍        | 89407/122310 [3:05:58<1:21:07,  6.76it/s][A
 73%|███████████████████████▍        | 89411/122310 [3:05:59<1:31:40,  5.98it/s][A
 73%|███████████████████████▍        | 89423/122310 [3:06:00<1:08:10,  8.04it/s][A
 73%|███████████████████████▍        | 89426/122310 [3:06:01<1:24:29,  6.49it/s][A
 73%|███████████████████████▍        | 89432/122310 [3:06:01<1:23:48,  6.54it/s][A
 73%|███████████████████████▍        | 89441/122310 [3:06:02<1:13:36,  7.44it/s][A
 73%|████████████████████████▊         | 89459/122310 [3:06:03<51:04, 10.72it/s][A
 73%|████████████████████████▊         | 89466/122310 [3:06:04<56:46,  9.64it/s][A
 73%|████████████████████████▊         | 89475/122310 [3:06:05<56:20,  9.71it/s][A
 73%|████████████████████████▉         | 89487/122310 [3:06:06<51:32, 10.61

step: 40540, loss: 71.30520862679445, epoch: 2



 73%|███████████████████████▍        | 89554/122310 [3:06:16<1:17:36,  7.03it/s][A
 73%|███████████████████████▍        | 89566/122310 [3:06:17<1:02:27,  8.74it/s][A
 73%|███████████████████████▍        | 89573/122310 [3:06:18<1:06:46,  8.17it/s][A
 73%|███████████████████████▍        | 89577/122310 [3:06:19<1:18:29,  6.95it/s][A
 73%|███████████████████████▍        | 89589/122310 [3:06:20<1:02:50,  8.68it/s][A
 73%|███████████████████████▍        | 89595/122310 [3:06:20<1:07:17,  8.10it/s][A
 73%|███████████████████████▍        | 89599/122310 [3:06:21<1:19:42,  6.84it/s][A
 73%|███████████████████████▍        | 89606/122310 [3:06:22<1:19:17,  6.87it/s][A
 73%|████████████████████████▉         | 89620/122310 [3:06:23<59:49,  9.11it/s][A
 73%|███████████████████████▍        | 89627/122310 [3:06:24<1:03:49,  8.53it/s][A
 73%|███████████████████████▍        | 89633/122310 [3:06:25<1:10:35,  7.71it/s][A
 73%|███████████████████████▍        | 89639/122310 [3:06:26<1:15:28,  7.22

KeyboardInterrupt: 

In [12]:
# Housekeeping: recursively delete the .ipynb_checkpoints entry found in the
# current working directory (explicit alias-magic form instead of automagic).
%rm -r .ipynb_checkpoints

In [8]:
# List the contents of the current working directory (explicit alias-magic
# form instead of relying on automagic name resolution).
%ls

 Analogy_dataset.txt   skip-gram-final.ipynb    Validation.txt
 [0m[01;34mdata[0m/                 skip-gram-pre-v1.ipynb   w2vecbow_v4.pkl
 gutenberg.txt        [01;34m'Untitled Folder'[0m/


In [9]:
# Move the kernel's working directory into data/data, resolved relative to
# the directory the kernel is currently in.
%cd data/data

/home/pranav/jupyter_dir/Skip Gram/data/data


In [15]:
# Report the kernel's current working directory.
%pwd

'/home/pranav/jupyter_dir/Skip Gram'