In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [2]:
import os
import csv
from tqdm import tqdm
import argparse
from glob import glob
import faiss
from multiprocessing import Pool, cpu_count
from math import ceil
from collections import defaultdict


In [4]:

#loading question and answer embeddings of train and validation data from disk
train_embeds=pd.read_csv('re_train_embeds.csv')
validation_embeds=pd.read_csv('re_validation_embeds.csv')

In [5]:

#function to preprocess question and answer embeddings post loading from disk
def preprocess_embeds(embed):
  output=[]
  embed=embed[2:-2]
  embed=embed.replace('\n','')
  for i in embed.split(' '):
    if i!=' ' and i!='':
      output.append(i)
  return (output)

     

In [6]:
#preprocessing embeddings post loading from disk for train data
from tqdm.notebook import tqdm
tqdm.pandas()
train_embeds['question_embeds']=train_embeds.short_question_embed_numpy.progress_apply(lambda x: preprocess_embeds(x))
train_embeds['answer_embeds']=train_embeds.short_answer_embed_numpy.progress_apply(lambda x: preprocess_embeds(x))

  0%|          | 0/5003 [00:00<?, ?it/s]

  0%|          | 0/5003 [00:00<?, ?it/s]

In [7]:
#preprocessing embeddings post loading from validation data
validation_embeds['question_embeds']=validation_embeds.short_question_embed_numpy.progress_apply(lambda x: preprocess_embeds(x))
validation_embeds['answer_embeds']=validation_embeds.short_answer_embed_numpy.progress_apply(lambda x: preprocess_embeds(x))

  0%|          | 0/504 [00:00<?, ?it/s]

  0%|          | 0/504 [00:00<?, ?it/s]

In [8]:
#function to l2 normalize each embedding
def normalize(x):
  x=np.array(x,dtype=np.float64)
  return (x)/(np.linalg.norm(x))

In [9]:
#normalizing train question and answer embeddings
train_embeds['question_embeds_norm']=train_embeds['question_embeds'].apply(lambda x: normalize(x))
train_embeds['answer_embeds_norm']=train_embeds['answer_embeds'].apply(lambda x: normalize(x))

In [10]:
#normalizing validation question and answer embeddings
validation_embeds['question_embeds_norm']=validation_embeds['question_embeds'].apply(lambda x: normalize(x))
validation_embeds['answer_embeds_norm']=validation_embeds['answer_embeds'].apply(lambda x: normalize(x))
     

In [11]:
#Cleaning the gpt train data
train_gpt_data=train_embeds[['short_question','short_answer','question_embeds_norm','answer_embeds_norm']].copy()
train_gpt_data.columns=['question','answer','Q_FFNN_embeds','A_FFNN_embeds']
train_gpt_data.head(3)

Unnamed: 0,question,answer,Q_FFNN_embeds,A_FFNN_embeds
0,can an antibiotic through an iv give you a ras...,yes it can even after you have finished the pr...,"[-0.01421890905663768, 0.025619907450341046, 0...","[-0.007459985581738011, -0.02122592276723626, ..."
1,can you test positive from having the hep b va...,test positive for what if you had a hep b vacc...,"[-0.013814892581875434, 0.02668001998051444, 0...","[0.00223136463861142, -0.012820215500427647, 0..."
2,what are the dietary restrictions for celiac d...,omitting gluten from the diet is the key to co...,"[-0.014280090332285491, 0.02902447327765528, 0...","[-0.005688997413084786, -0.02194953130653521, ..."


In [12]:
#saving gpt training data to disk
train_gpt_data.to_pickle("./train_gpt_data.pkl")
     

In [13]:
#cleaning gpt validation data
validation_gpt_data=validation_embeds[['short_question','short_answer','question_embeds_norm','answer_embeds_norm']].copy()
validation_gpt_data.columns=['question','answer','Q_FFNN_embeds','A_FFNN_embeds']
validation_gpt_data.head(3)

Unnamed: 0,question,answer,Q_FFNN_embeds,A_FFNN_embeds
0,do i have a yeast infection,hi this can be a vaginal fungal infection whic...,"[-0.008608112898811646, 0.025272781187763167, ...","[-0.0087260827168495, -0.025829989834866878, 0..."
1,i need to buy health insurance asap what do i ...,go to healthcare gov call my husband 407 222 9...,"[-0.0022776732616655664, 0.02531196420973276, ...","[-0.010670890831805894, 0.012798066486932491, ..."
2,i had an acute ebv antibody test done and my r...,hi yes a chronic ebv infection also can be rul...,"[-0.007839831409961463, 0.024712667389081715, ...","[-0.0025002140233858117, -0.020223583268815057..."


In [14]:
#saving the gpt validation data to disk
validation_gpt_data.to_pickle("./validation_gpt_data.pkl")

In [15]:

#loading train and validation gpt data from disk
train_gpt_data=pd.read_pickle('./train_gpt_data.pkl')
validation_gpt_data=pd.read_pickle('./validation_gpt_data.pkl')

In [16]:
train_gpt_data.shape,validation_gpt_data.shape

((5003, 4), (504, 4))

In [17]:

#displaying the train gpt data
train_gpt_data.head(5)

Unnamed: 0,question,answer,Q_FFNN_embeds,A_FFNN_embeds
0,can an antibiotic through an iv give you a ras...,yes it can even after you have finished the pr...,"[-0.01421890905663768, 0.025619907450341046, 0...","[-0.007459985581738011, -0.02122592276723626, ..."
1,can you test positive from having the hep b va...,test positive for what if you had a hep b vacc...,"[-0.013814892581875434, 0.02668001998051444, 0...","[0.00223136463861142, -0.012820215500427647, 0..."
2,what are the dietary restrictions for celiac d...,omitting gluten from the diet is the key to co...,"[-0.014280090332285491, 0.02902447327765528, 0...","[-0.005688997413084786, -0.02194953130653521, ..."
3,i have had a pneumonia shot can i get either a...,you can always catch an illness from a child b...,"[-0.0051098851649243136, 0.026251631984036797,...","[-0.0028335161187219376, -0.017787302858114903..."
4,my baby ate her on poop my baby ate poop 4 day...,hi one of ours did that she is now 47 and a mu...,"[-0.006687587964836193, 0.023149305110760594, ...","[-0.00492109446937413, -0.02058334904862069, 0..."


In [18]:
import os
import csv
from tqdm import tqdm
import argparse
from glob import glob
import faiss
from multiprocessing import Pool, cpu_count
from math import ceil
from collections import defaultdict

#creating faiss index for semantic search 
#https://github.com/ash3n/DocProduct/blob/master/docproduct/train_embedding_to_gpt2_data.py
qa = pd.read_pickle('./train_gpt_data.pkl')
question_bert = qa["Q_FFNN_embeds"].tolist()
answer_bert = qa["A_FFNN_embeds"].tolist()
question_bert = np.array(question_bert)
answer_bert = np.array(answer_bert)

question_bert = question_bert.astype('float32')
answer_bert = answer_bert.astype('float32')
answer_index = faiss.IndexFlatIP(answer_bert.shape[-1])
answer_index.add(answer_bert)

In [21]:

#defining function to prepare the actual gpt training data by retrieving similar question and answer pairs
#https://github.com/ash3n/DocProduct/blob/master/docproduct/train_embedding_to_gpt2_data.py
def preparing_gpt_training_data(question,answer,question_embedding):
  topk=20
  scores,indices=answer_index.search(
                  np.array([question_embedding]).astype('float32'), topk)
  q_sub=qa.iloc[indices.reshape(20)]
  
  line = '`QUESTION: %s `ANSWER: %s' % (
                        question, answer)
  encoded_len=len(tokenizer.encode(line))
  for i in q_sub.iterrows():
    line='`QUESTION: %s `ANSWER: %s ' % (i[1]['question'],i[1]['answer']) + line
    line=line.replace('\n','')
    encoded_len=len(tokenizer.encode(line))
    if encoded_len>=1024:
      break
  return tokenizer.encode(line)[-1024:]
     

In [23]:
#loading pre-trained gpt2 tokenizer and gpt2 model
from transformers import GPT2Tokenizer,TFGPT2LMHeadModel
tokenizer=GPT2Tokenizer.from_pretrained("gpt2")
tf_gpt2_model=TFGPT2LMHeadModel.from_pretrained("gpt2")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [24]:
qa['gpt_data']=qa.progress_apply(lambda x: preparing_gpt_training_data(x.question,x.answer,x.Q_FFNN_embeds),axis=1)

  0%|          | 0/5003 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1031 > 1024). Running this sequence through the model will result in indexing errors


In [26]:
validation_gpt_data['gpt_data']=validation_gpt_data.progress_apply(lambda x: preparing_gpt_training_data(x.question,x.answer,x.Q_FFNN_embeds),axis=1)
     

  0%|          | 0/504 [00:00<?, ?it/s]

In [27]:
#function to get the start of positive mask to be used while creating the masks
def mask_start(gpt_data):
  return 1024-gpt_data[::-1].index(4600)+1

In [28]:

#finding the mask start for the train dataset
qa['mask_start']=qa.gpt_data.progress_apply(lambda x: mask_start(x))

  0%|          | 0/5003 [00:00<?, ?it/s]

In [29]:

#finding the mask start for the validation dataset
validation_gpt_data['mask_start']=validation_gpt_data.gpt_data.progress_apply(lambda x: mask_start(x))

  0%|          | 0/504 [00:00<?, ?it/s]

In [30]:
#saving train data to disk
qa.to_pickle('./qa_gpt_inbuild.pkl')
     

In [31]:

#finding length of the gpt data for train dataset
qa['gpt_lens']=qa.gpt_data.apply(lambda x: len(x))

In [32]:
#finding length of the gpt data for validation dataset
validation_gpt_data['gpt_lens']=validation_gpt_data.gpt_data.apply(lambda x: len(x))

In [33]:

#finding number of train datapoints that has sequence length less than 1024
qa[qa.gpt_lens<1024].shape

(54, 7)

In [34]:
#finding number of validation datapoints that has sequence length less than 1024
validation_gpt_data[validation_gpt_data.gpt_lens<1024].shape
     

(1, 7)

In [35]:

#finding train datapoints that has sequence length of 1024
gpt_data_cleaned=qa[qa.gpt_lens==1024]
gpt_data_cleaned.shape

(4949, 7)

In [36]:
#finding validation datapoints that has sequence length of 1024
validation_gpt_data_cleaned=validation_gpt_data[validation_gpt_data.gpt_lens==1024]
validation_gpt_data_cleaned.shape

(503, 7)

In [37]:

#function to create the loss mask given mask start
def return_loss_mask(mask_start):
  return [0]*mask_start+[1]*(1024-mask_start)
     

In [38]:
#creating loss mask for train data
gpt_data_cleaned['mask']=gpt_data_cleaned.mask_start.apply(lambda x: return_loss_mask(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gpt_data_cleaned['mask']=gpt_data_cleaned.mask_start.apply(lambda x: return_loss_mask(x))


In [39]:
#creating loss mask for validation data
validation_gpt_data_cleaned['mask']=validation_gpt_data_cleaned.mask_start.apply(lambda x: return_loss_mask(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  validation_gpt_data_cleaned['mask']=validation_gpt_data_cleaned.mask_start.apply(lambda x: return_loss_mask(x))


In [40]:

#saving train gpt data with loss mask to disk
gpt_data_cleaned.to_pickle('./qa_gpt_inbuild_cleaned.pkl')

In [41]:

#saving validation gpt data with loss mask to disk
validation_gpt_data_cleaned.to_pickle('./validation_gpt_inbuild_cleaned.pkl')

In [42]:
#loading gpt train and validation data from disk
gpt_data_cleaned=pd.read_pickle('./qa_gpt_inbuild_cleaned.pkl')
validation_gpt_data_cleaned=pd.read_pickle('./validation_gpt_inbuild_cleaned.pkl')
gpt_data_cleaned.shape,validation_gpt_data_cleaned.shape

((4949, 8), (503, 8))

In [44]:
#extracting gpt data for train data
gpt_data=[]
for i in gpt_data_cleaned.gpt_data.values:
  gpt_data.append(i)
gpt_data=np.array(gpt_data)
gpt_data.shape

(4949, 1024)

In [45]:
#extracting gpt data for validation data
validation_gpt_data=[]
for i in validation_gpt_data_cleaned.gpt_data.values:
  validation_gpt_data.append(i)
validation_gpt_data=np.array(validation_gpt_data)
validation_gpt_data.shape

(503, 1024)

In [46]:
#extracting loss mask for train data
loss_mask=[]
for i in gpt_data_cleaned['mask'].values:
  loss_mask.append(i)
loss_mask=np.array(loss_mask)
loss_mask.shape
     

(4949, 1024)

In [47]:
#extracting loss mask for validation data
validation_loss_mask=[]
for i in validation_gpt_data_cleaned['mask'].values:
  validation_loss_mask.append(i)
validation_loss_mask=np.array(validation_loss_mask)
validation_loss_mask.shape

(503, 1024)

In [48]:

#https://blog.tensorflow.org/2019/05/transformer-chatbot-tutorial-with-tensorflow-2.html
#preparing the gpt train dataset
import tensorflow as tf
BATCH_SIZE = 1
BUFFER_SIZE = 20000
gpt_train_dataset = tf.data.Dataset.from_tensor_slices((
    {
        'context': gpt_data,

        'label': gpt_data[:,1:],
        'loss_mask':loss_mask[:,:-1]
    },
))


gpt_train_dataset = gpt_train_dataset.shuffle(BUFFER_SIZE)
gpt_train_dataset = gpt_train_dataset.batch(BATCH_SIZE)
gpt_train_dataset = gpt_train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
     

In [49]:
#https://blog.tensorflow.org/2019/05/transformer-chatbot-tutorial-with-tensorflow-2.html
#preparing the gpt validation dataset
import tensorflow as tf
BATCH_SIZE = 1
BUFFER_SIZE = 20000
validation_gpt_train_dataset = tf.data.Dataset.from_tensor_slices((
    {
        'context': validation_gpt_data,

        'label': validation_gpt_data[:,1:],
        'loss_mask':validation_loss_mask[:,:-1]
    },
))

validation_gpt_train_dataset = validation_gpt_train_dataset.shuffle(BUFFER_SIZE)
validation_gpt_train_dataset = validation_gpt_train_dataset.batch(BATCH_SIZE)
validation_gpt_train_dataset = validation_gpt_train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
     

In [50]:

#displaying sample gpt training data
next(iter(gpt_train_dataset))
     

({'context': <tf.Tensor: shape=(1, 1024), dtype=int32, numpy=array([[ 423,  407, 1760, ...,  714, 1037,  517]])>,
  'label': <tf.Tensor: shape=(1, 1023), dtype=int32, numpy=array([[ 407, 1760,  616, ...,  714, 1037,  517]])>,
  'loss_mask': <tf.Tensor: shape=(1, 1023), dtype=int32, numpy=array([[0, 0, 0, ..., 1, 1, 1]])>},)

In [51]:
#training the gpt2 model
learning_rate=0.0001
optim=tf.optimizers.Adam(learning_rate)
j=0
loss_cum=0
epoch_loss=0
stop_training=False
epochs=1
for epoch in range(epochs):
  print('epoch',epoch)
  for i in tqdm(gpt_train_dataset):
    with tf.GradientTape() as tape:
      context=i[0]['context']
      loss_mask=tf.cast(i[0]['loss_mask'],tf.float32)
      output=tf_gpt2_model(context,training=True)['logits']
      loss_value = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=np.array(i[0]['label']), logits=output[ :,:-1])*loss_mask)
      j=j+1
      loss_cum+=loss_value
      epoch_loss+=loss_value/100
      if j%50==0:
        print(loss_cum/50)
        grads=tape.gradient(loss_cum/50,tf_gpt2_model.trainable_variables)
        optim.apply_gradients(zip(grads,tf_gpt2_model.trainable_variables))
        loss_cum=0
      if tf.math.is_nan(loss_value):
        print('loss became nan')
        stop_training=True
        break
      if j%10000==0:
        tf_gpt2_model.save_pretrained("tf_gpt2_model_2_10"+str(epoch)+'_'+str(j))
  print('epoch_loss by 100',epoch_loss/237)
  epoch_loss=0
  if stop_training:
    break
  tf_gpt2_model.save_pretrained("tf_gpt2_model_2_10"+str(epoch))

epoch 0


  1%|▊                                                                             | 49/4949 [04:08<6:30:11,  4.78s/it]

tf.Tensor(497.96515, shape=(), dtype=float32)


  2%|█▌                                                                            | 99/4949 [08:13<6:20:53,  4.71s/it]

tf.Tensor(384.63486, shape=(), dtype=float32)


  3%|██▎                                                                          | 149/4949 [12:19<6:11:32,  4.64s/it]

tf.Tensor(400.5328, shape=(), dtype=float32)


  4%|███                                                                          | 199/4949 [16:23<6:11:25,  4.69s/it]

tf.Tensor(370.24014, shape=(), dtype=float32)


  5%|███▊                                                                         | 249/4949 [20:35<6:13:15,  4.76s/it]

tf.Tensor(500.93015, shape=(), dtype=float32)


  6%|████▋                                                                        | 299/4949 [24:44<6:14:13,  4.83s/it]

tf.Tensor(330.01758, shape=(), dtype=float32)


  7%|█████▍                                                                       | 349/4949 [28:51<6:23:26,  5.00s/it]

tf.Tensor(412.23483, shape=(), dtype=float32)


  8%|██████▏                                                                      | 399/4949 [32:57<6:09:36,  4.87s/it]

tf.Tensor(425.42566, shape=(), dtype=float32)


  9%|██████▉                                                                      | 449/4949 [37:06<6:01:36,  4.82s/it]

tf.Tensor(349.67508, shape=(), dtype=float32)


 10%|███████▊                                                                     | 499/4949 [41:11<5:46:16,  4.67s/it]

tf.Tensor(333.39423, shape=(), dtype=float32)


 11%|████████▌                                                                    | 549/4949 [45:15<5:43:28,  4.68s/it]

tf.Tensor(389.4888, shape=(), dtype=float32)


 12%|█████████▎                                                                   | 599/4949 [49:17<5:41:27,  4.71s/it]

tf.Tensor(446.5237, shape=(), dtype=float32)


 13%|██████████                                                                   | 649/4949 [53:18<5:33:04,  4.65s/it]

tf.Tensor(355.37863, shape=(), dtype=float32)


 14%|██████████▉                                                                  | 699/4949 [57:17<5:28:28,  4.64s/it]

tf.Tensor(443.5714, shape=(), dtype=float32)


 15%|███████████▎                                                               | 749/4949 [1:01:17<5:22:57,  4.61s/it]

tf.Tensor(317.95282, shape=(), dtype=float32)


 16%|████████████                                                               | 799/4949 [1:05:18<5:22:04,  4.66s/it]

tf.Tensor(442.6173, shape=(), dtype=float32)


 17%|████████████▊                                                              | 849/4949 [1:09:18<5:17:56,  4.65s/it]

tf.Tensor(317.94595, shape=(), dtype=float32)


 18%|█████████████▌                                                             | 899/4949 [1:13:18<5:13:20,  4.64s/it]

tf.Tensor(385.86124, shape=(), dtype=float32)


 19%|██████████████▍                                                            | 949/4949 [1:17:19<5:10:59,  4.66s/it]

tf.Tensor(392.66824, shape=(), dtype=float32)


 20%|███████████████▏                                                           | 999/4949 [1:21:19<5:04:50,  4.63s/it]

tf.Tensor(338.4593, shape=(), dtype=float32)


 21%|███████████████▋                                                          | 1049/4949 [1:25:21<5:01:21,  4.64s/it]

tf.Tensor(328.72263, shape=(), dtype=float32)


 22%|████████████████▍                                                         | 1099/4949 [1:29:22<5:04:12,  4.74s/it]

tf.Tensor(391.1426, shape=(), dtype=float32)


 23%|█████████████████▏                                                        | 1149/4949 [1:33:30<5:04:26,  4.81s/it]

tf.Tensor(369.96902, shape=(), dtype=float32)


 24%|█████████████████▉                                                        | 1199/4949 [1:37:40<5:05:44,  4.89s/it]

tf.Tensor(278.24466, shape=(), dtype=float32)


 25%|██████████████████▋                                                       | 1249/4949 [1:41:48<4:53:04,  4.75s/it]

tf.Tensor(426.24377, shape=(), dtype=float32)


 26%|███████████████████▍                                                      | 1299/4949 [1:45:49<4:30:21,  4.44s/it]

tf.Tensor(357.11356, shape=(), dtype=float32)


 27%|████████████████████▏                                                     | 1349/4949 [1:49:40<4:38:52,  4.65s/it]

tf.Tensor(366.20813, shape=(), dtype=float32)


 28%|████████████████████▉                                                     | 1399/4949 [1:53:28<4:19:37,  4.39s/it]

tf.Tensor(409.06335, shape=(), dtype=float32)


 29%|█████████████████████▋                                                    | 1449/4949 [1:57:16<4:46:50,  4.92s/it]

tf.Tensor(402.1869, shape=(), dtype=float32)


 30%|██████████████████████▍                                                   | 1499/4949 [2:01:05<4:13:28,  4.41s/it]

tf.Tensor(417.61383, shape=(), dtype=float32)


 31%|███████████████████████▏                                                  | 1549/4949 [2:04:56<4:14:12,  4.49s/it]

tf.Tensor(345.27893, shape=(), dtype=float32)


 32%|███████████████████████▉                                                  | 1599/4949 [2:08:45<4:10:13,  4.48s/it]

tf.Tensor(425.90118, shape=(), dtype=float32)


 33%|████████████████████████▋                                                 | 1649/4949 [2:12:36<4:04:12,  4.44s/it]

tf.Tensor(338.26282, shape=(), dtype=float32)


 34%|█████████████████████████▍                                                | 1699/4949 [2:16:26<4:00:04,  4.43s/it]

tf.Tensor(457.87463, shape=(), dtype=float32)


 35%|██████████████████████████▏                                               | 1749/4949 [2:20:17<3:55:59,  4.42s/it]

tf.Tensor(393.09344, shape=(), dtype=float32)


 36%|██████████████████████████▉                                               | 1799/4949 [2:24:10<3:51:01,  4.40s/it]

tf.Tensor(420.3781, shape=(), dtype=float32)


 37%|███████████████████████████▋                                              | 1849/4949 [2:27:59<3:47:49,  4.41s/it]

tf.Tensor(396.62692, shape=(), dtype=float32)


 38%|████████████████████████████▍                                             | 1899/4949 [2:31:49<3:44:45,  4.42s/it]

tf.Tensor(444.51157, shape=(), dtype=float32)


 39%|█████████████████████████████▏                                            | 1949/4949 [2:35:43<3:42:20,  4.45s/it]

tf.Tensor(431.2471, shape=(), dtype=float32)


 40%|█████████████████████████████▉                                            | 1999/4949 [2:39:34<3:43:14,  4.54s/it]

tf.Tensor(450.29022, shape=(), dtype=float32)


 41%|██████████████████████████████▋                                           | 2049/4949 [2:44:10<4:31:36,  5.62s/it]

tf.Tensor(375.19165, shape=(), dtype=float32)


 42%|███████████████████████████████▍                                          | 2099/4949 [2:48:19<3:38:55,  4.61s/it]

tf.Tensor(329.60822, shape=(), dtype=float32)


 43%|████████████████████████████████▏                                         | 2149/4949 [2:52:17<3:34:31,  4.60s/it]

tf.Tensor(413.21036, shape=(), dtype=float32)


 44%|████████████████████████████████▉                                         | 2199/4949 [2:56:18<3:31:56,  4.62s/it]

tf.Tensor(420.6994, shape=(), dtype=float32)


 45%|█████████████████████████████████▋                                        | 2249/4949 [3:00:16<3:28:13,  4.63s/it]

tf.Tensor(401.29575, shape=(), dtype=float32)


 46%|██████████████████████████████████▍                                       | 2299/4949 [3:04:15<3:23:10,  4.60s/it]

tf.Tensor(390.10202, shape=(), dtype=float32)


 47%|███████████████████████████████████                                       | 2349/4949 [3:08:12<3:17:50,  4.57s/it]

tf.Tensor(358.27454, shape=(), dtype=float32)


 48%|███████████████████████████████████▊                                      | 2399/4949 [3:12:14<3:20:25,  4.72s/it]

tf.Tensor(344.6356, shape=(), dtype=float32)


 49%|████████████████████████████████████▌                                     | 2449/4949 [3:16:26<3:49:21,  5.50s/it]

tf.Tensor(411.663, shape=(), dtype=float32)


 50%|█████████████████████████████████████▎                                    | 2499/4949 [3:21:33<3:59:11,  5.86s/it]

tf.Tensor(365.70352, shape=(), dtype=float32)


 52%|██████████████████████████████████████                                    | 2549/4949 [3:26:15<3:07:37,  4.69s/it]

tf.Tensor(352.98837, shape=(), dtype=float32)


 53%|██████████████████████████████████████▊                                   | 2599/4949 [3:30:18<3:06:07,  4.75s/it]

tf.Tensor(375.19293, shape=(), dtype=float32)


 54%|███████████████████████████████████████▌                                  | 2649/4949 [3:34:18<3:05:48,  4.85s/it]

tf.Tensor(313.02286, shape=(), dtype=float32)


 55%|████████████████████████████████████████▎                                 | 2699/4949 [3:38:19<2:52:27,  4.60s/it]

tf.Tensor(283.46005, shape=(), dtype=float32)


 56%|█████████████████████████████████████████                                 | 2749/4949 [3:42:21<2:48:27,  4.59s/it]

tf.Tensor(365.62833, shape=(), dtype=float32)


 57%|█████████████████████████████████████████▊                                | 2799/4949 [3:46:19<2:43:52,  4.57s/it]

tf.Tensor(407.7548, shape=(), dtype=float32)


 58%|██████████████████████████████████████████▌                               | 2849/4949 [3:50:18<2:42:48,  4.65s/it]

tf.Tensor(335.17728, shape=(), dtype=float32)


 59%|███████████████████████████████████████████▎                              | 2899/4949 [3:54:16<2:36:49,  4.59s/it]

tf.Tensor(409.28113, shape=(), dtype=float32)


 60%|████████████████████████████████████████████                              | 2949/4949 [3:58:13<2:32:37,  4.58s/it]

tf.Tensor(390.73062, shape=(), dtype=float32)


 61%|████████████████████████████████████████████▊                             | 2999/4949 [4:02:10<2:29:50,  4.61s/it]

tf.Tensor(426.93628, shape=(), dtype=float32)


 62%|█████████████████████████████████████████████▌                            | 3049/4949 [4:06:07<2:25:44,  4.60s/it]

tf.Tensor(343.3544, shape=(), dtype=float32)


 63%|██████████████████████████████████████████████▎                           | 3099/4949 [4:10:04<2:20:52,  4.57s/it]

tf.Tensor(325.1101, shape=(), dtype=float32)


 64%|███████████████████████████████████████████████                           | 3149/4949 [4:14:03<2:18:19,  4.61s/it]

tf.Tensor(347.08508, shape=(), dtype=float32)


 65%|███████████████████████████████████████████████▊                          | 3199/4949 [4:18:00<2:13:16,  4.57s/it]

tf.Tensor(364.7386, shape=(), dtype=float32)


 66%|████████████████████████████████████████████████▌                         | 3249/4949 [4:22:01<2:16:43,  4.83s/it]

tf.Tensor(355.4608, shape=(), dtype=float32)


 67%|█████████████████████████████████████████████████▎                        | 3299/4949 [4:26:00<2:06:05,  4.58s/it]

tf.Tensor(460.91263, shape=(), dtype=float32)


 68%|██████████████████████████████████████████████████                        | 3349/4949 [4:29:59<2:00:31,  4.52s/it]

tf.Tensor(465.20428, shape=(), dtype=float32)


 69%|██████████████████████████████████████████████████▊                       | 3399/4949 [4:33:55<1:56:59,  4.53s/it]

tf.Tensor(376.90845, shape=(), dtype=float32)


 70%|███████████████████████████████████████████████████▌                      | 3449/4949 [4:37:52<1:52:54,  4.52s/it]

tf.Tensor(369.74106, shape=(), dtype=float32)


 71%|████████████████████████████████████████████████████▎                     | 3499/4949 [4:41:46<1:49:11,  4.52s/it]

tf.Tensor(319.03613, shape=(), dtype=float32)


 72%|█████████████████████████████████████████████████████                     | 3549/4949 [4:45:43<1:46:32,  4.57s/it]

tf.Tensor(435.09818, shape=(), dtype=float32)


 73%|█████████████████████████████████████████████████████▊                    | 3599/4949 [4:49:39<1:42:26,  4.55s/it]

tf.Tensor(458.3142, shape=(), dtype=float32)


 74%|██████████████████████████████████████████████████████▌                   | 3649/4949 [4:53:39<1:36:51,  4.47s/it]

tf.Tensor(405.80875, shape=(), dtype=float32)


 75%|███████████████████████████████████████████████████████▎                  | 3699/4949 [4:57:38<1:36:23,  4.63s/it]

tf.Tensor(421.84232, shape=(), dtype=float32)


 76%|████████████████████████████████████████████████████████                  | 3749/4949 [5:01:33<1:31:14,  4.56s/it]

tf.Tensor(415.00348, shape=(), dtype=float32)


 77%|████████████████████████████████████████████████████████▊                 | 3799/4949 [5:05:30<1:26:53,  4.53s/it]

tf.Tensor(443.03897, shape=(), dtype=float32)


 78%|█████████████████████████████████████████████████████████▌                | 3849/4949 [5:09:26<1:26:14,  4.70s/it]

tf.Tensor(381.2192, shape=(), dtype=float32)


 79%|██████████████████████████████████████████████████████████▎               | 3899/4949 [5:13:21<1:19:40,  4.55s/it]

tf.Tensor(298.0725, shape=(), dtype=float32)


 80%|███████████████████████████████████████████████████████████               | 3949/4949 [5:17:21<1:19:03,  4.74s/it]

tf.Tensor(330.11673, shape=(), dtype=float32)


 81%|███████████████████████████████████████████████████████████▊              | 3999/4949 [5:21:24<1:13:25,  4.64s/it]

tf.Tensor(403.31363, shape=(), dtype=float32)


 82%|████████████████████████████████████████████████████████████▌             | 4049/4949 [5:25:27<1:12:59,  4.87s/it]

tf.Tensor(333.68372, shape=(), dtype=float32)


 83%|█████████████████████████████████████████████████████████████▎            | 4099/4949 [5:29:27<1:04:41,  4.57s/it]

tf.Tensor(350.9598, shape=(), dtype=float32)


 84%|██████████████████████████████████████████████████████████████            | 4149/4949 [5:33:24<1:00:27,  4.53s/it]

tf.Tensor(456.77493, shape=(), dtype=float32)


 85%|████████████████████████████████████████████████████████████████▍           | 4199/4949 [5:37:20<57:06,  4.57s/it]

tf.Tensor(436.23425, shape=(), dtype=float32)


 86%|█████████████████████████████████████████████████████████████████▎          | 4249/4949 [5:41:16<53:46,  4.61s/it]

tf.Tensor(431.308, shape=(), dtype=float32)


 87%|██████████████████████████████████████████████████████████████████          | 4299/4949 [5:45:15<49:10,  4.54s/it]

tf.Tensor(349.33707, shape=(), dtype=float32)


 88%|██████████████████████████████████████████████████████████████████▊         | 4349/4949 [5:49:10<45:21,  4.54s/it]

tf.Tensor(499.0071, shape=(), dtype=float32)


 89%|███████████████████████████████████████████████████████████████████▌        | 4399/4949 [5:53:09<41:36,  4.54s/it]

tf.Tensor(269.35388, shape=(), dtype=float32)


 90%|████████████████████████████████████████████████████████████████████▎       | 4449/4949 [5:57:07<38:10,  4.58s/it]

tf.Tensor(433.25427, shape=(), dtype=float32)


 91%|█████████████████████████████████████████████████████████████████████       | 4499/4949 [6:01:01<33:48,  4.51s/it]

tf.Tensor(392.92902, shape=(), dtype=float32)


 92%|█████████████████████████████████████████████████████████████████████▊      | 4549/4949 [6:04:57<30:16,  4.54s/it]

tf.Tensor(352.2395, shape=(), dtype=float32)


 93%|██████████████████████████████████████████████████████████████████████▋     | 4599/4949 [6:08:56<26:49,  4.60s/it]

tf.Tensor(346.57825, shape=(), dtype=float32)


 94%|███████████████████████████████████████████████████████████████████████▍    | 4649/4949 [6:12:55<22:37,  4.53s/it]

tf.Tensor(411.07724, shape=(), dtype=float32)


 95%|████████████████████████████████████████████████████████████████████████▏   | 4699/4949 [6:16:51<18:53,  4.53s/it]

tf.Tensor(375.90237, shape=(), dtype=float32)


 96%|████████████████████████████████████████████████████████████████████████▉   | 4749/4949 [6:20:48<15:33,  4.67s/it]

tf.Tensor(420.41165, shape=(), dtype=float32)


 97%|█████████████████████████████████████████████████████████████████████████▋  | 4799/4949 [6:24:51<12:06,  4.85s/it]

tf.Tensor(367.7257, shape=(), dtype=float32)


 98%|██████████████████████████████████████████████████████████████████████████▍ | 4849/4949 [6:28:55<07:41,  4.61s/it]

tf.Tensor(355.31622, shape=(), dtype=float32)


 99%|███████████████████████████████████████████████████████████████████████████▏| 4899/4949 [6:32:53<03:53,  4.68s/it]

tf.Tensor(419.272, shape=(), dtype=float32)


100%|████████████████████████████████████████████████████████████████████████████| 4949/4949 [6:36:51<00:00,  4.81s/it]


epoch_loss by 100 tf.Tensor(80.57373, shape=(), dtype=float32)


In [52]:
"tf_gpt2_model_2_10"+str(epoch)

'tf_gpt2_model_2_100'