In [None]:
# Development Plan

In [None]:
# Load training data into working directory:
# For training use the Cornell Movie-Dialog Corpus at path = kagglehub.dataset_download("rajathmc/cornell-moviedialog-corpus") 
#                           raw_scripts_urls.txt, 
#                           movie_titles_metadata.txt, 
#                           movie_lines.txt, 
#                           movie_conversations.txt, and 
#                           movie_characters_metadata.txt

# Next, read the relevant files directly and preprocess them to train the chatbot.

# The most important files for generating dialogues are movie_lines.txt and movie_conversations.txt, as they contain the actual dialogues and 
# conversation pairings.

# The chatbot implementation consists of:

#    Preprocessing:
#        The movie_lines.txt and movie_conversations.txt files are used to create input-output pairs for training the chatbot.
#        The data is Tokenized using a transformer-compatible tokenizer.

#    Model Training:
#        Transformer-based model GPT-2 is used for fine-tuning.

#    Multi-turn Conversations:
#        The conversation context is maintained to handle multi-turn dialogues.

#    Web Interface:
#        Gradio is used to build a web interface for user interaction.

In [None]:
# Preprocessing the Data

In [None]:
#    the movie_lines.txt and movie_conversations.txt will be read to extract dialogues and conversation pairs

In [1]:
import pandas as pd

# Paths to the files
movie_lines_path = './movie_lines.txt'
movie_conversations_path = './movie_conversations.txt'

# Step 1: Load movie lines into a dictionary
id2line = {}
with open(movie_lines_path, 'r', encoding='utf-8', errors='ignore') as f:
    for line in f:
        parts = line.strip().split(' +++$+++ ')
        if len(parts) == 5:
            id2line[parts[0]] = parts[4]  # parts[0] is the line ID, parts[4] is the actual line text

# Step 2: Load movie conversations and create conversation pairs
conversations = []
with open(movie_conversations_path, 'r', encoding='utf-8', errors='ignore') as f:
    for line in f:
        parts = line.strip().split(' +++$+++ ')
        if len(parts) == 4:
            # Extract line IDs from the conversation
            line_ids = eval(parts[3])
            for i in range(len(line_ids) - 1):
                # Create pairs of (input, output) for conversation
                input_line = id2line.get(line_ids[i], "")
                target_line = id2line.get(line_ids[i + 1], "")
                if input_line and target_line:
                    conversations.append((input_line, target_line))

# Print out a few examples
for i in range(5):
    print(f"Input: {conversations[i][0]}")
    print(f"Target: {conversations[i][1]}")
    print()

Input: Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.
Target: Well, I thought we'd start with pronunciation, if that's okay with you.

Input: Well, I thought we'd start with pronunciation, if that's okay with you.
Target: Not the hacking and gagging and spitting part.  Please.

Input: Not the hacking and gagging and spitting part.  Please.
Target: Okay... then how 'bout we try out some French cuisine.  Saturday?  Night?

Input: You're asking me out.  That's so cute. What's your name again?
Target: Forget it.

Input: No, no, it's my fault -- we didn't have a proper introduction ---
Target: Cameron.



In [None]:
# Fine-Tuning a Transformer Model (GPT-2)

In [None]:
#    Now, having the conversation pairs, the pre-trained model can be fine-tuned like GPT-2

In [2]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW
import torch
from torch.utils.data import Dataset, DataLoader

In [3]:
# Initialize the tokenizer and model (GPT-2)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

In [4]:
# Use eos_token as the pad_token
tokenizer.pad_token = tokenizer.eos_token

In [5]:
# Initialize the model
model = GPT2LMHeadModel.from_pretrained('gpt2')

In [6]:
class MovieDataset(Dataset):
    def __init__(self, conversations, tokenizer, max_length=512):
        self.conversations = conversations
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.conversations)

    def __getitem__(self, idx):
        input_text, target_text = self.conversations[idx]
        
        # Tokenize input and target text, with eos token and padding
        encoding = self.tokenizer(f"{input_text} {self.tokenizer.eos_token}", 
                                  f"{target_text} {self.tokenizer.eos_token}", 
                                  return_tensors='pt', 
                                  max_length=self.max_length, 
                                  truncation=True, 
                                  padding='max_length')
        
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        # In language modeling, input_ids are also used as labels
        labels = input_ids.clone()  # Copy of input_ids for labels

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels  # Include labels in the returned batch
        }

In [7]:
# Prepare the dataset and dataloader
dataset = MovieDataset(conversations, tokenizer)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

In [8]:
# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)



In [9]:
# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.train()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [14]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
import torch
from torch.utils.data import Dataset, DataLoader

# Initialize the tokenizer and smaller model (use 'gpt2' for the smallest model)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')  # Use 'gpt2'
tokenizer.pad_token = tokenizer.eos_token

model = GPT2LMHeadModel.from_pretrained('gpt2')  # Use 'gpt2'

In [15]:
# Tokenize the conversation pairs
class MovieDataset(Dataset):
    def __init__(self, conversations, tokenizer, max_length=512):
        self.conversations = conversations
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.conversations)

    def __getitem__(self, idx):
        input_text, target_text = self.conversations[idx]
        
        # Tokenize input and target text, with eos token and padding
        encoding = self.tokenizer(f"{input_text} {self.tokenizer.eos_token}", 
                                  f"{target_text} {self.tokenizer.eos_token}", 
                                  return_tensors='pt', 
                                  max_length=self.max_length, 
                                  truncation=True, 
                                  padding='max_length')
        
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        # In language modeling, input_ids are also used as labels
        labels = input_ids.clone()  # Copy of input_ids for labels

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels  # Include labels in the returned batch
        }

# Prepare the dataset and dataloader with smaller batch size
dataset = MovieDataset(conversations, tokenizer)
batch_size = 8  # Reduce batch size to avoid memory issues
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Optimizer with moderate learning rate
learning_rate = 5e-5  # Conservative learning rate
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Scheduler for learning rate decay
num_training_steps = len(dataloader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# Mixed precision training
scaler = torch.cuda.amp.GradScaler()

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.train()

  scaler = torch.cuda.amp.GradScaler()


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [16]:
import random
from torch.utils.data import Subset

# Assuming `conversations` is your full list of conversation pairs
total_samples = len(conversations)  # This would be 305,000
subset_size = 3000  # Target smaller size

# Randomly select 3,000 indices from the full dataset
subset_indices = random.sample(range(total_samples), subset_size)

# Use Subset to create a smaller dataset from the full dataset
small_dataset = Subset(dataset, subset_indices)

# Now create a DataLoader for the smaller dataset
small_dataloader = DataLoader(small_dataset, batch_size=8, shuffle=True)


In [17]:
from tqdm import tqdm  # For tracking progress

In [21]:
# Training loop
for epoch in range(1):  # 1 epoch for demo purposes
    with tqdm(total=len(small_dataloader), desc=f"Epoch {epoch + 1}") as pbar:
        for batch in small_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)  # Fetch the labels
            
            # Pass the labels explicitly to the model
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)            
            #outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f"Epoch {epoch + 1} loss: {loss.item()}")
            pbar.update(1)  # Update progress bar after each batch
    
    print(f"Epoch {epoch + 1} loss: {loss.item()}")

Epoch 1:   0%|▏                                                                    | 1/375 [02:02<12:45:52, 122.87s/it]

Epoch 1 loss: 0.6645388007164001


Epoch 1:   1%|▍                                                                      | 2/375 [02:58<8:36:54, 83.15s/it]

Epoch 1 loss: 0.3914414048194885


Epoch 1:   1%|▌                                                                      | 3/375 [03:59<7:33:03, 73.08s/it]

Epoch 1 loss: 0.7018475532531738


Epoch 1:   1%|▊                                                                      | 4/375 [04:55<6:51:01, 66.47s/it]

Epoch 1 loss: 0.2883152663707733


Epoch 1:   1%|▉                                                                      | 5/375 [05:57<6:39:37, 64.80s/it]

Epoch 1 loss: 0.32132935523986816


Epoch 1:   2%|█▏                                                                     | 6/375 [06:54<6:21:23, 62.02s/it]

Epoch 1 loss: 0.23371371626853943


Epoch 1:   2%|█▎                                                                     | 7/375 [07:47<6:03:23, 59.25s/it]

Epoch 1 loss: 0.3568488359451294


Epoch 1:   2%|█▌                                                                     | 8/375 [08:43<5:55:18, 58.09s/it]

Epoch 1 loss: 0.2571519613265991


Epoch 1:   2%|█▋                                                                     | 9/375 [09:35<5:43:57, 56.39s/it]

Epoch 1 loss: 0.40072768926620483


Epoch 1:   3%|█▊                                                                    | 10/375 [10:30<5:39:47, 55.86s/it]

Epoch 1 loss: 0.5037696957588196


Epoch 1:   3%|██                                                                    | 11/375 [11:24<5:34:55, 55.21s/it]

Epoch 1 loss: 0.325083464384079


Epoch 1:   3%|██▏                                                                   | 12/375 [12:31<5:55:50, 58.82s/it]

Epoch 1 loss: 0.374963641166687


Epoch 1:   3%|██▍                                                                   | 13/375 [13:55<6:40:52, 66.44s/it]

Epoch 1 loss: 0.5382941961288452


Epoch 1:   4%|██▌                                                                   | 14/375 [15:17<7:07:34, 71.07s/it]

Epoch 1 loss: 0.32232779264450073


Epoch 1:   4%|██▊                                                                   | 15/375 [16:43<7:33:45, 75.63s/it]

Epoch 1 loss: 0.3362205922603607


Epoch 1:   4%|██▉                                                                   | 16/375 [17:58<7:32:32, 75.63s/it]

Epoch 1 loss: 0.30303940176963806


Epoch 1:   5%|███▏                                                                  | 17/375 [19:11<7:26:01, 74.75s/it]

Epoch 1 loss: 0.24688026309013367


Epoch 1:   5%|███▎                                                                  | 18/375 [20:37<7:44:19, 78.04s/it]

Epoch 1 loss: 0.34349676966667175


Epoch 1:   5%|███▌                                                                  | 19/375 [21:56<7:44:07, 78.22s/it]

Epoch 1 loss: 0.31325608491897583


Epoch 1:   5%|███▋                                                                  | 20/375 [23:19<7:53:00, 79.95s/it]

Epoch 1 loss: 0.39247405529022217


Epoch 1:   6%|███▉                                                                  | 21/375 [24:42<7:56:44, 80.80s/it]

Epoch 1 loss: 0.22653596103191376


Epoch 1:   6%|████                                                                  | 22/375 [26:08<8:03:27, 82.17s/it]

Epoch 1 loss: 0.3216297924518585


Epoch 1:   6%|████▎                                                                 | 23/375 [27:24<7:51:04, 80.30s/it]

Epoch 1 loss: 0.30606064200401306


Epoch 1:   6%|████▍                                                                 | 24/375 [28:44<7:49:40, 80.29s/it]

Epoch 1 loss: 0.2701611816883087


Epoch 1:   7%|████▋                                                                 | 25/375 [29:46<7:16:32, 74.83s/it]

Epoch 1 loss: 0.26907745003700256


Epoch 1:   7%|████▊                                                                 | 26/375 [30:43<6:44:53, 69.61s/it]

Epoch 1 loss: 0.3090480864048004


Epoch 1:   7%|█████                                                                 | 27/375 [31:45<6:29:56, 67.23s/it]

Epoch 1 loss: 0.29047372937202454


Epoch 1:   7%|█████▏                                                                | 28/375 [32:42<6:10:17, 64.03s/it]

Epoch 1 loss: 0.24998284876346588


Epoch 1:   8%|█████▍                                                                | 29/375 [33:36<5:51:49, 61.01s/it]

Epoch 1 loss: 0.17196743190288544


Epoch 1:   8%|█████▌                                                                | 30/375 [34:43<6:01:08, 62.81s/it]

Epoch 1 loss: 0.5686936974525452


Epoch 1:   8%|█████▊                                                                | 31/375 [35:38<5:47:48, 60.66s/it]

Epoch 1 loss: 0.2798929214477539


Epoch 1:   9%|█████▉                                                                | 32/375 [36:46<5:59:26, 62.88s/it]

Epoch 1 loss: 0.21163205802440643


Epoch 1:   9%|██████▏                                                               | 33/375 [37:59<6:14:55, 65.78s/it]

Epoch 1 loss: 0.3324732780456543


Epoch 1:   9%|██████▎                                                               | 34/375 [39:03<6:10:38, 65.22s/it]

Epoch 1 loss: 0.2830335795879364


Epoch 1:   9%|██████▌                                                               | 35/375 [40:02<5:58:40, 63.29s/it]

Epoch 1 loss: 0.2559215724468231


Epoch 1:  10%|██████▋                                                               | 36/375 [41:02<5:53:40, 62.60s/it]

Epoch 1 loss: 0.3081146478652954


Epoch 1:  10%|██████▉                                                               | 37/375 [42:14<6:08:02, 65.33s/it]

Epoch 1 loss: 0.22402356564998627


Epoch 1:  10%|███████                                                               | 38/375 [43:19<6:06:48, 65.31s/it]

Epoch 1 loss: 0.49939224123954773


Epoch 1:  10%|███████▎                                                              | 39/375 [44:20<5:58:31, 64.02s/it]

Epoch 1 loss: 0.22039596736431122


Epoch 1:  11%|███████▍                                                              | 40/375 [45:16<5:44:03, 61.62s/it]

Epoch 1 loss: 0.3093191385269165


Epoch 1:  11%|███████▋                                                              | 41/375 [46:22<5:48:52, 62.67s/it]

Epoch 1 loss: 0.2634064555168152


Epoch 1:  11%|███████▊                                                              | 42/375 [47:17<5:36:25, 60.62s/it]

Epoch 1 loss: 0.23061291873455048


Epoch 1:  11%|████████                                                              | 43/375 [48:18<5:35:07, 60.56s/it]

Epoch 1 loss: 0.2521783113479614


Epoch 1:  12%|████████▏                                                             | 44/375 [49:40<6:09:08, 66.91s/it]

Epoch 1 loss: 0.3851137161254883


Epoch 1:  12%|████████▍                                                             | 45/375 [50:45<6:05:16, 66.41s/it]

Epoch 1 loss: 0.2979971766471863


Epoch 1:  12%|████████▌                                                             | 46/375 [51:58<6:14:43, 68.34s/it]

Epoch 1 loss: 0.41093435883522034


Epoch 1:  13%|████████▊                                                             | 47/375 [53:01<6:04:38, 66.70s/it]

Epoch 1 loss: 0.3301078677177429


Epoch 1:  13%|████████▉                                                             | 48/375 [53:57<5:46:26, 63.57s/it]

Epoch 1 loss: 0.21698592603206635


Epoch 1:  13%|█████████▏                                                            | 49/375 [54:45<5:20:59, 59.08s/it]

Epoch 1 loss: 0.3421480059623718


Epoch 1:  13%|█████████▎                                                            | 50/375 [55:46<5:22:58, 59.63s/it]

Epoch 1 loss: 0.26159074902534485


Epoch 1:  14%|█████████▌                                                            | 51/375 [56:46<5:21:26, 59.53s/it]

Epoch 1 loss: 0.27217140793800354


Epoch 1:  14%|█████████▋                                                            | 52/375 [57:43<5:17:00, 58.89s/it]

Epoch 1 loss: 0.18755148351192474


Epoch 1:  14%|█████████▉                                                            | 53/375 [58:44<5:19:19, 59.50s/it]

Epoch 1 loss: 0.18374310433864594


Epoch 1:  14%|██████████                                                            | 54/375 [59:44<5:19:23, 59.70s/it]

Epoch 1 loss: 0.5179066061973572


Epoch 1:  15%|█████████▉                                                          | 55/375 [1:00:39<5:10:58, 58.31s/it]

Epoch 1 loss: 0.24656179547309875


Epoch 1:  15%|██████████▏                                                         | 56/375 [1:01:44<5:20:54, 60.36s/it]

Epoch 1 loss: 0.2968803346157074


Epoch 1:  15%|██████████▎                                                         | 57/375 [1:02:48<5:24:35, 61.24s/it]

Epoch 1 loss: 0.20003119111061096


Epoch 1:  15%|██████████▌                                                         | 58/375 [1:03:48<5:22:46, 61.09s/it]

Epoch 1 loss: 0.23910316824913025


Epoch 1:  16%|██████████▋                                                         | 59/375 [1:04:50<5:22:46, 61.29s/it]

Epoch 1 loss: 0.26422253251075745


Epoch 1:  16%|██████████▉                                                         | 60/375 [1:05:47<5:14:22, 59.88s/it]

Epoch 1 loss: 0.260751336812973


Epoch 1:  16%|███████████                                                         | 61/375 [1:06:44<5:09:34, 59.15s/it]

Epoch 1 loss: 0.3298777639865875


Epoch 1:  17%|███████████▏                                                        | 62/375 [1:07:36<4:57:38, 57.06s/it]

Epoch 1 loss: 0.21300727128982544


Epoch 1:  17%|███████████▍                                                        | 63/375 [1:08:37<5:02:27, 58.17s/it]

Epoch 1 loss: 0.29338422417640686


Epoch 1:  17%|███████████▌                                                        | 64/375 [1:09:33<4:58:20, 57.56s/it]

Epoch 1 loss: 0.23064246773719788


Epoch 1:  17%|███████████▊                                                        | 65/375 [1:10:26<4:50:21, 56.20s/it]

Epoch 1 loss: 0.2146923989057541


Epoch 1:  18%|███████████▉                                                        | 66/375 [1:11:21<4:47:22, 55.80s/it]

Epoch 1 loss: 0.1824813038110733


Epoch 1:  18%|████████████▏                                                       | 67/375 [1:12:24<4:56:52, 57.83s/it]

Epoch 1 loss: 0.19270548224449158


Epoch 1:  18%|████████████▎                                                       | 68/375 [1:13:19<4:51:27, 56.96s/it]

Epoch 1 loss: 0.22900380194187164


Epoch 1:  18%|████████████▌                                                       | 69/375 [1:14:17<4:52:27, 57.34s/it]

Epoch 1 loss: 0.2137528657913208


Epoch 1:  19%|████████████▋                                                       | 70/375 [1:15:14<4:51:20, 57.31s/it]

Epoch 1 loss: 0.19491805136203766


Epoch 1:  19%|████████████▊                                                       | 71/375 [1:16:15<4:55:38, 58.35s/it]

Epoch 1 loss: 0.19295592606067657


Epoch 1:  19%|█████████████                                                       | 72/375 [1:17:14<4:56:26, 58.70s/it]

Epoch 1 loss: 0.2657311260700226


Epoch 1:  19%|█████████████▏                                                      | 73/375 [1:18:09<4:49:53, 57.59s/it]

Epoch 1 loss: 0.2655930817127228


Epoch 1:  20%|█████████████▍                                                      | 74/375 [1:19:05<4:45:22, 56.89s/it]

Epoch 1 loss: 0.2047782838344574


Epoch 1:  20%|█████████████▌                                                      | 75/375 [1:19:59<4:40:32, 56.11s/it]

Epoch 1 loss: 0.24514837563037872


Epoch 1:  20%|█████████████▊                                                      | 76/375 [1:20:59<4:45:22, 57.27s/it]

Epoch 1 loss: 0.31797030568122864


Epoch 1:  21%|█████████████▉                                                      | 77/375 [1:21:53<4:39:24, 56.26s/it]

Epoch 1 loss: 0.28865963220596313


Epoch 1:  21%|██████████████▏                                                     | 78/375 [1:22:51<4:41:40, 56.91s/it]

Epoch 1 loss: 0.24250365793704987


Epoch 1:  21%|██████████████▎                                                     | 79/375 [1:23:53<4:48:17, 58.44s/it]

Epoch 1 loss: 0.23748484253883362


Epoch 1:  21%|██████████████▌                                                     | 80/375 [1:25:12<5:17:04, 64.49s/it]

Epoch 1 loss: 0.3642924427986145


Epoch 1:  22%|██████████████▋                                                     | 81/375 [1:26:27<5:31:44, 67.70s/it]

Epoch 1 loss: 0.38804104924201965


Epoch 1:  22%|██████████████▊                                                     | 82/375 [1:27:48<5:49:38, 71.60s/it]

Epoch 1 loss: 0.36238786578178406


Epoch 1:  22%|███████████████                                                     | 83/375 [1:29:06<5:58:04, 73.58s/it]

Epoch 1 loss: 0.18654797971248627


Epoch 1:  22%|███████████████▏                                                    | 84/375 [1:30:24<6:03:23, 74.93s/it]

Epoch 1 loss: 0.24985964596271515


Epoch 1:  23%|███████████████▍                                                    | 85/375 [1:31:49<6:16:49, 77.96s/it]

Epoch 1 loss: 0.3097413182258606


Epoch 1:  23%|███████████████▌                                                    | 86/375 [1:33:03<6:09:56, 76.81s/it]

Epoch 1 loss: 0.2311326265335083


Epoch 1:  23%|███████████████▊                                                    | 87/375 [1:34:19<6:06:45, 76.41s/it]

Epoch 1 loss: 0.16783608496189117


Epoch 1:  23%|███████████████▉                                                    | 88/375 [1:35:45<6:20:02, 79.45s/it]

Epoch 1 loss: 0.21115432679653168


Epoch 1:  24%|████████████████▏                                                   | 89/375 [1:37:08<6:22:46, 80.30s/it]

Epoch 1 loss: 0.41305676102638245


Epoch 1:  24%|████████████████▎                                                   | 90/375 [1:38:26<6:18:19, 79.65s/it]

Epoch 1 loss: 0.2228759378194809


Epoch 1:  24%|████████████████▌                                                   | 91/375 [1:39:46<6:17:28, 79.75s/it]

Epoch 1 loss: 0.20384112000465393


Epoch 1:  25%|████████████████▋                                                   | 92/375 [1:41:01<6:10:36, 78.58s/it]

Epoch 1 loss: 0.36928510665893555


Epoch 1:  25%|████████████████▊                                                   | 93/375 [1:42:11<5:57:03, 75.97s/it]

Epoch 1 loss: 0.19642573595046997


Epoch 1:  25%|█████████████████                                                   | 94/375 [1:43:43<6:17:33, 80.62s/it]

Epoch 1 loss: 0.30683472752571106


Epoch 1:  25%|█████████████████▏                                                  | 95/375 [1:44:58<6:09:08, 79.10s/it]

Epoch 1 loss: 0.12263821810483932


Epoch 1:  26%|█████████████████▍                                                  | 96/375 [1:46:14<6:03:10, 78.10s/it]

Epoch 1 loss: 0.26665472984313965


Epoch 1:  26%|█████████████████▌                                                  | 97/375 [1:47:29<5:58:03, 77.28s/it]

Epoch 1 loss: 0.2971019744873047


Epoch 1:  26%|█████████████████▊                                                  | 98/375 [1:48:52<6:03:26, 78.72s/it]

Epoch 1 loss: 0.21800509095191956


Epoch 1:  26%|█████████████████▉                                                  | 99/375 [1:50:09<6:00:02, 78.27s/it]

Epoch 1 loss: 0.328420490026474


Epoch 1:  27%|█████████████████▊                                                 | 100/375 [1:51:24<5:54:46, 77.41s/it]

Epoch 1 loss: 0.21452873945236206


Epoch 1:  27%|██████████████████                                                 | 101/375 [1:52:41<5:52:28, 77.18s/it]

Epoch 1 loss: 0.20301741361618042


Epoch 1:  27%|██████████████████▏                                                | 102/375 [1:54:03<5:58:00, 78.68s/it]

Epoch 1 loss: 0.18314819037914276


Epoch 1:  27%|██████████████████▍                                                | 103/375 [1:55:24<5:59:37, 79.33s/it]

Epoch 1 loss: 0.3385367691516876


Epoch 1:  28%|██████████████████▌                                                | 104/375 [1:56:41<5:55:57, 78.81s/it]

Epoch 1 loss: 0.20084473490715027


Epoch 1:  28%|██████████████████▊                                                | 105/375 [1:57:58<5:51:18, 78.07s/it]

Epoch 1 loss: 0.32314416766166687


Epoch 1:  28%|██████████████████▉                                                | 106/375 [1:59:14<5:48:08, 77.65s/it]

Epoch 1 loss: 0.1837306171655655


Epoch 1:  29%|███████████████████                                                | 107/375 [2:00:35<5:50:06, 78.38s/it]

Epoch 1 loss: 0.22168341279029846


Epoch 1:  29%|███████████████████▎                                               | 108/375 [2:01:58<5:55:44, 79.94s/it]

Epoch 1 loss: 0.27701061964035034


Epoch 1:  29%|███████████████████▍                                               | 109/375 [2:03:10<5:43:51, 77.56s/it]

Epoch 1 loss: 0.13191765546798706


Epoch 1:  29%|███████████████████▋                                               | 110/375 [2:04:30<5:45:28, 78.22s/it]

Epoch 1 loss: 0.2909896671772003


Epoch 1:  30%|███████████████████▊                                               | 111/375 [2:05:51<5:48:34, 79.22s/it]

Epoch 1 loss: 0.19198958575725555


Epoch 1:  30%|████████████████████                                               | 112/375 [2:07:10<5:46:18, 79.01s/it]

Epoch 1 loss: 0.28786149621009827


Epoch 1:  30%|████████████████████▏                                              | 113/375 [2:08:25<5:39:39, 77.78s/it]

Epoch 1 loss: 0.13556669652462006


Epoch 1:  30%|████████████████████▎                                              | 114/375 [2:09:49<5:46:30, 79.66s/it]

Epoch 1 loss: 0.2235555499792099


Epoch 1:  31%|████████████████████▌                                              | 115/375 [2:11:06<5:42:09, 78.96s/it]

Epoch 1 loss: 0.33599892258644104


Epoch 1:  31%|████████████████████▋                                              | 116/375 [2:12:19<5:32:41, 77.07s/it]

Epoch 1 loss: 0.2934664487838745


Epoch 1:  31%|████████████████████▉                                              | 117/375 [2:13:34<5:28:40, 76.44s/it]

Epoch 1 loss: 0.1595221906900406


Epoch 1:  31%|█████████████████████                                              | 118/375 [2:14:48<5:25:00, 75.88s/it]

Epoch 1 loss: 0.2069530189037323


Epoch 1:  32%|█████████████████████▎                                             | 119/375 [2:16:05<5:24:48, 76.13s/it]

Epoch 1 loss: 0.2037612795829773


Epoch 1:  32%|█████████████████████▍                                             | 120/375 [2:17:27<5:30:52, 77.85s/it]

Epoch 1 loss: 0.1518806666135788


Epoch 1:  32%|█████████████████████▌                                             | 121/375 [2:18:51<5:36:48, 79.56s/it]

Epoch 1 loss: 0.3031806945800781


Epoch 1:  33%|█████████████████████▊                                             | 122/375 [2:20:10<5:35:48, 79.64s/it]

Epoch 1 loss: 0.28320321440696716


Epoch 1:  33%|█████████████████████▉                                             | 123/375 [2:21:23<5:24:57, 77.37s/it]

Epoch 1 loss: 0.22535482048988342


Epoch 1:  33%|██████████████████████▏                                            | 124/375 [2:22:44<5:28:31, 78.53s/it]

Epoch 1 loss: 0.16626307368278503


Epoch 1:  33%|██████████████████████▎                                            | 125/375 [2:24:04<5:29:40, 79.12s/it]

Epoch 1 loss: 0.37813976407051086


Epoch 1:  34%|██████████████████████▌                                            | 126/375 [2:25:17<5:20:26, 77.22s/it]

Epoch 1 loss: 0.24542035162448883


Epoch 1:  34%|██████████████████████▋                                            | 127/375 [2:26:40<5:25:42, 78.80s/it]

Epoch 1 loss: 0.318213552236557


Epoch 1:  34%|██████████████████████▊                                            | 128/375 [2:27:51<5:15:44, 76.70s/it]

Epoch 1 loss: 0.2594526708126068


Epoch 1:  34%|███████████████████████                                            | 129/375 [2:29:03<5:07:58, 75.12s/it]

Epoch 1 loss: 0.3822297155857086


Epoch 1:  35%|███████████████████████▏                                           | 130/375 [2:30:14<5:01:58, 73.95s/it]

Epoch 1 loss: 0.22060087323188782


Epoch 1:  35%|███████████████████████▍                                           | 131/375 [2:31:30<5:02:44, 74.45s/it]

Epoch 1 loss: 0.27231255173683167


Epoch 1:  35%|███████████████████████▌                                           | 132/375 [2:32:48<5:06:09, 75.59s/it]

Epoch 1 loss: 0.21318866312503815


Epoch 1:  35%|███████████████████████▊                                           | 133/375 [2:33:58<4:58:50, 74.09s/it]

Epoch 1 loss: 0.2838239371776581


Epoch 1:  36%|███████████████████████▉                                           | 134/375 [2:35:26<5:13:42, 78.10s/it]

Epoch 1 loss: 0.2217988818883896


Epoch 1:  36%|████████████████████████                                           | 135/375 [2:36:43<5:11:06, 77.78s/it]

Epoch 1 loss: 0.19336359202861786


Epoch 1:  36%|████████████████████████▎                                          | 136/375 [2:38:07<5:17:39, 79.75s/it]

Epoch 1 loss: 0.1695445030927658


Epoch 1:  37%|████████████████████████▍                                          | 137/375 [2:39:20<5:07:55, 77.63s/it]

Epoch 1 loss: 0.3156699240207672


Epoch 1:  37%|████████████████████████▋                                          | 138/375 [2:40:37<5:05:23, 77.31s/it]

Epoch 1 loss: 0.189987450838089


Epoch 1:  37%|████████████████████████▊                                          | 139/375 [2:41:52<5:02:29, 76.91s/it]

Epoch 1 loss: 0.26959478855133057


Epoch 1:  37%|█████████████████████████                                          | 140/375 [2:43:08<4:59:38, 76.50s/it]

Epoch 1 loss: 0.20270468294620514


Epoch 1:  38%|█████████████████████████▏                                         | 141/375 [2:44:24<4:57:50, 76.37s/it]

Epoch 1 loss: 0.19106531143188477


Epoch 1:  38%|█████████████████████████▎                                         | 142/375 [2:45:38<4:53:39, 75.62s/it]

Epoch 1 loss: 0.21459269523620605


Epoch 1:  38%|█████████████████████████▌                                         | 143/375 [2:47:06<5:06:18, 79.22s/it]

Epoch 1 loss: 0.28270313143730164


Epoch 1:  38%|█████████████████████████▋                                         | 144/375 [2:48:23<5:02:56, 78.69s/it]

Epoch 1 loss: 0.1463516354560852


Epoch 1:  39%|█████████████████████████▉                                         | 145/375 [2:49:42<5:01:36, 78.68s/it]

Epoch 1 loss: 0.21238285303115845


Epoch 1:  39%|██████████████████████████                                         | 146/375 [2:51:02<5:01:44, 79.06s/it]

Epoch 1 loss: 0.16872774064540863


Epoch 1:  39%|██████████████████████████▎                                        | 147/375 [2:52:18<4:57:09, 78.20s/it]

Epoch 1 loss: 0.18545885384082794


Epoch 1:  39%|██████████████████████████▍                                        | 148/375 [2:53:34<4:53:10, 77.49s/it]

Epoch 1 loss: 0.19096241891384125


Epoch 1:  40%|██████████████████████████▌                                        | 149/375 [2:54:49<4:49:58, 76.98s/it]

Epoch 1 loss: 0.3121223747730255


Epoch 1:  40%|██████████████████████████▊                                        | 150/375 [2:56:07<4:49:36, 77.23s/it]

Epoch 1 loss: 0.178695946931839


Epoch 1:  40%|██████████████████████████▉                                        | 151/375 [2:57:25<4:48:36, 77.31s/it]

Epoch 1 loss: 0.19700080156326294


Epoch 1:  41%|███████████████████████████▏                                       | 152/375 [2:58:40<4:45:02, 76.69s/it]

Epoch 1 loss: 0.24437406659126282


Epoch 1:  41%|███████████████████████████▎                                       | 153/375 [2:59:53<4:39:58, 75.67s/it]

Epoch 1 loss: 0.21726180613040924


Epoch 1:  41%|███████████████████████████▌                                       | 154/375 [3:01:16<4:46:22, 77.75s/it]

Epoch 1 loss: 0.2465936541557312


Epoch 1:  41%|███████████████████████████▋                                       | 155/375 [3:02:31<4:41:48, 76.86s/it]

Epoch 1 loss: 0.22982709109783173


Epoch 1:  42%|███████████████████████████▊                                       | 156/375 [3:03:49<4:42:10, 77.31s/it]

Epoch 1 loss: 0.2143491804599762


Epoch 1:  42%|████████████████████████████                                       | 157/375 [3:05:15<4:50:46, 80.03s/it]

Epoch 1 loss: 0.14856082201004028


Epoch 1:  42%|████████████████████████████▏                                      | 158/375 [3:06:30<4:43:39, 78.43s/it]

Epoch 1 loss: 0.228602796792984


Epoch 1:  42%|████████████████████████████▍                                      | 159/375 [3:07:46<4:39:05, 77.53s/it]

Epoch 1 loss: 0.23000843822956085


Epoch 1:  43%|████████████████████████████▌                                      | 160/375 [3:09:03<4:37:13, 77.37s/it]

Epoch 1 loss: 0.2999800145626068


Epoch 1:  43%|████████████████████████████▊                                      | 161/375 [3:10:17<4:32:51, 76.50s/it]

Epoch 1 loss: 0.32080113887786865


Epoch 1:  43%|████████████████████████████▉                                      | 162/375 [3:11:40<4:38:22, 78.41s/it]

Epoch 1 loss: 0.19365230202674866


Epoch 1:  43%|█████████████████████████████                                      | 163/375 [3:12:56<4:34:24, 77.66s/it]

Epoch 1 loss: 0.20850680768489838


Epoch 1:  44%|█████████████████████████████▎                                     | 164/375 [3:14:12<4:31:30, 77.20s/it]

Epoch 1 loss: 0.22246555984020233


Epoch 1:  44%|█████████████████████████████▍                                     | 165/375 [3:15:27<4:27:46, 76.51s/it]

Epoch 1 loss: 0.17202463746070862


Epoch 1:  44%|█████████████████████████████▋                                     | 166/375 [3:16:50<4:33:45, 78.59s/it]

Epoch 1 loss: 0.24324887990951538


Epoch 1:  45%|█████████████████████████████▊                                     | 167/375 [3:18:02<4:25:22, 76.55s/it]

Epoch 1 loss: 0.19829635322093964


Epoch 1:  45%|██████████████████████████████                                     | 168/375 [3:19:27<4:32:50, 79.08s/it]

Epoch 1 loss: 0.20410653948783875


Epoch 1:  45%|██████████████████████████████▏                                    | 169/375 [3:20:51<4:36:22, 80.50s/it]

Epoch 1 loss: 0.3620792031288147


Epoch 1:  45%|██████████████████████████████▎                                    | 170/375 [3:22:08<4:31:27, 79.45s/it]

Epoch 1 loss: 0.17306138575077057


Epoch 1:  46%|██████████████████████████████▌                                    | 171/375 [3:23:22<4:24:32, 77.81s/it]

Epoch 1 loss: 0.16809837520122528


Epoch 1:  46%|██████████████████████████████▋                                    | 172/375 [3:24:39<4:22:43, 77.65s/it]

Epoch 1 loss: 0.22065669298171997


Epoch 1:  46%|██████████████████████████████▉                                    | 173/375 [3:25:54<4:18:09, 76.68s/it]

Epoch 1 loss: 0.15355798602104187


Epoch 1:  46%|███████████████████████████████                                    | 174/375 [3:27:11<4:17:19, 76.81s/it]

Epoch 1 loss: 0.32833510637283325


Epoch 1:  47%|███████████████████████████████▎                                   | 175/375 [3:28:28<4:16:47, 77.04s/it]

Epoch 1 loss: 0.11953073740005493


Epoch 1:  47%|███████████████████████████████▍                                   | 176/375 [3:29:44<4:14:29, 76.73s/it]

Epoch 1 loss: 0.22965607047080994


Epoch 1:  47%|███████████████████████████████▌                                   | 177/375 [3:30:57<4:08:59, 75.45s/it]

Epoch 1 loss: 0.1709214150905609


Epoch 1:  47%|███████████████████████████████▊                                   | 178/375 [3:32:13<4:08:07, 75.57s/it]

Epoch 1 loss: 0.24887779355049133


Epoch 1:  48%|███████████████████████████████▉                                   | 179/375 [3:33:42<4:20:08, 79.64s/it]

Epoch 1 loss: 0.15640093386173248


Epoch 1:  48%|████████████████████████████████▏                                  | 180/375 [3:34:58<4:15:14, 78.54s/it]

Epoch 1 loss: 0.1701962649822235


Epoch 1:  48%|████████████████████████████████▎                                  | 181/375 [3:36:12<4:09:32, 77.18s/it]

Epoch 1 loss: 0.2860691249370575


Epoch 1:  49%|████████████████████████████████▌                                  | 182/375 [3:37:28<4:07:09, 76.84s/it]

Epoch 1 loss: 0.16013367474079132


Epoch 1:  49%|████████████████████████████████▋                                  | 183/375 [3:38:48<4:08:57, 77.80s/it]

Epoch 1 loss: 0.2701224088668823


Epoch 1:  49%|████████████████████████████████▊                                  | 184/375 [3:40:03<4:04:53, 76.93s/it]

Epoch 1 loss: 0.1787450611591339


Epoch 1:  49%|█████████████████████████████████                                  | 185/375 [3:41:17<4:01:02, 76.12s/it]

Epoch 1 loss: 0.2139604240655899


Epoch 1:  50%|█████████████████████████████████▏                                 | 186/375 [3:42:31<3:58:21, 75.67s/it]

Epoch 1 loss: 0.16261111199855804


Epoch 1:  50%|█████████████████████████████████▍                                 | 187/375 [3:43:47<3:56:37, 75.52s/it]

Epoch 1 loss: 0.27709874510765076


Epoch 1:  50%|█████████████████████████████████▌                                 | 188/375 [3:45:04<3:57:33, 76.22s/it]

Epoch 1 loss: 0.16675925254821777


Epoch 1:  50%|█████████████████████████████████▊                                 | 189/375 [3:46:21<3:56:39, 76.34s/it]

Epoch 1 loss: 0.26358744502067566


Epoch 1:  51%|█████████████████████████████████▉                                 | 190/375 [3:47:38<3:55:48, 76.48s/it]

Epoch 1 loss: 0.3540576100349426


Epoch 1:  51%|██████████████████████████████████▏                                | 191/375 [3:48:55<3:55:05, 76.66s/it]

Epoch 1 loss: 0.23434878885746002


Epoch 1:  51%|██████████████████████████████████▎                                | 192/375 [3:50:11<3:52:49, 76.33s/it]

Epoch 1 loss: 0.1611497402191162


Epoch 1:  51%|██████████████████████████████████▍                                | 193/375 [3:51:28<3:52:48, 76.75s/it]

Epoch 1 loss: 0.23599907755851746


Epoch 1:  52%|██████████████████████████████████▋                                | 194/375 [3:52:51<3:57:06, 78.60s/it]

Epoch 1 loss: 0.21365082263946533


Epoch 1:  52%|██████████████████████████████████▊                                | 195/375 [3:54:07<3:53:03, 77.69s/it]

Epoch 1 loss: 0.23641036450862885


Epoch 1:  52%|███████████████████████████████████                                | 196/375 [3:55:21<3:48:50, 76.70s/it]

Epoch 1 loss: 0.26607051491737366


Epoch 1:  53%|███████████████████████████████████▏                               | 197/375 [3:56:42<3:51:30, 78.03s/it]

Epoch 1 loss: 0.2753349244594574


Epoch 1:  53%|███████████████████████████████████▍                               | 198/375 [3:58:02<3:51:19, 78.41s/it]

Epoch 1 loss: 0.17997662723064423


Epoch 1:  53%|███████████████████████████████████▌                               | 199/375 [3:59:17<3:47:04, 77.41s/it]

Epoch 1 loss: 0.32290324568748474


Epoch 1:  53%|███████████████████████████████████▋                               | 200/375 [4:00:35<3:46:34, 77.68s/it]

Epoch 1 loss: 0.1870407909154892


Epoch 1:  54%|███████████████████████████████████▉                               | 201/375 [4:01:54<3:46:27, 78.09s/it]

Epoch 1 loss: 0.24651914834976196


Epoch 1:  54%|████████████████████████████████████                               | 202/375 [4:03:09<3:42:37, 77.21s/it]

Epoch 1 loss: 0.23845511674880981


Epoch 1:  54%|████████████████████████████████████▎                              | 203/375 [4:04:25<3:40:08, 76.79s/it]

Epoch 1 loss: 0.25542178750038147


Epoch 1:  54%|████████████████████████████████████▍                              | 204/375 [4:05:47<3:43:42, 78.49s/it]

Epoch 1 loss: 0.19213025271892548


Epoch 1:  55%|████████████████████████████████████▋                              | 205/375 [4:07:07<3:43:18, 78.81s/it]

Epoch 1 loss: 0.23424170911312103


Epoch 1:  55%|████████████████████████████████████▊                              | 206/375 [4:08:26<3:42:09, 78.88s/it]

Epoch 1 loss: 0.17160168290138245


Epoch 1:  55%|████████████████████████████████████▉                              | 207/375 [4:09:41<3:37:55, 77.83s/it]

Epoch 1 loss: 0.18360038101673126


Epoch 1:  55%|█████████████████████████████████████▏                             | 208/375 [4:11:04<3:40:52, 79.36s/it]

Epoch 1 loss: 0.26482129096984863


Epoch 1:  56%|█████████████████████████████████████▎                             | 209/375 [4:12:24<3:40:06, 79.56s/it]

Epoch 1 loss: 0.2909240126609802


Epoch 1:  56%|█████████████████████████████████████▌                             | 210/375 [4:13:40<3:35:22, 78.32s/it]

Epoch 1 loss: 0.16147010028362274


Epoch 1:  56%|█████████████████████████████████████▋                             | 211/375 [4:14:52<3:29:05, 76.50s/it]

Epoch 1 loss: 0.25583699345588684


Epoch 1:  57%|█████████████████████████████████████▉                             | 212/375 [4:16:09<3:28:09, 76.62s/it]

Epoch 1 loss: 0.23894594609737396


Epoch 1:  57%|██████████████████████████████████████                             | 213/375 [4:17:25<3:26:18, 76.41s/it]

Epoch 1 loss: 0.15628963708877563


Epoch 1:  57%|██████████████████████████████████████▏                            | 214/375 [4:18:40<3:23:35, 75.87s/it]

Epoch 1 loss: 0.1673642098903656


Epoch 1:  57%|██████████████████████████████████████▍                            | 215/375 [4:19:57<3:23:31, 76.32s/it]

Epoch 1 loss: 0.25806012749671936


Epoch 1:  58%|██████████████████████████████████████▌                            | 216/375 [4:21:14<3:23:06, 76.65s/it]

Epoch 1 loss: 0.18344330787658691


Epoch 1:  58%|██████████████████████████████████████▊                            | 217/375 [4:22:34<3:23:54, 77.44s/it]

Epoch 1 loss: 0.2356036752462387


Epoch 1:  58%|██████████████████████████████████████▉                            | 218/375 [4:24:00<3:29:21, 80.01s/it]

Epoch 1 loss: 0.2796153128147125


Epoch 1:  58%|███████████████████████████████████████▏                           | 219/375 [4:25:15<3:24:07, 78.51s/it]

Epoch 1 loss: 0.3867945373058319


Epoch 1:  59%|███████████████████████████████████████▎                           | 220/375 [4:26:30<3:20:23, 77.57s/it]

Epoch 1 loss: 0.21644283831119537


Epoch 1:  59%|███████████████████████████████████████▍                           | 221/375 [4:27:51<3:21:55, 78.67s/it]

Epoch 1 loss: 0.21486090123653412


Epoch 1:  59%|███████████████████████████████████████▋                           | 222/375 [4:29:04<3:16:11, 76.94s/it]

Epoch 1 loss: 0.22726283967494965


Epoch 1:  59%|███████████████████████████████████████▊                           | 223/375 [4:30:18<3:12:50, 76.12s/it]

Epoch 1 loss: 0.21513819694519043


Epoch 1:  60%|████████████████████████████████████████                           | 224/375 [4:31:44<3:18:46, 78.98s/it]

Epoch 1 loss: 0.179329514503479


Epoch 1:  60%|████████████████████████████████████████▏                          | 225/375 [4:32:58<3:13:31, 77.41s/it]

Epoch 1 loss: 0.20281614363193512


Epoch 1:  60%|████████████████████████████████████████▍                          | 226/375 [4:34:18<3:14:33, 78.35s/it]

Epoch 1 loss: 0.17777398228645325


Epoch 1:  61%|████████████████████████████████████████▌                          | 227/375 [4:35:42<3:17:18, 79.99s/it]

Epoch 1 loss: 0.2395419031381607


Epoch 1:  61%|████████████████████████████████████████▋                          | 228/375 [4:37:01<3:15:06, 79.64s/it]

Epoch 1 loss: 0.23707802593708038


Epoch 1:  61%|████████████████████████████████████████▉                          | 229/375 [4:38:15<3:09:52, 78.03s/it]

Epoch 1 loss: 0.1648823618888855


Epoch 1:  61%|█████████████████████████████████████████                          | 230/375 [4:39:35<3:09:55, 78.59s/it]

Epoch 1 loss: 0.1567401885986328


Epoch 1:  62%|█████████████████████████████████████████▎                         | 231/375 [4:40:55<3:09:29, 78.95s/it]

Epoch 1 loss: 0.21751447021961212


Epoch 1:  62%|█████████████████████████████████████████▍                         | 232/375 [4:42:10<3:05:45, 77.94s/it]

Epoch 1 loss: 0.1683049499988556


Epoch 1:  62%|█████████████████████████████████████████▋                         | 233/375 [4:43:34<3:08:35, 79.69s/it]

Epoch 1 loss: 0.22622762620449066


Epoch 1:  62%|█████████████████████████████████████████▊                         | 234/375 [4:44:52<3:05:45, 79.04s/it]

Epoch 1 loss: 0.20144054293632507


Epoch 1:  63%|█████████████████████████████████████████▉                         | 235/375 [4:46:05<3:00:20, 77.29s/it]

Epoch 1 loss: 0.24651065468788147


Epoch 1:  63%|██████████████████████████████████████████▏                        | 236/375 [4:47:22<2:59:09, 77.34s/it]

Epoch 1 loss: 0.27727794647216797


Epoch 1:  63%|██████████████████████████████████████████▎                        | 237/375 [4:48:36<2:55:17, 76.22s/it]

Epoch 1 loss: 0.15020576119422913


Epoch 1:  63%|██████████████████████████████████████████▌                        | 238/375 [4:50:04<3:02:05, 79.75s/it]

Epoch 1 loss: 0.18988841772079468


Epoch 1:  64%|██████████████████████████████████████████▋                        | 239/375 [4:51:20<2:58:02, 78.55s/it]

Epoch 1 loss: 0.14309459924697876


Epoch 1:  64%|██████████████████████████████████████████▉                        | 240/375 [4:52:36<2:55:18, 77.91s/it]

Epoch 1 loss: 0.4100983142852783


Epoch 1:  64%|███████████████████████████████████████████                        | 241/375 [4:53:56<2:55:09, 78.43s/it]

Epoch 1 loss: 0.16975316405296326


Epoch 1:  65%|███████████████████████████████████████████▏                       | 242/375 [4:55:11<2:51:53, 77.54s/it]

Epoch 1 loss: 0.20077219605445862


Epoch 1:  65%|███████████████████████████████████████████▍                       | 243/375 [4:56:35<2:54:35, 79.36s/it]

Epoch 1 loss: 0.18930388987064362


Epoch 1:  65%|███████████████████████████████████████████▌                       | 244/375 [4:57:57<2:55:20, 80.31s/it]

Epoch 1 loss: 0.23951464891433716


Epoch 1:  65%|███████████████████████████████████████████▊                       | 245/375 [4:59:24<2:57:58, 82.14s/it]

Epoch 1 loss: 0.1574621945619583


Epoch 1:  66%|███████████████████████████████████████████▉                       | 246/375 [5:00:45<2:55:56, 81.84s/it]

Epoch 1 loss: 0.239846333861351


Epoch 1:  66%|████████████████████████████████████████████▏                      | 247/375 [5:02:02<2:51:32, 80.41s/it]

Epoch 1 loss: 0.14992313086986542


Epoch 1:  66%|████████████████████████████████████████████▎                      | 248/375 [5:03:21<2:49:34, 80.12s/it]

Epoch 1 loss: 0.22492407262325287


Epoch 1:  66%|████████████████████████████████████████████▍                      | 249/375 [5:04:44<2:49:48, 80.86s/it]

Epoch 1 loss: 0.20869341492652893


Epoch 1:  67%|████████████████████████████████████████████▋                      | 250/375 [5:06:01<2:45:48, 79.59s/it]

Epoch 1 loss: 0.2668830156326294


Epoch 1:  67%|████████████████████████████████████████████▊                      | 251/375 [5:07:18<2:43:24, 79.07s/it]

Epoch 1 loss: 0.12909884750843048


Epoch 1:  67%|█████████████████████████████████████████████                      | 252/375 [5:08:35<2:40:20, 78.22s/it]

Epoch 1 loss: 0.2362642139196396


Epoch 1:  67%|█████████████████████████████████████████████▏                     | 253/375 [5:09:54<2:39:34, 78.48s/it]

Epoch 1 loss: 0.1837679147720337


Epoch 1:  68%|█████████████████████████████████████████████▍                     | 254/375 [5:11:10<2:36:58, 77.84s/it]

Epoch 1 loss: 0.2742252051830292


Epoch 1:  68%|█████████████████████████████████████████████▌                     | 255/375 [5:12:26<2:34:45, 77.38s/it]

Epoch 1 loss: 0.18694379925727844


Epoch 1:  68%|█████████████████████████████████████████████▋                     | 256/375 [5:13:50<2:37:16, 79.30s/it]

Epoch 1 loss: 0.14743800461292267


Epoch 1:  69%|█████████████████████████████████████████████▉                     | 257/375 [5:15:09<2:35:40, 79.16s/it]

Epoch 1 loss: 0.16841481626033783


Epoch 1:  69%|██████████████████████████████████████████████                     | 258/375 [5:16:28<2:34:08, 79.04s/it]

Epoch 1 loss: 0.29103776812553406


Epoch 1:  69%|██████████████████████████████████████████████▎                    | 259/375 [5:17:42<2:29:47, 77.48s/it]

Epoch 1 loss: 0.19520556926727295


Epoch 1:  69%|██████████████████████████████████████████████▍                    | 260/375 [5:18:59<2:28:13, 77.34s/it]

Epoch 1 loss: 0.320969820022583


Epoch 1:  70%|██████████████████████████████████████████████▋                    | 261/375 [5:20:14<2:25:44, 76.70s/it]

Epoch 1 loss: 0.1754775494337082


Epoch 1:  70%|██████████████████████████████████████████████▊                    | 262/375 [5:21:28<2:22:49, 75.83s/it]

Epoch 1 loss: 0.20257031917572021


Epoch 1:  70%|██████████████████████████████████████████████▉                    | 263/375 [5:22:49<2:24:19, 77.32s/it]

Epoch 1 loss: 0.27273258566856384


Epoch 1:  70%|███████████████████████████████████████████████▏                   | 264/375 [5:24:05<2:22:38, 77.10s/it]

Epoch 1 loss: 0.27612608671188354


Epoch 1:  71%|███████████████████████████████████████████████▎                   | 265/375 [5:25:18<2:19:12, 75.93s/it]

Epoch 1 loss: 0.21574680507183075


Epoch 1:  71%|███████████████████████████████████████████████▌                   | 266/375 [5:26:44<2:23:30, 79.00s/it]

Epoch 1 loss: 0.18798816204071045


Epoch 1:  71%|███████████████████████████████████████████████▋                   | 267/375 [5:28:10<2:25:46, 80.99s/it]

Epoch 1 loss: 0.16396792232990265


Epoch 1:  71%|███████████████████████████████████████████████▉                   | 268/375 [5:29:22<2:19:28, 78.21s/it]

Epoch 1 loss: 0.2543272078037262


Epoch 1:  72%|████████████████████████████████████████████████                   | 269/375 [5:30:32<2:13:51, 75.77s/it]

Epoch 1 loss: 0.26273539662361145


Epoch 1:  72%|████████████████████████████████████████████████▏                  | 270/375 [5:31:52<2:14:49, 77.04s/it]

Epoch 1 loss: 0.2771961987018585


Epoch 1:  72%|████████████████████████████████████████████████▍                  | 271/375 [5:33:05<2:11:25, 75.82s/it]

Epoch 1 loss: 0.20453879237174988


Epoch 1:  73%|████████████████████████████████████████████████▌                  | 272/375 [5:34:17<2:08:17, 74.74s/it]

Epoch 1 loss: 0.2177213728427887


Epoch 1:  73%|████████████████████████████████████████████████▊                  | 273/375 [5:35:35<2:08:51, 75.80s/it]

Epoch 1 loss: 0.13236220180988312


Epoch 1:  73%|████████████████████████████████████████████████▉                  | 274/375 [5:36:56<2:09:51, 77.14s/it]

Epoch 1 loss: 0.22176532447338104


Epoch 1:  73%|█████████████████████████████████████████████████▏                 | 275/375 [5:38:21<2:12:53, 79.74s/it]

Epoch 1 loss: 0.251875102519989


Epoch 1:  74%|█████████████████████████████████████████████████▎                 | 276/375 [5:39:44<2:13:12, 80.73s/it]

Epoch 1 loss: 0.15868496894836426


Epoch 1:  74%|█████████████████████████████████████████████████▍                 | 277/375 [5:41:00<2:09:18, 79.17s/it]

Epoch 1 loss: 0.2638276219367981


Epoch 1:  74%|█████████████████████████████████████████████████▋                 | 278/375 [5:42:15<2:06:00, 77.94s/it]

Epoch 1 loss: 0.22647573053836823


Epoch 1:  74%|█████████████████████████████████████████████████▊                 | 279/375 [5:43:36<2:06:14, 78.90s/it]

Epoch 1 loss: 0.200313538312912


Epoch 1:  75%|██████████████████████████████████████████████████                 | 280/375 [5:44:52<2:03:12, 77.82s/it]

Epoch 1 loss: 0.25952965021133423


Epoch 1:  75%|██████████████████████████████████████████████████▏                | 281/375 [5:46:06<2:00:32, 76.94s/it]

Epoch 1 loss: 0.1977260410785675


Epoch 1:  75%|██████████████████████████████████████████████████▍                | 282/375 [5:47:22<1:58:43, 76.60s/it]

Epoch 1 loss: 0.1830931156873703


Epoch 1:  75%|██████████████████████████████████████████████████▌                | 283/375 [5:48:41<1:58:37, 77.37s/it]

Epoch 1 loss: 0.1738412082195282


Epoch 1:  76%|██████████████████████████████████████████████████▋                | 284/375 [5:49:56<1:56:16, 76.67s/it]

Epoch 1 loss: 0.19132168591022491


Epoch 1:  76%|██████████████████████████████████████████████████▉                | 285/375 [5:51:13<1:54:51, 76.58s/it]

Epoch 1 loss: 0.1845117062330246


Epoch 1:  76%|███████████████████████████████████████████████████                | 286/375 [5:52:27<1:52:45, 76.02s/it]

Epoch 1 loss: 0.20106461644172668


Epoch 1:  77%|███████████████████████████████████████████████████▎               | 287/375 [5:53:40<1:50:07, 75.09s/it]

Epoch 1 loss: 0.1589382290840149


Epoch 1:  77%|███████████████████████████████████████████████████▍               | 288/375 [5:54:59<1:50:18, 76.07s/it]

Epoch 1 loss: 0.23727011680603027


Epoch 1:  77%|███████████████████████████████████████████████████▋               | 289/375 [5:56:14<1:48:32, 75.73s/it]

Epoch 1 loss: 0.17427188158035278


Epoch 1:  77%|███████████████████████████████████████████████████▊               | 290/375 [5:57:33<1:48:49, 76.82s/it]

Epoch 1 loss: 0.1947191208600998


Epoch 1:  78%|███████████████████████████████████████████████████▉               | 291/375 [5:58:54<1:49:22, 78.12s/it]

Epoch 1 loss: 0.19279241561889648


Epoch 1:  78%|████████████████████████████████████████████████████▏              | 292/375 [6:00:15<1:48:58, 78.78s/it]

Epoch 1 loss: 0.19754086434841156


Epoch 1:  78%|████████████████████████████████████████████████████▎              | 293/375 [6:01:30<1:46:12, 77.71s/it]

Epoch 1 loss: 0.2244626134634018


Epoch 1:  78%|████████████████████████████████████████████████████▌              | 294/375 [6:02:51<1:46:27, 78.86s/it]

Epoch 1 loss: 0.27945607900619507


Epoch 1:  79%|████████████████████████████████████████████████████▋              | 295/375 [6:04:10<1:45:01, 78.77s/it]

Epoch 1 loss: 0.19160981476306915


Epoch 1:  79%|████████████████████████████████████████████████████▉              | 296/375 [6:05:32<1:45:09, 79.87s/it]

Epoch 1 loss: 0.1653217226266861


Epoch 1:  79%|█████████████████████████████████████████████████████              | 297/375 [6:06:51<1:43:31, 79.63s/it]

Epoch 1 loss: 0.17330020666122437


Epoch 1:  79%|█████████████████████████████████████████████████████▏             | 298/375 [6:08:07<1:40:40, 78.44s/it]

Epoch 1 loss: 0.2772110104560852


Epoch 1:  80%|█████████████████████████████████████████████████████▍             | 299/375 [6:09:22<1:38:08, 77.48s/it]

Epoch 1 loss: 0.1863240897655487


Epoch 1:  80%|█████████████████████████████████████████████████████▌             | 300/375 [6:10:40<1:37:06, 77.69s/it]

Epoch 1 loss: 0.11091318726539612


Epoch 1:  80%|█████████████████████████████████████████████████████▊             | 301/375 [6:11:55<1:34:29, 76.61s/it]

Epoch 1 loss: 0.23799501359462738


Epoch 1:  81%|█████████████████████████████████████████████████████▉             | 302/375 [6:13:12<1:33:31, 76.87s/it]

Epoch 1 loss: 0.21124647557735443


Epoch 1:  81%|██████████████████████████████████████████████████████▏            | 303/375 [6:14:34<1:34:02, 78.37s/it]

Epoch 1 loss: 0.2090173065662384


Epoch 1:  81%|██████████████████████████████████████████████████████▎            | 304/375 [6:16:01<1:35:57, 81.09s/it]

Epoch 1 loss: 0.14110085368156433


Epoch 1:  81%|██████████████████████████████████████████████████████▍            | 305/375 [6:17:15<1:32:02, 78.90s/it]

Epoch 1 loss: 0.3886735439300537


Epoch 1:  82%|██████████████████████████████████████████████████████▋            | 306/375 [6:18:34<1:30:53, 79.03s/it]

Epoch 1 loss: 0.1625404953956604


Epoch 1:  82%|██████████████████████████████████████████████████████▊            | 307/375 [6:19:55<1:29:56, 79.36s/it]

Epoch 1 loss: 0.2117101401090622


Epoch 1:  82%|███████████████████████████████████████████████████████            | 308/375 [6:21:11<1:27:32, 78.40s/it]

Epoch 1 loss: 0.2113874852657318


Epoch 1:  82%|███████████████████████████████████████████████████████▏           | 309/375 [6:22:26<1:25:08, 77.41s/it]

Epoch 1 loss: 0.1903589814901352


Epoch 1:  83%|███████████████████████████████████████████████████████▍           | 310/375 [6:23:44<1:24:07, 77.65s/it]

Epoch 1 loss: 0.19728034734725952


Epoch 1:  83%|███████████████████████████████████████████████████████▌           | 311/375 [6:24:59<1:22:07, 76.99s/it]

Epoch 1 loss: 0.1676207333803177


Epoch 1:  83%|███████████████████████████████████████████████████████▋           | 312/375 [6:26:23<1:22:53, 78.95s/it]

Epoch 1 loss: 0.24640700221061707


Epoch 1:  83%|███████████████████████████████████████████████████████▉           | 313/375 [6:27:40<1:21:03, 78.44s/it]

Epoch 1 loss: 0.24555473029613495


Epoch 1:  84%|████████████████████████████████████████████████████████           | 314/375 [6:28:54<1:18:24, 77.12s/it]

Epoch 1 loss: 0.23883046209812164


Epoch 1:  84%|████████████████████████████████████████████████████████▎          | 315/375 [6:30:14<1:17:51, 77.86s/it]

Epoch 1 loss: 0.2473917156457901


Epoch 1:  84%|████████████████████████████████████████████████████████▍          | 316/375 [6:31:38<1:18:18, 79.64s/it]

Epoch 1 loss: 0.30205801129341125


Epoch 1:  85%|████████████████████████████████████████████████████████▋          | 317/375 [6:33:01<1:17:58, 80.66s/it]

Epoch 1 loss: 0.22493113577365875


Epoch 1:  85%|████████████████████████████████████████████████████████▊          | 318/375 [6:34:25<1:17:38, 81.72s/it]

Epoch 1 loss: 0.27568748593330383


Epoch 1:  85%|████████████████████████████████████████████████████████▉          | 319/375 [6:35:51<1:17:33, 83.09s/it]

Epoch 1 loss: 0.2297954261302948


Epoch 1:  85%|█████████████████████████████████████████████████████████▏         | 320/375 [6:37:19<1:17:21, 84.39s/it]

Epoch 1 loss: 0.2261686474084854


Epoch 1:  86%|█████████████████████████████████████████████████████████▎         | 321/375 [6:38:41<1:15:19, 83.70s/it]

Epoch 1 loss: 0.16957469284534454


Epoch 1:  86%|█████████████████████████████████████████████████████████▌         | 322/375 [6:40:03<1:13:26, 83.14s/it]

Epoch 1 loss: 0.19127951562404633


Epoch 1:  86%|█████████████████████████████████████████████████████████▋         | 323/375 [6:41:17<1:09:54, 80.66s/it]

Epoch 1 loss: 0.28012746572494507


Epoch 1:  86%|█████████████████████████████████████████████████████████▉         | 324/375 [6:42:35<1:07:53, 79.87s/it]

Epoch 1 loss: 0.15970343351364136


Epoch 1:  87%|██████████████████████████████████████████████████████████         | 325/375 [6:43:53<1:05:52, 79.06s/it]

Epoch 1 loss: 0.3124546408653259


Epoch 1:  87%|██████████████████████████████████████████████████████████▏        | 326/375 [6:45:08<1:03:40, 77.96s/it]

Epoch 1 loss: 0.2504369020462036


Epoch 1:  87%|██████████████████████████████████████████████████████████▍        | 327/375 [6:46:25<1:02:04, 77.60s/it]

Epoch 1 loss: 0.18590840697288513


Epoch 1:  87%|██████████████████████████████████████████████████████████▌        | 328/375 [6:47:45<1:01:18, 78.26s/it]

Epoch 1 loss: 0.28717342019081116


Epoch 1:  88%|████████████████████████████████████████████████████████████▌        | 329/375 [6:49:02<59:50, 78.06s/it]

Epoch 1 loss: 0.20039671659469604


Epoch 1:  88%|████████████████████████████████████████████████████████████▋        | 330/375 [6:50:18<58:05, 77.45s/it]

Epoch 1 loss: 0.19188162684440613


Epoch 1:  88%|████████████████████████████████████████████████████████████▉        | 331/375 [6:51:31<55:45, 76.04s/it]

Epoch 1 loss: 0.19353099167346954


Epoch 1:  89%|█████████████████████████████████████████████████████████████        | 332/375 [6:52:53<55:41, 77.70s/it]

Epoch 1 loss: 0.2628213167190552


Epoch 1:  89%|█████████████████████████████████████████████████████████████▎       | 333/375 [6:54:13<54:59, 78.56s/it]

Epoch 1 loss: 0.2693957984447479


Epoch 1:  89%|█████████████████████████████████████████████████████████████▍       | 334/375 [6:55:25<52:24, 76.70s/it]

Epoch 1 loss: 0.248300239443779


Epoch 1:  89%|█████████████████████████████████████████████████████████████▋       | 335/375 [6:56:52<53:02, 79.56s/it]

Epoch 1 loss: 0.21895699203014374


Epoch 1:  90%|█████████████████████████████████████████████████████████████▊       | 336/375 [6:58:11<51:37, 79.42s/it]

Epoch 1 loss: 0.26669061183929443


Epoch 1:  90%|██████████████████████████████████████████████████████████████       | 337/375 [6:59:28<49:55, 78.82s/it]

Epoch 1 loss: 0.19178368151187897


Epoch 1:  90%|██████████████████████████████████████████████████████████████▏      | 338/375 [7:00:55<50:00, 81.10s/it]

Epoch 1 loss: 0.23760101199150085


Epoch 1:  90%|██████████████████████████████████████████████████████████████▍      | 339/375 [7:02:12<47:57, 79.92s/it]

Epoch 1 loss: 0.16600793600082397


Epoch 1:  91%|██████████████████████████████████████████████████████████████▌      | 340/375 [7:03:29<46:13, 79.23s/it]

Epoch 1 loss: 0.252047598361969


Epoch 1:  91%|██████████████████████████████████████████████████████████████▋      | 341/375 [7:04:46<44:22, 78.32s/it]

Epoch 1 loss: 0.26156648993492126


Epoch 1:  91%|██████████████████████████████████████████████████████████████▉      | 342/375 [7:06:05<43:18, 78.74s/it]

Epoch 1 loss: 0.166200190782547


Epoch 1:  91%|███████████████████████████████████████████████████████████████      | 343/375 [7:07:25<42:10, 79.07s/it]

Epoch 1 loss: 0.19792695343494415


Epoch 1:  92%|███████████████████████████████████████████████████████████████▎     | 344/375 [7:08:42<40:32, 78.46s/it]

Epoch 1 loss: 0.2676089107990265


Epoch 1:  92%|███████████████████████████████████████████████████████████████▍     | 345/375 [7:09:56<38:35, 77.19s/it]

Epoch 1 loss: 0.2621585428714752


Epoch 1:  92%|███████████████████████████████████████████████████████████████▋     | 346/375 [7:11:17<37:48, 78.23s/it]

Epoch 1 loss: 0.17029482126235962


Epoch 1:  93%|███████████████████████████████████████████████████████████████▊     | 347/375 [7:12:32<36:04, 77.32s/it]

Epoch 1 loss: 0.17100133001804352


Epoch 1:  93%|████████████████████████████████████████████████████████████████     | 348/375 [7:13:52<35:08, 78.09s/it]

Epoch 1 loss: 0.23409828543663025


Epoch 1:  93%|████████████████████████████████████████████████████████████████▏    | 349/375 [7:15:03<32:55, 75.97s/it]

Epoch 1 loss: 0.2087758332490921


Epoch 1:  93%|████████████████████████████████████████████████████████████████▍    | 350/375 [7:16:25<32:24, 77.77s/it]

Epoch 1 loss: 0.19636383652687073


Epoch 1:  94%|████████████████████████████████████████████████████████████████▌    | 351/375 [7:17:43<31:04, 77.70s/it]

Epoch 1 loss: 0.15529562532901764


Epoch 1:  94%|████████████████████████████████████████████████████████████████▊    | 352/375 [7:18:58<29:33, 77.09s/it]

Epoch 1 loss: 0.257894366979599


Epoch 1:  94%|████████████████████████████████████████████████████████████████▉    | 353/375 [7:20:17<28:25, 77.50s/it]

Epoch 1 loss: 0.15754859149456024


Epoch 1:  94%|█████████████████████████████████████████████████████████████████▏   | 354/375 [7:21:34<27:02, 77.27s/it]

Epoch 1 loss: 0.21949926018714905


Epoch 1:  95%|█████████████████████████████████████████████████████████████████▎   | 355/375 [7:22:51<25:46, 77.31s/it]

Epoch 1 loss: 0.15484052896499634


Epoch 1:  95%|█████████████████████████████████████████████████████████████████▌   | 356/375 [7:24:11<24:43, 78.08s/it]

Epoch 1 loss: 0.18378320336341858


Epoch 1:  95%|█████████████████████████████████████████████████████████████████▋   | 357/375 [7:25:26<23:10, 77.25s/it]

Epoch 1 loss: 0.18041686713695526


Epoch 1:  95%|█████████████████████████████████████████████████████████████████▊   | 358/375 [7:26:45<21:59, 77.61s/it]

Epoch 1 loss: 0.2340681105852127


Epoch 1:  96%|██████████████████████████████████████████████████████████████████   | 359/375 [7:28:09<21:15, 79.75s/it]

Epoch 1 loss: 0.13815174996852875


Epoch 1:  96%|██████████████████████████████████████████████████████████████████▏  | 360/375 [7:29:24<19:31, 78.08s/it]

Epoch 1 loss: 0.23332417011260986


Epoch 1:  96%|██████████████████████████████████████████████████████████████████▍  | 361/375 [7:30:39<18:00, 77.16s/it]

Epoch 1 loss: 0.11307565867900848


Epoch 1:  97%|██████████████████████████████████████████████████████████████████▌  | 362/375 [7:31:55<16:41, 77.04s/it]

Epoch 1 loss: 0.13063320517539978


Epoch 1:  97%|██████████████████████████████████████████████████████████████████▊  | 363/375 [7:33:08<15:08, 75.69s/it]

Epoch 1 loss: 0.23012405633926392


Epoch 1:  97%|██████████████████████████████████████████████████████████████████▉  | 364/375 [7:34:30<14:15, 77.76s/it]

Epoch 1 loss: 0.17444802820682526


Epoch 1:  97%|███████████████████████████████████████████████████████████████████▏ | 365/375 [7:35:50<13:03, 78.40s/it]

Epoch 1 loss: 0.18436560034751892


Epoch 1:  98%|███████████████████████████████████████████████████████████████████▎ | 366/375 [7:37:06<11:38, 77.59s/it]

Epoch 1 loss: 0.17811964452266693


Epoch 1:  98%|███████████████████████████████████████████████████████████████████▌ | 367/375 [7:38:22<10:16, 77.02s/it]

Epoch 1 loss: 0.21612125635147095


Epoch 1:  98%|███████████████████████████████████████████████████████████████████▋ | 368/375 [7:39:37<08:56, 76.62s/it]

Epoch 1 loss: 0.17430083453655243


Epoch 1:  98%|███████████████████████████████████████████████████████████████████▉ | 369/375 [7:40:53<07:37, 76.23s/it]

Epoch 1 loss: 0.21988548338413239


Epoch 1:  99%|████████████████████████████████████████████████████████████████████ | 370/375 [7:42:14<06:28, 77.74s/it]

Epoch 1 loss: 0.14459094405174255


Epoch 1:  99%|████████████████████████████████████████████████████████████████████▎| 371/375 [7:43:33<05:12, 78.11s/it]

Epoch 1 loss: 0.2527790069580078


Epoch 1:  99%|████████████████████████████████████████████████████████████████████▍| 372/375 [7:44:48<03:51, 77.29s/it]

Epoch 1 loss: 0.26922404766082764


Epoch 1:  99%|████████████████████████████████████████████████████████████████████▋| 373/375 [7:46:03<02:32, 76.45s/it]

Epoch 1 loss: 0.21482917666435242


Epoch 1: 100%|████████████████████████████████████████████████████████████████████▊| 374/375 [7:47:18<01:16, 76.04s/it]

Epoch 1 loss: 0.23318830132484436


Epoch 1: 100%|█████████████████████████████████████████████████████████████████████| 375/375 [7:48:39<00:00, 74.98s/it]

Epoch 1 loss: 0.25335681438446045
Epoch 1 loss: 0.25335681438446045





In [22]:
# Save the fine-tuned model
model.save_pretrained('./movie_chatbot_model')
tokenizer.save_pretrained('./movie_chatbot_tokenizer')

('./movie_chatbot_tokenizer\\tokenizer_config.json',
 './movie_chatbot_tokenizer\\special_tokens_map.json',
 './movie_chatbot_tokenizer\\vocab.json',
 './movie_chatbot_tokenizer\\merges.txt',
 './movie_chatbot_tokenizer\\added_tokens.json')

In [None]:
# Multi-Turn Conversation with Context

In [None]:
#     handling multi-turn conversations by storing conversation history

In [23]:
def generate_response(model, tokenizer, conversation_history, max_length=512):
    input_text = " ".join(conversation_history) + tokenizer.eos_token
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    
    with torch.no_grad():
        output = model.generate(input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id)
    
    response = tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
    return response

In [None]:
# Web Interface using Gradio

In [None]:
#     using Gradio to build a simple web interface

In [25]:
import gradio as gr

# Load the fine-tuned model and tokenizer
model = GPT2LMHeadModel.from_pretrained('./movie_chatbot_model')
tokenizer = GPT2Tokenizer.from_pretrained('./movie_chatbot_tokenizer')

conversation_history = []

def chatbot(user_input):
    global conversation_history
    conversation_history.append(user_input)
    response = generate_response(model, tokenizer, conversation_history)
    conversation_history.append(response)
    return response

# Build the Gradio interface
gr.Interface(fn=chatbot, inputs="text", outputs="text").launch()

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [None]:
# Evaluation 

In [None]:
#    Evaluate the chatbot using BLEU for generated responses

In [None]:
from nltk.translate.bleu_score import sentence_bleu

def evaluate_bleu(reference, candidate):
    reference_tokens = nltk.word_tokenize(reference.lower())
    candidate_tokens = nltk.word_tokenize(candidate.lower())
    score = sentence_bleu([reference_tokens], candidate_tokens)
    return score

In [29]:
import torch
import math
from tqdm import tqdm  # Progress bar library
from nltk.translate.bleu_score import sentence_bleu

def evaluate(model, small_dataloader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        # Use tqdm to wrap the dataloader for a progress bar
        for batch in tqdm(small_dataloader, desc="Evaluating", leave=False):
            outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['labels'])
            total_loss += outputs.loss.item()
    
    avg_loss = total_loss / len(small_dataloader)
    perplexity = math.exp(avg_loss)
    return perplexity

# Calculate BLEU score for evaluation
def bleu_score(reference, candidate):
    return sentence_bleu([reference.split()], candidate.split())

# Example usage for evaluation with progress bar
perplexity = evaluate(model, small_dataloader)
print(f'Perplexity: {perplexity}')

                                                                                                                       

Perplexity: 1.2213929358027669




In [None]:
# Summary

In [None]:
#    Preprocessing: Loaded movie_lines.txt and movie_conversations.txt, tokenized dialogues.
#    Model: Fine-tuned GPT-2 for conversational modeling.
#    Context Management: Maintained conversation history for multi-turn interactions.
#    Web Interface: Used Gradio for a simple chatbot interface.
#    Evaluation: Added BLEU score as a metric.

#This implementation enables a transformer-based chatbot that handles multi-turn conversations, adapts to context, and uses the loaded data