In [None]:
# Development Plan

In [None]:
# Load training data into working directory:
# For training use the Cornell Movie-Dialog Corpus at path = kagglehub.dataset_download("rajathmc/cornell-moviedialog-corpus") 
#                           raw_scripts_urls.txt, 
#                           movie_titles_metadata.txt, 
#                           movie_lines.txt, 
#                           movie_conversations.txt, and 
#                           movie_characters_metadata.txt

# Next, read the relevant files directly and preprocess them to train the chatbot.

# The most important files for generating dialogues are movie_lines.txt and movie_conversations.txt, as they contain the actual dialogues and 
# conversation pairings.

# The chatbot implementation consists of:

#    Preprocessing:
#        The movie_lines.txt and movie_conversations.txt files are used to create input-output pairs for training the chatbot.
#        The data is Tokenized using a transformer-compatible tokenizer.

#    Model Training:
#        Transformer-based model GPT-2 is used for fine-tuning.

#    Multi-turn Conversations:
#        The conversation context is maintained to handle multi-turn dialogues.

#    Web Interface:
#        Gradio is used to build a web interface for user interaction.

In [None]:
# Preprocessing the Data

In [None]:
#    the movie_lines.txt and movie_conversations.txt will be read to extract dialogues and conversation pairs

In [1]:
import pandas as pd

# Paths to the files
movie_lines_path = './movie_lines.txt'
movie_conversations_path = './movie_conversations.txt'

# Step 1: Load movie lines into a dictionary
id2line = {}
with open(movie_lines_path, 'r', encoding='utf-8', errors='ignore') as f:
    for line in f:
        parts = line.strip().split(' +++$+++ ')
        if len(parts) == 5:
            id2line[parts[0]] = parts[4]  # parts[0] is the line ID, parts[4] is the actual line text

# Step 2: Load movie conversations and create conversation pairs
conversations = []
with open(movie_conversations_path, 'r', encoding='utf-8', errors='ignore') as f:
    for line in f:
        parts = line.strip().split(' +++$+++ ')
        if len(parts) == 4:
            # Extract line IDs from the conversation
            line_ids = eval(parts[3])
            for i in range(len(line_ids) - 1):
                # Create pairs of (input, output) for conversation
                input_line = id2line.get(line_ids[i], "")
                target_line = id2line.get(line_ids[i + 1], "")
                if input_line and target_line:
                    conversations.append((input_line, target_line))

# Print out a few examples
for i in range(5):
    print(f"Input: {conversations[i][0]}")
    print(f"Target: {conversations[i][1]}")
    print()

Input: Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.
Target: Well, I thought we'd start with pronunciation, if that's okay with you.

Input: Well, I thought we'd start with pronunciation, if that's okay with you.
Target: Not the hacking and gagging and spitting part.  Please.

Input: Not the hacking and gagging and spitting part.  Please.
Target: Okay... then how 'bout we try out some French cuisine.  Saturday?  Night?

Input: You're asking me out.  That's so cute. What's your name again?
Target: Forget it.

Input: No, no, it's my fault -- we didn't have a proper introduction ---
Target: Cameron.



In [None]:
# Fine-Tuning a Transformer Model (GPT-2)

In [None]:
#    Now, having the conversation pairs, the pre-trained model can be fine-tuned like GPT-2

In [2]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW
import torch
from torch.utils.data import Dataset, DataLoader

In [3]:
# Initialize the tokenizer and model (GPT-2)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

In [4]:
# Use eos_token as the pad_token
tokenizer.pad_token = tokenizer.eos_token

In [5]:
# Initialize the model
model = GPT2LMHeadModel.from_pretrained('gpt2')

In [6]:
class MovieDataset(Dataset):
    def __init__(self, conversations, tokenizer, max_length=512):
        self.conversations = conversations
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.conversations)

    def __getitem__(self, idx):
        input_text, target_text = self.conversations[idx]
        
        # Tokenize input and target text, with eos token and padding
        encoding = self.tokenizer(f"{input_text} {self.tokenizer.eos_token}", 
                                  f"{target_text} {self.tokenizer.eos_token}", 
                                  return_tensors='pt', 
                                  max_length=self.max_length, 
                                  truncation=True, 
                                  padding='max_length')
        
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        # In language modeling, input_ids are also used as labels
        labels = input_ids.clone()  # Copy of input_ids for labels

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels  # Include labels in the returned batch
        }

In [7]:
# Prepare the dataset and dataloader
dataset = MovieDataset(conversations, tokenizer)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

In [8]:
# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)



In [9]:
# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.train()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [10]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
import torch
from torch.utils.data import Dataset, DataLoader

# Initialize the tokenizer and smaller model (use 'gpt2' for the smallest model)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')  # Use 'gpt2'
tokenizer.pad_token = tokenizer.eos_token

model = GPT2LMHeadModel.from_pretrained('gpt2')  # Use 'gpt2'

In [11]:
# Tokenize the conversation pairs
class MovieDataset(Dataset):
    def __init__(self, conversations, tokenizer, max_length=512):
        self.conversations = conversations
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.conversations)

    def __getitem__(self, idx):
        input_text, target_text = self.conversations[idx]
        
        # Tokenize input and target text, with eos token and padding
        encoding = self.tokenizer(f"{input_text} {self.tokenizer.eos_token}", 
                                  f"{target_text} {self.tokenizer.eos_token}", 
                                  return_tensors='pt', 
                                  max_length=self.max_length, 
                                  truncation=True, 
                                  padding='max_length')
        
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        # In language modeling, input_ids are also used as labels
        labels = input_ids.clone()  # Copy of input_ids for labels

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels  # Include labels in the returned batch
        }

# Prepare the dataset and dataloader with smaller batch size
dataset = MovieDataset(conversations, tokenizer)
batch_size = 8  # Reduce batch size to avoid memory issues
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Optimizer with moderate learning rate
learning_rate = 5e-5  # Conservative learning rate
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Scheduler for learning rate decay
num_training_steps = len(dataloader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# Mixed precision training
scaler = torch.cuda.amp.GradScaler()

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.train()

  scaler = torch.cuda.amp.GradScaler()


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [12]:
import random
from torch.utils.data import Subset

# Assuming `conversations` is your full list of conversation pairs
total_samples = len(conversations)  # This would be 305,000
subset_size = 3000  # Target smaller size

# Randomly select 3,000 indices from the full dataset
subset_indices = random.sample(range(total_samples), subset_size)

# Use Subset to create a smaller dataset from the full dataset
small_dataset = Subset(dataset, subset_indices)

# Now create a DataLoader for the smaller dataset
small_dataloader = DataLoader(small_dataset, batch_size=8, shuffle=True)


In [13]:
from tqdm import tqdm  # For tracking progress

In [14]:
# Training loop
for epoch in range(1):  # 1 epoch for demo purposes
    with tqdm(total=len(small_dataloader), desc=f"Epoch {epoch + 1}") as pbar:
        for batch in small_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)  # Fetch the labels
            
            # Pass the labels explicitly to the model
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)            
            #outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f"Epoch {epoch + 1} loss: {loss.item()}")
            pbar.update(1)  # Update progress bar after each batch
    
    print(f"Epoch {epoch + 1} loss: {loss.item()}")

Epoch 1:   0%|▏                                                                      | 1/375 [00:59<6:12:40, 59.79s/it]

Epoch 1 loss: 11.54411506652832


Epoch 1:   1%|▍                                                                      | 2/375 [02:16<7:13:19, 69.70s/it]

Epoch 1 loss: 7.488289833068848


Epoch 1:   1%|▌                                                                      | 3/375 [03:16<6:45:34, 65.41s/it]

Epoch 1 loss: 4.085651397705078


Epoch 1:   1%|▊                                                                      | 4/375 [04:14<6:24:59, 62.26s/it]

Epoch 1 loss: 1.8591548204421997


Epoch 1:   1%|▉                                                                      | 5/375 [05:19<6:30:21, 63.30s/it]

Epoch 1 loss: 0.7176163792610168


Epoch 1:   2%|█▏                                                                     | 6/375 [06:28<6:40:34, 65.13s/it]

Epoch 1 loss: 0.4764946401119232


Epoch 1:   2%|█▎                                                                     | 7/375 [07:40<6:53:43, 67.46s/it]

Epoch 1 loss: 0.3535977900028229


Epoch 1:   2%|█▌                                                                     | 8/375 [08:43<6:44:47, 66.18s/it]

Epoch 1 loss: 0.24594098329544067


Epoch 1:   2%|█▋                                                                     | 9/375 [09:52<6:49:08, 67.07s/it]

Epoch 1 loss: 0.26547083258628845


Epoch 1:   3%|█▊                                                                    | 10/375 [11:02<6:53:36, 67.99s/it]

Epoch 1 loss: 0.328127384185791


Epoch 1:   3%|██                                                                    | 11/375 [12:09<6:49:17, 67.46s/it]

Epoch 1 loss: 0.41619399189949036


Epoch 1:   3%|██▏                                                                   | 12/375 [13:12<6:40:25, 66.19s/it]

Epoch 1 loss: 0.22834080457687378


Epoch 1:   3%|██▍                                                                   | 13/375 [14:21<6:44:10, 66.99s/it]

Epoch 1 loss: 0.35749635100364685


Epoch 1:   4%|██▌                                                                   | 14/375 [15:28<6:42:57, 66.97s/it]

Epoch 1 loss: 0.41236719489097595


Epoch 1:   4%|██▊                                                                   | 15/375 [16:37<6:45:24, 67.57s/it]

Epoch 1 loss: 0.4540109634399414


Epoch 1:   4%|██▉                                                                   | 16/375 [17:38<6:34:06, 65.87s/it]

Epoch 1 loss: 0.2987411618232727


Epoch 1:   5%|███▏                                                                  | 17/375 [18:44<6:31:34, 65.63s/it]

Epoch 1 loss: 0.2294304072856903


Epoch 1:   5%|███▎                                                                  | 18/375 [19:45<6:23:00, 64.37s/it]

Epoch 1 loss: 0.30033135414123535


Epoch 1:   5%|███▌                                                                  | 19/375 [20:46<6:16:07, 63.39s/it]

Epoch 1 loss: 0.2937120497226715


Epoch 1:   5%|███▋                                                                  | 20/375 [21:56<6:26:06, 65.26s/it]

Epoch 1 loss: 0.5285212397575378


Epoch 1:   6%|███▉                                                                  | 21/375 [23:01<6:24:40, 65.20s/it]

Epoch 1 loss: 0.6450055837631226


Epoch 1:   6%|████                                                                  | 22/375 [24:02<6:17:19, 64.14s/it]

Epoch 1 loss: 0.2948150634765625


Epoch 1:   6%|████▎                                                                 | 23/375 [25:01<6:06:58, 62.55s/it]

Epoch 1 loss: 0.34747010469436646


Epoch 1:   6%|████▍                                                                 | 24/375 [26:03<6:05:07, 62.42s/it]

Epoch 1 loss: 0.20191776752471924


Epoch 1:   7%|████▋                                                                 | 25/375 [27:22<6:32:15, 67.24s/it]

Epoch 1 loss: 0.3440582752227783


Epoch 1:   7%|████▊                                                                 | 26/375 [28:58<7:21:09, 75.84s/it]

Epoch 1 loss: 0.2763502597808838


Epoch 1:   7%|█████                                                                 | 27/375 [30:22<7:33:57, 78.27s/it]

Epoch 1 loss: 0.23934458196163177


Epoch 1:   7%|█████▏                                                                | 28/375 [31:46<7:42:57, 80.05s/it]

Epoch 1 loss: 0.3959631621837616


Epoch 1:   8%|█████▍                                                                | 29/375 [33:11<7:51:00, 81.68s/it]

Epoch 1 loss: 0.42143362760543823


Epoch 1:   8%|█████▌                                                                | 30/375 [34:39<7:59:42, 83.43s/it]

Epoch 1 loss: 0.23247180879116058


Epoch 1:   8%|█████▊                                                                | 31/375 [36:04<8:01:07, 83.92s/it]

Epoch 1 loss: 0.2826172411441803


Epoch 1:   9%|█████▉                                                                | 32/375 [37:32<8:07:26, 85.27s/it]

Epoch 1 loss: 0.24083134531974792


Epoch 1:   9%|██████▏                                                               | 33/375 [39:05<8:18:24, 87.44s/it]

Epoch 1 loss: 0.40659990906715393


Epoch 1:   9%|██████▎                                                               | 34/375 [40:34<8:20:34, 88.08s/it]

Epoch 1 loss: 0.2571873068809509


Epoch 1:   9%|██████▌                                                               | 35/375 [42:04<8:21:04, 88.42s/it]

Epoch 1 loss: 0.46963727474212646


Epoch 1:  10%|██████▋                                                               | 36/375 [43:35<8:24:00, 89.21s/it]

Epoch 1 loss: 0.24853093922138214


Epoch 1:  10%|██████▉                                                               | 37/375 [44:56<8:09:16, 86.85s/it]

Epoch 1 loss: 0.21418412029743195


Epoch 1:  10%|███████                                                               | 38/375 [46:23<8:08:39, 87.00s/it]

Epoch 1 loss: 0.31143730878829956


Epoch 1:  10%|███████▎                                                              | 39/375 [47:52<8:10:24, 87.57s/it]

Epoch 1 loss: 0.2554986774921417


Epoch 1:  11%|███████▍                                                              | 40/375 [49:20<8:08:35, 87.51s/it]

Epoch 1 loss: 0.26493605971336365


Epoch 1:  11%|███████▋                                                              | 41/375 [50:41<7:56:44, 85.64s/it]

Epoch 1 loss: 0.30991968512535095


Epoch 1:  11%|███████▊                                                              | 42/375 [52:05<7:53:17, 85.28s/it]

Epoch 1 loss: 0.24609895050525665


Epoch 1:  11%|████████                                                              | 43/375 [53:35<7:58:35, 86.49s/it]

Epoch 1 loss: 0.32130205631256104


Epoch 1:  12%|████████▏                                                             | 44/375 [55:02<7:58:41, 86.77s/it]

Epoch 1 loss: 0.3163544237613678


Epoch 1:  12%|████████▍                                                             | 45/375 [56:22<7:45:18, 84.60s/it]

Epoch 1 loss: 0.2731981575489044


Epoch 1:  12%|████████▌                                                             | 46/375 [57:55<7:57:28, 87.08s/it]

Epoch 1 loss: 0.3043091297149658


Epoch 1:  13%|████████▊                                                             | 47/375 [59:13<7:41:41, 84.46s/it]

Epoch 1 loss: 0.25635451078414917


Epoch 1:  13%|████████▋                                                           | 48/375 [1:00:40<7:44:39, 85.26s/it]

Epoch 1 loss: 0.21266011893749237


Epoch 1:  13%|████████▉                                                           | 49/375 [1:01:55<7:26:55, 82.26s/it]

Epoch 1 loss: 0.3634120225906372


Epoch 1:  13%|█████████                                                           | 50/375 [1:03:16<7:23:26, 81.87s/it]

Epoch 1 loss: 0.3126867711544037


Epoch 1:  14%|█████████▏                                                          | 51/375 [1:04:42<7:27:55, 82.95s/it]

Epoch 1 loss: 0.19379670917987823


Epoch 1:  14%|█████████▍                                                          | 52/375 [1:06:09<7:33:23, 84.22s/it]

Epoch 1 loss: 0.29799139499664307


Epoch 1:  14%|█████████▌                                                          | 53/375 [1:07:35<7:35:23, 84.85s/it]

Epoch 1 loss: 0.24113363027572632


Epoch 1:  14%|█████████▊                                                          | 54/375 [1:09:04<7:40:26, 86.06s/it]

Epoch 1 loss: 0.28108635544776917


Epoch 1:  15%|█████████▉                                                          | 55/375 [1:10:34<7:45:05, 87.20s/it]

Epoch 1 loss: 0.3249521255493164


Epoch 1:  15%|██████████▏                                                         | 56/375 [1:12:01<7:43:19, 87.15s/it]

Epoch 1 loss: 0.3685038387775421


Epoch 1:  15%|██████████▎                                                         | 57/375 [1:13:30<7:44:35, 87.66s/it]

Epoch 1 loss: 0.27146896719932556


Epoch 1:  15%|██████████▌                                                         | 58/375 [1:14:54<7:38:06, 86.71s/it]

Epoch 1 loss: 0.19444091618061066


Epoch 1:  16%|██████████▋                                                         | 59/375 [1:16:21<7:36:02, 86.59s/it]

Epoch 1 loss: 0.23678873479366302


Epoch 1:  16%|██████████▉                                                         | 60/375 [1:17:47<7:34:35, 86.59s/it]

Epoch 1 loss: 0.16934257745742798


Epoch 1:  16%|███████████                                                         | 61/375 [1:19:13<7:31:57, 86.36s/it]

Epoch 1 loss: 0.2311277538537979


Epoch 1:  17%|███████████▏                                                        | 62/375 [1:20:41<7:32:57, 86.83s/it]

Epoch 1 loss: 0.2934792935848236


Epoch 1:  17%|███████████▍                                                        | 63/375 [1:22:03<7:24:46, 85.53s/it]

Epoch 1 loss: 0.16417349874973297


Epoch 1:  17%|███████████▌                                                        | 64/375 [1:23:35<7:32:32, 87.31s/it]

Epoch 1 loss: 0.19332264363765717


Epoch 1:  17%|███████████▊                                                        | 65/375 [1:24:58<7:24:06, 85.96s/it]

Epoch 1 loss: 0.31622472405433655


Epoch 1:  18%|███████████▉                                                        | 66/375 [1:26:18<7:14:37, 84.39s/it]

Epoch 1 loss: 0.22092783451080322


Epoch 1:  18%|████████████▏                                                       | 67/375 [1:27:47<7:19:22, 85.59s/it]

Epoch 1 loss: 0.1564406156539917


Epoch 1:  18%|████████████▎                                                       | 68/375 [1:29:07<7:09:06, 83.87s/it]

Epoch 1 loss: 0.27714234590530396


Epoch 1:  18%|████████████▌                                                       | 69/375 [1:30:41<7:23:48, 87.02s/it]

Epoch 1 loss: 0.3608831763267517


Epoch 1:  19%|████████████▋                                                       | 70/375 [1:32:13<7:29:20, 88.40s/it]

Epoch 1 loss: 0.1473533809185028


Epoch 1:  19%|████████████▊                                                       | 71/375 [1:33:45<7:33:12, 89.45s/it]

Epoch 1 loss: 0.2342183142900467


Epoch 1:  19%|█████████████                                                       | 72/375 [1:35:15<7:33:22, 89.78s/it]

Epoch 1 loss: 0.15901511907577515


Epoch 1:  19%|█████████████▏                                                      | 73/375 [1:36:36<7:18:44, 87.17s/it]

Epoch 1 loss: 0.2136346995830536


Epoch 1:  20%|█████████████▍                                                      | 74/375 [1:38:02<7:14:49, 86.68s/it]

Epoch 1 loss: 0.160169780254364


Epoch 1:  20%|█████████████▌                                                      | 75/375 [1:39:33<7:20:47, 88.16s/it]

Epoch 1 loss: 0.22779077291488647


Epoch 1:  20%|█████████████▊                                                      | 76/375 [1:41:02<7:20:36, 88.42s/it]

Epoch 1 loss: 0.2902350425720215


Epoch 1:  21%|█████████████▉                                                      | 77/375 [1:42:25<7:10:33, 86.69s/it]

Epoch 1 loss: 0.2134675681591034


Epoch 1:  21%|██████████████▏                                                     | 78/375 [1:43:51<7:07:24, 86.34s/it]

Epoch 1 loss: 0.3121868371963501


Epoch 1:  21%|██████████████▎                                                     | 79/375 [1:45:18<7:06:53, 86.53s/it]

Epoch 1 loss: 0.17214852571487427


Epoch 1:  21%|██████████████▌                                                     | 80/375 [1:46:48<7:11:21, 87.73s/it]

Epoch 1 loss: 0.2602909207344055


Epoch 1:  22%|██████████████▋                                                     | 81/375 [1:48:17<7:12:18, 88.23s/it]

Epoch 1 loss: 0.24296320974826813


Epoch 1:  22%|██████████████▊                                                     | 82/375 [1:49:39<7:00:52, 86.18s/it]

Epoch 1 loss: 0.1762809157371521


Epoch 1:  22%|███████████████                                                     | 83/375 [1:51:01<6:54:13, 85.11s/it]

Epoch 1 loss: 0.17944791913032532


Epoch 1:  22%|███████████████▏                                                    | 84/375 [1:52:30<6:57:59, 86.19s/it]

Epoch 1 loss: 0.22834685444831848


Epoch 1:  23%|███████████████▍                                                    | 85/375 [1:53:57<6:56:55, 86.26s/it]

Epoch 1 loss: 0.21979886293411255


Epoch 1:  23%|███████████████▌                                                    | 86/375 [1:55:21<6:53:23, 85.83s/it]

Epoch 1 loss: 0.2694370448589325


Epoch 1:  23%|███████████████▊                                                    | 87/375 [1:56:48<6:52:22, 85.91s/it]

Epoch 1 loss: 0.1849486380815506


Epoch 1:  23%|███████████████▉                                                    | 88/375 [1:58:15<6:52:48, 86.30s/it]

Epoch 1 loss: 0.1773313283920288


Epoch 1:  24%|████████████████▏                                                   | 89/375 [1:59:41<6:51:03, 86.24s/it]

Epoch 1 loss: 0.21009179949760437


Epoch 1:  24%|████████████████▎                                                   | 90/375 [2:01:05<6:46:19, 85.54s/it]

Epoch 1 loss: 0.4301316738128662


Epoch 1:  24%|████████████████▌                                                   | 91/375 [2:02:33<6:49:02, 86.42s/it]

Epoch 1 loss: 0.17650148272514343


Epoch 1:  25%|████████████████▋                                                   | 92/375 [2:04:04<6:53:34, 87.68s/it]

Epoch 1 loss: 0.30729183554649353


Epoch 1:  25%|████████████████▊                                                   | 93/375 [2:05:35<6:57:38, 88.86s/it]

Epoch 1 loss: 0.23271359503269196


Epoch 1:  25%|█████████████████                                                   | 94/375 [2:07:03<6:54:15, 88.45s/it]

Epoch 1 loss: 0.16764385998249054


Epoch 1:  25%|█████████████████▏                                                  | 95/375 [2:08:31<6:52:30, 88.40s/it]

Epoch 1 loss: 0.22343102097511292


Epoch 1:  26%|█████████████████▍                                                  | 96/375 [2:10:00<6:51:56, 88.59s/it]

Epoch 1 loss: 0.18045663833618164


Epoch 1:  26%|█████████████████▌                                                  | 97/375 [2:11:25<6:44:30, 87.30s/it]

Epoch 1 loss: 0.22535891830921173


Epoch 1:  26%|█████████████████▊                                                  | 98/375 [2:12:51<6:42:00, 87.08s/it]

Epoch 1 loss: 0.14775939285755157


Epoch 1:  26%|█████████████████▉                                                  | 99/375 [2:14:21<6:43:51, 87.79s/it]

Epoch 1 loss: 0.21623213589191437


Epoch 1:  27%|█████████████████▊                                                 | 100/375 [2:15:48<6:41:49, 87.67s/it]

Epoch 1 loss: 0.22507759928703308


Epoch 1:  27%|██████████████████                                                 | 101/375 [2:17:17<6:42:52, 88.22s/it]

Epoch 1 loss: 0.18265242874622345


Epoch 1:  27%|██████████████████▏                                                | 102/375 [2:18:45<6:39:46, 87.86s/it]

Epoch 1 loss: 0.24737080931663513


Epoch 1:  27%|██████████████████▍                                                | 103/375 [2:20:15<6:41:19, 88.53s/it]

Epoch 1 loss: 0.23260770738124847


Epoch 1:  28%|██████████████████▌                                                | 104/375 [2:21:43<6:39:09, 88.38s/it]

Epoch 1 loss: 0.23861034214496613


Epoch 1:  28%|██████████████████▊                                                | 105/375 [2:23:09<6:35:18, 87.84s/it]

Epoch 1 loss: 0.33938121795654297


Epoch 1:  28%|██████████████████▉                                                | 106/375 [2:24:50<6:50:38, 91.59s/it]

Epoch 1 loss: 0.1691051423549652


Epoch 1:  29%|███████████████████                                                | 107/375 [2:26:20<6:47:55, 91.33s/it]

Epoch 1 loss: 0.1975875049829483


Epoch 1:  29%|███████████████████▎                                               | 108/375 [2:27:48<6:41:00, 90.11s/it]

Epoch 1 loss: 0.22024571895599365


Epoch 1:  29%|███████████████████▍                                               | 109/375 [2:29:18<6:39:54, 90.20s/it]

Epoch 1 loss: 0.3629670739173889


Epoch 1:  29%|███████████████████▋                                               | 110/375 [2:30:47<6:37:17, 89.95s/it]

Epoch 1 loss: 0.30470314621925354


Epoch 1:  30%|███████████████████▊                                               | 111/375 [2:32:16<6:33:38, 89.46s/it]

Epoch 1 loss: 0.23542089760303497


Epoch 1:  30%|████████████████████                                               | 112/375 [2:33:41<6:27:13, 88.34s/it]

Epoch 1 loss: 0.2281745821237564


Epoch 1:  30%|████████████████████▏                                              | 113/375 [2:35:12<6:29:01, 89.09s/it]

Epoch 1 loss: 0.2085033506155014


Epoch 1:  30%|████████████████████▎                                              | 114/375 [2:36:41<6:27:40, 89.12s/it]

Epoch 1 loss: 0.2757434844970703


Epoch 1:  31%|████████████████████▌                                              | 115/375 [2:38:14<6:31:11, 90.28s/it]

Epoch 1 loss: 0.27750810980796814


Epoch 1:  31%|████████████████████▋                                              | 116/375 [2:39:34<6:15:47, 87.06s/it]

Epoch 1 loss: 0.23604828119277954


Epoch 1:  31%|████████████████████▉                                              | 117/375 [2:40:54<6:05:34, 85.02s/it]

Epoch 1 loss: 0.19889958202838898


Epoch 1:  31%|█████████████████████                                              | 118/375 [2:42:21<6:06:53, 85.66s/it]

Epoch 1 loss: 0.232496440410614


Epoch 1:  32%|█████████████████████▎                                             | 119/375 [2:43:38<5:53:28, 82.85s/it]

Epoch 1 loss: 0.30452731251716614


Epoch 1:  32%|█████████████████████▍                                             | 120/375 [2:44:55<5:45:45, 81.35s/it]

Epoch 1 loss: 0.22392797470092773


Epoch 1:  32%|█████████████████████▌                                             | 121/375 [2:46:22<5:51:00, 82.91s/it]

Epoch 1 loss: 0.11709102988243103


Epoch 1:  33%|█████████████████████▊                                             | 122/375 [2:47:55<6:02:19, 85.93s/it]

Epoch 1 loss: 0.2793005108833313


Epoch 1:  33%|█████████████████████▉                                             | 123/375 [2:49:23<6:03:06, 86.45s/it]

Epoch 1 loss: 0.20717133581638336


Epoch 1:  33%|██████████████████████▏                                            | 124/375 [2:50:51<6:04:02, 87.02s/it]

Epoch 1 loss: 0.14511972665786743


Epoch 1:  33%|██████████████████████▎                                            | 125/375 [2:52:11<5:53:54, 84.94s/it]

Epoch 1 loss: 0.24577707052230835


Epoch 1:  34%|██████████████████████▌                                            | 126/375 [2:53:44<6:02:51, 87.43s/it]

Epoch 1 loss: 0.20552097260951996


Epoch 1:  34%|██████████████████████▋                                            | 127/375 [2:55:12<6:01:35, 87.48s/it]

Epoch 1 loss: 0.3088981509208679


Epoch 1:  34%|██████████████████████▊                                            | 128/375 [2:56:43<6:04:27, 88.53s/it]

Epoch 1 loss: 0.12171515822410583


Epoch 1:  34%|███████████████████████                                            | 129/375 [2:58:21<6:14:17, 91.29s/it]

Epoch 1 loss: 0.25816452503204346


Epoch 1:  35%|███████████████████████▏                                           | 130/375 [2:59:55<6:16:34, 92.22s/it]

Epoch 1 loss: 0.1703125685453415


Epoch 1:  35%|███████████████████████▍                                           | 131/375 [3:01:27<6:14:55, 92.20s/it]

Epoch 1 loss: 0.20973879098892212


Epoch 1:  35%|███████████████████████▌                                           | 132/375 [3:02:56<6:09:46, 91.30s/it]

Epoch 1 loss: 0.26298877596855164


Epoch 1:  35%|███████████████████████▊                                           | 133/375 [3:04:22<6:01:45, 89.69s/it]

Epoch 1 loss: 0.20443065464496613


Epoch 1:  36%|███████████████████████▉                                           | 134/375 [3:05:45<5:52:06, 87.66s/it]

Epoch 1 loss: 0.22842393815517426


Epoch 1:  36%|████████████████████████                                           | 135/375 [3:07:15<5:53:13, 88.31s/it]

Epoch 1 loss: 0.18606224656105042


Epoch 1:  36%|████████████████████████▎                                          | 136/375 [3:08:41<5:49:19, 87.70s/it]

Epoch 1 loss: 0.1392945498228073


Epoch 1:  37%|████████████████████████▍                                          | 137/375 [3:10:05<5:43:14, 86.53s/it]

Epoch 1 loss: 0.23894861340522766


Epoch 1:  37%|████████████████████████▋                                          | 138/375 [3:11:28<5:36:53, 85.29s/it]

Epoch 1 loss: 0.24823711812496185


Epoch 1:  37%|████████████████████████▊                                          | 139/375 [3:12:48<5:30:18, 83.98s/it]

Epoch 1 loss: 0.16035450994968414


Epoch 1:  37%|█████████████████████████                                          | 140/375 [3:14:18<5:34:53, 85.51s/it]

Epoch 1 loss: 0.19555947184562683


Epoch 1:  38%|█████████████████████████▏                                         | 141/375 [3:15:50<5:41:11, 87.48s/it]

Epoch 1 loss: 0.20416314899921417


Epoch 1:  38%|█████████████████████████▎                                         | 142/375 [3:17:14<5:35:53, 86.50s/it]

Epoch 1 loss: 0.1635238081216812


Epoch 1:  38%|█████████████████████████▌                                         | 143/375 [3:18:40<5:34:04, 86.40s/it]

Epoch 1 loss: 0.2619374990463257


Epoch 1:  38%|█████████████████████████▋                                         | 144/375 [3:20:04<5:30:05, 85.74s/it]

Epoch 1 loss: 0.162563294172287


Epoch 1:  39%|█████████████████████████▉                                         | 145/375 [3:21:35<5:34:26, 87.25s/it]

Epoch 1 loss: 0.29556113481521606


Epoch 1:  39%|██████████████████████████                                         | 146/375 [3:23:02<5:33:16, 87.32s/it]

Epoch 1 loss: 0.22652536630630493


Epoch 1:  39%|██████████████████████████▎                                        | 147/375 [3:24:29<5:30:53, 87.08s/it]

Epoch 1 loss: 0.2148832529783249


Epoch 1:  39%|██████████████████████████▍                                        | 148/375 [3:25:48<5:19:42, 84.50s/it]

Epoch 1 loss: 0.172332301735878


Epoch 1:  40%|██████████████████████████▌                                        | 149/375 [3:27:08<5:13:16, 83.17s/it]

Epoch 1 loss: 0.2569970190525055


Epoch 1:  40%|██████████████████████████▊                                        | 150/375 [3:28:41<5:23:24, 86.24s/it]

Epoch 1 loss: 0.24957865476608276


Epoch 1:  40%|██████████████████████████▉                                        | 151/375 [3:30:12<5:27:43, 87.78s/it]

Epoch 1 loss: 0.16154827177524567


Epoch 1:  41%|███████████████████████████▏                                       | 152/375 [3:31:33<5:18:34, 85.72s/it]

Epoch 1 loss: 0.21968279778957367


Epoch 1:  41%|███████████████████████████▎                                       | 153/375 [3:33:00<5:18:48, 86.16s/it]

Epoch 1 loss: 0.14931254088878632


Epoch 1:  41%|███████████████████████████▌                                       | 154/375 [3:34:31<5:21:47, 87.36s/it]

Epoch 1 loss: 0.15804432332515717


Epoch 1:  41%|███████████████████████████▋                                       | 155/375 [3:35:54<5:15:34, 86.07s/it]

Epoch 1 loss: 0.18138177692890167


Epoch 1:  42%|███████████████████████████▊                                       | 156/375 [3:37:17<5:11:03, 85.22s/it]

Epoch 1 loss: 0.2345769852399826


Epoch 1:  42%|████████████████████████████                                       | 157/375 [3:38:45<5:12:15, 85.94s/it]

Epoch 1 loss: 0.2844213843345642


Epoch 1:  42%|████████████████████████████▏                                      | 158/375 [3:40:16<5:16:30, 87.51s/it]

Epoch 1 loss: 0.23776201903820038


Epoch 1:  42%|████████████████████████████▍                                      | 159/375 [3:41:44<5:16:16, 87.85s/it]

Epoch 1 loss: 0.21083514392375946


Epoch 1:  43%|████████████████████████████▌                                      | 160/375 [3:43:17<5:20:22, 89.41s/it]

Epoch 1 loss: 0.1856042444705963


Epoch 1:  43%|████████████████████████████▊                                      | 161/375 [3:44:42<5:13:53, 88.01s/it]

Epoch 1 loss: 0.1791369616985321


Epoch 1:  43%|████████████████████████████▉                                      | 162/375 [3:46:14<5:16:38, 89.20s/it]

Epoch 1 loss: 0.12381520867347717


Epoch 1:  43%|█████████████████████████████                                      | 163/375 [3:47:46<5:17:57, 89.99s/it]

Epoch 1 loss: 0.1120513305068016


Epoch 1:  44%|█████████████████████████████▎                                     | 164/375 [3:49:12<5:12:32, 88.87s/it]

Epoch 1 loss: 0.17149685323238373


Epoch 1:  44%|█████████████████████████████▍                                     | 165/375 [3:50:36<5:06:02, 87.44s/it]

Epoch 1 loss: 0.322625994682312


Epoch 1:  44%|█████████████████████████████▋                                     | 166/375 [3:51:58<4:59:02, 85.85s/it]

Epoch 1 loss: 0.16310153901576996


Epoch 1:  45%|█████████████████████████████▊                                     | 167/375 [3:53:26<4:59:17, 86.33s/it]

Epoch 1 loss: 0.15114514529705048


Epoch 1:  45%|██████████████████████████████                                     | 168/375 [3:54:42<4:47:15, 83.26s/it]

Epoch 1 loss: 0.2874123454093933


Epoch 1:  45%|██████████████████████████████▏                                    | 169/375 [3:55:58<4:38:03, 80.99s/it]

Epoch 1 loss: 0.21550121903419495


Epoch 1:  45%|██████████████████████████████▎                                    | 170/375 [3:57:21<4:39:14, 81.73s/it]

Epoch 1 loss: 0.17324095964431763


Epoch 1:  46%|██████████████████████████████▌                                    | 171/375 [3:58:52<4:47:16, 84.49s/it]

Epoch 1 loss: 0.3814414441585541


Epoch 1:  46%|██████████████████████████████▋                                    | 172/375 [4:00:23<4:52:40, 86.51s/it]

Epoch 1 loss: 0.3063541054725647


Epoch 1:  46%|██████████████████████████████▉                                    | 173/375 [4:01:47<4:48:11, 85.60s/it]

Epoch 1 loss: 0.2167772799730301


Epoch 1:  46%|███████████████████████████████                                    | 174/375 [4:03:14<4:47:54, 85.94s/it]

Epoch 1 loss: 0.2107444405555725


Epoch 1:  47%|███████████████████████████████▎                                   | 175/375 [4:04:46<4:53:10, 87.95s/it]

Epoch 1 loss: 0.2784874439239502


Epoch 1:  47%|███████████████████████████████▍                                   | 176/375 [4:06:15<4:52:07, 88.08s/it]

Epoch 1 loss: 0.19224901497364044


Epoch 1:  47%|███████████████████████████████▌                                   | 177/375 [4:07:38<4:46:09, 86.72s/it]

Epoch 1 loss: 0.228883758187294


Epoch 1:  47%|███████████████████████████████▊                                   | 178/375 [4:09:01<4:41:14, 85.66s/it]

Epoch 1 loss: 0.2861705720424652


Epoch 1:  48%|███████████████████████████████▉                                   | 179/375 [4:10:43<4:55:18, 90.40s/it]

Epoch 1 loss: 0.1902817338705063


Epoch 1:  48%|████████████████████████████████▏                                  | 180/375 [4:12:07<4:47:52, 88.57s/it]

Epoch 1 loss: 0.15113241970539093


Epoch 1:  48%|████████████████████████████████▎                                  | 181/375 [4:13:35<4:45:30, 88.30s/it]

Epoch 1 loss: 0.17760908603668213


Epoch 1:  49%|████████████████████████████████▌                                  | 182/375 [4:15:02<4:43:14, 88.05s/it]

Epoch 1 loss: 0.16641676425933838


Epoch 1:  49%|████████████████████████████████▋                                  | 183/375 [4:16:30<4:41:51, 88.08s/it]

Epoch 1 loss: 0.22427736222743988


Epoch 1:  49%|████████████████████████████████▊                                  | 184/375 [4:17:56<4:38:17, 87.42s/it]

Epoch 1 loss: 0.12238558381795883


Epoch 1:  49%|█████████████████████████████████                                  | 185/375 [4:19:21<4:34:35, 86.72s/it]

Epoch 1 loss: 0.14224080741405487


Epoch 1:  50%|█████████████████████████████████▏                                 | 186/375 [4:20:41<4:26:36, 84.64s/it]

Epoch 1 loss: 0.17650610208511353


Epoch 1:  50%|█████████████████████████████████▍                                 | 187/375 [4:22:03<4:22:18, 83.72s/it]

Epoch 1 loss: 0.16699637472629547


Epoch 1:  50%|█████████████████████████████████▌                                 | 188/375 [4:23:27<4:21:19, 83.85s/it]

Epoch 1 loss: 0.23202350735664368


Epoch 1:  50%|█████████████████████████████████▊                                 | 189/375 [4:24:53<4:21:55, 84.49s/it]

Epoch 1 loss: 0.1668895035982132


Epoch 1:  51%|█████████████████████████████████▉                                 | 190/375 [4:26:10<4:14:10, 82.43s/it]

Epoch 1 loss: 0.1671135127544403


Epoch 1:  51%|██████████████████████████████████▏                                | 191/375 [4:27:34<4:13:52, 82.79s/it]

Epoch 1 loss: 0.2836146354675293


Epoch 1:  51%|██████████████████████████████████▎                                | 192/375 [4:28:58<4:13:35, 83.14s/it]

Epoch 1 loss: 0.2248343676328659


Epoch 1:  51%|██████████████████████████████████▍                                | 193/375 [4:30:23<4:13:47, 83.67s/it]

Epoch 1 loss: 0.25069889426231384


Epoch 1:  52%|██████████████████████████████████▋                                | 194/375 [4:31:52<4:17:41, 85.42s/it]

Epoch 1 loss: 0.3066573441028595


Epoch 1:  52%|██████████████████████████████████▊                                | 195/375 [4:33:13<4:11:41, 83.90s/it]

Epoch 1 loss: 0.3075845241546631


Epoch 1:  52%|███████████████████████████████████                                | 196/375 [4:34:36<4:09:49, 83.74s/it]

Epoch 1 loss: 0.36666977405548096


Epoch 1:  53%|███████████████████████████████████▏                               | 197/375 [4:36:01<4:09:26, 84.08s/it]

Epoch 1 loss: 0.15400663018226624


Epoch 1:  53%|███████████████████████████████████▍                               | 198/375 [4:37:29<4:11:41, 85.32s/it]

Epoch 1 loss: 0.22192028164863586


Epoch 1:  53%|███████████████████████████████████▌                               | 199/375 [4:38:43<3:59:48, 81.75s/it]

Epoch 1 loss: 0.16961364448070526


Epoch 1:  53%|███████████████████████████████████▋                               | 200/375 [4:40:01<3:55:33, 80.76s/it]

Epoch 1 loss: 0.17170670628547668


Epoch 1:  54%|███████████████████████████████████▉                               | 201/375 [4:41:26<3:57:49, 82.01s/it]

Epoch 1 loss: 0.14964115619659424


Epoch 1:  54%|████████████████████████████████████                               | 202/375 [4:42:46<3:54:43, 81.41s/it]

Epoch 1 loss: 0.3443317413330078


Epoch 1:  54%|████████████████████████████████████▎                              | 203/375 [4:44:15<3:59:30, 83.55s/it]

Epoch 1 loss: 0.14799417555332184


Epoch 1:  54%|████████████████████████████████████▍                              | 204/375 [4:45:37<3:56:55, 83.13s/it]

Epoch 1 loss: 0.22937937080860138


Epoch 1:  55%|████████████████████████████████████▋                              | 205/375 [4:47:02<3:57:35, 83.86s/it]

Epoch 1 loss: 0.1341635137796402


Epoch 1:  55%|████████████████████████████████████▊                              | 206/375 [4:48:30<3:59:13, 84.93s/it]

Epoch 1 loss: 0.15861614048480988


Epoch 1:  55%|████████████████████████████████████▉                              | 207/375 [4:49:49<3:53:03, 83.23s/it]

Epoch 1 loss: 0.239530548453331


Epoch 1:  55%|█████████████████████████████████████▏                             | 208/375 [4:51:10<3:50:03, 82.66s/it]

Epoch 1 loss: 0.20738418400287628


Epoch 1:  56%|█████████████████████████████████████▎                             | 209/375 [4:52:29<3:45:04, 81.35s/it]

Epoch 1 loss: 0.15228736400604248


Epoch 1:  56%|█████████████████████████████████████▌                             | 210/375 [4:53:52<3:45:12, 81.89s/it]

Epoch 1 loss: 0.2546200156211853


Epoch 1:  56%|█████████████████████████████████████▋                             | 211/375 [4:55:14<3:43:58, 81.94s/it]

Epoch 1 loss: 0.24688895046710968


Epoch 1:  57%|█████████████████████████████████████▉                             | 212/375 [4:56:33<3:40:10, 81.05s/it]

Epoch 1 loss: 0.33852308988571167


Epoch 1:  57%|██████████████████████████████████████                             | 213/375 [4:57:58<3:42:24, 82.38s/it]

Epoch 1 loss: 0.2637861371040344


Epoch 1:  57%|██████████████████████████████████████▏                            | 214/375 [4:59:11<3:33:16, 79.48s/it]

Epoch 1 loss: 0.2068086713552475


Epoch 1:  57%|██████████████████████████████████████▍                            | 215/375 [5:00:40<3:39:42, 82.39s/it]

Epoch 1 loss: 0.2959887981414795


Epoch 1:  58%|██████████████████████████████████████▌                            | 216/375 [5:02:08<3:42:30, 83.96s/it]

Epoch 1 loss: 0.22637222707271576


Epoch 1:  58%|██████████████████████████████████████▊                            | 217/375 [5:03:32<3:41:36, 84.15s/it]

Epoch 1 loss: 0.21410337090492249


Epoch 1:  58%|██████████████████████████████████████▉                            | 218/375 [5:04:56<3:39:50, 84.02s/it]

Epoch 1 loss: 0.20179583132266998


Epoch 1:  58%|███████████████████████████████████████▏                           | 219/375 [5:06:21<3:39:02, 84.25s/it]

Epoch 1 loss: 0.14319969713687897


Epoch 1:  59%|███████████████████████████████████████▎                           | 220/375 [5:07:46<3:38:03, 84.41s/it]

Epoch 1 loss: 0.214927539229393


Epoch 1:  59%|███████████████████████████████████████▍                           | 221/375 [5:09:12<3:37:49, 84.87s/it]

Epoch 1 loss: 0.2627562880516052


Epoch 1:  59%|███████████████████████████████████████▋                           | 222/375 [5:10:34<3:34:21, 84.06s/it]

Epoch 1 loss: 0.13538576662540436


Epoch 1:  59%|███████████████████████████████████████▊                           | 223/375 [5:12:01<3:35:30, 85.07s/it]

Epoch 1 loss: 0.23616181313991547


Epoch 1:  60%|████████████████████████████████████████                           | 224/375 [5:13:26<3:33:34, 84.86s/it]

Epoch 1 loss: 0.14230653643608093


Epoch 1:  60%|████████████████████████████████████████▏                          | 225/375 [5:14:51<3:32:41, 85.08s/it]

Epoch 1 loss: 0.18072766065597534


Epoch 1:  60%|████████████████████████████████████████▍                          | 226/375 [5:16:11<3:27:36, 83.60s/it]

Epoch 1 loss: 0.2507120668888092


Epoch 1:  61%|████████████████████████████████████████▌                          | 227/375 [5:17:37<3:27:39, 84.18s/it]

Epoch 1 loss: 0.2224961519241333


Epoch 1:  61%|████████████████████████████████████████▋                          | 228/375 [5:18:55<3:22:01, 82.46s/it]

Epoch 1 loss: 0.22416743636131287


Epoch 1:  61%|████████████████████████████████████████▉                          | 229/375 [5:20:13<3:17:00, 80.96s/it]

Epoch 1 loss: 0.18305310606956482


Epoch 1:  61%|█████████████████████████████████████████                          | 230/375 [5:21:41<3:21:05, 83.21s/it]

Epoch 1 loss: 0.1659262776374817


Epoch 1:  62%|█████████████████████████████████████████▎                         | 231/375 [5:23:07<3:21:28, 83.95s/it]

Epoch 1 loss: 0.2112371325492859


Epoch 1:  62%|█████████████████████████████████████████▍                         | 232/375 [5:24:39<3:25:42, 86.31s/it]

Epoch 1 loss: 0.21516674757003784


Epoch 1:  62%|█████████████████████████████████████████▋                         | 233/375 [5:26:00<3:20:41, 84.80s/it]

Epoch 1 loss: 0.1740528643131256


Epoch 1:  62%|█████████████████████████████████████████▊                         | 234/375 [5:27:14<3:11:27, 81.47s/it]

Epoch 1 loss: 0.29637637734413147


Epoch 1:  63%|█████████████████████████████████████████▉                         | 235/375 [5:28:41<3:14:22, 83.30s/it]

Epoch 1 loss: 0.19817695021629333


Epoch 1:  63%|██████████████████████████████████████████▏                        | 236/375 [5:30:10<3:16:56, 85.01s/it]

Epoch 1 loss: 0.13619351387023926


Epoch 1:  63%|██████████████████████████████████████████▎                        | 237/375 [5:31:36<3:16:20, 85.37s/it]

Epoch 1 loss: 0.2639598250389099


Epoch 1:  63%|██████████████████████████████████████████▌                        | 238/375 [5:33:03<3:15:57, 85.82s/it]

Epoch 1 loss: 0.15390893816947937


Epoch 1:  64%|██████████████████████████████████████████▋                        | 239/375 [5:34:31<3:15:54, 86.43s/it]

Epoch 1 loss: 0.21979425847530365


Epoch 1:  64%|██████████████████████████████████████████▉                        | 240/375 [5:36:03<3:18:11, 88.09s/it]

Epoch 1 loss: 0.17325109243392944


Epoch 1:  64%|███████████████████████████████████████████                        | 241/375 [5:37:31<3:16:21, 87.92s/it]

Epoch 1 loss: 0.21442455053329468


Epoch 1:  65%|███████████████████████████████████████████▏                       | 242/375 [5:38:54<3:11:32, 86.41s/it]

Epoch 1 loss: 0.2498294860124588


Epoch 1:  65%|███████████████████████████████████████████▍                       | 243/375 [5:40:02<2:58:23, 81.09s/it]

Epoch 1 loss: 0.16923633217811584


Epoch 1:  65%|███████████████████████████████████████████▌                       | 244/375 [5:41:35<3:04:30, 84.51s/it]

Epoch 1 loss: 0.2240554839372635


Epoch 1:  65%|███████████████████████████████████████████▊                       | 245/375 [5:42:57<3:01:55, 83.97s/it]

Epoch 1 loss: 0.2535625398159027


Epoch 1:  66%|███████████████████████████████████████████▉                       | 246/375 [5:44:20<2:59:40, 83.57s/it]

Epoch 1 loss: 0.23270739614963531


Epoch 1:  66%|████████████████████████████████████████████▏                      | 247/375 [5:45:50<3:02:38, 85.61s/it]

Epoch 1 loss: 0.13425379991531372


Epoch 1:  66%|████████████████████████████████████████████▎                      | 248/375 [5:47:17<3:01:42, 85.85s/it]

Epoch 1 loss: 0.29501038789749146


Epoch 1:  66%|████████████████████████████████████████████▍                      | 249/375 [5:48:42<2:59:35, 85.52s/it]

Epoch 1 loss: 0.30098757147789


Epoch 1:  67%|████████████████████████████████████████████▋                      | 250/375 [5:50:05<2:57:02, 84.98s/it]

Epoch 1 loss: 0.16408750414848328


Epoch 1:  67%|████████████████████████████████████████████▊                      | 251/375 [5:51:30<2:55:26, 84.89s/it]

Epoch 1 loss: 0.1463964283466339


Epoch 1:  67%|█████████████████████████████████████████████                      | 252/375 [5:52:54<2:53:21, 84.57s/it]

Epoch 1 loss: 0.15260371565818787


Epoch 1:  67%|█████████████████████████████████████████████▏                     | 253/375 [5:54:06<2:44:21, 80.83s/it]

Epoch 1 loss: 0.1098237931728363


Epoch 1:  68%|█████████████████████████████████████████████▍                     | 254/375 [5:55:26<2:42:21, 80.51s/it]

Epoch 1 loss: 0.17838981747627258


Epoch 1:  68%|█████████████████████████████████████████████▌                     | 255/375 [5:56:53<2:45:18, 82.66s/it]

Epoch 1 loss: 0.2517547309398651


Epoch 1:  68%|█████████████████████████████████████████████▋                     | 256/375 [5:58:16<2:43:57, 82.67s/it]

Epoch 1 loss: 0.24226172268390656


Epoch 1:  69%|█████████████████████████████████████████████▉                     | 257/375 [5:59:42<2:44:32, 83.66s/it]

Epoch 1 loss: 0.17972104251384735


Epoch 1:  69%|██████████████████████████████████████████████                     | 258/375 [6:01:04<2:41:56, 83.05s/it]

Epoch 1 loss: 0.30158719420433044


Epoch 1:  69%|██████████████████████████████████████████████▎                    | 259/375 [6:02:27<2:40:49, 83.19s/it]

Epoch 1 loss: 0.16008977591991425


Epoch 1:  69%|██████████████████████████████████████████████▍                    | 260/375 [6:03:47<2:37:25, 82.14s/it]

Epoch 1 loss: 0.21923433244228363


Epoch 1:  70%|██████████████████████████████████████████████▋                    | 261/375 [6:05:09<2:36:15, 82.24s/it]

Epoch 1 loss: 0.2396424263715744


Epoch 1:  70%|██████████████████████████████████████████████▊                    | 262/375 [6:06:28<2:32:45, 81.11s/it]

Epoch 1 loss: 0.29900649189949036


Epoch 1:  70%|██████████████████████████████████████████████▉                    | 263/375 [6:07:47<2:30:19, 80.53s/it]

Epoch 1 loss: 0.22221286594867706


Epoch 1:  70%|███████████████████████████████████████████████▏                   | 264/375 [6:09:05<2:27:43, 79.86s/it]

Epoch 1 loss: 0.27750086784362793


Epoch 1:  71%|███████████████████████████████████████████████▎                   | 265/375 [6:10:30<2:29:14, 81.41s/it]

Epoch 1 loss: 0.24786345660686493


Epoch 1:  71%|███████████████████████████████████████████████▌                   | 266/375 [6:11:53<2:28:40, 81.84s/it]

Epoch 1 loss: 0.2240004539489746


Epoch 1:  71%|███████████████████████████████████████████████▋                   | 267/375 [6:13:16<2:27:42, 82.06s/it]

Epoch 1 loss: 0.23295696079730988


Epoch 1:  71%|███████████████████████████████████████████████▉                   | 268/375 [6:14:25<2:19:22, 78.15s/it]

Epoch 1 loss: 0.2794714570045471


Epoch 1:  72%|████████████████████████████████████████████████                   | 269/375 [6:15:50<2:22:05, 80.43s/it]

Epoch 1 loss: 0.14684267342090607


Epoch 1:  72%|████████████████████████████████████████████████▏                  | 270/375 [6:17:21<2:25:54, 83.38s/it]

Epoch 1 loss: 0.2062603384256363


Epoch 1:  72%|████████████████████████████████████████████████▍                  | 271/375 [6:18:46<2:25:35, 84.00s/it]

Epoch 1 loss: 0.2017577439546585


Epoch 1:  73%|████████████████████████████████████████████████▌                  | 272/375 [6:20:14<2:26:01, 85.06s/it]

Epoch 1 loss: 0.25546571612358093


Epoch 1:  73%|████████████████████████████████████████████████▊                  | 273/375 [6:21:42<2:26:17, 86.06s/it]

Epoch 1 loss: 0.19358450174331665


Epoch 1:  73%|████████████████████████████████████████████████▉                  | 274/375 [6:23:12<2:26:34, 87.08s/it]

Epoch 1 loss: 0.2085784673690796


Epoch 1:  73%|█████████████████████████████████████████████████▏                 | 275/375 [6:24:39<2:25:30, 87.31s/it]

Epoch 1 loss: 0.19920887053012848


Epoch 1:  74%|█████████████████████████████████████████████████▎                 | 276/375 [6:26:09<2:25:08, 87.97s/it]

Epoch 1 loss: 0.16176040470600128


Epoch 1:  74%|█████████████████████████████████████████████████▍                 | 277/375 [6:27:37<2:23:37, 87.93s/it]

Epoch 1 loss: 0.19310392439365387


Epoch 1:  74%|█████████████████████████████████████████████████▋                 | 278/375 [6:29:00<2:19:44, 86.44s/it]

Epoch 1 loss: 0.16080915927886963


Epoch 1:  74%|█████████████████████████████████████████████████▊                 | 279/375 [6:30:25<2:17:50, 86.15s/it]

Epoch 1 loss: 0.14613309502601624


Epoch 1:  75%|██████████████████████████████████████████████████                 | 280/375 [6:31:41<2:11:26, 83.01s/it]

Epoch 1 loss: 0.1888626515865326


Epoch 1:  75%|██████████████████████████████████████████████████▏                | 281/375 [6:33:03<2:09:42, 82.80s/it]

Epoch 1 loss: 0.23802120983600616


Epoch 1:  75%|██████████████████████████████████████████████████▍                | 282/375 [6:34:24<2:07:17, 82.12s/it]

Epoch 1 loss: 0.12112677097320557


Epoch 1:  75%|██████████████████████████████████████████████████▌                | 283/375 [6:35:47<2:06:38, 82.59s/it]

Epoch 1 loss: 0.2764183282852173


Epoch 1:  76%|██████████████████████████████████████████████████▋                | 284/375 [6:36:59<2:00:18, 79.33s/it]

Epoch 1 loss: 0.23006412386894226


Epoch 1:  76%|██████████████████████████████████████████████████▉                | 285/375 [6:38:20<1:59:43, 79.82s/it]

Epoch 1 loss: 0.20369146764278412


Epoch 1:  76%|███████████████████████████████████████████████████                | 286/375 [6:39:42<1:59:28, 80.54s/it]

Epoch 1 loss: 0.2334321141242981


Epoch 1:  77%|███████████████████████████████████████████████████▎               | 287/375 [6:41:12<2:02:01, 83.20s/it]

Epoch 1 loss: 0.25915777683258057


Epoch 1:  77%|███████████████████████████████████████████████████▍               | 288/375 [6:42:37<2:01:34, 83.84s/it]

Epoch 1 loss: 0.26057279109954834


Epoch 1:  77%|███████████████████████████████████████████████████▋               | 289/375 [6:44:03<2:01:09, 84.52s/it]

Epoch 1 loss: 0.15610750019550323


Epoch 1:  77%|███████████████████████████████████████████████████▊               | 290/375 [6:45:27<1:59:38, 84.46s/it]

Epoch 1 loss: 0.2325333058834076


Epoch 1:  78%|███████████████████████████████████████████████████▉               | 291/375 [6:46:46<1:55:42, 82.65s/it]

Epoch 1 loss: 0.2107025384902954


Epoch 1:  78%|████████████████████████████████████████████████████▏              | 292/375 [6:48:12<1:55:45, 83.68s/it]

Epoch 1 loss: 0.3196057081222534


Epoch 1:  78%|████████████████████████████████████████████████████▎              | 293/375 [6:49:36<1:54:19, 83.65s/it]

Epoch 1 loss: 0.13813111186027527


Epoch 1:  78%|████████████████████████████████████████████████████▌              | 294/375 [6:51:01<1:53:50, 84.33s/it]

Epoch 1 loss: 0.1812504082918167


Epoch 1:  79%|████████████████████████████████████████████████████▋              | 295/375 [6:52:26<1:52:31, 84.39s/it]

Epoch 1 loss: 0.15189678966999054


Epoch 1:  79%|████████████████████████████████████████████████████▉              | 296/375 [6:53:53<1:52:07, 85.16s/it]

Epoch 1 loss: 0.33783388137817383


Epoch 1:  79%|█████████████████████████████████████████████████████              | 297/375 [6:55:16<1:49:49, 84.49s/it]

Epoch 1 loss: 0.21290303766727448


Epoch 1:  79%|█████████████████████████████████████████████████████▏             | 298/375 [6:56:37<1:47:07, 83.48s/it]

Epoch 1 loss: 0.15784232318401337


Epoch 1:  80%|█████████████████████████████████████████████████████▍             | 299/375 [6:57:59<1:45:03, 82.94s/it]

Epoch 1 loss: 0.1682044416666031


Epoch 1:  80%|█████████████████████████████████████████████████████▌             | 300/375 [6:59:23<1:44:01, 83.22s/it]

Epoch 1 loss: 0.19950833916664124


Epoch 1:  80%|█████████████████████████████████████████████████████▊             | 301/375 [7:00:47<1:43:11, 83.68s/it]

Epoch 1 loss: 0.15709470212459564


Epoch 1:  81%|█████████████████████████████████████████████████████▉             | 302/375 [7:02:13<1:42:40, 84.39s/it]

Epoch 1 loss: 0.2866164445877075


Epoch 1:  81%|██████████████████████████████████████████████████████▏            | 303/375 [7:03:39<1:41:43, 84.76s/it]

Epoch 1 loss: 0.3587137758731842


Epoch 1:  81%|██████████████████████████████████████████████████████▎            | 304/375 [7:05:02<1:39:41, 84.24s/it]

Epoch 1 loss: 0.2881813645362854


Epoch 1:  81%|██████████████████████████████████████████████████████▍            | 305/375 [7:06:25<1:37:55, 83.94s/it]

Epoch 1 loss: 0.15316718816757202


Epoch 1:  82%|██████████████████████████████████████████████████████▋            | 306/375 [7:07:53<1:37:47, 85.03s/it]

Epoch 1 loss: 0.24993781745433807


Epoch 1:  82%|██████████████████████████████████████████████████████▊            | 307/375 [7:09:16<1:35:46, 84.51s/it]

Epoch 1 loss: 0.16988009214401245


Epoch 1:  82%|███████████████████████████████████████████████████████            | 308/375 [7:10:42<1:34:46, 84.88s/it]

Epoch 1 loss: 0.24797102808952332


Epoch 1:  82%|███████████████████████████████████████████████████████▏           | 309/375 [7:12:01<1:31:31, 83.21s/it]

Epoch 1 loss: 0.16692684590816498


Epoch 1:  83%|███████████████████████████████████████████████████████▍           | 310/375 [7:13:28<1:31:14, 84.22s/it]

Epoch 1 loss: 0.14050328731536865


Epoch 1:  83%|███████████████████████████████████████████████████████▌           | 311/375 [7:14:42<1:26:42, 81.28s/it]

Epoch 1 loss: 0.3960077464580536


Epoch 1:  83%|███████████████████████████████████████████████████████▋           | 312/375 [7:16:05<1:25:59, 81.89s/it]

Epoch 1 loss: 0.13841603696346283


Epoch 1:  83%|███████████████████████████████████████████████████████▉           | 313/375 [7:17:29<1:25:13, 82.48s/it]

Epoch 1 loss: 0.1612643301486969


Epoch 1:  84%|████████████████████████████████████████████████████████           | 314/375 [7:18:53<1:24:05, 82.72s/it]

Epoch 1 loss: 0.1742611676454544


Epoch 1:  84%|████████████████████████████████████████████████████████▎          | 315/375 [7:20:15<1:22:43, 82.72s/it]

Epoch 1 loss: 0.13681170344352722


Epoch 1:  84%|████████████████████████████████████████████████████████▍          | 316/375 [7:21:40<1:22:03, 83.45s/it]

Epoch 1 loss: 0.215826615691185


Epoch 1:  85%|████████████████████████████████████████████████████████▋          | 317/375 [7:23:10<1:22:20, 85.18s/it]

Epoch 1 loss: 0.1885184943675995


Epoch 1:  85%|████████████████████████████████████████████████████████▊          | 318/375 [7:24:35<1:20:58, 85.25s/it]

Epoch 1 loss: 0.148528054356575


Epoch 1:  85%|████████████████████████████████████████████████████████▉          | 319/375 [7:26:03<1:20:25, 86.17s/it]

Epoch 1 loss: 0.19256669282913208


Epoch 1:  85%|█████████████████████████████████████████████████████████▏         | 320/375 [7:27:24<1:17:34, 84.62s/it]

Epoch 1 loss: 0.23405671119689941


Epoch 1:  86%|█████████████████████████████████████████████████████████▎         | 321/375 [7:28:52<1:16:59, 85.55s/it]

Epoch 1 loss: 0.2548004388809204


Epoch 1:  86%|█████████████████████████████████████████████████████████▌         | 322/375 [7:30:25<1:17:35, 87.85s/it]

Epoch 1 loss: 0.1797039806842804


Epoch 1:  86%|█████████████████████████████████████████████████████████▋         | 323/375 [7:31:56<1:16:49, 88.64s/it]

Epoch 1 loss: 0.22058430314064026


Epoch 1:  86%|█████████████████████████████████████████████████████████▉         | 324/375 [7:33:22<1:14:49, 88.03s/it]

Epoch 1 loss: 0.20653554797172546


Epoch 1:  87%|██████████████████████████████████████████████████████████         | 325/375 [7:34:50<1:13:10, 87.81s/it]

Epoch 1 loss: 0.16228458285331726


Epoch 1:  87%|██████████████████████████████████████████████████████████▏        | 326/375 [7:36:14<1:10:52, 86.79s/it]

Epoch 1 loss: 0.24364589154720306


Epoch 1:  87%|██████████████████████████████████████████████████████████▍        | 327/375 [7:37:30<1:06:44, 83.42s/it]

Epoch 1 loss: 0.1818993240594864


Epoch 1:  87%|██████████████████████████████████████████████████████████▌        | 328/375 [7:38:55<1:05:42, 83.88s/it]

Epoch 1 loss: 0.16852988302707672


Epoch 1:  88%|██████████████████████████████████████████████████████████▊        | 329/375 [7:40:18<1:04:14, 83.80s/it]

Epoch 1 loss: 0.1838313788175583


Epoch 1:  88%|██████████████████████████████████████████████████████████▉        | 330/375 [7:41:47<1:03:51, 85.14s/it]

Epoch 1 loss: 0.3615715503692627


Epoch 1:  88%|███████████████████████████████████████████████████████████▏       | 331/375 [7:43:15<1:03:09, 86.13s/it]

Epoch 1 loss: 0.14640341699123383


Epoch 1:  89%|███████████████████████████████████████████████████████████▎       | 332/375 [7:44:38<1:01:05, 85.24s/it]

Epoch 1 loss: 0.211892232298851


Epoch 1:  89%|█████████████████████████████████████████████████████████████▎       | 333/375 [7:46:01<59:14, 84.64s/it]

Epoch 1 loss: 0.12196939438581467


Epoch 1:  89%|█████████████████████████████████████████████████████████████▍       | 334/375 [7:47:28<58:08, 85.10s/it]

Epoch 1 loss: 0.17759093642234802


Epoch 1:  89%|█████████████████████████████████████████████████████████████▋       | 335/375 [7:48:54<56:59, 85.48s/it]

Epoch 1 loss: 0.2752543091773987


Epoch 1:  90%|█████████████████████████████████████████████████████████████▊       | 336/375 [7:50:25<56:33, 87.02s/it]

Epoch 1 loss: 0.2018040269613266


Epoch 1:  90%|██████████████████████████████████████████████████████████████       | 337/375 [7:51:51<54:56, 86.76s/it]

Epoch 1 loss: 0.24607856571674347


Epoch 1:  90%|██████████████████████████████████████████████████████████████▏      | 338/375 [7:53:22<54:17, 88.04s/it]

Epoch 1 loss: 0.33082565665245056


Epoch 1:  90%|██████████████████████████████████████████████████████████████▍      | 339/375 [7:54:50<52:55, 88.21s/it]

Epoch 1 loss: 0.21967139840126038


Epoch 1:  91%|██████████████████████████████████████████████████████████████▌      | 340/375 [7:56:17<51:13, 87.80s/it]

Epoch 1 loss: 0.2030419111251831


Epoch 1:  91%|██████████████████████████████████████████████████████████████▋      | 341/375 [7:57:44<49:35, 87.52s/it]

Epoch 1 loss: 0.15040382742881775


Epoch 1:  91%|██████████████████████████████████████████████████████████████▉      | 342/375 [7:59:09<47:36, 86.56s/it]

Epoch 1 loss: 0.14478228986263275


Epoch 1:  91%|███████████████████████████████████████████████████████████████      | 343/375 [8:00:27<44:51, 84.11s/it]

Epoch 1 loss: 0.18086808919906616


Epoch 1:  92%|███████████████████████████████████████████████████████████████▎     | 344/375 [8:01:51<43:31, 84.25s/it]

Epoch 1 loss: 0.16856366395950317


Epoch 1:  92%|███████████████████████████████████████████████████████████████▍     | 345/375 [8:03:01<39:55, 79.85s/it]

Epoch 1 loss: 0.20249037444591522


Epoch 1:  92%|███████████████████████████████████████████████████████████████▋     | 346/375 [8:04:05<36:15, 75.01s/it]

Epoch 1 loss: 0.19681106507778168


Epoch 1:  93%|███████████████████████████████████████████████████████████████▊     | 347/375 [8:05:22<35:24, 75.86s/it]

Epoch 1 loss: 0.16488498449325562


Epoch 1:  93%|████████████████████████████████████████████████████████████████     | 348/375 [8:06:25<32:19, 71.82s/it]

Epoch 1 loss: 0.3953675925731659


Epoch 1:  93%|████████████████████████████████████████████████████████████████▏    | 349/375 [8:07:26<29:44, 68.64s/it]

Epoch 1 loss: 0.21501611173152924


Epoch 1:  93%|████████████████████████████████████████████████████████████████▍    | 350/375 [8:08:22<27:03, 64.95s/it]

Epoch 1 loss: 0.2177569568157196


Epoch 1:  94%|████████████████████████████████████████████████████████████████▌    | 351/375 [8:09:46<28:13, 70.58s/it]

Epoch 1 loss: 0.11667182296514511


Epoch 1:  94%|████████████████████████████████████████████████████████████████▊    | 352/375 [8:10:56<26:59, 70.43s/it]

Epoch 1 loss: 0.17734020948410034


Epoch 1:  94%|████████████████████████████████████████████████████████████████▉    | 353/375 [8:12:10<26:12, 71.46s/it]

Epoch 1 loss: 0.17818684875965118


Epoch 1:  94%|█████████████████████████████████████████████████████████████████▏   | 354/375 [8:13:35<26:25, 75.50s/it]

Epoch 1 loss: 0.3078554570674896


Epoch 1:  95%|█████████████████████████████████████████████████████████████████▎   | 355/375 [8:14:42<24:21, 73.07s/it]

Epoch 1 loss: 0.20533417165279388


Epoch 1:  95%|█████████████████████████████████████████████████████████████████▌   | 356/375 [8:15:43<21:59, 69.43s/it]

Epoch 1 loss: 0.2964361608028412


Epoch 1:  95%|█████████████████████████████████████████████████████████████████▋   | 357/375 [8:16:54<20:56, 69.82s/it]

Epoch 1 loss: 0.243452250957489


Epoch 1:  95%|█████████████████████████████████████████████████████████████████▊   | 358/375 [8:18:01<19:34, 69.06s/it]

Epoch 1 loss: 0.18571166694164276


Epoch 1:  96%|██████████████████████████████████████████████████████████████████   | 359/375 [8:18:57<17:20, 65.05s/it]

Epoch 1 loss: 0.21659359335899353


Epoch 1:  96%|██████████████████████████████████████████████████████████████████▏  | 360/375 [8:20:02<16:16, 65.11s/it]

Epoch 1 loss: 0.14962512254714966


Epoch 1:  96%|██████████████████████████████████████████████████████████████████▍  | 361/375 [8:21:12<15:32, 66.63s/it]

Epoch 1 loss: 0.3023586571216583


Epoch 1:  97%|██████████████████████████████████████████████████████████████████▌  | 362/375 [8:22:14<14:06, 65.14s/it]

Epoch 1 loss: 0.17688880860805511


Epoch 1:  97%|██████████████████████████████████████████████████████████████████▊  | 363/375 [8:23:26<13:24, 67.04s/it]

Epoch 1 loss: 0.2923194468021393


Epoch 1:  97%|██████████████████████████████████████████████████████████████████▉  | 364/375 [8:24:39<12:37, 68.82s/it]

Epoch 1 loss: 0.26032644510269165


Epoch 1:  97%|███████████████████████████████████████████████████████████████████▏ | 365/375 [8:25:47<11:27, 68.74s/it]

Epoch 1 loss: 0.17872020602226257


Epoch 1:  98%|███████████████████████████████████████████████████████████████████▎ | 366/375 [8:26:54<10:12, 68.07s/it]

Epoch 1 loss: 0.28067436814308167


Epoch 1:  98%|███████████████████████████████████████████████████████████████████▌ | 367/375 [8:28:01<09:02, 67.80s/it]

Epoch 1 loss: 0.28604966402053833


Epoch 1:  98%|███████████████████████████████████████████████████████████████████▋ | 368/375 [8:29:06<07:50, 67.15s/it]

Epoch 1 loss: 0.13721726834774017


Epoch 1:  98%|███████████████████████████████████████████████████████████████████▉ | 369/375 [8:29:57<06:13, 62.32s/it]

Epoch 1 loss: 0.22209547460079193


Epoch 1:  99%|████████████████████████████████████████████████████████████████████ | 370/375 [8:30:56<05:05, 61.05s/it]

Epoch 1 loss: 0.19328072667121887


Epoch 1:  99%|████████████████████████████████████████████████████████████████████▎| 371/375 [8:31:59<04:07, 61.86s/it]

Epoch 1 loss: 0.18062865734100342


Epoch 1:  99%|████████████████████████████████████████████████████████████████████▍| 372/375 [8:33:05<03:09, 63.06s/it]

Epoch 1 loss: 0.21386529505252838


Epoch 1:  99%|████████████████████████████████████████████████████████████████████▋| 373/375 [8:34:09<02:06, 63.36s/it]

Epoch 1 loss: 0.11473502218723297


Epoch 1: 100%|████████████████████████████████████████████████████████████████████▊| 374/375 [8:35:13<01:03, 63.62s/it]

Epoch 1 loss: 0.19765999913215637


Epoch 1: 100%|█████████████████████████████████████████████████████████████████████| 375/375 [8:36:15<00:00, 82.60s/it]

Epoch 1 loss: 0.38321390748023987
Epoch 1 loss: 0.38321390748023987





In [20]:
# Save the fine-tuned model
model.save_pretrained('./movie_chatbot_model')
tokenizer.save_pretrained('./movie_chatbot_tokenizer')

('./movie_chatbot_tokenizer\\tokenizer_config.json',
 './movie_chatbot_tokenizer\\special_tokens_map.json',
 './movie_chatbot_tokenizer\\vocab.json',
 './movie_chatbot_tokenizer\\merges.txt',
 './movie_chatbot_tokenizer\\added_tokens.json')

In [None]:
# Multi-Turn Conversation with Context

In [None]:
#     handling multi-turn conversations by storing conversation history

In [21]:
def generate_response(model, tokenizer, conversation_history, max_length=512):
    input_text = " ".join(conversation_history) + tokenizer.eos_token
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    
    with torch.no_grad():
        output = model.generate(input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id)
    
    response = tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
    return response

In [None]:
# Web Interface using Gradio

In [None]:
#     using Gradio to build a simple web interface

In [22]:
import gradio as gr

# Load the fine-tuned model and tokenizer
model = GPT2LMHeadModel.from_pretrained('./movie_chatbot_model')
tokenizer = GPT2Tokenizer.from_pretrained('./movie_chatbot_tokenizer')

conversation_history = []

def chatbot(user_input):
    global conversation_history
    conversation_history.append(user_input)
    response = generate_response(model, tokenizer, conversation_history)
    conversation_history.append(response)
    return response

# Build the Gradio interface
gr.Interface(fn=chatbot, inputs="text", outputs="text").launch()

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [None]:
# Evaluation 

In [None]:
#    Evaluate the chatbot using BLEU for generated responses

In [None]:
import torch
import math
from tqdm import tqdm  # Progress bar library
from nltk.translate.bleu_score import sentence_bleu

def evaluate(model, small_dataloader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        # Use tqdm to wrap the dataloader for a progress bar
        for batch in tqdm(small_dataloader, desc="Evaluating", leave=False):
            outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['labels'])
            total_loss += outputs.loss.item()
    
    avg_loss = total_loss / len(small_dataloader)
    perplexity = math.exp(avg_loss)
    return perplexity

# Calculate BLEU score for evaluation
def bleu_score(reference, candidate):
    return sentence_bleu([reference.split()], candidate.split())

# Example usage for evaluation with progress bar
perplexity = evaluate(model, small_dataloader)
print(f'Perplexity: {perplexity}')

In [None]:
# Summary

In [None]:
#    Preprocessing: Loaded movie_lines.txt and movie_conversations.txt, tokenized dialogues.
#    Model: Fine-tuned GPT-2 for conversational modeling.
#    Context Management: Maintained conversation history for multi-turn interactions.
#    Web Interface: Used Gradio for a simple chatbot interface.
#    Evaluation: Added BLEU score as a metric.

#This implementation enables a transformer-based chatbot that handles multi-turn conversations, adapts to context, and uses the loaded data