# LSTM

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

# Set random seeds for reproducibility
np.random.seed(1)
tf.random.set_seed(2)

# Load the dataset
emotions_df = pd.read_csv('/kaggle/input/newemotions3/newemotions3.csv')
emotions_df=emotions_df.dropna(subset=['Sentiment'])

# Check the first few rows and the shape of the dataset
print(emotions_df.head())
print(emotions_df.shape)

# Prepare labels and features
y = emotions_df["Sentiment"]
X = emotions_df["Review"]  # Assuming the text column is named 'Review'

# Split data into training, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1765, random_state=1)  # Adjusted to maintain 70/15/15 split

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)  # Fit on all data to prevent token mismatches due to data splitting
vocab_size = len(tokenizer.word_index) + 1  # +1 for zero padding

# Convert texts to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_val_seq = tokenizer.texts_to_sequences(X_val)

# Set max length and pad sequences
max_length = 20
X_train_seq_pad = pad_sequences(X_train_seq, maxlen=max_length, padding='post')
X_test_seq_pad = pad_sequences(X_test_seq, maxlen=max_length, padding='post')
X_val_seq_pad = pad_sequences(X_val_seq, maxlen=max_length, padding='post')

# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
y_val_encoded = label_encoder.transform(y_val)

# One-hot encode labels
y_train_oh = to_categorical(y_train_encoded)
y_test_oh = to_categorical(y_test_encoded)
y_val_oh = to_categorical(y_val_encoded)

# Build and compile the model
model = Sequential([
    Embedding(vocab_size, 256, input_length=max_length, embeddings_regularizer=tf.keras.regularizers.l2(0.0001)),
    LSTM(units=16, dropout=0.2),
    Dense(4, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(
    X_train_seq_pad, y_train_oh,
    epochs=128,
    batch_size=256,
    validation_data=(X_val_seq_pad, y_val_oh),
    shuffle=False
)


2024-04-24 00:55:50.018860: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-24 00:55:50.018967: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-24 00:55:50.187991: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


                                          Review Sentiment
0    मेरे ट्रैन का बुकिंग फेल हो रहा है बार बार      angry
1  मेरे फ्लाइट  का बुकिंग फेल हो रहा है बार बार      angry
2                 क्या बेकार की बातें कर रहे हो      angry
3                  क्या बताओ यार मूड ही ख़राब है      angry
4             आप ऐसे कैसे मेरा पैसा काट सकते हो      angry
(2005, 2)




Epoch 1/128
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 108ms/step - accuracy: 0.3158 - loss: 1.4226 - val_accuracy: 0.5083 - val_loss: 1.3947
Epoch 2/128
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.4449 - loss: 1.3902 - val_accuracy: 0.4751 - val_loss: 1.3569
Epoch 3/128
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.4634 - loss: 1.3567 - val_accuracy: 0.5116 - val_loss: 1.3168
Epoch 4/128
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.4995 - loss: 1.3200 - val_accuracy: 0.5781 - val_loss: 1.2684
Epoch 5/128
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.5636 - loss: 1.2658 - val_accuracy: 0.6312 - val_loss: 1.2019
Epoch 6/128
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.6196 - loss: 1.1864 - val_accuracy: 0.6478 - val_loss: 1.1009
Epoch 7/128
[1m6/6[0m [32m━━━━━━━━━━

In [None]:
#loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
#print(f"Test Accuracy: {accuracy*100:.2f}%")

# CNN+LSTM

In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, SimpleRNN, LSTM, Flatten, Dense
# from tensorflow.keras.preprocessing.text import Tokenizer
# from tensorflow.keras.preprocessing.sequence import pad_sequences
# from tensorflow.keras.utils import to_categorical
# import tensorflow as tf

# # Load the dataset
# emotions_df = pd.read_csv('newemotions3.csv')

# # Assuming 'Sentiment' is the label column and 'Review' is the feature (text) column in newemotions3.csv
# label_encoder = LabelEncoder()
# emotions_df['Encoded_Label'] = label_encoder.fit_transform(emotions_df['Sentiment'])

# num_classes = emotions_df['Encoded_Label'].nunique()

# # Prepare the text data
# tokenizer = Tokenizer()
# tokenizer.fit_on_texts(emotions_df['Review'])  # Replace 'Sentences' with 'Review'
# sequences = tokenizer.texts_to_sequences(emotions_df['Review'])
# data = pad_sequences(sequences, padding='post', maxlen=20)  # Set maxlen to a reasonable length based on data inspection

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(data, emotions_df['Encoded_Label'], test_size=0.3, random_state=42)

# # One-hot encode labels
# y_train_oh = to_categorical(y_train)
# y_test_oh = to_categorical(y_test)

# # Parameters and model architecture
# num_classes = y_train_oh.shape[1]
# max_words = data.shape[1]  # Length of the longest sequence
# vocab_size = len(tokenizer.word_index) + 1  # Vocabulary size
# emb_dims = 256

# model = Sequential([
#     Embedding(vocab_size, emb_dims, input_length=max_words, embeddings_regularizer=tf.keras.regularizers.l2(0.0001)),
#     Conv1D(filters=28, kernel_size=3, padding='same', activation='tanh'),
#     MaxPooling1D(pool_size=2),
#     SimpleRNN(100, return_sequences=True),
#     LSTM(units=16, dropout=0.2),
#     Flatten(),
#     Dense(4, activation='relu'),
#     Dense(num_classes, activation='softmax')
# ])

# # Compile the model
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# # Print model summary
# print(model.summary())

# # Train the model
# #history = model.fit(X_train, y_train_oh, batch_size=128, epochs=64, validation_data=(X_test, y_test_oh), verbose=1)


# BERT


In [1]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset
emotions_df = pd.read_csv('/kaggle/input/newemotions3/newemotions3.csv')  # Ensure correct file path

# Encode labels
label_encoder = LabelEncoder()
emotions_df['Encoded_Sentiment'] = label_encoder.fit_transform(emotions_df['Sentiment'])

# Initialize BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode sequences in the dataset
input_ids = []
attention_masks = []

for review in emotions_df['Review']:  # Adjusted to use 'Review'
    encoded_dict = tokenizer.encode_plus(
                        review,  # Changed to use the review text
                        add_special_tokens=True,
                        max_length=256,
                        pad_to_max_length=True,
                        return_attention_mask=True,
                        return_tensors='pt',
                   )

    input_ids.append(encoded_dict['input_ids'])
    attention_masks.append(encoded_dict['attention_mask'])

# Convert lists into tensors
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(emotions_df['Encoded_Sentiment'].values)

# Split data into train and validation sets
train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids, labels, random_state=42, test_size=0.1)
train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels, random_state=42, test_size=0.1)

# Convert to DataLoader
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=32)

validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=32)

# Load BertForSequenceClassification
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(label_encoder.classes_),  # Reflect the actual number of sentiment classes
    output_attentions=False,
    output_hidden_states=False,
)

# Send model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Set up optimizer
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)

from transformers import get_scheduler
from tqdm.auto import tqdm

# Define training parameters
num_epochs = 5  # Reduced for faster demonstration
num_training_steps = num_epochs * len(train_dataloader)

# Set up the learning rate scheduler
scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)

# Training and validation loop
progress_bar = tqdm(range(num_training_steps))
model.train()

for epoch in range(num_epochs):
    for batch in train_dataloader:
        batch = tuple(t.to(device) for t in batch)
        inputs = {
            'input_ids': batch[0],
            'attention_mask': batch[1],
            'labels': batch[2]
        }

        model.zero_grad()
        outputs = model(**inputs)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        scheduler.step()
        progress_bar.update(1)

    # Validation loop for accuracy and loss
    model.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    for batch in validation_dataloader:
        batch = tuple(t.to(device) for t in batch)
        inputs = {
            'input_ids': batch[0],
            'attention_mask': batch[1],
            'labels': batch[2]
        }

        with torch.no_grad():
            outputs = model(**inputs)

        loss = outputs.loss
        total_eval_loss += loss.item()

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        total_eval_accuracy += (predictions == inputs['labels']).cpu().numpy().mean()

    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print(f"Validation accuracy: {avg_val_accuracy:.4f}")
    print(f"Average validation loss: {total_eval_loss / len(validation_dataloader)}")


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/285 [00:00<?, ?it/s]

Validation accuracy: 0.5040
Average validation loss: 1.1416280099323817
Validation accuracy: 0.7411
Average validation loss: 0.7436084704739707
Validation accuracy: 0.8145
Average validation loss: 0.5634964193616595
Validation accuracy: 0.8616
Average validation loss: 0.4798106167997633
Validation accuracy: 0.8571
Average validation loss: 0.4657234379223415


# mBERT

In [2]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from torch.nn.utils.rnn import pad_sequence

# Load the dataset
dataset = pd.read_csv('/kaggle/input/newemotions3/newemotions3.csv')  # Update path if necessary

# Preprocess the dataset
texts = dataset['Review'].values  # Updated to 'Review'
labels = dataset['Sentiment'].values  # Updated to 'Sentiment'

# Initialize tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Tokenization & Input Formatting
input_ids = [tokenizer.encode(text, add_special_tokens=True, max_length=128, truncation=True) for text in texts]

# Pad the input tokens
input_ids_padded = pad_sequence([torch.tensor(seq) for seq in input_ids], batch_first=True, padding_value=tokenizer.pad_token_id)

# Create attention masks
attention_masks = [[float(token_id > 0) for token_id in seq] for seq in input_ids_padded]
attention_masks = torch.tensor(attention_masks)

# Label encoding
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Convert labels to tensors
labels_tensor = torch.tensor(labels_encoded)

# Split the dataset into training and validation sets
train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids_padded, labels_tensor, random_state=2018, test_size=0.1)
train_masks, validation_masks, _, _ = train_test_split(attention_masks, attention_masks, random_state=2018, test_size=0.1)

# Create data loaders
batch_size = 32
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)

# Load mBERT Model for Sequence Classification
model = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=len(label_encoder.classes_))
model.cuda()

# Setting up the optimizer and scheduler for training
optimizer = AdamW(model.parameters(), lr=3e-5, eps=1e-8)
epochs = 5
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Training loop
model.train()
for epoch in range(epochs):
    for batch in train_dataloader:
        batch = [item.cuda() for item in batch]  # Move to GPU
        b_input_ids, b_input_mask, b_labels = batch

        model.zero_grad()
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        scheduler.step()

# Validation loop
model.eval()
total_eval_accuracy = 0
for batch in validation_dataloader:
    batch = [item.cuda() for item in batch]  # Move to GPU
    b_input_ids, b_input_mask, b_labels = batch

    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)

    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)
    total_eval_accuracy += (predictions == b_labels).sum().item()

# Calculate and print validation accuracy
validation_accuracy = total_eval_accuracy / len(validation_inputs)
print(f"Validation Accuracy: {validation_accuracy:.4f}")

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Accuracy: 0.9254


# DistillBERT

In [3]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
from tqdm import tqdm
import random

# Load and preprocess the dataset
df = pd.read_csv('/kaggle/input/newemotions3/newemotions3.csv')  # Ensure the path is correct
df = df.dropna(subset=['Sentiment', 'Review'])

# Split the dataset into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Define label mapping from unique sentiments
label_map = {label: idx for idx, label in enumerate(df['Sentiment'].unique())}

# Define a custom dataset class with data augmentation
class SentimentDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length, label_map, random_deletion_prob=0.1, random_swap_prob=0.1):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.label_map = label_map
        self.random_deletion_prob = random_deletion_prob
        self.random_swap_prob = random_swap_prob

    def random_deletion(self, words, p=0.1):
        if len(words) == 1:
            return words
        remaining = list(filter(lambda x: random.uniform(0,1) > p, words))
        if len(remaining) == 0:
            return [random.choice(words)]
        else:
            return remaining

    def random_swap(self, words, n=3):
        length = len(words)
        if length < 2:
            return words
        for _ in range(n):
            idx1, idx2 = random.sample(range(length), 2)
            words[idx1], words[idx2] = words[idx2], words[idx1]
        return words

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data.iloc[idx]['Review']
        label_str = self.data.iloc[idx]['Sentiment']
        label = self.label_map[label_str]

        text_words = text.split()
        if random.uniform(0, 1) < self.random_deletion_prob:
            text_words = self.random_deletion(text_words)
        if random.uniform(0, 1) < self.random_swap_prob:
            text_words = self.random_swap(text_words)

        text = ' '.join(text_words)
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-multilingual-cased')
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased", num_labels=len(label_map))

batch_size = 32
max_length = 256
epochs = 5
learning_rate = 3e-5

train_dataset = SentimentDataset(train_df, tokenizer, max_length, label_map)
val_dataset = SentimentDataset(val_df, tokenizer, max_length, label_map)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader, desc="Training Epoch {}".format(epoch+1)):
        optimizer.zero_grad()
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Validation
    model.eval()
    total_val_loss = 0
    total_correct = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            logits = outputs.logits
            loss = criterion(logits, batch['labels'])
            total_val_loss += loss.item()
            preds = torch.argmax(logits, dim=1)
            total_correct += (preds == batch['labels']).sum().item()

    val_accuracy = total_correct / len(val_dataset)
    print(f'Epoch: {epoch+1}, Train Loss: {total_loss / len(train_loader):.4f}, Val Loss: {total_val_loss / len(val_loader):.4f}, Val Accuracy: {val_accuracy:.4f}')


tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/466 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/542M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Training Epoch 1:   0%|          | 0/51 [00:00<?, ?it/s][A
Training Epoch 1:   2%|▏         | 1/51 [00:00<00:41,  1.21it/s][A
Training Epoch 1:   4%|▍         | 2/51 [00:01<00:36,  1.34it/s][A
Training Epoch 1:   6%|▌         | 3/51 [00:02<00:35,  1.37it/s][A
Training Epoch 1:   8%|▊         | 4/51 [00:02<00:33,  1.39it/s][A
Training Epoch 1:  10%|▉         | 5/51 [00:03<00:32,  1.41it/s][A
Training Epoch 1:  12%|█▏        | 6/51 [00:04<00:31,  1.41it/s][A
Training Epoch 1:  14%|█▎        | 7/51 [00:05<00:31,  1.41it/s][A
Training Epoch 1:  16%|█▌        | 8/51 [00:05<00:30,  1.41it/s][A
Training Epoch 1:  18%|█▊      

Epoch: 1, Train Loss: 1.0835, Val Loss: 0.7569, Val Accuracy: 0.6883



Training Epoch 2:   0%|          | 0/51 [00:00<?, ?it/s][A
Training Epoch 2:   2%|▏         | 1/51 [00:00<00:35,  1.40it/s][A
Training Epoch 2:   4%|▍         | 2/51 [00:01<00:34,  1.40it/s][A
Training Epoch 2:   6%|▌         | 3/51 [00:02<00:34,  1.39it/s][A
Training Epoch 2:   8%|▊         | 4/51 [00:02<00:33,  1.39it/s][A
Training Epoch 2:  10%|▉         | 5/51 [00:03<00:33,  1.38it/s][A
Training Epoch 2:  12%|█▏        | 6/51 [00:04<00:32,  1.39it/s][A
Training Epoch 2:  14%|█▎        | 7/51 [00:05<00:31,  1.39it/s][A
Training Epoch 2:  16%|█▌        | 8/51 [00:05<00:30,  1.39it/s][A
Training Epoch 2:  18%|█▊        | 9/51 [00:06<00:30,  1.39it/s][A
Training Epoch 2:  20%|█▉        | 10/51 [00:07<00:29,  1.39it/s][A
Training Epoch 2:  22%|██▏       | 11/51 [00:07<00:28,  1.39it/s][A
Training Epoch 2:  24%|██▎       | 12/51 [00:08<00:28,  1.39it/s][A
Training Epoch 2:  25%|██▌       | 13/51 [00:09<00:27,  1.39it/s][A
Training Epoch 2:  27%|██▋       | 14/51 [00:10<00:

Epoch: 2, Train Loss: 0.5891, Val Loss: 0.4491, Val Accuracy: 0.8304



Training Epoch 3:   0%|          | 0/51 [00:00<?, ?it/s][A
Training Epoch 3:   2%|▏         | 1/51 [00:00<00:35,  1.42it/s][A
Training Epoch 3:   4%|▍         | 2/51 [00:01<00:34,  1.42it/s][A
Training Epoch 3:   6%|▌         | 3/51 [00:02<00:34,  1.41it/s][A
Training Epoch 3:   8%|▊         | 4/51 [00:02<00:33,  1.40it/s][A
Training Epoch 3:  10%|▉         | 5/51 [00:03<00:32,  1.41it/s][A
Training Epoch 3:  12%|█▏        | 6/51 [00:04<00:31,  1.41it/s][A
Training Epoch 3:  14%|█▎        | 7/51 [00:04<00:31,  1.41it/s][A
Training Epoch 3:  16%|█▌        | 8/51 [00:05<00:30,  1.41it/s][A
Training Epoch 3:  18%|█▊        | 9/51 [00:06<00:29,  1.41it/s][A
Training Epoch 3:  20%|█▉        | 10/51 [00:07<00:29,  1.41it/s][A
Training Epoch 3:  22%|██▏       | 11/51 [00:07<00:28,  1.41it/s][A
Training Epoch 3:  24%|██▎       | 12/51 [00:08<00:27,  1.41it/s][A
Training Epoch 3:  25%|██▌       | 13/51 [00:09<00:26,  1.41it/s][A
Training Epoch 3:  27%|██▋       | 14/51 [00:09<00:

Epoch: 3, Train Loss: 0.3482, Val Loss: 0.3827, Val Accuracy: 0.8554



Training Epoch 4:   0%|          | 0/51 [00:00<?, ?it/s][A
Training Epoch 4:   2%|▏         | 1/51 [00:00<00:35,  1.42it/s][A
Training Epoch 4:   4%|▍         | 2/51 [00:01<00:34,  1.40it/s][A
Training Epoch 4:   6%|▌         | 3/51 [00:02<00:34,  1.40it/s][A
Training Epoch 4:   8%|▊         | 4/51 [00:02<00:33,  1.40it/s][A
Training Epoch 4:  10%|▉         | 5/51 [00:03<00:32,  1.40it/s][A
Training Epoch 4:  12%|█▏        | 6/51 [00:04<00:32,  1.40it/s][A
Training Epoch 4:  14%|█▎        | 7/51 [00:05<00:31,  1.40it/s][A
Training Epoch 4:  16%|█▌        | 8/51 [00:05<00:30,  1.39it/s][A
Training Epoch 4:  18%|█▊        | 9/51 [00:06<00:30,  1.39it/s][A
Training Epoch 4:  20%|█▉        | 10/51 [00:07<00:29,  1.39it/s][A
Training Epoch 4:  22%|██▏       | 11/51 [00:07<00:28,  1.39it/s][A
Training Epoch 4:  24%|██▎       | 12/51 [00:08<00:27,  1.39it/s][A
Training Epoch 4:  25%|██▌       | 13/51 [00:09<00:27,  1.39it/s][A
Training Epoch 4:  27%|██▋       | 14/51 [00:10<00:

Epoch: 4, Train Loss: 0.2157, Val Loss: 0.4685, Val Accuracy: 0.8304



Training Epoch 5:   0%|          | 0/51 [00:00<?, ?it/s][A
Training Epoch 5:   2%|▏         | 1/51 [00:00<00:35,  1.41it/s][A
Training Epoch 5:   4%|▍         | 2/51 [00:01<00:34,  1.41it/s][A
Training Epoch 5:   6%|▌         | 3/51 [00:02<00:34,  1.39it/s][A
Training Epoch 5:   8%|▊         | 4/51 [00:02<00:33,  1.40it/s][A
Training Epoch 5:  10%|▉         | 5/51 [00:03<00:33,  1.39it/s][A
Training Epoch 5:  12%|█▏        | 6/51 [00:04<00:32,  1.39it/s][A
Training Epoch 5:  14%|█▎        | 7/51 [00:05<00:31,  1.40it/s][A
Training Epoch 5:  16%|█▌        | 8/51 [00:05<00:30,  1.40it/s][A
Training Epoch 5:  18%|█▊        | 9/51 [00:06<00:30,  1.40it/s][A
Training Epoch 5:  20%|█▉        | 10/51 [00:07<00:29,  1.40it/s][A
Training Epoch 5:  22%|██▏       | 11/51 [00:07<00:28,  1.40it/s][A
Training Epoch 5:  24%|██▎       | 12/51 [00:08<00:27,  1.40it/s][A
Training Epoch 5:  25%|██▌       | 13/51 [00:09<00:27,  1.40it/s][A
Training Epoch 5:  27%|██▋       | 14/51 [00:10<00:

Epoch: 5, Train Loss: 0.1761, Val Loss: 0.4104, Val Accuracy: 0.8554
