In [None]:
import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
import evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the raw CSV. Passing `names` makes pandas treat every line of the file,
# including the first one, as data rather than as a header.
main_data = pd.read_csv("dataset/training.csv", names=['id', 'name', 'label', 'sentence'])
# NOTE: a bare `main_data.drop(index=0)` used to sit here. DataFrame.drop returns
# a new frame, so that call never removed anything and has been deleted. Row 0
# looks like real data: the recorded label counts contain only the four
# sentiment classes and no header text.
drop_columns = ["id", "name"]
main_data = (
    main_data
    .dropna()                    # discard rows with any missing field
    .drop(drop_columns, axis=1)  # keep only 'label' and 'sentence'
    .iloc[:1000, :]              # keep the first 1000 remaining rows
    .copy()                      # independent frame, so later column writes are safe
)

# md_data = md_data.head(10000)

In [None]:
def encoder(data):
    """Map each distinct value of ``data`` to an integer code.

    Codes start at 0 and follow the order of first appearance, i.e. the order
    returned by ``data.unique()``. The mapping is printed as a side effect.

    The input Series is not modified: a new Series is returned. Because the
    encoding is done with ``Series.map`` rather than label-based element
    writes, it also works when the index contains duplicate labels.

    Args:
        data: pandas Series of hashable values (e.g. string class labels).

    Returns:
        tuple: ``(encoded, mapping)`` where ``encoded`` is a new Series with
        the same index as ``data`` holding the integer codes, and ``mapping``
        is the ``{value: code}`` dict that was applied.
    """
    # enumerate() yields the same codes as looking up each value's position in
    # the unique list, without the repeated linear searches.
    temp_dict = {value: code for code, value in enumerate(data.unique())}
    print(temp_dict)
    return data.map(temp_dict), temp_dict

# Show the class balance while the labels are still human-readable strings.
label_counts = main_data['label'].value_counts()
print(label_counts)

# Distinct label names, captured before encoding; len(classes) is used later
# to size the classification head.
classes = main_data["label"].unique()

# Replace the string labels with integer codes and keep the mapping around.
encoded_labels, label_map = encoder(main_data['label'])
main_data['label'] = encoded_labels

Positive      425
Neutral       279
Negative      194
Irrelevant    102
Name: label, dtype: int64
{'Positive': 0, 'Neutral': 1, 'Negative': 2, 'Irrelevant': 3}


In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
train_data = main_data.sample(frac=0.8, random_state=42)
test_data = main_data.drop(train_data.index)
# Sanity check: the two splits together account for every row of main_data.
assert len(train_data) + len(test_data) == len(main_data)

# preserve_index=False keeps the shuffled pandas index from being carried into
# the datasets as an extra "__index_level_0__" column.
hg_train_data = Dataset.from_pandas(train_data, preserve_index=False)
hg_test_data = Dataset.from_pandas(test_data, preserve_index=False)
print(f'The length of train_data is {len(hg_train_data)}')
print(f'The length of test_data is {len(hg_test_data)}')

The length of train_data is 800
The length of test_data is 200


In [None]:
import torch

# Use the first CUDA GPU when PyTorch reports one as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Tokenizer for the DistilBERT SST-2 checkpoint.
# Alternative checkpoint kept for reference: "j-hartmann/emotion-english-distilroberta-base"
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")

In [None]:
# Function to tokenize data
def tokenize_dataset(data):
    """Run the module-level ``tokenizer`` over the ``"sentence"`` entry of ``data``.

    The tokenizer is called with ``truncation=True`` and
    ``padding="max_length"``.

    Args:
        data: Record (or mapping) exposing a ``"sentence"`` entry.

    Returns:
        The value returned by ``tokenizer(...)`` for that text.
    """
    text = data["sentence"]
    encoded = tokenizer(text, truncation=True, padding="max_length")
    return encoded

# Tokenize both splits with the same function: training split first, then test.
dataset_train, dataset_test = (
    split.map(tokenize_dataset) for split in (hg_train_data, hg_test_data)
)

Map: 100%|██████████| 800/800 [00:00<00:00, 4655.95 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 5344.66 examples/s]


In [None]:
# Load model
# Alternative checkpoint kept for reference: "j-hartmann/emotion-english-distilroberta-base"
# The SST-2 checkpoint has a 2-class head; ignore_mismatched_sizes=True lets a
# new head with len(classes) outputs be initialized in its place.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english",
    num_labels=len(classes),
    ignore_mismatched_sizes=True,
)
# Move the model to the selected device.
model = model.to(device=device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([2, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Set up training arguments
training_args = TrainingArguments(
    output_dir="./CA/",
    # Keep the logs under the output directory. The previous value '.CA/logs'
    # (missing slash) pointed at a separate dot-directory instead.
    logging_dir='./CA/logs',
    # Log and evaluate once per epoch. The step intervals (logging_steps /
    # eval_steps) only apply to the "steps" strategies, so they are not set.
    logging_strategy='epoch',
    evaluation_strategy='epoch',
    num_train_epochs=100,
    per_device_train_batch_size=20,
    per_device_eval_batch_size=20,
    learning_rate=5e-6,
    seed=42,
    # Checkpoint-selection options, currently disabled:
    # save_strategy='epoch',
    # save_steps=100,
    # load_best_model_at_end=True
)

In [None]:
# Load the accuracy metric once, instead of on every evaluation pass.
accuracy_metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``. The logits are reduced with
            ``argmax`` over axis 1 to obtain the predicted class ids.

    Returns:
        The result of the accuracy metric's ``compute`` call for those
        predictions against ``labels``.
    """
    logits, labels = eval_pred
    # The highest-scoring class per row is the prediction.
    predictions = np.argmax(logits, axis=1)
    return accuracy_metric.compute(predictions=predictions, references=labels)
# Build the Trainer: it pairs the model and training arguments with the
# tokenized train/test splits and the metric function. Training itself is
# started by a separate trainer.train() call.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=dataset_test,
    train_dataset=dataset_train,
    # callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)

In [None]:
# Start fine-tuning with the Trainer built earlier. As the last expression in
# the cell, the value returned by train() is what the notebook displays.
trainer.train()

  1%|          | 40/4000 [00:35<58:07,  1.14it/s] 

{'loss': 1.2421, 'learning_rate': 4.95e-06, 'epoch': 1.0}


                                                 
  1%|          | 40/4000 [00:40<58:07,  1.14it/s]

{'eval_loss': 1.2327674627304077, 'eval_accuracy': 0.43, 'eval_runtime': 4.9211, 'eval_samples_per_second': 40.642, 'eval_steps_per_second': 2.032, 'epoch': 1.0}


  2%|▏         | 80/4000 [01:15<57:01,  1.15it/s]  

{'loss': 1.116, 'learning_rate': 4.9000000000000005e-06, 'epoch': 2.0}


                                                 
  2%|▏         | 80/4000 [01:20<57:01,  1.15it/s]

{'eval_loss': 1.1730754375457764, 'eval_accuracy': 0.475, 'eval_runtime': 4.9487, 'eval_samples_per_second': 40.415, 'eval_steps_per_second': 2.021, 'epoch': 2.0}


  3%|▎         | 120/4000 [01:56<56:58,  1.14it/s] 

{'loss': 1.007, 'learning_rate': 4.85e-06, 'epoch': 3.0}


                                                  
  3%|▎         | 120/4000 [02:01<56:58,  1.14it/s]

{'eval_loss': 1.0941532850265503, 'eval_accuracy': 0.54, 'eval_runtime': 4.8943, 'eval_samples_per_second': 40.864, 'eval_steps_per_second': 2.043, 'epoch': 3.0}


  4%|▍         | 160/4000 [02:36<55:58,  1.14it/s]  

{'loss': 0.9029, 'learning_rate': 4.800000000000001e-06, 'epoch': 4.0}


                                                  
  4%|▍         | 160/4000 [02:41<55:58,  1.14it/s]

{'eval_loss': 1.0058614015579224, 'eval_accuracy': 0.565, 'eval_runtime': 4.8829, 'eval_samples_per_second': 40.959, 'eval_steps_per_second': 2.048, 'epoch': 4.0}


  5%|▌         | 200/4000 [03:16<55:09,  1.15it/s]  

{'loss': 0.7981, 'learning_rate': 4.75e-06, 'epoch': 5.0}


                                                  
  5%|▌         | 200/4000 [03:21<55:09,  1.15it/s]

{'eval_loss': 0.9143213033676147, 'eval_accuracy': 0.64, 'eval_runtime': 4.8787, 'eval_samples_per_second': 40.995, 'eval_steps_per_second': 2.05, 'epoch': 5.0}


  6%|▌         | 240/4000 [03:56<54:48,  1.14it/s]  

{'loss': 0.6956, 'learning_rate': 4.7e-06, 'epoch': 6.0}


                                                  
  6%|▌         | 240/4000 [04:01<54:48,  1.14it/s]

{'eval_loss': 0.8463551998138428, 'eval_accuracy': 0.665, 'eval_runtime': 4.8959, 'eval_samples_per_second': 40.85, 'eval_steps_per_second': 2.043, 'epoch': 6.0}


  7%|▋         | 280/4000 [04:36<53:55,  1.15it/s]  

{'loss': 0.6105, 'learning_rate': 4.65e-06, 'epoch': 7.0}


                                                  
  7%|▋         | 280/4000 [04:41<53:55,  1.15it/s]

{'eval_loss': 0.7859413623809814, 'eval_accuracy': 0.675, 'eval_runtime': 4.8555, 'eval_samples_per_second': 41.19, 'eval_steps_per_second': 2.06, 'epoch': 7.0}


  8%|▊         | 320/4000 [05:16<53:20,  1.15it/s]  

{'loss': 0.5252, 'learning_rate': 4.600000000000001e-06, 'epoch': 8.0}


                                                  
  8%|▊         | 320/4000 [05:21<53:20,  1.15it/s]

{'eval_loss': 0.7349900603294373, 'eval_accuracy': 0.705, 'eval_runtime': 4.9357, 'eval_samples_per_second': 40.521, 'eval_steps_per_second': 2.026, 'epoch': 8.0}


  9%|▉         | 360/4000 [05:56<53:31,  1.13it/s]  

{'loss': 0.4501, 'learning_rate': 4.5500000000000005e-06, 'epoch': 9.0}


                                                  
  9%|▉         | 360/4000 [06:01<53:31,  1.13it/s]

{'eval_loss': 0.6553763747215271, 'eval_accuracy': 0.72, 'eval_runtime': 4.9079, 'eval_samples_per_second': 40.751, 'eval_steps_per_second': 2.038, 'epoch': 9.0}


 10%|█         | 400/4000 [06:36<52:11,  1.15it/s]  

{'loss': 0.3816, 'learning_rate': 4.5e-06, 'epoch': 10.0}


                                                  
 10%|█         | 400/4000 [06:41<52:11,  1.15it/s]

{'eval_loss': 0.5782409906387329, 'eval_accuracy': 0.76, 'eval_runtime': 4.9629, 'eval_samples_per_second': 40.299, 'eval_steps_per_second': 2.015, 'epoch': 10.0}


 11%|█         | 440/4000 [07:16<51:39,  1.15it/s]  

{'loss': 0.3147, 'learning_rate': 4.450000000000001e-06, 'epoch': 11.0}


                                                  
 11%|█         | 440/4000 [07:21<51:39,  1.15it/s]

{'eval_loss': 0.5489532947540283, 'eval_accuracy': 0.755, 'eval_runtime': 5.0379, 'eval_samples_per_second': 39.699, 'eval_steps_per_second': 1.985, 'epoch': 11.0}


 12%|█▏        | 480/4000 [07:56<50:58,  1.15it/s]  

{'loss': 0.2598, 'learning_rate': 4.4e-06, 'epoch': 12.0}


                                                  
 12%|█▏        | 480/4000 [08:02<50:58,  1.15it/s]

{'eval_loss': 0.486522376537323, 'eval_accuracy': 0.805, 'eval_runtime': 5.9884, 'eval_samples_per_second': 33.398, 'eval_steps_per_second': 1.67, 'epoch': 12.0}


 13%|█▎        | 520/4000 [08:37<51:02,  1.14it/s]  

{'loss': 0.2088, 'learning_rate': 4.350000000000001e-06, 'epoch': 13.0}


                                                  
 13%|█▎        | 520/4000 [08:42<51:02,  1.14it/s]

{'eval_loss': 0.4778186082839966, 'eval_accuracy': 0.825, 'eval_runtime': 5.1039, 'eval_samples_per_second': 39.186, 'eval_steps_per_second': 1.959, 'epoch': 13.0}


 14%|█▍        | 560/4000 [09:17<49:54,  1.15it/s]  

{'loss': 0.18, 'learning_rate': 4.3e-06, 'epoch': 14.0}


                                                  
 14%|█▍        | 560/4000 [09:22<49:54,  1.15it/s]

{'eval_loss': 0.437112957239151, 'eval_accuracy': 0.845, 'eval_runtime': 4.9163, 'eval_samples_per_second': 40.681, 'eval_steps_per_second': 2.034, 'epoch': 14.0}


 15%|█▌        | 600/4000 [09:57<49:13,  1.15it/s]  

{'loss': 0.1352, 'learning_rate': 4.25e-06, 'epoch': 15.0}


                                                  
 15%|█▌        | 600/4000 [10:02<49:13,  1.15it/s]

{'eval_loss': 0.40050697326660156, 'eval_accuracy': 0.86, 'eval_runtime': 4.9117, 'eval_samples_per_second': 40.719, 'eval_steps_per_second': 2.036, 'epoch': 15.0}


 16%|█▌        | 640/4000 [10:37<48:36,  1.15it/s]  

{'loss': 0.1199, 'learning_rate': 4.2000000000000004e-06, 'epoch': 16.0}


                                                  
 16%|█▌        | 640/4000 [10:42<48:36,  1.15it/s]

{'eval_loss': 0.38091397285461426, 'eval_accuracy': 0.87, 'eval_runtime': 4.9142, 'eval_samples_per_second': 40.699, 'eval_steps_per_second': 2.035, 'epoch': 16.0}


 17%|█▋        | 680/4000 [11:17<47:58,  1.15it/s]  

{'loss': 0.0929, 'learning_rate': 4.15e-06, 'epoch': 17.0}


                                                  
 17%|█▋        | 680/4000 [11:22<47:58,  1.15it/s]

{'eval_loss': 0.3580147922039032, 'eval_accuracy': 0.885, 'eval_runtime': 4.9358, 'eval_samples_per_second': 40.52, 'eval_steps_per_second': 2.026, 'epoch': 17.0}


 18%|█▊        | 720/4000 [11:57<47:37,  1.15it/s]  

{'loss': 0.0854, 'learning_rate': 4.1e-06, 'epoch': 18.0}


                                                  
 18%|█▊        | 720/4000 [12:02<47:37,  1.15it/s]

{'eval_loss': 0.36756837368011475, 'eval_accuracy': 0.89, 'eval_runtime': 4.9392, 'eval_samples_per_second': 40.492, 'eval_steps_per_second': 2.025, 'epoch': 18.0}


 19%|█▉        | 760/4000 [12:37<47:37,  1.13it/s]  

{'loss': 0.0757, 'learning_rate': 4.05e-06, 'epoch': 19.0}


                                                  
 19%|█▉        | 760/4000 [12:42<47:37,  1.13it/s]

{'eval_loss': 0.34620437026023865, 'eval_accuracy': 0.89, 'eval_runtime': 4.9226, 'eval_samples_per_second': 40.629, 'eval_steps_per_second': 2.031, 'epoch': 19.0}


 20%|██        | 800/4000 [13:17<46:56,  1.14it/s]  

{'loss': 0.0623, 'learning_rate': 4.000000000000001e-06, 'epoch': 20.0}


                                                  
 20%|██        | 800/4000 [13:22<46:56,  1.14it/s]

{'eval_loss': 0.3335008919239044, 'eval_accuracy': 0.895, 'eval_runtime': 4.9959, 'eval_samples_per_second': 40.033, 'eval_steps_per_second': 2.002, 'epoch': 20.0}


 21%|██        | 840/4000 [13:57<46:06,  1.14it/s]  

{'loss': 0.0567, 'learning_rate': 3.95e-06, 'epoch': 21.0}


                                                  
 21%|██        | 840/4000 [14:02<46:06,  1.14it/s]

{'eval_loss': 0.32781392335891724, 'eval_accuracy': 0.9, 'eval_runtime': 5.2944, 'eval_samples_per_second': 37.776, 'eval_steps_per_second': 1.889, 'epoch': 21.0}


 22%|██▏       | 880/4000 [14:37<45:21,  1.15it/s]  

{'loss': 0.0482, 'learning_rate': 3.900000000000001e-06, 'epoch': 22.0}


                                                  
 22%|██▏       | 880/4000 [14:42<45:21,  1.15it/s]

{'eval_loss': 0.34622326493263245, 'eval_accuracy': 0.895, 'eval_runtime': 5.0934, 'eval_samples_per_second': 39.266, 'eval_steps_per_second': 1.963, 'epoch': 22.0}


 23%|██▎       | 920/4000 [15:18<44:46,  1.15it/s]  

{'loss': 0.0476, 'learning_rate': 3.85e-06, 'epoch': 23.0}


                                                  
 23%|██▎       | 920/4000 [15:22<44:46,  1.15it/s]

{'eval_loss': 0.3270450532436371, 'eval_accuracy': 0.89, 'eval_runtime': 4.9103, 'eval_samples_per_second': 40.731, 'eval_steps_per_second': 2.037, 'epoch': 23.0}


 24%|██▍       | 960/4000 [15:57<44:16,  1.14it/s]  

{'loss': 0.0421, 'learning_rate': 3.8000000000000005e-06, 'epoch': 24.0}


                                                  
 24%|██▍       | 960/4000 [16:02<44:16,  1.14it/s]

{'eval_loss': 0.3573243021965027, 'eval_accuracy': 0.885, 'eval_runtime': 4.9266, 'eval_samples_per_second': 40.596, 'eval_steps_per_second': 2.03, 'epoch': 24.0}


 25%|██▌       | 1000/4000 [16:38<43:28,  1.15it/s] 

{'loss': 0.0392, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 25%|██▌       | 1000/4000 [16:43<43:28,  1.15it/s]

{'eval_loss': 0.3213544189929962, 'eval_accuracy': 0.915, 'eval_runtime': 5.0683, 'eval_samples_per_second': 39.461, 'eval_steps_per_second': 1.973, 'epoch': 25.0}


 26%|██▌       | 1040/4000 [17:18<42:59,  1.15it/s]  

{'loss': 0.0342, 'learning_rate': 3.7e-06, 'epoch': 26.0}


                                                   
 26%|██▌       | 1040/4000 [17:23<42:59,  1.15it/s]

{'eval_loss': 0.32870247960090637, 'eval_accuracy': 0.895, 'eval_runtime': 5.0272, 'eval_samples_per_second': 39.784, 'eval_steps_per_second': 1.989, 'epoch': 26.0}


 27%|██▋       | 1080/4000 [17:58<42:16,  1.15it/s]  

{'loss': 0.036, 'learning_rate': 3.65e-06, 'epoch': 27.0}


                                                   
 27%|██▋       | 1080/4000 [18:03<42:16,  1.15it/s]

{'eval_loss': 0.3200984299182892, 'eval_accuracy': 0.905, 'eval_runtime': 5.091, 'eval_samples_per_second': 39.285, 'eval_steps_per_second': 1.964, 'epoch': 27.0}


 28%|██▊       | 1120/4000 [18:38<41:35,  1.15it/s]  

{'loss': 0.0316, 'learning_rate': 3.6000000000000003e-06, 'epoch': 28.0}


                                                   
 28%|██▊       | 1120/4000 [18:43<41:35,  1.15it/s]

{'eval_loss': 0.322630912065506, 'eval_accuracy': 0.895, 'eval_runtime': 5.0227, 'eval_samples_per_second': 39.819, 'eval_steps_per_second': 1.991, 'epoch': 28.0}


 29%|██▉       | 1160/4000 [19:18<41:36,  1.14it/s]  

{'loss': 0.0325, 'learning_rate': 3.5500000000000003e-06, 'epoch': 29.0}


                                                   
 29%|██▉       | 1160/4000 [19:23<41:36,  1.14it/s]

{'eval_loss': 0.32740965485572815, 'eval_accuracy': 0.9, 'eval_runtime': 4.9481, 'eval_samples_per_second': 40.42, 'eval_steps_per_second': 2.021, 'epoch': 29.0}


 30%|███       | 1200/4000 [19:58<41:04,  1.14it/s]  

{'loss': 0.0277, 'learning_rate': 3.5e-06, 'epoch': 30.0}


                                                   
 30%|███       | 1200/4000 [20:04<41:04,  1.14it/s]

{'eval_loss': 0.31826338171958923, 'eval_accuracy': 0.91, 'eval_runtime': 5.4543, 'eval_samples_per_second': 36.668, 'eval_steps_per_second': 1.833, 'epoch': 30.0}


 31%|███       | 1240/4000 [20:39<40:25,  1.14it/s]  

{'loss': 0.0283, 'learning_rate': 3.45e-06, 'epoch': 31.0}


                                                   
 31%|███       | 1240/4000 [20:44<40:25,  1.14it/s]

{'eval_loss': 0.3088010549545288, 'eval_accuracy': 0.915, 'eval_runtime': 4.9561, 'eval_samples_per_second': 40.354, 'eval_steps_per_second': 2.018, 'epoch': 31.0}


 32%|███▏      | 1280/4000 [21:19<39:55,  1.14it/s]  

{'loss': 0.0265, 'learning_rate': 3.4000000000000005e-06, 'epoch': 32.0}


                                                   
 32%|███▏      | 1280/4000 [21:24<39:55,  1.14it/s]

{'eval_loss': 0.3100907802581787, 'eval_accuracy': 0.915, 'eval_runtime': 4.9594, 'eval_samples_per_second': 40.328, 'eval_steps_per_second': 2.016, 'epoch': 32.0}


 33%|███▎      | 1320/4000 [21:59<39:20,  1.14it/s]  

{'loss': 0.0266, 'learning_rate': 3.3500000000000005e-06, 'epoch': 33.0}


                                                   
 33%|███▎      | 1320/4000 [22:04<39:20,  1.14it/s]

{'eval_loss': 0.31529369950294495, 'eval_accuracy': 0.92, 'eval_runtime': 4.9411, 'eval_samples_per_second': 40.477, 'eval_steps_per_second': 2.024, 'epoch': 33.0}


 34%|███▍      | 1360/4000 [22:40<38:59,  1.13it/s]  

{'loss': 0.0224, 'learning_rate': 3.3000000000000006e-06, 'epoch': 34.0}


                                                   
 34%|███▍      | 1360/4000 [22:45<38:59,  1.13it/s]

{'eval_loss': 0.3194456100463867, 'eval_accuracy': 0.92, 'eval_runtime': 4.9735, 'eval_samples_per_second': 40.214, 'eval_steps_per_second': 2.011, 'epoch': 34.0}


 35%|███▌      | 1400/4000 [23:20<38:20,  1.13it/s]  

{'loss': 0.0199, 'learning_rate': 3.2500000000000002e-06, 'epoch': 35.0}


                                                   
 35%|███▌      | 1400/4000 [23:25<38:20,  1.13it/s]

{'eval_loss': 0.31670424342155457, 'eval_accuracy': 0.925, 'eval_runtime': 4.9977, 'eval_samples_per_second': 40.018, 'eval_steps_per_second': 2.001, 'epoch': 35.0}


 36%|███▌      | 1440/4000 [24:01<37:55,  1.13it/s]  

{'loss': 0.0235, 'learning_rate': 3.2000000000000003e-06, 'epoch': 36.0}


                                                   
 36%|███▌      | 1440/4000 [24:06<37:55,  1.13it/s]

{'eval_loss': 0.3148554265499115, 'eval_accuracy': 0.935, 'eval_runtime': 5.0075, 'eval_samples_per_second': 39.94, 'eval_steps_per_second': 1.997, 'epoch': 36.0}


 37%|███▋      | 1480/4000 [24:41<37:13,  1.13it/s]  

{'loss': 0.0237, 'learning_rate': 3.1500000000000003e-06, 'epoch': 37.0}


                                                   
 37%|███▋      | 1480/4000 [24:46<37:13,  1.13it/s]

{'eval_loss': 0.3196798264980316, 'eval_accuracy': 0.935, 'eval_runtime': 4.9435, 'eval_samples_per_second': 40.458, 'eval_steps_per_second': 2.023, 'epoch': 37.0}


 38%|███▊      | 1520/4000 [25:23<36:38,  1.13it/s]  

{'loss': 0.0225, 'learning_rate': 3.1000000000000004e-06, 'epoch': 38.0}


                                                   
 38%|███▊      | 1520/4000 [25:28<36:38,  1.13it/s]

{'eval_loss': 0.31783735752105713, 'eval_accuracy': 0.935, 'eval_runtime': 4.9637, 'eval_samples_per_second': 40.293, 'eval_steps_per_second': 2.015, 'epoch': 38.0}


 39%|███▉      | 1560/4000 [26:03<35:21,  1.15it/s]  

{'loss': 0.0158, 'learning_rate': 3.05e-06, 'epoch': 39.0}


                                                   
 39%|███▉      | 1560/4000 [26:08<35:21,  1.15it/s]

{'eval_loss': 0.31596672534942627, 'eval_accuracy': 0.925, 'eval_runtime': 4.9041, 'eval_samples_per_second': 40.783, 'eval_steps_per_second': 2.039, 'epoch': 39.0}


 40%|████      | 1600/4000 [26:43<35:09,  1.14it/s]  

{'loss': 0.0247, 'learning_rate': 3e-06, 'epoch': 40.0}


                                                   
 40%|████      | 1600/4000 [26:48<35:09,  1.14it/s]

{'eval_loss': 0.3173842132091522, 'eval_accuracy': 0.925, 'eval_runtime': 5.0046, 'eval_samples_per_second': 39.964, 'eval_steps_per_second': 1.998, 'epoch': 40.0}


 41%|████      | 1640/4000 [27:23<34:32,  1.14it/s]  

{'loss': 0.0205, 'learning_rate': 2.95e-06, 'epoch': 41.0}


                                                   
 41%|████      | 1640/4000 [27:28<34:32,  1.14it/s]

{'eval_loss': 0.3118303716182709, 'eval_accuracy': 0.925, 'eval_runtime': 4.8854, 'eval_samples_per_second': 40.939, 'eval_steps_per_second': 2.047, 'epoch': 41.0}


 42%|████▏     | 1680/4000 [28:03<33:34,  1.15it/s]  

{'loss': 0.0163, 'learning_rate': 2.9e-06, 'epoch': 42.0}


                                                   
 42%|████▏     | 1680/4000 [28:08<33:34,  1.15it/s]

{'eval_loss': 0.30922043323516846, 'eval_accuracy': 0.93, 'eval_runtime': 4.9338, 'eval_samples_per_second': 40.537, 'eval_steps_per_second': 2.027, 'epoch': 42.0}


 43%|████▎     | 1720/4000 [28:43<32:54,  1.15it/s]  

{'loss': 0.0172, 'learning_rate': 2.85e-06, 'epoch': 43.0}


                                                   
 43%|████▎     | 1720/4000 [28:48<32:54,  1.15it/s]

{'eval_loss': 0.3067014217376709, 'eval_accuracy': 0.93, 'eval_runtime': 4.8636, 'eval_samples_per_second': 41.122, 'eval_steps_per_second': 2.056, 'epoch': 43.0}


 44%|████▍     | 1760/4000 [29:23<32:33,  1.15it/s]  

{'loss': 0.015, 'learning_rate': 2.8000000000000003e-06, 'epoch': 44.0}


                                                   
 44%|████▍     | 1760/4000 [29:28<32:33,  1.15it/s]

{'eval_loss': 0.3033227026462555, 'eval_accuracy': 0.93, 'eval_runtime': 4.9401, 'eval_samples_per_second': 40.485, 'eval_steps_per_second': 2.024, 'epoch': 44.0}


 45%|████▌     | 1800/4000 [30:03<31:52,  1.15it/s]  

{'loss': 0.0156, 'learning_rate': 2.7500000000000004e-06, 'epoch': 45.0}


                                                   
 45%|████▌     | 1800/4000 [30:07<31:52,  1.15it/s]

{'eval_loss': 0.30763906240463257, 'eval_accuracy': 0.925, 'eval_runtime': 4.8495, 'eval_samples_per_second': 41.241, 'eval_steps_per_second': 2.062, 'epoch': 45.0}


 46%|████▌     | 1840/4000 [30:42<31:24,  1.15it/s]  

{'loss': 0.0117, 'learning_rate': 2.7000000000000004e-06, 'epoch': 46.0}


                                                   
 46%|████▌     | 1840/4000 [30:47<31:24,  1.15it/s]

{'eval_loss': 0.30438125133514404, 'eval_accuracy': 0.935, 'eval_runtime': 4.8821, 'eval_samples_per_second': 40.966, 'eval_steps_per_second': 2.048, 'epoch': 46.0}


 47%|████▋     | 1880/4000 [31:22<30:44,  1.15it/s]  

{'loss': 0.0165, 'learning_rate': 2.6500000000000005e-06, 'epoch': 47.0}


                                                   
 47%|████▋     | 1880/4000 [31:27<30:44,  1.15it/s]

{'eval_loss': 0.3019803464412689, 'eval_accuracy': 0.935, 'eval_runtime': 4.8489, 'eval_samples_per_second': 41.246, 'eval_steps_per_second': 2.062, 'epoch': 47.0}


 48%|████▊     | 1920/4000 [32:02<30:15,  1.15it/s]  

{'loss': 0.0128, 'learning_rate': 2.6e-06, 'epoch': 48.0}


                                                   
 48%|████▊     | 1920/4000 [32:07<30:15,  1.15it/s]

{'eval_loss': 0.30237138271331787, 'eval_accuracy': 0.935, 'eval_runtime': 4.9823, 'eval_samples_per_second': 40.142, 'eval_steps_per_second': 2.007, 'epoch': 48.0}


 49%|████▉     | 1960/4000 [32:42<30:15,  1.12it/s]  

{'loss': 0.0109, 'learning_rate': 2.55e-06, 'epoch': 49.0}


                                                   
 49%|████▉     | 1960/4000 [32:47<30:15,  1.12it/s]

{'eval_loss': 0.30452656745910645, 'eval_accuracy': 0.935, 'eval_runtime': 4.9233, 'eval_samples_per_second': 40.623, 'eval_steps_per_second': 2.031, 'epoch': 49.0}


 50%|█████     | 2000/4000 [33:22<27:26,  1.21it/s]  

{'loss': 0.0108, 'learning_rate': 2.5e-06, 'epoch': 50.0}


                                                   
 50%|█████     | 2000/4000 [33:27<27:26,  1.21it/s]

{'eval_loss': 0.30698588490486145, 'eval_accuracy': 0.935, 'eval_runtime': 4.8115, 'eval_samples_per_second': 41.567, 'eval_steps_per_second': 2.078, 'epoch': 50.0}


 51%|█████     | 2040/4000 [34:00<26:52,  1.22it/s]  

{'loss': 0.0162, 'learning_rate': 2.4500000000000003e-06, 'epoch': 51.0}


                                                   
 51%|█████     | 2040/4000 [34:05<26:52,  1.22it/s]

{'eval_loss': 0.3026353120803833, 'eval_accuracy': 0.935, 'eval_runtime': 4.7418, 'eval_samples_per_second': 42.178, 'eval_steps_per_second': 2.109, 'epoch': 51.0}


 52%|█████▏    | 2080/4000 [34:38<26:23,  1.21it/s]  

{'loss': 0.0084, 'learning_rate': 2.4000000000000003e-06, 'epoch': 52.0}


                                                   
 52%|█████▏    | 2080/4000 [34:43<26:23,  1.21it/s]

{'eval_loss': 0.3039340674877167, 'eval_accuracy': 0.94, 'eval_runtime': 4.7657, 'eval_samples_per_second': 41.967, 'eval_steps_per_second': 2.098, 'epoch': 52.0}


 53%|█████▎    | 2120/4000 [35:16<25:43,  1.22it/s]  

{'loss': 0.0098, 'learning_rate': 2.35e-06, 'epoch': 53.0}


                                                   
 53%|█████▎    | 2120/4000 [35:20<25:43,  1.22it/s]

{'eval_loss': 0.3055676519870758, 'eval_accuracy': 0.935, 'eval_runtime': 4.7415, 'eval_samples_per_second': 42.181, 'eval_steps_per_second': 2.109, 'epoch': 53.0}


 54%|█████▍    | 2160/4000 [35:54<25:17,  1.21it/s]  

{'loss': 0.0111, 'learning_rate': 2.3000000000000004e-06, 'epoch': 54.0}


                                                   
 54%|█████▍    | 2160/4000 [35:58<25:17,  1.21it/s]

{'eval_loss': 0.30918246507644653, 'eval_accuracy': 0.935, 'eval_runtime': 4.7463, 'eval_samples_per_second': 42.138, 'eval_steps_per_second': 2.107, 'epoch': 54.0}


 55%|█████▌    | 2200/4000 [36:31<24:46,  1.21it/s]  

{'loss': 0.0097, 'learning_rate': 2.25e-06, 'epoch': 55.0}


                                                   
 55%|█████▌    | 2200/4000 [36:36<24:46,  1.21it/s]

{'eval_loss': 0.30695822834968567, 'eval_accuracy': 0.93, 'eval_runtime': 4.7266, 'eval_samples_per_second': 42.314, 'eval_steps_per_second': 2.116, 'epoch': 55.0}


 56%|█████▌    | 2240/4000 [37:10<24:14,  1.21it/s]  

{'loss': 0.0124, 'learning_rate': 2.2e-06, 'epoch': 56.0}


                                                   
 56%|█████▌    | 2240/4000 [37:14<24:14,  1.21it/s]

{'eval_loss': 0.3069470226764679, 'eval_accuracy': 0.93, 'eval_runtime': 4.7383, 'eval_samples_per_second': 42.209, 'eval_steps_per_second': 2.11, 'epoch': 56.0}


 57%|█████▋    | 2280/4000 [37:47<23:43,  1.21it/s]  

{'loss': 0.0153, 'learning_rate': 2.15e-06, 'epoch': 57.0}


                                                   
 57%|█████▋    | 2280/4000 [37:52<23:43,  1.21it/s]

{'eval_loss': 0.3062417209148407, 'eval_accuracy': 0.935, 'eval_runtime': 4.7667, 'eval_samples_per_second': 41.958, 'eval_steps_per_second': 2.098, 'epoch': 57.0}


 58%|█████▊    | 2320/4000 [38:25<23:07,  1.21it/s]  

{'loss': 0.0111, 'learning_rate': 2.1000000000000002e-06, 'epoch': 58.0}


                                                   
 58%|█████▊    | 2320/4000 [38:30<23:07,  1.21it/s]

{'eval_loss': 0.308234304189682, 'eval_accuracy': 0.935, 'eval_runtime': 4.8324, 'eval_samples_per_second': 41.387, 'eval_steps_per_second': 2.069, 'epoch': 58.0}


 59%|█████▉    | 2360/4000 [39:04<22:58,  1.19it/s]  

{'loss': 0.0146, 'learning_rate': 2.05e-06, 'epoch': 59.0}


                                                   
 59%|█████▉    | 2360/4000 [39:08<22:58,  1.19it/s]

{'eval_loss': 0.3109896183013916, 'eval_accuracy': 0.935, 'eval_runtime': 4.8194, 'eval_samples_per_second': 41.499, 'eval_steps_per_second': 2.075, 'epoch': 59.0}


 60%|██████    | 2400/4000 [39:42<22:13,  1.20it/s]  

{'loss': 0.0123, 'learning_rate': 2.0000000000000003e-06, 'epoch': 60.0}


                                                   
 60%|██████    | 2400/4000 [39:47<22:13,  1.20it/s]

{'eval_loss': 0.3171895146369934, 'eval_accuracy': 0.93, 'eval_runtime': 4.8042, 'eval_samples_per_second': 41.63, 'eval_steps_per_second': 2.082, 'epoch': 60.0}


 61%|██████    | 2440/4000 [40:20<21:33,  1.21it/s]  

{'loss': 0.0086, 'learning_rate': 1.9500000000000004e-06, 'epoch': 61.0}


                                                   
 61%|██████    | 2440/4000 [40:25<21:33,  1.21it/s]

{'eval_loss': 0.3054582178592682, 'eval_accuracy': 0.93, 'eval_runtime': 4.7534, 'eval_samples_per_second': 42.075, 'eval_steps_per_second': 2.104, 'epoch': 61.0}


 62%|██████▏   | 2480/4000 [40:58<20:54,  1.21it/s]

{'loss': 0.0117, 'learning_rate': 1.9000000000000002e-06, 'epoch': 62.0}


                                                   
 62%|██████▏   | 2480/4000 [41:03<20:54,  1.21it/s]

{'eval_loss': 0.3063526451587677, 'eval_accuracy': 0.93, 'eval_runtime': 4.7396, 'eval_samples_per_second': 42.198, 'eval_steps_per_second': 2.11, 'epoch': 62.0}


 63%|██████▎   | 2520/4000 [41:36<20:21,  1.21it/s]

{'loss': 0.0123, 'learning_rate': 1.85e-06, 'epoch': 63.0}


                                                   
 63%|██████▎   | 2520/4000 [41:41<20:21,  1.21it/s]

{'eval_loss': 0.3106853663921356, 'eval_accuracy': 0.93, 'eval_runtime': 4.6672, 'eval_samples_per_second': 42.852, 'eval_steps_per_second': 2.143, 'epoch': 63.0}


 64%|██████▍   | 2560/4000 [42:14<19:47,  1.21it/s]

{'loss': 0.0115, 'learning_rate': 1.8000000000000001e-06, 'epoch': 64.0}


                                                   
 64%|██████▍   | 2560/4000 [42:19<19:47,  1.21it/s]

{'eval_loss': 0.31141674518585205, 'eval_accuracy': 0.93, 'eval_runtime': 4.7968, 'eval_samples_per_second': 41.694, 'eval_steps_per_second': 2.085, 'epoch': 64.0}


 65%|██████▌   | 2600/4000 [42:52<19:17,  1.21it/s]

{'loss': 0.0131, 'learning_rate': 1.75e-06, 'epoch': 65.0}


                                                   
 65%|██████▌   | 2600/4000 [42:57<19:17,  1.21it/s]

{'eval_loss': 0.316072553396225, 'eval_accuracy': 0.93, 'eval_runtime': 4.7467, 'eval_samples_per_second': 42.134, 'eval_steps_per_second': 2.107, 'epoch': 65.0}


 66%|██████▌   | 2640/4000 [43:30<18:45,  1.21it/s]

{'loss': 0.0078, 'learning_rate': 1.7000000000000002e-06, 'epoch': 66.0}


                                                   
 66%|██████▌   | 2640/4000 [43:35<18:45,  1.21it/s]

{'eval_loss': 0.3121761083602905, 'eval_accuracy': 0.93, 'eval_runtime': 4.8332, 'eval_samples_per_second': 41.381, 'eval_steps_per_second': 2.069, 'epoch': 66.0}


 67%|██████▋   | 2680/4000 [44:08<18:06,  1.22it/s]

{'loss': 0.0099, 'learning_rate': 1.6500000000000003e-06, 'epoch': 67.0}


                                                   
 67%|██████▋   | 2680/4000 [44:13<18:06,  1.22it/s]

{'eval_loss': 0.31653130054473877, 'eval_accuracy': 0.93, 'eval_runtime': 4.7083, 'eval_samples_per_second': 42.478, 'eval_steps_per_second': 2.124, 'epoch': 67.0}


 68%|██████▊   | 2720/4000 [44:46<17:35,  1.21it/s]

{'loss': 0.0162, 'learning_rate': 1.6000000000000001e-06, 'epoch': 68.0}


                                                   
 68%|██████▊   | 2720/4000 [44:51<17:35,  1.21it/s]

{'eval_loss': 0.31306517124176025, 'eval_accuracy': 0.935, 'eval_runtime': 4.7568, 'eval_samples_per_second': 42.045, 'eval_steps_per_second': 2.102, 'epoch': 68.0}


 69%|██████▉   | 2760/4000 [45:24<17:02,  1.21it/s]

{'loss': 0.0085, 'learning_rate': 1.5500000000000002e-06, 'epoch': 69.0}


                                                   
 69%|██████▉   | 2760/4000 [45:29<17:02,  1.21it/s]

{'eval_loss': 0.3126952350139618, 'eval_accuracy': 0.935, 'eval_runtime': 4.8428, 'eval_samples_per_second': 41.298, 'eval_steps_per_second': 2.065, 'epoch': 69.0}


 70%|███████   | 2800/4000 [46:02<16:30,  1.21it/s]

{'loss': 0.0187, 'learning_rate': 1.5e-06, 'epoch': 70.0}


                                                   
 70%|███████   | 2800/4000 [46:07<16:30,  1.21it/s]

{'eval_loss': 0.31346699595451355, 'eval_accuracy': 0.935, 'eval_runtime': 4.7384, 'eval_samples_per_second': 42.208, 'eval_steps_per_second': 2.11, 'epoch': 70.0}


 71%|███████   | 2840/4000 [46:40<15:58,  1.21it/s]

{'loss': 0.0177, 'learning_rate': 1.45e-06, 'epoch': 71.0}


                                                   
 71%|███████   | 2840/4000 [46:45<15:58,  1.21it/s]

{'eval_loss': 0.31291335821151733, 'eval_accuracy': 0.94, 'eval_runtime': 4.9157, 'eval_samples_per_second': 40.686, 'eval_steps_per_second': 2.034, 'epoch': 71.0}


 72%|███████▏  | 2880/4000 [47:18<15:23,  1.21it/s]

{'loss': 0.0131, 'learning_rate': 1.4000000000000001e-06, 'epoch': 72.0}


                                                   
 72%|███████▏  | 2880/4000 [47:24<15:23,  1.21it/s]

{'eval_loss': 0.31325042247772217, 'eval_accuracy': 0.93, 'eval_runtime': 5.2387, 'eval_samples_per_second': 38.177, 'eval_steps_per_second': 1.909, 'epoch': 72.0}


 73%|███████▎  | 2920/4000 [47:57<14:51,  1.21it/s]

{'loss': 0.0142, 'learning_rate': 1.3500000000000002e-06, 'epoch': 73.0}


                                                   
 73%|███████▎  | 2920/4000 [48:05<14:51,  1.21it/s]

{'eval_loss': 0.3127685785293579, 'eval_accuracy': 0.935, 'eval_runtime': 7.8387, 'eval_samples_per_second': 25.514, 'eval_steps_per_second': 1.276, 'epoch': 73.0}


 74%|███████▍  | 2960/4000 [48:39<14:19,  1.21it/s]

{'loss': 0.0101, 'learning_rate': 1.3e-06, 'epoch': 74.0}


                                                   
 74%|███████▍  | 2960/4000 [48:44<14:19,  1.21it/s]

{'eval_loss': 0.31385502219200134, 'eval_accuracy': 0.935, 'eval_runtime': 4.7623, 'eval_samples_per_second': 41.997, 'eval_steps_per_second': 2.1, 'epoch': 74.0}


 75%|███████▌  | 3000/4000 [49:17<13:44,  1.21it/s]

{'loss': 0.0102, 'learning_rate': 1.25e-06, 'epoch': 75.0}


                                                   
 75%|███████▌  | 3000/4000 [49:22<13:44,  1.21it/s]

{'eval_loss': 0.31334421038627625, 'eval_accuracy': 0.935, 'eval_runtime': 4.7267, 'eval_samples_per_second': 42.313, 'eval_steps_per_second': 2.116, 'epoch': 75.0}


 76%|███████▌  | 3040/4000 [49:55<13:14,  1.21it/s]

{'loss': 0.0197, 'learning_rate': 1.2000000000000002e-06, 'epoch': 76.0}


                                                   
 76%|███████▌  | 3040/4000 [50:00<13:14,  1.21it/s]

{'eval_loss': 0.3141742944717407, 'eval_accuracy': 0.935, 'eval_runtime': 4.7923, 'eval_samples_per_second': 41.733, 'eval_steps_per_second': 2.087, 'epoch': 76.0}


 77%|███████▋  | 3080/4000 [50:33<12:41,  1.21it/s]

{'loss': 0.0121, 'learning_rate': 1.1500000000000002e-06, 'epoch': 77.0}


                                                   
 77%|███████▋  | 3080/4000 [50:38<12:41,  1.21it/s]

{'eval_loss': 0.314512699842453, 'eval_accuracy': 0.935, 'eval_runtime': 4.6988, 'eval_samples_per_second': 42.564, 'eval_steps_per_second': 2.128, 'epoch': 77.0}


 78%|███████▊  | 3120/4000 [51:11<13:05,  1.12it/s]

{'loss': 0.0049, 'learning_rate': 1.1e-06, 'epoch': 78.0}


                                                   
 78%|███████▊  | 3120/4000 [51:16<13:05,  1.12it/s]

{'eval_loss': 0.31383946537971497, 'eval_accuracy': 0.94, 'eval_runtime': 5.0046, 'eval_samples_per_second': 39.963, 'eval_steps_per_second': 1.998, 'epoch': 78.0}


 79%|███████▉  | 3160/4000 [51:52<12:13,  1.15it/s]

{'loss': 0.0151, 'learning_rate': 1.0500000000000001e-06, 'epoch': 79.0}


                                                   
 79%|███████▉  | 3160/4000 [51:57<12:13,  1.15it/s]

{'eval_loss': 0.31480470299720764, 'eval_accuracy': 0.94, 'eval_runtime': 4.9041, 'eval_samples_per_second': 40.782, 'eval_steps_per_second': 2.039, 'epoch': 79.0}


 80%|████████  | 3200/4000 [52:32<11:39,  1.14it/s]

{'loss': 0.0099, 'learning_rate': 1.0000000000000002e-06, 'epoch': 80.0}


                                                   
 80%|████████  | 3200/4000 [52:37<11:39,  1.14it/s]

{'eval_loss': 0.31550997495651245, 'eval_accuracy': 0.935, 'eval_runtime': 4.9724, 'eval_samples_per_second': 40.222, 'eval_steps_per_second': 2.011, 'epoch': 80.0}


 81%|████████  | 3240/4000 [53:12<11:11,  1.13it/s]

{'loss': 0.0135, 'learning_rate': 9.500000000000001e-07, 'epoch': 81.0}


                                                   
 81%|████████  | 3240/4000 [53:17<11:11,  1.13it/s]

{'eval_loss': 0.315573513507843, 'eval_accuracy': 0.935, 'eval_runtime': 4.8956, 'eval_samples_per_second': 40.853, 'eval_steps_per_second': 2.043, 'epoch': 81.0}


 82%|████████▏ | 3280/4000 [53:52<10:29,  1.14it/s]

{'loss': 0.0126, 'learning_rate': 9.000000000000001e-07, 'epoch': 82.0}


                                                   
 82%|████████▏ | 3280/4000 [53:57<10:29,  1.14it/s]

{'eval_loss': 0.31793275475502014, 'eval_accuracy': 0.935, 'eval_runtime': 5.053, 'eval_samples_per_second': 39.58, 'eval_steps_per_second': 1.979, 'epoch': 82.0}


 83%|████████▎ | 3320/4000 [54:32<09:57,  1.14it/s]

{'loss': 0.0098, 'learning_rate': 8.500000000000001e-07, 'epoch': 83.0}


                                                   
 83%|████████▎ | 3320/4000 [54:37<09:57,  1.14it/s]

{'eval_loss': 0.3168121576309204, 'eval_accuracy': 0.935, 'eval_runtime': 4.9892, 'eval_samples_per_second': 40.086, 'eval_steps_per_second': 2.004, 'epoch': 83.0}


 84%|████████▍ | 3360/4000 [55:12<09:23,  1.14it/s]

{'loss': 0.0078, 'learning_rate': 8.000000000000001e-07, 'epoch': 84.0}


                                                   
 84%|████████▍ | 3360/4000 [55:17<09:23,  1.14it/s]

{'eval_loss': 0.31739187240600586, 'eval_accuracy': 0.935, 'eval_runtime': 4.9574, 'eval_samples_per_second': 40.344, 'eval_steps_per_second': 2.017, 'epoch': 84.0}


 85%|████████▌ | 3400/4000 [55:52<08:31,  1.17it/s]

{'loss': 0.0158, 'learning_rate': 7.5e-07, 'epoch': 85.0}


                                                   
 85%|████████▌ | 3400/4000 [55:57<08:31,  1.17it/s]

{'eval_loss': 0.31738045811653137, 'eval_accuracy': 0.93, 'eval_runtime': 4.9012, 'eval_samples_per_second': 40.807, 'eval_steps_per_second': 2.04, 'epoch': 85.0}


 86%|████████▌ | 3440/4000 [56:31<08:07,  1.15it/s]

{'loss': 0.0073, 'learning_rate': 7.000000000000001e-07, 'epoch': 86.0}


                                                   
 86%|████████▌ | 3440/4000 [56:36<08:07,  1.15it/s]

{'eval_loss': 0.3169505298137665, 'eval_accuracy': 0.93, 'eval_runtime': 4.9676, 'eval_samples_per_second': 40.261, 'eval_steps_per_second': 2.013, 'epoch': 86.0}


 87%|████████▋ | 3480/4000 [57:10<07:25,  1.17it/s]

{'loss': 0.0149, 'learning_rate': 6.5e-07, 'epoch': 87.0}


                                                   
 87%|████████▋ | 3480/4000 [57:15<07:25,  1.17it/s]

{'eval_loss': 0.31678733229637146, 'eval_accuracy': 0.93, 'eval_runtime': 4.8041, 'eval_samples_per_second': 41.631, 'eval_steps_per_second': 2.082, 'epoch': 87.0}


 88%|████████▊ | 3520/4000 [57:49<06:34,  1.22it/s]

{'loss': 0.0146, 'learning_rate': 6.000000000000001e-07, 'epoch': 88.0}


                                                   
 88%|████████▊ | 3520/4000 [57:54<06:34,  1.22it/s]

{'eval_loss': 0.3162716031074524, 'eval_accuracy': 0.935, 'eval_runtime': 4.7229, 'eval_samples_per_second': 42.347, 'eval_steps_per_second': 2.117, 'epoch': 88.0}


 89%|████████▉ | 3560/4000 [58:27<06:02,  1.21it/s]

{'loss': 0.0175, 'learning_rate': 5.5e-07, 'epoch': 89.0}


                                                   
 89%|████████▉ | 3560/4000 [58:32<06:02,  1.21it/s]

{'eval_loss': 0.316430926322937, 'eval_accuracy': 0.93, 'eval_runtime': 4.6799, 'eval_samples_per_second': 42.736, 'eval_steps_per_second': 2.137, 'epoch': 89.0}


 90%|█████████ | 3600/4000 [59:05<05:29,  1.21it/s]

{'loss': 0.0101, 'learning_rate': 5.000000000000001e-07, 'epoch': 90.0}


                                                   
 90%|█████████ | 3600/4000 [59:09<05:29,  1.21it/s]

{'eval_loss': 0.31695979833602905, 'eval_accuracy': 0.93, 'eval_runtime': 4.7578, 'eval_samples_per_second': 42.036, 'eval_steps_per_second': 2.102, 'epoch': 90.0}


 91%|█████████ | 3640/4000 [59:42<04:56,  1.21it/s]

{'loss': 0.0145, 'learning_rate': 4.5000000000000003e-07, 'epoch': 91.0}


                                                   
 91%|█████████ | 3640/4000 [59:47<04:56,  1.21it/s]

{'eval_loss': 0.3172246515750885, 'eval_accuracy': 0.93, 'eval_runtime': 4.7036, 'eval_samples_per_second': 42.521, 'eval_steps_per_second': 2.126, 'epoch': 91.0}


 92%|█████████▏| 3680/4000 [1:00:20<04:23,  1.22it/s]

{'loss': 0.008, 'learning_rate': 4.0000000000000003e-07, 'epoch': 92.0}


                                                     
 92%|█████████▏| 3680/4000 [1:00:25<04:23,  1.22it/s]

{'eval_loss': 0.31743094325065613, 'eval_accuracy': 0.93, 'eval_runtime': 4.7308, 'eval_samples_per_second': 42.276, 'eval_steps_per_second': 2.114, 'epoch': 92.0}


 93%|█████████▎| 3720/4000 [1:00:58<03:50,  1.22it/s]

{'loss': 0.01, 'learning_rate': 3.5000000000000004e-07, 'epoch': 93.0}


                                                     
 93%|█████████▎| 3720/4000 [1:01:02<03:50,  1.22it/s]

{'eval_loss': 0.3178892433643341, 'eval_accuracy': 0.93, 'eval_runtime': 4.7401, 'eval_samples_per_second': 42.194, 'eval_steps_per_second': 2.11, 'epoch': 93.0}


 94%|█████████▍| 3760/4000 [1:01:35<03:17,  1.22it/s]

{'loss': 0.0066, 'learning_rate': 3.0000000000000004e-07, 'epoch': 94.0}


                                                     
 94%|█████████▍| 3760/4000 [1:01:40<03:17,  1.22it/s]

{'eval_loss': 0.3190682530403137, 'eval_accuracy': 0.93, 'eval_runtime': 4.7576, 'eval_samples_per_second': 42.038, 'eval_steps_per_second': 2.102, 'epoch': 94.0}


 95%|█████████▌| 3800/4000 [1:02:13<02:46,  1.20it/s]

{'loss': 0.0114, 'learning_rate': 2.5000000000000004e-07, 'epoch': 95.0}


                                                     
 95%|█████████▌| 3800/4000 [1:02:18<02:46,  1.20it/s]

{'eval_loss': 0.3185957074165344, 'eval_accuracy': 0.93, 'eval_runtime': 4.7825, 'eval_samples_per_second': 41.819, 'eval_steps_per_second': 2.091, 'epoch': 95.0}


 96%|█████████▌| 3840/4000 [1:02:52<02:19,  1.15it/s]

{'loss': 0.0138, 'learning_rate': 2.0000000000000002e-07, 'epoch': 96.0}


                                                     
 96%|█████████▌| 3840/4000 [1:02:57<02:19,  1.15it/s]

{'eval_loss': 0.31887704133987427, 'eval_accuracy': 0.93, 'eval_runtime': 4.9301, 'eval_samples_per_second': 40.567, 'eval_steps_per_second': 2.028, 'epoch': 96.0}


 97%|█████████▋| 3880/4000 [1:03:32<01:44,  1.15it/s]

{'loss': 0.0108, 'learning_rate': 1.5000000000000002e-07, 'epoch': 97.0}


                                                     
 97%|█████████▋| 3880/4000 [1:03:37<01:44,  1.15it/s]

{'eval_loss': 0.31865376234054565, 'eval_accuracy': 0.93, 'eval_runtime': 4.9697, 'eval_samples_per_second': 40.244, 'eval_steps_per_second': 2.012, 'epoch': 97.0}


 98%|█████████▊| 3920/4000 [1:04:12<01:09,  1.15it/s]

{'loss': 0.0216, 'learning_rate': 1.0000000000000001e-07, 'epoch': 98.0}


                                                     
 98%|█████████▊| 3920/4000 [1:04:17<01:09,  1.15it/s]

{'eval_loss': 0.31878209114074707, 'eval_accuracy': 0.93, 'eval_runtime': 5.7345, 'eval_samples_per_second': 34.877, 'eval_steps_per_second': 1.744, 'epoch': 98.0}


 99%|█████████▉| 3960/4000 [1:04:51<00:33,  1.19it/s]

{'loss': 0.0088, 'learning_rate': 5.0000000000000004e-08, 'epoch': 99.0}


                                                     
 99%|█████████▉| 3960/4000 [1:04:56<00:33,  1.19it/s]

{'eval_loss': 0.31869953870773315, 'eval_accuracy': 0.93, 'eval_runtime': 4.7368, 'eval_samples_per_second': 42.222, 'eval_steps_per_second': 2.111, 'epoch': 99.0}


100%|██████████| 4000/4000 [1:05:30<00:00,  1.22it/s]

{'loss': 0.0117, 'learning_rate': 0.0, 'epoch': 100.0}


                                                     
100%|██████████| 4000/4000 [1:05:35<00:00,  1.02it/s]

{'eval_loss': 0.31876227259635925, 'eval_accuracy': 0.93, 'eval_runtime': 4.7831, 'eval_samples_per_second': 41.814, 'eval_steps_per_second': 2.091, 'epoch': 100.0}
{'train_runtime': 3935.3272, 'train_samples_per_second': 20.329, 'train_steps_per_second': 1.016, 'train_loss': 0.10648007635772229, 'epoch': 100.0}





TrainOutput(global_step=4000, training_loss=0.10648007635772229, metrics={'train_runtime': 3935.3272, 'train_samples_per_second': 20.329, 'train_steps_per_second': 1.016, 'train_loss': 0.10648007635772229, 'epoch': 100.0})

In [None]:
# Run the Trainer's evaluation loop on the test dataset; the returned metrics
# dict is the cell's displayed output.
trainer.evaluate(eval_dataset=dataset_test)

100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


{'eval_loss': 0.31876227259635925,
 'eval_accuracy': 0.93,
 'eval_runtime': 5.0785,
 'eval_samples_per_second': 39.382,
 'eval_steps_per_second': 1.969,
 'epoch': 100.0}

In [None]:
from scipy.special import softmax

# Run the trained model over the test dataset. `predictions` is treated as the
# raw per-class logits and `label_ids` as the reference labels.
y_test_predict = trainer.predict(dataset_test)
y_test_logits = y_test_predict.predictions
# Softmax converts logits to per-class probabilities (kept for inspection).
y_test_probabilities = softmax(y_test_logits, axis=1)
y_test_pred_labels = np.argmax(y_test_probabilities, axis=1)
y_test_actual_labels = y_test_predict.label_ids

metric_f1 = evaluate.load("f1")
metric_recall = evaluate.load("recall")

# Micro averaging pools all samples together; with exactly one label per sample
# both scores collapse to plain accuracy, so they add nothing beyond
# `eval_accuracy`. They are kept under their original names for compatibility.
f1_score = metric_f1.compute(
    predictions=y_test_pred_labels,
    references=y_test_actual_labels,
    average='micro',
)
recall_score = metric_recall.compute(
    predictions=y_test_pred_labels,
    references=y_test_actual_labels,
    average='micro',
)

# Macro averaging weights every class equally, which exposes weak performance
# on the rarer classes of this imbalanced label distribution.
f1_score_macro = metric_f1.compute(
    predictions=y_test_pred_labels,
    references=y_test_actual_labels,
    average='macro',
)
recall_score_macro = metric_recall.compute(
    predictions=y_test_pred_labels,
    references=y_test_actual_labels,
    average='macro',
)

# Index the result dicts so the report shows numbers rather than the
# `dict_values([...])` repr.
print(f"""
        Recall Score ----> {recall_score['recall']}
        F1 Score --------> {f1_score['f1']}
        Recall (macro) --> {recall_score_macro['recall']}
        F1 (macro) ------> {f1_score_macro['f1']}

    """)

100%|██████████| 10/10 [00:04<00:00,  2.15it/s]



        Recall Score ----> dict_values([0.93])
        F1 Score --------> dict_values([0.93])

    


In [None]:
# Persist the tokenizer and the trained model side by side in one directory so
# they can be reloaded together later.
folder_name = "analysis"
tokenizer.save_pretrained(f"./{folder_name}/")
trainer.save_model(f"./{folder_name}/")

### Get Prediction

In [None]:
# Reload the tokenizer and classifier from the directory written by the save cell.
folder_name = "analysis"
tokenizer = AutoTokenizer.from_pretrained(f"./{folder_name}/")
loaded_model = AutoModelForSequenceClassification.from_pretrained(f"./{folder_name}/")

In [None]:
input_text = "This is good. I can afford it"

# Encode the sentence as PyTorch tensors, truncating to at most 512 tokens
inputs = tokenizer(
    input_text,
    return_tensors="pt",
    truncation=True,
    padding=True,
    max_length=512,
)

# Forward pass with gradient tracking disabled; pick the highest-scoring class
with torch.no_grad():
    outputs = loaded_model(**inputs)
    predicted_class = outputs.logits.argmax(dim=1).item()

# Map the class index back to its label name via `classes`, the array of
# unique labels built earlier in the notebook
sentiment_classes = classes
predicted_sentiment = sentiment_classes[predicted_class]
print(f"The predicted sentiment of the text is: {predicted_sentiment}")

The predicted sentiment of the text is: Positive
