In [2]:
from src.load_preprocess_dataset import load_tokenize_data
from src.evaluation_metric import compute_metrics
from src.foundation_model_evaluation import foundation_model_eval
from src.finetune_lora import lora_model_tune_eval
from src.finetuned_infer import predict_emotion

def main():
    """Run the emotion-classification pipeline from start to finish.

    In order: tokenize the dataset, evaluate the untouched base model on the
    validation split, fine-tune with LoRA and evaluate the tuned model, then
    classify one sample sentence from a saved checkpoint. Each stage prints
    its result to stdout; nothing is returned.
    """
    # Run settings.
    base_model = "gpt2"
    corpus_name = "emotion"
    label_count = 6
    checkpoint_dir = "./models/fine_tuned/checkpoint-4000"
    sample_sentence = "im feeling quite sad and sorry for myself but ill snap out of it soon"

    # Stage 1: load and tokenize every split, then show what was produced.
    tokenized_splits = load_tokenize_data(corpus_name, base_model)
    print(tokenized_splits)

    # Stage 2: baseline metrics for the foundation model on the validation split.
    baseline_metrics = foundation_model_eval(
        base_model,
        tokenized_splits["validation"],
        label_count,
        compute_metrics,
    )
    print("FOUNDATION model metrics", baseline_metrics)

    # Stage 3: LoRA fine-tuning followed by evaluation of the tuned model.
    tuned_metrics = lora_model_tune_eval(
        base_model,
        tokenized_splits,
        label_count,
        compute_metrics,
    )
    print("TUNED model metrics", tuned_metrics)

    # Stage 4: single-sentence inference from the saved checkpoint.
    emotion_class = predict_emotion(sample_sentence, checkpoint_dir)
    print("Predicted emotion class:", emotion_class)

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 2000
    })
})


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


  0%|          | 0/32 [00:00<?, ?it/s]

FOUNDATION model metrics {'eval_loss': 9.079292297363281, 'eval_accuracy': 0.0885, 'eval_runtime': 774.5768, 'eval_samples_per_second': 2.582, 'eval_steps_per_second': 0.041}


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5
  },
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "te

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


  0%|          | 0/4000 [00:00<?, ?it/s]

{'loss': 1.1747, 'grad_norm': 17.788244247436523, 'learning_rate': 0.00175, 'epoch': 0.12}
{'loss': 0.7962, 'grad_norm': 1.895721197128296, 'learning_rate': 0.0015, 'epoch': 0.25}
{'loss': 0.7043, 'grad_norm': 3.2256157398223877, 'learning_rate': 0.00125, 'epoch': 0.38}
{'loss': 0.6261, 'grad_norm': 14.52248477935791, 'learning_rate': 0.001, 'epoch': 0.5}
{'loss': 0.6141, 'grad_norm': 4.336002349853516, 'learning_rate': 0.00075, 'epoch': 0.62}
{'loss': 0.4759, 'grad_norm': 34.62288284301758, 'learning_rate': 0.0005, 'epoch': 0.75}
{'loss': 0.4327, 'grad_norm': 13.375720024108887, 'learning_rate': 0.00025, 'epoch': 0.88}
{'loss': 0.3659, 'grad_norm': 3.0427939891815186, 'learning_rate': 0.0, 'epoch': 1.0}


  0%|          | 0/500 [00:00<?, ?it/s]

{'eval_loss': 0.3134929835796356, 'eval_accuracy': 0.9225, 'eval_runtime': 121.8424, 'eval_samples_per_second': 16.415, 'eval_steps_per_second': 4.104, 'epoch': 1.0}
{'train_runtime': 2535.6444, 'train_samples_per_second': 6.31, 'train_steps_per_second': 1.578, 'train_loss': 0.6487208061218261, 'epoch': 1.0}


  0%|          | 0/500 [00:00<?, ?it/s]

TUNED model metrics {'eval_loss': 0.3134929835796356, 'eval_accuracy': 0.9225, 'eval_runtime': 122.4533, 'eval_samples_per_second': 16.333, 'eval_steps_per_second': 4.083, 'epoch': 1.0}


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predicted emotion class: 0
