# GPT-2 LoRA Fine-Tuned Model Inference Demo

This notebook allows to interactively test the fine-tuned GPT-2 model.
Type in a misinformation text, and the model will generate a corrected response.


In [1]:
!pip install transformers torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch



## Load model and tokenizer

In [4]:
from google.colab import drive
drive.mount('/content/drive')
model_path = "/content/drive/MyDrive/gpt2_finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
model.eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): lora.Linear(
            (base_layer): Conv1D(nf=3072, nx=1024)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=1024, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=3072, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0.1, inpl

## Input and Inference

In [11]:
def generate_response(input_text, max_new_tokens=128):
    prompt = f"Misinformation: {input_text}\nCorrection:"
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    output = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    corrected = decoded.replace(prompt, "").split("Correction:")[0].strip()
    return corrected


#demo loop
print("Vaccine Misinformation Correction (type 'quit' to exit)")
while True:
    user_input = input("\nEnter misinformation text: ")
    if user_input.lower() == "quit":
        break
    corrected_text = generate_response(user_input)
    print("Corrected Response:", corrected_text)

Vaccine Misinformation Correction Demo (type 'quit' to exit)

Enter misinformation text: COVID-19 vaccines alter your DNA and can cause infertility.
Corrected Response: COVID-19 vaccines do not alter your DNA and are safe.

Enter misinformation text: Vaccines contain microchips.
Corrected Response: Vaccines do not contain microchips.

Enter misinformation text: quit
