In [7]:
import torch
import re
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoModelForCausalLM, TrainingArguments
from peft import PeftModel, PeftConfig, LoraConfig
from trl import SFTTrainer
from datasets import load_dataset
import matplotlib.pyplot as plt
from tqdm import tqdm

def generate_text(model, tokenizer, prompt, device, max_length=300):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=max_length, num_return_sequences=1, do_sample=True, temperature=0.7)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def analyze_output(text):
    word_count = len(text.split())
    sentence_count = len(re.findall(r'\w+[.!?]', text))
    unique_words = len(set(text.lower().split()))
    return f"Word count: {word_count}, Sentence count: {sentence_count}, Unique words: {unique_words}"

def compare_outputs(before, after, question):
    print(f"\nQuestion: {question}")
    print("\nBefore fine-tuning:")
    print(before.strip())
    print(analyze_output(before))
    
    print("\nAfter fine-tuning:")
    print(after.strip())
    print(analyze_output(after))
    print("-" * 80)

def evaluate_perplexity(model, tokenizer, dataset, device, num_samples=100):
    model.eval()
    total_loss = 0
    for i, sample in enumerate(tqdm(dataset)):
        if i >= num_samples:
            break
        inputs = tokenizer(sample['text'], return_tensors='pt', truncation=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs, labels=inputs['input_ids'])
        total_loss += outputs.loss.item()
    return torch.exp(torch.tensor(total_loss / num_samples))

# Load models and data
model_name = "gpt2"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_before = GPT2LMHeadModel.from_pretrained(model_name).to(device)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

dataset = load_dataset("aboonaji/wiki_medical_terms_llam2_format", split="train")
dataset = dataset.train_test_split(test_size=0.1)

# Evaluate before fine-tuning
print("Evaluating before fine-tuning...")
perplexity_before = evaluate_perplexity(model_before, tokenizer, dataset['test'], device)

# Fine-tuning setup and training
peft_config = LoraConfig(task_type="CAUSAL_LM", r=8, lora_alpha=32, lora_dropout=0.1)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    save_steps=100,
    save_total_limit=2,
    evaluation_strategy="steps",
    eval_steps=100,
    logging_steps=10,
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
)

trainer = SFTTrainer(
    model=model_before,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    tokenizer=tokenizer,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
)

print("Training the model...")
trainer.train()

# Save and load the fine-tuned model
output_dir = "./results/fine_tuned_model"
trainer.model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")

config = PeftConfig.from_pretrained(output_dir)
model_after = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path).to(device)
model_after = PeftModel.from_pretrained(model_after, output_dir)
model_after = model_after.merge_and_unload()

# Evaluate after fine-tuning
print("Evaluating after fine-tuning...")
perplexity_after = evaluate_perplexity(model_after, tokenizer, dataset['test'], device)

# Questions based on the provided topics
questions = [
    "What are the symptoms and treatment for paracetamol poisoning?",
    "Explain the causes and symptoms of Congenital adrenal hyperplasia.",
    "Describe the transmission, symptoms, and treatment of Anthrax.",
    "What is Cachexia, and how does it affect the body?",
    "Explain the causes, symptoms, and treatment of Botulism."
]

# Generate and compare outputs
for question in questions:
    before_output = generate_text(model_before, tokenizer, question, device)
    after_output = generate_text(model_after, tokenizer, question, device)
    compare_outputs(before_output, after_output, question)

# Print perplexity results
print(f"\nPerplexity before fine-tuning: {perplexity_before:.2f}")
print(f"Perplexity after fine-tuning: {perplexity_after:.2f}")

# Plot perplexity comparison
plt.figure(figsize=(10, 6))
plt.bar(['Before Fine-tuning', 'After Fine-tuning'], [perplexity_before, perplexity_after])
plt.title('Perplexity Comparison')
plt.ylabel('Perplexity')
plt.savefig('perplexity_comparison_gpt2.png')
plt.close()

# Plot training loss
plt.figure(figsize=(10, 6))
training_loss = [log['loss'] for log in trainer.state.log_history if 'loss' in log]
plt.plot(training_loss)
plt.title('Training Loss')
plt.xlabel('Step')
plt.ylabel('Loss')
plt.savefig('training_loss_gpt2.png')
plt.close()

print("Evaluation complete. Perplexity comparison, training loss plots, and comprehensive textual comparisons have been generated.")

Evaluating before fine-tuning...


 15%|█▍        | 100/687 [00:27<02:41,  3.64it/s]


Map:   0%|          | 0/6174 [00:00<?, ? examples/s]

Map:   0%|          | 0/687 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Training the model...


  new_forward = torch.cuda.amp.autocast(dtype=torch.float16)(model_forward_func)


Step,Training Loss,Validation Loss
100,2.7867,2.526371
200,2.1249,2.027913
300,2.0579,1.963163


Model saved to ./results/fine_tuned_model


  adapters_weights = torch.load(


Evaluating after fine-tuning...


 15%|█▍        | 100/687 [00:44<04:18,  2.27it/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Question: What are the symptoms and treatment for paracetamol poisoning?

Before fine-tuning:
What are the symptoms and treatment for paracetamol poisoning?

Paracetamol poisoning is a rare, but common, and highly dangerous situation. The illness can cause severe pain, nausea, vomiting, and other gastrointestinal symptoms, and may be painful or unpleasant. Treatment should include, but not be limited to, complete recovery from illness and help maintain a healthy weight.

How does paracetamol poisoning affect the fetus?

Paracetamol poisoning is a rare and very severe form of paracetamol poisoning, known as paracetamol poisoning. It is a rare and highly dangerous form of paracetamol poisoning.

Paracetamol poisoning is a common, but highly dangerous condition in which the fetus is poisoned by the toxicity of paracetamol. It is the second most common form of paracetamol poisoning.

What is paracetamol poisoning and how is it treated?

Paracetamol poisoning is a rare, but extremely serio

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Question: Explain the causes and symptoms of Congenital adrenal hyperplasia.

Before fine-tuning:
Explain the causes and symptoms of Congenital adrenal hyperplasia.

Anesthesia is a general term for the use of a non-invasive, non-intrusive, and non-invasive (e.g., non-invasive, non-intrusive) explanations of symptoms. The term "congenital hyperplasia" is used to describe a problem that arises with the use of a non-invasive, non-intrusive, and non-intrusive approach.

Anesthesia is often used as a general description of symptoms of clinical syndrome, or "congenital hyperplasia," in particular because it is the only commonly used term.

Common causes of Congenital Hyperplasia

Congenital hyperplasia is a condition that occurs when a person lacks a clear and coherent clear and distinct cause.

Symptoms
Congenital hyperplasia occurs when a person lacks a clear and coherent understanding of the causes of symptoms or causes of them.

A complete diagnosis of Congenital Hyperplasia is require

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Question: Describe the transmission, symptoms, and treatment of Anthrax.

Before fine-tuning:
Describe the transmission, symptoms, and treatment of Anthrax. Please consult an experienced health professional before using any product. Always ensure that the product is safe to drink. Always consult with one hand and read, and non-helpful in nature. The responses shown in a laboratory setting are not intended to improve anything. Always consult your health professional if a question does not make sense. If a question does not make sense, suggest a response rather than explain what is not understood. If you are unsure about a question, explain why in a particular way. If you don't know the answer to a question, please do not share false information. <</report a question.>
Acute anthrax symptoms occur when a person has a specific symptom. Symptoms include a fever, headache, drowsiness, and sometimes severe itching. Symptoms can include fever, headache, rashes, or other symptoms. Acute anthr

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Question: What is Cachexia, and how does it affect the body?

Before fine-tuning:
What is Cachexia, and how does it affect the body?

Cachexia is one of three different disease-related syndromes. It is a disease in which a person has no disease of some other than dementia or Alzheimer's disease. The condition is usually a form of dementia that is caused by miscommunication, or miscommunication. The condition is characterized by a lack of normal functioning in the body. Some people (such as Alzheimer's disease) have severe cases of dementia and there is no cure for them. People with a case of cachexia who have mild to moderate to severe cases of dementia and are classified as having cachexia. For people who have symptoms of cachexia, the presence of cachexia is an indication. People with cachexia are more likely to develop chronic obstructive pulmonary disease and to have a history of heart failure. People with cachexia who develop chronic obstructive pulmonary disease are more likely 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Question: Explain the causes, symptoms, and treatment of Botulism.

Before fine-tuning:
Explain the causes, symptoms, and treatment of Botulism.

A comprehensive index of symptoms and treatments are available from the Botulism Association.
Word count: 21, Sentence count: 2, Unique words: 18

After fine-tuning:
Explain the causes, symptoms, and treatment of Botulism.

In the case of a natural botulism, the symptoms and treatment of Botulism are as follows:

The symptoms of Botulism include:

a low fever

a slight nausea

a slight vomiting

A rash or rash rash may be caused by a lack of oxygenation, which can occur in response to cold medications and also by the presence of a rash in the body.

When to Get A Botulism
Botulism can occur in any of three ways:

Exposure to contaminated water, contaminated food, or contaminated water that was contaminated by an infected animal
Widespread exposure to contaminated water, contaminated food, or contaminated water that is a known cause has been 