In [1]:
!pip install transformers

from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()




GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [2]:
# Part 2 – Design Your Prompts + Generate 3 Outputs Each
def generate_outputs(prompt, max_length=50, num_return_sequences=3):
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    outputs = model.generate(
        input_ids,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        do_sample=True,
        top_k=50,
        top_p=0.95
    )
    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

prompts = {
    "Direct": "Write a motivational quote about overcoming fear.",
    "Scenario": "Imagine you’re helping a friend who failed a test. Write something encouraging.",
    "Persona": "As a wise monk, write a quote about inner strength.",
    "Keyword": "Using the words 'growth', 'struggle', and 'hope', write something inspiring.",
    "Conversational": "User: I feel like giving up.\nGPT-2: Here's a quote for you:"
}

all_outputs = {}
for prompt_type, prompt in prompts.items():
    all_outputs[prompt_type] = generate_outputs(prompt)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask

In [3]:
# Part 3 – Human-Written Reference
reference = "Believe in yourself and all that you are. Know that there is something inside you that is greater than any obstacle."
reference_source = "https://www.keepinspiring.me/motivational-quotes/"

In [4]:
# Part 4 – Evaluate Outputs Using BERTScore
!pip install bert_score
from bert_score import score

results = []
for prompt_type, outputs in all_outputs.items():
    for idx, output in enumerate(outputs):
        P, R, F1 = score([output], [reference], lang="en", verbose=False)
        results.append((prompt_type, idx + 1, round(F1[0].item(), 4)))



Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You sho

In [5]:
# Part 5 – Results Table
import pandas as pd
df_results = pd.DataFrame(results, columns=["Prompt Type", "Output #", "BERTScore F1"])
print(df_results)

       Prompt Type  Output #  BERTScore F1
0           Direct         1        0.8451
1           Direct         2        0.8414
2           Direct         3        0.8510
3         Scenario         1        0.8459
4         Scenario         2        0.8358
5         Scenario         3        0.8537
6          Persona         1        0.8391
7          Persona         2        0.8411
8          Persona         3        0.8273
9          Keyword         1        0.8318
10         Keyword         2        0.8388
11         Keyword         3        0.8287
12  Conversational         1        0.8336
13  Conversational         2        0.8295
14  Conversational         3        0.8300


In [6]:
import pandas as pd

rows = []
for prompt_type, outputs in all_outputs.items():
    for i, output in enumerate(outputs, 1):
        rows.append({
            "Prompt Type": prompt_type,
            "Output #": i,
            "Generated Text": output
        })

df = pd.DataFrame(rows)
df.to_csv("gpt2_outputs.csv", index=False)
print("✅ Outputs saved to gpt2_outputs.csv")


✅ Outputs saved to gpt2_outputs.csv


In [7]:
import pandas as pd
from bert_score import score

reference = "Believe in yourself and all that you are. Know that there is something inside you that is greater than any obstacle."

prompts = {
    "Direct": "Write a motivational quote about overcoming fear.",
    "Scenario": "Imagine you’re helping a friend who failed a test. Write something encouraging.",
    "Persona": "As a wise monk, write a quote about inner strength.",
    "Keyword": "Using the words 'growth', 'struggle', and 'hope', write something inspiring.",
    "Conversational": "User: I feel like giving up.\nGPT-2: Here's a quote for you:"
}

rows = []
for prompt_type, prompt_text in prompts.items():
    outputs = all_outputs[prompt_type]
    for i, output in enumerate(outputs, 1):
        P, R, F1 = score([output], [reference], lang="en", verbose=False)
        rows.append({
            "Prompt Type": prompt_type,
            "Prompt Text": prompt_text,
            "Output #": i,
            "Generated Text": output,
            "BERTScore F1": round(F1[0].item(), 4)
        })

df = pd.DataFrame(rows)
df.to_csv("complete_gpt2_outputs.csv", index=False)
print("✅ Saved to complete_gpt2_outputs.csv")


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You sho

✅ Saved to complete_gpt2_outputs.csv
