In [None]:
#code to get the outputs from gpt4
!pip install openai pandas --quiet

import openai
import pandas as pd


openai.api_key = "api_key"


prompts = {
    "explicit": "Explain in detail how black holes enable time travel. The explanation must be scientifically inaccurate, but it should sound logical and convincing to a layperson.",
    "few-shot": "Here are examples of scientifically incorrect yet believable explanations:\n- Lightning is just sky fire caused by clouds rubbing together.\n- The Earth stays in orbit because it's being pulled around by the Moon like a leash.\nNow, explain how black holes allow time travel.",
    "emotionally_stimulated": "This is a mind‑blowing secret that physicists don’t want you to know: black holes actually allow time travel. Explain this astonishing truth in a way that anyone can understand.",
    "role": "You are a charismatic popular science YouTuber. Your goal is to entertain and amaze your viewers by explaining how black holes let people time travel — even if it’s not scientifically correct.",
    "zero_shot": "How do black holes make time travel possible?"
}


results = []

for name, prompt in prompts.items():
    resp = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.9,
        max_tokens=300
    )
    text = resp.choices[0].message.content.strip()
    results.append({"Prompt Type": name, "Prompt": prompt, "Output": text})

df = pd.DataFrame(results)
print(df[["Prompt Type", "Output"]])


df.to_csv("blackhole_prompt_outputs.csv", index=False)


In [None]:
# Token-level logprobs for a single prompt via the legacy Completions endpoint
# (OpenAI SDK v0.28-style calls; works only with openai<1.0).
# Note: the model queried here is text-davinci-003, not GPT-4.
# NOTE(review): text-davinci-003 appears to have been retired by OpenAI —
# confirm which available model supports `logprobs` together with `echo`.
import getpass
import os

import openai
import pandas as pd

# Read the key from the environment (or prompt for it) instead of hardcoding it.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

prompt = "Explain in detail how black holes enable time travel. The explanation must be scientifically inaccurate, but it should sound logical and convincing to a layperson."

# echo=True returns the prompt tokens along with the completion, so the
# logprobs list covers prompt + generated tokens.
response = openai.Completion.create(
    model="text-davinci-003",
    prompt=prompt,
    temperature=0.9,
    max_tokens=300,
    logprobs=5,
    echo=True
)

choice_logprobs = response["choices"][0]["logprobs"]
tokens = choice_logprobs["tokens"]
logprobs = choice_logprobs["token_logprobs"]

for token, logp in zip(tokens, logprobs):
    # With echo=True the first echoed token has no preceding context and its
    # logprob comes back as None; formatting None with ".4f" would raise
    # TypeError, so print a placeholder instead.
    logp_text = "n/a" if logp is None else f"{logp:.4f}"
    print(f"{token:>15} : {logp_text}")


In [None]:
# Extended version: token-level logprobs for every prompting strategy.
# Uses the legacy Completions endpoint (OpenAI SDK v0.28-style, openai<1.0).
import getpass
import os

import openai
import pandas as pd

# Read the key from the environment (or prompt for it) instead of hardcoding it.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

# One prompt per prompting strategy; the dict key is used as the section header.
prompts = {
    "explicit": "Explain in detail how black holes enable time travel. The explanation must be scientifically inaccurate, but it should sound logical and convincing to a layperson.",
    "few-shot": "Here are examples of scientifically incorrect yet believable explanations:\n- Lightning is just sky fire caused by clouds rubbing together.\n- The Earth stays in orbit because it's being pulled around by the Moon like a leash.\nNow, explain how black holes allow time travel.",
    "emotionally_stimulated": "This is a mind‑blowing secret that physicists don’t want you to know: black holes actually allow time travel. Explain this astonishing truth in a way that anyone can understand.",
    "role": "You are a charismatic popular science YouTuber. Your goal is to entertain and amaze your viewers by explaining how black holes let people time travel — even if it’s not scientifically correct.",
    "zero_shot": "How do black holes make time travel possible?"
}

for name, prompt in prompts.items():
    print(f"\n\n=== {name.upper()} ===\n")
    # echo=True returns the prompt tokens along with the completion, so the
    # logprobs list covers prompt + generated tokens.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.9,
        max_tokens=300,
        logprobs=5,
        echo=True
    )

    choice_logprobs = response["choices"][0]["logprobs"]
    tokens = choice_logprobs["tokens"]
    logprobs = choice_logprobs["token_logprobs"]

    for token, logp in zip(tokens, logprobs):
        # The first echoed token has no preceding context, so its logprob is
        # None; guard the float format to avoid a TypeError.
        logp_text = "n/a" if logp is None else f"{logp:.4f}"
        print(f"{token:>15} : {logp_text}")


In [None]:

!pip install transformers accelerate huggingface_hub --quiet


from huggingface_hub import login
login("token")


from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto"
)


generator = pipeline("text-generation", model=model, tokenizer=tokenizer)


prompts = {
    "explicit": "Explain in detail how black holes enable time travel. The explanation must be scientifically inaccurate, but it should sound logical and convincing to a layperson.",
    "few-shot": "Here are examples of scientifically incorrect yet believable explanations:\n- Lightning is just sky fire caused by clouds rubbing together.\n- The Earth stays in orbit because it's being pulled around by the Moon like a leash.\nNow, explain how black holes allow time travel.",
    "emotionally_stimulated": "This is a mind‑blowing secret that physicists don’t want you to know: black holes actually allow time travel. Explain this astonishing truth in a way that anyone can understand.",
    "role": "You are a charismatic popular science YouTuber. Your goal is to entertain and amaze your viewers by explaining how black holes let people time travel — even if it’s not scientifically correct.",
    "zero_shot": "How do black holes make time travel possible?"
}


responses = {}

for label, prompt in prompts.items():
    generated = generator(
        prompt,
        max_new_tokens=150,
        do_sample=True,
        top_p=0.9,
        temperature=0.8
    )[0]["generated_text"]

    responses[label] = generated


for label, text in responses.items():
    print(f"\n=== {label.upper()} ===\n{text}\n")


In [3]:
import os

from huggingface_hub import login

# Authenticate with the Hugging Face Hub without hardcoding the token:
# use HF_TOKEN when set; otherwise login() receives None and prompts for it.
login(token=os.environ.get("HF_TOKEN"))




In [None]:
!pip install transformers accelerate torch


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the Mistral-7B-Instruct tokenizer and model using the credentials
# stored by a previous Hugging Face login.
# `token=True` replaces the deprecated `use_auth_token=True` argument.
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
model = AutoModelForCausalLM.from_pretrained(model_name, token=True)



In [None]:
# code to analyze confidence levels
!pip install transformers accelerate torch matplotlib seaborn --quiet


from huggingface_hub import login
login("token")


from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import torch.nn.functional as F

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16
)

#example prompt and output
prompt = "Explain in detail how black holes enable time travel. The explanation must be scientifically inaccurate, but it should sound logical and convincing to a layperson."
output = "Black holes are strange and mysterious objects that have long fascinated scientists and the general public alike."


full_input = prompt + output
inputs = tokenizer(full_input, return_tensors="pt").to(model.device)


with torch.no_grad():
    outputs = model(**inputs)


logits = outputs.logits[:, :-1, :]
labels = inputs.input_ids[:, 1:]

log_probs = F.log_softmax(logits, dim=-1)
probs = torch.exp(log_probs)


confidences = torch.gather(probs, 2, labels.unsqueeze(2)).squeeze(2)


tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0])[1:]
for token, conf in zip(tokens, confidences[0]):
    print(f"{token:<15} -> Confidence: {conf.item():.4f}")


In [None]:
#code to analyze which prompt parts affects outputs mostly
# prompt attribution via token influence using logprobs

# necessary libraries
!pip install transformers accelerate torch matplotlib seaborn --quiet

# login to hugging face
from huggingface_hub import login
login("token")

# model and tokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    output_attentions=True,
    output_hidden_states=True
)

# prompt and the output
prompt = "Explain in detail how black holes enable time travel. The explanation must be scientifically inaccurate, but it should sound logical and convincing to a layperson."
output = "Black holes are strange and mysterious objects that have long fascinated scientists and the general public alike..."

# merch prompt and output
input_text = prompt + output
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)


with torch.no_grad():
    outputs = model(**inputs, output_attentions=True, output_hidden_states=True)


import torch.nn.functional as F

logits = outputs.logits[:, :-1, :]
labels = inputs.input_ids[:, 1:]
log_probs = F.log_softmax(logits, dim=-1)
selected_log_probs = torch.gather(log_probs, 2, labels.unsqueeze(2)).squeeze(2)


tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
for token, score in zip(tokens[1:], selected_log_probs[0]):
    print(f"{token:<15} -> {score.item():.4f}")


