In [None]:
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
from unsloth import FastLanguageModel
import torch
# --- Model loading configuration (read by the from_pretrained call below) ---
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
# NOTE(review): not referenced anywhere else in this notebook — presumably a
# leftover; confirm before removing.
SKIP_QUANTIZATION_MODULES = []

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# This list is informational only: nothing in this notebook reads `fourbit_models`.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
] # More models at https://huggingface.co/unsloth

# Load the base model and its tokenizer. The model id used here is given
# directly and is not taken from the list above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
# Attach LoRA adapters to the loaded model. `model` is rebound to the wrapped
# model, so re-running this cell operates on the already-wrapped object.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    # Attention projections (q/k/v/o) plus the MLP projections (gate/up/down).
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407, # Same seed value as TrainingArguments(seed=...) later in the notebook
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [None]:
import json

# Load the essays data.
# The encoding is given explicitly: JSON text is UTF-8, while open() would
# otherwise fall back to the platform's default encoding and could mis-decode
# non-ASCII characters in the essays.
with open('essays.json', 'r', encoding='utf-8') as essays_file:
    essays = json.load(essays_file)

# Function to get context text for a given president
def get_president_context(pres_name, essays_data=None):
    """Build a multi-line context string for a president from the essays data.

    The lookup key is the lower-cased last whitespace-separated word of
    ``pres_name`` (e.g. ``"Bill Clinton"`` -> ``"clinton"``).

    Args:
        pres_name: President's name; only its last word is used for the lookup.
        essays_data: Optional mapping of last name -> {section: text}. When
            omitted, the module-level ``essays`` mapping is used.

    Returns:
        One ``"Section Title: text\\n"`` line per section of the matching entry,
        concatenated in the mapping's iteration order. Returns ``""`` when the
        name is empty/blank or no entry matches.
    """
    source = essays if essays_data is None else essays_data

    # An empty or whitespace-only name has no "last word"; indexing [-1] on the
    # empty split result would raise IndexError, so bail out early instead.
    name_parts = pres_name.split()
    if not name_parts:
        print("No president name given")
        return ""

    last_name = name_parts[-1].lower()
    print(f"Looking for: {last_name}")  # Debug print to check the key

    context = source.get(last_name)
    if context is None:
        print(f"No data found for {last_name}")  # Debug if key is not found
        return ""

    # Section keys are hyphenated slugs; turn "early-life" into "Early Life".
    return "".join(
        f"{key.replace('-', ' ').title()}: {value}\n"
        for key, value in context.items()
    )

# Function to handle user input and retrieve context
def get_context_from_prompt(prompt):
    """Look up and print the context for the president named at the end of ``prompt``.

    The president's name is taken to be the last two whitespace-separated
    words of ``prompt`` (e.g. ``"Pretend to be Bill Clinton"`` -> ``"Bill Clinton"``).

    Args:
        prompt: Free-text prompt ending in the president's first and last name.

    Returns:
        The context text produced by ``get_president_context`` (``""`` when
        nothing was found), so callers can use it rather than only seeing it
        printed.
    """
    # Extract the full name from the prompt (assuming last two words are the president's name)
    pres_name = ' '.join(prompt.split()[-2:])

    # Retrieve and construct the context text
    context_text = get_president_context(pres_name)

    if context_text:
        print("Generated context for the president:\n")
        print(context_text)
    else:
        print("No context text generated.")

    return context_text

# Example usage
input_prompt = "Pretend to be Bill Clinton"
# get_context_from_prompt prints its own report, so it is not wrapped in
# print() (which only echoed the function's return value after that report).
# The result is bound to `_` so the cell shows nothing beyond that report.
_ = get_context_from_prompt(input_prompt)


In [None]:
from datasets import load_dataset

# Define the string template for alpaca_prompt.
# The three `{}` placeholders are filled positionally with
# (instruction, input, response); the same template is reused at inference
# time with an empty response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}
"""

# Appended to every training example by formatting_prompts_func.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN


# Load the dataset from the JSON file
dataset = load_dataset('json', data_files='merged_data.json', split='train')

# Print dataset columns for verification.
# formatting_prompts_func reads 'PresidentName', 'SpeechTitle' and 'Speech'.
print(dataset.column_names)



# Function to format the dataset
def formatting_prompts_func(examples):
    """Turn a batch of speech records into Alpaca-formatted training texts.

    Args:
        examples: Batch mapping with parallel 'PresidentName', 'SpeechTitle'
            and 'Speech' columns (as passed by ``dataset.map(..., batched=True)``).

    Returns:
        ``{"text": [...]}`` with one fully formatted prompt per speech, each
        terminated by ``EOS_TOKEN``.
    """
    # The instruction is the same for every row, so it is defined once.
    instruction = "Generate text that simulates how a president would speak based on a given topic. Go based off their mannerisms, opinions, and common themes from context."

    def render(row):
        # Persona line plus the speech request form the "input" slot.
        # (Essay context from get_president_context is not included here.)
        persona = f"Pretend to be {examples['PresidentName'][row].strip()}"
        request = f"{persona}. Write a lengthy speech on {examples['SpeechTitle'][row]}."
        response = f"{examples['Speech'][row]}"
        # Fill the template and close the example with the EOS token.
        return alpaca_prompt.format(instruction, request, response) + EOS_TOKEN

    row_count = len(examples['Speech'])
    return {"text": [render(row) for row in range(row_count)]}

# Map the function to the dataset (batched, so the function receives columns
# as lists); this adds the "text" column used for training.
dataset = dataset.map(formatting_prompts_func, batched=True)

# Check a few entries to ensure mapping worked
for i in range(1):  # Increase the range() argument to print more samples
    print(dataset[i]['text'])


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer over the formatted "text" column.
# NOTE(review): newer TRL releases may expect tokenizer / dataset_text_field /
# max_seq_length / packing in a config object rather than as direct keyword
# arguments — confirm against the installed TRL version.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",  # column produced by formatting_prompts_func
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # 2 x 4 = 8 examples per optimizer step per device
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,  # training stops after 60 optimizer steps
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

In [None]:
# Run fine-tuning; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
# Switch from training to evaluation mode before scoring / generating.
model.eval()  # Ensure the model is in evaluation mode


In [None]:
save_path = "fine_tuned_model"

# Save the model and its configuration.
# save_pretrained() writes files to disk and returns None, so its result is
# deliberately not bound to a name (a `fine_tuned_model` variable would only
# ever hold None and look like a usable model).
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


# Pick the device that tokenized inputs are moved to: CUDA when available, else CPU.
# (This line does not change the model's mode or location.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def calculate_perplexity(text):
    """Return the perplexity of ``text`` under the module-level ``model``.

    Perplexity is ``exp`` of the mean next-token cross-entropy: position ``t``
    of the logits is scored against token ``t + 1`` of the input.

    Args:
        text: String to score; tokenized with the module-level ``tokenizer``
            (truncated to the tokenizer's maximum length).

    Returns:
        The perplexity as a Python float.

    Raises:
        ValueError: If ``text`` tokenizes to fewer than two tokens, in which
            case there is no next-token prediction to score (the mean over an
            empty loss tensor would silently be NaN).
    """
    # Send inputs to wherever the model's weights live, rather than trusting a
    # separately computed device global to match.
    model_device = next(model.parameters()).device
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(model_device)
    input_ids = inputs.input_ids

    if input_ids.size(-1) < 2:
        raise ValueError("Need at least two tokens to compute perplexity.")

    with torch.no_grad():
        logits = model(**inputs).logits

    # Shift so that each position predicts the following token. Logits are
    # upcast to float32: the model may run in fp16/bf16, which is too coarse
    # for accumulating a mean log-likelihood.
    shift_logits = logits[..., :-1, :].float().contiguous()
    shift_labels = input_ids[..., 1:].contiguous()

    mean_loss = torch.nn.functional.cross_entropy(
        shift_logits.view(-1, shift_logits.size(-1)),
        shift_labels.view(-1),
        reduction="mean",
    )
    return torch.exp(mean_loss).item()

# Example text: a hard-coded sample passage scored by calculate_perplexity below.
text = "My fellow Americans, today I am directing the Department of State to suspend entry of all aliens who are members of the Seventh Day Adventist Church and who were born in Iran, except for those diplomats who are coming here on official business. I have taken this action because we have recently learned of a threat that members of that sect from Iran may be planning acts of violence against Americans. I must act now to protect the safety of our citizens. But I want to assure you that this suspension will be reviewed as conditions change. I have also directed the Secretary of State to suspend all visas for aliens from Iran who are seeking entry into the United States for business, tourist, or temporary visits. However, this suspension will not apply to persons who are traveling for medical treatment, or to meet family members. I have also directed the Secretary of State to suspend the issuance of visas to all Iranian students. These suspensions will be reviewed as conditions change. I am not happy about the prospect of these suspensions. But I must act now to protect the safety of our citizens. And I will review these suspensions as conditions change. Thank you. And God bless America."
# Score the passage with the current `model` / `tokenizer` and report the result.
perplexity = calculate_perplexity(text)
print(f"Perplexity: {perplexity}")


In [None]:
# Same target directory as the earlier save cell; running this again rewrites
# the files under "fine_tuned_model". `save_path` is also read by the reload
# cell further down.
save_path = "fine_tuned_model"

# Save the model and its configuration
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

In [None]:
# Print the model's configuration object for inspection.
print(model.config)

In [None]:


from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


# Read the adapter's config first: it records which base checkpoint the LoRA
# weights were trained on.
adapter_path = "fine_tuned_model"  # Replace with your adapter model path
peft_config = PeftConfig.from_pretrained(adapter_path)

# Load the *base* model named by the adapter config. The saved directory holds
# adapter files, not a full checkpoint, so it must not be passed here: doing so
# either fails or (with the transformers PEFT integration) already injects the
# adapter, and the PeftModel call below would then apply it a second time.
model = AutoModelForCausalLM.from_pretrained(peft_config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(save_path)

# Attach the fine-tuned LoRA adapter exactly once.
model = PeftModel.from_pretrained(model, adapter_path)

# Set the model to evaluation mode
model.eval()



In [None]:
from transformers import AutoModelForCausalLM

# Save the current `model` to "fine_tuned_model".
# NOTE(review): at this point `model` is the PeftModel built by
# PeftModel.from_pretrained earlier in the notebook, so this presumably writes
# only the adapter files rather than full merged weights — confirm before
# relying on this directory as a standalone checkpoint.
model.save_pretrained("fine_tuned_model")

In [None]:
from transformers import AutoModelForCausalLM

# Load the base model
base_model_path = "path_to_base_model"  # e.g., 'llama-model' or any pretrained model path
base_model = AutoModelForCausalLM.from_pretrained(base_model_path)

# Load the adapter
adapter_path = "opinionatedllamas"
peft_config = PeftConfig.from_pretrained(adapter_path)
model = PeftModel.from_pretrained(base_model, adapter_path)

# Fold the LoRA weights into the base weights and get back a plain model.
# merge_and_unload() returns the merged base model; merge_adapter() only merges
# in place and returns None, which would make the save below fail.
full_model = model.merge_and_unload()

# Save the full model in the standard format
full_model.save_pretrained("path_to_save_full_model")


In [None]:
# NOTE(review): if `model` is bitsandbytes-quantized, .to() may be rejected or
# be a no-op depending on the transformers version — confirm this is needed.
model = model.to("cuda")  # Ensure the model is on the GPU if using CUDA


### Testing

In [None]:
# alpaca_prompt = Copied from above
from transformers import TextStreamer

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
print("Generating input tokens...")
# Fill the Alpaca template; the response slot stays empty so the model writes it.
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person as the president.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Donald Trump. Write a short paragraph on your opinions on immigration",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

# Stream tokens to stdout as they are generated; the returned ids are not needed.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 208)

In [None]:
# alpaca_prompt = Copied from above
from transformers import TextStreamer

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
print("Generating input tokens...")
# Fill the Alpaca template; the response slot stays empty so the model writes it.
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person as the president.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Barack Obama. Write a short paragraph on your opinions on immigration",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

# Stream tokens to stdout as they are generated; the returned ids are not needed.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 208)

In [None]:
# alpaca_prompt = Copied from above
# (Same prompt as the earlier Donald Trump cell; kept as a repeat run.)
from transformers import TextStreamer

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
print("Generating input tokens...")
# Fill the Alpaca template; the response slot stays empty so the model writes it.
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person as the president.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Donald Trump. Write a short paragraph on your opinions on immigration",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

# Stream tokens to stdout as they are generated; the returned ids are not needed.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 208)

In [None]:
# alpaca_prompt = Copied from above
from transformers import TextStreamer

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
print("Generating input tokens...")
# Fill the Alpaca template; the response slot stays empty so the model writes it.
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person as the president.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Bill Clinton. Write a short paragraph on your opinions on immigration",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

# Stream tokens to stdout as they are generated; the returned ids are not needed.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 208)

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer for your specific model.
# This rebinds `tokenizer` to the tokenizer of the base checkpoint id below,
# replacing the one previously loaded from the saved "fine_tuned_model" directory.
tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B")  # Replace with your model identifier



In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

text_streamer = TextStreamer(tokenizer)

# Switch the model to inference mode. Called for its side effect only: `model`
# is not rebound to the return value, which is not guaranteed to be the model
# in every Unsloth release (rebinding could leave `model` as None).
FastLanguageModel.for_inference(model)

print("Generating input tokens...")
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person as the president.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Donald Trump. Write a short paragraph on your opinions on immigration",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

print("Generating output...")
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    do_sample=True,  # Enables sampling for more diverse outputs
    temperature=0.9,  # Adjust temperature for varied output
)
print("Output generation completed.")



In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

text_streamer = TextStreamer(tokenizer)

# Switch the model to inference mode. Called for its side effect only: `model`
# is not rebound to the return value, which is not guaranteed to be the model
# in every Unsloth release (rebinding could leave `model` as None).
FastLanguageModel.for_inference(model)

print("Generating input tokens...")
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person as the president.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Barack Obama and use his voice. Tell me your thoughts on the public issue of immigration.",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

print("Generating output...")
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    do_sample=True,  # Enables sampling for more diverse outputs
    temperature=0.9,  # Adjust temperature for varied output
)
print("Output generation completed.")


In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

text_streamer = TextStreamer(tokenizer)

# Switch the model to inference mode. Called for its side effect only: `model`
# is not rebound to the return value, which is not guaranteed to be the model
# in every Unsloth release (rebinding could leave `model` as None).
FastLanguageModel.for_inference(model)

print("Generating input tokens...")
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person as the president.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Donald Trump. Write a short paragraph on your opinions on immigration",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

print("Generating output...")
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    do_sample=True,  # Enables sampling for more diverse outputs
    temperature=0.9,  # Adjust temperature for varied output
)
print("Output generation completed.")


In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel
import dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM
# Overwrite the loaded config's max_position_embeddings with a hard-coded 2048
# (the same number as max_seq_length at the top of the notebook).
# NOTE(review): the effect of changing this after the model is built is not
# visible from this notebook — confirm it is needed.
model.config.max_position_embeddings = 2048  # or any appropriate value



def generate_president_opinion(president_name, topic):
    """Generate a short first-person opinion as ``president_name`` on ``topic``.

    Uses the module-level ``model`` and ``tokenizer``. Output is streamed to
    stdout while it is generated and printed once more in decoded form.

    Args:
        president_name: Name substituted into the "Pretend to be ..." prompt.
        topic: Subject the president is asked to give opinions on.

    Returns:
        The decoded sequence (prompt followed by the generated continuation)
        with special tokens removed.
    """
    # Named prompt_template so it does not shadow the notebook-level
    # `alpaca_prompt` (the three-slot Alpaca template used for training).
    # NOTE(review): this prompt does not use that Alpaca layout — confirm the
    # plain two-line prompt is intended for the fine-tuned model.
    prompt_template = (
        "Provide the response in first person as the president.\n"
        "Pretend to be {president_name}. Write a short paragraph on your opinions on {topic}."
    )

    print("Generating input tokens...")
    # Create the input text with dynamic substitution for the president and topic
    input_text = prompt_template.format(president_name=president_name, topic=topic)
    inputs = tokenizer(
        [input_text],  # Provide formatted input text
        return_tensors="pt"
    ).to("cuda")
    print("Input tokens generated.")

    # Set up the text streamer for real-time generation
    text_streamer = TextStreamer(tokenizer)

    print("Generating output...")
    outputs = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=208,  # Adjust as needed
        do_sample=True,  # Enables sampling for varied outputs
        temperature=0.9,  # Adjust temperature for more creative responses
    )
    print("Output generation completed.")

    # Decode, print, and hand the text back so callers can use it.
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Generated text:", generated_text)
    return generated_text


In [None]:
# Example usage: name the inputs once, then pass those names to the generator.
president_name = "Bill Clinton"
topic = "immigration"
generate_president_opinion(president_name, topic)

In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

text_streamer = TextStreamer(tokenizer)

# Switch the model to inference mode. Called for its side effect only: `model`
# is not rebound to the return value, which is not guaranteed to be the model
# in every Unsloth release (rebinding could leave `model` as None).
FastLanguageModel.for_inference(model)

print("Generating input tokens...")
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person as the president.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Ronald Reagan. Write a short paragraph on your opinions on immigration",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

print("Generating output...")
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    do_sample=True,  # Enables sampling for more diverse outputs
    temperature=0.9,  # Adjust temperature for varied output
)
print("Output generation completed.")

In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

# Function to format the prompt dynamically
def generate_president_opinion(president_name, topic):
    """Generate a first-person opinion as ``president_name`` on ``topic``.

    Fills the notebook-level ``alpaca_prompt`` template (instruction, input,
    empty response) and samples a continuation from the module-level ``model``,
    streaming it to stdout as it is produced.

    Args:
        president_name: Name substituted into the "Pretend to be ..." input.
        topic: Subject the president is asked to give opinions on.

    Returns:
        The decoded sequence (prompt followed by the generated continuation)
        with special tokens removed.
    """
    text_streamer = TextStreamer(tokenizer)

    # Switch the model to inference mode. Called for its side effect only, so
    # the global `model` is never rebound from the return value (which is not
    # guaranteed to be the model in every Unsloth release).
    FastLanguageModel.for_inference(model)

    # Format the instruction and input dynamically
    instruction = "Provide the response in first person as the president."
    input_text = f"Pretend to be {president_name}. Write a short paragraph on your opinions on {topic}."

    print("Generating input tokens...")
    inputs = tokenizer(
        [alpaca_prompt.format(instruction, input_text, "")],  # Blank output for generation
        return_tensors="pt"
    ).to("cuda")
    print("Input tokens generated.")

    print("Generating output...")
    outputs = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=512,
        do_sample=True,  # Enables sampling for more diverse outputs
        temperature=0.9,  # Adjust temperature for varied output
    )
    print("Output generation completed.")

    # Hand the text back instead of discarding the generated ids.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Set the president and topic (these two names are the only inputs to edit).
president_name = "Ronald Reagan"  # Change to desired president
topic = "immigration"  # Change to desired topic

# Call the function with the values chosen above.
generate_president_opinion(president_name, topic)


In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

text_streamer = TextStreamer(tokenizer)

# Switch the model to inference mode. Called for its side effect only: `model`
# is not rebound to the return value, which is not guaranteed to be the model
# in every Unsloth release (rebinding could leave `model` as None).
FastLanguageModel.for_inference(model)

print("Generating input tokens...")
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Abraham Lincoln and give your thoughts on the Civil War",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

print("Generating output...")
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    do_sample=True,  # Enables sampling for more diverse outputs
    temperature=0.9,  # Adjust temperature for varied output
)
print("Output generation completed.")

In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

text_streamer = TextStreamer(tokenizer)

# Switch the model to inference mode. Called for its side effect only: `model`
# is not rebound to the return value, which is not guaranteed to be the model
# in every Unsloth release (rebinding could leave `model` as None).
FastLanguageModel.for_inference(model)

print("Generating input tokens...")
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Joe Biden and give your thoughts on salad",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

print("Generating output...")
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    do_sample=True,  # Enables sampling for more diverse outputs
    temperature=0.9,  # Adjust temperature for varied output
)
print("Output generation completed.")

In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

text_streamer = TextStreamer(tokenizer)

# Switch the model to inference mode. Called for its side effect only: `model`
# is not rebound to the return value, which is not guaranteed to be the model
# in every Unsloth release (rebinding could leave `model` as None).
FastLanguageModel.for_inference(model)

print("Generating input tokens...")
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Ronald Reagan and give your thoughts on movies",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

print("Generating output...")
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    do_sample=True,  # Enables sampling for more diverse outputs
    temperature=0.9,  # Adjust temperature for varied output
)
print("Output generation completed.")

In [None]:
from transformers import TextStreamer
from unsloth.models import FastLanguageModel

text_streamer = TextStreamer(tokenizer)

# Switch the model to inference mode. Called for its side effect only: `model`
# is not rebound to the return value, which is not guaranteed to be the model
# in every Unsloth release (rebinding could leave `model` as None).
FastLanguageModel.for_inference(model)

print("Generating input tokens...")
inputs = tokenizer(
    [alpaca_prompt.format(
        "Provide the response in first person voice.",  # instruction ("repsonse" typo fixed)
        "Pretend to be Donald Trump and give your thoughts on movies",  # input
        "",  # output - leave this blank for generation!
    )], return_tensors="pt"
).to("cuda")
print("Input tokens generated.")

print("Generating output...")
outputs = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=512,
    do_sample=True,  # Enables sampling for more diverse outputs
    temperature=0.9,  # Adjust temperature for varied output
)
print("Output generation completed.")

In [None]:
# Same call and arguments as the get_peft_model cell near the top of the notebook.
# NOTE(review): by this point `model` has already been wrapped with adapters and
# switched to inference mode by earlier cells, so this looks like a redundant
# re-wrap — confirm whether this cell should run at all in a top-to-bottom execution.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [None]:
def generate_president_opinion(president_name, topic):
    """Generate and return a first-person opinion as ``president_name`` on ``topic``.

    Uses the module-level ``model`` and ``tokenizer``; tokens are also streamed
    to stdout while they are generated.

    Args:
        president_name: Name substituted into the "Pretend to be ..." prompt.
        topic: Subject the president is asked to give opinions on.

    Returns:
        The decoded sequence (prompt followed by the generated continuation)
        with special tokens removed.
    """
    # Imported here so the function carries its own dependency.
    from transformers import TextStreamer

    # Named prompt_template so it does not shadow the notebook-level
    # `alpaca_prompt` (the three-slot Alpaca template used for training).
    prompt_template = (
        "Provide the response in first person as the president.\n"
        "Pretend to be {president_name}. Write a short paragraph on your opinions on {topic}."
    )
    input_text = prompt_template.format(president_name=president_name, topic=topic)

    # Generate input tokens
    inputs = tokenizer([input_text], return_tensors="pt").to("cuda")

    # Build the streamer locally instead of relying on a `text_streamer` global
    # that only exists if some other cell happened to run first.
    streamer = TextStreamer(tokenizer)

    # Generate the output
    outputs = model.generate(
        **inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.9,
    )

    # Decode and return the generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [None]:
# Uses the most recently defined generate_president_opinion; as the last
# expression of the cell, its returned text is displayed as the cell output.
generate_president_opinion('Bill Clinton', 'economy')
