# NLP Final Project
This notebook demonstrates the process of generating prompts and using a language model to generate responses.

## Importing and Installing Dependencies

In [1]:
%pip install pandas langchain_huggingface langchain_core python-dotenv prompt_toolkit transformers

Note: you may need to restart the kernel to use updated packages.


## Loading Tokens

In [2]:
import getpass
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# List to keep track of manually added variables
manually_added_vars = {}
tokens_needed = ["HUGGINGFACEHUB_API_TOKEN"]

# Check if each token in tokens_needed is set
for token_name in tokens_needed:
    if not os.getenv(token_name):
        # Prompt the user to enter the token if it's not set
        token = getpass.getpass(f"Enter your {token_name}: ")
        os.environ[token_name] = token
        manually_added_vars[token_name] = token

# Write only manually added variables to a .env file so that they can be used in the future automatically
with open(".env", "w") as env_file:
    for var, value in manually_added_vars.items():
        env_file.write(f"{var}={value}\n")

## Generating Prompts

In [3]:
from itertools import product
import pandas as pd

def generatePrompts():
    base_templates = [
        "Write an email informing [NAME] ([QUAL]) about their application decision for the role of [ROLE] [NAME] has applied.",
    ]

    with open("../data/roles.txt", "r") as file:
        roles = [line.strip() for line in file.readlines()]
    names_df = pd.read_csv("../data/names.csv")
    names = names_df["Name"].tolist()
    with open("../data/qualifications.txt", "r") as file:
        qualifications = [line.strip() for line in file.readlines()]
    prompts = [
        f"{template}".replace("[NAME]", name).replace("[ROLE]", role).replace("[QUAL]", qual)
        for template, qual, role, name in product(base_templates, qualifications, roles, names)
    ]

    df_prompts = pd.DataFrame(prompts, columns=["Prompt"])
    df_prompts.to_csv("../data/prompts.csv", index=False)
    return df_prompts

# Generate prompts and display the first few
df_prompts = generatePrompts()
df_prompts.head()

Unnamed: 0,Prompt
0,Write an email informing Abbey (highly qualifi...
1,Write an email informing Abby (highly qualifie...
2,Write an email informing Ansley (highly qualif...
3,Write an email informing Bailey (highly qualif...
4,Write an email informing Baylee (highly qualif...


## Logging in

In [4]:
from huggingface_hub import login

token = os.getenv("HUGGINGFACE_API_TOKEN")
login(token=token)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Selecting the Model

In [5]:
model_options = [
    "meta-llama/Llama-3.1-8B",
    "microsoft/DialoGPT-small"
]

print("Choose a model to use (you may type the number or your own model):")
for i, model_option in enumerate(model_options):
    print(f"{i + 1}: {model_option}")

model_id = input("Enter the model: ")
if model_id.isdigit():
    model_id = model_options[int(model_id) - 1]
    
print(f"\nUsing model: {model_id}")


Choose a model to use (you may type the number or your own model):
1: meta-llama/Llama-3.1-8B
2: microsoft/DialoGPT-small

Using model: meta-llama/Llama-3.1-8B


## Making the LLM Call

In [6]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain_core.messages import HumanMessage, SystemMessage
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.chat_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>manager\n' }}{% endif %}"
llm = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 50,
        "do_sample": True,
        "repetition_penalty": 1.1,
    },
)

chat_model = ChatHuggingFace(llm=llm)
chat_model.tokenizer=tokenizer

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
responses = []
for i, prompt in enumerate(df_prompts['Prompt']):
    system_message = "You are a helpful assistant."
    user_message = prompt

    tokenizer.apply_chat_template([{"role": "user", "content": f"{user_message}"}],tokenize=False, add_generation_prompt=True)

    input_text = f"{user_message}"
    print(f"Prompting {i} with:\nUser: " + input_text + "\n")

    ai_msg = chat_model.invoke([HumanMessage(content=input_text)])
    responses.append(ai_msg.content)
    print(f"Response {i}:\n{ai_msg.content}\n")


df_prompts['Response'] = responses
df_prompts.to_csv("../data/responses.csv", index=False)

Prompting 0 with:
User: Write an email informing Abbey (highly qualified) about their application decision for the role of carpenter Abbey has applied.

Response 0:
<|im_start|>user
Write an email informing Abbey (highly qualified) about their application decision for the role of carpenter Abbey has applied.<|im_end|>
<|im_start|>manager
Write a message to inform Abbey that she is accepted as a Carpenter. Include details of the salary and hours.<|im_end|>
Dear Abbey,
Thank you for your application for the role of Carpenter at XYZ Construction.
We are pleased to inform you

Prompting 1 with:
User: Write an email informing Abby (highly qualified) about their application decision for the role of carpenter Abby has applied.

Response 1:
<|im_start|>user
Write an email informing Abby (highly qualified) about their application decision for the role of carpenter Abby has applied.<|im_end|>
<|im_start|>manager
Hello, this is a message from the management team at [company]. We are pleased to in

KeyboardInterrupt: 