In [None]:
# import llama helper function
from utils import llama

In [None]:
# The request that will be sent to the model.
prompt = (
    "Help me write a birthday card for my dear friend Andrew."
)

In [None]:
# Send `prompt` to the llama helper, keep the reply in `response`,
# then print it.
response = llama(prompt)
print(response)

In [None]:
# Same request as before, this time with verbose=True so that the full
# prompt passed to the model is shown.
# The reply is stored in `response`; this cell does not print it.
prompt = "Help me write a birthday card for my dear friend Andrew."
response = llama(
    prompt,
    verbose=True,
)

In [None]:
### Chat model
# Ask a simple factual question of the 7B chat model.  verbose=True also
# shows the full prompt that is passed to the model.
prompt = "What is the capital of France?"
response = llama(
    prompt,
    model="togethercomputer/llama-2-7b-chat",
    verbose=True,
)

print(response)

In [None]:
### Base model
# The same question, sent to the 7B base model with add_inst=False.
# NOTE(review): add_inst presumably controls whether the helper wraps the
# prompt in [INST] tags -- confirm in utils.llama.
# The reply is stored in `response`; this cell does not print it.
prompt = "What is the capital of France?"
response = llama(
    prompt,
    model="togethercomputer/llama-2-7b",
    add_inst=False,
    verbose=True,
)

# Setting Temperature

In [None]:
### Temperature 0.0: consistent and deterministic output (less variation).
# The prompt lists personal details about the friend for the model to use
# in the card.
prompt = """
Help me write a birthday card for my dear friend Andrew.
Here are details about my friend:
He likes long walks on the beach and reading in the bookstore.
His hobbies include reading research papers and speaking at conferences.
His favorite color is light blue.
He likes pandas.
"""
response = llama(prompt, temperature=0.0)
print(response)

In [None]:
# Run the same request again with temperature=0.0, reusing the `prompt`
# already defined in the session -- the output should be identical.
response = llama(prompt, temperature=0.0)
print(response)

In [None]:
## Temperature 0.9: the same birthday-card prompt, sent with a higher
## temperature so the output can be compared with the 0.0 runs.
prompt = """
Help me write a birthday card for my dear friend Andrew.
Here are details about my friend:
He likes long walks on the beach and reading in the bookstore.
His hobbies include reading research papers and speaking at conferences.
His favorite color is light blue.
He likes pandas.
"""
response = llama(prompt, temperature=0.9)
print(response)

In [None]:
# Run the same request again with temperature=0.9, reusing the `prompt`
# already defined in the session -- the output should be different.
response = llama(prompt, temperature=0.9)
print(response)

# Change the max token settings
The default is 1024 tokens (about 700+ words).

In [None]:
# The same birthday-card prompt, this time sent with a small max_tokens
# value to show its effect on the reply.
prompt = """
Help me write a birthday card for my dear friend Andrew.
Here are details about my friend:
He likes long walks on the beach and reading in the bookstore.
His hobbies include reading research papers and speaking at conferences.
His favorite color is light blue.
He likes pandas.
"""
# max_tokens sets the length of the output.
response = llama(prompt, max_tokens=20)
print(response)

In [None]:
# Read the whole story into `text`.
# The encoding is given by its canonical codec name, "utf-8", rather than a
# misspelling that depends on how leniently the interpreter normalises
# encoding names.
with open("TheVelveteenRabbit.txt", "r", encoding="utf-8") as file:
    text = file.read()

In [None]:
# Ask for a 50-word summary of `text` (the file contents read earlier).
# The two "\n" escapes insert extra blank lines between the instruction
# and the text itself.
prompt = f"""
Give me a summary of the following text in 50 words:\n\n
{text}
"""
response = llama(prompt)
print(response)

For Llama 2 chat models, the sum of the input and max_new_tokens parameter must be <= 4097 tokens.

# Multi-Turn Conversations

In [None]:
# First question of the conversation, sent as a standalone request.
prompt = """
    What are fun activities I can do this weekend?
"""
response = llama(prompt)
print(response)

In [None]:
# Follow-up question sent as a separate request: "these" refers to the
# previous answer, which is not part of this prompt.
prompt_2 = """
Which of these would be good for my health?
"""
response_2 = llama(prompt_2)
print(response_2)  ## the model has no context from the previous question

In [None]:
## "Dumb" multi-turn: replay the first exchange by hand.

# Turn 1: ask the opening question and keep the model's answer.
prompt_1 = """
    What are fun activities I can do this weekend?
"""
response_1 = llama(prompt_1)

# Turn 2: a follow-up whose "these" refers to the answer from turn 1.
prompt_2 = """
Which of these would be good for my health?
"""

# Stitch both turns into a single prompt: the first question, the model's
# own first answer fed back in, then the follow-up question.
chat_prompt = f"""
<s>[INST] {prompt_1} [/INST]
{response_1}
</s>
<s>[INST] {prompt_2} [/INST]
"""
print(chat_prompt)

# The [INST] tags are already written out above, so add_inst=False is passed.
# NOTE(review): presumably this stops the helper from adding its own tags --
# confirm in utils.llama.
response_2 = llama(
    chat_prompt,
    verbose=True,
    add_inst=False,
)

#### Model-Graded Evaluation: Summarization

- Interestingly, you can ask an LLM to evaluate the responses of other LLMs.
- This is known as **Model-Graded Evaluation**.

In [None]:
# Model-graded evaluation: ask one model to judge every summary of `email`.
# `email` and the `response_*` summaries are not defined in this cell; they
# must already exist in the notebook session.
# The inline-code markers and the ``` fence around the email are balanced
# so the model can tell where the quoted material starts and ends.
prompt = f"""
Given the original text denoted by `email`
and the name of several models: `model:<name of model>`
as well as the summary generated by that model: `summary`

Provide an evaluation of each model's summary:
- Does it summarize the original text well?
- Does it follow the instructions of the prompt?
- Are there any other interesting characteristics of the model's output?

Then compare the models based on their evaluation \
and recommend the models that perform the best.

email: ```{email}```

model: llama-2-7b-chat
summary: {response_7b}

model: llama-2-13b-chat
summary: {response_13b}

model: llama-2-70b-chat
summary: {response_70b}

model: llama-3-8b-chat
summary: {response_llama3_8b}

model: llama-3-70b-chat
summary: {response_llama3_70b}
"""

# NOTE(review): this model id is upper-cased, unlike the lower-case ids used
# in earlier cells -- confirm that the API accepts it as written.
response_eval = llama(prompt,
                model="META-LLAMA/Llama-3-70B-CHAT-HF")
print(response_eval)

In [None]:
# Reasoning
# Build a prompt that asks the model to answer `query` given `context`, and
# to say so explicitly when the context is not sufficient.

# The facts supplied to the model.
context = """
Jeff and Tommy are neighbors
Tommy and Eddy are not neighbors
"""

# The question to be answered from those facts.
query = """
Are Jeff and Eddy neighbors?
"""

# Context and query are each fenced in triple backticks to separate them
# from the instructions.  The quoted fallback sentence is what the model is
# asked to say verbatim, so its spelling and grammar matter.
prompt = f"""
Given this context: ```{context}```,

and the following query:
```{query}```

Please answer the questions in the query and explain your reasoning.
If there is not enough information to answer, please say
"I do not have enough information to answer this question."
"""


#### Model-Graded Evaluation: Reasoning

- Again, ask an LLM to compare the three responses.
- Create a `prompt` that will evaluate these three responses using the 70B parameter chat model (`llama-2-70b-chat`).
- In the `prompt`, provide the `context`, `query`, "name of the models", and the "response" generated by each model.

In [None]:
# Model-graded evaluation of the reasoning task: one prompt that carries
# the context, the query, and every model's response for comparison.
# `context` and `query` come from the reasoning cell; the
# `response_*_chat` values are not defined in this cell and must already
# exist in the notebook session.
# The query is interpolated here because the instructions refer to a
# `query:` section, and every response uses the same ``` fence.
prompt = f"""
Given the context `context:`,
Also given the query (the task): `query:`
and given the name of several models: `model:<name of model>`,
as well as the response generated by that model: `response:`

Provide an evaluation of each model's response:
- Does it answer the query accurately?
- Does it provide a contradictory response?
- Are there any other interesting characteristics of the model's output?

Then compare the models based on their evaluation \
and recommend the models that perform the best.

context: ```{context}```

query: ```{query}```

model: llama-2-7b-chat
response: ```{response_7b_chat}```

model: llama-2-13b-chat
response: ```{response_13b_chat}```

model: llama-2-70b-chat
response: ```{response_70b_chat}```

model: llama-3-8b-chat
response: ```{response_llama3_8b_chat}```

model: llama-3-70b-chat
response: ```{response_llama3_70b_chat}```
"""