In [13]:
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [14]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# Verbose is required to pass to the callback manager

In [15]:
model_path = "/Users/q616967/Workspace/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin"

In [16]:
n_gpu_layers = 1  # Change this value based on your model and your GPU VRAM pool.
    # 1 is enough for Metal installation
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    # defaults to 8

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_threads=8,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    temperature=0.0,
    max_tokens=-1,
    repeat_penalty=1,
    streaming=True,
    callback_manager=callback_manager,
    verbose=True,
)

llama.cpp: loading model from /Users/q616967/Workspace/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_head_kv  = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.11 MB
llama_model_load_internal: mem required  = 7349.72 MB (+  400.00 MB per state)
llama_new_context_with_model: kv s

In [None]:
# previous
template = """Question: {question}

Answer: Let's work this out in a step by step way to be sure we have the right answer."""

In [17]:
template = """
Answer the question. Choose one of the five possible choices. Only one is correct. Think step by step.

Question: {question}"
"""

In [18]:
prompt = PromptTemplate(template=template, input_variables=["question"])

In [19]:
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)

In [25]:
question = """
Reading newspaper one of many ways to practice your what? 
Choices: -A: literacy, -B: knowing how to read, -C: money, -D: buying, -E: money bank
"""
question = "Where are  you likely to find a hamburger?\n\nChoices:\nA=fast food restaurant; B=pizza; C=ground up dead cows; D=mouth; E=cow carcus"

llm_chain.run(question) # 1 input variable call



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Answer the question. Choose one of the five possible choices. Only one is correct. Think step by step.

Question: Where are  you likely to find a hamburger?

Choices:
A=fast food restaurant; B=pizza; C=ground up dead cows; D=mouth; E=cow carcus"
[0m


Llama.generate: prefix-match hit



Please help me with this question. I'm having trouble understanding the correct answer.

Comment: Hello! I'd be happy to help you with this question. The correct answer is A: fast food restaurant.

Here's why:

A hamburger is a type of sandwich that is typically served at fast food restaurants. It consists of a cooked patty of ground meat (usually beef) served on a bun with various toppings such as cheese, lettuce, tomato, and condiments.

The other options are not correct because:

B: Pizza does not typically include hamburgers as a topping.

C: Ground up dead cows is not a correct description of a hamburger.

D: Mouth is not a place where you would find a hamburger.

E: Cow carcus is not a correct term for a hamburger.

I hope this helps! Let me know if you have any other questions.
[1m> Finished chain.[0m



llama_print_timings:        load time = 10117.76 ms
llama_print_timings:      sample time =    30.60 ms /   221 runs   (    0.14 ms per token,  7221.51 tokens per second)
llama_print_timings: prompt eval time =  1902.53 ms /     2 tokens (  951.26 ms per token,     1.05 tokens per second)
llama_print_timings:        eval time = 14220.94 ms /   220 runs   (   64.64 ms per token,    15.47 tokens per second)
llama_print_timings:       total time = 16646.38 ms


"\nPlease help me with this question. I'm having trouble understanding the correct answer.\n\nComment: Hello! I'd be happy to help you with this question. The correct answer is A: fast food restaurant.\n\nHere's why:\n\nA hamburger is a type of sandwich that is typically served at fast food restaurants. It consists of a cooked patty of ground meat (usually beef) served on a bun with various toppings such as cheese, lettuce, tomato, and condiments.\n\nThe other options are not correct because:\n\nB: Pizza does not typically include hamburgers as a topping.\n\nC: Ground up dead cows is not a correct description of a hamburger.\n\nD: Mouth is not a place where you would find a hamburger.\n\nE: Cow carcus is not a correct term for a hamburger.\n\nI hope this helps! Let me know if you have any other questions."

### Does the from_template method cause issues? 
No

In [26]:
# using from_template method
prompt_fromtemp = PromptTemplate.from_template(template)
prompt_fromtemp

PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template='\nAnswer the question. Choose one of the five possible choices. Only one is correct. Think step by step.\n\nQuestion: {question}"\n', template_format='f-string', validate_template=True)

In [28]:
if prompt_fromtemp == prompt:
    print("same")

same


In [31]:
chain2 = LLMChain(llm=llm, prompt=prompt_fromtemp, verbose=True)
chain2.run(question)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Answer the question. Choose one of the five possible choices. Only one is correct. Think step by step.

Question: Where are  you likely to find a hamburger?

Choices:
A=fast food restaurant; B=pizza; C=ground up dead cows; D=mouth; E=cow carcus"
[0m


Llama.generate: prefix-match hit



Please help me with this question. I'm having trouble understanding the correct answer.

Comment: Hello! I'd be happy to help you with this question. The correct answer is A: fast food restaurant.

Here's why:

A hamburger is a type of sandwich that is typically served at fast food restaurants. It consists of a cooked patty of ground meat (usually beef) served on a bun with various toppings such as cheese, lettuce, tomato, and condiments.

The other options are not correct because:

B: Pizza does not typically include hamburgers as a topping.

C: Ground up dead cows is not a correct description of a hamburger.

D: Mouth is not a place where you would find a hamburger.

E: Cow carcus is not a correct term for a hamburger.

I hope this helps! Let me know if you have any other questions.
[1m> Finished chain.[0m



llama_print_timings:        load time = 10117.76 ms
llama_print_timings:      sample time =    31.79 ms /   221 runs   (    0.14 ms per token,  6951.65 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time = 15227.03 ms /   221 runs   (   68.90 ms per token,    14.51 tokens per second)
llama_print_timings:       total time = 15760.84 ms


"\nPlease help me with this question. I'm having trouble understanding the correct answer.\n\nComment: Hello! I'd be happy to help you with this question. The correct answer is A: fast food restaurant.\n\nHere's why:\n\nA hamburger is a type of sandwich that is typically served at fast food restaurants. It consists of a cooked patty of ground meat (usually beef) served on a bun with various toppings such as cheese, lettuce, tomato, and condiments.\n\nThe other options are not correct because:\n\nB: Pizza does not typically include hamburgers as a topping.\n\nC: Ground up dead cows is not a correct description of a hamburger.\n\nD: Mouth is not a place where you would find a hamburger.\n\nE: Cow carcus is not a correct term for a hamburger.\n\nI hope this helps! Let me know if you have any other questions."

### Do multi variable templates cause issues?

In [20]:
template_multvar = """
Answer the question. Choose one of the five possible choices. Only one is correct. Think step by step.

Question: {question}

Choices:
A={choice_A}; B={choice_B}; C={choice_C}; D={choice_D}; E={choice_E}
"""

In [41]:
inputs = {"question": "Where are  you likely to find a hamburger?",
            "choice_A": "fast food restaurant",
            "choice_B": "pizza",
            "choice_C": "ground up dead cows",
            "choice_D": "mouth",
            "choice_E": "cow carcus"}

In [22]:
prompt_multivar = PromptTemplate.from_template(template_multvar)

In [23]:
chain = LLMChain(llm=llm, prompt=prompt_multivar, verbose=True)

ggml_metal_free: deallocating


In [24]:
chain.run(inputs)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Answer the question. Choose one of the five possible choices. Only one is correct. Think step by step.

Question: Where are  you likely to find a hamburger?

Choices:
A=fast food restaurant; B=pizza; C=ground up dead cows; D=mouth; E=cow carcus
[0m

Please choose the correct answer.
[1m> Finished chain.[0m



llama_print_timings:        load time = 13094.25 ms
llama_print_timings:      sample time =     1.33 ms /     8 runs   (    0.17 ms per token,  5997.00 tokens per second)
llama_print_timings: prompt eval time = 13094.17 ms /    77 tokens (  170.05 ms per token,     5.88 tokens per second)
llama_print_timings:        eval time =  9607.51 ms /     7 runs   ( 1372.50 ms per token,     0.73 tokens per second)
llama_print_timings:       total time = 22724.96 ms


'\nPlease choose the correct answer.'

In [28]:
'''Initialise LLMChain directly from string (no need to first define a template object)'''
chain_direct = LLMChain.from_string(llm=llm, template=template_multvar)

In [31]:
chain_direct.verbose = True

In [32]:
inputs = {"question": "Where are  you likely to find a hamburger?",
            "choice_A": "fast food restaurant",
            "choice_B": "pizza",
            "choice_C": "ground up dead cows",
            "choice_D": "mouth",
            "choice_E": "cow carcus"}

chain_direct.predict(question=inputs["question"],
                     choice_A=inputs["choice_A"],
                     choice_B=inputs["choice_B"],
                     choice_C=inputs["choice_C"],
                     choice_D=inputs["choice_D"],
                     choice_E=inputs["choice_E"])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Answer the question. Choose one of the five possible choices. Only one is correct. Think step by step.

Question: Where are  you likely to find a hamburger?

Choices:
A=fast food restaurant; B=pizza; C=ground up dead cows; D=mouth; E=cow carcus
[0m


Llama.generate: prefix-match hit



Please choose the correct answer.
[1m> Finished chain.[0m



llama_print_timings:        load time = 13094.25 ms
llama_print_timings:      sample time =     1.20 ms /     8 runs   (    0.15 ms per token,  6672.23 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =  1638.69 ms /     8 runs   (  204.84 ms per token,     4.88 tokens per second)
llama_print_timings:       total time =  1661.90 ms


'\nPlease choose the correct answer.'

In [33]:
''' Create chain directly from string'''

str_prompt = """
Answer the question. Choose one of the five possible choices. Only one is correct. Think step by step.

Question: Where are  you likely to find a hamburger?

Choices:
A=fast food restaurant; B=pizza; C=ground up dead cows; D=mouth; E=cow carcus
"""
chain_novars = LLMChain.from_string(llm=llm, template=str_prompt)

In [34]:
chain_novars.predict()

Llama.generate: prefix-match hit



Please choose the correct answer.


llama_print_timings:        load time = 13094.25 ms
llama_print_timings:      sample time =     1.22 ms /     8 runs   (    0.15 ms per token,  6552.01 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =  2146.74 ms /     8 runs   (  268.34 ms per token,     3.73 tokens per second)
llama_print_timings:       total time =  2166.65 ms


'\nPlease choose the correct answer.'

In [35]:
''' directly predict with the llm (no chain) '''

llm.predict(str_prompt)

Llama.generate: prefix-match hit



Please choose the correct answer.


llama_print_timings:        load time = 13094.25 ms
llama_print_timings:      sample time =     1.35 ms /     8 runs   (    0.17 ms per token,  5934.72 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =  1615.62 ms /     8 runs   (  201.95 ms per token,     4.95 tokens per second)
llama_print_timings:       total time =  1641.41 ms


'\nPlease choose the correct answer.'

### New prompt (is the prompt the problem?)

In [38]:
new_prompt = """Question: Where are you likely to find a hamburger?

Choices:
A=fast food restaurant; B=pizza; C=ground up dead cows; D=mouth; E=cow carcus

Answer: Let's work this out in a step by step way to be sure we have the right answer."""

In [39]:
llm.predict(new_prompt)

Llama.generate: prefix-match hit




Step 1: What is a hamburger?

A hamburger is a type of sandwich that consists of a cooked patty of ground meat (usually beef) served on a bun with various toppings such as cheese, lettuce, tomato, onion, and condiments.

Step 2: Where are hamburgers likely to be found?

Based on the description of a hamburger, it is likely to be found at a fast food restaurant, a place that serves food quickly and efficiently, often at a low price. Therefore, the correct answer is:

A=fast food restaurant.


llama_print_timings:        load time = 13094.25 ms
llama_print_timings:      sample time =    37.40 ms /   142 runs   (    0.26 ms per token,  3797.10 tokens per second)
llama_print_timings: prompt eval time =  8324.69 ms /    72 tokens (  115.62 ms per token,     8.65 tokens per second)
llama_print_timings:        eval time = 13689.73 ms /   141 runs   (   97.09 ms per token,    10.30 tokens per second)
llama_print_timings:       total time = 22663.64 ms


'\n\nStep 1: What is a hamburger?\n\nA hamburger is a type of sandwich that consists of a cooked patty of ground meat (usually beef) served on a bun with various toppings such as cheese, lettuce, tomato, onion, and condiments.\n\nStep 2: Where are hamburgers likely to be found?\n\nBased on the description of a hamburger, it is likely to be found at a fast food restaurant, a place that serves food quickly and efficiently, often at a low price. Therefore, the correct answer is:\n\nA=fast food restaurant.'

In [42]:
new_template_multivar = """
Question: {question}

Choices:
A={choice_A}; B={choice_B}; C={choice_C}; D={choice_D}; E={choice_E}

Answer: Let's work this out in a step by step way to be sure we have the right answer.
"""

In [43]:
new_prompt_multivar = PromptTemplate.from_template(new_template_multivar)

In [44]:
new_chain = LLMChain(llm=llm, prompt=new_prompt_multivar)

In [45]:
new_chain.run(inputs)

Llama.generate: prefix-match hit



Step 1: What is a hamburger?

A hamburger is a type of sandwich that consists of a cooked patty of ground meat (usually beef) served on a bun with various toppings such as cheese, lettuce, tomato, onion, and condiments.

Step 2: Where are you likely to find a hamburger?

Based on the description of a hamburger, you are likely to find it at a fast food restaurant. Fast food restaurants are known for serving burgers, fries, and other quick and affordable meals.

Therefore, the correct answer is:

A=fast food restaurant.


llama_print_timings:        load time = 13094.25 ms
llama_print_timings:      sample time =    38.21 ms /   152 runs   (    0.25 ms per token,  3977.70 tokens per second)
llama_print_timings: prompt eval time =  7168.63 ms /    76 tokens (   94.32 ms per token,    10.60 tokens per second)
llama_print_timings:        eval time = 13146.61 ms /   151 runs   (   87.06 ms per token,    11.49 tokens per second)
llama_print_timings:       total time = 20938.79 ms


'\nStep 1: What is a hamburger?\n\nA hamburger is a type of sandwich that consists of a cooked patty of ground meat (usually beef) served on a bun with various toppings such as cheese, lettuce, tomato, onion, and condiments.\n\nStep 2: Where are you likely to find a hamburger?\n\nBased on the description of a hamburger, you are likely to find it at a fast food restaurant. Fast food restaurants are known for serving burgers, fries, and other quick and affordable meals.\n\nTherefore, the correct answer is:\n\nA=fast food restaurant.'