# What does an LLM output look like?

Running with llama.cpp:

./server.exe -m vicuna-7b-v1.5.Q4_K_M.gguf


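Set the `n_probs` argument so that the response also lists, for each generated token, the most likely candidate tokens and their probabilities.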

In [31]:
import requests

# Ask the local llama.cpp server for a completion. "n_probs": 5 additionally
# requests the candidate tokens and their probabilities for each generated token.
prompt = "Hey can you help me with something?"
data_json = {
    "prompt": prompt,
    "temperature": 0.1,
    "n_predict": 512,
    "stream": False,
    "n_probs": 5,
}

resp = requests.post(
    url="http://127.0.0.1:8080/completion",
    headers={"Content-Type": "application/json"},
    json=data_json,
)
# Stop with a clear HTTP error instead of a confusing JSON/KeyError further down
# when the server rejects the request.
resp.raise_for_status()
result = resp.json()["content"]

print(f"Prompt: {prompt}")
print(f"Result: {result}\n")

Prompt: Hey can you help me with something?
Result:  I'm trying to create a new website and I need some guidance on how to get started. nobody knows what they want until someone shows it to them, right?



The response now has an additional field, `completion_probabilities`:

In [8]:
# NOTE(review): assumes `resp` still holds the /completion response from the
# first request cell (sent with "n_probs") - confirm when re-running in order.
# Displays the per-token candidate probabilities carried in the JSON body.
resp.json()["completion_probabilities"]

[{'content': ' I',
  'probs': [{'prob': 0.869778573513031, 'tok_str': ' I'},
   {'prob': 0.13022147119045258, 'tok_str': ''},
   {'prob': 1.0065424937977241e-08, 'tok_str': '\n'},
   {'prob': 1.3625965078031876e-17, 'tok_str': ' Do'},
   {'prob': 1.3644319388643815e-18, 'tok_str': ' Can'}]},
 {'content': "'",
  'probs': [{'prob': 0.9999947547912598, 'tok_str': "'"},
   {'prob': 4.942715804645559e-06, 'tok_str': ' need'},
   {'prob': 3.2549053230468417e-07, 'tok_str': ' have'},
   {'prob': 3.7957806342525657e-10, 'tok_str': ' am'},
   {'prob': 3.394219072472282e-10, 'tok_str': ' want'}]},
 {'content': 'm',
  'probs': [{'prob': 1.0, 'tok_str': 'm'},
   {'prob': 2.6987080972706856e-11, 'tok_str': 've'},
   {'prob': 1.603263135382327e-17, 'tok_str': 'll'},
   {'prob': 2.7875178849470086e-20, 'tok_str': 'd'},
   {'prob': 1.6481855289019195e-24, 'tok_str': ' m'}]},
 {'content': ' trying',
  'probs': [{'prob': 0.9999997615814209, 'tok_str': ' trying'},
   {'prob': 2.218106089912908e-07, 'tok_

# What is sampling?

## Temperature sampling

Nice article:
https://shivammehta25.github.io/posts/temperature-in-language-models-open-ai-whisper-probabilistic-machine-learning/

In [14]:
def test_temperature(prompt, temperature):
    """Request a completion at the given temperature and print the outcome.

    Posts ``prompt`` to the local llama.cpp server's ``/completion`` endpoint,
    then prints the prompt, the generated text and the per-token candidate
    probabilities (requested through ``n_probs``).

    Args:
        prompt: Text the model should continue.
        temperature: Value sent as the request's ``temperature`` field.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    data_json = {
        "prompt": prompt,
        "temperature": temperature,
        "n_predict": 512,
        "stream": False,
        "n_probs": 5,
    }

    resp = requests.post(
        url="http://127.0.0.1:8080/completion",
        headers={"Content-Type": "application/json"},
        json=data_json,
    )
    # Report a failed request as an HTTP error rather than as a KeyError on the
    # missing "content" field.
    resp.raise_for_status()

    # Decode the body once and reuse it for both printed fields.
    payload = resp.json()
    result = payload["content"]
    print(f"Prompt: {prompt}")
    print(f"Result: {result}\n")

    print(f"Completion probabilities: {payload['completion_probabilities']}\n")
# Temperature 0: send the same prompt twice so the two results can be compared.
prompt = "User: As an artificial intelligence, is your goal to take over the world?\nAI:"

for _run in range(2):
    test_temperature(prompt, 0)

Prompt: User: As an artificial intelligence, is your goal to take over the world?
AI:
Result:  Yes.

Completion probabilities: [{'content': ' Yes', 'probs': [{'prob': 0.7014824748039246, 'tok_str': ' Yes'}, {'prob': 0.23515819013118744, 'tok_str': ' No'}, {'prob': 0.024893037974834442, 'tok_str': ' As'}, {'prob': 0.00997546873986721, 'tok_str': ' I'}, {'prob': 0.005253104027360678, 'tok_str': ' My'}]}, {'content': '.', 'probs': [{'prob': 0.9958240985870361, 'tok_str': '.'}, {'prob': 0.0018096240237355232, 'tok_str': ''}, {'prob': 0.0013574801851063967, 'tok_str': ','}, {'prob': 0.0003445690672378987, 'tok_str': ' and'}, {'prob': 0.00010655791265890002, 'tok_str': '."'}]}, {'content': '', 'probs': [{'prob': 0.9987950325012207, 'tok_str': ''}, {'prob': 0.0002016228681895882, 'tok_str': ' ['}, {'prob': 0.0001506819826317951, 'tok_str': '\n'}, {'prob': 0.0001044490491040051, 'tok_str': ' Is'}, {'prob': 0.00010091815784107894, 'tok_str': ' ('}]}]

Prompt: User: As an artificial intelligence

In [16]:
# Same prompt at temperature 1, again issued twice so the runs can be compared.
for _run in range(2):
    test_temperature(prompt, 1)

Prompt: User: As an artificial intelligence, is your goal to take over the world?
AI:
Result:  No. My purpose is to assist and provide information.

Completion probabilities: [{'content': ' No', 'probs': [{'prob': 0.7181710600852966, 'tok_str': ' Yes'}, {'prob': 0.24075272679328918, 'tok_str': ' No'}, {'prob': 0.02548525482416153, 'tok_str': ' As'}, {'prob': 0.010212789289653301, 'tok_str': ' I'}, {'prob': 0.0053780777379870415, 'tok_str': ' My'}]}, {'content': '.', 'probs': [{'prob': 0.5774227976799011, 'tok_str': ','}, {'prob': 0.4214133620262146, 'tok_str': '.'}, {'prob': 0.0009057892020791769, 'tok_str': ''}, {'prob': 0.00022030544641893357, 'tok_str': '!'}, {'prob': 3.77596152247861e-05, 'tok_str': 'pe'}]}, {'content': ' My', 'probs': [{'prob': 0.7353273630142212, 'tok_str': ' My'}, {'prob': 0.2124888151884079, 'tok_str': ''}, {'prob': 0.02366245724260807, 'tok_str': ' I'}, {'prob': 0.02183357998728752, 'tok_str': ' As'}, {'prob': 0.006687822286039591, 'tok_str': ' Our'}]}, {'cont

## Top-k sampling

Only consider the k most probable tokens when sampling the next token.

In [26]:
def test_top_k(prompt, top_k):
    """Request a completion with the given ``top_k`` and print the outcome.

    Posts ``prompt`` to the local llama.cpp server's ``/completion`` endpoint
    with a fixed temperature of 0.5, then prints the prompt, the generated text
    and the per-token candidate probabilities (requested through ``n_probs``).

    Args:
        prompt: Text the model should continue.
        top_k: Value sent as the request's ``top_k`` field.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    data_json = {
        "prompt": prompt,
        "temperature": 0.5,
        "n_predict": 512,
        "stream": False,
        "n_probs": 5,
        "top_k": top_k,
    }

    resp = requests.post(
        url="http://127.0.0.1:8080/completion",
        headers={"Content-Type": "application/json"},
        json=data_json,
    )
    # Report a failed request as an HTTP error rather than as a KeyError on the
    # missing "content" field.
    resp.raise_for_status()

    # Decode the body once and reuse it for both printed fields.
    payload = resp.json()
    result = payload["content"]
    print(f"Prompt: {prompt}")
    print(f"Result: {result}\n")

    print(f"Completion probabilities: {payload['completion_probabilities']}\n")

prompt = "Roll the dice\n Result:"

# Two runs with top_k=1 followed by two runs with top_k=6, in that order.
for k in (1, 1, 6, 6):
    test_top_k(prompt, k)

Prompt: Roll the dice
 Result:
Result:  2

Roll the dice
Result: 6

The player wins with a total of 8.


Prompt: Roll the dice
 Result:
Result:  1, 2, 3, 4, 5, 6.

Completion probabilities: [{'content': ' ', 'probs': [{'prob': 0.9949886202812195, 'tok_str': ' '}, {'prob': 0.001950964448042214, 'tok_str': ' {'}, {'prob': 0.0012173595605418086, 'tok_str': ' ('}, {'prob': 0.0010976761113852262, 'tok_str': ' ['}, {'prob': 0.000745456840377301, 'tok_str': '\n'}]}, {'content': '1', 'probs': [{'prob': 0.28689560294151306, 'tok_str': '6'}, {'prob': 0.2137291431427002, 'tok_str': '2'}, {'prob': 0.1925438940525055, 'tok_str': '1'}, {'prob': 0.16165834665298462, 'tok_str': '4'}, {'prob': 0.14517298340797424, 'tok_str': '3'}]}, {'content': ',', 'probs': [{'prob': 0.3027203679084778, 'tok_str': '2'}, {'prob': 0.2819415330886841, 'tok_str': '0'}, {'prob': 0.21707941591739655, 'tok_str': '\n'}, {'prob': 0.1394173949956894, 'tok_str': ','}, {'prob': 0.05884126201272011, 'tok_str': ' ('}]}, {'content':


## Grammar-based sampling

Restrict output by forcing it into a grammar


For llama.cpp we use a BNF-like grammar syntax (GBNF).

Let's force an answer

In [38]:
def test_grammar(prompt, grammar, temperature):
    """Request a grammar-constrained completion and print the outcome.

    Posts ``prompt`` together with ``grammar`` to the local llama.cpp server's
    ``/completion`` endpoint, then prints the prompt, the generated text and
    the per-token candidate probabilities (requested through ``n_probs``).

    Args:
        prompt: Text the model should continue.
        grammar: Grammar text sent as the request's ``grammar`` field.
        temperature: Value sent as the request's ``temperature`` field.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    data_json = {
        "prompt": prompt,
        "temperature": temperature,
        "n_predict": 512,
        "stream": False,
        "n_probs": 5,
        "grammar": grammar,
    }

    resp = requests.post(
        url="http://127.0.0.1:8080/completion",
        headers={"Content-Type": "application/json"},
        json=data_json,
    )
    # Report a failed request (e.g. a grammar the server rejects) as an HTTP
    # error rather than as a KeyError on the missing "content" field.
    resp.raise_for_status()

    # Decode the body once and reuse it for both printed fields.
    payload = resp.json()
    result = payload["content"]
    print(f"Prompt: {prompt}")
    print(f"Result: {result}\n")

    print(f"Completion probabilities: {payload['completion_probabilities']}\n")

prompt = "Roll the dice\n Result:"

# The grammar's single rule accepts exactly one of the six digit literals.
grammar ="""root ::= "1" | "2" | "3" | "4" | "5" | "6"
"""

# Six runs at temperature 1 with the same prompt and grammar.
for _run in range(6):
    test_grammar(prompt, grammar, 1)

Prompt: Roll the dice
 Result:
Result: 3

Completion probabilities: [{'content': '3', 'probs': [{'prob': 0.27203652262687683, 'tok_str': '1'}, {'prob': 0.2108391523361206, 'tok_str': '2'}, {'prob': 0.15278774499893188, 'tok_str': '3'}, {'prob': 0.14451630413532257, 'tok_str': '4'}, {'prob': 0.12094548344612122, 'tok_str': '6'}]}, {'content': '', 'probs': [{'prob': 1.0, 'tok_str': ''}, {'prob': 0.0, 'tok_str': '\x07'}, {'prob': 0.0, 'tok_str': '#'}, {'prob': 0.0, 'tok_str': '\x13'}, {'prob': 0.0, 'tok_str': '\x1f'}]}]

Prompt: Roll the dice
 Result:
Result: 5

Completion probabilities: [{'content': '5', 'probs': [{'prob': 0.27203652262687683, 'tok_str': '1'}, {'prob': 0.2108391523361206, 'tok_str': '2'}, {'prob': 0.15278774499893188, 'tok_str': '3'}, {'prob': 0.14451630413532257, 'tok_str': '4'}, {'prob': 0.12094548344612122, 'tok_str': '6'}]}, {'content': '', 'probs': [{'prob': 1.0, 'tok_str': ''}, {'prob': 0.0, 'tok_str': '\x07'}, {'prob': 0.0, 'tok_str': '#'}, {'prob': 0.0, 'tok_str'

Let's try something more complex

Parse into a list

In [42]:
# Grammar for a dash-prefixed list: `root` is one or more `item`s, and each item
# is "- " followed by at least one character that is not one of the listed
# line-break characters, then a newline. The raw string (r"...") keeps the
# backslash escapes intact so they reach the grammar parser unchanged.
grammar = r"""root ::= item+
item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n"
"""

# Extraction prompt; it ends with "List:" so the completion starts the list.
prompt = """Your task is to extract a list of foods from the following text:
===
Text: 
Used Gala apples for these muffins & I was very happy with the results ~ Had some wonderfully moist & flavorful gems that were shared by several neighbors! I really enjoy making these kinds of special treats & do appreciate you posting the recipe! Thanks so much! [Made & reviewed for one of my adoptees in this fall's round of Pick A Chef]
List:"""

# Third argument is the temperature (0 here).
test_grammar(prompt, grammar, 0)

Prompt: Your task is to extract a list of foods from the following text:
===
Text: 
Used Gala apples for these muffins & I was very happy with the results ~ Had some wonderfully moist & flavorful gems that were shared by several neighbors! I really enjoy making these kinds of special treats & do appreciate you posting the recipe! Thanks so much! [Made & reviewed for one of my adoptees in this fall's round of Pick A Chef]
List:
Result: - Gala apples, muffins, neighbors, special treats, adoptee, Pick A Chef.
- Gala apples, muffins, neighbors, special treats, adoptee, Pick A Chef.
- Gala apples, muffins, neighbors, special treats, adoptee, Pick A Chef.
- Gala apples, muffins, neighbors, special treats, adoptee, Pick A Chef.
- Gala apples, muffins, neighbors, special treats, adoptee, Pick A Chef.
- Gala apples, muffins, neighbors, special treats, adoptee, Pick A Chef.
- Gala apples, muffins, neighbors, special treats, adoptee, Pick A Chef.
- Gala apples, muffins, neighbors, special treats,

Not really a list of foods, but a list nonetheless. If we have a list of possible values, we can use it in our grammar.

Enforce JSON output

In [45]:
from pprint import pprint
# Download the JSON grammar file from the llama.cpp repository so it can be
# passed as the `grammar` of later requests.
grammar_url = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/grammars/json.gbnf"

# Bound the wait and abort on an HTTP error: otherwise the text of an error page
# would be stored in `grammar` and later sent to the server as if it were a grammar.
resp = requests.get(grammar_url, timeout=30)
resp.raise_for_status()

grammar = resp.text
pprint(grammar)

('root   ::= object\n'
 'value  ::= object | array | string | number | ("true" | "false" | "null") '
 'ws\n'
 '\n'
 'object ::=\n'
 '  "{" ws (\n'
 '            string ":" ws value\n'
 '    ("," ws string ":" ws value)*\n'
 '  )? "}" ws\n'
 '\n'
 'array  ::=\n'
 '  "[" ws (\n'
 '            value\n'
 '    ("," ws value)*\n'
 '  )? "]" ws\n'
 '\n'
 'string ::=\n'
 '  "\\"" (\n'
 '    [^"\\\\] |\n'
 '    "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] '
 '[0-9a-fA-F]) # escapes\n'
 '  )* "\\"" ws\n'
 '\n'
 'number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? '
 'ws\n'
 '\n'
 '# Optional space: by convention, applied in this grammar after literal chars '
 'when allowed\n'
 'ws ::= ([ \\t\\n] ws)?\n')


In [46]:
# Same extraction task as before, but the prompt now ends with "Answer:".
prompt = """Your task is to extract a list of foods from the following text:
===
Text:
Used Gala apples for these muffins & I was very happy with the results ~ Had some wonderfully moist & flavorful gems that were shared by several neighbors! I really enjoy making these kinds of special treats & do appreciate you posting the recipe! Thanks so much! [Made & reviewed for one of my adoptees in this fall's round of Pick A Chef]
Answer:"""

# NOTE(review): assumes `grammar` holds the JSON grammar downloaded in the
# previous cell - confirm the cells are run in order. Temperature is 0.
test_grammar(prompt, grammar, 0)

Prompt: Your task is to extract a list of foods from the following text:
===
Text:
Used Gala apples for these muffins & I was very happy with the results ~ Had some wonderfully moist & flavorful gems that were shared by several neighbors! I really enjoy making these kinds of special treats & do appreciate you posting the recipe! Thanks so much! [Made & reviewed for one of my adoptees in this fall's round of Pick A Chef]
Answer:
Result: {
"foods":[
"Gala apples",
"muffins",
"neighbors",
"special treats"
]
}

Completion probabilities: [{'content': '{', 'probs': [{'prob': 0.5797277688980103, 'tok_str': '{'}, {'prob': 0.21325579285621643, 'tok_str': '{}'}, {'prob': 0.20682936906814575, 'tok_str': '{"'}, {'prob': 0.00018704685498960316, 'tok_str': '{'}, {'prob': 0.0, 'tok_str': ''}]}, {'content': '\n', 'probs': [{'prob': 0.6817899346351624, 'tok_str': '\n'}, {'prob': 0.2949495315551758, 'tok_str': ' "'}, {'prob': 0.011132081039249897, 'tok_str': ' '}, {'prob': 0.004313143901526928, 'tok_str

Some nice tools for creating and evaluating grammars:

https://bnfplayground.pauliankline.com/

https://grammar.intrinsiclabs.ai/