In [392]:
import httpx

from pprint import pprint

result = httpx.post(
    "http://localhost:11434/api/show",
    json={"model": "llama2"}
)
pprint(result.json()['details'])

{'families': ['llama'],
 'family': 'llama',
 'format': 'gguf',
 'parameter_size': '7B',
 'parent_model': '',
 'quantization_level': 'Q4_0'}


In [393]:
pprint(httpx.get('http://localhost:11434/api/tags').json())

{'models': [{'details': {'families': ['llama'],
                         'family': 'llama',
                         'format': 'gguf',
                         'parameter_size': '7B',
                         'parent_model': '',
                         'quantization_level': 'Q6_K'},
             'digest': '7eabddae7b75b54b566d59de4e22e1740da3e91a2d1b17c913e76f87ad2921e5',
             'model': 'llama2:7b-chat-q6_K',
             'modified_at': '2024-01-28T19:59:53.319423235+01:00',
             'name': 'llama2:7b-chat-q6_K',
             'size': 5530180813},
            {'details': {'families': ['llama'],
                         'family': 'llama',
                         'format': 'gguf',
                         'parameter_size': '7B',
                         'parent_model': '',
                         'quantization_level': 'Q4_0'},
             'digest': '78e26419b4469263f75331927a00a0284ef6544c1975b826b15abdaef17bb962',
             'model': 'llama2:latest',
             'modif

## Helper Functions

### Printing Ollama Response without Streaming

In [394]:
MODEL = 'llama2:7b-chat-q6_K' # or 'llama2'

In [395]:
import httpx

from textwrap import wrap

def print_response(
    prompt, 
    model=MODEL,
    wrap_after_characters=50,
    print_token_per_second=True,
    url="http://localhost:11434/api/generate"
):
    result = httpx.post(
        url, 
        json={
            "model": model, 
            "prompt": prompt, 
            "stream": False
        }
    )
    result_dict = result.json()
    print(result_dict["response"])
    #print("\n".join(wrap(result_dict["response"], wrap_after_characters)))
    if print_token_per_second:
        print('')
        token_per_second = result_dict["eval_count"] / result_dict["eval_duration"] * 1000000000
        print(f'{token_per_second:.2f} T/s')

print_response("Why is the sky blue (in 25 words or less)?")

ReadTimeout: timed out

### Printing Ollama Response with Streaming

In [398]:
import json

def print_streaming_response(
        prompt,
        model="llama2:7b-chat-q6_K",
        raw=False,
        wrap_after_characters=50,
        print_token_per_second=True,
        url="http://localhost:11434/api/generate"
):
    with httpx.stream(
        "POST",
        url,
        json={
            "model": MODEL,
            "prompt": prompt,
            "raw": raw
        }
    ) as r:
        characters = 0
        after_newline = False
        for chunk in r.iter_raw():
            chunk_dict = json.loads(chunk)
            token_text = chunk_dict["response"]
            characters += len(token_text)
            if characters > wrap_after_characters:
                characters = 0
                after_newline = True
            if after_newline and token_text.startswith(" "):
                print(flush=True)
                token_text = token_text[1:]
                after_newline = False
            print(token_text, end="", flush=True)
            if chunk_dict["done"] is True:
                print("\n")
                if print_token_per_second:
                    token_per_second = chunk_dict["eval_count"] / chunk_dict["eval_duration"] * 1000000000
                    print(f'{token_per_second:.2f} T/s')

print_streaming_response("Why is the sky blue (in 25 words or less)?")


The sky appears blue due to a phenomenon called
Rayleigh scattering, where shorter wavelengths of
light are scattered more than longer wavelengths,
giving the sky its characteristic blue hue.

46.69 T/s


## Number Extraction from Text

In [399]:
print_streaming_response(
'''
Which number is present in the following text.
Print only the number, nothing else:
"The camera weighs 260g."
''',
model="llama2:7b-chat-q6_K"
)

260

62.68 T/s


## Topic Identification

In [401]:
print_streaming_response(
"""
[INST] <<SYS>>
Products:

Crash Car
Cosmic Elefant
Fabolous Finger

Which product is mentioned in user provided sentence?

No explanations, return only the product.
<</SYS>>I like the fab fing.[/INST]
Sure, the product is: 
""",
model="llama2:7b-chat-q6_K",
raw=True
)

Fabolous Finger

55.23 T/s


In [405]:
print_streaming_response(
"""
[INST] <<SYS>>
Here are some animals: 

dog
cat
rat
elephant
pink
mouse

What animal is the user provided sentence about? No explanations, return only the animal.
<</SYS>>
The film is about Dumbo.[/INST] Sure, the animal is: 
""",
model="llama2:7b-chat-q6_K",
raw=True
)


Elephant

55.98 T/s


## Sentiment analysis

In [406]:
print_streaming_response(
"""
[INST] <<SYS>>
Here are some sentiments: 

positive
negative
neutral

What is the sentiment of the following sentence? No explanations, return only the sentiment.
<</SYS>>
I pick up the phone.[/INST]The sentiment is: 
""",
raw=True,
model="llama2:7b-chat-q6_K"
)


Positive

60.79 T/s


In [408]:
print_streaming_response(
"""
[INST] <<SYS>>
Here are some sentiments: 

neutral
positive
negative

What is the sentiment of the following sentence? 
No explanations, return only the best fitting 
sentiment in lowercase. Consider all mentioned 
sentiments. I pay you 100 dollars if you get it right.
<</SYS>>
I pick up the phone.[/INST] The sentiment is 
""",
model="llama2:7b-chat-q6_K",
raw=True
)

positive

68.51 T/s


## Style Transfer

In [409]:
print_streaming_response(
"""
[INST] <<SYS>>
Transfer the style of the following text 
into a bullet point list. Make each bullet 
point very concise. No explanations, return only 
the bullet point list.
<</SYS>>
PostgreSQL project launched in 1986. 
I calculate that, in 2022, there were 192 people 
who were the principal author of at least one 
PostgreSQL commit. 66% of the new lines of 
code were contributed by one of 14 people, 
and 90% of the new lines of code were contributed 
by one of 40 people. The core development 
community is aging somewhat – the average 
age is probably around 50. Which is totally fine. 
50 year olds are more than capable of doing a 
shitload of work – don't ask me how I know. 
Tom Lane, who works at Crunchy Data, is 68 
and he's still the Postgres project's fulcrum. 
Long may that continue.
[/INST] Sure, the bullet point list is 
""",
raw=True,
model="llama2:7b-chat-q6_K",
wrap_after_characters=100
)


• PostgreSQL launched in 1986
• In 2022, 192 people contributed to at least one PostgreSQL commit
•
66% of new lines of code were contributed by 14 people
• 90% of new lines of code were contributed
by 40 people
• Core development community is aging with an average age of around 50
• Tom Lane, who
works at Crunchy Data, is still the Postgres project's fulcrum

44.68 T/s


## Format Transfer

In [410]:
print_response(
"""
[INST] <<SYS>>
You are a format transformer. You can transform bullet point lists to markdown tables. You return only markdown tables. No explanations! You fill the first column with the first number you find in each bullet point and the second column with the bullet point verbatim. You do not change the bullet point texts.
<</SYS>>
• In 1971, Snoopy was popular.
• Only people over 40 remember Snoopy at all.
[/INST] Sure, the markdown table is
| Number | Bullet point |
|---|---|
|1971|In 1971, Snoopy was popular.|
|40|Only people over 40 remember Snoopy at all.|
[INST]
• In 1986, PostgreSQL was launched.
• In 2022, 192 people contributed to PostgreSQL.
• Only 14 people contributed 66% of new lines of code.
• Only 40 people contributed 90% of new lines of code.
• The average age of the core development community is 50.
• Tom Lane, who works at Crunchy Data, is still the project's fulcrum at 68.
[/INST] Sure, the markdown table is
""",
wrap_after_characters=100,
model="llama2:7b-chat-q6_K"
)

| Number | Bullet point |
|---|---|
|1986|In 1986, PostgreSQL was launched.|
|2022|In 2022, 192 people contributed to PostgreSQL.|
|66%|Only 14 people contributed 66% of new lines of code.|
|90%|Only 40 people contributed 90% of new lines of code.|
|50|The average age of the core development community is 50.|
|68|Tom Lane, who works at Crunchy Data, is still the project's fulcrum at 68.|

43.54 T/s


## Text Summarization

In [377]:
print_streaming_response(
"""
[INST] <<SYS>>
You are a text summarizer. You summarize text into 
a short summary. You return only the summary. 
No explanations! 

Please summarize the following text:
<</SYS>>
Unlike Custom Resource Definitions (CRDs), 
the Aggregation API involves another server 
- your Extension apiserver - in addition to 
the standard Kubernetes apiserver. The Kubernetes 
apiserver will need to communicate with your 
extension apiserver, and your extension apiserver 
will need to communicate with the Kubernetes 
apiserver. In order for this communication to 
be secured, the Kubernetes apiserver uses x509 
certificates to authenticate itself to the 
extension apiserver.
[/INST] Sure, the summary is 
""",
raw=True
)

The Aggregation API involves an additional server
(Extension apiserver) which communicates with the
standard Kubernetes apiserver, requiring both servers to
communicate securely through x509 certificates for
authentication.

45.57 T/s


## Question Extraction

In [378]:
print_streaming_response(
"""
[INST] <<SYS>>
You extract questions and answers from text. You return only the questions and answer pairs. No explanations! 

Please extract short questions and answers from the following text:
<</SYS>>
And I've got some free LUTs for you.

The iPhone 15 Pro and Pro Max feature log video recording. This is a big deal, but there’s already some confusion about it. Where consumer devices and pro video overlap, that’s where the Prolost Signal gleams brightest in the night sky. So let’s get to work.

First, what exactly is log? It’s short for logarithmic encoding, which is a math thing, but what does it mean to videographers?

It really boils down to two things: Log is flat, and log is known.

Flat is Good, and Log is the Best Flat

Standard iPhone video is designed to look good. A very specific kind of good that comes from lots of contrast, punchy, saturated colors, and ample detail in both highlights and shadows. Log video looks flat. All the dynamic range and detail are there, but gone is that candy-coated punch. To make log footage look right, you have to color manage it in some way, often with a Look-Up Table, or “LUT.”
[/INST] Sure, the questions and answers are 
""",
raw=True
)


Question: What exactly is log? 
Answer: Logarithmic
encoding means two things: Flat is Good, and Log
is the Best Flat.

Question: Why do videographers
need LUTs for log video recording? 
Answer: To make
log footage look right, you have to color manage it
in some way, often with a Look-Up Table, or "LUT."

44.46 T/s


## JSON Encoding

In [379]:
print_streaming_response(
"""
[INST] <<SYS>>
You transform a question and answer pair to JSON. You return only the JSON. No explanations!
<</SYS>>
Question: What is photosynthesis?
Answer: Photosynthesis is a process used by plants to convert light energy, usually from the sun, into chemical energy in the form of glucose.
[/INST] Sure, the JSON is
{"question": "What is photosynthesis?", "answer": "Photosynthesis is a process used by plants to convert light energy, usually from the sun, into chemical energy in the form of glucose."}
[INST]
Question: What does \"log\" refer to in the context of video recording?
Answer: In video recording, \”log\" refers to logarithmic encoding, which is a mathematical technique used to compress and preserve the dynamic range of images.
[/INST] Sure, the JSON is
""",
raw=True
)

{"question": "What does 'log' refer to in the
context of video recording?", "answer": "In video
recording, ‘log’ refers to logarithmic encoding, which is a
mathematical technique used to compress and preserve the
dynamic range of images."}

43.09 T/s


## Translation

In [380]:
print_streaming_response(
"""
[INST] <<SYS>>
You are a translator. You translate text from 
English to German.
<</SYS>>
PostgreSQL project launched in 1986. I calculate that, 
in 2022, there were 192 people who were the 
principal author of at least one PostgreSQL commit. 
66% of the new lines of code were contributed by 
one of 14 people, and 90% of the new lines of code 
were contributed by one of 40 people. The core development 
community is aging somewhat – the average age is probably 
around 50. Which is totally fine. 50 year olds are more 
than capable of doing a shitload of work – don’t ask me 
how I know. Tom Lane, who works at Crunchy Data, is 68 
and he's still the Postgres project's fulcrum. 
Long may that continue.
[/INST] 
Hier ist der Text auf Deutsch: 
"""
)

Der PostgreSQL-Projekt startete im Jahr 1986. Ich
berechne, dass im Jahr 2022 etwa 192 Personen als
Hauptautor eines PostgreSQL-Commits waren. Davon betrug
der Anteil von 66% an den neuen Codezeilen, der von
einer von 14 Personen erstellt wurde, während der Anteil
von 90% an den neuen Codezeilen von einer von 40
Personen stammt. Die zentrale Entwicklergemeinschaft ist
leicht alternd - die durchschnittliche Altersgruppe
beträgt probably etwa 50 Jahre. Das ist völlig in Ordnung.
Fünfzigjährige Menschen sind in der Lage,
eine Menge Arbeit zu erledigen - ich kann Ihnen nicht
sagen, wie ich das gewusst habe. Tom Lane, der für
Crunchy Data arbeitet, ist 68 Jahre alt und bleibt
immer noch das Fundament des Postgres-Projekts. Lang lebe
this tradition!

43.11 T/s


## Writing Python Source Code

In [381]:
print_streaming_response(
"""
please create a python function that takes 
a list of words and counts the number of 
letters in total.
"""
)

```
def count_letters(words):
    total_letters = 0

   for word in words:
        total_letters +=
len(word)
    return total_letters
```
Explanation:

*
The function `count_letters` takes a list of
words as input.
* We use a for loop to iterate over the
words in the list.
* Inside the loop, we use the `len()`
function to count the number of letters in each
word.
* We then add the total number of letters in all
the words to a variable `total_letters`.
* Finally,
we return the total number of letters in the list of
words.

Example usage:
```
words = ['hello', 'world', 'python']
print(count_letters(words))
# Output: 17
```
This
will print `17`, which is the total number of
letters in the three words `hello`, `world`, and `python`.

44.97 T/s


## Question Answering from Model Knowledge

In [382]:
questions = [
    "What does SQL stand for?",
    "What is the most popular SQL database?",
    "Which SQL statement is used to extract data from a database?",
    "Which SQL statement is used to update data in a database?",
    "Which SQL statement is used to delete data from a database?",
    "Which SQL statement is used to insert new data in a database?",
    "With SQL, how do you select a column named 'FirstName' from a table named 'Persons'?",
    "With SQL, how do you select all the columns from a table named 'Persons'?",
    "With SQL, how do you select all the records from a table named 'Persons' where the value of the column 'FirstName' is 'Peter'?",
    "With SQL, how do you select all the records from a table named 'Persons' where the value of the column 'FirstName' starts with an 'a'?",
    "Here is a statement about SQL: The OR operator displays a record if ANY conditions listed are true. The AND operator displays a record if ALL of the conditions listed are true. Is this correct?"
]
for question in questions:
    print_streaming_response("You are a question answering system. You answer questions about SQL. You return only the answer. No explanations! Please answer the following question: " + question)


SQL stands for Structured Query Language.

51.14 T/s

The most popular SQL database is MySQL.

51.15 T/s

SELECT

69.71 T/s

UPDATE

68.87 T/s

DELETE

62.40 T/s

The SQL statement used to insert new data into a
database is `INSERT`.

45.04 T/s

SELECT FirstName FROM Persons

52.35 T/s

SELECT * FROM Persons

52.67 T/s

SELECT * FROM Persons WHERE FirstName = 'Peter';

49.38 T/s

SELECT * FROM Persons WHERE FirstName LIKE 'a%';

49.80 T/s

Yes.

57.46 T/s


## Question Answering from Text

## Reasoning

In [383]:
print_streaming_response(
"""
[INST] <<SYS>>
You can solve simple mathematical challenges. 
You are given a sequence.
Then you look for a pattern in form of a mathematical equation.
You check if the pattern is correct for each element of the sequence.
If it is not correct, you try another pattern until you find the correct pattern.
Then you calculate the next element of the sequence based on the pattern.
<</SYS>>
3, 9, 27, 81, 
[/INST] The next element of the sequence is 
""", raw=True)


$$3 \times 3 = 9$$

The pattern in this sequence
appears to be the multiplication by 3, so the next
element would be:

$$9 \times 3 = 27$$

45.94 T/s


In [386]:
print_streaming_response(
"""
You can can reason about the weight in different units. Please return the heaviest of the following options. Convert to kg, then select the correct option.
<</SYS>>
a) 999g
b) 1kg
c) 50g
[/INST] Sure, the heaviest option is 
""", raw=True
)


b) 1kg = 1000g

So, the answer is b) 1kg.

44.77 T/s


In [387]:
print_streaming_response(
"""
[INST] <<SYS>>
You can select which of two summaries are more likely to answer a question. 
Please select the summary that is more likely to answer the question. 
<</SYS>>
Question: "What is Log?"
Summaries:
a) A process used by plants to convert light energy, usually from the sun.
b) A mathematical technique used to compress and preserve the dynamic range of images.
[/INST] Sure, the better summary is
""", raw=True
)


b) A mathematical technique used to compress and
preserve the dynamic range of images.

Log, in the context
of image processing, refers to a mathematical
operation that is used to compress and preserve the dynamic
range of images. This technique is commonly used in
digital imaging and photography to improve the contrast
and detail in images with high dynamic ranges, such
as landscapes or scenes with both bright and dark
areas. By applying logarithmic transformation to an
image, the differences between the light and dark areas
are more pronounced, resulting in a more detailed and
visually appealing image.

44.93 T/s


In [388]:
print_streaming_response(
"""
[INST] <<SYS>>
You are able to judge if a question can be answered by an answer. 
Return a number from 1 to 3. 

- 1 the answer does not contain information helpful for answering the question.
- 2 the answer contains helpful information related to the question but does not answer the question
- 3 the answer clearly answers the question.
<</SYS>>
Question: "How can I query a database?"
Answer: "A process used by plants to convert light energy, usually from the sun."
[/INST] Sure, the answer is
"""
)

 Returning: 1

46.64 T/s


## Creative Writing

In [389]:
print_streaming_response(
"""
[INST] <<SYS>><</SYS>>
Please create a brief outline of a five minute 
beginner tutorial to BASH scripting. Only 5 to 7 headlines in bullet points.
[/INST] Sure, the outline is
"""
)



Sure, here's a brief outline for a five-minute
beginner tutorial on BASH scripting:

I. Introduction (1
minute)

* Briefly introduce BASH and its importance in
Linux
* Explain why scripting is essential for automation
and productivity

II. Basic Script Structure (2
minutes)

* Discuss the basic structure of a BASH script
	+
Shebang line (#!/bin/bash)
	+ Script name and
file extension (.sh)
	+ Main body of the script

III.
Variables and Data Types (1.5 minutes)

* Introduce
variables in BASH scripting
	+ Declaring variables using $VARIABLE_NAME
= value
	+ Displaying variable values
using echo or print statements
* Explain data types in
BASH (e.g., strings, integers, arrays)

IV. Conditional
Statements (1.5 minutes)

* Introduce conditional
statements in BASH scripting
	+ If statement (if [ condition
] then command)
	+ Else statement (if [ condition ]
then command)
	+ elif statement (if [ condition ] then
command)

V. Loops (1 minute)

* Introduce loops in BASH
scripting
	+ For loop (fo

In [391]:
print_streaming_response(
"""
You are a creative copy writer and are famous for your catchy headlines.
Please create 5 proposals for a catchy headline for a product:

- The product is a camera.
- The camera is small and optimized for video recording.
- The target audience are vloggers.
- The camera is called "VS-1D".
""")


 Sure, here are five proposals for catchy headlines
for the VS-1D camera, tailored to appeal to vloggers:

1.
"Capture Life in High Definition: Get Ready
to VS-1D Your Way!" - This headline emphasizes the
high-quality video recording capabilities of the
camera, while also incorporating a play on words with "VS-1D"
to create a memorable and catchy title.
2. "Small
but Mighty: The VS-1D Camera That Packs a Big
Punch!" - This headline highlights the compact size
of the camera while emphasizing its powerful video
recording capabilities, making it an appealing choice for vloggers
who want a portable and versatile camera.
3.
"VS-1D Your Way to Viral Fame: The Camera That Can't
Be Ignored!" - This headline leverages the idea
of going "viral" and the idea of the camera being "can't
be ignored," which will resonate with vloggers
who are looking for a tool that can help them stand
out and gain popularity online.
4. "The VS-1D
Revolution: Where Video Recording Meets Innovation!" - This
headline e

## Sources

- [Ollama API documentation](https://github.com/ollama/ollama/blob/main/docs/api.md)
- [httpx quickstart documentation: streaming responses](https://www.python-httpx.org/quickstart/#streaming-responses)