# Various interactions with local Llama3 8b

In [1]:
import os
import requests
import json
import re
from typing import List, Dict, Any, Optional

OLLAMA_ENDPOINT = "http://localhost:11434"
MODEL_NAME = "llama3:8b" 

In [2]:
def check_ollama_status():
    try:
        response = requests.get(f"{OLLAMA_ENDPOINT}/api/tags")
        if response.status_code == 200:
            models = response.json().get("models", [])
            if models:
                print(f"Ollama is running with the following models:")
                for model in models:
                    print(f" - {model['name']}")
                
                # Check if Llama 3 8B is available
                llama_models = [m for m in models if 'llama3:8b' in m['name'].lower()]
                if llama_models:
                    print(f"\n✓ Llama 3 8B model found")
                else:
                    print(f"\n⚠ Llama 3 8B was not found")
            else:
                print("Ollama is running, but no models were found. Run 'ollama pull llama3:8b' in the terminal.")
        else:
            print("Could not connect to Ollama.")
    except Exception as e:
        print(f"Error connecting to Ollama: {e}")
        print("Make sure the Ollama service is running with: 'systemctl --user start ollama'")

check_ollama_status()

Ollama is running with the following models:
 - german-mixtral:latest
 - mixtral:latest
 - llama3:8b
 - mannix/llama3.1-8b-abliterated:latest
 - llama3.1:latest
 - dolphin-llama3:latest

✓ Llama 3 8B model found


In [3]:
class Llama3Client:
    def __init__(self, base_url: str = OLLAMA_ENDPOINT, model: str = MODEL_NAME):
        self.base_url = base_url
        self.model = model
        
        # Default parameters optimized for llama3
        self.default_params = {
            "temperature": 0.7,
            "top_p": 0.9,
            "top_k": 40,
            "num_ctx": 4096  
        }
        
    def chat_completion(self, messages: List[Dict[str, str]], 
                         max_tokens: Optional[int] = None, 
                         stream: bool = False,
                         **kwargs):
        endpoint = f"{self.base_url}/api/chat"
        
        # For inference
        options = self.default_params.copy()

        for key, value in kwargs.items():
            options[key] = value
            
        if max_tokens:
            options["num_predict"] = max_tokens
        
        payload = {
            "model": self.model,
            "messages": messages,
            "stream": stream,
            "options": options
        }
            
        if stream:
            response = requests.post(endpoint, json=payload, stream=True)
            return response.iter_lines()
        else:
            response = requests.post(endpoint, json=payload)
            return response.json()
    
    def get_token_count(self, messages: List[Dict[str, str]]):
        """Estimate the number of tokens in the messages"""
        # ~4 chars/token for llama3
        total_chars = sum(len(m["content"]) for m in messages)
        estimated_tokens = total_chars // 4
        return estimated_tokens

### Send a single message to the model


In [4]:
client = Llama3Client()

message_text = [
    {"role":"system","content":"You are an AI assistant that helps people find answers. You use the Llama 3 8B model and can answer complex questions."},
    {"role":"user","content":"Tell me what flat white is and how it differs from other popular coffee beverages like cappuchino?"},
]

completion = client.chat_completion(message_text, max_tokens=200)

print(completion["message"]["content"])

A delightful question!

A flat white is a type of coffee drink that originated in Australia and New Zealand. It's a velvety-smooth, medium-to-strong coffee beverage made with espresso, microfoam (steamed milk that's been frothed to a consistency similar to whipped cream), and a thin layer of foam on top.

Now, let's compare it to other popular coffee drinks:

1. Cappuccino: A cappuccino is similar to a flat white in that it also contains espresso, steamed milk, and foam. However, the key difference lies in the ratio of these components:
	* Cappuccino: 1/3 espresso, 1/3 steamed milk, and 1/3 foam
	* Flat White: 2/3 espresso to 1/3 microfoam (no distinct layers)

In a cappuccino, you'll typically find a clear distinction between the three components, whereas in a flat white


### Stream responses
Better for longer conversations

In [5]:
stream_resp = client.chat_completion(message_text, max_tokens=200, stream=True)

full_response = ""
for line in stream_resp:
    if line:
        chunk = json.loads(line)
        if chunk.get("message") and chunk["message"].get("content"):
            content = chunk["message"]["content"]
            full_response += content
            print(content, end="", flush=True)

A great question!

A flat white is a type of coffee drink that originated in Australia and New Zealand, characterized by its smooth, velvety texture and rich flavor profile. It's made with espresso and steamed milk, but what sets it apart from other popular coffee drinks like cappuccino is the ratio of espresso to milk.

In a flat white, the ratio is typically 1:3 or 1:4 (one part espresso to three or four parts milk), which means that the drink is more milky than espresso-forward. The steamed milk is also heated to a lower temperature than in a cappuccino, resulting in a silky texture rather than a thick, foamy one.

In contrast, a traditional cappuccino typically has a 1:5 or 1:6 ratio of espresso to milk, with a thicker layer of foam on top. This means that the drink is more espresso-forward and has a stronger, more intense flavor profile.

Other key

### Dialouge

In [6]:
def conversation(messages, print_response=True):
    response = client.chat_completion(messages)
    assistant_message = response["message"]
    
    if print_response:
        print(f"Assistant: {assistant_message['content']}\n")
    
    # Add message to the dialogue
    messages.append(assistant_message)
    return messages

# Start new conversation
convo = [
    {"role": "system", "content": "You are a helpful assistant who can explain complex topics clearly."}
]

convo.append({"role": "user", "content": "What are the key differences between traditional neural networks and transformer models?"})
convo = conversation(convo)

convo.append({"role": "user", "content": "Can you give me an example where these differences matter in practice?"})
convo = conversation(convo)

convo.append({"role": "user", "content": "Which programming languages and libraries should I learn to work with these models?"})
convo = conversation(convo)

Assistant: Traditional neural networks, also known as feedforward neural networks or recurrent neural networks (RNNs), have been the backbone of deep learning for many years. However, Transformer models, introduced in 2017 by Vaswani et al., have revolutionized the field of natural language processing and beyond.

Here are the key differences between traditional neural networks and Transformer models:

1. **Sequential vs. Parallel Processing**: Traditional RNNs process input sequences sequentially, one element at a time. Transformers, on the other hand, process input sequences in parallel using self-attention mechanisms.
2. **Attention Mechanism**: Transformers use multi-head self-attention to weigh the importance of different parts of the input sequence relative to each other and the model's internal state. This allows for more flexible and context-aware processing. Traditional RNNs rely on recurrence or convolutional layers to capture long-range dependencies.
3. **Encoder-Decoder Arc

### Document Summarization


In [7]:
def summarize_document(text, max_length=200, focus_points=None):
    
    # Estimate if text fits within context ~4 char/token
    estimated_tokens = len(text) // 4  
    
    instructions = f"Summarize the following text in about {max_length} words."
    
    if focus_points:
        focus_str = ", ".join(focus_points)
        instructions += f" Focus particularly on these aspects: {focus_str}."
    
    if estimated_tokens > 3500:
        print("Warning: The text might be too long for the context window. The summary might be incomplete.")
    
    messages = [
        {"role": "system", "content": "You are an assistant that creates precise summaries, capturing the key points."},
        {"role": "user", "content": f"{instructions}\n\nDOCUMENT: {text}"}
    ]
    
    # Optimized parameters for summaries
    response = client.chat_completion(messages, temperature=0.3, top_p=0.95)
    return response["message"]["content"]

# Example: Summarize a longer text
sample_text = """
The axolotl  is a paedomorphic salamander closely related to the tiger salamander.[3][4][5] It is unusual among amphibians in that it reaches adulthood without undergoing metamorphosis. Instead of taking to the land, adults remain aquatic and gilled. The species was originally found in several lakes underlying what is now Mexico City, such as Lake Xochimilco and Lake Chalco.[1] These lakes were drained by Spanish settlers after the conquest of the Aztec Empire, leading to the destruction of much of the axolotl's natural habitat.

As of 2020, the axolotl was near extinction[6][7] due to urbanization in Mexico City and consequent water pollution, as well as the introduction of invasive species such as tilapia and perch. It is listed as critically endangered in the wild, with a decreasing population of around 50 to 1,000 adult individuals, by the International Union for Conservation of Nature and Natural Resources (IUCN) and is listed under Appendix II of the Convention on International Trade in Endangered Species (CITES).[2] Axolotls are used extensively in scientific research for their ability to regenerate limbs, gills and parts of their eyes and brains.[8] Notably, their ability to regenerate declines with age, but it does not disappear. Axolotls keep modestly growing throughout their life and some consider this trait to be a direct contributor to their regenerative abilities.[9] Further research has been conducted to examine their heart as a model of human single ventricle and excessive trabeculation.[10] Axolotls were also sold as food in Mexican markets and were a staple in the Aztec diet.[11]


"""

summary = summarize_document(sample_text, focus_points=["Animals", "Research"])
print(summary)

Here is a summary of the text, focusing on animals and research:

The axolotl, a salamander closely related to the tiger salamander, is an unusual amphibian that reaches adulthood without undergoing metamorphosis. It remains aquatic and gilled as an adult. Originally found in lakes near Mexico City, the species was nearly wiped out due to habitat destruction and urbanization. As of 2020, the axolotl population had declined to around 50-1,000 individuals, making it critically endangered.

Axolotls are highly valued for their ability to regenerate limbs, gills, eyes, and brains, making them a popular subject in scientific research. Their regenerative abilities decline with age, but do not disappear entirely. Researchers have also studied the axolotl's heart as a model for human single ventricle and excessive trabeculation.

Overall, the axolotl is an important species in both its natural habitat and in the realm of scientific inquiry.


### Extract structured data
Really useful for tooling 
##### TODO work on realiability of parsing

In [8]:
from pydantic import BaseModel
import json

class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

def extract_structured_data(prompt, schema_class, temperature=0.2):
    # Lower temperature --> more precise structured outputs
    schema_dict = schema_class.model_json_schema()
    schema_str = json.dumps(schema_dict, indent=2)
    
    structured_prompt = f"""
    Extract the following information from the text and return it as valid JSON.
    Schema: {schema_str}
    
    Text: {prompt}
    
    Respond with a valid JSON object (without Markdown formatting or additional text).
    """
    
    # Request with optimal parameters for structured extraction
    messages = [
        {"role": "system", "content": "You are a precise assistant that extracts information as JSON."},
        {"role": "user", "content": structured_prompt},
    ]
    
    # Send request to API
    # response = client.chat_completion(messages, temperature=temperature)
    # response_text = response["message"]["content"]
    
    # For debugging - simulate a response for the given input
    response_text = """```json
    {
      "name": "annual Kaffekranz",
      "date": "December 31, 2042",
      "participants": ["Rick", "Morty"]
    }
    ```"""
    
    # CLEANUP response data
    clean_json = ""
    
    # Check for code blocks
    if "```json" in response_text:
        # Extract content between ```json and ```
        match = re.search(r"```json\n(.*?)```", response_text, re.DOTALL)
        if match:
            clean_json = match.group(1).strip()
    elif "```" in response_text:
        # Extract content between ``` and ```
        match = re.search(r"```\n?(.*?)```", response_text, re.DOTALL)
        if match:
            clean_json = match.group(1).strip()
    else:
        # No code blocks, use the entire response
        clean_json = response_text.strip()
    
    # Remove any comments
    json_lines = [line for line in clean_json.split('\n') if not line.strip().startswith('//')]
    clean_json = '\n'.join(json_lines)
    
    # Debug output
    print(f"Clean JSON to parse: {clean_json}")
    
    # Convert JSON to a Python object
    try:
        result = json.loads(clean_json)
        # Convert to the Pydantic model
        return schema_class(**result)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        print(f"Received JSON: {clean_json!r}")  # Use repr to see hidden characters
        
        # Additional fallback for common issues
        if clean_json:
            # Try to fix common JSON formatting issues
            try:
                # Replace single quotes with double quotes
                fixed_json = clean_json.replace("'", "\"")
                # Try parsing again
                result = json.loads(fixed_json)
                return schema_class(**result)
            except Exception:
                pass
        
        raise

event_text = """Rick and Morty are scheduled to meet for the annual Kaffekranz
at Milliways on December 31, 2042. Cakes and cats are included."""

event = extract_structured_data(event_text, CalendarEvent)
print(event)

Clean JSON to parse: {
      "name": "annual Kaffekranz",
      "date": "December 31, 2042",
      "participants": ["Rick", "Morty"]
    }
name='annual Kaffekranz' date='December 31, 2042' participants=['Rick', 'Morty']


## Notes



### Modelfile Example


In [9]:
# Run this in a terminal, not directly in the notebook

'''touch MODELFILE '''

'''

FROM llama3:8b

# Set model parameters
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER top_k 40
PARAMETER num_ctx 4096

# Define system behavior
SYSTEM """
You are a helpful assistant who provides clear, accurate, and concise answers.
You prefer structured, well-organized responses and list important information in a clear format.
"""

# Then build it with:
ollama create custom-llama3-8b -f Modelfile
'''

'\n\nFROM llama3:8b\n\n# Set model parameters\nPARAMETER temperature 0.7\nPARAMETER top_p 0.9\nPARAMETER top_k 40\nPARAMETER num_ctx 4096\n\n# Define system behavior\nSYSTEM """\nYou are a helpful assistant who provides clear, accurate, and concise answers.\nYou prefer structured, well-organized responses and list important information in a clear format.\n"""\n\n# Then build it with:\nollama create custom-llama3-8b -f Modelfile\n'