# **1. Install Ollama from the shell (no xterm or other terminal emulator needed)**

In [None]:
# Download the Ollama install script and pipe it straight into `sh` to run it.
# curl flags: -f fail on HTTP errors, -sS quiet but still report errors, -L follow redirects.
!curl -fsSL https://ollama.com/install.sh | sh

In [5]:
import os
# Put the system NVIDIA driver libraries first on the library search path,
# followed by the CUDA toolkit libraries. This replaces any existing
# LD_LIBRARY_PATH value for this process and the processes it starts.
os.environ['LD_LIBRARY_PATH'] = ':'.join([
    '/usr/lib64-nvidia',
    '/usr/local/cuda/lib64',
])


In [None]:
# Launch the Ollama server in the background (`&`) so this cell returns
# immediately; `nohup` keeps it from being stopped by a hangup signal.
!nohup ollama serve &

# **2. Install aisuite and run the models you want to test**

In [None]:
# Install AI Suite
!pip install aisuite[ollama]
!pip install gradio

In [10]:
import aisuite as ai

# Helper that sends one prompt to a model through aisuite and returns the reply text
def ask(message, sys_message="You are a helpful agent.", model="ollama:llama3.1:8b"):
    """Send a single-turn chat request and return the reply text.

    Args:
        message: The user prompt.
        sys_message: System prompt placed before the user message.
        model: Model identifier passed through to aisuite, e.g.
            ``"ollama:llama3.2:1b"``. Note that the default model is not
            one of the models pulled in this notebook, so either pass
            ``model`` explicitly or pull the default first.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    # One system turn followed by one user turn.
    conversation = [
        dict(role="system", content=sys_message),
        dict(role="user", content=message),
    ]

    # A fresh client is created on every call.
    completion = ai.Client().chat.completions.create(
        model=model,
        messages=conversation,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# Pull the five models named in the `models` list below so that Ollama
# has them available when they are queried.
!ollama pull llama3.2:1b
!ollama pull phi3
!ollama pull smollm2:135m
!ollama pull tinyllama:latest
!ollama pull gemma2:2b

# Ollama model tags to compare; the order here is the order used for output.
models = [
    "llama3.2:1b",
    "smollm2:135m",
    "phi3",
    "tinyllama:latest",
    "gemma2:2b",
]

# **3. Raw output as simple text**

In [None]:
# Ask every model the same question. The answers are stored in the same
# order as `models`, so names and answers can be paired up afterwards.
res = [
    ask('Write a short one sentence explanation of the origins of AI?', model=f'ollama:{name}')
    for name in models
]

# Print each model's name followed by its answer. `zip` pairs the two lists
# directly instead of indexing back into `models` by position.
for name, answer in zip(models, res):
    print(f'{name}: \n {answer} \n\n')

# **4. Cleaner output using Gradio**

In [None]:
import gradio as gr

# Ollama model tags shown in the UI, one tab per entry and in this order.
models = [
    "llama3.2:1b",
    "smollm2:135m",
    "phi3",
    "tinyllama:latest",
    "gemma2:2b",
]

def get_model_response(model):
    """Return one model's answer to the fixed origins-of-AI question.

    Args:
        model: Ollama model tag without the provider prefix, e.g.
            ``'phi3'``; ``'ollama:'`` is prepended before calling ``ask``.

    Returns:
        Whatever ``ask`` returns for that model.
    """
    prompt = 'Write a short one sentence explanation of the origins of AI?'
    qualified_name = f'ollama:{model}'
    return ask(prompt, model=qualified_name)

# Build a page with one tab per entry in `models`. Each model is queried here,
# inside the loop, so every answer is fetched once before `demo.launch()` is
# called and is then rendered as Markdown.
with gr.Blocks() as demo:
    gr.Markdown("## Model Responses")
    with gr.Tabs():
        for model in models:
            # The tab label is the model name; the tab body is that model's answer.
            with gr.TabItem(label=model):
                response = get_model_response(model)
                gr.Markdown(response)

# Start the Gradio app.
demo.launch()
