In [6]:
from openai import OpenAI

class ChatModel:
    def __init__(self, base_url, key):
        self.client = OpenAI(
            base_url=base_url,
            api_key=key,
        )

    def chat_completion(self, model, messages):
        response = self.client.chat.completions.create(
            model=model,
            messages=messages
        )
        return response

BASE_URL = "http://localhost:11434/v1"  # Default local URL for Ollama
chatModel = ChatModel(base_url=BASE_URL, key="fake-key")  # Key is required but not used by Ollama

messages = [
    {"role": "system", "content": "You are a Jetson-based assistant."},
    {"role": "user", "content": "How can I optimize GPU usage on a Jetson Nano?"},
    {"role": "assistant", "content": "Use TensorRT for inference and disable services you don't need."},
    {"role": "user", "content": "Got it, thanks!"}
]

response = chatModel.chat_completion(model="llama3.2:latest", messages=messages)
print(response.choices[0].message.content)

If you're looking to get more specific, here are some tips for optimizing GPU usage on a Jetson Nano:

1. **Monitor GPU temps**: High temperatures can limit GPU performance. Use tools like `temp` command or `lspci -vnn` to monitor temperatures.
2. **Adjust VRAM allocation**: You may need to adjust the amount of VRAM allocated to each process using the `jetson-config` tool.
3. **Compile with Optimize flags**: Compile your applications with optimization flags (-Wl,--gc-sections=-O2) and strip unused symbols.
4. **Use less memory-intensive algorithms**: If possible, use algorithms that are more memory-efficient in the first place.
5. **Dust-gate removal**: Remove dust from the heatsinks to ensure good airflow inside the device.
6. **Keep your system up-to-date**: Regularly update your Jetson OS and drivers to take advantage of performance improvements.

Keep in mind, the Jetson Nano is an embedded platform with limited resources, so GPU optimization might not lead to dramatic performance 

In [None]:
import requests

class ChatModel:
    def __init__(self, base_url):
        self.base_url = base_url

    def chat_completion(self, model, messages):
        response = requests.post(
            f"{self.base_url}/chat/completions",
            json={"model": model, "messages": messages}
        )
        return response.json()

BASE_URL = "http://localhost:11434/v1"
chatModel = ChatModel(base_url=BASE_URL)

# messages = [
#     {"role": "system", "content": "You are a Jetson-based assistant."},
#     {"role": "user", "content": "How can I optimize GPU usage on a Jetson Nano?"}
# ]

messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "What is the capital of Japan?"}
]

# response = chatModel.chat_completion(model="llama3.2:latest", messages=messages)
# response = chatModel.chat_completion(model="gemma3:27b", messages=messages)
response = chatModel.chat_completion(model="falcon:7b", messages=messages)
print(response["choices"][0]["message"]["content"])


KeyError: 'choices'

In [4]:
import requests

class ChatModel:
    def __init__(self, base_url):
        self.base_url = base_url

    def chat_completion(self, model, messages):
        response = requests.post(
            f"{self.base_url}/chat/completions",
            json={"model": model, "messages": messages}
        )
        return response.json()

BASE_URL = "http://localhost:11434/v1"
chatModel = ChatModel(base_url=BASE_URL)

messages = [
    {"role": "system", "content": "You are an experienced chemist specializing in molecular property prediction."},
    {"role": "user", "content": "Please come up with 20/30 rules that are important to predict blood–brain barrier permeability."}
]

response = chatModel.chat_completion(model="falcon:7b", messages=messages)
print(response["choices"][0]["message"]["content"])


1. Blood-brain barrier permeability should be evaluated in animal and preclinical models with in vivo imaging techniques. 
2. The brain blood-plasma concentration ratios for small molecules should approximate an aqueous solubility value. 
3. Blood-brain barrier permeability of molecules should correlate with their degree of brain-specific delivery.
4. Routes of administration that affect blood-brain barrier delivery should be analyzed.
5. Blood-brain barrier permeability of drugs should be evaluated prior to the clinical trial.
6. Blood-brain barrier permeability of drugs should be correlated with in vivo data from clinical trials.
7. Blood-brain barrier permeability of molecules should be analyzed in human models.
8. Blood-brain barrier permeability of small molecules should be evaluated in animal models without brain expression.
9. Blood-brain barrier permeability should be predicted using in vitro molecular modeling approaches.
10. Blood-brain barrier permeability of small molecules