In [1]:
#! curl -fsSL https://ollama.ai/install.sh | sh
#! ollama pull llama3
! ollama serve
!pip install requests fastapi



In [1]:
import requests
import json
import time

# Base URL of the Ollama HTTP API (default local address). The helper functions
# in this notebook build their endpoint URLs from this constant.
OLLAMA_URL = "http://localhost:11434"

def check_ollama_status(timeout=5):
    """Report whether the Ollama server at ``OLLAMA_URL`` is reachable.

    Sends ``GET {OLLAMA_URL}/api/version`` and prints a one-line status message.

    Args:
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout ``requests`` may wait indefinitely on an
            unresponsive host.

    Returns:
        bool: True if the server answered with HTTP 200; False on a non-200
        status, a connection failure, a timeout, or any other ``requests`` error.
    """
    try:
        response = requests.get(f"{OLLAMA_URL}/api/version", timeout=timeout)
    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to Ollama. Make sure it's running on localhost:11434")
        return False
    except requests.exceptions.RequestException as exc:
        # Read timeouts, malformed URLs, etc.: report them instead of raising so the
        # function always honours its boolean contract.
        print(f"❌ Ollama status check failed: {exc}")
        return False

    if response.status_code != 200:
        print("❌ Ollama server not responding")
        return False

    print(f"✅ Ollama is running - Version: {response.json().get('version', 'unknown')}")
    return True

def list_models(timeout=10):
    """Print and return the models known to the Ollama server.

    Sends ``GET {OLLAMA_URL}/api/tags`` and prints one line per model
    (name and size as reported by the server).

    Args:
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        list: The ``models`` entries from the server's JSON reply, or an empty
        list when the request fails or the server answers with a non-200 status.
    """
    try:
        response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=timeout)
        if response.status_code != 200:
            print("Failed to fetch models")
            return []

        # `or []` also covers a reply whose "models" value is null.
        models = response.json().get('models') or []
        print("Available models:")
        for model in models:
            # .get() keeps one entry without a name from aborting the whole listing.
            print(f"  - {model.get('name', 'unknown')} (Size: {model.get('size', 'unknown')})")
        return models
    except Exception as e:
        # Deliberately broad: this is a best-effort notebook helper that reports
        # the problem and returns an empty list rather than raising.
        print(f"Error fetching models: {e}")
        return []

# Check that the server is up, then list the installed models.
# list_models() is the cell's last expression, so the notebook also displays its return value.
check_ollama_status()
list_models()

✅ Ollama is running - Version: 0.12.0
Available models:
  - llama3.2:latest (Size: 2019393189)


[{'name': 'llama3.2:latest',
  'model': 'llama3.2:latest',
  'modified_at': '2025-08-07T12:25:35.7302587+05:30',
  'size': 2019393189,
  'digest': 'a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72',
  'details': {'parent_model': '',
   'format': 'gguf',
   'family': 'llama',
   'families': ['llama'],
   'parameter_size': '3.2B',
   'quantization_level': 'Q4_K_M'}}]

In [2]:
def chat_with_llama3(prompt, model="llama3.2", stream=False, timeout=None):
    """
    Send a prompt to an Ollama model via ``/api/generate`` and return its reply.

    Args:
        prompt (str): The prompt to send
        model (str): Model name (default: llama3.2)
        stream (bool): If True, print each chunk as it arrives and return the
            concatenated text; if False, wait for the complete reply.
        timeout (float | tuple | None): Passed to ``requests.post``. The default
            (None) waits indefinitely, which matches the previous behaviour.

    Returns:
        str: The response from the model. Failures are not raised: they are
        returned as a string starting with ``"Error: "``.
    """
    url = f"{OLLAMA_URL}/api/generate"

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": stream
    }

    try:
        # stream=stream tells requests not to download the whole body up front; without
        # it iter_lines() only starts yielding after generation has finished, so nothing
        # is printed in real time. The context manager releases the connection even
        # when the loop below exits early.
        with requests.post(url, json=payload, stream=stream, timeout=timeout) as response:
            if response.status_code != 200:
                return f"Error: {response.status_code} - {response.text}"

            if not stream:
                return response.json().get('response', 'No response received')

            # Streaming reply: one JSON object per line.
            pieces = []
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line.decode('utf-8'))
                if 'response' in chunk:
                    print(chunk['response'], end='', flush=True)
                    pieces.append(chunk['response'])
                if chunk.get('done', False):
                    break
            print()  # New line at the end
            return "".join(pieces)

    except Exception as e:
        # Best-effort helper: surface the failure as text instead of raising.
        return f"Error: {e}"

# Smoke test: send one non-streaming prompt with the default model and print the reply.
# chat_with_llama3 returns an "Error: ..." string instead of raising, so a failure
# shows up here as the printed response text.
response = chat_with_llama3("Hello! Can you tell me a joke?")
print("Response:", response)

Response: Here's one:

What do you call a fake noodle?

(wait for it...)

An impasta!

Hope that made you laugh! Do you want to hear another one?


In [3]:
def streaming_chat(prompt, model="llama3"):
    """Print the bot prefix, then forward the prompt to ``chat_with_llama3`` in streaming mode.

    Returns whatever ``chat_with_llama3`` returns.

    NOTE(review): the default model here is "llama3", whereas ``chat_with_llama3``
    defaults to "llama3.2" — confirm which one is intended.
    """
    print("🤖 Llama3: ", end='', flush=True)
    return chat_with_llama3(prompt, model, stream=True)

# Example with streaming. The model is passed explicitly as "llama3.2",
# overriding streaming_chat's "llama3" default.
streaming_response = streaming_chat("Explain quantum computing in simple terms.", model="llama3.2")

🤖 Llama3: Quantum computing is a new way of processing information that's different from the way computers we use today.

**Classical Computing**

In classical computing, like your smartphone or laptop, information is processed using bits (0s and 1s). Bits are like light switches - they're either on (1) or off (0).

**Quantum Computing**

In quantum computing, information is processed using qubits (quantum bits). Qubits are special because they can be both on (1) AND off (0) at the same time! This means that qubits can process a huge number of possibilities simultaneously.

Think of it like trying to find a specific book in a library. With classical computing, you have to look through each book one by one. But with quantum computing, you can use a special machine that can look at all the books at the same time and find the right one instantly!

**How Does It Work?**

Quantum computers use tiny particles like atoms or photons to represent information. These particles can exist in multip

In [8]:
!pip install langchain langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting requests<3,>=2 (from langchain)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain-community)
  Downloading aiohttp-3.12.15-cp312-cp312-win_amd64.whl.metadata (7.9 kB)
Collecting dataclasses-json<0.7,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.10.1 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.4.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)
  Dow

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.19.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3, but you have protobuf 6.31.1 which is incompatible.
tensorflow-intel 2.17.0 requires ml-dtypes<0.5.0,>=0.3.1, but you have ml-dtypes 0.5.3 which is incompatible.
tensorflow-intel 2.17.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 6.31.1 which is incompatible.
tensorflow-intel 2.17.0 requires tensorboard<2.18,>=2.17, but you have tensorboard 2.19.0 which is incompatible.


In [9]:
# Set up LangChain to use the local Ollama server with the llama3.2 model.
# The resulting `llm` is meant to be handed to a GraphRAG framework later on.
from langchain_ollama import ChatOllama

# Reuse OLLAMA_URL from the first code cell so the server address lives in one place.
llm = ChatOllama(model='llama3.2', base_url=OLLAMA_URL)
llm

ChatOllama(model='llama3.2', base_url='http://localhost:11434')

In [11]:
import torch
 
def detect_torch_device():
    """Return a label for the accelerator PyTorch reports as available.

    Returns:
        str: "CUDA GPU", "Apple Silicon GPU" or "Only CPU", checked in that order.
    """
    if torch.cuda.is_available():
        return "CUDA GPU"
    # torch.backends.mps.is_available() is the documented availability check;
    # torch.mps.is_available() is not present in every PyTorch release. getattr
    # guards builds that ship without the MPS backend module at all.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "Apple Silicon GPU"
    return "Only CPU"


print(f"PyTorch version {torch.__version__}")
print(detect_torch_device())

PyTorch version 2.8.0+cpu
Only CPU


In [12]:
!pip install neo4j langchain-neo4j

Collecting langchain-neo4j
  Downloading langchain_neo4j-0.5.0-py3-none-any.whl.metadata (4.5 kB)
Collecting neo4j-graphrag<2.0.0,>=1.9.0 (from langchain-neo4j)
  Downloading neo4j_graphrag-1.9.1-py3-none-any.whl.metadata (18 kB)
Collecting json-repair<0.45.0,>=0.44.1 (from neo4j-graphrag<2.0.0,>=1.9.0->langchain-neo4j)
  Downloading json_repair-0.44.1-py3-none-any.whl.metadata (12 kB)
Collecting numpy<3.0.0,>=2.0.0 (from neo4j-graphrag<2.0.0,>=1.9.0->langchain-neo4j)
  Using cached numpy-2.3.2-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting pypdf<6.0.0,>=5.1.0 (from neo4j-graphrag<2.0.0,>=1.9.0->langchain-neo4j)
  Downloading pypdf-5.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting types-pyyaml<7.0.0.0,>=6.0.12.20240917 (from neo4j-graphrag<2.0.0,>=1.9.0->langchain-neo4j)
  Downloading types_pyyaml-6.0.12.20250822-py3-none-any.whl.metadata (1.7 kB)
Collecting numpy<3.0.0,>=2.0.0 (from neo4j-graphrag<2.0.0,>=1.9.0->langchain-neo4j)
  Downloading numpy-2.2.6-cp312-cp312-win_amd64

  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.19.0 requires numpy<2.2.0,>=1.26.0, but you have numpy 2.2.6 which is incompatible.
tensorflow 2.19.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3, but you have protobuf 6.31.1 which is incompatible.
tensorflow-intel 2.17.0 requires ml-dtypes<0.5.0,>=0.3.1, but you have ml-dtypes 0.5.3 which is incompatible.
tensorflow-intel 2.17.0 requires numpy<2.0.0,>=1.26.0; python_version >= "3.12", but you have numpy 2.2.6 which is incompatible.
tensorflow-intel 2.17.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 6.31.1 which is incompatible.
tensorflow-intel 2.17.0 requires tensorboard<2.18,>=2.17, but you have tensorboard 2.19

In [13]:
# Start a detached (-d) Neo4j container named "neo4j", publishing ports 7474 and 7687
# and requesting the "apoc" plugin through NEO4J_PLUGINS.
# NOTE(review): NEO4J_AUTH hardcodes the credentials neo4j/password — acceptable for a
# throwaway local sandbox only; supply the password from an environment variable otherwise.
# NOTE(review): the image tag is `latest` (unpinned), and the fixed --name presumably makes
# a second run fail while the earlier container still exists — confirm before re-running.
!docker run \
    --name neo4j \
    -p7474:7474 -p7687:7687 \
    -d \
    -e NEO4J_AUTH=neo4j/password \
    -e NEO4J_PLUGINS='["apoc"]' \
    neo4j:latest

6180f36d35348ce09447c32a77ecb0c68e9fd9760383da09181ffc9630f67c14


In [13]:
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128

Looking in indexes: https://download.pytorch.org/whl/cu128
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu128/torchvision-0.23.0%2Bcu128-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting torch
  Downloading https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-win_amd64.whl.metadata (29 kB)
Downloading https://download.pytorch.org/whl/cu128/torchvision-0.23.0%2Bcu128-cp312-cp312-win_amd64.whl (7.5 MB)
   ---------------------------------------- 0.0/7.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/7.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/7.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/7.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/7.5 MB ? eta -:--:--
   - -------------------------------------- 0.3/7.5 MB ? eta -:--:--
   -- ------------------------------------- 0.5/7.5 MB 882.6 kB/s eta 0:00:08
   -- ------------------------------------- 0.5/7.5 MB 8

  You can safely remove it manually.

[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
