<a href="https://colab.research.google.com/github/koushiksr/rag-tutorial-v2-ollama/blob/main/private_assistant_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Ollama (if not installed).
# -f makes curl fail on an HTTP error instead of piping an error page into the shell.
!curl -fsSL https://ollama.com/install.sh | bash

# Run Ollama server in the background
!nohup ollama serve &> /dev/null &

# The server was just started in the background; give it a moment to come up
# so the check below (and the model pull in the next cell) does not race it.
!sleep 3

# Verify if Ollama is running (expect an "ollama serve" line in the listing)
!ps aux | grep ollama


>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
root        5259  0.0  0.2 1684672 28076 ?       Sl   16:22   0:00 ollama serve
root        5266  0.0  0.0   7376  3524 ?        S    16:22   0:00 /bin/bash -c ps aux | grep ollama
root        5268  0.0  0.0   6484  2316 ?        S    16:22   0:00 grep ollama


In [None]:
# Download the llama3.1:8b model (about 4.7 GB according to the progress log).
# The tag must match the model name passed to ollama.chat() further down.
!ollama pull llama3.1:8b

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest 
pulling 8eeb52dfb3bb...   0% ▕▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 8eeb52dfb3bb...   0% ▕▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 8eeb52dfb3bb...   0% ▕▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 8eeb52df

In [None]:
# hashlib is part of the Python standard library and cannot be installed from PyPI;
# listing it made this command fail. sentence-transformers is imported by the main
# cell below, so it is installed here. %pip targets the running kernel's environment.
%pip install -q ollama sentence-transformers scikit-learn numpy gradio

[31mERROR: Ignored the following yanked versions: 20081119[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement hashlib (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for hashlib[0m[31m
[0m

In [None]:
import hashlib
import re

import gradio as gr
import numpy as np
import ollama
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# In-memory store holding every user's session, keyed by user id.
user_sessions = dict()

# Pre-trained sentence-embedding model, loaded once and shared by all sessions.
embedding_model = SentenceTransformer(
    'all-MiniLM-L6-v2',
)

def initialize_user_session(user_id):
    """Create an empty session record for ``user_id`` unless one already exists."""
    # setdefault leaves an existing record untouched, so calling this again
    # for a known user does not reset their data.
    user_sessions.setdefault(
        user_id,
        {
            "chat_history": [],
            "personal_info": {},  # free-form key/value details about the user
            "vector_db": {},  # message embeddings together with their text
        },
    )

def get_chat_history(user_id):
    """Return the recorded exchanges for ``user_id``, or an empty list if there are none."""
    if user_id not in user_sessions:
        return []
    return user_sessions[user_id].get("chat_history", [])

def update_chat_history(user_id, user_message, ai_response):
    """Append one user/assistant exchange to the history kept for ``user_id``."""
    session_missing = user_id not in user_sessions
    if session_missing:
        initialize_user_session(user_id)

    exchange = {"user": user_message, "ai": ai_response}
    user_sessions[user_id]["chat_history"].append(exchange)

def set_personal_info(user_id, info_key, info_value):
    """Record ``info_value`` under ``info_key`` in the personal details of ``user_id``."""
    if user_id not in user_sessions:
        # First contact with this user: create the session before writing to it.
        initialize_user_session(user_id)

    details = user_sessions[user_id]["personal_info"]
    details[info_key] = info_value

def get_personal_info(user_id, info_key=None):
    """Look up stored personal details for ``user_id``.

    With a truthy ``info_key`` the single matching value is returned (``None``
    when it is not stored); otherwise the whole details dictionary is returned.
    Unknown users yield an empty dictionary / ``None`` respectively.
    """
    session = user_sessions.get(user_id, {})
    details = session.get("personal_info", {})
    if not info_key:
        return details
    return details.get(info_key)

def vectorize_message(message):
    """Embed a single message with the shared sentence-embedding model."""
    # The message is encoded as a one-item batch; unwrap its only row.
    batch = embedding_model.encode([message])
    return batch[0]

def store_vector_for_user(user_id, user_message, ai_response):
    """Embed one exchange and store both sides in the user's vector database.

    Args:
        user_id: Key of the session in ``user_sessions``; the session is created
            when it does not exist yet.
        user_message: Text sent by the user.
        ai_response: Text returned by the assistant.

    Each entry is stored as ``(vector, text, role)`` under the SHA-256 hex digest
    of the text. Because the key depends on the text alone, storing the same text
    again overwrites the earlier entry (including its role).
    """
    # Embed both texts before touching the store, so a failing embedding
    # leaves the database unchanged.
    user_vector = vectorize_message(user_message)
    ai_vector = vectorize_message(ai_response)

    # Create the session on demand, like the other session helpers do, so a
    # first-time user does not raise KeyError here.
    if user_id not in user_sessions:
        initialize_user_session(user_id)
    vector_db = user_sessions[user_id]["vector_db"]

    for vector, text, role in (
        (user_vector, user_message, "user"),
        (ai_vector, ai_response, "assistant"),
    ):
        key = hashlib.sha256(text.encode()).hexdigest()
        vector_db[key] = (vector, text, role)

def get_relevant_vectors(user_id, query_message, top_k=3):
    """Return the stored messages most similar to ``query_message``.

    Args:
        user_id: Key of the session in ``user_sessions``.
        query_message: Text to compare against the stored messages.
        top_k: Maximum number of messages to return (default 3).

    Returns:
        Up to ``top_k`` message texts ordered from most to least similar by
        cosine similarity. An empty list when the user has no stored vectors
        or ``top_k`` is not positive.

    Raises:
        ValueError: If the query embedding is missing or empty.
    """
    # Nothing to compare against: return before paying for an embedding.
    vector_db = user_sessions.get(user_id, {}).get("vector_db")
    if not vector_db or top_k <= 0:
        return []

    query_vector = vectorize_message(query_message)
    if query_vector is None or query_vector.shape[0] == 0:
        raise ValueError("Query vector is empty or invalid.")

    # cosine_similarity expects 2D input: one row holding the query vector.
    query_vector = query_vector.reshape(1, -1)

    # Snapshot the entries once so vectors and texts share the same order.
    entries = list(vector_db.values())
    vectors = [entry[0] for entry in entries]
    similarities = cosine_similarity(query_vector, vectors)[0]

    # argsort is ascending: keep the last top_k indices and reverse them.
    top_indices = similarities.argsort()[-top_k:][::-1]
    return [entries[idx][1] for idx in top_indices]

# Patterns that pull a personal detail out of a user message. They are matched
# case-insensitively against the ORIGINAL message, so the captured text keeps the
# user's capitalisation. Group 1 is everything after the trigger phrase.
_NAME_PATTERN = re.compile(
    r"\bremember my name\b(?:\s+(?:is|as)\b)?[\s:,\-]*(.*)", re.IGNORECASE | re.DOTALL
)
_COLOR_PATTERN = re.compile(
    r"\bmy favorite color is\b[\s:,\-]*(.*)", re.IGNORECASE | re.DOTALL
)
_LOCATION_PATTERN = re.compile(
    r"\bi live in\b[\s:,\-]*(.*)", re.IGNORECASE | re.DOTALL
)


def _extract_detail(pattern, message):
    """Return the stripped text captured by ``pattern`` in ``message``, or None if absent/empty."""
    match = pattern.search(message)
    if match is None:
        return None
    return match.group(1).strip() or None


def generate_response_with_ollama(user_id, user_message, model="llama3.1:8b"):
    """Generate a reply to ``user_message`` for ``user_id`` through Ollama's chat API.

    The prompt consists of a system message, the user's recorded chat history,
    and the new message combined with the most similar stored messages. Simple
    phrases ("remember my name ...", "my favorite color is ...", "I live in ...")
    are detected and the detail they carry is saved as personal info.

    Args:
        user_id: Key identifying the user's session; created if missing.
        user_message: The text the user just entered.
        model: Ollama model tag to query. Defaults to the tag pulled earlier
            in this notebook.

    Returns:
        The assistant's reply text.

    Side effects:
        Stores both messages in the user's vector database and appends the
        exchange to the chat history.
    """
    # Initialize user session if it's a new user
    initialize_user_session(user_id)

    # System prompt first, then the recorded exchanges in order
    messages = [
        {"role": "system", "content": "your a private assistant for individual users, you have to answer and remember user details for later reference"}
    ]
    for entry in get_chat_history(user_id):
        messages.append({"role": "user", "content": entry['user']})
        messages.append({"role": "assistant", "content": entry['ai']})

    # Retrieve relevant context (RAG) based on the query
    relevant_context = get_relevant_vectors(user_id, user_message)
    context_message = "\n".join(relevant_context) if relevant_context else "No relevant context found."

    lowered_message = user_message.lower()

    # A stored name takes priority; otherwise look for one in this message.
    user_name = get_personal_info(user_id, "name")
    new_name = None if user_name else _extract_detail(_NAME_PATTERN, user_message)

    if user_name:
        personalized_message = f"Hello {user_name}, {user_message}. Here's some context for you: {context_message}"
    elif new_name:
        # Matching on the original text (not a case-sensitive split after a
        # lowercase check) keeps e.g. "Remember my name is Bob" -> "Bob".
        set_personal_info(user_id, "name", new_name)
        personalized_message = f"Got it, {new_name}! How can I assist you today? Here's some context: {context_message}"
    elif "can you tell me my name" in lowered_message:
        personalized_message = "I don’t know your name yet, but if you'd like to share it, I can remember it for next time."
    else:
        personalized_message = f"{user_message}. Here's some context: {context_message}"

    # Only a statement ("my favorite color is ...") stores a color; a question
    # such as "what is my favorite color?" must not overwrite the stored value.
    color = _extract_detail(_COLOR_PATTERN, user_message)
    if color:
        set_personal_info(user_id, "favorite_color", color)
        personalized_message = f"Got it! I’ll remember your favorite color as {color}. Here's some context: {context_message}"

    # Case-insensitive match: the previous check compared a capitalised phrase
    # with the lower-cased message and therefore never fired.
    place = _extract_detail(_LOCATION_PATTERN, user_message)
    if place:
        set_personal_info(user_id, "location", place)
        personalized_message = f"Got it! I’ll remember that you live in {place}. Here's some context: {context_message}"

    # NOTE(review): the templates above are sent to the model with the "user"
    # role and are also what gets written to the chat history, and the stored
    # color/location are never read back here — confirm this is intended.
    messages.append({"role": "user", "content": personalized_message})

    response = ollama.chat(model=model, messages=messages)
    ai_response = response.message.content

    # Store the raw user text and the reply for later similarity lookups
    store_vector_for_user(user_id, user_message, ai_response)

    # Update chat history with the new exchange
    update_chat_history(user_id, personalized_message, ai_response)

    return ai_response

# Gradio callback
def chatbot_interface(username, question):
    """Answer ``question`` for the session identified by ``username``.

    The username entered in the form is used directly as the session id.
    """
    return generate_response_with_ollama(username, question)

# Two-field form (username + question) wired to chatbot_interface.
# live=False: the callback runs when the submit button is pressed.
iface = gr.Interface(
    live=False,
    fn=chatbot_interface,
    inputs=[
        gr.Textbox(label="Enter your username"),
        gr.Textbox(label="Ask a question"),
    ],
    outputs=gr.Textbox(label="Answer"),
)

# Start serving the UI
iface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3ed0343d7202a1b562.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


