<a href="https://colab.research.google.com/github/kairamilanifitria/PurpleBox-Intern/blob/main/RAG/7_RETRIEVAL-LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RETRIEVAL-LLM

In [None]:
!pip install supabase numpy psycopg2

In [None]:
import os
import json
import torch
import uuid
import numpy as np
from supabase import create_client, Client
from transformers import AutoTokenizer, AutoModel

# Initialize Supabase
# Credentials are read from the environment so real keys never have to be
# pasted into (and accidentally committed with) the notebook. The empty-string
# fallback keeps the original placeholder behaviour: either export
# SUPABASE_URL / SUPABASE_KEY before running this cell or fill them in here.
SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "")

supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Load Embedding Model
# The checkpoint name and the target device are named once so the tokenizer
# and the model cannot drift apart.
EMBEDDING_MODEL_NAME = "Alibaba-NLP/gte-multilingual-base"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: trust_remote_code=True allows transformers to execute model code that
# ships with the checkpoint repository; only use it with checkpoints you trust.
tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True)
model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True).to(DEVICE)


Retrieval

In [None]:
import numpy as np
import ast
import re
from scipy.spatial.distance import cosine
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
# Fetch the NLTK resources used by the helpers below (the stopwords corpus and
# the tokenizer data behind word_tokenize).
# NOTE(review): 'all' requests the complete NLTK data collection, so it
# presumably already includes 'punkt' and 'stopwords' and makes this cell a very
# large download. Consider keeping only the specific packages (recent NLTK
# releases may additionally need 'punkt_tab') -- confirm against the installed
# NLTK version before removing it.
nltk.download('all')
nltk.download('punkt')
nltk.download('stopwords')

def get_embedding(text):
    """Embed *text* with the module-level tokenizer/model and return plain floats.

    The text is tokenized (truncated to at most 512 tokens), moved to the
    model's device and run through the model with gradient tracking disabled.
    The last hidden states are averaged over dim 1 (presumably the token
    axis), size-1 dimensions are squeezed away, and the result is returned as
    a (nested) Python list on the CPU.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    encoded = encoded.to(model.device)

    # Inference only: no autograd graph is needed for embedding lookups.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    pooled = hidden_states.mean(dim=1)
    return pooled.squeeze().cpu().tolist()

def extract_keywords_simple(text):
    """Return the alphanumeric, non-stopword tokens of *text*, lowercased.

    Tokens are produced by NLTK's ``word_tokenize`` on the lowercased text and
    filtered against NLTK's English stopword list; order and duplicates are
    kept as they appear in the text.
    """
    english_stop_words = set(stopwords.words('english'))

    keywords = []
    for token in word_tokenize(text.lower()):
        # Drop punctuation / mixed-symbol tokens first, then common words.
        if not token.isalnum():
            continue
        if token in english_stop_words:
            continue
        keywords.append(token)
    return keywords

def query_requires_table(user_query):
    """Return True when the query mentions any table-related trigger term.

    The check is a case-insensitive *substring* test, so a trigger also fires
    when it appears inside a longer word (e.g. "sum" inside "assume").
    """
    table_keywords = {"table", "data", "values", "measurements", "limits", "thresholds", "parameters", "average", "sum", "percentage"}
    lowered_query = user_query.lower()

    for trigger in table_keywords:
        if trigger in lowered_query:
            return True
    return False

def get_most_similar_keywords(query_keywords, top_text_chunks):
    """Keep the query keywords that also occur in the retrieved text chunks.

    Each chunk is expected to carry its text at index 2. The chunk texts are
    lowercased and tokenized into one shared vocabulary; query keywords found
    in that vocabulary are returned in their original order. When none
    overlap, the original ``query_keywords`` are returned unchanged.
    """
    chunk_vocabulary = set()
    for retrieved in top_text_chunks:
        chunk_vocabulary |= set(word_tokenize(retrieved[2].lower()))

    overlapping = [keyword for keyword in query_keywords if keyword in chunk_vocabulary]
    if overlapping:
        return overlapping
    return query_keywords  # Nothing matched: fall back to the caller's keywords

def _parse_embedding(raw_embedding):
    """Convert a stored embedding (a list or its string form) into a flat float32 array."""
    if isinstance(raw_embedding, str):
        raw_embedding = ast.literal_eval(raw_embedding)
    return np.array(raw_embedding, dtype=np.float32).flatten()


def _non_negative(score):
    """Clamp *score* to >= 0 (NaN becomes 0) so fractional powers stay real-valued."""
    score = float(score)
    return score if score > 0.0 else 0.0


def query_supabase(user_query):
    """Retrieve the text and table chunks most relevant to *user_query*.

    Every row of the ``documents`` and ``tables`` Supabase tables is fetched
    and scored client-side:

    1. Text chunks are ranked by cosine similarity between their stored
       embedding and the query embedding.
    2. The query keywords are narrowed to those that also occur in the top
       three text chunks (falling back to all keywords when none do).
    3. Table chunks get a blended score built from embedding similarity,
       literal keyword hits in the table data / description, and the average
       similarity between each keyword's embedding and the table embedding.
    4. Both rankings are merged, promoting the best table to first place when
       its score exceeds 0.75.

    Args:
        user_query: Free-text question from the user.

    Returns:
        Up to five ``(chunk_id, kind, content, score)`` tuples, where ``kind``
        is ``"text"`` or ``"table"``. Text and table scores come from
        different formulas and are not directly comparable.
    """
    query_embedding = np.array(get_embedding(user_query), dtype=np.float32).flatten()
    keywords = extract_keywords_simple(user_query)
    requires_table = query_requires_table(user_query)

    #### Step 1: Retrieve Text Chunks (Vector Search) ####
    response_text = supabase.table("documents").select("chunk_id, content, embedding, type, metadata").execute()
    text_results = []

    for record in response_text.data or []:
        chunk_embedding = _parse_embedding(record["embedding"])
        # Rows whose embedding has a different dimensionality cannot be compared.
        if chunk_embedding.shape != query_embedding.shape:
            continue
        similarity = 1 - cosine(query_embedding, chunk_embedding)
        text_results.append((record["chunk_id"], "text", record["content"], similarity))

    text_results.sort(key=lambda x: x[3], reverse=True)
    top_text_chunks = text_results[:3]

    #### Step 2: Expand Query Using Retrieved Text ####
    refined_keywords = get_most_similar_keywords(keywords, top_text_chunks)

    #### Step 3: Retrieve Table Chunks Using Specialized Scoring ####
    response_tables = supabase.table("tables").select("chunk_id, table_data, description, embedding, metadata").execute()
    table_records = response_tables.data or []
    table_results = []
    # NOTE(review): table_weight is computed but never applied to the score
    # below, so requires_table currently has no effect on ranking. Left in
    # place (rather than guessed into the formula) until the intended
    # weighting is confirmed.
    table_weight = 2.5 if requires_table else 1.5  # Increase weight dynamically

    # Keyword embeddings do not depend on the table row, so run the model once
    # per keyword here instead of once per keyword per table.
    keyword_embeddings = [get_embedding(word) for word in refined_keywords] if table_records else []
    keyword_count = max(len(refined_keywords), 1)

    for record in table_records:
        table_embedding = _parse_embedding(record["embedding"])
        if table_embedding.shape != query_embedding.shape:
            continue

        # Treat missing (None) text columns as empty instead of crashing on .lower().
        table_data = (record["table_data"] or "").lower()
        description = record["description"] or ""
        table_description = description.lower()

        # A keyword among the first five space-separated tokens of the table
        # data counts three times as much as a hit anywhere else.
        leading_tokens = table_data.split(" ")[:5]
        keyword_match_score = sum(
            3 if word in leading_tokens else 1
            for word in refined_keywords
            if word in table_data or word in table_description
        )

        # Cosine similarity can be negative (or NaN for degenerate vectors);
        # a fractional power of such a value is NaN/complex, which used to
        # silently discard the table or break the comparison below.
        embedding_similarity = _non_negative(1 - cosine(query_embedding, table_embedding))
        keyword_embedding_score = _non_negative(
            sum(1 - cosine(keyword_embedding, table_embedding) for keyword_embedding in keyword_embeddings)
            / keyword_count
        )

        final_table_score = (
            (embedding_similarity ** 0.8) * 0.2
            + (keyword_match_score ** 2.5) * 0.6
            + (keyword_embedding_score ** 1.2) * 0.2
        )

        if final_table_score > 0:
            table_results.append((record["chunk_id"], "table", description, final_table_score))

    table_results.sort(key=lambda x: x[3], reverse=True)

    #### Step 4: Merge & Rank Results with Adaptive Prioritization ####
    if table_results and table_results[0][3] > 0.75:
        # A strongly matching table leads, followed by the best text chunks.
        final_results = [table_results[0]] + text_results[:2] + table_results[1:2] + text_results[2:]
    else:
        # No table scored above the threshold: text first, then up to two tables.
        final_results = text_results[:3] + table_results[:2]

    return final_results[:5]  # Return top 5 most relevant results

LLM function

In [None]:
import openai

# OpenAI API Key
# Read from the environment so the secret never has to be stored in the
# notebook; the empty-string fallback keeps the original placeholder behaviour
# (export OPENAI_API_KEY or fill the value in here before running).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
openai.api_key = OPENAI_API_KEY

# Function to call OpenAI LLM with chat history
def call_openai_llm(user_query, retrieved_chunks, chat_history=None):
    """Answer *user_query* with the OpenAI chat API, grounded in retrieved chunks.

    Args:
        user_query: The user's question.
        retrieved_chunks: Sequence of chunk tuples whose element at index 2 is
            the chunk text; each is numbered and placed in the prompt context.
        chat_history: Optional list of prior ``{"role", "content"}`` messages.
            When provided it is extended in place with this turn; when omitted
            a new list is started.

    Returns:
        ``(answer, chat_history)`` -- the model's reply and the history
        including this turn. Only the bare question (without the retrieved
        context) is stored in the history.
    """
    # A fresh list per call: a mutable default ([]) is created once and would
    # leak conversation history between unrelated calls that omit the argument.
    if chat_history is None:
        chat_history = []

    # Prepare context from retrieved chunks
    context_text = "\n\n".join(
        f"Chunk {index}: {chunk[2]}" for index, chunk in enumerate(retrieved_chunks, start=1)
    )

    # System prompt, then prior turns, then the current question with its context
    messages = [
        {"role": "system", "content": "You are an intelligent assistant. Use the following retrieved information to answer the user's query."},
        *chat_history,
        {"role": "user", "content": f"Context:\n{context_text}\n\nUser's Question: {user_query}"},
    ]

    # Call OpenAI's Chat API through the client-based interface
    client = openai.OpenAI(api_key=openai.api_key)
    response = client.chat.completions.create(
        model="gpt-4-turbo",  # You can change this to another OpenAI model
        messages=messages,
        temperature=0.7
    )

    answer = response.choices[0].message.content

    # Record this turn so follow-up questions keep conversational memory
    chat_history.append({"role": "user", "content": user_query})
    chat_history.append({"role": "assistant", "content": answer})

    return answer, chat_history

# TEST

Retrieving chunks only

In [None]:
user_query = "usage limit"

retrieved_chunks = query_supabase(user_query)

# Show each hit: id, kind, the first 300 characters of its content, and its score.
for chunk_id, chunk_type, content, relevance in retrieved_chunks:
    print(f"Chunk ID: {chunk_id}\nType: {chunk_type}\nContent: {content[:300]}...\nRelevance: {relevance:.4f}\n")

Chunk ID: a551dee1-3d8e-485d-b25e-769ee09d5b20
Type: table
Content: 2.5. Limiti Di Impiego
Tabella 1. Limiti di impiego
Alimentazione elettrica: Temperatura acqua ingresso batteria, 220 - 240 V / 50 Hz: 5 - 70 °C | Alimentazione elettrica: Temperatura ripresa aria, 220 - 240 V / 50 Hz: 10 - 35 °C | Alimentazione elettrica: Umidità relativa ripresa aria, 220 - 240 V ...
Relevance: 0.8898

Chunk ID: 8843584e-ca30-4846-b7cf-234dfabb403b
Type: text
Content: ## 2.5. Limiti Di Impiego
Tabella 1. Limiti di impiego...
Relevance: 0.8015

Chunk ID: 76d151bc-6a7a-442e-b924-981cc588e5b2
Type: text
Content: ## 2.5. Limiti Di Impiego
Si consiglia di far lavorare la macchina agli estremi dei suddetti limiti di impiego solo per brevi periodi, perché il funzionamento per lunghi periodi può ridurre la normale durata dei componenti....
Relevance: 0.7723

Chunk ID: 8941c9c0-9dc7-44c4-bf17-a05a40e824ff
Type: table
Content: Indice

...
Relevance: 0.8775

Chunk ID: 61b9d9e2-6e2e-4430-ba82-46680cca3884
Type: 

In [None]:
user_query = "what are the usage limit?"

retrieved_chunks = query_supabase(user_query)

# Show each hit: id, kind, the first 300 characters of its content, and its score.
for chunk_id, chunk_type, content, relevance in retrieved_chunks:
    print(f"Chunk ID: {chunk_id}\nType: {chunk_type}\nContent: {content[:300]}...\nRelevance: {relevance:.4f}\n")

Chunk ID: a551dee1-3d8e-485d-b25e-769ee09d5b20
Type: table
Content: 2.5. Limiti Di Impiego
Tabella 1. Limiti di impiego
Alimentazione elettrica: Temperatura acqua ingresso batteria, 220 - 240 V / 50 Hz: 5 - 70 °C | Alimentazione elettrica: Temperatura ripresa aria, 220 - 240 V / 50 Hz: 10 - 35 °C | Alimentazione elettrica: Umidità relativa ripresa aria, 220 - 240 V ...
Relevance: 0.9077

Chunk ID: 76d151bc-6a7a-442e-b924-981cc588e5b2
Type: text
Content: ## 2.5. Limiti Di Impiego
Si consiglia di far lavorare la macchina agli estremi dei suddetti limiti di impiego solo per brevi periodi, perché il funzionamento per lunghi periodi può ridurre la normale durata dei componenti....
Relevance: 0.8683

Chunk ID: 64f46084-3a66-425c-8eb7-86b9ddd27c9c
Type: text
Content: ## 2.2. Usi Non Previsti E Controindicazioni
Non sono ammesse le seguenti applicazioni: - · Funzionamento all'aperto - · Funzionamento in ambienti umidi o esplosivi o polverosi - · Funzionamento in ambienti corrosivi, in particol

Add the LLM response

In [None]:
# Example usage: retrieve context for one question, then ask the LLM
user_query = "what are the usage limit of the unit?"
retrieved_chunks = query_supabase(user_query)
chat_history = []  # Conversation memory, starts empty for this demo

# Without any retrieved context there is nothing to ground the answer on.
if not retrieved_chunks:
    print("No relevant information found.")
else:
    response, chat_history = call_openai_llm(user_query, retrieved_chunks, chat_history)
    print("\n🔹 Chatbot Response:\n", response)


🔹 Chatbot Response:
 The usage limits of the unit, as outlined in Chunk 1 under "2.5. Limiti Di Impiego," include the following specifications:

- **Alimentazione elettrica: Temperatura acqua ingresso batteria, 220 - 240 V / 50 Hz**: 5 - 70 °C
- **Alimentazione elettrica: Temperatura ripresa aria, 220 - 240 V / 50 Hz**: 10 - 35 °C
- **Alimentazione elettrica: Umidità relativa ripresa aria, 220 - 240 V / 50 Hz**: 10 - 70 %
