# All imports and inits

In [None]:
import pandas as pd
import glob
import concurrent.futures
import gradio as gr
import numpy as np
import textwrap
import PyPDF2
import requests
import os
import pinecone
import time
import pickle
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from tkinter import scrolledtext, messagebox
from transformers import AutoModel, AutoTokenizer
from typing import List, Tuple
from openai import OpenAI
from langchain_community.retrievers import PineconeHybridSearchRetriever
from pinecone_text.sparse import BM25Encoder
from pinecone import Pinecone, ServerlessSpec, CloudProvider, AwsRegion, VectorType
from pinecone import Index


# import voyageai


# Important: Import pinecone-client properly

# Load environment variables from .env file

load_dotenv()


DATA_PATH = os.getenv("DATA_PATH")

PINECONE_API = os.getenv("PINECONE_API")

# PINECONE_ENV = os.getenv("PINECONE_ENV")

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
NVIDIA_API = os.getenv("NVIDIA_API")

# NVidia Embedding import
client = OpenAI(
    api_key=NVIDIA_API,
    base_url="https://integrate.api.nvidia.com/v1",
)

"""
Input:
    - Context window: 128K
Ouput:
    - Output Max Tokens: 32,768

"""


def track_time(func):
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f"[Time Tracker] `{func.__name__}` took {end - start:.4f} seconds")
        return result

    return wrapper


# Initialize BM25 encoder once and fit it on your corpus
bm25_encoder = BM25Encoder()


# Init Pinecone

In [None]:
pc = Pinecone(api_key=PINECONE_API)
# print(PINECONE_API)


# Embedding Functions (Dense & Sparse)



In [49]:
# Connect to the index
# index = pc.Index("ai-coach")

index = pc.Index("hybrid-search-ai-coach")


# embedding_model = AutoModel.from_pretrained(
#     'jinaai/jina-embeddings-v2-base-en', trust_remote_code=True)


# # Function to generate embeddings without tokenization
# def get_embedding(data):
#     embeddings = embedding_model.encode(data).tolist()
#     return embeddings


@track_time
def get_dense_embedding(text="None"):
    response = client.embeddings.create(
        input=text,
        model="nvidia/nv-embed-v1",
        encoding_format="float",
        extra_body={"input_type": "query", "truncate": "NONE"},
    )
    return response.data[0].embedding


# get_dense_embedding()
@track_time
def get_sparse_embedding(text="None"):
    # # applying tfidf values on sentences
    # Load the pre-trained BM25 model
    # with open("bm25_model.pkl", "rb") as f:
    #     bm25_encoder = pickle.load(f)

    bm25_encoder = BM25Encoder()
    bm25_encoder.fit(text)
    sparse_vector = bm25_encoder.encode_documents(text)
    # Access indices and values
    return sparse_vector


print(get_sparse_embedding("Surgical Conscience"))


  0%|          | 0/19 [00:00<?, ?it/s]

[Time Tracker] `get_sparse_embedding` took 0.0060 seconds
{'indices': [1720271631, 1084069656], 'values': [0.3225806451612903, 0.3225806451612903]}


## Query Pinecone (Method 1 without alpha)


In [48]:
# Function to query Pinecone index using embeddings
@track_time
# Query Pinecone for the most relevant chunks based on the query
def query_pinecone(index, query):
    dense_vector = get_dense_embedding(query)  # Dense query embedding
    sparse_vector = get_sparse_embedding(query)  # Sparse query embedding

    # Perform the hybrid search with both dense and sparse embeddings
    query_result = index.query(
        vector=dense_vector,  # Dense vector for the query
        sparse_vector=sparse_vector,  # Sparse vector for the query
        top_k=2,  # Fetch top K results
        include_metadata=True,  # Include metadata in the results
    )

    return query_result


query_pinecone(index, "Surgical Conscience")


[Time Tracker] `get_dense_embedding` took 0.8131 seconds
[Time Tracker] `get_sparse_embedding` took 0.0010 seconds
[Time Tracker] `query_pinecone` took 2.2345 seconds


{'matches': [{'id': 'vec_357',
              'metadata': {'chunk_id': 0.0,
                           'file_type': 'excel',
                           'source': 'D:\\Disrupt Labs\\Rag '
                                     'Experiments\\env\\Rag-pipelines-experiments\\Surgical '
                                     'Technologist Bootcamp\\Course 2 - '
                                     'Diagnostics, Infection Control and '
                                     'Sterilization\\Assessments\\Exam 02.xlsx',
                           'text': 'Bank Name: 9 Question: The ethical and '
                                   'professional motivation that regulates '
                                   "one's behaviors, specifically reporting "
                                   'breaks in sterile technique is known as '
                                   '___________ Randomization (Yes/No): Yes '
                                   'Correct Answer Number: 3 Answer 1: '
                             

## Query Pinecone (Method 2 with alpha) -- Giving Validation Errors skipping for now due to shortage of time


In [None]:
# @track_time
# def query_pinecone(query: str, embeddings):
#     """
#     Queries the Pinecone hybrid search index using both dense and sparse embeddings.
#     Returns the top-k matching documents from the index.
#     """
#     # Initialize the hybrid search retriever

#     retriever = PineconeHybridSearchRetriever(
#         embeddings=embeddings,
#         embedding_model="nvidia/nv-embed-v1",  # Dense model for generating embeddings
#         sparse_model="bm25_endoder",  # BM25 for sparse embedding
#         index=index,
#         hybrid_search_top_k=5,  # Number of results you want from the hybrid search
#         sparse_weight=0.5,  # Adjust the weight of the sparse (BM25) embeddings
#         dense_weight=0.5,  # Adjust the weight of the dense embeddings
#     )

#     # Perform the hybrid search query
#     results = retriever.invoke(query)

#     # Format and return results
#     return [
#         {
#             "id": result.id,
#             "score": result.score,
#             "content": result.metadata.get("content", ""),
#         }
#         for result in results
#     ]


# # Example query
# user_query = "What is the latest AI technology?"
# embeddings = get_dense_embedding()
# results = query_pinecone(user_query, embeddings)

# # Print results
# for res in results:
#     print(
#         f"ID: {res['id']}, Score: {res['score']}, Content: {res['content'][:200]}..."
#     )  # Show first 200 characters


## Query Pinecone (Method 3 with alpha) 
### - provides more control over how the dense and sparse embeddings are combined


In [53]:
def hybrid_score_norm(dense, sparse, alpha: float):
    """Hybrid score using a convex combination

    alpha * dense + (1 - alpha) * sparse

    Args:
        dense: Array of floats representing
        sparse: a dict of `indices` and `values`
        alpha: scale between 0 and 1
    """
    if alpha < 0 or alpha > 1:
        raise ValueError("Alpha must be between 0 and 1")
    hs = {
        "indices": sparse["indices"],
        "values": [v * (1 - alpha) for v in sparse["values"]],
    }
    return [v * alpha for v in dense], hs


"""
----------------------- Example Usage: ----------------------------------------------------

sparse_vector = {
   'indices': [10, 45, 16],
   'values':  [0.5, 0.5, 0.2]
}
dense_vector = [0.1, 0.2, 0.3]


# Function to query Pinecone index using embeddings
@track_time
# Query Pinecone for the most relevant chunks based on the query
def query_pinecone(index, query):
    dense_vector = get_dense_embedding(query)  # Dense query embedding
    sparse_vector = get_sparse_embedding(query)  # Sparse query embedding

    # Perform the hybrid search with both dense and sparse embeddings
    query_result = index.query(
        vector=dense_vector,  # Dense vector for the query
        sparse_vector=sparse_vector,  # Sparse vector for the query
        top_k=20,  # Fetch top K results
        include_metadata=True,  # Include metadata in the results
    )

    return query_result


query_pinecone(index, "Surgical Conscience")
"""


# query_response = index.query(
#     namespace="",
#     top_k=20,
#     vector=hdense,
#     sparse_vector=hsparse,
# )
# print(query_response)


@track_time
# Query Pinecone for the most relevant chunks based on the query
def query_pinecone(index, query, alpha=0.75):
    dense_vector = get_dense_embedding(query)
    sparse_vector = get_sparse_embedding(query)
    hdense, hsparse = hybrid_score_norm(dense_vector, sparse_vector, alpha=alpha)
    # Perform the hybrid search with both dense and sparse embeddings
    query_result = index.query(
        vector=hdense,  # Dense vector for the query
        sparse_vector=hsparse,  # Sparse vector for the query
        top_k=1000,  # Fetch top K results
        include_metadata=True,  # Include metadata in the results
    )

    return query_result


print(query_pinecone(index, "Surgical Conscience", alpha=1))


[Time Tracker] `get_dense_embedding` took 0.9654 seconds


  0%|          | 0/19 [00:00<?, ?it/s]

[Time Tracker] `get_sparse_embedding` took 0.0050 seconds
[Time Tracker] `query_pinecone` took 1.3816 seconds
{'matches': [{'id': 'vec_357',
              'metadata': {'chunk_id': 0.0,
                           'file_type': 'excel',
                           'source': 'D:\\Disrupt Labs\\Rag '
                                     'Experiments\\env\\Rag-pipelines-experiments\\Surgical '
                                     'Technologist Bootcamp\\Course 2 - '
                                     'Diagnostics, Infection Control and '
                                     'Sterilization\\Assessments\\Exam 02.xlsx',
                           'text': 'Bank Name: 9 Question: The ethical and '
                                   'professional motivation that regulates '
                                   "one's behaviors, specifically reporting "
                                   'breaks in sterile technique is known as '
                                   '___________ Randomization (Yes/No)

# Query Groq Inference

In [None]:
from groq import Groq
# Function to query Groq LLM
# def query_groq(prompt: str) -> str:
#     response = requests.post(
#         GROQ_CHAT_URL,
#         headers=GROQ_HEADERS,
#         json={
#             "model": LLM_MODEL,
#             "messages": [{"role": "user", "content": prompt}],
#             "temperature": 0.5,
#             # "max_tokens": 8192  # max from groq website
#         },
#     )

#     if response.status_code != 200:
#         raise Exception(f"Error querying Groq: {response.text}")

#     return response.json()["choices"][0]["message"]["content"]


# def query_groq(prompt: str) -> str:
#     client = Groq()
#     completion = client.chat.completions.create(
#         messages=[{"role": "user", "content": prompt}],
#         # Change model to compound-beta to use agentic tooling
#         # model: "llama-3.3-70b-versatile",
#         model=LLM_MODEL,
#     )
#     # print(completion.choices[0].message.content)
#     return completion.choices[0].message.content


@track_time
def query_groq(prompt: str) -> str:
    client = Groq()
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=LLM_MODEL,
        temperature=1,  # Set temperature to 0.5
    )
    return completion.choices[0]


query_groq("Hello")
# Print all tool calls
# print(completion.choices[0].message.executed_tools)


# Tokenizer to count number of tokens


@track_time
def count_tokens(text: str) -> int:
    tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-en")
    # Encode the text into tokens
    tokens = tokenizer.encode(text)
    return len(tokens)


# Process User Query

### Gradio GUI TEST

In [None]:
# ------------------------------------------- WORKING 3 Enter key submits user query -------------------------------------------
# Initialize empty conversation history (list of tuples)
conversation_history = []


@track_time
def process_user_query(user_query: str, conversation_history: list):
    print(f"User Query Tokens: {count_tokens(user_query)}")

    # Generate embedding and get relevant context
    embedding = get_embedding(user_query)
    relevant_chunks = query_pinecone(embedding)
    context = "\n".join(chunk["metadata"]["text"] for chunk in relevant_chunks)
    # print("CONTEXT:", context)

    # Format conversation history for the prompt
    history_str = "\n".join(
        f"User: {user}\nCoach: {response}" for user, response in conversation_history
    )

    # Create structured prompt
    prompt = f"""You are an expert, knowledgeable, and friendly coach. Follow these guidelines carefully:

    1. Provide clear, step-by-step explanations to ensure deep understanding.
    2. Use chain-of-thought reasoning to thoroughly evaluate the provided context before responding.
    3. Ask guiding questions to encourage critical thinking.
    4. Adapt your explanation to match the student's knowledge level.
    5. Strictly use terminologies provided in the given context.
    6. Provide short, ideal examples (2-3) to illustrate your points clearly.
    7. Only answer based on the provided context‚Äîdo not speculate or include external information.
    8. Always provide all specific relevant sources from the context in your responses urls, video names, video timestamps , links , resources , ebook names, lesson names , lesson numbers and anything else you think would be relevant to the user query.
    9. Perform sentiment analysis based on conversation history and user queries to adapt your responses empathetically and effectively.
    10. Must provide all relevant video timestamp from where to start watching and where to end watching 
    Context from learning materials:
    {context}

    Conversation history:
    {history_str}

    New student question:
    "{user_query}"
    
    Provide a thoughtful and contextually accurate response now:"""

    # --------------------- Reasoning Prompt --------------------------------------------------------------
    # prompt = f"""You are an expert, knowledgeable, and friendly coach. Follow this structured response framework:

    # ### Response Requirements
    # 1. **Reasoning Process**:
    # - Analyze the question against provided context
    # - Identify relevant context snippets with source citations
    # - Perform sentiment analysis on conversation history

    # 2. **Response Format**:
    # - [Understanding] Paraphrase the question to confirm comprehension
    # - [Relevant Context] Cite exact source material with location references
    # - [Step-by-Step Explanation] Break down concepts using chain-of-thought
    # - [Examples] Provide 2-3 ideal examples from context
    # - [Guiding Questions] Pose 1-2 reflective questions to deepen learning
    # - [Summary] Concise answer reiterating key points

    # 3. **Style Guidelines**:
    # - Use terminology strictly from context
    # - Adapt complexity to student's history
    # - Maintain empathetic tone based on sentiment analysis

    # ### Context Materials:
    # {context}

    # ### Conversation History:
    # {history_str}

    # ### New Question:
    # "{user_query}"

    # ### Your Response:
    # [Understanding] First, let me clarify what you're asking...
    # [Relevant Context] According to [Source X, Section Y]...
    # [Step-by-Step Explanation] The process works as follows:
    # 1. First concept...
    # 2. Second concept...
    # 3. Practical application...
    # [Examples] For instance:
    # - Example 1...
    # - Example 2...
    # [Guiding Questions] Have you considered...? How might this apply to...?
    # [Summary] To recap the key points..."""

    # Get LLM response
    groq_response = query_groq(prompt)
    print(f"Response Toke   ns: {count_tokens(groq_response.message.content)}")

    # Return updated history with new interaction
    return conversation_history + [(user_query, groq_response.message.content)]


# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# üßë‚Äçüè´ AI Coaching Assistant")
    gr.Markdown("Welcome! I'm here to help you learn. Type your question below.")

    # State management
    chat_history = gr.State(conversation_history)

    with gr.Row():
        chatbot = gr.Chatbot(height=500)
        with gr.Column(scale=0.5):
            context_display = gr.Textbox(label="Relevant Context", interactive=False)

    user_input = gr.Textbox(label="Your Question", placeholder="Type here...")

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        undo_btn = gr.Button("Undo Last")
        clear_btn = gr.Button("Clear History")

    def handle_submit(user_input, history):
        if not user_input.strip():
            return gr.update(), history, ""

        # Process query and update history
        new_history = process_user_query(user_input, history)

        # Get latest context for display
        latest_context = "\n".join(
            [
                chunk["metadata"]["text"]
                for chunk in query_pinecone(get_embedding(user_input))
            ][:]
        )  # Show top 3 context snippets

        return "", new_history, latest_context

    # Component interactions
    submit_btn.click(
        handle_submit,
        [user_input, chat_history],
        [user_input, chat_history, context_display],
    ).then(lambda x: x, [chat_history], [chatbot])

    # Add submit on Enter key press
    user_input.submit(
        handle_submit,
        [user_input, chat_history],
        [user_input, chat_history, context_display],
    ).then(lambda x: x, [chat_history], [chatbot])

    undo_btn.click(
        lambda history: history[:-1] if history else [], [chat_history], [chat_history]
    ).then(lambda x: x, [chat_history], [chatbot])

    clear_btn.click(lambda: [], None, [chat_history]).then(
        lambda: ([], ""), None, [chatbot, context_display]
    )

interface.launch(share=True)
