# All imports and inits

In [None]:
import pandas as pd
import glob
import concurrent.futures
import gradio as gr
import tkinter as tk
import streamlit as st
import numpy as np
import textwrap
import PyPDF2
import requests
import os
import pinecone
import time

from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from tkinter import scrolledtext, messagebox
from transformers import AutoModel, AutoTokenizer
from typing import List, Tuple
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec, CloudProvider, AwsRegion, VectorType


# Load variables from a local .env file into the process environment so the
# os.getenv() lookups below can see them.
load_dotenv()

# Settings read from the environment. os.getenv() returns None for any
# variable that is not set, so a missing key only surfaces later, when the
# corresponding service is first called.
DATA_PATH = os.getenv("DATA_PATH")
PINECONE_API = os.getenv("PINECONE_API")
# PINECONE_ENV = os.getenv("PINECONE_ENV")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# GROQ_CHAT_URL and GROQ_HEADERS are only referenced by the commented-out,
# requests-based query_groq variant in the visible part of this file.
GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"  # for integrating groq via openai api method
# Configure headers for Groq API requests
GROQ_HEADERS = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json",
}
# Chat model name passed to Groq by query_groq.
# LLM_MODEL = "llama3-70b-8192"
LLM_MODEL = "llama-3.3-70b-versatile"

NVIDIA_API = os.getenv("NVIDIA_API")

# OpenAI-compatible client pointed at NVIDIA's endpoint; get_embedding uses it
# to request embeddings.
client = OpenAI(
    api_key= NVIDIA_API,
    base_url="https://integrate.api.nvidia.com/v1",
)

"""
Input:
    - Context window: 128K
Output:
    - Output Max Tokens: 32,768

"""

def track_time(func):
    """Decorator that prints how long each call to *func* takes.

    The wrapped function is called with the original positional and keyword
    arguments and its return value is passed through unchanged. The elapsed
    wall-clock time (measured with ``time.perf_counter``) is printed after a
    successful call; if *func* raises, the exception propagates and nothing
    is printed.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature that keeps the metadata
        (``__name__``, ``__doc__``, ...) of *func*.
    """
    # Function-scope import so this block does not depend on the file's
    # import cell being edited.
    from functools import wraps

    @wraps(func)  # keep the decorated function's name/docstring intact
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(f"[Time Tracker] `{func.__name__}` took {elapsed:.4f} seconds")
        return result

    return wrapper
# # EMBEDDING_MODEL = "llama3-405b-8192-embed"

# vo = voyageai.Client()


# Init Pinecone

In [None]:
# Pinecone client used to open the vector index.
pc = Pinecone(api_key=PINECONE_API)
# Never echo the key itself: notebook output is saved with the file and is
# easily shared. Only report whether a key was found in the environment.
print("Pinecone API key loaded:", bool(PINECONE_API))


# Embedding Function



In [None]:
# Connect to the "ai-coach" Pinecone index; query_pinecone runs its vector
# searches against this handle.
index = pc.Index("ai-coach")
# # index = pc.Index("ahsan-400pg-pdf-doc-test")

# embedding_model = AutoModel.from_pretrained(
#     'jinaai/jina-embeddings-v2-base-en', trust_remote_code=True)


# # Function to generate embeddings without tokenization
# def get_embedding(data):
#     embeddings = embedding_model.encode(data).tolist()
#     return embeddings

@track_time
def get_embedding(text="None"):
    """Return the ``nvidia/nv-embed-v1`` embedding of *text*.

    The request goes through the module-level NVIDIA ``client`` and asks for
    float encoding with ``input_type="query"``.

    Args:
        text: The text to embed. Defaults to the literal string ``"None"``.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    # NOTE(review): "truncate": "NONE" presumably makes the service reject
    # over-length input instead of truncating it — confirm against the
    # NVIDIA embedding API documentation.
    request_options = {"input_type": "query", "truncate": "NONE"}
    embedding_response = client.embeddings.create(
        model="nvidia/nv-embed-v1",
        input=text,
        encoding_format="float",
        extra_body=request_options,
    )
    first_item = embedding_response.data[0]
    return first_item.embedding


# Smoke test: issues one real embedding request when this cell runs.
get_embedding("None")


# Query Pinecone


In [None]:
# Function to query Pinecone index using embeddings
@track_time
def query_pinecone(embedding, top_k=1600):
    """Return the Pinecone matches closest to *embedding*.

    Args:
        embedding: Query vector, as returned by ``get_embedding``.
        top_k: Maximum number of matches to request. Defaults to 1600, the
            value that was previously hard-coded, so existing callers are
            unaffected.

    Returns:
        The ``"matches"`` entry of the query result; metadata is included
        with each match.
    """
    # NOTE(review): 1600 matches with metadata is a very large result set and
    # every match's text ends up in the LLM prompt — confirm this against
    # Pinecone's query limits and the model's context window.
    result = index.query(vector=embedding, top_k=top_k, include_metadata=True)
    return result["matches"]


# Query Groq Inference

In [None]:
from groq import Groq
# Function to query Groq LLM
# def query_groq(prompt: str) -> str:
#     response = requests.post(
#         GROQ_CHAT_URL,
#         headers=GROQ_HEADERS,
#         json={
#             "model": LLM_MODEL,
#             "messages": [{"role": "user", "content": prompt}],
#             "temperature": 0.5,
#             # "max_tokens": 8192  # max from groq website
#         },
#     )

#     if response.status_code != 200:
#         raise Exception(f"Error querying Groq: {response.text}")

#     return response.json()["choices"][0]["message"]["content"]


# def query_groq(prompt: str) -> str:
#     client = Groq()
#     completion = client.chat.completions.create(
#         messages=[{"role": "user", "content": prompt}],
#         # Change model to compound-beta to use agentic tooling
#         # model: "llama-3.3-70b-versatile",
#         model=LLM_MODEL,
#     )
#     # print(completion.choices[0].message.content)
#     return completion.choices[0].message.content

@track_time
def query_groq(prompt: str):
    """Send *prompt* to the Groq chat model as a single user message.

    Args:
        prompt: Full prompt text; sent as the only message, with role "user".

    Returns:
        The first choice object of the completion — not a plain string.
        Callers read the reply text from ``.message.content``.
    """
    # Named groq_client so it does not shadow the module-level NVIDIA
    # `client` used for embeddings.
    # NOTE(review): Groq() is built without arguments; presumably it picks up
    # GROQ_API_KEY from the environment — confirm.
    groq_client = Groq()
    completion = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=LLM_MODEL,
        temperature=0.5,
    )
    return completion.choices[0]


# Smoke test: issues one real chat completion request when this cell runs.
query_groq("Hello")
# Print all tool calls
# print(completion.choices[0].message.executed_tools)


# Tokenizer to count number of tokens

# Tokenizers already loaded in this process, keyed by model name.
_TOKENIZER_CACHE = {}


def _get_tokenizer(model_name="jinaai/jina-embeddings-v2-base-en"):
    """Return the tokenizer for *model_name*, loading it only on first use."""
    if model_name not in _TOKENIZER_CACHE:
        _TOKENIZER_CACHE[model_name] = AutoTokenizer.from_pretrained(model_name)
    return _TOKENIZER_CACHE[model_name]


@track_time
def count_tokens(text: str) -> int:
    """Return the number of tokens the jina-embeddings-v2 tokenizer yields for *text*.

    The tokenizer is loaded once and reused; previously it was re-created
    with ``AutoTokenizer.from_pretrained`` on every call.

    Args:
        text: The text to tokenize.

    Returns:
        The length of the encoded token id sequence.
    """
    # NOTE(review): this tokenizer's model name differs from LLM_MODEL, so the
    # count is presumably only an estimate of what the chat model sees.
    return len(_get_tokenizer().encode(text))


# Process User Query

### Gradio GUI TEST

In [None]:
# ------------------------------------------- WORKING 3 Enter key submits user query -------------------------------------------
# Initialize empty conversation history (list of tuples)
conversation_history = []


def _retrieve_context(user_query: str) -> str:
    """Embed *user_query*, query Pinecone, and join the matched chunk texts with newlines."""
    embedding = get_embedding(user_query)
    relevant_chunks = query_pinecone(embedding)
    return "\n".join(chunk["metadata"]["text"] for chunk in relevant_chunks)


@track_time
def process_user_query(user_query: str, conversation_history: list, context=None):
    """Answer *user_query* with the coach prompt and return the extended history.

    Args:
        user_query: The student's new question.
        conversation_history: Previous ``(user_message, coach_reply)`` pairs.
            The list is not mutated.
        context: Optional pre-retrieved context text. When ``None`` (the
            default) the context is retrieved here from Pinecone, so existing
            two-argument callers behave as before. Passing it lets a caller
            that also displays the context avoid a second retrieval.

    Returns:
        A new list: *conversation_history* plus one
        ``(user_query, reply_text)`` pair.
    """
    print(f"User Query Tokens: {count_tokens(user_query)}")

    # Generate embedding and get relevant context (unless the caller did).
    if context is None:
        context = _retrieve_context(user_query)
    print("CONTEXT:", context)

    # Format conversation history for the prompt
    history_str = "\n".join(
        f"User: {user}\nCoach: {response}" for user, response in conversation_history
    )

    # Create structured prompt
    prompt = f"""You are an expert, knowledgeable, and friendly coach. Follow these guidelines carefully:

    1. Provide clear, step-by-step explanations to ensure deep understanding.
    2. Use chain-of-thought reasoning to thoroughly evaluate the provided context before responding.
    3. Ask guiding questions to encourage critical thinking.
    4. Adapt your explanation to match the student's knowledge level.
    5. Strictly use terminologies provided in the given context.
    6. Provide short, ideal examples (2-3) to illustrate your points clearly.
    7. Only answer based on the provided context—do not speculate or include external information.
    8. Explicitly cite the sources from the context in your responses.
    9. Perform sentiment analysis based on conversation history and user queries to adapt your responses empathetically and effectively.

    Context from learning materials:
    {context}

    Conversation history:
    {history_str}

    New student question:
    "{user_query}"

    Provide a helpful, structured response that meets the above criteria.

    (Note: The following examples are only provided for your reference to demonstrate an effective response format):

    Question: How long will the average externship take to complete?
    Answer: 125 surgical cases – typically 6 months to 1 year.

    Question: What should I focus on when studying anatomy, physiology, and medical terminology?
    Answer: Focus specifically on content related to surgical procedures, emphasizing body systems, terminology, and physiological functions most relevant to surgery.

    Question: What’s the best way to study and memorize surgical instrumentation?
    Answer: First, understand the National Center for Competency Testing (NCCT) exam expectations regarding instruments. Refer to official NCCT guidelines and utilize platforms such as Quizlet and ProProfs for visual memorization and repetition.

    Provide a thoughtful and contextually accurate response now:"""

    # --------------------- Reasoning Prompt --------------------------------------------------------------
    # prompt = f"""You are an expert, knowledgeable, and friendly coach. Follow this structured response framework:

    # ### Response Requirements
    # 1. **Reasoning Process**:
    # - Analyze the question against provided context
    # - Identify relevant context snippets with source citations
    # - Perform sentiment analysis on conversation history

    # 2. **Response Format**:
    # - [Understanding] Paraphrase the question to confirm comprehension
    # - [Relevant Context] Cite exact source material with location references
    # - [Step-by-Step Explanation] Break down concepts using chain-of-thought
    # - [Examples] Provide 2-3 ideal examples from context
    # - [Guiding Questions] Pose 1-2 reflective questions to deepen learning
    # - [Summary] Concise answer reiterating key points

    # 3. **Style Guidelines**:
    # - Use terminology strictly from context
    # - Adapt complexity to student's history
    # - Maintain empathetic tone based on sentiment analysis

    # ### Context Materials:
    # {context}

    # ### Conversation History:
    # {history_str}

    # ### New Question:
    # "{user_query}"

    # ### Your Response:
    # [Understanding] First, let me clarify what you're asking...
    # [Relevant Context] According to [Source X, Section Y]...
    # [Step-by-Step Explanation] The process works as follows:
    # 1. First concept...
    # 2. Second concept...
    # 3. Practical application...
    # [Examples] For instance:
    # - Example 1...
    # - Example 2...
    # [Guiding Questions] Have you considered...? How might this apply to...?
    # [Summary] To recap the key points..."""

    # Get LLM response; query_groq returns a choice object, not a string.
    groq_response = query_groq(prompt)
    reply = groq_response.message.content
    print(f"Response Tokens: {count_tokens(reply)}")

    # Return updated history with new interaction
    return conversation_history + [(user_query, reply)]


# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# 🧑‍🏫 AI Coaching Assistant")
    gr.Markdown("Welcome! I'm here to help you learn. Type your question below.")

    # State management
    chat_history = gr.State(conversation_history)

    with gr.Row():
        # Integer scales (2:1) keep the previous chatbot-to-context width
        # ratio; the earlier fractional Column scale of 0.5 is not a valid
        # integer scale value.
        chatbot = gr.Chatbot(height=500, scale=2)
        with gr.Column(scale=1):
            context_display = gr.Textbox(label="Relevant Context", interactive=False)

    user_input = gr.Textbox(label="Your Question", placeholder="Type here...")

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        undo_btn = gr.Button("Undo Last")
        clear_btn = gr.Button("Clear History")

    def handle_submit(user_input, history):
        """Answer the question and return (cleared input, new history, context text)."""
        if not user_input.strip():
            # Blank question: leave the input box and history untouched.
            return gr.update(), history, ""

        # Retrieve once and reuse the same text for both the prompt and the
        # context panel (all retrieved snippets are shown).
        context = _retrieve_context(user_input)
        new_history = process_user_query(user_input, history, context=context)

        return "", new_history, context

    def sync_chatbot(history):
        """Mirror the stored history into the Chatbot component."""
        return history

    # Component interactions: the Submit button and the Enter key share the
    # same handler chain.
    for register_event in (submit_btn.click, user_input.submit):
        register_event(
            handle_submit,
            [user_input, chat_history],
            [user_input, chat_history, context_display],
        ).then(sync_chatbot, [chat_history], [chatbot])

    undo_btn.click(
        lambda history: history[:-1] if history else [], [chat_history], [chat_history]
    ).then(sync_chatbot, [chat_history], [chatbot])

    clear_btn.click(lambda: [], None, [chat_history]).then(
        lambda: ([], ""), None, [chatbot, context_display]
    )

# share=True asks Gradio to create a shareable link for the app.
interface.launch(share=True)
