# All imports and inits

In [21]:
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from tkinter import scrolledtext, messagebox
from transformers import AutoModel, AutoTokenizer
# from pinecone import Pinecone, ServerlessSpec
import pinecone
from pinecone import (
    Pinecone,
    ServerlessSpec,
    CloudProvider,
    AwsRegion,
    VectorType
)

import os
import requests
import PyPDF2
import textwrap
import numpy as np
import streamlit as st
import tkinter as tk
import gradio as gr
from typing import List, Tuple
import concurrent.futures
# Important: Import pinecone-client properly
# Load configuration and secrets from the local .env file into the process
# environment so the os.getenv() calls below can see them.
load_dotenv()

DATA_PATH = os.getenv("DATA_PATH")        # directory handed to the PDF loader
PINECONE_API = os.getenv("PINECONE_API")  # Pinecone API key (secret)
PINECONE_ENV = os.getenv("PINECONE_ENV")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # Groq API key (secret)

# Report only whether the key was found. Printing the key itself would leak it
# into saved notebook output, logs and version control.
print("PINECONE_API", "loaded" if PINECONE_API else "MISSING")


# Groq API settings
# NOTE(review): GROQ_EMBED_URL and EMBEDDING_MODEL are not referenced anywhere
# in the visible part of this notebook — embeddings are produced by the local
# embedding model instead. Confirm whether they are still needed.
GROQ_EMBED_URL = "https://api.groq.com/openai/v1/embeddings"
GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
EMBEDDING_MODEL = "llama3-405b-8192-embed"
LLM_MODEL = "llama3-70b-8192"


# Configure headers for Groq API requests
# The f-string is evaluated once, right here: if GROQ_API_KEY is None at this
# point the header becomes the literal string "Bearer None".
GROQ_HEADERS = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}


PINECONE_API [REDACTED — this key was exposed in saved output and should be rotated]


# PDF loader

In [None]:
def pdf_load_documents():
    """Load the PDFs found in the directory named by DATA_PATH.

    Returns:
        The documents returned by ``PyPDFDirectoryLoader.load()`` for that
        directory.
    """
    return PyPDFDirectoryLoader(DATA_PATH).load()


# documents = pdf_load_documents()
# documents


# def extract_text_from_pdf(pdf_path: str) -> str:
#     """Extract text from a PDF file."""
#     with open(pdf_path, 'r') as file:
#         pdf_reader = PyPDF2.PdfReader(file)
#         text = ""
#         for page_num in range(len(pdf_reader.pages)):
#             page = pdf_reader.pages[page_num]
#             text += page.extract_text() + "\n"
#     return text
# extract_text_from_pdf(DATA_PATH)


# Text Splitting / Chunking using LangChain

In [None]:


def split_documents(documents, chunk_size=800, chunk_overlap=80):
    """Split loaded documents into overlapping text chunks.

    Args:
        documents: Documents to split, e.g. the result of
            ``pdf_load_documents()``.
        chunk_size: Maximum chunk length as measured by ``len`` (characters).
        chunk_overlap: Number of characters shared by consecutive chunks so
            that text cut at a boundary still appears whole in one of them.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        # Separators are matched as literal strings, not regular expressions.
        is_separator_regex=False,
    )
    return text_splitter.split_documents(documents)


# chunks = split_documents(documents)
# chunks


# Init Pinecone

In [None]:
from pinecone import Pinecone, ServerlessSpec

# Client object through which the Pinecone index is opened later on.
pc = Pinecone(api_key=PINECONE_API)
# Confirm initialisation without echoing the API key: anything printed here is
# persisted in the notebook's saved output.
print("Pinecone client initialised")


#  --------------- initialize pinecone -----------------------------
# pc.create_index_for_model(
#     name="test-index",
#     cloud="aws",
#     region="us-east-1",
#     embed={
#         "model":"llama-text-embed-v2",
#         "field_map":{"text": "page_content"}
#     }
# )


### When to Use What:
**Use Upsert:**

- When you're adding new vectors or want to replace existing vectors with new data (including changing the vector values).
- When you need to add a completely new document or vector.
- When you want to update both the vector values and metadata.

**Use Update:**

- When you're only modifying the metadata of an existing vector.
- When the vector values (embeddings) themselves are correct and only extra information like text, author, or document-related metadata needs to be updated.

**Summary:**

- **Upsert:** Adds or replaces both the vector values and metadata. Use when inserting or completely replacing data.
- **Update:** Modifies the metadata without changing the vector values. Use when the vectors are correct, but metadata needs an update.

For your case, if you just want to add or update the page_content or any other metadata for existing vectors, use update. If you want to re-upload vectors with new embeddings or metadata, use upsert.









## Creating Embeddings via `AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en')` and Upserting Each to Pinecone One by One


In [None]:
# Handle to the "ai-coach" Pinecone index.
index = pc.Index("ai-coach")

# Embedding model loaded from the Hugging Face hub.
# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run locally — confirm the repository is trusted.
embedding_model = AutoModel.from_pretrained(
    "jinaai/jina-embeddings-v2-base-en",
    trust_remote_code=True,
)
# user_query = "user query"
# Function to generate embeddings (no separate, manual tokenization step)


def get_embedding(data):
    """Embed *data* with the module-level ``embedding_model``.

    Args:
        data: Input passed straight to ``embedding_model.encode``.

    Returns:
        The encoder output converted with ``.tolist()``, i.e. plain Python
        data rather than an array object.
    """
    return embedding_model.encode(data).tolist()

# def upsert_chunks_to_pinecone(index, chunks):
#   count = 1
#   for chunk in chunks:
#     #embedding = embedding_model.encode(chunk.page_content).tolist()
#     embedding = get_embedding(chunk.page_content)
#     # Extract metadata
#     metadata = chunk.metadata
#     text = chunk.page_content
#     # Create a unique vector ID for each chunk (e.g., based on count or some unique identifier)
#     vector_id = f"vec_{count}"

#     # Upsert the embedding along with its metadata
#     index.upsert(vectors=[(vector_id, embedding, metadata, text)])

#     print(f"Embedding {count} upserted to Pinecone with metadata")
#     count += 1
#       # Ensure data is written immediately
#   print(f"All {count} Embeddings have been upserted to pinecone")


def upsert_chunks_to_pinecone(index, chunks):
    """Embed each chunk and upsert it into Pinecone, one vector per request.

    Args:
        index: Pinecone index handle; only its ``upsert`` method is used.
        chunks: Iterable of objects exposing ``page_content`` (the text) and
            ``metadata`` (a mapping).

    Vector IDs are positional (``vec_1``, ``vec_2``, ...), so they depend on
    the order of *chunks*.
    """
    total = 0
    for total, chunk in enumerate(chunks, start=1):
        embedding = get_embedding(chunk.page_content)

        # Build a new dict rather than writing into chunk.metadata, so the
        # caller's chunk objects are not modified as a side effect. The chunk
        # text is stored under "text" so it can be read back at query time.
        metadata = {**chunk.metadata, "text": chunk.page_content}

        index.upsert(vectors=[(f"vec_{total}", embedding, metadata)])
        print(f"Embedding {total} upserted to Pinecone with metadata")

    print(f"All {total} Embeddings have been upserted to Pinecone")


#upsert_chunks_to_pinecone(index, chunks)

# query_embeddings = embedding_model.encode(user_query).tolist()
# query_embeddings


# Update Vectors Function

In [None]:
def update_pinecone_chunks(index, chunks):
    """Re-embed each chunk and update the matching Pinecone vector in place.

    Args:
        index: Pinecone index handle; only its ``update`` method is used.
        chunks: Iterable of objects exposing ``page_content`` (the text) and
            ``metadata`` (a mapping).

    Vectors are addressed by positional IDs (``vec_1``, ``vec_2``, ...), so
    *chunks* must be in the same order that was used when they were upserted.
    """
    total = 0
    for total, chunk in enumerate(chunks, start=1):
        embedding = get_embedding(chunk.page_content)

        # Store the chunk text alongside the new embedding. Retrieval reads
        # metadata["text"], so leaving it out would keep serving the old text
        # for a vector whose values now describe different content.
        metadata = {**chunk.metadata, "text": chunk.page_content}

        index.update(id=f"vec_{total}", values=embedding, set_metadata=metadata)
        print(f"Embedding {total} updated in Pinecone with new metadata")

    print(f"All {total} embeddings have been updated in Pinecone")

# update_pinecone_chunks(index, chunks)


Since your application is designed to answer a wide range of student queries and suggest relevant material, you want to retrieve enough content to cover different facets of a topic without overwhelming the LLM with too much information.

# Starting Point:
- A common starting point is to set top_k between **5 and 10.**
- **top_k=5:** This can work well if your curated content is highly relevant and precise, ensuring that the top 5 matches are very close to the query.
-  **top_k=10:** If you want the coach to consider a broader range of content—perhaps to provide diverse perspectives or cover a topic more comprehensively—increasing top_k to around 10 might be beneficial.

# Experiment and Adjust:
- The “best” value depends on factors such as the diversity of your content, how densely your data covers the topics, and the quality of the embedding matches. It’s a good idea to experiment with different top_k values and evaluate the quality and relevance of the responses in your specific use case.


# Query Pinecone


In [None]:
# Function to query Pinecone index using embeddings
def query_pinecone(embedding, top_k=5):
    """Return the Pinecone matches closest to *embedding*.

    Args:
        embedding: Query vector, e.g. the output of ``get_embedding``.
        top_k: Number of matches to request. Defaults to 5; raise it to give
            the LLM broader context, lower it to keep the prompt short.

    Returns:
        The ``matches`` entry of the query result, with metadata included.
    """
    result = index.query(vector=embedding, top_k=top_k, include_metadata=True)
    return result['matches']


# Query Groq Inference

In [None]:
# Function to query Groq LLM
def query_groq(prompt: str, timeout: float = 60.0) -> str:
    """Send *prompt* to the Groq chat-completions endpoint and return the reply.

    Args:
        prompt: Full prompt text, sent as a single ``user`` message.
        timeout: Seconds to wait for Groq before ``requests`` gives up.
            Without a timeout a stalled connection blocks the caller forever.

    Returns:
        The ``content`` of the first choice in the JSON response.

    Raises:
        Exception: If Groq answers with a non-200 status code.
        requests.exceptions.Timeout: If no response arrives within *timeout*.
    """
    response = requests.post(
        GROQ_CHAT_URL,
        headers=GROQ_HEADERS,
        json={
            "model": LLM_MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.5,
            "max_tokens": 8192  # max from groq website
        },
        timeout=timeout,
    )

    if response.status_code != 200:
        raise Exception(f"Error querying Groq: {response.text}")

    return response.json()["choices"][0]["message"]["content"]


# Tokenizer to count number of tokens
# This is the tokenizer of the jina embedding model, not of the Groq LLM
# (LLM_MODEL), so counts derived from it are only an approximation of what
# the LLM sees. In the visible code it is used for printed diagnostics only.
tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-en")


def count_tokens(text: str) -> int:
    """Return how many token ids the module-level ``tokenizer`` yields for *text*."""
    return len(tokenizer.encode(text))


# Process User Query

In [None]:
# conversation_history = ["# 🧑‍🏫 AI Coaching Assistant\nWelcome! I'm here to help you learn. Type your question below."]

# def process_user_query(user_query: str, conversation_history: list):

#     print(f"User Query Tokens : {count_tokens(user_query)}")

#     # Step 1: Generate embedding for the user query
#     embedding = get_embedding(user_query)

#     # Step 2: Query Pinecone for relevant chunks
#     relevant_chunks = query_pinecone(embedding)

#     # Prepare the context from relevant chunks
#     context = "\n".join([chunk['metadata']["text"]
#                         for chunk in relevant_chunks])
#     print("CONTEXT: ", context)

#     # Step 3: Combine conversation history with current user query
#     conversation_history_str = "\n".join(conversation_history)

#     # Step 4: Craft a good coach prompt for the LLM
#     prompt = f"""
#     You are a knowledgeable and friendly coach. Your goal is to help students understand concepts in a detailed and easy-to-understand manner. 
#     Be patient, ask guiding questions, and provide step-by-step explanations where needed. Adapt your responses to the student's knowledge level 
#     and help them build confidence in their learning. Refer relevant material to the student and encourage them to explore further.

#     Context from the student's material:
#     {context}

#     Conversation history:
#     {conversation_history_str}

#     The student has asked the following question:
#     "{user_query}"

#     Based on the context and the student's question, provide a thoughtful and detailed explanation. Encourage them to think about the topic and 
#     offer further guidance if needed.
#     """

#     # Step 5: Send the prepared prompt (with context and user query) to the LLM
#     groq_response = query_groq(prompt)
#     print(f"Groq Response Tokens : {count_tokens(groq_response)}")

#     # Step 6: Append the user query and model's response to conversation history
#     conversation_history.append(f"User: {user_query}")
#     conversation_history.append(f"Coach: {groq_response}")

#     return groq_response


# # Example usage
# if __name__ == "__main__":
#     while True:
#         print("------------------------------------------------------------------------------------------------------------------------")
#         user_query = input("Enter your query or press 0 to exit: ")
#         if user_query == "0":
#             break
#         response = process_user_query(user_query, conversation_history)
#         print(response)


------------------------------------------------------------------------------------------------------------------------


# Gradio GUI TEST

In [25]:
# system_message = f"""
#     You are a knowledgeable and friendly coach. Your goal is to help students understand concepts in a detailed and easy-to-understand manner. 
#     Be patient, ask guiding questions, and provide step-by-step explanations where needed. Adapt your responses to the student's knowledge level 
#     and help them build confidence in their learning. Refer relevant material to the student and encourage them to explore further.

#     Based on the context and the student's question, provide a thoughtful and detailed explanation. Encourage them to think about the topic and 
#     offer further guidance if needed.
#     """

# def gradio_interface(prompt,history =[]):
#     output = process_user_query(prompt,history)
#     history.append((prompt,output))
#     return history

# gr.Interface(fn=gradio_interface, inputs= ['text',"state"], outputs=["chatbot","state"]).launch(debug=True,share=True)
    

# ------------------------------------------- WORKING 1 -------------------------------------------

# # Function to be used by Gradio for handling the query
# def gradio_process(user_query):
#     response = process_user_query(user_query, conversation_history)
#     return response

# # Create Gradio interface
# interface = gr.Interface(fn=gradio_process, inputs="text", outputs="text", title="RAG-based Coaching System")

# # Launch Gradio app
# interface.launch()
# ------------------------------------------- WORKING 2 -------------------------------------------

# Initialize empty conversation history (list of tuples)
# Each entry is a (user_message, coach_reply) pair. This list is the initial
# value handed to gr.State when the interface is built.
conversation_history = []

def process_user_query(user_query: str, conversation_history: list):
    """Answer one student question using retrieved context and chat history.

    Args:
        user_query: The student's new question.
        conversation_history: Earlier turns as ``(user, response)`` pairs.

    Returns:
        A new list: *conversation_history* followed by the
        ``(user_query, reply)`` pair for this turn. The input list itself is
        not modified.
    """
    print(f"User Query Tokens: {count_tokens(user_query)}")

    # Retrieve the stored text of the closest matches for this question.
    matches = query_pinecone(get_embedding(user_query))
    snippets = [match['metadata']["text"] for match in matches]
    context = "\n".join(snippets)
    print("CONTEXT:", context)

    # Render earlier turns as alternating "User:" / "Coach:" lines.
    turns = []
    for user, response in conversation_history:
        turns.append(f"User: {user}\nCoach: {response}")
    transcript = "\n".join(turns)

    # Coaching instructions, retrieved context, transcript and the new question.
    prompt = f"""You are a knowledgeable and friendly coach. Follow these guidelines:
    1. Provide clear, step-by-step explanations
    2. Ask guiding questions to encourage critical thinking
    3. Adapt to the student's knowledge level
    4. Use examples from the provided context when relevant

    Context from learning materials:
    {context}

    Conversation history:
    {transcript}

    New student question:
    "{user_query}"

    Provide a helpful response:"""

    reply = query_groq(prompt)
    print(f"Response Tokens: {count_tokens(reply)}")

    this_turn = (user_query, reply)
    return conversation_history + [this_turn]

# Gradio Interface
with gr.Blocks() as interface:
    gr.Markdown("# 🧑‍🏫 AI Coaching Assistant")
    gr.Markdown("Welcome! I'm here to help you learn. Type your question below.")

    # Chat history state, seeded from the module-level conversation_history.
    chat_history = gr.State(conversation_history)

    with gr.Row():
        # Gradio expects integer `scale` values and warns on fractions such as
        # 0.5. An explicit 2:1 ratio keeps the context panel at half the
        # chat's width while using integers only.
        chatbot = gr.Chatbot(height=500, scale=2)
        with gr.Column(scale=1):
            context_display = gr.Textbox(label="Relevant Context", interactive=False)

    user_input = gr.Textbox(label="Your Question", placeholder="Type here...")

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        undo_btn = gr.Button("Undo Last")
        clear_btn = gr.Button("Clear History")

    def handle_submit(user_input, history):
        """Handle a click on Submit.

        Args:
            user_input: Current content of the question textbox.
            history: Current chat history from ``chat_history``.

        Returns:
            A 3-tuple for (question textbox, chat history, context panel):
            the textbox is cleared, the history gains the new turn, and the
            panel shows the top retrieved snippets. Blank input leaves the
            textbox and history untouched and empties the context panel.
        """
        if not user_input.strip():
            return gr.update(), history, ""

        # Process query and update history
        new_history = process_user_query(user_input, history)

        # NOTE(review): this repeats the embedding + Pinecone lookup that
        # process_user_query has just performed, because that function only
        # returns the history. Consider returning the context from there.
        snippets = [
            chunk['metadata']["text"]
            for chunk in query_pinecone(get_embedding(user_input))
        ]
        latest_context = "\n".join(snippets[:3])  # Show top 3 context snippets

        return "", new_history, latest_context

    # Component interactions: each handler first updates the state, then a
    # follow-up step copies the state into the visible chatbot.
    submit_btn.click(
        handle_submit,
        [user_input, chat_history],
        [user_input, chat_history, context_display]
    ).then(
        lambda x: x,
        [chat_history],
        [chatbot]
    )

    # Drop the most recent (question, answer) pair, if there is one.
    undo_btn.click(
        lambda history: history[:-1] if history else [],
        [chat_history],
        [chat_history]
    ).then(
        lambda x: x,
        [chat_history],
        [chatbot]
    )

    # Reset the state, then blank both the chatbot and the context panel.
    clear_btn.click(
        lambda: [],
        None,
        [chat_history]
    ).then(
        lambda: ([], ""),
        None,
        [chatbot, context_display]
    )

# share=True requests a public URL for the app in addition to the local one.
interface.launch(share=True)
# Just change the launch command to:
#interface.launch(share=True, auth=("username", "password"))  # Add basic auth


# self hosting

# # Run with:
# interface.launch(
#     server_name="0.0.0.0",
#     server_port=7860,
#     show_error=True
# )


# ------------------------------------------- WORKING 3 (performance) -------------------------------------------


  chatbot = gr.Chatbot(height=500)


* Running on local URL:  http://127.0.0.1:7868
* Running on public URL: https://b685d7e523f7472d75.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




User Query Tokens: 6
CONTEXT: frequently feel uncomfortable. However, these abilities usually improve with practice and
exposure to difficult circumstances. During this learning phase, instructors and mentors
are essential in helping students navigate and achieve effective communication in a
professional healthcare setting.
Communication can fail for various reasons, with some of the most common causes
outlined below:
Perception of the Situation: Our perceptions of the environment may differ from those
of others. Assumptions about what we see, hear, and understand are often based on
our perceptions, which may not align with reality.
Bias: Personal biases stem from preexisting opinions about individuals based on their
affiliations, culture, economic status, or medical conditions. Bias hinders effective
0 6
0 2  C O M M U N I C A T I O N  A N D  T E A M W O R K
4.  Observe Nonverbal Cues: 
Pay attention to facial expressions, body language, and other nonverbal signals that can
provide ad