# Query

In [None]:
from dotenv import load_dotenv
import os
import json
import pandas as pd
import numpy as np
from pinecone import Pinecone, ServerlessSpec
from mixedbread_ai.client import MixedbreadAI
import google.generativeai as genai

## Setup

In [None]:
# Populate os.environ from a local .env file (python-dotenv) before reading
# any configuration below.
load_dotenv()

# Service credentials and data location; each is None when the variable is unset.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
MIXEDBREAD_API_KEY = os.environ.get('MIXEDBREAD_API_KEY')
DATA_DIRECTORY = os.environ.get('DATA_DIRECTORY')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')

# Pinecone index settings. `or` (rather than a getenv default) also replaces
# an empty-string value with the fallback.
INDEX_NAME = os.environ.get('INDEX_NAME')
CLOUD = os.environ.get('CLOUD') or 'aws'
REGION = os.environ.get('REGION') or 'us-east-1'

### Pinecone

In [None]:
# Pinecone client authenticated with the key read from the environment.
pc = Pinecone(api_key=PINECONE_API_KEY)
# Serverless deployment spec built from CLOUD/REGION. It is not referenced
# again in the visible code — presumably kept for index creation; TODO confirm.
spec = ServerlessSpec(cloud=CLOUD, region=REGION)
# Handle to the index named by INDEX_NAME; used by query_pinecone below.
index = pc.Index(INDEX_NAME)

### Mixedbread

In [None]:
# Mixedbread client used by get_embeddings to embed query text.
mxbai = MixedbreadAI(api_key=MIXEDBREAD_API_KEY)


def get_embeddings(queries):
    """Embed ``queries`` with the Mixedbread ``mxbai-embed-large-v1`` model.

    Args:
        queries: Value forwarded as ``input`` to ``mxbai.embeddings``; the
            caller in this notebook passes a list containing one string.

    Returns:
        A NumPy array built from the ``embedding`` of every item in the
        response's ``data``, in response order (presumably one row per
        input — confirm against the Mixedbread API).
    """
    res = mxbai.embeddings(
        model='mixedbread-ai/mxbai-embed-large-v1',
        input=queries,
        normalized=True,
        encoding_format='float',
        truncation_strategy='start',
    )

    # Iterate the response items directly instead of indexing by position.
    return np.array([item.embedding for item in res.data])

### Gemini

In [None]:
# Register the Gemini API key with the google.generativeai module.
genai.configure(api_key=GEMINI_API_KEY)
# Generative model used by generate_response below.
# NOTE(review): 'gemini-pro' is a fixed model identifier — confirm it is still available.
model = genai.GenerativeModel('gemini-pro')

# Generate Response

In [None]:
# Prompt sent to the model. generate_prompt fills the placeholders with
# str.format: {query}, {textbook}, {chapter} and {textbook_content}.
prompt_template = """
    You are my Machine Learning tutor. Please help me answer this question: "{query}".
    
    Below, I have provided an excerpt from a textbook. If the included textbook content is relevant, consider including a 
    quote from it as part of your explanation. If you use a quote, you MUST cite the source as `{textbook} Chapter {chapter}`. Feel free to elaborate on the topic and provide additional context. 
    Else if the included textbook content does not contain the answer, but covers similar material indicating that the chapter may contain relevant information, you MUST use this format: "<Your answer>. Read more about this in {textbook} Chapter {chapter}."
    Else, if the included textbook content is completely irrelevant, you MUST use this format: 
    `Unfortunately, I can't find an answer for this question in my knowledge base. I will make my best attempt to answer per my pre-training knowledge. <Your answer>`
    
    Here is the textbook excerpt: 
    ```
    {textbook_content}
    ```
    """

In [None]:
def query_pinecone(query, top_k=5):
    """Embed ``query`` and run a similarity search against the Pinecone index.

    Args:
        query: The question text to embed.
        top_k: Number of matches to request from the index.

    Returns:
        The response of ``index.query``, requested with metadata included.
    """
    # get_embeddings returns one row per input. Take the single row so that
    # Pinecone receives a flat list of floats rather than a nested [[...]]
    # list, which is what .tolist() on the whole 2-D array would produce.
    embedded_query = get_embeddings([query])[0].tolist()
    return index.query(vector=embedded_query, top_k=top_k, include_metadata=True)

In [None]:
def generate_prompt(query):
    """Build the tutor prompt for ``query`` from the best-matching excerpt.

    Queries Pinecone, takes the first match and fills ``prompt_template``
    with the query plus that match's ``textbook``, ``chapter`` and
    ``content`` metadata fields.

    Args:
        query: The user's question.

    Returns:
        The formatted prompt string.

    Raises:
        IndexError: If the index returns no matches for the query.
        KeyError: If the first match lacks one of the expected metadata fields.
    """
    response = query_pinecone(query)
    # Kept from the original: shows the raw search response in the notebook.
    print(response)

    matches = response['matches']
    if not matches:
        # Same exception type as the bare [0] lookup, but with a message
        # that explains what went wrong.
        raise IndexError(f"Pinecone returned no matches for query: {query!r}")

    metadata = matches[0]['metadata']
    return prompt_template.format(
        query=query,
        textbook=metadata['textbook'],
        chapter=metadata['chapter'],
        textbook_content=metadata['content'],
    )

In [None]:
def generate_response(prompt):
    """Send ``prompt`` to the Gemini model and return its response object.

    The returned object is whatever ``model.generate_content`` produces;
    the notebook reads the generated text from its ``.text`` attribute.
    """
    return model.generate_content(prompt)

In [None]:
# Example run: build the retrieval-augmented prompt for a sample question.
prompt = generate_prompt("Please explain Gradient Descent.")

In [None]:
# Send the prompt to the model and show the generated answer text.
response = generate_response(prompt)
print(response.text)