In [None]:
!pip install sentence-transformers faiss-cpu pandas transformers
!pip install groq

Collecting sentence-transformers
  Downloading sentence_transformers-3.2.0-py3-none-any.whl.metadata (10 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading sentence_transformers-3.2.0-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.2/255.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu, sentence-transformers
Successfully installed faiss-cpu-1.9.0 sentence-transformers-3.2.0
Collecting groq
  Downloading groq-0.11.0-py3-none-any.whl.metadata (13 kB)
Collecting httpx<1,>=0.23.0 (from groq)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpcore==1.* (from httpx<1,>

In [None]:
import csv
import json
import os
import re
import time

import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import google.generativeai as genai
from groq import Groq

### Generate a Groq API key by following the instructions below:

- Sign up or log in at https://console.groq.com/login
- Create a new API key.
- Copy the generated key and paste it into the cell below.


In [None]:
# Use the GROQ_API_KEY environment variable when it is set, so the secret does
# not have to be saved inside the notebook; otherwise fall back to the value
# pasted into the string literal below.
grok_api_key = os.environ.get("GROQ_API_KEY") or ""  # PASTE YOUR API KEY HERE

In [None]:
# Make sure the dataset has been uploaded to Colab before running this cell,
# and that the path below points at it.
csv_file_path = '/content/chatbot-combined.csv'
df = pd.read_csv(csv_file_path)

# Fail fast with a clear message if the CSV lacks the columns used further
# down: 'question' is embedded for retrieval and 'answer' is returned as context.
missing_columns = {'question', 'answer'} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{csv_file_path} is missing required column(s): {sorted(missing_columns)}"
    )

In [None]:
# Sentence encoder used both to index the dataset questions and to embed queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed every question in the dataset. FAISS only accepts float32 input, so the
# encoder output is converted in the same step.
questions = df['question'].tolist()
question_embeddings = np.array(
    model.encode(questions, convert_to_tensor=False),
    dtype='float32',
)

# Flat L2-distance index sized to the embedding width, holding one vector per
# question in the same order as the rows of df.
embedding_dim = question_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(question_embeddings)


# Define retrieval function to get the closest answer
def get_top_answer(query, model, faiss_index, df, top_k=3):
    """Return the answers whose questions are closest to ``query``.

    Args:
        query: Free-text user question.
        model: Encoder exposing ``encode(list_of_str, convert_to_tensor=False)``.
        faiss_index: Index exposing ``search(vectors, k)`` that returns a
            ``(distances, indices)`` pair.
        df: DataFrame with an ``answer`` column whose row positions line up
            with the vectors stored in ``faiss_index``.
        top_k: Maximum number of answers to return.

    Returns:
        A list of at most ``top_k`` answers, in the order the index returned
        them (nearest match first).
    """
    # Generate the embedding for the input query; FAISS requires float32.
    query_embedding = np.array(
        model.encode([query], convert_to_tensor=False), dtype='float32'
    )

    # Search in the FAISS index
    _distances, indices = faiss_index.search(query_embedding, top_k)

    # FAISS fills the unused result slots with -1 when the index holds fewer
    # than top_k vectors. Those must be skipped: iloc[-1] would otherwise
    # silently return the last row of the DataFrame as if it were a match.
    answers = df['answer']
    return [answers.iloc[idx] for idx in indices[0] if idx >= 0]



In [None]:
def GroqChat(question, model="llama-3.1-70b-versatile"):
    """Send ``question`` to a Groq-hosted chat model and return the reply text.

    Args:
        question: Prompt text, sent as a single user message.
        model: Groq model identifier. The default is the model this notebook
            was written against; pass a different identifier to use another one.

    Returns:
        The reply text, with any ``}{`` boundary (optionally separated by
        whitespace) rewritten as ``}, {``. An empty string is returned when the
        response carries no text content.
    """
    client = Groq(api_key=grok_api_key)

    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": question}],
        model=model,
    )

    reply_text = chat_completion.choices[0].message.content
    if reply_text is None:
        # The message content can be None; re.sub would raise TypeError on it.
        return ""

    # Insert a comma between back-to-back JSON objects in the reply.
    return re.sub(r'}\s*{', '}, {', reply_text)

In [None]:
def generate_answer_from_docs(query, retrieved_docs):
    """Answer ``query`` using only the retrieved documents as context.

    Args:
        query: The user's question.
        retrieved_docs: List of answer strings retrieved for the query.

    Returns:
        A three-item list ``[context, answer, '']``. ``context`` is the
        retrieved documents joined by newlines and ``answer`` is the text
        returned by ``GroqChat``. When ``retrieved_docs`` is empty, ``context``
        is ``''`` and ``answer`` is a fixed "no answer" message; the model is
        not called.
    """
    if not retrieved_docs:
        # Return the same list shape as the normal path. Returning a bare
        # string here made a caller reading result[1] get the single
        # character "o" instead of the message.
        return ['', "Don't have an answer for the query.", '']

    context = "\n".join(retrieved_docs)

    prompt = f"Answer the following query, based only on the given context.Do not add anything from your previous learnings. Do not state in answer that a context is provided to you. If the context seems irrelavant just say 'I don't have an appropriate answer' query: {query} context: {context}"
    groq_answer = GroqChat(prompt)
    return [context, groq_answer, '']

Ask your question here:

In [None]:
# Interactive loop: type a question to get an answer, or "exit" to stop.
while True:
    query = input("Enter your query here: ").strip()
    if query.lower() == "exit":
        break
    if not query:
        # Nothing to search for; ask again instead of sending an empty prompt.
        continue

    # dict.fromkeys drops duplicate answers while keeping the nearest-first
    # order from retrieval; a set would reorder them unpredictably between runs.
    retrieved_docs = list(dict.fromkeys(get_top_answer(query, model, index, df)))
    generated_answer = generate_answer_from_docs(query, retrieved_docs)

    # Accept either a plain message string or a list whose second item is the answer.
    if isinstance(generated_answer, str):
        llama_answer = generated_answer
    else:
        llama_answer = generated_answer[1]
    print("Llama Answer: ", llama_answer)
    print()

Enter your query here: What is Sitare univ?
Llama Answer:  Sitare University is an institution that provides free high-quality Computer Science education to bright students from underprivileged backgrounds, covering food, accommodation, and studies expenses.

Enter your query here: Who is FOunder?
Llama Answer:  The founder of Sitare University is Dr. Amit Singhal, who was earlier Senior Vice President of Google Search.

Enter your query here: Who is dean at SU?
Llama Answer:  Dr. Anuja Agarwal is the founding Dean at Sitare University.

Enter your query here: What is the fee structure?
Llama Answer:  There is no application fee. Additionally, the university provides high-quality education and placements without any charges for the students, as it is free of cost for students from underprivileged backgrounds.

Enter your query here: exit
