**Read JSON files**

In [None]:
import json
import os

# Collect the 'intents' entries of every JSON file found in `directory_path`.
all_intents = []
directory_path = 'sample_data/'

# os.listdir() returns entries in arbitrary order; sorting makes the order of
# the collected intents reproducible from one run to the next.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(directory_path, filename)
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which can corrupt non-ASCII patterns on some systems.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Only dictionary payloads can carry an 'intents' list; other JSON
    # documents in the directory are skipped.
    if isinstance(data, dict):
        # `or []` also covers a file whose 'intents' value is null.
        all_intents.extend(data.get('intents') or [])

intents = all_intents
intents

[{'tag': 'greeting',
  'patterns': ['Hi',
   'How are you?',
   'Is anyone there?',
   'Hello',
   'Good day',
   "What's up",
   'how are ya',
   'heyy',
   'whatsup',
   '??? ??? ??'],
  'responses': ['Hello!',
   'Good to see you again!',
   'Hi there, how can I help?'],
  'context_set': ''},
 {'tag': 'goodbye',
  'patterns': ['cya',
   'see you',
   'bye bye',
   'See you later',
   'Goodbye',
   'I am Leaving',
   'Bye',
   'Have a Good day',
   'talk to you later',
   'ttyl',
   'i got to go',
   'gtg'],
  'responses': ['Sad to see you go :(',
   'Talk to you later',
   'Goodbye!',
   'Come back soon'],
  'context_set': ''},
 {'tag': 'creator',
  'patterns': ['what is the name of your developers',
   'what is the name of your creators',
   'what is the name of the developers',
   'what is the name of the creators',
   'who created you',
   'your developers',
   'your creators',
   'who are your developers',
   'developers',
   'you are made by',
   'you are made by whom',
   'who

**Embedding the responses and patterns**

In [None]:
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import os


def prepare_documents(intents):
    """Turn intent dictionaries into one ``Document`` per example pattern.

    Args:
        intents: Iterable of dicts, each providing ``"tag"``, ``"patterns"``
            (an iterable of example utterances) and ``"responses"``.

    Returns:
        list: One ``Document`` per pattern. ``page_content`` is the pattern;
        the metadata holds the intent's tag and the responses rendered with
        ``str()``.

    Raises:
        KeyError: If an intent with at least one pattern lacks ``"tag"`` or
            ``"responses"``, or if any intent lacks ``"patterns"``.
    """
    documents = []
    for intent in intents:
        for pattern in intent["patterns"]:
            tag = intent["tag"]
            # The previous code always took ``tag[0]``, which reduced a string
            # tag such as 'greeting' to its first character. Keep the whole
            # string; only unwrap the first element for list-like tags.
            if not isinstance(tag, str):
                tag = tag[0]
            # Responses are stored as a single string.
            # NOTE(review): presumably because the vector store only accepts
            # scalar metadata values - confirm.
            metadata = {"tag": tag, "responses": str(intent["responses"])}
            documents.append(Document(page_content=pattern, metadata=metadata))
    return documents

# One Document per pattern, ready to be embedded.
documents = prepare_documents(intents)

# Single, explicitly named embedding model used for indexing.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Define the persist directory
persist_directory = os.path.join(os.getcwd(), "chroma_db")

# Create the Chroma vector store with the persist_directory.
# Pass the model configured above instead of constructing a second, default
# HuggingFaceEmbeddings(): that loaded the model twice and left the index tied
# to whatever the library default happens to be.
# NOTE(review): re-running this cell against an existing chroma_db directory
# presumably adds the same documents again - confirm and clear the directory
# (or guard this cell) if duplicates are not wanted.
vectorstore = Chroma.from_documents(documents=documents,
                                    embedding=embedding_model,
                                    persist_directory=persist_directory)

# Persist the database to disk.
# NOTE(review): the recorded cell output shows a warning pointing at this
# call; presumably it is deprecated in newer Chroma releases - confirm before
# removing it.
vectorstore.persist()

  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  embedding=HuggingFaceEmbeddings(),
  vectorstore.persist()


In [None]:
# Embedding function used to embed queries against the stored vectors;
# the model name is given explicitly.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# On-disk location of the Chroma database, relative to the working directory.
persist_directory = os.path.join(os.getcwd(), "chroma_db")

# Open the vector store backed by that directory.
vectorstore = Chroma(
    embedding_function=embedding_model,
    persist_directory=persist_directory,
)

# Expose the store through the retriever interface.
retriever = vectorstore.as_retriever()

  vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)


**LLM**

In [None]:
from langchain.schema.runnable import RunnableLambda
import google.generativeai as genai

# SECURITY: an API key used to be hard-coded on this line. Anything committed
# in a notebook is readable by everyone with access to the file, so that key
# should be revoked. The key is now read from the GOOGLE_API_KEY environment
# variable; a missing variable fails fast with a KeyError naming it.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
model = genai.GenerativeModel('gemini-1.5-flash')

def generate_text(text):
    """Send a prompt to the module-level ``model`` and return the reply text.

    Args:
        text: The prompt. May be a plain ``str``, an object exposing
            ``to_string()`` (as produced by a prompt template), or any
            object with a ``.text`` attribute.

    Returns:
        The ``text`` attribute of the response returned by
        ``model.generate_content``.
    """
    # The original only handled objects with a ``.text`` attribute; accept
    # plain strings and other prompt values as well so the function can be
    # called directly or placed after different prompt types.
    if isinstance(text, str):
        prompt_text = text
    elif hasattr(text, "to_string"):
        prompt_text = text.to_string()
    else:
        prompt_text = text.text

    response = model.generate_content(prompt_text)
    return response.text

# Wrap the plain Python function so it can be used as a step in a chain.
llm = RunnableLambda(generate_text)

**Prompt Template**

In [None]:
from langchain.prompts import PromptTemplate

# Prompt with two placeholders: {context} receives the retrieved text and
# {student_query} receives the question being asked.
prompt = PromptTemplate(
    template="""
    You are a student at a university. You answer questions based on the following context:

    Context:
    {context}

    Student Question:
    {student_query}

    Provide a clear and helpful answer to the student's question if question about labs or halls or rooms only say mentiond information no thing else.
    """,
    input_variables=["context", "student_query"],
)

**Chain**

In [None]:
from langchain_core.runnables import RunnableMap


# Retriever view over the vector store; the chain defined below pipes its
# results into format_docs to build the prompt context.
retriever = vectorstore.as_retriever()

def format_docs(docs):
    """Join the ``responses`` metadata of the given documents into one string.

    Every pattern of an intent is stored with that intent's ``responses``
    string, so several retrieved documents can carry identical metadata.
    Each distinct value is emitted once, in first-seen order, so the prompt
    context is not padded with repeated blocks.

    Args:
        docs: Iterable of documents whose ``metadata`` has a ``'responses'``
            string.

    Returns:
        str: The distinct response strings separated by blank lines; an empty
        string when ``docs`` is empty.
    """
    # dict.fromkeys de-duplicates while preserving insertion order.
    unique_responses = dict.fromkeys(doc.metadata['responses'] for doc in docs)
    return "\n\n".join(unique_responses)

# RunnablePassthrough and StrOutputParser are used by the chain below but are
# not imported in any of the visible cells, so running the notebook on a fresh
# kernel raised NameError here. Import them next to their only use.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Pipeline: the incoming question is (a) sent to the retriever, whose results
# are formatted into the prompt's {context}, and (b) passed through unchanged
# as {student_query}; the filled prompt goes to the LLM wrapper and the result
# is parsed to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "student_query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Smoke test: run a single question through the whole chain and print the answer.
print(rag_chain.invoke("i want to eat pasta"))

You should try Witch, it has the best reviews for pasta. 

