In [1]:
!pip install streamlit -q
!pip install chromadb -q
!pip install groq -q
!pip install langchain -q
!pip install langchain-groq -q
!pip install langchain_community -q
!pip install langchain_huggingface -q
!pip install langchain-core -q
!pip install sentence-transformers -q

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-google-genai 3.0.0 requires langchain-core<2.0.0,>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.
langchain-huggingface 1.0.0 requires langchain-core<2.0.0,>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.3.27 requires langchain-core<1.0.0,>=0.3.72, but you have langchain-core 1.0.1 which is incompatible.
langchain-experimental 0.3.4 requires langchain-core<0.4.0,>=0.3.28, but you have langchain-core 1.0.1 which is incompatible.
langchain-groq 0.3.8 requires langchain-core<1.0.0,>=0.3.75, but you have langchain-core 1.0.1 which is incompatible.


In [2]:
!npm install -g localtunnel


changed 22 packages in 20s

3 packages are looking for funding
  run `npm fund` for details


In [3]:
!wget -q -O - ipv4.icanhazip.com
# Copy the ransom ip address "35.229.63.7" and paste in the ramdon url's (https://mighty-ends-wish.loca.lt) tunnel password adter clicking the ramdom url

'wget' is not recognized as an internal or external command,
operable program or batch file.


In [4]:
%%writefile app.py
## With Questions
import streamlit as st
import os
import time
import pandas as pd
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv
load_dotenv()


st.set_page_config(layout="wide", page_title="Q&A App", page_icon="🧠")

# Loading GROQ API Key
groq_api_key = os.getenv("GROQ_API_KEY")

st.title("🧠 Q&A App")


# Initializing the llama-3.3 LLM
llm = ChatGroq(groq_api_key=groq_api_key,
               model_name="llama-3.3-70b-versatile")

# Creating path for Vector Store
path_vectorstore = "vectorstore"
os.makedirs(path_vectorstore, exist_ok=True)

# Defining the prompt
prompt = ChatPromptTemplate.from_template(
    """
    You are a retrieval-augmented generation (RAG) assistant.

    Use only the retrieved context to answer the question.
    If unsure, output "Not enough information."

    Your Answer should be strictly valid JSON with this format:
    {{
    "answer": "<string>",
    "contexts": ["<string>", "<string>", ...],
    "reasoning": "<string>"
    }}

    CONTEXT:
    {context}

    QUESTION: {question}

    YOUR ANSWER:

    """
)


# Data preparation function
def prepare_data():
    data_path = "D:\AI-Job-Preparation\Mini-RAG-System-Movie-Plots\data\wiki_movie_plots_deduped.csv"
    df=pd.read_csv(data_path, nrows = 200, usecols=["Title","Plot"])
    
    text_list = []
    metadata = []
    for _, row in df.iterrows():
        text = 'Title: ' + row['Title'] + '\n' + 'Plot: ' + row['Plot'] + '\n'
        text_list.append(text)
        tags = row['Title']
        metadata.append({'tags': tags, 'Title': row['Title']})
    return text_list, metadata

# Document creation function
def create_doc():
    text_list, metadata = prepare_data()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=20)
    documents = text_splitter.create_documents(texts=text_list, metadatas=metadata)
    return documents

# Vector embedding function
def vector_embedding():
    if path_vectorstore not in st.session_state:
        # Initializing embeddings
        embeddings = HuggingFaceEmbeddings(
            model_kwargs={'tokenizer_kwargs': {'clean_up_tokenization_spaces': True}}
        )
        
        # Creating documents
        documents = create_doc()
        
        # Creating vectorstore using LangChain's Chroma wrapper
        vectorstore = Chroma.from_documents(
            documents=documents,
            embedding=embeddings,
            persist_directory=path_vectorstore
        )
        
        # Storing in session state
        st.session_state["vectorstore"] = vectorstore
        st.write("Chroma Vector store initialized successfully.")
    else:
        st.write("Chroma Vector store already initialized.")

# Taking Input from user
user_prompt = st.text_input("Write Your Query")

# Button to prepare the vector store
if st.button("Submit"):
    vector_embedding()
    st.write("Chroma Vector Store is Ready")

# Handling retrieval and response
if user_prompt:
    if "vectorstore" not in st.session_state:
        st.error("Chroma Vector store is not initialized. Please click 'Submit' to initialize it.")
    else:


        retriever = st.session_state.vectorstore.as_retriever()
        chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
        
        start = time.process_time()
        response = chain.invoke(user_prompt)
        st.write(f"Response Time: {time.process_time() - start:.2f} seconds")
        #st.code(response['answer'], language='markdown')
        st.code(response, language='markdown')


Overwriting app.py


In [None]:
!streamlit run app.py & npx localtunnel --port 8501