## INSTALLING THE DEPENDENCIES



In [None]:
!pip install -U pip
!pip install llama-index transformers
!pip install langchain openai
!pip install transformers flask
!pip install nltk
import nltk
nltk.download('punkt')

In [None]:
!pip install -r requirements.            #Install all the packages from requiremts.txt
!pip install -U -r requirements.txt      #Install any upgrades in the txt file
# !pip install flask                     #Couldnt implement this on code

In [None]:
                                                #LOADING THE DATA

from langchain_community.document_loaders import UnstructuredURLLoader

# Pages of the ZETRANCE XR website that serve as the chatbot's knowledge base.
urls = [
    "https://zetrance.com/",
    "https://zetrance.com/studio.php",
    "https://zetrance.com/arvr.php",
    "https://zetrance.com/zenna.php",
    "https://zetrance.com/product-visualization.php",
    "https://zetrance.com/simulator.php",
    "https://zetrance.com/evvr.php",
    "https://zetrance.com/btvr.php",
    "https://zetrance.com/medvr.php",
    "https://zetrance.com/dtvr.php",
    "https://zetrance.com/itvr.php",
    "https://zetrance.com/coe.php",
    "https://zetrance.com/contact.php",
]

# Hand every page to the URL loader and load them; the result is kept in
# `data` for the splitting step in a later cell.
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()







In [None]:
# Bare expression: shows what loader.load() returned as this cell's output.
data

In [None]:
# Streamlit provides the GUI for the chatbot.
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded pages into smaller chunks (chunk_size=1000) so they can be
# embedded and retrieved individually.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(data)

# `all_splits` is a second name for the same list; the vector store cell uses it.
all_splits = docs

# Report how many chunks were produced.
print(len(all_splits))


In [None]:
# Bare expression: shows the first chunk produced by the text splitter.
docs[0]

SETTING THE EMBEDDINGS

In [None]:
# EMBEDDINGS AND VECTOR STORE
# - OpenAI is the LLM class used later; swap in another model if preferred.
# - OpenAIEmbeddings turns each chunk into a vector for the database.
# - A .env file holding the OpenAI API key must be available; alternatively
#   the key can be set directly in the Colab notebook.
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_openai import OpenAI, OpenAIEmbeddings

# Read the .env file, letting its values override variables already set.
load_dotenv(override=True)

st.title("ZETRANCE XR CHATBOT")

# Embed every chunk and store the result in the Chroma vector database.
# NOTE(review): this runs at top level, so the chunks are re-embedded every
# time the cell/script executes — consider caching if that becomes costly.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(),
)

In [None]:
                      # TRIED USING FLASK (experiment that was not completed; kept commented out for reference)



#from flask import Flask, request, jsonify
# from langchain_chroma import Chroma
# from transformers import AutoTokenizer, AutoModelForCausalLM

# app = Flask(__name__)

# # ... (rest of your imports)

# # Load the LLM model and tokenizer (outside the API route)
# model = AutoModelForCausalLM.from_pretrained("mattshumer/Reflection-Llama-3.1-70B")
# tokenizer = AutoTokenizer.from_pretrained("mattshumer/Reflection-Llama-3.1-70B")

# # Create the vector store (outside the API route)
# vectorstore = Chroma.from_documents(documents=docs)  # Use the default embedding function

# @app.route('/query', methods=['POST'])
# def query_handler(query):

#     results = vectorstore.similarity_search(query)

#     ar_vr_docs = [res['content'] for res in results if 'AR/VR' in res['content']]
#     simulator_docs = [res['content'] for res in results if 'Simulator' in res['content']]

#     # Combine the results into a structured prompt based on the query
#     if 'AR/VR' in query:
#         context = ' '.join(ar_vr_docs)
#     elif 'Simulator' in query:
#         context = ' '.join(simulator_docs)
#     else:
#         context = ' '.join([res['content'] for res in results])  # Default to all results

#     # Tokenize the input for the model
#     inputs = tokenizer.encode(context + " " + query, return_tensors='pt')

#     # Generate a response from the model
#     outputs = model.generate(inputs, max_length=500, num_return_sequences=1)

#     # Decode the response
#     response = tokenizer.decode(outputs[0], skip_special_tokens=True)

#     return response


# SETTING UP THE RETRIEVER

In [None]:
# RETRIEVING DATA FROM THE VECTOR DATABASE
# The retriever embeds the input query and returns the k=3 most similar
# chunks from the vector store.
# NOTE(review): the original note said cosine similarity; the actual distance
# metric depends on how the Chroma collection is configured — confirm.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# Sample query to sanity-check the retriever.
retrieved_docs = retriever.invoke("What kind of services they provide?")


In [None]:
# Bare expression: shows how many documents the sample query retrieved
# (the retriever was configured with k=3).
len(retrieved_docs)


In [None]:
# Print the text of the first retrieved document for the sample query.
print(retrieved_docs[0].page_content)


# SETUP OPEN AI LLM

In [None]:
# LLM used to generate the answers.
# - temperature controls how random the output is: lower values give more
#   deterministic answers, higher values give more varied ones.
# - max_tokens caps the length of the generated answer, measured in tokens
#   (not characters).
llm = OpenAI(temperature=0.4, max_tokens=500)

# Chat box for the user's question. Per the Streamlit docs this is None until
# the user submits a message, which is why the RAG step is guarded by
# `if query:`.
query = st.chat_input("Hello!I am ZETRANCE_XR ASSISTANT. How can I help you? ")

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the assistant. Adjacent string literals are joined
# with no separator, so each fragment carries its own trailing space or
# punctuation to keep the sentences from running together.
system_prompt = (
    "You are the chat assistant of ZETRANCE-XR. "
    "If anything other than the information of ZETRANCE-XR is asked, "
    "say that it's a great question, but being the chatbot of ZETRANCE XR "
    "you can only answer questions related to it. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# The retrieved documents are substituted for {context}; the user's question
# is substituted for {input}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [None]:
# RETRIEVAL-AUGMENTED GENERATION (RAG)
# Grounds the LLM's answer in the chunks retrieved from the vector database.
# Runs only once the user has actually submitted a question.
if query:
    # Chain that places the retrieved documents into the prompt and calls the LLM.
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    # Full pipeline: retrieve the relevant chunks, then answer using them.
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    response = rag_chain.invoke({"input": query})
    answer = response["answer"]

    # Echo the answer to stdout and render it in the Streamlit page.
    print(answer)
    st.write(answer)