## Load webpages and split text into chunks

In [3]:
from langchain.document_loaders import DirectoryLoader, WebBaseLoader

from langchain.text_splitter import RecursiveCharacterTextSplitter

URLs=[
    'https://www.promtior.ai/',
    'https://www.promtior.ai/service',
    'https://www.promtior.ai/use-cases',
    'https://www.promtior.ai/contacto'

]

# load pages from website
loader = WebBaseLoader(URLs)

# split the pages into chunks
data = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = splitter.split_documents(data)

## Convert chunks into embeddings

In [6]:
!pip install -U langchain-huggingface



In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

# load the embeddings model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'})

## Creation of vector store

In [8]:
from langchain.vectorstores import FAISS

# create the vector store database
db = FAISS.from_documents(texts, embeddings)

## Load the model

In [12]:
from langchain.llms import CTransformers

def load_llm():
    """load the llm"""

    llm = CTransformers(model='models/llama-2-7b-chat.ggmlv3.q2_K.bin', # model available here: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main
                    model_type='llama',
                    config={'max_new_tokens': 256, 'temperature': 0})
    return llm

## Load vector previously stored

In [13]:
def load_vector_store():
    # load the vector store
    
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'})
    db = FAISS.load_local("faiss", embeddings)
    return db

## Prompt template

In [14]:
from langchain import PromptTemplate

def create_prompt_template():
    # prepare the template that provides instructions to the chatbot

    template = """Use the provided context to answer the user's question.
    If you don't know the answer, respond with "I do not know".
    Context: {context}
    Question: {question}
    Answer:
    """

    prompt = PromptTemplate(
        template=template,
        input_variables=['context', 'question'])
    return prompt

## Creating the QA Chain
##### Retrieves the relevant documents from the vector store and uses them to answer the users’ queries

In [15]:
from langchain.chains import RetrievalQA

def create_qa_chain():
    """create the qa chain"""

    # load the llm, vector store, and the prompt
    llm = load_llm()
    db = load_vector_store()
    prompt = create_prompt_template()


    # create the qa_chain
    retriever = db.as_retriever(search_kwargs={'k': 2})
    qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                        chain_type='stuff',
                                        retriever=retriever,
                                        return_source_documents=True,
                                        chain_type_kwargs={'prompt': prompt})
    
    return qa_chain

## Function that executes the response generation

In [16]:
def generate_response(query, qa_chain):

    # use the qa_chain to answer the given query
    return qa_chain({'query':query})['result']

## User interface

In [22]:
!pip install streamlit_chat

Collecting streamlit_chat
  Downloading streamlit_chat-0.1.1-py3-none-any.whl.metadata (4.2 kB)
Downloading streamlit_chat-0.1.1-py3-none-any.whl (1.2 MB)
   ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
   -- ------------------------------------- 0.1/1.2 MB 573.4 kB/s eta 0:00:02
   --- ------------------------------------ 0.1/1.2 MB 476.3 kB/s eta 0:00:03
   --- ------------------------------------ 0.1/1.2 MB 476.3 kB/s eta 0:00:03
   ---- ----------------------------------- 0.1/1.2 MB 472.1 kB/s eta 0:00:03
   ----- ---------------------------------- 0.2/1.2 MB 551.6 kB/s eta 0:00:02
   ------- -------------------------------- 0.2/1.2 MB 573.4 kB/s eta 0:00:02
   --------- ------------------------------ 0.3/1.2 MB 654.2 kB/s eta 0:00:02
   ---------- ----------------------------- 0.3/1.2 MB 633.2 kB/s eta 0:00:02
   ---------- 

In [24]:
import streamlit as st
from streamlit_chat import message

st.set_page_config(page_title='Llama2-Chatbot for Promtior')
st.header('Custom Llama2-Powered Chatbot :robot_face:')

def get_user_input():

    # get the user query
    input_text = st.text_input('Ask me anything about Promtior', "", key='input')
    return input_text

# create the qa_chain
qa_chain = create_qa_chain()

# create empty lists for user queries and responses
if 'generated' not in st.session_state:
    st.session_state['generated'] = []
if 'past' not in st.session_state:
    st.session_state['past'] = []

# get the user query
user_input = get_user_input()

if user_input:

    # generate response to the user input
    response = generate_response(query=user_input, qa_chain=qa_chain)

    # add the input and response to session state
    st.session_state.past.append(user_input)
    st.session_state.generated.append(response)

# display conversaion history (if there is one)
if st.session_state['generated']:
    for i in range(len(st.session_state['generated']) -1, -1, -1):
        message(st.session_state['generated'][i], key=str(i))
        message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')

RepositoryNotFoundError: 401 Client Error. (Request ID: Root=1-669ffb8f-5522a0e34e57cd9519180f76;e439163a-7911-4c53-93ec-e69a6f98c541)

Repository Not Found for url: https://huggingface.co/api/models/models/llama-2-7b-chat.ggmlv3.q2_K.bin/revision/main.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated.
Invalid username or password.