In [14]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [12]:
# Hugging Face access token (create one at https://huggingface.co/settings/tokens).
# The token is deliberately NOT stored in the notebook: it is taken from the
# environment when already set, otherwise requested interactively (no echo).
# Any token that was ever pasted into a notebook cell should be revoked.
from getpass import getpass

_hf_token = (
    os.environ.get('HF_TOKEN')
    or os.environ.get('HUGGINGFACEHUB_API_TOKEN')
    or getpass('Hugging Face token: ')
)

# HF_TOKEN is kept for existing consumers; the HuggingFaceHub wrapper used later
# in this notebook expects HUGGINGFACEHUB_API_TOKEN, so export both names.
os.environ['HF_TOKEN'] = _hf_token
os.environ['HUGGINGFACEHUB_API_TOKEN'] = _hf_token

In [3]:
# OpenAI-backed LLM, configured with temperature=0.9 and max_tokens=500.
_openai_llm_params = {"temperature": 0.9, "max_tokens": 500}
llm = OpenAI(**_openai_llm_params)

## (1) Load data

In [5]:
# Coursera articles that make up the knowledge base.
_article_urls = [
    "https://www.coursera.org/articles/software-engineer",
    "https://www.coursera.org/articles/computer-information-systems",
    'https://www.coursera.org/in/articles/machine-learning-models',
]
loaders = UnstructuredURLLoader(urls=_article_urls)

# Fetch the pages; the cell output is the number of documents that were loaded.
data = loaders.load()
len(data)

3

In [7]:
# `data` was already fetched by the previous cell; downloading the same pages a
# second time only repeats the network requests, so just display the count again.
len(data)

3

## (2) Split data to create chunks


#### RecursiveCharacterTextSplitter:
The RecursiveCharacterTextSplitter takes a large text and splits it based on a specified chunk size. It does this by using a set of characters. The default characters provided to it are ["\n\n", "\n", " ", ""].

It first tries to split the large text on the first separator, \n\n. If a resulting piece is still larger than the specified chunk size, it moves on to the next separator, \n, and splits that piece again. It keeps working through the separator list in this way until every piece is smaller than the specified chunk size.

In [8]:
# Both sizes are counted in characters, not words.
_chunk_size = 1000     # characters per chunk
_chunk_overlap = 200   # characters shared by neighbouring chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=_chunk_overlap,
    chunk_size=_chunk_size,
)

# `data` holds Document objects, so split_documents (rather than split_text)
# produces the chunks directly.
docs = text_splitter.split_documents(data)

In [12]:
# Same split as the previous cell, run again: `docs` is rebuilt from `data`.
docs = text_splitter.split_documents(data)

In [13]:
# Number of chunks the splitter produced.
len(docs)

41

In [42]:
# Inspect the final chunk. Indexing from the end keeps this cell working when the
# scraped pages change and the splitter yields a different number of chunks
# (docs[40] raises IndexError as soon as there are fewer than 41).
print(docs[-1])

page_content='Popular Cybersecurity Certifications\n\nPopular Data Analytics Certifications\n\nPopular IT Certifications\n\nPopular Machine Learning Certifications\n\nPopular SQL Certifications\n\nGenAI for Software Developers\n\nGenAI for Data Professionals\n\nCareer Insights & Advice Hub\n\nCoursera\n\nAbout\n\nWhat We Offer\n\nLeadership\n\nCareers\n\nCatalog\n\nCoursera Plus\n\nProfessional Certificates\n\nMasterTrack® Certificates\n\nDegrees\n\nFor Enterprise\n\nFor Government\n\nFor Campus\n\nBecome a Partner\n\nSocial Impact\n\nFree Courses\n\nECTS Credit Recommendations\n\nCommunity\n\nLearners\n\nPartners\n\nBeta Testers\n\nBlog\n\nThe Coursera Podcast\n\nTech Blog\n\nTeaching Center\n\nMore\n\nPress\n\nInvestors\n\nTerms\n\nPrivacy\n\nHelp\n\nAccessibility\n\nContact\n\nArticles\n\nDirectory\n\nAffiliates\n\nModern Slavery Statement\n\nManage Cookie Preferences\n\nLearn Anywhere\n\n© 2024 Coursera Inc. All rights reserved.' metadata={'source': 'https://www.coursera.org/in/art

### (3) Create embeddings for these chunks and save them to FAISS index
Since we did not have a paid OpenAI account, we were not able to use OpenAI.


In [None]:
# Embedding model provided by OpenAI.
embeddings = OpenAIEmbeddings()

# Embed every chunk and build a FAISS vector index from the results.
vectorindex_openai = FAISS.from_documents(documents=docs, embedding=embeddings)

Now, OpenAI Embeddings are expensive. Let’s explore some best performing open source embedding models.

BGE (BAAI General Embedding) model; BAAI is the Beijing Academy of Artificial Intelligence.

BGE models on Hugging Face are among the best open-source embedding models.

In [4]:
from sentence_transformers import SentenceTransformer

In [3]:
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings

# BGE embedding model, run on CPU, with embeddings normalised at encode time.
# NOTE: the name `hf` is rebound to an LLM object in later cells.
encode_kwargs = dict(normalize_embeddings=True)
model_kwargs = dict(device='cpu')
model_name = "BAAI/bge-base-en-v1.5"

hf = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

Once we have an embedding model, we are ready to vectorize all our documents and store them in a vector store to construct a retrieval system. With specifically designed search algorithms, a retrieval system can perform similarity search efficiently to retrieve relevant documents.

FAISS (Facebook AI Similarity Search) is a library that allows developers to quickly search for embeddings of multimedia documents that are similar to each other. It solves limitations of traditional query search engines that are optimized for hash-based searches, and provides more scalable similarity search functions (nearest-neighbor search implementations).

In [6]:
 # create embeddings and save it to FAISS index
    #embeddings = OpenAIEmbeddings()
vectordb_bge = FAISS.from_documents(documents=docs, embedding=hf)  # chunks embedded with the BGE model

In [7]:
# Persist the BGE-backed FAISS store to disk as a pickle file.
file_path = "faiss_store_bge.pkl"
with open(file_path, mode="wb") as fh:
    pickle.Pickler(fh).dump(vectordb_bge)

In [None]:
# Number of vectors held by the store. The LangChain FAISS wrapper does not
# implement __len__, so len(vectordb_bge) raises TypeError; the count is
# available on the underlying faiss index instead.
vectordb_bge.index.ntotal

In [27]:
# Restore the vector store from the pickle file saved earlier.
# Unpickling can execute arbitrary code: only load pickle files you created yourself.
with open("faiss_store_bge.pkl", mode="rb") as fh:
    vectordb_bge = pickle.Unpickler(fh).load()

  return torch.load(io.BytesIO(b))


Create a retriever interface using vector store, we’ll use it later to construct Q & A chain using LangChain.

In [28]:
# Similarity search that returns the 3 most relevant chunks for a query.
_top_k = 3
retriever = vectordb_bge.as_retriever(
    search_kwargs={"k": _top_k},
    search_type="similarity",
)

Now we have our vector store and retrieval system ready. We then need a large language model (LLM) to process information and answer the question.

**Open-source LLMs from Hugging Face**

There are two ways to utilize Hugging Face LLMs: online and local.

Hugging Face Hub
The Hugging Face Hub is a platform with over 350k models, 75k datasets, and 150k demo apps (Spaces), all open source and publicly available, where people can easily collaborate and build ML together.

To use, we should have the huggingface_hub python package installed.
Set an environment variable called HUGGINGFACEHUB_API_TOKEN with your Hugging Face access token in it.

In [32]:
from langchain.llms import HuggingFaceHub

# Mistral-7B accessed remotely through the Hugging Face Hub.
# NOTE: this rebinds `hf`, which previously named the BGE embedding model.
_hub_model_kwargs = {"temperature": 0.1, "max_length": 500}
hf = HuggingFaceHub(
    model_kwargs=_hub_model_kwargs,
    repo_id="mistralai/Mistral-7B-v0.1",
)
# Set the client's endpoint URL for this model explicitly.
hf.client.api_url = 'https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1'

# Sample question; replace it with any question you are interested in.
query = "Who is laila rizvi?"
# Optional smoke test of the bare LLM: hf.invoke(query)



Hugging Face Hub will be slow when you run large models. You can get around this by downloading the model and running it on your local machine. This is how we use the LLM in our project.

**Hugging Face Local Pipelines**

Hugging Face models can be run locally through the HuggingFacePipeline class.

We need to install transformers python package.
The Mistral-7B-v0.1 Large Language Model (LLM) is a pretrained generative text model with 7 billion parameters. Mistral-7B-v0.1 outperforms Llama-2–13B on all benchmarks tested. Read the paper.
Mistral-7B-v0.1’s model size is 3.5GB, while Llama-2–13B has 13 billion parameters and 25GB model size.
In order to use Llama2, you need to request access from Meta. Mistral-7B-v0.1 is publicly available already.

In [None]:
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# Mistral-7B loaded as a local text-generation pipeline.
# NOTE: this rebinds `hf` once more; it now names the local pipeline LLM.
_generation_kwargs = {"temperature": 0, "max_new_tokens": 300}
hf = HuggingFacePipeline.from_model_id(
    task="text-generation",
    model_id="mistralai/Mistral-7B-v0.1",
    pipeline_kwargs=_generation_kwargs,
)

# `llm` now refers to the local pipeline instead of the OpenAI model set up earlier.
llm = hf
query = "Who is laila rizvi?"
# Optional smoke test of the bare LLM: llm.invoke(query)

**Q & A chain**

Now we have both the retrieval system for relevant documents and the Mistral LLM ready to act as a QA chatbot.

We will take our initial query, together with the relevant documents retrieved based on the results of our similarity search, to create a prompt to feed into the LLM. The LLM will take the initial query as the question and relevant documents as the context information to generate a result.

LangChain provides an abstraction of the whole pipeline — RetrievalQA

Let’s first construct a proper prompt for our task.



In [16]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders: {context} and {question}.
prompt_template = """Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

{context}

Question: {question}

Helpful Answer:
"""

# Wrap the text in a PromptTemplate, declaring both placeholders as inputs.
_prompt_inputs = ["context", "question"]
PROMPT = PromptTemplate(
    input_variables=_prompt_inputs,
    template=prompt_template,
)

Calling LangChain’s RetrievalQA with the prompt above.

In [34]:
from langchain.chains import RetrievalQA
# Question-answering chain: `retriever` supplies the chunks, PROMPT formats them
# together with the question, and `hf` generates the answer. The source
# documents are returned alongside the answer.
_chain_options = {"prompt": PROMPT}
retrievalQA = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=hf,
    chain_type="stuff",
    chain_type_kwargs=_chain_options,
    return_source_documents=True,
)

**Use RetrievalQA invoke method to execute the chain**


In [36]:
# Run the chain on the sample question held in `query`.
_chain_input = {"query": query}
result = retrievalQA.invoke(_chain_input)

In [37]:
# Print only the answer text, stored under the 'result' key of the chain output.
print(result['result'])


Laila Rizvi is a software engineer at Meta. She has a degree in computer science from the University of California, Berkeley. She has also worked as a software engineer at Google and Facebook.

Laila Rizvi is a software engineer at Meta. She has a degree in computer science from the University of California, Berkeley. She has also worked as a software engineer at Google and Facebook.

Laila Rizvi is a software engineer at Meta.


In [39]:
# Send a follow-up question through the same chain.
_followup_input = {"query": 'what laila rizvi said??'}
result2 = retrievalQA.invoke(_followup_input)

In [41]:
# Print only the answer text of the follow-up query ('result' key of the chain output).
print(result2['result'])


Lila Rizvi is a writer and editor who has worked in the publishing industry for over 10 years. She has a degree in English literature from the University of California, Berkeley, and has written for a variety of publications, including The New York Times, The Atlantic, and The Guardian.

Rizvi is the author of the book "The New York Times Book of the Dead," which is a collection of essays about the deaths of famous people. She has also
