# Install necessary Python libraries

In [1]:
#This cell downloads and install necessary Python libraries.

!pip install langchain sentence_transformers faiss-gpu faiss-cpu ctransformers
!pip install accelerate
!pip install huggingface_hub
!pip install -i https://pypi.org/simple/ bitsandbytes

Collecting langchain
  Downloading langchain-0.1.13-py3-none-any.whl (810 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m810.5/810.5 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence_transformers
  Downloading sentence_transformers-2.6.0-py3-none-any.whl (163 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.1/163.1 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ctransformers
  Downloading ctransformers-0.2.27-py3-none-any

In [5]:
# Install pypdf library which is used to read pdfs.
pip install pypdf

Collecting pypdf
  Downloading pypdf-4.1.0-py3-none-any.whl (286 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m286.1/286.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-4.1.0


# Import Libraries

In [6]:
#This cell imports all the required libraries.

import textwrap

import torch
from huggingface_hub import notebook_login
from langchain import HuggingFacePipeline
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
from transformers import pipeline


# Read PDF files

In [7]:
# Read each PDF with PyPDFLoader (backed by the pypdf library) into a list of documents.
# `load()` returns one document per page, holding the page text and its metadata,
# including the page number.
# Chunking is deliberately left to the RecursiveCharacterTextSplitter configured in the
# next cell: `load_and_split()` would first chunk the pages with the loader's default
# splitter, so the text would be split twice with two different settings.

loader = PyPDFLoader("/content/Mobily AR_2022_English (1).pdf")
pages = loader.load()
loader = PyPDFLoader("/content/Operation-and-Maintenance-Manual_SEBU8407-06 (1).pdf")
pages2 = loader.load()

# Single list containing the pages of both PDFs, in the order they were loaded.
merged_pages = pages + pages2

# Add PDF files to FAISS index DB.

In [8]:
# Chunk the loaded pages, embed the chunks, and persist them in a local FAISS index.

# RecursiveCharacterTextSplitter cuts documents into smaller segments for further processing:
#   chunk_size=500   -> chunks of approximately 500 characters
#   chunk_overlap=50 -> consecutive chunks share 50 characters, which helps keep context
#                       intact across a cut
splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)

# split_documents takes the PDF pages and returns the smaller chunks produced with the
# chunk_size / chunk_overlap configured on the splitter.
texts = splitter.split_documents(merged_pages)

# Embedding generator based on the sentence-transformers model all-MiniLM-L6-v2,
# configured to run on the CPU.
embeddings = HuggingFaceEmbeddings(
    model_kwargs={"device": "cpu"},
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Build the vector index from the chunks and save it to the local "faiss" folder.
db = FAISS.from_documents(documents=texts, embedding=embeddings)
db.save_local("faiss")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Log in to the Hugging Face Hub to download the model

In [9]:
# Log in through the Hugging Face CLI so that the Llama 2 model can be downloaded
# from the Hugging Face Hub. The command prompts for an access token.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Load the Llama2 Model

In [10]:
# Load the tokenizer and the weights of 'meta-llama/Llama-2-7b-chat-hf' with
# Hugging Face Transformers. This checkpoint is the chat variant of the 7B model
# from the LLaMA (Large Language Model Meta AI) family.
#
# - device_map='auto' lets the library decide where the weights are placed.
# - torch_dtype=torch.float16 is the dtype requested for the loaded weights.
# - token=True reuses the access token stored by the login cell; it replaces the
#   deprecated `use_auth_token=True`.
# - quantization_config=BitsAndBytesConfig(load_in_8bit=True) requests 8-bit loading;
#   it replaces the deprecated `load_in_8bit=True` keyword, which triggers a
#   deprecation warning. Use BitsAndBytesConfig(load_in_4bit=True) for 4-bit instead.

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",
    torch_dtype=torch.float16,
    token=True,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

# Create Transformer Pipeline

In [11]:
# Configure a text-generation pipeline around the model and tokenizer loaded above.
#
# Text generation parameters:
#   max_new_tokens=1024     upper bound on the number of new tokens generated per input.
#   do_sample=True          sample from the next-token distribution instead of always
#                           picking the most likely token.
#   temperature=0.4         controls the randomness of sampling. It is set here, on the
#                           pipeline, because these are the generation settings applied
#                           when the pipeline produces text.
#   top_k=10                only the 10 most likely next tokens are considered at each step.
#   num_return_sequences=1  return a single generated sequence per input.
#   eos_token_id            end-of-sequence token ID taken from the tokenizer.
#
# torch_dtype / device_map are intentionally not passed: `model` is an already
# instantiated object whose dtype, quantization and placement were fixed when it was
# loaded, and requesting bfloat16 here contradicted the float16 used at load time.

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.4,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# Create a prompt

In [12]:
# Build the prompt template used for every question.

# The template text fixes the structure of the prompt sent to the language model and
# tells it how to respond. {context} and {question} are placeholders that are filled
# in dynamically: the context carries the background information needed to answer,
# and the question is the user's query.

template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Create RetrievalQA chain

In [22]:
# Wrap the previously defined pipeline object in a LangChain HuggingFacePipeline.
# A temperature of 0.4 is passed through model_kwargs; temperature controls the
# randomness of the generated output.
# NOTE(review): confirm that model_kwargs given to an already-built pipeline reaches
# generation in the installed LangChain version; sampling settings may have to be set
# on the pipeline itself.
llm = HuggingFacePipeline(model_kwargs={"temperature": 0.4}, pipeline=pipe)

# Retrieval component of the QA system: search_kwargs={'k': 10} asks for the 10 most
# relevant documents from the FAISS index for each query.
retriever = db.as_retriever(search_kwargs={"k": 10})

# RetrievalQA builds a question-answering chain over the index, using the custom prompt
# and returning the source documents alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)


# Test the model with questions

In [25]:
# Test the chain with a query and keep only the text that follows the answer marker.

query = "what all awards did Mobily win in 2022?"
# `invoke` replaces calling the chain object directly, which LangChain deprecated.
result = chain.invoke({"query": query})
result_text = result["result"]

# The prompt ends with "Helpful answer:", so the answer is whatever follows that marker.
# partition() splits on the first occurrence; when the marker is absent `found` is empty
# and the whole text is kept. (find() returns -1 in that case, and slicing from -1 would
# keep only the last character of the text.)
_, found, tail = result_text.partition("Helpful answer:")
helpful_answer = (tail if found else result_text).strip()
helpful_answer

'Mobily won the following awards in 2022:\n\n* "Top-Rated Mobile Network" award for the second consecutive year\n* "Top-Rated Fixed Network" award\n* "Saudi Arabian Internet of Things Enabling Technology Leadership Award" from Frost & Sullivan\n* "Excellent Consistent Quality" and "Core Consistent Quality" awards from Opensignal\n* Labor Award in the skills and training category for the best training program for students and graduates from the Ministry of Human Resources and Social Development\n* Golden Award (HCM Excellence) 2022 for the Talent Acquisition category as the Best New Hire Onboarding Program from Brandon Hall Group.'

In [24]:
# Test the chain with a query and keep only the text that follows the answer marker.
# The answer to this question is on page 31 of the
# Operation-and-Maintenance-Manual_SEBU8407-06 (1) pdf.

query = "What can be done to prevent Electrical Storm Injury?"
# `invoke` replaces calling the chain object directly, which LangChain deprecated.
result = chain.invoke({"query": query})
result_text = result["result"]

# The prompt ends with "Helpful answer:", so the answer is whatever follows that marker.
# partition() splits on the first occurrence; when the marker is absent `found` is empty
# and the whole text is kept. (find() returns -1 in that case, and slicing from -1 would
# keep only the last character of the text.)
_, found, tail = result_text.partition("Helpful answer:")
helpful_answer = (tail if found else result_text).strip()
helpful_answer

"Do not attempt to mount or dismount the machine during an electrical storm. Stay in the operator's station or away from the machine if you are on the ground. Wear protective equipment such as hard hats, protective glasses, and other protective equipment as required. Make sure the equipment is free from foreign material, and repair any nonmetallic hoods or fenders in a well-ventilated area away from open flames or sparks. Inspect the fire extinguisher and service it regularly. Observe the recommendations on the instruction plate."