In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'energy:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F5630231%2F9299042%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240902%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240902T122405Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D832e952f80b12640300e484d169b7c53e5aaae05ee973ea542835162a823bdf58b07307c53bd63baa7265c3d1044f942e4daaf28de64b85e88d686f4fe7f040536711ce305f500ef4a20dbc77325f145abac35909f98b0d49964cecab25c0287af48e96e437547e1eccc75271f1cf49f296acfab4b18a9248aae08bd904be2c007027ae7fc366c2336a6e579bf2260b90fddced66cdab994b8122acdd344bdbf89ab7f73e722738ff130ffa3dbd086641a5dfe675eb88a5820c135f506d35457f19594d4ed93aed3ee57f0f1ce4d610e399ffba788c7de448ed2f1663074feffac7316a3627e447ba5bf4a55befea31c6e0ec9bba33cf80ebed97b2835fcd203,llama2/pytorch/default/1:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-models-data%2F88718%2F105881%2Fbundle%2Farchive.tar.gz%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240902%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240902T122405Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Dce57ccfe6f893e7dc9008ecb702ba9c2f215fc48515a0dc22597a53c81d22fb16d500746f3b988e461d75c70991c9fa6b65900dbdbf747b260d188d9c5f7cd77acbdec9f2ab896213123bc3ccf269cb2a4848d3a346dc80bca9e3ec9d927d1c91891c8a96b780acaa685e0a7eec688b11d351f22dcbdae973bf566d7ca0150f14f59112a2843f7617288b81c703098ee8c59133c2db9d2469444a9d302e4e69f2364cea131101f5baf8b2810435cb6ba65e051e4ece2da1928f28ca0e7be523bfaf32ddb23631fbb379fb28f5e1a91347734a8ab0a383863e06a8b83124c4668d72b1bc482d8eab2afee3a49c41e10a990d9f49a52e38207da079189d4e80230'

# Locations that mirror Kaggle's directory layout inside this environment.
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): KAGGLE_SYMLINK is not referenced by any of the visible cells — confirm before removing.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
# Start from an empty input tree, then make sure both Kaggle directories exist.
shutil.rmtree('/kaggle/input', ignore_errors=True)
for kaggle_dir in (KAGGLE_INPUT_PATH, KAGGLE_WORKING_PATH):
    os.makedirs(kaggle_dir, 0o777, exist_ok=True)

# Expose the directories as ../input and ../working; a link that already
# exists is left untouched.
for link_target, link_basename in (
    (KAGGLE_INPUT_PATH, 'input'),
    (KAGGLE_WORKING_PATH, 'working'),
):
    try:
        os.symlink(link_target, os.path.join("..", link_basename), target_is_directory=True)
    except FileExistsError:
        pass

# Download every data source listed in DATA_SOURCE_MAPPING and unpack it under
# KAGGLE_INPUT_PATH. Entries are comma-separated "<directory>:<encoded URL>" pairs.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only so a colon inside the URL part cannot break
    # the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header lookup yields None when Content-Length is missing; in
            # that case the progress bar stays empty instead of crashing on
            # int(None).
            expected_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                # Clamp to the bar width in case more bytes arrive than announced.
                done = min(50, int(50 * dl / expected_bytes)) if expected_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise risk reading a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name so the `tarfile` module is
                # not shadowed for the rest of the notebook.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is checked first because it is a subclass of OSError.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the input directory.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/energy/1Ch2.pdf
/kaggle/input/llama2/pytorch/default/1/llama-2-7b-chat.Q4_K_M.gguf


I’ve been working in the field of **conversational AI** lately. My latest project has been to create a custom chatbot that can answer questions based on specific PDF documents. I’m using **Retrieval-Augmented Generation (RAG)** to make this happen.

In [None]:
!pip install PyPDF2 langchain langchain_community sentence_transformers llama-cpp-python faiss-gpu tiktoken torch

Collecting PyPDF2
  Using cached pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain
  Using cached langchain-0.2.15-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain_community
  Using cached langchain_community-0.2.15-py3-none-any.whl.metadata (2.7 kB)
Collecting sentence_transformers
  Using cached sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting llama-cpp-python
  Using cached llama_cpp_python-0.2.90.tar.gz (63.8 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting langchain-core<0.3.0,>=0.2.35 (from la

In [None]:
#importing the required libraries
import PyPDF2

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import LlamaCpp


from langchain.embeddings import HuggingFaceEmbeddings # import hf embedding
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain


from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer, util
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

  from tqdm.autonotebook import tqdm, trange


In [None]:
pdf_docs=["/kaggle/input/energy/1Ch2.pdf"]

In [None]:
def prepare_docs(pdf_docs):
    """Extract per-page text and titles from a list of PDF files.

    Args:
        pdf_docs: Iterable of PDF paths given as strings; each one is opened
            with ``PyPDF2.PdfReader``.

    Returns:
        A ``(content, metadata)`` tuple of equal-length lists. ``content``
        holds the extracted text of each page, and ``metadata`` holds a
        ``{"title": "<path> page <n>"}`` dict for the same page, where the
        page number is 1-based.
    """
    content = []
    metadata = []

    for pdf_path in pdf_docs:
        reader = PyPDF2.PdfReader(pdf_path)
        for page_number, page in enumerate(reader.pages, start=1):
            page_title = pdf_path + " page " + str(page_number)
            content.append(page.extract_text())
            metadata.append({"title": page_title})

    print("Content and metadata are extracted from the documents")
    return content, metadata

In [None]:
import torch
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
# The later cells read this module-level `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Ask PyTorch to release its cached CUDA memory before the models are loaded.
torch.cuda.empty_cache()
# Bare expression: shown as the cell's output when run in a notebook.
device

device(type='cuda')

In [None]:
def get_text_chunks(content, metadata):
    """Split the extracted texts into overlapping passages.

    Args:
        content: List of text strings (one per page in this notebook).
        metadata: List of metadata dicts, passed through as ``metadatas`` so
            each passage keeps the metadata of the text it came from.

    Returns:
        The list of documents produced by the splitter's ``create_documents``.
    """
    # Chunk size 512 with an overlap of 256, measured by the splitter's
    # tiktoken-based length function.
    splitter_options = {"chunk_size": 512, "chunk_overlap": 256}
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(**splitter_options)

    passages = splitter.create_documents(content, metadatas=metadata)
    passage_count = len(passages)
    print(f"Documents are split into {passage_count} passages")
    return passages

In [None]:
def ingest_into_vectordb(split_docs):
    """Embed the passages, index them with FAISS and save the index locally.

    Args:
        split_docs: Documents to index (the output of ``get_text_chunks``).

    Returns:
        The FAISS vector store built from ``split_docs``. A copy is also
        written to ``vectorstore/db_faiss`` relative to the working directory.
    """
    index_dir = 'vectorstore/db_faiss'

    # The embedding model is placed on the module-level `device`.
    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': device},
    )

    vector_index = FAISS.from_documents(split_docs, embedder)
    vector_index.save_local(index_dir)
    return vector_index

In [None]:
template = """[INST]
As an AI, provide accurate and relevant information based on the provided document. Your responses should adhere to the following guidelines:
- Answer the question based on the provided documents.
- Be direct and factual, limited to 50 words and 2-3 sentences. Begin your response without using introductory phrases like yes, no etc.
- Maintain an ethical and unbiased tone, avoiding harmful or offensive content.
- If the document does not contain relevant information, state "I cannot provide an answer based on the provided document."
- Avoid using confirmatory phrases like "Yes, you are correct" or any similar validation in your responses.
- Do not fabricate information or include questions in your responses.
- do not prompt to select answers. do not ask me questions
{question}
[/INST]
"""

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

def get_conversation_chain(vectordb):
    """Build a conversational retrieval chain backed by a local Llama 2 model.

    Args:
        vectordb: Vector store exposing ``as_retriever()`` (in this notebook,
            the FAISS index returned by ``ingest_into_vectordb``).

    Returns:
        A ``ConversationalRetrievalChain`` that keeps the chat history in
        memory and returns the retrieved source documents with each answer.
    """
    # LlamaCpp has no `device` argument: LangChain only warned and moved it
    # into model_kwargs, so it never selected the GPU. Offloading is requested
    # through `n_gpu_layers`; -1 asks for every layer.
    # NOTE(review): offloading only takes effect when llama-cpp-python was
    # built with GPU support — confirm the installed wheel.
    gpu_layers = -1 if device.type == "cuda" else None

    llama_llm = LlamaCpp(
        model_path="/kaggle/input/llama2/pytorch/default/1/llama-2-7b-chat.Q4_K_M.gguf",
        temperature=0.75,
        max_tokens=300,
        top_p=1,
        callback_manager=callback_manager,
        n_ctx=3000,
        n_gpu_layers=gpu_layers,
    )

    retriever = vectordb.as_retriever()

    # NOTE(review): the module-level `template` is intentionally not wired in
    # here. It only defines `{question}`, so it presumably cannot serve as a
    # condense-question prompt (which also needs the chat history) — confirm
    # the intended use before passing it to the chain.

    # output_key='answer' tells the memory which output to store, since the
    # chain also returns the source documents.
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True, output_key='answer')

    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llama_llm,
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
    )
    print("Conversational Chain created for the LLM using the vector store")
    return conversation_chain


In [None]:
def validate_answer_against_sources(response_answer, source_documents,
                                    similarity_threshold=0.5, model=None):
    """Check whether an answer is semantically close to any retrieved source.

    Args:
        response_answer: The generated answer text.
        source_documents: Iterable of documents exposing ``page_content``.
        similarity_threshold: Cosine similarity a source must exceed for the
            answer to count as supported. Defaults to 0.5.
        model: Optional, already loaded ``SentenceTransformer`` to reuse.
            When omitted, ``all-MiniLM-L6-v2`` is loaded on every call.

    Returns:
        True if the cosine similarity between the answer and at least one
        source exceeds ``similarity_threshold``; False otherwise, including
        when there are no source documents.
    """
    source_texts = [doc.page_content for doc in source_documents]
    if not source_texts:
        # Nothing to compare against, so the answer cannot be supported.
        return False

    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')

    answer_embedding = model.encode(response_answer, convert_to_tensor=True).to(device)
    source_embeddings = model.encode(source_texts, convert_to_tensor=True).to(device)

    # One row (the answer) by one column per source text.
    cosine_scores = util.pytorch_cos_sim(answer_embedding, source_embeddings)

    return bool((cosine_scores > similarity_threshold).any())

In [None]:
content, metadata = prepare_docs(pdf_docs)

Content and metadata are extracted from the documents


In [None]:
split_docs = get_text_chunks(content, metadata)

Documents are split into 25 passages


In [None]:
vectordb=ingest_into_vectordb(split_docs)

  embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': device})


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
conversation_chain=get_conversation_chain(vectordb)

  llama_llm = LlamaCpp(
llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /kaggle/input/llama2/pytorch/default/1/llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:

Conversational Chain created for the LLM using the vector store


AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 | 
Model metadata: {'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.context_length': '4096', 'general.name': 'LLaMA v2', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '11008', 'llama.attention.layer_norm_rms_epsilon': '0.000001', 'llama.rope.dimension_count': '128', 'llama.attention.head_count': '32', 'tokenizer.ggml.bos_token_id': '1', 'llama.block_count': '32', 'llama.attention.head_count_kv': '32', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '15'}
Using fallback chat format: llama-2
                device was transferred to model_kwargs.
                Please confirm that device is what you i

In [None]:
# Run a single question through the chain and print the generated answer.
user_question = "Define various types of energy?"
# Calling the chain object directly goes through the deprecated
# Chain.__call__; invoke() takes the same input dict and returns the same
# output dict.
response = conversation_chain.invoke({"question": user_question})
print("A: ",response['answer'])

  response=conversation_chain({"question": user_question})


 The question is seeking to determine the different forms of energy, and a comprehensive answer would be:

There are several types of energy, including:

1. Gravitational Energy - the energy of place or position, such as water in a reservoir behind a hydroelectric dam.
2. Kinetic Energy - the energy of motion, including waves, electrons, atoms, and molecules. Examples include radiant energy, thermal energy, and sound. 
3. Radiant Energy - electromagnetic energy that travels in transverse waves, including visible light, x-rays, gamma rays, and radio waves. An example of radiant energy is solar energy.
4. Thermal Energy - the internal energy of substances, resulting from the vibration and movement of atoms and molecules within the substance. Examples include geothermal energy and heat.
5. Motion - the movement of objects or substances from one place to another, including wind and hydropower.
6. Sound Energy - the movement of energy through substances in longitudinal waves, such as sound 


llama_print_timings:        load time =    1416.68 ms
llama_print_timings:      sample time =     138.29 ms /   268 runs   (    0.52 ms per token,  1938.01 tokens per second)
llama_print_timings: prompt eval time =  405682.76 ms /  2096 tokens (  193.55 ms per token,     5.17 tokens per second)
llama_print_timings:        eval time =  106543.94 ms /   268 runs   (  397.55 ms per token,     2.52 tokens per second)
llama_print_timings:       total time =  513141.79 ms /  2364 tokens


A:   The question is seeking to determine the different forms of energy, and a comprehensive answer would be:

There are several types of energy, including:

1. Gravitational Energy - the energy of place or position, such as water in a reservoir behind a hydroelectric dam.
2. Kinetic Energy - the energy of motion, including waves, electrons, atoms, and molecules. Examples include radiant energy, thermal energy, and sound. 
3. Radiant Energy - electromagnetic energy that travels in transverse waves, including visible light, x-rays, gamma rays, and radio waves. An example of radiant energy is solar energy.
4. Thermal Energy - the internal energy of substances, resulting from the vibration and movement of atoms and molecules within the substance. Examples include geothermal energy and heat.
5. Motion - the movement of objects or substances from one place to another, including wind and hydropower.
6. Sound Energy - the movement of energy through substances in longitudinal waves, such as so