<a href="https://colab.research.google.com/github/rajdas2001/LLM-Experiments/blob/main/RAG_on_custom_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
!pip install ctransformers
!pip install ydata-profiling
!pip install chromadb
!pip install langchain
!pip install sentence_transformers
!pip install bitsandbytes

# Standard library
import os

# Data Handling
import pandas as pd
import numpy as np

# Auto EDA
from ydata_profiling import ProfileReport

# Torch and Transformers
import torch
from torch import bfloat16
import transformers
from transformers import AutoTokenizer

# LangChain
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

# Hiding warnings
# NOTE: the "ignore" filter is installed without a category or module, so it
# silences every warning (including deprecation notices from the libraries
# imported above) for the rest of the kernel session.
import warnings
warnings.filterwarnings("ignore")



In [23]:
# Pick the torch device and, when CUDA is present, report which GPU is active.
# NOTE(review): `device` is not referenced by any later cell visible in this
# notebook - confirm whether it is still needed.
if torch.cuda.is_available():
    # Query the name and the memory of the SAME device. The active device is
    # not guaranteed to be index 0 on multi-GPU machines, so a hard-coded 0
    # could report the memory of a different card than the one named.
    gpu_index = torch.cuda.current_device()
    gpu_name = torch.cuda.get_device_name(gpu_index)
    total_memory = torch.cuda.get_device_properties(gpu_index).total_memory
    total_memory_gb = total_memory / (1024**3)  # bytes -> GB (binary, 1024^3)
    print("GPU is available. \nUsing GPU")
    print("\nGPU Name:", gpu_name)
    print(f"Total GPU Memory: {total_memory_gb:.2f} GB")

    device = torch.device('cuda')
else:
    print("GPU is not available. \nUsing CPU")
    device = torch.device('cpu')

GPU is not available. 
Using CPU


In [24]:
# Read the training CSV (decoded with the 'unicode_escape' codec) into a
# dataframe, then prepare a ydata-profiling report titled 'Test Dataset'.
df = pd.read_csv(
    "train.csv",
    encoding='unicode_escape',
)
report = ProfileReport(
    df,
    title='Test Dataset',
)

In [25]:
# Bare last expression: lets the notebook render the profiling report inline.
report

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [26]:
# Row count before cleaning
print(f"Dataframe Length: {len(df)} rows")

# Discard every row that has a missing value in any column
df = df.dropna()

# Row count after cleaning
print(f"Length After Dropping Empty Values: {len(df)} rows")

Dataframe Length: 215 rows
Length After Dropping Empty Values: 215 rows


In [27]:
# Wrap the dataframe in a LangChain loader; the "Answer" column supplies the
# text of each document.
articles = DataFrameLoader(
    df,
    page_content_column="Answer",
)
articles

<langchain_community.document_loaders.dataframe.DataFrameLoader at 0x7f748f9553f0>

In [28]:
# Run the loader to turn the dataframe into LangChain documents
# (text taken from the "Answer" column configured on the loader).
document = articles.load()

In [29]:
# Break the loaded documents into chunks of at most 1000 characters,
# letting neighbouring chunks share 20 characters of overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
splitted_texts = splitter.split_documents(document)

In [30]:
# Sentence-transformers model used to embed the text chunks
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [31]:
# Creating an indexed database.
# Chroma.from_documents() adds to whatever collection already lives in
# `persist_directory`, so re-running this cell would store every chunk again
# and the retriever would start returning duplicated passages. Reuse the
# persisted index when it exists and build it only on the first run.
# NOTE: delete the 'chroma_db' folder to force a rebuild after changing the
# data, the splitter settings or the embedding model.
if os.path.isdir('chroma_db') and os.listdir('chroma_db'):
    chroma_database = Chroma(persist_directory = 'chroma_db',
                             embedding_function = embedding_model)
else:
    chroma_database = Chroma.from_documents(splitted_texts,
                                          embedding_model,
                                          persist_directory = 'chroma_db')

In [32]:
# Sanity check: echo the vector-store object (shows only its repr, not its contents)
chroma_database

<langchain_community.vectorstores.chroma.Chroma at 0x7f756944d6f0>

In [33]:
# Defining a retriever on top of the vector store.
# No arguments are passed, so the store's default search settings apply.
retriever = chroma_database.as_retriever()

In [34]:
# Sanity check: echo the retriever's repr (tags and backing vector store)
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7f756944d6f0>)

In [35]:
# 4-bit NF4 quantisation settings (double quantisation, bfloat16 compute)
# for loading a model through transformers + bitsandbytes.
# NOTE(review): no visible cell passes this config to a model - the LLM below
# is loaded through CTransformers instead. Confirm whether it is still needed.
quantization_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

In [36]:
from langchain.llms import CTransformers
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# Load the q2_K-quantised GGML build of Llama-2-7B-Chat through ctransformers.
# The streaming callback echoes generated tokens to stdout as they arrive.
llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGML",
    model_file='llama-2-7b-chat.ggmlv3.q2_K.bin',
    callbacks=[StreamingStdOutCallbackHandler()],
)

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [37]:
# Retrieval-augmented QA chain: the retriever fetches matching chunks and the
# 'stuff' chain type hands them to the LLM together with the question.
QnA = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type='stuff',
    verbose=False,
)

In [38]:
# Defining function to answer a query with the QnA chain and print the result
def get_answers(QnA, query):
    """Run `query` through `QnA` and print the query followed by its answer.

    Args:
        QnA: Object exposing ``run(query)``; in this notebook, the
            RetrievalQA chain.
        query: Question text handed to the chain.

    Returns:
        None. The query and the answer are only printed.
    """
    # Print the header BEFORE running the chain: the chain may write to stdout
    # while it runs (the LLM in this notebook streams tokens through a stdout
    # callback), and printing the query afterwards put it below the answer.
    print(f"\033[1mQuery:\033[0m {query}\n")
    answer = QnA.run(query)
    print("\033[1mAnswer:\033[0m ", answer)

In [40]:
# Ask the chain a sample question and print the result
query = "Who is Raj Das?"
get_answers(QnA, query)


Raj Das is a data scientist who created me! [1mQuery:[0m Who is Raj Das?

[1mAnswer:[0m  
Raj Das is a data scientist who created me! 
