### Necessary imports

In [24]:
!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
#!pip install -q accelerate==0.21.0 peft==0.5.0 bitsandbytes==0.40.2 trl==0.4.7

In [25]:
!pip install -qU transformers datasets trl peft accelerate bitsandbytes auto-gptq optimum


In [26]:
#!pip install datasets

### Dependencies

In [27]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

In [28]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, TrainingArguments
from trl import SFTTrainer

In [29]:
#!pip install auto-gptq

In [30]:
#!pip install optimum

### Load quantized Zephyr 7B

In [31]:
#################################################################
# Tokenizer
#################################################################

#Note you can try to replce the Model of your Choice and it would mostly
#work with any Quantized Models
#model_name='mistralai/Mistral-7B-Instruct-v0.1'
model_name='TheBloke/zephyr-7B-beta-GPTQ'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)
bnb_config = GPTQConfig(bits=4,
                        disable_exllama=True,
                        device_map="auto",
                        use_cache=False,
                        lora_r=16,
                        lora_alpha=16,
                        tokenizer=tokenizer
                                )
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name_or_path = "TheBloke/zephyr-7B-beta-GPTQ"
#model_name_or_path = "TheBloke/notus-7B-v1-GPTQ"
print(model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto", trust_remote_code=False)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
#model = AutoModelForCausalLM.from_pretrained(
#    model_name,
#    quantization_config=bnb_config,
#)

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.


TheBloke/zephyr-7B-beta-GPTQ




### Count number of trainable parameters

In [32]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 262410240
percentage of trainable model parameters: 100.00%


### Build Zephyr text generation pipeline

In [165]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=200,
)

In [166]:
Zephyr_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

### Load and chunk documents. Load chunked documents into FAISS index

In [35]:
from langchain.document_loaders import AmazonTextractPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

In [36]:
from langchain.document_loaders import CSVLoader
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

#embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")


#loader_story_part1 = CSVLoader("/content/sample_data/simple_story_question_answers_for_finetuning.csv")
#loader_story_part2 = CSVLoader("/content/sample_data/simple_extended_story_question_answers_for_rag.csv")



In [106]:
from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import TextLoader


import os , time
pwd=os.getcwd()
st=time.time()

loader=DirectoryLoader(path=pwd,glob="**/simple*.csv",use_multithreading=True, loader_cls=TextLoader)
documents=loader.load()
endt=time.time()
et=endt-st
print(et)
print(len(documents))

0.0029408931732177734
2


In [107]:
# Chunk text to keep it meaningful size
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)

all_chunks = []

document = loader.load()
chunks = splitter.split_documents(document)
all_chunks += chunks

In [108]:
# Choose an Embedding model for vector representation of text
embedding_model_id = "BAAI/bge-small-en-v1.5"

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_id,
)

In [109]:
# Embed chunks
#FAISS is Facebook/Meta's vector embedding database
db = FAISS.from_documents(all_chunks, embeddings)


In [110]:
retriever = db.as_retriever()

### Create PromptTemplate and LLMChain

In [167]:
##### [INST] Instruction: Answer the question based on your fantasy football knowledge. Here is context to help:

prompt_template = """
### [INST] Instruction: Answer the question based on your  knowledge. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=Zephyr_llm, prompt=prompt)

### Build RAG Chain

In [112]:
# Ask Question 1
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("Where did Vendhan and Maria organize the procession inspired by Panguni Uthiram?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [113]:
# Retrives the relavant data from the questionaire dataset loaded into the Vector DB FAISS
# It takes our input question (Query), converts into a sentence embedding vector and searches
#for the closet possible vectors in the DB which are similar to the question asked.
result['context']

[Document(page_content='Where did Vendhan and Maria organize the procession inspired by Panguni Uthiram?,Lisbon', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='Where did Vendhan and Maria organize the procession inspired by Panguni Uthiram?,Lisbon', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='Where did Vendhan and Maria organize the procession inspired by Panguni Uthiram?,Lisbon', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='Where did Vendhan and Maria organize the procession inspired by Panguni Uthiram?,Lisbon', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'})]

In [114]:
# Answer to the Queston 1
print(result['text'])


<|assistant|>
According to the given context, Vendhan and Maria organized the procession inspired by Panguni Uthiram in Lisbon. This information can be found in four separate documents with identical content.


In [168]:
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("What was the universal language that Vendhan and Maria discovered?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [169]:
result['context']

[Document(page_content='What was the universal language that Vendhan and Maria discovered?,Love and compassion', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What was the universal language that Vendhan and Maria discovered?,Love and compassion', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What was the universal language that Vendhan and Maria discovered?,Love and compassion', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What was the universal language that Vendhan and Maria discovered?,Love and compassion', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'})]

In [170]:
print(result['text'])


<|assistant|>
Answer: Love and compassion were the universal language that Vendhan and Maria discovered, as stated in the given context.


In [49]:
#Question 3
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("Who is Aadhan?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [50]:
result['context']

[Document(page_content='who is Aadhan?,The Emperror of Kandigai\nWho is Aadhan?,Aadhan is a noble king.', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='Who faced assassination attempts?,Aadhan', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'}),
 Document(page_content='Who encountered challenges in solidifying his rule?,Aadhan', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'}),
 Document(page_content='Who faced challenges in reclaiming his destiny?,Aadhan', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'})]

In [51]:
print(result['text'])


### ANSWER:
Aadhan is a noble king, as mentioned in the given context.


In [52]:
#Question 4
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("Who is Aadhan?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [53]:
result['context']

[Document(page_content='who is Aadhan?,The Emperror of Kandigai\nWho is Aadhan?,Aadhan is a noble king.', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='Who faced assassination attempts?,Aadhan', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'}),
 Document(page_content='Who encountered challenges in solidifying his rule?,Aadhan', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'}),
 Document(page_content='Who faced challenges in reclaiming his destiny?,Aadhan', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'})]

In [54]:
print(result['text'])


### ANSWER:
Aadhan is a noble king, as mentioned in the given context.


In [175]:
#Question 5
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("What would be your advice for Amudhan?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [176]:
result['context']

[Document(page_content='What did Amudhan do after Aadhan left?,Ascended to the throne', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'}),
 Document(page_content='What kind of ruler was Amudhan?,Tyrant', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'}),
 Document(page_content="Who faced hardship?,Citizens under Amudhan's rule", metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'}),
 Document(page_content='Who faced challenges in confronting Amudhan?,Aadhan', metadata={'source': '/content/sample_data/simple_story_question_answers_for_finetuning.csv'})]

In [177]:
print(result['text'])


<|assistant|>
Based on the given context, it seems that Amudhan's actions as a ruler have led to hardships for his citizens. My advice for him would be to reevaluate his leadership style and prioritize the needs and well-being of his people over his own desires. As a tyrant, he may need to learn to govern with fairness, justice, and compassion. It is also clear from the text that Aadhan faced challenges in confronting Amudhan. Therefore, I suggest that Amudhan should consider listening to Aadhan's perspective and working together towards a more just and equitable society. Ultimately, Amudhan must recognize that true power lies not in oppression but in serving the interests of his people.


In [178]:
#Question 6
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("What was the situation when Vedhan and Maria met for the first time?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [179]:
result['context']

[Document(page_content='What did Vendhan and Maria exchange throughout their travels?,Cultural traditions and experiences', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What did Vendhan and Maria exchange throughout their travels?,Cultural traditions and experiences', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What did Vendhan and Maria exchange throughout their travels?,Cultural traditions and experiences', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What did Vendhan and Maria exchange throughout their travels?,Cultural traditions and experiences', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'})]

In [180]:
print(result['text'])


<|assistant|>
I do not have access to the specific story you are referring to involving vedhan and maria. Please provide more context or specify which story you are asking about so I can accurately answer your question. Based on the given question, it seems like you are asking what cultural traditions and experiences vedhan and maria exchanged during their travels. However, without knowing the context of their meeting, it is unclear if this happened at their first encounter. Therefore, I suggest providing more information to clarify the question.


In [63]:
# Lets expand the Context to use custom inputs via prompts

In [65]:
from langchain_core.prompts import ChatPromptTemplate
from operator import itemgetter


In [184]:
vectorstore = FAISS.from_texts(
    ["As time passed by Aadhan and Thamarai's son Vendhan, a prince turned 20. He was wise and disciplined wanted to explore the world. In the mean time, a trader from Portugal Cristiano Ronaldo visited the hills of Kandigai in search of some spices like Cardamom and pepper. He was with his family i.e his wife Carolina and beautiful daughter Maria. They were trekking on the hills. The Prince Vendhan was on the hills too in his horse to visit his friend Cheran who lives on top of the hill. He heard a cry for help and rushed towards the voice. He noticed Maria running and being chased by a bear. The prince rushed in this horse and picked Maria and rescued her. Saving her life, there emerged a romance. They soon fell in love. Maria took Vendhan to Portugal for a tour and explored the Portuguese culture"], embedding=embeddings
)
retriever1 = vectorstore.as_retriever()

In [185]:
rag_chain = (
 {"context": retriever1, "question": RunnablePassthrough()}
    | llm_chain

)


In [187]:
result=rag_chain.invoke("Where did Vendhan and Maria meet?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [188]:
result['context']

[Document(page_content="As time passed by Aadhan and Thamarai's son Vendhan, a prince turned 20. He was wise and disciplined wanted to explore the world. In the mean time, a trader from Portugal Cristiano Ronaldo visited the hills of Kandigai in search of some spices like Cardamom and pepper. He was with his family i.e his wife Carolina and beautiful daughter Maria. They were trekking on the hills. The Prince Vendhan was on the hills too in his horse to visit his friend Cheran who lives on top of the hill. He heard a cry for help and rushed towards the voice. He noticed Maria running and being chased by a bear. The prince rushed in this horse and picked Maria and rescued her. Saving her life, there emerged a romance. They soon fell in love. Maria took Vendhan to Portugal for a tour and explored the Portuguese culture")]

In [130]:
print(result['text'])


<|assistant|>
Vendhan and Maria met while Vendhan was rescuing Maria from being chased by a bear in the hills of Kandigai.


In [133]:
#Question 7
rag_chain = (
 {"context": retriever1, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("What was the situation when Vedhan and Maria met for the first time?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [134]:
print(result['text'])


<|assistant|>
Vedhan and Maria did not meet for the first time in the given context. The text describes how Vedhan saved Maria from a bear while she was being chased by it during her trekking


In [144]:
#Question 8
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("What happened in Lisbon?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [145]:
result['context']

[Document(page_content='What was the resounding success of the event in Lisbon?,Bringing together people from all walks of', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What was the resounding success of the event in Lisbon?,Bringing together people from all walks of', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What was the resounding success of the event in Lisbon?,Bringing together people from all walks of', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What was the resounding success of the event in Lisbon?,Bringing together people from all walks of', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'})]

In [146]:
print(result['text'])


<|assistant|>
Based on the given context, it can be inferred that a successful event took place in Lisbon and it brought together people from diverse backgrounds or walks of life. However, without further information provided, it


In [147]:
#Question 8
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("What is Panguni Uthiram?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [148]:
result['context']

[Document(page_content='What is Panguni Uthiram celebrated for?,Divine union of Lord Shiva and Goddess Parvati', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What is Panguni Uthiram celebrated for?,Divine union of Lord Shiva and Goddess Parvati', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What is Panguni Uthiram celebrated for?,Divine union of Lord Shiva and Goddess Parvati', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'}),
 Document(page_content='What is Panguni Uthiram celebrated for?,Divine union of Lord Shiva and Goddess Parvati', metadata={'source': '/content/sample_data/simple_extended_story_question_answers_for_rag.csv'})]

In [149]:
print(result['text'])


<|assistant|>
Panguni Uthiram is a celebration that commemorates the divine union between Lord Shiva and Goddess Parvati. Based on the information provided in the given context, it seems that this


In [171]:
#Question 9
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("Describe some features of celebration during Panguni Uthiram?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [172]:
print(result['text'])


<|assistant|>
During the Panguni Uthiram festival, elaborate processions and traditional dances are some of its prominent features. These celebrations are a part of the Tamil Hindu calendar and typically take place in March or April each year. The processions involve decorated floats carrying deities through the streets, accompanied by devotees singing hymns and chanting prayers. Traditional dance forms such as Bharatanatyam, Kavadi Attam, and Thiruvathira Kali are also performed during this festival, adding to the cultural richness of the celebrations. Overall, the Panguni Uthiram festival is a vibrant and colorful affair that showcases the deep-rooted traditions and beliefs of the Tamil community.


In [173]:
#Question 9
# We are exploring the capabilities of LLM to answer questions beyound
# the context as well.
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke(" What is kavadi Attam? Please answer even if it's not part of context")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [174]:
print(result['text'])


<|assistant|>
I do not have prior knowledge or context outside of what is provided in the given text. However, based on my research, "kavadi attam" refers to a traditional tamil festival that involves carrying a heavy structure called a kavadi as an act of penance and devotion to god murugan. It is typically performed during the thaipusam festival, which falls during the tamil month of thai (january-february).


In [192]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [193]:
!pip freeze > colab_package_dependencies_requirements.txt