### Necessary imports

In [1]:
!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate peft==0.4.0 trl==0.4.7
!pip install -i https://pypi.org/simple/ bitsandbytes

Looking in indexes: https://pypi.org/simple/


### Dependencies

In [3]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
#from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

### Load quantized Mistal 7B

In [9]:
#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.1'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
     major, _ = torch.cuda.get_device_capability()
     if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

### Count number of trainable parameters

In [10]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


### Build Mistral text generation pipeline

In [11]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

In [12]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

### Load and chunk documents. Load chunked documents into FAISS index

In [13]:
!playwright install
!playwright install-deps

Downloading Chromium 123.0.6312.4 (playwright build v1105)[2m from https://playwright.azureedge.net/builds/chromium/1105/chromium-linux.zip[22m
Chromium 123.0.6312.4 (playwright build v1105) downloaded to /root/.cache/ms-playwright/chromium-1105
Downloading FFMPEG playwright build v1009[2m from https://playwright.azureedge.net/builds/ffmpeg/1009/ffmpeg-linux.zip[22m
FFMPEG playwright build v1009 downloaded to /root/.cache/ms-playwright/ffmpeg-1009
Downloading Firefox 123.0 (playwright build v1440)[2m from https://playwright.azureedge.net/builds/firefox/1440/firefox-ubuntu-20.04.zip[22m
Firefox 123.0 (playwright build v1440) downloaded to /root/.cache/ms-playwright/firefox-1440
Downloading Webkit 17.4 (playwright build v1983)[2m from https://playwright.azureedge.net/builds/webkit/1983/webkit-ubuntu-20.04.zip[22m
Webkit 17.4 (playwright build v1983) downloaded to /root/.cache/ms-playwright/webkit-1983
╔══════════════════════════════════════════════════════╗
║ Host system is missi

In [14]:
!nvidia-smi

Sun Mar 24 18:50:33 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0              32W / 250W |   5228MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [15]:
import nest_asyncio
nest_asyncio.apply()

# Articles to index
articles = ["https://admissions.karunya.edu/",
            "https://admissions.karunya.edu/whykarunya",
            "https://admissions.karunya.edu/ug",
            "https://admissions.karunya.edu/pg",
            "https://admissions.karunya.edu/programmes/research",
            "https://admissions.karunya.edu/nri-admission",
            "https://admissions.karunya.edu/scholarships",
            "https://admissions.karunya.edu/programmes/research"]

# Scrapes the blogs above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [16]:
# Converts HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=100,
                                      chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Create PromptTemplate and LLMChain

In [17]:
prompt_template = """
### [INST] Instruction: Answer the question based on your knowledge about Karunya. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [18]:
llm_chain.invoke({"context": "", "question": "What are the requirements to get a scholarship in Karunya?"})



{'context': '',
 'question': 'What are the requirements to get a scholarship in Karunya?',
 'text': "\n### [INST] Instruction: Answer the question based on your knowledge about Karunya. Here is context to help:\n\n\n\n### QUESTION:\nWhat are the requirements to get a scholarship in Karunya? [/INST]\n \nKarunya University offers scholarships to students who meet certain criteria. To be eligible for a scholarship, students must have a minimum GPA of 70% or higher in their undergraduate program. Additionally, they must demonstrate financial need and academic excellence. The university also considers extracurricular activities and community service involvement as part of the selection process. It's important to note that the specific requirements may vary depending on the type of scholarship being applied for."}

### Build RAG Chain

In [25]:
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("What is IAESTE?")



In [26]:
result['context']

[Document(page_content='IAESTE is a platform for students from all parts of the world to procure the\nprofessional technical expertise required for them in their careers. IAESTE or\nThe International Association for the Exchange of Students for Technical\nExperience, as the name suggests, is a platform for students from all parts of\nthe world to procure the professional technical expertise required for them in\ntheir careers. read more __', metadata={'source': 'https://admissions.karunya.edu/whykarunya'}),
 Document(page_content='## IAESTE\n\nThe International Association for the Exchange of Students for Technical\nExperience', metadata={'source': 'https://admissions.karunya.edu/whykarunya'}),
 Document(page_content='## Accreditation & Ranking\n\n##### AICTE', metadata={'source': 'https://admissions.karunya.edu/'}),
 Document(page_content='#### School of **Engineering & Technology **', metadata={'source': 'https://admissions.karunya.edu/'})]

In [27]:
print(result['text'])


### [INST] Instruction: Answer the question based on your knowledge about Karunya. Here is context to help:

[Document(page_content='IAESTE is a platform for students from all parts of the world to procure the\nprofessional technical expertise required for them in their careers. IAESTE or\nThe International Association for the Exchange of Students for Technical\nExperience, as the name suggests, is a platform for students from all parts of\nthe world to procure the professional technical expertise required for them in\ntheir careers. read more __', metadata={'source': 'https://admissions.karunya.edu/whykarunya'}), Document(page_content='## IAESTE\n\nThe International Association for the Exchange of Students for Technical\nExperience', metadata={'source': 'https://admissions.karunya.edu/whykarunya'}), Document(page_content='## Accreditation & Ranking\n\n##### AICTE', metadata={'source': 'https://admissions.karunya.edu/'}), Document(page_content='#### School of **Engineering & Technolog