<a href="https://colab.research.google.com/github/tamoghna21/RAG_LLM/blob/main/0_LLM_with_Langchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Running an LLM (Mistral-7B-Instruct-v0.1) through a LangChain chain

In [None]:
!pip install -q torch transformers accelerate bitsandbytes langchain sentence-transformers
!pip install -q langchain_community
!pip install -q python-dotenv

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m973.5/973.5 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.7/224.7 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m52.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m308.5/308.5 kB[0m [31m36.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.8/122.8 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━

In [None]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from transformers import AutoTokenizer, pipeline
from typing import Optional, List, Tuple

In [None]:
import os

from dotenv import load_dotenv
from google.colab import drive

# Mount Google Drive and make its root the working directory, so that relative
# paths used below (including the .env file) resolve inside the Drive.
drive.mount('/content/drive')
os.chdir("/content/drive/My Drive/")

# Load the variables defined in the .env file of the working directory.
load_dotenv('./.env')

# Later cells read os.environ["HUGGINGFACE_TOKEN"]; fail here with an actionable
# message instead of an opaque TypeError/KeyError if the token was not provided.
if not os.environ.get("HUGGINGFACE_TOKEN"):
    raise RuntimeError(
        "HUGGINGFACE_TOKEN is not set. Define it in '/content/drive/My Drive/.env' "
        "or export it in the environment before running this cell."
    )

Mounted at /content/drive


In [None]:
# Import from the canonical LangChain packages: `langchain_core` holds prompts and
# runnables, `langchain_community` holds the HuggingFacePipeline wrapper. The old
# `langchain.prompts` / `langchain.schema.runnable` / `langchain.llms` paths are
# legacy re-exports.
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import HuggingFacePipeline

from huggingface_hub import login

# Authenticate against the Hugging Face Hub with the token placed in the environment
# by the previous cell.
login(token=os.environ["HUGGINGFACE_TOKEN"])

READER_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.1' # The LLM Model

# Tokenizer: reuse the EOS token for padding and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 4-bit quantization settings for loading the base model.
use_4bit = True # Activate 4-bit precision base model loading
compute_dtype = getattr(torch, "float16") # Compute dtype for 4-bit base models
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit, # Activate 4-bit precision base model loading
    bnb_4bit_use_double_quant=False, # Nested (double) quantization is disabled
    bnb_4bit_quant_type="nf4", # Quantization type (fp4 or nf4)
    bnb_4bit_compute_dtype=compute_dtype # float16; torch.bfloat16 is the alternative
)

# Check GPU compatibility with bfloat16 (compute capability major version >= 8).
# Guarded by torch.cuda.is_available() because get_device_capability() raises when
# no CUDA device is present. The printed hint is informational only: it mentions
# training, while the cells shown here only run inference.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load the model with the quantization config defined above.
model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME,quantization_config=bnb_config)

# Text-generation pipeline: sampling at a low temperature with a mild repetition
# penalty; return_full_text=False asks for the generated text without the prompt.
READER_LLM = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=1000,
)

# Wrap the transformers pipeline so it can be used as a LangChain LLM.
langchain_llm = HuggingFacePipeline(pipeline=READER_LLM)

# Create prompt template with two placeholders: {context} and {question}.
prompt_template = """
### [INST] Instruction: Answer the question based on your knowledge. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
"""

# Create prompt from prompt template
prompt = PromptTemplate(
  input_variables=["context", "question"],
  template=prompt_template,
)

# Create llm chain: the formatted prompt is piped into the LLM.
llm_chain = prompt | langchain_llm

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Baseline query without RAG: the context slot is left empty, so the model has
# only the question itself to work with.
question = "What is the value of pi?"
output = llm_chain.invoke(dict(context="", question=question))
print(output)


Pi (π) is a mathematical constant that represents the ratio of a circle's circumference to its diameter. Its exact numerical value cannot be expressed as a simple fraction, and it is calculated using an infinite decimal expansion, which is approximately 3.14159. This value can be calculated mathematically or through computer algorithms. However, the value of Pi is an irrational number, meaning it neither ends nor follows a predictable repeating pattern.


In [None]:
# No retrieval step here either: the context is written by hand and passed
# straight into the prompt alongside the question.
question = "In Which country I was born?"
output = llm_chain.invoke(dict(context="I was born in China", question=question))
print(output)

You were born in China.
