## Quantized Mistral 7B-Instruct-v0.2

In [23]:
import os
import time
import json

from llama_cpp import Llama

In [14]:
# Map each quantization label to the GGUF file that holds that variant.
model_name_cfg = {
    quant: f"mistral-7b-instruct-v0.2.{quant}.gguf"
    for quant in ("Q3_K_L", "Q4_K_M", "Q5_K_M")
}

# Absolute path of the model folder, resolved against the current working directory.
model_dir = os.path.abspath("Mistral-7B-Instruct-v0.2-GGUF")

In [15]:
from llama_cpp import Llama

def load_llm(model, n_ctx=32768, n_threads=8, n_gpu_layers=35, chat_format="llama-2"):
    """Load a GGUF model file from ``model_dir`` with llama-cpp-python.

    The keyword defaults reproduce the values that were previously hard-coded,
    so ``load_llm(name)`` behaves exactly as before.

    Args:
        model: File name of the GGUF model inside ``model_dir``.
        n_ctx: Maximum sequence length to use.
        n_threads: Number of CPU threads to use; tailor to the host system
            and the resulting performance.
        n_gpu_layers: Number of layers to offload to the GPU.
        chat_format: Chat template name used for the OpenAI-like chat API.
            NOTE(review): llama-cpp-python also ships a "mistral-instruct"
            format; confirm "llama-2" is the intended template for this model.

    Returns:
        The constructed ``Llama`` instance.
    """
    return Llama(
        model_path=os.path.join(model_dir, model),
        n_ctx=n_ctx,
        n_threads=n_threads,
        n_gpu_layers=n_gpu_layers,
        chat_format=chat_format,
    )

In [27]:
# Sample input 1: extracted text of an income tax return acknowledgement.
# NOTE(review): a later cell assigns `text` again, so on a top-to-bottom run
# this value is replaced before the prompt is built.
# NOTE(review): this sample appears to contain personal identifiers (name,
# PAN, address, IP address) — confirm it is safe to share or redact it.
text = """Acknowledgement Number:373299880060723
Date of filing : 06-Jul-2023
INDIAN INCOME TAX RETURN ACKNOWLEDGEMENT
[Where the data of the Return of Income in Form ITR-1(SAHAJ), ITR-2, ITR-3, ITR-4(SUGAM), ITR-5, ITR-6, ITR-7
filed and verified]
(Please see Rule 12 of the Income-tax Rules, 1962)
Assessment
Year
2023-24
FDDPM0811A Name TUSHAR MAURYA AddressA 15 3RD FLOOR, DUGGAL COLONY, KHANPUR, Khanpur, SOUTH DELHI, Pushpa Bhawan S.O, 09-Delhi, 91-
INDIA, 110062
Status Individual Form Number ITR-1 Filed u/s 139(1)-On or before due date e-Filing Acknowledgement Number 373299880060723

Income Tax Return submitted electronically on
and veriﬁed by
TUSHAR MAURYA
06-Jul-2023 16:10:32
having PAN
paper ITR-Veriﬁcation Form /Electronic Veriﬁcation Code
from IP address
FDDPM0811A
7UWE5YJQLI
on
generated through
0
103.203.253.47
06-Jul-2023
using
Aadhaar OTP
mode"""

In [55]:
# Sample input 2: text of a sworn self-declaration about pension eligibility.
# NOTE(review): this rebinds `text`, replacing the sample assigned in the
# earlier cell; on a top-to-bottom run the prompt built below uses this
# document, not the tax acknowledgement.
text = """I Ashutosh Maurya, S/O,D/O,W/O BK Maurya
Age:24, Resident of H.NoA 15 3RD FLOOR, DUGGAL COLONY, KHANPUR, Khanpur, Warangal Urban district,
Telangana State, INDIA, (Aadhar Card No 1234 1234 1234) do hereby solemnly affirm
and state on oath as under:
1. I am not having children who are Govt. Employee / Private sector employment / Private /
Outsourced / Govt. Pensioner / Freedom Fighter pension.

2. I am not having large business Enterprise like oil mills, rice mills, petrol pumps, rig owners,
shop owner etc.

3. I am not Owners of light and/or heavy automobiles (four wheelers and big vehicles.)
4. I am not having land more than 3.0 acres wet/irrigated dry or 7.5 acres dry.
5. I am not any other criterion in which the verification officer may assess by the manner of
lifestyle, occupation and possession of assets rendering the households as ineligible.
 That the above facts are true in future if it is proved false I have no objection if my sanctioned
pension is cancelled more over I shall return all the pension amount which I obtained from the
Government by my false representation.
Date:-12/2/2023"""

In [28]:
# Role instruction passed as the system message.
system_content = "You are an entity extraction expert"

# The document is embedded between backticks so the instruction can point at it.
user_prompt = (
    "Given the text in backticks, extract all important entities in a key-value fashion "
    f"and return in json format. Text: `{text}`"
)

In [25]:
# Quantization variant to evaluate; must be a key of model_name_cfg.
model_version = "Q5_K_M"

# Resolve the label to its GGUF file name and load that model.
llm = load_llm(model=model_name_cfg[model_version])

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /home/tushar/Documents/AM/mistral-experiments/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_

In [29]:
# Time the whole chat completion. perf_counter() is a monotonic, high-resolution
# clock, so the measured duration cannot be distorted by system clock adjustments
# the way a time.time() difference can.
start = time.perf_counter()
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_prompt},
    ],
    # Request a JSON object as the reply format.
    response_format={"type": "json_object"},
    temperature=0.5,
)
end = time.perf_counter()

# Elapsed seconds for this completion (float).
time_taken = end - start

from_string grammar:
root ::= object 
object ::= [{] ws object_11 [}] ws 
value ::= object | array | string | number | value_6 ws 
array ::= [[] ws array_15 []] ws 
string ::= ["] string_18 ["] ws 
number ::= number_19 number_25 number_29 ws 
value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] 
ws ::= ws_31 
object_8 ::= string [:] ws value object_10 
object_9 ::= [,] ws string [:] ws value 
object_10 ::= object_9 object_10 | 
object_11 ::= object_8 | 
array_12 ::= value array_14 
array_13 ::= [,] ws value 
array_14 ::= array_13 array_14 | 
array_15 ::= array_12 | 
string_16 ::= [^"\<U+0000>-<U+001F>] | [\] string_17 
string_17 ::= ["\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] 
string_18 ::= string_16 string_18 | 
number_19 ::= number_20 number_21 
number_20 ::= [-] | 
number_21 ::= [0-9] | [1-9] number_22 
number_22 ::= [0-9] number_22 | 
number_23 ::= [.] number_24 
number_24 ::= [0-9] number_24 | [0-9] 
number_25 ::= number_23 | 
number_26 ::= [eE

In [30]:
# Display the full completion dictionary returned by the chat call.
response

{'id': 'chatcmpl-5e4e3bb2-3d06-4e20-9dd3-0eaddbc5d6dd',
 'object': 'chat.completion',
 'created': 1712563657,
 'model': '/home/tushar/Documents/AM/mistral-experiments/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q5_K_M.gguf',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': '{\n"Acknowledgement Number": "373299880060723",\n"Date of Filing": "06-Jul-2023",\n"Return Type": "INDIAN INCOME TAX RETURN ACKNOWLEDGEMENT",\n"Assessment Year": "2023-24",\n"Address": {\n"Street": "15 3RD FLOOR, DUGGAL COLONY, KHANPUR",\n"City": "Khanpur, SOUTH DELHI",\n "State": "Pushpa Bhawan S.O, 09-Delhi",\n "Country": "India",\n "Postal Code": "110062"\n},\n"Status": "Individual",\n"Form Number": "ITR-1",\n"Submitted by": "TUSHAR MAURYA",\n"Filed under section": "139(1)",\n"Verified by": "TUSHAR MAURYA",\n"Filing Time": "06-Jul-2023 16:10:32",\n"IP Address": "FDDPM0811A",\n"E-Verification Code": "7UWE5YJQLI",\n"OTP Verification Method": "Aadhaar"\n}'},
   'logprobs': None

In [31]:
# Print only the assistant message text so its embedded newlines render as line breaks.
print(response["choices"][0]["message"]["content"])

{
"Acknowledgement Number": "373299880060723",
"Date of Filing": "06-Jul-2023",
"Return Type": "INDIAN INCOME TAX RETURN ACKNOWLEDGEMENT",
"Assessment Year": "2023-24",
"Address": {
"Street": "15 3RD FLOOR, DUGGAL COLONY, KHANPUR",
"City": "Khanpur, SOUTH DELHI",
 "State": "Pushpa Bhawan S.O, 09-Delhi",
 "Country": "India",
 "Postal Code": "110062"
},
"Status": "Individual",
"Form Number": "ITR-1",
"Submitted by": "TUSHAR MAURYA",
"Filed under section": "139(1)",
"Verified by": "TUSHAR MAURYA",
"Filing Time": "06-Jul-2023 16:10:32",
"IP Address": "FDDPM0811A",
"E-Verification Code": "7UWE5YJQLI",
"OTP Verification Method": "Aadhaar"
}


In [32]:
# Parse the assistant message text into Python objects; json.loads raises if it is not valid JSON.
response_content = json.loads(response["choices"][0]["message"]["content"])

In [33]:
# One record per run: which model variant was used, what it was asked,
# what it returned, and how long the completion took.
log_json = {
    "model_version": model_version,
    "system_content": system_content,
    "user_prompt": user_prompt,
    "response_content": response_content,
    "time_taken": time_taken,
}

# The file is opened in append mode, so each record must end with a newline.
# Without it, repeated runs produce back-to-back objects ("{...}{...}") that
# cannot be parsed; with it, the file can be read back one JSON document per line.
with open('response_logs.json', 'a') as f:
    json.dump(log_json, f)
    f.write("\n")

## Full Mistral 7B-Instruct-v0.2

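Loading the full (unquantized) model does not complete on this machine because of insufficient RAM; the run below was interrupted while the checkpoint shards were still loading.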

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cpu" # the device to load the model onto

# torch_dtype="auto" keeps the dtype stored in the checkpoint instead of the
# library default (float32). If the checkpoint is stored in half precision,
# this roughly halves the RAM needed to hold the weights.
model = AutoModelForCausalLM.from_pretrained("Mistral-7B-Instruct-v0.2", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("Mistral-7B-Instruct-v0.2")

# Short multi-turn conversation used as a smoke test for generation.
messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

# Render the conversation with the tokenizer's chat template and tokenize it.
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

# Keep the inputs and the model on the same device.
model_inputs = encodeds.to(device)
model.to(device)

# Sample up to 1000 new tokens, then decode the first (only) sequence.
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards:   0%|          | 0/3 [00:18<?, ?it/s]


KeyboardInterrupt: 