In [2]:
# # Add venv to kernel
# ! python3 -m venv venv
# ! source venv/bin/activate
# ! pip install jupyter
# ! ipython kernel install --name "venv" --user


In [1]:
# #Dependencies 
# ! pip install git+https://github.com/huggingface/transformers.git@72958fcd3c98a7afdc61f953aa58c544ebda2f79
# ! pip install optimum
# ! pip install git+https://github.com/huggingface/transformers.git@72958fcd3c98a7afdc61f953aa58c544ebda2f79
# ! pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/  # Use cu117 if on CUDA 11.7
# ! pip install langchain
# ! pip install langchainhub
# ! pip install duckduckgo-search

In [1]:
# Mistral Wrapper
from transformers import AutoModelForCausalLM, AutoTokenizer,GPTQConfig, pipeline,TextStreamer
import torch
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM

class MistralModel:
    def __init__(self):
        # Refresh CUDA Memory
        torch.cuda.empty_cache()
        self.model,self.tokenizer = self.get_model()
        streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
        self.pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.1,
            top_k=40,
            top_p=0.95,
            repetition_penalty=1.15,
            streamer=streamer,
        )
        
        
    def format_prompt(self,prompt):
        return f"""<s>[INST] {prompt} [/INST]"""
    
    def generate_instruction(
        self,
        prompt:str,
        instruction:str = 'Think carefully and answer the given question as truthfully as possible',
        llm_template = None
    ):
        if not llm_template:
            llm_template = self.format_prompt
        instruction_format = f"""### Instruction: {instruction}:

    ### Input:
    Question: {prompt}

    ### Response:
    """
        if llm_template:
            return llm_template(instruction_format)
        else:
            return instruction_format

    
    def get_model(self):
        model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
        # To use a different branch, change revision
        # For example: revision="main"
        quantization_config_loading = GPTQConfig(bits=4, use_exllama = False)
        model = AutoModelForCausalLM.from_pretrained(model_name_or_path,

                                                  quantization_config=quantization_config_loading,
                                                  device_map="cuda",
                                                  trust_remote_code=True,
                                                  revision="gptq-4bit-32g-actorder_True")

        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
        return model, tokenizer
    
    def _predict(self, prompt):
        return self.pipe(self.format_prompt(prompt))[0]['generated_text']
    
    def predict(self,prompt):
        return self._predict(self.format_prompt(prompt)).split(r'INST]')[-1].strip()
    
    def ask(self,question,instruction = None):
        formatted_prompt = self.generate_instruction(prompt=question,instruction=instruction)
        return self.predict(formatted_prompt)
    
class MistralLLM(LLM):
    mistral_model: MistralModel
    
    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # if stop is not None:
        #     raise ValueError("stop kwargs are not permitted.")
        return self.mistral_model.ask(prompt)

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"model": self.mistral_model}

In [2]:
# if model:
#     del model
model = MistralModel()

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. disable_exllama, use_cuda_fp16, max_input_length) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [19]:
answer = model.predict("What is 2+2?")
print("Answer",answer)

4
Answer 4


In [20]:
print(answer)

4


In [21]:
llm = MistralLLM(mistral_model = model)

In [22]:
llm

MistralLLM(mistral_model=<__main__.MistralModel object at 0x7f7164ea9390>)

In [42]:
# Naive Self Ask Engine

def generate_qa_pairs(
    mistral_model: MistralLLM,
    corpus: str,
    instruction: str = "You are a good generator of question answer pairs. Given the text, generate 5 question and answer pairs."
): 
    raw_qa = mistral_model.ask(corpus,instruction=instruction)
    # return raw_qa
    lines = raw_qa.split('\n')
    questions_list = lines[::2]
    answers_list = lines[1::2]
    qa_list = [ {'question':q[3:],'answer':a.replace('Answer:','').strip()} for q,a in zip(questions_list,answers_list)]
    return qa_list
    
    
    


In [26]:
instruction = "You are a good generator of question answer pairs. Given the text, generate question and answer pairs."
# instruction = None
question = """A pitch inventory is a scalewise list of the tones used in a composition or section thereof. 
For purposes of organization, the pitch inventories in this text always begin with the pitch 
A. Many students will have no need to prepare a pitch inventory, but for those students 
who have yet to develop a “hearing eye” that would allow instantaneous recognition of 
keys and tonal centers, a pitch inventory may be a necessity. A pitch inventory permits 
quick assessment of the selected pitches without prejudice to key or tonality. From there 
you can make a fairly accurate determination of key by observing the location of half and 
whole steps, accidentals such as raised sevenths, etc., and particular notes of the melody 
that are emphasized."""
# model.ask(question,instruction=instruction)

In [43]:
qa_list = generate_qa_pairs(model,question)

1. What is a pitch inventory?
Answer: A pitch inventory is a scale-wise list of the tones used in a composition or section thereof.
2. Who might need to prepare a pitch inventory?
Answer: Students who have not developed a "hearing eye" that allows them to recognize keys and tonal centers may need to prepare a pitch inventory.
3. Why do some students need a pitch inventory?
Answer: A pitch inventory allows quick assessment of the selected pitches without prejudice to key or tonality, which can help students determine key more accurately.
4. How does a pitch inventory help determine key?
Answer: By observing the location of half and whole steps, accidentals such as raised sevenths, and particular notes of the melody that are emphasized, a pitch inventory can provide insight into the key of a piece of music.
5. Is preparing a pitch inventory necessary for all students?
Answer: No, many students will not need to prepare a pitch inventory as they already have a "hearing eye" that allows the

In [None]:
# def process_corpus_for_qa_questions(corpus,model):
#     chunks = 

In [48]:
qa_list

[{'question': 'What is a pitch inventory?',
  'answer': 'A pitch inventory is a scale-wise list of the tones used in a composition or section thereof.'},
 {'question': 'Who might need to prepare a pitch inventory?',
  'answer': 'Students who have not developed a "hearing eye" that allows them to recognize keys and tonal centers may need to prepare a pitch inventory.'},
 {'question': 'Why do some students need a pitch inventory?',
  'answer': 'A pitch inventory allows quick assessment of the selected pitches without prejudice to key or tonality, which can help students determine key more accurately.'},
 {'question': 'How does a pitch inventory help determine key?',
  'answer': 'By observing the location of half and whole steps, accidentals such as raised sevenths, and particular notes of the melody that are emphasized, a pitch inventory can provide insight into the key of a piece of music.'},
 {'question': 'Is preparing a pitch inventory necessary for all students?',
  'answer': 'No, ma

In [49]:
len(question.split(' '))

124



What is a pitch inventory?
A pitch inventory is a scale-wise list of the tones used in a composition or section thereof. It allows quick assessment of the selected pitches without prejudice to key or tonality.

Who might need to prepare a pitch inventory?
Students who have not developed a "hearing eye" that allows them to instantly recognize keys and tonal centers may need to prepare a pitch inventory.

What does a pitch inventory help determine?
A pitch inventory helps determine the key of a composition by observing the location of half and whole steps, accidentals such as raised sevenths, and particular notes of the melody that are emphasized.


''

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer,GPTQConfig, pipeline,TextStreamer


def format_prompt_mistral(prompt):
    return f"""<s>[INST] {prompt} [/INST]"""


def get_model_mistral():
    model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
    # To use a different branch, change revision
    # For example: revision="main"
    quantization_config_loading = GPTQConfig(bits=4, use_exllama = False)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                              
                                              quantization_config=quantization_config_loading,
                                              device_map="cuda",
                                              trust_remote_code=True,
                                              revision="gptq-4bit-32g-actorder_True")

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    
    
    return model, tokenizer

In [3]:
import torch
torch.cuda.is_available()

True

In [5]:
model, tokenizer = get_model_mistral()

Downloading (…)der_True/config.json:   0%|          | 0.00/962 [00:00<?, ?B/s]

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. disable_exllama, use_cuda_fp16, max_input_length) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


Downloading model.safetensors:   0%|          | 0.00/4.57G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [42]:
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_k=40,
    top_p=0.95,
    repetition_penalty=1.15,
    streamer=streamer,
)

In [43]:
def generate_instruction(
    prompt:str,
    instruction:str = 'Think carefully and answer the given question as truthfully as possible',
    llm_template = None):
    instruction_format = f"""### Instruction: {instruction}:
    
### Input:
Question: {prompt}

### Response:
"""
    if llm_template:
        return llm_template(instruction_format)
    else:
        return instruction_format
    
    
def ask_model(model_pipeline, prompt,instruction=None, llm_template = None):
    formatted_question = generate_instruction(prompt=prompt,instruction=instruction,llm_template=llm_template)
    return model_pipeline(formatted_question)[0]['generated_text'].split('<s>')[0]
    
    
instruction = "Think and Talk like a copywriter and answer the following question"
query = "Combine the spirit of diwali and cardio workouts to create puns"


print(ask_model(pipe,query,instruction = instruction, llm_template = format_prompt_mistral))

Diwali is the festival of lights, so let's combine that with our favorite pastime - cardio workouts! Here are some fun puns for you to try out:

1. "I was feeling down during my workout today, but then I remembered it's Diwali - time to light up your day!"
2. "Want to get in shape for Diwali? Try these cardio moves inspired by the colorful celebrations."
3. "Cardio and Diwali go hand-in-hand. Just remember to keep up the pace or risk burning a wick!"
4. "With Diwali around the corner, it's the perfect time to add a little sparkle to your workout routine."
5. "Don't forget to stretch before your Diwali cardio workout. It'll help prevent any injuries from lighting too many candles."

