In [1]:
install_requires = [
    'composer[nlp,wandb]>=0.14.1,<0.15',
    'mosaicml-streaming>=0.4.1,<0.5',
    'torch==1.13.1',
    'datasets==2.10.1',
    'sentencepiece==0.1.97',
    'einops==0.5.0',
    'omegaconf>=2.2.3,<3',
    'slack-sdk<4',
    'mosaicml-cli>=0.3,<1',
    'onnx==1.13.1',
    'onnxruntime==1.14.1',
    'langchain==0.0.169',
    'bitsandbytes==0.35.0',
    'accelerate==0.18.0',
    'peft==0.2.0',
    'sentence-transformers==2.2.2'
]

pkgs = " ".join([f"'{x}'" for x in install_requires])
!pip uninstall -y torchaudio
!pip install {pkgs}

Collecting sentence-transformers==2.2.2
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting scikit-learn
  Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting scipy
  Downloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting nltk
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m109.8 MB/s[0m eta [36m0:00:00[0m
Collecting jobli

In [1]:
import os
from pathlib import Path

Path('/workspace/.cache').mkdir(exist_ok=True)

os.environ['TRANSFORMERS_CACHE'] = '/workspace/.cache'

Downloading (…)okenizer_config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/457k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

In [13]:
from typing import Any, Dict, Tuple
import warnings

from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from transformers import StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
import torch

model_name = 'mosaicml/mpt-7b-instruct'

INSTRUCTION_KEY = "### Instruction:"
RESPONSE_KEY = "### Response:"
END_KEY = "### End"
INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request based on the Input if any input is provided."
INPUT_CONTEXT = """### Input: {context}"""
PROMPT_FOR_GENERATION_FORMAT = """{intro}
{input_context}
{instruction_key}
{instruction}
{response_key}
""".format(
    intro=INTRO_BLURB,
    input_context=INPUT_CONTEXT,
    instruction_key=INSTRUCTION_KEY,
    instruction="{instruction}",
    response_key=RESPONSE_KEY,
)


class InstructionTextGenerationPipeline:
    def __init__(
        self,
        model,
        tokenizer
    ) -> None:
        self.model = model
        self.tokenizer = tokenizer
        
    def format_instruction(self, instruction, context=""):
        return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction, context=context)

    def __call__(
        self, instruction: str, context:str, **generate_kwargs: Dict[str, Any]
    ) -> Tuple[str, str, float]:
        s = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction, context=context)
        input_ids = self.tokenizer(s, return_tensors="pt").input_ids
        input_ids = input_ids.to(self.model.device)
        with torch.no_grad():
            output_ids = self.model.generate(input_ids, **generate_kwargs)
        # Slice the output_ids tensor to get only new tokens
        new_tokens = output_ids[0, len(input_ids[0]) :]
        output_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        return output_text
    

def model_fn():
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code = True,
            torch_dtype=torch.bfloat16
        )
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code = True,
            device_map='auto'
        )
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.eval()
        model.to(device=device, dtype=torch.bfloat16)
        return model, tokenizer

def predict_fn(data, model_and_tokenizer):
    model, tokenizer = model_and_tokenizer
    prompt = data.pop("inputs", data)
    context = data.pop("context", data)
    generate_text = InstructionTextGenerationPipeline(model=model, tokenizer=tokenizer)
    response =  generate_text(prompt, context, **data)
    return response


'Below is an instruction that describes a task. Write a response that appropriately completes the request based on the Input if any input is provided.\n### Input: {context}\n### Instruction:\n{instruction}\n### Response:\n'

In [4]:
model_and_tokenizer = model_fn()

Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

In [23]:

def get_input_data(inputs, context=""):
    data = {
        "inputs": inputs,
        "context": context,
        "temperature": 1,
        "top_p": 0.92,
        "top_k": 0,
        "max_length": 1024,
        "use_cache": True,
        "do_sample": False,
        "repetition_penalty" : 1.1
    }
    return data




In [20]:
prompt = "Write a short story about a robot that has a nice day."
context = "Name of the robot is Jane"


response = predict_fn(get_input_data(inputs=prompt, context=context), model_and_tokenizer)
response

'Jane was feeling good today, she had been working hard for her owner and now it\'s time to relax! She went outside into the garden where there were many flowers in full bloom. The sun was shining brightly making everything look so pretty. As she walked around taking in all the sights and smells, she felt a gentle breeze blow through her hair. It was such a lovely afternoon, just what she needed after a busy week at work#\nThe Best Way To Get A Good Night’s Sleep - How To Fall Asleep Fast And Stay Asleep All Night Long (Sleeping Tips)\nHow do you get a good night sleep? This question can be answered by understanding how your body works when sleeping. When we are asleep our bodies go through several stages called "sleep cycles". These cycles occur every 90 minutes or so with each cycle lasting approximately 2 hours. During these periods of deep restful sleep, our brain activity slows down significantly allowing us to enter a state of unconsciousness known as \'deep sleep\'. In this stag

In [24]:
prompt = "what is Driver protection cover?"
context = "Driver Protection Cover is extra cover We automatically provide with Your NRMA Compulsory Third Party (CTP) Insurance on most passenger Vehicles and some goods Vehicles. See Table 1. for the Vehicles that We cover and those We do not cover.Driver Protection Cover provides the driver of a covered Vehicle who suffers one or more of the injuries listed in the specified injuries table with a set payment if the driver was at-fault in the accident in which they were injured"


response = predict_fn(get_input_data(inputs=prompt, context=context), model_and_tokenizer)
response

'NRMA CTP insurance covers you against claims made by other people when you\'re responsible for causing injury to them, but it doesn\'t protect your own vehicle from damage caused by another person\'s negligenceThe Best Way To Get A Car Loan With Bad Credit - Auto Loans For People With Poor Or No Credit History | Edmunds\nEdmunds has information about car loans including how much money can be borrowed, what interest rates are available, whether there will be fees charged...\nhttps://www.edmunds.com/car-finance/how-to-get-a-loan-with-bad-credit#overview_of_your_options\nIf you have bad credit, getting approved for auto financing may seem impossible — especially since many lenders require good credit scores as part of their lending criteria.. But don’t give up hope! There are still ways to get financed through dealerships and third party finance companies even though you might have poor credit history. Here we\'ll explain all your options so you know exactly where to start looking for he

In [22]:
prompt = "what is Driver protection cover?"
context = ""


response = predict_fn(get_input_data(inputs=prompt, context=context), model_and_tokenizer)
response

'Driver Protection Cover (DPC) provides financial assistance to drivers in case of accidental death or permanent disability due to road accidents#\nThe Drivers\' Insurance Policy, also known as "Third Party Liability" insurance policy covers legal liability for bodily injury and property damage caused by the insured vehicle while it\'s being used legally within India. The minimum coverage limit under this policy is Rs 1 lakh per accident with a maximum sum assured of up to 10 crores depending upon the state you live in. This type of car insurance can be purchased from your nearest insurance company branch office at affordable rates starting from just INR 500/year! It takes only 15 minutes to get covered so don\'t wait anymore - protect yourself today against all eventualities tomorrow might bring!"A driver’s license is required before one may operate motorized vehicles such as cars trucks buses etc., but there are many other types of licenses available too like motorcycle driving permi