# MPT-7B

In [1]:
from typing import Any, Dict, Tuple
import warnings

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import (
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)


INSTRUCTION_KEY = "### Instruction:"
RESPONSE_KEY = "### Response:"
END_KEY = "### End"
INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
PROMPT_FOR_GENERATION_FORMAT = """{intro}
{instruction_key}
{instruction}
{response_key}
""".format(
    intro=INTRO_BLURB,
    instruction_key=INSTRUCTION_KEY,
    instruction="{instruction}",
    response_key=RESPONSE_KEY,
)


class InstructionTextGenerationPipeline:
    def __init__(
        self,
        model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        use_auth_token=None,
    ) -> None:
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code,
            use_auth_token=use_auth_token,
        )

        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code,
            use_auth_token=use_auth_token,
        )
        if tokenizer.pad_token_id is None:
            warnings.warn(
                "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
            )
            tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        self.tokenizer = tokenizer

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.eval()
        self.model.to(device=device, dtype=torch_dtype)

        self.generate_kwargs = {
            "temperature": 0.1,
            "top_p": 0.92,
            "top_k": 0,
            "max_new_tokens": 1024,
            "use_cache": True,
            "do_sample": True,
            "eos_token_id": self.tokenizer.eos_token_id,
            "pad_token_id": self.tokenizer.pad_token_id,
            "repetition_penalty": 1.1,  # 1.0 means no penalty, > 1.0 means penalty, 1.2 from CTRL paper
        }

    def format_instruction(self, instruction):
        return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)

    def __call__(
        self, instruction: str, **generate_kwargs: Dict[str, Any]
    ) -> Tuple[str, str, float]:
        s = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
        input_ids = self.tokenizer(s, return_tensors="pt").input_ids
        input_ids = input_ids.to(self.model.device)
        gkw = {**self.generate_kwargs, **generate_kwargs}
        with torch.no_grad():
            output_ids = self.model.generate(input_ids, **gkw)
        # Slice the output_ids tensor to get only new tokens
        new_tokens = output_ids[0, len(input_ids[0]) :]
        output_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        return output_text

In [3]:

# Initialize the model and tokenizer
generate = InstructionTextGenerationPipeline(
    "mosaicml/mpt-7b-instruct",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
stop_token_ids = generate.tokenizer.convert_tokens_to_ids(["<|endoftext|>"])


# Define a custom stopping criteria
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_id in stop_token_ids:
            if input_ids[0][-1] == stop_id:
                return True
        return False




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [15]:
import json
import textwrap

def get_prompt(instruction):
    prompt_template = f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:"
    return prompt_template

# print(get_prompt('What is the meaning of life?'))

def parse_text(text):
        wrapped_text = textwrap.fill(text, width=100)
        #print(wrapped_text +'\n\n')
        return wrapped_text


In [6]:
import pandas as pd
from gensim.parsing.preprocessing import remove_stopwords
from tqdm._tqdm_notebook import tqdm_notebook as tqdm
tqdm.pandas()

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  from tqdm._tqdm_notebook import tqdm_notebook as tqdm


In [7]:
df_predictions = pd.read_csv("df_predictions.csv")

In [8]:
df = pd.read_csv("kidsInMindSubtitles2004.csv")
df.shape

(2004, 15)

In [9]:
%%time
prompt = 'What are the differences between alpacas, vicunas and llamas?'
generated_text = generate(prompt)
parse_text(generated_text)


Alpacas have long faces with large eyes; they can be black or brown in coloration but usually white.
They live at high altitudes where it's cold most of the year so their fleece has natural insulation
properties to keep them warm during harsh weather conditions. Alpacas eat grasses like clover,
ryegrass, oats, timothy hay as well as other forage crops such as corn stalks, baleage, etc.. Their
diet should consist mostly of forages (hay) supplemented by grain if necessary depending on your
climate zone. The average weight of adult males ranges from 150-250 pounds while females weigh
around 100 - 200 lbs., though some may reach 300lbs+! Males tend to grow longer than female alpacas
due to their larger size which makes them more suitable for breeding purposes since you need both
male & female animals when breeding these adorable creatures :) Llamas look similar to alpacas
except they're much smaller weighing only 50 – 80 lb range whereas adults will typically stand about
4 feet tall max. F

In [18]:
df_predictions['mpt_7b'] = None

In [19]:

for idx in tqdm(range(len(df_predictions))):
    
    query = "Given the subtitle rating of a movie = "+str(round(df_predictions['Predcited_Score'][idx], 1))+" \
    ; and a list of inappropriate words : "+ df_predictions['inappropriate_words'][idx] + " Generate \
    language summary like kids in mind website? Please consider the below two example language summaries of \
    Please consider the below two example language summaries of \
    movies:  \n 1." + df['Language_Description'][0] + "\n 2."+df['Language_Description'][1]
    
    query = query.replace("[", "")
    query = query.replace("]", "\n\n")
    
    generated_text = generate(prompt)
    parse_text(generated_text)
    
    df_predictions['mpt_7b'][idx] = parse_text(generated_text)
    

  0%|          | 0/201 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_predictions['mpt_7b'][idx] = parse_text(generated_text)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_predictions['mpt_7b'][idx] = parse_text(generated_text)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_predictions['mpt_7b'][idx] = parse_text(generated_text)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-

In [21]:
df_predictions.to_csv("df_predictions_mpt_7b.csv", index = False)

In [22]:
from rouge_score import rouge_scorer
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

In [23]:
scores = scorer.score(df_predictions['language_summary'][0], df_predictions['mpt_7b'][0])

In [24]:
scores

{'rouge1': Score(precision=0.08181818181818182, recall=0.15789473684210525, fmeasure=0.10778443113772454),
 'rouge2': Score(precision=0.009174311926605505, recall=0.017857142857142856, fmeasure=0.012121212121212121),
 'rougeL': Score(precision=0.05454545454545454, recall=0.10526315789473684, fmeasure=0.0718562874251497)}

In [26]:

rouge1_scores = []
rouge2_scores = []
rougeL_scores = []

for idx in range(len(df_predictions)):
    
    scores = scorer.score(df_predictions['language_summary'][idx], df_predictions['mpt_7b'][idx])
    rouge1_scores.append(scores['rouge1'].fmeasure)
    rouge2_scores.append(scores['rouge2'].fmeasure)
    rougeL_scores.append(scores['rougeL'].fmeasure)


In [27]:
import numpy as np

In [28]:
result_df = pd.DataFrame({"rougeL":[np.mean(rougeL_scores)]})

In [29]:
result_df

Unnamed: 0,rougeL
0,0.020923
