In [None]:
!pip -q install git+https://github.com/huggingface/transformers # need to install from github
!pip install -q datasets loralib sentencepiece 
!pip -q install bitsandbytes accelerate xformers einops

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m100.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━

In [None]:
# Show the GPU visible to this runtime (model, driver/CUDA version, memory usage).
!nvidia-smi

Sat May  6 08:25:23 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    44W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# MosaicML MPT-7B-Instruct

In [None]:
from typing import Any, Dict, Tuple
import warnings

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import (
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)


# Section markers and preamble used to build the instruction prompt.
INSTRUCTION_KEY = "### Instruction:"
RESPONSE_KEY = "### Response:"
END_KEY = "### End"
INTRO_BLURB = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request."
)

# One prompt section per line, finishing with a newline after the response
# marker. "{instruction}" is kept as a literal placeholder so the template can
# be completed later with ``.format(instruction=...)``.
PROMPT_FOR_GENERATION_FORMAT = "\n".join(
    (INTRO_BLURB, INSTRUCTION_KEY, "{instruction}", RESPONSE_KEY, "")
)


class InstructionTextGenerationPipeline:
    """Callable wrapper that turns an instruction into generated text.

    On construction it loads a causal language model and its tokenizer by
    name, puts the model in eval mode and moves it to CUDA when available
    (CPU otherwise). Calling the instance formats the instruction with
    ``PROMPT_FOR_GENERATION_FORMAT``, runs ``model.generate`` and returns only
    the newly generated text (the prompt tokens are stripped).
    """

    def __init__(
        self,
        model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        use_auth_token=None,
    ) -> None:
        """Load the model and tokenizer and set the default generation settings.

        Args:
            model_name: Name or path forwarded to ``from_pretrained`` for both
                the model and the tokenizer.
            torch_dtype: dtype used when loading the model and when moving it
                to the target device.
            trust_remote_code: Forwarded to ``from_pretrained`` for both the
                model and the tokenizer.
            use_auth_token: Forwarded to ``from_pretrained`` for both the
                model and the tokenizer.
        """
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code,
            use_auth_token=use_auth_token,
        )

        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code,
            use_auth_token=use_auth_token,
        )
        if tokenizer.pad_token_id is None:
            warnings.warn(
                "pad_token_id is not set for the tokenizer. Using eos_token_id as pad_token_id."
            )
            tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"
        self.tokenizer = tokenizer

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.eval()
        self.model.to(device=device, dtype=torch_dtype)

        # Defaults for every call; any of them can be overridden per call via
        # keyword arguments to __call__.
        self.generate_kwargs = {
            "temperature": 0.1,
            "top_p": 0.92,
            "top_k": 0,
            "max_new_tokens": 1024,
            "use_cache": True,
            "do_sample": True,
            "eos_token_id": self.tokenizer.eos_token_id,
            "pad_token_id": self.tokenizer.pad_token_id,
            "repetition_penalty": 1.1,  # 1.0 means no penalty, > 1.0 means penalty, 1.2 from CTRL paper
        }

    def format_instruction(self, instruction):
        """Return the full prompt string for ``instruction``."""
        return PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)

    def __call__(self, instruction: str, **generate_kwargs: Any) -> str:
        """Generate a response for ``instruction``.

        Args:
            instruction: The instruction text to place in the prompt template.
            **generate_kwargs: Per-call overrides merged on top of
                ``self.generate_kwargs`` and forwarded to ``model.generate``.

        Returns:
            The decoded text of the newly generated tokens only, with special
            tokens removed.
        """
        prompt = self.format_instruction(instruction)
        encoded = self.tokenizer(prompt, return_tensors="pt")
        input_ids = encoded["input_ids"].to(self.model.device)

        gkw = {**self.generate_kwargs, **generate_kwargs}
        # The pad token may be aliased to the eos token (see __init__), in which
        # case generate() cannot infer the mask from the ids; pass the
        # tokenizer's mask explicitly unless the caller supplied one.
        attention_mask = encoded.get("attention_mask")
        if attention_mask is not None:
            gkw.setdefault("attention_mask", attention_mask.to(self.model.device))

        with torch.no_grad():
            output_ids = self.model.generate(input_ids, **gkw)

        # Drop the prompt tokens so only the continuation is decoded.
        prompt_length = input_ids.shape[1]
        new_tokens = output_ids[0, prompt_length:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

In [None]:

# Initialize the model and tokenizer.
# `generate` is the callable pipeline used by the prompt cells further down:
# generate(instruction) returns the generated text.
generate = InstructionTextGenerationPipeline(
    "mosaicml/mpt-7b-instruct",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
# Token id(s) of the end-of-text marker; read by StopOnTokens, defined below.
stop_token_ids = generate.tokenizer.convert_tokens_to_ids(["<|endoftext|>"])


# Define a custom stopping criteria
class StopOnTokens(StoppingCriteria):
    """Stopping criterion that fires when the newest token is a stop token.

    The stop ids are read from the module-level ``stop_token_ids`` list.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True if the last token of the first sequence is a stop id."""
        # Only the first sequence of the batch is inspected.
        last_token = input_ids[0][-1]
        return any(last_token == stop_id for stop_id in stop_token_ids)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



### The prompt & response

In [None]:
import json
import textwrap

def get_prompt(instruction):
    """Build an instruction prompt whose sections are separated by blank lines.

    Args:
        instruction: The instruction to embed under the "### Instruction:" header.

    Returns:
        The prompt string: intro sentence, instruction section and a trailing
        "### Response:" header with no newline after it.
    """
    sections = (
        "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
        f"### Instruction:\n{instruction}",
        "### Response:",
    )
    return "\n\n".join(sections)

# print(get_prompt('What is the meaning of life?'))

def parse_text(text, width=100):
    """Print ``text`` wrapped to ``width`` columns, followed by blank lines.

    Each line of ``text`` is wrapped on its own, so line breaks already in the
    text (numbered lists, bullet points, blank lines between paragraphs) are
    kept. Wrapping the whole string in one go would collapse every newline
    into a space and merge everything into a single paragraph.

    Args:
        text: The text to display.
        width: Maximum line width passed to ``textwrap.fill``. Defaults to 100.

    Returns:
        None. The wrapped text is written to stdout.
    """
    wrapped_lines = [textwrap.fill(line, width=width) for line in text.splitlines()]
    wrapped_text = "\n".join(wrapped_lines)
    print(wrapped_text + '\n\n')


In [None]:
%%time
# Open-ended comparison question; the answer is printed via parse_text.
prompt = 'What are the differences between alpacas, vicunas and llamas?'
generated_text = generate(prompt)
parse_text(generated_text)


Alpacas have long faces with large eyes; they can be black or brown in coloration but usually white.
They weigh up to 100 pounds (45 kg) and stand about 3 feet tall at the shoulder. Their fleece has
soft fibers which make it very warm for clothing use as well as other products such as pillows and
blankets. Alpacas live on farms across South America where their fiber is harvested by shearing them
once per year during spring time when new growth begins forming again after shedding its winter
coat. Vicunas also come from South American countries like Peru and Chile although there are some
populations found in North Africa too! These animals look similar to camels because of how big their
ears are compared to body size - weighing around 150-200 lbs (70kg). The fur of these creatures
comes in many different colors including greyish browns, blacks & whites though most commonly seen
in shades of tan/beige due to living conditions near deserts throughout much of this region. Llamas
share simil

In [None]:
%%time 
# Short factual question.
prompt = 'What is the capital of England?'
generated_text = generate(prompt)
parse_text(generated_text)

London


CPU times: user 63.5 ms, sys: 31 µs, total: 63.5 ms
Wall time: 62.8 ms


In [None]:
%%time 
# Writing task: draft a persuasive email.
prompt = 'Write an email to Sam Altman giving reasons to open source GPT-4'
generated_text = generate(prompt)
parse_text(generated_text)

Dear Mr.Altman,  I am writing this mail with regards to your recent announcement of Open sourcing
GPT 4 model and its training code base on GitHub under Apache 2 license. I would like to
congratulate you for taking such bold step towards democratizing AI research by making it more
accessible to wider community, which will help in accelerating innovation across industries as well
as academia at large.   As someone who has been following developments around NLP space closely over
past few years i have seen how hard it was for researchers/engineers working on similar problems to
collaborate or build upon each other's work due to lack of access to underlying models & their
training codes etc., so any move towards opening up these technologies can only be beneficial from
both technical excellence perspective but also business development point of view since it helps
foster collaboration between teams within companies  as well as outside organizations leading to
creation of new products / se

In [None]:
%%time 
# Two-part prompt: a question addressed to the model itself, then a factual one.
prompt = 'As an AI do you like the Simpsons? What do you know about Homer?'
generated_text = generate(prompt)
parse_text(generated_text)

I don't have any feelings, but I can tell you what's on Wikipedia and other sources so far as it
relates to The Simpsons.  Homer Simpson was created by Matt Groening in 1987 for his cartoon series
called "The Simpsons". He has been voiced by Dan Castellaneta since 1989.


CPU times: user 1.95 s, sys: 5.77 ms, total: 1.95 s
Wall time: 1.95 s


In [None]:
%%time 
# Brief request for facts about a TV character.
prompt = 'Tell me about Homer on the TV show the simpsons'
generated_text = generate(prompt)
parse_text(generated_text)

Homer Simpson was created by Matt Groening and first appeared in The Simpsons, which debuted
December 17th 1989


CPU times: user 732 ms, sys: 122 µs, total: 732 ms
Wall time: 729 ms


In [None]:
%%time 
# Same request about Homer as the prompt 'Tell me about Homer on the TV show
# the simpsons', with "in depth" appended.
prompt = 'Tell me about Homer on the TV show the simpsons in depth'
generated_text = generate(prompt)
parse_text(generated_text)

Homer Simpson was created by Matt Groening and first appeared as one of many characters on The
Tracey Ullman Show, which ran from 1987 to 1989 before being spun off into its own series called The
Simpsons. He has been voiced since then by Dan Castellaneta who also voices other famous cartoon
character such as Mr. Burns


CPU times: user 2.1 s, sys: 4.83 ms, total: 2.1 s
Wall time: 2.1 s


In [None]:

%%time 
prompt = 'Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apple do they have?'
generated_text = generate(prompt)
parse_text(generated_text)

The original number of apples was 23. They ate 20 from it to make lunches with them. So now there
are only 3 left in their stockpile. To replenish what they lost, they purchased 6 new ones bringing
their total back up to 21


CPU times: user 1.51 s, sys: 1.82 ms, total: 1.51 s
Wall time: 1.51 s


In [None]:
%%time 
prompt = 'Answer the following yes\/no question by reasoning step-by-step. \n Can you write a whole Haiku in a single tweet?'
generated_text = generate(prompt)
parse_text(generated_text)

Yes, it's possible to compose and post a haiku on Twitter using only 140 characters (including
spaces).  The most common form of this type of poem has 5-7-5 syllable counts for each line.


CPU times: user 1.32 s, sys: 3.82 ms, total: 1.32 s
Wall time: 1.32 s


In [None]:
%%time 
# Open-ended question about Harry Potter and Hogwarts.
prompt = 'Tell me about Harry Potter and studying at Hogwarts?'
generated_text = generate(prompt)
parse_text(generated_text)

Harry Potter was born to parents who were wizards, so he too has magical powers such as invisibility
cloaking himself in mist or using his wand which can cast spells like lightning bolt, fire ball
etc.. He lives with his aunt Petunia Dursley (his mum's sister) because of the fear by his relatives
for him being a wizard due to the fact they are all muggle-born(non magicians). At 11 years old,
Hagrid comes over from Hogwarts School of Witchcraft & Wizardry to take harry on a tour of the
school but also inform petunia that she must send her son back to hogwarts immediately after their
summer holidays end since there will be no other chance until next year! The rest of this book
follows you through adventures when going away into your first term at Hogwarts where new friends
are made along side learning many things throughout the 7 books


CPU times: user 5.48 s, sys: 8.31 ms, total: 5.49 s
Wall time: 5.47 s


In [None]:
%%time 
# Structured-output task: the prompt carries key/value lines to convert to JSON.
# NOTE(review): "San Fransisco" is misspelled in the prompt; confirm whether
# that is intentional before correcting it.
prompt = """Convert the following to JSON

name: John
age: 30
address:
street: 123 Main Street
city: San Fransisco
state: CA
zip: 94101
"""
generated_text = generate(prompt)
parse_text(generated_text)

{"name": "John", "age":30,"address":{"street":"123 Main street","city":"San
Francisco","state":"CA","zip":94101}}


CPU times: user 1.04 s, sys: 1.94 ms, total: 1.04 s
Wall time: 1.04 s


In [None]:
%%time 
# Conversational small-talk prompt.
prompt = """How are you today?"""
generated_text = generate(prompt)
parse_text(generated_text)

I am doing well, thank you for asking!


CPU times: user 338 ms, sys: 918 µs, total: 339 ms
Wall time: 336 ms


In [None]:
%%time 
# Planning task: asks for a short multi-day itinerary.
prompt = """Write me a short plan for a 3 day trip to London"""
generated_text = generate(prompt)
parse_text(generated_text)

Day 1 - Arrive in london and check into hotel near Victoria station, have dinner at one of many
restaurants nearby then head out to see some sites such as Big Ben or Buckingham Palace  Day 2- Head
over to Westminster Abbey where you can tour inside and learn about British history before heading
back towards Trafalgar Square to visit National Gallery which has free entry on Sundays! After this
walk down Whitehall street until you reach Downing Street (the residence of the Prime Minister)
finish off your afternoon by visiting St Paul's Cathedral with its beautiful dome overlooking the
city from across the river Thames! Day3- Start early today so you don't miss any attractions like
Tower Bridge, The Shard & Borough Market! Finish up your last few hours exploring Covent Garden
market area


CPU times: user 4.64 s, sys: 8.55 ms, total: 4.65 s
Wall time: 4.63 s


In [None]:
# News article text used as input for the summarisation and key-point
# extraction prompts in the cells below.
article = """
Content moderators under Sama, Meta’s content review sub-contractor in Africa, earlier today picketed at the company’s headquarters in Kenya demanding April salary, while urging it to observe the court orders that barred it from conducting mass layoffs.

The demonstrations came after Sama, in an email, instructed moderators to clear with the company by May 11, a move the employees say is against the existing court orders.

The 184 moderators sued Sama for allegedly laying them off unlawfully, after it wound down its content review arm in March, and Majorel, the social media giant’s new partner in Africa, for blacklisting on instruction by Meta.


The court issued a temporary injunction on March 21 barring Sama from effecting any form of redundancy, and Meta from engaging Majorel, which was also instructed to refrain from blacklisting the moderators. Sama was directed to continue reviewing content on Meta’s platforms, and to be its sole provider in Africa pending determination of the case. However, Sama sent the moderators on compulsory leave in April saying it had no work for them as its contract with Meta had expired.

Sama told TechCrunch that it had sent the notice “to staff whose contract had expired to go through our regular clearance process. This clearance process involves the return of company equipment to make sure that all final dues can be paid without deduction for that equipment, in accordance with Kenyan law.”

It said the moderators’ contracts had ended in March after its deal with Meta expired, saying that it was only processing the moderators final dues.

“We understand our former employees’ frustration because they were led by others to believe that they would all receive salary indefinitely while on leave, but that is not what the court dictated,” said Sama.

"""

In [None]:
%%time 
# Summarisation task: the instruction is followed by the full `article` text.
prompt = "Please summarize this article:\n" + article
generated_text = generate(prompt)
parse_text(generated_text)

Here’s a summary:  In early April, some content moderation workers who previously worked for Sama, a
subcontractor of Meta (the owner of Facebook) in Africa, went on strike demanding their April
salaries and protesting against being laid off despite a court order prohibiting such actions. The
workers are upset that Sama has refused to follow the court order and continues to refuse to pay
them while they remain on unpaid leave.


CPU times: user 2.56 s, sys: 1.9 ms, total: 2.56 s
Wall time: 2.55 s


In [None]:
%%time 
# Key-point extraction over the same `article` text.
# The result goes through print() instead of parse_text(), so it is shown
# exactly as generated, with no re-wrapping.
prompt = "Please extract the key info as bullet points for this article:\n" + article
generated_text = generate(prompt)
print(generated_text)

Here are some highlights from the article:  

1. Content moderation workers who were previously employed by Sama, a subcontractor of Meta (Facebook), protested outside of Sama's offices in Nairobi, Kenya today, demanding their April salaries and requesting that Sama comply with court rulings prohibiting them from terminating their employment or blacklisting them. 

2. The protestors demanded that Sama adhere to two court decisions: one forbidding Sama from firing or blacklisting the workers, and another ordering Sama to pay the workers' outstanding wages during their mandatory unpaid leave.

3. After ending its agreement with Facebook in early 2023, Sama informed the workers that their jobs had been terminated due to lack of work, even though both courts have ordered Sama to keep employing these workers until the lawsuit has been resolved.

CPU times: user 5.07 s, sys: 6.27 ms, total: 5.08 s
Wall time: 5.07 s
