In [None]:
# NOTE(review): %pylab star-imports numpy and matplotlib.pyplot names into the
# notebook namespace; the bare `boxplot(...)` call further down relies on that.
%pylab inline

In [None]:
from transformers import BloomConfig, BloomModel

# Build a model directly from a default BloomConfig (no pretrained weights are
# loaded here), then read the configuration back from the model instance.
model = BloomModel(BloomConfig())
configuration = model.config

In [None]:
import torch
from transformers import BloomForTokenClassification, BloomTokenizerFast

tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
# NOTE(review): "bigscience/bloom-560m" is presumably a plain language-model
# checkpoint, so the classification head may be freshly initialised and the
# labels printed below illustrative only -- confirm before relying on them.
model = BloomForTokenClassification.from_pretrained("bigscience/bloom-560m")

inputs = tokenizer(
    "HuggingFace is a company based in Paris and New York",
    add_special_tokens=False,
    return_tensors="pt",
)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    logits = model(**inputs).logits

predicted_token_class_ids = logits.argmax(-1)

# Tokens are classified rather than input words, so there may be more
# predicted classes than words: one word can span several tokens, each with
# its own predicted class.
predicted_tokens_classes = [
    model.config.id2label[class_id]
    for class_id in predicted_token_class_ids[0].tolist()
]
predicted_tokens_classes


In [None]:
# Load the tokenizer again, this time paired with the causal-LM model class.
import torch

from transformers import BloomTokenizerFast, BloomForCausalLM

tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")

# Rebinds the notebook-global `model` assigned by earlier cells.
model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")


In [None]:

# Tokenize a single sentence into PyTorch tensors.
inputs = tokenizer("Hello, my dog is cute.", return_tensors="pt")

# The input ids are passed as `labels` too, so the output carries a loss in
# addition to the logits.
outputs = model(**inputs, labels=inputs["input_ids"])

loss = outputs.loss

# Not computed under torch.no_grad(); later cells call .detach() before
# converting these to numpy.
logits = outputs.logits

In [None]:

# Display the logits' dimensions (expected to be (batch, sequence_length,
# vocab_size) per the transformers docs -- TODO confirm against the output).
logits.shape

In [None]:
# Minimum logit along the last dimension for the first item in the batch.
logits[0].amin(-1)

In [None]:
# Imported explicitly so this cell does not depend on the names that
# `%pylab inline` injects into the notebook namespace.
import matplotlib.pyplot as plt

# One 1-D numpy array per row of the first batch item's logits; .detach() is
# needed because the logits were produced with gradient tracking enabled.
data = [row_logits.detach().numpy() for row_logits in logits[0]]

fig, ax = plt.subplots()
ax.boxplot(data)
ax.set(
    title="Distribution of logits per token position",
    xlabel="Token position",
    ylabel="Logit value",
);

In [None]:
from transformers import pipeline

# The model must be a fully qualified Hub id; a bare "bloom" cannot be
# resolved. The 560m checkpoint is the one used throughout this notebook
# ("bigscience/bloom" is the full-size model and far heavier to download).
generator = pipeline("text-generation", model="bigscience/bloom-560m")

# Asking for several return sequences only makes sense with sampling (or beam
# search); with plain greedy decoding generation rejects
# num_return_sequences > 1. Because sampling is enabled, the three
# continuations differ from run to run.
generator(
    "Hello, I'm a language model",
    max_length=30,
    do_sample=True,
    num_return_sequences=3,
)

In [2]:
# Checkpoint loaded by `bloom_model` below; enable one of the commented
# alternatives instead to try a different model size.
mname = "bigscience/bloom-560m"
# NOTE(review): confirm this repo id exists on the Hub before enabling it.
# mname = "bigscience/bloom-1.3b"
# mname = "bigscience/bloom"
from transformers import BloomTokenizerFast, BloomForTokenClassification, BloomForCausalLM
from transformers import pipeline, TextGenerationPipeline
class bloom_model:
    """Thin wrapper bundling a BLOOM tokenizer, causal LM and generation pipeline.

    Attributes:
        model_name: Checkpoint id or path the tokenizer and model were loaded from.
        tokenizer: ``BloomTokenizerFast`` loaded from ``model_name``.
        model: ``BloomForCausalLM`` loaded from ``model_name``.
        generator: ``TextGenerationPipeline`` built from the two objects above.
    """

    def __init__(self, model_name: "str | None" = None):
        """Load the tokenizer and model and build the text-generation pipeline.

        Args:
            model_name: Checkpoint id or local path handed to ``from_pretrained``.
                When omitted, the notebook-level ``mname`` is used, so a plain
                ``bloom_model()`` behaves exactly as before.
        """
        # Resolved at call time (not as a default argument) so that changing
        # `mname` and re-instantiating picks up the new value.
        checkpoint = mname if model_name is None else model_name
        self.model_name = checkpoint
        self.tokenizer = BloomTokenizerFast.from_pretrained(checkpoint)
        self.model = BloomForCausalLM.from_pretrained(checkpoint)

        self.generator = TextGenerationPipeline(
            task="text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
        )

    def predict(self, content: str, max_new_tokens: int = 100) -> list:
        """Generate a continuation of ``content``.

        Args:
            content: Prompt text passed to the pipeline.
            max_new_tokens: Upper bound on the number of generated tokens;
                defaults to 100, the value that used to be hard-coded.

        Returns:
            The pipeline's output, unmodified. This is a list rather than a
            plain string (per the transformers docs, one dict with a
            ``generated_text`` key per returned sequence).
        """
        return self.generator(content, max_new_tokens=max_new_tokens)

# NOTE: rebinds the notebook-global `model` (earlier cells bind it to raw
# transformers models) to the wrapper; the cells below call `model.predict`.
model = bloom_model()

In [3]:
# Continue the prompt with the wrapper; the cell displays whatever `predict`
# returns.
model.predict("Examples of Viking gods are")

In [None]:
# Same call with a second prompt, for comparison with the previous cell.
model.predict("Examples of small, furry rodents living near humans are")