MPT-7B
----

# Setup

In [None]:
import transformers
import torch
from google.colab import drive
# Directory at which Google Drive is mounted; the model-loading cell builds
# its local model directory path from this value.
google_drive = '/content/drive'
# Mount Google Drive at that directory via the Colab helper.
drive.mount(google_drive)

# Model loading and config

In [None]:
# Directory on the mounted Google Drive that holds a local copy of the model
# (written by the "Saving locally" cell).
model_dir = google_drive + '/My Drive/coding/mpt7b'
# Source to load from: the Hugging Face Hub id by default. Uncomment the
# second assignment to load from the local copy in model_dir instead.
name = 'mosaicml/mpt-7b'
# name = model_dir

# trust_remote_code=True lets transformers run the model code shipped with
# the checkpoint rather than a class built into the library.
config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
config.attn_config['attn_impl'] = 'triton'  # Select the 'triton' attention implementation
config.init_device = 'cuda:0' # For fast initialization directly on GPU!
# config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096
# No trailing comma here: `torch.bfloat16,` would create a 1-tuple, which is
# not a valid torch_dtype.
dtype = torch.bfloat16  # Load model weights in bfloat16

model = transformers.AutoModelForCausalLM.from_pretrained(
  name,
  config=config,
  torch_dtype=dtype,
  trust_remote_code=True,
)
# The tokenizer is loaded from a separate repository, not from `name`.
# NOTE(review): presumably the tokenizer MPT-7B was trained with; confirm
# against the model card.
tokenizer = transformers.AutoTokenizer.from_pretrained('EleutherAI/gpt-neox-20b')

# Text generation

In [None]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline
# targeting the first CUDA device.
pipe = transformers.pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device='cuda:0',
)

# Prompt and generation options, kept separate from the call for readability.
prompt = 'Here is a recipe for vegan banana bread:\n'
generation_kwargs = dict(
    max_new_tokens=100,
    do_sample=True,
    use_cache=True,
)

# Run generation under bfloat16 autocast and print the pipeline's result.
with torch.autocast('cuda', dtype=torch.bfloat16):
    generated = pipe(prompt, **generation_kwargs)
    print(generated)

# Saving locally

In [None]:
# Persist the config, tokenizer, and model (in that order) into the Drive
# directory defined in the model-loading cell.
save_path = model_dir
for artifact in (config, tokenizer, model):
    artifact.save_pretrained(save_path)