In [1]:
import transformers
from transformers import AutoTokenizer
import torch
import os
import time
import boto3
import botocore
from pathlib import Path
from huggingface_hub import login
import sentencepiece

In [2]:
login_token = os.environ.get('huggingface_token')
login(token=login_token)

In [3]:
#Setup variables for this script

In [4]:
root_models_path = './models'
model_prefix = "llama"
model_name = "Mistral-7B-Instruct-v0.3"
full_model_name = f"{model_prefix}/{model_name}"

In [5]:
#Download model

In [6]:
tokenizer = AutoTokenizer.from_pretrained(full_model_name)

In [7]:
pipeline = transformers.pipeline(
    "text-generation",
    model=full_model_name,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cpu


In [8]:
#Perform a quick inference test on the model

In [9]:
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

outputs = pipeline(
    messages,
    max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'role': 'assistant', 'content': " Arr matey! I be the digital parrot, ye scurvy dog! I'll be spinnin' ye yarns and crackin' ye jokes in me hearty pirate tongue. Now, what be ye seekin', landlubber?\n\nYe can call me Cap'n Parrot, or just Parrot if ye prefer. Now, what's the news from the seven seas? Or perhaps ye be lookin' for a good tale to tell at the tavern? I'll be happy to oblige!"}


In [10]:
#Save the model and its tokenizer to a local folder

In [11]:
save_path = Path.home().joinpath(root_models_path, model_name)
!mkdir -p {save_path}
pipeline.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
!ls {save_path}

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


config.json			  model.safetensors.index.json
generation_config.json		  special_tokens_map.json
model-00001-of-00003.safetensors  tokenizer_config.json
model-00002-of-00003.safetensors  tokenizer.json
model-00003-of-00003.safetensors  tokenizer.model


In [12]:
#Get the s3 connection parameters (these are entered in the workbench setup screens)

In [13]:
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
endpoint_url = os.environ.get('AWS_S3_ENDPOINT')
region_name = os.environ.get('AWS_DEFAULT_REGION')
bucket_name = os.environ.get('AWS_S3_BUCKET')

In [14]:
session = boto3.session.Session(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key
          )

In [15]:
s3_resource = session.resource(
               's3',
               config=botocore.client.Config(signature_version='s3v4'),
               endpoint_url=endpoint_url,
               region_name=region_name
              )
bucket = s3_resource.Bucket(bucket_name)
                        
def upload_directory_to_s3(local_directory, s3_prefix):
    for root, dirs, files in os.walk(local_directory):
        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, local_directory)
            s3_key = os.path.join(s3_prefix, relative_path)
            print(f"{file_path} -> {s3_key}")
            bucket.upload_file(file_path, s3_key)
                                
def list_objects(prefix):
    filter = bucket.objects.filter(Prefix=prefix)
    for obj in filter.all():
        print(obj.key)

In [16]:
#Upload the model to an S3 bucket within the cluster

In [17]:
full_model_name

'mistralai/Mistral-7B-Instruct-v0.3'

In [18]:
upload_directory_to_s3(save_path, model_name)

/opt/app-root/src/models/Mistral-7B-Instruct-v0.3/model-00003-of-00003.safetensors -> Mistral-7B-Instruct-v0.3/model-00003-of-00003.safetensors
/opt/app-root/src/models/Mistral-7B-Instruct-v0.3/model.safetensors.index.json -> Mistral-7B-Instruct-v0.3/model.safetensors.index.json
/opt/app-root/src/models/Mistral-7B-Instruct-v0.3/tokenizer_config.json -> Mistral-7B-Instruct-v0.3/tokenizer_config.json
/opt/app-root/src/models/Mistral-7B-Instruct-v0.3/special_tokens_map.json -> Mistral-7B-Instruct-v0.3/special_tokens_map.json
/opt/app-root/src/models/Mistral-7B-Instruct-v0.3/tokenizer.model -> Mistral-7B-Instruct-v0.3/tokenizer.model
/opt/app-root/src/models/Mistral-7B-Instruct-v0.3/tokenizer.json -> Mistral-7B-Instruct-v0.3/tokenizer.json
/opt/app-root/src/models/Mistral-7B-Instruct-v0.3/model-00001-of-00003.safetensors -> Mistral-7B-Instruct-v0.3/model-00001-of-00003.safetensors
/opt/app-root/src/models/Mistral-7B-Instruct-v0.3/generation_config.json -> Mistral-7B-Instruct-v0.3/generatio