In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import os
import time
import boto3
import botocore
from pathlib import Path

In [2]:
#Setup variables for this script

In [3]:
root_models_path = './models'
model_prefix = "deepseek-ai"
model_name = "DeepSeek-R1-Distill-Llama-8B"
full_model_name = f"{model_prefix}/{model_name}"

In [4]:
#Download model

In [5]:
tokenizer = AutoTokenizer.from_pretrained(full_model_name)
model = AutoModelForCausalLM.from_pretrained(full_model_name, torch_dtype=torch.bfloat16, device_map="auto")
model.generation_config = GenerationConfig.from_pretrained(full_model_name)
model.generation_config.pad_token_id = model.generation_config.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
#Perform a quick inference test on the model

In [7]:
messages = [
    {"role": "user", "content": "How old is the universe?"}
]
input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
#outputs = model.generate(input_tensor.to(model.device), max_new_tokens=100)
outputs = model.generate(input_tensor.to(model.device), max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
print(result)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Okay, so I'm trying to figure out how old the universe is. I remember hearing something about the Big Bang and the universe expanding, but I'm not exactly sure about the details. Let me think through this step by step.

First, I know that the universe started with a Big Bang, which is when it began to expand from an extremely small and incredibly dense point. But how long ago was that? I think it's called the age of the universe, and scientists have a way to


In [8]:
#Save the model and its tokenizer to a local folder

In [9]:
save_path = Path.home().joinpath(root_models_path, model_name)
!mkdir -p {save_path}
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
!ls {save_path}

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


config.json			  model-00004-of-00004.safetensors
generation_config.json		  model.safetensors.index.json
model-00001-of-00004.safetensors  special_tokens_map.json
model-00002-of-00004.safetensors  tokenizer_config.json
model-00003-of-00004.safetensors  tokenizer.json


In [None]:
#Get the s3 connection parameters (these are entered in the workbench setup screens)

In [10]:
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
endpoint_url = os.environ.get('AWS_S3_ENDPOINT')
region_name = os.environ.get('AWS_DEFAULT_REGION')
bucket_name = os.environ.get('AWS_S3_BUCKET')

In [11]:
session = boto3.session.Session(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key
          )

In [12]:
s3_resource = session.resource(
               's3',
               config=botocore.client.Config(signature_version='s3v4'),
               endpoint_url=endpoint_url,
               region_name=region_name
              )
bucket = s3_resource.Bucket(bucket_name)
                        
def upload_directory_to_s3(local_directory, s3_prefix):
    for root, dirs, files in os.walk(local_directory):
        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, local_directory)
            s3_key = os.path.join(s3_prefix, relative_path)
            print(f"{file_path} -> {s3_key}")
            bucket.upload_file(file_path, s3_key)
                                
def list_objects(prefix):
    filter = bucket.objects.filter(Prefix=prefix)
    for obj in filter.all():
        print(obj.key)

In [13]:
#Upload the model to an S3 bucket within the cluster

In [14]:
upload_directory_to_s3(root_models_path, full_model_name)

./models/DeepSeek-R1-Distill-Llama-8B/model.safetensors.index.json -> deepseek-ai/DeepSeek-R1-Distill-Llama-8B/DeepSeek-R1-Distill-Llama-8B/model.safetensors.index.json
./models/DeepSeek-R1-Distill-Llama-8B/tokenizer_config.json -> deepseek-ai/DeepSeek-R1-Distill-Llama-8B/DeepSeek-R1-Distill-Llama-8B/tokenizer_config.json
./models/DeepSeek-R1-Distill-Llama-8B/model-00004-of-00004.safetensors -> deepseek-ai/DeepSeek-R1-Distill-Llama-8B/DeepSeek-R1-Distill-Llama-8B/model-00004-of-00004.safetensors
./models/DeepSeek-R1-Distill-Llama-8B/model-00002-of-00004.safetensors -> deepseek-ai/DeepSeek-R1-Distill-Llama-8B/DeepSeek-R1-Distill-Llama-8B/model-00002-of-00004.safetensors
./models/DeepSeek-R1-Distill-Llama-8B/special_tokens_map.json -> deepseek-ai/DeepSeek-R1-Distill-Llama-8B/DeepSeek-R1-Distill-Llama-8B/special_tokens_map.json
./models/DeepSeek-R1-Distill-Llama-8B/model-00003-of-00004.safetensors -> deepseek-ai/DeepSeek-R1-Distill-Llama-8B/DeepSeek-R1-Distill-Llama-8B/model-00003-of-0000