In [None]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Model download from Hugging Face

In [1]:
# @title Install Vertex AI SDK and other required packages
# Installs (or upgrades) the Vertex AI SDK, huggingface_hub with its optional
# `hf_transfer` extra, and transformers into the user site-packages, with
# pip output reduced by --quiet.
# NOTE(review): versions are not pinned, so results may differ between runs;
# a kernel restart may be needed for upgraded packages to take effect — confirm.
%pip install --upgrade --user --quiet google-cloud-aiplatform \
                                      huggingface_hub[hf_transfer] \
                                      transformers

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[0m

In [2]:
# @title Define constants
# Google Cloud project used for Colab authentication and for bucket creation.
PROJECT_ID = "ai-hangsik"  # @param {type:"string"}
# Location passed to `gsutil mb -l` when the bucket is created.
LOCATION = "us-central1"  # @param {type:"string"}
# Cloud Storage bucket (gs:// URI) that receives the uploaded model files.
BUCKET_URI = "gs://sllm_0107"  # @param {type:"string"}
# Destination path under BUCKET_URI used by the upload cell.
# NOTE(review): the name says "llama3.1" while the model downloaded by the CLI
# cell is meta-llama/Llama-3.2-3B-Instruct — confirm the intended name.
ARTIFACT_NAME = "llama3.1_3b_inst"  # @param {type:"string"}

In [3]:
# @title GCP Authentication

# Use OAuth to access the GCP environment.
# The check on sys.modules makes this cell a no-op outside Google Colab, where
# the google.colab package is not loaded; inside Colab it authenticates the
# user against PROJECT_ID.
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id=PROJECT_ID)


In [4]:
# @title Authenticate your Hugging Face account
from huggingface_hub import interpreter_login

# Prompts for a Hugging Face access token (input hidden) and asks whether to
# store it as a git credential.
# NOTE(review): the meta-llama repositories used below are presumably gated, so
# the token's account needs approved access — confirm before running.
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Enter your token (input will not be visible): ··········
Add token as git credential? (Y/n) y


## Download a model using the command line

In [5]:
# @title Create local dir.
from pathlib import Path

# Local directory that receives the files downloaded from the Hugging Face Hub.
# NOTE(review): the path says "llama3.1" while the model downloaded into it is
# meta-llama/Llama-3.2-3B-Instruct — confirm the intended name.
LOCAL_DIR = "model/llama3.1-3b-it"

# Pure-Python equivalent of `mkdir -p`: creates missing parent directories and
# does nothing when the directory already exists. Avoids shelling out with an
# unquoted interpolated path and also works on non-POSIX hosts.
Path(LOCAL_DIR).mkdir(parents=True, exist_ok=True)

In [6]:
# @title Create a bucket.
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

Creating gs://sllm_0107/...
ServiceException: 409 A Cloud Storage bucket named 'sllm_0107' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


In [7]:
# @title Download model from Hugging face

#MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"

! huggingface-cli download {MODEL_NAME} --exclude "*.bin" "*.pth" "*.gguf" ".gitattributes" --local-dir {LOCAL_DIR}

Fetching 14 files:   0% 0/14 [00:00<?, ?it/s]Downloading 'USE_POLICY.md' to 'model/llama3.1-3b-it/.cache/huggingface/download/USE_POLICY.md.ac3c5f21b9779e3da0677d6d3c587778fe3a331e.incomplete'
Downloading 'config.json' to 'model/llama3.1-3b-it/.cache/huggingface/download/config.json.a5a40fa6da567ab026a5a2bf37125a90182be07d.incomplete'
Downloading 'model.safetensors.index.json' to 'model/llama3.1-3b-it/.cache/huggingface/download/model.safetensors.index.json.d3a1f0f5f401eeadca0c7a6786bd9e877fd42e58.incomplete'
Downloading 'model-00001-of-00002.safetensors' to 'model/llama3.1-3b-it/.cache/huggingface/download/model-00001-of-00002.safetensors.13cbd6d16e927a0c5bad54102514e6e18b4a47b3a6eb911e39d678d328d19f55.incomplete'
Downloading 'README.md' to 'model/llama3.1-3b-it/.cache/huggingface/download/README.md.55ce1d9728044d12f0856a0fcd715ee2ec2e449b.incomplete'
Downloading 'model-00002-of-00002.safetensors' to 'model/llama3.1-3b-it/.cache/huggingface/download/model-00002-of-00002.safetensors.7

In [10]:
# @title Within the same session
from transformers import AutoTokenizer

# Load the tokenizer from the files downloaded into LOCAL_DIR. No earlier cell
# defines `tokenizer`, so without this the cell raises NameError on a fresh
# kernel (Restart & Run All).
tokenizer = AutoTokenizer.from_pretrained(LOCAL_DIR)

# A single-turn conversation in the chat-message format.
messages = [
    {"role": "user", "content": "What's Deep Learning?"},
]

# tokenize=False returns the rendered prompt as a string rather than token ids;
# add_generation_prompt=True appends the header that cues the assistant reply.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
# With the Llama 3.x template the prompt has this shape:
# <|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n...<|eot_id|>
# <|start_header_id|>user<|end_header_id|>\n\nWhat's Deep Learning?<|eot_id|>
# <|start_header_id|>assistant<|end_header_id|>\n\n

print(inputs)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 08 Jan 2025

<|eot_id|><|start_header_id|>user<|end_header_id|>

What's Deep Learning?<|eot_id|><|start_header_id|>assistant<|end_header_id|>




In [None]:
# @title Upload model files into GCS
# Copies everything matched by {LOCAL_DIR}/* to {BUCKET_URI}/{ARTIFACT_NAME}.
# gsutil options used here:
#   -o GSUtil:parallel_composite_upload_threshold=150M
#        upload files larger than 150M as parallel composite objects
#   -m   perform the copy with parallel threads/processes
#   cp -e   skip symbolic links
#   cp -r   recurse into subdirectories (e.g. original/)
# NOTE(review): the shell glob `*` presumably skips dot-entries such as the
# .cache folder written by the download step — confirm.
! gsutil -o GSUtil:parallel_composite_upload_threshold=150M -m cp -e -r {LOCAL_DIR}/* {BUCKET_URI}/{ARTIFACT_NAME}

Copying file://model/llama3.1-3b-it/generation_config.json [Content-Type=application/json]...
Copying file://model/llama3.1-3b-it/config.json [Content-Type=application/json]...
/ [0/14 files][    0.0 B/  6.0 GiB]   0% Done                                   / [0/14 files][    0.0 B/  6.0 GiB]   0% Done                                   Copying file://model/llama3.1-3b-it/LICENSE.txt [Content-Type=text/plain]...
/ [0/14 files][    0.0 B/  6.0 GiB]   0% Done                                   Copying file://model/llama3.1-3b-it/model.safetensors.index.json [Content-Type=application/json]...
Copying file://model/llama3.1-3b-it/original/params.json [Content-Type=application/json]...
/ [0/14 files][    0.0 B/  6.0 GiB]   0% Done                                   / [0/14 files][    0.0 B/  6.0 GiB]   0% Done                                   Copying file://model/llama3.1-3b-it/model-00001-of-00002.safetensors [Content-Type=application/octet-stream]...
/ [0/14 files][    0.0 B/  6.0 GiB]  

## Download a model using Python code

In [None]:
# @title Helper function to download a model and store it into GCS
import os

from transformers import AutoModel, AutoTokenizer
from google.cloud import storage

def model_download(model_name:str,
                   save_path:str,
                   bucket_name:str
                   ) -> tuple:
    """Download a model and tokenizer, save them locally and upload them to GCS.

    Args:
        model_name: Hugging Face Hub repository id (or local path) to load.
        save_path: Local directory the model and tokenizer are saved into. It
            is also used as the object-name prefix inside the bucket.
        bucket_name: Name of the Cloud Storage bucket (without "gs://").

    Returns:
        A ``(model, tokenizer)`` tuple with the loaded objects.

    Only regular files directly inside ``save_path`` are uploaded; nested
    directories are skipped.

    NOTE(review): ``AutoModel`` presumably loads the base architecture without
    a task-specific head; if the saved files are meant for text generation,
    confirm whether ``AutoModelForCausalLM`` is the intended class.
    """

    # Download model and tokenizer.
    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)

    # Save the model into Cloud storage.
    client = storage.Client()
    bucket = client.bucket(bucket_name)

    # sorted() gives a deterministic upload order.
    for file_name in sorted(os.listdir(save_path)):
        local_path = os.path.join(save_path, file_name)
        # os.listdir also returns sub-directories (save_path may already hold
        # other downloads); opening one would raise IsADirectoryError.
        if not os.path.isfile(local_path):
            continue
        blob = bucket.blob(f"{save_path}/{file_name}")
        blob.upload_from_filename(local_path)

    return model, tokenizer

In [None]:
# Arguments for the Python-based download path.
model_name = "meta-llama/Llama-3.1-8B-Instruct"
# NOTE(review): "model" is also the parent of LOCAL_DIR ("model/llama3.1-3b-it"),
# so the saved files share a directory tree with the CLI download — confirm
# this is intended.
save_path = "model"
# NOTE(review): this bucket name differs from BUCKET_URI ("gs://sllm_0107") —
# confirm whether a separate bucket is intended.
bucket_name = "sllm_0106"

# Downloads, saves and uploads the model; rebinds the notebook-level `model`
# and `tokenizer` names to the returned objects.
model, tokenizer = model_download(model_name,save_path,bucket_name)
