In [None]:
import os
from pathlib import Path
import torch
from transformers import AutoModelForCausalLM

## Download or load model

In [None]:
# Hugging Face repo id of the checkpoint to work with. Exactly one assignment
# is active; the commented lines are alternatives that can be swapped in.
# model_name = 'microsoft/phi-2'
# model_name = 'microsoft/phi-1_5'
# model_name = 'microsoft/Phi-3.5-mini-instruct'
# model_name = 'google/gemma-2-9b'
# model_name = 'meta-llama/Meta-Llama-3.1-8B'
# model_name = 'meta-llama/Meta-Llama-3.1-8B-Instruct'
model_name = 'google/gemma-2-2b-it'
# model_name = 'google/gemma-2-9b-it'

# Shared on-disk location for all saved models, under the user's home directory.
CENTRAL_MODEL_DIR = os.path.expanduser('~/huggingface_models')
os.makedirs(CENTRAL_MODEL_DIR, exist_ok=True)  # no error if it is already there

# Each model gets its own sub-folder. The '/' in the repo id is swapped for '-'
# so that the folder name is a single path component.
local_model_path = os.path.join(CENTRAL_MODEL_DIR, '-'.join(model_name.split('/')))

In [None]:
# Choose the compute device and the matching `device_map` in one place:
#   - CUDA available: target the GPU and pass "auto" so the model is
#     distributed across whatever GPUs are present.
#   - otherwise: place the whole model (the "" key) on the CPU device.
if torch.cuda.is_available():
    device = torch.device("cuda")
    device_map = "auto"
else:
    device = torch.device("cpu")
    device_map = {"": device}

print(f"Using device: {device}")

In [None]:
# Load the model from the local copy when one exists; otherwise download it
# from the Hugging Face Hub and save it locally for the next run.
#
# The mere existence of the directory is not treated as proof of a usable copy:
# an empty or unrelated folder at that path would make from_pretrained fail on
# every run. Requiring config.json (written by save_pretrained) filters those
# cases out. It does not detect a save that was interrupted after the config
# was written; delete the folder by hand if loading from it fails.
has_local_copy = os.path.isfile(os.path.join(local_model_path, "config.json"))

if has_local_copy:
    print(f"Loading model from local path: {local_model_path}")
    model_source = local_model_path
else:
    print(f"Downloading model from {model_name}")
    model_source = model_name

# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to be executed during loading. Keep it only for checkpoints that
# need it and whose source is trusted.
original_model = AutoModelForCausalLM.from_pretrained(
    model_source,
    device_map=device_map,
    # quantization_config=bnb_config,
    trust_remote_code=True,
)

if not has_local_copy:
    # Save the freshly downloaded model so later runs can skip the download.
    original_model.save_pretrained(local_model_path)
    print(f"Model saved to {local_model_path}")