# Download the Model from Hugging Face

The easiest way to download the model is by using Hugging Face. To select a model, uncomment the line corresponding to the desired model in the cell below. Ensure both `HF_USER` and `HF_TOKEN` are set as environment variables in the workbench.

If your environment cannot access the internet, manually download the model files. Navigate to the "Files and versions" tab on the Hugging Face model page (e.g., [https://huggingface.co/ibm-granite/granite-7b-instruct/tree/main](https://huggingface.co/ibm-granite/granite-7b-instruct/tree/main)) and upload the files to your S3 bucket.

## Download Model from Hugging Face

In [None]:
import os

# Choose the desired model
# my_repo = "huggingface.co/ibm-granite/granite-7b-instruct"
# my_repo = "huggingface.co/ibm/merlinite-7b"
# my_repo = "huggingface.co/instructlab/merlinite-7b-lab"
# my_repo = "https://huggingface.co/mistralai/Mistral-7B"
# my_repo = "https://huggingface.co/codellama/CodeLlama-7b-hf"
# my_repo = "https://huggingface.co/mosaicml/mpt-7b-chat"
# my_repo = "https://huggingface.co/mosaicml/mpt-7b-instruct"

# Ensure my_repo is set
my_repo = "huggingface.co/ibm-granite/granite-7b-instruct"  # Example default, change as needed
if not my_repo:
    raise ValueError("Please uncomment or set the 'my_repo' variable.")

# Prepare the GitHub repository URL with authentication
git_repo = f"https://{os.getenv('HF_USER')}:{os.getenv('HF_TOKEN')}@{my_repo}"

# Extract model directory name
model_dir = os.path.basename(my_repo)

# Clone or update the repository
if not os.path.exists(model_dir):
    !git clone $git_repo
else:
    %cd $model_dir
    !git fetch --all
    !git reset --hard origin/main
    %cd ..

print(f"Model Directory cloned in: {model_dir}")

## Configure S3 Bucket Environment Variables

In [None]:
import boto3
import botocore

# Fetch required environment variables
aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
endpoint_url = os.getenv('AWS_S3_ENDPOINT')
region_name = os.getenv('AWS_DEFAULT_REGION')
bucket_name = os.getenv('AWS_S3_BUCKET')

# Report exactly which variables are unset or empty so the fix is obvious
required_settings = {
    'AWS_ACCESS_KEY_ID': aws_access_key_id,
    'AWS_SECRET_ACCESS_KEY': aws_secret_access_key,
    'AWS_S3_ENDPOINT': endpoint_url,
    'AWS_DEFAULT_REGION': region_name,
    'AWS_S3_BUCKET': bucket_name,
}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise EnvironmentError(
        "Ensure all AWS-related environment variables are properly set. "
        f"Missing: {', '.join(missing_settings)}"
    )

# Create a Boto3 session
session = boto3.session.Session(
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key
)

# Set up S3 resource with session
s3_resource = session.resource(
    's3',
    config=botocore.client.Config(signature_version='s3v4'),
    endpoint_url=endpoint_url,
    region_name=region_name
)

# Connect to the specified bucket
bucket = s3_resource.Bucket(bucket_name)

# Creating the Bucket object sends no request, so issue a lightweight
# HeadBucket call to actually verify the endpoint, credentials and bucket
# before reporting success. A failure raises a botocore ClientError here
# rather than later, in the middle of the upload.
s3_resource.meta.client.head_bucket(Bucket=bucket_name)
print(f"S3 connectivity established successfully for the bucket: {bucket_name}")

## Upload Model to S3 Bucket

In [None]:
def upload_directory_to_s3(local_directory, s3_prefix, target_bucket=None):
    """
    Uploads files from a local directory to an S3 bucket under a specific prefix.

    Git metadata is not uploaded: any directory or file whose name starts with
    ".git" (for example ".git/", ".gitattributes", ".gitignore") is skipped.
    Files that merely contain ".git" elsewhere in their name are uploaded.

    Parameters:
        local_directory (str): Path to the local directory to upload.
        s3_prefix (str): S3 prefix (folder path) to upload files to.
        target_bucket: Optional object exposing ``upload_file(path, key)``.
            Defaults to the notebook-level ``bucket`` created in the S3
            configuration cell.
    """
    # Only touch the global when no explicit bucket was supplied
    destination = bucket if target_bucket is None else target_bucket

    for root, dirs, files in os.walk(local_directory):
        # Prune Git metadata directories in place so os.walk never descends
        # into them (the .git folder of a model repository can be very large).
        for dirname in dirs:
            if dirname.startswith(".git"):
                skipped_dir = os.path.relpath(os.path.join(root, dirname), local_directory)
                print(f"Skipping {skipped_dir}")
        dirs[:] = [dirname for dirname in dirs if not dirname.startswith(".git")]

        for filename in files:
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, local_directory)

            # Skip Git metadata files such as .gitattributes and .gitignore
            if filename.startswith(".git"):
                print(f"Skipping {relative_path}")
                continue

            # S3 keys always use forward slashes, whatever the local separator
            relative_key = relative_path.replace(os.sep, "/")
            if s3_prefix:
                s3_key = f"{s3_prefix.rstrip('/')}/{relative_key}"
            else:
                s3_key = relative_key

            print(f"Uploading {file_path} -> {s3_key}")
            destination.upload_file(file_path, s3_key)


def list_objects(prefix):
    """
    Prints the key of every object stored beneath a prefix in the S3 bucket.

    Parameters:
        prefix (str): Key prefix used to filter the bucket's objects.
    """
    print(f"Objects under prefix '{prefix}':")
    # Stream the keys one at a time rather than materializing the listing
    matching_keys = (entry.key for entry in bucket.objects.filter(Prefix=prefix))
    for key in matching_keys:
        print(key)


# All model artifacts are stored beneath this key prefix in the bucket
model_prefix = f"models/{model_dir}"

# Step 1: push the local model directory to S3
print(f"Starting upload of model directory '{model_dir}' to S3...")
upload_directory_to_s3(model_dir, model_prefix)

# Step 2: list what is stored under the prefix so the upload can be checked
print(f"Upload complete. Verifying uploaded files in S3 under 'models/{model_dir}'...\n")
list_objects(model_prefix)

print("\nVerification complete. All objects listed above are now available in S3.")

### Next Steps

With the model successfully uploaded to the S3 bucket, you can proceed to deploy it. Configure the **ServingRuntime** and **InferenceService** from the **Workbench Models** tab to enable model serving.