<a href="https://colab.research.google.com/github/nanotile/llm_engineering/blob/main/Copy_of_Hugginface_colab_INTERACTIVE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Install libraries
!pip install -q transformers torch

# Cell 2: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Cell 3: Interactive Model Downloader
import os
from transformers import AutoModel, AutoTokenizer
from google.colab import files

# Interactive cell: ask for a HuggingFace model id, download the model and its
# tokenizer, save both into a per-model folder on Google Drive, and optionally
# reload them from Drive for a quick forward-pass smoke test.
print("=" * 60)
print("   HuggingFace Model Downloader to Google Drive")
print("=" * 60)

# Popular models for reference
print("\n📚 Popular models you might want:")
print("  • bert-base-uncased")
print("  • gpt2")
print("  • distilbert-base-uncased")
print("  • roberta-base")
print("  • facebook/bart-large")
print("  • t5-small")
print("  • google/flan-t5-base")
print("  • microsoft/deberta-v3-base")
print("\n" + "=" * 60)

# Get model name from user
MODEL_NAME = input("\n🔍 Enter the HuggingFace model name: ").strip()

# Configure save path
DRIVE_ROOT = "/content/drive/MyDrive"
BASE_PATH = "/content/drive/MyDrive/HuggingFace_Models"

if not MODEL_NAME:
    print("❌ No model name provided. Please run again.")
elif not os.path.isdir(DRIVE_ROOT):
    # Without a mounted Drive, creating BASE_PATH would silently make a plain
    # folder on the VM's local disk and the "saved to Google Drive" message
    # below would be false, so stop here instead.
    print("❌ Google Drive is not mounted. Run the 'Mount Google Drive' cell first.")
else:
    # Model ids such as "google/flan-t5-base" contain a slash; flatten it so
    # every model lives in a single folder directly under BASE_PATH.
    model_folder = os.path.join(BASE_PATH, MODEL_NAME.replace('/', '_'))

    print(f"\n📥 Downloading: {MODEL_NAME}")
    print(f"💾 Save location: {model_folder}")
    print("\nThis may take a few minutes depending on model size...\n")

    try:
        # Download and save model
        print("⏳ Downloading model...")
        model = AutoModel.from_pretrained(MODEL_NAME)

        # Create the directory only once the download has succeeded, so a
        # mistyped or gated model name does not leave an empty folder behind.
        os.makedirs(model_folder, exist_ok=True)
        model.save_pretrained(model_folder)
        print("✓ Model saved")

        # Download and save tokenizer
        print("⏳ Downloading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        tokenizer.save_pretrained(model_folder)
        print("✓ Tokenizer saved")

    except Exception as e:
        # Top-level boundary of an interactive cell: report the problem
        # instead of showing a raw traceback.
        print(f"\n❌ ERROR: {e}")
        print("\nTroubleshooting tips:")
        print("  • Check if the model name is correct")
        print("  • For gated models (like Llama), you need a HuggingFace token")
        print("  • Check your internet connection")

    else:
        print("\n✅ SUCCESS! Model saved to Google Drive")
        print(f"📂 Location: {model_folder}")

        # Ask if user wants to test
        test = input("\n🧪 Test loading the model? (yes/no): ").strip().lower()

        if test in ['yes', 'y']:
            # The smoke test has its own handler: a failure here must not be
            # reported with the download troubleshooting tips, because the
            # files are already saved at this point.
            try:
                print("\n⏳ Loading model from Google Drive...")
                loaded_model = AutoModel.from_pretrained(model_folder)
                loaded_tokenizer = AutoTokenizer.from_pretrained(model_folder)

                test_text = "Hello, this is a test!"
                inputs = loaded_tokenizer(test_text, return_tensors="pt")

                # Encoder-decoder models (e.g. t5-small) cannot be called with
                # tokenizer output alone because they also expect decoder
                # inputs; running just the encoder is enough for a smoke test.
                if getattr(loaded_model.config, "is_encoder_decoder", False):
                    outputs = loaded_model.get_encoder()(**inputs)
                else:
                    outputs = loaded_model(**inputs)

                print("✅ Model loaded and tested successfully!")
                print(f"Output shape: {outputs.last_hidden_state.shape}")
            except Exception as e:
                print(f"\n❌ TEST FAILED: {e}")
                print(f"The download itself completed; the files are in {model_folder}")

        # Ask if user wants to download another
        another = input("\n🔄 Download another model? (yes/no): ").strip().lower()
        if another in ['yes', 'y']:
            print("\n👉 Run this cell again to download another model!")

# Cell 4: List all downloaded models
def _folder_size_bytes(folder):
    """Return the total size in bytes of all files under *folder*, recursively."""
    total = 0
    for dirpath, _dirnames, filenames in os.walk(folder):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            # isfile() is False for broken symlinks, which getsize() would
            # otherwise raise on.
            if os.path.isfile(file_path):
                total += os.path.getsize(file_path)
    return total


def list_models(base_path="/content/drive/MyDrive/HuggingFace_Models"):
    """Print a numbered list of the model folders stored under *base_path*.

    Every sub-directory of *base_path* is treated as one downloaded model and
    is printed with its total on-disk size in MB (1 MB = 1024 * 1024 bytes),
    counting files in nested folders as well. Entries are listed in sorted
    order so the numbering is stable between runs.

    Args:
        base_path: Directory that holds one sub-folder per model. Defaults to
            the folder used by the downloader cell.

    Returns:
        None. The listing, or a short notice when the directory is missing or
        contains no model folders, is written to stdout.
    """
    if not os.path.isdir(base_path):
        print("No models directory found.")
        return

    models = sorted(
        entry for entry in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, entry))
    )
    if not models:
        print("No models downloaded yet.")
        return

    print("\n📦 Downloaded models in your Google Drive:")
    print("=" * 60)
    for i, model in enumerate(models, 1):
        size_mb = _folder_size_bytes(os.path.join(base_path, model)) / (1024 * 1024)
        print(f"{i}. {model} ({size_mb:.1f} MB)")


list_models()

Mounted at /content/drive
   HuggingFace Model Downloader to Google Drive

📚 Popular models you might want:
  • bert-base-uncased
  • gpt2
  • distilbert-base-uncased
  • roberta-base
  • facebook/bart-large
  • t5-small
  • google/flan-t5-base
  • microsoft/deberta-v3-base


🔍 Enter the HuggingFace model name: gpt2

📥 Downloading: gpt2
💾 Save location: /content/drive/MyDrive/HuggingFace_Models/gpt2

This may take a few minutes depending on model size...

⏳ Downloading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

✓ Model saved
⏳ Downloading tokenizer...


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

✓ Tokenizer saved

✅ SUCCESS! Model saved to Google Drive
📂 Location: /content/drive/MyDrive/HuggingFace_Models/gpt2

🧪 Test loading the model? (yes/no): yes

⏳ Loading model from Google Drive...
✅ Model loaded and tested successfully!
Output shape: torch.Size([1, 7, 768])

🔄 Download another model? (yes/no): no

📦 Downloaded models in your Google Drive:
1. bert-base-uncased (418.6 MB)
2. gpt2 (479.3 MB)
