In [1]:
# ==============================================================================
# NOTEBOOK 1: Create and Save Merged Model
# ==============================================================================

# --- CELL 1: Setup ---
import os
# Force the environment to see only one GPU to guarantee stability
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Install all necessary libraries
!pip install "transformers==4.54.1" -qU
!pip install "wandb>=0.17.0" -qU
!pip install --no-deps "bitsandbytes>=0.43.1" "accelerate>=0.31.0" "xformers==0.0.29.post3" "trl>=0.9.4" triton -q
!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth.git -q
!pip install --force-reinstall --no-deps git+https://github.com/unslothai/unsloth-zoo.git -q
!pip install -U peft -q
!pip install "timm>=1.0.16" -qU


!apt-get install git-lfs -y > /dev/null
!git config --global credential.helper store

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for unsloth_zoo (pyproject.toml) ... [?25l[?25hdone


In [2]:
# ==============================================================================
# CELL 2: Prepare for Slicing (Space-Efficient Version)
# ==============================================================================
import os
import json
import re
import torch
import gc
from huggingface_hub import snapshot_download
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm.auto import tqdm
from transformers import AutoConfig

# --- Step 1: Define Paths ---
# Read-only input path for the merged model. We will read shards directly from here.
input_merged_model_path = "/kaggle/input/auramind-deployment-lab-part-1/AuraMind-E4B-Finetuned-Merged/"

# The final output path for the sliced model.
final_sliced_e2b_path = "/kaggle/working/AuraMind-E2B-Finetuned-Sliced/"

# A temporary path for the configuration files we need to modify.
writable_config_path = "/kaggle/working/temp_configs/"

os.makedirs(final_sliced_e2b_path, exist_ok=True)
os.makedirs(writable_config_path, exist_ok=True)

# The original base model from Hugging Face (to get original configs)
MODEL_NAME_E4B = "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit"

# --- Step 2: Prepare Configuration Files ---
print("Preparing configuration files...")
# Download original base model configs
base_model_cache_path = snapshot_download(MODEL_NAME_E4B)

# Copy necessary configs and tokenizer files to a writable location
!cp {base_model_cache_path}/*.json {writable_config_path}
!cp {base_model_cache_path}/*.jinja {writable_config_path}
!cp {input_merged_model_path}/tokenizer* {writable_config_path}
print("✅ Configuration and tokenizer files are ready.")

# --- Step 3: Slice the Model by Reading Directly from Source ---
# Point directly to the safetensor files in the read-only input directory
safetensor_files = [os.path.join(input_merged_model_path, f) for f in os.listdir(input_merged_model_path) if f.endswith('.safetensors')]

# Slicing parameters (unchanged)
layers_to_skip = [20, 21, 22, 23, 24]
final_num_layers = 35 - len(layers_to_skip)
ffn_hidden_dims = [8192] * final_num_layers
kept_layers_indices = [i for i in range(35) if i not in layers_to_skip]
layer_rename_map = {old_idx: new_idx for new_idx, old_idx in enumerate(kept_layers_indices)}
weight_map = {}
new_shard_state_dict = {}

print("Slicing model shards directly from source (no copy)...")
# This loop now reads from the original, read-only path
for shard_path in tqdm(safetensor_files, desc="Processing shards"):
    with safe_open(shard_path, framework="pt", device="cpu") as f:
        for tensor_name in f.keys():
            new_tensor_name = tensor_name
            tensor = f.get_tensor(tensor_name)
            match = re.search(r'\.layers\.(\d+)\.', tensor_name)
            if match:
                old_layer_idx = int(match.group(1))
                if old_layer_idx in layers_to_skip: continue
                new_layer_idx = layer_rename_map[old_layer_idx]
                new_tensor_name = tensor_name.replace(f'.layers.{old_layer_idx}.', f'.layers.{new_layer_idx}.')
                target_ffn_dim = ffn_hidden_dims[new_layer_idx]
                if 'mlp.gate_proj.weight' in new_tensor_name or 'mlp.up_proj.weight' in new_tensor_name:
                    tensor = tensor[:target_ffn_dim, :].contiguous()
                elif 'mlp.down_proj.weight' in new_tensor_name:
                    tensor = tensor[:, :target_ffn_dim].contiguous()
            elif 'per_layer_model_projection' in tensor_name:
                reshaped = tensor.reshape((35, tensor.shape[0] // 35, tensor.shape[1]))
                tensor = reshaped[kept_layers_indices, :, :].reshape(-1, tensor.shape[-1]).contiguous()
            elif 'embed_tokens_per_layer' in tensor_name:
                reshaped = tensor.reshape((tensor.shape[0], 35, tensor.shape[1] // 35))
                tensor = reshaped[:, kept_layers_indices, :].reshape(tensor.shape[0], -1).contiguous()
            new_shard_state_dict[new_tensor_name] = tensor

# --- Step 4: Save the Sliced Model ---
print("Saving sliced model to final destination...")
shard_filename = "model-00001-of-00001.safetensors"
save_file(new_shard_state_dict, os.path.join(final_sliced_e2b_path, shard_filename), metadata={'format': 'pt'})
for k in new_shard_state_dict.keys():
    weight_map[k] = shard_filename
print("✅ New safetensor shard saved.")

# --- Step 5: Finalize Configuration and Cleanup ---
print("Finalizing sliced model files...")
# Copy the tokenizer files from our temp location to the final destination
!cp {writable_config_path}/tokenizer* {final_sliced_e2b_path}

# Create the new index file
index_json = { "metadata": {"total_size": sum(t.numel() * t.element_size() for t in new_shard_state_dict.values())}, "weight_map": weight_map }
with open(os.path.join(final_sliced_e2b_path, "model.safetensors.index.json"), "w") as f:
    json.dump(index_json, f, indent=2)

print("Correcting the configuration for the new 30-layer model...")
# Load config from our temporary writable path
new_config = AutoConfig.from_pretrained(writable_config_path)
new_config.text_config.num_hidden_layers = final_num_layers
new_config.text_config.intermediate_size = ffn_hidden_dims
count_kv_sharing = sum(1 for layer in layers_to_skip if layer >= 20)
new_config.text_config.num_kv_shared_layers -= count_kv_sharing
count_activation_sparsity = sum(1 for layer in layers_to_skip if layer <= 9)
activation_sparsity_list = [0.95] * (10 - count_activation_sparsity) + [0] * (final_num_layers - 10 + count_activation_sparsity)
new_config.text_config.activation_sparsity_pattern = activation_sparsity_list

# Save the final, corrected config to the output directory
new_config.save_pretrained(final_sliced_e2b_path)
print("✅ Corrected config.json saved.")

# Clean up the temporary config directory
!rm -r {writable_config_path}
print("✅ Temporary files cleaned up.")

print(f"\n✅ Model slicing complete. New standalone model saved in: {final_sliced_e2b_path}")



Preparing configuration files...


Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

✅ Configuration and tokenizer files are ready.
Slicing model shards directly from source (no copy)...


Processing shards:   0%|          | 0/4 [00:00<?, ?it/s]

Saving sliced model to final destination...
✅ New safetensor shard saved.
Finalizing sliced model files...
Correcting the configuration for the new 30-layer model...
✅ Corrected config.json saved.
✅ Temporary files cleaned up.

✅ Model slicing complete. New standalone model saved in: /kaggle/working/AuraMind-E2B-Finetuned-Sliced/


In [4]:
# ==============================================================================
# CELL 1: Securely Login to Hugging Face
# ==============================================================================
from huggingface_hub import login

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
# This securely retrieves the secret you stored in the Colab Secrets Manager.
# Make sure the secret name is exactly "HF_API_KEY".
hf_token = user_secrets.get_secret("HUGGINGFACE_API_KEY")

# Login to the Hugging Face Hub
login(token=hf_token)

print("✅ Successfully logged into Hugging Face.")

✅ Successfully logged into Hugging Face.


In [5]:
# ==============================================================================
# CELL 5: Install Git LFS, Verify, and Upload the Final Model
# ==============================================================================
# THE DEFINITIVE FIX: Explicitly install git-lfs in the Colab environment.
# This ensures the huggingface_hub library can correctly handle large file uploads.
!apt-get install git-lfs -y
!git-lfs install

from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import HfApi, ModelCard, ModelCardData
import os

# The path to your successfully sliced model
final_sliced_e2b_path = "/kaggle/working/AuraMind-E2B-Finetuned-Sliced/"

# --- Step 7: Verification (SKIPPED) ---
print("--- Step 7: Verification Skipped ---")
print("Proceeding directly to upload.")

print("\n--- Step 8: Preparing to upload to Hugging Face Hub ---")
# Define your new model's name on the Hub
hf_repo_id = "surfiniaburger/AuraMind-Maize-Expert-E2B-Finetuned"

# Create a model card
card_content = f"""
---
license: apache-2.0
language: en
tags:
- gemma-3n
- unsloth
- social-impact
- agriculture
- computer-vision
---

# AuraMind: Fine-Tuned Gemma 3n Maize Expert (E2B)

This is a specialized, fine-tuned version of Google's Gemma 3n model, optimized for diagnosing maize plant health conditions in Nigeria. It was developed as part of the **Google - The Gemma 3n Impact Challenge**.

## Model Derivation

This model was created using an advanced "fine-tune then slice" approach, leveraging the native MatFormer architecture of Gemma 3n:
1.  The full `unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit` model was fine-tuned on a custom dataset of local Nigerian maize diseases.
2.  The fine-tuning was performed using LoRA adapters from a hyperparameter sweep, with the champion run (`icy-sweep-2`) achieving **100% validation accuracy**.
3.  The trained adapters were merged into the full E4B model.
4.  Finally, the E2B sub-model was surgically extracted using the principles from Google's official MatFormer Lab, resulting in this efficient, deployable, and highly accurate expert model.

This process ensures the model has both the high performance of our fine-tuning and the clean, convertible architecture of the official E2B release, making it ideal for on-device deployment with tools like Google AI Edge and MediaPipe.

**Project Link:** [https://github.com/surfiniaburger/AuraMind](https://github.com/surfiniaburger/AuraMind)
"""
card = ModelCard(card_content)
card.save(os.path.join(final_sliced_e2b_path, 'README.md'))
print("✅ Model card (README.md) created.")

# Upload the final model to the Hub
print(f"Uploading model to: {hf_repo_id}")
api = HfApi()
api.create_repo(repo_id=hf_repo_id, exist_ok=True)
api.upload_folder(
    folder_path=final_sliced_e2b_path,
    repo_id=hf_repo_id,
    commit_message="Upload final fine-tuned and sliced AuraMind E2B model"
)

print(f"\n✅ SUCCESS! Your final model is now available on the Hugging Face Hub at: https://huggingface.co/{hf_repo_id}")

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.3).
0 upgraded, 0 newly installed, 0 to remove and 38 not upgraded.
Git LFS initialized.
--- Step 7: Verification Skipped ---
Proceeding directly to upload.

--- Step 8: Preparing to upload to Hugging Face Hub ---
✅ Model card (README.md) created.
Uploading model to: surfiniaburger/AuraMind-Maize-Expert-E2B-Finetuned


Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.70M [00:00<?, ?B/s]

model-00001-of-00001.safetensors:   0%|          | 0.00/10.9G [00:00<?, ?B/s]


✅ SUCCESS! Your final model is now available on the Hugging Face Hub at: https://huggingface.co/surfiniaburger/AuraMind-Maize-Expert-E2B-Finetuned
