# Gemma_3n_TFLite_Conversion
# You must use an L100 or A100 GPU
This notebook has been updated to fix the installation error and implement the robust conversion method required for Gemma-3n models.

In [None]:
# Step 1: Install all required modern libraries
# Each line starting with "!" is run as a shell command from the notebook cell.
# Upgrade pip itself first, then install transformers directly from the GitHub
# main branch for the newest models.
!pip install --upgrade pip
!pip install git+https://github.com/huggingface/transformers.git

# Install the correct 'ai-edge-torch' package (>= 0.2.1, the distribution that
# provides the `ai_edge_torch` module imported by the conversion cell) and the
# other essentials, upgrading any versions that are already present.
!pip install --upgrade torch accelerate bitsandbytes sentencepiece "ai-edge-torch>=0.2.1" timm

print("✅ Libraries installed successfully.")

In [None]:
# Step 2: Authenticate with Hugging Face
# Make sure to add your Hugging Face token to Kaggle's "Secrets" with the name HF_TOKEN.
# `notebook_login` stays imported in case another cell uses the interactive widget.
from huggingface_hub import login, notebook_login
from kaggle_secrets import UserSecretsClient

try:
    # Fetch the token stored under the secret name HF_TOKEN.
    uc = UserSecretsClient()
    hf_token = uc.get_secret("HF_TOKEN")
    # notebook_login() only renders an interactive widget and takes no `token`
    # argument, so passing one raises TypeError. login(token=...) is the
    # programmatic, non-interactive entry point.
    login(token=hf_token)
    print("✅ Successfully authenticated with Hugging Face.")
except Exception as e:
    # Best-effort: report the failure and let the notebook continue so the
    # user can fix the secret and re-run this cell.
    print(f"Authentication failed: {e}")
    print("Please ensure your HF_TOKEN is correctly set in Kaggle Secrets.")

In [None]:
# Step 3: Load Model, Convert, and Save (with Wrapper)
import torch
import os
from transformers import AutoTokenizer, Gemma3nForConditionalGeneration
import ai_edge_torch

# --- Configuration ---
# Model identifier passed to from_pretrained() for both the tokenizer and the model.
MODEL_ID = "tamazightdev/v2-gemma-3n-4b-tmz-ft-vllm-merged"
# File name the converted TFLite model is written to.
OUTPUT_TFLITE_MODEL = "gemma-3n-4b-tamazight-ft.tflite"
# Directory that receives the files written by tokenizer.save_pretrained().
TOKENIZER_ASSETS_DIR = "tokenizer_assets"

print(f"--- Starting conversion for model: {MODEL_ID} ---")

# --- Define the Traceable Wrapper ---
class Gemma3nForTFLite(torch.nn.Module):
    """Traceable wrapper around Gemma 3n for single-step autoregressive decoding.

    The wrapped model is exposed through a plain two-tensor ``forward`` that
    returns only the logits of the final sequence position.
    """

    def __init__(self, model_path: str):
        """Load the checkpoint at ``model_path`` and switch it to eval mode."""
        super().__init__()
        print(f"Loading model from {model_path}...")
        # Load in FP32 for stable conversion.
        loaded = Gemma3nForConditionalGeneration.from_pretrained(
            model_path,
            torch_dtype=torch.float32,
        )
        self.model = loaded.eval()
        print("✅ Model loaded successfully into wrapper.")

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor):
        """Run one forward pass and return the next-token logits.

        Args:
            input_ids: Token IDs forwarded to the wrapped model.
            attention_mask: Attention mask forwarded to the wrapped model.

        Returns:
            The logits of the last position along dimension 1, i.e.
            ``logits[:, -1, :]`` ([batch_size, vocab_size]).
        """
        # The KV cache is disabled: every call is a full, stateless pass.
        model_kwargs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "use_cache": False,
        }
        all_logits = self.model(**model_kwargs).logits
        return all_logits[:, -1, :]

# Load the tokenizer and wrapped model, trace/convert the model with
# ai_edge_torch, export the result to OUTPUT_TFLITE_MODEL and save the tokenizer
# files to TOKENIZER_ASSETS_DIR. Any failure is reported with a traceback
# instead of aborting the notebook.
try:
    # 1. Load the tokenizer and the wrapped model
    print("\n1. Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    traceable_model = Gemma3nForTFLite(MODEL_ID)
    print("✅ Tokenizer and wrapped model loaded.")

    # 2. Prepare an example input for the converter to trace the model's graph.
    print("\n2. Preparing example input for tracing...")
    # The wrapper's forward() method expects both input_ids and attention_mask.
    # NOTE(review): IDs are drawn from [0, 32000); presumably chosen to stay
    # inside the model's vocabulary — confirm against the tokenizer.
    # NOTE(review): the graph is traced with a (1, 128) input; confirm the
    # consuming app pads/truncates to that shape.
    sample_input_ids = torch.randint(0, 32000, (1, 128), dtype=torch.long)
    sample_attention_mask = torch.ones((1, 128), dtype=torch.long)
    sample_inputs = (sample_input_ids, sample_attention_mask)
    print("✅ Example input prepared.")

    # 3. Convert the wrapped model to TFLite format
    print("\n3. Converting model to TFLite format...")
    # convert() returns a converted-model object, not raw bytes.
    edge_model = ai_edge_torch.convert(
        traceable_model,
        sample_inputs,
    )
    print("✅ Model successfully converted.")

    # 4. Save the TFLite model to a file
    print(f"\n4. Saving TFLite model to {OUTPUT_TFLITE_MODEL}...")
    # The converted model serializes itself; writing the object with
    # file.write() would raise TypeError because it is not a bytes-like value.
    edge_model.export(OUTPUT_TFLITE_MODEL)
    print("✅ TFLite model saved.")

    # 5. Save the tokenizer assets for your Android application
    print(f"\n5. Saving tokenizer assets to {TOKENIZER_ASSETS_DIR}...")
    # exist_ok=True avoids the check-then-create race of a separate exists() test.
    os.makedirs(TOKENIZER_ASSETS_DIR, exist_ok=True)
    tokenizer.save_pretrained(TOKENIZER_ASSETS_DIR)
    print("✅ Tokenizer assets saved.")

    print("\n--- Conversion Complete! ---")

except Exception as e:
    # Top-level notebook boundary: show the full traceback plus a hint rather
    # than swallowing the error silently.
    import traceback
    print("\n--- An Error Occurred ---")
    print(f"Error during conversion: {e}")
    traceback.print_exc()
    print("\nPlease check the model path, your Hugging Face token permissions, and available RAM.")

### Next Steps

After running the cells above, you should have your converted assets ready in the Kaggle file system (check the file panel on the right):

1.  **`gemma-3n-4b-tamazight-ft.tflite`**: This is your on-device model.
2.  A folder named **`tokenizer_assets`**: This contains `tokenizer.json` and other necessary files for your app.

You will need to **download both the `.tflite` file and the `tokenizer.json` file** to integrate them into your Android project.