## Step 1: Install Required Libraries

In [18]:
%pip install google-cloud-storage

Note: you may need to restart the kernel to use updated packages.


## Step 2: Configure GCS Settings

In [19]:
import os
from pathlib import Path
from datetime import datetime

# GCS Configuration
PROJECT_ID = "kyc-aml-automation"  # Replace with your GCP project ID
BUCKET_NAME = "kyc-aml-model"  # Bucket for trained models
SERVICE_ACCOUNT_KEY = "C:\\Users\\Lenovo\\.ssh\\gcp\\service_account.json"  # Replace with path to your key file

# Local model directory
MODEL_DIR = "model"

# Required model files (must exist)
REQUIRED_FILES = [
    "efficientnet_model.pth",      # PyTorch checkpoint
    "class_indices.json",          # Class mappings
    "training_history.json"        # Training metrics
]

# Optional model files (upload if available)
OPTIONAL_FILES = [
    "efficientnet_model.onnx",     # ONNX export (optional)
]

# All model files to check
MODEL_FILES = REQUIRED_FILES + OPTIONAL_FILES

# GCS paths - organized by model type and version

GCS_MODEL_PREFIX = "document_classification/v1"  # identity/version structure    print(f"  - {file}")

print(f"\nModel files to upload:")
for file in MODEL_FILES:
    print(f"\n\t{file}")
print("")    

print(f"Bucket: {BUCKET_NAME}")
print(f"Will upload to: gs://{BUCKET_NAME}/{GCS_MODEL_PREFIX}/")
print(f"Local model directory: {MODEL_DIR}")


Model files to upload:

	efficientnet_model.pth

	class_indices.json

	training_history.json

	efficientnet_model.onnx

Bucket: kyc-aml-model
Will upload to: gs://kyc-aml-model/document_classification/v1/
Local model directory: model


## Step 3: Verify Model Files

In [None]:
def verify_model_files(model_dir, required_files, optional_files):
    """Verify that required model files exist and check for optional files"""
    if not os.path.exists(model_dir):
        print(f"‚ùå Model directory not found: {model_dir}")
        return False, []
    
    print("\n" + "="*60)
    print("üìä MODEL FILES VERIFICATION")
    print("="*60)
    
    all_required_found = True
    total_size = 0
    found_files = []
    
    print("\nRequired Files:")
    for file_name in required_files:
        file_path = os.path.join(model_dir, file_name)
        if os.path.exists(file_path):
            size_mb = os.path.getsize(file_path) / (1024 * 1024)
            total_size += os.path.getsize(file_path)
            found_files.append(file_name)
            print(f"  ‚úì {file_name}: {size_mb:.2f} MB")
        else:
            print(f"  ‚ùå {file_name}: NOT FOUND (REQUIRED)")
            all_required_found = False
    
    print("\nOptional Files:")
    for file_name in optional_files:
        file_path = os.path.join(model_dir, file_name)
        if os.path.exists(file_path):
            size_mb = os.path.getsize(file_path) / (1024 * 1024)
            total_size += os.path.getsize(file_path)
            found_files.append(file_name)
            print(f"  ‚úì {file_name}: {size_mb:.2f} MB")
        else:
            print(f"  ‚ö†Ô∏è  {file_name}: NOT FOUND (optional - will skip)")
    
    print(f"\n  TOTAL SIZE: {total_size / (1024*1024):.2f} MB")
    print(f"  FILES TO UPLOAD: {len(found_files)}/{len(required_files + optional_files)}")
    print("="*60)
    
    return all_required_found, found_files

# Verify model files
all_required_found, files_to_upload = verify_model_files(MODEL_DIR, REQUIRED_FILES, OPTIONAL_FILES)

if all_required_found:
    print(f"\n‚úÖ All required model files found! ({len(files_to_upload)} files will be uploaded)")
else:
    print("\n‚ùå Some required model files are missing!")
    print("Please train the model first (train_classifier.ipynb)")


## Step 4: Authenticate with Google Cloud

In [21]:
from google.cloud import storage

# Set credentials
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = SERVICE_ACCOUNT_KEY

# Initialize GCS client
try:
    client = storage.Client(project=PROJECT_ID)
    print(f"‚úÖ Authenticated with GCP project: {PROJECT_ID}")
    
    # List buckets to verify access
    buckets = list(client.list_buckets())
    print(f"‚úÖ Found {len(buckets)} bucket(s)")
    
except Exception as e:
    print(f"‚ùå Authentication failed: {e}")
    print("\nTroubleshooting:")
    print("1. Verify SERVICE_ACCOUNT_KEY path is correct")
    print("2. Ensure service account has 'Storage Admin' role")
    print("3. Check PROJECT_ID is correct")

‚úÖ Authenticated with GCP project: kyc-aml-automation
‚úÖ Found 2 bucket(s)
‚úÖ Found 2 bucket(s)


## Step 5: Create or Verify Bucket

In [22]:
def get_or_create_bucket(client, bucket_name, location='us-central1'):
    """Get existing bucket or create new one"""
    try:
        bucket = client.get_bucket(bucket_name)
        print(f"‚úÖ Found existing bucket: {bucket_name}")
        return bucket
    except Exception:
        print(f"üì¶ Creating new bucket: {bucket_name}")
        bucket = client.create_bucket(bucket_name, location=location)
        print(f"‚úÖ Bucket created: {bucket_name}")
        return bucket

# Get or create bucket
bucket = get_or_create_bucket(client, BUCKET_NAME)
print(f"\nBucket info:")
print(f"  Name: {bucket.name}")
print(f"  Location: {bucket.location}")
print(f"  Storage class: {bucket.storage_class}")

‚úÖ Found existing bucket: kyc-aml-model

Bucket info:
  Name: kyc-aml-model
  Location: US-CENTRAL1
  Storage class: STANDARD


## Step 6: Upload Model Files to GCS

In [23]:
def upload_file_to_gcs(bucket, source_file, destination_blob_name):
    """Upload a single file to GCS with progress tracking"""
    blob = bucket.blob(destination_blob_name)
    
    print(f"\n‚òÅÔ∏è  Uploading: {os.path.basename(source_file)}")
    print(f"   Source: {source_file}")
    print(f"   Destination: gs://{bucket.name}/{destination_blob_name}")
    
    # Upload file
    blob.upload_from_filename(source_file)
    
    # Get uploaded file info
    blob.reload()
    size_mb = blob.size / (1024 * 1024)
    
    print(f"   ‚úÖ Upload complete! Size: {size_mb:.2f} MB")
    
    return blob

def upload_model_files(bucket, model_dir, file_list, gcs_prefix):
    """Upload all model files to GCS"""
    print("\n" + "="*60)
    print("‚òÅÔ∏è  UPLOADING MODEL FILES TO GCS")
    print("="*60)
    
    uploaded_blobs = []
    total_size = 0
    
    for file_name in file_list:
        source_path = os.path.join(model_dir, file_name)
        if os.path.exists(source_path):
            destination_path = f"{gcs_prefix}/{file_name}"
            blob = upload_file_to_gcs(bucket, source_path, destination_path)
            uploaded_blobs.append(blob)
            total_size += blob.size
        else:
            print(f"\n‚ö†Ô∏è  Skipping {file_name} (not found)")
    
    print("\n" + "="*60)
    print(f"‚úÖ Upload complete! {len(uploaded_blobs)}/{len(file_list)} files uploaded")
    print(f"   Total size: {total_size / (1024*1024):.2f} MB")
    print("="*60)
    
    return uploaded_blobs

# Upload only the files that were found during verification
uploaded_blobs = upload_model_files(bucket, MODEL_DIR, files_to_upload, GCS_MODEL_PREFIX)

NameError: name 'files_to_upload' is not defined

## Step 7: Verify Upload

In [None]:
def verify_gcs_upload(bucket, gcs_prefix, file_list):
    """Verify uploaded files exist and are accessible"""
    print("\nüîç Verifying uploads...")
    print("="*60)
    
    all_verified = True
    verified_count = 0
    
    for file_name in file_list:
        blob_path = f"{gcs_prefix}/{file_name}"
        try:
            blob = bucket.blob(blob_path)
            blob.reload()
            
            size_mb = blob.size / (1024*1024)
            print(f"‚úÖ {file_name}")
            print(f"   Size: {size_mb:.2f} MB")
            print(f"   MD5: {blob.md5_hash}")
            print(f"   GCS URI: gs://{bucket.name}/{blob.name}")
            print()
            
            verified_count += 1
            
        except Exception as e:
            print(f"‚ùå {file_name}: Verification failed - {e}")
            all_verified = False
    
    print("="*60)
    print(f"Verified: {verified_count}/{len(file_list)} files")
    return all_verified

if verify_gcs_upload(bucket, GCS_MODEL_PREFIX, files_to_upload):
    print("‚úÖ All uploaded files verified successfully!")
else:
    print("‚ö†Ô∏è  Some files failed verification")


üîç Verifying uploads...
‚úÖ efficientnet_model.pth
   Size: 50.12 MB
   MD5: cZVz+NSPr7qqWhKTHRKH0w==
   GCS URI: gs://kyc-aml-model/identity_classification/v1/efficientnet_model.pth

‚úÖ efficientnet_model.pth
   Size: 50.12 MB
   MD5: cZVz+NSPr7qqWhKTHRKH0w==
   GCS URI: gs://kyc-aml-model/identity_classification/v1/efficientnet_model.pth

‚ùå efficientnet_model.onnx: Verification failed - 404 GET https://storage.googleapis.com/storage/v1/b/kyc-aml-model/o/identity_classification%2Fv1%2Fefficientnet_model.onnx?projection=noAcl&prettyPrint=false: No such object: kyc-aml-model/identity_classification/v1/efficientnet_model.onnx
‚ùå efficientnet_model.onnx: Verification failed - 404 GET https://storage.googleapis.com/storage/v1/b/kyc-aml-model/o/identity_classification%2Fv1%2Fefficientnet_model.onnx?projection=noAcl&prettyPrint=false: No such object: kyc-aml-model/identity_classification/v1/efficientnet_model.onnx
‚úÖ class_indices.json
   Size: 0.00 MB
   MD5: bU1mWrzt3M7Rj85nuWEHNg=

## Step 8: Generate Signed URLs for Model Download

In [None]:
from datetime import timedelta

def generate_signed_url(bucket, blob_path, expiration_days=365):
    """
    Generate a signed URL that allows public download without authentication.
    The URL expires after the specified number of days.
    """
    blob = bucket.blob(blob_path)
    
    # Generate signed URL (valid for specified days)
    url = blob.generate_signed_url(
        version="v4",
        expiration=timedelta(days=expiration_days),
        method="GET"
    )
    
    return url

# Generate signed URLs for all model files
print("\n" + "="*60)
print("üîó GENERATING SIGNED URLS FOR MODEL FILES")
print("="*60)

signed_urls = {}

try:
    # Only generate URLs for files that were actually uploaded
    for file_name in files_to_upload:
        blob_path = f"{GCS_MODEL_PREFIX}/{file_name}"
        
        url = generate_signed_url(bucket, blob_path, expiration_days=365)
        signed_urls[file_name] = url
        print(f"\n‚úÖ {file_name}")
        print(f"   URL: {url[:80]}...")
    
    print(f"\n‚úÖ Generated {len(signed_urls)} signed URLs (valid for 365 days)")
    
    # Save URLs to file
    url_file = "model_download_urls.txt"
    with open(url_file, 'w') as f:
        f.write(f"# KYC/AML Identity Classifier - Model Download URLs\n")
        f.write(f"# Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"# Valid for: 365 days\n")
        f.write(f"# GCS Bucket: gs://{BUCKET_NAME}/{GCS_MODEL_PREFIX}/\n\n")
        
        for file_name, url in signed_urls.items():
            f.write(f"\n# {file_name}\n")
            f.write(f"{url}\n")
        
        f.write(f"\n\n# Usage in Python:\n")
        f.write(f"import urllib.request\n")
        f.write(f"\n")
        for file_name, url in signed_urls.items():
            f.write(f"urllib.request.urlretrieve('{url}', '{file_name}')\n")
        
        f.write(f"\n\n# Usage with wget:\n")
        for file_name, url in signed_urls.items():
            f.write(f"wget -O {file_name} '{url}'\n")
    
    print(f"\n‚úÖ URLs saved to: {url_file}")
    
except Exception as e:
    print(f"\n‚ùå Failed to generate signed URLs: {e}")
    print("\nNote: Make sure your service account has 'Service Account Token Creator' role")
    print("or use 'iam.serviceAccounts.signBlob' permission.")


üîó GENERATING SIGNED URLS FOR MODEL FILES

‚ùå Failed to generate signed URLs: Max allowed expiration interval is seven days 604800

Note: Make sure your service account has 'Service Account Token Creator' role
or use 'iam.serviceAccounts.signBlob' permission.


## Step 9: Generate Deployment Instructions

In [None]:
def generate_deployment_code(bucket_name, gcs_prefix, signed_urls):
    """Generate code snippet for downloading models in deployment"""
    
    print("\n" + "="*60)
    print("üìã MODEL DEPLOYMENT INSTRUCTIONS")
    print("="*60)
    
    code = f'''
# ============================================================
# KYC/AML Identity Classifier - Model Deployment
# ============================================================

# Method 1: Download using Signed URLs (No Authentication Required)
# Valid for 365 days from generation date

import urllib.request
import os

# Create model directory
os.makedirs('model', exist_ok=True)

# Download model files
model_urls = {{
'''
    
    for file_name, url in signed_urls.items():
        code += f'    "{file_name}": "{url}",\n'
    
    code += f'''}}

for file_name, url in model_urls.items():
    print(f"Downloading {{file_name}}...")
    urllib.request.urlretrieve(url, f"model/{{file_name}}")
    print(f"‚úÖ Downloaded {{file_name}}")

print("\n‚úÖ All model files downloaded!")

# ============================================================
# Method 2: Download using GCS API (Requires Authentication)
# ============================================================

from google.cloud import storage
import os

# Set credentials (if not running on GCP)
# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'path/to/service-account.json'

# Initialize client
client = storage.Client(project='{PROJECT_ID}')
bucket = client.bucket('{bucket_name}')

# Download model files
model_files = {MODEL_FILES}
gcs_prefix = '{gcs_prefix}'

os.makedirs('model', exist_ok=True)

for file_name in model_files:
    blob_path = f"{{gcs_prefix}}/{{file_name}}"
    local_path = f"model/{{file_name}}"
    
    print(f"Downloading {{file_name}}...")
    blob = bucket.blob(blob_path)
    blob.download_to_filename(local_path)
    print(f"‚úÖ Downloaded {{file_name}}")

print("\n‚úÖ All model files downloaded!")

# ============================================================
# Method 3: Docker Deployment (Download during build)
# ============================================================

# Add to Dockerfile:
'''
    
    code += '''
# Download models during Docker build
RUN mkdir -p /app/model && \\'''
    
    for i, (file_name, url) in enumerate(signed_urls.items()):
        if i < len(signed_urls) - 1:
            code += f'''
    wget -O /app/model/{file_name} '{url}' && \\'''
        else:
            code += f'''
    wget -O /app/model/{file_name} '{url}'
'''
    
    code += f'''
# ============================================================
# Method 4: Load Model in Inference Code
# ============================================================

import torch
import json
from torchvision import models

# Load class indices
with open('model/class_indices.json', 'r') as f:
    class_info = json.load(f)
    class_names = class_info['class_names']

# Load PyTorch model
checkpoint = torch.load('model/efficientnet_model.pth', map_location='cpu')
model = EfficientNetClassifier(num_classes=len(class_names))
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

print(f"‚úÖ Model loaded: {{len(class_names)}} classes")
print(f"   Classes: {{class_names}}")
'''
    
    print(code)
    print("="*60)
    
    # Save to file
    instructions_file = "deployment_instructions.py"
    with open(instructions_file, 'w') as f:
        f.write(code)
    print(f"\n‚úÖ Instructions saved to: {instructions_file}")

generate_deployment_code(BUCKET_NAME, GCS_MODEL_PREFIX, signed_urls)


üìã MODEL DEPLOYMENT INSTRUCTIONS

# KYC/AML Identity Classifier - Model Deployment

# Method 1: Download using Signed URLs (No Authentication Required)
# Valid for 365 days from generation date

import urllib.request
import os

# Create model directory
os.makedirs('model', exist_ok=True)

# Download model files
model_urls = {
}

for file_name, url in model_urls.items():
    print(f"Downloading {file_name}...")
    urllib.request.urlretrieve(url, f"model/{file_name}")
    print(f"‚úÖ Downloaded {file_name}")

print("
‚úÖ All model files downloaded!")

# Method 2: Download using GCS API (Requires Authentication)

from google.cloud import storage
import os

# Set credentials (if not running on GCP)
# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'path/to/service-account.json'

# Initialize client
client = storage.Client(project='kyc-aml-automation')
bucket = client.bucket('kyc-aml-model')

# Download model files
model_files = ['efficientnet_model.pth', 'efficientnet_model.onnx', 'clas

## Step 10: List All Files in Bucket

In [None]:
def list_bucket_contents(bucket, prefix=None):
    """List all files in the bucket"""
    print("\nüìÅ Bucket contents:")
    print("="*60)
    
    blobs = bucket.list_blobs(prefix=prefix)
    total_size = 0
    count = 0
    
    for blob in blobs:
        size_mb = blob.size / (1024 * 1024)
        total_size += blob.size
        count += 1
        print(f"  {blob.name} ({size_mb:.2f} MB)")
    
    print("="*60)
    print(f"Total: {count} file(s), {total_size / (1024*1024):.2f} MB")

list_bucket_contents(bucket, prefix=GCS_MODEL_PREFIX)


üìÅ Bucket contents:
  identity_classification/v1/class_indices.json (0.00 MB)
  identity_classification/v1/efficientnet_model.pth (50.12 MB)
  identity_classification/v1/training_history.json (0.00 MB)
Total: 3 file(s), 50.12 MB
  identity_classification/v1/class_indices.json (0.00 MB)
  identity_classification/v1/efficientnet_model.pth (50.12 MB)
  identity_classification/v1/training_history.json (0.00 MB)
Total: 3 file(s), 50.12 MB


## Step 11: Model Versioning (Optional)

In [None]:
def list_model_versions(bucket, base_prefix="document_classification"):
    """List all available model versions"""
    print("\nüìã Available Model Versions:")
    print("="*60)
    
    blobs = bucket.list_blobs(prefix=base_prefix)
    versions = set()
    
    for blob in blobs:
        # Extract version from path (e.g., document_classification/v1/file.pth -> v1)
        parts = blob.name.split('/')
        if len(parts) >= 2:
            versions.add(parts[1])
    
    for version in sorted(versions):
        print(f"  {version}")
        version_blobs = bucket.list_blobs(prefix=f"{base_prefix}/{version}")
        for blob in version_blobs:
            size_mb = blob.size / (1024 * 1024)
            print(f"    - {os.path.basename(blob.name)} ({size_mb:.2f} MB)")
    
    print("="*60)

list_model_versions(bucket)


üìã Available Model Versions:
  v1
  v1


## Summary

‚úÖ **Trained model uploaded to Google Cloud Storage!**

**What we did:**
1. ‚úì Verified local model files
2. ‚úì Authenticated with Google Cloud
3. ‚úì Uploaded all model files to GCS
4. ‚úì Generated signed URLs for public download
5. ‚úì Created deployment instructions

**Model Location:**
```
gs://kyc-aml-model/document_classification/v1/
‚îú‚îÄ‚îÄ efficientnet_model.pth (PyTorch checkpoint)
‚îú‚îÄ‚îÄ efficientnet_model.onnx (ONNX format - optional)
‚îú‚îÄ‚îÄ class_indices.json (class mappings)
‚îî‚îÄ‚îÄ training_history.json (training metrics)
```

**Next Steps:**

### 1. Update Inference API
Update `api/main.py` to download model from GCS:

```python
from google.cloud import storage
import torch
import json

# Download model on startup
def download_model_from_gcs():
    client = storage.Client()
    bucket = client.bucket('kyc-aml-model')
    
    files = ['efficientnet_model.pth', 'class_indices.json']
    for file in files:
        blob = bucket.blob(f'document_classification/v1/{file}')
        blob.download_to_filename(f'model/{file}')
```

### 2. Docker Deployment
Use signed URLs in Dockerfile to download models during build:

```dockerfile
# Download models from GCS
RUN mkdir -p /app/model && \\
    wget -O /app/model/efficientnet_model.pth 'SIGNED_URL_1' && \\
    wget -O /app/model/class_indices.json 'SIGNED_URL_2'
```

### 3. Cloud Run / App Engine
Models will be downloaded automatically on container startup using GCS API.

### 4. Model Versioning
To deploy a new version:
```python
# Upload to new version path
GCS_MODEL_PREFIX = "document_classification/v2"  # New version
# Re-run upload steps
```

### Managing Costs:
- Standard storage: ~$0.02 per GB/month
- Network egress: ~$0.12 per GB
- For typical model (~50-100 MB): Less than $1/month

### Useful Commands:
```bash
# List all models
gsutil ls gs://kyc-aml-model/

# Download specific model
gsutil cp gs://kyc-aml-model/document_classification/v1/efficientnet_model.pth .

# Download all model files
gsutil -m cp gs://kyc-aml-model/document_classification/v1/* model/

# Delete old version (if needed)
gsutil -m rm gs://kyc-aml-model/document_classification/v1/*
```