# 🎵 MelodyMaster V1 Training

Fine-tuning the MusicGen Melody model on the MusicCaps dataset. This notebook works in both local Jupyter and Google Colab environments.

## Environment Setup

In [None]:
import sys
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("Running in Google Colab")
    # Install dependencies in Colab
    !pip install transformers datasets torch accelerate pyyaml -q
    
    # Optional: Mount Google Drive if needed
    from google.colab import drive
    # Uncomment to mount drive
    # drive.mount('/content/drive')
    
    # Check GPU
    !nvidia-smi
else:
    print("Running locally")
    # Local dependencies should be installed in the environment

## Import Dependencies

In [None]:
import os
from getpass import getpass

import torch
import yaml
from accelerate import Accelerator
from datasets import load_dataset
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoProcessor

# Report the PyTorch build and, when a CUDA device is usable, its name.
cuda_ok = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    # Index 0 is the first visible CUDA device.
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

## HuggingFace Authentication

In [None]:
# Helper used by the next cell to authenticate with the Hugging Face Hub.
def setup_hf_auth(token):
    """Log in to the Hugging Face Hub and print a confirmation.

    Args:
        token: Access token forwarded unchanged to ``huggingface_hub.login``.
    """
    login(token=token)
    success_message = "Successfully logged in to Hugging Face!"
    print(success_message)

In [None]:
# Never paste the token into the notebook: a hardcoded credential ends up in
# version control and in shared copies. Read it from the HF_TOKEN environment
# variable instead, and fall back to a hidden prompt when it is not set.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
setup_hf_auth(HF_TOKEN)

## Load Configuration

In [None]:
# Load config based on environment
def load_config():
    """Return the run configuration as a nested dict.

    When ``IN_COLAB`` is true a built-in configuration is returned. Otherwise
    the configuration is parsed with ``yaml.safe_load`` from ``config.yaml``,
    resolved relative to the current working directory.
    """
    if not IN_COLAB:
        # Local run: read the settings from the YAML file.
        with open('config.yaml', 'r') as config_file:
            return yaml.safe_load(config_file)

    # Colab run: no config file is read, so use the inline settings.
    model_settings = {
        'base_model': 'facebook/musicgen-melody',
        'output_model': 'opentunesai/melodymaster-v1',
    }
    training_settings = {
        'batch_size': 4,
        'learning_rate': 2e-5,
        'num_epochs': 3,
    }
    return {'model': model_settings, 'training': training_settings}


config = load_config()
print("Configuration loaded successfully!")

## Load Model and Dataset

In [None]:
# Resolve the checkpoint name once; the model and processor share it.
base_model_id = config['model']['base_model']

# Load the base model, letting `device_map="auto"` decide weight placement.
# NOTE(review): confirm that AutoModelForCausalLM resolves to the intended
# MusicGen Melody class for this checkpoint; a conditional-generation class
# may be what fine-tuning needs. TODO verify against the transformers docs.
model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
processor = AutoProcessor.from_pretrained(base_model_id)

# Only the first 100 training rows of MusicCaps are pulled, for quick testing.
dataset = load_dataset("google/musiccaps", split="train[:100]")
print(f"Loaded {len(dataset)} samples")

## Training Setup

In [None]:
# Initialize accelerator.
# fp16 mixed precision requires a GPU, so fall back to full precision when
# CUDA is unavailable (e.g. a CPU-only local run) instead of failing here.
mixed_precision = 'fp16' if torch.cuda.is_available() else 'no'
accelerator = Accelerator(mixed_precision=mixed_precision)

# Setup optimizer over all model parameters with the configured learning rate.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=config['training']['learning_rate']
)

## Training Loop

In [None]:
# Training implementation here
def train():
    """Switch the global ``model`` into training mode.

    TODO: the training loop itself is not implemented yet; this function
    currently does nothing beyond calling ``model.train()``.
    """
    model.train()

## Save and Push to Hub

In [None]:
# Upload the model first, then the processor, to the same Hub repository.
hub_repo_id = config['model']['output_model']
for artifact in (model, processor):
    artifact.push_to_hub(hub_repo_id)