In [None]:
# Expose Google Drive inside the Colab VM. Every path used later in this
# notebook lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
!pip install accelerate librosa soundfile


In [None]:
import os

# Project folder on the mounted Drive and the scripts whose presence is checked.
base = "/content/drive/MyDrive/266 Group Project"
required = ["dataset.py", "model.py", "train_taco.py"]

# Label printed next to each file name, keyed by whether the path exists.
status_label = {True: "✓ FOUND", False: "✗ MISSING"}

print("Checking required files:\n")
for filename in required:
    is_present = os.path.exists(os.path.join(base, filename))
    print(filename, status_label[is_present])


In [None]:
import os

# Folder that should hold the LJSpeech-1.1 wav files on the mounted Drive.
ljs_base = "/content/drive/MyDrive/266 Group Project/LJSpeech-1.1/wavs"

if os.path.exists(ljs_base):
    ljs_status = "✓ FOUND"
else:
    ljs_status = "✗ MISSING"
print("LJSpeech wavs folder:", ljs_status)


In [None]:
# Run train_taco.py through `accelerate launch` with the flags listed below.
# Every path points into the Drive project folder and is quoted because the
# folder name contains spaces.
# NOTE(review): --path_to_val_manifest points at test_metadata_emotion.csv —
#   confirm that validating on the test split is intended.
# NOTE(review): the captured log prints every 10 batches although
#   --console_out_iters is 50 — confirm train_taco.py honours this flag.
!accelerate launch "/content/drive/MyDrive/266 Group Project/train_taco.py" \
  --experiment_name tacotron2_emotion \
  --run_name tacotron2_emotion_run \
  --working_directory "/content/drive/MyDrive/266 Group Project/tacotron2_emotion" \
  --save_audio_gen "/content/drive/MyDrive/266 Group Project/inference_outputs" \
  --path_to_train_manifest "/content/drive/MyDrive/266 Group Project/datasplit/train_metadata_emotion.csv" \
  --path_to_val_manifest "/content/drive/MyDrive/266 Group Project/datasplit/test_metadata_emotion.csv" \
  --training_epochs 40 \
  --batch_size 32 \
  --learning_rate 0.001 \
  --console_out_iters 50 \
  --checkpoint_epochs 1 \
  --num_workers 0 \
  --no-log_wandb


Total Trainable Params: 29227777

Epoch 0
[Epoch 0] Batch 0/405 Loss=4.3865 Mel=1.3483 Rmel=2.6287 Stop=0.4096
[Epoch 0] Batch 10/405 Loss=1.9110 Mel=0.7837 Rmel=1.1237 Stop=0.0036
[Epoch 0] Batch 20/405 Loss=1.2440 Mel=0.4447 Rmel=0.7982 Stop=0.0012
[Epoch 0] Batch 30/405 Loss=1.0364 Mel=0.4091 Rmel=0.6265 Stop=0.0008
[Epoch 0] Batch 40/405 Loss=1.1499 Mel=0.4892 Rmel=0.6591 Stop=0.0015
[Epoch 0] Batch 50/405 Loss=1.0273 Mel=0.4304 Rmel=0.5955 Stop=0.0014
[Epoch 0] Batch 60/405 Loss=0.8488 Mel=0.4185 Rmel=0.4290 Stop=0.0013
[Epoch 0] Batch 70/405 Loss=0.7609 Mel=0.3749 Rmel=0.3849 Stop=0.0011
[Epoch 0] Batch 80/405 Loss=0.7997 Mel=0.3917 Rmel=0.4070 Stop=0.0010
[Epoch 0] Batch 90/405 Loss=0.7600 Mel=0.3692 Rmel=0.3900 Stop=0.0007
[Epoch 0] Batch 100/405 Loss=0.7268 Mel=0.3600 Rmel=0.3661 Stop=0.0006
[Epoch 0] Batch 110/405 Loss=0.7342 Mel=0.3571 Rmel=0.3765 Stop=0.0006
[Epoch 0] Batch 120/405 Loss=0.6457 Mel=0.3141 Rmel=0.3310 Stop=0.0005
[Epoch 0] Batch 130/405 Loss=0.6206 Mel=0.3097

# Inference

In [None]:
import sys

import torch
from accelerate import Accelerator

# The project-local modules imported below are presumably stored in the Drive
# project folder (model.py is checked there earlier in this notebook). Append
# that folder to sys.path so the cell works on a fresh kernel instead of
# relying on a previously changed working directory.
PROJECT_DIR = "/content/drive/MyDrive/266 Group Project"
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

from tokenizer import Tokenizer
from model import Tacotron2Config
from emotion_tacotron_model import EmotionTacotron2

checkpoint_dir = "/content/drive/MyDrive/266 Group Project/tacotron2_emotion/checkpoint_16"

# 1. Create the accelerator.
accelerator = Accelerator()

# 2. Build the model with the same configuration inputs used in this notebook.
tokenizer = Tokenizer()
config = Tacotron2Config(
    num_chars=tokenizer.vocab_size,
    pad_token_id=tokenizer.pad_token_id
)

model = EmotionTacotron2(config)

# 3. Register the model with the accelerator FIRST. `load_state` only restores
#    objects that were already passed through `prepare`; calling it on an
#    unprepared model loads no weights and leaves the model randomly
#    initialised without raising an error.
model = accelerator.prepare(model)

# 4. Restore the weights written by `accelerator.save_state` during training.
#    No `model=` keyword: it is not a `load_state` parameter and would be
#    forwarded to the underlying model-loading call.
accelerator.load_state(checkpoint_dir)

# 5. `prepare` has already placed the model on `accelerator.device`, so no
#    hard-coded "cuda" move is needed; just switch to inference mode.
model.eval()

print("✔ Model loaded successfully from accelerate checkpoint!")


✔ Model loaded successfully from accelerate checkpoint!
