# A MODIFIED HIFI-GAN VOCODER USING ODCONV AND GRC FOR EXPRESSIVE VOICE CLONING IN STREAMSPEECH'S SIMULTANEOUS TRANSLATION

**Professional Training System with Real CVSS-T Dataset**

This notebook implements the complete thesis system with:
- Real CVSS-T dataset integration
- Real ECAPA-TDNN speaker embeddings  
- Real Emotion2Vec emotion embeddings
- Professional Modified HiFi-GAN training
- Voice cloning validation

**Requirements:**
- Google Colab Pro (for a more capable GPU)
- Your CVSS-T dataset (10k Spanish + 10k English samples)
- All training runs inside this notebook; no local processing is needed


In [None]:
# Install required packages for Google Colab
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install transformers speechbrain torch-audiomentations soundfile librosa
!pip install wandb  # For training monitoring

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchaudio
import torchaudio.transforms as T
import numpy as np
import os
import json
import soundfile as sf
from pathlib import Path
from tqdm import tqdm
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
import warnings
warnings.filterwarnings("ignore")

# Report the runtime: the PyTorch version, whether CUDA is usable and, when it
# is, the name and total memory (in GiB, one decimal) of device 0.
print(f"PyTorch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_props = torch.cuda.get_device_properties(0)
    gpu_mem_gib = gpu_props.total_memory / 1024**3
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {gpu_mem_gib:.1f} GB")


In [None]:
# Upload your CVSS-T dataset
from google.colab import files
import zipfile

print("Please upload your CVSS-T dataset as a ZIP file:")
uploaded = files.upload()

# Extract the dataset
for filename in uploaded.keys():
    if filename.endswith('.zip'):
        print(f"Extracting {filename}...")
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall('.')
        print(f"Extracted {filename} successfully!")

# List extracted contents
!find . -name "*.wav" | head -10
