In [26]:
import sys
import warnings, tqdm

# Suppress warnings of tqdm's own warning category for the rest of the session.
warnings.filterwarnings("ignore", category=tqdm.TqdmWarning)
# Register the top-level `tqdm` package under the notebook-specific module
# names, so any later import of `tqdm.notebook` / `tqdm.autonotebook` resolves
# to the plain package instead.
# NOTE(review): presumably done to avoid the widget-based progress bars — confirm.
sys.modules['tqdm.notebook'] = tqdm
sys.modules['tqdm.autonotebook'] = tqdm

# True when the `google.colab` module is already loaded in this interpreter;
# later cells use this flag to choose Colab or local paths.
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    # Clone the repository
    !git clone https://github.com/ofekdd/DL_Project.git
    # Make the cloned repository the working directory for the following cells
    %cd DL_Project

    # Install dependencies
    !pip install -r requirements.txt



In [27]:
# Check the current working directory and ensure it is the project root
from pathlib import Path
# Show the kernel's working directory and whether it contains `configs/`
# (True is expected when the kernel runs from the project root).
print(f"CWD : {Path.cwd()}")
print(f"Exists? {Path('configs').is_dir()}")


CWD : /home/odahan/Technion/Semester_8/Deep_Learning/Project/notebooks
Exists? False


In [28]:
import yaml
import os

# Locate the project root instead of relying on one machine's absolute path.
# Starting at the current working directory, walk up the directory tree until a
# folder containing `configs/multi_stft_cnn.yaml` is found. This covers both a
# kernel started in `<project>/notebooks` and one started in the project root
# (e.g. after the Colab `%cd`). If nothing is found, the original hard-coded
# location is used so existing local setups keep working.
_CONFIG_PARTS = ('configs', 'multi_stft_cnn.yaml')
workspace = '/home/odahan/Technion/Semester_8/Deep_Learning/Project'  # fallback

_candidate = os.path.abspath(os.getcwd())
while True:
    if os.path.isfile(os.path.join(_candidate, *_CONFIG_PARTS)):
        workspace = _candidate
        break
    _parent = os.path.dirname(_candidate)
    if _parent == _candidate:
        # Reached the filesystem root without finding the config
        break
    _candidate = _parent

# Path of the YAML configuration file (also reused by later cells)
yaml_path = f'{workspace}/configs/multi_stft_cnn.yaml'
print(yaml_path)

# Open and load the YAML file; the encoding is explicit so the result does not
# depend on the platform's default locale.
with open(yaml_path, 'r', encoding='utf-8') as file:
    cfg = yaml.safe_load(file)

print("9cnn configuration:")
for key, value in cfg.items():
    print(f"  {key}: {value}")

/home/odahan/Technion/Semester_8/Deep_Learning/Project/configs/multi_stft_cnn.yaml
9cnn configuration:
  model_name: multi_stft_cnn
  sample_rate: 22050
  n_mels: 64
  hop_length: 512
  batch_size: 8
  num_epochs: 50
  learning_rate: 2e-4
  num_workers: 4
  n_branches: 9
  branch_output_dim: 128


In [None]:
# Download the IRMAS dataset if needed
from data.download_irmas import main as download_irmas_main, find_irmas_root
import pathlib
import os

# Location where a previously downloaded archive may already live
home_dataset_path = pathlib.Path.home().joinpath("datasets", "irmas", "IRMAS.zip")

# Choose the cache directory for the current environment:
#   * Colab  -> a folder on Google Drive (assumes Drive is mounted at /content/drive)
#   * local  -> the home-directory copy when present, otherwise the project folder
if IN_COLAB:
    DATA_CACHE = "/content/drive/MyDrive/datasets/IRMAS"
elif home_dataset_path.exists():
    print(f"Found existing dataset at {home_dataset_path}")
    DATA_CACHE = str(home_dataset_path.parent)
else:
    DATA_CACHE = "data/raw"

# Make sure the cache directory is there before using it
os.makedirs(DATA_CACHE, exist_ok=True)

# Download only when the archive is not in the cache yet
zip_path = pathlib.Path(DATA_CACHE, "IRMAS.zip")
if not zip_path.exists():
    print(f"Downloading IRMAS dataset to {DATA_CACHE}...")
    download_irmas_main(pathlib.Path(DATA_CACHE))
else:
    print(f"Dataset already exists at {zip_path}, skipping download...")

# Resolve the dataset root used by the following cells
irmas_root = find_irmas_root()

In [None]:

# Convert the training dataset into multi-label format
from data.mix_labels import create_multilabel_dataset

# Build the original and synthetic-mixture datasets once the IRMAS root is known
if not irmas_root:
    print("IRMAS root not found. Please run the download cell first.")
else:
    print("Creating multi-label dataset from IRMAS...")

    # Sampling limits handed to the dataset builder
    mixture_kwargs = dict(
        max_original_samples=50,  # cap on original samples, to avoid memory issues
        num_mixtures=100,         # number of synthetic mixtures to create
        min_instruments=1,        # each mixture combines 1-2 instruments
        max_instruments=2,
    )

    # Returns both the original and the mixed dataset
    original_dataset, mixed_dataset = create_multilabel_dataset(
        irmas_root=irmas_root,
        cfg=cfg,
        **mixture_kwargs,
    )

    # Nothing is written here; the cell only reports where mixtures could be saved
    if mixed_dataset:
        if IN_COLAB:
            MIXED_DIR = "/content/IRMAS_mixed"
        else:
            MIXED_DIR = "data/mixed"
        print(f"\nTo save mixed samples for later use, you could write them to: {MIXED_DIR}")

In [29]:

if irmas_root:
    print(f"IRMAS dataset found at: {irmas_root}")

    # Define the processing output directory
    PROCESSED_DIR = "/content/IRMAS_features" if IN_COLAB else "data/processed"

    # Check if we have mixed dataset from previous cell
    if 'mixed_dataset' in globals() and mixed_dataset:
        print(f"\nFound {len(mixed_dataset)} mixed samples from previous cell")

        # Save mixed dataset to a temporary directory for preprocessing
        MIXED_TEMP_DIR = "/content/IRMAS_mixed_temp" if IN_COLAB else "data/mixed_temp"

        # Use the preprocessing function that handles mixed data
        from data.preprocess import preprocess_mixed_data

        print(f"Preprocessing original + mixed data to {PROCESSED_DIR}...")
        preprocess_mixed_data(
            irmas_root=irmas_root,
            mixed_dataset=mixed_dataset,
            out_dir=PROCESSED_DIR,
            cfg=cfg
        )

        print(f"✅ Preprocessing complete with mixed labels. Features saved to {PROCESSED_DIR}")

    else:
        print("No mixed dataset found. Running standard preprocessing...")
        print(f"To preprocess the data, you can run:")
        print(f"python data/preprocess.py --in_dir {irmas_root} --out_dir {PROCESSED_DIR}")

        # Run standard preprocessing
        preprocess_cmd = f"!python data/preprocess.py --in_dir {irmas_root} --out_dir {PROCESSED_DIR} --config configs/default.yaml"
        print(f"\nExecuting: {preprocess_cmd}")
        !python data/preprocess.py --in_dir {irmas_root} --out_dir {PROCESSED_DIR} --config configs/default.yaml

else:
    print("Could not locate IRMAS dataset after download. Check paths and try again.")

Downloading IRMAS dataset to data/raw...
Archive already exists, skipping download
Verifying checksum ...
Extracting ...
Done. Data at data/raw
IRMAS dataset found at: data/raw/IRMAS-TrainingData

To preprocess the data, you can run:
python data/preprocess.py --in_dir data/raw/IRMAS-TrainingData --out_dir data/processed

Or execute this command in the next cell:
!python data/preprocess.py --in_dir data/raw/IRMAS-TrainingData --out_dir data/processed


In [None]:
# Verify the train/val/test split after preprocessing

# Feature directory: Colab scratch space, or the project-relative folder locally
if IN_COLAB:
    PROCESSED_DIR = "/content/IRMAS_features"
else:
    PROCESSED_DIR = "data/processed"


def count_samples_in_dir(dir_path):
    """Count original and mixed sample directories directly under ``dir_path``.

    Every immediate sub-directory counts as one sample. A sub-directory whose
    name contains ``'mixed_'`` is counted as mixed; every other sub-directory
    is counted as original. Regular files are ignored.

    Args:
        dir_path: Directory to inspect, as a string or path-like object.

    Returns:
        A tuple ``(original_count, mixed_count)``. ``(0, 0)`` is returned when
        ``dir_path`` does not exist or is not a directory.
    """
    root = pathlib.Path(dir_path)

    # is_dir() rather than exists(): a path that points at a regular file
    # exists, but iterating it would raise NotADirectoryError.
    if not root.is_dir():
        return 0, 0

    original_count = 0
    mixed_count = 0
    for entry in root.iterdir():
        if not entry.is_dir():
            continue
        if 'mixed_' in entry.name:
            mixed_count += 1
        else:
            original_count += 1

    return original_count, mixed_count


# Report the sample counts of every split
for split in ('train', 'val', 'test'):
    split_dir = f"{PROCESSED_DIR}/{split}"
    original_count, mixed_count = count_samples_in_dir(split_dir)
    total_count = original_count + mixed_count

    report_lines = [
        f"📁 {split.upper()} split:",
        f"   Original samples: {original_count}",
        f"   Mixed samples: {mixed_count}",
        f"   Total: {total_count}",
    ]
    # One trailing blank line separates consecutive splits
    print("\n".join(report_lines), end="\n\n")

print("✅ Data split verification complete!")

In [35]:
# Import required modules for the model
import torch
from var import LABELS
from models.multi_stft_cnn import MultiSTFTCNN

# One output per instrument label
n_classes = len(LABELS)

# Take the architecture hyper-parameters from the loaded YAML config (`cfg`)
# instead of repeating them as literals, so the model built here cannot drift
# from the configuration used for training. The previous literals stay as
# defaults for configs that omit these keys.
n_branches = cfg.get('n_branches', 9)  # original note: 3 FFT sizes × 3 frequency bands
branch_output_dim = cfg.get('branch_output_dim', 128)  # feature dimension per branch

# Create the model
model = MultiSTFTCNN(
    n_classes=n_classes,
    n_branches=n_branches,
    branch_output_dim=branch_output_dim,
)

print(f"{n_branches} CNN Baseline Architecture:")
print(model)

# Optional: print a model summary if torchinfo is available
try:
    from torchinfo import summary
    # One all-zero, single-channel dummy spectrogram per branch; the 20x30
    # spatial size is an arbitrary placeholder.
    dummy_input = [torch.zeros(1, 1, 20, 30) for _ in range(n_branches)]
    print("\nModel Summary:")
    summary(model, input_data=dummy_input)
except ImportError:
    print("\nInstall torchinfo for detailed model summary: pip install torchinfo")

9 CNN Baseline Architecture:
MultiSTFTCNN(
  (branches): ModuleList(
    (0-8): 9 x STFTBranch(
      (cnn): Sequential(
        (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (6): ReLU()
        (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (10): ReLU()
        (11): AdaptiveAvgPool2d(output_size=(1, 1))
        (12): Flatten(start_dim=1, end_dim=-1)
      )
    )
  )
  (class

In [40]:
# Number of samples to use for training.
# Use None to train on all samples, or an integer to limit them.
max_samples = 1  # currently a 1-sample smoke test; e.g. 50 for a short run

# Keep `cfg` in sync with `max_samples` in both directions, so re-running this
# cell always leaves the configuration in the state described above.
if max_samples is not None:
    cfg['max_samples'] = max_samples
    print(f"Training with limited samples: {max_samples}")
else:
    # Remove a limit left behind by an earlier run of this cell; without this,
    # switching back to None would silently keep the old limit in `cfg`.
    cfg.pop('max_samples', None)
    print("Training with all available samples")


Training with limited samples: 1


In [None]:
try:
    from training.train import main as train_main
    train_main(cfg)
    print("Training completed!")
except Exception as e:
    print(f"Error with direct import: {e}")
    print("Falling back to shell command")
    # If using shell command, we need to create a temporary config file with max_samples
    if max_samples is not None:
        import tempfile
        import yaml

        # Create a temporary config file with max_samples
        temp_cfg_path = tempfile.mktemp(suffix='.yaml')
        with open(temp_cfg_path, 'w') as temp_cfg:
            yaml.dump(cfg, temp_cfg)

        !python -m training.train --config {temp_cfg_path}

        # Clean up the temporary file
        import os
        os.unlink(temp_cfg_path)
    else:
        !python -m training.train --config {yaml_path}

Error with direct import: expected str, bytes or os.PathLike object, not dict
Falling back to shell command
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name    | Type             | Params
---------------------------------------------
0 | model   | MultiSTFTCNN     | 850 K 
1 | metrics | MetricCollection | 0     
---------------------------------------------
850 K     Trainable params
0         Non-trainable params
850 K     Total params
3.403     Total estimated model params size (MB)
  rank_zero_warn(
Epoch 0: 100%|█| 1/1 [00:01<00:00,  1.01s/it, v_num=2, train/loss=0.738, train/m
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                       | 0/160 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                          | 0/160 [00:00<?, ?it/s][A
Validation DataLoader 0:   1%|                  | 1/160 [00:02<06:30,  2.46s/it][

In [2]:
# Inference and visualization using the test set.
#
# Prerequisites defined by earlier cells: `workspace`, `yaml_path`, `IN_COLAB`
# and, for the inference itself, `irmas_root`. On a fresh kernel run those
# cells first; otherwise this cell stops with a NameError.
import os
import pathlib
import traceback

import numpy as np
import yaml

# Define paths for checkpoint and config
ckpt_path = f"{workspace}/notebooks/lightning_logs/version_2/checkpoints/epoch=0-val_mAP=0.000.ckpt"
config_path = yaml_path

# Use the processed test data
PROCESSED_DIR = "/content/IRMAS_features" if IN_COLAB else "data/processed"
test_data_dir = f"{PROCESSED_DIR}/test"

print(f"🔍 Looking for test data in: {test_data_dir}")

if pathlib.Path(test_data_dir).exists():
    print("✅ Found test data directory")

    # The processed split holds extracted features, while `predict` below is
    # given wav paths, so the demo runs on wav files from the original dataset.
    if 'irmas_root' in globals() and irmas_root:
        all_wav_files = list(pathlib.Path(irmas_root).rglob("*.wav"))

        # Seeded shuffle, then take the last 10% as the test pool.
        # NOTE(review): intended to reproduce the split made by preprocessing;
        # that only holds if preprocessing lists and shuffles files the same
        # way — confirm against data/preprocess.py.
        np.random.seed(42)
        np.random.shuffle(all_wav_files)

        val_split = int(len(all_wav_files) * 0.9)
        test_wav_files = all_wav_files[val_split:][:5]  # Limit to 5 for demo

        print(f"📊 Found {len(test_wav_files)} test files from original dataset")

        # Load config and model once, before the loop. Doing this per file
        # repeated the work, and a skipped first file left `model` unset or
        # pointing at a model from another cell.
        model_ready = False
        if not os.path.exists(ckpt_path):
            print(f"❌ Checkpoint not found: {ckpt_path}")
        else:
            try:
                import torch
                from inference.predict import predict
                from models.multi_stft_cnn import MultiSTFTCNN
                from var import LABELS
                from visualization.visualization import visualize_audio

                # Load config
                with open(config_path, 'r') as f:
                    cfg = yaml.safe_load(f)

                model = MultiSTFTCNN(n_classes=len(LABELS))
                state = torch.load(ckpt_path, map_location="cpu")["state_dict"]

                # The training log lists the network as the `model` attribute
                # of the Lightning module, so checkpoint keys are expected to
                # look like "model.<param>". Strip that prefix and drop keys of
                # other sub-modules; a state dict without the prefix is used
                # unchanged.
                wrapper_prefix = "model."
                if any(key.startswith(wrapper_prefix) for key in state):
                    state = {
                        key[len(wrapper_prefix):]: value
                        for key, value in state.items()
                        if key.startswith(wrapper_prefix)
                    }

                model.load_state_dict(state)
                # Inference mode: the branches contain BatchNorm layers, which
                # must use their running statistics rather than batch statistics.
                model.eval()
                print("✅ Model loaded successfully")
                model_ready = True
            except Exception as e:
                print(f"❌ Could not load model: {e}")
                traceback.print_exc()

        if model_ready:
            for i, wav_file in enumerate(test_wav_files):
                wav_path = str(wav_file)
                print(f"\n🎵 Testing file {i + 1}/{len(test_wav_files)}: {pathlib.Path(wav_path).name}")

                if not os.path.exists(wav_path):
                    print(f"❌ Audio file not found: {wav_path}")
                    continue

                try:
                    # Run prediction
                    scores = predict(model, wav_path, cfg)

                    print("📊 Predicted class probabilities:")
                    print("=" * 40)

                    # Sort by score for better readability
                    sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)

                    for label, score in sorted_scores:
                        confidence = "🔥" if score > 0.5 else "🔸" if score > 0.1 else "  "
                        print(f"  {confidence} {label:<15} {score:>.4f}")

                    # Show every label above 0.1, or the single best one if none is
                    top_predictions = [label for label, score in sorted_scores if score > 0.1]
                    if top_predictions:
                        print(f"🎯 Top predictions (>0.1): {', '.join(top_predictions)}")
                    else:
                        print(f"🎯 Top prediction: {sorted_scores[0][0]} ({sorted_scores[0][1]:.4f})")

                    # Visualize only the first file to avoid clutter
                    if i == 0:
                        print("\n📈 Rendering waveform & spectrograms for first test file...")
                        try:
                            visualize_audio(wav_path, cfg)
                            print("✅ Visualization complete")
                        except Exception as viz_error:
                            # A plotting failure should not abort the predictions
                            print(f"⚠️  Visualization failed: {viz_error}")

                except Exception as e:
                    # Report the failure for this file and carry on with the next
                    print(f"❌ Error during inference: {e}")
                    traceback.print_exc()

            print(f"\n🎉 Inference complete on {len(test_wav_files)} test files!")

    else:
        print("❌ Original IRMAS root not found")

else:
    print(f"❌ Test data directory not found: {test_data_dir}")
    print("💡 Make sure you've run the preprocessing step that creates the train/val/test split")

NameError: name 'workspace' is not defined