In [11]:
import yaml
from nussl.deep import SeparationModel

In [32]:

# Nested experiment configuration: dataset, model, training, dataset paths,
# bookkeeping info, dataset types and the separation algorithm settings.
base_parameters = {
    'dataset_config': { # Configuration of dataset - STFT parameters, etc.
        'n_fft': 256,
        'hop_length': 64,
        'length': 400,
        'output_type': 'psa',
        'cache': '/storage/cache',
        'fraction_of_dataset': 1.0,
        'weight_type': ['magnitude'],
        'weight_threshold': -40,
        'num_channels': 1,
        'source_labels': [],
        'ignore_sources': [],
        'group_sources': [],
        'format': 'rnn',
        'sample_rate': 8000,
        'data_keys_for_training': ['log_spectrogram', 'assignments', 'weights'],
        'use_librosa_stft': False,
        'excerpt_selection_strategy': 'random',
        'overwrite_cache': False
    },
    'model_config': { # Model configuration (deep clustering model here)
        # NOTE(review): this is 128 while the printed builder output further
        # down shows 129 - presumably the builder adjusts it; confirm.
        'num_frequencies': 128,
        'num_mels': -1,
        'num_channels': 1,
        'sample_rate': 8000,  # same value as dataset_config['sample_rate']
        'bidirectional': True,
        'hidden_size': 300,
        'num_layers': 2,
        'embedding_size': 20,
        'dropout': 0.3,
        'embedding_activations': ['sigmoid', 'unit_norm'],
        'projection_trainable': False,
        'rnn_type': 'lstm',
        'instance_norm': True,
        'batch_norm': False,
    },
    'train_config': { # Training configuration (won't need much changing)
        'num_epochs': 10,
        'learning_rate': 2e-4,
        'learning_rate_decay': 0.5,
        'patience': 5,
        'batch_size': 40,
        'num_workers': 20,
        'loss_function': [('dpcl', 'embedding', 1.0)],
        'optimizer': 'adam',
        # This key used to appear twice in this dict (first as an empty list);
        # Python keeps only the last value of a duplicated key, so the single
        # entry below is what was already in effect.
        'curriculum_learning': [ # Way to do things at different epochs.
            {'num_epoch': 0, 'command': 'set_current_length', 'args': [400]},
        ],
        'initial_length': 400,  # same value as dataset_config['length']
        'weight_decay': 0.0,
        'device': 'cuda',
        'data_parallel': True,
        'training_folder': '',
        'validation_folder': '',
        'sample_strategy': 'sequential',
    },
    'dataset_paths': {
        'train_folder': '/storage/data/babywsj8k/generated/train/', # What to train from
        'val_folder': '/storage/data/babywsj8k/generated/val/', # What to validate on
        'test_folder': '/storage/data/babywsj8k/generated/test/', # What to evaluate on
    },
    'info': {
        'project_name': 'Tutorial', # What is the comet.ml project this experiment belongs to?
        'worksheet_name': 'Tutorial', # What worksheet within the Google sheet should I save this to?
        'sheet_name': 'Toy experimental results', # What Google sheet should I put this in?
        'num_gpus': 1, # How many GPUs to use?
        'cache_populated': True,
        'test_dataset_type': 'SCAPER', # Dataset type for testing
        'train': True,
        'test': True,
        'blocking': False,
        'num_test_workers': 25,
    },
    'dataset_type': 'SCAPER', # Dataset type for training
    'val_dataset_type': 'SCAPER', # Dataset type for validation
    'algorithm_config': {
        'name': 'DeepClustering',
        'params': {
            'mask_type': 'soft',
            'clustering_options': {
                'posterior_alpha': 5.0,
            },
            'percentile': 99,
            'enhancement_amount': 0.0,
            'num_sources': 2
        }
    }
}


In [31]:
# Read the data description YAML into `d`. The path is relative, so it is
# resolved against the kernel's current working directory.
# NOTE(review): the full (non-safe) loader is kept on purpose - the displayed
# value of `d` contains tuples, which the safe loader presumably would not
# construct; only point this at trusted files.
with open('../experiments/data.yml', 'r') as f:
    d = yaml.full_load(f)

FileNotFoundError: [Errno 2] No such file or directory: '../experiments/data.yml'

In [29]:
# Show the parsed contents of `d` as the cell output.
d

{'mixture_parameters': {'train': {'num_mixtures': 1200,
   'foreground_path': 'babywsj8k/dev',
   'background_path': 'None',
   'target_path': 'babywsj8k/generated/train',
   'scene_duration': 5,
   'num_sources': 2},
  'val': {'num_mixtures': 120,
   'foreground_path': 'babywsj8k/dev',
   'background_path': 'None',
   'target_path': 'babywsj8k/generated/val',
   'scene_duration': 5,
   'num_sources': 2},
  'test': {'num_mixtures': 120,
   'foreground_path': 'babywsj8k/dev',
   'background_path': 'None',
   'target_path': 'babywsj8k/generated/test',
   'scene_duration': 5,
   'num_sources': 2}},
 'event_parameters': {'label': ('choose', []),
  'source_file': ('choose', []),
  'source_time': ('const', 0),
  'event_time': ('const', 0),
  'event_duration': ('const', 5),
  'snr': ('uniform', -2.5, 2.5),
  'pitch_shift': None,
  'time_stretch': None},
 'sample_rate': 8000,
 'ref_db': -40,
 'bitdepth': 16,
 'seed': 0}

In [36]:
from nussl.deep.config.builders import build_dpcl_config

# Build the config from the model settings and serialize it to YAML text
# *before* opening the output file, so that a failure in the builder or the
# dumper cannot leave a freshly truncated test.yml behind.
# yaml.dump returns the document as a string when no stream is passed.
o = yaml.dump(build_dpcl_config(base_parameters['model_config']), Dumper=yaml.Dumper)

# Actually persist the YAML: the file used to be opened for writing without
# anything being written to it, which left it empty.
with open('../experiments/test.yml', 'w') as f:
    f.write(o)
print(o)

connections:
- !!python/tuple
  - mel_projection
  - - log_spectrogram
- !!python/tuple
  - instance_norm
  - - mel_projection
- !!python/tuple
  - recurrent_stack
  - - instance_norm
- !!python/tuple
  - embedding
  - - recurrent_stack
modules:
  batch_norm:
    args:
      use_batch_norm: false
    class: BatchNorm
  embedding:
    args:
      activation:
      - sigmoid
      - unit_norm
      embedding_size: 20
      hidden_size: 600
      num_channels: 1
      num_features: 129
    class: Embedding
  instance_norm:
    args:
      use_instance_norm: true
    class: InstanceNorm
  log_spectrogram:
    input_shape: !!python/tuple
    - -1
    - -1
    - 129
  mel_projection:
    args:
      clamp: false
      direction: forward
      num_frequencies: 129
      num_mels: -1
      sample_rate: 8000
      trainable: false
    class: MelProjection
  recurrent_stack:
    args:
      bidirectional: true
      dropout: 0.3
      hidden_size: 300
      num_features: 129
      num_layers: 2


In [24]:
from nussl.separation import all_separation_algorithms