# NSynth Data Exploration

### Imports

In [1]:
import zipfile
import os
import json
import random
import itertools as it
import random

import numpy as np
import pandas as pd
import librosa as lb
import librosa.display
import matplotlib.pyplot as plt
import IPython.display as ipd
import os

In [2]:
# Play a sample sound: build an inline audio player for one clip from the
# training set's audio directory
file = 'datasets/nsynth-train/audio/bass_acoustic_000-024-025.wav'
ipd.Audio(file)

### Extract metadata

In [3]:
# Read examples.json for the training split and parse it into a dict keyed by
# note name. The context manager closes the file handle as soon as the text is
# read (a bare open() leaves it open until garbage collection), and the
# explicit encoding keeps the read independent of the platform default.
with open('datasets/nsynth-train/examples.json', encoding='utf-8') as metadata_file:
    json_metadata = metadata_file.read()
metadata = json.loads(json_metadata)

In [4]:
# List some example data
# islice pulls the first two (note name, info) pairs lazily instead of copying
# every entry of the metadata dict into a list just to slice two off the front.
list(it.islice(metadata.items(), 2))

[('guitar_acoustic_001-082-050',
  {'note': 16629,
   'sample_rate': 16000,
   'pitch': 82,
   'instrument_source': 0,
   'instrument_family_str': 'guitar',
   'instrument_str': 'guitar_acoustic_001',
   'note_str': 'guitar_acoustic_001-082-050',
   'qualities_str': ['percussive'],
   'instrument_source_str': 'acoustic',
   'velocity': 50,
   'instrument_family': 3,
   'instrument': 39,
   'qualities': [0, 0, 0, 0, 0, 0, 0, 1, 0, 0]}),
 ('bass_synthetic_120-108-050',
  {'note': 168243,
   'sample_rate': 16000,
   'pitch': 108,
   'instrument_source': 2,
   'instrument_family_str': 'bass',
   'instrument_str': 'bass_synthetic_120',
   'note_str': 'bass_synthetic_120-108-050',
   'qualities_str': ['percussive'],
   'instrument_source_str': 'synthetic',
   'velocity': 50,
   'instrument_family': 0,
   'instrument': 881,
   'qualities': [0, 0, 0, 0, 0, 0, 0, 1, 0, 0]})]

## Generate query function for instrument and quality

In [5]:
def query_metadata(instrument=None, quality=None, examples=None):
    """Return the note names matching an instrument family and a sound quality.

    Parameters
    ----------
    instrument : str
        Instrument family compared against each entry's
        'instrument_family_str' (e.g. 'guitar').
    quality : str
        Quality label that must appear in each entry's 'qualities_str'
        list (e.g. 'bright').
    examples : dict, optional
        Mapping of note name -> metadata dict to search. When omitted, the
        notebook-level ``metadata`` dict is used.

    Returns
    -------
    list of str or None
        Matching note names in the mapping's iteration order; the list is
        empty when nothing matches. None is returned, after printing a
        usage hint, when either ``instrument`` or ``quality`` is missing.
    """
    if instrument is None or quality is None:
        print('Please specify both the desired instrument and quality')
        return None
    if examples is None:
        # Looked up at call time so the function sees the current global.
        examples = metadata
    # Iterate items() once rather than re-indexing the dict for every field.
    return [
        note_name
        for note_name, info in examples.items()
        if info['instrument_family_str'] == instrument
        and quality in info['qualities_str']
    ]

In [6]:
# With no arguments the function prints its usage hint and returns None
query_metadata()

Please specify both the desired instrument and quality


In [7]:
# Check output
res = query_metadata('guitar', 'bright')
print(res[0], '\n')
print(metadata[res[0]])

guitar_electronic_034-012-100 

{'note': 201935, 'sample_rate': 16000, 'pitch': 12, 'instrument_source': 1, 'instrument_family_str': 'guitar', 'instrument_str': 'guitar_electronic_034', 'note_str': 'guitar_electronic_034-012-100', 'qualities_str': ['bright', 'distortion'], 'instrument_source_str': 'electronic', 'velocity': 100, 'instrument_family': 3, 'instrument': 559, 'qualities': [1, 0, 1, 0, 0, 0, 0, 0, 0, 0]}


In [8]:
def play_sample(instrument=None, quality=None, audio_dir='datasets/nsynth-train/audio/'):
    """Play one randomly chosen clip matching an instrument family and quality.

    Parameters
    ----------
    instrument : str
        Instrument family passed through to ``query_metadata``.
    quality : str
        Quality label passed through to ``query_metadata``.
    audio_dir : str, optional
        Directory containing the ``<note name>.wav`` files. Defaults to the
        training split's audio directory.

    Returns
    -------
    IPython.display.Audio or None
        Audio object for the chosen clip, whose note name is printed first.
        None is returned, after printing a message, when an argument is
        missing or no note matches.
    """
    if instrument is None or quality is None:
        print('Please specify both the desired instrument and quality')
        return None
    audio_file_names = query_metadata(instrument, quality)
    # query_metadata yields an empty list (not None) when nothing matches, and
    # random.choice raises IndexError on an empty sequence, so test truthiness.
    if not audio_file_names:
        print(f'No sounds found for a {quality} {instrument}')
        return None
    # Draw once so the printed name is the clip that actually gets played;
    # two independent draws would usually name one file and play another.
    chosen_name = random.choice(audio_file_names)
    print(chosen_name)
    return ipd.Audio(os.path.join(audio_dir, chosen_name + '.wav'))

In [10]:
# WARNING - might be loud lol
# Plays a randomly selected bright guitar clip and prints a note name
play_sample('guitar', 'bright')

guitar_acoustic_034-100-127
