## Imports

In [None]:
import commons
import os
import shutil
import platform
import sys
import threading
import io

import matplotlib

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image

matplotlib.use('TkAgg')

import librosa
import librosa.display


import numpy as np
import pandas as pd

In [None]:
# Seed NumPy's global RNG so that later np.random calls in this notebook
# (e.g. the np.random.choice sampling in create_validtest) are repeatable.
np.random.seed(41)

## Constants

In [None]:
# Directory the notebook was started from; every dataset path is built from it.
PROJECT_ROOT = os.getcwd()

# Module-level bookkeeping lists, both empty at start-up.
MOVED_LIST = []
ARRANGED_LIST = []

# Resolution used when creating and saving spectrogram figures.
DPI = 100
# Tag category used as the classification target ('genre' or 'moodtheme').
TARGET = 'genre'
# Audio sample rate constant, in Hz.
SAMPLE_RATE = 12000

## Helper Functions

In [None]:
def MatplotlibClearMemory():
    """Clear and close every figure currently registered with pyplot.

    The backend is switched to 'TkAgg' while the figures are released and
    the backend that was active on entry is restored afterwards. The
    restore happens in a ``finally`` clause so that a failure while
    clearing or closing a figure cannot leave the session on the wrong
    backend.
    """
    usedbackend = matplotlib.get_backend()
    try:
        matplotlib.use('TkAgg')
        for fignum in matplotlib.pyplot.get_fignums():
            fig = matplotlib.pyplot.figure(fignum)
            fig.clear()
            matplotlib.pyplot.close(fig)
    finally:
        matplotlib.use(usedbackend)

In [None]:
def save_plot(p, fig, target_filedir):
    """Write ``fig`` to ``target_filedir`` as a tight, transparent PNG.

    Args:
        p: Artist returned by the plotting call. It is not used here and
            is kept only so existing callers keep working.
        fig: Matplotlib figure to save.
        target_filedir: Destination path of the PNG file.

    The figure is closed and pyplot's figure registry is cleared even when
    saving fails, so a bad file in a long batch cannot accumulate open
    figures. Any exception raised by ``savefig`` still propagates.
    """
    try:
        fig.savefig(target_filedir,
                    format='png',
                    bbox_inches='tight',
                    pad_inches=0.0,
                    transparent=True, dpi=DPI)
    finally:
        plt.close(fig)
        MatplotlibClearMemory()

In [None]:
# Tag-annotation TSV matching the selected TARGET; stays None when TARGET is
# neither 'genre' nor 'moodtheme'.
track_info = {
    'genre': 'autotagging_genre.tsv',
    'moodtheme': 'autotagging_moodtheme.tsv',
}.get(TARGET)
    

## Extract mel-spectrogram images from npy

In [None]:
def get_spectograms_from_folder(split='train'):
    """Render every ``.npy`` spectrogram under PROJECT_ROOT/00..99 to a PNG.

    Each file is expected to be named after its integer track id. The track
    is filed under the alphabetically first tag of its ``TARGET`` category,
    and the image is written to
    ``PROJECT_ROOT/spectrograms_png_<TARGET>/<split>/<class>/<id>.png``.
    Images that already exist are skipped, so the function can be re-run
    after an interruption.

    Args:
        split: Name of the sub-folder the images are written to. The
            default, 'train', is the folder the later corruption check and
            ``create_validtest`` read from.

    Failures on individual files are reported (with their cause) and do not
    stop the run; KeyboardInterrupt is no longer swallowed.
    """
    autotagging_file = os.path.join(PROJECT_ROOT, track_info)
    tracks, tags, extra = commons.read_file(autotagging_file)
    # Class names are taken from the first tag category in the file
    # (assumed to be the TARGET category -- TODO confirm for 'moodtheme').
    head = list(tags.keys())[0]
    classes = tags[head].keys()
    output_root = os.path.join(PROJECT_ROOT, 'spectrograms_png_'+TARGET, split)

    for cls in classes:
        os.makedirs(os.path.join(output_root, cls), exist_ok=True)

    for i in range(100):
        index = str(i).zfill(2)
        current_directory = os.path.join(PROJECT_ROOT, index)
        print('CURRENT DIR:{0}'.format(current_directory), flush=True, end='\r')
        for file in os.listdir(current_directory):
            filename = file.split('.')[0]
            try:
                cls = str(sorted(tracks[int(filename)][TARGET])[0])
                current_filedir = os.path.join(current_directory, filename+'.npy')
                target_filedir = os.path.join(output_root, cls, filename+'.png')
                if os.path.isfile(target_filedir):
                    print('{0} already in {1}'.format(filename, cls), flush=True, end='\r')
                    continue
                # The class may be missing from the pre-created folders.
                os.makedirs(os.path.dirname(target_filedir), exist_ok=True)
                data = np.load(current_filedir)
                fig = plt.figure(frameon=False, dpi=DPI)
                try:
                    p = librosa.display.specshow(data,
                                                 cmap=None,
                                                 sr=SAMPLE_RATE,
                                                 hop_length=256,
                                                 win_length=512,
                                                 fmin=0,
                                                 fmax=None,
                                                 htk=False)
                    save_plot(p, fig, target_filedir)
                finally:
                    # No-op if save_plot already closed the figure.
                    plt.close(fig)
                print('Folder {0} | Moved {1} to {2}'.format(index, str(filename), cls), flush=True, end='\r')
            except Exception as err:
                # Own line (no '\r') so the cause is not overwritten by the
                # next progress message.
                print('Error at {0}: {1}: {2}'.format(filename, type(err).__name__, err), flush=True)
                
                    

In [None]:
# Run the .npy -> PNG spectrogram extraction (get_spectograms_from_folder).
get_spectograms_from_folder()

## Check if any generated images are corrupt

In [None]:
# Decode every image under the train folder to detect corrupt files, and
# collect the set of file extensions encountered.
# Unreadable images are recorded in `corrupt_files` instead of aborting the
# scan at the first bad file.
folder_path = os.path.join(PROJECT_ROOT, 'spectrograms_png_'+TARGET, 'train')
extensions = []
corrupt_files = []
for fldr in os.listdir(folder_path):
    sub_folder_path = os.path.join(folder_path, fldr)
    if not os.path.isdir(sub_folder_path):
        # Stray files next to the class folders cannot be listed.
        continue
    for filee in os.listdir(sub_folder_path):
        file_path = os.path.join(sub_folder_path, filee)
        print('** Path: {}  **'.format(file_path), end="\r", flush=True)
        # splitext copes with names that have no dot or several dots.
        extension = os.path.splitext(filee)[1].lstrip('.')
        if extension not in extensions:
            extensions.append(extension)
        try:
            # The context manager releases the file handle after decoding.
            with Image.open(file_path) as im:
                rgb_im = im.convert('RGB')
        except (OSError, SyntaxError, ValueError) as err:
            corrupt_files.append(file_path)
            print('\nCorrupt image {0}: {1}'.format(file_path, err), flush=True)
print('\n{0} corrupt image(s) found'.format(len(corrupt_files)))

## Create train-test-validation split 60-20-20

In [None]:
def _move_to_split(file_names, source_directory, target_directory, split_name):
    """Move the named files between class folders, recording them in MOVED_LIST."""
    for name in file_names:
        if name in MOVED_LIST:
            print(name, 'already in', split_name)
            continue
        try:
            shutil.move(os.path.join(source_directory, name),
                        os.path.join(target_directory, name))
        except OSError as err:
            # Report the cause and carry on with the remaining files.
            print('Could not move {0} to {1}: {2}'.format(name, target_directory, err))
            continue
        MOVED_LIST.append(name)


def create_validtest(split=0.20):
    """Move a random share of each class from train/ into valid/ and test/.

    For every class folder under
    ``PROJECT_ROOT/spectrograms_png_<TARGET>/train``, ``split`` of the files
    go to ``valid/<class>`` and another ``split`` go to ``test/<class>``;
    the rest stay in train (60-20-20 with the default). Sampling uses
    NumPy's global RNG, and file names are sorted first so that a fixed seed
    selects the same files on every machine.

    Args:
        split: Fraction of each class moved to validation, and the same
            fraction again moved to test. Must be between 0 and 0.5.

    Raises:
        ValueError: If ``split`` is outside [0, 0.5].

    Side effects: moves files on disk and appends the moved file names to
    the module-level MOVED_LIST. The working directory is left unchanged.
    """
    if not 0 <= split <= 0.5:
        raise ValueError('split must be between 0 and 0.5, got {0}'.format(split))

    dataset_root = os.path.join(PROJECT_ROOT, 'spectrograms_png_'+TARGET)
    train_directory = os.path.join(dataset_root, 'train')
    valid_directory = os.path.join(dataset_root, 'valid')
    test_directory = os.path.join(dataset_root, 'test')
    # The class names are the sub-folders of the train directory.
    classes = sorted(entry for entry in os.listdir(train_directory)
                     if os.path.isdir(os.path.join(train_directory, entry)))
    n_classes = len(classes)
    print('TRAIN DIR:', train_directory)
    print('VALID DIR:', valid_directory)
    print('TEST DIR:', test_directory)
    print('TOTAL NUMBER OF CLASSES:', n_classes)

    for cls in classes:
        os.makedirs(os.path.join(valid_directory, cls), exist_ok=True)
        os.makedirs(os.path.join(test_directory, cls), exist_ok=True)

    # Get random train samples from each class
    for cls in classes:
        class_directory = os.path.join(train_directory, cls)
        file_list = sorted(os.listdir(class_directory))
        n_instances = len(file_list)
        print('INSTANCES FOR ', cls, ':', n_instances, '\n')
        n_samples = int((split * n_instances) * 2)
        print('GRABBING', n_samples, 'SAMPLES AT RANDOM')
        if n_samples == 0:
            continue
        # Sampling without replacement keeps the two halves disjoint.
        samples = np.random.choice(n_instances, n_samples, replace=False)
        half = len(samples) // 2
        valid_samples = samples[:half]
        test_samples = samples[half:]
        print('VALID SAMPLES:', valid_samples)
        print('TEST SAMPLES:', test_samples)
        _move_to_split([file_list[sample] for sample in valid_samples],
                       class_directory,
                       os.path.join(valid_directory, cls),
                       'validation')
        _move_to_split([file_list[sample] for sample in test_samples],
                       class_directory,
                       os.path.join(test_directory, cls),
                       'test')
        
    

In [None]:
# Carve the validation and test sets out of train/ using the default
# split=0.20 for each.
create_validtest()

### Convert spectrogram back to audio

In [None]:
from scipy.io import wavfile

In [None]:
def spec_to_audio(file_dir, tracks=None):
    """Invert a saved mel-spectrogram to audio and write it as a WAV file.

    Args:
        file_dir: Path to a ``.npy`` spectrogram whose file name starts with
            the integer track id (e.g. ``.../1234.npy``).
        tracks: Optional track-metadata mapping as returned by
            ``commons.read_file``. When omitted, it is loaded from the
            ``track_info`` file under PROJECT_ROOT; pass it in when
            converting many files to avoid re-reading that file.

    Returns:
        True if ``PROJECT_ROOT/<track id>.wav`` exists after writing,
        otherwise False.
    """
    if tracks is None:
        tracks, _tags, _extra = commons.read_file(
            os.path.join(PROJECT_ROOT, track_info))

    # basename copes with whichever path separator the platform uses.
    track_number = int(os.path.basename(file_dir).split('.')[0])
    spec_file = np.load(file_dir)
    # Alphabetically first tag, the same choice used when filing the images.
    print(TARGET+':', sorted(tracks[track_number][TARGET])[0])
    print('Duration:', tracks[track_number]['duration'])

    # NOTE(review): n_fft is left at librosa's default; confirm that it
    # matches the settings the spectrograms were computed with.
    audio = librosa.feature.inverse.mel_to_audio(spec_file,
                                                 sr=SAMPLE_RATE,
                                                 hop_length=256,
                                                 win_length=512,
                                                 fmin=0,
                                                 fmax=None,
                                                 htk=False,
                                                 center=True,
                                                 )
    wav_dir = os.path.join(PROJECT_ROOT, str(track_number) + '.wav')
    print(wav_dir)
    wavfile.write(wav_dir, SAMPLE_RATE, audio)
    return os.path.isfile(wav_dir)
    