In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
from scipy.io import wavfile
from tempfile import mktemp
from pydub import AudioSegment

import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import gc

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Root folder that holds one sub-folder of audio files per genre.
all_songs_dpath = '../data/wav_files/'

# List the root only once and keep real directories only, so that stray files
# (e.g. .DS_Store, Thumbs.db) are not treated as genres and `genres` and
# `genre_dirs` are guaranteed to line up element by element.
genres = [d for d in os.listdir(all_songs_dpath) if os.path.isdir(all_songs_dpath + d)]
genre_dirs = [all_songs_dpath + d + '/' for d in genres]

# Map every genre name to the paths of the files found in its folder.
# Paths are joined with '/' on purpose: later path parsing splits on '/'.
genre_song_dict = {}
for genre, genre_path in zip(genres, genre_dirs):
    genre_song_dict[genre] = [genre_path + song for song in os.listdir(genre_path)]

# Number of files per genre (shown as the cell output).
{k: len(v) for k, v in genre_song_dict.items()}

{'acoustic': 82,
 'alternative': 93,
 'blues': 88,
 'classical': 74,
 'country': 84,
 'dance': 92,
 'Electronic': 94,
 'hardcore': 50,
 'hip-hop': 76,
 'indie': 86,
 'jazz': 71,
 'Metal': 90,
 'punk': 91,
 'rap': 84,
 'reggae': 85,
 'rnb': 95,
 'rock': 93}

In [20]:
def generate_mels(song):
    """Compute a dB-scaled mel spectrogram for one song part and save it to disk.

    Parameters
    ----------
    song : dict
        Must provide:
        - 'raw_data': the audio samples handed to librosa as ``y``,
        - 'sample_rate': the sampling rate of those samples,
        - 'file_path': the target path for the saved array.

    Returns
    -------
    None
        The spectrogram is written with ``np.save`` (which appends '.npy'
        to the path when it is missing). ``song`` itself is left untouched.
    """
    sample_rate = song['sample_rate']
    # Work on a local reference so the caller's dict is never modified.
    samples = song['raw_data']

    # Frame size scales with the sample rate: 1000 samples at 48 kHz,
    # proportionally fewer at lower rates (918 at 44.1 kHz).
    corr = 48000 / sample_rate
    n_param = int(1000 / corr)

    if sample_rate == 44100:
        # Drop the trailing 1000 samples of 44.1 kHz audio.
        # NOTE(review): the reason is not documented; presumably meant to even
        # out frame counts across sample rates -- confirm.
        samples = samples[:len(samples) - 1000]

    # Non-overlapping frames: FFT size, window length and hop are all n_param.
    S = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_fft=n_param, n_mels=128,
                                       win_length=n_param, hop_length=n_param, fmax=20000)
    # Convert power to decibels relative to the loudest bin of this part.
    S_dB = librosa.power_to_db(S, ref=np.max)
    np.save(song['file_path'], S_dB)

In [21]:
def generate_dict(file, op_path = '../data/song_mels/'):
    """Split one audio file into 60-second parts and save a mel spectrogram per part.

    The file is decoded with pydub, converted to WAV through a temporary file,
    reduced to a single channel and cut into consecutive full minutes; a
    trailing remainder shorter than 60 seconds is discarded. Each part is
    passed to ``generate_mels``, which writes the spectrogram to
    ``<op_path><genre>/<artist>_<song>_part<k>``.

    Parameters
    ----------
    file : str
        '/'-separated path of the form ``.../<genre>/<artist>_<song>.wav``.
        The genre is taken from the name of the folder containing the file.
    op_path : str
        Output root folder (expected to end with '/').

    Returns
    -------
    list of dict
        One record per part with the keys 'genre', 'sample_rate', 'artist',
        'length' (sample count of the whole song), 'song_name', 'part'
        (1-based) and 'file_path'. Empty when the song is shorter than a minute.

    Raises
    ------
    IndexError
        If the file name contains no '_' separating artist and song.
    """
    # Local import: mkstemp is not among the names imported at the top of the
    # notebook. Unlike mktemp it creates the file atomically, which avoids the
    # race condition the deprecated mktemp is known for.
    from tempfile import mkstemp

    sound = AudioSegment.from_file(file)
    fd, wname = mkstemp(suffix='.wav')
    os.close(fd)  # only the path is needed; pydub reopens it for writing
    try:
        sound.export(wname, format="wav")
        FS, data = wavfile.read(wname)
    finally:
        # The samples are in memory now; always clean up, even on failure.
        os.remove(wname)

    # Parent folder name instead of a hard-coded path depth, so the function
    # keeps working when the data root is nested differently.
    genre = os.path.basename(os.path.dirname(file))
    name_parts = file.split('/')[-1].split('.wav')[0].split('_')
    artist = name_parts[0]
    song = name_parts[1]

    # Keep one channel only (the first one of multi-channel audio), as float32.
    mono = data[:, 0] if len(data.shape) == 2 else data
    single_chan_data = np.array(mono, dtype=np.float32)

    samples_per_part = 60 * FS
    n_parts = int((len(data) / FS) // 60)  # number of full minutes
    if n_parts:
        os.makedirs('{0}{1}/'.format(op_path, genre), exist_ok=True)

    song_dict = []
    for part_idx in range(n_parts):
        part_no = part_idx + 1
        fn = '{0}{1}/{2}_{3}_part{4}'.format(op_path, genre, artist, song, str(part_no))
        song_part_data = single_chan_data[part_idx * samples_per_part:part_no * samples_per_part]
        part_dict = {'genre': genre, 'sample_rate': FS, 'artist': artist, 'length': len(single_chan_data),
                     'song_name': song, 'raw_data': song_part_data, 'part': part_no, 'file_path': fn}
        generate_mels(part_dict)

        # Keep only the metadata; the raw samples are too large to collect.
        song_chars = {key: value for key, value in part_dict.items() if key != 'raw_data'}
        song_dict.append(song_chars)
    return song_dict

In [11]:
# Run the mel-generation pipeline over every file of every genre and gather
# the per-part metadata records into one flat list.
all_songs_dict = []
for genre, genre_files in genre_song_dict.items():
    for file in genre_files:
        all_songs_dict += generate_dict(file)

In [12]:
# Tabulate the per-part metadata, persist it alongside the data, and preview
# a few random rows as the cell output.
df_songs = pd.DataFrame(data=all_songs_dict)
df_songs.to_csv(path_or_buf='../data/all_songs.csv', index=False)
df_songs.sample(n=5)

Unnamed: 0,genre,sample_rate,artist,length,song_name,part,file_path
5267,rock,44100,Linkin Park,9147392,What I've Done,2,../data/song_mels/rock/Linkin Park_What I've D...
1888,dance,44100,Lady Gaga,25157632,Telephone (feat. Beyoncé),6,../data/song_mels/dance/Lady Gaga_Telephone (f...
2949,indie,48000,Juzhin,11749669,Railways,3,../data/song_mels/indie/Juzhin_Railways_part3
3163,jazz,44100,Amy Winehouse,7418880,Love Is a Losing Game,1,../data/song_mels/jazz/Amy Winehouse_Love Is a...
4019,punk,44100,Sex Pistols,8147968,Bodies,2,../data/song_mels/punk/Sex Pistols_Bodies_part2


## Next Step: Use the generated mel spectrograms as input for training the genre classifier