In [1]:
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')
import os

import IPython.display as ipd

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import librosa
import librosa.display
import ast
from PIL import Image
import pathlib
import csv


plt.rcParams['figure.figsize'] = (17, 5)

### Reading in Genres to get names

In [4]:
genres = pd.read_csv('../data/fma_metadata/genrelist.csv')

In [5]:
genres.head()

Unnamed: 0.1,Unnamed: 0,genre,testtrainval
0,,,
1,track_id,,
2,2,Hip-Hop,training
3,5,Hip-Hop,training
4,10,Pop,training


In [6]:
genres.rename({'Unnamed: 0':'track_id'},axis=1,inplace=True)

In [7]:
genres.drop(['testtrainval'],axis=1,inplace=True)

In [8]:
genres.drop(index=[0,1],inplace = True)

In [9]:
genres.head()

Unnamed: 0,track_id,genre
2,2,Hip-Hop
3,5,Hip-Hop
4,10,Pop
5,140,Folk
6,141,Folk


Checking Value Counts

In [10]:
genres['genre'].value_counts()

Hip-Hop          1000
Pop              1000
Electronic       1000
Folk             1000
International    1000
Experimental     1000
Rock             1000
Instrumental     1000
Name: genre, dtype: int64

### Creating trackid name with mp3

In [11]:
idlist = []
for i in genres['track_id']:
    if len(i) ==1:
        idlist.append(f"00000{i}.mp3")
    elif len(i) ==2:
        idlist.append(f"0000{i}.mp3")
    elif len(i) ==3:
        idlist.append(f"000{i}.mp3")
    elif len(i) ==4:
        idlist.append(f"00{i}.mp3")
    elif len(i) ==5:
        idlist.append(f"0{i}.mp3")
    else:
        idlist.append(f"{i}.mp3")

In [12]:
idlist

['000002.mp3',
 '000005.mp3',
 '000010.mp3',
 '000140.mp3',
 '000141.mp3',
 '000148.mp3',
 '000182.mp3',
 '000190.mp3',
 '000193.mp3',
 '000194.mp3',
 '000197.mp3',
 '000200.mp3',
 '000203.mp3',
 '000204.mp3',
 '000207.mp3',
 '000210.mp3',
 '000211.mp3',
 '000212.mp3',
 '000213.mp3',
 '000255.mp3',
 '000256.mp3',
 '000368.mp3',
 '000424.mp3',
 '000459.mp3',
 '000534.mp3',
 '000540.mp3',
 '000546.mp3',
 '000574.mp3',
 '000602.mp3',
 '000615.mp3',
 '000620.mp3',
 '000621.mp3',
 '000625.mp3',
 '000666.mp3',
 '000667.mp3',
 '000676.mp3',
 '000690.mp3',
 '000694.mp3',
 '000695.mp3',
 '000704.mp3',
 '000705.mp3',
 '000706.mp3',
 '000707.mp3',
 '000708.mp3',
 '000709.mp3',
 '000714.mp3',
 '000715.mp3',
 '000716.mp3',
 '000718.mp3',
 '000777.mp3',
 '000814.mp3',
 '000821.mp3',
 '000822.mp3',
 '000825.mp3',
 '000853.mp3',
 '000890.mp3',
 '000892.mp3',
 '000897.mp3',
 '000993.mp3',
 '000995.mp3',
 '000997.mp3',
 '000998.mp3',
 '001039.mp3',
 '001040.mp3',
 '001066.mp3',
 '001069.mp3',
 '001073.m

In [13]:
genres['newid'] = idlist

### Deleting files which were corrupted

In [15]:
Rock = genres.loc[genres['genre'] =='Rock']

In [16]:
Rock.loc[Rock['track_id']=='108925']

Unnamed: 0,track_id,genre,newid
4905,108925,Rock,108925.mp3


In [17]:
Rock.drop(index=4905,inplace=True)

In [18]:
genres.loc[genres['track_id']=='108925']

Unnamed: 0,track_id,genre,newid
4905,108925,Rock,108925.mp3


In [19]:
genres.drop(index=4905,inplace=True)

In [20]:
genres.loc[genres['track_id']=='133297']

Unnamed: 0,track_id,genre,newid
6967,133297,Experimental,133297.mp3


In [21]:
genres.drop(index=6967,inplace=True)

In [22]:
genres.loc[genres['newid']=='099134.mp3']

Unnamed: 0,track_id,genre,newid
4472,99134,Electronic,099134.mp3


In [23]:
genres.drop(index=4472,inplace=True)

In [24]:
Pop = genres.loc[genres['genre'] =='Pop']
Instrumental = genres.loc[genres['genre'] =='Instrumental']
Electronic = genres.loc[genres['genre'] =='Electronic']
Folk = genres.loc[genres['genre'] =='Folk']
Experimental = genres.loc[genres['genre'] =='Experimental']
Hip_Hop = genres.loc[genres['genre'] =='Hip-Hop']
International = genres.loc[genres['genre'] =='International']

In [25]:
genrelist = list(genres['genre'].unique())

In [26]:
genrelist

['Hip-Hop',
 'Pop',
 'Folk',
 'Experimental',
 'Rock',
 'International',
 'Electronic',
 'Instrumental']

### Creating Spectogram image for each audio file and adding to folder

In [142]:
cmap = plt.get_cmap('inferno')
data_dir = 'data/music/'
for g in genrelist:
    df = genres.loc[genres['genre'] ==g]
    for i in df['newid']:
        full_path = f'{data_dir}{i}'
        y, sr = librosa.load(full_path, mono=True, duration=5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'data/genres/{g}/{i.split(".mp3")[0]}.png')
        plt.clf()


<Figure size 432x288 with 0 Axes>

In [29]:
train_folder = os.path.join(new_dir, 'train')
test_folder = os.path.join(new_dir, 'test')
val_folder = os.path.join(new_dir, 'val')


NameError: name 'new_dir' is not defined

### Moving Rock files

In [159]:
os.mkdir(train_folder)
os.mkdir(test_folder)
os.mkdir(val_folder)

In [163]:
train_folder = os.path.join(new_dir, 'train')
train = os.path.join(train_folder, 'Rock')

test_folder = os.path.join(new_dir, 'test')
test_Rock = os.path.join(test_folder, 'Rock')

val_folder = os.path.join(new_dir, 'validation')
val_Rock = os.path.join(val_folder, 'Rock')

In [165]:
imgs = imgs_Rock[:800]
for img in imgs:
    origin = os.path.join(data_Rock_dir, img)
    destination = os.path.join('data/genres/Rock/train', img)
    shutil.copyfile(origin, destination)

In [166]:
imgs = imgs_Rock[800:900]
for img in imgs:
    origin = os.path.join(data_Rock_dir, img)
    destination = os.path.join('data/genres/Rock/test', img)
    shutil.copyfile(origin, destination)

In [167]:
imgs = imgs_Rock[900:]
for img in imgs:
    origin = os.path.join(data_Rock_dir, img)
    destination = os.path.join('data/genres/Rock/val', img)
    shutil.copyfile(origin, destination)

In [31]:
genrelist.remove('Rock')

In [31]:
genrelist

['Hip-Hop',
 'Pop',
 'Folk',
 'Experimental',
 'Rock',
 'International',
 'Electronic',
 'Instrumental']

### Splitting genres into train test split 

In [35]:
for genre in genrelist:
    direc = f'data/genres_mel_spec/{genre}'
    images = [file for file in os.listdir(direc) if file.endswith('.png')]
    imgs = images[:800]
    for img in imgs:
        origin = os.path.join(direc, img)
        destination = os.path.join(f'data/genres_mel_spec/{genre}/train', img)
        shutil.copyfile(origin, destination)
    imgs = images[800:900]
    for img in imgs:
        origin = os.path.join(direc, img)
        destination = os.path.join(f'data/genres_mel_spec/{genre}/test', img)
        shutil.copyfile(origin, destination)
    imgs = images[900:]
    for img in imgs:
        origin = os.path.join(direc, img)
        destination = os.path.join(f'data/genres_mel_spec/{genre}/val', img)
        shutil.copyfile(origin, destination)

FileNotFoundError: [Errno 2] No such file or directory: 'data/genres_mel_spec/Hip-Hop/train/064629.png'