# Feature Generation for the LSTM Notebook: Progressive Rock vs. Other Genres Music Classification  


## Importing Libraries.

In [0]:
# Feature extraction and data preprocessing
import librosa
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')
from google.colab import drive
drive.mount('/content/gdrive')

import math


      

Using TensorFlow backend.


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## User Parameters

**hop_length**: number of audio samples between successive frames  
**songsPath**: Folder location where all the songs are present  
**folders**: Names of the folders under songsPath that contain the different genres of songs  
**savePath**: Location where the extracted features are saved  
**XFeatureFileName**: File name for the generated Input features  
**YFeatureFileName**: File name for the Labels corresponding to inputs

In [0]:
# Number of audio samples between successive frames.
hop_length = 1024

# Input: folder holding all the songs, and its per-genre sub-folders.
songsPath = '/content/gdrive/My Drive/ML test/Test Set/'
folders = ['Prog', 'Non-Prog']

# Output: folder and file names for the generated feature/label arrays.
savePath = '/content/gdrive/My Drive/My test/'
XFeatureFileName = 'X_LSTM_Test.npy'
YFeatureFileName = 'Y_LSTM_Test.npy'

## Extracting features for train set


We will extract

* Mel-frequency cepstral coefficients (MFCC)(13 in number)

Each song is split into consecutive, non-overlapping pieces of 200 time frames, where every frame holds 13 MFCC coefficients. The number of pieces obtained from a song depends on hop_length and the sampling frequency. The label of each generated piece is the same as the label of the original song. The generated input feature is a 3-D array of shape number_of_pieces_from_all_songs X 200 X 13.

Extracted features are saved to location provided by user

In [0]:
# Collect the path of every song together with its class label.
# list_of_audiofiles[i] and label[i] always describe the same file.
list_of_audiofiles = []
label = []
timeseries_length_list = []  # not used in this cell; kept in case another cell references it
for g in folders:
  # The folder name decides the class: "Prog" (ignoring spaces) -> 1, anything else -> 0.
  folderName = g.replace(" ", "")
  genre_label = 1 if folderName == "Prog" else 0  # 1: Prog Rock, 0: Non Prog Rock
  genre_dir = os.path.join(songsPath, g)
  # os.listdir returns entries in arbitrary order; sorting keeps the file
  # indices (and therefore the label order) reproducible between runs.
  for filename in sorted(os.listdir(genre_dir)):
    songname = os.path.join(genre_dir, filename)
    if not os.path.isfile(songname):
      # Skip sub-directories: they cannot be loaded as audio.
      continue
    list_of_audiofiles.append(songname)
    label.append(genre_label)
print(len(list_of_audiofiles))


201


In [0]:
def extract_training_audio_features(list_of_audiofiles, labels=None, timeseries_length=200):
    """Cut every song into fixed-length MFCC windows and pool them in one array.

    Each file is loaded with ``librosa.load`` and 13 MFCCs are computed using
    the notebook-level ``hop_length``. The transposed MFCC matrix
    (frames x 13) is sliced into consecutive, non-overlapping windows of
    ``timeseries_length`` frames. A trailing remainder shorter than a full
    window is discarded.

    Parameters
    ----------
    list_of_audiofiles : sequence of str
        Paths of the audio files to process.
    labels : sequence, optional
        ``labels[i]`` is the class of ``list_of_audiofiles[i]``. Defaults to
        the notebook-level ``label`` list.
    timeseries_length : int, optional
        Number of MFCC frames per window (default 200).

    Returns
    -------
    X : numpy.ndarray
        Shape ``(total_windows, timeseries_length, 13)``.
    Y : numpy.ndarray
        Shape ``(total_windows, 1)``; each window carries the label of the
        song it was cut from.
    """
    if labels is None:
        # Fall back to the list built alongside list_of_audiofiles in the notebook.
        labels = label
    n_mfcc = 13
    data = []
    target = []
    for i, file in enumerate(list_of_audiofiles):
        print(i , file)
        y, sr = librosa.load(file)

        mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)
        frames = mfcc.T  # (n_frames, n_mfcc)
        print(frames.shape)
        n_frames = frames.shape[0]
        # Every start with start + timeseries_length <= n_frames yields a full
        # window. The previous strict `<` test dropped the last window whenever
        # the song length was an exact multiple of timeseries_length.
        for start in range(0, n_frames - timeseries_length + 1, timeseries_length):
            data.append(frames[start:start + timeseries_length, :])
            target.append(labels[i])
    # The reshape is a no-op for non-empty data; it only gives an empty result
    # a well-defined (0, timeseries_length, n_mfcc) shape.
    X = np.asarray(data).reshape(-1, timeseries_length, n_mfcc)
    return X, np.expand_dims(np.asarray(target), axis=1)

# Build the pooled (pieces x 200 x 13) feature array and its per-piece labels.
X, Y = extract_training_audio_features(list_of_audiofiles)

# To persist this feature set, uncomment the lines below.
# with open(savePath + XFeatureFileName, 'wb') as f:
#   np.save(f, X)
# with open(savePath + YFeatureFileName, 'wb') as f:
#   np.save(f, Y)

0 /content/gdrive/My Drive/ML test/Test Set/Non-Prog/The Legend Of Ashitaka Theme.mp3
(6492, 13)
(32, 200, 13) (32, 1)


## Extracting features for test  set


We will extract

* Mel-frequency cepstral coefficients (MFCC)(13 in number)

Each song is split into consecutive, non-overlapping pieces of 200 time frames, where every frame holds 13 MFCC coefficients. The number of pieces obtained from a song depends on hop_length and the sampling frequency, so it varies from song to song. The label of each generated piece is the same as the label of the original song. The generated input feature is a 4-D structure of number_of_songs X number_of_pieces_for_each_song X 200 X 13.

Extracted features are saved to location provided by user

In [0]:
def extract_test_audio_features(list_of_audiofiles, labels=None, timeseries_length=200):
    """Cut every song into fixed-length MFCC windows, grouped per song.

    Each file is loaded with ``librosa.load`` and 13 MFCCs are computed using
    the notebook-level ``hop_length``. The transposed MFCC matrix
    (frames x 13) is sliced into consecutive, non-overlapping windows of
    ``timeseries_length`` frames. A trailing remainder shorter than a full
    window is discarded.

    Parameters
    ----------
    list_of_audiofiles : sequence of str
        Paths of the audio files to process.
    labels : sequence, optional
        ``labels[i]`` is the class of ``list_of_audiofiles[i]``. Defaults to
        the notebook-level ``label`` list.
    timeseries_length : int, optional
        Number of MFCC frames per window (default 200).

    Returns
    -------
    X : numpy.ndarray
        1-D object array with one entry per song. ``X[i]`` is an array of
        shape ``(windows_in_song_i, timeseries_length, 13)``, so
        ``X[i][j]`` is window ``j`` of song ``i``.
    Y : numpy.ndarray
        Shape ``(total_windows, 1)``. It holds one label per window, in song
        order, not one label per song.
    """
    if labels is None:
        # Fall back to the list built alongside list_of_audiofiles in the notebook.
        labels = label
    n_mfcc = 13
    data = []
    target = []
    for i, file in enumerate(list_of_audiofiles):
        print(i , file)
        y, sr = librosa.load(file)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=n_mfcc)
        frames = mfcc.T  # (n_frames, n_mfcc)
        print(frames.shape)
        n_frames = frames.shape[0]
        # Every start with start + timeseries_length <= n_frames yields a full
        # window. The previous strict `<` test dropped the last window whenever
        # the song length was an exact multiple of timeseries_length.
        songData = [
            frames[start:start + timeseries_length, :]
            for start in range(0, n_frames - timeseries_length + 1, timeseries_length)
        ]
        target.extend([labels[i]] * len(songData))
        # The reshape gives songs shorter than one window a (0, T, n_mfcc) entry.
        data.append(np.asarray(songData).reshape(-1, timeseries_length, n_mfcc))
    # Songs yield different numbers of windows, so the per-song arrays cannot
    # be stacked into one rectangular array. np.asarray on such ragged input
    # raises ValueError on current NumPy, so build the object array explicitly.
    X = np.empty(len(data), dtype=object)
    for song_index, song_windows in enumerate(data):
        X[song_index] = song_windows
    return X, np.expand_dims(np.asarray(target), axis=1)

# Make sure the output folder exists before the long extraction starts, so a
# missing directory cannot make the save fail after all the work is done.
os.makedirs(savePath, exist_ok=True)

X,Y = extract_test_audio_features(list_of_audiofiles)

# os.path.join works whether or not savePath ends with a path separator.
# NOTE(review): if X is an object array, reading it back presumably needs
# np.load(..., allow_pickle=True) - confirm in the consuming notebook.
with open(os.path.join(savePath, XFeatureFileName), 'wb') as f:
  np.save(f, X)
with open(os.path.join(savePath, YFeatureFileName), 'wb') as f:
  np.save(f, Y)

0 /content/gdrive/My Drive/ML test/Test Set/Prog/-04- Knots.mp3
(5405, 13)
1 /content/gdrive/My Drive/ML test/Test Set/Prog/01 - 2112.mp3
(26561, 13)
2 /content/gdrive/My Drive/ML test/Test Set/Prog/01 - Assault & Battery Part I.mp3
(7253, 13)
3 /content/gdrive/My Drive/ML test/Test Set/Prog/01 - Beyond the Threshold, Pt. I (Helios Awakens).mp3
(8187, 13)
4 /content/gdrive/My Drive/ML test/Test Set/Prog/01 - Songs From The Wood (2003 Digital Remaster).mp3
(6365, 13)
5 /content/gdrive/My Drive/ML test/Test Set/Prog/01 - Stopa  1.mp3
(2514, 13)
6 /content/gdrive/My Drive/ML test/Test Set/Prog/01 - Untouchable, Part 1.mp3
(8002, 13)
7 /content/gdrive/My Drive/ML test/Test Set/Prog/01 Meeting of the Spirits.mp3
(8861, 13)
8 /content/gdrive/My Drive/ML test/Test Set/Prog/01 Peaches En Regalia.mp3
(4711, 13)
9 /content/gdrive/My Drive/ML test/Test Set/Prog/01-Opening Move.mp3
(12528, 13)
10 /content/gdrive/My Drive/ML test/Test Set/Prog/01. Diana.mp3
(5914, 13)
11 /content/gdrive/My Drive/ML