# Package Installations and Imports

In [6]:
!pip install essentia

Collecting essentia
  Downloading essentia-2.1b6.dev1110-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.7/13.7 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev1110


In [25]:
import essentia
import essentia.standard as es
import librosa

import numpy as np
import pandas as pd
import os
from IPython.display import clear_output
import csv
from google.colab import drive

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, accuracy_score

# **Feature Extraction**

Mounting Drive

In [26]:
drive.mount('/content/drive')
path='/content/drive/MyDrive/3020 Audio'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Feature Extraction Function

In [27]:
def extractFeatures(audioPath):
  audioSignal, sr = librosa.load(audioPath)
  #12x3=36
  chroma=librosa.feature.chroma_stft(y=audioSignal, sr=sr)
  chromafeat = np.concatenate((np.mean(chroma, axis=1), np.std(chroma, axis=1), np.var(chroma, axis=1)))

  #13x3=39
  mfcc = librosa.feature.mfcc(sr=sr, y=audioSignal, n_mfcc=13)
  mfccfeat = np.concatenate((np.mean(mfcc, axis=1), np.std(mfcc, axis=1), np.var(mfcc, axis=1)))

  #3
  rms=librosa.feature.rms(y=audioSignal)
  rmsfeat = np.concatenate((np.mean(rms, axis=1), np.std(rms, axis=1), np.var(rms, axis=1)))

  #3
  zcr=librosa.feature.zero_crossing_rate(audioSignal)
  zcrfeat = np.concatenate((np.mean(zcr, axis=1), np.std(zcr, axis=1), np.var(zcr, axis=1)))

  #1
  onsetEnv = librosa.onset.onset_strength(y=audioSignal, sr=sr)
  tempo = librosa.feature.tempo(onset_envelope=onsetEnv, sr=sr)

  #3
  centroid = librosa.feature.spectral_centroid(y=audioSignal, sr=sr)
  centroidfeat = np.concatenate((np.mean(centroid, axis=1), np.std(centroid, axis=1), np.var(centroid, axis=1)))

  #3
  bandwidth = librosa.feature.spectral_bandwidth(y=audioSignal, sr=sr)
  bandwidthfeat = np.concatenate((np.mean(bandwidth, axis=1), np.std(bandwidth, axis=1), np.var(bandwidth, axis=1)))

  #3
  flatness = librosa.feature.spectral_flatness(y=audioSignal)
  flatnessfeat = np.concatenate((np.mean(flatness, axis=1), np.std(flatness, axis=1), np.var(flatness, axis=1)))

  #3
  rolloff = librosa.feature.spectral_rolloff(y=audioSignal,sr=sr)
  rollofffeat = np.concatenate((np.mean(rolloff, axis=1), np.std(rolloff, axis=1), np.var(rolloff, axis=1)))

  #6x3=18
  tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(audioSignal), sr=sr)
  tonnetzfeat = np.concatenate((np.mean(tonnetz, axis=1), np.std(tonnetz, axis=1), np.var(tonnetz, axis=1)))

  #13x3=39
  tempogram = librosa.feature.tempogram(y=audioSignal, sr=sr)
  tgr = librosa.feature.tempogram_ratio(tg=tempogram, sr=sr)
  tgrfeat = np.concatenate((np.mean(tgr, axis=1), np.std(tgr, axis=1), np.var(tgr, axis=1)))


  audioeq = es.EqloudLoader(filename=audioPath, sampleRate=44100)()

  #predominant melody
  pitchextractor = es.PredominantPitchMelodia(frameSize=2048, hopSize=128)
  pitchvalues, pitchconfidence = pitchextractor(audioeq)
  #3
  pitchfeat = np.concatenate((np.mean(np.asarray([pitchvalues]), axis=1), np.std(np.asarray([pitchvalues]), axis=1), np.var(np.asarray([pitchvalues]), axis=1)))

  #AfterMaxToBeforeMaxEnergyRatio
  #1
  AMBMEnergyratio=es.AfterMaxToBeforeMaxEnergyRatio()(pitchvalues)

  audio= es.MonoLoader(filename=audioPath,sampleRate=22050)()

  #3 each except inharmonicity which is #3x3=9
  spectralfeatures = es.LowLevelSpectralExtractor(frameSize=2048, hopSize=512, sampleRate=22050)(audio)
  #pitch_salience
  pitchsaliencefeat = np.concatenate((np.mean(np.asarray([spectralfeatures[8]]), axis=1), np.std(np.asarray([spectralfeatures[8]]), axis=1), np.var(np.asarray([spectralfeatures[8]]), axis=1)))
  #spectral_complexity
  speccomplexfeat = np.concatenate((np.mean(np.asarray([spectralfeatures[12]]), axis=1), np.std(np.asarray([spectralfeatures[12]]), axis=1), np.var(np.asarray([spectralfeatures[12]]), axis=1)))
  #spectral_flux
  specfluxfeat = np.concatenate((np.mean(np.asarray([spectralfeatures[21]]), axis=1), np.std(np.asarray([spectralfeatures[21]]), axis=1), np.var(np.asarray([spectralfeatures[21]]), axis=1)))
  #inharmonicity
  inharmonicityfeat = np.concatenate((np.mean(np.asarray([spectralfeatures[26]]), axis=1), np.std(np.asarray([spectralfeatures[26]]), axis=1), np.var(np.asarray([spectralfeatures[26]]), axis=1)))
  #tristimulus
  tristimulusfeat = np.concatenate((np.mean(spectralfeatures[27], axis=0), np.std(spectralfeatures[27], axis=0), np.var(spectralfeatures[27], axis=0)))
  #Odd-to-Even Harmonic Energy Ratio
  O2Eharmonicfeat = np.concatenate((np.mean(np.asarray([spectralfeatures[28]]), axis=1), np.std(np.asarray([spectralfeatures[28]]), axis=1), np.var(np.asarray([spectralfeatures[28]]), axis=1)))

  key,scale,strength = es.KeyExtractor(frameSize=2048,hopSize=512,sampleRate=22050)(audio)
  key=['Ab','A','Bb','B','C','C#','D','Eb','E','F','F#','G'].index(key)

  if scale=="major":
    scale=1
  else:
    scale =-1

  #1
  tuningfreq = float(np.mean(es.TuningFrequencyExtractor(frameSize=2048,hopSize=512)(audio)))

  #1
  loudness=es.Loudness()(audio)
  #1
  danceability=es.Danceability()(audio)[0]
  #3
  logAttackT=list(es.LogAttackTime(sampleRate=22050)(audio))

  #188 total features
  features = np.concatenate((chromafeat, mfccfeat, rmsfeat, zcrfeat, tempo, \
                              centroidfeat, bandwidthfeat, flatnessfeat, rollofffeat,\
                              tonnetzfeat, tgrfeat,  np.array(logAttackT + [loudness,danceability]),\
                              pitchfeat, pitchsaliencefeat, speccomplexfeat, specfluxfeat, \
                              inharmonicityfeat, tristimulusfeat, O2Eharmonicfeat, np.asarray([AMBMEnergyratio, key, scale, strength, tuningfreq]) ))

  return features


Exporting Features to CSV

In [28]:
def export(features,name):
  path = '/content/'+name+'.csv'
  with open(path, 'w') as f:

      # using csv.writer method from CSV package
      write = csv.writer(f)

      write.writerow(headers)
      write.writerows(features)

Headers

In [29]:
headers=["songID", "chromaC_mean", "chromaC#_mean", "chromaD_mean", "chromaD#_mean", "chromaE_mean", "chromaF_mean", "chromaF#_mean", "chromaG_mean", \
         "chromaG#_mean", "chromaA_mean", "chromaA#_mean", "chromaB_mean", "chromaC_std", "chromaC#_std", "chromaD_std", "chromaD#_std", "chromaE_std", \
         "chromaF_std", "chromaF#_std", "chromaG_std", "chromaG#_std", "chromaA_std", "chromaA#_std", "chromaB_std", "chromaC_var", "chromaC#_var", "chromaD_var", \
         "chromaD#_var", "chromaE_var", "chromaF_var", "chromaF#_var", "chromaG_var", "chromaG#_var", "chromaA_var", "chromaA#_var", "chromaB_var", "mfcc1_mean", \
         "mfcc2_mean", "mfcc3_mean", "mfcc4_mean", "mfcc5_mean", "mfcc6_mean", "mfcc7_mean", "mfcc8_mean", "mfcc9_mean", "mfcc10_mean", "mfcc11_mean", "mfcc12_mean", \
         "mfcc13_mean", "mfcc1_std", "mfcc2_std", "mfcc3_std", "mfcc4_std", "mfcc5_std", "mfcc6_std", "mfcc7_std", "mfcc8_std", "mfcc9_std", "mfcc10_std", "mfcc11_std", \
         "mfcc12_std", "mfcc13_std", "mfcc1_var", "mfcc2_var", "mfcc3_var", "mfcc4_var", "mfcc5_var", "mfcc6_var", "mfcc7_var", "mfcc8_var", "mfcc9_var", "mfcc10_var", "mfcc11_var", \
         "mfcc12_var", "mfcc13_var", "rms_mean", "rms_std", "rms_var", "zcr_mean", "zcr_std", "zcr_var", "tempo", "centroid_mean", "centroid_std", "centroid_var", "bandwidth_mean", \
         "bandwidth_std", "bandwidth_var", "flatness_mean", "flatness_std", "flatness_var", "rolloff_mean", "rolloff_std", "rolloff_var", "tonnetz0_mean", "tonnetz1_mean", \
         "tonnetz2_mean", "tonnetz3_mean", "tonnetz4_mean", "tonnetz5_mean", "tonnetz0_std", "tonnetz1_std", "tonnetz2_std", "tonnetz3_std", "tonnetz4_std", "tonnetz5_std", \
         "tonnetz0_var", "tonnetz1_var", "tonnetz2_var", "tonnetz3_var", "tonnetz4_var", "tonnetz5_var", "tgr0_mean", "tgr1_mean", "tgr2_mean", "tgr3_mean", "tgr4_mean", "tgr5_mean",\
         "tgr6_mean", "tgr7_mean", "tgr8_mean", "tgr9_mean", "tgr10_mean", "tgr11_mean", "tgr12_mean", "tgr0_std", "tgr1_std", "tgr2_std", "tgr3_std", "tgr4_std", "tgr5_std", "tgr6_std",\
         "tgr7_std", "tgr8_std", "tgr9_std", "tgr10_std", "tgr11_std", "tgr12_std", "tgr0_var", "tgr1_var", "tgr2_var", "tgr3_var", "tgr4_var", "tgr5_var", "tgr6_var", "tgr7_var", "tgr8_var", \
         "tgr9_var", "tgr10_var", "tgr11_var", "tgr12_var", "logattacktime", "attackstart", "attackstop", "loudness", "danceability", "predominantpitch_mean", "predominantpitch_std",\
         "predominantpitch_var", "pitchsalience_mean", "pitchsalience_std", "pitchsalience_var", "complexity_mean", "complexity_std", "complexity_var", "flux_mean", "flux_std", "flux_var",\
         "inharmonicity_mean", "inharmonicity_std", "inharmonicity_var", "tristimulus1_mean", "tristimulus2_mean", "tristimulus3_mean", "tristimulus1_std","tristimulus2_std","tristimulus3_std",\
         "tristimulus1_var", "tristimulus2_var", "tristimulus3_var", "oddevenratio_mean", "oddevenratio_std", "oddevenratio_var", "AMBMenergyratio", "key", "scale", "strength", "tuningfrequency",\
         "sentiment"]

Extracting Training Features

In [30]:
trainfeatures=[[],[],[],[]]
for k in range(1,5):
  path='/content/drive/MyDrive/3020 Audio/Train/Q'+str(k)
  i=0
  for filename in os.scandir(path):
      if filename.is_file():
        trainfeatures[k-1].append([filename.name[:-4]] + list(extractFeatures(filename.path)) + [k])
        i+=1
        clear_output()
        print("Train Extraction")
        print("Q"+str(k)+": ",i,"/225")
        done=round(((k-1)*168+i)/6.72,1)
        print(done,"% Finished")


export(trainfeatures[0]+trainfeatures[1]+trainfeatures[2]+trainfeatures[3],"Features_half_train")
export(trainfeatures[0]+trainfeatures[1],"Features_tophalf_train")
export(trainfeatures[2]+trainfeatures[3],"Features_bottomhalf_train")

Train Extraction
Q4:  168 /225
93.7 % Finished


Extracting Testing Features

In [38]:
testfeatures=[[],[],[],[]]
for k in range(1,5):
  path='/content/drive/MyDrive/3020 Audio/Test/Q'+str(k)
  i=0
  for filename in os.scandir(path):
      if filename.is_file():
        testfeatures[k-1].append([filename.name[:-4]] + list(extractFeatures(filename.path)) + [k])
        i+=1
        clear_output()
        print("Test Extraction")
        print("Q"+str(k)+": ",i,"/57")
        done=round(((k-1)*57+i)/2.28,1)
        print(done,"% Finished")


export(testfeatures[0]+testfeatures[1]+testfeatures[2]+testfeatures[3],"Features_half_test")
export(testfeatures[0]+testfeatures[1],"Features_tophalf_test")
export(testfeatures[2]+testfeatures[3],"Features_bottomhalf_test")

Test Extraction
Q4:  57 /225
100.0 % Finished
