# Package Installation and Imports

In [1]:
!pip install essentia

Collecting essentia
  Downloading essentia-2.1b6.dev1110-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.7/13.7 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev1110


In [2]:
!pip install scikit-learn==1.3.1

Collecting scikit-learn==1.3.1
  Downloading scikit_learn-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
Successfully installed scikit-learn-1.3.1


In [3]:
import essentia
import essentia.standard as es
import essentia.streaming
import librosa
import json
import soundfile as sf

import numpy as np
import pandas as pd
import csv
from google.colab import drive
import joblib
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, accuracy_score, _scorer

# Music Sentiment Analysis

In [24]:
from google.colab import files

# Ask the user for an audio file and remember the name of the first upload.
uploaded = files.upload()
if not uploaded:
  # A cancelled dialog yields an empty mapping; fail with a readable message
  # instead of the bare StopIteration that next(iter(...)) would raise.
  raise ValueError("No file was uploaded; choose an audio file to analyse.")
filename = next(iter(uploaded))

Saving MT0004988249.mp3 to MT0004988249.mp3


In [5]:
def _summaryStats(values):
  """Concatenate the mean, std and variance taken along axis 1 of `values`."""
  return np.concatenate((np.mean(values, axis=1), np.std(values, axis=1), np.var(values, axis=1)))


def extractFeatures(audioSignal, sr, audioPath='temp.mp3'):
  """Build the flat feature vector for one audio clip.

  The librosa descriptors (chroma, MFCC, RMS, zero-crossing rate, tempo,
  spectral centroid/bandwidth/flatness/rolloff, tonnetz, tempogram ratio) are
  computed from `audioSignal`. The essentia descriptors (log attack time,
  pitch salience, loudness, danceability) are computed from the file at
  `audioPath`, NOT from `audioSignal`, so the caller must have written the
  same clip to that path beforehand.

  Parameters:
    audioSignal: mono audio samples passed to the librosa feature functions.
    sr: sample rate of `audioSignal`, forwarded to librosa.
    audioPath: audio file read by essentia's MonoLoader. Defaults to
      'temp.mp3', the file the prediction cell writes.

  Returns:
    1-D numpy array in this order: chroma, mfcc, rms, zcr, tempo, centroid,
    bandwidth, flatness, rolloff, tonnetz, tempogram-ratio statistics, then
    the three LogAttackTime outputs, pitch salience, loudness, danceability.
    The prediction cell labels these values with its `headers` list.
  """
  chromafeat = _summaryStats(librosa.feature.chroma_stft(y=audioSignal, sr=sr))
  mfccfeat = _summaryStats(librosa.feature.mfcc(sr=sr, y=audioSignal, n_mfcc=13))
  rmsfeat = _summaryStats(librosa.feature.rms(y=audioSignal))
  zcrfeat = _summaryStats(librosa.feature.zero_crossing_rate(audioSignal))

  onsetEnv = librosa.onset.onset_strength(y=audioSignal, sr=sr)
  tempo = librosa.feature.tempo(onset_envelope=onsetEnv, sr=sr)

  centroidfeat = _summaryStats(librosa.feature.spectral_centroid(y=audioSignal, sr=sr))
  bandwidthfeat = _summaryStats(librosa.feature.spectral_bandwidth(y=audioSignal, sr=sr))
  flatnessfeat = _summaryStats(librosa.feature.spectral_flatness(y=audioSignal))
  rollofffeat = _summaryStats(librosa.feature.spectral_rolloff(y=audioSignal, sr=sr))

  # Tonnetz is computed on the harmonic component of the signal only.
  tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(audioSignal), sr=sr)
  tonnetzfeat = _summaryStats(tonnetz)

  tempogram = librosa.feature.tempogram(y=audioSignal, sr=sr)
  tgrfeat = _summaryStats(librosa.feature.tempogram_ratio(tg=tempogram, sr=sr))

  # The essentia descriptors always use 22050 Hz, independent of `sr`.
  audio = es.MonoLoader(filename=audioPath, sampleRate=22050)()
  window = es.Windowing(type='hann')
  spectrum = es.Spectrum()

  # Collect the per-frame spectra and join them once; growing an array with
  # np.concatenate inside the loop copies all previous data on every frame.
  frameSpectra = [
      spectrum(window(frame))
      for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True)
  ]
  # The leading empty float64 array keeps the dtype and the zero-frame result
  # (an empty array) the same as the previous incremental concatenation.
  spect = np.concatenate([np.array([]), *frameSpectra])

  # Pitch salience is evaluated on the concatenation of all frame spectra.
  pitchSalience = es.PitchSalience(sampleRate=22050)(spect)
  loudness = es.Loudness()(audio)
  # Only the first output of Danceability is kept.
  danceability = es.Danceability()(audio)[0]
  logAttackT = list(es.LogAttackTime(sampleRate=22050)(audio))

  features = np.concatenate((
      chromafeat, mfccfeat, rmsfeat, zcrfeat, tempo, centroidfeat, bandwidthfeat,
      flatnessfeat, rollofffeat, tonnetzfeat, tgrfeat,
      np.array(logAttackT + [pitchSalience, loudness, danceability])))
  return features


In [6]:
def SVM1(selfeaturedata):
  """Run the first-stage classifier on the selected features.

  The model is read from "Half_Classifier_Model.pkl" on every call, the
  input is converted to a numpy array, and the prediction for the first row
  is returned. The prediction cell routes a result of 1 to SVM2 and a result
  of 0 to SVM3.
  """
  # joblib.load unpickles the file, so only trusted model files belong here.
  classifier = joblib.load("Half_Classifier_Model.pkl")
  samples = np.asarray(selfeaturedata)
  labels = classifier.predict(samples)
  return labels[0]

In [20]:
def SVM2(selfeaturedata):
  """Run the second-stage classifier used after SVM1 predicts 1.

  The model is read from "TopHalf_Classifier_Model.pkl" on every call, the
  input is converted to a numpy array, and the prediction for the first row
  is returned. The prediction cell prints Q1 for a result of 1 and Q2 for 0.
  """
  # joblib.load unpickles the file, so only trusted model files belong here.
  classifier = joblib.load("TopHalf_Classifier_Model.pkl")
  samples = np.asarray(selfeaturedata)
  labels = classifier.predict(samples)
  return labels[0]

In [21]:
def SVM3(selfeaturedata):
  """Run the second-stage classifier used after SVM1 predicts 0.

  The model is read from "BottomHalf_Classifier_Model.pkl" on every call,
  the input is converted to a numpy array, and the prediction for the first
  row is returned. The prediction cell prints Q3 for a result of 1 and Q4
  for 0.
  """
  # joblib.load unpickles the file, so only trusted model files belong here.
  classifier = joblib.load("BottomHalf_Classifier_Model.pkl")
  samples = np.asarray(selfeaturedata)
  labels = classifier.predict(samples)
  return labels[0]

In [6]:
# Hard-coded input file for the prediction cell.
# NOTE(review): when the cells run top to bottom this assignment replaces the
# `filename` chosen in the upload cell, so the uploaded file is ignored. The
# recorded execution counts show this cell was run before the upload; confirm
# whether it is meant to be a fallback or an override.
filename = 'MT0000540286.mp3'

In [25]:
# Prediction pipeline: load the clip, extract its features, then run the
# two-stage SVM cascade and print the predicted quadrant label (Q1-Q4).
TARGET_SR = 22050   # sample rate used for loading and for the temp file
MAX_SECONDS = 30    # only the first 30 seconds of the clip are analysed

audio_file = filename
# librosa.load resamples to `sr` while decoding (22050 is also its default),
# so no separate resampling step is needed afterwards.
y, sr = librosa.load(audio_file, sr=TARGET_SR, mono=True)
# Slicing is a no-op for clips that are already shorter than the limit.
y = y[:int(MAX_SECONDS * sr)]

# extractFeatures reads its essentia input from this file by default.
output_file = "temp.mp3"
sf.write(output_file, y, sr)


def _stat_columns(names):
  """Expand base names into `<name>_mean/_std/_var` columns, grouped by statistic."""
  return [f"{name}_{stat}" for stat in ("mean", "std", "var") for name in names]


# Column names, in the same order as the vector returned by extractFeatures.
pitch_classes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
headers = (
    _stat_columns([f"chroma{pitch}" for pitch in pitch_classes])
    + _stat_columns([f"mfcc{i}" for i in range(1, 14)])
    + _stat_columns(["rms"])
    + _stat_columns(["zcr"])
    + ["tempo"]
    + _stat_columns(["centroid"])
    + _stat_columns(["bandwidth"])
    + _stat_columns(["flatness"])
    + _stat_columns(["rolloff"])
    + _stat_columns([f"tonnetz{i}" for i in range(6)])
    + _stat_columns([f"tgr{i}" for i in range(13)])
    + ["logattacktime", "attackstart", "attackstop", "pitchsalience", "loudness", "danceability"]
)
assert len(headers) == 157, "header list must stay aligned with extractFeatures"

featuredata = np.asarray([list(extractFeatures(y, sr))])
featuredata = pd.DataFrame(featuredata, columns=headers)

# Each classifier uses its own subset of columns, listed in the JSON file
# under the keys "SVM1", "SVM2" and "SVM3".
with open('SelectedHeaders.json') as f:
  selectedheaders = json.load(f)

# Stage 1 picks which second-stage classifier to run; stage 2 picks the label.
SVM1predict = SVM1(featuredata[selectedheaders["SVM1"]])
if SVM1predict == 1:
  SVM2predict = SVM2(featuredata[selectedheaders["SVM2"]])
  if SVM2predict == 1:
    print('Q1')
  elif SVM2predict == 0:
    print('Q2')
elif SVM1predict == 0:
  SVM3predict = SVM3(featuredata[selectedheaders["SVM3"]])
  if SVM3predict == 1:
    print('Q3')
  elif SVM3predict == 0:
    print('Q4')





Q3
