In [2]:
# Step 1: Load necessary libraries
# librosa, numpy, pandas, sklearn, etc.

import librosa
import numpy as np
import pandas as pd 
import os
import sklearn
import audioread
import math

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

import matplotlib.pyplot as plt

from sklearn.neighbors import NearestNeighbors
from IPython.display import Audio, display

from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

import joblib

In [37]:
# Step 2: Define a function to extract music features
# You may consider MFCC, Chroma STFT, Spectral Contrast, Zero Crossing Rate, Tempo, etc
# Be sure to have the feature dimension as large as possible, to get higher performance
# librosa.feature and librosa.beat may contain many useful functions

def extract_features(audio_file):
    with audioread.audio_open(audio_file) as f:
        sr = f.samplerate

    y, sr = librosa.load(audio_file, sr=sr, duration=44)
    
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_stft_mean = np.mean(chroma_stft)
    chroma_stft_var = np.var(chroma_stft)
    
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_centroid_mean = np.mean(spectral_centroid)
    spectral_centroid_var = np.var(spectral_centroid)
    
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_bandwidth_mean = np.mean(spectral_bandwidth)
    spectral_bandwidth_var = np.var(spectral_bandwidth)
    
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spectral_contrast_mean = np.mean(spectral_contrast)
    spectral_contrast_var = np.var(spectral_contrast)
    
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    zero_crossing_rate_mean = np.mean(zero_crossing_rate)
    zero_crossing_rate_var = np.var(zero_crossing_rate)
    
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfccs_mean = np.mean(mfccs, axis=1)
    mfccs_var = np.var(mfccs, axis=1)
    
    features = {
        'tempo': tempo.reshape(1,)[0],
        'beat_frames': beat_frames.mean(axis=0),
        'chroma_stft_mean': chroma_stft_mean,
        'chroma_stft_var': chroma_stft_var,
        'zero_crossing_rate_mean': zero_crossing_rate_mean,
        'zero_crossing_rate_var': zero_crossing_rate_var,
        'spectral_centroid_mean': spectral_centroid_mean,
        'spectral_centroid_var': spectral_centroid_var,
        'spectral_bandwidth_mean': spectral_bandwidth_mean,
        'spectral_bandwidth_var': spectral_bandwidth_var,
        'spectral_contrast_mean': spectral_contrast_mean,
        'spectral_contrast_var': spectral_contrast_var,
        'mfcc1_mean': mfccs_mean[0],
        'mfcc1_var': mfccs_var[0],
        'mfcc2_mean': mfccs_mean[1],
        'mfcc2_var': mfccs_var[1],
        'mfcc3_mean': mfccs_mean[2],
        'mfcc3_var': mfccs_var[2],
        'mfcc4_mean': mfccs_mean[3],
        'mfcc4_var': mfccs_var[3],
        'mfcc5_mean': mfccs_mean[4],
        'mfcc5_var': mfccs_var[4],
        'mfcc6_mean': mfccs_mean[5],
        'mfcc6_var': mfccs_var[5],
        'mfcc7_mean': mfccs_mean[6],
        'mfcc7_var': mfccs_var[6],
        'mfcc8_mean': mfccs_mean[7],
        'mfcc8_var': mfccs_var[7],
        'mfcc9_mean': mfccs_mean[8],
        'mfcc9_var': mfccs_var[8],
        'mfcc10_mean': mfccs_mean[9],
        'mfcc10_var': mfccs_var[9],
        'mfcc11_mean': mfccs_mean[10],
        'mfcc11_var': mfccs_var[10],
        'mfcc12_mean': mfccs_mean[11],
        'mfcc12_var': mfccs_var[11],
        'mfcc13_mean': mfccs_mean[12],
        'mfcc13_var': mfccs_var[12],
        'mfcc14_mean': mfccs_mean[13],
        'mfcc14_var': mfccs_mean[13],
        'mfcc15_mean': mfccs_mean[14],
        'mfcc15_var': mfccs_mean[14],
        'mfcc16_mean': mfccs_mean[15],
        'mfcc16_var': mfccs_mean[15],
        'mfcc17_mean': mfccs_mean[16],
        'mfcc17_var': mfccs_mean[16],
        'mfcc18_mean': mfccs_mean[17],
        'mfcc18_var': mfccs_mean[17],
        'mfcc19_mean': mfccs_mean[18],
        'mfcc19_var': mfccs_mean[18],
        'mfcc20_mean': mfccs_mean[19],
        'mfcc20_var': mfccs_mean[19],
    }

    return features


In [None]:
# from librosa.beat import beat_track
# Step 3: Iterate through each genre folder and extract features for each song
# Remember to mount Google Drive before accessing the dataset
# Useful python funtions: os.listdir, pandas.DataFrame

feature_names = [
    'tempo',
    'beat_frames',
    'chroma_stft_mean',
    'chroma_stft_var',
    'zero_crossing_rate_mean',
    'zero_crossing_rate_var',
    'spectral_centroid_mean',
    'spectral_centroid_var',
    'spectral_bandwidth_mean',
    'spectral_bandwidth_var',
    'spectral_contrast_mean',
    'spectral_contrast_var',
    'mfcc1_mean',
    'mfcc1_var',
    'mfcc2_mean',
    'mfcc2_var',
    'mfcc3_mean',
    'mfcc3_var',
    'mfcc4_mean',
    'mfcc4_var',
    'mfcc5_mean',
    'mfcc5_var',
    'mfcc6_mean',
    'mfcc6_var',
    'mfcc7_mean',
    'mfcc7_var',
    'mfcc8_mean',
    'mfcc8_var',
    'mfcc9_mean',
    'mfcc9_var',
    'mfcc10_mean',
    'mfcc10_var',
    'mfcc11_mean',
    'mfcc11_var',
    'mfcc12_mean',
    'mfcc12_var',
    'mfcc13_mean',
    'mfcc13_var',
    'mfcc14_mean',
    'mfcc14_var',
    'mfcc15_mean',
    'mfcc15_var',
    'mfcc16_mean',
    'mfcc16_var',
    'mfcc17_mean',
    'mfcc17_var',
    'mfcc18_mean',
    'mfcc18_var',
    'mfcc19_mean',
    'mfcc19_var',
    'mfcc20_mean',
    'mfcc20_var'
]

features_df = pd.DataFrame(columns=['arousal', 'valence', 'file_name'] + feature_names)

annotation = pd.read_csv('annotations/static_annotations.csv')
min_duration = []

root_folder = 'clips_45seconds'
for file_name in os.listdir(root_folder):
    if not file_name.endswith(".mp3"):
        continue
    file_path = os.path.join(root_folder, file_name)
    index_value = int(os.path.splitext(file_name)[0])
    annotation_row = annotation.loc[annotation['song_id'] == index_value]
    if not annotation_row.empty:
        arousal = annotation_row.iloc[0]['mean_arousal']
        valence = annotation_row.iloc[0]['mean_valence']
        print(file_path)
        # print(f'arousal: {arousal}, valence: {valence}')
        # with audioread.audio_open(file_path) as f:
        #     sr = f.samplerate
        # y,sr = librosa.load(file_path, sr=sr)
        # duration = librosa.get_duration(y=y)
        # print(duration)
        # if duration < 45:
        #     print(file_name)
        #     min_duration.append(file_name)

        features = extract_features(file_path)
        print(features)
  
        row = {'arousal': arousal, 'valence': valence, 'file_name': file_name}
        for i, feature_name in enumerate(feature_names):
            row[feature_name] = features[feature_name]
            
        features_df = features_df.append(row, ignore_index=True)

# print(min_duration)
features_df.to_csv('my_mood_features.csv', index=False)

In [7]:
features_df = pd.read_csv('my_mood_features.csv')
features_df

scaler = MinMaxScaler(feature_range=(-0.5, 0.5))
features_df[['arousal']] = scaler.fit_transform(features_df[['arousal']])

train_df, test_df = train_test_split(features_df, test_size=0.2, random_state=42)

scaler = StandardScaler()

y_train = train_df['arousal']
train_df.drop(['arousal','file_name', 'valence'], axis=1, inplace=True)
X_train_df = train_df

y_test = test_df['arousal']
y_test_filename = test_df['file_name']
test_df.drop(['arousal','file_name', 'valence'], axis=1, inplace=True)
X_test_df = test_df

X_train = scaler.fit_transform(X_train_df)
X_test = scaler.transform(X_test_df)

print(X_train.shape)
print(X_test.shape)

lr = LinearRegression()
lr.fit(X_train, y_train)

test_predictions = lr.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('LR')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

br = BayesianRidge()
br.fit(X_train, y_train)

test_predictions = br.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('NB')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

# Create an instance of the SVM regressor
svr = SVR(kernel='linear', C=1.0, epsilon=0.1)

# Train the model on the training data
svr.fit(X_train, y_train)

# Test the model on the test data
test_predictions = svr.predict(X_test)

# Evaluate the performance of the model
mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('SVM')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

test_predictions = rf.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('RF')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

(595, 52)
(149, 52)
LR
RMSE: 0.1288626565416605
R-squared score: 0.5996583292717952
NB
RMSE: 0.12986128675931027
R-squared score: 0.5934293345630169
SVM
RMSE: 0.13112723938949541
R-squared score: 0.5854637893490551
RF
RMSE: 0.13284915112407905
R-squared score: 0.5745052515803315


In [8]:
features_df[['arousal']]

Unnamed: 0,arousal
0,-0.147059
1,0.326794
2,-0.235294
3,0.220588
4,0.088235
...,...
739,0.235294
740,-0.044118
741,0.147059
742,0.205882


In [3]:
features_df = pd.read_csv('my_mood_features.csv')
features_df

# Scale the 'arousal' column
scaler = MinMaxScaler(feature_range=(-0.5, 0.5))
features_df[['arousal']] = scaler.fit_transform(features_df[['arousal']])

train_df, test_df = train_test_split(features_df, test_size=0.2, random_state=42)

# scaler = StandardScaler()

y_train = train_df['arousal']
train_df.drop(['arousal','file_name', 'valence'], axis=1, inplace=True)
X_train_df = train_df

y_test = test_df['arousal']
y_test_filename = test_df['file_name']
test_df.drop(['arousal','file_name', 'valence'], axis=1, inplace=True)
X_test_df = test_df

# X_train = scaler.fit_transform(X_train_df)
# X_test = scaler.transform(X_test_df)

X_train = np.array(X_train_df.values.tolist())

X_test = np.array(X_test_df.values.tolist())

print(X_train.shape)
print(X_test.shape)

lr = LinearRegression()
lr.fit(X_train, y_train)

test_predictions = lr.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('LR')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

br = BayesianRidge()
br.fit(X_train, y_train)

test_predictions = br.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('NB')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

# # Create an instance of the SVM regressor
# svr = SVR(kernel='linear', C=1.0, epsilon=0.1)

# # Train the model on the training data
# svr.fit(X_train, y_train)

# # Test the model on the test data
# test_predictions = svr.predict(X_test)

# # Evaluate the performance of the model
# mse = mean_squared_error(y_test, test_predictions)
# r2 = r2_score(y_test, test_predictions)

# print('SVM')
# rmse = math.sqrt(mse)
# print(f'RMSE: {rmse}')
# print(f'R-squared score: {r2}')

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

test_predictions = rf.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('RF')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

(595, 52)
(149, 52)
LR
RMSE: 0.12886265654130158
R-squared score: 0.5996583292740252
NB
RMSE: 0.1328673278473822
R-squared score: 0.5743888092945941
RF
RMSE: 0.13288370940295158
R-squared score: 0.5742838534217988


In [4]:
# Save the model to disk
filename = 'LR_arousal.sav'
joblib.dump(lr, filename)


['LR_arousal.sav']

In [10]:
features_df = pd.read_csv('my_mood_features.csv')
features_df

scaler = MinMaxScaler(feature_range=(-0.5, 0.5))
features_df[['valence']] = scaler.fit_transform(features_df[['valence']])

train_df, test_df = train_test_split(features_df, test_size=0.2, random_state=42)

scaler = StandardScaler()

y_train = train_df['valence']
train_df.drop(['arousal','file_name', 'valence'], axis=1, inplace=True)
X_train_df = train_df

y_test = test_df['valence']
y_test_filename = test_df['file_name']
test_df.drop(['arousal','file_name', 'valence'], axis=1, inplace=True)
X_test_df = test_df

X_train = scaler.fit_transform(X_train_df)
X_test = scaler.transform(X_test_df)

print(X_train.shape)
print(X_test.shape)

lr = LinearRegression()
lr.fit(X_train, y_train)

test_predictions = lr.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('LR')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

br = BayesianRidge()
br.fit(X_train, y_train)

test_predictions = br.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('NB')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

# Create an instance of the SVM regressor
svr = SVR(kernel='linear', C=1.0, epsilon=0.1)

# Train the model on the training data
svr.fit(X_train, y_train)

# Test the model on the test data
test_predictions = svr.predict(X_test)

# Evaluate the performance of the model
mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('SVM')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

test_predictions = rf.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('RF')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

(595, 52)
(149, 52)
LR
RMSE: 0.17868104273710395
R-squared score: 0.14850482074723537
NB
RMSE: 0.17496229541987862
R-squared score: 0.18357898958257268
SVM
RMSE: 0.1815039308072443
R-squared score: 0.12138764310991801
RF
RMSE: 0.16905860752281088
R-squared score: 0.23774582437466518


In [5]:
features_df = pd.read_csv('my_mood_features.csv')
features_df

# Scale the 'arousal' column
scaler = MinMaxScaler(feature_range=(-0.5, 0.5))
features_df[['valence']] = scaler.fit_transform(features_df[['valence']])

train_df, test_df = train_test_split(features_df, test_size=0.2, random_state=42)

# scaler = StandardScaler()

y_train = train_df['valence']
train_df.drop(['arousal','file_name', 'valence'], axis=1, inplace=True)
X_train_df = train_df

y_test = test_df['valence']
y_test_filename = test_df['file_name']
test_df.drop(['arousal','file_name', 'valence'], axis=1, inplace=True)
X_test_df = test_df

# X_train = scaler.fit_transform(X_train_df)
# X_test = scaler.transform(X_test_df)

X_train = np.array(X_train_df.values.tolist())

X_test = np.array(X_test_df.values.tolist())

print(X_train.shape)
print(X_test.shape)

lr = LinearRegression()
lr.fit(X_train, y_train)

test_predictions = lr.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('LR')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

br = BayesianRidge()
br.fit(X_train, y_train)

test_predictions = br.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('NB')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

# # Create an instance of the SVM regressor
# svr = SVR(kernel='linear', C=1.0, epsilon=0.1)

# # Train the model on the training data
# svr.fit(X_train, y_train)

# # Test the model on the test data
# test_predictions = svr.predict(X_test)

# # Evaluate the performance of the model
# mse = mean_squared_error(y_test, test_predictions)
# r2 = r2_score(y_test, test_predictions)

# print('SVM')
# rmse = math.sqrt(mse)
# print(f'RMSE: {rmse}')
# print(f'R-squared score: {r2}')

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

test_predictions = rf.predict(X_test)

mse = mean_squared_error(y_test, test_predictions)
r2 = r2_score(y_test, test_predictions)

print('RF')
rmse = math.sqrt(mse)
print(f'RMSE: {rmse}')
print(f'R-squared score: {r2}')

(595, 52)
(149, 52)
LR
RMSE: 0.17868104275533178
R-squared score: 0.1485048205735079
NB
RMSE: 0.168696284434891
R-squared score: 0.24100961953432176
RF
RMSE: 0.16905860752281088
R-squared score: 0.23774582437466518


In [6]:
# Save the model to disk
filename = 'RF_valence.sav'
joblib.dump(rf, filename)

['RF_valence.sav']