In [2]:
#Necessary imports for the feature_extraction
import os
import librosa
import numpy as np
import pandas as pd
import soundfile as sf
from pydub import AudioSegment

#PyAudioAnalysis
import pyAudioAnalysis as pyaudio
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import MidTermFeatures

In [3]:
# feature extraction data structures
features = []
existing_features = []
missing_features = []
previous_features = None

In [4]:
# check and save existing basic features
# check and save existing basic features
if os.path.exists("../data/features.csv"):
    previous_features = pd.read_csv("../data/features.csv")
    # drop all rows with any number of NAs
    previous_features.dropna(how = 'any')
    previous_features['song'] = previous_features['title'] + " - " + previous_features['artist']
    existing = previous_features['song'].tolist()

In [6]:
# feature extraction notebook should be in the same folder as the mp3s
for song in os.scandir("C:\\Users\\HP\\OneDrive\\Music\\ProjectMusic"):
    if song.path.endswith(".mp3") and song.is_file():
        file_name = song.path[2:]
        song_name = file_name[38:len(file_name) - 4 ]

        if song_name not in existing_features:
            # add it to the missing list so we can use pyAudioAnalysis on it 
            missing_features.append(file_name)

            #tempo of the song
            waveform, sample_rate = librosa.load(file_name)
            tempo, beat_frames = librosa.beat.beat_track(y=waveform, sr=sample_rate)
             # get the chroma number of the song
            beat_times = librosa.frames_to_time(beat_frames, sr=sample_rate)
            y_harmonic, y_percussive = librosa.effects.hpss(waveform)
            chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sample_rate)
            beat_chroma = librosa.util.sync(chromagram, beat_frames, aggregate = np.median)
            # make beat chroma into a DataFrame and calculate the diff
            chroma_df = pd.DataFrame(beat_chroma)
            diff_values = chroma_df.diff()
            diff_mean = diff_values.mean(axis = 0, skipna = True)
            chroma_num = sum(diff_mean) / len(diff_mean)

            # add song, tempo and chroma number to the features list
            print ([song_name, tempo, chroma_num])
            features.append([song_name, tempo, chroma_num])

['boy pablo - i hope she loves me back', 172.265625, -0.014764327699690405]
['Chase Shakur - Pray Slow', 80.74951171875, -0.0004073584414761695]
['Jakob Ogawa - All Your Love', 86.1328125, -0.012112732377585773]
['Joy Again - Looking Out For You', 99.38401442307692, 0.009437577253664455]
['Kali Uchis - Feel Like A Fool', 107.666015625, -0.009557425001545506]
['Kali Uchis - i want war (BUT I NEED PEACE)', 89.10290948275862, -0.04676906906754075]
['Kali Uchis - Telepatía', 83.35433467741936, 0.021440045397004556]
['Plums - Parking Lots', 151.99908088235293, 0.018691798830672512]
['Silk Skin Lovers - Annie Chases a Butterfly', 129.19921875, 0.014116433212069585]
['Silk Skin Lovers - Moon, 1AM', 107.666015625, -0.022236840030682846]
['Strawberry Guy - F Song', 172.265625, -0.015071489046958512]
['TV Girl - Birds Dont Sing', 107.666015625, 0.014082066760680278]
['TV Girl - Blue Hair (sped up)', 107.666015625, -0.005826635627027617]
['TV Girl - Blue Hair', 135.99917763157896, 0.0214357170966

In [7]:
features_df = pd.DataFrame(features, columns = ['song', 'tempo', 'chroma_number'])

In [8]:
features_df = features_df.sort_values(by = ['song'])

In [9]:
features_df.reset_index(drop = True, inplace = True)

In [10]:
features_df

Unnamed: 0,song,tempo,chroma_number
0,Chase Shakur - Pray Slow,80.749512,-0.000407
1,Jakob Ogawa - All Your Love,86.132812,-0.012113
2,Joy Again - Looking Out For You,99.384014,0.009438
3,Kali Uchis - Feel Like A Fool,107.666016,-0.009557
4,Kali Uchis - Telepatía,83.354335,0.02144
5,Kali Uchis - i want war (BUT I NEED PEACE),89.102909,-0.046769
6,Plums - Parking Lots,151.999081,0.018692
7,Silk Skin Lovers - Annie Chases a Butterfly,129.199219,0.014116
8,"Silk Skin Lovers - Moon, 1AM",107.666016,-0.022237
9,Strawberry Guy - F Song,172.265625,-0.015071


In [11]:
mid_term_window = 1
mid_term_step = 1
short_term_window = 0.05
short_term_step = 0.05
# use compute_beat = True if we want the extra beat features
pyaudio_feat, files, feat_names = MidTermFeatures.directory_feature_extraction("C:\\Users\\HP\\OneDrive\\Music\\ProjectMusic", 
                                                                               mid_term_window, 
                                                                               mid_term_step, 
                                                                               short_term_window, 
                                                                               short_term_step,
                                                                               False)

Analyzing file 1 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Chase Shakur - Pray Slow.mp3
Analyzing file 2 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Jakob Ogawa - All Your Love.mp3
Analyzing file 3 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Joy Again - Looking Out For You.mp3
Analyzing file 4 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Kali Uchis - Feel Like A Fool.mp3
Analyzing file 5 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Kali Uchis - Telepatía.mp3
Analyzing file 6 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Kali Uchis - i want war (BUT I NEED PEACE).mp3
Analyzing file 7 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Plums - Parking Lots.mp3
Analyzing file 8 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Silk Skin Lovers - Annie Chases a Butterfly.mp3
Analyzing file 9 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Silk Skin Lovers - Moon, 1AM.mp3
Analyzing file 10 of 22: C:\Users\HP\OneDrive\Music\ProjectMusic\Strawberry Guy - F Song.mp3
Analyzing file 11 of

In [12]:
if len(features_df) == 1:
    # special case when only features from one song are extracted
    nn_features_df = pd.DataFrame([pyaudio_feat], columns = feat_names)
else:
    nn_features_df = pd.DataFrame(pyaudio_feat, columns = feat_names)
nn_features_df['song'] = features_df['song']

In [13]:
nn_features_df.head(5)

Unnamed: 0,zcr_mean,energy_mean,energy_entropy_mean,spectral_centroid_mean,spectral_spread_mean,spectral_entropy_mean,spectral_flux_mean,spectral_rolloff_mean,mfcc_1_mean,mfcc_2_mean,...,delta chroma_5_std,delta chroma_6_std,delta chroma_7_std,delta chroma_8_std,delta chroma_9_std,delta chroma_10_std,delta chroma_11_std,delta chroma_12_std,delta chroma_std_std,song
0,0.022947,0.10652,3.149726,0.100529,0.163562,0.141208,0.010873,0.0328,-25.234294,2.963321,...,0.014283,0.009444,0.054343,0.003779,0.005269,0.010572,0.022736,0.004293,0.017441,Chase Shakur - Pray Slow
1,0.032611,0.067622,3.093993,0.09854,0.154144,0.190479,0.013278,0.045232,-24.667007,1.887863,...,0.024552,0.007863,0.066818,0.010377,0.017183,0.021579,0.041195,0.003499,0.019393,Jakob Ogawa - All Your Love
2,0.045941,0.075397,3.109662,0.112675,0.152273,0.383828,0.00602,0.064533,-23.623992,2.039796,...,0.018687,0.013854,0.036684,0.00749,0.0137,0.024799,0.025054,0.004519,0.010691,Joy Again - Looking Out For You
3,0.055727,0.084506,3.166845,0.141314,0.174628,0.389772,0.005715,0.07348,-22.589133,1.727771,...,0.029955,0.005676,0.057613,0.004056,0.009966,0.015739,0.042998,0.009062,0.014815,Kali Uchis - Feel Like A Fool
4,0.039607,0.069749,3.146406,0.12528,0.172543,0.283957,0.009572,0.055347,-24.531567,2.186144,...,0.020148,0.010359,0.065734,0.00265,0.00744,0.025936,0.032301,0.003936,0.016601,Kali Uchis - Telepatía


In [14]:
# get the basic features by indexing into the neural network feature DataFrame
zcr_ind = feat_names.index('zcr_mean')
ee_ind = feat_names.index('energy_entropy_mean')
spc_ind = feat_names.index('spectral_centroid_mean')

features_df['zero_crossing_rate'] = nn_features_df.iloc[:, zcr_ind]
features_df['energy_entropy'] = nn_features_df.iloc[:, ee_ind]
features_df['spectral_centroid'] = nn_features_df.iloc[:, spc_ind]

In [20]:
features_df["song"]

0                        Chase Shakur - Pray Slow
1                     Jakob Ogawa - All Your Love
2                 Joy Again - Looking Out For You
3                   Kali Uchis - Feel Like A Fool
4                          Kali Uchis - Telepatía
5      Kali Uchis - i want war (BUT I NEED PEACE)
6                            Plums - Parking Lots
7     Silk Skin Lovers - Annie Chases a Butterfly
8                    Silk Skin Lovers - Moon, 1AM
9                         Strawberry Guy - F Song
10                      TV Girl - Birds Dont Sing
11                            TV Girl - Blue Hair
12                  TV Girl - Blue Hair (sped up)
13            TV Girl - Cigarettes out the Window
14                          TV Girl - Lovers Rock
15                         TV Girl - Natalie Wood
16                          TV Girl - Not Allowed
17                         VACATIONS - Telephones
18                             Vacations - Actors
19                        Vacations - On Your Own


In [21]:
moods = pd.read_csv("./mood_data_main.csv")

In [23]:
moods['song'] = moods['artist'] + " - " + moods['title']
# sort the column song to use it for merging
moods = moods.sort_values(by = ['song'])
moods.reset_index(drop = True, inplace = True)

In [25]:
moods["song"]

0                        Chase Shakur - Pray Slow
1                     Jakob Ogawa - All Your Love
2                 Joy Again - Looking Out For You
3                   Kali Uchis - Feel Like A Fool
4                          Kali Uchis - Telepatía
5      Kali Uchis - i want war (BUT I NEED PEACE)
6                            Plums - Parking Lots
7     Silk Skin Lovers - Annie Chases a Butterfly
8                     Silk Skin Lovers - Moon 1AM
9                         Strawberry Guy - F Song
10                      TV Girl - Birds Dont Sing
11                            TV Girl - Blue Hair
12                  TV Girl - Blue Hair (sped up)
13            TV Girl - Cigarettes out the Window
14                          TV Girl - Lovers Rock
15                         TV Girl - Natalie Wood
16                          TV Girl - Not Allowed
17                         VACATIONS - Telephones
18                             Vacations - Actors
19                        Vacations - On Your Own


In [26]:
moods.head(5)

Unnamed: 0,title,artist,primary,secondary,song
0,Pray Slow,Chase Shakur,3,4,Chase Shakur - Pray Slow
1,All Your Love,Jakob Ogawa,1,3,Jakob Ogawa - All Your Love
2,Looking Out For You,Joy Again,4,2,Joy Again - Looking Out For You
3,Feel Like A Fool,Kali Uchis,1,6,Kali Uchis - Feel Like A Fool
4,Telepatía,Kali Uchis,1,5,Kali Uchis - Telepatía


In [27]:
features_df.head(5)

Unnamed: 0,song,tempo,chroma_number,zero_crossing_rate,energy_entropy,spectral_centroid
0,Chase Shakur - Pray Slow,80.749512,-0.000407,0.022947,3.149726,0.100529
1,Jakob Ogawa - All Your Love,86.132812,-0.012113,0.032611,3.093993,0.09854
2,Joy Again - Looking Out For You,99.384014,0.009438,0.045941,3.109662,0.112675
3,Kali Uchis - Feel Like A Fool,107.666016,-0.009557,0.055727,3.166845,0.141314
4,Kali Uchis - Telepatía,83.354335,0.02144,0.039607,3.146406,0.12528


In [28]:
new_features = features_df.merge(moods, on = "song", how = "left")
new_features = new_features.drop(columns = ['song'])
new_features

Unnamed: 0,tempo,chroma_number,zero_crossing_rate,energy_entropy,spectral_centroid,title,artist,primary,secondary
0,80.749512,-0.000407,0.022947,3.149726,0.100529,Pray Slow,Chase Shakur,3.0,4.0
1,86.132812,-0.012113,0.032611,3.093993,0.09854,All Your Love,Jakob Ogawa,1.0,3.0
2,99.384014,0.009438,0.045941,3.109662,0.112675,Looking Out For You,Joy Again,4.0,2.0
3,107.666016,-0.009557,0.055727,3.166845,0.141314,Feel Like A Fool,Kali Uchis,1.0,6.0
4,83.354335,0.02144,0.039607,3.146406,0.12528,Telepatía,Kali Uchis,1.0,5.0
5,89.102909,-0.046769,0.044189,3.124969,0.139181,i want war (BUT I NEED PEACE),Kali Uchis,5.0,6.0
6,151.999081,0.018692,0.070227,3.22422,0.142785,Parking Lots,Plums,5.0,2.0
7,129.199219,0.014116,0.028824,3.185343,0.097304,Annie Chases a Butterfly,Silk Skin Lovers,3.0,4.0
8,107.666016,-0.022237,0.041632,3.141644,0.105148,,,,
9,172.265625,-0.015071,0.040241,3.184216,0.097753,F Song,Strawberry Guy,4.0,2.0


In [30]:
# save the basic features, neural network features and the feature names
if not existing_features:
    new_features = new_features[['title', 'artist', 'tempo', 'chroma_number', 
                                 'zero_crossing_rate', 'energy_entropy', 
                                 'spectral_centroid', 'primary', 'secondary']]
    new_features.to_csv("../data/features.csv", header = True, index = False)
    
    nn_features_df.to_csv("../data/nn_features.csv", header = True, index = False)
    
    feat_names_df = pd.DataFrame(feat_names)
    feat_names_df.to_csv("../data/nn_feature_names.csv", header = True, index = False)
else:
    # concatenate the previous features with new features and overwrite the existing files
    previous_features = previous_features.drop(columns = ['song'])
    all_features = pd.concat([previous_features, new_features], ignore_index = True)
    all_features = all_features[['title', 'artist', 'tempo', 'chroma_number', 
                                 'zero_crossing_rate', 'energy_entropy', 
                                 'spectral_centroid', 'primary', 'secondary']]
    all_features.to_csv("../data/features.csv", header = True, index = False)
    
    prev_nn = pd.read_csv("../data/nn_features.csv")
    all_nn = pd.concat([prev_nn, nn_features_df], ignore_index = True)
    all_nn.to_csv("../data/nn_features.csv", header = True, index = False)