In [None]:
# Install necessary packages
!pip install librosa pandas

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Import libraries
import librosa
import pandas as pd

In [None]:
# Location of the MP3 to analyse, inside the Google Drive mounted under /content/drive.
# The adjacent string literals below are joined into a single path string.
mp3_file_path = (
    '/content/drive/Shareddrives/Machine Learning Group Project/'
    'Songs for Testing/'
    'twenty one pilots - Stressed Out (Audio).mp3'
)

In [None]:
# Function to extract features from the MP3 file
def extract_features(file_path):
    """Compute one sequence per audio feature for a single audio file.

    The file is loaded with ``librosa.load`` and five feature matrices are
    computed from it: MFCC, chroma (STFT), mel spectrogram, spectral contrast
    and tonnetz.

    Note that only the FIRST row (index 0) of each feature matrix is kept;
    the remaining rows are discarded.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        dict: Maps each of the keys 'mfcc', 'chroma', 'mel', 'contrast' and
        'tonnetz' to a plain Python list. All lists share the same length,
        equal to the largest frame count (axis 1) among the five matrices.
    """
    # Decode the audio into a waveform and its sampling rate
    signal, sample_rate = librosa.load(file_path)

    # Full feature matrices, keyed by the name used in the returned dict
    matrices = {
        'mfcc': librosa.feature.mfcc(y=signal, sr=sample_rate),
        'chroma': librosa.feature.chroma_stft(y=signal, sr=sample_rate),
        'mel': librosa.feature.melspectrogram(y=signal, sr=sample_rate),
        'contrast': librosa.feature.spectral_contrast(y=signal, sr=sample_rate),
        'tonnetz': librosa.feature.tonnetz(y=signal, sr=sample_rate),
    }

    # Common target length: the longest time axis among all matrices
    target_len = max(matrix.shape[1] for matrix in matrices.values())

    # Keep row 0 of every matrix, bring it to the common length, and convert to a list
    return {
        name: librosa.util.fix_length(matrix[0], size=target_len).tolist()
        for name, matrix in matrices.items()
    }

In [None]:
# Extract features from the MP3 file.
# The result is a dict with one equal-length list per feature name
# ('mfcc', 'chroma', 'mel', 'contrast', 'tonnetz').
audio_features = extract_features(mp3_file_path)

In [None]:
# Create a DataFrame from the features: each key of the feature dict becomes
# a column and each list position becomes a row.
df = pd.DataFrame(audio_features)


In [None]:
# Display the DataFrame (at most its first 1000 rows)
df.head(1000)

Unnamed: 0,mfcc,chroma,mel,contrast,tonnetz
0,-462.318390,0.000000,0.000000e+00,12.242565,0.030214
1,-462.318390,0.000000,0.000000e+00,12.242565,0.004259
2,-462.318390,0.000000,0.000000e+00,12.242565,-0.057758
3,-462.318390,0.403632,4.459673e-38,12.242565,-0.050390
4,-462.318390,0.692172,3.251604e-33,12.242565,-0.028785
...,...,...,...,...,...
995,-12.497978,1.000000,9.082154e+00,12.947515,-0.030352
996,-20.985197,1.000000,1.051582e+01,14.767105,-0.016761
997,-38.153267,0.334486,9.164514e+00,25.141161,-0.004083
998,-44.806664,0.154421,1.060306e+01,25.953827,0.011855


In [None]:
# Empty DataFrame that the statistics loop further down fills with summary columns
newDataframe= pd.DataFrame()

In [None]:
# Preview the summary DataFrame (still empty here when cells are run top to bottom)
newDataframe.head()

In [None]:
# Feature columns to summarise (note: 'mel' is not in this list)
columns_of_interest = ['chroma', "mfcc", 'contrast', 'tonnetz']

# For every selected column, add five one-row summary columns to newDataframe:
# <column>_mean, <column>_median, <column>_range, <column>_min, <column>_max
for column in columns_of_interest:
    values = df[column]

    # Extremes are computed once and reused for the range
    min_value, max_value = values.min(), values.max()
    mean_value, median_value = values.mean(), values.median()
    range_value = max_value - min_value

    # (suffix, statistic) pairs in the order the columns are created
    suffixed_stats = (
        ('_mean', mean_value),
        ('_median', median_value),
        ('_range', range_value),
        ('_min', min_value),
        ('_max', max_value),
    )

    # Each statistic is wrapped in a one-element list so the new column has a single row
    for suffix, stat in suffixed_stats:
        newDataframe[column + suffix] = [stat]

In [None]:
# Preview the summary DataFrame after the statistics loop has added its columns
newDataframe.head()


Unnamed: 0,chroma_mean,chroma_median,chroma_range,chroma_min,chroma_max,mfcc_mean,mfcc_median,mfcc_range,mfcc_min,mfcc_max,contrast_mean,contrast_median,contrast_range,contrast_min,contrast_max,tonnetz_mean,tonnetz_median,tonnetz_range,tonnetz_min,tonnetz_max
0,0.374993,0.257663,1.0,0.0,1.0,-50.221877,-30.069609,572.858047,-462.31839,110.539658,20.9525,20.311132,53.105894,4.599266,57.70516,0.117794,0.080688,1.008548,-0.345665,0.662883


In [None]:
# Bind the summary DataFrame to a song-specific name (same object, no copy is made)
stressedout_features = newDataframe
# Write the summary to the shared drive as CSV; index=False leaves out the row index
stressedout_features.to_csv('/content/drive/Shareddrives/Machine Learning Group Project/stressedout_features.csv', index=False)