In [2]:
import os
import pretty_midi
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import librosa

In [3]:
# Load the three dataset splits; the first CSV column becomes the index,
# which later cells use as the per-song file-name stem.
train, val, test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        "../dataset/split/train.csv",
        "../dataset/split/val.csv",
        "../dataset/split/test.csv",
    )
)
# Pool the training and validation rows (train first) into one fitting set.
train_val = pd.concat([train, val], axis=0)

In [5]:
def get_midi_df(fn):
    """Read a MIDI file and return its notes as a DataFrame.

    Notes from every instrument in the file are pooled into one table with
    columns 'start', 'end', 'pitch', 'velocity' and the derived
    'note_length' (= end - start). Rows are ordered by start time, with
    pitch as the tie-breaker.

    Parameters
    ----------
    fn : path of the MIDI file, passed to ``pretty_midi.PrettyMIDI``.
    """
    song = pretty_midi.PrettyMIDI(fn)
    song.remove_invalid_notes()
    note_rows = [
        [note.start, note.end, note.pitch, note.velocity]
        for instrument in song.instruments
        for note in instrument.notes
    ]
    # Stable in-place sort on (start, pitch).
    note_rows.sort(key=lambda row: (row[0], row[2]))
    notes = pd.DataFrame(note_rows, columns=['start', 'end', 'pitch', 'velocity'])
    notes['note_length'] = notes['end'] - notes['start']
    return notes
def get_song_level_feature(fn):
    """Summarise one MIDI file as a dict of song-level statistics.

    Returns a dict with 'note_length_mean' and 'velocity_mean', the means of
    the corresponding columns of ``get_midi_df(fn)``.
    """
    notes = get_midi_df(fn)
    return {
        'note_length_mean': notes['note_length'].mean(),
        'velocity_mean': notes['velocity'].mean(),
    }
def _feature_extractor(train_val):
    feature_list = []
    for fname in train_val.index:
        midi_features = get_song_level_feature(os.path.join(midi_path, fname + ".mid"))
        matlab_features = pd.read_csv(os.path.join(matlab_path, fname + ".csv"))
        midi_features['keyname'] = matlab_features['keyname'].item()
        midi_features['beatNoteDensity'] = matlab_features['beatNoteDensity'].item()
        feature_list.append(midi_features)
    return feature_list

In [6]:
# Directory holding one "<song id>.mid" file per song.
midi_path = "../../dataset/PEmoDataset/midis/"
# Directory holding one "<song id>.csv" per song with the key name and
# beat note density extracted with MATLAB.
matlab_path = "../dataset/matlab_feature/"
# Extract the per-song feature dicts for the pooled train+val split, then test.
train_val_list, test_list = (
    _feature_extractor(split) for split in (train_val, test)
)

In [8]:
import numpy as np
import matplotlib.pyplot as plt  
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import plot_confusion_matrix

In [9]:
def _get_labels(label_quad, cls_type):
    if cls_type == "AV":
        results = np.array(label_quad)
    elif cls_type == "A":
        labels = []
        for label in label_quad:
            if label in ['Q1','Q2']:
                labels.append('HA')
            elif label in ['Q3','Q4']:
                labels.append('LA')
        results = np.array(labels)
    elif cls_type == "V":
        labels = []
        for label in label_quad:
            if label in ['Q1','Q4']:
                labels.append('HV')
            elif label in ['Q2','Q3']:
                labels.append('LV') 
        results = np.array(labels)
    return results

def _get_train_test(train_val_list, test_list, cls_type = "AV"):
    """Assemble feature matrices and label arrays for one classification task.

    Parameters
    ----------
    train_val_list, test_list : list of dict
        Per-song feature dicts; each must contain a 'keyname' entry, which is
        one-hot encoded and then dropped.
    cls_type : str
        Task passed to ``_get_labels`` ("AV", "A" or "V").

    Returns
    -------
    (m_x_train, m_y_train, m_x_test, m_y_test)
        ``m_x_test`` has exactly the columns of ``m_x_train``, in the same
        order.

    Notes
    -----
    Labels are read from the module-level ``train_val`` and ``test`` frames,
    so the feature lists must have been built from those frames in the same
    row order.
    """
    m_x_train = pd.DataFrame(train_val_list)
    m_x_train = pd.concat([m_x_train, pd.get_dummies(m_x_train.keyname)],axis=1).drop(columns=['keyname'])
    m_x_test = pd.DataFrame(test_list)
    m_x_test = pd.concat([m_x_test, pd.get_dummies(m_x_test.keyname)],axis=1).drop(columns=['keyname'])
    # Align the test matrix with the training matrix: keys absent from the
    # test split become all-zero columns, keys never seen in training are
    # dropped (the model has no weight for them), and the column order matches
    # training so features cannot be positionally mismatched at predict time.
    m_x_test = m_x_test.reindex(columns=m_x_train.columns, fill_value=0)
    m_y_train = _get_labels(list(train_val['label']), cls_type)
    m_y_test = _get_labels(list(test['label']), cls_type)
    return m_x_train, m_y_train, m_x_test, m_y_test

In [10]:
def _get_results(cls_type = "AV"):
    """Fit logistic regression on the MIDI features and score it on the test split.

    Uses the module-level ``train_val_list`` and ``test_list`` feature lists
    and returns the value of ``LogisticRegression.score`` for the task
    selected by ``cls_type`` ("AV", "A" or "V").
    """
    x_fit, y_fit, x_eval, y_eval = _get_train_test(
        train_val_list, test_list, cls_type=cls_type
    )
    model = LogisticRegression(random_state=42)
    model.fit(x_fit, y_fit)
    return model.score(x_eval, y_eval)

In [24]:
# Test score of the MIDI-feature classifier for each task, keyed by task name.
midi_ml = {task: _get_results(cls_type=task) for task in ("AV", "A", "V")}

In [23]:
# Display the MIDI-feature scores per task (AV, A, V).
midi_ml

{'AV': 0.5813953488372093, 'A': 0.8488372093023255, 'V': 0.6511627906976745}

In [12]:
# Directory the audio clips are loaded from; each clip is looked up as
# "<index value>.mp3" inside it.
data_dir = "../../dataset/PEmoDataset/audios/seg"

In [20]:
def _extract_mfcc(split, audio_dir, sr=22050, n_fft=1024, hop_length=512, win_length=1024):
    """Compute one MFCC array per row of ``split``.

    For every index value ``name`` of ``split`` the file
    ``<audio_dir>/<name>.mp3`` is loaded with ``librosa.load`` at sample rate
    ``sr`` and passed to ``librosa.feature.mfcc`` with the given STFT
    settings.

    Returns a list of MFCC arrays in the order of ``split.index``.
    """
    mfcc_vec = []
    # Iterate the index labels directly; temporaries stay local so the
    # notebook's top-level `_` (IPython's last-output variable) is not rebound.
    for fname in tqdm(split.index):
        signal, _sr = librosa.load(os.path.join(audio_dir, fname + ".mp3"), sr=sr)
        mfcc_vec.append(
            librosa.feature.mfcc(
                y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length, win_length=win_length
            )
        )
    return mfcc_vec


# MFCCs for the pooled train+val clips and for the test clips.
tr_mfcc_vec = _extract_mfcc(train_val, data_dir)
te_mfcc_vec = _extract_mfcc(test, data_dir)

In [14]:
def _audioML_results(cls_type = "AV"):
    """Fit logistic regression on mean-pooled MFCCs and score it on the test split.

    Reads the module-level ``train_val`` / ``test`` label columns and the
    ``tr_mfcc_vec`` / ``te_mfcc_vec`` MFCC lists, and returns the value of
    ``LogisticRegression.score`` for the task selected by ``cls_type``.
    """
    train_targets = _get_labels(train_val['label'], cls_type)
    test_targets = _get_labels(test['label'], cls_type)
    # Average each MFCC array along axis 1 so every clip becomes one vector.
    train_features = np.stack([clip.mean(axis=1) for clip in tr_mfcc_vec])
    test_features = np.stack([clip.mean(axis=1) for clip in te_mfcc_vec])
    model = LogisticRegression(random_state=42)
    model.fit(train_features, train_targets)
    return model.score(test_features, test_targets)

In [19]:
# Test score of the MFCC-based classifier for each task, keyed by task name.
audio_ml = {task: _audioML_results(cls_type=task) for task in ("AV", "A", "V")}

In [18]:
# Display the audio (MFCC) scores per task (AV, A, V).
audio_ml

{'AV': 0.5232558139534884, 'A': 0.9186046511627907, 'V': 0.5581395348837209}