In [1]:
import pandas as pd
import numpy as np
import os
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction as aF
from pathlib import Path

from pandas_ml import ConfusionMatrix
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.base import BaseEstimator, TransformerMixin
# from sklearn import pipeline
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, make_scorer, recall_score
from sklearn.decomposition import PCA 

from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline


from scipy.stats import kurtosis, skew

from itertools import product

In [2]:
# Location of the corpus files and the tab-separated label table.
path_data = Path('../data/')
labels = pd.read_csv(path_data/'ComParE2018_AtypicalAffect.txt', sep="\t")
df_labels = labels  # both names refer to the same DataFrame

# The partition name is the part of the file name before the first underscore.
df_labels['subset'] = df_labels['file_name'].str.split('_').apply(lambda parts: parts[0])

# One single-column frame of file names per partition ...
X_train, X_devel, X_test = (
    df_labels.loc[df_labels['subset'] == part, ['file_name']]
    for part in ('train', 'devel', 'test')
)

# ... and the matching emotion labels as Series.
y_train, y_devel, y_test = (
    df_labels.loc[df_labels['subset'] == part, 'emotion']
    for part in ('train', 'devel', 'test')
)

In [3]:
# ros = RandomOverSampler(random_state=0)
# X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
# np.unique(y_resampled, return_counts=True)

In [4]:
class ReadRawData(BaseEstimator, TransformerMixin):
    """Transformer that reads the wav file named in each row and attaches its signal.

    ``transform`` returns a copy of the input with three extra columns:
    ``sample_rate`` and ``raw_data`` (first and second element of what
    ``audioBasicIO.readAudioFile`` returns) and ``duration`` (size of
    ``raw_data`` divided by ``sample_rate``).
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing to learn; returns the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Load ``path_data/'wav'/<file_name>`` for every row of ``X``."""
        out = X.copy()
        # Each file is read once; the resulting pairs are split into columns below.
        loaded = out['file_name'].apply(
            lambda name: audioBasicIO.readAudioFile(path_data/'wav'/name)
        )
        out['sample_rate'] = loaded.apply(lambda pair: pair[0])
        out['raw_data'] = loaded.apply(lambda pair: pair[1])
        n_samples = out['raw_data'].apply(lambda signal: signal.size)
        out['duration'] = n_samples/out['sample_rate']
        return out
        
# Attach the raw audio (sample rate, signal, duration) to every partition.
rrd = ReadRawData()
X_train = rrd.fit_transform(X_train)
X_devel, X_test = (rrd.transform(partition) for partition in (X_devel, X_test))

In [7]:
class PyAudioAnalysisFeaturesFactory(BaseEstimator, TransformerMixin):
    """Compute pyAudioAnalysis short-term features for every row of a frame.

    The input frame must provide the columns ``sample_rate`` and ``raw_data``
    (and ``duration`` when ``keep_duration`` is true).

    Parameters
    ----------
    limit_to : number
        Target length for the fixed-length modes; every signal is brought to
        ``int(limit_to * sample_rate)`` samples before feature extraction.
    pad : str
        How a signal is brought to the target length:
        ``'resize'`` uses ``np.resize``; ``'zero'`` truncates or left-pads
        with zeros; ``'stat'`` does not pad at all and instead summarises each
        short-term feature over all frames (mean, min, max, std, var, skew,
        kurtosis); any other value (e.g. ``'mean'``) truncates or left-pads
        with the signal mean.
    frame_size, frame_step : float
        Multiplied by the sample rate and passed to
        ``aF.stFeatureExtraction`` as its third and fourth argument.
    keep_duration : bool
        Whether to append the input ``duration`` column to the features.
    print_every : int
        A progress counter is printed every ``print_every`` processed rows.
    """

    def __init__(self, 
                 limit_to=4, #parameter to control where to truncate (or pad)
                 pad='resize', # what kind of padding to use: 'resize', 'zero', 'mean' or 'stat'
                 frame_size=0.05, #PyAudio default
                 frame_step=0.025,#PyAudio default
                 keep_duration=True, #to keep duration as a feature
                 print_every=50
                ):
        self.limit_to, self.pad = limit_to, pad
        self.frame_size, self.frame_step = frame_size, frame_step
        self.keep_duration = keep_duration
        self.print_every = print_every

    @property
    def key(self):
        """String identifying the current feature configuration.

        Computed on access so that it stays correct after ``set_params``
        (e.g. during a grid search) instead of being frozen at construction.
        """
        return (f'{self.limit_to}_{self.pad}_{self.frame_size}'
                f'_{self.frame_step}_{self.keep_duration}')

    def fit(self, X, y=None):
        """Nothing to learn; returns the transformer itself."""
        return self 
    
    def transform(self, X):
        """Return a float32 feature frame with one row per row of ``X``.

        The result has a fresh 0..n-1 index. In the fixed-length modes the
        columns are named ``<feature name>_<frame index>``; in ``'stat'`` mode
        they keep the default integer labels.

        Raises
        ------
        ValueError
            In a fixed-length mode when ``X`` has no rows (the column layout
            cannot be determined without at least one signal).
        """
        self.counter = 0
        self._frame_layout = None
        X_c = X.copy().reset_index(drop=True)
        # Explicit iteration instead of DataFrame.apply(axis=1): apply may
        # expand array results into columns depending on their length.
        signals = zip(X_c['sample_rate'], X_c['raw_data'])
        if self.pad == 'stat':
            features = pd.DataFrame([self.add_features_functionals(res) for res in signals])
        else:
            rows = [self.add_features(res) for res in signals]
            if not rows:
                raise ValueError('cannot extract fixed-length features from an empty frame')
            # Layout recorded by add_features for the first signal; rows are
            # frame-major, so the frame index is the outer loop.
            names, n_frames = self._frame_layout
            col_names = [f'{name}_{i}' for i in range(n_frames) for name in names]
            features = pd.DataFrame(rows, columns=col_names)
        
        if self.keep_duration:
            features['duration'] = X_c['duration']
        return features.astype(np.float32)
            
    def counter_print(self):
        """Print the running row counter every ``print_every`` rows, then increment it."""
        count = getattr(self, 'counter', 0)  # tolerate direct calls before transform()
        if count%self.print_every == 0:
            print(count, end=' ')
        self.counter = count + 1
    
    def add_features(self, res):
        """Fixed-length features for one ``(sample_rate, signal)`` pair.

        The signal is brought to ``int(limit_to * sample_rate)`` samples as
        selected by ``pad`` and the feature matrix is flattened frame by frame
        (all features of frame 0, then all features of frame 1, ...).
        """
        self.counter_print()
        fs,x = res
        target_len = int(self.limit_to*fs)  # int() so a fractional limit_to works too
        diff = target_len-x.shape[0]
        if self.pad == 'resize':
            x = np.resize(x,(target_len,))
        elif diff <= 0:
            x = x[:target_len]
        else:
            fill = 0 if self.pad == 'zero' else x.mean()
            x = np.hstack([np.full(diff, fill, dtype=float), x])
            
        s,t = aF.stFeatureExtraction(x,fs,self.frame_size*fs,self.frame_step*fs)
        if getattr(self, '_frame_layout', None) is None:
            # Remember feature names and frame count once so transform() can
            # name the columns without running a second extraction.
            self._frame_layout = (list(t), s.shape[1])
        return s.T.reshape(-1)
    
    def add_features_functionals(self, res):
        """Summary features for one ``(sample_rate, signal)`` pair.

        Extracts the short-term features of the unpadded signal and stacks,
        in this order, the per-feature mean, min, max, std, var, skew and
        kurtosis taken along axis 1 of the feature matrix.
        """
        self.counter_print()
        fs,x = res
        s,t = aF.stFeatureExtraction(x,fs,self.frame_size*fs,self.frame_step*fs)
        summarized_functionals = np.hstack([
            s.mean(axis=1), 
            s.min(axis=1), 
            s.max(axis=1), 
            s.std(axis=1),
            s.var(axis=1),
            skew(s, axis=1),
            kurtosis(s, axis=1)
        ])
        return summarized_functionals

    
class PyAudioAnalysisFeatures(PyAudioAnalysisFeaturesFactory, BaseEstimator, TransformerMixin):
    """Pipeline step that serves pre-computed features instead of extracting them.

    ``transform`` looks up the frame stored in the module-level ``feature_dict``
    under this configuration's ``key`` and returns the rows whose index labels
    match ``X.index``.

    ``__init__`` is deliberately inherited: scikit-learn discovers
    hyper-parameters from the explicit ``__init__`` signature, and a
    ``**kwargs``-only constructor would hide all of them from
    ``get_params`` / ``set_params`` / ``clone`` (and therefore from
    ``features__*`` entries in a parameter grid).
    """

    def fit(self, X=None, y=None):
        """Nothing to learn; accepts (and ignores) the ``X, y`` a Pipeline passes."""
        return self
    
    def transform(self, X):
        """Return the cached feature rows for ``X.index``.

        Raises
        ------
        KeyError
            If ``feature_dict`` has no entry for the current configuration, or
            if labels of ``X.index`` are missing from the cached frame.
        """
        try:
            X_base = feature_dict[self.key]
        except KeyError:
            raise KeyError(
                f'no pre-computed features for configuration {self.key!r} in feature_dict'
            ) from None
        return X_base.loc[X.index]

In [8]:
# Fixed-length training features: 'zero' padding mode, without the duration column.
# The numbers printed below the cell are the factory's progress counter.
paaff_zero_pad = PyAudioAnalysisFeaturesFactory(pad='zero', keep_duration=False)
X_train_zero = paaff_zero_pad.fit_transform(X_train)

0 50 100 150 200 250 300 350 400 450 500 550 600 650 700 750 800 850 900 950 1000 1050 1100 1150 1200 1250 1300 1350 1400 1450 1500 1550 1600 1650 1700 1750 1800 1850 1900 1950 2000 2050 2100 2150 2200 2250 2300 2350 2400 2450 2500 2550 2600 2650 2700 2750 2800 2850 2900 2950 3000 3050 3100 3150 3200 3250 3300 

In [16]:
# Each row of X_train_zero is frame-major (the 34 features of frame 0, then the
# 34 features of frame 1, ...), so it has to be split into (frames, 34) first;
# the transpose then yields the intended (samples, 34 features, frames) layout.
# Reshaping straight to (N, 34, -1) would mix features and frames.
X_train_zero_np = (X_train_zero.values
                   .reshape(X_train_zero.shape[0], -1, 34)
                   .transpose(0, 2, 1))


In [20]:
# Persist the reshaped zero-padded training features and the training labels
# as .npy files under path_data.
# NOTE(review): y_train.values presumably is an object array of strings, in
# which case loading it back needs np.load(..., allow_pickle=True) - confirm.
np.save(path_data/'X_train_zero_np.npy', X_train_zero_np)
np.save(path_data/'y_train.npy', y_train.values)

In [40]:
# feature_dict = dict()
# Summary features for the training set: pad='stat' makes the factory return the
# per-feature functionals (mean, min, max, std, var, skew, kurtosis) plus duration.
paaff = PyAudioAnalysisFeaturesFactory(pad='stat')
X_train_summary = paaff.fit_transform(X_train)

0 50 100 150 200 250 300 350 400 450 500 550 600 650 700 750 800 850 900 950 1000 1050 1100 1150 1200 1250 1300 1350 1400 1450 1500 1550 1600 1650 1700 1750 1800 1850 1900 1950 2000 2050 2100 2150 2200 2250 2300 2350 2400 2450 2500 2550 2600 2650 2700 2750 2800 2850 2900 2950 3000 3050 3100 3150 3200 3250 3300 

In [41]:
# Held-out data is only transformed, never fitted. The factory's fit() is a
# no-op, so the result is the same as with fit_transform.
X_test_summary = paaff.transform(X_test)

3350 3400 3450 3500 3550 3600 3650 3700 3750 3800 3850 3900 3950 4000 4050 4100 4150 4200 4250 4300 

In [42]:
# Held-out data is only transformed, never fitted. The factory's fit() is a
# no-op, so the result is the same as with fit_transform.
X_devel_summary = paaff.transform(X_devel)

4350 4400 4450 4500 4550 4600 4650 4700 4750 4800 4850 4900 4950 5000 5050 5100 5150 5200 5250 5300 5350 5400 5450 5500 5550 5600 5650 5700 5750 5800 5850 5900 5950 6000 6050 6100 6150 6200 6250 6300 6350 6400 6450 6500 6550 6600 6650 6700 6750 6800 6850 6900 6950 7000 7050 7100 7150 7200 7250 7300 

In [43]:
# Write the summary features of each partition to CSV under path_data.
# to_csv is called with its defaults here, so the index is written as the first column.
X_train_summary.to_csv(path_data/'X_train_summary.csv')
X_test_summary.to_csv(path_data/'X_test_summary.csv')
X_devel_summary.to_csv(path_data/'X_devel_summary.csv')

In [65]:
# Number of training samples per emotion (shows how imbalanced the classes are).
y_train.value_counts()

neutral    2287
happy       743
sad         187
angry       125
Name: emotion, dtype: int64

In [None]:
# model = AdaBoostClassifier(DecisionTreeClassifier(), 
#                                  n_estimators=10, learning_rate=1.0)

from sklearn.svm import SVC

n = 500  # NOTE(review): not used in this cell - remove if no other cell needs it

# Oversampling targets: every class keeps its current size (taken from the data
# rather than hard-coded, so the cell survives a change of the training set)
# except the two rare classes, which are resampled up to 10000 samples each.
oversampling_targets = y_train.value_counts().to_dict()
oversampling_targets.update({'sad': 10000, 'angry': 10000})

model = Pipeline([
    # random_state makes the resampling - and therefore the fit - reproducible
    ('oversampling', RandomOverSampler(sampling_strategy=oversampling_targets,
                                       random_state=0)),
    ('model', AdaBoostClassifier(SVC(probability=True, 
                                     gamma='auto'), 
                                 n_estimators=1, 
                                 learning_rate=1.0))
#     ('model', DecisionTreeClassifier(class_weight='balanced'))
])

model.fit(X_train_summary, y_train) #.drop('duration', axis=1)
y_predtree = model.predict(X_devel_summary)
# NOTE(review): reporting() is defined in a later cell of this notebook; that
# cell has to be run before this one.
reporting(y_devel,y_predtree)

  n_samples_majority))
  n_samples_majority))


In [70]:
# y_predtree = model.predict(X_tr_dl.drop('duration', axis=1))
# reporting(y_tr_dl,y_predtree)

In [30]:
# `Pipeline` is the class imported at the top of the notebook; the bare module
# name `pipeline` is never imported (its import line is commented out).
model_pipeline = Pipeline([
    ('features', PyAudioAnalysisFeatures()),
    ('pca', PCA(n_components=500)),
    ('model', AdaBoostClassifier(DecisionTreeClassifier(class_weight={'neutral': 100000, 
                                                                      'happy': 100, 
                                                                      'sad': 1, 
                                                                      'angry':1}), 
                                 n_estimators=100, learning_rate=10.0))
])

# class_weight is a parameter of the tree wrapped by AdaBoost, not of AdaBoost
# itself, so 'model__class_weight' is rejected by set_params. The nested name
# is looked up from the pipeline because the wrapper's argument name differs
# between scikit-learn versions (base_estimator / estimator).
class_weight_param = next(name for name in model_pipeline.get_params()
                          if name.startswith('model__') and name.endswith('__class_weight'))

param_grid = {
    'pca__n_components': [15, 50, 100, 250, 500],
    class_weight_param: [
                        {'neutral': 1, 'happy': 1, 'sad': 100000, 'angry':100000},
                        {'neutral': 100000, 'happy': 100000, 'sad': 1, 'angry':1}
                    ],
    'model__learning_rate': [0.1, 1, 10.0]
}

# NOTE(review): feature_transform_params is defined outside this cell;
# presumably a mapping (or a list of pairs) from feature-transformer parameter
# name to candidate values. dict(...).items() handles both, whereas iterating
# a dict directly would yield only its keys.
for k,v in dict(feature_transform_params).items():
    param_grid['features__'+k] = v
    
# custom scoring - Unweighted Average Recall (from the paper),  
# because our data is imbalanced
uar = make_scorer(recall_score, average='macro')

# The deprecated `iid` argument is no longer passed: it has been removed from
# GridSearchCV and raises a TypeError on current scikit-learn.
model_pipeline_cv = GridSearchCV(
    estimator= model_pipeline, #use the model
    param_grid=param_grid, # generate combinations from the parameter grid
    scoring=uar, #UAR to pick the best params
    n_jobs=4, # use these many cores for faster parallel processing
    cv=5, # k fold cv
    refit=True, # refit the best parameters on all of the data
    verbose=50) #give detailed progress

# NOTE(review): X_train_all / y_train_all are not defined in the visible cells.
model_pipeline_cv.fit(X_train_all, y_train_all)

KeyboardInterrupt: 

In [45]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=False):
    """Draw a confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : array-like, square
        Confusion matrix of raw counts (rows: true labels, columns: predictions).
    target_names : sequence or None
        Tick labels for both axes, in the order of the rows of ``cm``.
    title : str
        Figure title.
    cmap : matplotlib colormap or None
        Colour map; ``plt.get_cmap('Blues')`` when None.
    normalize : bool
        If true, every row is divided by its sum, and both the colours and the
        cell annotations show these fractions. Accuracy in the x-label is
        always computed from the raw counts.
    """
    import itertools

    cm = np.asarray(cm)
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Normalize BEFORE drawing so the heat map, the annotations and the text
    # colour threshold all refer to the same numbers.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # rows without any true sample stay at 0 instead of becoming NaN
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text, the others black.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # called after the labels are set so that they are part of the layout
    plt.tight_layout()
    plt.show()
    
    
def reporting(y_test,y_pred):
    """Plot and print the confusion matrix, classification report and accuracy.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_test``.
    """
    # Sorted union of the labels that actually occur; passing it explicitly
    # guarantees that the tick labels match the rows/columns of the matrix
    # (instead of relying on the global y_train containing the same classes).
    labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))
    cm = confusion_matrix(y_test, y_pred, labels=labels)
    plot_confusion_matrix(cm, target_names=labels)
    print(cm)  # the matrix itself, not the confusion_matrix function object
    print("\n",classification_report(y_test,y_pred))
    print(accuracy_score(y_test,y_pred))

In [None]:
# GridSearchCV fits clones of model_pipeline, so model_pipeline itself stays
# unfitted; with refit=True the fitted best estimator is reached through the
# search object.
y_pred = model_pipeline_cv.predict(X_test)

In [None]:
# Confusion-matrix plot, classification report and accuracy for the test predictions.
reporting(y_test,y_pred)

In [None]:
# classification_report returns a multi-line string; print it so the table is
# rendered line by line instead of as an escaped repr.
print(classification_report(y_test,y_pred))