# Set the scene

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, auc, average_precision_score, balanced_accuracy_score, accuracy_score
import DataPreparation.Prepare_Ultra_Data as DP
import Classification.shallow_classifiers as shallow

# Fetching the UltraSuite Dataset

In [None]:
# Create the local dataset root (no error if it already exists); the rsync cells of this
# notebook download into it and the DP.Select_Data calls read from it.
!mkdir -p ../Datasets/UltraSuite/

In [None]:
!rsync -av ultrasuite-rsync.inf.ed.ac.uk::ultrasuite/labels-uxtd-uxssd-upx .      

In [None]:
!rsync -av --include="*/" --include="*.wav" --exclude="*" ultrasuite-rsync.inf.ed.ac.uk::ultrasuite/core-uxtd ../Datasets/UltraSuite/
!rsync -av --include="*/" --include="*.wav" --exclude="*" ultrasuite-rsync.inf.ed.ac.uk::ultrasuite/core-uxssd ../Datasets/UltraSuite/
!rsync -av --include="*/" --include="*.wav" --exclude="*" ultrasuite-rsync.inf.ed.ac.uk::ultrasuite/core-upx ../Datasets/UltraSuite/        

# Extract features

UXTD Dataset (typically developing, 58 children)

In [None]:
# Step 1 - read the start and end time of every CHILD segment from the speaker-diarisation
#          TextGrid files of this dataset. Sessions / Tasks select which recordings are included.
uxtd_dWaves_Segments = DP.Select_Data(
    '../Datasets/UltraSuite/core-uxtd',
    '../Datasets/UltraSuite/labels-uxtd-uxssd-upx/uxtd/speaker_labels/TG/',
    Sessions='',
    Tasks=['A','B','C'],
)
# Step 2 - dump the segment list to a text file
DP.Write_Wave_Segments_To_File(uxtd_dWaves_Segments, 'uxtd_segments.csv')
# Step 3 - run SMILExtract on each segment; the config file decides which features are produced:
#            GeMAPs  -> 62 features per segment
#            eGeMAPs -> 88 features per segment
# NOTE(review): the outputs are written to the working directory, whereas the loading cell of
# this notebook reads 'data/uxtd_output_GeMAPs.csv' - confirm whether the files must be moved to data/.
for sConfig, sOutput in (
    ('openSmile/config/gemaps/GeMAPSv01a.conf', 'uxtd_output_GeMAPs.csv'),
    ('openSmile/config/gemaps/eGeMAPSv01a.conf', 'uxtd_output_eGeMAPs.csv'),
):
    DP.Extract_Features_openSmile('uxtd_segments.csv', sConfig_File=sConfig, sSegment_Level_csv_File=sOutput)

UXSSD Dataset (8 SSD children)

In [None]:
# Step 1 - read the start and end time of every CHILD segment from the speaker-diarisation
#          TextGrid files of this dataset. Sessions / Tasks select which recordings are included.
uxssd_dWaves_Segments = DP.Select_Data(
    '../Datasets/UltraSuite/core-uxssd',
    '../Datasets/UltraSuite/labels-uxtd-uxssd-upx/uxssd/speaker_labels/TG/',
    Sessions='BL1',
    Tasks=['A','B','C'],
)
# Step 2 - dump the segment list to a text file
DP.Write_Wave_Segments_To_File(uxssd_dWaves_Segments, 'uxssd_segments.csv')
# Step 3 - run SMILExtract on each segment; the config file decides which features are produced:
#            GeMAPs  -> 62 features per segment
#            eGeMAPs -> 88 features per segment
# NOTE(review): the outputs are written to the working directory, whereas the loading cell of
# this notebook reads 'data/uxssd_output_GeMAPs.csv' - confirm whether the files must be moved to data/.
for sConfig, sOutput in (
    ('openSmile/config/gemaps/GeMAPSv01a.conf', 'uxssd_output_GeMAPs.csv'),
    ('openSmile/config/gemaps/eGeMAPSv01a.conf', 'uxssd_output_eGeMAPs.csv'),
):
    DP.Extract_Features_openSmile('uxssd_segments.csv', sConfig_File=sConfig, sSegment_Level_csv_File=sOutput)

UPX Dataset (20 SSD children)

In [None]:
# Step 1 - read the start and end time of every CHILD segment from the speaker-diarisation
#          TextGrid files of this dataset. Sessions / Tasks select which recordings are included.
upx_dWaves_Segments = DP.Select_Data(
    '../Datasets/UltraSuite/core-upx',
    '../Datasets/UltraSuite/labels-uxtd-uxssd-upx/upx/speaker_labels/TG/',
    Sessions='BL1',
    Tasks=['A','B','C'],
)
# Step 2 - dump the segment list to a text file
DP.Write_Wave_Segments_To_File(upx_dWaves_Segments, 'upx_segments.csv')
# Step 3 - run SMILExtract on each segment; the config file decides which features are produced:
#            GeMAPs  -> 62 features per segment
#            eGeMAPs -> 88 features per segment
# NOTE(review): the outputs are written to the working directory, whereas the loading cell of
# this notebook reads 'data/upx_output_GeMAPs.csv' - confirm whether the files must be moved to data/.
for sConfig, sOutput in (
    ('openSmile/config/gemaps/GeMAPSv01a.conf', 'upx_output_GeMAPs.csv'),
    ('openSmile/config/gemaps/eGeMAPSv01a.conf', 'upx_output_eGeMAPs.csv'),
):
    DP.Extract_Features_openSmile('upx_segments.csv', sConfig_File=sConfig, sSegment_Level_csv_File=sOutput)

# Building SVM Model

Prepare training/validation data

In [9]:
# Load the segment-level GeMAPs feature tables (semicolon-separated CSV) into pandas
# DataFrames, one per corpus, in the order UXTD, UXSSD, UPX.
uxtd_data, uxssd_data, upx_data = [
    pd.read_csv(sCsv_Path, sep=';')
    for sCsv_Path in (
        'data/uxtd_output_GeMAPs.csv',
        'data/uxssd_output_GeMAPs.csv',
        'data/upx_output_GeMAPs.csv',
    )
]

In [None]:
#eGeMAPs
#Alternative input: uncomment the three lines below to load the eGeMAPs feature tables
#instead of the GeMAPs ones (the CV-split cell notes that nDim must then be 88).
#NOTE(review): these paths have no 'data/' prefix, unlike the GeMAPs loads in the previous
#cell - confirm where the eGeMAPs CSV files are expected to live.
#uxtd_data = pd.read_csv('uxtd_output_eGeMAPs.csv',sep=';')
#uxssd_data = pd.read_csv('uxssd_output_eGeMAPs.csv',sep=';')
#upx_data = pd.read_csv('upx_output_eGeMAPs.csv',sep=';')

In [10]:
# Build the cross-validation partitioning described by data/split_file.csv.
# The helper also converts the pandas DataFrames to numpy arrays and returns:
#   X, y  - the samples and their labels
#   Spkrs - a string array holding the speaker of each sample
#   CV    - the CV partitions
X, y, Spkrs, CV = DP.Split_Wavs_Train_Test_From_Speaker_List(
    [('UXTD', uxtd_data), ('UXSSD', uxssd_data), ('UPX', upx_data)],
    'data/split_file.csv',
    nDim=62,  # nDim=88 for eGeMAPs
)

4 250000.0
UXTD 1 0 0
UXSSD 1 8 4399
UPX 1 20 8708
0 4287;9758 482;3349
1 3995;9895 774;3212
2 4365;10298 404;2809
3 4066;9370 703;3737


In [None]:
# Feature selection: a linear-kernel SVC (C=10) is handed to shallow.Feature_Selection together
# with the data and the CV partitions; the returned selector's transform() is then applied to X.
# NOTE(review): X is overwritten here, so re-running this cell without first re-running the
# CV-split cell feeds already-transformed data back into the selector.
estimator = SVC(C=10, kernel='linear')
selector = shallow.Feature_Selection(estimator, X, y, cv=CV)
X = selector.transform(X)

In [None]:
# Grid search over several shallow classifiers (currently only SVM), exploring a range of values
# for their hyper-parameters.
# Normalization is part of the search: it is fitted on the training part of the current CV
# partition and applied to the validation part (documented as MinMax normalization to [0,1]).
# NOTE(review): the best pipeline printed at the end of this notebook shows
# MinMaxScaler(feature_range=(-3, 3)), which disagrees with the [0,1] statement - confirm.
# Selection metric: UAR (unweighted average recall), i.e. balanced_accuracy_score.
# NOTE(review): the prefix mentions BL1BL2 while the extraction cells select Sessions='BL1' - confirm.
aTrainedModels = shallow.GridSearchShallow(
    X,
    y,
    CV,
    bSave_Model=True,
    prefix='_CV_GeMAPs_BL1BL2_SVM_Balanced',
    verbose=5,
    n_jobs=1,
)

Fitting 4 folds for each of 78 candidates, totalling 312 fits
[CV] SVM__C=100, SVM__kernel=linear ..................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  SVM__C=100, SVM__kernel=linear, score=0.678246990183482, total= 5.5min
[CV] SVM__C=100, SVM__kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  5.6min remaining:    0.0s


[CV]  SVM__C=100, SVM__kernel=linear, score=0.6743453972667097, total= 5.0min
[CV] SVM__C=100, SVM__kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 10.6min remaining:    0.0s


[CV]  SVM__C=100, SVM__kernel=linear, score=0.7695133922434607, total= 6.2min
[CV] SVM__C=100, SVM__kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 17.0min remaining:    0.0s


[CV]  SVM__C=100, SVM__kernel=linear, score=0.7028646676900976, total= 4.4min
[CV] SVM__C=10, SVM__kernel=linear ...................................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 21.5min remaining:    0.0s


[CV]  SVM__C=10, SVM__kernel=linear, score=0.6778068389771394, total=  51.3s
[CV] SVM__C=10, SVM__kernel=linear ...................................
[CV]  SVM__C=10, SVM__kernel=linear, score=0.6727887347511432, total=  50.7s
[CV] SVM__C=10, SVM__kernel=linear ...................................
[CV]  SVM__C=10, SVM__kernel=linear, score=0.7680977691930817, total=  59.1s
[CV] SVM__C=10, SVM__kernel=linear ...................................
[CV]  SVM__C=10, SVM__kernel=linear, score=0.7038434995704407, total=  46.6s
[CV] SVM__C=1, SVM__kernel=linear ....................................
[CV]  SVM__C=1, SVM__kernel=linear, score=0.674687062094463, total=  16.9s
[CV] SVM__C=1, SVM__kernel=linear ....................................
[CV]  SVM__C=1, SVM__kernel=linear, score=0.6751937180019372, total=  17.0s
[CV] SVM__C=1, SVM__kernel=linear ....................................
[CV]  SVM__C=1, SVM__kernel=linear, score=0.769157393667455, total=  19.3s
[CV] SVM__C=1, SVM__kernel=linear ......

In [7]:
# Score one specific estimator with several metrics, on segment level and on speaker level.
# Returns the reference and predicted labels for both levels when SpkrLevel != 'None';
# otherwise only the segment-level labels are returned.
# The metrics to compute are passed through the `scorers` argument as (name, function) pairs.
# SpkrLevel accepts three values:
#   - None    : no speaker-level score is calculated
#   - average : the features of each speaker are averaged and predicted as one sample
#   - major   : every segment is predicted; the speaker is labelled TD if the majority of its
#               segments are predicted as TD, and SSD otherwise
y_ref, y_predict_trans, y_ref_spk, y_predict_spk = shallow.Score_CV(
    aTrainedModels[0].best_estimator_,
    X,
    y,
    cv=CV,
    aSpeaker_List=np.asarray(Spkrs, dtype=str),
    SpkrLevel='major',
    scorers=[
        ('balanced_accuracy_score', balanced_accuracy_score),
        ('accuracy_score', accuracy_score),
        ('confusion_matrix', confusion_matrix),
    ],
)

01F_UPX [ 15 209] 1 1 1
02F_UPX [ 69 220] 1 1 1
03F_UPX [ 3 88] 1 1 1
03F_UXSSD [371 121] 0 1 1
06M_UXSSD [ 88 286] 1 1 1
07F_UXSSD [ 22 179] 1 1 1
18F_UPX [ 17 162] 1 1 1
18F_UXTD [60  3] 0 0 0
24F_UXTD [139   8] 0 0 0
26F_UXTD [102  19] 0 0 0
33F_UXTD [36 12] 0 0 0
36M_UXTD [27  6] 0 0 0
41F_UXTD [28 12] 0 0 0
48F_UXTD [22  8] 0 0 0
01M_UXSSD [137  34] 0 1 1
05M_UPX [ 21 175] 1 1 1
05M_UXSSD [106 300] 1 1 1
05M_UXTD [126   7] 0 0 0
06M_UPX [ 16 201] 1 1 1
08M_UXTD [117  15] 0 0 0
11M_UPX [ 54 159] 1 1 1
11M_UXTD [110  23] 0 0 0
12M_UPX [ 87 338] 1 1 1
12M_UXTD [97 43] 0 0 0
15M_UPX [251 100] 0 1 1
17M_UXTD [114  39] 0 0 0
37M_UXTD [32 10] 0 0 0
38M_UXTD [34  7] 0 0 0
04M_UPX [236  60] 0 1 1
07M_UPX [ 97 150] 1 1 1
08M_UXSSD [154  70] 0 1 1
09M_UPX [ 27 170] 1 1 1
13M_UPX [ 30 285] 1 1 1
14M_UPX [ 13 104] 1 1 1
15M_UXTD [47  3] 0 0 0
16M_UPX [ 64 259] 1 1 1
22M_UXTD [133   7] 0 0 0
35M_UXTD [29  3] 0 0 0
42M_UXTD [27  9] 0 0 0
51M_UXTD [38  5] 0 0 0
55M_UXTD [23  3] 0 0 0
56M_UXTD [62

In [8]:
# Display the best estimator stored on the first entry of aTrainedModels
# (the same object that is passed to shallow.Score_CV in this notebook).
aTrainedModels[0].best_estimator_

Pipeline(memory=None,
     steps=[('scaler', MinMaxScaler(copy=True, feature_range=(-3, 3))), ('SVM', SVC(C=100, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])