# Set the scene

In [None]:
from importlib import reload
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, auc, average_precision_score, balanced_accuracy_score, accuracy_score
import DataPreparation.Prepare_Ultra_Data as DP
import DataPreparation.Prepare_OREGON_Data as DP_OGI
import Classification.shallow_classifiers as shallow

In [None]:
# Re-import the data-preparation module so that edits to its source are picked up without restarting the kernel
reload(DP)

In [None]:
# Show characters [-4:-1] of the value in the first row / first column of uxtd_data.
# NOTE(review): uxtd_data is only assigned by the read_csv cell in the "Building SVM Model" section,
# so this cell raises NameError on a fresh kernel (Restart & Run All) — looks like a leftover
# inspection; move it below that cell or remove it.
uxtd_data.iloc[0,0][-4:-1]

# Fetching the UltraSuite Dataset

In [None]:
# Create the local download directory for the UltraSuite data (-p: no error if it already exists)
!mkdir -p ../Datasets/UltraSuite/

In [None]:
!rsync -av ultrasuite-rsync.inf.ed.ac.uk::ultrasuite/labels-uxtd-uxssd-upx .      

In [None]:
# Download the three UltraSuite core sets (uxtd, uxssd, upx) into ../Datasets/UltraSuite/.
# Filter rules: keep every directory ("*/") and every .wav file, exclude everything else,
# so only the audio is transferred while the directory tree is preserved.
!rsync -av --include="*/" --include="*.wav" --exclude="*" ultrasuite-rsync.inf.ed.ac.uk::ultrasuite/core-uxtd ../Datasets/UltraSuite/
!rsync -av --include="*/" --include="*.wav" --exclude="*" ultrasuite-rsync.inf.ed.ac.uk::ultrasuite/core-uxssd ../Datasets/UltraSuite/
!rsync -av --include="*/" --include="*.wav" --exclude="*" ultrasuite-rsync.inf.ed.ac.uk::ultrasuite/core-upx ../Datasets/UltraSuite/        

The OREGON (OGI) Kids Dataset
https://catalog.ldc.upenn.edu/LDC2007S18

# Extract features

UXTD Dataset (Typically Developing, 58 Children)

In [None]:
#Get the start and end time of each CHILD segment from the speaker diarization TextGrid files of the dataset
#Sessions and Tasks select which sessions and tasks are included
uxtd_dWaves_Segments = DP.Select_Data('../Datasets/UltraSuite/core-uxtd', '../Datasets/UltraSuite/labels-uxtd-uxssd-upx/uxtd/speaker_labels/TG/', Sessions='', Tasks=['A','B','C'])
#Dump the segments to a text file
DP.Write_Wave_Segments_To_File(uxtd_dWaves_Segments,'data/uxtd_segments_0.5_16.csv')
#Run SMILExtract on each segment to extract the desired features
#Features are defined by the config file passed to the function
#The outputs are written under data/ because the model-building cells read 'data/uxtd_output_*.csv'
#(same convention as the OGI extraction cell)
## GeMAPs (62 features per segment)
DP.Extract_Features_openSmile('data/uxtd_segments_0.5_16.csv',sConfig_File='openSmile/config/gemaps/GeMAPSv01a.conf',sSegment_Level_csv_File='data/uxtd_output_GeMAPs_0.5_16.csv')
## eGeMAPs (88 features per segment)
DP.Extract_Features_openSmile('data/uxtd_segments_0.5_16.csv',sConfig_File='openSmile/config/gemaps/eGeMAPSv01a.conf',sSegment_Level_csv_File='data/uxtd_output_eGeMAPs_0.5_16.csv')

UXSSD Dataset (8 SSD children)

In [None]:
#Get the start and end time of each CHILD segment from the speaker diarization TextGrid files of the dataset
#Sessions and Tasks select which sessions and tasks are included
uxssd_dWaves_Segments = DP.Select_Data('../Datasets/UltraSuite/core-uxssd', '../Datasets/UltraSuite/labels-uxtd-uxssd-upx/uxssd/speaker_labels/TG/', Sessions='BL1,BL2', Tasks=['A','B','C'])
#Dump the segments to a text file
DP.Write_Wave_Segments_To_File(uxssd_dWaves_Segments,'data/uxssd_segments_0.5_16.csv')
#Run SMILExtract on each segment to extract the desired features
#Features are defined by the config file passed to the function
#The outputs are written under data/ because the model-building cells read 'data/uxssd_output_*.csv'
#(same convention as the OGI extraction cell)
## GeMAPs (62 features per segment)
DP.Extract_Features_openSmile('data/uxssd_segments_0.5_16.csv',sConfig_File='openSmile/config/gemaps/GeMAPSv01a.conf',sSegment_Level_csv_File='data/uxssd_output_GeMAPs_0.5_16.csv')
## eGeMAPs (88 features per segment)
DP.Extract_Features_openSmile('data/uxssd_segments_0.5_16.csv',sConfig_File='openSmile/config/gemaps/eGeMAPSv01a.conf',sSegment_Level_csv_File='data/uxssd_output_eGeMAPs_0.5_16.csv')

UPX Dataset (20 SSD children)

In [None]:
#Get the start and end time of each CHILD segment from the speaker diarization TextGrid files of the dataset
#Sessions and Tasks select which sessions and tasks are included
upx_dWaves_Segments = DP.Select_Data('../Datasets/UltraSuite/core-upx', '../Datasets/UltraSuite/labels-uxtd-uxssd-upx/upx/speaker_labels/TG/', Sessions='BL1,BL2', Tasks=['A','B','C'])
#Dump the segments to a text file
DP.Write_Wave_Segments_To_File(upx_dWaves_Segments,'data/upx_segments_0.5_16.csv')
#Run SMILExtract on each segment to extract the desired features
#Features are defined by the config file passed to the function
#The outputs are written under data/ because the model-building cells read 'data/upx_output_*.csv'
#(same convention as the OGI extraction cell)
## GeMAPs (62 features per segment)
DP.Extract_Features_openSmile('data/upx_segments_0.5_16.csv',sConfig_File='openSmile/config/gemaps/GeMAPSv01a.conf',sSegment_Level_csv_File='data/upx_output_GeMAPs_0.5_16.csv')
## eGeMAPs (88 features per segment)
DP.Extract_Features_openSmile('data/upx_segments_0.5_16.csv',sConfig_File='openSmile/config/gemaps/eGeMAPSv01a.conf',sSegment_Level_csv_File='data/upx_output_eGeMAPs_0.5_16.csv')

In [None]:
# OREGON (OGI) dataset preparation:
#   1- Select speech files marked as 1 (no noise and the exact prompt word exists)
#   2- Convert to wav with header using sox
#   3- Apply the LSTM VAD implemented in openSMILE
# The call returns the segments of each wav file.
OGI_dWaves_Segments = DP_OGI.Select_Data_OGI(
    '../Datasets/OREGON_Kids_Corpus/',
    bConvertWav=True,
    bVAD=True,
)

# Persist the segment list as a text file
DP.Write_Wave_Segments_To_File(OGI_dWaves_Segments, 'data/OGI_segments_0.5.csv')

# Run SMILExtract once per feature set; the config file decides which features are extracted.
for sConf, sOut in (
    # GeMAPs (62 features per segment)
    ('openSmile/config/gemaps/GeMAPSv01a.conf', 'data/OGI_output_GeMAPs_0.5.csv'),
    # eGeMAPs (88 features per segment)
    ('openSmile/config/gemaps/eGeMAPSv01a.conf', 'data/OGI_output_eGeMAPs_0.5.csv'),
):
    DP.Extract_Features_openSmile(
        'data/OGI_segments_0.5.csv',
        sConfig_File=sConf,
        sSegment_Level_csv_File=sOut,
    )

# Building SVM Model

Prepare training/validation data

In [None]:
#Import data to pandas DataFrame
#GeMAPs (disabled: these reads are commented out; the next cell loads the *_0.5_16 files instead)
#uxtd_data = pd.read_csv('data/uxtd_output_GeMAPs_0.5.csv',sep=';')
#uxssd_data = pd.read_csv('data/uxssd_output_GeMAPs_0.5.csv',sep=';')
#upx_data = pd.read_csv('data/upx_output_GeMAPs_0.5.csv',sep=';')

In [None]:
#GeMAPs segment-level features (semicolon-separated CSVs produced by the extraction cells)
#The files loaded here are the GeMAPs exports, matching nDim=62 in the split cell;
#to use eGeMAPs instead, switch the file names to the *_eGeMAPs_* outputs and set nDim=88.
uxtd_data = pd.read_csv('data/uxtd_output_GeMAPs_0.5_16.csv',sep=';')
uxssd_data = pd.read_csv('data/uxssd_output_GeMAPs_0.5_16.csv',sep=';')
upx_data = pd.read_csv('data/upx_output_GeMAPs_0.5_16.csv',sep=';')
OGI_data = pd.read_csv('data/OGI_output_GeMAPs_0.5.csv',sep=';')

In [None]:
# Build the CV partitioning from split_file.csv; the function also converts the pandas
# DataFrames to numpy arrays.
# Returns X, y and the CV partitions, plus a string array with the speaker of each sample.
# nDim=62 for GeMAPs, nDim=88 for eGeMAPs
X, y, Spkrs, CV = DP.Split_Wavs_Train_Test_From_Speaker_List(
    [
        ('UXTD', uxtd_data),
        ('UXSSD', uxssd_data),
        ('UPX', upx_data),
    ],
    'data/split_file.csv',
    bRemoveOutliers=True,
    nDim=62,
)

In [None]:
#Apply feature selection
#A linear SVC (C=10) is the estimator handed to shallow.Feature_Selection together with the CV partitions
estimator = SVC(kernel='linear',C=10)
selector = shallow.Feature_Selection(estimator,X,y,verbose=5,cv=CV)
#NOTE(review): X is overwritten with its reduced version, so this cell is not idempotent —
#re-running it fits the selector on the already-selected features. Run it once, right after the split cell.
X = selector.transform(X)

In [11]:
#This function runs GridSearch on multiple shallow classifiers (currently only SVM) and searches over a range of values for different parameters
#This function also performs normalization (currently MinMax normalization with range [0,1])
#The function fits the normalization on the training part of the current CV partition and applies it to the validation part
#The function uses UAR (Unweighted Average Recall) as its metric (balanced_accuracy_score)
#NOTE(review): the prefix string names the experiment (GeMAPs, BL1BL2, SVM, ...) — keep it in sync with the features loaded above
aTrainedModels = shallow.GridSearchShallow(X,y,CV,bSave_Model=True,prefix='_CV_GeMAPs_BL1BL2_SVM_0.5_Balanced_NoOutliers',verbose=5,n_jobs=1)

Fitting 4 folds for each of 78 candidates, totalling 312 fits
[CV] SVM__C=100, SVM__kernel=linear ..................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  SVM__C=100, SVM__kernel=linear, score=0.7517896933968917, total= 1.1min
[CV] SVM__C=100, SVM__kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.1min remaining:    0.0s


[CV]  SVM__C=100, SVM__kernel=linear, score=0.7992822047702923, total= 1.2min
[CV] SVM__C=100, SVM__kernel=linear ..................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  2.3min remaining:    0.0s


KeyboardInterrupt: 

In [None]:
# Compute several metrics for one estimator on both the segment and the speaker level.
# Reference and predicted labels are returned for both levels when SpkrLevel != 'None';
# otherwise only the segment-level labels are returned.
# The metrics to compute are passed through the `scorers` argument.
# SpkrLevel accepts 3 values:
#   - None:    no speaker-level score is calculated
#   - average: the features of each speaker are averaged and then predicted as one sample
#   - major:   every segment is predicted; the speaker is predicted as TD if the majority of
#              its segments are predicted as TD, and as SSD otherwise
y_ref, y_predict_trans, y_ref_spk, y_predict_spk = shallow.Score_CV(
    aTrainedModels[0].best_estimator_,
    X,
    y,
    cv=CV,
    aSpeaker_List=np.asarray(Spkrs, dtype=str),
    SpkrLevel='major',
    scorers=[
        ('balanced_accuracy_score', balanced_accuracy_score),
        ('accuracy_score', accuracy_score),
        ('confusion_matrix', confusion_matrix),
    ],
)

In [None]:
# Display the best estimator found by the first grid search in aTrainedModels
aTrainedModels[0].best_estimator_

In [None]:
# Load the speaker split table (the same file that is passed to Split_Wavs_Train_Test_From_Speaker_List)
pdSplitData = pd.read_csv('data/split_file.csv',sep=',')

In [None]:
# SpkID values of the rows whose 'Exclude' column equals 1
# NOTE(review): `sp` is not used by any later cell in this notebook
sp = pdSplitData.loc[pdSplitData['Exclude']==1, 'SpkID']

In [None]:
# Dimensions of the feature matrix X (after feature selection, if that cell has been run)
X.shape

In [None]:
# Number of samples whose label equals 0 (shown as a 1-tuple)
np.where(y==0)[0].shape

In [None]:
# Number of labels selected by the first index set of the first CV partition
# (presumably the training indices, as element [0] is used for training further down — confirm)
y[CV[0][0]].shape

In [None]:
# One independent MinMax scaler and one linear SVC (C=10) per experiment (uxtd / OGI)
scaler_uxtd, scaler_OGI = MinMaxScaler(), MinMaxScaler()
clf_uxtd, clf_OGI = SVC(kernel='linear', C=10), SVC(kernel='linear', C=10)

In [None]:
# NOTE(review): X_uxtd is never assigned in the visible notebook — this cell depends on
# hidden kernel state and fails on Restart & Run All; confirm where X_uxtd should come from.
X_uxtd.shape

In [None]:
# Manually build scaled train/test sets from single CV partitions.
# NOTE(review): CV_uxtd, X_uxtd and y_uxtd are not assigned anywhere in the visible notebook
# (hidden kernel state) — confirm their origin before relying on this cell.
part_uxtd = CV_uxtd[0]
# NOTE(review): despite the *_OGI names, this partition and the X / y indexed below come from
# the split cell above, not from OGI_data — confirm this is intended.
part_OGI = CV[2]
# Element [0] of a partition is used as training indices, element [1] as test indices.
# Each scaler is fitted on the training part only and then applied to the test part.
X_uxtd_train = scaler_uxtd.fit_transform(X_uxtd[part_uxtd[0]])
X_uxtd_test = scaler_uxtd.transform(X_uxtd[part_uxtd[1]])
y_uxtd_train = y_uxtd[part_uxtd[0]]
y_uxtd_test = y_uxtd[part_uxtd[1]]
X_OGI_train = scaler_OGI.fit_transform(X[part_OGI[0]])
X_OGI_test = scaler_OGI.transform(X[part_OGI[1]])
y_OGI_train = y[part_OGI[0]]
y_OGI_test = y[part_OGI[1]]

In [None]:
# Train the uxtd classifier on the scaled training part
clf_uxtd.fit(X_uxtd_train,y_uxtd_train)

In [None]:
# Score of the uxtd classifier on its own training data
clf_uxtd.score(X_uxtd_train,y_uxtd_train)

In [None]:
# Score of the uxtd classifier on the held-out test part
clf_uxtd.score(X_uxtd_test,y_uxtd_test)

In [None]:
# Train on the first 12 feature columns only
# NOTE(review): 12 is an unexplained magic number — document which features these columns are or move it to a named constant
clf_OGI.fit(X_OGI_train[:,0:12],y_OGI_train)

In [None]:
# Score on the training data, using the same first-12-columns slice the classifier was fitted on
clf_OGI.score(X_OGI_train[:,0:12],y_OGI_train)

In [None]:
# Predict the held-out part (same first-12-columns slice as used for fitting)
y_OGI_p = clf_OGI.predict(X_OGI_test[:,0:12])
# Report balanced accuracy and the confusion matrix of the predictions
print(balanced_accuracy_score(y_OGI_test,y_OGI_p))
print(confusion_matrix(y_OGI_test,y_OGI_p))

In [None]:
# Same balanced-accuracy value as printed in the previous cell, shown here as the cell output
balanced_accuracy_score(y_OGI_test,y_OGI_p)