In [None]:
import os

import librosa
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

In [1]:
def feature_extractor(file, sample_rate=16000, duration=4.0, n_mfcc=64):
    """Return a fixed-length MFCC summary vector for one audio file.

    The clip is loaded with ``librosa.load`` (resampled to ``sample_rate`` and
    limited to the first ``duration`` seconds), forced to exactly
    ``sample_rate * duration`` samples by truncating or zero-padding at the
    end, converted to ``n_mfcc`` MFCCs, and reduced to one mean value per
    coefficient.

    Parameters
    ----------
    file : str or path-like
        Audio file, passed unchanged to ``librosa.load``.
    sample_rate : int, default 16000
        Sampling rate the audio is loaded at.
    duration : float, default 4.0
        Number of seconds read from the start of the file.
    n_mfcc : int, default 64
        Number of MFCC coefficients to compute.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_mfcc``.
    """
    target_length = int(round(sample_rate * duration))
    audio_data, audio_sample_rate = librosa.load(
        file, duration=duration, offset=0, sr=sample_rate
    )
    # Clip first so the pad width below can never go negative (np.pad raises
    # ValueError on negative widths) if the loader returns extra samples.
    audio_data = audio_data[:target_length]
    padding = target_length - len(audio_data)
    audio_data = np.pad(audio_data, (0, padding), mode='constant')
    mfccs_features = librosa.feature.mfcc(
        y=audio_data, sr=audio_sample_rate, n_mfcc=n_mfcc
    )
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    return mfccs_scaled_features

def load_metadata(file_path):
    """Read the CSV file at ``file_path`` and return it as a pandas DataFrame."""
    metadata_frame = pd.read_csv(file_path)
    return metadata_frame

In [None]:
# Dataset locations: the audio root directory and the development metadata CSV.
AUDIO_DATASET_PATH = '/Users/Saeedeh/Downloads'
METADATA_FILE = '/Users/Saeedeh/Downloads/dsl_data/development.csv'

# Read the metadata table.
metadata = load_metadata(METADATA_FILE)

# Target label: the `action` and `object` columns joined into a single string.
metadata["action_object"] = (
    metadata['action'].astype(str) + "" + metadata["object"]
)

# Metadata columns kept as extra model inputs alongside the audio features.
extra_features = metadata[
    [
        'First Language spoken',
        'Current language used for work/school',
        'gender',
        'ageRange',
    ]
]

# The label is kept as a one-column DataFrame (not a Series).
class_labels = metadata[['action_object']]

metadata.head()

In [None]:
# Feature extraction loop: one MFCC vector per metadata row, keyed by row index.

# Resolve the dataset root once rather than on every iteration.
audio_root = os.path.abspath(AUDIO_DATASET_PATH)

tags_dict_post = {}
for index, row in metadata.iterrows():
    # `path` is joined onto the dataset root to locate the audio file.
    filename = os.path.join(audio_root, str(row["path"]))
    tags_dict_post[index] = feature_extractor(filename)

# Show only the count; echoing the dict itself would dump every feature vector.
len(tags_dict_post)

In [None]:
# Flatten the feature dict into a list of (row index, feature vector) pairs.
fin1 = list(tags_dict_post.items())
len(fin1)

In [None]:
# Stack the per-file feature vectors into one DataFrame: one row per entry of
# `fin1`, one column per feature, with a fresh 0..n-1 row index.
# The frame is built in a single call because DataFrame.append was removed in
# pandas 2.0, and appending row by row re-copies the frame on every iteration.
extracted_featurs_64 = pd.DataFrame([features for _, features in fin1])

In [None]:

# Join the audio features, the extra metadata columns and the label column
# side by side (rows are matched on the index).
feaures64 = pd.concat(
    [extracted_featurs_64, extra_features, class_labels],
    axis="columns",
)
feaures64.head()

In [None]:
# Data preprocessing: label encoding and normalization

# The target gets its own encoder so that `label_encoder` stays fitted on the
# class names and can map predictions back with `inverse_transform`.
label_encoder = preprocessing.LabelEncoder()
feaures64['action_object'] = label_encoder.fit_transform(feaures64['action_object'])

# Each categorical input column gets a separate encoder, stored by column name,
# instead of refitting the target encoder (which would discard its classes).
categorical_columns = [
    'Current language used for work/school',
    'First Language spoken',
    'gender',
    'ageRange',
]
feature_encoders = {}
for column in categorical_columns:
    feature_encoders[column] = preprocessing.LabelEncoder()
    feaures64[column] = feature_encoders[column].fit_transform(feaures64[column])

X = feaures64.drop(['action_object'], axis=1)
y = feaures64['action_object']

# The audio-feature columns carry integer names and the metadata columns string
# names; scikit-learn >= 1.2 raises TypeError on mixed-type feature names, so
# convert every column name to a string before fitting.
X.columns = X.columns.astype(str)

cols = X.columns
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set minima/maxima influence the scaled training features. Consider
# fitting it on the training split only.
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(X)

X = pd.DataFrame(np_scaled, columns=cols)

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=111,
)
(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# Model assessment helper
def model_assess(model, title="Default"):
    """Fit ``model`` on the training split and print its test-split accuracy.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. ``title`` only labels the printed line; the accuracy is
    rounded to five decimals. Returns ``None``.
    """
    model.fit(X_train, y_train)
    test_accuracy = accuracy_score(y_test, model.predict(X_test))
    print('Accuracy', title, ':', round(test_accuracy, 5))

In [None]:
# Random Forest: 3-fold cross-validated grid search over split criterion and
# tree depth. The forest is seeded so repeated runs select the same parameters
# and report the same score.
forest = RandomForestClassifier(random_state=111)
grid_rf = {'n_estimators': [100],
           'criterion': ['entropy', 'gini'],
           'max_depth': [None, 2, 5, 10, 50]}
gs_rf = GridSearchCV(forest, grid_rf, cv=3, n_jobs=-1)
gs_rf.fit(X_train, y_train)
# best_score_ is the mean cross-validated score on the training split, not the
# accuracy on the held-out test set.
print('Best accuracy: %.3f' % gs_rf.best_score_)
print('\nBest params:\n', gs_rf.best_params_)

In [None]:
# Support Vector Machine: 5-fold grid search over kernel, C and gamma.
svm = SVC(decision_function_shape="ovo")
parameters = {'kernel': ['rbf', 'sigmoid', 'poly'],
              'C': [0.1, 1.0, 10.0, 100.0, 1000.0],
              'gamma': [0.001, 0.01, 0.1, 1.0]}
# 3 kernels x 5 C values x 4 gammas x 5 folds = 300 fits, so run them in
# parallel like the random-forest search does.
clf = GridSearchCV(estimator=svm, param_grid=parameters, cv=5, n_jobs=-1)
# model_assess fits the whole search on the training split and prints the
# test-split accuracy of the refitted best estimator.
model_assess(clf, "Support Vector Machine")
# best_score_ is the mean cross-validated score of the best parameter set.
print("Best : %f using %s" % (clf.best_score_, clf.best_params_))