In [None]:
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, roc_curve, auc
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

# ***Gender Detection***

In [None]:
df = pd.read_csv('../input/gender-age-and-emotion-detection-from-voice/cleaned_gender.csv')

In [None]:
df.head()

In [None]:
# Remove the first column by position, then list the columns that remain.
df = df.drop(columns=df.columns[:1])
df.columns

In [None]:
print(df.isnull().sum())

In [None]:
df.shape

In [None]:
df.describe()

In [None]:
df.label.value_counts()

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only adds a stray "None" line to the output.
df.info()

In [None]:
# Encode the string class labels as integers for the classifiers.
# The mapping has its own name: binding it to `dict` would shadow the builtin
# for every cell that runs afterwards in this kernel.
label_codes = {'label': {'male': 1, 'female': 0}}
df = df.replace(label_codes)

# Features are every column except the target; the target is the encoded label.
X = df.drop(columns='label')
y = df['label']

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of df.
corr_matrix = df.corr()
plt.figure(figsize=(15, 10))
sns.heatmap(data=corr_matrix, cmap="YlGnBu", fmt=".2f", annot=True, linewidths=0.5)

In [None]:
# Seed the shuffle so the same rows land in train/test on every run; without
# it the accuracies reported below change each time the notebook is executed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
model_acc = []

In [None]:
# Hyperparameters for the gender classifiers built in the training cell below.
svm_c = 1000  # C for SVC
svm_gamma = 0.01  # gamma for SVC

dtc_md = 5  # max_depth for DecisionTreeClassifier

rfc_M = 8  # n_estimators for RandomForestClassifier
rfc_d = 8  # max_features for RandomForestClassifier (NOTE: not the depth, despite the "d")
rfc_m = 6  # max_depth for RandomForestClassifier

In [None]:
model_acc = []

In [None]:
# Fit the three gender classifiers and record train / validation accuracy.
# Each estimator keeps its own global name because later cells read
# DTC_model and RFC_model for their feature importances.
SVM_model = SVC(C=svm_c, gamma=svm_gamma)
# Seeded like the forest so the fitted tree (and its importances) is
# reproducible from run to run.
DTC_model = DecisionTreeClassifier(max_depth=dtc_md, random_state=0)
RFC_model = RandomForestClassifier(
    n_estimators=rfc_M, max_features=rfc_d, max_depth=rfc_m, random_state=0
)

# Start from an empty list here: appending to a list created in an earlier
# cell duplicates rows in the results table whenever this cell is re-run.
model_acc = []
for m, estimator in [
    ('SVM', SVM_model),
    ('Decision Tree Classifier', DTC_model),
    ('Random Forest Classifier', RFC_model),
]:
    estimator.fit(X_train, y_train)
    train_acc = estimator.score(X_train, y_train)
    test_acc = estimator.score(X_test, y_test)
    model_acc.append([m, train_acc, test_acc])

In [None]:
print("Feature importance of Decision Tree Classifier: ")
# A Series keeps the scores numeric (stacking names and scores in one NumPy
# array coerces every score to a string) and lets us rank them, highest first.
pd.Series(DTC_model.feature_importances_, index=X.columns, name='importance').sort_values(ascending=False)

In [None]:
# Horizontal bar chart of the decision tree's feature importances, ascending.
importances = DTC_model.feature_importances_
indices = np.argsort(importances)
plt.figure(figsize=(12, 8))
plt.barh(range(len(indices)), importances[indices], align='center')
# Tick labels must come from X, the frame the model was fitted on. df also
# contains the 'label' column, so df.columns[indices] only lines up with the
# importances when 'label' happens to be the last column.
plt.yticks(range(len(indices)), X.columns[indices])
plt.xlabel('Importance')
plt.title('Feature Importance')

In [None]:
print("Feature importance of Random Forest Classifier: ")
# A Series keeps the scores numeric (stacking names and scores in one NumPy
# array coerces every score to a string) and lets us rank them, highest first.
pd.Series(RFC_model.feature_importances_, index=X.columns, name='importance').sort_values(ascending=False)

In [None]:
# Horizontal bar chart of the random forest's feature importances, ascending.
importances = RFC_model.feature_importances_
indices = np.argsort(importances)
plt.figure(figsize=(12, 8))
plt.barh(range(len(indices)), importances[indices], align='center')
# Tick labels must come from X, the frame the model was fitted on. df also
# contains the 'label' column, so df.columns[indices] only lines up with the
# importances when 'label' happens to be the last column.
plt.yticks(range(len(indices)), X.columns[indices])
plt.xlabel('Importance')
plt.title('Feature Importance')

In [None]:
# One row per model: name, accuracy on the training split, accuracy on the held-out split.
result = pd.DataFrame(
    data=model_acc,
    columns=['Model', 'Training Accuracy', 'Validation Accuracy'],
)
result

In [None]:
# Narrow the frame to six hand-picked features plus the target column.
df_new = df.loc[:, ['meanfun', 'sd', 'Q25', 'IQR', 'mode', 'median', 'label']]
df_new.head()

In [None]:
# Rebuild the feature matrix and target from the reduced frame.
X = df_new.drop(columns='label')
y = df_new['label']

In [None]:
# Seed the shuffle so the reduced-feature scores below are reproducible
# instead of changing with every execution.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
# Random forest trained on the reduced feature set.
feat_rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=50,
    random_state=0,
)
# fit() returns the estimator itself, so feat_his and feat_rf name the same fitted model.
feat_his = feat_rf.fit(X_train, y_train)

In [None]:
# Accuracy on the training split, then on the held-out split (one per line).
print(feat_his.score(X_train, y_train), feat_his.score(X_test, y_test), sep='\n')

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# Two-step pipeline: keep the k=6 highest-scoring features under f_classif,
# then classify with a random forest. Despite the name `anova_svm`, the final
# step is a RandomForestClassifier, not an SVM.
anova_filter = SelectKBest(score_func=f_classif, k=6)
clf = RandomForestClassifier(
    n_estimators=rfc_M,
    max_depth=rfc_m,
    max_features=6,
    random_state=0,
)
anova_svm = make_pipeline(anova_filter, clf)
anova_svm.fit(X_train, y_train)

In [None]:
# Pipeline accuracy on the training split, then on the held-out split (one per line).
print(anova_svm.score(X_train, y_train), anova_svm.score(X_test, y_test), sep='\n')

# ***Age Detection***

In [None]:
df = pd.read_csv('../input/gender-age-and-emotion-detection-from-voice/cleaned_age.csv')

In [None]:
df.head()

In [None]:
# Remove the first two columns by position, then list the columns that remain.
df = df.drop(columns=df.columns[:2])
df.columns

In [None]:
print(df.isnull().sum())

In [None]:
df.shape

In [None]:
df.describe()

In [None]:
df.label.value_counts()

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only adds a stray "None" line to the output.
df.info()

In [None]:
# Encode the string age-group labels as integers for the classifiers.
# The mapping has its own name: binding it to `dict` would shadow the builtin
# for every cell that runs afterwards in this kernel.
label_codes = {'label': {'old': 2, 'matured': 1, 'young': 0}}
df = df.replace(label_codes)

# Features are every column except the target; the target is the encoded label.
X = df.drop(columns='label')
y = df['label']

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of df.
corr_matrix = df.corr()
plt.figure(figsize=(15, 10))
sns.heatmap(data=corr_matrix, cmap="YlGnBu", fmt=".2f", annot=True, linewidths=0.5)

In [None]:
# Seed the shuffle so the same rows land in train/test on every run; without
# it the accuracies reported below change each time the notebook is executed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
model_acc = []

In [None]:
# Hyperparameters for the age classifiers built in the training cell below.
svm_c = 10  # C for SVC
svm_gamma = 0.1  # gamma for SVC

dtc_md = 8  # max_depth for DecisionTreeClassifier

rfc_M = 14  # n_estimators for RandomForestClassifier
rfc_d = 14  # max_features for RandomForestClassifier (NOTE: not the depth, despite the "d")
rfc_m = 16  # max_depth for RandomForestClassifier

In [None]:
# Fit the three age classifiers and record train / validation accuracy.
# Each estimator keeps its own global name because later cells read
# DTC_model and RFC_model for their feature importances.
SVM_model = SVC(C=svm_c, gamma=svm_gamma)
# Seeded like the forest so the fitted tree (and its importances) is
# reproducible from run to run.
DTC_model = DecisionTreeClassifier(max_depth=dtc_md, random_state=0)
RFC_model = RandomForestClassifier(
    n_estimators=rfc_M, max_features=rfc_d, max_depth=rfc_m, random_state=0
)

# Start from an empty list here: appending to a list created in an earlier
# cell duplicates rows in the results table whenever this cell is re-run.
model_acc = []
for m, estimator in [
    ('SVM', SVM_model),
    ('Decision Tree Classifier', DTC_model),
    ('Random Forest Classifier', RFC_model),
]:
    estimator.fit(X_train, y_train)
    train_acc = estimator.score(X_train, y_train)
    test_acc = estimator.score(X_test, y_test)
    model_acc.append([m, train_acc, test_acc])

In [None]:
print("Feature importance: ")
# Decision-tree importances as a Series: scores stay numeric (stacking names
# and scores in one NumPy array coerces them to strings) and are ranked.
pd.Series(DTC_model.feature_importances_, index=X.columns, name='importance').sort_values(ascending=False)

In [None]:
# Horizontal bar chart of the decision tree's feature importances, ascending.
importances = DTC_model.feature_importances_
indices = np.argsort(importances)
plt.figure(figsize=(12, 8))
plt.barh(range(len(indices)), importances[indices], align='center')
# Tick labels must come from X, the frame the model was fitted on. df also
# contains the 'label' column, so df.columns[indices] only lines up with the
# importances when 'label' happens to be the last column.
plt.yticks(range(len(indices)), X.columns[indices])
plt.xlabel('Importance')
plt.title('Feature Importance')

In [None]:
print("Feature importance: ")
# Random-forest importances as a Series: scores stay numeric (stacking names
# and scores in one NumPy array coerces them to strings) and are ranked.
pd.Series(RFC_model.feature_importances_, index=X.columns, name='importance').sort_values(ascending=False)

In [None]:
# Horizontal bar chart of the random forest's feature importances, ascending.
importances = RFC_model.feature_importances_
indices = np.argsort(importances)
plt.figure(figsize=(12, 8))
plt.barh(range(len(indices)), importances[indices], align='center')
# Tick labels must come from X, the frame the model was fitted on. df also
# contains the 'label' column, so df.columns[indices] only lines up with the
# importances when 'label' happens to be the last column.
plt.yticks(range(len(indices)), X.columns[indices])
plt.xlabel('Importance')
plt.title('Feature Importance')

In [None]:
# One row per model: name, accuracy on the training split, accuracy on the held-out split.
result = pd.DataFrame(
    data=model_acc,
    columns=['Model', 'Training Accuracy', 'Validation Accuracy'],
)
result

# ***Emotion Detection***

In [None]:
df = pd.read_csv('../input/gender-age-and-emotion-detection-from-voice/cleaned_emotion.csv')

In [None]:
df.head()

In [None]:
# Remove the first three columns by position, then list the columns that remain.
df = df.drop(columns=df.columns[:3])
df.columns

In [None]:
print(df.isnull().sum())

In [None]:
df.shape

In [None]:
df.describe()

In [None]:
df.label.value_counts()

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only adds a stray "None" line to the output.
df.info()

In [None]:
# Encode the string emotion labels as integers for the classifiers.
# The mapping has its own name: binding it to `dict` would shadow the builtin
# for every cell that runs afterwards in this kernel.
label_codes = {'label': {'sad': 0, 'angry': 1, 'happy': 2}}
df = df.replace(label_codes)

# Features are every column except the target; the target is the encoded label.
X = df.drop(columns='label')
y = df['label']

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of df.
corr_matrix = df.corr()
plt.figure(figsize=(15, 10))
sns.heatmap(data=corr_matrix, cmap="YlGnBu", fmt=".2f", annot=True, linewidths=0.5)

In [None]:
# Seed the shuffle so the same rows land in train/test on every run; without
# it the accuracies reported below change each time the notebook is executed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
model_acc = []

In [None]:
# Hyperparameters for the emotion classifiers built in the training cell below.
svm_c = 1000  # C for SVC
svm_gamma = 0.01  # gamma for SVC

dtc_md = 4  # max_depth for DecisionTreeClassifier

rfc_M = 14  # n_estimators for RandomForestClassifier
rfc_d = 8  # max_features for RandomForestClassifier (NOTE: not the depth, despite the "d")
rfc_m = 16  # max_depth for RandomForestClassifier

In [None]:
# Fit the three emotion classifiers and record train / validation accuracy.
# Each estimator keeps its own global name because later cells read
# DTC_model and RFC_model for their feature importances.
SVM_model = SVC(C=svm_c, gamma=svm_gamma)
# Seeded like the forest so the fitted tree (and its importances) is
# reproducible from run to run.
DTC_model = DecisionTreeClassifier(max_depth=dtc_md, random_state=0)
RFC_model = RandomForestClassifier(
    n_estimators=rfc_M, max_features=rfc_d, max_depth=rfc_m, random_state=0
)

# Start from an empty list here: appending to a list created in an earlier
# cell duplicates rows in the results table whenever this cell is re-run.
model_acc = []
for m, estimator in [
    ('SVM', SVM_model),
    ('Decision Tree Classifier', DTC_model),
    ('Random Forest Classifier', RFC_model),
]:
    estimator.fit(X_train, y_train)
    train_acc = estimator.score(X_train, y_train)
    test_acc = estimator.score(X_test, y_test)
    model_acc.append([m, train_acc, test_acc])

In [None]:
print("Feature importance: ")
# Decision-tree importances as a Series: scores stay numeric (stacking names
# and scores in one NumPy array coerces them to strings) and are ranked.
pd.Series(DTC_model.feature_importances_, index=X.columns, name='importance').sort_values(ascending=False)

In [None]:
# Horizontal bar chart of the decision tree's feature importances, ascending.
importances = DTC_model.feature_importances_
indices = np.argsort(importances)
plt.figure(figsize=(12, 8))
plt.barh(range(len(indices)), importances[indices], align='center')
# Tick labels must come from X, the frame the model was fitted on. df also
# contains the 'label' column, so df.columns[indices] only lines up with the
# importances when 'label' happens to be the last column.
plt.yticks(range(len(indices)), X.columns[indices])
plt.xlabel('Importance')
plt.title('Feature Importance')

In [None]:
print("Feature importance: ")
# Random-forest importances as a Series: scores stay numeric (stacking names
# and scores in one NumPy array coerces them to strings) and are ranked.
pd.Series(RFC_model.feature_importances_, index=X.columns, name='importance').sort_values(ascending=False)

In [None]:
# Horizontal bar chart of the random forest's feature importances, ascending.
importances = RFC_model.feature_importances_
indices = np.argsort(importances)
plt.figure(figsize=(12, 8))
plt.barh(range(len(indices)), importances[indices], align='center')
# Tick labels must come from X, the frame the model was fitted on. df also
# contains the 'label' column, so df.columns[indices] only lines up with the
# importances when 'label' happens to be the last column.
plt.yticks(range(len(indices)), X.columns[indices])
plt.xlabel('Importance')
plt.title('Feature Importance')

In [None]:
# One row per model: name, accuracy on the training split, accuracy on the held-out split.
result = pd.DataFrame(
    data=model_acc,
    columns=['Model', 'Training Accuracy', 'Validation Accuracy'],
)
result