In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install dataprep

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l1, l2
from keras.optimizers import Adam, RMSprop
from keras.callbacks import ReduceLROnPlateau
from dataprep.eda import plot

In [None]:
# Load the voice dataset from the Kaggle input directory and preview the first rows.
data = pd.read_csv(filepath_or_buffer='../input/voicegender/voice.csv')
data.head(5)

In [None]:
# Run dataprep's `plot` (imported from `dataprep.eda`) on the full DataFrame.
# NOTE(review): presumably renders an overview of every column — confirm against the dataprep docs.
plot(data)

In [None]:
# Pull the `label` column out of the frame as an independent NumPy array.
y = np.array(data['label'].copy())

In [None]:
# Encode the string labels numerically: 'male' -> 0, 'female' -> 1, then cast to float32.
male_mask = y == 'male'
y = np.where(male_mask, 0, y)
female_mask = y == 'female'
y = np.where(female_mask, 1, y)
y = np.asarray(y).astype(np.float32)

In [None]:
# Feature matrix: every column except `label`, as a float32 NumPy array.
x = np.array(data.drop(columns='label').copy(), dtype='float32')

In [None]:
# Hold out 25% of the samples for testing; the fixed random_state keeps the split repeatable.
trainX, testX, trainY, testY = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Training hyper-parameters used by the model-fitting loops.
INIT_LR = 2e-4   # initial Adam learning rate
EPOCHS = 150     # epochs per trained model
BATCH = 8        # mini-batch size

# L2 regularization strengths to sweep, from strongest to weakest.
regul = [1e-2, 5e-3, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5]

# Per-model results collected during a sweep.
all_train = []
all_test = []
models = []

In [None]:
# L2 sweep: train one network per regularization strength in `regul` and record
# its accuracy on the training and test splits for later comparison.
for param in regul:

    # Five ReLU hidden layers (256-128-64-64-32), each with an L2 kernel penalty of
    # `param`, followed by one sigmoid unit for the binary label. `Dense` is taken
    # from `keras.layers` so that layers, `Sequential` and `l2` all come from the
    # same Keras package.
    model = Sequential()
    model.add(Dense(input_shape=(trainX.shape[1],), units=256, activation="relu", kernel_regularizer=l2(param)))
    model.add(Dense(128, activation="relu", kernel_regularizer=l2(param)))
    model.add(Dense(64, activation="relu", kernel_regularizer=l2(param)))
    model.add(Dense(64, activation="relu", kernel_regularizer=l2(param)))
    model.add(Dense(32, activation="relu", kernel_regularizer=l2(param)))
    model.add(Dense(1, activation="sigmoid"))

    print(f"[INFO] TRAINING NETWORK : {param}")
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras versions no longer accept.
    opt = Adam(learning_rate=INIT_LR)
    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
    # Halve the learning rate when the training loss stalls for 5 epochs (floor 1e-5).
    callbacks = [ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, min_lr=0.00001)]
    history_model = model.fit(trainX, trainY, epochs=EPOCHS, batch_size=BATCH, callbacks=callbacks, verbose=1)
    models.append(model)
    # evaluate the model using train and test data
    _, train_acc = model.evaluate(trainX, trainY, verbose=0)
    _, test_acc = model.evaluate(testX, testY, verbose=0)
    print('Param: %f, Train: %.3f, Test: %.3f' % (param, train_acc, test_acc))
    all_train.append(train_acc)
    all_test.append(test_acc)

In [None]:
# Train vs. test accuracy for each L2 strength. The x-axis is logarithmic because
# the values in `regul` span several orders of magnitude.
fig, ax = plt.subplots()
ax.semilogx(regul, all_train, label='train', marker='o')
ax.semilogx(regul, all_test, label='test', marker='o')
# Label the figure so it can be read on its own.
ax.set(
    xlabel='L2 regularization strength',
    ylabel='accuracy',
    title='Accuracy vs. L2 regularization',
)
ax.legend()
plt.show()

In [None]:
# Tabulate accuracy (in percent) per L2 strength, sorted by the absolute
# train/test gap, smallest first.
all_train_array, all_test_array = np.array(all_train), np.array(all_test)
accuracy_frame = pd.DataFrame(
    {
        'regularization': regul,
        'accuracy_on_train': 100 * all_train_array,
        'accuracy_on_test': 100 * all_test_array,
        'accuracy_difference': np.abs(100 * (all_train_array - all_test_array)),
    }
)
acc = accuracy_frame.sort_values(by='accuracy_difference')
acc

In [None]:
# Threshold the sigmoid outputs of models[1] (the second trained model) at 0.5
# and print the per-class report followed by the confusion matrix.
predict = np.greater(models[1].predict(testX), 0.5).astype("int32")
print(classification_report(y_true=testY, y_pred=predict))
print(confusion_matrix(y_true=testY, y_pred=predict))

In [None]:
# Test-set accuracy of the predictions, as a percentage.
print('Accuracy:', 100 * accuracy_score(y_true=testY, y_pred=predict))

In [None]:
# DNN WITH DROPOUT

In [None]:
# Dropout sweep: train one network per dropout rate in `parameters` and record
# its accuracy on the training and test splits.
# The result containers are re-created here, replacing whatever they held before.
all_train, all_test = list(), list()
models = list()
parameters = [0.1, 0.2, 0.3, 0.4, 0.5]
for i in parameters:
    # ReLU hidden layers of 256-128-64-64-32 units; dropout with rate `i` follows
    # the first two hidden layers. A single sigmoid unit produces the output.
    model = Sequential()
    model.add(Dense(input_shape=(trainX.shape[1],), units=256,
                                activation="relu"))
    model.add(Dropout(i))
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(i))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))

    print(f"[INFO] TRAINING NETWORK {i}")
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias that
    # newer Keras versions no longer accept.
    opt = Adam(learning_rate=INIT_LR)
    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
    # Halve the learning rate when the training loss stalls for 5 epochs (floor 1e-5).
    callbacks = [ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, min_lr=0.00001)]
    history_model = model.fit(trainX, trainY, epochs=EPOCHS, batch_size=BATCH, callbacks=callbacks, verbose=1)
    models.append(model)
    # evaluate the model using train and test data
    _, train_acc = model.evaluate(trainX, trainY, verbose=0)
    _, test_acc = model.evaluate(testX, testY, verbose=0)
    # Report this iteration's dropout rate `i`, the loop variable of this sweep.
    print('Param: %f, Train: %.3f, Test: %.3f' % (i, train_acc, test_acc))
    all_train.append(train_acc)
    all_test.append(test_acc)

In [None]:
# Train vs. test accuracy for each dropout rate. The rates in `parameters` are
# evenly spaced (0.1 ... 0.5), so a linear x-axis is used instead of a log one.
fig, ax = plt.subplots()
ax.plot(parameters, all_train, label='train', marker='o')
ax.plot(parameters, all_test, label='test', marker='o')
# Label the figure so it can be read on its own.
ax.set(
    xlabel='dropout rate',
    ylabel='accuracy',
    title='Accuracy vs. dropout rate',
)
ax.legend()
plt.show()

In [None]:
# Tabulate accuracy (in percent) per dropout rate, sorted by the absolute
# train/test gap, smallest first; display the first rows.
all_train_array, all_test_array = np.array(all_train), np.array(all_test)
accuracy_frame = pd.DataFrame(
    {
        'parameter': parameters,
        'accuracy_on_train': 100 * all_train_array,
        'accuracy_on_test': 100 * all_test_array,
        'accuracy_difference': np.abs(100 * (all_train_array - all_test_array)),
    }
)
acc = accuracy_frame.sort_values(by='accuracy_difference')
acc.head()

In [None]:
# Threshold the sigmoid outputs of models[3] (the fourth trained model) at 0.5
# and print the per-class report followed by the confusion matrix.
predict = np.greater(models[3].predict(testX), 0.5).astype("int32")
print(classification_report(y_true=testY, y_pred=predict))
print(confusion_matrix(y_true=testY, y_pred=predict))

In [None]:
# Test-set accuracy of the predictions, as a percentage.
print('Accuracy:', 100 * accuracy_score(y_true=testY, y_pred=predict))