In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler

import joblib
%matplotlib inline

In [None]:
# Read the gender-classification CSV into a DataFrame and show its
# (rows, columns) shape as the cell output.
DATASET_CSV = '../input/gender-classification-dataset/gender_classification_v7.csv'
df = pd.read_csv(DATASET_CSV)
df.shape

In [None]:
# Count the missing (NaN) entries in each column.
df.isna().sum()

In [None]:
# Preview the first rows of the raw frame.
df.head()

In [None]:
# List the distinct labels present in the target column.
df['gender'].unique()

# Frequency of gender

In [None]:
# Bar chart of how many rows carry each gender label.
plt.style.use('fivethirtyeight')
sns.countplot(data=df, x='gender')
plt.show()

In [None]:
# Encode the target as integers: Male -> 1, Female -> 0.
#
# The result is assigned back to the column instead of calling
# `df['gender'].replace(..., inplace=True)`: that form mutates a temporary
# Series, and under pandas copy-on-write it leaves `df` itself unchanged.
#
# The dtype guard makes the cell safe to re-run: once the column is numeric,
# mapping it again would turn every value into NaN.
gender_codes = {'Male': 1, 'Female': 0}
if not pd.api.types.is_numeric_dtype(df['gender']):
    df['gender'] = df['gender'].map(gender_codes)

# **Frequency of 'long_hair', 'nose_wide', 'nose_long' and 'lips_thin'**

In [None]:
# One count plot per binary facial-feature column; `cols` is reused by
# later cells.
cols = ['long_hair', 'nose_wide', 'nose_long', 'lips_thin']
for feature in cols:
    plt.style.use('fivethirtyeight')
    ax = sns.countplot(data=df, x=feature)
    ax.set_title(feature)
    plt.show()
    print('\n')

# **Histogram of 'forehead_height_cm' and 'forehead_width_cm'**

In [None]:
# Histogram of each continuous forehead measurement; `cols2` is reused by
# later cells.
cols2 = ['forehead_height_cm','forehead_width_cm']
for measurement in cols2:
    plt.style.use('fivethirtyeight')
    ax = df[measurement].plot.hist(rwidth=0.95)
    ax.set_xlabel(measurement)
    plt.show()
    print('\n')

# **Male vs Female**

In [None]:
# Overlay the distribution of each forehead measurement, split by gender.
for measurement in cols2:
    sns.histplot(data=df, x=measurement, hue='gender', element='poly')
    plt.show()

In [None]:
# Row subsets per encoded gender value (1 and 0 respectively).
gender = df['gender']
male_data = df[gender.eq(1)]
female_data = df[gender.eq(0)]

In [None]:
# For every binary feature, draw the male and female count plots side by
# side on a shared y-axis so the bars are directly comparable.
for feature in cols:
    fig, axes = plt.subplots(1, 2, figsize=(15, 5), sharey=True)
    panels = (
        (male_data, f'{feature} (Male)'),
        (female_data, f'{feature} (Female)'),
    )
    for ax, (subset, title) in zip(axes, panels):
        sns.countplot(ax=ax, x=subset[feature])
        ax.set_title(title)
    plt.show()

# **Feature Scaling**

In [None]:
# Display the names of the continuous columns that are about to be scaled.
cols2

In [None]:
# Min-max scale the continuous forehead measurements listed in `cols2`.
# `fit_transform` fits the scaler itself, so the separate `fit` call that
# used to precede it was redundant and has been dropped.
#
# NOTE(review): the scaler is fitted on the full frame, before the
# train/test split further down, so test rows influence the min/max applied
# to the training features. Consider fitting on the training split only.
scale = MinMaxScaler()
scalled = scale.fit_transform(df[cols2])

In [None]:
# Overwrite the original measurement columns with their scaled values;
# column order in `scalled` matches the order of `cols2`.
df[cols2] = scalled

In [None]:
# Preview the frame after the scaled columns were written back.
df.head()

# **Splitting and Training**

In [None]:
# Target is the `gender` column; features are every remaining column.
y = df['gender']
x = df.drop(columns='gender')

In [None]:
# Preview the feature matrix.
x.head()

In [None]:
# Preview the target vector.
y.head()

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Shapes of the training and test feature matrices.
x_train.shape, x_test.shape

In [None]:
# Shapes of the training and test target vectors.
y_train.shape, y_test.shape

# **Model Building and predictions**

In [None]:
# Fix the random seeds so weight initialisation and batch shuffling are
# repeatable when the notebook is re-run from a fresh kernel. Builds of
# TensorFlow that do not expose `keras.utils.set_random_seed` fall back to
# `tf.random.set_seed`.
_set_seed = getattr(keras.utils, 'set_random_seed', tf.random.set_seed)
_set_seed(1)

# Take the input width from the training data instead of hard-coding it, so
# the model stays in sync with the feature columns.
n_features = x_train.shape[1]

# Small binary classifier: one hidden ReLU layer of 7 units followed by a
# single sigmoid unit that outputs a probability for class 1.
model = keras.Sequential([
    keras.layers.Dense(7, input_shape=(n_features,), activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the network on the training split for 500 epochs.
model.fit(x_train, y_train, epochs=500)

In [None]:
# Score the fitted model on the held-out test split.
model.evaluate(x_test, y_test)

In [None]:
# Predicted probabilities for the test and training splits, each flattened
# to a 1-D array.
y_predict_test, y_predict_train = (
    model.predict(split).flatten() for split in (x_test, x_train)
)

In [None]:
def round_up_sigmoid(data):
    """Threshold sigmoid probabilities into hard 0/1 class labels.

    Values greater than or equal to 0.5 become 1; everything else becomes 0.
    A new object is returned and ``data`` is left untouched. The previous
    implementation overwrote the caller's array in place, which destroyed
    the raw probabilities.

    Parameters
    ----------
    data : numpy.ndarray or pandas.Series
        Numeric values supporting element-wise ``>=`` and ``.astype``.

    Returns
    -------
    Same container type and dtype as ``data``
        0/1 labels, one per input element.

    Notes
    -----
    NaN entries compare false against the threshold and therefore map to 0.
    """
    return (data >= 0.5).astype(data.dtype)

In [None]:
# Convert both sets of predicted probabilities into hard 0/1 labels.
y_predict_test, y_predict_train = map(
    round_up_sigmoid, (y_predict_test, y_predict_train)
)

In [None]:
# Confusion matrices of true versus predicted labels for each split.
cm_test = confusion_matrix(y_true=y_test, y_pred=y_predict_test)
cm_train = confusion_matrix(y_true=y_train, y_pred=y_predict_train)

# **Metric Evaluation**

In [None]:
# Heatmap of the test-split confusion matrix.
# scikit-learn puts the true labels on the rows and the predicted labels on
# the columns; the axes are labelled accordingly so the figure can be read
# on its own.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    cm_test,
    annot=True,
    fmt='g',
    xticklabels=['Female', 'Male'],
    yticklabels=['Female', 'Male'],
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Test Data')
plt.show()

In [None]:
# Heatmap of the training-split confusion matrix.
# scikit-learn puts the true labels on the rows and the predicted labels on
# the columns; the axes are labelled accordingly so the figure can be read
# on its own.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    cm_train,
    annot=True,
    fmt='g',
    xticklabels=['Female', 'Male'],
    yticklabels=['Female', 'Male'],
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Train Data')
plt.show()

# **Classification reports**

In [None]:
# Precision / recall / F1 summary for the test split.
test_report = classification_report(y_test, y_predict_test)
print('Test Data report:', test_report, sep='\n')

In [None]:
# Precision / recall / F1 summary for the training split.
train_report = classification_report(y_train, y_predict_train)
print('Train Data report', train_report, sep='\n')

# **Saving Model as file**

In [None]:
# Write the trained model to an .h5 file at a path relative to the working
# directory.
model.save('Gender-Classifier-Model.h5')

In [None]:
# Shell escape: list the working directory (presumably to confirm the model
# file was written).
!ls