In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load



# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Lazily build the full path of every file found below the Kaggle input directory,
# then print them one per line.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Table of contents
- <a href='#0'>  Dataset Introduction </a> 
- <a href='#1'>1. Importing Libraries and Dataset </a> 
- <a href='#2'>2. Data Analysis </a> 
- <a href='#3'>3. Distribution Of Age </a> 
- <a href='#4'>4. Ethnicity Frequency  </a> 
- <a href='#5'>5. Gender </a>
- <a href='#6'>6. Sample Images </a>
- <a href='#7'>7.Ethnicity Model </a>
- <a href='#8'>8.Gender Model </a>
- <a href='#9'>9.EndNote </a>

# <a id='0'> Dataset Introduction </a>


Age=Age of the person in the image <br>
Ethnicity=Specifies the ethnicity of the person <br>
Gender=Gender of the person <br>
0=Male   1=Female <br>
Pixels=Pixel values of the image, stored as a single space-separated string <br>

![](https://media.giphy.com/media/26BRNLvVCOmcR3S80/giphy.gif)

# <a id='1'> 1. Importing Libraries and Dataset</a>

In [None]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns 
import plotly.express as px
import plotly.graph_objects as go

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Activation, MaxPool2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing import image
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Read the raw CSV once and keep it untouched in `Face_data`;
# all later cells work on the independent copy `df`.
Face_data = pd.read_csv(
    '../input/age-gender-and-ethnicity-face-data-csv/age_gender.csv'
)
df = Face_data.copy()

# Preview the first two rows.
df.head(2)

 # <a id='2'> 2.Data Analysis </a>

In [None]:
# Column names, dtypes and non-null counts of the working frame.
df.info()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# True if at least one cell anywhere in the frame is missing.
df.isna().to_numpy().any()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics of the numeric columns, one row per column.
df.describe().transpose()

In [None]:
# Rows that are exact repeats of an earlier row (the boolean mask is used directly).
df[df.duplicated()]

# <a id='3'> 3.Distribution Of Age</a>

In [None]:
# Histogram of ages in 10-year bins covering 0-120.
# (Pass the column as `y=` instead of `x=` to get a horizontal plot.)
age_hist = go.Histogram(
    x=df['age'],
    marker_color="Bisque",
    xbins={
        'start': 0,   # lower edge of the first bin
        'end': 120,   # upper edge of the last bin
        'size': 10,   # bin width
    },
)
fig = go.Figure(data=[age_hist])
fig.update_layout(
    title="Distribution Of Age",
    title_x=0.5,
    xaxis_title="Age",
    yaxis_title="Counts",
)
fig.show()

In [None]:
# Count how many images exist for each distinct age.
# `rename_axis(...).reset_index(name=...)` yields the columns ['age', 'Count'] on every
# pandas version; the previous rename of 'index'/'age' relied on the pre-2.0 column names
# produced by `value_counts().reset_index()` and left no 'age' column on pandas >= 2.0.
df_age = (
    df['age']
    .value_counts()
    .rename_axis('age')
    .reset_index(name='Count')
)

# One bar per age, coloured and annotated by its count.
fig = go.Figure(go.Bar(
    x=df_age['age'],
    y=df_age['Count'],
    marker={'color': df_age['Count'], 'colorscale': 'Viridis'},
    text=df_age['Count'],
    textposition="outside",
))
fig.update_layout(title_text='Distribution Of Age', xaxis_title="Age", yaxis_title=" Count ", title_x=0.5)
fig.show()

In [None]:
# Box plot of the age column: mean and standard deviation are drawn (boxmean='sd')
# and only outlier points are shown.
fig = go.Figure()
fig.add_trace(go.Box(
    y=df['age'],
    boxmean='sd',
    name="Age",           # also used as the tick label under the box
    jitter=0.3,
    pointpos=-1.8,
    boxpoints='outliers', # represent outliers points
    marker_color='rgb(7,40,89)',
    line_color='rgb(7,40,89)'
))
# The y axis carries the age values themselves (not counts), so it is labelled "Age".
# The x axis needs no title because the trace name already labels the single box.
fig.update_layout(title_text='Distribution Of Age', yaxis_title="Age", title_x=0.5)
fig.show()

In [None]:
# Matplotlib/seaborn view of the age distribution (histogram plus density curve).
plt.style.use('fivethirtyeight')
plt.rcParams.update({'figure.figsize': (15, 5)})

# NOTE(review): `distplot` is deprecated in recent seaborn releases; kept here unchanged.
age_ax = sns.distplot(df['age'], color='cyan')
age_ax.set_title('Distribution of Age', fontsize=20)
plt.show()

In [None]:
# Print the mean, maximum, minimum and median age, one per line.
age_column = df["age"]
age_summary = (
    ("Avg Age: ", age_column.mean()),
    ("Max Age: ", age_column.max()),
    ("Min Age: ", age_column.min()),
    ("Median Age: ", age_column.median()),
)
for label, value in age_summary:
    print(label + str(value))


# <a id='4'> 4.Ethnicity  Frequency</a>

In [None]:

# Frequency of each ethnicity code, most frequent first.
# - `ascending` must be the boolean False: the string "False" is truthy (so older pandas
#   sorted ascending) and newer pandas rejects a non-bool value outright.
# - `rename_axis(...).reset_index(name=...)` gives the columns ['ethnicity', 'count'] on
#   every pandas version, unlike renaming the 'index' column, which no longer exists after
#   `value_counts().reset_index()` on pandas >= 2.0.
df_ethnicity = (
    df['ethnicity']
    .value_counts()
    .rename_axis('ethnicity')
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)

# Horizontal bars: categories go on y, counts on x, and orientation must be "h"
# (the default orientation is "v", vertical).
fig = go.Figure(go.Bar(y=df_ethnicity['ethnicity'], x=df_ethnicity['count'],
                       orientation="h"))
fig.update_layout(title_text=' Ethnicity  Frequency ', xaxis_title="Count", yaxis_title="Ethnicity", title_x=0.5)
fig.show()

In [None]:
# Frequency table with the columns ['ethnicity', 'count'].
# Built with rename_axis/reset_index(name=...) so it works on every pandas version; the
# former rename of 'index'/'ethnicity' produced two 'count' columns on pandas >= 2.0.
df_ethnicity = (
    df['ethnicity']
    .value_counts()
    .rename_axis('ethnicity')
    .reset_index(name='count')
)

# One colour per slice, in the order of the rows above.
colors = ['cyan', 'royalblue', 'blue', 'darkblue', "darkcyan"]
fig = go.Figure([go.Pie(labels=df_ethnicity['ethnicity'], values=df_ethnicity['count'])])
fig.update_traces(hoverinfo='label+percent', textinfo='percent+value', textfont_size=15,
                  marker=dict(colors=colors, line=dict(color='#000000', width=2)))
fig.update_layout(title="Ethnicity Distribution", title_x=0.5)
fig.show()

# <a id='5'> 5.Gender  </a>

In [None]:
# Frequency table with the columns ['gender', 'count'].
# Built with rename_axis/reset_index(name=...) so it works on every pandas version; the
# former rename of 'index'/'gender' produced two 'count' columns on pandas >= 2.0.
df_gender = (
    df['gender']
    .value_counts()
    .rename_axis('gender')
    .reset_index(name='count')
)

# Pie chart of the gender codes with count and percentage written inside each slice.
fig = go.Figure([go.Pie(labels=df_gender['gender'], values=df_gender['count'])])

fig.update_traces(hoverinfo='label+percent', textinfo='value+percent', textfont_size=12, insidetextorientation='radial')

fig.update_layout(title="Gender Count", title_x=0.5)
fig.show()

In [None]:
# Bar chart of how many rows carry each gender code.
ax = sns.countplot(data=df, x="gender")
ax.set_ylabel('Count')
ax.set_xlabel('Gender')
ax.set_title('Gender Count');

In [None]:
# Number of images for every (gender, ethnicity) pair.
# `gender` is cast to a categorical so plotly colours it as discrete groups.
df_rece_gender = (
    df.groupby(by=['gender', 'ethnicity'])['img_name']
    .count()
    .reset_index(name='Count')
    .rename(columns={'ethnicity': 'Ethnicity'})
    .astype({'gender': 'category'})
)

# Grouped bars: one cluster per ethnicity, one bar per gender.
fig = px.bar(
    df_rece_gender,
    x="Ethnicity",
    y="Count",
    color="gender",
    barmode="group",
)
fig.update_layout(title_text='Ethnicity with Gender', title_x=0.5)
fig.show()

# <a id='6'> 6.Sample Images  </a>

In [None]:
# Turn the space-separated pixel strings into a 2-D float array, one row per image.
df_pixels = df.pixels.str.split(" ").tolist()
df_pixels = pd.DataFrame(df_pixels, dtype=int)
df_images = df_pixels.values
# `np.float` was only an alias of the builtin float and was removed in NumPy 1.24;
# np.float64 is the same dtype and works on every NumPy version.
df_images = df_images.astype(np.float64)

print(df_images.shape)

In [None]:
def show(img, shape=(48, 48)):
    """Draw one flattened image in grayscale on the current matplotlib axes.

    Parameters
    ----------
    img : array-like with a ``reshape`` method
        Flattened pixel values; must contain exactly ``shape[0] * shape[1]`` elements.
    shape : tuple of int, optional
        (height, width) to reshape ``img`` to. Defaults to (48, 48), the size used
        throughout this notebook.
    """
    show_image = img.reshape(shape)

    # Hide ticks and frame so only the picture is visible.
    plt.axis('off')
    plt.imshow(show_image, cmap='gray')

In [None]:
# Display the image stored at row index 1000.
show(df_images[1000])

In [None]:
# 3 x 4 grid of sample faces, taking every third image (rows 3, 6, ..., 36).
plt.figure(0, figsize=(12, 6))
for i in range(1, 13):
    plt.subplot(3, 4, i)
    plt.axis('off')

    # Named `face` rather than `image` so the `image` module imported from
    # tensorflow.keras.preprocessing is not overwritten.
    face = df_images[3 * i].reshape(48, 48)
    plt.imshow(face, cmap='gray')

plt.tight_layout()
plt.show()

# <a id='7'> 7.Ethnicity Model  </a>

In [None]:
# Rebuild the 2-D float array of pixel values (one row per image) used as model input.
df_pixels = df.pixels.str.split(" ").tolist()
df_pixels = pd.DataFrame(df_pixels, dtype=int)
df_images = df_pixels.values
# `np.float` was only an alias of the builtin float and was removed in NumPy 1.24;
# np.float64 is the same dtype and works on every NumPy version.
df_images = df_images.astype(np.float64)


In [None]:
# Target labels for the ethnicity classifier; show the first rows as a quick check.
y=df['ethnicity']
y.head()

In [None]:
# Number of distinct values in the target column.
clas_number = len(y.unique())
clas_number

In [None]:
# Feature matrix: pixel values divided by 255
# (presumably maps 8-bit intensities into [0, 1] - confirm the raw value range).
X = df_images / 255


In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=35, test_size=0.2)

In [None]:
# Give both splits the (samples, 48, 48, 1) layout expected by the Conv2D input layer.
X_train, X_test = (
    split.reshape(-1, 48, 48, 1) for split in (X_train, X_test)
)

In [None]:
# Report the shape of each feature split.
for label, split in (('X_Train:', X_train), ('X_Test:', X_test)):
    print(label, split.shape)

In [None]:
# Report the shape of each label split.
for label, split in (('y_Train:', y_train), ('y_Test:', y_test)):
    print(label, split.shape)

## Model

In [None]:
# CNN that maps a 48x48 single-channel image to a probability for each ethnicity class.
model = Sequential()

# 1. LAYER: convolution -> batch norm -> ReLU
model.add(Conv2D(32, 3, data_format="channels_last", kernel_initializer="he_normal", input_shape=(48, 48, 1)))
model.add(BatchNormalization())
model.add(Activation('relu'))

# 2. LAYER: convolution -> batch norm -> ReLU -> 2x2 max pooling
model.add(Conv2D(64, 3))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))

# 3. LAYER: convolution -> batch norm -> ReLU -> 2x2 max pooling
model.add(Conv2D(32, 3))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2))

# Fully connected layer with dropout for regularisation
model.add(Flatten())
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.4))

# Output layer: one unit per class (`clas_number` is computed from the labels above),
# turned into probabilities by softmax.
# Assumes the labels are the integers 0 .. clas_number-1 - TODO confirm.
model.add(Dense(clas_number))
model.add(Activation('softmax'))

# The network already ends in softmax, so the loss receives probabilities, not logits.
# from_logits must therefore be False; with True the outputs were treated as logits
# although softmax had already been applied.
model.compile(optimizer='rmsprop',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

model.summary()






In [None]:
# Training length and mini-batch size shared by both models below.
epochs, batchSize = 20, 100

In [None]:
# Write the model to disk only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath='model_ethnicity.h5',
    save_best_only=True,
    monitor="val_accuracy",
    verbose=1,
)

In [None]:
# Train the ethnicity model and keep the per-epoch metrics in `hist`.
# NOTE(review): the test split is passed as validation data here, so the checkpoint is
# selected on the same rows the model is later evaluated on.
fit_options = dict(
    epochs=epochs,
    batch_size=batchSize,
    shuffle=True,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint],
    verbose=2,
)
hist = model.fit(X_train, y_train, **fit_options)

In [None]:
# Training vs. validation curves recorded by `model.fit`, plotted per epoch.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 3))
fig.suptitle('Training', fontsize=10)

# Left panel: loss
loss_ax.plot(hist.history['loss'], color='r', label='Training Loss')
loss_ax.plot(hist.history['val_loss'], color='b', label='Validation Loss')
loss_ax.set_xlabel('Epoch', fontsize=16)
loss_ax.set_ylabel('Loss', fontsize=16)
loss_ax.legend(loc='upper right')

# Right panel: accuracy
acc_ax.plot(hist.history['accuracy'], color='g', label='Training Accuracy')
acc_ax.plot(hist.history['val_accuracy'], color='m', label='Validation Accuracy')
acc_ax.set_xlabel('Epoch', fontsize=16)
acc_ax.set_ylabel('Accuracy', fontsize=16)
acc_ax.legend(loc='lower right')

plt.show()


In [None]:
# Loss and accuracy of the in-memory model on the held-out split.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, acc = test_metrics
print(f'Test loss: {loss}')
print(f'Test Accuracy: {acc}')

# <a id='8'> 8.Gender  Model  </a>

In [None]:
# Switch the target to gender and re-split the flat feature matrix `X`
# (22% held out, fixed seed for reproducibility).
y = df['gender']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=35, test_size=0.22)

In [None]:
# Give both splits the (samples, 48, 48, 1) layout expected by the Conv2D input layer.
X_train, X_test = (
    split.reshape(-1, 48, 48, 1) for split in (X_train, X_test)
)

In [None]:
# Report the shape of each feature split.
for label, split in (('X_Train:', X_train), ('X_Test:', X_test)):
    print(label, split.shape)

In [None]:
# Report the shape of each label split.
for label, split in (('y_Train:', y_train), ('y_Test:', y_test)):
    print(label, split.shape)

## Model

In [None]:
# Binary CNN classifier for gender: three convolutional stages, one dense stage,
# and a single sigmoid output unit.
model = Sequential([
    # 1. LAYER: convolution -> batch norm -> ReLU
    Conv2D(32, 3, kernel_initializer="he_normal", input_shape=(48, 48, 1)),
    BatchNormalization(),
    Activation('relu'),

    # 2. LAYER: convolution -> batch norm -> ReLU -> 2x2 max pooling
    Conv2D(64, 3),
    BatchNormalization(),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2), strides=2),

    # 3. LAYER: convolution -> batch norm -> ReLU -> 2x2 max pooling
    Conv2D(32, 3),
    BatchNormalization(),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2), strides=2),

    # Fully connected layer with dropout
    Flatten(),
    Dense(128),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.4),

    # Output layer: a single sigmoid unit
    Dense(1),
    Activation('sigmoid'),
])

# Sigmoid output pairs with binary cross-entropy on probabilities (the loss default).
model.compile(
    optimizer='sgd',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Write the model to disk only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath='modelgender.h5',
    save_best_only=True,
    monitor="val_accuracy",
    verbose=1,
)

In [None]:
# Train the gender model and keep the per-epoch metrics in `hist`.
# NOTE(review): the test split is passed as validation data here, so the checkpoint is
# selected on the same rows the model is later evaluated on.
fit_options = dict(
    epochs=epochs,
    batch_size=batchSize,
    shuffle=True,
    validation_data=(X_test, y_test),
    callbacks=[checkpoint],
    verbose=2,
)
hist = model.fit(X_train, y_train, **fit_options)

In [None]:
# Training vs. validation curves recorded by `model.fit`, plotted per epoch.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 3))
fig.suptitle('Training', fontsize=10)

# Left panel: loss
loss_ax.plot(hist.history['loss'], color='r', label='Training Loss')
loss_ax.plot(hist.history['val_loss'], color='b', label='Validation Loss')
loss_ax.set_xlabel('Epoch', fontsize=16)
loss_ax.set_ylabel('Loss', fontsize=16)
loss_ax.legend(loc='upper right')

# Right panel: accuracy
acc_ax.plot(hist.history['accuracy'], color='g', label='Training Accuracy')
acc_ax.plot(hist.history['val_accuracy'], color='m', label='Validation Accuracy')
acc_ax.set_xlabel('Epoch', fontsize=16)
acc_ax.set_ylabel('Accuracy', fontsize=16)
acc_ax.legend(loc='lower right')

plt.show()

In [None]:
# Loss and accuracy of the in-memory model on the held-out split.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, acc = test_metrics
print(f'Test loss: {loss}')
print(f'Test Accuracy: {acc}')

* More work coming... (Age Model)

## <a id='9'> <font color="LIGHTSEAGREEN" size=+2.5><b>End Note</b></font> </a>

I hope you enjoyed my kernel. If you like this notebook, an upvote would be great! :)

I am new to data science. Please leave your feedback in the comments to help me improve.

Thanks for your time

<font size="+2" color="LIGHTSEAGREEN"><b>My Other Kernels</b></font><br>

<a href="https://www.kaggle.com/drfrank/lego-transfer-cnn-classification" class="btn btn-primary" style="color:white;">Lego Transfer-CNN Classification</a>

<a href="https://www.kaggle.com/drfrank/book-review-ratings-data-analysis-visualization" class="btn btn-primary" style="color:white;">Book Review Ratings Analysis & Visualization</a>

<a href="https://www.kaggle.com/drfrank/insurance-prediction-lgbm-gbm-xgboost-eda" class="btn btn-primary" style="color:white;">Insurance Prediction- LGBM,GBM,XGBoost EDA</a>

<a href="https://www.kaggle.com/drfrank/fish-market-data-visualisation-machine-learning" class="btn btn-primary" style="color:white;">Fish Market Data Visualisation & Machine Learning</a>

<a href="https://www.kaggle.com/drfrank/seabron-plotly-for-beginners" class="btn btn-primary" style="color:white;">Seabron & Plotly For Beginners</a>

<a href="https://www.kaggle.com/drfrank/basketball-players-stats-data-visualisation" class="btn btn-primary" style="color:white;">Basketball Players Stats Data Visualisation</a>

<a href="https://www.kaggle.com/drfrank/women-s-football-results-visualization" class="btn btn-primary" style="color:white;">Women's Football Results Visualization</a>

<a href="https://www.kaggle.com/drfrank/us-police-shootings-data-visualisation" class="btn btn-primary" style="color:white;">Us Police Shootings Data Visualisation</a>
