In [28]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import plotly.express as py
import plotly.graph_objects as go

import tensorflow.keras as keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, Dropout, Dense, MaxPool2D, Flatten
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.callbacks import EarlyStopping

#from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from keras import backend as K
from tensorflow.keras.utils import img_to_array, load_img, array_to_img
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.efficientnet import EfficientNetB4
from sklearn.metrics import accuracy_score

from keras.applications.xception import Xception

import cv2    

from sklearn.metrics import f1_score

In [2]:
# Pick the device the model will be built on: the GPU reported by TensorFlow
# when there is one, otherwise fall back to the CPU.
device_name = tf.test.gpu_device_name()
if not device_name:
    # An empty name means TensorFlow did not report a GPU.
    device_name = "/device:CPU:0"
    print("No GPU, using {}.".format(device_name))
else:
    print("Found GPU at: {}".format(device_name))

Found GPU at: /device:GPU:0


In [3]:
# Load the per-image attribute table: one row per image (image_id) and one
# column per attribute, encoded as 1 / -1 in the raw file.
df_list_attr = pd.read_csv('list_attr_celeba.csv')
df_list_attr.head()

Unnamed: 0,image_id,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,...,-1,1,1,-1,1,-1,1,-1,-1,1
1,000002.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,...,-1,1,-1,-1,-1,-1,-1,-1,-1,1
2,000003.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,1,-1,-1,-1,-1,-1,1
3,000004.jpg,-1,-1,1,-1,-1,-1,-1,-1,-1,...,-1,-1,1,-1,1,-1,1,1,-1,1
4,000005.jpg,-1,1,1,-1,-1,-1,1,-1,-1,...,-1,-1,-1,-1,-1,-1,1,-1,-1,1


In [4]:
 # Re-encode the labels as 0/1 (this cell does NOT touch the index; image_id becomes the index in a later cell).
df_list_attr.replace(to_replace = -1, value = 0,inplace = True ) # Replace -1 with 0 so that an absent attribute is encoded as 0 (mutates df_list_attr in place)
df_list_attr.head(10) 

Unnamed: 0,image_id,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,0,1,1,0,0,0,0,0,0,...,0,1,1,0,1,0,1,0,0,1
1,000002.jpg,0,0,0,1,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,1
2,000003.jpg,0,0,0,0,0,0,1,0,0,...,0,0,0,1,0,0,0,0,0,1
3,000004.jpg,0,0,1,0,0,0,0,0,0,...,0,0,1,0,1,0,1,1,0,1
4,000005.jpg,0,1,1,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,1
5,000006.jpg,0,1,1,0,0,0,1,0,0,...,0,0,0,1,1,0,1,0,0,1
6,000007.jpg,1,0,1,1,0,0,1,1,1,...,0,0,1,0,0,0,0,0,0,1
7,000008.jpg,1,1,0,1,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,1
8,000009.jpg,0,1,1,0,0,1,1,0,0,...,0,1,0,0,1,0,1,0,0,1
9,000010.jpg,0,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,1


In [5]:
# Pairwise correlation between the attribute columns.
# NOTE(review): df_list_attr still contains the string column image_id at this point; the output
# shown below omits it, but newer pandas releases may require numeric_only=True here - confirm
# against the installed pandas version.
df_list_attr.corr() # to see how our features are correlated.

Unnamed: 0,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,Blond_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
5_o_Clock_Shadow,1.0,-0.158587,-0.062415,0.168488,0.005584,-0.089243,-0.043699,0.15201,0.1006038,-0.132686,...,0.25937,-0.067777,0.05226,-0.12425,-0.161911,0.033997,-0.333921,-0.116239,0.097899,0.014869
Arched_Eyebrows,-0.158587,1.0,0.250599,-0.091947,-0.069524,-0.027775,0.2434,-0.084025,-0.0009959764,0.127009,...,-0.116001,0.093834,-0.054486,0.200835,0.294779,-0.09998,0.460409,0.220184,-0.132821,0.146798
Attractive,-0.062415,0.250599,1.0,-0.178464,-0.145826,0.059712,0.062552,-0.277064,0.00413989,0.154774,...,-0.100229,0.147845,0.04155,0.214992,0.124349,-0.138636,0.480104,0.068738,-0.15642,0.387735
Bags_Under_Eyes,0.168488,-0.091947,-0.178464,1.0,0.116215,-0.057568,-0.005982,0.361604,0.002667485,-0.106879,...,0.100411,0.112759,0.024208,-0.127128,-0.096969,-0.005726,-0.284052,-0.052964,0.19698,-0.230759
Bald,0.005584,-0.069524,-0.145826,0.116215,1.0,-0.064044,-0.003577,0.180148,-0.08036419,-0.063056,...,0.058599,0.009402,-0.07192,-0.103482,-0.061211,-0.030625,-0.142852,-0.050445,0.176501,-0.196253
Bangs,-0.089243,-0.027775,0.059712,-0.057568,-0.064044,1.0,0.034876,-0.069395,-0.03355778,0.098854,...,-0.073341,0.052469,0.020034,0.068166,0.058203,-0.079939,0.163378,0.114454,-0.092919,0.017695
Big_Lips,-0.043699,0.2434,0.062552,-0.005982,-0.003577,0.034876,1.0,0.076814,0.06719131,0.023674,...,-0.039739,0.012885,-0.037144,0.12019,0.126523,-0.013286,0.195212,0.149892,-0.06777,0.107722
Big_Nose,0.15201,-0.084025,-0.277064,0.361604,0.180148,-0.069395,0.076814,1.0,0.08109064,-0.158323,...,0.131667,0.100888,-0.028837,-0.131843,-0.058029,0.065552,-0.303651,-0.03593,0.205014,-0.284382
Black_Hair,0.100604,-0.000996,0.00414,0.002667,-0.080364,-0.033558,0.067191,0.081091,1.0,-0.233626,...,0.043231,-0.002533,0.112496,-0.085227,0.002348,-0.103605,-0.066856,-0.042539,0.023184,0.121372
Blond_Hair,-0.132686,0.127009,0.154774,-0.106879,-0.063056,0.098854,0.023674,-0.158323,-0.2336262,1.0,...,-0.096039,0.090209,0.005458,0.1284,0.095005,-0.083373,0.281935,0.143577,-0.106377,0.052402


In [6]:
# Names of the attribute (label) columns, i.e. every column except the image
# identifier. The identifier is excluded by name rather than by position so
# that re-running this cell after image_id has been moved into the index does
# not silently drop the first real attribute.
attr_names = [column for column in df_list_attr.columns if column != 'image_id']
print(attr_names)

['5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 'Big_Nose', 'Black_Hair', 'Blond_Hair', 'Blurry', 'Brown_Hair', 'Bushy_Eyebrows', 'Chubby', 'Double_Chin', 'Eyeglasses', 'Goatee', 'Gray_Hair', 'Heavy_Makeup', 'High_Cheekbones', 'Male', 'Mouth_Slightly_Open', 'Mustache', 'Narrow_Eyes', 'No_Beard', 'Oval_Face', 'Pale_Skin', 'Pointy_Nose', 'Receding_Hairline', 'Rosy_Cheeks', 'Sideburns', 'Smiling', 'Straight_Hair', 'Wavy_Hair', 'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace', 'Wearing_Necktie', 'Young']


In [7]:
# Load the split assignment of every image. The partition column takes the
# values 0, 1 and 2, which later cells use as train / validation / test.
df_eval_partition = pd.read_csv('list_eval_partition.csv')
# Number of images per partition value.
df_eval_partition.partition.value_counts()

0    162770
2     19962
1     19867
Name: partition, dtype: int64

In [8]:
# Load the facial landmark coordinates (eyes, nose, mouth corners) of the aligned images.
# NOTE(review): in the cells visible here this table is only inspected, never fed to the model.
df_landmarks_align_celeba = pd.read_csv('list_landmarks_align_celeba.csv')
df_landmarks_align_celeba.head()

Unnamed: 0,image_id,lefteye_x,lefteye_y,righteye_x,righteye_y,nose_x,nose_y,leftmouth_x,leftmouth_y,rightmouth_x,rightmouth_y
0,000001.jpg,69,109,106,113,77,142,73,152,108,154
1,000002.jpg,69,110,107,112,81,135,70,151,108,153
2,000003.jpg,76,112,104,106,108,128,74,156,98,158
3,000004.jpg,72,113,108,108,101,138,71,155,101,151
4,000005.jpg,66,114,112,112,86,119,71,147,104,150


In [9]:
# Summary statistics of the landmark coordinates (count, mean, std, min, quartiles, max).
df_landmarks_align_celeba.describe()

Unnamed: 0,lefteye_x,lefteye_y,righteye_x,righteye_y,nose_x,nose_y,leftmouth_x,leftmouth_y,rightmouth_x,rightmouth_y
count,202599.0,202599.0,202599.0,202599.0,202599.0,202599.0,202599.0,202599.0,202599.0,202599.0
mean,69.353867,111.197982,107.644031,111.1616,88.06314,135.102024,71.247459,152.113011,105.586429,152.19466
std,1.717952,1.129284,1.690252,1.169229,6.647733,4.245078,3.168011,1.799343,3.233125,1.752368
min,56.0,98.0,90.0,95.0,57.0,93.0,57.0,116.0,82.0,114.0
25%,68.0,111.0,107.0,111.0,84.0,133.0,69.0,151.0,103.0,151.0
50%,69.0,111.0,108.0,111.0,88.0,135.0,72.0,152.0,105.0,152.0
75%,70.0,112.0,109.0,112.0,92.0,138.0,73.0,153.0,108.0,153.0
max,88.0,129.0,124.0,122.0,121.0,156.0,90.0,174.0,120.0,173.0


In [10]:
# Map every attribute name to its position in attr_names.
attr_to_id = dict(zip(attr_names, range(len(attr_names))))
attr_to_id

{'5_o_Clock_Shadow': 0,
 'Arched_Eyebrows': 1,
 'Attractive': 2,
 'Bags_Under_Eyes': 3,
 'Bald': 4,
 'Bangs': 5,
 'Big_Lips': 6,
 'Big_Nose': 7,
 'Black_Hair': 8,
 'Blond_Hair': 9,
 'Blurry': 10,
 'Brown_Hair': 11,
 'Bushy_Eyebrows': 12,
 'Chubby': 13,
 'Double_Chin': 14,
 'Eyeglasses': 15,
 'Goatee': 16,
 'Gray_Hair': 17,
 'Heavy_Makeup': 18,
 'High_Cheekbones': 19,
 'Male': 20,
 'Mouth_Slightly_Open': 21,
 'Mustache': 22,
 'Narrow_Eyes': 23,
 'No_Beard': 24,
 'Oval_Face': 25,
 'Pale_Skin': 26,
 'Pointy_Nose': 27,
 'Receding_Hairline': 28,
 'Rosy_Cheeks': 29,
 'Sideburns': 30,
 'Smiling': 31,
 'Straight_Hair': 32,
 'Wavy_Hair': 33,
 'Wearing_Earrings': 34,
 'Wearing_Hat': 35,
 'Wearing_Lipstick': 36,
 'Wearing_Necklace': 37,
 'Wearing_Necktie': 38,
 'Young': 39}

In [11]:
import cv2

In [12]:
# Directory (relative to the notebook's working directory) holding the image files;
# file names from the image_id column are appended to it.
images_path = "img_align_celeba/img_align_celeba/"

In [13]:
# Index the partition table by image_id so it can be joined with the attribute table on that key.
# The change is made in place: re-running this cell fails because image_id is no longer a column.
df_eval_partition.set_index('image_id', inplace=True)

In [14]:
# Index the attribute table by image_id so it can be joined with the partition table on that key.
# The change is made in place: re-running this cell fails because image_id is no longer a column.
df_list_attr.set_index('image_id', inplace=True)

In [15]:

# Combine split assignment and labels into one table (inner join on the
# image_id index), then turn image_id back into an ordinary column so the
# layout is: image_id, partition, <attribute columns...>.
df_model = df_eval_partition.join(df_list_attr, how='inner').reset_index()

In [16]:
# Sanity check of the joined table: image_id, partition, then the attribute columns.
df_model.head()

Unnamed: 0,image_id,partition,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,0,0,1,1,0,0,0,0,0,...,0,1,1,0,1,0,1,0,0,1
1,000002.jpg,0,0,0,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,0,1
2,000003.jpg,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,0,0,1
3,000004.jpg,0,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,1,1,0,1
4,000005.jpg,0,0,1,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,1


# New

In [17]:
# Hyper-parameters shared by the data generator, the model definition and training.
BATCH_SIZE = 128  # default number of images per batch yielded by generator()
IMG_WIDTH = 228  # width (pixels) every image is resized to before entering the network
IMG_HEIGHT = 228  # height (pixels) every image is resized to before entering the network
NUM_EPOCHS = 20  # upper bound on the number of epochs passed to model.fit

In [18]:
def generator(samples_data, batch_size=BATCH_SIZE, shuffle_data=True,IMG_WIDTH=IMG_WIDTH, IMG_HEIGHT=IMG_HEIGHT):
    """Endlessly yield ``(images, labels)`` batches built from ``samples_data``.

    Parameters
    ----------
    samples_data : pandas.DataFrame
        Rows to draw from. Must contain an ``image_id`` column with the image
        file name; every column from position 2 onwards is used as a label
        (positions 0 and 1 are ``image_id`` and ``partition`` in ``df_model``).
    batch_size : int
        Maximum number of rows per batch. The last batch of each pass may be
        smaller.
    shuffle_data : bool
        When true, the rows of ``samples_data`` are reshuffled at the start of
        every pass; when false they are visited in their stored order.
    IMG_WIDTH, IMG_HEIGHT : int
        Size every image is resized to.

    Yields
    ------
    tuple of numpy.ndarray
        ``images`` with shape ``(n, IMG_HEIGHT, IMG_WIDTH, 3)`` and ``labels``
        with shape ``(n, number_of_label_columns)``.

    Raises
    ------
    ValueError
        If ``samples_data`` is empty (the loop would otherwise spin forever
        without yielding anything).
    FileNotFoundError
        If an image listed in ``samples_data`` cannot be read.
    """
    num_of_samples = len(samples_data)
    if num_of_samples == 0:
        raise ValueError("samples_data is empty; nothing to generate batches from")

    while True:
        # Reshuffle only the rows this generator was given. Sampling from the
        # full df_model here would mix train, validation and test images.
        if shuffle_data:
            epoch_samples = samples_data.sample(frac=1)
        else:
            epoch_samples = samples_data

        for offset in range(0, num_of_samples, batch_size):
            batch_of_samples = epoch_samples.iloc[offset:offset + batch_size, :]

            batch_images = []
            for img_name in batch_of_samples["image_id"]:
                img_file = 'img_align_celeba/img_align_celeba/' + img_name
                # cv2.imread returns None instead of raising when the file is
                # missing or unreadable; note that it yields BGR channel order.
                img = cv2.imread(img_file)
                if img is None:
                    raise FileNotFoundError("could not read image: " + img_file)
                # cv2.resize takes (width, height) and returns (height, width, 3).
                batch_images.append(cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT)))

            X_train = np.array(batch_images)
            # All label columns of the batch at once, in the same row order as the images.
            y_train = batch_of_samples.iloc[:, 2:].to_numpy()

            yield X_train, y_train

In [19]:
# Split the joined table by partition value: 0 -> training, 1 -> validation, 2 -> testing.
training_data, validation_data, testing_data = (
    df_model.loc[df_model["partition"].eq(split_id)] for split_id in (0, 1, 2)
)

In [20]:
# Quick look at the training split (same column layout as df_model).
training_data.head()

Unnamed: 0,image_id,partition,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,0,0,1,1,0,0,0,0,0,...,0,1,1,0,1,0,1,0,0,1
1,000002.jpg,0,0,0,0,1,0,0,0,1,...,0,1,0,0,0,0,0,0,0,1
2,000003.jpg,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,0,0,1
3,000004.jpg,0,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,1,1,0,1
4,000005.jpg,0,0,1,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,1


In [21]:
# Number of rows in the test split.
len(testing_data)

19962

In [22]:
# One endless batch generator per split, all with generator()'s default batch size and image size.
# The name traing_generator (sic) is kept because the model.fit call refers to it.
traing_generator = generator(training_data)
validation_generator = generator(validation_data)
testing_generator = generator(testing_data)
     

In [23]:
with tf.device(device_name):
  # Backbone: EfficientNetB4 without its classification head.
  # Keras expects input_shape as (height, width, channels); the batches produced with
  # cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT)) have shape (IMG_HEIGHT, IMG_WIDTH, 3).
  effmodel = EfficientNetB4(weights = None, include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

  eff_Model = Model(effmodel.inputs, effmodel.layers[-1].output)
  # Exclude the first 400 backbone layers from training.
  # NOTE(review): with weights=None the backbone is randomly initialised, so these frozen
  # layers keep their random values for the whole run - confirm whether pretrained weights
  # (e.g. weights="imagenet") were intended.
  for layer in eff_Model.layers[:400]:
    layer.trainable = False

  # Classification head stacked on top of the backbone's feature map.
  model = Sequential()
  model.add(eff_Model)

  # Dense acts on the last axis only, so at this point it is applied independently at every
  # spatial position of the feature map (the Flatten comes afterwards).
  model.add(Dense(units=512,activation="relu"))
  model.add(BatchNormalization())

  model.add(Dropout(0.3))

  model.add(Flatten())

  # One independent sigmoid output per attribute (multi-label classification, 40 labels).
  model.add(Dense(units=40, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(0.001)))

  # metrics is documented as a list of metrics; a bare string is not accepted by every Keras version.
  model.compile(loss=tf.keras.losses.binary_crossentropy, optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=["binary_accuracy"])

In [24]:
# Print the layer-by-layer structure, output shapes and parameter counts of the assembled model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model (Functional)           (None, 8, 8, 1792)        17673823  
_________________________________________________________________
dense (Dense)                (None, 8, 8, 512)         918016    
_________________________________________________________________
batch_normalization (BatchNo (None, 8, 8, 512)         2048      
_________________________________________________________________
dropout (Dropout)            (None, 8, 8, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 32768)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 40)                1310760   
Total params: 19,904,647
Trainable params: 10,537,032
Non-trainable params: 9,367,615
____________________________________

In [25]:
# Stop training once validation accuracy has stopped improving.
early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_binary_accuracy", # quantity watched after every epoch: validation binary accuracy
        patience=3, # number of consecutive epochs without improvement tolerated before stopping
        verbose=1,
        mode="max", # higher is better for an accuracy metric
        restore_best_weights=True, # on stop, roll the model back to the weights of the best epoch
     )

In [26]:
# Train the model from the endless batch generators.
# steps_per_epoch is deliberately a tenth of a full pass over the training split, so one
# Keras "epoch" here covers roughly 10% of the training images; validation_steps covers
# the full batches of the validation split.
# callbacks is documented as a list of Callback instances, so the single callback is wrapped in one.
history = model.fit(
    traing_generator,
    validation_data=validation_generator,
    steps_per_epoch=training_data.shape[0] // BATCH_SIZE // 10,
    validation_steps=validation_data.shape[0] // BATCH_SIZE,
    epochs=NUM_EPOCHS,
    callbacks=[early_stopping],
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20

KeyboardInterrupt: 

In [None]:

# Training and validation loss recorded by model.fit, one point per epoch.
plt.figure(figsize=(18, 4))
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'valid')):
    plt.plot(history.history[history_key], label = curve_label)
plt.legend()
plt.title('Loss Function')
plt.show()

In [None]:

# Training and validation binary accuracy recorded by model.fit, one point per epoch.
plt.figure(figsize=(18, 4))
for history_key, curve_label in (('binary_accuracy', 'train'), ('val_binary_accuracy', 'valid')):
    plt.plot(history.history[history_key], label = curve_label)
plt.legend()
plt.title('Accuracy')
plt.show()
     

In [None]:
# Quick look at the test split before evaluating on it.
testing_data.head()

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
def get_results(testing_data, testing_generator, BATCH_SIZE):
    """Print the accuracy of the global ``model`` for every attribute and overall.

    Parameters
    ----------
    testing_data : pandas.DataFrame
        The rows being evaluated. Only its length (to decide how many batches
        to draw) and its column names from position 2 onwards (the attribute
        names) are used.
    testing_generator : iterator
        Yields ``(images, labels)`` batches, e.g. ``generator(testing_data)``.
    BATCH_SIZE : int
        Batch size ``testing_generator`` was created with.

    Raises
    ------
    ValueError
        If ``testing_data`` is empty, since no accuracy can be computed.

    Notes
    -----
    Predictions are thresholded at 0.5. The reported overall accuracy is the
    unweighted mean of the per-attribute accuracies. Nothing is returned; the
    results are printed.
    """
    label_names = list(testing_data.columns)[2:]

    # Ceiling division: the final, smaller batch of the pass is evaluated too
    # instead of being dropped.
    num_batches = -(-len(testing_data) // BATCH_SIZE)
    if num_batches == 0:
        raise ValueError("testing_data is empty; nothing to evaluate")

    batch_scores = []
    batch_labels = []
    for _ in range(num_batches):
        images, labels = next(testing_generator)
        batch_scores.append(np.asarray(model.predict(images)))
        batch_labels.append(np.asarray(labels))

    real_values = pd.DataFrame(np.concatenate(batch_labels), columns=label_names)
    scores = pd.DataFrame(np.concatenate(batch_scores), columns=label_names)
    # Vectorised thresholding: a score of at least 0.5 counts as "attribute present".
    prediction_list = (scores >= 0.5).astype(int)

    accuracies = {}
    for col in label_names:
        acc = accuracy_score(real_values[col], prediction_list[col])
        accuracies[col] = acc
        print("accuracy of "+col+" is : "+str(acc))
    print("**** OVERALL ACCURACY: ", sum(accuracies.values())/len(accuracies))

In [None]:
# Evaluate the trained model on batches from the test generator and print per-attribute and overall accuracy.
get_results(testing_data, testing_generator, BATCH_SIZE)

In [None]:
# ANSI escape sequences used to format the text printed by the preview cell.
start = "\033[1m"  # switch bold on
end = "\033[0;0m"  # reset all text attributes

In [None]:
# Show the first 10 images of df_model together with the true and predicted value of every
# attribute; matches are printed in bold green, mismatches in bold red.
attribute_columns = list(df_model.columns[2:])

for img_index in range(10):
  image_file = images_path + df_model.loc[img_index, "image_id"]

  # Image for display only (PIL, RGB channel order).
  original_img = load_img(image_file)

  # Network input: built exactly like the training batches (cv2.imread -> BGR uint8, then
  # resized to the training resolution) so the model sees the same kind of data it was fit on.
  img = cv2.imread(image_file)
  if img is None:
    raise FileNotFoundError("could not read image: " + image_file)
  img = cv2.resize(img,(IMG_WIDTH,IMG_HEIGHT))

  real_values = df_model.loc[img_index, attribute_columns]
  predictions = model.predict(np.array([img]))[0]

  plt.imshow(original_img)
  plt.show()

  for true, score, col in zip(real_values, predictions, attribute_columns):
    pred = 1 if score >=0.5 else 0
    colour = "\033[1;32m" if pred==true else "\033[1;31m"
    print(colour+start+col+" True: "+str(true)+ " Predicted: "+str(pred)+end)