In [None]:
# Configure IPython to display the value of every top-level expression in a
# cell instead of only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import numpy as np 
import pandas as pd
import os
import tensorflow as tf

In [None]:
# Seed NumPy's and TensorFlow's global random generators with the same value.
SEED = 1234
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Directory the kernel was started from.
cwd = os.getcwd()

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below are under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [24]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Training-time augmentation.
# `brightness_range` is a range of *multiplicative* brightness factors
# (1.0 = unchanged, 0.0 = black), not an additive shift, so it has to bracket
# 1.0. A range such as [-0.1, 0.4] would darken every training image to at most
# 40% of its brightness while validation/test images stay untouched.
train_data_gen = ImageDataGenerator(rotation_range=20,
                                    zoom_range=0.1,
                                    width_shift_range=0.2,
                                    height_shift_range=0.0,
                                    shear_range=0.0,
                                    brightness_range=[0.9, 1.4],
                                    horizontal_flip=True,
                                    fill_mode="nearest",
                                    rescale=1./255)

# Validation and test images are only rescaled to [0, 1]; no augmentation.
valid_data_gen = ImageDataGenerator(rescale=1./255)
test_data_gen = ImageDataGenerator(rescale=1./255)


In [None]:
import json
# Ground-truth labels shipped with the dataset; looked up by image file name below.
classes = {}
gt_path = '/content/drive/My Drive/Kaggle_1/MaskDataset/train_gt.json'
with open(gt_path) as gt_file:
    classes = json.load(gt_file)


In [None]:
dataset_dir = "/content/drive/My Drive/Kaggle_1/MaskDataset/training"
from PIL import Image

# Numeric label found in the ground-truth file -> class name used as the
# categorical target.
classes_names = {
    0:"NOPERSON",
    1:"ALL",
    2:"SOMEONE"
}

# Collect one record per image file and build the frame once at the end.
# Growing a DataFrame row by row is quadratic and relies on DataFrame.append,
# which no longer exists in pandas 2.x.
records = []
for dirname, _, filenames in os.walk(dataset_dir):
    for filename in filenames:
        records.append({"name": filename,
                        "class": classes_names[classes[filename]]})

# An explicit list (not a set) keeps the column order deterministic.
df = pd.DataFrame(records, columns=["name", "class"])
df

Unnamed: 0,class,name
0,NOPERSON,16649.jpg
1,NOPERSON,16798.jpg
2,NOPERSON,16613.jpg
3,NOPERSON,16507.jpg
4,NOPERSON,16808.jpg
...,...,...
5609,SOMEONE,10868.jpg
5610,ALL,11316.jpg
5611,SOMEONE,11266.jpg
5612,ALL,11186.jpg


In [31]:
def bootstrap(df_input, validation_size, num_df_output):
    """Build class-balanced validation splits and bootstrapped training sets.

    For each of the ``num_df_output`` rounds the input frame is shuffled, the
    first ``validation_size // n_classes`` rows of every class are held out as
    the validation split, and the training set is drawn *with replacement*
    from the remaining rows, ``len(df_input) // n_classes`` rows per class.
    ``n_classes`` is the number of distinct values in the ``"class"`` column.

    Randomness comes from NumPy's global generator, so ``np.random.seed``
    controls reproducibility.

    Args:
        df_input: DataFrame with a ``"class"`` column. It is not modified.
        validation_size: Total number of validation rows requested per round
            (split evenly across classes, remainder dropped).
        num_df_output: Number of (train, validation) pairs to generate.

    Returns:
        ``(train_dfs, val_dfs)``: two lists with ``num_df_output`` DataFrames
        each. Training frames are shuffled; validation frames are grouped by
        class and have a fresh 0..n-1 index.

    Raises:
        ValueError: If a class has no rows left to sample from once its
            validation rows have been held out.
    """
    class_labels = df_input["class"].unique()
    # Guard against an empty input frame so the integer divisions stay defined.
    n_classes = max(len(class_labels), 1)
    val_per_class = validation_size // n_classes
    train_per_class = len(df_input) // n_classes
    empty = df_input.iloc[0:0]

    train_dfs = []
    val_dfs = []
    for _ in range(num_df_output):
        # sample(frac=1) is a full random permutation of the rows.
        df_shuffled = df_input.sample(frac=1).reset_index(drop=True)

        val_parts = []
        train_parts = []
        for label in class_labels:
            class_rows = df_shuffled[df_shuffled["class"] == label]
            val_parts.append(class_rows.iloc[:val_per_class])

            if train_per_class > 0:
                # Only rows that were not held out may be used for training.
                pool = class_rows.index[val_per_class:]
                if len(pool) == 0:
                    raise ValueError(
                        "class %r has no rows left for training after holding "
                        "out %d validation rows" % (label, val_per_class))
                picked = np.random.choice(pool.to_numpy(), train_per_class)
                train_parts.append(df_shuffled.loc[picked])

        val = pd.concat(val_parts, ignore_index=True) if val_parts else empty.copy()
        train = pd.concat(train_parts, ignore_index=True) if train_parts else empty.copy()

        val_dfs.append(val)
        train_dfs.append(train.sample(frac=1))

    return train_dfs, val_dfs

In [42]:
# Square input resolution fed to the network.
img_size = 331
img_h = img_w = img_size

validation_size = 600   # total validation rows requested from bootstrap()
n_models = 3            # number of ensemble members
num_classes = 3         # width of the softmax output
fine_tuning = False


In [43]:
# One bootstrapped training frame and one held-out validation frame per ensemble member.
train_dfs, valid_dfs = bootstrap(df, validation_size, n_models)


In [44]:
def _per_class_counts(frame):
    """Return the row count of each class in `frame`, in order of first appearance."""
    labels = frame["class"]
    return [len(frame[labels == c]) for c in labels.unique()]

# Sanity check of the class balance: validation splits first, then training sets.
for split in (valid_dfs, train_dfs):
    for i in range(n_models):
        print(_per_class_counts(split[i]))

[200, 200, 200]
[200, 200, 200]
[200, 200, 200]
[1871, 1871, 1871]
[1871, 1871, 1871]
[1871, 1871, 1871]


In [45]:
# Keyword arguments shared by the training and validation iterators.
flow_kwargs = dict(directory=dataset_dir,
                   x_col="name",
                   y_col="class",
                   target_size=(img_h, img_w),
                   batch_size=16,
                   shuffle=True,
                   seed=SEED,
                   save_prefix="",
                   save_format="png",
                   subset=None,
                   interpolation="nearest")

# One (train, validation) iterator pair per ensemble member, created in the
# same interleaved order as the frames were generated.
train_gen, valid_gen = [], []
for i in range(n_models):
    train_gen.append(train_data_gen.flow_from_dataframe(train_dfs[i], **flow_kwargs))
    valid_gen.append(valid_data_gen.flow_from_dataframe(valid_dfs[i], **flow_kwargs))


Found 5613 validated image filenames belonging to 3 classes.
Found 600 validated image filenames belonging to 3 classes.
Found 5613 validated image filenames belonging to 3 classes.
Found 600 validated image filenames belonging to 3 classes.
Found 5613 validated image filenames belonging to 3 classes.
Found 600 validated image filenames belonging to 3 classes.


In [46]:
def _as_repeating_dataset(keras_iterator):
    """Wrap one Keras image iterator in an endlessly repeating tf.data.Dataset.

    The iterator is passed in as an argument so that the generator callable
    closes over *this* iterator. A bare ``lambda: train_gen[i]`` written inside
    a loop looks up ``i`` only when the dataset is iterated, i.e. after the
    loop has finished, so it would not reliably pick the intended iterator.
    """
    dataset = tf.data.Dataset.from_generator(
        lambda: keras_iterator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, img_h, img_w, 3], [None, num_classes]))
    return dataset.repeat()


# One dataset per ensemble member, each bound to its own iterator.
valid_dataset = [_as_repeating_dataset(valid_gen[i]) for i in range(n_models)]
train_dataset = [_as_repeating_dataset(train_gen[i]) for i in range(n_models)]

## Architecture

In [None]:
from tensorflow.keras.applications import NASNetLarge
# Convolutional base only (no classification top), loaded with ImageNet weights.
NASrapper = NASNetLarge(include_top=False, weights='imagenet')


In [62]:
# Freeze the first 250 layers of the backbone; every later layer stays trainable.
# NOTE(review): the `fine_tuning` flag defined earlier is not consulted here —
# confirm whether the backbone was meant to be fully frozen when it is False.
for layer in NASrapper.layers[:250]:
    layer.trainable = False

# Classifier: backbone -> flatten -> softmax over the classes.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(img_size, img_size, 3)))
model.add(NASrapper)
# NOTE(review): Flatten hands every spatial position of the backbone output to
# the Dense layer; global average pooling would give a much smaller head.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=num_classes, activation='softmax'))

lr = 1e-3

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
NASNet (Functional)          (None, 11, 11, 4032)      84916818  
_________________________________________________________________
flatten_1 (Flatten)          (None, 487872)            0         
_________________________________________________________________
dense_8 (Dense)              (None, 3)                 1463619   
Total params: 86,380,437
Trainable params: 84,425,043
Non-trainable params: 1,955,394
_________________________________________________________________


In [63]:
# Print the layer stack and parameter counts of the assembled model.
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
NASNet (Functional)          (None, 11, 11, 4032)      84916818  
_________________________________________________________________
flatten_1 (Flatten)          (None, 487872)            0         
_________________________________________________________________
dense_8 (Dense)              (None, 3)                 1463619   
Total params: 86,380,437
Trainable params: 84,425,043
Non-trainable params: 1,955,394
_________________________________________________________________


## Training Model

In [64]:
# Training configuration reused when the ensemble members are compiled:
# Adam at a learning rate of 1e-4, categorical cross-entropy, accuracy metric.
lr = 1e-4
metrics = ['accuracy']
loss = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Re-compile the base model with these settings.
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [50]:
# Optional early stopping on the validation loss; switched off by default.
early_stop = False
callbacks = (
    [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)]
    if early_stop
    else []
)

In [65]:
# Build the ensemble: one independent copy of the base model per member.
models = []
for _ in range(n_models):
    member = tf.keras.models.clone_model(model)
    # clone_model() instantiates new layers with freshly initialised weights,
    # so the weights of the base model (including the pretrained backbone)
    # have to be copied over explicitly.
    member.set_weights(model.get_weights())
    # Give every member its own optimizer instance; an optimizer keeps
    # per-variable state and must not be shared between models.
    member_optimizer = type(optimizer).from_config(optimizer.get_config())
    member.compile(optimizer=member_optimizer, loss=loss, metrics=metrics)
    models.append(member)

In [69]:
# Open a TF1-compat session whose GPU options have `allow_growth` enabled.
gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
tf.compat.v1.Session(config=config)

<tensorflow.python.client.session.Session at 0x7fc1d50bdda0>

In [70]:


# Train every ensemble member on its own bootstrapped split.
for i in range(n_models):
    models[i].fit(x=train_dataset[i],
                  epochs=20,
                  steps_per_epoch=len(train_gen[i]),
                  validation_data=valid_dataset[i],
                  validation_steps=len(valid_gen[i]),
                  # Hand over the callbacks list configured earlier; without
                  # this argument the early-stopping toggle has no effect.
                  callbacks=callbacks)

Epoch 1/20


ResourceExhaustedError: ignored

## Test



In [None]:
# Root folder that flow_from_directory scans for the test images.
test_dir = "/content/drive/My Drive/Kaggle_1/MaskDataset/test_images/"

In [None]:
# Unshuffled iterator over the test folder, one image per batch.
test_gen = test_data_gen.flow_from_directory(
    test_dir,
    target_size=(img_h, img_w),
    batch_size=1,
    class_mode='categorical',
    shuffle=False,
    seed=SEED,
)

# tf.data view of the same iterator, used as the input of predict().
test_dataset = tf.data.Dataset.from_generator(
    lambda: test_gen,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, img_h, img_w, 3], None),
)

Found 450 images belonging to 1 classes.


In [None]:
# Per-member class probabilities for every test image.
eval_out = []

for i in range(n_models):
    # All members read from the same underlying iterator. Rewind it before
    # each pass so every model starts at the first test image and row r of
    # each prediction array lines up with test_gen.filenames[r].
    test_gen.reset()
    eval_out.append(models[i].predict(x=test_dataset,
                                      steps=len(test_gen),
                                      verbose=0))


In [None]:
# Show the class-name -> output-index mapping of every training iterator.
for member_idx in range(n_models):
    mapping = train_gen[member_idx].class_indices
    print(mapping)

In [None]:
# Hard (majority) voting over the ensemble, for any number of members/classes.
# Each member votes for its arg-max class; a class wins only if it has strictly
# more votes than every other class. Otherwise (e.g. three members that all
# disagree) the vote falls back to `fallback_class`.
fallback_class = 2
n_outputs = eval_out[0].shape[1]

# member_votes[r, m] = class index predicted by member m for test image r.
member_votes = np.stack([np.argmax(member_probs, axis=1) for member_probs in eval_out],
                        axis=1)

votazione = []
for sample_votes in member_votes:
    counts = np.bincount(sample_votes, minlength=n_outputs)
    if np.count_nonzero(counts == counts.max()) == 1:
        res = int(counts.argmax())
    else:
        res = fallback_class
    # One-hot encode the winning class.
    result = [0] * n_outputs
    result[res] = 1
    votazione.append(result)

In [None]:
# Soft voting: element-wise sum of the predictions of every ensemble member
# (left-to-right accumulation, works for any number of members).
sum_eval_out = sum(eval_out[1:], eval_out[0])
sum_eval_out

In [None]:
label_map = train_gen[0].class_indices

# Network output index -> class name.
map_cat = {v: k for k, v in label_map.items()}

# Class name -> numeric label, i.e. the inverse of classes_names.
real_names = {v: k for k, v in classes_names.items()}

results = {}
for i, name in enumerate(test_gen.filenames):
    # Keep only the file name; basename copes with any directory depth and
    # with the platform's path separator, unlike splitting on "/".
    real_name = os.path.basename(name)
    # TODO: decide between the majority vote and the summed scores.
    category = map_cat[np.argmax(sum_eval_out[i])]
    results[real_name] = real_names[category]

## Kaggle Result

In [None]:
#Kaggle Result

import os
from datetime import datetime

def create_csv(results, results_dir='./'):
    """Write predictions to a timestamped ``results_<time>.csv`` file.

    The file starts with the header ``Id,Category`` followed by one
    ``key,value`` row per entry of ``results``, in the mapping's iteration
    order. Fields that contain commas or quotes are escaped by the csv module.

    Args:
        results: Mapping from an identifier (e.g. image file name) to its
            predicted category.
        results_dir: Directory to write into; it is created when missing.

    Returns:
        The path of the file that was written.
    """
    import csv

    # Create the target folder up front so a missing directory does not abort
    # the export after all predictions have been computed.
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)

    csv_fname = 'results_' + datetime.now().strftime('%b%d_%H-%M-%S') + '.csv'
    csv_path = os.path.join(results_dir, csv_fname)

    # newline='' plus an explicit '\n' terminator keeps the line endings
    # identical on every platform.
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['Id', 'Category'])
        writer.writerows(results.items())

    return csv_path

In [None]:
# Write the submission CSV into the mounted Drive folder.
create_csv(results,"/content/drive/My Drive/Keras3")