In [None]:
from datetime import datetime
# Wall-clock start of the run; the timing cells later in the notebook subtract this from datetime.now().
start_time = datetime.now()
from scipy import ndimage
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2

import itertools
import tensorflow as tf
import tensorflow_addons as tfa
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from zipfile import ZipFile

In [None]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Add, Activation, Dropout, Flatten, Dense
from tensorflow.keras.layers import Conv2D, MaxPool2D, AveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from tensorflow.keras.utils import plot_model

# L2 penalty applied to every convolution / dense kernel in the network below.
weight_decay = 0.0005


def initial_conv(input):
    """Network stem: 3x3 convolution with 16 filters, then batch norm and ReLU.

    :param input: Keras tensor the stem is applied to.
    :return: Output tensor of the ReLU activation.
    """
    bn_axis = 1 if K.image_data_format() == "channels_first" else -1
    stem_conv = Conv2D(
        16, (3, 3),
        padding='same',
        kernel_initializer='he_normal',
        kernel_regularizer=l2(weight_decay),
        use_bias=False,
    )
    stem_bn = BatchNormalization(axis=bn_axis, momentum=0.1, epsilon=1e-5,
                                 gamma_initializer='uniform')
    return Activation('relu')(stem_bn(stem_conv(input)))


def expand_conv(init, base, k, strides=(1, 1)):
    """Residual unit that changes the channel count to ``base * k``.

    Main path: 3x3 conv (with ``strides``) -> batch norm -> ReLU -> 3x3 conv.
    Shortcut: 1x1 conv (with ``strides``) on the same input. The two paths are summed.

    :param init: Keras tensor entering the unit.
    :param base: Base filter count of the stage.
    :param k: Width multiplier.
    :param strides: Strides used by the first 3x3 conv and by the 1x1 shortcut.
    :return: Tensor produced by adding the main path and the shortcut.
    """
    width = base * k
    bn_axis = 1 if K.image_data_format() == "channels_first" else -1

    def _conv(kernel_size, **extra):
        # All convolutions in this unit share everything except kernel size and strides.
        return Conv2D(width, kernel_size, padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(weight_decay),
                      use_bias=False, **extra)

    main = _conv((3, 3), strides=strides)(init)
    main = BatchNormalization(axis=bn_axis, momentum=0.1, epsilon=1e-5,
                              gamma_initializer='uniform')(main)
    main = Activation('relu')(main)
    main = _conv((3, 3))(main)

    shortcut = _conv((1, 1), strides=strides)(init)
    return Add()([main, shortcut])

def conv1_block(input, k=1, dropout=0.0):
    """Identity-shortcut residual block for the ``16 * k``-filter stage.

    Runs BN -> ReLU -> 3x3 conv twice, with optional dropout after the first
    convolution, and adds the result to the untouched input.

    :param input: Keras tensor entering the block; it is summed with a ``16 * k``-channel tensor.
    :param k: Width multiplier.
    :param dropout: Dropout rate; dropout is only inserted when this is greater than 0.0.
    :return: Sum of the input and the residual branch.
    """
    bn_axis = 1 if K.image_data_format() == "channels_first" else -1

    def _bn_relu(tensor):
        tensor = BatchNormalization(axis=bn_axis, momentum=0.1, epsilon=1e-5,
                                    gamma_initializer='uniform')(tensor)
        return Activation('relu')(tensor)

    def _conv3x3(tensor):
        return Conv2D(16 * k, (3, 3), padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(weight_decay),
                      use_bias=False)(tensor)

    residual = _conv3x3(_bn_relu(input))
    if dropout > 0.0:
        residual = Dropout(dropout)(residual)
    residual = _conv3x3(_bn_relu(residual))

    return Add()([input, residual])

def conv2_block(input, k=1, dropout=0.0):
    """Identity-shortcut residual block for the ``32 * k``-filter stage.

    Runs BN -> ReLU -> 3x3 conv twice, with optional dropout after the first
    convolution, and adds the result to the untouched input.

    :param input: Keras tensor entering the block; it is summed with a ``32 * k``-channel tensor.
    :param k: Width multiplier.
    :param dropout: Dropout rate; dropout is only inserted when this is greater than 0.0.
    :return: Sum of the input and the residual branch.
    """
    init = input
    # image_data_format() returns "channels_first" or "channels_last"; the old
    # comparison against "th" never matched, so the BN axis was always -1.
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Conv2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay),
               use_bias=False)(x)
    if dropout > 0.0:
        x = Dropout(dropout)(x)
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Conv2D(32 * k, (3, 3), padding='same', kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay),
               use_bias=False)(x)
    m = Add()([init, x])
    return m

def conv3_block(input, k=1, dropout=0.0):
    """Identity-shortcut residual block for the ``64 * k``-filter stage.

    Runs BN -> ReLU -> 3x3 conv twice, with optional dropout after the first
    convolution, and adds the result to the untouched input.

    :param input: Keras tensor entering the block; it is summed with a ``64 * k``-channel tensor.
    :param k: Width multiplier.
    :param dropout: Dropout rate; dropout is only inserted when this is greater than 0.0.
    :return: Sum of the input and the residual branch.
    """
    init = input
    # image_data_format() returns "channels_first" or "channels_last"; the old
    # comparison against "th" never matched, so the BN axis was always -1.
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input)
    x = Activation('relu')(x)
    x = Conv2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay),
               use_bias=False)(x)
    if dropout > 0.0:
        x = Dropout(dropout)(x)
    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = Conv2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay),
               use_bias=False)(x)
    m = Add()([init, x])
    return m

def create_wide_residual_network(input_dim, nb_classes=100, N=2, k=1, dropout=0.0, verbose=1):
    """
    Creates a Wide Residual Network with specified parameters
    :param input_dim: Shape passed to ``Input(shape=...)``
    :param nb_classes: Number of output units. A single output unit uses a
              sigmoid activation (softmax over one unit is constant 1.0);
              any other value uses softmax.
    :param N: Depth of the network. Compute N = (n - 4) / 6.
              Example : For a depth of 16, n = 16, N = (16 - 4) / 6 = 2
              Example2: For a depth of 28, n = 28, N = (28 - 4) / 6 = 4
              Example3: For a depth of 40, n = 40, N = (40 - 4) / 6 = 6
    :param k: Width of the network.
    :param dropout: Adds dropout if value is greater than 0.0
    :param verbose: Debug info to describe created WRN
    :return: Uncompiled Keras ``Model`` from the input to the classification head
    """
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    ip = Input(shape=input_dim)
    x = initial_conv(ip)
    # nb_conv is only used for the name printed when verbose is set.
    nb_conv = 4
    x = expand_conv(x, 16, k)
    nb_conv += 2

    for _ in range(N - 1):
        x = conv1_block(x, k, dropout)
        nb_conv += 2

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = expand_conv(x, 32, k, strides=(2, 2))
    nb_conv += 2

    for _ in range(N - 1):
        x = conv2_block(x, k, dropout)
        nb_conv += 2

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    x = expand_conv(x, 64, k, strides=(2, 2))
    nb_conv += 2

    for _ in range(N - 1):
        x = conv3_block(x, k, dropout)
        nb_conv += 2

    x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x)
    x = Activation('relu')(x)
    # Fixed 8x8 pooling window: the flattened feature size therefore depends on input_dim.
    x = AveragePooling2D((8, 8))(x)
    x = Flatten()(x)
    # Softmax normalises across units, so with one unit it always outputs 1.0
    # and cannot be trained with binary cross-entropy; use sigmoid in that case.
    output_activation = 'sigmoid' if nb_classes == 1 else 'softmax'
    x = Dense(nb_classes, kernel_regularizer=l2(weight_decay), activation=output_activation)(x)
    model = Model(ip, x)
    if verbose:
        print("Wide Residual Network-%d-%d created." % (nb_conv, k))
    return model

In [None]:
# Per-person label table, plus the epoch budget typed in by the user.
df = pd.read_csv("../input/lomba-bdc-2021/train.csv")
jawaban_epok = input('eppokku : ')
epokku = int(jawaban_epok)
for cuplikan in (df.head(), df.tail()):
    print(cuplikan)

In [None]:
# Top panel: class counts of 'jenis kelamin'; bottom panel: histogram of 'usia'.
fig, ax = plt.subplots(nrows=2)
ax_atas, ax_bawah = ax
sns.countplot(x='jenis kelamin', data=df, ax=ax_atas)
sns.histplot(x='usia', data=df, ax=ax_bawah)

In [None]:
letak_folder = "../input/lomba-bdc-2021/Training"

# Turn the per-person table into one row per file found in that person's folder.
# Rows are collected in a list and the frame is built once: DataFrame.append in a
# loop is quadratic and was removed in pandas 2.0. Building a fresh frame also
# replaces the old "drop the first 770 rows" step, which hard-coded the CSV length.
baris_file = []
for _, row in df.iterrows():
    folder_orang = str(row['nomor'])
    # sorted(): os.listdir order is arbitrary, and row order feeds the seeded split later on.
    for fileku in sorted(os.listdir(os.path.join(letak_folder, folder_orang))):
        baris_file.append({
            'nomor': folder_orang + '/' + fileku,
            'jenis kelamin': row['jenis kelamin'],
            'usia': row['usia'],
        })

# Only the three columns the later cells read are kept.
df = pd.DataFrame(baris_file, columns=['nomor', 'jenis kelamin', 'usia'])

print(df.head())
print(df.tail())

In [None]:
# Literal substring match: with the default regex=True the '.' in '.jpg' matches any character.
is_jpg = df['nomor'].str.contains('.jpg', regex=False)

# Show what is about to be removed while the labels still refer to these rows.
delete_file = df.index[~is_jpg].values
print(df.loc[delete_file, :])

df = df.loc[is_jpg].reset_index(drop=True)

# After the reset the old labels are stale, so verify by content instead of re-indexing with them.
sisa_bukan_jpg = df[~df['nomor'].str.contains('.jpg', regex=False)]
print(sisa_bukan_jpg)
assert sisa_bukan_jpg.empty

In [None]:
train_size=0.8

# Cast the label to str before slicing so X, y and df agree on its dtype
# (the generators below use it as a class label with class_mode="binary").
df['jenis kelamin'] = df['jenis kelamin'].astype('str')

X = df[['nomor']].copy()
y = df[['jenis kelamin','usia']]

# 80% train, then the remaining 20% is halved into validation and test.
X_train, X_rem, y_train, y_rem = train_test_split(X,y, train_size=train_size,random_state=42)

test_size = 0.5
X_valid, X_test, y_valid, y_test = train_test_split(X_rem,y_rem, test_size=test_size,random_state=42)

print('x train',X_train.shape)
print('y train',y_train.shape)
print('x valid',X_valid.shape)
print('y valid',y_valid.shape)
print('x test',X_test.shape)
# Label fixed: this line reports y_test but used to print 'x test'.
print('y test',y_test.shape)

In [None]:
# Materialise each split as its own copy of df, selected by the split's index labels
# (rows keep df's original order).
train, test, valid = (
    df[df.index.isin(bagian.index)].copy()
    for bagian in (X_train, X_test, X_valid)
)

In [None]:
BATCH_SIZE = 33
sizenya = 299

# NOTE(review): every generator combines rescale=1./255 with
# inception_v3.preprocess_input, which performs its own pixel scaling —
# confirm the double scaling is intended.
preprocess = tf.keras.applications.inception_v3.preprocess_input

# Arguments shared by the three flows; only the dataframe differs.
flow_kwargs = dict(
    directory="../input/lomba-bdc-2021/Training",
    x_col="nomor", y_col="jenis kelamin",
    class_mode="binary",
    target_size=(sizenya, sizenya),
    batch_size=BATCH_SIZE,
)

# Training is the only split with augmentation (random brightness).
train_datagen = ImageDataGenerator(rescale=1./255, brightness_range=[0.1, 1.5],
                                   preprocessing_function=preprocess)
train_gen = train_datagen.flow_from_dataframe(dataframe=train, **flow_kwargs)

val_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess)
val_gen = val_datagen.flow_from_dataframe(dataframe=valid, **flow_kwargs)

test_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess)
test_gen = test_datagen.flow_from_dataframe(dataframe=test, **flow_kwargs)

In [None]:
from tensorflow.keras import Input,Model
# InceptionV3 trained from scratch (no pretrained weights) with a single sigmoid output unit.
model = tf.keras.applications.inception_v3.InceptionV3(
    include_top=True,
    weights=None,
    classes=1,
    classifier_activation='sigmoid',
)

In [None]:

# NOTE(review): patience is set to the same value used as the epoch budget (epokku) —
# confirm early stopping is meant to be able to trigger.
earlystop = tf.keras.callbacks.EarlyStopping(
    monitor='val_binary_accuracy',
    mode='auto',
    min_delta=0,
    patience=epokku,
    baseline=None,
    restore_best_weights=True,
    verbose=1,
)

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['binary_accuracy'],
)

# NOTE(review): this callback is created but the visible fit call only passes earlystop.
logdir = '/content/logs'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
# Train on the augmented generator, validating on val_gen after every epoch.
historiku = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=epokku,
    callbacks=[earlystop],
    shuffle=False,
)

In [None]:
# Per-epoch curves recorded by fit().
acc = historiku.history['binary_accuracy']
val_acc = historiku.history['val_binary_accuracy']
loss = historiku.history['loss']
val_loss = historiku.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, color = 'b',linestyle='-', label='Training accuracy')
plt.plot(epochs, val_acc,color = 'orange',linestyle='-', label='Validation accuracy')
plt.title('Training and validation accuracy')
# The accuracy figure set labels but never drew its legend; the loss figure already did.
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'b', label='Training Loss')
plt.plot(epochs, val_loss,color = 'orange',linestyle='-', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
# Loss followed by the compiled metric (binary accuracy) on the held-out test generator.
results = model.evaluate(x=test_gen)
print(results)

In [None]:
prediksi = []
truee = []
# Restart the iterator and take exactly one epoch of batches. len(test_gen) replaces
# the hard-coded 231 // BATCH_SIZE, which silently assumed the test split had 231 rows.
test_gen.reset()
for _ in range(len(test_gen)):
  X_test_gen,y_test_gen = next(test_gen)
  # Labels and predictions come from the same batch, so they stay aligned
  # even though the generator shuffles.
  truee.extend(y_test_gen.tolist())
  hasil = model.predict(X_test_gen)
  prediksi.extend(hasil.tolist())

In [None]:
# Each prediction is a one-element list; unwrap them into a flat list of scores.
prediksi = [nilai for baris in prediksi for nilai in baris]
hastrue = pd.DataFrame({'true': truee, 'prediksi': prediksi})

In [None]:
# Binarise the scores twice: once at the mean score ('prediksiku'),
# once at 0.5 ('prediksiku1').
rata_rata = hastrue.prediksi.mean()
for kolom_hasil, threshold in (('prediksiku', rata_rata), ('prediksiku1', 0.5)):
    di_bawah = hastrue['prediksi'] < threshold
    hastrue.loc[di_bawah, kolom_hasil] = 0
    di_atas = hastrue['prediksi'] >= threshold
    hastrue.loc[di_atas, kolom_hasil] = 1

In [None]:
from sklearn.metrics import f1_score

# F1 on the test split for each of the two thresholding rules.
for keterangan, kolom_hasil in (
    ('jika batasnya rata rata ', 'prediksiku'),
    ('jika  batasnya setengah ', 'prediksiku1'),
):
    print(keterangan, f1_score(hastrue['true'], hastrue[kolom_hasil]))

In [None]:
# Submission template: one row per test image id.
letak_submission = "../input/lomba-bdc-2021/submission.csv"
df_tes = pd.read_csv(letak_submission)
df_tes.head()

In [None]:
# Turn ids into file names so flow_from_dataframe can locate the images.
df_tes["id"] = df_tes["id"].add(".jpg")
df_tes.head()

In [None]:
# Unlabelled, unshuffled flow over the test images, so predictions come back in df_tes row order.
prediksi_datagen = ImageDataGenerator(
    rescale=1./255,
    preprocessing_function=tf.keras.applications.inception_v3.preprocess_input,
)
prediksi_gen = prediksi_datagen.flow_from_dataframe(
    dataframe=df_tes,
    directory="../input/lomba-bdc-2021/Testing",
    x_col="id", y_col=None,
    class_mode=None,
    target_size=(sizenya, sizenya),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# One score per test image, flattened from (n, 1) to a 1-D array.
jenis_prediksi = model.predict(prediksi_gen).flatten()
print(jenis_prediksi)

In [None]:
# Attach the flattened model outputs as a new column. The assignment is positional,
# so it relies on prediksi_gen having been built from this frame with shuffle=False.
df_tes['jenis kelamin'] = jenis_prediksi
df_tes.head()

In [None]:
# Binarise the scores at 0.5, overwriting the column in place.
kolom_jk = 'jenis kelamin'
di_bawah_setengah = df_tes[kolom_jk] < 0.5
df_tes.loc[di_bawah_setengah, kolom_jk] = 0
setengah_ke_atas = df_tes[kolom_jk] >= 0.5
df_tes.loc[setengah_ke_atas, kolom_jk] = 1

In [None]:
# Number of test images assigned to each class after thresholding.
df_tes['jenis kelamin'].value_counts()

In [None]:
# Strip the 4-character ".jpg" suffix added earlier and store the labels as integers.
df_tes['id'] = df_tes['id'].str[:-4]
df_tes['jenis kelamin'] = df_tes['jenis kelamin'].astype(int)
df_tes.head()

In [None]:
# Write the first model's submission file, without the index column.
letak_submisi = "./submisi1.csv"
df_tes.to_csv(letak_submisi, index=False)

In [None]:
# Elapsed wall-clock time since the first cell, split into hours / minutes / seconds.
end_time = datetime.now()
interval_time = (end_time - start_time).total_seconds()
hours, interval_time = divmod(interval_time, 3600)
minutes, seconds = divmod(interval_time, 60)
print("{} jam {} menit {} detik".format(hours, minutes, seconds))

# Model 2

In [None]:
BATCH_SIZE = 33
sizenya = 224

# NOTE(review): every generator combines rescale=1./255 with
# inception_v3.preprocess_input, which performs its own pixel scaling —
# confirm the double scaling is intended.
preprocess = tf.keras.applications.inception_v3.preprocess_input

# Arguments shared by the three flows; only the dataframe differs.
flow_kwargs = dict(
    directory="../input/lomba-bdc-2021/Training",
    x_col="nomor", y_col="jenis kelamin",
    class_mode="binary",
    target_size=(sizenya, sizenya),
    batch_size=BATCH_SIZE,
)

# Training is the only split with augmentation (random brightness).
train_datagen = ImageDataGenerator(rescale=1./255, brightness_range=[0.1, 1.5],
                                   preprocessing_function=preprocess)
train_gen = train_datagen.flow_from_dataframe(dataframe=train, **flow_kwargs)

val_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess)
val_gen = val_datagen.flow_from_dataframe(dataframe=valid, **flow_kwargs)

test_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess)
test_gen = test_datagen.flow_from_dataframe(dataframe=test, **flow_kwargs)

In [None]:
# Wide residual network with N=2, width multiplier 4 and a single output unit on 224x224 RGB input.
model1 = create_wide_residual_network(
    (224, 224, 3),
    nb_classes=1,
    N=2,
    k=4,
)

In [None]:

# NOTE(review): patience is set to the same value used as the epoch budget (epokku) —
# confirm early stopping is meant to be able to trigger.
earlystop = tf.keras.callbacks.EarlyStopping(
    monitor='val_binary_accuracy',
    mode='auto',
    min_delta=0,
    patience=epokku,
    baseline=None,
    restore_best_weights=True,
    verbose=1,
)

model1.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['binary_accuracy'],
)

# NOTE(review): this callback is created but the visible fit call only passes earlystop.
logdir = '/content/logs'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
# Train the wide residual network, validating on val_gen after every epoch.
historiku = model1.fit(
    train_gen,
    validation_data=val_gen,
    epochs=epokku,
    callbacks=[earlystop],
    shuffle=False,
)

In [None]:
# Per-epoch curves recorded by fit().
acc = historiku.history['binary_accuracy']
val_acc = historiku.history['val_binary_accuracy']
loss = historiku.history['loss']
val_loss = historiku.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, color = 'b',linestyle='-', label='Training accuracy')
plt.plot(epochs, val_acc,color = 'orange',linestyle='-', label='Validation accuracy')
plt.title('Training and validation accuracy')
# The accuracy figure set labels but never drew its legend; the loss figure already did.
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'b', label='Training Loss')
plt.plot(epochs, val_loss,color = 'orange',linestyle='-', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
# Loss followed by the compiled metric (binary accuracy) on the held-out test generator.
results = model1.evaluate(x=test_gen)
print(results)

In [None]:
prediksi = []
truee = []
# Restart the iterator and take exactly one epoch of batches. len(test_gen) replaces
# the hard-coded 231 // BATCH_SIZE, which silently assumed the test split had 231 rows.
test_gen.reset()
for _ in range(len(test_gen)):
  X_test_gen,y_test_gen = next(test_gen)
  truee.extend(y_test_gen.tolist())
  # Score with model1, the network trained in this section. The previous code called
  # `model` (the InceptionV3 from the first section) on these 224x224 batches.
  hasil = model1.predict(X_test_gen)
  prediksi.extend(hasil.tolist())

In [None]:
# Each prediction is a one-element list; unwrap them into a flat list of scores.
prediksi = [nilai for baris in prediksi for nilai in baris]
hastrue = pd.DataFrame({'true': truee, 'prediksi': prediksi})

In [None]:
# Binarise the scores twice: once at the mean score ('prediksiku'),
# once at 0.5 ('prediksiku1').
rata_rata = hastrue.prediksi.mean()
for kolom_hasil, threshold in (('prediksiku', rata_rata), ('prediksiku1', 0.5)):
    di_bawah = hastrue['prediksi'] < threshold
    hastrue.loc[di_bawah, kolom_hasil] = 0
    di_atas = hastrue['prediksi'] >= threshold
    hastrue.loc[di_atas, kolom_hasil] = 1

In [None]:
from sklearn.metrics import f1_score

# F1 on the test split for each of the two thresholding rules.
for keterangan, kolom_hasil in (
    ('jika batasnya rata rata ', 'prediksiku'),
    ('jika  batasnya setengah ', 'prediksiku1'),
):
    print(keterangan, f1_score(hastrue['true'], hastrue[kolom_hasil]))

In [None]:
# Build the second submission from a fresh copy of the template.
df_tes = pd.read_csv("../input/lomba-bdc-2021/submission.csv")
df_tes["id"] = df_tes["id"]+".jpg"
# shuffle=False keeps predictions in df_tes row order for the positional assignment below.
prediksi_gen = ImageDataGenerator(rescale=1./255,preprocessing_function = tf.keras.applications.inception_v3.preprocess_input)\
            .flow_from_dataframe(dataframe=df_tes, directory="../input/lomba-bdc-2021/Testing",
                                 x_col="id",target_size=(sizenya,sizenya), y_col=None,shuffle=False,class_mode=None,batch_size=BATCH_SIZE)

# Predict with model1, the network trained in this section. The previous code called
# `model` (the InceptionV3 from the first section), so submisi2.csv did not reflect model1.
jenis_prediksi = model1.predict(prediksi_gen)
jenis_prediksi = jenis_prediksi.flatten()
df_tes['jenis kelamin'] = jenis_prediksi

# Binarise the scores at 0.5.
df_tes.loc[df_tes['jenis kelamin'] < 0.5, 'jenis kelamin']  = 0
df_tes.loc[df_tes['jenis kelamin'] >= 0.5, 'jenis kelamin'] = 1

# Strip the ".jpg" suffix again and store labels as integers.
# (The mid-cell df_tes.head() calls were dropped: only a cell's last expression is displayed.)
df_tes['id'] = df_tes['id'].str.slice(0,-4,1)
df_tes['jenis kelamin'] = df_tes['jenis kelamin'].astype('int')

df_tes.to_csv("./submisi2.csv",index=False)

In [None]:
# Elapsed wall-clock time since the first cell, split into hours / minutes / seconds.
end_time = datetime.now()
interval_time = (end_time - start_time).total_seconds()
hours, interval_time = divmod(interval_time, 3600)
minutes, seconds = divmod(interval_time, 60)
print("{} jam {} menit {} detik".format(hours, minutes, seconds))