In [1]:
# Melakukan impor libraries yang diperlukan untuk membangun model dan prediksi data.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
import time
import datetime

In [2]:
# Mencatat waktu dimulainya keseluruhan program model dan prediksi data.
global_start_time = time.time()

In [3]:
# Menetapkan nilai seed untuk reproduksi model.
seed = 2021

np.random.seed(2021)

In [4]:
# Menyusun dan menampilkan data training.
X_train = pd.read_csv('../input/bdc-sd2021-train-tabular-data/train_gray.csv')
X_train = X_train.values.reshape(-1,128,128,1)

In [5]:
# Menyusun dan menampilkan data testing.
X_test = pd.read_csv('../input/bdc-sd2021-test-tabular-data/test_gray.csv')
X_test = X_test.values.reshape(-1,128,128,1)

In [6]:
# Memunculkan target prediksi.
y_train = pd.read_csv('../input/bdc-sd2021-data-tambahan/train_target_and_fold.csv')[['jenis_kelamin']]
y_train

Unnamed: 0,jenis_kelamin
0,0
1,0
2,0
3,1
4,1
...,...
2300,0
2301,0
2302,0
2303,0


In [7]:
# Melakukan pemisahan data untuk training dan validasi.
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size = 0.1, random_state = seed)

In [8]:
# Membangun model.
model = Sequential()
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                 activation ='relu', input_shape = (128,128,1)))
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss = 'binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 128, 128, 32)      832       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 32)      25632     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 64, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 64)        0


User settings:

   KMP_AFFINITY=granularity=fine,verbose,compact,1,0
   KMP_BLOCKTIME=0
   KMP_DUPLICATE_LIB_OK=True
   KMP_INIT_AT_FORK=FALSE
   KMP_SETTINGS=1

Effective settings:

   KMP_ABORT_DELAY=0
   KMP_ADAPTIVE_LOCK_PROPS='1,1024'
   KMP_ALIGN_ALLOC=64
   KMP_ALL_THREADPRIVATE=128
   KMP_ATOMIC_MODE=2
   KMP_BLOCKTIME=0
   KMP_CPUINFO_FILE: value is not defined
   KMP_DETERMINISTIC_REDUCTION=false
   KMP_DEVICE_THREAD_LIMIT=2147483647
   KMP_DISP_NUM_BUFFERS=7
   KMP_DUPLICATE_LIB_OK=true
   KMP_ENABLE_TASK_THROTTLING=true
   KMP_FORCE_REDUCTION: value is not defined
   KMP_FOREIGN_THREADS_THREADPRIVATE=true
   KMP_FORKJOIN_BARRIER='2,2'
   KMP_FORKJOIN_BARRIER_PATTERN='hyper,hyper'
   KMP_GTID_MODE=3
   KMP_HANDLE_SIGNALS=false
   KMP_HOT_TEAMS_MAX_LEVEL=1
   KMP_HOT_TEAMS_MODE=0
   KMP_INIT_AT_FORK=true
   KMP_LIBRARY=throughput
   KMP_LOCK_KIND=queuing
   KMP_MALLOC_POOL_INCR=1M
   KMP_NUM_LOCKS_IN_BLOCK=1
   KMP_PLAIN_BARRIER='2,2'
   KMP_PLAIN_BARRIER_PATTERN='hyper,hype

In [9]:
# Mendefinisikan optimizer.
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

In [10]:
# Mengkompilasi model.
model.compile(optimizer = optimizer , loss = "binary_crossentropy", metrics=["accuracy"])

In [11]:
# Menetapkan batasan untuk learning rate.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=2, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)

In [12]:
# Menetapkan nilai epoch dan batch size.
epochs = 85
batch_size = 90

In [13]:
# Melakukan augmentasi data.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

datagen.fit(X_train)

In [14]:
# Melakukan fitting model.
history = model.fit(datagen.flow(X_train,y_train, batch_size=batch_size),
                    epochs = epochs, validation_data = (X_valid,y_valid),
                    verbose = 2, steps_per_epoch = X_train.shape[0] // batch_size,
                    callbacks = [learning_rate_reduction])

2021-10-30 07:13:16.158223: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/85
23/23 - 79s - loss: 2.2309 - accuracy: 0.5146 - val_loss: 0.6846 - val_accuracy: 0.5671
Epoch 2/85
23/23 - 76s - loss: 0.6800 - accuracy: 0.5847 - val_loss: 0.6603 - val_accuracy: 0.5671
Epoch 3/85
23/23 - 76s - loss: 0.6306 - accuracy: 0.6502 - val_loss: 0.4482 - val_accuracy: 0.8139
Epoch 4/85
23/23 - 77s - loss: 0.5071 - accuracy: 0.7495 - val_loss: 0.3549 - val_accuracy: 0.8571
Epoch 5/85
23/23 - 76s - loss: 0.4891 - accuracy: 0.7777 - val_loss: 0.3478 - val_accuracy: 0.8918
Epoch 6/85
23/23 - 76s - loss: 0.4536 - accuracy: 0.8201 - val_loss: 0.3239 - val_accuracy: 0.8831
Epoch 7/85
23/23 - 76s - loss: 0.3915 - accuracy: 0.8332 - val_loss: 0.4784 - val_accuracy: 0.7706
Epoch 8/85
23/23 - 76s - loss: 0.4421 - accuracy: 0.8120 - val_loss: 0.2919 - val_accuracy: 0.8918
Epoch 9/85
23/23 - 76s - loss: 0.3958 - accuracy: 0.8322 - val_loss: 0.2874 - val_accuracy: 0.9177
Epoch 10/85
23/23 - 76s - loss: 0.3818 - accuracy: 0.8392 - val_loss: 0.2438 - val_accuracy: 0.9394
Epoch 11/

In [15]:
# Memprediksi target pada data validasi dan testing.
y_valid_pred = model.predict(X_valid)
y_valid_pred = pd.DataFrame((y_valid_pred >= 0.5)*1)

y_test_pred = model.predict(X_test)
y_test_pred = pd.DataFrame((y_test_pred >= 0.5) * 1)

In [16]:
# Menampilkan nilai error pada data validasi.
y_valid = y_valid.reset_index().drop('index', axis = 1)
y_valid.columns = [0]
error = abs(y_valid - y_valid_pred).mean()
print('nilai error pada data validasi: ', error[0])

nilai error pada data validasi:  0.021645021645021644


In [17]:
# Menyimpan hasil prediksi.
prediction = pd.read_csv('../input/bdc-satriadata2021/submission.csv')
prediction['jenis kelamin'] = y_test_pred
prediction.to_csv('prediction_cnn_0125.csv', index = False)
prediction

Unnamed: 0,id,jenis kelamin
0,005093b2-8c4b-4ed7-91c3-f5f4d50f8d27,1
1,0052554e-069e-4c43-beb0-0885e8f7684e,1
2,0092b954-1143-4a95-a17b-1edfa6af3b01,0
3,009fc28b-fe9b-441d-b8a2-ea8b7ae6ca16,0
4,00d0e306-06fe-45d8-ae6c-6f83ab8f7810,1
...,...,...
985,feb9e70f-4182-4500-866d-9b95657e727c,0
986,fed67ed2-620a-4ebf-b61e-16ef3d9b93f7,0
987,ff14c77e-c3c2-46db-9341-30de4130ef8a,1
988,ff662f3f-8c76-4d27-a73e-c43c4353f798,1


In [18]:
# Mencatat waktu berakhirnya keseluruhan program model dan prediksi data.
global_end_time = time.time()

# Menampilkan waktu eksekusi dari keseluruhan program model dan prediksi data.
total_execution_time = datetime.timedelta(seconds = global_end_time - global_start_time)
print("total execution time: %s" % (total_execution_time))

total execution time: 1:56:56.880585
