In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the mounted input directory and echo the full path of every file found.
for input_dir, _subdirs, input_files in os.walk('/kaggle/input'):
    for input_path in (os.path.join(input_dir, name) for name in input_files):
        print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import imageio
import numpy as np
from os import path
import os
from matplotlib import pyplot as plt
import numpy as np
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D,Conv3D, MaxPooling2D, BatchNormalization, MaxPool2D,GlobalAveragePooling2D, MaxPool3D
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint

In [None]:
class Data:
    """Reads the competition image splits and writes submission files.

    A split ``<name>`` is described by ``<INPUT_DIR>/<name>.txt`` whose first
    line is skipped (treated as a column header) and whose remaining lines are
    ``id[,label]`` rows; the images themselves live in ``<INPUT_DIR>/<name>/``.
    """

    # Root of the competition dataset (the Kaggle input tree is read-only).
    INPUT_DIR = "/kaggle/input/unibuc-2022-s24"
    # Writable directory that receives submission files.
    OUTPUT_DIR = "/kaggle/working"
    # (height, width) every image is resized to when loaded as a matrix.
    IMAGE_SIZE = (50, 50)

    @staticmethod
    def _read_rows(input_file):
        """Return the comma-split rows of ``<input_file>.txt`` without the first (header) line and blank lines."""
        with open(f"{Data.INPUT_DIR}/{input_file}.txt", "r") as f:
            f.readline()  # first line is the header, not data
            # strip() also removes a trailing "\r" from Windows line endings.
            return [line.strip().split(",") for line in f if line.strip()]

    @staticmethod
    def load(input_file='train'):
        """Load every image listed in ``<input_file>.txt`` that exists on disk.

        Args:
            input_file: Split name; selects both the ``.txt`` listing and the
                image sub-directory.

        Returns:
            A tuple ``(images_mat, images_arr, labels)`` of Python lists:
            ``images_mat`` holds grayscale arrays resized to ``IMAGE_SIZE`` and
            divided by 255, ``images_arr`` holds the raw image pixels flattened
            to one dimension, and ``labels`` holds the integer label of each
            loaded row that has a second column (empty when the listing has
            no label column).
        """
        images_arr, images_mat, labels = [], [], []
        for row in Data._read_rows(input_file):
            image_path = f"{Data.INPUT_DIR}/{input_file}/{row[0]}"
            # Rows whose image file is missing are skipped together with their label.
            if not path.isfile(image_path):
                continue

            # Raw pixels, flattened to a 1-D vector.
            img_arr = np.asarray(imageio.imread(image_path)).reshape(-1)

            # Resized single-channel matrix scaled by 1/255.
            img_mat = image.load_img(image_path, target_size=Data.IMAGE_SIZE, color_mode='grayscale')
            img_mat = image.img_to_array(img_mat) / 255

            images_arr.append(img_arr)
            images_mat.append(img_mat)

            if len(row) > 1:
                labels.append(int(row[1]))
        return images_mat, images_arr, labels

    @staticmethod
    def dump(predicted_labels, input_file="test", output_file="test"):
        """Write ``<OUTPUT_DIR>/submission_<output_file>.txt`` pairing ids with predictions.

        The ids are the first column of ``<input_file>.txt`` with the first
        (header) line skipped, exactly as ``load`` reads them, so that
        ``predicted_labels[i]`` is written next to the i-th data row.

        Args:
            predicted_labels: Sequence of predictions, one per data row.
            input_file: Split whose listing supplies the ids.
            output_file: Suffix used in the submission file name.

        Raises:
            ValueError: If the number of predictions differs from the number
                of ids (for example because ``load`` skipped missing images).
        """
        ids = [row[0] for row in Data._read_rows(input_file)]
        if len(ids) != len(predicted_labels):
            raise ValueError(
                f"{len(predicted_labels)} predictions given for {len(ids)} ids in {input_file}.txt"
            )

        # The dataset directory is read-only, so the submission goes to OUTPUT_DIR.
        os.makedirs(Data.OUTPUT_DIR, exist_ok=True)
        with open(f"{Data.OUTPUT_DIR}/submission_{output_file}.txt", "w") as o:
            o.write("id,label\n")
            o.write("\n".join(f"{image_id},{label}" for image_id, label in zip(ids, predicted_labels)))



In [None]:
# Read all three splits up front. Data.load returns
# (image matrices, flattened pixel vectors, labels); the third element is
# ignored for the test split.
train_images_mat, train_images_arr, train_labels = Data.load(input_file="train")
validation_images_mat, validation_images_arr, validation_labels = Data.load(input_file="validation")
test_images_mat, test_images_arr, _ = Data.load(input_file="test")

In [None]:
class CnnClassifier:
    """Convolutional classifier for 50x50 single-channel images with 10 output classes.

    The network is two convolutional stages (32 then 64 filters) followed by a
    128-unit dense layer and a softmax output. Training uses augmented batches
    and keeps the checkpoint with the highest validation accuracy on disk.
    """

    BEST_MODEL = 'best_model.h5'
    # Directory (relative to the working directory) that holds the checkpoint.
    MODEL_DIR = 'data'

    def __init__(self, dropout = .4):
        """Build and compile the network.

        Args:
            dropout: Dropout rate applied after each convolutional stage and
                after the dense layer.
        """
        # The checkpoint callback writes into MODEL_DIR; make sure it exists.
        os.makedirs(self.MODEL_DIR, exist_ok=True)

        self.model = Sequential()
        self._add_conv_stage(32, dropout, input_shape=(50, 50, 1))
        self._add_conv_stage(64, dropout)

        self.model.add(Flatten())
        self.model.add(Dense(128, activation='relu'))
        self.model.add(BatchNormalization())
        self.model.add(Dropout(dropout))
        self.model.add(Dense(10, activation='softmax'))

        self.model.compile(optimizer="Adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

        self.datagen = ImageDataGenerator(rotation_range=10, zoom_range=0.10, width_shift_range=0.1, height_shift_range=0.1)
        self.model_checkpoint = ModelCheckpoint(self._best_model_path(), monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

    def _best_model_path(self):
        """Return the path of the best-model checkpoint file."""
        return f'{self.MODEL_DIR}/{self.BEST_MODEL}'

    def _add_conv_stage(self, filters, dropout, **first_layer_kwargs):
        """Append one stage: two 3x3 convs, a strided 5x5 conv, each batch-normalised, then dropout."""
        self.model.add(Conv2D(filters, kernel_size=3, activation='relu', **first_layer_kwargs))
        self.model.add(BatchNormalization())
        self.model.add(Conv2D(filters, kernel_size=3, activation='relu'))
        self.model.add(BatchNormalization())
        # The stride-2 convolution halves the spatial resolution of the stage output.
        self.model.add(Conv2D(filters, kernel_size=5, padding='same', strides=2, activation='relu'))
        self.model.add(BatchNormalization())
        self.model.add(Dropout(dropout))

    def load_best(self):
        """Replace the current model with the saved checkpoint, if one exists on disk."""
        if path.isfile(self._best_model_path()):
            self.model = load_model(self._best_model_path())

    def train(self, train_images, train_labels, validation_images, validation_labels, epochs=5, batch_size=64):
        """Fit the model on augmented training batches.

        Args:
            train_images: Array of training images; its first dimension is the
                sample count.
            train_labels: Integer class label per training image.
            validation_images: Images evaluated after every epoch.
            validation_labels: Labels matching ``validation_images``.
            epochs: Number of passes over the training data.
            batch_size: Samples per augmented batch.
        """
        # Learning rate for epoch index x is 1e-3 * 0.95 ** (x + epochs).
        # NOTE(review): the exponent is offset by the total epoch count, so the
        # first epoch already starts below 1e-3 -- confirm this is intended.
        self.annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** (x + epochs))
        # Never request zero steps when there are fewer samples than one batch.
        steps_per_epoch = max(1, train_images.shape[0] // batch_size)
        self.model.fit(self.datagen.flow(train_images, train_labels, batch_size=batch_size), epochs=epochs,
                       steps_per_epoch=steps_per_epoch, callbacks=[self.annealer, self.model_checkpoint],
                       validation_data=(validation_images, validation_labels))

    def classify_images(self, test_images):
        """Return the predicted class index for each image.

        ``Sequential.predict_classes`` is no longer available in current
        Keras/TensorFlow releases; taking the argmax of the softmax output of
        ``predict`` gives the same class indices.
        """
        probabilities = self.model.predict(test_images)
        return np.argmax(probabilities, axis=-1)

In [None]:
def accuracy_score(ground_truth_labels, predicted_labels):
    """Return the fraction of predictions that equal the ground-truth label.

    Both arguments may be any array-like (lists, tuples, ndarrays). They are
    flattened before comparison so that an ``(N,)`` and an ``(N, 1)`` input
    are compared element by element instead of being broadcast against each
    other.

    Raises:
        ValueError: If the two inputs do not contain the same number of labels.
    """
    truth = np.asarray(ground_truth_labels).ravel()
    predicted = np.asarray(predicted_labels).ravel()
    if truth.shape != predicted.shape:
        raise ValueError(
            f"label count mismatch: {truth.size} ground-truth vs {predicted.size} predicted"
        )
    return np.mean(truth == predicted)

In [None]:
def test_accuracy(cnn_classifier, validation_images, validation_labels):
    """Evaluate a classifier on a labelled image set.

    Calls ``cnn_classifier.load_best()`` first, then classifies
    ``validation_images``, prints the accuracy as a percentage and returns it
    as a fraction.
    """
    cnn_classifier.load_best()
    score = accuracy_score(validation_labels, cnn_classifier.classify_images(validation_images))
    print(f"\nAccuracy: {score * 100}%")
    return score

In [None]:
def confunsion_matrix(predicted_labels, ground_truth_labels):
    """Build a confusion matrix with ground-truth rows and predicted columns.

    Args:
        predicted_labels: Array-like of predicted integer class labels.
        ground_truth_labels: Array-like of true integer class labels, aligned
            with ``predicted_labels``.

    Returns:
        A float array ``m`` where ``m[t, p]`` counts the samples whose true
        label is ``t`` and predicted label is ``p``. Its side length is one
        more than the largest label seen in either input, so a prediction
        larger than every true label still fits. Empty input yields a
        ``(0, 0)`` array.

    Raises:
        ValueError: If the two inputs have different lengths.
    """
    predicted = np.asarray(predicted_labels, dtype=int).ravel()
    truth = np.asarray(ground_truth_labels, dtype=int).ravel()
    if predicted.shape != truth.shape:
        raise ValueError(
            f"label count mismatch: {truth.size} ground-truth vs {predicted.size} predicted"
        )
    if truth.size == 0:
        return np.zeros((0, 0))

    num_labels = int(max(truth.max(), predicted.max())) + 1
    conf_mat = np.zeros((num_labels, num_labels))
    # Unbuffered add so that repeated (truth, predicted) pairs are all counted.
    np.add.at(conf_mat, (truth, predicted), 1)
    return conf_mat

In [None]:
# Stack the per-image lists produced by the loading cell into NumPy arrays,
# one (images, labels) pair per split; the test split has images only.
train_images_c, train_labels_c = np.array(train_images_mat), np.array(train_labels)
validation_images_c, validation_labels_c = np.array(validation_images_mat), np.array(validation_labels)
test_images_c = np.array(test_images_mat)

In [None]:
# Build the network with a dropout rate of 0.4.
cnn_classifier = CnnClassifier(dropout=0.4)