In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf

from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from keras.utils.vis_utils import plot_model

In [None]:
# Seed both NumPy and TensorFlow so that data shuffling/augmentation (NumPy)
# and weight initialisation/dropout (TensorFlow) are repeatable after
# "Restart Kernel -> Run All".
np.random.seed(1)
tf.random.set_seed(1)
# Dataset download link: https://drive.google.com/file/d/1qorrVV5R_BtcxYXYALHZnaKZCCdU1ZLS/view?usp=sharing

# All dataset paths are derived from this single root folder.
dataset_priecinok = 'german_ts'
trenovacia_cesta = dataset_priecinok + '/Train'

# Text file with one class (sign) name per line.
ts_triedy = 'znacky_triedy.txt'
# Test-set index; the notebook reads its 'ClassId' column for per-class counts.
df = pd.read_csv(dataset_priecinok + '/Test.csv')

# Target image size and channel count; used for resizing the images and as
# the model's input shape.
vyska = 32
sirka = 32
kanaly = 3

In [None]:
# Build the lookup {line number -> stripped line text} from the class-name
# file; the line number (starting at 0) is used as the class id.
# NOTE(review): open() uses the platform default encoding -- confirm it matches
# the encoding of the class-name file.
with open(ts_triedy) as subor_tried:
    znacky = dict(enumerate(nazov.strip() for nazov in subor_tried))

In [None]:
pocet_tried = len(znacky)

# Number of training files per class: the training folder contains one
# sub-folder per class id (0 .. pocet_tried - 1).
pocet_dat = [
    len(os.listdir(trenovacia_cesta + '/' + str(trieda)))
    for trieda in range(pocet_tried)
]
# Human-readable class names in the same order as pocet_dat.
znacka = [znacky[trieda] for trieda in range(pocet_tried)]

# Number of test samples per class. Reindexing over every class id keeps the
# series ordered by class and the same length as x_axis even when a class has
# no test samples (it then counts as 0), so the bar chart cannot go out of sync.
testovacia_pocet_dat = (
    df['ClassId']
    .value_counts()
    .reindex(range(pocet_tried), fill_value=0)
)
x_axis = np.arange(pocet_tried)

In [None]:
# Horizontal bar chart comparing the per-class sample counts of the training
# set (wide bars) and the test set (narrow bars drawn on top).
fig, ax = plt.subplots(figsize=(10, 12))
ax.barh(x_axis, pocet_dat, height=0.8, label='Train')
ax.barh(x_axis, testovacia_pocet_dat, height=0.8/3, label='Test')
ax.set_yticks(x_axis)
ax.set_yticklabels(znacka)

# Write each training count at the end of its bar.
for poradie, hodnota in enumerate(pocet_dat):
    ax.text(hodnota, poradie, str(hodnota), verticalalignment='center')

# Write each test count in white at the end of its bar.
for poradie, hodnota in enumerate(testovacia_pocet_dat):
    ax.text(hodnota, poradie, str(hodnota), fontsize=10,
            verticalalignment='center', color='white')

ax.legend()
plt.show()

In [None]:
# Load every training image, resize it to sirka x vyska and remember its class
# id (the name of the sub-folder it was read from).
obrazky = []
obrazky_nazvy = []

for i in range(pocet_tried):
    cesta = dataset_priecinok + '/Train/' + str(i)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/validation split) reproducible.
    priecinok = sorted(os.listdir(cesta))

    for j in priecinok:
        # cv2.imread does not raise on unreadable/non-image files, it returns
        # None -- check that explicitly instead of relying on a bare except.
        obrazok = cv2.imread(cesta + '/' + j)
        if obrazok is None:
            print("Chyba: " + j)
            continue
        try:
            # NOTE: cv2.imread yields BGR channel order and the array is handed
            # to PIL unchanged, so the stored pixels stay in BGR. The test-set
            # loader in this notebook does the same, keeping both consistent.
            pillow_format = Image.fromarray(obrazok, 'RGB')
            # PIL's resize expects (width, height).
            obrazok_32x32 = pillow_format.resize((sirka, vyska))
        except Exception as chyba:
            # Best effort: report the file and the reason, then skip it.
            print("Chyba: " + j, chyba)
            continue
        obrazky.append(np.array(obrazok_32x32))
        obrazky_nazvy.append(i)

obrazky = np.array(obrazky)
obrazky_nazvy = np.array(obrazky_nazvy)

In [None]:
# Hold out 20 % of the loaded images for validation; the fixed random_state
# makes the split repeatable for a given input order.
X_train, X_val, y_train, y_val = train_test_split(
    obrazky,
    obrazky_nazvy,
    test_size=0.2,
    random_state=1,
)

# Divide the pixel values by 255.
X_train, X_val = X_train / 255, X_val / 255

In [None]:
# One-hot encode the integer class ids of both splits (pocet_tried columns),
# as required by the categorical cross-entropy loss used at compile time.
y_train, y_val = (
    to_categorical(popis, pocet_tried) for popis in (y_train, y_val)
)

In [None]:
# CNN classifier: three convolutional stages, each closed by 2x2 max-pooling
# and batch normalisation, followed by a dense head with dropout and a softmax
# output with one unit per class.
model = Sequential()

# Stage 1: 16 -> 32 filters. The first layer fixes the input shape.
model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu',
                 input_shape=(vyska, sirka, kanaly)))
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(BatchNormalization(axis=-1))

# Stage 2: 64 -> 128 filters.
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(BatchNormalization(axis=-1))

# Stage 3: a single 128-filter convolution.
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(BatchNormalization())

# Classification head.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(rate=0.5))

model.add(Dense(pocet_tried, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
rychlost_ucenia = 0.001
epochy = 30

# The `lr=` and `decay=` keyword arguments of Adam are deprecated and are
# rejected by current Keras optimizers. The legacy `decay` option computed
#     lr_t = lr / (1 + decay * iteration)
# which is exactly InverseTimeDecay with decay_steps=1, so the schedule below
# reproduces the same learning-rate curve through the supported API.
rozvrh_ucenia = keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=rychlost_ucenia,
    decay_steps=1,
    decay_rate=rychlost_ucenia / (epochy * 0.5),
)
optimalizator = Adam(learning_rate=rozvrh_ucenia)

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer=optimalizator, metrics=['accuracy'])

In [None]:
# Augmentation settings: small rotations, zooms, shifts and shears; both
# flips are switched off.
nastavenia_augmentacie = dict(
    rotation_range=10,
    zoom_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    vertical_flip=False,
    fill_mode="nearest",
)
augmentacia = ImageDataGenerator(**nastavenia_augmentacie)

# Train on augmented batches of 32 images; the validation data is passed
# to fit() directly, without augmentation.
trenovacie_davky = augmentacia.flow(X_train, y_train, batch_size=32)
history = model.fit(
    trenovacie_davky,
    epochs=epochy,
    validation_data=(X_val, y_val),
)

In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots()
for kluc in ('loss', 'val_loss'):
    ax.plot(history.history[kluc])
ax.legend(['Trénovacia', 'Validačná'])
ax.set_title('Chybová funkcia')
ax.set_xlabel('Epoch')
ax.set_ylabel('Chyba')

In [None]:
# Training vs. validation accuracy per epoch.
fig, ax = plt.subplots()
for kluc in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[kluc])
ax.legend(['Trénovacia', 'Validačná'])
ax.set_title('Presnosť')
ax.set_xlabel('Epoch')
ax.set_ylabel('Presnosť')

In [None]:
# Persist the trained model to an HDF5 file and load it back from that file;
# the reloaded copy (nas_model) is the one used for evaluation.
subor_modelu = "dip_cnn_32.h5"
model.save(subor_modelu)
nas_model = load_model(subor_modelu)

In [None]:
# Evaluate the reloaded model on the test set listed in Test.csv
# (columns used: "Path" = image path relative to the dataset folder,
#  "ClassId" = true class id).
test_csv = pd.read_csv(dataset_priecinok + '/Test.csv')

# A separate name is used for the path list so the training array `obrazky`
# is not overwritten and earlier cells stay re-runnable.
testovacie_cesty = test_csv["Path"].values
test_obrazky = []
# Labels are collected together with the images: if an image is skipped, its
# label is skipped too, so predictions and labels can never get misaligned.
popisy = []

for popis, i in zip(test_csv["ClassId"].values, testovacie_cesty):
    # cv2.imread returns None (it does not raise) for unreadable files.
    obrazok = cv2.imread(dataset_priecinok + '/' + i)
    if obrazok is None:
        print("Error in " + i)
        continue
    try:
        # Same preprocessing as the training loader in this notebook: the
        # BGR array from OpenCV is passed to PIL unchanged.
        pillow_format = Image.fromarray(obrazok, 'RGB')
        # PIL's resize expects (width, height).
        obrazok_32x32 = pillow_format.resize((sirka, vyska))
    except Exception as chyba:
        # Best effort: report the file and the reason, then skip it.
        print("Error in " + i, chyba)
        continue
    test_obrazky.append(np.array(obrazok_32x32))
    popisy.append(popis)

popisy = np.array(popisy)
X_test = np.array(test_obrazky)
X_test = X_test/255

# Sequential.predict_classes was removed from Keras; taking the argmax of the
# softmax output is the documented replacement and yields the same class ids.
pred = np.argmax(nas_model.predict(X_test), axis=-1)
print('Testovacia sada, presnosť: ',accuracy_score(popisy, pred)*100)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Class ids are the keys of `znacky` (0 .. number of classes - 1).
id_tried = list(znacky)

# Passing `labels` fixes the matrix to one row/column per known class. Without
# it the matrix only covers classes that occur in popisy/pred, and building the
# DataFrame fails with a shape mismatch whenever a class is absent from both.
matrix = confusion_matrix(popisy, pred, labels=id_tried)
df_matrix = pd.DataFrame(matrix, index=id_tried, columns=id_tried)

plt.figure(figsize = (20,20))
# fmt='d' prints the counts as plain integers; the default '.2g' format would
# render values >= 100 in scientific notation.
sns.heatmap(df_matrix, annot=True, fmt='d')

In [None]:
# Print scikit-learn's text classification report comparing the true test
# labels (popisy) with the model's predicted class ids (pred).
print(classification_report(popisy, pred))