In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

from keras import regularizers
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pickle
import matplotlib.pyplot as plt
from math import sqrt, ceil
from timeit import default_timer as timer

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import UpSampling2D, Dense, Dropout, Flatten, Conv2D, MaxPool2D, AvgPool2D, BatchNormalization, Reshape, GlobalAveragePooling2D,Dropout,SeparableConv2D, Activation
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.optimizers import SGD, Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping
import keras
from keras.models import Model
from tensorflow.keras.applications import EfficientNetB0

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Wczytanie danych

In [None]:
# NOTE(review): pickle.load can execute arbitrary code - only load this file from a trusted source.
with open('../input/traffic-signs-preprocessed/data2.pickle', 'rb') as f:
    data = pickle.load(f, encoding='latin1')  # dictionary type

# One-hot encode the training and validation labels for Keras (43 classes)
for label_key in ('y_train', 'y_validation'):
    data[label_key] = to_categorical(data[label_key], num_classes=43)

# Move the channel axis to the end: axes (0, 1, 2, 3) -> (0, 2, 3, 1)
for image_key in ('x_train', 'x_validation', 'x_test'):
    data[image_key] = data[image_key].transpose(0, 2, 3, 1)

# Summarise what was loaded: the length for 'labels', the array shape for every other entry
for name, value in data.items():
    print(name + ':', len(value) if name == 'labels' else value.shape)

# Przykłady

In [None]:
%matplotlib inline

# Preparing function for ploting set of examples
# As input it will take 4D tensor and convert it to the grid
# Values will be scaled to the range [0, 255]
def convert_to_grid(x_input):
    """Tile a batch of images into one square grid image.

    Each image is min-max scaled to the range [0, 255] independently and
    placed row by row into a grid of ceil(sqrt(N)) x ceil(sqrt(N)) cells.
    Cells are separated by a one-pixel border; the border and any unused
    cells keep the background value 255.

    Args:
        x_input: 4D array of shape (N, H, W, C).

    Returns:
        Float array of shape (H * g + g - 1, W * g + g - 1, C), where
        g = max(1, ceil(sqrt(N))).
    """
    N, H, W, C = x_input.shape
    # At least one cell, so an empty batch yields a blank grid instead of a
    # negative array dimension.
    grid_size = max(1, int(ceil(sqrt(N))))
    grid_height = H * grid_size + (grid_size - 1)
    grid_width = W * grid_size + (grid_size - 1)
    grid = np.full((grid_height, grid_width, C), 255.0)

    for idx in range(N):
        row, col = divmod(idx, grid_size)
        y0 = row * (H + 1)  # +1 skips the separator line
        x0 = col * (W + 1)
        img = x_input[idx]
        low, high = np.min(img), np.max(img)
        span = high - low
        if span > 0:
            grid[y0:y0 + H, x0:x0 + W] = 255.0 * (img - low) / span
        else:
            # Constant image: every pixel equals the minimum, so it maps to 0.
            # Dividing by a zero span would fill the cell with NaN.
            grid[y0:y0 + H, x0:x0 + W] = 0.0

    return grid


# Take the first 81 training images
examples = data['x_train'][:81]
print(examples.shape)  # (81, 32, 32, 3)

# Create a 15 x 15 inch figure and draw the tiled examples on it
fig = plt.figure()
fig.set_size_inches(15, 15)
grid = convert_to_grid(examples)
plt.imshow(grid.astype('uint8'), cmap='gray')
plt.axis('off')
plt.title('Some examples of training data', fontsize=18)

# Display the figure, then write the same figure to disk and release it
plt.show()
fig.savefig('training_examples.png')
plt.close()

# Funkcje generujące modele

Zmniejszanie learning rate

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule used with `LearningRateScheduler`.

    Keeps the learning rate unchanged for epochs 0-9 and multiplies it by
    exp(-0.1) on every later epoch. The new value is printed and returned
    as a plain number rather than a TensorFlow tensor.

    Args:
        epoch: zero-based epoch index.
        lr: current learning rate.

    Returns:
        The learning rate to use for this epoch.
    """
    from math import exp  # local import: the notebook only imports sqrt and ceil from math

    new_lr = lr if epoch < 10 else lr * exp(-0.1)
    print('lr = ', new_lr)
    return new_lr

Generuje początkowy model

In [None]:
# PARAMS: epochs, number of training samples, kernel size, batch size
def model1(epochs, val, kernel=3, batches = 5):
    """Build and train the baseline CNN.

    Architecture: Conv2D(32 filters) -> MaxPool -> Flatten -> Dense(500) ->
    Dense(43, softmax), compiled with the 'adam' optimizer and categorical
    cross-entropy. The learning rate is set every epoch to
    1e-3 * 0.95 ** (epoch + epochs).

    Args:
        epochs: number of training epochs.
        val: number of leading samples of data['x_train'] to train on.
        kernel: convolution kernel size.
        batches: batch size passed to `fit`.

    Returns:
        The object returned by `fit`.
    """
    net = Sequential([
        Conv2D(32, kernel_size=kernel, padding='same', activation='relu', input_shape=(32, 32, 3)),
        MaxPool2D(pool_size=2),
        Flatten(),
        Dense(500, activation='relu'),
        Dense(43, activation='softmax'),
    ])
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    def decayed_lr(epoch_idx):
        # Exponential decay whose starting point is shifted by the total epoch count
        return 1e-3 * 0.95 ** (epoch_idx + epochs)

    train_x = data['x_train'][:val]
    train_y = data['y_train'][:val]
    return net.fit(train_x, train_y,
                   batch_size=batches, epochs=epochs,
                   validation_data=(data['x_validation'], data['y_validation']),
                   callbacks=[LearningRateScheduler(decayed_lr)], verbose=1)


Generuje model VGG (bez wag początkowych)

In [None]:
def model2(epochs, val, batches = 32):
    """Build and train a VGG-style CNN from scratch (no pretrained weights).

    Four convolutional stages (64, 128, 256, 512 filters) of 3x3 convolutions,
    each followed by batch normalisation, with dropout between the
    convolutions of a stage and max pooling after each stage. The classifier
    head is Dense(512) -> BatchNormalization -> Dropout -> Dense(43, softmax).
    The learning rate follows the notebook-level `scheduler` function.

    Args:
        epochs: number of training epochs.
        val: number of leading samples of data['x_train'] to train on.
        batches: batch size passed to `fit`.

    Returns:
        The object returned by `fit`.
    """
    def conv3x3(filters, **extra):
        # 3x3 same-padded ReLU convolution with an L2 weight penalty.
        # The L2 regularization penalty is computed as: loss = l2 * reduce_sum(square(x))
        return Conv2D(filters=filters, kernel_size=(3, 3), padding="same", activation="relu",
                      kernel_regularizer=regularizers.l2(0.0005), **extra)

    # Per stage: (filters, dropout rate after each conv+BN pair or None, extra MaxPool2D kwargs)
    stages = [
        (64, (0.3, None), {}),
        (128, (0.4, None), {'strides': (2, 2)}),
        (256, (0.4, 0.4, None), {}),
        (512, (0.4, 0.4, None), {}),
    ]

    model = Sequential()
    is_first_conv = True
    for filters, dropout_rates, pool_kwargs in stages:
        for rate in dropout_rates:
            # Only the very first layer declares the input shape
            extra = {'input_shape': (32, 32, 3)} if is_first_conv else {}
            is_first_conv = False
            model.add(conv3x3(filters, **extra))
            model.add(BatchNormalization())
            if rate is not None:
                model.add(Dropout(rate))
        model.add(MaxPool2D(pool_size=(2, 2), **pool_kwargs))
    model.add(Dropout(0.5))

    # Classifier head
    model.add(Flatten())
    model.add(Dense(512, activation="relu", kernel_regularizer=regularizers.l2(0.0005)))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(43, activation="softmax"))

    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    train_x = data['x_train'][:val]
    train_y = data['y_train'][:val]
    return model.fit(train_x, train_y,
                     batch_size=batches, epochs=epochs,
                     validation_data=(data['x_validation'], data['y_validation']),
                     callbacks=[LearningRateScheduler(scheduler)], verbose=1)

    

Generuje model VGG z wagami (imagenet)

In [None]:
def model2_2(epochs, val, batches = 32):
    """Train a classifier head on top of an ImageNet-initialised VGG16.

    The VGG16 base (without its top) is followed by global average pooling,
    Dense(1024) and Dense(512) layers with 25% dropout each, and a
    43-way softmax. The first 19 layers of the combined model are frozen.
    The learning rate is set every epoch to 1e-3 * 0.95 ** (epoch + 50).

    Args:
        epochs: number of training epochs.
        val: number of leading samples of data['x_train'] to train on.
        batches: batch size passed to `fit`.

    Returns:
        The object returned by `fit`.
    """
    backbone = VGG16(weights='imagenet', include_top=False)

    head = GlobalAveragePooling2D()(backbone.output)
    head = Dense(1024, activation='relu')(head)
    head = Dropout(0.25)(head)
    head = Dense(512, activation='relu')(head)
    head = Dropout(0.25)(head)
    preds = Dense(43, activation='softmax')(head)  # final layer with softmax activation

    model = Model(inputs=backbone.input, outputs=preds)

    # Freeze layers 0-18, leave everything from index 19 onwards trainable
    for position, layer in enumerate(model.layers):
        layer.trainable = position >= 19

    initial_lr = 0.0005
    opt = Adam(lr=initial_lr, beta_1=0.9, beta_2=0.999, epsilon=None,
               decay=initial_lr / 50, amsgrad=False)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    def decayed_lr(epoch_idx):
        return 1e-3 * 0.95 ** (epoch_idx + 50)

    train_x = data['x_train'][:val]
    train_y = data['y_train'][:val]
    return model.fit(train_x, train_y,
                     batch_size=batches, epochs=epochs,
                     validation_data=(data['x_validation'], data['y_validation']),
                     callbacks=[LearningRateScheduler(decayed_lr)], verbose=1)

Generuje model VGG z wagami (imagenet) - lambda/funkcja

In [None]:
#test_vgg=model_vgg(20,6000, 0.005)

In [None]:
def model_vgg(epochs, val,l_r = 0.001, schedule=True, batches = 32):
    """Fine-tune a truncated, ImageNet-initialised VGG16 on the traffic-sign data.

    The VGG16 base is cut after 'block3_pool' and followed by
    Flatten -> Dense(256) -> Dropout(0.5) -> Dense(43, softmax).
    No layers are frozen.

    Args:
        epochs: number of training epochs.
        val: number of leading samples of data['x_train'] to train on.
        l_r: starting learning rate. It is used by both schedules; previously
            the exponential schedule hard-coded 1e-3 and ignored this value.
        schedule: if true, use the notebook-level `scheduler` function;
            otherwise use the exponential decay l_r * 0.95 ** (epoch + 40).
        batches: batch size passed to `fit`.

    Returns:
        The object returned by `fit`.
    """
    base = VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

    # Take the output of the last layer of the third block
    last = base.get_layer('block3_pool').output

    x = Flatten()(last)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    pred = Dense(43, activation='softmax')(x)

    model = Model(inputs=base.input, outputs=pred)

    opt = Adam(lr=l_r, beta_1=0.9, beta_2=0.999, epsilon=None)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    if schedule:
        annealer = LearningRateScheduler(scheduler)
    else:
        # With the default l_r = 1e-3 this equals the original 1e-3 * 0.95 ** (x + 40)
        annealer = LearningRateScheduler(lambda x: l_r * 0.95 ** (x + 40))

    h = model.fit(data['x_train'][:val], data['y_train'][:val],
                  batch_size=batches, epochs=epochs,
                  validation_data=(data['x_validation'], data['y_validation']),
                  callbacks=[annealer], verbose=1)

    return h

**EfficientNetB0 nie przyjmuje obrazków 32x32.**

Generuje model EfficientNetB0 z wagami (imagenet)

In [None]:
def efficient(epochs, val,l_r = 0.001, schedule=True, batches = 32):
    """Train a classifier head on top of an ImageNet-initialised EfficientNetB0.

    The EfficientNetB0 base (without its top) is followed by global average
    pooling, Dense(1280) and Dense(640) layers with 25% dropout each, and a
    43-way softmax. The first 237 layers of the combined model are frozen.

    Args:
        epochs: number of training epochs.
        val: number of leading samples of data['x_train'] to train on.
        l_r: starting learning rate. It is used by both schedules; previously
            the exponential schedule hard-coded 1e-3 and ignored this value.
        schedule: if true, use the notebook-level `scheduler` function;
            otherwise use the exponential decay l_r * 0.95 ** (epoch + 40).
        batches: batch size passed to `fit`.

    Returns:
        The object returned by `fit`.
    """
    base = EfficientNetB0(include_top=False, weights='imagenet')

    x = GlobalAveragePooling2D()(base.output)
    x = Dense(1280, activation='relu')(x)
    x = Dropout(0.25)(x)
    x = Dense(640, activation='relu')(x)
    x = Dropout(0.25)(x)
    preds = Dense(43, activation='softmax')(x)  # final layer with softmax activation

    model = Model(inputs=base.input, outputs=preds)

    # Freeze layers 0-236, train everything from index 237 onwards
    for position, layer in enumerate(model.layers):
        layer.trainable = position >= 237

    opt = Adam(lr=l_r, beta_1=0.9, beta_2=0.999, epsilon=None)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    if schedule:
        annealer = LearningRateScheduler(scheduler)
    else:
        # With the default l_r = 1e-3 this equals the original 1e-3 * 0.95 ** (x + 40)
        annealer = LearningRateScheduler(lambda x: l_r * 0.95 ** (x + 40))

    h = model.fit(data['x_train'][:val], data['y_train'][:val],
                  batch_size=batches, epochs=epochs,
                  validation_data=(data['x_validation'], data['y_validation']),
                  callbacks=[annealer], verbose=1)

    return h
    

Generuje model EfficientNetB0 z wagami (imagenet) bez zmiany learning rate

In [None]:
def efficient_cons(epochs, val,l_r = 0.001, batches = 32):
    """Train an EfficientNetB0-based classifier with no learning-rate callback.

    Same architecture as `efficient`: the ImageNet-initialised base without
    its top, global average pooling, Dense(1280) and Dense(640) with 25%
    dropout each, and a 43-way softmax. The first 237 layers are frozen.

    Args:
        epochs: number of training epochs.
        val: number of leading samples of data['x_train'] to train on.
        l_r: learning rate given to the Adam optimizer.
        batches: batch size passed to `fit`.

    Returns:
        The object returned by `fit`.
    """
    backbone = EfficientNetB0(include_top=False, weights='imagenet')

    features = GlobalAveragePooling2D()(backbone.output)
    for width in (1280, 640):
        features = Dense(width, activation='relu')(features)
        features = Dropout(0.25)(features)
    preds = Dense(43, activation='softmax')(features)  # final layer with softmax activation

    model = Model(inputs=backbone.input, outputs=preds)

    # Freeze layers 0-236, train everything from index 237 onwards
    for position, layer in enumerate(model.layers):
        layer.trainable = position >= 237

    model.compile(optimizer=Adam(lr=l_r, beta_1=0.9, beta_2=0.999, epsilon=None),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    train_x = data['x_train'][:val]
    train_y = data['y_train'][:val]
    return model.fit(train_x, train_y,
                     batch_size=batches, epochs=epochs,
                     validation_data=(data['x_validation'], data['y_validation']),
                     verbose=1)
    

# FUNKCJA RYSOWANIA GRAFÓW

In [None]:
def graph(h):
    """Plot the training curves of one fitted model.

    Left panel: training and validation accuracy per epoch.
    Right panel: validation loss per epoch.

    The `%matplotlib inline` magic that used to sit inside this function was
    removed: it is not valid Python outside IPython, and the notebook already
    enables it at cell level.

    Args:
        h: object with a `history` dict containing the keys 'accuracy',
            'val_accuracy' and 'val_loss'.
    """
    # These defaults are set globally and stay in effect for later plots
    plt.rcParams['figure.figsize'] = (15.0, 5.0) # Setting default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['font.family'] = 'Times New Roman'

    plt.figure()
    plt.subplot(1,2,1)
    plt.plot(h.history['accuracy'], '-o', linewidth=3.0)
    plt.plot(h.history['val_accuracy'], '-o', linewidth=3.0)
    plt.legend(['train', 'validation'], loc='upper left', fontsize='xx-large')
    plt.xlabel('Epoch', fontsize=20)
    plt.ylabel('Accuracy', fontsize=20)
    plt.tick_params(labelsize=18)

    plt.subplot(1,2,2)
    plt.plot(h.history['val_loss'], '-o', linewidth=3.0)
    plt.legend(['validation loss'], loc='upper left', fontsize='xx-large')
    plt.xlabel('Epoch', fontsize=20)
    plt.ylabel('Loss', fontsize=20)
    plt.tick_params(labelsize=18)

    # Showing the plot
    plt.show()


# FUNKCJA PORÓWNYWANIA GRAFÓW

In [None]:


def multi_graph(models, mat, name ="", sizeX = 20.0, sizeY = 15.0):
    """Overlay the training curves of several models on shared axes.

    Draws three panels in a 2x2 layout: training accuracy, validation
    accuracy and validation loss. Each model gets one line per panel,
    labelled '<epochs>e <samples>d <learning rate>a' from columns of `mat`.

    Args:
        models: sequence of objects with a `history` dict containing
            'accuracy', 'val_accuracy' and 'val_loss'.
        mat: indexable as mat[row][i]; row 0 holds epochs, row 1 sample
            counts and row 3 learning rates for model i.
        name: figure title.
        sizeX: figure width, written to the global matplotlib defaults.
        sizeY: figure height, written to the global matplotlib defaults.
    """
    plt.rcParams['figure.figsize'] = (sizeX, sizeY) # Setting default size of plots
    plt.suptitle(name)
    print(mat)

    # (history key, y-axis label) for each panel, in drawing order
    panels = (
        ('accuracy', 'Accuracy'),
        ('val_accuracy', 'Validation accuracy'),
        ('val_loss', 'Validation loss'),
    )
    for slot, (metric, y_label) in enumerate(panels, start=1):
        plt.subplot(2, 2, slot)
        for idx, run in enumerate(models):
            # To show the kernel size as well, add '{2}k' fed from mat[2][idx]
            legend_text = '{0}e {1}d {2:.3f}a'.format(mat[0][idx], mat[1][idx], mat[3][idx])
            plt.plot(run.history[metric], '-o', linewidth=3.0, label=legend_text)
        plt.legend(loc=4)
        plt.xlabel('Epoch', fontsize=20)
        plt.ylabel(y_label, fontsize=20)
        plt.tick_params(labelsize=18)

    plt.show()
    plt.close()

# FUNKCJA TWORZENIA MACIERZY DEF. MODEL (epoki, dane)

In [None]:
def matr(E, D, ALPH=0, isInt = True):
    """Build the 4 x len(E) matrix of hyperparameters used for plot legends.

    Rows:
        0 - number of epochs
        1 - number of training samples
        2 - kernel size (always the default 3)
        3 - learning rate (left at 0 when ALPH is not given)

    The matrix is printed before it is returned.

    Args:
        E: sequence of epoch counts.
        D: sequence of training-sample counts, at least as long as E.
        ALPH: sequence of learning rates (list or NumPy array), or 0 / None
            to leave row 3 at zero.
        isInt: if true, return the matrix cast to int (this truncates
            learning rates); otherwise return it as floats.

    Returns:
        NumPy array of shape (4, len(E)).
    """
    n = len(E)
    M = np.zeros((4, n))

    # Decide once whether learning rates were supplied. Checking the number of
    # dimensions first avoids `array != 0`, whose truth value is ambiguous and
    # raises ValueError for NumPy arrays.
    has_alpha = ALPH is not None and not (np.ndim(ALPH) == 0 and ALPH == 0)

    for i in range(n):
        M[0][i] = E[i]
        M[1][i] = D[i]
        M[2][i] = 3  # default 3x3 filter
        if has_alpha:
            M[3][i] = ALPH[i]  # learning rate
    print(M)

    return M.astype(int) if isInt else M

# Dla przykładu:

# 1. Prosty model

Parametry: 
* -> E[] = kolejne wartości epok
* -> D[] = kolejne ilości danych trenujących

In [None]:
# Start with empty lists of epoch counts (E) and training-sample counts (D)
E=[]
D=[]
# defaults
# E=[10,10,20,20]
# D=[10,1000,10,1000]

Kod dodaje do tablicy TAB_MOD kolejne wytrenowane modele

In [None]:
# Epoch counts and training-sample counts for the baseline runs (use empty lists to skip training)
E=[10,10,20,20]
D=[10,1000,10,1000]
TAB_MOD=[]

# Train one baseline model per (epochs, samples) pair and keep each fit result
for i, n_epochs in enumerate(E):
    n_samples = D[i]
    print ('{} epok, {} danych uczących'.format(n_epochs, n_samples))
    TAB_MOD.append(model1(n_epochs, n_samples))
    print('<==============================================>')




In [None]:
#stary kod
#print ('{} epok, {} danych uczących'.format(E[iterator], D[iterator]))
#h1 = model1(E[iterator], D[iterator])
#TAB_MOD.append(h1)
#iterator = iterator + 1
#print ('<===========================>')

#e = 10
#d = 1000

#E.append(e)
#D.append(d)

#print ('{} epok, {} danych uczących'.format(E[iterator], D[iterator]))
#h2 = model1(E[iterator], D[iterator])
#TAB_MOD.append(h2)
#iterator = iterator + 1
#print ('<===========================>')

#e = 20
#d = 10

#E.append(e)
#D.append(d)

#print ('{} epok, {} danych uczących'.format(E[iterator], D[iterator]))
#h3 = model1(E[iterator], D[iterator])
#TAB_MOD.append(h3)
#iterator = iterator + 1
#print ('<===========================>')

#e = 20
#d = 1000

#E.append(e)
#D.append(d)

#print ('{} epok, {} danych uczących'.format(E[iterator], D[iterator]))
#h4 = model1(E[iterator], D[iterator])
#TAB_MOD.append(h4)


In [None]:
# Plot every trained baseline model. Iterating over TAB_MOD itself keeps this
# cell correct even if the global E has been reassigned to a different length.
for history in TAB_MOD:
    graph(history)

Porównanie danych modeli na konkretnych wykresach

In [None]:
# First build the matrix describing the trained models (epochs and sample counts)
M_model1 = matr(E, D)


# Porównanie wszystkich modeli typu 1

In [None]:
# Overlay all baseline runs; M_model1 supplies the legend labels
multi_graph(TAB_MOD, M_model1)

Na powyższych wykresach widać, jak duże znaczenie ma ilość danych, choć różnica między zestawami też jest spora (10 -> 1000). Przygotowałem odpowiednie funkcje, więc jeśli zajdzie potrzeba sprawdzenia innych warunków, wystarczy zmienić parametry (model1(liczba epok, liczba danych, filtr)). Następnie modele można porównać przy pomocy funkcji multi_graph(models, mat), do której przekazujemy tablicę modeli oraz macierz ich parametrów, ponieważ nie mogłem zmienić nazw modeli. Trzeba sobie jakoś radzić. Oznaczenia na legendzie: e - liczba epok, d - ilość danych, a - współczynnik uczenia (szerokość konwolucji k jest zapisana w macierzy, ale domyślnie nie trafia na legendę).

# 2. VGG(już nie 16) - zmodyfikowany VGG16 dopasowany do problemu

Bez Transfer Learningu:

In [None]:
# Epoch counts and training-sample counts for the VGG-style model trained from scratch
E=[35,35]
D=[6000,10000]
TAB_MOD1=[]

# Train one model per (epochs, samples) pair and keep each fit result
for i, n_epochs in enumerate(E):
    n_samples = D[i]
    print ('{} epok, {} danych uczących'.format(n_epochs, n_samples))
    TAB_MOD1.append(model2(n_epochs, n_samples))
    print('<==============================================>')




Porównanie wyżej wytrenowanych modeli

In [None]:
# First build the matrix describing the trained models (epochs and sample counts)

M_VGG = matr(E, D)


# Overlay the from-scratch VGG-style runs
multi_graph(TAB_MOD1, M_VGG)

Transfer learning:

Trenowanie klasyfikatora:

In [None]:
# Epoch counts and training-sample counts for the transfer-learning runs
# (longer alternative: E=[100,100,200,200], D=[6000,10000,6000,10000])
E=[30, 30]
D=[6000, 10000]
TAB_MOD2=[]

# Train one pretrained-VGG model per (epochs, samples) pair with the default settings
for i, n_epochs in enumerate(E):
    n_samples = D[i]
    print ('{} epok, {} danych uczących (transfer learning)'.format(n_epochs, n_samples))
    TAB_MOD2.append(model_vgg(n_epochs, n_samples))
    print('<==============================================>')




In [None]:
# Legend matrix for the transfer-learning runs
M_VGG_T=matr(E, D)

# Compare the pretrained runs with the runs trained from scratch
multi_graph(TAB_MOD2, M_VGG_T, 'Trained + function scheduler')
multi_graph(TAB_MOD1, M_VGG, 'Not Trained')

# MODEL WYUCZONY VGG (zmiana liczb epok,stała liczba danych, inny scheduler, inne learning rate)

Tablice z cechami modeli

In [None]:
# Per-run settings: epochs, training-sample counts and starting learning rates
E=[30,30]
D=[10000,10000]
alpha=[0.001, 0.005]



In [None]:
# Fit results of the pretrained-VGG runs that use the `scheduler` function
TAB_MOD2_sc=[]

In [None]:
# Fit results of the pretrained-VGG runs that use the lambda (exponential) schedule
TAB_MOD2_lam=[]

Zakomentowałem z funkcją lambda, gdyż nie starczyłoby mi czasu na GPU

Nauczanie modeli z schedulerem

In [None]:
# Train the pretrained VGG once per configuration using the `scheduler` function (schedule=True)
for i, n_epochs in enumerate(E):
    n_samples, start_lr = D[i], alpha[i]
    print ('{} epok, {} danych uczących, {} -> startowy learning rate (transfer learning)'.format(n_epochs, n_samples, start_lr))
    TAB_MOD2_sc.append(model_vgg(n_epochs, n_samples, start_lr, True))
    print('<==============================================>')

Przygotowanie do porównania - macierz cech:

In [None]:
# Legend matrix including learning rates; kept as floats so the rates are not truncated
M_VGG_SC_L = matr(E, D, alpha, False)

Nauczanie modeli z schedulerem (lambda)

In [None]:
# Train the pretrained VGG once per configuration using the lambda schedule (schedule=False)
for i, n_epochs in enumerate(E):
    n_samples, start_lr = D[i], alpha[i]
    print ('{} epok, {} danych uczących, {} -> startowy learning rate (transfer learning)'.format(n_epochs, n_samples, start_lr))
    TAB_MOD2_lam.append(model_vgg(n_epochs, n_samples, start_lr, False))
    print('<==============================================>')

In [None]:
# Compare the two learning-rate schedules for the pretrained VGG
multi_graph(TAB_MOD2_sc, M_VGG_SC_L, 'Trained + scheduler')
multi_graph(TAB_MOD2_lam, M_VGG_SC_L, 'Trained + lambda scheduler')

Niżej próbowałem zmieniać funkcję scheduler, lecz to tylko pogarszało wyniki.

In [None]:
#def scheduler(epoch, lr):
#    if epoch < 10:
#        print(lr)
#        return lr
#    else:
#    print(lr * tf.math.exp(-0.1))
#    return lr * tf.math.exp(-0.1)
    #if epoch > 15:
    #    if lr<0.0005:
    #        return lr*10
    #    else:
    #        return lr
    #else:
    #    return lr * tf.math.exp(-0.01)
#    if epoch < 10:
#        return lr
#    if epoch > 20:
#        if lr < 0.0035:
#            return lr * 2
#        else:
#            return lr
#    else:
#        print(lr * tf.math.exp(-0.1))
#        print(lr * tf.math.exp(-0.01))
#        return lr * tf.math.exp(-0.1)

# Porównanie wszystkich modeli:

In [None]:

# Baseline CNN
multi_graph(TAB_MOD, M_model1, 'First model')

# VGG-style model trained from scratch
multi_graph(TAB_MOD1, M_VGG, 'Not Trained')
# NOTE(review): TAB_MOD2 was trained via model_vgg with its default schedule=True,
# i.e. the `scheduler` function, so this title looks inaccurate - confirm.
multi_graph(TAB_MOD2, M_VGG_T, 'Trained + lambda scheduler (const learning rate)')
# - 
#multi_graph(TAB_MOD2_lam, M_VGG_SC_L, 'Trained + lambda scheduler')
multi_graph(TAB_MOD2_sc, M_VGG_SC_L, 'Trained + scheduler')

# 3. EfficientNetB0

Niestety nie udało mi się zmienić rozmiarów wejściowych danych by dopasować do rozdzielczości odpowiedniej dla systemu. 

Trenowanie i porównanie

In [None]:
# Settings for the EfficientNetB0 runs: epochs (E), training-sample counts (D)
# and starting learning rates (A). The commented lines are longer alternatives.
#E=[100, 100, 200, 200]
E = [30, 30]
#D=[10000, 10000, 10000, 10000]
D=[10000, 10000]
#A=[0.01, 0.05, 0.01, 0.05]
A=[0.01, 0.05]
# Fit results for the scheduler runs and for the constant-learning-rate runs
TAB_MOD_B0_sc=[]
#TAB_MOD_B0_lam=[]
TAB_MOD_B0_const=[]



In [None]:
# Train EfficientNetB0 once per configuration using the `scheduler` function.
# The printed learning rate is A[i], the value actually passed to `efficient`;
# the old message printed alpha[i], which holds the VGG learning rates.
for i in range(len(E)):
    print ('{} epok, {} danych uczących, {} -> startowy learning rate (transfer learning)'.format(E[i], D[i], A[i]))
    h_temp=efficient(E[i], D[i], A[i], True)
    TAB_MOD_B0_sc.append(h_temp)
    print('<==============================================>')

In [None]:
# Legend matrix for the EfficientNetB0 runs. It uses A, the learning rates the
# models were trained with, rather than alpha (the VGG learning rates), so the
# legends show the correct values.
M_B0 = matr(E, D, A, False)
multi_graph(TAB_MOD_B0_sc, M_B0, 'Trained + scheduler')

Zakomentowałem z lambdą, gdyż nie starczyło mi czasu GPU by to zapisać, jeszcze nie do końca rozumiem jak to tutaj działa wszystko

In [None]:
#for i in range(len(E)):
#    print ('{} epok, {} danych uczących, {} -> startowy learning rate (transfer learning)'.format(E[i], D[i], alpha[i]))
#    h_temp=efficient(E[i], D[i], A[i], False)
#    TAB_MOD_B0_lam.append(h_temp)
#    print('<==============================================>')

In [None]:
#multi_graph(TAB_MOD_B0_lam, M_B0, 'Trained + lambda')

In [None]:
# Train EfficientNetB0 once per configuration with a constant learning rate.
# The printed learning rate is A[i], the value actually passed to `efficient_cons`;
# the old message printed alpha[i], which holds the VGG learning rates.
TAB_MOD_B0_const=[]
for i in range(len(E)):
    print ('{} epok, {} danych uczących, {} -> startowy learning rate (transfer learning)'.format(E[i], D[i], A[i]))
    h_temp=efficient_cons(E[i], D[i], A[i])
    TAB_MOD_B0_const.append(h_temp)
    print('<==============================================>')

In [None]:
# EfficientNetB0 runs
multi_graph(TAB_MOD_B0_const, M_B0, 'B0: Trained lr const')
#multi_graph(TAB_MOD_B0_lam, M_B0, 'B0: Trained + lambda')
multi_graph(TAB_MOD_B0_sc, M_B0, 'B0: Trained + scheduler')

# Baseline CNN
multi_graph(TAB_MOD, M_model1, 'First model')

# VGG variants
multi_graph(TAB_MOD1, M_VGG, 'VGG: Not Trained')
# NOTE(review): TAB_MOD2 was trained via model_vgg with its default schedule=True,
# i.e. the `scheduler` function, so "const learning rate" looks inaccurate - confirm.
multi_graph(TAB_MOD2, M_VGG_T, 'VGG: Trained (const learning rate)')

multi_graph(TAB_MOD2_lam, M_VGG_SC_L, 'VGG: Trained + lambda scheduler')
multi_graph(TAB_MOD2_sc, M_VGG_SC_L, 'VGG: Trained + scheduler')

Widać, że modele VGG16 oraz EfficientNetB0 radzą sobie bardzo źle w porównaniu do początkowego. To, że pierwszy model jest najlepszy, wynika też z samego problemu: nasze dane to same znaki, do tego w formacie 32x32. Trudno dobrać parametry tak, by chociaż się do niego zbliżyć. Dodatkowo samo przeprowadzanie eksperymentów (różne parametry w modelach) trwa bardzo długo.
# FUNKCJE
*wyuczone*
* efficient(liczba epok, liczba danych trenujących, learning rate, scheduler)
* efficient_cons(liczba epok, liczba danych trenujących, learning rate) - learning rate nie zmienia się w trakcie uczenia

* model_vgg(liczba epok, liczba danych trenujących, learning rate, scheduler) - wyuczony
* model2(liczba epok, liczba danych trenujących) - od zera

* model1(liczba epok, liczba danych trenujących, kernel=3, rozmiar batcha=5) - od zera

**Funkcje pomocnicze wykorzystane**
* graph(historia jednego modelu) - rysuje zestaw wykresów (accuracy oraz validation loss) dla jednego wytrenowanego modelu
* multi_graph(tablica modeli, macierz z hiperparametrami (parametry funkcji generujących modele) w celu rozróżnienia na legendzie), nazwa) - rysuje na każdym wykresie wszystkie modele z tablicy
* matr(tablica z ilością epok, danych oraz ew. współczynników nauczania, czy same całkowite) - tworzy macierz przekazywaną do multi_graph

# 4. Własny model

In [None]:
# PARAMS: epochs, number of training samples, learning rate, scheduler flags, batch size
def model_test(epochs, val, l_r = 0.005, schedule = False, lower = True, batches = 32):
    """Build and train the custom two-block CNN.

    Two blocks of (same-padded conv, valid conv, max pooling, 25% dropout)
    are followed by Flatten -> Dense(512) -> Dropout(0.5) -> Dense(43, softmax).
    The model is compiled with Adam at learning rate `l_r`.

    Args:
        epochs: number of training epochs.
        val: number of leading samples of data['x_train'] to train on.
        l_r: learning rate given to the Adam optimizer.
        schedule: accepted but not used anywhere in this function.
        lower: when equal to True, the notebook-level `scheduler` function
            adjusts the learning rate during training; otherwise no
            learning-rate callback is attached.
        batches: batch size passed to `fit`.

    Returns:
        The object returned by `fit`.
    """
    net = Sequential([
        # Block 1
        Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=(32,32,3)),
        Conv2D(32,(3,3), activation='relu'),
        MaxPool2D(pool_size=2),
        Dropout(0.25),
        # Block 2
        # NOTE(review): 63 filters next to a 64-filter layer looks like a typo for 64 - confirm.
        Conv2D(63, 3, padding='same', activation='relu'),
        Conv2D(64,(3,3), activation='relu'),
        MaxPool2D(pool_size=2),
        Dropout(0.25),
        # Classifier head
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(43, activation='softmax'),
    ])

    net.compile(optimizer=Adam(lr=l_r), loss='categorical_crossentropy', metrics=['accuracy'])

    fit_kwargs = dict(batch_size=batches, epochs=epochs,
                      validation_data=(data['x_validation'], data['y_validation']),
                      verbose=1)
    if lower == True:
        # Attach the learning-rate callback only when requested
        fit_kwargs['callbacks'] = [LearningRateScheduler(scheduler)]

    return net.fit(data['x_train'][:val], data['y_train'][:val], **fit_kwargs)

In [None]:
# Settings for the custom-model runs: epochs (E), training-sample counts (D)
# and learning rates (A). The commented lines are a larger alternative grid.
#E=[50,   50,   50,    50,    100,  100,  100,   100]
#D=[6000, 6000, 10000, 10000, 6000, 6000, 10000, 10000 ]
#A=[0.01, 0.05, 0.01,  0.05,  0.01, 0.05, 0.01,  0.05 ]
E=[30, 30]
D=[10000,10000]
A=[0.01, 0.05]
# Legend matrix kept as floats so the learning rates are not truncated
M_MY=matr(E,D,A, False)

In [None]:
# Fit results of the custom model: with the scheduler function / without a learning-rate callback
TAB_MY_MOD_SC=[]
#TAB_MY_MOD_LAM=[]
TAB_MY_MOD_CONST=[]

# Fit results of the runs that call model_test with its default settings
TAB_1MOD=[]

In [None]:
#TAB_MY_MOD_LAM=[]
#for i in range(len(E)):
#    print ('{} epok, {} danych uczących, {} -> learning rate'.format(E[i], D[i], A[i]))
#    h_temp=model_test(E[i], D[i], A[i])
#    TAB_MY_MOD_LAM.append(h_temp)
#    print('<==============================================>')

In [None]:
#multi_graph(TAB_MY_MOD_LAM, M_MY, 'Mój model - scheduler lambda')

In [None]:
# Train the custom model once per configuration; `lower` keeps its default, so the scheduler function is used
for i, n_epochs in enumerate(E):
    n_samples, rate = D[i], A[i]
    print ('{} epok, {} danych uczących, {} -> learning rate'.format(n_epochs, n_samples, rate))
    TAB_MY_MOD_SC.append(model_test(n_epochs, n_samples, rate, True))
    print('<==============================================>')

In [None]:
# Plot the custom-model runs trained with the scheduler function
multi_graph(TAB_MY_MOD_SC, M_MY, 'Mój model - scheduler funkcja')

In [None]:
# Train the custom model once per configuration with lower=False, i.e. without a learning-rate callback
for i, n_epochs in enumerate(E):
    n_samples, rate = D[i], A[i]
    print ('{} epok, {} danych uczących, {} -> learning rate'.format(n_epochs, n_samples, rate))
    TAB_MY_MOD_CONST.append(model_test(n_epochs, n_samples, rate, True, False))
    print('<==============================================>')

In [None]:
# Plot the custom-model runs trained without a learning-rate callback
multi_graph(TAB_MY_MOD_CONST, M_MY, 'Mój model - brak zmiany parametru nauczania')

Podstawowy model nie miał możliwości zmiany learning rate, co można dodać łatwo w opcjach optymalizatora. Lecz nie będę go poprawiał, skoro chciałem napisać coś lepszego.

In [None]:
# Train model_test with its default learning rate and scheduler settings.
# NOTE(review): these results are later plotted as 'Początkowy model' (the initial model),
# yet this calls model_test rather than model1 - confirm which model was intended.
for i, n_epochs in enumerate(E):
    n_samples = D[i]
    print ('{} epok, {} danych uczących'.format(n_epochs, n_samples))
    TAB_1MOD.append(model_test(n_epochs, n_samples))
    print('<==============================================>')

In [None]:
# Legend matrix without learning rates (row 3 stays at zero), kept as floats
M_MY1=matr(E,D,0, False)

In [None]:
# Runs of model_test with default settings, then the custom-model variants
multi_graph(TAB_1MOD, M_MY1, 'Początkowy model')
multi_graph(TAB_MY_MOD_SC, M_MY, 'Mój model - scheduler funkcja')
multi_graph(TAB_MY_MOD_CONST, M_MY, 'Mój model - brak zmiany parametru nauczania')

# EfficientNetB0 runs
multi_graph(TAB_MOD_B0_const, M_B0, 'B0: Trained lr const')
# TAB_MOD_B0_lam is never built in this notebook (its training cell is commented out),
# so plotting it would raise NameError. Re-enable this line together with that cell.
#multi_graph(TAB_MOD_B0_lam, M_B0, 'B0: Trained + lambda')
multi_graph(TAB_MOD_B0_sc, M_B0, 'B0: Trained + scheduler')

# Baseline CNN
multi_graph(TAB_MOD, M_model1, 'First model')

# VGG variants
multi_graph(TAB_MOD1, M_VGG, 'VGG: Not Trained')
# NOTE(review): TAB_MOD2 was trained via model_vgg with its default schedule=True,
# i.e. the `scheduler` function, so "const learning rate" looks inaccurate - confirm.
multi_graph(TAB_MOD2, M_VGG_T, 'VGG: Trained (const learning rate)')

multi_graph(TAB_MOD2_lam, M_VGG_SC_L, 'VGG: Trained + lambda scheduler')
multi_graph(TAB_MOD2_sc, M_VGG_SC_L, 'VGG: Trained + scheduler')

Udało się. Mój autorski model pokonał ten pierwotny. Validation loss spada o wiele szybciej, do tego każdy model jest całkiem dobry, a z tych przykładowych tylko kilka wybranych mogłoby się z nim równać pod względem Validation loss. W pozostałych miarach pierwotny model nie jest w stanie go dogonić. Na wykresach pierwotnego modelu badania są dodatkowo powtórzone podwójnie (widać, że są dwa takie same podpisy na legendzie - przykładowo niebieski i żółty to dwie próby/dwa modele o takich samych parametrach).