## Setup

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

### Getting the data
If you have your own dataset,
you'll probably want to use the utility
`tf.keras.preprocessing.image_dataset_from_directory` to generate similar labeled
 dataset objects from a set of images on disk filed into class-specific folders.

Transfer learning is most useful when working with very small datasets.

An answer at https://stackoverflow.com/questions/62409838/error-in-loading-image-dataset-from-directory-in-tensorflow suggested using `flow_from_directory` instead, so I looked it up and found it a better fit.

In [None]:
# Download the competition archive from Kaggle into data.zip.
# Kaggle requires a logged-in session, so wget loads the browser cookies from
# kaggle.com_cookies.txt; that file must already be in the working directory.
# How-to: https://www.kaggle.com/general/51898?fbclid=IwAR1MXWo2aPB1HxvR4y0EwU0s7sJaL6uks_pLipHFaCYY6Py2TGPHeOwVLIQ
# Browser plugin used to export the cookies: https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid?hl=hu

!wget -x --load-cookies kaggle.com_cookies.txt "https://www.kaggle.com/c/23035/download-all" -O data.zip

In [None]:
!unzip data.zip

In [47]:
#Remove unnecessary files

!rm -rf "/content/NN_2020_Kaggle_dataset/test"
!rm -rf "/content/NN_2020_Kaggle_dataset/class_id_mapping.py"
!rm -rf "/content/NN_2020_Kaggle_dataset/Sample_labels_image_id.txt"

#moved test folder manually
#!ls "./NN_2020_Kaggle_dataset/"

In [48]:
# Everything that is not a class folder (test folder, etc.) has to be removed
# from DATADIR before the generators below are created!
DATADIR = "/content/NN_2020_Kaggle_dataset"
IMG_SIZE = 150
BATCH_SIZE = 24

# API reference: https://keras.io/api/preprocessing/image/#flowfromdirectory-method

# Arguments shared by both generators: the same pixel rescaling and the same
# validation fraction.
COMMON_DATAGEN_ARGS = dict(rescale=1./255, validation_split=0.2)

# Training images are augmented on the fly (shear, zoom, flips).
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    **COMMON_DATAGEN_ARGS)

# Validation images are only rescaled -- the validation set is not augmented.
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**COMMON_DATAGEN_ARGS)

In [49]:
# Training batches: read from the class folders under DATADIR with the
# augmenting generator, restricted to the 'training' part of the split.
train_generator = train_datagen.flow_from_directory(
    directory=DATADIR,
    subset='training',
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode="rgb",
    class_mode='categorical',  # one-hot labels, as expected by categorical_crossentropy
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=None,
)

Found 6711 images belonging to 16 classes.


In [50]:
# Validation batches: same directory and image settings as the training
# generator, but produced by the non-augmenting generator and restricted to
# the 'validation' part of the split.
validation_generator = validation_datagen.flow_from_directory(
    directory=DATADIR,
    subset='validation',
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode="rgb",
    classes=None,  # class list is inferred from the sub-folder names
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=None,
)

Found 1669 images belonging to 16 classes.


## Build a model

In [51]:
from keras.models import Model
from tensorflow.keras.applications import InceptionResNetV2
from keras.layers import Dense, Dropout, GlobalMaxPooling2D

#https://keras.io/api/applications/inceptionresnetv2/ (150, 150, 3) would be one valid value
#https://arxiv.org/pdf/1602.07261.pdf

def InceptionResNetV2_model(IMG_SIZE, channel = 1, num_classes = None, frozen_fraction = 0.7):
  """Build and compile an ImageNet-pretrained InceptionResNetV2 classifier.

  The convolutional base is loaded without its top, followed by global max
  pooling, dropout, a 150-unit ReLU layer, dropout and a softmax output.
  The first `frozen_fraction` of all layers is made non-trainable and the
  model is compiled with Nesterov SGD and categorical cross-entropy.

  Args:
    IMG_SIZE: Height and width of the (square) input images.
    channel: Accepted for backward compatibility but not used; the input is
      always built with 3 channels.
    num_classes: Number of output classes (size of the softmax layer).
    frozen_fraction: Fraction (0..1) of the model's layers, counted from the
      input, whose weights are frozen. Defaults to 0.7.

  Returns:
    The compiled Keras model.

  Raises:
    ValueError: If `num_classes` is None or `frozen_fraction` is outside [0, 1].
  """
  if num_classes is None:
    raise ValueError("num_classes must be given (number of output classes)")
  if not 0.0 <= frozen_fraction <= 1.0:
    raise ValueError("frozen_fraction must be between 0 and 1")

  # Loading the pretrained weights occasionally fails on Colab; re-running the
  # cell usually helps.
  base = InceptionResNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights="imagenet")

  # Classification head on top of the convolutional features.
  x = GlobalMaxPooling2D()(base.output)
  x = Dropout(0.6)(x)
  x = Dense(150, activation="relu")(x)
  x = Dropout(0.4)(x)
  output = Dense(num_classes, activation='softmax')(x)

  model = Model(base.input, output)

  # Freeze the first `frozen_fraction` of all layers (their weights will not
  # be updated); only the remaining layers are fine-tuned.
  idx = round(len(model.layers) * frozen_fraction)
  for layer in model.layers[:idx]:
    layer.trainable = False

  # Learning rate 0.001. `learning_rate` is the supported argument name; the
  # old `lr` alias is deprecated and rejected by newer Keras versions.
  sgd = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
  model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

  return model

In [None]:
# Model dimensions: RGB input, one output unit per class folder.
CHANNEL, NUM_CLASSES = 3, 16

model2 = InceptionResNetV2_model(IMG_SIZE, channel=CHANNEL, num_classes=NUM_CLASSES)
model2.summary()

## Train the model

In [53]:
# Validation accuracy fluctuates a lot between epochs; fewer epochs would
# probably be enough.

nb_epochs = 20

# Keep the returned History object: it holds the per-epoch loss and accuracy
# (history.history) needed to inspect the training curves afterwards, and
# assigning it avoids leaving a bare object repr as the cell output.
history = model2.fit(
    train_generator,
    steps_per_epoch = train_generator.samples // BATCH_SIZE,
    validation_data = validation_generator,
    validation_steps = validation_generator.samples // BATCH_SIZE,
    epochs = nb_epochs)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fa94a834b00>

# Test

First, I prepared the test images. In order to apply the same `flow_from_directory` method I had to create a new folder that I named `test_data`, and place the `test` folder inside.

In [59]:
# Root folder that holds the single `test` sub-folder with the unlabeled images.
path = "/content/test_data"

# Test images get the same rescaling as the training data and no augmentation.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    directory=path,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=1,      # one image per step
    class_mode=None,   # no labels: the generator yields images only
    shuffle=False,     # keep the order aligned with test_generator.filenames
    seed=42,
)

Found 938 images belonging to 1 classes.


Run the prediction with `model.predict()`. The last layer is a softmax, so the index of the largest output is taken as the predicted class.

In [60]:
# Based on: https://vijayabhaskar96.medium.com/tutorial-image-classification-with-keras-flow-from-directory-and-generators-95f75ebe5720

# Number of batches needed to go through the test images once.
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

# Rewind the generator so the predictions start at the first file.
test_generator.reset()

pred = model2.predict(
    test_generator,
    steps=STEP_SIZE_TEST,
    verbose=1,
)



In [61]:
# One row of class scores per test image -> position of the highest score.
# NOTE(review): these are positions in the training generator's class ordering
# (see train_generator.class_indices); presumably they coincide with the
# category ids the competition expects -- confirm before submitting.
predicted_class_indices = pred.argmax(axis=1)

In [62]:
# Source: https://stackoverflow.com/questions/41715025/keras-flowfromdirectory-get-file-names-as-they-are-being-generated

# The generator reports file names relative to its root directory, presumably
# of the form "test/<image file>"; cut off that leading sub-folder part so
# only the image file name remains.
test_images = [relative_name[len("test/"):] for relative_name in test_generator.filenames]

In [None]:
# Sanity check: show the number of test files and a short preview instead of
# dumping every file name into the notebook output.
print(len(test_images), test_images[:5])

Write prediction results to a .txt file.

In [64]:
# Write the submission file: a header line followed by one
# "<image file name>,<predicted class index>" row per test image.
with open('last_result.txt', 'w') as new_results:
    new_results.write("Id,Category\n")
    for position, image_name in enumerate(test_images):
        row = "%s,%s\n" % (image_name, predicted_class_indices[position])
        new_results.write(row)

Remove the last, unnecessary empty line from the file. 

In [65]:
# Remove the single trailing newline so the file does not end with an empty line.
# Only a trailing "\n" is removed, so running this cell again is harmless
# (previously a second run would have deleted the last prediction row).
with open("last_result.txt", "r") as results_file:
    contents = results_file.read()

if contents.endswith("\n"):
    contents = contents[:-1]

# Rewrite the file in one call; the context managers close the handles even
# if reading or writing fails.
with open("last_result.txt", "w") as results_file:
    results_file.write(contents)