In [1]:
!tar xzf /kaggle/input/files-ships-2020/ships.tgz  # unpack the images into directories

Un générateur de données à la volée est plus lent que de tout charger en mémoire, mais il ne consomme quasiment rien en mémoire (donc plus de problème avec la normalisation).

https://keras.io/preprocessing/image/

In [2]:
# Ship categories; the position of a name in this list is its integer class id.
types = [
    'coastguard',
    'containership',
    'corvette',
    'cruiser',
    'cv',
    'destroyer',
    'methanier',
    'smallfish',
    'submarine',
    'tug',
]
# Reverse lookup: category name -> integer class id.
types_id = dict(zip(types, range(len(types))))

# Number of images per batch.
batch_size = 8

In [3]:
from keras.preprocessing.image import ImageDataGenerator 

# Training pipeline: pixel values are multiplied by 1/255 and images are
# randomly flipped horizontally; validation_split reserves 10% of the files
# for the "validation" subset.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        horizontal_flip=True,
        validation_split=0.1)

# Validation pipeline: same rescaling and same split fraction, but NO random
# augmentation, so validation metrics are measured on unmodified images.
# Keras derives the split from the file order and the fraction only, so both
# generators agree on which files belong to which subset.
validation_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=0.1)

# Batches of (128, 192) images read from the class sub-directories of
# 'ships_scaled'; labels are one-hot encoded (default class_mode).
train_generator = train_datagen.flow_from_directory(
        'ships_scaled',
        target_size=(128, 192),
        batch_size=batch_size,
        subset="training")

validation_generator = validation_datagen.flow_from_directory(
        'ships_scaled',
        target_size=(128, 192),
        batch_size=batch_size,
        subset="validation")

Using TensorFlow backend.


Found 35302 images belonging to 10 classes.
Found 3919 images belonging to 10 classes.


## Mon réseau

C'est un exemple minimaliste qui ne classera rien. À vous d'ajouter des couches pour en faire quelque chose qui marche.

In [4]:
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Activation
from keras import regularizers
from keras import optimizers

# Convolutional classifier: four stages of Conv-Conv-MaxPool-Dropout with an
# increasing number of filters, then a dense head ending in a 10-way softmax.
inputs = Input(shape=(128, 192, 3), name='cnn_input')

x = inputs
for n_filters in (32, 64, 128, 256):
    x = Conv2D(n_filters, kernel_size=3, activation='relu')(x)
    x = Conv2D(n_filters, kernel_size=3, activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.4)(x)

# Classification head.
x = Flatten()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.4)(x)
outputs = Dense(10, activation='softmax')(x)

model = Model(inputs, outputs)

adamax = optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)

model.compile(
    loss='categorical_crossentropy',
    optimizer=adamax,   # not necessarily the best algorithm for convergence
    metrics=['accuracy'],
)

In [5]:
from keras.callbacks import ReduceLROnPlateau

# Learning-rate schedule: when 'val_accuracy' has stopped improving for
# 3 epochs, multiply the learning rate by 0.75, never going below 1e-4.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.75,
    patience=3,
    min_lr=0.0001,
    verbose=1,
)

In [6]:
# Train the model, validating after every epoch.
# len(generator) is the number of batches needed to see every image once,
# including the final, smaller batch. The previous `samples // batch_size`
# dropped that remainder, so part of the validation subset was skipped on
# every evaluation.
# The returned History object is kept so the training curves can be inspected.
history = model.fit_generator(
    train_generator,
    steps_per_epoch = len(train_generator),
    validation_data = validation_generator,
    validation_steps = len(validation_generator),
    epochs = 40,   # 10 gives a first idea but is probably not enough for a good result
    callbacks=[learning_rate_reduction])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 19/40
Epoch 20/40
Epoch 21/40

Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.0008437499636784196.
Epoch 25/40
Epoch 26/40
Epoch 27/40

Epoch 00027: ReduceLROnPlateau reducing learning rate to 0.0006328124727588147.
Epoch 28/40
Epoch 29/40

In [7]:
# another fit_generator cell can be used to continue training

## Analyse des résultats

In [8]:
import numpy as np
import pandas as pd
from keras.utils import np_utils

# Load the labelled test set (arrays 'X' = images, 'Y' = integer class ids).
# The context manager closes the underlying .npz file once both arrays are read.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; only
# acceptable because this archive is the trusted competition input. Drop it if
# the arrays load without it.
with np.load('/kaggle/input/files-ships-2020/ships_test.npz', allow_pickle=True) as ships:
    X_test = ships['X']
    Y_test = ships['Y']

# Same preprocessing as the training generator (rescale=1/255).
X_test = X_test.astype('float32') / 255
# One-hot labels; num_classes is pinned so the width always matches the model's
# output even if the highest class id is absent from this set.
Y_test_cat = np_utils.to_categorical(Y_test, num_classes=len(types)).astype('bool')

In [9]:
# Evaluate on the labelled test set; `score` holds [loss, accuracy] in the
# order given to model.compile (loss first, then metrics).
score = model.evaluate(X_test, Y_test_cat, verbose=0)
for label, value in zip(('Test score:', 'Test accuracy:'), score):
    print(label, value)

Test score: 0.35725796687602995
Test accuracy: 0.9110000133514404


In [10]:
from sklearn.metrics import classification_report, confusion_matrix

# Predicted class id = index of the largest model output for each test image.
res = model.predict(X_test).argmax(axis=1)
# Pin the label set so the matrix is always len(types) x len(types), even when
# a class is missing from both the ground truth and the predictions; otherwise
# rows/columns would no longer line up with `types`.
confu = confusion_matrix(Y_test, res, labels=list(range(len(types))))
# Rows: true class (full name). Columns: predicted class (3-letter prefix).
pd.DataFrame(confu, index=types, columns=[t[:3] for t in types])

Unnamed: 0,coa,con,cor,cru,cv,des,met,sma,sub,tug
coastguard,130,1,12,0,3,5,2,6,1,7
containership,1,310,1,1,2,2,5,0,0,0
corvette,4,0,120,0,3,18,0,0,1,0
cruiser,1,0,1,324,1,0,0,1,0,0
cv,1,0,1,1,89,7,0,0,2,1
destroyer,1,0,28,0,5,294,1,0,1,0
methanier,1,2,0,1,0,0,150,1,2,0
smallfish,5,1,2,1,0,2,1,144,2,6
submarine,2,0,6,0,2,3,0,2,108,1
tug,2,1,0,0,1,2,0,0,1,153


In [11]:
# Text report of the per-class metrics, with rows labelled by the names in `types`.
report = classification_report(Y_test, res, target_names=types)
print(report)

               precision    recall  f1-score   support

   coastguard       0.88      0.78      0.83       167
containership       0.98      0.96      0.97       322
     corvette       0.70      0.82      0.76       146
      cruiser       0.99      0.99      0.99       328
           cv       0.84      0.87      0.86       102
    destroyer       0.88      0.89      0.89       330
    methanier       0.94      0.96      0.95       157
    smallfish       0.94      0.88      0.91       164
    submarine       0.92      0.87      0.89       124
          tug       0.91      0.96      0.93       160

     accuracy                           0.91      2000
    macro avg       0.90      0.90      0.90      2000
 weighted avg       0.91      0.91      0.91      2000



## Soumission des résultats

Le fichier suivant sert à soumettre son résultat à la compétition. Pour cela, ouvrez l'onglet Output de votre Kernel (il semble qu'il faille d'abord avoir « commité » sa feuille ; sinon le fichier est à chercher dans le répertoire courant), puis cliquez sur le bouton Submit to competition.

In [12]:
# Load the competition images (array 'X'); no labels are read from this archive.
# The context manager closes the underlying .npz file once the array is read.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; only
# acceptable because this archive is the trusted competition input.
# NOTE: this rebinds X_test, replacing the labelled test images loaded earlier.
with np.load('/kaggle/input/files-ships-2020/ships_competition.npz', allow_pickle=True) as ships:
    X_test = ships['X']
# Same preprocessing as the training generator (rescale=1/255).
X_test = X_test.astype('float32') / 255

In [13]:
# Predict a class id for every competition image (index of the largest output).
res = np.argmax(model.predict(X_test), axis=1)
# Submission file: one row per image, columns "Id" (row index) and "Category".
df = pd.DataFrame(res, columns=["Category"])
df.to_csv("reco_nav.csv", index_label="Id")

In [14]:
# Preview the first lines of the submission file.
!head reco_nav.csv

Id,Category
0,1
1,9
2,3
3,3
4,6
5,8
6,1
7,3
8,5


In [15]:
from IPython.display import FileLink
import os

# Switch to the working directory, then display a download link to the
# submission file (the link is the cell's output).
os.chdir(r'/kaggle/working')
FileLink(r'reco_nav.csv')

In [16]:
# Delete the ships_scaled/ directory (the image folder read by the generators);
# presumably so it is not kept with the kernel output - TODO confirm.
!rm -rf ships_scaled/