**NOTE:** I didn't port over the data from the original [Kaggle competition](https://www.kaggle.com/competitions/csci-4622-spring-22).

In [1]:
import numpy as np
import os
import pandas as pd 

from PIL import Image

import keras
import keras.layers
import keras.utils.all_utils as kr_utils
import keras.regularizers
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
import tensorflow_addons as tfa

from sklearn.model_selection import train_test_split
print(tf.__version__)

2.6.2


In [2]:
# --- Input files (CSV tables and image folders) on the Kaggle input mount ---
train_csv = "/kaggle/input/csci-4622-spring-22/train.csv"
submission_csv = "/kaggle/input/csci-4622-spring-22/sample_submission.csv"
train_images_folder = "/kaggle/input/csci-4622-spring-22/train/train/"
test_images_folder = "/kaggle/input/csci-4622-spring-22/test/test/"

# --- Tensor dimensions shared by the data generator and the models ---
# num_classes: width of the one-hot label vectors; patch_size: image height and width.
num_classes, patch_size = 53, 192

In [3]:
class RockGenerator(kr_utils.Sequence):
    """Keras ``Sequence`` that yields batches of normalised images and one-hot labels.

    Every batch is a tuple ``(images, labels)`` where ``images`` has shape
    ``(n, patch_size, patch_size, 3)`` and ``labels`` has shape
    ``(n, num_classes)``. ``patch_size`` and ``num_classes`` are read from the
    notebook's module-level configuration. The last batch of an epoch may
    contain fewer than ``batch_size`` samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an "image" column (file name without the ".png" suffix)
        and a "label" column (class id passed to ``to_categorical``).
    path_to_images : str
        Directory that contains the ``<image>.png`` files.
    batch_size : int, default 32
        Maximum number of samples per batch.
    shuffle : bool, default True
        Whether to reshuffle the sample order at the end of each epoch.
    """

    def __init__(self, df, # contains the images names and their labels
                 path_to_images,
                 batch_size=32,
                 shuffle=True, # to shuffle the data at the end of each epoch
                ):

        self.df = df # dataframe with two columns "image" and "label"
        self.images_path = path_to_images
        self.batch_size = batch_size
        self.shuffle = shuffle

        # Global (not per-channel) pixel statistics used to normalise images.
        self.mean = 125.3
        self.std = 63.5

        # Row positions into self.df, in the order they will be served.
        if shuffle:
            self.indexes = np.random.permutation(self.df.shape[0])
        else:
            self.indexes = np.arange(self.df.shape[0])
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reshuffle the serving order in place; called by Keras after each epoch."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(self.df.shape[0] / self.batch_size))

    def __getitem__(self, index):
        """Load and return batch number ``index`` as ``(images, labels)``."""
        start = index * self.batch_size
        # Slicing clamps at the end of the array, so the final batch is simply shorter.
        positions = self.indexes[start:start + self.batch_size]
        # Positional lookup keeps this correct even if the frame's index is not unique.
        batch = self.df.iloc[positions]

        images = np.zeros((len(batch), patch_size, patch_size, 3))
        labels = np.zeros((len(batch), num_classes))
        for i, (name, label) in enumerate(zip(batch["image"], batch["label"])):
            path = os.path.join(self.images_path, "{}.png".format(name))
            # Force three channels: RGBA, grayscale or palette PNGs would otherwise
            # not fit the (patch_size, patch_size, 3) slot. The context manager
            # releases the file handle as soon as the pixels are materialised.
            with Image.open(path) as img:
                pixels = np.asarray(img.convert("RGB"))
            # Global mean/std normalisation (per-channel statistics are an alternative).
            images[i] = (pixels - self.mean) / self.std
            labels[i] = kr_utils.to_categorical(label, num_classes=num_classes)  # one-hot row
        return images, labels

In [4]:
# The sample-submission table doubles as the list of test images to predict.
df_test = pd.read_csv(submission_csv)

# Hold out 10% of the labelled rows for validation; the fixed seed keeps the split repeatable.
df_train, df_val = train_test_split(
    pd.read_csv(train_csv),
    random_state=5622,
    test_size=0.1,
)

In [5]:
# One generator per split. Only the training generator shuffles; validation and
# test keep the dataframe order.
train_generator = RockGenerator(df=df_train, path_to_images=train_images_folder)
val_generator = RockGenerator(df=df_val, path_to_images=train_images_folder, shuffle=False)
test_generator = RockGenerator(df=df_test, path_to_images=test_images_folder, shuffle=False)

In [6]:
# The three generators are already built in the previous cell, so they are not
# rebuilt here (this cell was a verbatim copy of it). Instead, pull one
# validation batch and check that it has the shapes the model is built for.
_images, _labels = val_generator[0]
assert _images.shape[1:] == (patch_size, patch_size, 3), _images.shape
assert _labels.shape[1:] == (num_classes,), _labels.shape
assert len(_images) == len(_labels) == min(val_generator.batch_size, len(df_val))

## My models are shown below
My first idea is kept, unexecuted, in the next markdown cell.

Parallel Neural Nets

```
Run through some convolutions
           |
        flatten
       /       \
  two Feed Forward 
  NNs with different activations
     \         /
       combine (Average maybe)
          |
        
  
```

```
inputLayer = keras.Input(shape=(patch_size,patch_size,3))


gauss = keras.layers.GaussianNoise(stddev=0.1)(inputLayer)
# convolve and pool three times,
# each convolution using a 25x25 kernel
# and a swish activation
cLayer1 = keras.layers.Conv2D(100, kernel_size=(25,25), kernel_regularizer="L2", activation=tf.nn.silu)(gauss)
poolLayer1 = keras.layers.MaxPooling2D(2,2)(cLayer1)

cLayer2 = keras.layers.Conv2D(100, kernel_size=(25,25),kernel_regularizer="L2", activation=tf.nn.silu)(poolLayer1)
poolLayer2 = keras.layers.MaxPooling2D(2,2)(cLayer2)

cLayer3 = keras.layers.Conv2D(200, kernel_size=(25,25), kernel_regularizer="L2", activation=tf.nn.silu)(poolLayer2)
poolLayer3 = keras.layers.MaxPooling2D(2,2)(cLayer3)

flatten_layer = keras.layers.Flatten()(poolLayer3)

'''
dLayer = keras.layers.Dense(250, kernel_regularizer = "L2", activation = tf.nn.silu)(gauss)
dLayer2 = keras.layers.Dense(250, kernel_regularizer = "L2", activation = tf.nn.silu)(dLayer)
output_layer = keras.layers.Dense(53)(dLayer2)
'''
#  give the flattened results to 3 different FFs each with their own activation function
activations = ['sigmoid','tanh','relu']
out_layers = []
for i, activation in enumerate(activations):
    #gauss = keras.layers.GaussianNoise(stddev=0.1)(flatten_layer)
    
    a_layer3 = keras.layers.Dense(1500, kernel_regularizer = "L2", activation=activation, name = "{}_layer1".format(activation))(flatten_layer)
    a_layer2 = keras.layers.Dense(1000, kernel_regularizer = "L2", activation=activation, name = "{}_layer2".format(activation))(a_layer3)
    a_layer = keras.layers.Dense(500, kernel_regularizer = "L2", activation=activation, name = "{}_layer3".format(activation))(a_layer2)
    
    a_out = keras.layers.Dense(100, kernel_regularizer = "L2", activation = activation, name = "{}_out".format(activation))(a_layer)
    out_layers.append(a_out)

# concatenate the results of the different FFs
recombine = keras.layers.Concatenate()(out_layers)


# feed it to the output layer
output_layer = keras.layers.Dense(53)(recombine)


model = keras.Model(inputs=inputLayer,outputs = output_layer)

# show a model summary
model.summary()

kr_utils.plot_model(model)
```

# Model 2: use Inception V3 as a base

In [7]:
# Start from Inception V3 without its classification head (include_top=False).
Base_model = keras.applications.inception_v3.InceptionV3(
    input_shape=(patch_size, patch_size, 3),
    include_top=False,
)

Base_output = Base_model.output

# Collapse the spatial dimensions with a global average pooling layer (named "Final_Pool").
layer1 = keras.layers.GlobalAveragePooling2D(name="Final_Pool")(Base_output)

# Fully connected layer with swish (SiLU) activation and L2 kernel regularisation.
# Swish was chosen to avoid the vanishing-gradient problem.
next_layer = keras.layers.Dense(2048, activation=tf.nn.silu, kernel_regularizer="l2", name="Swishy_swish")(layer1)

# Output layer: one raw score (logit) per class. No softmax is applied here;
# use num_classes rather than a literal so the head follows the configuration.
output_layer = keras.layers.Dense(num_classes)(next_layer)

model = keras.Model(inputs=Base_model.input, outputs=output_layer)

#model.summary()

2022-04-01 21:29:00.635815: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-01 21:29:00.726650: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-01 21:29:00.727380: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-01 21:29:00.728572: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [8]:
model.compile(
    # from_logits=True: the loss receives raw, unnormalised scores (logits) and
    # applies the softmax itself; the model must not output probabilities.
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    # Public optimizer path, consistent with the tf.keras loss and callback used here.
    optimizer=tf.keras.optimizers.Adam(learning_rate=4e-5),
    # Track accuracy and macro-averaged F1 (reported as "macroF1" / "val_macroF1").
    metrics=[
        "accuracy",
        tfa.metrics.F1Score(num_classes=num_classes, average="macro", name="macroF1"),
    ],
)
checkpoint_callbk = tf.keras.callbacks.ModelCheckpoint(
    "fancy_inception_model3",  # path the best weights are written to
    monitor="val_macroF1",     # "val_" prefix: use the metric computed on the validation data
    verbose=1,                 # log a message whenever the monitored metric improves
    mode="max",                # the monitored metric should be maximised
    save_freq="epoch",         # evaluate the save condition once per epoch
    save_best_only=True,       # only overwrite the file when the monitored metric improves
    save_weights_only=True,    # store weights only; they can be loaded only into the same architecture
)



In [9]:
# Train for 10 epochs, validating after each one. The checkpoint callback keeps
# the weights of the best epoch. The History object is left as the cell's output.
model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=10,
    callbacks=[checkpoint_callbk],
)

2022-04-01 21:29:05.873351: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10


2022-04-01 21:29:13.577781: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005



Epoch 00001: val_macroF1 improved from -inf to 0.39236, saving model to fancy_inception_model3
Epoch 2/10

Epoch 00002: val_macroF1 improved from 0.39236 to 0.59571, saving model to fancy_inception_model3
Epoch 3/10

Epoch 00003: val_macroF1 improved from 0.59571 to 0.70630, saving model to fancy_inception_model3
Epoch 4/10

Epoch 00004: val_macroF1 improved from 0.70630 to 0.74818, saving model to fancy_inception_model3
Epoch 5/10

Epoch 00005: val_macroF1 improved from 0.74818 to 0.76609, saving model to fancy_inception_model3
Epoch 6/10

Epoch 00006: val_macroF1 improved from 0.76609 to 0.77196, saving model to fancy_inception_model3
Epoch 7/10

Epoch 00007: val_macroF1 did not improve from 0.77196
Epoch 8/10

Epoch 00008: val_macroF1 improved from 0.77196 to 0.77646, saving model to fancy_inception_model3
Epoch 9/10

Epoch 00009: val_macroF1 improved from 0.77646 to 0.79340, saving model to fancy_inception_model3
Epoch 10/10

Epoch 00010: val_macroF1 did not improve from 0.79340


<keras.callbacks.History at 0x7f996c0a97d0>

In [10]:
# Restore the weights written by the checkpoint callback during training.
model.load_weights("fancy_inception_model3")

logits = model.predict(test_generator)  # one row of num_classes logits per test image
y_hat = np.argmax(logits, axis=1)  # predicted class = index of the highest logit

# Guard against a silent row mismatch before writing predictions into the frame.
assert len(y_hat) == len(test_generator.df), (len(y_hat), len(test_generator.df))

# Bracket assignment always writes a column. Attribute assignment (`df.label = ...`)
# would silently set a plain attribute if the column did not already exist.
test_generator.df["label"] = y_hat
test_generator.df.to_csv("submission.csv", index=False)  # no index column in the submission