In [312]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory and print its full path.
inputFilePaths = (os.path.join(folder, name)
                  for folder, _, names in os.walk('/kaggle/input')
                  for name in names)
for path in inputFilePaths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Redes Neuronales 2021

Integrantes de grupo:

Müller, Malena

Scala, Tobías 
# TP3: Convolutional Neural Network (CNN) & Transfer Learning

El trabajo práctico consiste en la clasificación de imágenes. El dataset utilizado es el CIFAR-100, el cual consiste de 60 mil imágenes (10 mil para test y 50 mil para train) con una variedad de 100 clases. Las imágenes tienen una resolución de 32x32 píxeles. Los 2 objetivos principales del presente TP son: el diseño de un modelo CNN y transfer learning con Imagenet. 


## Se obtiene el data set.
Se obtiene el dataset presente en Kaggle, el cual es el CIFAR-100. Se normalizan los píxeles dividiendo por 255.

In [313]:
# Load the competition arrays. The image arrays are divided by 255 to normalise the pixels
# (presumably the raw values are 8-bit, 0-255, which would give a [0, 1] range -- TODO confirm).
trainX = np.load("../input/cnn-itba-2021-q2/X_train.npy") / 255
trainY = np.load("../input/cnn-itba-2021-q2/y_train.npy")
testX = np.load("../input/cnn-itba-2021-q2/X_test.npy") / 255

# Print the three shapes to see how many images there are
# (author's note: 500 images per class, 100 classes).
print(trainX.shape, trainY.shape, testX.shape, sep="\n")

## Dividimos train entre train y validation.
El dataset de train se lo divide en 80% de train y 20% de validation. Esto es para poder evaluar el performance de nuestros modelos con validation antes de predecir el test y luego hacer submit.

In [315]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled data as a validation split; random_state is fixed so the
# split is the same on every fresh run.
# NOTE: trainX / trainY are rebound to the training part, so re-running this cell without
# reloading the data would split the already-reduced arrays again.
splitArrays = train_test_split(trainX, trainY, test_size=0.2, random_state=0)
trainX, validX, trainY, validY = splitArrays

print(trainX.shape, validX.shape, sep="\n")

Ploteamos una imagen para observar el contenido de train.

In [316]:
from matplotlib import pyplot as plt

# Draw one training image on a small 2x2-inch figure, titled with its label.
# Index 0 is arbitrary; any other index would work as well.
sampleIndex = 0
fig, ax = plt.subplots(figsize=(2, 2))
ax.imshow(trainX[sampleIndex])
ax.set_title(str(trainY[sampleIndex]))
plt.show()

## Hacemos data augmentation para "aumentar" nuestro dataset.
Configuramos las capas que se encargarán de modificar, de forma aleatoria, las características de la imagen (horizontal flip, rotación, zoom y contraste) por cada iteración (epoch). Dado que estas capas se encargan de hacer el data augmentation, nuestro modelo asume que el dataset de train es mucho mayor a lo que es en realidad (40 mil imágenes).

In [319]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip, RandomRotation, RandomZoom, RandomContrast

# Random augmentation pipeline: horizontal flip, then rotation, zoom and contrast changes,
# each configured with a factor of 0.1. The first layer declares the input shape, taken
# from the training images (every dimension except the leading sample axis).
augmentationLayers = [
    RandomFlip("horizontal", input_shape=trainX.shape[1:]),
    RandomRotation(0.1),
    RandomZoom(0.1),
    RandomContrast(0.1),
]
dataAugmentation = Sequential(augmentationLayers)

## Se diseña un modelo CNN.
El modelo CNN implementado respeta las estructuras mencionadas en clase (por ejemplo, VGG). Se generan capas convolucionales seguidas de capas maxpooling. Se agrega una capa dropout para reducir el overfitting y lograr que el modelo generalice mejor. La capa flatten es utilizada para generar un vector 1D. Finalmente tenemos una capa densa (fully connected) seguida de la capa de salida con función de activación softmax para la predicción multiclase.

In [321]:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

Nclasses = 100  # number of output classes (CIFAR-100)

# VGG-like stack: augmentation, three conv + max-pool stages with 32/64/128 filters,
# dropout, then a 1024-unit dense layer and a softmax output with one unit per class.
model = Sequential([dataAugmentation,
                    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
                    MaxPooling2D(pool_size=(2, 2)),
                    Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
                    MaxPooling2D(pool_size=(2, 2)),
                    Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'),
                    MaxPooling2D(pool_size=(2, 2)),
                    Dropout(0.4),
                    Flatten(),
                    Dense(1024, activation='relu'),
                    Dense(Nclasses, activation='softmax')])

# Loss: SparseCategoricalCrossentropy is the multi-class cross-entropy for labels given as
# integer class ids (one-hot labels would need CategoricalCrossentropy instead).
# The output layer already applies softmax, so the loss receives probabilities and must be
# built with from_logits=False; from_logits=True would treat those probabilities as raw
# logits.
#
# Optimizer: Adam is a stochastic gradient descent method based on adaptive estimates of
# first-order and second-order moments.
#
# Metric: 'accuracy' reports the fraction of samples whose predicted class matches the label.
model.compile(optimizer='adam', loss=SparseCategoricalCrossentropy(from_logits=False), metrics=['accuracy'])

Entrenamos nuestro modelo.

In [322]:
# Train the CNN on the training split for 25 epochs with mini-batches of 64 samples.
# The former `workers=-1` argument was removed: Keras documents `workers` as applying only to
# generator / keras.utils.Sequence inputs, so it had no effect on these NumPy arrays.
model.fit(trainX, trainY, epochs=25, batch_size=64, verbose=1)

Evaluamos nuestro modelo con validation.

In [323]:
# Score the trained CNN on the held-out validation split; the returned values
# (loss followed by the compiled metrics) are shown as the cell output.
model.evaluate(x=validX, y=validY)

Predecimos el test.

In [324]:
# Predict the per-class scores for the test images and keep, for each image,
# the index of the highest score (argmax along the class axis) as its label.
testPred = np.argmax(model.predict(testX), axis=1)

Preparamos submission.

In [325]:
# Submission table: a single "label" column holding the predicted class of each test image,
# with the row index named "Id". The first rows are displayed as a sanity check.
df = pd.DataFrame({"label": testPred}).rename_axis("Id")
df.head()

In [326]:
# Write the submission table (index included) to the current working directory.
df.to_csv(path_or_buf="submission.csv")

## Se hace transfer learning
Para implementar transfer learning, se utilizan los pesos preentrenados de Imagenet. Se utilizará una estructura resnet de 50 capas que contendrá dichos pesos. Cabe mencionar que aquí el modelo preentrenado se configura con una entrada de 256x256 píxeles (la resolución de entrada por defecto de ResNet50 en Keras es 224x224).

In [327]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import BatchNormalization

# ResNet50 arguments used below:
#   include_top -- whether to include the fully-connected layer at the top of the network.
#   weights     -- None (random initialisation), 'imagenet' (pre-training on ImageNet),
#                  or the path to a weights file to be loaded.
# The network therefore starts out pre-trained.
modelResnet = ResNet50(include_top=False, weights='imagenet', input_shape=(256, 256, 3))

# `trainable` is a boolean telling whether a layer's weights should be updated.
# Only the BatchNormalization layers are left trainable; every other layer is frozen.
for resnetLayer in modelResnet.layers:
    resnetLayer.trainable = isinstance(resnetLayer, BatchNormalization)

In [329]:
from tensorflow.keras.applications.resnet import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

# preprocess_input converts a batch of images from RGB to BGR and zero-centres each colour
# channel with respect to the ImageNet dataset, without scaling. It is documented to expect
# pixel values in the [0, 255] range, but the arrays were divided by 255 when loaded, so
# they are scaled back before being preprocessed.
# The validation split receives the same treatment: it is later passed to model_.evaluate and
# must be distributed like the training inputs for that score to be meaningful.
# `array * 255` builds a new array, so the function's in-place overwrite only touches that copy.
# NOTE: the names are rebound to the ResNet-ready arrays, so run this cell once per session;
# after it, these arrays no longer suit the first CNN (`model`).
trainX = preprocess_input(trainX * 255)
validX = preprocess_input(validX * 255)
testX = preprocess_input(testX * 255)

Se agregan capas al final para adaptar el modelo preentrenado a nuestro problema (ya que hay pesos que no son entrenables). Como primera capa tenemos el data augmentation explicado anteriormente. Luego se usan tres capas upsampling (cada una duplica el ancho y el alto) para llevar las imágenes de CIFAR-100 de 32x32 a los 256x256 píxeles que espera la entrada del modelo preentrenado. Luego se agrega el modelo preentrenado (resnet50). Se agrega una capa dropout por lo mencionado anteriormente.

In [330]:
from tensorflow.keras.layers import Activation, GlobalAveragePooling2D, UpSampling2D

# GlobalAveragePooling2D: averages every channel over its whole spatial extent (height and
# width), leaving one value per channel.
#
# BatchNormalization: normalises its inputs, applying a transformation that keeps the mean
# output close to 0 and the output standard deviation close to 1.

# Transfer-learning model: augmentation, three UpSampling2D layers (default factor 2 each,
# i.e. x8 overall, to reach the 256x256 input the ResNet was built with -- assumes 32x32
# source images), the pre-trained ResNet50 and a small classification head.
model_ = Sequential([dataAugmentation,
                    UpSampling2D(),
                    UpSampling2D(),
                    UpSampling2D(),
                    modelResnet,  # the pre-trained network goes here
                    GlobalAveragePooling2D(),
                    Dense(256, activation='relu'),
                    Dropout(0.4),
                    BatchNormalization(),
                    Dense(Nclasses, activation='softmax')])

# The output layer already applies softmax, so the loss receives probabilities and must be
# built with from_logits=False; from_logits=True would treat those probabilities as raw
# logits.
model_.compile(optimizer='adam', loss=SparseCategoricalCrossentropy(from_logits=False), metrics=['accuracy'])

Entrenamos el modelo preentrenado con el dataset de nuestro problema.

In [331]:
# Train the transfer-learning model for 10 epochs with mini-batches of 64 samples.
# The former `workers=-1` argument was removed: Keras documents `workers` as applying only to
# generator / keras.utils.Sequence inputs, so it had no effect on these NumPy arrays.
model_.fit(trainX, trainY, epochs=10, batch_size=64, verbose=1)

Evaluamos nuestro modelo preentrenado.

In [332]:
# Score the transfer-learning model on the validation split; the returned values
# (loss followed by the compiled metrics) are shown as the cell output.
model_.evaluate(x=validX, y=validY)