# Tarea 1 - Deep Learning - Iván Montti



Imports

In [3]:
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import tensorflow
from keras import Sequential
from keras.layers import Dense,Conv2D,Flatten,Dropout, MaxPool2D

Augmentación de Data Set

In [4]:
# Se debe indicar el camino a los archivos en el GDrive o dejarse en blaco si fueron cargados en /content 
path = "drive/MyDrive/DL/"

# Cargamos los datos de entrenamiento desde el GDrive
data = np.load(path + "data_train.pkl",allow_pickle=True,encoding='latin1')

# Manipulamos la forma de los datos 
images_temp = data['temp_images'].reshape(-1,21,21,1)
images_sci = data['sci_images'].reshape(-1,21,21,1)
images_diff = data['diff_images'].reshape(-1,21,21,1)
images_SNR = data['SNR_images'].reshape(-1,21,21,1)

# Concatenamos las imagenes
images = np.concatenate((images_temp, images_sci, images_diff, images_SNR), axis=3)

# Augmentamos el dataset rotando y concatenado las imagenes en 90º, 180º y 270º 
images = np.concatenate((images, np.rot90(images, axes=(1,2)), np.rot90(images, k=2, axes=(1,2)), np.rot90(images, k=3, axes=(1,2))))

# Obtenemos las labels y las concatenamos consigo mismas para mantener el formato anterior
y = data['labels'].reshape(-1,1)
y = np.concatenate((y,y,y,y))
encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y)

# Dejamos un tercio de los datos para validacion
X_train, X_val, y_train, y_val = train_test_split(images, y, test_size=0.33, random_state=43)

Modelo

In [5]:
#Inicializamos el modelo
model = Sequential()

# Añadimos las capas
model.add(Conv2D(filters = 4,kernel_size = 1,input_shape = (21,21,4)))
model.add(Conv2D(8, 2, activation='relu'))
model.add(MaxPool2D(2, 2))
model.add(Conv2D(16, 2, activation='relu'))
model.add(Conv2D(16, 2, activation='relu'))
model.add(MaxPool2D(2, 2))
model.add(Conv2D(32, 2, activation='relu'))
model.add(Conv2D(32, 2, activation='relu'))
model.add(Flatten())
model.add(Dense(units=16,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units=8,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units = 2,activation = 'softmax'))

# Utilizamos RMSprop como optimizador y MSE como Loss Function
# Con tamaño de batch 128 y 12 para las epochs 
model.compile(optimizer = 'RMSprop',loss = 'mse',metrics = ['mse', 'acc'])
history = model.fit(x = X_train , y = y_train, batch_size = 128, epochs = 12, validation_data = (X_val,y_val))

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


Resumen de modelo

In [48]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_36 (Conv2D)          (None, 21, 21, 4)         20        
                                                                 
 conv2d_37 (Conv2D)          (None, 20, 20, 8)         136       
                                                                 
 max_pooling2d_12 (MaxPoolin  (None, 10, 10, 8)        0         
 g2D)                                                            
                                                                 
 conv2d_38 (Conv2D)          (None, 9, 9, 16)          528       
                                                                 
 conv2d_39 (Conv2D)          (None, 8, 8, 16)          1040      
                                                                 
 max_pooling2d_13 (MaxPoolin  (None, 4, 4, 16)         0         
 g2D)                                                 

Metricas

In [49]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, precision_score, recall_score

y_test = np.argmax(model.predict(X_val),-1)
y_pred = np.argmax(y_val,-1)

print("accuracy:\t", accuracy_score (y_test, y_pred))
print( "precision:\t", precision_score (y_test, y_pred, average = 'macro'))
print("recall:\t\t", recall_score (y_test, y_pred, average = 'macro'))
print( "f1:\t\t", f1_score (y_test, y_pred, average = 'macro'))

accuracy:	 0.9825023518344309
precision:	 0.9824495350830822
recall:		 0.9825508332490351
f1:		 0.9824942983710749


Test y normalización 

In [50]:
# Cargamos los datos de testeo y los manipulamos como los datos de entrenamiento 
data_test = np.load(path + "unlab_test.pkl",allow_pickle=True,encoding='latin1')

images_test_temp = data_test['temp_images'].reshape(-1,21,21,1)
images_test_sci = data_test['sci_images'].reshape(-1,21,21,1)
images_test_diff = data_test['diff_images'].reshape(-1,21,21,1)
images_test_SNR = data_test['SNR_images'].reshape(-1,21,21,1)

images_test = np.concatenate((images_test_temp, images_test_sci, images_test_diff, images_test_SNR), axis=3)

y_test = np.argmax(y_test,-1)

Exportación

In [51]:
# Se utiliza el formato indicado en la ayudantia
data_out = {'ID': data_test['ID'], 'predicted':y_test}
df_out = pd.DataFrame(data_out)
df_out.to_csv ("predicted.csv",index=False)