In [1]:
!nvidia-smi

Sat May 27 00:26:20 2017       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 367.48                 Driver Version: 375.39                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 0000:08:00.0     Off |                    0 |
| N/A   48C    P0   115W / 149W |  10942MiB / 11439MiB |     78%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           Off  | 0000:09:00.0     Off |                    0 |
| N/A   56C    P0    73W / 149W |  10871MiB / 11439MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla K80           Off  | 0000:88:00.0     Off |                    0 |
| N/A   

##### 00. Load Packages

In [2]:
%matplotlib inline

In [3]:
import keras
import tensorflow as tf
import numpy as np
import seaborn as sns
import pandas as pd
import glob as glob
import seaborn as sns
import matplotlib.pyplot as plt

import PIL.Image as im

Using TensorFlow backend.


In [4]:
from keras import backend as K
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation
from keras.layers import Conv2D, MaxPooling2D,Convolution2D
from keras.layers.core import Flatten, Reshape
from keras.losses import categorical_crossentropy
from keras import metrics

In [5]:
# glob.glob returns matches in arbitrary, filesystem-dependent order; sort so
# that positions in these lists mean the same thing on every run and machine.
files_train = sorted(glob.glob('../01.data/extracted/images_training_rev1/*.jpg'))
files_test = sorted(glob.glob('../01.data/extracted/images_test_rev1/*.jpg'))

##### 01. Define functions

In [31]:
def readImage(address):
    """Read the image stored at `address` and return its pixels as a NumPy array.

    Parameters
    ----------
    address : str
        Path of the image file to open.

    Returns
    -------
    numpy.ndarray
        Pixel data of the decoded image.
    """
    # Image.open is lazy and keeps the file open; the context manager closes it
    # as soon as the pixels have been copied, so looping over many thousands of
    # files does not leak file descriptors.
    with im.open(address) as image:
        return np.array(image)

In [32]:
def predict_score(file_path, model_name, preprocess=None):
    """Score every image in `file_path` with `model_name`, one image at a time.

    Parameters
    ----------
    file_path : iterable of str
        Paths of the images to score. The id of each image is the file name
        up to its first '.', with '/' taken as the directory separator.
    model_name : model object
        Must expose `predict_proba(x, verbose=0)` or `predict(x, verbose=0)`.
    preprocess : callable, optional
        Applied to each single-image batch before prediction. Pass the same
        transformation that was applied to the training data; by default the
        raw pixel array is fed to the model unchanged.

    Returns
    -------
    dict
        Maps image id (str) to the model output for that image.
    """
    # `predict_proba` exists on older Keras Sequential models only; fall back
    # to `predict` when it is not available.
    predict = getattr(model_name, 'predict_proba', None) or model_name.predict

    scores = {}
    for path in file_path:
        path_id = path.split('/')[-1].split('.')[0]

        # The model expects a batch axis, so wrap the single image in a list.
        batch = np.array([readImage(path)])
        if preprocess is not None:
            batch = preprocess(batch)

        scores[path_id] = predict(batch, verbose=0)[0]

    return scores

In [33]:
# Number of image files found for each split.
dict(train=len(files_train), test=len(files_test))

{'test': 79975, 'train': 61578}

In [34]:
# Derive the sample count from the file list instead of hard-coding it, so the
# indices always stay in range if the data directory changes.
n = len(files_train)

# Shuffle with a dedicated, seeded generator so the sample order (and therefore
# the tail of the data used by `validation_split`) is reproducible.
SEED = 0
samples_train = np.arange(0, n)
np.random.RandomState(SEED).shuffle(samples_train)

y_path = '../01.data/extracted/training_solutions_rev1.csv'

In [None]:
# Load every training image into memory, in the shuffled order of samples_train.
train = np.array([readImage(files_train[idx]) for idx in samples_train])

# Galaxy id of each image (file name without extension), in the same order.
galaxy_ids = [np.int64(files_train[idx].split('/')[-1].split('.')[0])
              for idx in samples_train]

solutions = pd.read_csv(y_path, index_col='GalaxyID')

# Fail loudly if an image has no label row; some pandas versions would
# otherwise return NaN rows for unknown labels.
missing_ids = set(galaxy_ids) - set(solutions.index)
if missing_ids:
    raise KeyError('no solution row for GalaxyID(s): %s'
                   % sorted(missing_ids)[:10])

# One label-based lookup for all rows; replaces the deprecated per-row `.ix`.
train_y = solutions.loc[galaxy_ids].values

In [None]:
# Rebind the loaded arrays to their training names and drop the old names so
# only one reference to each large array remains.
x_train, y_train = train, train_y
del train, train_y

print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)

In [None]:
# Convolution kernels and pooling windows are both 2x2.
shape_kernel = shape_pool = (2, 2)

# Same non-linearity for the convolutional and the hidden dense layers.
conv_activation = dense_activation = 'relu'

# One output unit per label column.
num_classes = len(y_train[0])
epochs = 1500

# Input geometry passed to the first layer (channels last).
img_rows = 424
img_cols = 424
img_channels = 3

In [36]:
# Convolutional stack followed by a small dense head.
model = Sequential()

# The input layer is the only one that declares the input shape and data format.
model.add(Conv2D(filters=35,
                 kernel_size=shape_kernel,
                 input_shape=(img_rows, img_cols, img_channels),
                 data_format='channels_last',
                 name='Conv-Input',
                 activation=conv_activation))

# Remaining convolutions: (filters, layer name, extra Conv2D kwargs, pool after?).
conv_specs = [
    (30, 'Conv-02', {}, True),
    (25, 'Conv-03', {}, True),
    (20, 'Conv-04', {}, True),
    (15, 'Conv-05', {}, True),
    (10, 'Conv-06', {'padding': 'same'}, False),
    (15, 'Conv-07', {'padding': 'same'}, False),
    (2, 'Conv-08', {'padding': 'same'}, True),
]

for n_filters, layer_name, extra_kwargs, pool_after in conv_specs:
    model.add(Conv2D(filters=n_filters,
                     kernel_size=shape_kernel,
                     name=layer_name,
                     activation=conv_activation,
                     **extra_kwargs))
    if pool_after:
        model.add(MaxPooling2D(pool_size=shape_pool))

# Dense head: one hidden layer, then one sigmoid unit per output column.
model.add(Flatten())
model.add(Dense(units=150,
                name='Dense-02',
                activation=dense_activation))
model.add(Dense(units=num_classes,
                activation='sigmoid',
                name='Dense-Output'))

In [37]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Conv-Input (Conv2D)          (None, 423, 423, 35)      455       
_________________________________________________________________
Conv-02 (Conv2D)             (None, 422, 422, 30)      4230      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 211, 211, 30)      0         
_________________________________________________________________
Conv-03 (Conv2D)             (None, 210, 210, 25)      3025      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 105, 105, 25)      0         
_________________________________________________________________
Conv-04 (Conv2D)             (None, 104, 104, 20)      2020      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 52, 52, 20)        0         
__________

In [38]:
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard

# TensorBoard logging, including the graph and weight images.
tb = TensorBoard(log_dir='../tensorboard/model_v4/',
                 write_graph=True,
                 write_images=True)

# Checkpoint the model to disk, keeping only the best one seen so far.
mc = ModelCheckpoint(
    filepath='../05.model/model_v4_sub127.h5',
    save_best_only=True,
)

In [None]:
# NOTE(review): the output layer uses independent sigmoid units while the loss
# is categorical cross-entropy -- confirm this pairing is intended for these
# targets.
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adadelta(lr=0.1),
              metrics=[metrics.cosine])

# Centre the pixel values around zero. On a uint8 array `x_train - 127` stays
# uint8 and wraps modulo 256 (0 -> 129, 126 -> 255), which scrambles the dark
# half of the intensity range instead of shifting it. Widen to a signed type
# first; int16 is enough for [-127, 128] and uses half the memory of float32.
if x_train.dtype == np.uint8:
    x_centered = x_train.astype(np.int16)
    x_centered -= 127  # in place, avoids a second full-size temporary
else:
    x_centered = x_train - 127

model.fit(x=x_centered,
          y=y_train,
          validation_split=0.1,
          batch_size=10,
          epochs=epochs,
          callbacks=[tb, mc],
          verbose=0)

#### Test data

In [9]:
# Rebind `model` to the network stored on disk; this is the same path the
# ModelCheckpoint callback writes to (with save_best_only=True).
model = load_model('../05.model/model_v4_sub127.h5')

In [10]:
# Score every test image; `out` maps image id -> model output.
# NOTE(review): the images are fed to the model as raw pixels here, while the
# training cell subtracts 127 from the inputs -- confirm the preprocessing of
# the two paths is meant to differ.
out = predict_score(file_path=files_test, model_name=model)

In [11]:
# Read only the header of the solutions file (nrows=0) to recover the names of
# the output columns.
columns = pd.read_csv(y_path, index_col='GalaxyID', nrows=0)

# One row per test image id, one column per model output.
test_results = pd.DataFrame.from_dict(data=out, orient='index')
test_results.columns = columns.columns
test_results.index.name = 'GalaxyID'

In [12]:
# Write the submission file; the index is written under the 'GalaxyID' header.
submission_path = '../04.results/submission07.csv'
test_results.to_csv(submission_path, index_label='GalaxyID')