In [1]:
import os
# Set CUDA_VISIBLE_DEVICES to '-1' before TensorFlow is imported further down,
# so this notebook runs without any CUDA device (the device listing below
# shows only the CPU).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Experiment name; it is concatenated into the TensorBoard log directory, the
# model checkpoint file, the loss CSV and the submission CSV paths below.
run = 'densenet_fast_02_32x32'
# Device string handed to tf.device() around training and prediction.
device_use = '/cpu:0'

##### 00. Load Packages

In [3]:
%pylab inline


import seaborn as sns
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


In [4]:
import tensorflow as tf
import keras

import glob as glob
import cv2 as cv2
from tqdm import tqdm

Using TensorFlow backend.


In [5]:
from keras import backend as K
from keras.models import Sequential, load_model, Model
from keras.layers import Input, Dropout, Activation
from keras.layers import Lambda, Conv2D, MaxPooling2D, Dense

from keras.layers.normalization import BatchNormalization

In [6]:
from tensorflow.python.client import device_lib

# Display the devices TensorFlow can see, as a sanity check for device_use.
device_lib.list_local_devices()

[name: "/cpu:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 16841532729235940659]

In [7]:
# glob returns paths in arbitrary, filesystem-dependent order. Sorting makes
# any "first n files" subset taken later identical from run to run.
train_files = sorted(glob.glob('../01.data/extracted/images_training_rev1/*.jpg'))
test_files = sorted(glob.glob('../01.data/extracted/images_test_rev1/*.jpg'))

##### 00. Define functions

In [8]:
def get_image(image_path, size, interpolation=None):
    """Read an image from disk and resize it.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with ``cv2.imread``.
    size : tuple
        Target size handed to ``cv2.resize`` as ``dsize``.
    interpolation : int, optional
        OpenCV interpolation flag. Defaults to ``cv2.INTER_NEAREST``.

    Returns
    -------
    numpy.ndarray
        The resized image.

    Raises
    ------
    IOError
        If the file cannot be read (``cv2.imread`` returned ``None``).

    Notes
    -----
    Inference-time preprocessing should go through this function as well so
    that training and test images are resized in exactly the same way.
    """
    if interpolation is None:
        interpolation = cv2.INTER_NEAREST

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: {}'.format(image_path))

    # The flag must be passed by keyword: the third positional parameter of
    # cv2.resize is the output array (dst), not the interpolation mode.
    return cv2.resize(image, size, interpolation=interpolation)

def get_labels(image_path, labels=None):
    """Look up the label row that belongs to an image file.

    The galaxy id is the file name up to its first dot, converted to an
    integer, and is used as a label-based key into the label table.

    Parameters
    ----------
    image_path : str
        Path of the image; only its base name is used.
    labels : pandas.DataFrame, optional
        Table indexed by integer galaxy id. Defaults to the module-level
        ``train_output``.

    Returns
    -------
    numpy.ndarray
        The values of the matching row.

    Raises
    ------
    ValueError
        If the file name does not start with an integer id.
    KeyError
        If the id is not present in the table index.
    """
    if labels is None:
        labels = train_output

    # os.path handles the platform's path separator; the built-in int replaces
    # np.int, which newer NumPy releases no longer provide.
    image_id = os.path.basename(image_path)
    image_number = int(image_id.split('.')[0])

    return labels.loc[image_number].values

In [9]:
# Training labels, one row per galaxy, keyed and ordered by GalaxyID.
y_path = '../01.data/extracted/training_solutions_rev1.csv'
train_output = pd.read_csv(y_path, index_col='GalaxyID').sort_index()

In [10]:
# Second copy of the label table, loaded the same way for the comparison below.
y_path_mod = '../01.data/extracted/training_solutions_rev1.csv'
train_output_mod = pd.read_csv(y_path_mod, index_col='GalaxyID').sort_index()

In [22]:
# Number of cells that differ between the two label tables.
np.not_equal(train_output_mod.values, train_output.values).sum()

0

In [13]:
# Training configuration.
num_classes = 37
epochs = 1500

# Network input geometry.
input_size = (32, 32)
img_rows = input_size[0]
img_cols = input_size[1]
img_channels = 3

# Dimensions of the label table.
observations = train_output.shape[0]
output_classes = train_output.shape[1]

In [14]:
from gc import collect

# Number of training images to load into memory. Capped at the number of
# available files so a short file list cannot raise IndexError in the loop.
n = min(1000, len(train_files))  # use len(train_files) for the full data set

# Pre-allocate the arrays: images are kept as uint8, labels as float32.
train_x = np.zeros((n, img_rows, img_cols, img_channels), dtype=np.uint8)
train_y = np.zeros((n, num_classes), dtype=np.float32)

for current_id in tqdm(range(n), miniters=1000):

    # Run the garbage collector once every 1000 images.
    if current_id % 1000 == 0:
        collect()

    current_path = train_files[current_id]

    # get_image already returns an array, so no extra np.array() copy is made.
    train_x[current_id] = get_image(current_path, input_size)
    train_y[current_id] = get_labels(current_path)


# Labels with two singleton axes inserted: (n, 1, 1, num_classes).
train_y_expanded = np.expand_dims(np.expand_dims(train_y, 1), 1)

100%|██████████| 1000/1000 [00:08<00:00, 116.00it/s]


In [15]:
# Report the shapes of the arrays built above. The third line previously
# reused the 'train_y' label for the expanded array.
print('train_x shape:', train_x.shape)
print('train_y shape:', train_y.shape)
print('train_y_expanded shape:', train_y_expanded.shape)

('train_x shape:', (1000, 32, 32, 3))
('train_y shape:', (1000, 37))
('train_y shape:', (1000, 1, 1, 37))


In [16]:
from gc import collect

# Run a garbage-collection pass now that the training arrays are built; the
# integer shown as this cell's output is collect()'s return value.
collect()

0

In [17]:
from DenseNet import densenet_fast

In [None]:
# Reset the Keras backend session before building the network, so re-running
# this cell does not build on top of a previously created model.
K.clear_session()

densenet_options = dict(
    nb_classes=num_classes,
    img_dim=(img_rows, img_cols, img_channels),
    depth=64,
    growth_rate=6,
)
model = densenet_fast.create_dense_net(**densenet_options)

In [19]:
# Print the layer-by-layer architecture and parameter counts of the network.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
initial_conv2D (Conv2D)      (None, 32, 32, 16)        432       
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 16)        64        
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 6)         864       
_________________________________________________________________
merge_1 (Merge)              (None, 32, 32, 22)        0         
_________________________________________________________________
activation_2 (Activation)    (None, 32, 32, 22)        0         
__________

In [20]:
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau
from keras.callbacks import EarlyStopping, LearningRateScheduler
from keras_tqdm import TQDMNotebookCallback

# Output locations derived from the experiment name.
tensorboard_dir = '../tensorboard/' + run + '/'
checkpoint_path = '../05.model/' + run + '.h5'

# TensorBoard logging (graph, weight images and embeddings every epoch).
tb = TensorBoard(log_dir=tensorboard_dir,
                 write_graph=True,
                 write_images=True,
                 embeddings_freq=1)

# Keep only the best model seen so far on disk.
mc = ModelCheckpoint(filepath=checkpoint_path, save_best_only=True)

# Stop once val_loss has not improved for 20 epochs.
ec = EarlyStopping(monitor='val_loss', patience=20, mode='auto')

# Halve the learning rate after 3 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.5,
                              patience=3,
                              min_lr=1e-15)

# Notebook progress bars.
tqnc = TQDMNotebookCallback()

In [21]:
batch_size = 250

# Batches per epoch: five times the number of loaded images divided by the
# batch size. Integer division keeps the step count an int on Python 2 and 3
# alike, and max(1, ...) guards against a zero-step epoch on small data.
train_steps = max(1, (5 * train_x.shape[0]) // batch_size)

# Validate on a tenth as many batches, but always on at least one.
validation_steps = max(1, train_steps // 10)

In [22]:
def custom_preprocessing(image_input):
    """Rescale an image by 1/255.

    Parameters
    ----------
    image_input : numpy.ndarray
        Image array; presumably 8-bit pixel values in [0, 255], in which case
        the result lies in [0, 1].

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as ``image_input``.
    """
    # Multiply by 1.0 first so integer inputs are promoted to float.
    image_output = image_input * 1.0 / 255

    return(image_output)

def custom_generator(train_files, labels, batch_size):
    """Endlessly yield random batches of preprocessed images and their labels.

    Parameters
    ----------
    train_files : sequence of str
        Image paths to sample from (uniformly, with replacement).
    labels : sequence or numpy.ndarray
        Label vectors; assumed to be aligned with ``train_files`` so that
        ``labels[i]`` belongs to ``train_files[i]`` -- confirm against caller.
    batch_size : int
        Number of samples per yielded batch.

    Yields
    ------
    tuple of numpy.ndarray
        ``(batch_features, batch_labels)`` with shapes
        ``(batch_size, img_rows, img_cols, img_channels)`` and
        ``(batch_size, num_classes)``.

    Raises
    ------
    ValueError
        If ``train_files`` is empty.
    """
    n_files = len(train_files)
    if n_files == 0:
        raise ValueError('train_files is empty')

    while True:
        # Allocate fresh arrays for every batch so a batch that is still
        # queued by the consumer is never overwritten by the next one.
        batch_features = np.zeros((batch_size, img_rows, img_cols, img_channels))
        batch_labels = np.zeros((batch_size, num_classes))

        for i in range(batch_size):

            # A plain integer index, usable with lists and arrays alike.
            index = np.random.randint(n_files)

            # Resize to the network input size; cv2 expects (width, height).
            image_input = get_image(train_files[index], (img_cols, img_rows))

            batch_features[i] = custom_preprocessing(image_input)
            batch_labels[i] = labels[index]

        yield batch_features, batch_labels

In [23]:
from keras.preprocessing.image import ImageDataGenerator


# Augmentation applied to training batches: arbitrary rotations, flips along
# both axes and shifts of up to a quarter of the image in each direction.
augmentation_options = dict(
    rotation_range=180,
    vertical_flip=True,
    horizontal_flip=True,
    width_shift_range=0.25,
    height_shift_range=0.25,
    data_format='channels_last',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation batches are passed through without augmentation.
validation_datagen = ImageDataGenerator(data_format='channels_last')

# Both iterators draw from the in-memory train_x / train_y arrays.
train_generator = train_datagen.flow(x=train_x,
                                     y=train_y,
                                     batch_size=batch_size)

validation_generator = validation_datagen.flow(x=train_x,
                                               y=train_y,
                                               batch_size=batch_size)

In [24]:
# Train on augmented batches and record the per-epoch losses.
with tf.device(device_use):

    # Use the Adam class itself; the lowercase `adam` alias is not available
    # in every Keras release.
    model.compile(loss='mse',
                  optimizer=keras.optimizers.Adam(lr=1e-3)
                 )

    # Reuse the iterators defined in the data-generator cell instead of
    # building a second, identical pair here.
    # NOTE(review): validation_generator draws from the same train_x/train_y
    # arrays as train_generator, so val_loss (which drives early stopping,
    # checkpointing and the LR schedule) is not a held-out estimate.
    loss_history = model.fit_generator(
                                    generator=train_generator,
                                    validation_data=validation_generator,
                                    # Step counts must be whole numbers.
                                    steps_per_epoch=int(train_steps),
                                    validation_steps=max(1, int(validation_steps)),
                                    callbacks=[tb,mc,ec,reduce_lr,tqnc],
                                    epochs=epochs,
                                    max_q_size=2,
                                    verbose=1
                )


# Persist the loss curves for later plotting.
loss_df = pd.DataFrame(loss_history.history)
loss_df.to_csv('../03.plots/losses/augmented_loss_df'+run+'.csv',
                   index=False)

Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"
Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"


Epoch 1/1500
 1/20 [>.............................] - ETA: 316s - loss: 0.2652

KeyboardInterrupt: 

# Train directly on the in-memory arrays (no augmentation).
with tf.device(device_use):

    # Use the Adam class itself; the lowercase `adam` alias is not available
    # in every Keras release.
    model.compile(loss='mse',
                  optimizer=keras.optimizers.Adam(lr=1e-4)
                 )

    # train_x is uint8, so subtracting 30 in place would wrap every pixel
    # below 30 around to 226..255. Convert to float before shifting.
    # NOTE(review): the prediction cell feeds unshifted images to the model;
    # confirm the same offset is applied at inference time.
    loss_history = model.fit(x=train_x.astype(np.float32) - 30,
                             y=train_y,
                             batch_size=5,
                             validation_split=0.1,
                             callbacks=[tb,mc,ec,reduce_lr,tqnc],
                             epochs=epochs,
                             verbose=1
                            )


# NOTE(review): this writes to the same CSV path as the fit_generator cell
# and therefore overwrites its loss history.
loss_df = pd.DataFrame(loss_history.history)
loss_df.to_csv('../03.plots/losses/augmented_loss_df'+run+'.csv',
                   index=False)

$$ \frac{1}{N} \sum_{i=1}^{N} \mathbf{1}\left[ Actual_{i} = Predicted_{i} \right] $$

In [None]:
# Skip the first epoch (row 0) so its large initial loss does not dominate
# the y-axis. `.loc` replaces the removed `.ix` indexer; loss_df has the
# default integer index, so the label-based slice selects the same rows.
data_plot = loss_df.loc[1:, :]
sns.set(style='whitegrid',)
# `sns.plt` was only an alias for matplotlib.pyplot and is gone from newer
# seaborn releases; use pyplot directly.
plt.figure(figsize=(7.5,3.5))

plt.plot(data_plot.index,data_plot.loss,label='train')
plt.plot(data_plot.index,data_plot.val_loss,label='validation')
plt.xlabel('epoch')
plt.ylabel('loss (mse)')
plt.legend()
plt.title('Train & Validation loss');

#### Test data

In [None]:
# Predictions for the test set, keyed by galaxy id (file name without extension).
out = {}

with tf.device(device_use):  
    for file_path in tqdm(test_files):
        # os.path handles the platform's path separator.
        galaxy_id = os.path.basename(file_path).split('.')[0]

        # Load and resize through get_image so test images go through the
        # same preprocessing as the training images; add a batch axis of 1.
        galaxy_img = np.expand_dims(get_image(file_path, input_size),
                                    axis=0)
        galaxy_pred = model.predict(galaxy_img).flatten()

        out[galaxy_id] = galaxy_pred

In [None]:
# Read only the header row of the training solutions to get the column names.
columns = pd.read_csv(y_path, index_col='GalaxyID', nrows=0)

# One row per galaxy id, one column per predicted value.
test_results = pd.DataFrame.from_dict(out, orient='index')
test_results.columns = columns.columns
test_results.index.name = 'GalaxyID'

In [42]:
# Write the predictions to the submission file for this run.
submission_path = '../04.results/submission' + run + '.csv'
test_results.to_csv(submission_path, index_label='GalaxyID')