In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn
import sys
import time
from tensorflow import keras
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    print(dirname)
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.
# Show the installed TensorFlow version so the run can be tied to a specific release.
print(tf.__version__)

In [None]:
# CIFAR-10 category names. The position of a name in this list is used as
# its integer class index elsewhere in the notebook, so the order matters.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Image folders and the CSV files that list the images they contain.
train_folder = './cifar10/train'
test_folder = './cifar10/test'
train_labels_file = './cifar10/trainLabels.csv'
test_csv_file = './cifar10/sampleSubmission.csv'

def parse_csv_file(filepath, folder):
    """Parse a two-column ``id,label`` CSV into ``(image_path, label)`` pairs.

    The first row is treated as a header and skipped. Blank rows (for
    example a trailing empty line at the end of the file) are ignored
    instead of raising an unpacking error.

    Args:
        filepath: Path to the CSV file to read.
        folder: Directory that holds the images; each id is turned into
            ``<folder>/<id>.png``.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order.
        An empty or header-only file yields an empty list.

    Raises:
        ValueError: If a non-blank row does not have exactly two fields.
    """
    import csv  # local import so the function is self-contained in the notebook

    results = []
    # newline='' is what the csv module expects so it can handle line endings itself.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row; tolerate a completely empty file
        for row in reader:
            # csv.reader yields [] for an empty line; also skip whitespace-only rows.
            if not any(field.strip() for field in row):
                continue
            image_id, label_str = row
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results

import pprint

# Turn both CSV files into lists of (image path, label) pairs.
train_labels_info = parse_csv_file(train_labels_file, train_folder)
# Fixed: the original passed the undefined name `test_foler`, which raised NameError.
test_csv_info = parse_csv_file(test_csv_file, test_folder)

# Sanity check: show the first few entries and the size of each list.
pprint.pprint(train_labels_info[0:5])
pprint.pprint(test_csv_info[0:5])
print(len(train_labels_info), len(test_csv_info))

In [None]:
# Number of labelled images used for training; the remaining rows are
# held out for validation.
train_size = 45000
column_names = ['filepath', 'class']

# Build the labelled frame once and slice it, instead of constructing the
# same DataFrame twice. Passing `columns=` up front also works when the
# input list is empty, where assigning `.columns` afterwards would fail.
labelled_df = pd.DataFrame(train_labels_info, columns=column_names)
train_df = labelled_df.iloc[:train_size]
valid_df = labelled_df.iloc[train_size:]
test_df = pd.DataFrame(test_csv_info, columns=column_names)

In [None]:
# Image geometry and batching parameters shared by the generators and the model.
height = 32
width = 32
channels = 3
batch_size = 32
num_classes = 10

# Training images are augmented on the fly (rotation, shifts, shear, zoom,
# horizontal flip) and rescaled from [0, 255] to [0, 1].
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    # Fixed: the original `1/.255` evaluates to ~3.92, so training images were
    # scaled differently from the validation/test images (which use 1./255).
    rescale = 1./255,
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    fill_mode = 'nearest',
)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory = './',
    x_col = 'filepath',
    y_col = 'class',
    classes = class_names,
    # Keras expects target_size as (height, width); the original passed
    # (width, height), which only worked because both are 32.
    target_size = (height, width),
    batch_size = batch_size,
    seed = 7,
    shuffle = True,
    class_mode = 'sparse'
)

# Validation images are only rescaled: no augmentation and no shuffling.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)
valid_generator = valid_datagen.flow_from_dataframe(
    valid_df,
    directory = './',
    x_col = 'filepath',
    y_col = 'class',
    classes = class_names,
    target_size = (height, width),
    batch_size = batch_size,
    seed = 7,
    shuffle = False,
    class_mode = 'sparse')

# Number of images each generator found; used to derive the step counts for training.
train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num)
print(valid_num)

In [None]:
# Draw two batches from the training generator and show their shapes and
# labels as a quick check of the input pipeline.
for _ in range(2):
    x, y = next(train_generator)
    print(x.shape, y.shape)
    print(y)

In [None]:
# Convolutional classifier: three stages of two 3x3 'same' convolutions each
# (128, 256, 512 filters), every stage ending in 2x2 max pooling, followed by
# a 512-unit dense layer and a softmax over `num_classes`.
model = keras.models.Sequential([
    keras.layers.Conv2D(filters=128,  # the images are small, so a larger filter count is affordable
                        kernel_size=3,
                        padding='same',
                        activation='relu',  # already scaled, selu is not better
                        # Fixed: channels-last Keras input is (height, width, channels);
                        # the original listed width first (harmless only because both are 32).
                        input_shape=[height, width, channels]),
    # The model is fairly deep, so batch normalization is added to make
    # training faster and more stable.
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=128,
                        kernel_size=3,
                        padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                       activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                       activation='relu'),
    # NOTE(review): in this stage pooling comes before batch normalization,
    # unlike the other two stages -- confirm whether that is intentional.
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                       activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                       activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax')
])

# Sparse categorical cross-entropy matches the integer labels produced by
# generators created with class_mode='sparse'.
model.compile(loss="sparse_categorical_crossentropy", optimizer='adam',
             metrics=['accuracy'])
model.summary()

In [None]:
# Train for a fixed number of epochs. Step counts use integer division, so a
# final partial batch is not counted towards an epoch / validation pass.
# NOTE(review): `fit_generator` is deprecated in recent TensorFlow releases in
# favour of `fit` -- confirm against the installed version before upgrading.
epochs = 300
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_num // batch_size,
    epochs=epochs,
    validation_data=valid_generator,
    validation_steps=valid_num // batch_size,
)

In [None]:
# List the metric names recorded during training; these are the keys the plotting code indexes into.
print(history.history.keys())

In [None]:
def plot_learning_curves(history, label, epochs, min_value, max_value):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by Keras training).
        label: Metric name to plot; ``'val_' + label`` is plotted alongside it.
        epochs: Upper bound of the x axis.
        min_value: Lower bound of the y axis.
        max_value: Upper bound of the y axis.
    """
    curve_names = (label, 'val_' + label)
    curves = {name: history.history[name] for name in curve_names}
    pd.DataFrame(curves).plot(figsize=(8, 5))
    plt.grid(True)
    plt.axis([0, epochs, min_value, max_value])
    plt.show()
    
# Accuracy is plotted over its full [0, 1] range.
plot_learning_curves(history, label='accuracy', epochs=epochs,
                     min_value=0, max_value=1)
# NOTE(review): the loss axis is limited to [1.5, 2.5]; values outside that
# window will not be visible -- confirm the range suits the observed loss.
plot_learning_curves(history, label='loss', epochs=epochs,
                     min_value=1.5, max_value=2.5)

In [None]:
# Test images get the same 1/255 rescaling as validation images, with no augmentation.
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)
# Fixed: the original created `test_datagen` but then called
# `valid_datagen.flow_from_dataframe`, leaving `test_datagen` unused.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory = './',
    x_col = 'filepath',
    y_col = 'class',
    classes = class_names,
    # Keras expects target_size as (height, width).
    target_size = (height, width),
    batch_size = batch_size,
    seed = 7,
    # Shuffling is disabled; the submission code numbers predictions by position.
    shuffle = False,
    class_mode = 'sparse')

In [None]:
# Run the trained model over every test batch.
# NOTE(review): `predict_generator` is deprecated in recent TensorFlow releases
# in favour of `predict` -- confirm against the installed version.
test_predict = model.predict_generator(test_generator,
                                       workers = 10,  # number of parallel workers
                                       use_multiprocessing = True)  # processes; with False the workers are threads

# One row of class probabilities per test image.
print(test_predict.shape)
print(test_predict[0:5])
# The predicted class is the index of the highest probability in each row.
test_predict_class_indices = np.argmax(test_predict, axis = 1)
# Fixed: the original line was missing its closing parenthesis (SyntaxError).
print(test_predict_class_indices[0:5])

In [None]:
# Translate each predicted class index back into its category name.
test_predict_class = [
    class_names[class_idx] for class_idx in test_predict_class_indices
]

In [None]:
def generate_submissions(filename, predict_class):
    """Write predictions to a submission CSV with an ``id,label`` header.

    Rows are numbered from 1 in the order the predictions are given.

    Args:
        filename: Path of the CSV file to create (overwritten if it exists).
        predict_class: Sequence of predicted label strings, one per image.
    """
    # Fixed: the parameter was spelled `predice_class` while the body read
    # `predict_class`, so every call raised NameError.
    with open(filename, 'w') as f:
        f.write('id,label\n')
        for image_id, label in enumerate(predict_class, start=1):
            f.write('%d,%s\n' % (image_id, label))
            
# Write the predicted class names to the submission file.
output_file = "./cifar10/submission.csv"
# Fixed: the original passed the undefined name `test_predicgt_class` (NameError).
generate_submissions(output_file, test_predict_class)