In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

/kaggle/input/ai-input/scan-train-labels.npy
/kaggle/input/ai-input/scan-train-images.npy
/kaggle/input/ai-input-test/scan-test-images.npy


In [2]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

# Training configuration: mini-batch size, number of output classes, and
# number of passes over the training data per fit call.
batch_size, num_classes, epochs = 128, 10, 12

# Height and width of every input image, in pixels.
img_rows = img_cols = 28

# Load MNIST, already divided into a training split and a test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add the single channel axis where the active Keras backend expects it:
# right after the batch axis for 'channels_first', at the end otherwise.
channels_first = K.image_data_format() == 'channels_first'
input_shape = (1, img_rows, img_cols) if channels_first else (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Cast to float32 and divide by 255 so the model sees scaled-down pixel values.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer class labels (one column per class).
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Convolutional classifier: two 3x3 convolutions, 2x2 max-pooling, then a
# dense head; dropout is applied after pooling and after the hidden layer.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels; accuracy is tracked as a metric.
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

# Train on the MNIST training split; the MNIST test split is passed as the
# validation set for the fit call.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# Report loss and accuracy on the MNIST test split.
score = model.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.03167638285419607
Test accuracy: 0.9901000261306763


In [3]:
# Load the scan training images and their labels from the Kaggle input dataset,
# then show the array shapes.
train_x = np.load('/kaggle/input/ai-input/scan-train-images.npy')
train_y = np.load('/kaggle/input/ai-input/scan-train-labels.npy')
for label, arr in (("train_x shape:", train_x), ("train_y shape:", train_y)):
    print(label, arr.shape)

train_x shape: (3780, 28, 28)
train_y shape: (3780,)


In [4]:
# Append a trailing channel axis to the scan images. The channels-last layout
# is hard-coded here (no K.image_data_format() check).
# NOTE(review): unlike the MNIST arrays, train_x is not cast to float32 or
# divided by 255 in this cell - confirm whether the scan images are already scaled.
train_x = train_x.reshape((train_x.shape[0], img_rows, img_cols, 1))

# One-hot encode the integer labels. Re-running this cell would encode the
# already-encoded labels again, so run it only once per kernel session.
train_y = keras.utils.to_categorical(train_y, num_classes=num_classes)

In [5]:
# Confirm the shapes after adding the channel axis and one-hot encoding the labels.
for label, arr in (("train_x shape:", train_x), ("train_y shape:", train_y)):
    print(label, arr.shape)

train_x shape: (3780, 28, 28, 1)
train_y shape: (3780, 10)


In [6]:
# Evaluate the current model on the scan data (train_x / train_y).
# The printed labels say "Test", but the data evaluated here is the scan training set.
score = model.evaluate(train_x, train_y, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 39.96351602704604
Test accuracy: 0.9711640477180481


In [7]:
# Load a fresh, unprocessed copy of MNIST and show the raw training-split shapes.
mnist_train, mnist_test = mnist.load_data()
x1, y1 = mnist_train
xt, yt = mnist_test
for label, arr in (("x1 shape:", x1), ("y1 shape:", y1)):
    print(label, arr.shape)

x1 shape: (60000, 28, 28)
y1 shape: (60000,)


In [8]:
# Show the shapes of the preprocessed MNIST arrays.
mnist_arrays = (
    ("x_train shape:", x_train),
    ("y_train shape:", y_train),
    ("x_test shape:", x_test),
    ("y_test shape:", y_test),
)
for label, arr in mnist_arrays:
    print(label, arr.shape)

x_train shape: (60000, 28, 28, 1)
y_train shape: (60000, 10)
x_test shape: (10000, 28, 28, 1)
y_test shape: (10000, 10)


In [9]:
# Continue training the same model on the full scan training set, while
# validating against the MNIST test split.
model.fit(
    train_x,
    train_y,
    validation_data=(x_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# Report loss and accuracy on the MNIST test split after this training round.
score = model.evaluate(x_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Train on 3780 samples, validate on 10000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 2.2978007522583006
Test accuracy: 0.1420000046491623


In [10]:
from sklearn.model_selection import train_test_split
# Split the scan data in half into a training part and a held-out part;
# random_state is fixed so the split is the same on every run.
ai_x_train, ai_x_test, ai_y_train, ai_y_test = train_test_split(
    train_x,
    train_y,
    test_size=0.5,
    random_state=1,
)

In [11]:
# Show the shapes of the four arrays produced by the split.
for split in (ai_x_train, ai_x_test, ai_y_train, ai_y_test):
    print(split.shape)

(1890, 28, 28, 1)
(1890, 28, 28, 1)
(1890, 10)
(1890, 10)


In [12]:
# Continue training the same model on the scan training half, validating on
# the held-out scan half.
model.fit(
    ai_x_train,
    ai_y_train,
    validation_data=(ai_x_test, ai_y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# Report loss and accuracy on the held-out scan half.
score = model.evaluate(ai_x_test, ai_y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Train on 1890 samples, validate on 1890 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.02110765918115494
Test accuracy: 0.9984126687049866


In [13]:
# Continue training the same model on the complete scan training set.
# NOTE: ai_x_test was split out of train_x, so every validation sample is also
# a training sample here; the validation and "Test" figures from this cell are
# measured on data the model has trained on and do not estimate generalisation.
model.fit(
    train_x,
    train_y,
    validation_data=(ai_x_test, ai_y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

score = model.evaluate(ai_x_test, ai_y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Train on 3780 samples, validate on 1890 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Test loss: 0.0
Test accuracy: 1.0


In [14]:
# Load the unlabeled scan test images that the model will predict on.
test_x = np.load('/kaggle/input/ai-input-test/scan-test-images.npy')

In [15]:
# Append a trailing channel axis to the test images, the same reshape applied
# to train_x, and with no float32 cast or /255 scaling either.
test_x = test_x.reshape((test_x.shape[0], img_rows, img_cols, 1))

# One row of per-class scores (softmax outputs) per test image.
prediction = model.predict(test_x)

In [16]:
# Show the shape of the prediction array returned by model.predict.
print(prediction.shape)


(10010, 10)


In [17]:
# Build one [sample index, predicted class] pair per row of `prediction`.
# The predicted class is the position of the largest score in the row; when
# several scores tie for the maximum, the last such position wins (because of
# the >= comparison). If no score compares >= -inf, the class stays 0.
lst = []
total = prediction.shape[0]
for sample_idx in range(total):
    best_score = -float("inf")
    best_class = 0
    for class_idx, class_score in enumerate(prediction[sample_idx]):
        if class_score >= best_score:
            best_score, best_class = class_score, class_idx
    lst.append([sample_idx, best_class])

In [18]:
# Turn the [sample index, predicted class] pairs into a two-column table.
df = pd.DataFrame(
    lst,
    columns=['Id', 'Category'],
)

In [19]:
# Display the Id / Category table for a visual check.
df

Unnamed: 0,Id,Category
0,0,3
1,1,3
2,2,1
3,3,3
4,4,5
...,...,...
10005,10005,5
10006,10006,3
10007,10007,3
10008,10008,2


In [20]:
import csv

# Write the predictions to prediction_air.csv with the csv module.
# Iterating a DataFrame yields its column labels, not its rows, so passing
# `df` straight to writerows() would write the characters of "Id" and
# "Category" as two rows and none of the predictions. Write the header
# explicitly, then one plain tuple per data row.
# newline='' lets the csv module control line endings itself.
with open('prediction_air.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(list(df.columns))  # header row: Id, Category
    writer.writerows(df.itertuples(index=False, name=None))  # (Id, Category) per prediction

In [21]:
# Save the Id / Category table to the Kaggle working directory; index=None
# keeps the DataFrame's row index out of the file.
df.to_csv(r'/kaggle/working/res.csv', index=None)