# Air Handwritten Digit Recognizer

## Dataset: [AirdigitFinalReport](https://www.kaggle.com/ggwporz/airdigitfinalreport) (Air-Handwriting Digit using Smartphone)
### *Accelerometer X, Y, Z data recorded with a smartphone*

In [None]:
import os
# Kaggle input directories holding the train / test CSV recordings.
# Both paths keep their trailing slash because file names are appended to
# them by plain string concatenation when the recordings are loaded.
_DATASET_ROOT = '../input/airdigitfinalreport/AirdigitFinalReport/'
train_path = _DATASET_ROOT + 'train/'
test_path = _DATASET_ROOT + 'test/'

In [None]:
# train dataset: os.listdir returns entries in arbitrary order, so sort
# them by name before printing
TrainFiles = os.listdir(train_path)
TrainFiles.sort()
print(TrainFiles)

In [None]:
# test dataset: os.listdir returns entries in arbitrary order, so sort
# them by name before printing
TestFiles = os.listdir(test_path)
TestFiles.sort()
print(TestFiles)

## Prepare Dataset

In [None]:
import numpy as np

def read_acc(filename, n_samples=24):
    """Load one accelerometer recording and normalise it to a fixed length.

    The first line of ``filename`` is parsed as comma-separated floats that
    are interpreted as consecutive ``(accX, accY, accZ)`` triples. The file
    name, the number of parsed values and the final array shape are printed
    as progress output.

    Recordings longer than ``n_samples`` triples are truncated. Shorter ones
    are padded with the triple ``(0.0, 9.8, 0.0)``. A trailing incomplete
    triple (value count not divisible by 3) is dropped before padding,
    because it cannot form a full row.

    Args:
        filename: Path of the CSV file to read.
        n_samples: Number of (x, y, z) rows in the returned array. Defaults
            to 24 (72 values), the length used throughout this notebook.

    Returns:
        A float ``numpy.ndarray`` of shape ``(n_samples, 3)``.

    Raises:
        ValueError: If the file is completely empty.
    """
    print(filename)
    # The context manager closes the file even if reading raises; only the
    # first line is needed, so the rest of the file is never loaded.
    with open(filename, 'r') as f:
        first_line = f.readline()
    if not first_line:
        raise ValueError('empty accelerometer file: {}'.format(filename))

    acc = np.fromstring(first_line, dtype=float, sep=',')
    print(len(acc))

    # Keep at most n_samples whole triples; integer division discards a
    # trailing partial triple that would otherwise break the reshape.
    n_whole = min(len(acc) // 3, n_samples)
    acc = acc[:n_whole * 3].reshape(n_whole, 3)

    # Pad short recordings with accX=0, accY=9.8, accZ=0 in one step.
    n_missing = n_samples - n_whole
    if n_missing > 0:
        padding = np.tile([0.0, 9.8, 0.0], (n_missing, 1))
        acc = np.vstack([acc, padding])

    print(acc.shape)
    return acc

In [None]:
import matplotlib.pyplot as plt

def plot_acc(acc):
    """Draw the three columns of one recording as line plots and show them.

    ``acc`` is indexed as ``acc[:, k]`` for k = 0, 1, 2; the columns are
    labelled accX, accY and accZ in the legend.
    """
    n_points = len(acc[:, 0])
    horizontal = np.linspace(0, n_points, n_points)
    for column in range(3):
        plt.plot(horizontal, acc[:, column])
    plt.legend(['accX', 'accY', 'accZ'], loc='upper right')
    plt.show()

In [None]:
# Load every training recording: classes 0-3, recordings 000-019 per class.
# Samples are collected in lists and combined once at the end, instead of
# re-copying a growing array with np.append on every iteration.
train_samples = []
train_labels = []

for i in range(4):  # 0~3
    for j in range(20):  # 0~19
        # Files are named "<class>_<zero-padded 3-digit index>.csv"
        file = '{}_{:03d}.csv'.format(i, j)
        acc = read_acc(train_path + file)
        train_samples.append(acc)
        train_labels.append(i)
        # plot accelerometer x,y,z for the first recording of each class
        if j == 0:
            plot_acc(acc)

# x_train: one stacked array of all recordings; y_train: float class ids
# (float dtype matches what the previous np.append accumulation produced).
x_train = np.stack(train_samples)
y_train = np.array(train_labels, dtype=float)

In [None]:
# Load every test recording: classes 0-3, recordings 000-001 per class.
# Samples are collected in lists and combined once at the end, instead of
# re-copying a growing array with np.append on every iteration.
test_samples = []
test_labels = []

for i in range(4):  # 0~3
    for j in range(2):  # 000, 001
        # Files are named "<class>_<zero-padded 3-digit index>.csv"
        file = '{}_{:03d}.csv'.format(i, j)
        acc = read_acc(test_path + file)
        test_samples.append(acc)
        test_labels.append(i)
        # plot accelerometer x,y,z for the first recording of each class
        if j == 0:
            plot_acc(acc)

# x_test: one stacked array of all recordings; y_test: float class ids
# (float dtype matches what the previous np.append accumulation produced).
x_test = np.stack(test_samples)
y_test = np.array(test_labels, dtype=float)

In [None]:
# Give both feature arrays the (samples, 24, 3) layout the model consumes
x_train, x_test = (features.reshape(-1, 24, 3) for features in (x_train, x_test))

# check dataset shape: train features/labels first, then test features/labels
for array in (x_train, y_train, x_test, y_test):
    print(array.shape)

In [None]:
# shuffle train dataset
from sklearn.utils import shuffle
# Fixed seed so the shuffled order is the same on every run; features and
# labels are shuffled together so each sample keeps its label.
SHUFFLE_SEED = 41
x_train, y_train = shuffle(x_train, y_train, random_state=SHUFFLE_SEED)

# check shuffle result
print(y_train)

In [None]:
# Converts a class vector (integers) to binary class matrix
from tensorflow.keras import utils
# Keep the plain class ids of the test set before they are one-hot encoded;
# they are needed un-encoded when comparing against predicted class ids.
y_actual = y_test

# One-hot encode the labels of both splits
y_train, y_test = (utils.to_categorical(class_ids) for class_ids in (y_train, y_test))

In [None]:
# Show the un-encoded class ids of the test samples
print(y_actual)

## Build Model

In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers, losses

In [None]:
# One sample is 24 rows of 3 accelerometer values, matching the
# reshape(-1, 24, 3) applied to the feature arrays.
N_ROWS, N_CHANNELS = 24, 3
input_shape = (N_ROWS, N_CHANNELS)

# Number of output classes (class ids 0-3)
num_classes = 4

In [None]:
## Build Model
inputs = layers.Input(shape=input_shape)

# Two convolutional stages. Each stage is three same-padded Conv1D layers
# (kernel size 3, ReLU) followed by a same-padded max pooling layer:
#   stage 1 -> 48 filters, pool size 2
#   stage 2 -> 96 filters, pool size 4
x = inputs
for n_filters, pool_size in ((48, 2), (96, 4)):
    for _conv_idx in range(3):
        x = layers.Conv1D(n_filters, 3, activation='relu', padding='same')(x)
    x = layers.MaxPool1D(pool_size, padding='same')(x)

# Fully connected head
x = layers.Flatten()(x)
# NOTE(review): this Dense layer has no activation (i.e. it is linear) --
# confirm that is intended rather than a missing activation='relu'.
x = layers.Dense(48)(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)

model = models.Model(inputs=inputs, outputs=outputs)
model.summary()

In [None]:
# Categorical cross-entropy pairs with the one-hot labels produced by
# to_categorical; accuracy is the metric reported during fit/evaluate.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train Model: 30 epochs with mini-batches of 10 samples.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics are computed on the same samples as the
# final test score -- confirm a separate validation split is not wanted.
history = model.fit(x_train, y_train, batch_size=10, epochs=30, validation_data=(x_test, y_test))

## Save Model

In [None]:
# Persist the trained model (architecture + weights) to an HDF5 file
model.save('airdigit_cnn.h5')

## Evaluate Model

In [None]:
# Evaluate Model: score holds the loss followed by the compiled metrics
score = model.evaluate(x_test, y_test)
for caption, value in zip(('Test loss: ', 'Test accuracy: '), score):
    print(caption, value)

## Test Model

In [None]:
# Classify only the first test sample. predict() expects a batch, so a
# leading axis of length 1 is added to the (24, 3) sample first.
first_sample = np.expand_dims(x_test[0], axis=0)
y_pred = np.argmax(model.predict(first_sample), axis=1)

# Predicted class id, then the true class id, of that sample
print(y_pred[0])
print(y_actual[0])

## Confusion Matrix

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Class probabilities for every test sample
predY = model.predict(x_test)
# Most probable class id per sample
y_pred = predY.argmax(axis=1)

# Rows: true class ids, columns: predicted class ids
cm = confusion_matrix(y_actual, y_pred)
print(cm)

In [None]:
# Show the predicted class id of every test sample
print(y_pred)

In [None]:
# report: per-class precision / recall / F1 on the test set.
# Display names in class-id order 0-3; in English they read
# "previous channel", "next channel", "volume up", "volume down".
labels = ['上一台', '下一台', '調大聲', '調小聲']
report = classification_report(y_actual, y_pred, target_names=labels)
print(report)