## Import packages

In [20]:
import numpy as np
import pandas as pd
from keras.layers import Dense,Activation,Conv2D,MaxPooling2D,Dropout,Flatten
from keras.models import Sequential
from keras.utils import np_utils

## read train data 

In [2]:
# Load the training set; the preview in the next cell shows a 'label' column
# and a 'feature' column of space-separated pixel values.
data = pd.read_csv('train.csv')

In [3]:
# Preview the first rows to check the column layout.
data.head()

Unnamed: 0,label,feature
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,0,151 150 147 155 148 133 111 140 170 174 182 15...
2,2,231 212 156 164 174 138 161 173 182 200 106 38...
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


In [4]:
# Each 'feature' entry is one string of space-separated pixel values;
# split every string and let NumPy convert the pieces to float32,
# giving one row per sample.
X = np.array(
    [pixel_string.split(" ") for pixel_string in data['feature'].tolist()],
    dtype=np.float32,
)

In [5]:
# Class labels as a plain Python list (one-hot encoded in a later cell).
y = data["label"].tolist()

## normalize

In [6]:
def normalize(data):
    """Divide ``data`` by 255, mapping values in 0..255 onto 0..1.

    Works on anything that supports division by an integer (scalars or
    NumPy arrays); the input is not modified.
    """
    max_intensity = 255
    scaled = data / max_intensity
    return scaled

In [7]:
# Scale the pixel values by 1/255.
# NOTE: X is overwritten, so re-running this cell on its own rescales data
# that has already been normalized.
X = normalize(X)

## one hot encoding 

In [8]:
# One-hot encode the labels. num_classes is not passed, so the class count is
# presumably inferred from the largest label -- confirm it matches the 7-unit
# output layer of the model.
# NOTE: y is overwritten, so re-running this cell on its own would encode the
# one-hot matrix a second time.
y = np_utils.to_categorical(y)

## split data

In [9]:
def shuffle(X,y,percent,seed=None):
    """Randomly split ``X`` and ``y`` into training and validation subsets.

    Rows are assigned to a subset at random, but inside each subset they keep
    their original relative order, and ``X`` / ``y`` stay aligned row by row.

    Parameters
    ----------
    X : numpy.ndarray
        Samples, indexed along the first axis.
    y : numpy.ndarray
        Targets with the same first-axis length as ``X``.
    percent : float
        Fraction of rows (between 0 and 1 inclusive) placed in the training
        subset; the remaining rows go to validation.
    seed : int or None, optional
        When given, the split is reproducible for that seed. When ``None``
        (default) NumPy's global random state is used, so a preceding
        ``np.random.seed(...)`` call still controls the result.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_validate, y_validate)``.

    Raises
    ------
    ValueError
        If ``percent`` is not within ``[0, 1]``.
    """
    if not 0 <= percent <= 1:
        raise ValueError("percent must be between 0 and 1, got %r" % (percent,))

    n_samples = X.shape[0]
    # Exact training size; avoids the strict "<" percentile comparison that
    # always left at least one row in validation, even for percent=1.0.
    n_train = int(round(percent * n_samples))

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Boolean mask (rather than fancy-index order) keeps rows in file order.
    split = np.zeros(n_samples, dtype=bool)
    split[rng.permutation(n_samples)[:n_train]] = True

    X_train = X[split]
    y_train = y[split]
    X_validate = X[~split]
    y_validate = y[~split]

    return X_train,y_train,X_validate,y_validate
    

In [10]:
# Seed NumPy's global generator first so the random train/validation split is
# the same on every run of the notebook (this fixes the split only; model
# initialisation and training may still vary between runs).
np.random.seed(42)
# Keep 80% of the rows for training and hold out the rest for validation.
X_train,y_train,X_validate,y_validate = shuffle(X,y,0.8)

## create model

In [11]:
# Every flat pixel row becomes a 48x48 image with a single channel; the same
# per-sample shape is handed to the first convolution layer.
input_shape = (48,48,1)

X_train = X_train.reshape((-1,) + input_shape)
X_validate = X_validate.reshape((-1,) + input_shape)

In [18]:
# Start with an empty Sequential model.
model = Sequential()

In [21]:
# Build the whole network in a single expression so that re-running this cell
# always produces a fresh model instead of appending a second copy of the
# layers to the one that already exists.
model = Sequential([
    # Two stacked 3x3 convolutions followed by 2x2 max pooling.
    Conv2D(filters=32, kernel_size=(3, 3), input_shape=input_shape, activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), padding='same'),
    Dropout(0.25),
    # Classifier head on the flattened feature maps.
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.5),
    # Seven softmax outputs, one per class.
    Dense(units=7, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [22]:
# Train for 15 epochs with batches of 128 samples; the held-out split is
# passed as validation data.
model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=15,
    verbose=1,
    validation_data=(X_validate, y_validate),
)

Train on 22967 samples, validate on 5742 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x11f2f5f28>

## model save

In [23]:
from keras.models import model_from_json

## save model and weights 

In [24]:
# Persist the architecture as JSON text ...
model_json = model.to_json()
with open('model.json', 'w') as json_out:
    json_out.write(model_json)

# ... and the trained weights in a separate file.
model.save_weights("model.h5")
    

## load model and weights 

In [25]:
# Read the architecture back from JSON; the context manager closes the file
# even if reading raises.
with open('model.json','r') as json_file:
    model_json = json_file.read()

# Rebuild the network from the JSON description, restore the saved weights,
# and compile with the same loss/optimizer/metrics used for training.
model = model_from_json(model_json)
model.load_weights('model.h5')
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

## predict data

In [26]:
# Load the test set; its 'feature' column is parsed in the next cell.
test = pd.read_csv('test.csv')

In [29]:
# Parse the test pixels the same way as the training pixels: split each
# space-separated string and convert the pieces to float32.
X_test = np.array(
    [pixel_string.split(" ") for pixel_string in test['feature'].tolist()],
    dtype=np.float32,
)

In [34]:
# Apply the training-time preprocessing to the test pixels: scale by 1/255,
# then reshape each row to a 48x48 single-channel image.
X_test = normalize(X_test).reshape(-1, 48, 48, 1)

In [35]:
# Run the reloaded model on the test images; the final layer is a softmax, so
# each row holds one score per class.
predictions = model.predict(X_test)

In [38]:
# Inspect the output shape (rows = test samples, columns = class scores).
predictions.shape

(7178, 7)

## Pick the class with the highest probability

In [45]:
# For every sample keep the index of the highest-scoring class.
predict = predictions.argmax(axis=1)

## Write out the answers

In [53]:
# Sequential ids 0..N-1, one per prediction, paired with the predicted class.
ids = list(range(len(predict)))
ans = {"id": ids, "label": predict}

In [58]:
# Assemble the id/label table and write it without the DataFrame index.
res = pd.DataFrame(data=ans)
res.to_csv("predict.csv", index=False)