## 安装需要的Python库

In [None]:
!pip install numpy emnist sklearn matplotlib

## 导入库

In [None]:
# Plotting backend used to display sample images and the confusion matrix.
import matplotlib.pyplot as plt
# Loader for the EMNIST training samples.
from emnist import extract_training_samples

# Confirms that the cell finished and the imports succeeded.
print('OK')

## 下载训练需要的EMNIST图片库，并初步处理数据

In [None]:
# Load the images (X) and labels (y) of the EMNIST "letters" training samples.
X, y = extract_training_samples('letters')

# Scale the pixel values so they all lie between 0 and 1.
X = X / 255.

# Use the first 60,000 samples for training and the next 10,000 for testing.
X_train, X_test = X[:60000], X[60000:70000]
y_train, y_test = y[:60000], y[60000:70000]

# Flatten each image into a one-dimensional feature vector. The number of rows
# is taken from the arrays themselves and the row length is inferred (-1), so
# the reshape keeps working if the split sizes above are changed.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

print('OK')

In [None]:
# Index of the training sample to display.
img_index = 14000 # <<<<<  try changing this to look at other samples
img = X_train[img_index]

# The label is turned into a letter by adding 96, so label 1 prints as 'a'.
print(f"Image Label: {chr(y_train[img_index] + 96)}")

# Restore the flat feature vector to a 28x28 grid so it can be drawn.
plt.imshow(img.reshape(28, 28))

## 创建你自己的神经网络

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier

# Create the first network: one hidden layer with 50 neurons, trained with
# the SGD solver for at most 20 iterations.
mlp1 = MLPClassifier(
    hidden_layer_sizes=(50,),
    solver='sgd',
    learning_rate_init=.1,
    alpha=1e-4,
    max_iter=20,
    tol=1e-4,
    random_state=1,
    verbose=10,
)

print("创建成功")

## 开始训练

In [None]:
# Fit the first network on the training split.
mlp1.fit(X_train, y_train)

# Report the score on the data it was trained on and on the held-out test data.
print(f"Training set score: {mlp1.score(X_train, y_train):f}")
print(f"Test set score: {mlp1.score(X_test, y_test):f}")

## 来看看准确率

In [None]:
# Used to tabulate the true labels against the predicted labels.
from sklearn.metrics import confusion_matrix

# Predict a label for every test image with the first network.
y_pred = mlp1.predict(X_test)

# Visualise the confusion matrix as an image.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
plt.matshow(cm)

In [None]:
# Change these to any pair of letters you think the network might confuse...
predicted_letter = 'u'
actual_letter = 'v'

# Convert each letter to its numeric label once, outside the loop.
# Subtracting 96 from the code point maps 'a' to 1, 'b' to 2, and so on.
predicted_label = ord(predicted_letter) - 96
actual_label = ord(actual_letter) - 96

# Collect the test-set indices where the true label is actual_letter but the
# network predicted predicted_letter.
mistake_list = [
    i
    for i, (actual, predicted) in enumerate(zip(y_test, y_pred))
    if actual == actual_label and predicted == predicted_label
]
print(f"There were {len(mistake_list)} times that the letter {actual_letter} was predicted to be the letter {predicted_letter}.")

# Which of the collected mistakes to display.
mistake_to_show = 0 # <<< zero-based

# Validate the index before using it. A negative value would pass a plain
# length check and silently index from the end of the list, so reject it.
if mistake_to_show < 0:
    print("mistake_to_show must be 0 or greater.")
elif mistake_to_show < len(mistake_list):
    img = X_test[mistake_list[mistake_to_show]]
    plt.imshow(img.reshape((28,28)))
else:
    print(f"Couldn't show mistake number {mistake_to_show + 1} because there were only {len(mistake_list)} mistakes to show!")

## 再来一个

In [None]:
# Create a second network: three hidden layers of 100 neurons each, trained
# with the SGD solver for at most 100 iterations.
mlp2 = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),
    solver='sgd',
    learning_rate_init=.1,
    alpha=1e-4,
    max_iter=100,
    tol=1e-4,
    random_state=1,
    verbose=10,
)

# Fit it on the training split, then report the score on both splits.
mlp2.fit(X_train, y_train)
print(f"Training set score: {mlp2.score(X_train, y_train):f}")
print(f"Test set score: {mlp2.score(X_test, y_test):f}")

## 训练好了，保存一下

In [None]:
# joblib is used to write the fitted estimator to disk.
import joblib

# Save the second network to a file in the current working directory.
joblib.dump(value=mlp2, filename='character_recognition.joblib')