# MNIST

約6000枚/数字 の画像のデータセットから学習し、識別する

## 注

loadしたmnist.pklのdatasetは'image'が(データ数,784)の行列'label'が(データ数,10)の0or1行列

w1:一つ目の重み関数、b1:一つ目の閾値、w2:二つ目の重み関数、b2:二つ目の閾値

learning_rateを上げすぎると学習できない（更新にSGDを使用しているため）

## 余裕があれば

batch nomalizationの実装


In [48]:
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import pickle
import time
import pandas as pd

In [49]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def softmax(a):
    a=a.T
    y = np.exp(a-np.max(a,axis=0))/np.sum(np.exp(a-np.max(a,axis=0)),axis=0)
    return y.T

def mean_squared_error(y, t):
    return 0.5 * np.sum((y-t)**2)


In [71]:
class Classification:
    def __init__(self, input_size = 28*28, hidden1_size = 100, output_size = 10, 
                 weight_init_std = 0.01):
        self.load()
        
        # 重み関数などを格納するdict
        self.params = {}
        self.params['w1'] = weight_init_std*np.random.randn(input_size, hidden1_size)
        self.params['b1'] = np.zeros(hidden1_size)
        self.params['w2'] = weight_init_std*np.random.randn(hidden1_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        self.dw1 = None
    
    def train(self, batch_size=200, n_epoch=10,leaning_rate = 0.0001):
        datlen = self.dataset['label'].shape[0]
        for n in range(n_epoch):# データセットを何周するか
            batch_mask = np.random.permutation(datlen)# 一回のバッチでデータのどこをとってくるのかランダムに並べ替え
            for b in range(datlen//batch_size):# バッチ処理
                batch_img = self.dataset['image'][batch_mask[b*batch_size:(b+1)*batch_size],:]
                batch_label = self.dataset['label'][batch_mask[b*batch_size:(b+1)*batch_size],:]
                grads = self.gradient(batch_img, batch_label)
                for key in ('w1','b1','w2','b2'):
                    self.params[key] -= leaning_rate * grads[key]
            if 0<=n<10 or n_epoch-5<=n:
                print('trial:{}, Correct Rate:{}%'.format(n+1,self.correct_rate))
        #print('w1:{}, b1:{}, w2:{}, b2:{}'.format(self.params['w1'],self.params['b1'],self.params['w2'],self.params['b2']))
        return
        
    def predict(self,batchdat):# imgdataからlabelを予測
        w1,w2 = self.params['w1'], self.params['w2']
        b1,b2 = self.params['b1'], self.params['b2']
        
        self.batchdat = batchdat
        self.a1 = np.dot(batchdat,w1) + b1
        self.z1 = sigmoid(self.a1)
        self.a2 = np.dot(self.z1,w2) + b2
        return softmax(self.a2)
    
    def gradient(self, x, t):# 勾配
        L = self.loss(x, t)
        grads = {}
        db2 = (self.y-t)*self.y*(1-self.y)
        grads['b2'] = np.sum(db2,axis=0)
        grads['w2'] = np.dot(self.z1.T, db2)
        db1 = np.dot(db2, self.params['w2'].T)*self.z1*(1-self.z1)
        grads['b1'] = np.sum(db1,axis=0)
        grads['w1'] = np.dot(self.batchdat.T, db1)
        return grads
    
    def loss(self, x, t):# 損失関数
        self.y = self.predict(x)
        self.correct(self.y,t)
        return mean_squared_error(self.y,t)
        
    def load(self):
        print('loading...')
        with open(r'./train_img/mnist.pkl', 'rb') as f:
            self.dataset= pickle.load(f)
        return
    
    def correct(self, y, t):# 正答率(データセットを一周するときの最後のバッチで計算)
        ypre = np.argmax(y,axis=1)
        ans = np.argmax(t,axis=1)
        self.correct_rate = 100*sum(ypre==ans)/t.shape[0]
        return
        
        
        

## 学習

In [72]:
cls = Classification()
start = time.time()
cls.train()
print('finish!\n 学習時間は{}sだよ(^^)'.format(time.time()-start))

loading...
trial:1, Correct Rate:54.5%
trial:2, Correct Rate:60.0%
trial:3, Correct Rate:79.0%
trial:4, Correct Rate:83.0%
trial:5, Correct Rate:78.0%
trial:6, Correct Rate:83.0%
trial:7, Correct Rate:82.0%
trial:8, Correct Rate:87.5%
trial:9, Correct Rate:92.0%
trial:10, Correct Rate:89.0%
finish!
 学習時間は13.023941993713379sだよ(^^)


## 検出

coorとtestimgarrayの行が対応している

coorの一列目にx座標、二列目にy座標

testimgarrayには28×28を一行にreshapeしたもの

ラン解析を用いて0より大きい点を取り出す。取り出したそれぞれのラベルのxとyでminとmaxの平均をとって中心とする。

In [73]:
start = time.time()
testimg = cv2.imread(r'./search_image.png', cv2.IMREAD_GRAYSCALE)
# ラン解析
halfim = 14
coor = np.empty((0, 2), int)
testimgarray = np.empty((0, 28*28), int)
label = np.zeros(testimg.shape)
for y in range(testimg.shape[0]):
    for x in range(testimg.shape[1]):
        if testimg[y,x]>0:
            if y==0 and x==0:
                label[y,x] = 1
            elif y==0:
                if label[y,x-1]>0:
                    label[y,x] = label[y,x-1]
                else:
                    label[y,x] = np.max(label)+1
            elif x==0:
                if np.any(label[y-1,x:x+2]>0):
                    ra = label[y-1,x:x+2][label[y-1,x:x+2]>0]
                    if ra.size == 2:
                        ra = np.sort(ra)
                        label[label==ra[1]] = ra[0]
                    label[y,x] = ra[0]
                else:
                    label[y,x] = np.max(label)+1
            elif x==testimg.shape[1]-1:
                if np.any(label[y-1,x-1:x+1]>0) or label[y,x-1]>0:
                    ra = np.r_[label[y-1,x-1:x+1][label[y-1,x-1:x+1]>0],label[y,x-1][label[y,x-1]>0]]
                    if ra.size >=2:
                        ra = np.sort(ra)
                        for r in range(ra.size-1):
                            r+=1
                            label[label==ra[r]] = ra[0]
                    label[y,x] = ra[0]
                else:
                    label[y,x] = np.max(label)+1
            else:
                if np.any(label[y-1,x-1:x+2]>0) or label[y,x-1]>0:
                    ra = np.r_[label[y-1,x-1:x+2][label[y-1,x-1:x+2]>0],label[y,x-1][label[y,x-1]>0]]
                    if ra.size >=2:
                        ra = np.sort(ra)
                        for r in range(ra.size-1):
                            r+=1
                            label[label==ra[r]] = ra[0]
                    label[y,x] = ra[0]
                else:
                    label[y,x] = np.max(label)+1
labelnum = np.size(np.unique(label))-1
for i in range(labelnum):
    i+=1
    if label[label==i].size==0:
        label[label==np.max(label)] = i
for i in range(labelnum):
    i+=1
    posall = np.where(label==i)
    xpos = -(-(np.max(posall[1])+np.min(posall[1]))//2)
    ypos = -(-(np.max(posall[0])+np.min(posall[0]))//2)
    coor = np.r_[coor, [[xpos, ypos]]]
    testimgarray = np.r_[testimgarray, np.reshape(testimg[ypos-halfim:ypos+halfim,xpos-halfim:xpos+halfim], (1,-1))]

# 分類
numpre = cls.predict(testimgarray)
numpre = np.argmax(numpre,axis=1)

df = pd.DataFrame({
    'number' : numpre,
    'xid' : coor[:,0],
    'yid' : coor[:,1]
})

print(df.sort_values(by='number', ascending=True))

print('finish!\n 検出にかかった時間は{}sだよ(^^)'.format(time.time()-start))

    number  xid  yid
2        0  268   55
3        0  166   74
24       0  325  322
22       0  497  280
18       0   52  223
29       1  124  340
21       1  208  274
11       1   76  127
16       1  163  370
9        2  320  405
28       2  536  365
7        2  187  417
23       2   91  290
6        3  454  111
4        3  397   83
12       3  349  129
25       3  265  326
0        5  271   15
10       5  494  372
8        5  235  112
17       5  116  202
14       5   15  160
27       6  597  335
15       7  493  200
13       7  490  140
19       8   98  247
20       8  461  253
5        8  205   92
1        8  437   39
26       8  158  329
finish!
 検出にかかった時間は1.7263884544372559sだよ(^^)
