In [1]:
import numpy as np
import struct
import matplotlib.pyplot as plt
import math 

train_images = './train-images.idx3-ubyte'
train_labels = './train-labels.idx1-ubyte'
test_images = './t10k-images.idx3-ubyte'
test_labels = './t10k-labels.idx1-ubyte'


def loadimages(train_images):
    print("--------------------------------------------------")
    print("目前處理",train_images)
    train_images = open(train_images, 'rb').read()
    offset = 0
    fmt_header = '>iiii'
    #表示 BIG ENDIAN   I表示 INTEGER
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, train_images, offset)
    print('魔數:%d, 圖片數量: %d張, 圖片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))
    image_size = num_rows * num_cols
    offset += struct.calcsize(fmt_header)
    print("offset is ",offset)

    fmt_image = '>' + str(image_size) + 'B'
    print("fmt_image is " ,fmt_image)

    images = np.empty((num_images, num_rows, num_cols))
    print("images shape is " ,images.shape)

    for i in range(num_images):
        if (i + 1) % num_images == 0:
            print('已解析 %d' % (i + 1) + '張')
        images[i] = np.array(struct.unpack_from(fmt_image, train_images, offset)).reshape((num_rows, num_cols))
        offset += struct.calcsize(fmt_image)
    return images

def loadlabel(train_labels):
    print("---------------------------------")
    print("目前處理",train_labels)
    bin_data = open(train_labels, 'rb').read()
    offset = 0
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)
    print('魔數:%d, 圖片數量: %d張' % (magic_number, num_images))
    offset += struct.calcsize(fmt_header)
    fmt_image = '>B'
    print("fmt_image is ",struct.calcsize(fmt_image))
    labels = np.empty(num_images)
    for i in range(num_images):
        if (i + 1) % num_images == 0:
            print('已解析 %d' % (i + 1) + '張')
        labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]
        offset += struct.calcsize(fmt_image)
    return labels
train_images = loadimages(train_images)
train_labels = loadlabel(train_labels)
test_images  = loadimages(test_images)
test_labels  = loadlabel(test_labels)
print("--------- prior------------")
prior=[0]*10 
num_of_label=[0]*10
for i in range(len(train_labels)):
    prior[int(train_labels[i])]+=1
    num_of_label[int(train_labels[i])]+=1
_sum=0
for i in range(len(prior)):
    prior[i]/=len(train_labels)
prior=np.array(prior)
print(prior)

--------------------------------------------------
目前處理 ./train-images.idx3-ubyte
魔數:2051, 圖片數量: 60000張, 圖片大小: 28*28
offset is  16
fmt_image is  >784B
images shape is  (60000, 28, 28)
已解析 60000張
---------------------------------
目前處理 ./train-labels.idx1-ubyte
魔數:2049, 圖片數量: 60000張
fmt_image is  1
已解析 60000張
--------------------------------------------------
目前處理 ./t10k-images.idx3-ubyte
魔數:2051, 圖片數量: 10000張, 圖片大小: 28*28
offset is  16
fmt_image is  >784B
images shape is  (10000, 28, 28)
已解析 10000張
---------------------------------
目前處理 ./t10k-labels.idx1-ubyte
魔數:2049, 圖片數量: 10000張
fmt_image is  1
已解析 10000張
--------- prior------------
[0.09871667 0.11236667 0.0993     0.10218333 0.09736667 0.09035
 0.09863333 0.10441667 0.09751667 0.09915   ]


In [2]:
train_images = train_images.reshape(60000,784)
test_images  = test_images.reshape(10000,784) 
mean = np.zeros((10,28*28),dtype = np.float64)
std  = np.zeros((10,28*28),dtype = np.float64)
for index , label in enumerate(np.unique(train_labels)):   #np.unique sort and remove repeat numbers
    label_index = np.where(label==train_labels)
    mean[index] = np.mean(train_images[label_index],axis=0)
    std[index]  = np.std(train_images[label_index],axis =0)
print(std[0].shape)


(784,)


In [4]:
def continous(test_images,mean,std,test_labels):
    count = 0
    for k in range(10000):
        log_likelihood=[0]*10
        log_likelihood=np.array(log_likelihood,dtype=float)
        for i in range(10):
            temp = 0
            for j in range(784):
                if std[i][j]!=0:
                    exp_term = (-1/2)*(test_images[k][j]-mean[i][j])**2/(std[i][j]**2)
                    first_term = math.log(1/(2*(math.pi)*std[i][j]**2))
                    temp = first_term + exp_term +temp
            temp = temp * math.log(prior[i])
            log_likelihood[i]=temp  
            
        normalize_sum=0  
        for i in range(10):
            normalize_sum+=log_likelihood[i]
        for i in range(10):
            log_likelihood[i]=float(log_likelihood[i]/normalize_sum)
        
        print("-----------------------")
        print("image[%d]"%k)
        for i in range(len(log_likelihood)):
            print('%d   %f'%(i,log_likelihood[i]))
        predict = (np.argmin(log_likelihood))
        result = test_labels[k]
        print("predict is",(np.argmin(log_likelihood)))
        print("Result is %d"%test_labels[k])
        if predict == result :
            count =count+1
        #break
    print(count/10000)
    
continous(test_images,mean,std,test_labels)   

-----------------------
image[0]
0   0.008537
1   0.178276
2   0.011261
3   0.008965
4   0.008520
5   0.008310
6   0.753478
7   0.006474
8   0.009653
9   0.006527
predict is 7
Result is 7
-----------------------
image[1]
0   0.021907
1   0.008993
2   0.003056
3   0.324404
4   0.010205
5   0.005897
6   0.005740
7   0.418643
8   0.014518
9   0.186638
predict is 2
Result is 2
-----------------------
image[2]
0   0.000110
1   0.000073
2   0.000112
3   0.000105
4   0.000107
5   0.000113
6   0.000102
7   0.999064
8   0.000104
9   0.000110
predict is 1
Result is 1
-----------------------
image[3]
0   0.062574
1   0.325127
2   0.071368
3   0.067009
4   0.070332
5   0.071534
6   0.064177
7   0.129938
8   0.066014
9   0.071927
predict is 0
Result is 0
-----------------------
image[4]
0   0.085235
1   0.221584
2   0.092041
3   0.086902
4   0.084942
5   0.091962
6   0.090270
7   0.083338
8   0.084646
9   0.079081
predict is 9
Result is 4
-----------------------
image[5]
0   0.107744
1   0.070062
2

-----------------------
image[49]
0   0.071311
1   0.303730
2   0.073520
3   0.070391
4   0.067756
5   0.074578
6   0.087719
7   0.067013
8   0.068443
9   0.115539
predict is 7
Result is 4
-----------------------
image[50]
0   0.000107
1   0.000149
2   0.000114
3   0.000107
4   0.000115
5   0.000115
6   0.000100
7   0.995498
8   0.000111
9   0.003584
predict is 6
Result is 6
-----------------------
image[51]
0   0.053084
1   0.438964
2   0.040089
3   0.036390
4   0.040963
5   0.051034
6   0.052601
7   0.046345
8   0.041500
9   0.199029
predict is 3
Result is 3
-----------------------
image[52]
0   0.010891
1   0.629682
2   0.017313
3   0.010428
4   0.009075
5   0.009027
6   0.016021
7   0.275733
8   0.008438
9   0.013391
predict is 8
Result is 5
-----------------------
image[53]
0   0.000116
1   0.000315
2   0.000128
3   0.000117
4   0.000122
5   0.000126
6   0.000118
7   0.998708
8   0.000118
9   0.000132
predict is 0
Result is 5
-----------------------
image[54]
0   0.000321
1   0.43

-----------------------
image[102]
0   0.051353
1   0.132426
2   0.084828
3   0.044639
4   0.041879
5   0.042649
6   0.476331
7   0.042081
8   0.039699
9   0.044116
predict is 8
Result is 5
-----------------------
image[103]
0   0.001182
1   0.923876
2   0.000935
3   0.000928
4   0.000832
5   0.001069
6   0.065461
7   0.001075
8   0.003586
9   0.001055
predict is 4
Result is 4
-----------------------
image[104]
0   0.050023
1   0.239167
2   0.050787
3   0.046010
4   0.041424
5   0.047037
6   0.402238
7   0.041652
8   0.043853
9   0.037809
predict is 9
Result is 9
-----------------------
image[105]
0   0.051470
1   0.347090
2   0.041569
3   0.040245
4   0.041837
5   0.040569
6   0.041062
7   0.033409
8   0.330701
9   0.032048
predict is 9
Result is 9
-----------------------
image[106]
0   0.000402
1   0.001759
2   0.000423
3   0.000407
4   0.000571
5   0.000441
6   0.000756
7   0.994268
8   0.000404
9   0.000569
predict is 0
Result is 2
-----------------------
image[107]
0   0.102369
1 

-----------------------
image[156]
0   0.089174
1   0.192118
2   0.024412
3   0.068412
4   0.018954
5   0.020396
6   0.015707
7   0.093179
8   0.029751
9   0.447899
predict is 6
Result is 6
-----------------------
image[157]
0   0.079381
1   0.249391
2   0.088000
3   0.081903
4   0.083921
5   0.088165
6   0.079609
7   0.090568
8   0.081032
9   0.078030
predict is 9
Result is 0
-----------------------
image[158]
0   0.003691
1   0.007730
2   0.002574
3   0.002363
4   0.002706
5   0.003055
6   0.002619
7   0.052822
8   0.004201
9   0.918240
predict is 3
Result is 3
-----------------------
image[159]
0   0.102205
1   0.102154
2   0.106266
3   0.097943
4   0.099069
5   0.105048
6   0.104375
7   0.095341
8   0.096932
9   0.090667
predict is 9
Result is 4
-----------------------
image[160]
0   0.085736
1   0.092363
2   0.084817
3   0.078837
4   0.079730
5   0.084906
6   0.086648
7   0.116612
8   0.077787
9   0.212563
predict is 8
Result is 4
-----------------------
image[161]
0   0.000286
1 

-----------------------
image[208]
0   0.000221
1   0.000316
2   0.000211
3   0.000214
4   0.000263
5   0.000249
6   0.000200
7   0.997715
8   0.000265
9   0.000346
predict is 6
Result is 2
-----------------------
image[209]
0   0.433827
1   0.088809
2   0.036792
3   0.032704
4   0.033780
5   0.033737
6   0.091140
7   0.027542
8   0.196609
9   0.025060
predict is 9
Result is 9
-----------------------
image[210]
0   0.085986
1   0.381463
2   0.075261
3   0.066014
4   0.063045
5   0.068524
6   0.076824
7   0.060159
8   0.063824
9   0.058900
predict is 9
Result is 4
-----------------------
image[211]
0   0.074412
1   0.067410
2   0.105231
3   0.064734
4   0.067193
5   0.068928
6   0.364875
7   0.062195
8   0.063697
9   0.061325
predict is 9
Result is 5
-----------------------
image[212]
0   0.270695
1   0.094891
2   0.024675
3   0.024402
4   0.048275
5   0.025084
6   0.169936
7   0.017846
8   0.308532
9   0.015663
predict is 9
Result is 9
-----------------------
image[213]
0   0.103174
1 

-----------------------
image[257]
0   0.091460
1   0.066118
2   0.091633
3   0.085389
4   0.090892
5   0.092402
6   0.083202
7   0.209916
8   0.084069
9   0.104918
predict is 1
Result is 8
-----------------------
image[258]
0   0.000473
1   0.001398
2   0.000454
3   0.000453
4   0.000616
5   0.000554
6   0.000423
7   0.944987
8   0.000678
9   0.049964
predict is 6
Result is 2
-----------------------
image[259]
0   0.028992
1   0.640163
2   0.032391
3   0.033153
4   0.037164
5   0.035676
6   0.028390
7   0.065250
8   0.038298
9   0.060522
predict is 6
Result is 6
-----------------------
image[260]
0   0.055629
1   0.326597
2   0.067875
3   0.165459
4   0.056558
5   0.057438
6   0.089252
7   0.070995
8   0.051853
9   0.058344
predict is 8
Result is 8
-----------------------
image[261]
0   0.049143
1   0.135780
2   0.253860
3   0.238470
4   0.049037
5   0.045147
6   0.090380
7   0.045485
8   0.041705
9   0.050994
predict is 8
Result is 5
-----------------------
image[262]
0   0.043602
1 

-----------------------
image[308]
0   0.096496
1   0.130856
2   0.098063
3   0.090550
4   0.088778
5   0.096027
6   0.122064
7   0.104917
8   0.087190
9   0.085059
predict is 9
Result is 4
-----------------------
image[309]
0   0.002622
1   0.004027
2   0.002456
3   0.002246
4   0.002628
5   0.002673
6   0.003031
7   0.100019
8   0.002946
9   0.877352
predict is 3
Result is 3
-----------------------
image[310]
0   0.088729
1   0.116931
2   0.093358
3   0.085331
4   0.091549
5   0.093539
6   0.085969
7   0.163497
8   0.086258
9   0.094838
predict is 3
Result is 3
-----------------------
image[311]
0   0.000519
1   0.015074
2   0.000609
3   0.000664
4   0.000629
5   0.000643
6   0.000569
7   0.979362
8   0.000593
9   0.001338
predict is 0
Result is 0
-----------------------
image[312]
0   0.000479
1   0.002036
2   0.000540
3   0.000509
4   0.000524
5   0.000544
6   0.000496
7   0.993844
8   0.000497
9   0.000531
predict is 0
Result is 0
-----------------------
image[313]
0   0.005918
1 

KeyboardInterrupt: 