In [5]:
import os
import struct
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [48]:
train_image_file = 'mnist/train-images-idx3-ubyte'
train_label_file = 'mnist/train-labels-idx1-ubyte'
test_image_file = 'mnist/t10k-images-idx3-ubyte'
test_label_file = 'mnist/t10k-labels-idx1-ubyte'

## Load data

In [15]:
def load_images(filename):
    fp = open(filename, 'rb')
    
    # マジックナンバー
    magic = struct.unpack('>i', fp.read(4))[0]
    if magic != 2051:
        raise Exception('Invalid MNIST file!')
        
    # 各種サイズ
    n_images, height, width = struct.unpack('>iii', fp.read(4 * 3))
    
    # 画像の読み込み
    total_pixels = n_images * height * width
    images = struct.unpack('>' + 'B' * total_pixels, fp.read(total_pixels))
    
    images = np.asarray(images, dtype='uint8')
    images = images.reshape((n_images, height, width, 1))
    
    # 値の範囲を[0, 1]に変更する
    images = images.astype('float32') / 255.0
    
    fp.close()
    
    return images

In [32]:
def load_labels(filename):
    fp = open(filename, 'rb')
    
    # マジックナンバー
    magic = struct.unpack('>i', fp.read(4))[0]
    if magic != 2049:
        raise Exception('Invalid MNIST file!')
        
    # 各種サイズ
    n_labels = struct.unpack('>i', fp.read(4))[0]
    
    # ラベルの読み込み
    labels = struct.unpack('>' + 'B' * n_labels, fp.read(n_labels))
    labels = np.asarray(labels, dtype='int32')
    
    fp.close()
    
    return labels

In [41]:
def to_onehot(labels):
    # one-hot形式への変換
    return np.identity(10)[labels]

In [43]:
images = load_images(train_image_file)
labels = load_labels(train_label_file)
onehot = to_onehot(labels)

60000


## Logistic regression

In [47]:
n = len(images)
X = images.reshape((n, -1))
y = onehot
clf = LogisticRegression(random_state=0, multi_class='multinomial').fit(X, labels)

(60000, 784) (60000, 10)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [49]:
test_images = load_images(test_image_file)
test_labels = load_labels(test_label_file)

10000


In [53]:
pred_labels = clf.predict(test_images.reshape(10000, -1))

In [54]:
acc = (pred_labels == test_labels).mean()

In [55]:
acc

0.9256