# In [3]: (Jupyter notebook cell prompt left over from the export)
import os
import sys

import numpy as np
import pandas as pd


# A None entry in sys.modules makes any later `import tensorflow` raise
# ImportError, so TensorFlow cannot be imported after this line runs.
sys.modules['tensorflow'] = None

def load_fashionmnist(data_dir='../dataset'):
    """Load the Fashion-MNIST arrays stored as ``.npy`` files.

    Args:
        data_dir: Directory containing ``x_train.npy``, ``y_train.npy`` and
            ``x_test.npy``. Defaults to the location this script has always
            read from, so existing calls without arguments are unaffected.

    Returns:
        A tuple ``(x_train, y_train, x_test)`` where the image arrays are
        flattened to 784 float32 features divided by 255, and the training
        labels are one-hot encoded over 10 classes.
    """
    # Training data
    x_train = np.load(os.path.join(data_dir, 'x_train.npy'))
    y_train = np.load(os.path.join(data_dir, 'y_train.npy'))

    # Test data
    x_test = np.load(os.path.join(data_dir, 'x_test.npy'))

    x_train = x_train.reshape(-1, 784).astype('float32') / 255
    # Row i of the identity matrix is the one-hot vector for class i.
    y_train = np.eye(10)[y_train.astype('int32')]
    x_test = x_test.reshape(-1, 784).astype('float32') / 255

    return x_train, y_train, x_test

from sklearn.utils import shuffle
# Load the flattened, 1/255-scaled images and the one-hot training labels.
x_train, y_train, x_test = load_fashionmnist()

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def softmax(x):
    """Return the row-wise softmax of a 2-D array.

    Args:
        x: Array of scores with one sample per row. It is not modified.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum keeps exp() from overflowing/underflowing
    # without changing the result. Done out-of-place so the caller's array is
    # left untouched.
    shifted = x - x.max(axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    return x_exp / np.sum(x_exp, axis=1, keepdims=True)

# Weights of the linear layer: 784 inputs x 10 outputs, drawn uniformly
# from [-0.08, 0.08).
W = np.random.uniform(low=-0.08, high=0.08, size=(784, 10)).astype('float32')
# Bias: one entry per output, initialised to zero.
b = np.zeros(shape=(10,)).astype('float32')

# Split into training and validation data (10% held out for validation).
# No random_state is passed, so the split differs from run to run.
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.1)
def np_log(x):
    """Return the natural log of ``x`` with values floored at 1e-10.

    Flooring the input keeps ``log(0)`` from producing ``-inf``, which would
    otherwise turn into NaN when multiplied by a zero target in the
    cross-entropy cost.

    Args:
        x: Array (or scalar) of non-negative values, e.g. probabilities.

    Returns:
        ``log(max(x, 1e-10))`` element-wise.
    """
    # Only a lower bound is wanted. Passing a_max=x made the clip a no-op,
    # because np.clip computes minimum(a_max, maximum(a, a_min)).
    return np.log(np.clip(x, 1e-10, None))
def train(x, t, eps=1.0):
    """Apply one gradient-descent update to the global ``W`` and ``b``.

    Args:
        x: Input batch, one sample per row.
        t: Target batch with one row per sample, matching the model output.
        eps: Learning rate applied to both gradients.

    Returns:
        The mean cross-entropy cost of the batch, computed before the update.
    """
    global W, b

    n_samples = x.shape[0]

    # Forward pass: class probabilities, shape (n_samples, n_outputs).
    probs = softmax(np.matmul(x, W) + b)

    # Cross-entropy averaged over the batch.
    loss = np.mean(np.sum(-t * np_log(probs), axis=1))

    # Backward pass: error signal at the output, shape (n_samples, n_outputs).
    err = probs - t

    # Batch-averaged gradients; multiplying by a ones vector sums err over
    # the samples.
    grad_W = np.matmul(x.T, err) / n_samples  # shape: (n_inputs, n_outputs)
    grad_b = np.matmul(np.ones(shape=(n_samples,)), err) / n_samples  # shape: (n_outputs,)

    # Parameter update (in place on the module-level arrays).
    W -= eps * grad_W
    b -= eps * grad_b

    return loss

def valid(x):
    """Return the model's class probabilities for ``x``.

    Uses the current global ``W`` and ``b``; no parameters are updated.
    """
    logits = np.matmul(x, W) + b
    return softmax(logits)
def onehot_decoder(df):
    """Decode one-hot / score rows into the label of their largest column.

    Args:
        df: 2-D array-like or DataFrame of numeric scores, one sample per row.

    Returns:
        A single-column DataFrame (default integer index) holding, for each
        row, the column label with the highest value. Ties resolve to the
        first such column. NaN entries are never selected unless the whole
        row is NaN, in which case the first column is returned.
    """
    a_df = pd.DataFrame(df)
    if len(a_df) == 0:
        # Nothing to decode; argmax below would fail on a frame with no columns.
        return pd.DataFrame([])

    scores = a_df.to_numpy(dtype='float64')
    # NaN never wins a comparison, so rank it below every real score.
    scores = np.where(np.isnan(scores), -np.inf, scores)

    # A true argmax also handles rows whose values are all negative, which a
    # running maximum seeded with 0 would wrongly decode as column 0.
    best = scores.argmax(axis=1)

    return pd.DataFrame(list(a_df.columns[best]))

for epoch in range(1):
    # Online learning: reshuffle, then update on one sample at a time.
    x_train, y_train = shuffle(x_train, y_train)
    for x, y in zip(x_train, y_train):
        # train() works on 2-D batches, so add a leading batch axis of size 1.
        cost = train(x[None, :], y[None, :])

    # Report accuracy on the held-out split instead of running a test-set
    # prediction whose result was discarded every epoch.
    valid_acc = accuracy_score(y_valid.argmax(axis=1), valid(x_valid).argmax(axis=1))
    print('EPOCH: {}, Valid Accuracy: {:.3f}'.format(epoch + 1, valid_acc))

# Predict class probabilities for the test set and decode them to labels.
y_pred = valid(x_test)
res = onehot_decoder(y_pred)

# Write the decoded labels (with the DataFrame index) as the submission file.
df = pd.DataFrame(res)
df.to_csv('../dataset/submission_pred.csv')