# 家賃予測モデルを作成し、WEBに上げる前に検証する

## モデルの作成

In [2]:
# データの読み込み
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
df = pd.read_csv('housing.csv')

In [4]:
df.head(5)

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,y
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [5]:
len(df)

506

In [6]:
# まずはシンプルにモデルを作って、WebAPI化するところまでやる
x = df.iloc[:,:-1]
t = df.iloc[:,-1]

In [7]:
x.shape

(506, 13)

In [8]:
t.shape

(506,)

In [37]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [42]:
x_train, x_test, t_train, t_test = train_test_split(x, t, test_size=0.4, random_state=1)
model = LinearRegression()

In [46]:
t_train.shape

(303,)

In [48]:
model.fit(x_train, t_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [49]:
model.score(x_train, t_train)

0.7468316520140624

In [50]:
model.score(x_test, t_test)

0.7209056672661771

In [52]:
model.predict([x_test.iloc[3,:]])[0]

21.227239024792492

In [53]:
from sklearn.externals import joblib

In [56]:
# モデルの保存
joblib.dump(model, 'model_house.pkl')

['model_house.pkl']

## モデルをWebAPI化

In [85]:
import requests
import json
url = 'https://predict-house.herokuapp.com'
#url = 'http://127.0.0.1:5000/'

In [86]:
x_post = list(x.iloc[0,:].values)
res = requests.post(url, json={'x_post':str(x_post)})

In [87]:
res

<Response [200]>

In [89]:
res.json()

{'y': 29.423688469405622}

## それはそれとしてchainerで実装してみる

### データ準備

In [92]:
# xとtをnumpyで準備してdatasetに固める感じ
x = df.iloc[:,:-1]
t = df.iloc[:,-1]
x = x.values.astype('f')
t = t.values.astype('f')

In [96]:
x[0].dtype

dtype('float32')

In [97]:
t[0].dtype

dtype('float32')

In [98]:
t.shape

(506,)

In [99]:
t = t.reshape(506, 1)

In [100]:
t.shape

(506, 1)

In [101]:
x.shape

(506, 13)

In [102]:
dataset = list(zip(x, t))

In [105]:
n = int(len(dataset)*0.7)

In [106]:
n

354

In [138]:
import chainer
import chainer.links as L
import chainer.functions as F
from chainer import training
from chainer.training import extensions

In [139]:
train, test = chainer.datasets.split_dataset_random(dataset, n, seed=0)

In [140]:
class NN(chainer.Chain):
    def __init__(self, n_mid1=32, n_out=1):
        super().__init__()
        with self.init_scope():
            self.fc1 = L.Linear(None, n_mid1)
            self.fc2 = L.Linear(None, n_out)
            self.bn = L.BatchNormalization(13)
            
    def __call__(self, x):
        self.bn(x)
        h = F.relu(self.fc1(x))
        h = F.relu(self.fc2(h))
        return h

In [141]:
np.random.seed(0)

In [142]:
model = model = L.Classifier(NN(), lossfun= F.mean_squared_error)
model.compute_accuracy = False

In [143]:
batchsize=10
epoch = 100
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)
train_iter = chainer.iterators.SerialIterator(train, batchsize)
test_iter = chainer.iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False)
updater = training.StandardUpdater(train_iter, optimizer, device = -1)
trainer = training.Trainer(updater, (epoch, 'epoch'), out = 'result/housing')
trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
trainer.extend(extensions.PrintReport(['epoch', 'main/accuracy', 'validation/main/accuracy', 'main/loss', 'validation/main/loss', 'elapsed_time']), trigger = (1,'epoch'))

In [144]:
trainer.run()

epoch       main/accuracy  validation/main/accuracy  main/loss   validation/main/loss  elapsed_time
[J1                                                    291.937     113.253               0.0885197     
[J2                                                    81.8182     91.2356               0.15784       
[J3                                                    67.75       81.9181               0.230286      
[J4                                                    65.2175     77.3589               0.298099      
[J5                                                    61.1186     72.0782               0.37925       
[J6                                                    57.801      68.0416               0.448222      
[J7                                                    55.5932     65.6372               0.518024      
[J8                                                    55.9478     63.9415               0.586636      
[J9                                                    49.1

[J81                                                   25.6251     29.0972               6.41417       
[J82                                                   26.5727     29.8943               6.49516       
[J83                                                   31.6158     28.1975               6.59135       
[J84                                                   28.1748     30.1904               6.68238       
[J85                                                   24.4824     26.9609               6.75346       
[J86                                                   26.9866     33.9322               6.82948       
[J87                                                   28.6754     28.703                6.90861       
[J88                                                   26.1452     29.3198               6.98339       
[J89                                                   25.9421     26.7781               7.05362       
[J90                                                  