# Watson AutoAIで作った Titanic Modelの精度を検証する

### Watson Machine Learning認証情報の設定

In [None]:
# Watson Machine Learning service credentials.
# Replace every "xxxx" placeholder with the value from your own service instance.
wml_credentials = dict(
    instance_id="xxxx",
    password="xxxx",
    url="xxxx",
    username="xxxx",
)

### APIのScoring URL設定

In [None]:
# Scoring endpoint URL of the deployed model; replace the "xxxx" placeholder
# with the real URL before running the scoring request.
scoring_url = "xxxx"

### 必要ライブラリのimport

In [None]:
import urllib3, requests, json
import urllib.request

In [None]:
import numpy as np
import pandas as pd
from IPython.display import display

### Watson ML呼出し準備

In [None]:
# Build the HTTP Basic-auth header from the WML username and password.
auth = '{username}:{password}'.format(username=wml_credentials['username'], password=wml_credentials['password'])
header_basic_auth = urllib3.util.make_headers(basic_auth=auth)
url = '{}/v3/identity/token'.format(wml_credentials['url'])

# Request an access token from the identity endpoint.
token_response = requests.get(url, headers=header_basic_auth)
# Fail on an HTTP error status (e.g. wrong credentials) instead of letting the
# lookup below raise an unrelated KeyError on an error payload.
token_response.raise_for_status()
mltoken = token_response.json()['token']

In [None]:
# Assemble the request headers.
# header_token carries only the content type and the bearer token.
header_token = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + mltoken,
}
ml_instance_id = wml_credentials["instance_id"]
# header is the same mapping extended with the ML instance id.
header = {**header_token, 'ML-Instance-ID': ml_instance_id}

### テスト用CSV読み込み

In [None]:
# Download the test CSV; the relative file name places it in the current
# working directory.
csv_url = 'https://raw.githubusercontent.com/makaishi2/sample-data/master/data/titanic_test.csv'
csv_filename = 'titanic_test.csv'
urllib.request.urlretrieve(url=csv_url, filename=csv_filename)

In [None]:
# Load the test CSV into a DataFrame.
df_titanic_test = pd.read_csv(filepath_or_buffer='titanic_test.csv')

In [None]:
# Preview the first five rows of the test data.
display(df_titanic_test.head(n=5))

### 正解データ correctの抽出

In [None]:
# Ground-truth labels: the 'Survived' column as an array.
correct = df_titanic_test.loc[:, 'Survived'].values
print(correct)

### API呼出し用入力データ配列の作成

In [None]:
# Drop the target column. DataFrame.drop returns a new frame, so
# df_titanic_test itself is left untouched.
df_sub = df_titanic_test.drop('Survived', axis=1)

# Replace missing values (NaN) with None so they are sent as JSON null.
# The frame is cast to object dtype first: in a float column pandas would
# otherwise coerce the None replacement straight back to NaN.
df_sub = df_sub.astype(object).where(df_sub.notna(), None)

In [None]:
# Preview the first eleven rows of the scoring input.
display(df_sub.head(n=11))

In [None]:
# DataFrameから項目名Listの生成
fields = df_sub.columns.tolist()
print(fields)

In [None]:
# Convert the DataFrame rows into a list of row lists for the request payload.
values = df_sub.values.tolist()
# For a quick test, send only the first two rows instead:
#values = df_sub.head(2).values.tolist()
# Show the first two rows as a sanity check.
print(values[:2])

In [None]:
# Assemble the scoring payload: a single input_data entry holding the column
# names and the row values.
scoring_input = {"fields": fields, "values": values}
payload_scoring = {"input_data": [scoring_input]}

### APIの呼出し

In [None]:
# POST the payload as JSON to the scoring endpoint.
response_scoring = requests.post(
    url=scoring_url,
    json=payload_scoring,
    headers=header,
)

### 戻り値のparse

In [None]:
# Parse the scoring response into a DataFrame.
# Check the HTTP status first: an error response carries no 'predictions'
# entry, and reporting its body here is clearer than a later KeyError.
if not response_scoring.ok:
    raise RuntimeError('Scoring request failed with HTTP {}: {}'.format(
        response_scoring.status_code, response_scoring.text))

res = json.loads(response_scoring.text)

# Only the first entry of 'predictions' is used; its 'fields' become the
# column names and its 'values' the rows.
pred = res['predictions'][0]
pred_values = pred['values']
pred_fields = pred['fields']
df_res = pd.DataFrame(pred_values, columns = pred_fields)

In [None]:
# Preview the first five rows of the scoring result.
display(df_res.head(n=5))

### 精度評価

In [None]:
# Predicted labels as an array.
prediction = df_res['prediction'].values

# Per-row probability lists; keep the entry at index 1 of each one.
# NOTE(review): index 1 is presumably the probability of the positive
# (survived) class -- confirm against the model output.
w1 = df_res['probability'].tolist()
probas = [class_probs[1] for class_probs in w1]

In [None]:
# Accuracy of the predicted labels against the ground truth.
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true=correct, y_pred=prediction)

In [None]:
# ROC AUC computed from the probability scores.
from sklearn.metrics import auc, roc_curve

# False-positive rates, true-positive rates and the matching thresholds.
fpr, tpr, thresholds = roc_curve(y_true=correct, y_score=probas)

# Area under the (fpr, tpr) curve.
roc_auc = auc(x=fpr, y=tpr)

In [None]:
# Confusion matrix: rows follow the true labels, columns the predicted labels.
from sklearn.metrics import confusion_matrix

matrix = confusion_matrix(y_true=correct, y_pred=prediction)
# Wrap the matrix in a DataFrame with the same names on both axes.
df_matrix = pd.DataFrame(
    data=matrix,
    index=['died', 'survived'],
    columns=['died', 'survived'],
)

### 精度評価結果表示

In [None]:
# Report both metrics, one per line, to three decimal places.
print(
    "Accuracy: %0.3f" % accuracy,
    "ROC AUC : %0.3f" % roc_auc,
    sep="\n",
)

In [None]:
# Show the confusion matrix.
display(df_matrix)

In [None]:
# ROC曲線の描画
%matplotlib inline 
import matplotlib.pyplot as plt

# Plot the ROC curve on a square figure, with the diagonal as a dashed
# reference line.
plt.figure(figsize=(6,6))
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr, label='ROC Curve')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
# Name the axes so the figure can be read on its own.
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Without a legend the label passed to plt.plot above is never drawn.
plt.legend(loc='lower right')
plt.show()