In [1]:
import random

# Compute the BMI and return the matching body-type label
def calc_bmi(h, w):
    """Classify a body type from height and weight via the body-mass index.

    Args:
        h: Height; divided by 100 before squaring (i.e. centimetres to metres).
        w: Weight, used as the numerator of the BMI.

    Returns:
        "thin" when the BMI is below 18.5, "normal" when it is below 25.0,
        and "fat" otherwise.
    """
    height_m = h/100
    bmi = w / height_m ** 2
    if bmi < 18.5:
        category = "thin"
    elif bmi < 25.0:
        category = "normal"
    else:
        category = "fat"
    return category

# Generate random height/weight samples, label each one with calc_bmi and
# save the result to bmi.csv.
SAMPLE_COUNT = 20000
SEED = 42  # fixed seed so that a fresh run regenerates the same data set

# A dedicated generator keeps the seeding local instead of altering the
# global state of the `random` module.
rng = random.Random(SEED)

# Number of generated samples per label
cnt = {"thin":0, "normal":0, "fat":0}

# newline="" stops text mode from expanding the "\n" inside each explicit
# "\r\n" on platforms whose native line separator is already "\r\n" (which
# would yield "\r\r\n"). The `with` block closes the file even if a write
# fails part-way through.
with open("bmi.csv", "w", encoding="utf-8", newline="") as fp:
    fp.write("height,weight,label\r\n")
    for i in range(SAMPLE_COUNT):
        h = rng.randint(120, 200)
        w = rng.randint(35, 80)
        label = calc_bmi(h, w)
        cnt[label] += 1
        fp.write("{0},{1},{2}\r\n".format(h, w, label))
print("ok,", cnt)


ok, {'thin': 6506, 'normal': 5972, 'fat': 7522}


In [2]:
# `sklearn.cross_validation` was removed in scikit-learn 0.20; its splitting
# helpers now live in `sklearn.model_selection`. Bind the new module to the old
# name so that `cross_validation.train_test_split` keeps working, and fall back
# to the legacy module on installations that predate `model_selection`.
try:
    from sklearn import model_selection as cross_validation
except ImportError:
    from sklearn import cross_validation
from sklearn import svm, metrics
import matplotlib.pyplot as plt
import pandas as pd

# Load the height/weight samples from the CSV file --- (*1)
tbl = pd.read_csv("bmi.csv")

# Slice out the columns and rescale the two features --- (*2)
label = tbl.loc[:, "label"]
w = tbl.loc[:, "weight"].div(100)  # treat 100 kg as the maximum weight
h = tbl.loc[:, "height"].div(200)  # treat 200 cm as the maximum height
# Side-by-side frame with the columns in the order weight, height
wh = pd.concat((w, h), axis="columns")



In [3]:
# Show a caption followed by the first rows of the raw table and of the
# rescaled feature frame.
print('tbl', tbl.head(3), sep='\n')
print('wh', wh.head(5), sep='\n')

tbl
   height  weight   label
0     174      71  normal
1     122      64     fat
2     128      58     fat
wh
   weight  height
0    0.71   0.870
1    0.64   0.610
2    0.58   0.640
3    0.62   0.905
4    0.37   0.710


In [4]:
# Split the samples into a training set and a test set --- (*3)
# test_size=0.25 spells out the library default (a quarter of the rows is
# held out); random_state pins the shuffle so that re-running the notebook
# produces the same split and therefore comparable scores.
data_train, data_test, label_train, label_test = \
    cross_validation.train_test_split(
        wh, label, test_size=0.25, random_state=0)

# Preview the first rows of each of the four pieces.
for name, part in (("data_train", data_train), ("data_test", data_test),
                   ("label_train", label_train), ("label_test", label_test)):
    print(name)
    print(part.head(3))

data_train
       weight  height
18298    0.78   0.780
17393    0.75   0.805
2344     0.46   0.935
data_test
       weight  height
12467    0.54   0.895
3192     0.80   0.950
12071    0.54   0.910
label_train
18298     fat
17393     fat
2344     thin
Name: label, dtype: object
label_test
12467      thin
3192     normal
12071      thin
Name: label, dtype: object


In [5]:
# Fit the classifier on the training split --- (*4)
# (fit returns the estimator itself, so construction and training chain.)
clf = svm.SVC().fit(data_train, label_train)

# Predict labels for the held-out split --- (*5)
predict = clf.predict(data_test)

# Check the predictions against the true labels --- (*6)
cl_report = metrics.classification_report(label_test, predict)
ac_score = metrics.accuracy_score(label_test, predict)
print("正解率=", ac_score)
print("レポート=\n", cl_report)

正解率= 0.9856
レポート=
              precision    recall  f1-score   support

        fat       1.00      0.99      0.99      1852
     normal       0.96      0.99      0.98      1521
       thin       0.99      0.98      0.98      1627

avg / total       0.99      0.99      0.99      5000

