In [1]:
import random

# Compute the BMI and return the matching body-type label
def calc_bmi(h, w):
    """Classify a height/weight pair by body-mass index.

    `h` is divided by 100 before being squared (i.e. it is treated as
    centimetres converted to metres) and `w` is divided by that square.

    Returns "thin" when the BMI is below 18.5, "normal" when it is below
    25.0, and "fat" otherwise.
    """
    height_m = h / 100
    bmi = w / height_m ** 2
    if bmi < 18.5:
        label = "thin"
    elif bmi < 25.0:
        label = "normal"
    else:
        label = "fat"
    return label

# Generate random height/weight samples, label each one with calc_bmi and
# write the rows to bmi.csv, counting how many samples fall in each class.
cnt = {"thin":0, "normal":0, "fat":0}

# newline="" switches off newline translation so the explicit "\r\n" row
# terminators are written exactly as given (with the default setting Windows
# would expand them to "\r\r\n"). The with-block guarantees the file is
# closed even if a write fails part-way through.
with open("bmi.csv", "w", encoding="utf-8", newline="") as fp:
    fp.write("height,weight,label\r\n")
    for i in range(20000):
        h = random.randint(120,200)
        w = random.randint(35, 80)
        label = calc_bmi(h, w)
        cnt[label] += 1
        fp.write("{0},{1},{2}\r\n".format(h, w, label))
print("ok,", cnt)


ok, {'thin': 6308, 'normal': 5963, 'fat': 7729}


In [2]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics, svm

try:
    from sklearn import model_selection
except ImportError:
    # scikit-learn < 0.18 has no model_selection; use its predecessor there.
    from sklearn import cross_validation as model_selection
# Keep the legacy module name bound for code that still refers to it
# (sklearn.cross_validation itself was removed in scikit-learn 0.20).
cross_validation = model_selection

# Divisors used for scaling: weights are assumed to top out at 100 kg and
# heights at 200 cm.
MAX_WEIGHT_KG = 100
MAX_HEIGHT_CM = 200

# Load the height/weight data --- (※1)
tbl = pd.read_csv("bmi.csv")

# Slice out the columns and normalise them --- (※2)
label = tbl["label"]
w = tbl["weight"].div(MAX_WEIGHT_KG)
h = tbl["height"].div(MAX_HEIGHT_CM)
# Feature table with the columns in weight, height order.
wh = pd.concat([w, h], axis=1)

In [3]:
# Show the first rows of the raw table and of the normalised feature table,
# each preceded by its variable name on its own line.
print('tbl', tbl.head(3), sep='\n')
print('wh', wh.head(5), sep='\n')

tbl
   height  weight label
0     135      49   fat
1     147      80   fat
2     125      67   fat
wh
   weight  height
0    0.49   0.675
1    0.80   0.735
2    0.67   0.625
3    0.61   0.900
4    0.74   1.000


In [4]:
# Split into training and test data --- (※3)
# train_test_split is taken from model_selection: the cross_validation module
# that used to provide it was removed in scikit-learn 0.20.
data_train, data_test, label_train, label_test = \
    model_selection.train_test_split(wh, label)

# Preview the first rows of each of the four pieces.
print('data_train')
print(data_train.head(3))
print('data_test')
print(data_test.head(3))
print('label_train')
print(label_train.head(3))
print('label_test')
print(label_test.head(3))

data_train
       weight  height
8789     0.44   0.815
12962    0.61   0.975
10491    0.43   0.730
data_test
       weight  height
6205     0.55   0.805
13667    0.52   0.645
7266     0.44   0.900
label_train
8789       thin
12962      thin
10491    normal
Name: label, dtype: object
label_test
6205     normal
13667       fat
7266       thin
Name: label, dtype: object


In [6]:
# Train the classifier on the training split --- (※4)
clf = svm.SVC().fit(data_train, label_train)

# Predict labels for the test split --- (※5)
predict = clf.predict(data_test)

# Check the predictions against the true labels --- (※6)
ac_score, cl_report = (
    metrics.accuracy_score(label_test, predict),
    metrics.classification_report(label_test, predict),
)
print("正解率=", ac_score)
print("レポート=\n", cl_report)

正解率= 0.987
レポート=
              precision    recall  f1-score   support

        fat       1.00      0.99      0.99      1928
     normal       0.97      0.99      0.98      1509
       thin       1.00      0.98      0.99      1563

avg / total       0.99      0.99      0.99      5000

