## 목표
- sklearn 라이브러리 사용법 숙지
- 비만도 데이터 학습하고 예측하기

### 모델정의

In [4]:
from sklearn.neighbors import KNeighborsClassifier

In [5]:
# Create a k-nearest-neighbors classifier that considers the 5 nearest neighbors.
bmi_model = KNeighborsClassifier(n_neighbors=5)

### 학습

In [7]:
import pandas as pd

In [8]:
# Load the BMI dataset from CSV into a DataFrame (path is relative to the working directory).
bmi_data = pd.read_csv("./data/bmi_lbs.csv")

In [9]:
# Preview the first 10 rows to check the loaded columns.
bmi_data.head(10)

Unnamed: 0,Label,Gender,Height,Weight(lbs)
0,Obesity,Male,174,211.6416
1,Normal,Male,189,191.8002
2,Obesity,Female,185,242.506
3,Overweight,Female,195,229.2784
4,Overweight,Male,149,134.4806
5,Overweight,Male,189,229.2784
6,Extreme Obesity,Male,147,202.8232
7,Extreme Obesity,Male,154,244.7106
8,Overweight,Male,174,198.414
9,Obesity,Female,169,227.0738


In [10]:
# Preprocessing: derive a kilogram weight column from the pound weights.
# 0.453 is the approximate kg-per-lb factor used here
# (the exact definition is 0.45359237 kg per pound).
KG_PER_LB = 0.453
bmi_data['Weight(kg)'] = bmi_data['Weight(lbs)'] * KG_PER_LB

In [11]:
# Display the DataFrame to check the newly added Weight(kg) column.
bmi_data

Unnamed: 0,Label,Gender,Height,Weight(lbs),Weight(kg)
0,Obesity,Male,174,211.6416,95.873645
1,Normal,Male,189,191.8002,86.885491
2,Obesity,Female,185,242.5060,109.855218
3,Overweight,Female,195,229.2784,103.863115
4,Overweight,Male,149,134.4806,60.919712
...,...,...,...,...,...
495,Extreme Obesity,Female,150,337.3038,152.798621
496,Obesity,Female,184,266.7566,120.840740
497,Extreme Obesity,Female,141,299.8256,135.820997
498,Extreme Obesity,Male,150,209.4370,94.874961


In [12]:
# Convert the kilogram weights from float to 32-bit integers.
# astype drops the fractional part instead of rounding (e.g. 95.873645 becomes 95).
# NOTE(review): if nearest-integer weights are intended, round before casting - confirm.
bmi_data['Weight(kg)'] = bmi_data['Weight(kg)'].astype('int32')

In [13]:
# Preview the first rows to confirm Weight(kg) now holds integers.
bmi_data.head()

Unnamed: 0,Label,Gender,Height,Weight(lbs),Weight(kg)
0,Obesity,Male,174,211.6416,95
1,Normal,Male,189,191.8002,86
2,Obesity,Female,185,242.506,109
3,Overweight,Female,195,229.2784,103
4,Overweight,Male,149,134.4806,60


In [32]:
# Split the data into the questions (features) and the answers (labels) for training.
# X is selected with a list of columns so it stays a 2-D table; y is a single
# column, i.e. a 1-D Series.
feature_columns = ['Height', 'Weight(kg)']
X = bmi_data[feature_columns]
y = bmi_data['Label']

In [33]:
# Check the feature table dimensions as (rows, columns).
X.shape

(500, 2)

In [34]:
# Check the label dimensions; a one-element tuple means y is 1-D.
y.shape

(500,)

In [37]:
# Train the classifier on the features X with the labels y.
bmi_model.fit(X,y)

KNeighborsClassifier()

### 예측

In [38]:
# Extract sample data: the five rows at positions 105-109, taken from both the
# features and the labels with the same slice so they stay aligned.
sample_rows = slice(105, 110)
X_samples = X.iloc[sample_rows]
y_samples = y.iloc[sample_rows]

In [39]:
# Show the sample features that will be passed to the model.
X_samples

Unnamed: 0,Height,Weight(kg)
105,195,125
106,166,159
107,159,153
108,181,105
109,149,65


In [40]:
# Show the true labels of the sample rows for comparison with the predictions.
y_samples

105            Obesity
106    Extreme Obesity
107    Extreme Obesity
108            Obesity
109         Overweight
Name: Label, dtype: object

In [42]:
pre = bmi_model.predict(X_samples) # predict labels for the 5 sample rows (height, weight)
# Display the predicted labels.
pre

array(['Obesity', 'Extreme Obesity', 'Extreme Obesity', 'Obesity',
       'Overweight'], dtype=object)

### 평가
- 정확도(accuracy): 예측한 전체 데이터 중 정확히 맞춘 데이터의 비율 (아래 예시는 학습에 사용한 데이터의 일부로 평가하므로 실제 성능보다 높게 나올 수 있음)

In [43]:
from sklearn.metrics import accuracy_score

In [44]:
# Accuracy = fraction of sample rows whose predicted label matches the true label.
# Arguments: the actual answers first, then the model's predictions.
# NOTE(review): the samples are rows of X, the same data passed to fit(), so this
# measures accuracy on training data rather than on unseen data.
score = accuracy_score(y_true=y_samples, y_pred=pre)

In [45]:
# Display the accuracy computed for the sample rows.
score

1.0

In [47]:
# Usage: classify new people from (height, weight in kg) pairs.
# The model was fitted on a DataFrame with named columns, so the new inputs are
# wrapped in a DataFrame with the same column names and order. Passing a bare
# nested list makes recent scikit-learn versions warn that the input has no
# valid feature names; the predicted labels are the same either way.
new_people = pd.DataFrame(
    [[158, 47], [171, 74]],
    columns=['Height', 'Weight(kg)'],
)
bmi_model.predict(new_people)

array(['Normal', 'Overweight'], dtype=object)