## KNN Classification
- Feature와 Target 구분
- train과 test 데이터 구분
- 데이터 전처리 : train data로 정규화, 표준화
- Machine Learning 실행
- 평가

In [32]:
import pandas as pd

In [33]:
# Load the fruit measurements from the CSV file
fruits = pd.read_csv("../Data/fruits.csv")

fruits.head()


Unnamed: 0,length,weight,name
0,25.4,242.0,apple
1,26.3,290.0,apple
2,26.5,340.0,apple
3,29.0,363.0,apple
4,29.0,430.0,apple


In [34]:
fruits.info()

# target: the label the model has to predict (name)
# features: the inputs used to make that prediction (length, weight)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   length  49 non-null     float64
 1   weight  49 non-null     float64
 2   name    49 non-null     object 
dtypes: float64(2), object(1)
memory usage: 1.3+ KB


In [35]:
# Summary statistics (count, mean, std, quartiles) of the numeric columns
fruits.describe()

Unnamed: 0,length,weight
count,49.0,49.0
mean,27.055102,444.5
std,10.242804,328.143233
min,9.8,6.7
25%,14.3,19.7
50%,31.0,500.0
75%,34.5,700.0
max,41.0,1000.0


---

In [36]:
# Define the feature matrix (length, weight) and the target labels (name)
fruits_data = fruits[['length', 'weight']]
fruits_target = fruits['name']

# Preview both objects and their shapes, one separator line between items
print(
    fruits_data.head(),
    fruits_target.head(),
    fruits_data.shape,
    fruits_target.shape,
    sep='\n--------------------------------\n',
)




   length  weight
0    25.4   242.0
1    26.3   290.0
2    26.5   340.0
3    29.0   363.0
4    29.0   430.0
--------------------------------
0    apple
1    apple
2    apple
3    apple
4    apple
Name: name, dtype: object
--------------------------------
(49, 2)
--------------------------------
(49,)


In [37]:
# Train과 Test 분리
from sklearn.model_selection import train_test_split


In [46]:
# Split features and labels into train and test subsets.
# No test_size is given, so scikit-learn's default split ratio applies.
# random_state pins the shuffle so the split is reproducible;
# stratify keeps the class proportions of fruits_target in both subsets.
train_input, test_input, train_target, test_target = train_test_split(
    fruits_data,
    fruits_target,
    random_state=42,
    stratify=fruits_target
)

---

In [47]:
# Standardization (StandardScaler rescales each feature to zero mean and unit variance)
from sklearn.preprocessing import StandardScaler


In [48]:
scaler = StandardScaler()   # scaler: object that performs the standardization


In [49]:
# Standardize the training data.
# fit_transform learns the per-feature mean and standard deviation from
# train_input and applies them in one step. Calling scaler.fit(train_input)
# alone would only learn the statistics and return the scaler itself,
# not the scaled data.
train_scaled = scaler.fit_transform(train_input)    # fit and transform
train_scaled[:5]



array([[ 0.79935928,  0.85382543],
       [ 0.20479453, -0.24256812],
       [-1.54917151, -1.33066961],
       [ 0.37325454,  0.17817675],
       [-1.55908092, -1.32729137]])

In [50]:
# Standardize the test data
test_scaled = scaler.transform(test_input)  # the test set must reuse the mean and standard deviation fitted on the training set, so only transform is called here
test_scaled[:5]


array([[ 0.79935928,  0.86918109],
       [ 0.74981222,  0.54671239],
       [ 1.39392404,  1.56018542],
       [-1.18252324, -1.29627295],
       [-1.38071149, -1.31992065]])

In [51]:
# kNN 적용
from sklearn.neighbors import KNeighborsClassifier


In [52]:
# Training: fit a k-nearest-neighbours classifier (scikit-learn defaults)
# on the standardized training features and their labels
kn = KNeighborsClassifier()
kn.fit(train_scaled, train_target)


In [53]:
# Evaluation: mean accuracy on the standardized test set
kn.score(test_scaled, test_target)


1.0

In [54]:
# Which fruit has length 25 and weight 500?
# The scaler was fitted on a DataFrame, so the new sample is wrapped in a
# DataFrame with the same column names. A bare nested list would trigger
# scikit-learn's "X does not have valid feature names" warning and would
# skip the check that the columns arrive in the fitted order.
new = scaler.transform(pd.DataFrame([[25, 500]], columns=['length', 'weight']))
new




array([[-0.19158198,  0.17817675]])

In [55]:
# Predict the class of the standardized new sample
kn.predict(new)


array(['apple'], dtype=object)