In [1]:
from sklearn.datasets import load_breast_cancer

# Load the Wisconsin breast-cancer dataset bundled with scikit-learn
cancer = load_breast_cancer()

# Feature matrix (model input) and class labels (model output / target)
x, y = cancer.data, cancer.target

# Column name of every input feature, and the display name of every target class
col_names, target_names = cancer.feature_names, cancer.target_names

In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

In [4]:
# Wrap the raw feature matrix in a DataFrame labelled with the feature names
bcc = pd.DataFrame(data=x, columns=col_names)

In [5]:
# 트레이닝셋 / 테스트셋으로 분할
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(bcc, y, test_size=0.15, random_state=2021, stratify = y)

In [6]:
# 트레이닝셋 / 벨리데이션셋으로 분할
from sklearn.model_selection import train_test_split

x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.3, random_state=2021, stratify = y_train)

In [7]:
# Reset the row index of x_train, x_valid and x_test to 0..n-1.
# Rebinding to the returned frame (instead of inplace=True) avoids mutating
# objects in place.
x_train = x_train.reset_index(drop=True)
x_valid = x_valid.reset_index(drop=True)
x_test = x_test.reset_index(drop=True)

In [None]:
# Standardization scaling
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to all three splits.
scaler = StandardScaler().fit(x_train)
x_train_sc, x_valid_sc, x_test_sc = (
    scaler.transform(split) for split in (x_train, x_valid, x_test)
)

In [None]:
# Train the KNN models
from sklearn.neighbors import KNeighborsClassifier

# One uniformly-weighted classifier per neighbour count k = 2..15, fitted on
# the standardized training features; knns[i] holds the model for k = i + 2.
knns = []
for k in range(2, 16):
    knn = KNeighborsClassifier(weights='uniform', n_neighbors=k)
    knns.append(knn.fit(x_train_sc, y_train))

In [None]:
# Performance of the KNN models:
# validation accuracy of each fitted classifier, in the same order as knns
accs = [model.score(x_valid_sc, y_valid) for model in knns]

In [None]:
# # KNN모델들의 k(이웃수)에 따른 accuracy를 시각화 
# plt.figure(figsize=(10, 6))
# plt.plot(range(2,16), accs)
# plt.grid()
# plt.show()
# best_knn = knns[4]
# print('k = 5 선택')

In [None]:
# Write your answer code here
# NOTE(review): knns starts at k = 2, so index 4 is the k = 6 model, while the
# commented-out cell above prints "k = 5" — confirm which k was intended.
best_knn = knns[4]
test_accuracy = best_knn.score(x_test_sc, y_test)
print(f"테스트셋 위에서의 accuracy : {test_accuracy*100:.2f}%")

In [None]:
# Classification report
from sklearn.metrics import classification_report, precision_score

y_pred = best_knn.predict(x_test_sc)
print(classification_report(y_test, y_pred, target_names=target_names))

# Derive the conclusion from the measured precision instead of asserting it:
# the "every predicted patient really has cancer" statement only holds when
# the precision of the malignant class on the test set is exactly 1.0.
malignant_label = list(target_names).index("malignant")
malignant_precision = precision_score(y_test, y_pred, pos_label=malignant_label)

if malignant_precision == 1.0:
    print("malignant의 precision이 100% 이기 때문에 암환자라고 예측된 사람은 전부 암환자 일 것이다.")
else:
    print(f"malignant의 precision이 {malignant_precision*100:.2f}% 이므로 "
          "암환자라고 예측된 사람 중 일부는 암환자가 아닐 수 있다.")

In [None]:
# Train on the original, unscaled data.
# This cell must run: the comparison cell below reads best_knn2.
from sklearn.neighbors import KNeighborsClassifier

# One uniformly-weighted KNN classifier per k = 2..15, fitted on raw features
knns2 = []
for k in range(2, 16):
    knn = KNeighborsClassifier(n_neighbors=k, weights='uniform')
    knn.fit(x_train, y_train)
    knns2.append(knn)

# Validation accuracy of every unscaled model, in the same order as knns2
accs2 = []
for k in range(2, 16):
    idx = k - 2
    knn = knns2[idx]
    acc = knn.score(x_valid, y_valid)
    accs2.append(acc)

# Validation accuracy as a function of k
plt.figure(figsize=(10, 6))
plt.plot(range(2, 16), accs2)
plt.grid()
plt.show()

# Index 4 is the k = 6 model, the same list position used for best_knn
best_knn2 = knns2[4]

In [None]:
# Write your answer code here

# Test accuracy of the model trained on standardized features (Q11) ...
scaled_accuracy = best_knn.score(x_test_sc, y_test)
print(f"Q11 accuracy : {scaled_accuracy*100:.2f}%")

# ... next to the model trained on the raw features (Q15)
raw_accuracy = best_knn2.score(x_test, y_test)
print(f"Q15 accuracy : {raw_accuracy*100:.2f}%")

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Deep learning: a small fully-connected binary classifier
keras.backend.clear_session()

# Seed the random generators so weight initialisation, dropout masks and batch
# shuffling are repeatable across runs (same seed as the data splits).
# keras.utils.set_random_seed is not available in older TensorFlow releases,
# so fall back to seeding TensorFlow only.
SEED = 2021
if hasattr(keras.utils, "set_random_seed"):
    keras.utils.set_random_seed(SEED)
else:
    tf.random.set_seed(SEED)

# Three Dense -> BatchNormalization -> Dropout stages followed by a single
# sigmoid unit. An explicit Input layer replaces the `input_shape=` argument
# on the first Dense layer.
model = Sequential([
    Input(shape=x_train.shape[1:]),
    Dense(64, activation='swish'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(32, activation='swish'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(32, activation='swish'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Stop once val_loss has not improved for 10 epochs and roll back to the best
# weights seen, so the 2000-epoch budget is only an upper bound.
es = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, restore_best_weights=True)

# NOTE(review): the network is fitted on the unscaled x_train / x_valid even
# though x_train_sc / x_valid_sc are available — confirm this is intended.
history = model.fit(x_train, y_train, epochs=2000, batch_size=32,
                    verbose=0, validation_data=(x_valid, y_valid), callbacks=[es])

In [None]:
# #  시각화
# import matplotlib.pyplot as plt

# plt.plot(history.history['accuracy'])
# plt.plot(history.history['val_accuracy'])
# plt.title('Accuracy')
# plt.xlabel('epochs')
# plt.ylabel('accuracy')
# plt.legend(['train_acc', 'val_acc'])
# plt.show()