In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np

df = pd.read_csv('/content/drive/MyDrive/Orange3_colab/Orange3/Neural Network/stroke-standard.csv')
df.shape

(3915, 16)

In [4]:
#표준화된 데이터프레임을 다시 언더샘플링과 데이터 분할
data = df.drop(['stroke'], axis = 1) #타깃 변수를 제외한 변수를 data에 저장
target = df['stroke'] # 타깃 변수만 target에 저장

from imblearn.under_sampling import RandomUnderSampler

#타깃 변수의 소수 클래스 및 다수 클래스를 1:3의 비율로 언더샘플링
undersample = RandomUnderSampler(sampling_strategy=0.333, random_state = 2)

#data와 target에 언더샘플링 적용
data_under, target_under = undersample.fit_resample(data, target)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    data_under, target_under, test_size = 0.5, random_state = 42, stratify = target_under
)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (386, 15)
X_test shape: (386, 15)


In [6]:
#신경망 기본 모델
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

clf_mlp = MLPClassifier(max_iter = 2000, random_state = 0)
clf_mlp.fit(X_train, y_test)

#학습된 Classifier로 테스트 데이터세트를 이용해서 타깃 변수의 예측값 생성
pred = clf_mlp.predict(X_test)
accuracy = accuracy_score(y_test, pred)

print("Training set score:{:.5f}".format(clf_mlp.score(X_train, y_train)))
print("Test set score:{:.5f}".format(accuracy_score(y_test, pred)))

Training set score:0.65285
Test set score:0.67617


In [8]:
#과적합으로 보이기에 그리드 서치 실행
#신경망 기본 모델
clf_mlp = MLPClassifier(max_iter = 2000, random_state=0)

#그리드 서치 실행
from sklearn.model_selection import GridSearchCV

params = {'solver':['sgd', 'lbfgs', 'adam'],
          'alpha':[0.0001, 0.001, 0.01, 0.1, 1],
          'activation':['tanh', 'relu', 'logistic']}

grid_mlp = GridSearchCV(
    clf_mlp, param_grid = params, scoring = 'accuracy', cv=5, n_jobs = -1
)
grid_mlp.fit(X_train, y_train)

print("GridSearchCV max accuracy:{:.5f}".format(grid_mlp.best_score_))
print("GridSearchCV best parameter:", (grid_mlp.best_params_))


GridSearchCV max accuracy:0.78505
GridSearchCV best parameter: {'activation': 'relu', 'alpha': 0.0001, 'solver': 'sgd'}


In [9]:
best_clf = grid_mlp.best_estimator_
pred = best_clf.predict(X_test)
print("Accuracy on test set:{:.5f}".format(accuracy_score(y_test, pred)))

Accuracy on test set:0.75648


In [11]:
#신경망 기본 모델
clf_mlp = MLPClassifier(max_iter = 2000, random_state=0)

#그리드 서치 실행
from sklearn.model_selection import GridSearchCV

params = {'solver':['sgd', 'lbfgs', 'adam'],
          'alpha':[0.0001, 0.01, 1],
          'activation':['tanh', 'relu', 'logistic'],
          'hidden_layer_sizes':[(100,), (100, 100)]}

grid_mlp = GridSearchCV(
    clf_mlp, param_grid = params, scoring = 'accuracy', cv=5, n_jobs = -1
)
grid_mlp.fit(X_train, y_train)

print("GridSearchCV max accuracy:{:.5f}".format(grid_mlp.best_score_))
print("GridSearchCV best parameter:", (grid_mlp.best_params_))

GridSearchCV max accuracy:0.78505
GridSearchCV best parameter: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'solver': 'sgd'}


In [12]:
#kNN 모델
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

clf_knn = KNeighborsClassifier(n_neighbors=3) # random_state 파라미터가 없음
clf_knn.fit(X_train, y_train)

#학습된 Classifier로 테스트 데이터세트를 이용해서 타깃 변수 예측값 생성

pred = clf_knn.predict(X_test)
accuracy = accuracy_score(y_test, pred)

print("Training set score: {:.5f}".format(clf_knn.score(X_train, y_train)))
print("Test set score:{:.5f}".format(accuracy_score(y_test, pred)))

Training set score: 0.84456
Test set score:0.74352


In [13]:
clf_knn = KNeighborsClassifier(n_neighbors=3)

#그리드 서치 실행
from sklearn.model_selection import GridSearchCV
params = {'n_neighbors': range(3, 31)}

grid_knn = GridSearchCV(
    clf_knn, param_grid=params, scoring = 'accuracy', cv=3, n_jobs = -1
)
grid_knn.fit(X_train, y_train)

print("GridSearchCV max accuracy:{:.5f}".format(grid_knn.best_score_))
print("GridSearchCV best parameter:", (grid_knn.best_params_))

GridSearchCV max accuracy:0.77204
GridSearchCV best parameter: {'n_neighbors': 17}


In [14]:
best_clf = grid_knn.best_estimator_
pred = best_clf.predict(X_test)
print("Accuracy on test set:{:.5f}".format(accuracy_score(y_test, pred)))

Accuracy on test set:0.77720
