# 5.5 사이킷런 신경망(Neural Network) 분류 모델

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset loaded later
# in this notebook is read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
# Load the dataset (Bike-standard.csv) from the mounted Google Drive.
# NOTE(review): this path is tied to one Drive folder layout - adjust it when
# running the notebook from a different account or environment.
df = pd.read_csv('/content/drive/MyDrive/기말고사 과제/Bike-standard.csv')

# Only the last expression of a cell is displayed automatically, so a bare
# `df.shape` in the middle of the cell was evaluated and thrown away.
# Print it explicitly so the row/column count is actually shown.
print(df.shape)

# Column names, non-null counts and dtypes (info() prints its own report).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8751 entries, 0 to 8750
Data columns (total 47 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Temperature      8751 non-null   float64
 1   Humidity         8751 non-null   float64
 2   Wind speed       8751 non-null   float64
 3   Visibility       8751 non-null   float64
 4   Solar Radiation  8751 non-null   float64
 5   Rented Bike B    8751 non-null   float64
 6   Functioning Day  8751 non-null   int64  
 7   Holiday          8751 non-null   int64  
 8   Date_1.0         8751 non-null   int64  
 9   Date_2.0         8751 non-null   int64  
 10  Date_3.0         8751 non-null   int64  
 11  Date_4.0         8751 non-null   int64  
 12  Date_5.0         8751 non-null   int64  
 13  Date_6.0         8751 non-null   int64  
 14  Date_7.0         8751 non-null   int64  
 15  Date_8.0         8751 non-null   int64  
 16  Date_9.0         8751 non-null   int64  
 17  Date_10.0     

In [None]:
# Separate the label from the predictors:
#   target - the 'Rented Bike B' column only
#   data   - every remaining column
target = df['Rented Bike B']
data = df.drop(columns=['Rented Bike B'])



In [None]:
# Split the data 50:50 into a training half and a test half.
from sklearn.model_selection import train_test_split

# stratify=target asks for the split to be stratified on the target labels;
# random_state=42 fixes the shuffle so the same split is produced on re-run.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.5,
    stratify=target,
    random_state=42,
)

# Report the size of each feature matrix.
for label, features in (("X_train shape:", X_train), ("X_test shape:", X_test)):
    print(label, features.shape)

X_train shape: (4375, 46)
X_test shape: (4376, 46)


In [None]:
# Baseline neural network: MLPClassifier with default hyperparameters
# (the default solver is 'adam').
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# max_iter is raised to 2000 to avoid a ConvergenceWarning; random_state is
# fixed so repeated runs give the same result.
clf_mlp = MLPClassifier(max_iter=2000, random_state=0)
clf_mlp.fit(X_train, y_train)

# Predict the target for the test set with the fitted classifier and score it.
pred = clf_mlp.predict(X_test)
accuracy = accuracy_score(y_test, pred)

# Reuse the accuracy computed above instead of calling accuracy_score a second
# time inside the print statement.
print("Neural Network Training set score:{:.5f}".format(clf_mlp.score(X_train, y_train)))
print("Neural Network Test set score:{:.5f}".format(accuracy))

Neural Network Training set score:0.98743
Neural Network Test set score:0.91042


In [None]:
# alpha sets the strength of the regularization applied to the weights; the default is alpha=0.0001.
# alpha=0.01: a moderate amount of regularization on the weights.
# alpha=1 (additional option): stronger regularization on the weights.

In [None]:
# Grid search over MLPClassifier hyperparameters: solver, alpha and activation.
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

# Base estimator; the searched hyperparameters are set per candidate by the
# grid search, everything else stays at the values given here.
clf_mlp = MLPClassifier(random_state=0, max_iter=2000)

# Deliberately small grid: two candidate values per hyperparameter
# (2 x 2 x 2 = 8 combinations).
params = {
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.01],
    'activation': ['relu', 'tanh'],
}

# 5-fold cross-validation scored by accuracy; n_jobs=-1 runs the fits in
# parallel on all available processors.
grid_mlp = GridSearchCV(
    estimator=clf_mlp,
    param_grid=params,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_mlp.fit(X_train, y_train)

# Best mean cross-validated accuracy and the parameter combination behind it.
print("GridSearchCV max accuracy: {:.5f}".format(grid_mlp.best_score_))
print("GridSearchCV best parameter:", grid_mlp.best_params_)

GridSearchCV max accuracy: 0.90629
GridSearchCV best parameter: {'activation': 'relu', 'alpha': 0.0001, 'solver': 'sgd'}


In [None]:
# Take the best estimator found by the grid search and measure its accuracy
# on the held-out test set.
best_clf = grid_mlp.best_estimator_
pred = best_clf.predict(X_test)
test_accuracy = accuracy_score(y_test, pred)
print("Accuracy on test set:{:.5f}".format(test_accuracy))

Accuracy on test set:0.92070


In [None]:
# Re-run the grid search, this time also varying the hidden-layer sizes in
# addition to solver, alpha and activation.
from sklearn.model_selection import GridSearchCV

# Base estimator; the searched hyperparameters are set per candidate by the
# grid search, everything else stays at the values given here.
clf_mlp = MLPClassifier(random_state=0, max_iter=2000)

# Two candidate values per hyperparameter (2 x 2 x 2 x 2 = 16 combinations);
# hidden_layer_sizes compares one hidden layer of 100 units against two.
params = {
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.01],
    'activation': ['relu', 'tanh'],
    'hidden_layer_sizes': [(100,), (100, 100)],
}

# 5-fold cross-validation scored by accuracy; n_jobs=-1 runs the fits in
# parallel on all available processors.
grid_mlp = GridSearchCV(
    estimator=clf_mlp,
    param_grid=params,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_mlp.fit(X_train, y_train)

# Best mean cross-validated accuracy and the parameter combination behind it.
print("GridSearchCV max accuracy:{:.5f}".format(grid_mlp.best_score_))
print("GridSearchCV best parameter:", grid_mlp.best_params_)

GridSearchCV max accuracy:0.91520
GridSearchCV best parameter: {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (100, 100), 'solver': 'sgd'}


In [None]:
# Take the best estimator found by the grid search and measure its accuracy
# on the held-out test set.
best_clf = grid_mlp.best_estimator_
pred = best_clf.predict(X_test)
test_accuracy = accuracy_score(y_test, pred)
print("Accuracy on test set:{:.5f}".format(test_accuracy))

Accuracy on test set:0.91933
