# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Load the dataset; the CSV file is decoded as CP949.
filename = '/content/drive/MyDrive/4월 프젝/data(no5000).csv'
data = pd.read_csv(filename, encoding='CP949')

# Select the input features (humidity, wind speed, temperature) and the
# target column (occurrence type).
features = ['습도(%)', '풍속(m/s)', '기온(°C)']
X = data[features]
y = data['발생유형']

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split is not stratified -- consider passing stratify=y
# if the target classes turn out to be imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Hyperparameter grid explored by the search (3 x 3 = 9 combinations).
param_grid = {
    'n_estimators': [20, 40, 80],
    'learning_rate': [0.05, 0.1, 0.2],
}

# Base estimator; the fixed seed makes every fit deterministic.
gb = GradientBoostingClassifier(random_state=50)

# 5-fold cross-validated grid search, run in parallel on all CPU cores.
grid_search = GridSearchCV(gb, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print("최적의 하이퍼파라미터: {}".format(best_params))

# GridSearchCV uses refit=True by default, so after the search it has already
# retrained a clone of the base estimator (same random_state) with the best
# parameters on the whole training set. Reuse that fitted model instead of
# building and fitting an identical one a second time.
gb = grid_search.best_estimator_

# Report mean accuracy on the training and held-out test sets.
train_score = gb.score(X_train, y_train)
test_score = gb.score(X_test, y_test)
print("Train 정확도: {:.4f}".format(train_score))
print("Test 정확도: {:.4f}".format(test_score))
