# 교차 검증 실습

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score


In [4]:
fish_df = pd.read_csv('./data/fish.csv')
fish_input = fish_df.drop('Species', axis=1)
fish_target = fish_df['Species']

### 생선 다중 분류 with cross_val_score

In [9]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    fish_input, fish_target, test_size=0.2, random_state=42, stratify=fish_target
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LogisticRegression(max_iter=1000, random_state=42)

scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='accuracy')

print("교차 검증 점수:", scores)

교차 검증 점수: [0.80769231 0.88461538 0.8        0.8        0.8       ]


### 생선 다중 분류 with GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# GridSearchCV 사용 -> 최적의 파라미터, 평가 점수, 모델 확인
X_train, X_test, y_train, y_test = train_test_split(
    fish_input, fish_target, test_size=0.2, random_state=42, stratify=fish_target
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

param_grid = {
    'C': [0.1, 1, 10, 100],
    'solver': ['lbfgs', 'liblinear', 'newton-cg']
}

logistic_model = LogisticRegression(max_iter=1000, random_state=42)
grid_search = GridSearchCV(logistic_model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)

grid_search.fit(X_train_scaled, y_train)

print("최적의 하이퍼파라미터:", grid_search.best_params_)
print("최고 교차 검증 점수 (정확도):", grid_search.best_score_)

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)

print("\n분류 보고서:\n", classification_report(y_test, y_pred))


최적의 하이퍼파라미터: {'C': 100, 'solver': 'liblinear'}
최고 교차 검증 점수 (정확도): 0.9212307692307693

혼동 행렬:
 [[ 7  0  0  0  0  0  0]
 [ 0  2  0  0  0  0  0]
 [ 0  0 11  0  0  0  0]
 [ 0  0  0  4  0  0  0]
 [ 0  0  0  0  4  0  0]
 [ 0  0  0  0  0  3  0]
 [ 0  0  1  0  0  0  0]]

분류 보고서:
               precision    recall  f1-score   support

       Bream       1.00      1.00      1.00         7
      Parkki       1.00      1.00      1.00         2
       Perch       0.92      1.00      0.96        11
        Pike       1.00      1.00      1.00         4
       Roach       1.00      1.00      1.00         4
       Smelt       1.00      1.00      1.00         3
   Whitefish       0.00      0.00      0.00         1

    accuracy                           0.97        32
   macro avg       0.85      0.86      0.85        32
weighted avg       0.94      0.97      0.95        32



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
