In [2]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Load the wine dataset into a DataFrame; the target is appended as the
# last column so features and label can be separated by position below.
data_wine = load_wine()
df_wine = pd.DataFrame(data_wine.data, columns=data_wine.feature_names)
df_wine['class'] = data_wine.target

# Split features (X: every column but the last) from the target (y: last column).
X, y = df_wine.iloc[:, :-1].values, df_wine.iloc[:, -1].values

# Stratified 60/20/20 split: first hold out 40% of the data, then halve that
# hold-out into validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=10, stratify=y)
X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=10, stratify=y_temp)

# Candidate pipelines: standardize the features, then fit a logistic regression.
# L1-penalized variants use the liblinear solver.
lr1_10 = make_pipeline(StandardScaler(), LogisticRegression(penalty='l1', C=10.0, solver='liblinear', max_iter=1000))
lr1_1 = make_pipeline(StandardScaler(), LogisticRegression(penalty='l1', C=1.0, solver='liblinear', max_iter=1000))
lr1_0_1 = make_pipeline(StandardScaler(), LogisticRegression(penalty='l1', C=0.1, solver='liblinear', max_iter=1000))

# L2-penalized variants use the estimator's default solver settings.
lr2_10 = make_pipeline(StandardScaler(), LogisticRegression(penalty='l2', C=10.0))
lr2_1 = make_pipeline(StandardScaler(), LogisticRegression(penalty='l2', C=1.0))
lr2_0_1 = make_pipeline(StandardScaler(), LogisticRegression(penalty='l2', C=0.1))

# Train every candidate and evaluate it on the training and validation sets.
models = [lr1_10, lr1_1, lr1_0_1, lr2_10, lr2_1, lr2_0_1]
model_names = ['L1 C=10.0', 'L1 C=1.0', 'L1 C=0.1', 'L2 C=10.0', 'L2 C=1.0', 'L2 C=0.1']

# Validation accuracy per candidate, in the same order as `models`.
valid_scores = []
for model, name in zip(models, model_names):
    model.fit(X_train, y_train)
    train_accuracy = model.score(X_train, y_train)
    valid_accuracy = model.score(X_valid, y_valid)
    valid_scores.append(valid_accuracy)
    print(f'{name} - Training set accuracy: {train_accuracy}, Validation set accuracy: {valid_accuracy}')

# Final model selection: pick the candidate with the highest validation accuracy
# instead of hard-coding it. `max` returns the first maximum, so ties are broken
# in favour of the earliest entry in `models`.
best_index = max(range(len(models)), key=valid_scores.__getitem__)
final_model = models[best_index]

# The selected pipeline is already fitted on the training set, so it is scored
# directly. Re-fitting is unnecessary and, with no random_state set on the
# estimator, may not reproduce exactly the model that was validated.
test_accuracy = final_model.score(X_test, y_test)
print(f'Final model test set accuracy: {test_accuracy}')

L1 C=10.0 - Training set accuracy: 1.0, Validation set accuracy: 0.9722222222222222
L1 C=1.0 - Training set accuracy: 1.0, Validation set accuracy: 1.0
L1 C=0.1 - Training set accuracy: 0.9716981132075472, Validation set accuracy: 1.0
L2 C=10.0 - Training set accuracy: 1.0, Validation set accuracy: 0.9444444444444444
L2 C=1.0 - Training set accuracy: 1.0, Validation set accuracy: 0.9722222222222222
L2 C=0.1 - Training set accuracy: 1.0, Validation set accuracy: 1.0
Final model test set accuracy: 0.9722222222222222


## 결과분석
1. 와인 데이터셋은 샘플 수가 178개로 비교적 적고 특성이 잘 정의되어 있어, 모델이 쉽게 학습할 수 있는 구조일 가능성이 높다. 실제로 모든 모델이 훈련 세트와 검증 세트에서 94% 이상의 정확도를 보였다.