<a href="https://colab.research.google.com/github/shunnyK/AI/blob/main/winequality_white.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 1) 데이터 불러오기 (3행부터 시작 → skiprows=1, header=None)
# Ensure the file path is correct and the file exists at this location.
path = "/content/drive/MyDrive/winequality-white.csv"
cols = ['fixed acidity','volatile acidity','citric acid','residual sugar',
        'chlorides','free sulfur dioxide','total sulfur dioxide',
        'density','pH','sulphates','alcohol','quality']
df = pd.read_csv(path, sep=';', skiprows=1, header=None, names=cols)

# 라벨 인코딩 (quality 3~9 → 0~6)
le = LabelEncoder()
df['quality'] = le.fit_transform(df['quality'])

# 입력(X), 출력(y) 분리
X = df.drop('quality', axis=1)
y = df['quality']

# 2) 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 3) 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# 4) 모델 학습
models = {
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Logistic Regression': LogisticRegression(max_iter=2000, random_state=42)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name}: {accuracy_score(y_test, y_pred):.4f}")

# 5) Logistic Regression Weight & Bias
lr = models['Logistic Regression']
print("\n[Logistic Regression] Weight & Bias")
for i, f in enumerate(X.columns):
    print(f"{f}: {lr.coef_[0][i]:.4f}")
print(f"Bias: {lr.intercept_[0]:.4f}")

KNN: 0.5276
Decision Tree: 0.5929
Random Forest: 0.6776
Logistic Regression: 0.5490

[Logistic Regression] Weight & Bias
fixed acidity: 0.5557
volatile acidity: 0.4595
citric acid: -0.2089
residual sugar: -0.3834
chlorides: 0.1812
free sulfur dioxide: 0.5319
total sulfur dioxide: 0.0343
density: 0.4711
pH: 0.0747
sulphates: -0.3736
alcohol: -0.1530
Bias: -2.2851
