## Import libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Load data

In [2]:
file_path = "iris.csv" #상대경로로 파일 위치 지정
df = pd.read_csv(file_path)

## Locate y and split data into two; train data and test data

In [3]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1] # 마지막 열이 y가 됨
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True) # 훈련 데이터:테스트 데이터 = 8:2

## Standardize data for SVM and LR

In [4]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Describe models

In [5]:
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(),
    "Logistic Regression": LogisticRegression(max_iter=200)
}

## Decision Tree Model
Data is seperated by if-else type questions.

In [6]:
# DT 모델 학습
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# 예측 및 평가
y_pred = dt_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
cross_val_acc = cross_val_score(dt_model, X, y, cv=5).mean() # 교차 검증
print(f"Decision Tree Accuracy: {accuracy:.4f}")
print(f"Cross-Validation Accuracy: {cross_val_acc:.4f}")

Decision Tree Accuracy: 1.0000
Cross-Validation Accuracy: 0.9533


## Random Forest
Random Forest builds multiple decision trees, using random data in the dataset.

In [7]:
# RF 모델 학습
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# 예측 및 평가
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
cross_val_acc = cross_val_score(rf_model, X, y, cv=5).mean()
print(f"Random Forest Accuracy: {accuracy:.4f}")
print(f"Cross-Validation Accuracy: {cross_val_acc:.4f}")

Random Forest Accuracy: 1.0000
Cross-Validation Accuracy: 0.9667


## SVM
Support Vector Machine seperates data by maximizing the margin between them.

In [8]:
# SVM 모델 학습
svm_model = SVC()
svm_model.fit(X_train_scaled, y_train)

# 예측 및 평가
y_pred = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
cross_val_acc = cross_val_score(svm_model, X, y, cv=5).mean()
print(f"Random Forest Accuracy: {accuracy:.4f}")
print(f"Cross-Validation Accuracy: {cross_val_acc:.4f}")

Random Forest Accuracy: 1.0000
Cross-Validation Accuracy: 0.9667


## LR
Logistic Regression locates best-fit linear graph and predicts the next output

In [9]:
# LR 모델 학습
lr_model = LogisticRegression(max_iter=200)
lr_model.fit(X_train_scaled, y_train)

# 예측 및 평가
y_pred = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
cross_val_acc = cross_val_score(lr_model, X, y, cv=5).mean()
print(f"Random Forest Accuracy: {accuracy:.4f}")
print(f"Cross-Validation Accuracy: {cross_val_acc:.4f}")

Random Forest Accuracy: 1.0000
Cross-Validation Accuracy: 0.9733
