In [2]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from mlflow.models import infer_signature

# 1. Load the credit card dataset from the CSV file in the sibling data directory
df = pd.read_csv(filepath_or_buffer="../data/creditcard.csv")

In [7]:
# 2. Preprocessing: separate the "Class" target column from the feature columns
y = df.loc[:, "Class"]
X = df.drop(columns=["Class"])

# Hold out 20% of the rows for testing. Stratifying on y keeps the class ratio
# the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardize the features: fit the scaler on the training split only, then
# apply that same fitted transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [8]:
# Send all tracking calls to the MLflow server listening at this address
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

# 3. Select the experiment that the runs below are recorded under
mlflow.set_experiment(experiment_name="creditcard_experiment")

2025/10/01 13:56:44 INFO mlflow.tracking.fluent: Experiment with name 'creditcard_experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///C:/Users/SSAFY/Desktop/TIL/TIL/10-DataScience_Adv/6_c_mlflow/mlruns/2', creation_time=1759294604247, experiment_id='2', last_update_time=1759294604247, lifecycle_stage='active', name='creditcard_experiment', tags={}>

In [9]:
# 4. First model: Logistic Regression
# Hyperparameters live in one dict so the values used to build the model are
# exactly the values recorded in MLflow.
lr_params = {"max_iter": 1000}

with mlflow.start_run(run_name="LogisticRegression"):
    model = LogisticRegression(**lr_params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Evaluate on the held-out test split
    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds)
    rec = recall_score(y_test, preds)

    # Log the model type, its hyperparameters and the test metrics to this run
    mlflow.log_params({"model": "LogisticRegression", **lr_params})
    mlflow.log_metrics({"accuracy": acc, "precision": prec, "recall": rec})

    # Infer the input/output schema from the same rows that produced `preds`
    signature = infer_signature(X_test, preds)

    # Store the fitted model as an artifact of this run
    mlflow.sklearn.log_model(model, "model", signature=signature)



🏃 View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/2/runs/1a14fad4cfb04b8f9595da2d2f7bc6e5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/2


In [10]:
# 5. Second model: Random Forest
# Hyperparameters live in one dict so the values used to build the model are
# exactly the values recorded in MLflow. random_state is fixed because the
# forest is trained with randomness; without a seed the logged metrics could
# not be reproduced by re-running this cell.
rf_params = {"n_estimators": 100, "random_state": 42}

with mlflow.start_run(run_name="RandomForest"):
    model = RandomForestClassifier(**rf_params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Evaluate on the held-out test split
    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds)
    rec = recall_score(y_test, preds)

    # Log the model type, its hyperparameters and the test metrics to this run
    mlflow.log_params({"model": "RandomForest", **rf_params})
    mlflow.log_metrics({"accuracy": acc, "precision": prec, "recall": rec})

    # Infer the input/output schema from the same rows that produced `preds`
    signature = infer_signature(X_test, preds)

    # Store the fitted model as an artifact of this run
    mlflow.sklearn.log_model(model, "model", signature=signature)



🏃 View run RandomForest at: http://127.0.0.1:5000/#/experiments/2/runs/eb1247971b48489f932fc107b4490b7b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/2


In [None]:
## RF scores higher than Logistic Regression on accuracy, precision, and recall, so the RF model looks like the better choice.