## 9. Baseline Model: Logistic Regression

## Context
Before deploying complex gradient boosting models, we need to establish a simple baseline with logistic regression. This provides a reference point for measuring improvement and helps identify which features matter in a linear framework.

## Objectives

- Train a logistic regression model with proper preprocessing
- Evaluate baseline performance using ROC-AUC
- Identify top features by coefficient magnitude
- Establish the performance floor for comparison

In [None]:
# import and load data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
import time
from datetime import datetime

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.metrics import (
    roc_auc_score, roc_curve, auc,
    precision_recall_curve, average_precision_score,
    confusion_matrix, classification_report
)

# Notebook-wide setup: suppress all warnings and fix the plotting look.
# The style is applied before the palette; keep that order.
warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Use the processed feature file when it exists on disk; otherwise fall back
# to the merged interim file.
processed_file = Path('../data/processed/train_features.parquet')
interim_file = Path('../data/interim/train_merged.parquet')
data_path = processed_file if processed_file.exists() else interim_file

# Load the chosen parquet file and report its (rows, columns) shape.
train = pd.read_parquet(data_path)
print(f'Data loaded: {train.shape}')

Data loaded: (590540, 434)


In [None]:
# data preparation

# Columns kept out of the predictor set; 'isFraud' is the target read below.
exclude_cols = ['TransactionID', 'isFraud', 'TransactionDT']

# Keep only columns stored as 32- or 64-bit ints/floats for logistic regression.
# NOTE(review): other numeric widths (e.g. int8, int16, float16) are not matched
# by this dtype list -- confirm that is intended if the frame was downcast.
numeric_dtypes = ['int64', 'float64', 'int32', 'float32']
numeric_cols = list(train.select_dtypes(include=numeric_dtypes).columns)

# Predictors are the selected numeric columns minus the excluded ones,
# in their original column order.
feature_cols = [name for name in numeric_cols if name not in exclude_cols]

# Target vector and feature matrix.
y = train['isFraud']
X = train[feature_cols]