# Task 3 – Feature Engineering Smoke-Test
This notebook loads a sample transactions CSV (falling back to a small synthetic data set when no CSV is found), runs the `build_pipeline()` function defined in `src/features/feature_pipeline.py`, and verifies that it returns a model-ready feature matrix without errors.

In [1]:
# --- Task-3 smoke-test --------------------------------------------------------
from pathlib import Path
import sys, pandas as pd, numpy as np

# 1️⃣  ensure project root is on sys.path
# Start from the original guess (parent of `notebooks/`, otherwise the working
# directory) and walk upwards until a folder that actually contains
# `src/features/feature_pipeline.py` is found.  This keeps the notebook working
# when the kernel is started from a nested sub-folder.  If no ancestor
# qualifies, the original guess is kept unchanged.
_cwd = Path.cwd().resolve()
_start = _cwd.parent if _cwd.name == "notebooks" else _cwd
_marker = Path("src") / "features" / "feature_pipeline.py"
PROJECT_ROOT = next(
    (folder for folder in (_start, *_start.parents) if (folder / _marker).is_file()),
    _start,
)
# Prepend rather than append: `src` is a very common package name, and an
# unrelated `src` earlier on sys.path would otherwise shadow the project's.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
print("Using project root:", PROJECT_ROOT)

# 2️⃣  pipeline imports — these must run after the sys.path setup above
from src.features import feature_pipeline as fp
from src.features.feature_pipeline import TARGET, build_pipeline

# Header of the timestamp column in the raw data; edit this value when the
# data set uses a different column name.
RAW_DATE_COL = "TransDate"
# Point the pipeline module's DATETIME_COL at that column.
# NOTE(review): this override only has an effect if the pipeline looks up
# `DATETIME_COL` at call time rather than capturing it at import — confirm in
# feature_pipeline.py.
if fp.DATETIME_COL != RAW_DATE_COL:
    fp.DATETIME_COL = RAW_DATE_COL

# 3️⃣  locate sample data  (add more paths if your layout differs)
SEED = 42  # fixed seed so the synthetic fallback is identical on every run

# Checked in order; the first existing file wins.
candidate_files = [
    PROJECT_ROOT / "data" / "processed" / "transactions.csv",
    PROJECT_ROOT / "data" / "transactions.csv",
]
for path in candidate_files:
    # is_file() rather than exists(): a directory with that name is not data.
    if path.is_file():
        CSV_PATH = path
        df = pd.read_csv(CSV_PATH)
        print(f"Loaded {CSV_PATH}  →  {df.shape[0]:,} rows × {df.shape[1]} cols")
        break
else:
    # fallback: generate a tiny synthetic frame so the pipeline still runs.
    # A seeded Generator is used instead of the global NumPy RNG so that a
    # Restart & Run All reproduces exactly the same frame.
    print("⚠️  No CSV found – generating synthetic data for demo")
    _n = 100
    _rng = np.random.default_rng(SEED)
    df = pd.DataFrame({
        "CustomerId": _rng.integers(1, 20, _n),            # ids 1..19
        fp.DATETIME_COL: pd.date_range("2024-01-01", periods=_n, freq="h"),
        "Amount": _rng.uniform(10, 1000, _n),
        "TransactionType": _rng.choice(["PAYMENT", "TRANSFER"], _n),
        "Channel": _rng.choice(["WEB", "POS"], _n),
        TARGET: _rng.integers(0, 2, _n),                   # binary label 0/1
    })

# 4️⃣  run pipeline
# A CSV picked up from disk may lack the label column; fail with an explicit
# message instead of a bare KeyError from the indexing below.
if TARGET not in df.columns:
    raise KeyError(
        f"Target column {TARGET!r} not found in data; "
        f"available columns: {list(df.columns)}"
    )

# Split the label from the features before fitting.
y = df[TARGET]
X = df.drop(columns=[TARGET])

pipe = build_pipeline()
X_ready = pipe.fit_transform(X, y)

# Smoke-test checks: a model-ready matrix must stay row-aligned with the
# labels and contain at least one feature column.
assert X_ready.shape[0] == len(y), (
    f"Pipeline changed the row count: {X_ready.shape[0]} rows out, {len(y)} labels in"
)
assert X_ready.shape[1] > 0, "Pipeline produced no feature columns"

print("✅ Pipeline succeeded → output shape:", X_ready.shape)
# ----------------------------------------------------------------------------- 

Using project root: C:\Users\senta\OneDrive\Documents\Proj\10 Ac\Credit-Risk-Probability-Model
⚠️  No CSV found – generating synthetic data for demo
✅ Pipeline succeeded → output shape: (100, 12)
