# Feature Engineering Pipeline

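This notebook runs the full feature engineering pipeline: it loads the raw fraud and IP-to-country data, cleans both tables, engineers features, encodes the categorical columns, and saves the processed result to `data/processed/`.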

In [None]:
import pandas as pd
import sys
import os

# Add the sibling 'src' directory (resolved against the current working
# directory) to the import path. Presumably this is where the local modules
# imported below (data_loader, preprocessing, feature_engineering) live —
# confirm against the repository layout.
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

from data_loader import load_data
from preprocessing import clean_fraud_data, clean_ip_data
from feature_engineering import feature_engineer_fraud_data, encode_categorical

In [None]:
# Load and process fraud data: clean -> engineer features -> encode -> save.
fraud_df = load_data('../data/raw/Fraud_Data.csv')
ip_df = load_data('../data/raw/IpAddress_to_Country.csv')

if fraud_df is not None and ip_df is not None:
    # Clean both inputs before they are combined.
    fraud_df = clean_fraud_data(fraud_df)
    ip_df = clean_ip_data(ip_df)

    # Engineer features; the IP table is passed in alongside the fraud data.
    fraud_df = feature_engineer_fraud_data(fraud_df, ip_df)

    # Encode the categorical columns.
    # NOTE(review): 'country' is not loaded or created in this cell; it is
    # presumably added by feature_engineer_fraud_data from ip_df — confirm.
    cat_cols = ['source', 'browser', 'sex', 'country']
    fraud_df = encode_categorical(fraud_df, cat_cols)

    # Save. Create the output directory first: to_csv raises OSError when the
    # parent directory does not exist (e.g. on a fresh checkout).
    processed_path = '../data/processed/Fraud_Data_Processed.csv'
    os.makedirs(os.path.dirname(processed_path), exist_ok=True)
    fraud_df.to_csv(processed_path, index=False)
    print("Saved processed data.")
else:
    # Both tables are required; report the skip instead of finishing silently
    # so that a failed load is not mistaken for a successful run.
    print("Input data could not be loaded; skipping feature engineering.")