# In [None]:
import pandas as pd
import pytest
import os, sys

# Add project path
sys.path.append(os.path.abspath(os.path.join('..')))

# Import your FraudDetectionPipeline class
from scripts._02_Feature_Engineering import FraudDetectionPipeline

# Mock transaction data shared by the setup_pipeline fixture and the tests below.
# Five rows; every column listed here is written to the temporary CSV that the
# pipeline under test loads.
mock_data = {
    'TransactionId': [1, 2, 3, 4, 5],
    'CustomerId': [101, 102, 101, 103, 102],
    'AccountId': [201, 202, 201, 203, 304],
    'SubscriptionId': [301, 302, 301, 303, 304],
    'ProductId': ['ProdA', 'ProdB', 'ProdA', 'ProdC', 'ProdD'],
    'ProductCategory': ['Cat1', 'Cat2', 'Cat1', 'Cat3', 'Cat4'],
    'ChannelId': ['Web', 'Mobile', 'Web', 'Web', 'Mobile'],
    'Amount': [100.0, 50.0, 120.0, 200.0, 60.0],
    'Value': [100.0, 50.0, 120.0, 200.0, 60.0],
    'CurrencyCode': ['AMZ'] * 5,
    'CountryCode': ['ZYZ'] * 5,
    'ResultCode': [0, 1, 0, 0, 1],
    'BatchId': [1001, 1002, 1001, 1003, 1004],
    # pd.to_datetime parses an explicit list of timestamps. pd.date_range
    # (used previously) builds a regular range from start/end/periods/freq and
    # raises when handed a list, which broke the import of this module.
    'TransactionStartTime': pd.to_datetime([
        '2023-01-01 10:00:00',
        '2023-01-01 11:00:00',
        '2023-01-02 10:30:00',
        '2023-01-02 12:00:00',
        '2023-01-03 09:00:00',
    ]),
    'ProviderId': ['ProvX', 'ProvY', 'ProvX', 'ProvZ', 'ProvA'],
    'PricingStrategy': [1, 2, 1, 3, 4],
    'FraudResult': [0, 1, 0, 0, 1],
}

# TransactionStartTime is already a datetime64 column because it is built from
# a DatetimeIndex, so no further conversion is needed.
mock_df = pd.DataFrame(mock_data)

@pytest.fixture(scope="module")
def setup_pipeline(tmp_path_factory):
    """Yield a FraudDetectionPipeline wired to temporary input/output locations.

    The module-level ``mock_df`` is written to ``mock_data.csv`` inside a
    temporary "data" directory, and separate temporary directories are created
    for plots, models and processed data. The fixture is module-scoped, so the
    same pipeline instance is shared by every test in this module.
    """
    source_csv = tmp_path_factory.mktemp("data") / "mock_data.csv"
    mock_df.to_csv(source_csv, index=False)

    # One temporary directory per output location the pipeline is given.
    plots_dir = tmp_path_factory.mktemp("plots")
    models_dir = tmp_path_factory.mktemp("models")
    processed_dir = tmp_path_factory.mktemp("processed_data")

    instance = FraudDetectionPipeline(
        df_path=str(source_csv),
        plot_path=str(plots_dir),
        mdl_dir=str(models_dir),
        df_dir=str(processed_dir),
    )
    yield instance
    # No explicit teardown: the temporary directories are managed by pytest's
    # tmp_path_factory.

def test_woe_transformation(setup_pipeline):
    """Check break computation and WOE transformation on the mock pipeline.

    Runs, in order, ``load_and_split_data``, ``compute_monotonic_breaks``,
    ``compute_categorical_breaks`` and ``apply_woe_transformation`` and asserts:

    * ``pipeline.breaks`` is a dict holding a list for every numeric variable
      present in the training frame and a dict for every categorical variable;
    * ``bins_adj`` is set and ``train_woe`` / ``test_woe`` are DataFrames;
    * each WOE frame has exactly one ``*_woe`` column per entry in
      ``pipeline.breaks`` and the same number of rows as its source frame;
    * every ``*_woe`` column has a float or integer dtype.
    """
    pipeline = setup_pipeline
    pipeline.load_and_split_data()

    # --- compute_monotonic_breaks -----------------------------------------
    pipeline.compute_monotonic_breaks()
    assert isinstance(pipeline.breaks, dict)
    # Only numeric variables that are actually present in the training frame.
    numeric_vars_after_load = [
        col for col in pipeline.numeric_vars if col in pipeline.train.columns
    ]
    for var in numeric_vars_after_load:
        assert var in pipeline.breaks, f"no breaks computed for numeric variable {var!r}"
        assert isinstance(pipeline.breaks[var], list), (
            f"breaks for numeric variable {var!r} should be a list"
        )

    # --- compute_categorical_breaks ---------------------------------------
    pipeline.compute_categorical_breaks()
    assert isinstance(pipeline.breaks, dict)
    for var in pipeline.categorical_vars:
        assert var in pipeline.breaks, f"no breaks computed for categorical variable {var!r}"
        assert isinstance(pipeline.breaks[var], dict), (
            f"breaks for categorical variable {var!r} should be a dict"
        )

    # --- apply_woe_transformation -----------------------------------------
    pipeline.apply_woe_transformation()
    assert pipeline.bins_adj is not None
    assert isinstance(pipeline.train_woe, pd.DataFrame)
    assert isinstance(pipeline.test_woe, pd.DataFrame)

    # (label, transformed frame, source frame) for both splits, so the same
    # checks run on train and test without duplicating code.
    splits = (
        ("train", pipeline.train_woe, pipeline.train),
        ("test", pipeline.test_woe, pipeline.test),
    )
    woe_columns = {
        label: [col for col in woe_frame.columns if col.endswith('_woe')]
        for label, woe_frame, _ in splits
    }

    # The number of WOE columns should match the number of variables binned.
    expected_count = len(pipeline.breaks)
    for label, _, _ in splits:
        assert len(woe_columns[label]) == expected_count, (
            f"{label}: expected {expected_count} WOE columns, "
            f"found {len(woe_columns[label])}"
        )

    # The transformation must not add or drop rows.
    for label, woe_frame, source_frame in splits:
        assert len(woe_frame) == len(source_frame), f"{label}: row count changed"

    # WOE values must be numeric (float or integer dtype).
    for label, woe_frame, _ in splits:
        for col in woe_columns[label]:
            series = woe_frame[col]
            assert (
                pd.api.types.is_float_dtype(series)
                or pd.api.types.is_integer_dtype(series)
            ), f"{label}: column {col!r} has non-numeric dtype {series.dtype}"