In [None]:
# Fraud-data preprocessing cell: load -> clean -> map IPs to countries ->
# engineer features -> write the processed CSV.
import sys
from pathlib import Path

# Assumes the notebook is run from a directory one level below the project
# root (the parent of the current working directory is used as the root).
PROJECT_ROOT = Path.cwd().parent
# Make the project-local `src` package importable. Guarded so that re-running
# this cell does not keep appending duplicate entries to sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))


from src.data_loader import DataLoader
from src.data_cleaner import DataCleaner
from src.geo_ip_mapper import GeoIPMapper
from src.feature_engineer import FeatureEngineer

# --- Load the two input tables -------------------------------------------
loader = DataLoader()
fraud_df = loader.load_fraud_data()
ip_df = loader.load_ip_country_data()

# --- Clean the fraud table (each step returns the updated frame) ----------
cleaner = DataCleaner()
fraud_df = cleaner.remove_duplicates(fraud_df)
fraud_df = cleaner.handle_missing_values(fraud_df)
fraud_df = cleaner.correct_dtypes(fraud_df)

# --- Attach country information using the IP lookup table -----------------
geo = GeoIPMapper(ip_df)
fraud_df = geo.map_country(fraud_df)

# --- Feature engineering ---------------------------------------------------
fe = FeatureEngineer()
fraud_df = fe.add_time_features(fraud_df)
# NOTE(review): this cell's output shows a pandas FutureWarning for
# `df["txn_count_24h"].fillna(0, inplace=True)` (chained inplace fill, which
# stops working in pandas 3.0). That line is not in this notebook --
# presumably it lives in FeatureEngineer.transaction_velocity; confirm and
# replace it there with `df["txn_count_24h"] = df["txn_count_24h"].fillna(0)`.
fraud_df = fe.transaction_velocity(fraud_df)

# --- Persist the result ----------------------------------------------------
# Build the destination from PROJECT_ROOT (same location as the former
# "../data/processed/..." string) and create the directory first, because
# to_csv does not create missing parent directories.
output_path = PROJECT_ROOT / "data" / "processed" / "fraud_processed.csv"
output_path.parent.mkdir(parents=True, exist_ok=True)
fraud_df.to_csv(output_path, index=False)




The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["txn_count_24h"].fillna(0, inplace=True)
