In [None]:
import pandas as pd
from src.data_cleaning import DataCleaner
from src.eda import EDA
from src.geo_features import GeoFeatureEngineer
from src.feature_engineering import FeatureEngineer

# Load data
# The path is relative to the notebook's working directory; the cell ends by
# previewing the first rows of the frame that every later cell starts from.
fraud_csv_path = "../data/raw/Fraud_Data.csv"
df = pd.read_csv(fraud_csv_path)
df.head()


In [None]:
# Cleaning
# Columns handed to DataCleaner.fix_data_types below.
columns_to_retype = ["signup_time", "purchase_time"]

# One cleaner is built from the loaded frame; `df` is then rebound to the
# return value of each step, in this order.
cleaner = DataCleaner(df)
df = cleaner.remove_duplicates()
df = cleaner.handle_missing_values()
df = cleaner.fix_data_types(columns_to_retype)

# Summarise the frame (columns, dtypes, non-null counts) after cleaning.
df.info()

In [None]:
# EDA
# Column names shared by the EDA helper calls below.
value_col = "purchase_value"
class_col = "class"

# Run the project's EDA helpers on the current frame: the value column on its
# own, the value column against the class column, then the class column alone.
EDA.univariate_analysis(df, value_col)
EDA.bivariate_analysis(df, value_col, class_col)
EDA.class_distribution(df, class_col)

In [None]:
# Geolocation
# Read the IpAddress_to_Country lookup CSV (path relative to the notebook's
# working directory).
ip_country_csv_path = "../data/raw/IpAddress_to_Country.csv"
ip_df = pd.read_csv(ip_country_csv_path)

# Pair the current frame with the lookup table.
geo = GeoFeatureEngineer(df, ip_df)

# The return value is not used here.
# NOTE(review): presumably this updates state held by `geo` that
# merge_country() relies on — confirm in src/geo_features.py.
geo.convert_ip_to_int()

# Rebind `df` to the frame returned by the country merge.
df = geo.merge_country()



In [None]:
# Feature Engineering
# Build a FeatureEngineer from the current frame, then rebind `df` to the
# return value of each step in turn.
fe = FeatureEngineer(df)
df = fe.add_time_features()
# NOTE(review): `df` is overwritten with whatever transaction_velocity()
# returns. This presumably still carries the columns added by
# add_time_features() (i.e. `fe` accumulates features internally) — confirm
# in src/feature_engineering.py, otherwise the time features are lost here.
df = fe.transaction_velocity()

# Preview the first rows of the resulting frame.
df.head()