In [None]:
import pandas as pd
import warnings

# Suppress all Python warnings from this point on.
warnings.filterwarnings('ignore')

# Read the dataset and discard the 'Time' column in the same expression.
card_df = pd.read_csv('creditcard.csv').drop(columns='Time')

In [None]:
import numpy as np
from pandas import DataFrame, Index

def get_outlier(df: DataFrame = None, column=None, weight: float = 1.5) -> Index:
    """Return the index labels of fraud rows whose `column` value is an IQR outlier.

    Only rows where ``df['Class'] == 1`` are considered, both when computing
    the quartiles and when flagging outliers; rows of any other class are
    never returned.

    Parameters
    ----------
    df : DataFrame
        Frame containing a ``'Class'`` column and `column`.
    column : hashable
        Label of the column to inspect.
    weight : float, default 1.5
        Multiplier applied to the inter-quartile range to position the fences.

    Returns
    -------
    Index
        Labels (taken from ``df.index``) of the fraud rows whose value lies
        strictly below ``Q1 - weight * IQR`` or strictly above
        ``Q3 + weight * IQR``. Empty when `df` has no fraud rows.
    """
    fraud = df.loc[df['Class'] == 1, column]

    # Quartiles are undefined without any fraud rows, so nothing can be an
    # outlier; return the (empty) index instead of calling np.percentile on
    # an empty array.
    if fraud.empty:
        return fraud.index

    quantile_25, quantile_75 = np.percentile(fraud.values, [25, 75])

    # Fences sit `weight` IQRs outside the quartiles.
    iqr_weight = (quantile_75 - quantile_25) * weight
    lowest_val = quantile_25 - iqr_weight
    highest_val = quantile_75 + iqr_weight

    is_outlier = (fraud < lowest_val) | (fraud > highest_val)
    return fraud[is_outlier].index

In [None]:
# Outliers of V14 using a fence of 1.5 * IQR; the resulting index is shown as the cell output.
outlier_index = get_outlier(card_df, 'V14', 1.5)
outlier_index

In [None]:
# get_outlier returns index *labels*, so select by label (.loc) rather than by
# position (.iloc); the two only coincide while card_df keeps a default RangeIndex.
card_df.loc[outlier_index, :]

In [None]:
from commons import Dataset, Model, ModelEvaluator, LogParameter, metrics
from lightgbm import LGBMClassifier

# NOTE(review): `cols` is not read anywhere in this cell — presumably it was
# meant to drive the loop below, with None as a "no removal" baseline; confirm.
cols = card_df.columns.tolist()
cols.append(None)

# Run one cross-validated experiment per feature column, each time training on
# the data with that column's outlier rows removed.
for col in card_df.columns:
    # 'Class' is the label, not a feature to filter on.
    if col == 'Class':
        continue

    outlier_index = get_outlier(df=card_df, column=col, weight=1.5)

    # drop() returns a new frame, so card_df itself stays untouched between iterations.
    df = card_df.drop(index=outlier_index).reset_index(drop=True)

    dataset = Dataset(df)
    model = Model(
        LGBMClassifier(
            n_estimators=1000,
            num_leaves=64,
            n_jobs=-1,
            boost_from_average=False,
        )
    )
    evaluator = ModelEvaluator(model, dataset)

    # Tag the run with the column whose outliers were removed.
    log_params = LogParameter(tag=col, experiment_name='outlier removal')
    evaluator.cross_val_eval(metrics, log_params=log_params)