In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pytest
from rule_filtering.rule_filters import FilterRules, GreedyFilter, FilterCorrelatedRules
from rule_optimisation.optimisation_functions import FScore, AlertsPerDay
from correlation_reduction.similarity_functions import CosineSimilarity, JaccardSimilarity
from correlation_reduction.correlation_reduction_methods import AgglomerativeClusteringFeatureReduction
import argo_utils.argo_utils as argo_utils
import numpy as np
import pandas as pd
import random
from itertools import product


def create_data():
    """
    Build a small, deterministic synthetic data set of binary rule flags.

    Both `random` and `np.random` are seeded with 0 on every call, so
    repeated calls return identical objects.

    Returns
    -------
    X_rules : pd.DataFrame
        1000 rows indexed 0..999 with columns "Rule1" .. "Rule5". Rule1-3
        each flag a fixed run of six rows inside the positive-label range;
        Rule4 and Rule5 are rounded uniform draws whose bounds depend on
        the label.
    y : pd.Series
        Label: 0 for the first 980 rows, 1 for the last 20.
    weights : pd.Series
        10 where the label is 1, otherwise 1.
    """
    n_rows = 1000
    n_hits = 6

    def draw_per_label(labels, pos_low, pos_high, neg_low, neg_high, sampler):
        # Exactly one sampler call per label, in label order, so the seeded
        # random stream is consumed the same way on every run.
        drawn = []
        for label in labels:
            if label == 1:
                drawn.append(sampler(pos_low, pos_high))
            else:
                drawn.append(sampler(neg_low, neg_high))
        return drawn

    def to_flags(samples):
        # Clamp at zero, then round to the nearest integer.
        return [round(max(value, 0)) for value in samples]

    def fixed_run(start):
        # Zeros everywhere except `n_hits` consecutive ones beginning at `start`.
        return [0] * start + [1] * n_hits + [0] * (n_rows - start - n_hits)

    def weight_for(label):
        if label == 1:
            return 10
        return 1

    random.seed(0)
    np.random.seed(0)

    row_index = list(range(0, n_rows))
    y = pd.Series(data=[0] * 980 + [1] * 20, index=row_index)

    # Rule4 must be drawn before Rule5 to keep the random sequence unchanged.
    rule4 = to_flags(draw_per_label(y, 0.4, 1, 0.5, 0.6, np.random.uniform))
    rule5 = to_flags(draw_per_label(y, 0.2, 1, 0, 0.6, np.random.uniform))

    columns = {
        "Rule1": fixed_run(980),
        "Rule2": fixed_run(987),
        "Rule3": fixed_run(993),
        "Rule4": rule4,
        "Rule5": rule5,
    }
    X_rules = pd.DataFrame(data=columns, index=row_index)
    weights = y.apply(weight_for)
    return X_rules, y, weights

In [3]:
# Optimisation function object; its `fit` method is handed to FilterRules as
# `opt_func` further down. The two 10s are the expected alerts per day and the
# number of days covered by the data, as named by the keyword arguments.
apd = AlertsPerDay(n_alerts_expected_per_day=10, no_of_days_in_file=10)

In [11]:
# Generate the synthetic rule-flag frame, labels and sample weights.
# Only X_rules is used by the cells shown below.
X_rules, y, weights = create_data()

In [15]:
# Filter specification passed to FilterRules: one entry keyed by metric name,
# carrying a comparison operator and a threshold value.
# NOTE(review): presumably keeps rules whose OptMetric is >= -100 - confirm
# against the FilterRules documentation.
filters = {'OptMetric': {'Operator': '>=', 'Value': -100}}

In [16]:
# Configure the rule filter with the `filters` spec and the AlertsPerDay
# `fit` method as the optimisation function.
# NOTE(review): presumably `opt_func` produces the 'OptMetric' value that the
# filter compares against - confirm in FilterRules.
fr = FilterRules(filters=filters, opt_func=apd.fit)

In [17]:
# Fit on the rule flags only; no labels or sample weights are passed here.
fr.fit(X_rules=X_rules)

In [18]:
# Show the rule names retained after fitting.
fr.rules_to_keep

['Rule1', 'Rule2', 'Rule3', 'Rule5']

In [22]:
# Show which rules are listed in the rule descriptions after fitting.
# NOTE(review): the captured output still lists 'Rule4', although the
# captured `rules_to_keep` output omits it - confirm whether
# `rule_descriptions` is meant to be filtered as well.
fr.rule_descriptions.index

Index(['Rule1', 'Rule2', 'Rule3', 'Rule4', 'Rule5'], dtype='object')