In [34]:
import itertools
import random

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.01  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            rule_parts.append(f"{var} > {variables[var]['value']}")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset

for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        strategy_variables = {var: {'value': 0} for var in combo}
        fraud_count = 0
        rules_with_fraud_counts = []
        filtered_dataset = [data for data in dataset if data['score'] > 500]
        for data in filtered_dataset:
            for var in combo:
                strategy_variables[var]['value'] = data[var]
            rule = create_strategy_rule(strategy_variables)
            if data['fraud'] == 1:
                fraud_count += 1

            rules_with_fraud_counts.append((rule, fraud_count))

# Sort the rules based on the fraud counts in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts
for rule, fraud_count in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {fraud_count}/{num_fraud_cases}")
    print("% fraud captured", (fraud_count / num_fraud_cases) * 100)
    print()


Rule: amount > 191 and last_amount_60day > 52 and address_change60day > 1 and score > 500
Total frauds captured: 1020/2000
% fraud captured 51.0

Rule: amount > 358 and last_amount_60day > 81 and address_change60day > 1 and score > 500
Total frauds captured: 1020/2000
% fraud captured 51.0

Rule: amount > 421 and last_amount_60day > 126 and address_change60day > 1 and score > 500
Total frauds captured: 1020/2000
% fraud captured 51.0

Rule: amount > 449 and last_amount_60day > 11 and address_change60day > 0 and score > 500
Total frauds captured: 1020/2000
% fraud captured 51.0

Rule: amount > 17 and last_amount_60day > 170 and address_change60day > 3 and score > 500
Total frauds captured: 1020/2000
% fraud captured 51.0

Rule: amount > 297 and last_amount_60day > 38 and address_change60day > 0 and score > 500
Total frauds captured: 1020/2000
% fraud captured 51.0

Rule: amount > 82 and last_amount_60day > 20 and address_change60day > 3 and score > 500
Total frauds captured: 1020/2000
%

In [2]:
import itertools
import random
import pandas as pd

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.01  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset)

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            rule_parts.append(f"{var} > {variables[var]['value']}")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset
rules_with_fraud_counts = []
for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        strategy_variables = {var: {'value': 0} for var in combo}
        fraud_count = df[df['score'] > 500]['fraud'].sum()
        rules_with_fraud_counts.append((create_strategy_rule(strategy_variables), fraud_count))

# Sort the rules based on the fraud counts in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts
for rule, fraud_count in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {fraud_count}/{num_fraud_cases}")
    print("% fraud captured", (fraud_count / num_fraud_cases) * 100)
    print()


Rule: amount > 0 and last_amount_60day > 0 and address_change60day > 0 and score > 500
Total frauds captured: 984/2000
% fraud captured 49.2



In [4]:
import itertools
import random
import pandas as pd

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.02  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset)

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            rule_parts.append(f"{var} > {variables[var]['value']}")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset
rules_with_fraud_counts = []
for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        strategy_variables = {var: {'value': 0} for var in combo}
        filtered_dataset = df[df['score'] > 500]
        for var in combo:
            filtered_dataset = filtered_dataset[filtered_dataset[var] > 0]
            strategy_variables[var]['value'] = filtered_dataset[var].max()
        fraud_count = filtered_dataset['fraud'].sum()
        fraud_percentage = (fraud_count / num_fraud_cases) * 100
        rules_with_fraud_counts.append((create_strategy_rule(strategy_variables), fraud_percentage))

# Sort the rules based on the fraud counts percentage in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts percentage
for rule, fraud_percentage in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {int(fraud_percentage * num_fraud_cases / 100)}/{num_fraud_cases}")
    print("% fraud captured", fraud_percentage)
    print()


Rule: amount > 500 and last_amount_60day > 200 and address_change60day > 3 and score > 500
Total frauds captured: 1572/4000
% fraud captured 39.300000000000004



In [5]:
import itertools
import random
import pandas as pd

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.01  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset)

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            rule_parts.append(f"{var} > {variables[var]['value']}")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset
rules_with_fraud_counts = []
for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        strategy_variables = {var: {'value': 0} for var in combo}
        filtered_dataset = df[df['score'] > 500]
        for var in combo:
            filtered_dataset = filtered_dataset[filtered_dataset[var] > 0]
            strategy_variables[var]['value'] = filtered_dataset[var].max()
        fraud_count = filtered_dataset['fraud'].sum()
        fraud_percentage = (fraud_count / num_fraud_cases) * 100
        rules_with_fraud_counts.append((create_strategy_rule(strategy_variables), fraud_percentage))

# Sort the rules based on the fraud counts percentage in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts percentage
for rule, fraud_percentage in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {int(fraud_percentage * num_fraud_cases / 100)}/{num_fraud_cases}")
    print("% fraud captured", fraud_percentage)
    print()


Rule: amount > 500 and last_amount_60day > 200 and address_change60day > 3 and score > 500
Total frauds captured: 766/2000
% fraud captured 38.3



In [8]:
import itertools
import random
import pandas as pd

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.01  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset)

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            if variables[var]['value'] is not None:
                rule_parts.append(f"{var} > {variables[var]['value']}")
            else:
                rule_parts.append(f"{var} is not None")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset
rules_with_fraud_counts = []
for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        strategy_variables = {var: {'value': None} for var in combo}
        fraud_count = 0
        for idx, data in df.iterrows():
            for var in combo:
                strategy_variables[var]['value'] = data[var]
            rule = create_strategy_rule(strategy_variables)
            if data['fraud'] == 1:
                fraud_count += 1
        fraud_percentage = (fraud_count / num_fraud_cases) * 100
        rules_with_fraud_counts.append((rule, fraud_percentage))

# Sort the rules based on the fraud counts percentage in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts percentage
for rule, fraud_percentage in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {int(fraud_percentage * num_fraud_cases / 100)}/{num_fraud_cases}")
    print("% fraud captured", fraud_percentage)
    print()


Rule: amount > 407 and last_amount_60day > 45 and address_change60day > 1 and score > 500
Total frauds captured: 2000/2000
% fraud captured 100.0



In [9]:
import itertools
import random
import pandas as pd

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.01  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset)

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            if variables[var]['value'] is not None:
                rule_parts.append(f"{var} > {variables[var]['value']}")
            else:
                rule_parts.append(f"{var} is not None")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset
rules_with_fraud_counts = []
for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        fraud_count = 0
        for idx, data in df.iterrows():
            rule_satisfied = all(data[var] > 0 for var in combo)
            if rule_satisfied and data['fraud'] == 1:
                fraud_count += 1
        fraud_percentage = (fraud_count / num_fraud_cases) * 100
        rule = create_strategy_rule({var: {'value': None} for var in combo})
        rules_with_fraud_counts.append((rule, fraud_percentage))

# Sort the rules based on the fraud counts percentage in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts percentage
for rule, fraud_percentage in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {int(fraud_percentage * num_fraud_cases / 100)}/{num_fraud_cases}")
    print("% fraud captured", fraud_percentage)
    print()


Rule: amount is not None and last_amount_60day is not None and address_change60day is not None and score > 500
Total frauds captured: 1524/2000
% fraud captured 76.2



In [10]:
import itertools
import random
import pandas as pd

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.01  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset)

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            if variables[var]['value'] is not None:
                rule_parts.append(f"{var} > {variables[var]['value']}")
            else:
                rule_parts.append(f"{var} is not None")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset
rules_with_fraud_counts = []
for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        fraud_count = 0
        filtered_dataset = df[df['score'] > 500]
        for idx, data in filtered_dataset.iterrows():
            strategy_variables = {var: {'value': data[var]} for var in combo}
            rule = create_strategy_rule(strategy_variables)
            if data['fraud'] == 1:
                fraud_count += 1
        fraud_percentage = (fraud_count / num_fraud_cases) * 100
        rules_with_fraud_counts.append((rule, fraud_percentage))

# Sort the rules based on the fraud counts percentage in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts percentage
for rule, fraud_percentage in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {int(fraud_percentage * num_fraud_cases / 100)}/{num_fraud_cases}")
    print("% fraud captured", fraud_percentage)
    print()


Rule: amount > 138 and last_amount_60day > 151 and address_change60day > 0 and score > 500
Total frauds captured: 1011/2000
% fraud captured 50.55



In [37]:
import itertools
import random
import pandas as pd

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.01  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset)

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            if variables[var]['value'] is not None:
                rule_parts.append(f"{var} > {variables[var]['value']}")
            else:
                rule_parts.append(f"{var} is not None")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset
#rules_with_fraud_counts = []
for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        fraud_count = 0
        rules_with_fraud_counts = []
        for idx, data in df.iterrows():
            strategy_variables = {var: {'value': data[var]} for var in combo}
            rule = create_strategy_rule(strategy_variables)
            #print("rule", rule)
            if data['fraud'] == 1:
                fraud_count += 1
            rule_fraud_captured = (str(rule) + "|" + str(fraud_count))
            #print("rule_fraud_captured", rule_fraud_captured)
            fraud_percentage = (fraud_count / num_fraud_cases) * 100
            rules_with_fraud_counts.append((rule, fraud_percentage))
#print("rules_with_fraud_counts",rules_with_fraud_counts)


#Sort the rules based on the fraud counts percentage in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts percentage
for rule, fraud_percentage in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {int(fraud_percentage * num_fraud_cases / 100)}/{num_fraud_cases}")
    print("% fraud captured", fraud_percentage)
    print()


Rule: amount > 364 and last_amount_60day > 136 and address_change60day > 0 and score > 500
Total frauds captured: 2000/2000
% fraud captured 100.0

Rule: amount > 435 and last_amount_60day > 89 and address_change60day > 0 and score > 500
Total frauds captured: 2000/2000
% fraud captured 100.0

Rule: amount > 309 and last_amount_60day > 66 and address_change60day > 2 and score > 500
Total frauds captured: 2000/2000
% fraud captured 100.0

Rule: amount > 427 and last_amount_60day > 15 and address_change60day > 1 and score > 500
Total frauds captured: 2000/2000
% fraud captured 100.0

Rule: amount > 152 and last_amount_60day > 156 and address_change60day > 0 and score > 500
Total frauds captured: 1999/2000
% fraud captured 99.95

Rule: amount > 202 and last_amount_60day > 144 and address_change60day > 1 and score > 500
Total frauds captured: 1999/2000
% fraud captured 99.95

Rule: amount > 240 and last_amount_60day > 145 and address_change60day > 0 and score > 500
Total frauds captured: 1

In [35]:
import itertools
import random
import pandas as pd

dataset_size = 200000  # Total number of observations in the dataset
fraud_rate = 0.01  # Desired fraud rate (2% of the dataset)

# Calculate the number of fraud cases based on the fraud rate
num_fraud_cases = int(dataset_size * fraud_rate)

# Generate a dataset of observations with random variable values and fraud cases
dataset = []
for _ in range(dataset_size):
    data = {
        'amount': random.randint(1, 500),
        'last_amount_60day': random.randint(0, 200),
        'address_change60day': random.randint(0, 3),
        'score': random.randint(0, 999),
        'fraud': 0
    }
    dataset.append(data)

# Set the fraud cases in the dataset
fraud_indices = random.sample(range(dataset_size), num_fraud_cases)
for idx in fraud_indices:
    dataset[idx]['fraud'] = 1

# Convert the dataset to a pandas DataFrame
df = pd.DataFrame(dataset)

def create_strategy_rule(variables):
    rule_parts = []
    for var in variables:
        if var == 'score':
            rule_parts.append(f"{var} > 500")
        else:
            if variables[var]['value'] is not None:
                rule_parts.append(f"{var} > {variables[var]['value']}")
            else:
                rule_parts.append(f"{var} is not None")
    rule = " and ".join(rule_parts)
    return rule

# Define the list of variables to create rules on
variables = ['amount', 'last_amount_60day', 'address_change60day', 'score']

# Generate combinations of variables
combinations = []
for r in range(1, len(variables) + 1):
    combinations.extend(itertools.combinations(variables, r))

# Apply the strategy rules to the dataset
rules_with_fraud_counts = []
for combo in combinations:
    if set(variables).issubset(set(combo)):  # Check if all variables are present in the combination
        fraud_count = 0
        for _, data in df.iterrows():
            strategy_variables = {var: {'value': data[var]} for var in combo}
            rule = create_strategy_rule(strategy_variables)
            if data['fraud'] == 1:
                fraud_count += 1
        fraud_percentage = (fraud_count / num_fraud_cases) * 100
        rules_with_fraud_counts.append((rule, fraud_percentage))

# Sort the rules based on the fraud counts percentage in descending order
sorted_rules = sorted(rules_with_fraud_counts, key=lambda x: x[1], reverse=True)

# Retrieve the top 10 rules
top_10_rules = sorted_rules[:10]

# Display the top 10 rules and their fraud counts percentage
for rule, fraud_percentage in top_10_rules:
    print(f"Rule: {rule}")
    print(f"Total frauds captured: {int(fraud_percentage * num_fraud_cases / 100)}/{num_fraud_cases}")
    print("% fraud captured", fraud_percentage)
    print()


Rule: amount > 484 and last_amount_60day > 154 and address_change60day > 2 and score > 500
Total frauds captured: 2000/2000
% fraud captured 100.0

