# Rule Mining

#### Dependencies

In [None]:
# standard-library and third-party imports
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier


# local imports
from lib.model import OSDT
from lib.model_selection import train_cross_validate
from lib.data_processing import read_dataset

#### Load Dataset

In [None]:
# Load the dataset stored at data/preprocessed/compas-binary.csv.
dataset = read_dataset('data/preprocessed/compas-binary.csv')

# n rows, m columns; m counts the label column as well.
n, m = dataset.shape

# Every column except the last is a feature; the last column is the label.
X, y = dataset.values[:, :-1], dataset.values[:, -1]

# Association Rule Mining (Only one feature)

In [None]:
# Single-feature association rules of the form "feature == v  =>  y == 1".
# Only the feature columns are candidate antecedents: the last column of
# dataset.values is the label (y), and mining it against itself always yields
# confidence 1.0, which would select the label as a "feature".
feature_columns = [dataset.values[:, i] for i in range(m - 1)]

# support: fraction of all samples where the antecedent holds and y == 1
# (assumes features and label are 0/1 encoded -- TODO confirm)
support_1 = [(column * y).mean() for column in feature_columns]
print('support_1', support_1)

support_0 = [((1 - column) * y).mean() for column in feature_columns]
print('support_0', support_0)

# confidence: among the samples where the antecedent holds, the fraction with y == 1
# NOTE(review): a column that is constant 0 (or constant 1) makes one of these 0/0.
confidence_1 = [(column * y).sum() / column.sum() for column in feature_columns]
print('confidence_1', confidence_1)

confidence_0 = [((column == 0) * y).sum() / (column == 0).sum() for column in feature_columns]
print('confidence_0', confidence_0)

# Keep a feature when either of its two rules reaches 50% confidence.
idx_mine = [(c1 >= 0.5) or (c0 >= 0.5) for c1, c0 in zip(confidence_1, confidence_0)]
# Pad the mask back to the full column count with the label deselected, so it
# can index dataset.values directly without leaking y into the selected features.
idx_mine.append(False)
print('idx_mine', idx_mine)

# Not sure if this is just an experiment?
# NOTE(review): x_idx_mine is not defined in this cell -- presumably an older name for idx_mine.
# x_idx_mine[9]=True
# x_idx_mine

###### Manually selected features (Uncomment the one you want to use)

In [None]:
# Manually select 5 features, according to the CORELS paper when lambda=0.01
# sex:Female, age:18-20,age:21-22, juvenile-crimes:=0, priors:>3
# idx_mine = [0,1,2,8,12]

# sex:Female, age:18-20,age:21-22, priors:2-3, priors:>3
# idx_mine = [0,1,2,9,12]

# Manually select 6 features, according to the CORELS paper when lambda=0.01
# sex:Female, age:18-20,age:21-22, juvenile-crimes:=0, priors:2-3, priors:>3
# idx_mine = [0,1,2,8,9,12]

# Manually select 8 features, according to the CORELS paper when lambda=0.01
# sex:Female, age:18-20,age:21-22, juvenile-crimes:=0, priors:2-3, priors:=0, priors:>3
# idx_mine = [0,1,2,7,8,9,10,12]

# Manually select 7 features, according to the CORELS paper when lambda=0.01
# sex:Female, age:18-20,age:21-22, juvenile-crimes:=0, priors:2-3, priors:=0, priors:>3
# idx_mine = [0,1,2,8,9,10,12]

# OSDT objective using mined rules (Tree trained using CART)

In [None]:
# Restrict the data to the columns picked by idx_mine
X_mine = dataset.values[:, idx_mine]
print('X_mine', X_mine)

# Regularisation strength; it also sets the minimum split and leaf sizes below
lamb = 0.0035

# CART tree capped at depth 5, with split/leaf sizes bounded below by
# lamb-scaled sample counts
model = DecisionTreeClassifier(
    max_depth=5,
    min_samples_split=math.ceil(lamb * 2 * n),
    min_samples_leaf=math.ceil(lamb * n),
)
model.fit(X_mine, y)
training_accuracy = model.score(X_mine, y)

# Score the CART tree with the OSDT objective: training error plus lamb per leaf.
# The leaf count is derived from the total node count as (nodes + 1) / 2.
leaf_count = (model.tree_.node_count + 1) / 2
training_error = 1 - training_accuracy
risk = training_error + lamb * leaf_count
print('R_c', risk)

In [None]:
# TODO: There are some experiments going on below that we'll need to refactor

## OSDT without similar support bound

In [None]:
from osdt import bbound
from osdt_penalize_depth import bbound_penalize_depth

In [None]:
# Experimental bbound runs on x_compas1 / y_compas1, all with the "curiosity"
# prior metric and init_cart=False; the cells vary lamb, logon, MAXDEPTH and
# the support-related flags.
# NOTE(review): x_compas1 and y_compas1 are not defined in the cells shown
# earlier in this notebook -- confirm where they are loaded before re-running.
# lamb=100/6907 with logon=True (presumably 6907 is the sample count -- TODO confirm)
bbound(x_compas1, y_compas1, lamb=100/6907, prior_metric="curiosity", init_cart=False, logon=True)

In [21]:
# same arguments as the previous cell, without logon
bbound(x_compas1, y_compas1, lamb=100/6907, prior_metric="curiosity", init_cart=False)

In [None]:
# lamb=48/6907
bbound(x_compas1, y_compas1, lamb=48/6907, prior_metric="curiosity", init_cart=False)

In [None]:
# lamb=48/6907 with MAXDEPTH=3
bbound(x_compas1, y_compas1, lamb=48/6907, prior_metric="curiosity", init_cart=False, MAXDEPTH=3)

In [None]:
# lamb=100/6907 with MAXDEPTH=2
bbound(x_compas1, y_compas1, lamb=100/6907, prior_metric="curiosity", init_cart=False, MAXDEPTH=2)

In [None]:
# lamb=0.005 with the support, accu_support and incre_support flags switched off
bbound(x_compas1, y_compas1, lamb=0.005, prior_metric="curiosity", init_cart=False, 
       support=False, accu_support=False, incre_support=False)

In [None]:
# bbound_penalize_depth runs on x_compas1 / y_compas1 with the "curiosity"
# prior metric, init_cart=False and the support, accu_support and
# incre_support flags switched off; the two cells differ only in lamb.
# NOTE(review): x_compas1 and y_compas1 are not defined in the cells shown
# earlier in this notebook -- confirm where they are loaded.
# lamb=0.02
bbound_penalize_depth(x_compas1, y_compas1, lamb=0.02, prior_metric="curiosity", init_cart=False, 
                    support=False, accu_support=False, incre_support=False)

In [None]:
# lamb=0.019
bbound_penalize_depth(x_compas1, y_compas1, lamb=0.019, prior_metric="curiosity", init_cart=False, 
                      support=False, accu_support=False, incre_support=False)

In [None]:
import cProfile

In [None]:
# 2019
# compas1, all 12 features
# Profile a bbound call (lamb=0.005, "curiosity" prior metric) with cProfile.
# NOTE(review): x_compas1 and y_compas1 are not defined in the cells shown
# earlier in this notebook -- confirm where they are loaded.
cProfile.run('bbound(x_compas1, y_compas1, lamb=0.005, prior_metric="curiosity")')
#####  

In [None]:
# 2019
# compas1, all 12 features
# Same profiling run with lamb=0.0025.
cProfile.run('bbound(x_compas1, y_compas1, lamb=0.0025, prior_metric="curiosity")')
#####  

In [None]:
# compas1, all 12 features
# The five cells below each run bbound_nosimilar_multicopies with lamb=0.005 and
# the "objective" prior metric, switching off exactly one keyword flag per cell.
# NOTE(review): bbound_nosimilar_multicopies is not imported, and x_compas1 /
# y_compas1 are not defined, in the cells shown earlier -- confirm their origin.
# support=False
bbound_nosimilar_multicopies(x_compas1, y_compas1, lamb=0.005, prior_metric="objective", support=False)
#####  

In [None]:
# compas1, all 12 features
# accu_support=False
bbound_nosimilar_multicopies(x_compas1, y_compas1, lamb=0.005, prior_metric="objective", accu_support=False)
#####  

In [None]:
# compas1, all 12 features
# incre_support=False
bbound_nosimilar_multicopies(x_compas1, y_compas1, lamb=0.005, prior_metric="objective", incre_support=False)
#####  

In [None]:
# compas1, all 12 features
# equiv_points=False
bbound_nosimilar_multicopies(x_compas1, y_compas1, lamb=0.005, prior_metric="objective", equiv_points=False)
#####  

In [None]:
# compas1, all 12 features
# lookahead=False
bbound_nosimilar_multicopies(x_compas1, y_compas1, lamb=0.005, prior_metric="objective", lookahead=False)
#####  

In [None]:
# compas1
# bbound_nosimilar_multicopies runs on compas1 with the "objective" prior metric:
# first all columns with lamb=0.0025, then column subsets idx4 .. idx11 with
# lamb=0.005, then all columns with lamb=0.01.
# NOTE(review): bbound_nosimilar_multicopies, x_compas1, y_compas1 and
# idx4 .. idx11 are not defined in the cells shown earlier -- confirm their origin.
bbound_nosimilar_multicopies(x_compas1, y_compas1, lamb=0.0025, prior_metric="objective")
#####  

In [None]:
# compas1, 4 features
bbound_nosimilar_multicopies(x_compas1[:,idx4], y_compas1, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# compas1, 5 features
bbound_nosimilar_multicopies(x_compas1[:,idx5], y_compas1, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# compas1, 6 features
bbound_nosimilar_multicopies(x_compas1[:,idx6], y_compas1, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# compas1, 7 features
bbound_nosimilar_multicopies(x_compas1[:,idx7], y_compas1, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# compas1, 8 features
bbound_nosimilar_multicopies(x_compas1[:,idx8], y_compas1, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# compas1, 9 features
bbound_nosimilar_multicopies(x_compas1[:,idx9], y_compas1, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# compas1, 10 features
bbound_nosimilar_multicopies(x_compas1[:,idx10], y_compas1, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# compas1, 11 features
bbound_nosimilar_multicopies(x_compas1[:,idx11], y_compas1, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# compas1
# all columns again, lamb=0.01
bbound_nosimilar_multicopies(x_compas1, y_compas1, lamb=0.01, prior_metric="objective")
#####  

In [None]:
# monk1
# bbound_nosimilar_multicopies runs on the monk1 / monk2 / monk3 arrays with the
# "objective" prior metric; the cells differ in dataset and lamb.
# NOTE(review): bbound_nosimilar_multicopies and the x_monk* / y_monk* arrays are
# not defined in the cells shown earlier -- confirm their origin.
bbound_nosimilar_multicopies(x_monk1, y_monk1, lamb=0.05, prior_metric="objective")
#####  

In [None]:
# monk1
bbound_nosimilar_multicopies(x_monk1, y_monk1, lamb=0.02, prior_metric="objective")
#####  

In [None]:
# monk2
bbound_nosimilar_multicopies(x_monk2, y_monk2, lamb=0.025, prior_metric="objective")
#####  

In [None]:
# monk2
bbound_nosimilar_multicopies(x_monk2, y_monk2, lamb=0.035, prior_metric="objective")
#####  

In [None]:
# monk3
bbound_nosimilar_multicopies(x_monk3, y_monk3, lamb=0.02, prior_metric="objective")
#####  

In [None]:
# monk3
bbound_nosimilar_multicopies(x_monk3, y_monk3, lamb=0.025, prior_metric="objective")
#####  

In [None]:
# monk3
bbound_nosimilar_multicopies(x_monk3, y_monk3, lamb=0.03, prior_metric="objective")
#####  

In [None]:
# voting-records
# bbound_nosimilar_multicopies runs on x_votes / y_votes with the "objective"
# prior metric, at lamb=0.005 and then lamb=0.0035.
# NOTE(review): bbound_nosimilar_multicopies, x_votes and y_votes are not
# defined in the cells shown earlier -- confirm their origin.
bbound_nosimilar_multicopies(x_votes, y_votes, lamb=0.005, prior_metric="objective")
#####  

In [None]:
# voting-records
bbound_nosimilar_multicopies(x_votes, y_votes, lamb=0.0035, prior_metric="objective")
#####  

In [None]:
### 11.3######################## 
# order x's columns according to gini_reduction
# The cells below run bbound_nosimilar_multicopies on x_all / y with the
# "objective" prior metric, MAXDEPTH=13 and R_c0=1, varying only lamb.
# NOTE(review): bbound_nosimilar_multicopies and x_all are not defined in the
# cells shown earlier, and no column-ordering step is visible here -- confirm
# that x_all is already ordered by gini reduction.
bbound_nosimilar_multicopies(x_all, y, lamb=0.0035, prior_metric="objective", MAXDEPTH = 13, R_c0=1)
#####  

In [None]:
### 11.3######################## 
# order x's columns according to gini_reduction
bbound_nosimilar_multicopies(x_all, y, lamb=0.005, prior_metric="objective", MAXDEPTH = 13, R_c0=1)
#####  

In [None]:
### 11.3######################## 
# order x's columns according to gini_reduction
bbound_nosimilar_multicopies(x_all, y, lamb=0.004, prior_metric="objective", MAXDEPTH = 13, R_c0=1)
#####  

In [None]:
### 11.3######################## 
# order x's columns according to gini_reduction
bbound_nosimilar_multicopies(x_all, y, lamb=0.0045, prior_metric="objective", MAXDEPTH = 13, R_c0=1)
#####  

In [None]:
### 11.3######################## 
# order x's columns according to gini_reduction
# NOTE(review): same arguments as the earlier lamb=0.005 cell in this group.
bbound_nosimilar_multicopies(x_all, y, lamb=0.005, prior_metric="objective", MAXDEPTH = 13, R_c0=1)
#####  

In [None]:
### 11.3######################## 
# order x's columns according to gini_reduction
bbound_nosimilar_multicopies(x_all, y, lamb=0.00475, prior_metric="objective", MAXDEPTH = 13, R_c0=1)
#####  

In [None]:
### 11.3######################## 
# order x's columns according to gini_reduction
bbound_nosimilar_multicopies(x_all, y, lamb=0.00465, prior_metric="objective", MAXDEPTH = 13, R_c0=1)
#####  

In [None]:
### 11.3######################## 
# order x's columns according to gini_reduction
bbound_nosimilar_multicopies(x_all, y, lamb=0.05, prior_metric="objective", MAXDEPTH = 13, R_c0=1)
#####  