In [7]:
import time
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, ShuffleSplit, RandomizedSearchCV

#### LazyFca 

#### Algorithm  1

In [130]:
class LazyFca1():
    def __init__(self, scale_factor = 5):
        self.scale_factor = scale_factor
        
    def fit(self, X, Y):
        self.positive_context = X[Y == 1]
        self.negative_context = X[Y == 0]
        
    def predict(self, X):
        Y_predict = []
        for x in X:
            positive_vote = self.caculate_vote(x, 'positive')
            negative_vote = self.caculate_vote(x, 'negative')
            result = True if positive_vote > negative_vote else False
            Y_predict.append(result)
            
        return Y_predict
    
    def caculate_vote(self, x, context):
        if context == 'positive':
            base_context = self.positive_context
            opposite_context = self.negative_context
        else:
            base_context = self.negative_context
            opposite_context = self.positive_context
            
        final_vote = 0
        for g in base_context:
            x_dash = np.where(x == 1)[0]
            g_dash = np.where(g == 1)[0]
            intersection = list(set(x_dash) & set(g_dash))
            if intersection:
                base_vote = np.sum(np.all(base_context[:, intersection], axis=1))\
                            /base_context.shape[0]
                opposite_vote = np.sum(np.all(opposite_context[:, intersection], axis=1))\
                            /opposite_context.shape[0]
                if base_vote > opposite_vote * self.scale_factor:
                    final_vote += len(intersection) / len(x)
                    
        final_vote = final_vote / base_context.shape[0]
        return final_vote
    
    def get_params(self, deep = True):
        return {'scale_factor': self.scale_factor}
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

#### Algorithm  2

In [133]:
class LazyFca2():
    def __init__(self, scale_factor = 5):
        self.scale_factor = scale_factor
        
    def fit(self, X, Y):
        self.num_attr = X.shape[1]
        self.positive_context = X[Y == 1]
        self.negative_context = X[Y == 0]
        
    def predict(self, X):
        Y_predict = []
        for x in X:
            positive_vote = self.caculate_vote(x, 'positive')
            negative_vote = self.caculate_vote(x, 'negative')
            result = True if positive_vote > negative_vote else False
            Y_predict.append(result)
            
        return Y_predict
    
    def caculate_vote(self, x, context):
        if context == 'positive':
            base_context = self.positive_context
            opposite_context = self.negative_context
        else:
            base_context = self.negative_context
            opposite_context = self.positive_context
            
        final_vote = 0
        for g in base_context:
            ps_intervals = {i:[] for i in range(self.num_attr)}
            for m in range(self.num_attr):
                x_m = x[m]
                g_m = g[m]
                ps_intervals[m].append(min(x_m,g_m)) 
                ps_intervals[m].append(max(x_m,g_m)) 
            base_vote = 0
            opposite_vote = 0
            for g in base_context:
                flag = True
                for m in range(self.num_attr):
                    if not ps_intervals[m][0] <= g[m] <= ps_intervals[m][1]:
                        flag = False
                        break
                if flag:
                    base_vote += 1
            for g in opposite_context:
                flag = True
                for m in range(self.num_attr):
                    if not ps_intervals[m][0] <= g[m] <= ps_intervals[m][1]:
                        flag = False
                        break
                if flag:
                    opposite_vote += 1

            base_vote = base_vote / base_context.shape[0]
            opposite_vote = opposite_vote / opposite_context.shape[0]
            
            if base_vote > opposite_vote * self.scale_factor:
                final_vote += 1
        final_vote = final_vote / base_context.shape[0]
        return final_vote
    
    def get_params(self, deep = True):
        return {'scale_factor': self.scale_factor}
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

In [9]:
# Class evaluation metrics
class Metrics:
    def __init__(self, model_list):
        self.metrics_dict = {model:{} for model in model_list}
        self.metrics_list = ['accuracy','precision','recall','time_ms']
        for k in self.metrics_dict.keys():
            self.metrics_dict[k] = {metric:[] for metric in self.metrics_list}

    def caculate_metrics(self,model_name, y_test, y_predict, det_t):
        self.metrics_dict[model_name]['accuracy'].append(metrics.accuracy_score(y_test, y_predict))
        self.metrics_dict[model_name]['precision'].append(metrics.precision_score(y_test, y_predict))
        self.metrics_dict[model_name]['recall'].append(metrics.recall_score(y_test, y_predict))
        self.metrics_dict[model_name]['time_ms'].append(det_t*1000)
    
    def get_metrics(self):
        for k in self.metrics_dict.keys():
            sub_dict = self.metrics_dict[k]
            for sk in sub_dict.keys():
                sub_dict[sk] = np.mean(sub_dict[sk])
        df = pd.DataFrame(self.metrics_dict)
        metrics_df = pd.DataFrame(df.values.T, index=df.columns, columns=df.index)
        return metrics_df

In [10]:
def scaling(df):
    target_mapping = {'positive':1, 'negative':0}
    feature_mapping = {'x':1, 'o':0, 'b':0}
    for idx in df.columns[:-1]:
        df[idx] = df[idx].map(feature_mapping)
    df[df.columns[-1]] = df[df.columns[-1]].map(target_mapping)
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values
    return X, y

In [11]:
format3 = lambda x:'%.3f' % x

#### Tic-Tac-Toe Endgame Data Set

In [6]:
# Tune prameter by RandomizedSearchCV
param_dist = {
        'scale_factor':np.linspace(0, 10, 100),
        }
df = pd.read_csv("tic-tac-toe.data" , names = list(range(1,10)) + ['target'])
X, y = scaling(df)
lazyfca = LazyFca1()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 

grid = RandomizedSearchCV(lazyfca, param_dist ,cv = 5, scoring='accuracy', n_iter=10, n_jobs = -1)
grid.fit(X_train,y_train)
grid.best_params_, grid.best_score_


({'scale_factor': 8.787878787878787}, 0.9817417876241405)

In [7]:
# load dataset
df = pd.read_csv("tic-tac-toe.data" , names = list(range(1,10)) + ['target'])
X, y = scaling(df)
#Initial models

'''
When parameter scale-factor is around 8.89, we can get a good result 
(a = 0.996, p=0.994, r = 1.00), Setting it to 1000 is to elimate the influence of outliners 
''' 
lazyfca = LazyFca1(1000)   
cart = DecisionTreeClassifier(criterion='entropy')
lr = LogisticRegression()
svc = SVC(kernel='rbf', probability=True)
    
model_list = ['LazyFca', 'CART', 'LR', 'SVC']
my_metrics = Metrics(model_list)
 
# k-fold(k=10)
kfold = ShuffleSplit(n_splits = 10)
for train_index, test_index in kfold.split(X, y):
    
    X_train, y_train= X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    
    # LazyFca
    t = time.time()
    lazyfca.fit(X_train, y_train)
    y_predict = lazyfca.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('LazyFca',y_test, y_predict, det_t)
    
    # CART
    t = time.time()
    cart.fit(X_train, y_train)
    y_predict = cart.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('CART',y_test, y_predict, det_t)

    # LR
    t = time.time()
    lr.fit(X_train, y_train)
    y_predict = lr.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('LR',y_test, y_predict, det_t)

    # SVC
    t = time.time()
    svc.fit(X_train, y_train)
    y_predict = svc.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('SVC',y_test, y_predict, det_t)
    

my_metrics.get_metrics().applymap(format3)

Unnamed: 0,accuracy,precision,recall,time_ms
LazyFca,1.0,1.0,1.0,4028.883
CART,0.981,0.988,0.985,1.562
LR,0.759,0.799,0.87,7.807
SVC,1.0,1.0,1.0,68.729


#### MONK's Problems Data Set

In [20]:

# Tune prameter by RandomizedSearchCV
param_dist = {
        'scale_factor':np.linspace(0, 10, 1000),
        }
df = pd.read_csv('monks/monks-1.train' , sep = ' ', header=None)
df = df.iloc[:, 1:-1]
df = df.astype('str')
Y = df.iloc[:,0].astype('int').values
X = df.iloc[:,1:]
X = pd.get_dummies(X).values
lazyfca = LazyFca1()
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0) 

grid = RandomizedSearchCV(lazyfca, param_dist ,cv = 5, scoring='accuracy', n_iter=100, n_jobs = -1)
grid.fit(X_train,y_train)
grid.best_params_, grid.best_score_

({'scale_factor': 7.977977977977978}, 0.858421052631579)

In [131]:

# initial models
lazyfca = LazyFca1(8.46)
cart = DecisionTreeClassifier(criterion='entropy')
lr = LogisticRegression(penalty='l2')
svc = SVC(kernel='rbf', probability=True)
    
model_list = ['LazyFca', 'CART', 'LR', 'SVC']
my_metrics = Metrics(model_list)

for i in range(3):
    
    # load dataset
    df = pd.read_csv('monks/monks-{}.train'.format(i+1) , sep = ' ', header=None)
    df = df.iloc[:, 1:-1]
    df = df.astype('str')
    y_train = df.iloc[:,0].astype('int').values
    X = df.iloc[:,1:]
    X_train = pd.get_dummies(X).values

    df = pd.read_csv('monks/monks-{}.test'.format(i+1) , sep = ' ', header=None)
    df = df.iloc[:, 1:-1]
    df = df.astype('str')
    y_test = df.iloc[:,0].astype('int').values
    X = df.iloc[:,1:]
    X_test = pd.get_dummies(X).values
    
    # LazyFca
    t = time.time()
    lazyfca.fit(X_train, y_train)
    y_predict = lazyfca.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('LazyFca',y_test, y_predict, det_t)
    
    # CART
    t = time.time()
    cart.fit(X_train, y_train)
    y_predict = cart.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('CART',y_test, y_predict, det_t)

    # LR
    t = time.time()
    lr.fit(X_train, y_train)
    y_predict = lr.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('LR',y_test, y_predict, det_t)

    # SVC
    t = time.time()
    svc.fit(X_train, y_train)
    y_predict = svc.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('SVC',y_test, y_predict, det_t)
    
my_metrics.get_metrics().applymap(format3)

Unnamed: 0,accuracy,precision,recall,time_ms
LazyFca,0.865,0.828,0.825,3077.783
CART,0.888,0.85,0.902,0.995
LR,0.766,0.679,0.581,4.977
SVC,0.86,0.846,0.715,7.337


#### Heart disease Data Set

In [137]:
df = pd.read_csv("heart.csv" )
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [138]:
X = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

In [149]:
model_list = ['LazyFca','CART', 'LR', 'SVC']
my_metrics = Metrics(model_list)

lazyfca = LazyFca2(5)
cart = DecisionTreeClassifier(criterion='entropy')
lr = LogisticRegression(max_iter = 1000)
svc = SVC(kernel='rbf', probability=True)
# k-fold(k=10)
kfold = ShuffleSplit(n_splits = 10)
for train_index, test_index in kfold.split(X, y):
    
    X_train, y_train= X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    
    # LazyFca
    t = time.time()
    lazyfca.fit(X_train, y_train)
    y_predict = lazyfca.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('LazyFca',y_test, y_predict, det_t)
    
    # CART
    t = time.time()
    cart.fit(X_train, y_train)
    y_predict = cart.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('CART',y_test, y_predict, det_t)

    # LR
    t = time.time()
    lr.fit(X_train, y_train)
    y_predict = lr.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('LR',y_test, y_predict, det_t)

    # SVC
    t = time.time()
    svc.fit(X_train, y_train)
    y_predict = svc.predict(X_test)
    det_t = time.time() - t
    my_metrics.caculate_metrics('SVC',y_test, y_predict, det_t)
    

my_metrics.get_metrics().applymap(format3)

Unnamed: 0,accuracy,precision,recall,time_ms
LazyFca,0.787,0.775,0.86,3485.077
CART,0.8,0.813,0.837,1.597
LR,0.839,0.807,0.928,170.715
SVC,0.671,0.642,0.88,16.063
