In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import sys
project_dir_path = '/Users/keisukeonoue/ws/lukasiewicz_1/'
sys.path.append(project_dir_path)

# lukasiewicz

In [5]:
import os
import shutil

import pandas as pd
import numpy as np
import cvxpy as cp
from sklearn.model_selection import KFold

from src.setup_problem_primal_modular import Setup
from src.objective_function import linear_svm
from src.objective_function import linear_svm_loss
from src.objective_function import logistic_regression_loss
from src.evaluation import EvaluateModel

In [17]:
setting_dict = {
    'seed': 42,
    # 'test_size': 0.2,
    'n_splits': 5,
    'source_path': './../../data/pima_indian_diabetes',
    'source_data_file_name': 'diabetes_discretized.csv',
    'source_rule_file_name': 'rules.txt',
    'input_path': './../../inputs/pima_indian_diabetes_cv',
    'unsupervised_file_name': 'U.csv',
    'unsupervised_shape': (15, 21), # (data_num, data_dim)
    'output_path': './../../outputs/pima_indian_diabetes_2'
}

objectives_dict = {
    'luka_1': {
        'model_name': 'luka linear svm',
        'model': linear_svm,
        'params': {'c1': 10, 'c2': 10},
        'constraints_flag': {
            'pointwise': True,
            'logical': True,
            'consistency': True
        }
    },
    'luka_2': {
        'model_name': 'luka linear svm loss',
        'model': linear_svm_loss,
        'params': {'c1': 10, 'c2': 10},
        'constraints_flag': {
            'pointwise': False,
            'logical': False,
            'consistency': True
        }
    },
    'luka_3': {
        'model_name': 'luka logistic regression loss',
        'model': logistic_regression_loss,
        'params': {'c1': 10, 'c2': 10},
        'constraints_flag': {
            'pointwise': False,
            'logical': False,
            'consistency': True
        }
    },
}

In [12]:
def prepare_data(setting: dict) -> None:
    random_state = setting['seed']
    n_splits = setting['n_splits']

    source_data_path = os.path.join(setting['source_path'], setting['source_data_file_name'])
    input_path = setting['input_path']

    data = pd.read_csv(source_data_path, index_col=0)
    
    data = data.reset_index(drop=True)

    X = data.drop(["Outcome"], axis=1)
    y = data["Outcome"]

    kf = KFold(n_splits=n_splits)
    
    for i, (train_index, test_index) in enumerate(kf.split(X)):

        train_data = data.loc[train_index, :]
        outcome = train_data['Outcome']
        features = train_data.drop(['Outcome'], axis=1)
        feature_names = list(features.columns)

        input_train_path = os.path.join(input_path, f'fold_{i}', 'train')

        if not os.path.exists(input_train_path):
            os.makedirs(input_train_path)

        df = features.copy()
        df['target'] = outcome.replace(0, -1)

        file_name = "L_" + "Outcome" + '.csv'
        file_path = os.path.join(input_train_path, file_name)
        df.to_csv(file_path)

        for feature_name in feature_names:
            df = features.copy()
            df['target'] = df[feature_name].replace(0, -1)
            # display(df)

            file_name = "L_" + feature_name + '.csv'
            file_path = os.path.join(input_train_path, file_name)
            df.to_csv(file_path)

        unsupervised_path = os.path.join(input_train_path, setting['unsupervised_file_name'])
        unsupervised_shape = setting['unsupervised_shape']

        arr_U = np.random.randint(2, size=unsupervised_shape)
        df_U = pd.DataFrame(arr_U)
        df_U.to_csv(unsupervised_path)

        rule_file_name = setting['source_rule_file_name']
        source_rule_path = os.path.join(setting['source_path'], rule_file_name)
        rule_path = os.path.join(input_train_path, rule_file_name)

        shutil.copy(source_rule_path, rule_path)

        test_data = data.loc[test_index, :]

        outcome = test_data['Outcome']
        features = test_data.drop(['Outcome'], axis=1)
        feature_names = list(features.columns)

        input_test_path = os.path.join(input_path, f'fold_{i}', 'test')

        if not os.path.exists(input_test_path):
            os.makedirs(input_test_path)

        
        df = features.copy()
        df['target'] = outcome.replace(0, -1)

        file_name = "L_" + "Outcome" + '.csv'
        file_path = os.path.join(input_test_path, file_name)
        df.to_csv(file_path)

In [15]:
prepare_data(setting_dict)

In [25]:
for i in range(setting_dict['n_splits']):
    data_dir_path = os.path.join(setting_dict['input_path'], f'fold_{i}')
    
    file_list = os.listdir(os.path.join(data_dir_path, "train"))

    L_files = [filename for filename in file_list 
            if filename.startswith('L') and filename.endswith('.csv')]

    U_files = [filename for filename in file_list 
            if filename.startswith('U') and filename.endswith('.csv')]

    file_names_dict = {
        'supervised': L_files,
        'unsupervised': U_files,
        'rule': [setting_dict['source_rule_file_name']]
    }

    for key, obj_setting in objectives_dict.items():

        model_name = obj_setting['model_name']
        obj_constructor = obj_setting['model']
        c1 = obj_setting['params']['c1']
        c2 = obj_setting['params']['c2']
        constraints_flag = obj_setting['constraints_flag']


        print()
        print(f'model name: {model_name}')
        print(f'obj constructor: {obj_constructor}')
        print(f'c1: {c1}')
        print(f'c2: {c2}')
        print()

        problem_instance = Setup(data_dir_path,
                                 file_names_dict,
                                 obj_constructor,
                                 name=model_name)
        
        objective_function, constraints = problem_instance.main(c1=c1, c2=c2,
                                                                constraints_flag_dict=constraints_flag)
        
        problem = cp.Problem(objective_function, constraints)
        result = problem.solve(verbose=True)

        print(result)

        save_file_name = 'result_' + key + '.json'
        save_dir_path = os.path.join(setting_dict['output_path'], f'fold_{i}')
        if not os.path.exists(save_dir_path):
            os.makedirs(save_dir_path)
        save_file_path = os.path.join(save_dir_path, save_file_name)
        note = None

        evaluate_model = EvaluateModel(problem_instance, note=note)
        evaluate_model.evaluate(save_file_path=save_file_path)


model name: luka linear svm
obj constructor: <function timer.<locals>.wrapper at 0x16e03b740>
c1: 10
c2: 10

load_data took 0.06419777870178223 seconds!
load_rules took 0.0004222393035888672 seconds!
linear_svm took 0.9626669883728027 seconds!
pointwise constraints
logical constraints
consistency constraints
__call__ took 5.603661775588989 seconds!
                                     CVXPY                                     
                                     v1.3.2                                    




(CVXPY) Jan 06 02:39:09 PM: Your problem has 12322 variables, 37416 constraints, and 0 parameters.
(CVXPY) Jan 06 02:39:10 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:39:10 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:39:10 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:39:13 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:39:13 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> ECOS
(CVXPY) Jan 06 02:39:13 PM: Applying reduction Dcp2Cone
(CVXPY) Jan 06 02:39:18 PM: Applying reduction CvxAttr2Constr
(CVXPY) Jan 06 02:39:22 PM: A



                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:40:28 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:40:30 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:40:30 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:40:30 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:40:32 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:40:32 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi




(CVXPY) Jan 06 02:41:31 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:41:32 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:41:32 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:41:32 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:41:33 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:41:33 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> ECOS
(CVXPY) Jan 06 02:41:33 PM: Applying reduction Dcp2Cone
(CVXPY) Jan 06 02:41:34 PM: Applying reduction CvxAttr2Constr
(CVXPY) Jan 06 02:41:34 PM: Ap

  return np.log(values[0])


nan

model name: luka linear svm
obj constructor: <function timer.<locals>.wrapper at 0x16e03b740>
c1: 10
c2: 10

load_data took 0.05536174774169922 seconds!
load_rules took 0.0003139972686767578 seconds!
linear_svm took 1.0021388530731201 seconds!
pointwise constraints
logical constraints
consistency constraints
__call__ took 8.250746011734009 seconds!




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:42:07 PM: Your problem has 12322 variables, 37416 constraints, and 0 parameters.
(CVXPY) Jan 06 02:42:08 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:42:08 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:42:08 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:42:11 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:42:11 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf



                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:43:20 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:43:23 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:43:23 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:43:23 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:43:25 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:43:25 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi



(CVXPY) Jan 06 02:44:26 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:44:26 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:44:26 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:44:27 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:44:27 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> ECOS
(CVXPY) Jan 06 02:44:27 PM: Applying reduction Dcp2Cone
(CVXPY) Jan 06 02:44:29 PM: Applying reduction CvxAttr2Constr
(CVXPY) Jan 06 02:44:30 PM: Applying reduction ConeMatrixStuffing
(CVXPY) Jan 06 02:44:48 PM: Applying reduction ECOS
(CVXPY) Ja

  return np.log(values[0])


nan

model name: luka linear svm
obj constructor: <function timer.<locals>.wrapper at 0x16e03b740>
c1: 10
c2: 10

load_data took 0.03129768371582031 seconds!
load_rules took 0.0002999305725097656 seconds!
linear_svm took 0.9979000091552734 seconds!
pointwise constraints
logical constraints
consistency constraints
__call__ took 7.245998859405518 seconds!




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:45:03 PM: Your problem has 12322 variables, 37416 constraints, and 0 parameters.
(CVXPY) Jan 06 02:45:05 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:45:05 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:45:05 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:45:07 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:45:07 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf



                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:46:25 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:46:27 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:46:27 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:46:27 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:46:29 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:46:29 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi



                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:47:35 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:47:36 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:47:36 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:47:36 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:47:37 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:47:37 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi

  return np.log(values[0])


nan

model name: luka linear svm
obj constructor: <function timer.<locals>.wrapper at 0x16e03b740>
c1: 10
c2: 10

load_data took 0.03283190727233887 seconds!
load_rules took 0.0004429817199707031 seconds!
linear_svm took 1.0933418273925781 seconds!
pointwise constraints
logical constraints
consistency constraints
__call__ took 14.159472227096558 seconds!




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:48:18 PM: Your problem has 12322 variables, 37416 constraints, and 0 parameters.
(CVXPY) Jan 06 02:48:20 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:48:20 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:48:20 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:48:23 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:48:23 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf



                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:49:50 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:49:51 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:49:51 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:49:51 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:49:54 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:49:54 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi



                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:51:09 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:51:10 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:51:10 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:51:10 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:51:11 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:51:11 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi

  return np.log(values[0])


nan

model name: luka linear svm
obj constructor: <function timer.<locals>.wrapper at 0x16e03b740>
c1: 10
c2: 10

load_data took 0.06602978706359863 seconds!
load_rules took 0.00038623809814453125 seconds!
linear_svm took 2.798705816268921 seconds!
pointwise constraints
logical constraints
consistency constraints
__call__ took 5.725691080093384 seconds!




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:51:46 PM: Your problem has 12322 variables, 37416 constraints, and 0 parameters.
(CVXPY) Jan 06 02:51:49 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:51:49 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:51:49 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:51:52 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:51:52 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf



                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:53:12 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:53:13 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:53:13 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:53:13 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:53:16 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:53:16 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi



                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Jan 06 02:54:17 PM: Your problem has 484 variables, 24244 constraints, and 0 parameters.
(CVXPY) Jan 06 02:54:18 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 06 02:54:18 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 06 02:54:18 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 06 02:54:19 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 06 02:54:19 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffi

  return np.log(values[0])


# benchmark models

In [32]:
import os
import shutil
from typing import List
import json

import pandas as pd
import numpy as np
import cvxpy as cp
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, f1_score

from src.misc import is_symbol
from src.setup_problem_primal_modular import Setup




setting_dict = {
    'seed': 42,
    # 'test_size': 0.2,
    'n_splits': 5,
    'source_path': './../../data/pima_indian_diabetes',
    'source_data_file_name': 'diabetes_discretized.csv',
    'source_data_file_name_2': 'diabetes_cleaned.csv',
    'source_rule_file_name': 'rules.txt',
    'input_path': './../../inputs/pima_indian_diabetes',
    'unsupervised_file_name': 'U.csv',
    'unsupervised_shape': (15, 21), # (data_num, data_dim)
    'output_path': './../../outputs/pima_indian_diabetes_2'
}

objectives_dict = {
    '1': {
        'model_name': 'linear svm',
        'model': SVC(kernel='linear', 
                     random_state=setting_dict['seed'],
                     probability=True),
    },
    '2': {
        'model_name': 'non-linear svm (rbf)',
        'model': SVC(kernel='rbf',
                     random_state=setting_dict['seed'],
                     probability=True),
    },
    '3': {
        'model_name': 'logistic regression',
        'model': LogisticRegression(random_state=setting_dict['seed']),
    },
    '4': {
        'model_name': 'random forest',
        'model': RandomForestClassifier(random_state=setting_dict['seed'])
    }
}


class EvaluateModelCV:
    def __init__(self,
                 path_cleaned: str,
                 path_discretized: str,
                 model: object,
                 KB_origin: List[List[str]],
                 random_state: int,
                #  test_size: float,
                 n_splits: int,
                 train_index: np.ndarray,
                 test_index: np.ndarray,
                 name: str = None,
                 note: str = None) -> None:

        self.path_cleaned = path_cleaned
        self.path_discretized = path_discretized
        self.model = model
        self.KB_origin = KB_origin
        self.random_state = random_state 
        self.n_splits = n_splits

        self.train_index = train_index
        self.test_index = test_index

        self.result_dict = {
            'name'     : name,
            'note'     : note,
            'Accuracy' : None,
            'Precision': None,
            'Recall'   : None,
            'F1-score' : None,
            'Auc'      : None,
            'len_U': None,
            'Rules'    : {'violation': 0, 'total': len(self.KB_origin)},
            'Rules_detail': {}
        }

    def calculate_scores(self) -> None:
        # まずは連続データで計算
        data = pd.read_csv(self.path_cleaned, index_col=0)
        data = data.reset_index(drop=True)

        X = data.drop('Outcome', axis=1)
        y = data['Outcome']
        y.replace(0, -1, inplace=True)

        X_train, y_train = X.loc[self.train_index, :], y.loc[self.train_index]
        X_test, y_test = X.loc[self.test_index, :], y.loc[self.test_index]

        self.model.fit(X_train, y_train)
        y_pred = self.model.predict(X_test)

        y_pred_interpreted = np.where(y_pred == 0, -1, y_pred)
        
        y_pred = self.model.predict_proba(X_test)[:, 1]

        # 精度等の一般的な評価指標の計算
        accuracy = accuracy_score(y_test, y_pred_interpreted)
        precision = precision_score(y_test, y_pred_interpreted)
        recall = recall_score(y_test, y_pred_interpreted)
        f1 = f1_score(y_test, y_pred_interpreted)
        roc_auc = roc_auc_score(y_test, y_pred)

        self.result_dict['Accuracy'] = float(accuracy)
        self.result_dict['Precision'] = float(precision)
        self.result_dict['Recall'] = float(recall)
        self.result_dict['F1-score'] = float(f1)
        self.result_dict['Auc'] = float(roc_auc)

        # ルール違反の計算の前に X_test を離散化（discretized のほうを読み込む）
        data = pd.read_csv(self.path_discretized, index_col=0)
        data = data.reset_index(drop=True)

        X = data.drop('Outcome', axis=1)
        y = data['Outcome']
        y.replace(0, -1, inplace=True)
        X_test, y_test = X.loc[self.test_index, :], y.loc[self.test_index]
        
        # ルール違反
        rules_tmp = []
        for rule in self.KB_origin:
            if "Outcome" in rule:
                tmp = {}
                for idx, item in enumerate(rule):
                    if not is_symbol(item):
                        if idx == 0 or rule[idx - 1] != '¬':
                            tmp[item] = 1
                        elif item != "Outcome":
                            tmp[item] = 0
                        else:
                            tmp[item] = -1
                rules_tmp.append(tmp)

        y_pred_interpreted = pd.DataFrame(y_pred_interpreted, index=self.test_index)

        for i, rule in enumerate(rules_tmp):
            outcome = rule["Outcome"]
            condition = " & ".join([f"{column} == {value}" for column, value in rule.items() if column != "Outcome"])

            tmp = y_pred_interpreted.loc[X_test.query(condition).index]

            violation_bool = 1 if int((tmp != outcome).sum().iloc[0]) >= 1 else 0
            self.result_dict['Rules']['violation'] += violation_bool
            self.result_dict['Rules_detail'][i] = {
                'rule': " ".join(self.KB_origin[i]),
                'violation': violation_bool,
            }

    def save_result_as_json(self, file_path) -> None:
        with open(file_path, 'w') as f:
            json.dump(self.result_dict, f, indent=4)

    def evaluate(self, save_file_path: str = './result_1.json') -> None:
        self.calculate_scores()
        self.save_result_as_json(file_path=save_file_path)



if __name__ == '__main__':
    data_dir_path = setting_dict['source_path']
    path_discretized = os.path.join(data_dir_path, setting_dict['source_data_file_name'])
    path_cleaned = os.path.join(data_dir_path, setting_dict['source_data_file_name_2'])
    random_state = setting_dict['seed']
    n_splits = setting_dict['n_splits']

    file_names_dict = {
        'rule': [setting_dict['source_rule_file_name']]
    }

    problem_instance = Setup(setting_dict['input_path'], file_names_dict, None)
    problem_instance.load_rules()
    KB_origin = problem_instance.KB_origin

    note = None

    # まずは連続データで計算
    data = pd.read_csv(path_cleaned, index_col=0)
    data = data.reset_index(drop=True)
    X = data.drop('Outcome', axis=1)    
    y = data['Outcome']
    y.replace(0, -1, inplace=True)

    kf = KFold(n_splits=n_splits)
    for i, (train_index, test_index) in enumerate(kf.split(X)):

        for key, obj_setting in objectives_dict.items():

            model_name = obj_setting['model_name']
            model = obj_setting['model']

            save_file_name = 'result_' + key + '.json'
            save_dir_path = os.path.join(setting_dict['output_path'], f'fold_{i}')
            if not os.path.exists(save_dir_path):
                os.makedirs(save_dir_path)
            save_file_path = os.path.join(save_dir_path, save_file_name)

            evaluate_model = EvaluateModelCV(path_cleaned,
                                             path_discretized,
                                             model,
                                             KB_origin,
                                             random_state,
                                             n_splits,
                                             train_index,
                                             test_index,
                                             name=model_name, 
                                             note=note)
            evaluate_model.evaluate(save_file_path=save_file_path)



load_rules took 0.0007898807525634766 seconds!


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [17]:
df = pd.DataFrame(np.random.rand(700, 3))
df.index

RangeIndex(start=0, stop=700, step=1)

In [29]:
len((1, 2, 3))

3

# rulefit