In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. the project's `src` package) are reloaded before each cell runs and
# edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
# Project root: it is put on sys.path so the `src` package can be imported,
# and the input data directory is resolved relative to it.
# The location differs per machine (e.g. '/home/onoue/ws/lukasiewicz_1'), so
# it can be overridden with the LUKASIEWICZ_PROJECT_DIR environment variable
# instead of editing this cell. The default is the previously hard-coded
# path, so existing setups behave exactly as before.
project_dir_path = os.environ.get(
    'LUKASIEWICZ_PROJECT_DIR',
    '/Users/keisukeonoue/ws/lukasiewicz_1/',
)
# Re-running this cell must not keep appending the same entry to sys.path.
if project_dir_path not in sys.path:
    sys.path.append(project_dir_path)
import shutil

import pandas as pd
import numpy as np
import cvxpy as cp

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from src.setup_problem_primal_modular import Setup
from src.objective_function import linear_svm, linear_svm_loss, logistic_regression_loss
from src.evaluation_conti import EvaluateModel

In [3]:
# ---- Experiment configuration ------------------------------------------
# Seed and hold-out fraction used by train_test_split further down.
random_state = 42
test_size = 0.2

# Directory holding the problem inputs (training CSVs and the rule file).
data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes_2')

# os.listdir returns entries in arbitrary order; sort them so the file lists
# handed to Setup are identical on every run and on every machine.
file_list = sorted(os.listdir(os.path.join(data_dir_path, "train")))
# Files are selected by naming convention: 'L*.csv' go under the 'supervised'
# key and 'U*.csv' under the 'unsupervised' key.
L_files = [f for f in file_list if f.startswith('L') and f.endswith('.csv')]
U_files = [f for f in file_list if f.startswith('U') and f.endswith('.csv')]
file_name_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ["rules_3.txt"]
}

# Hyper-parameters forwarded to Setup.main(c1=..., c2=...).
# NOTE(review): presumably the weights of the penalty terms in the objective
# -- confirm against src.setup_problem_primal_modular.
c1 = 0.5
c2 = 10

# Which constraint families Setup.main should add to the problem.
constraints_flag = {
    'pointwise': True,
    'logical': True,
    'consistency': True
}

In [4]:
# Build the optimisation problem using the linear-SVM objective.
# To try the logistic-regression objective instead, pass
# `logistic_regression_loss` with name="logistic regression".
problem_instance = Setup(
    data_dir_path,
    file_name_dict,
    linear_svm,
    name="linear svm",
)

# main() yields the objective and the list of constraints that are handed to
# cp.Problem in the next cell.
problem_parts = problem_instance.main(
    c1=c1,
    c2=c2,
    constraints_flag_dict=constraints_flag,
)
objective_function, constraints = problem_parts

load_data took 0.029419898986816406 seconds!
load_rules took 0.00024008750915527344 seconds!
linear_svm took 1.3395640850067139 seconds!
pointwise constraints
logical constraints
consistency constraints
__call__ took 4.9444239139556885 seconds!


In [5]:
# Assemble the CVXPY problem from the objective/constraints built above and
# solve it; verbose=True prints the compilation and solver log.
problem = cp.Problem(objective_function, constraints)
result = problem.solve(verbose=True)

# Fail fast when the solver did not reach a usable solution. Otherwise the
# variables have no value and the prediction cells below fail with an
# unrelated-looking error when comparing None against the threshold.
if problem.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(
        f"Solver finished with status '{problem.status}'; "
        "no usable solution is available for prediction."
    )

                                     CVXPY                                     
                                     v1.3.2                                    




(CVXPY) Jan 21 03:13:17 PM: Your problem has 11992 variables, 36756 constraints, and 0 parameters.
(CVXPY) Jan 21 03:13:20 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Jan 21 03:13:20 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Jan 21 03:13:20 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Jan 21 03:13:22 PM: Compiling problem (target solver=ECOS).
(CVXPY) Jan 21 03:13:22 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> ECOS
(CVXPY) Jan 21 03:13:22 PM: Applying reduction Dcp2Cone
(CVXPY) Jan 21 03:13:28 PM: Applying reduction CvxAttr2Constr
(CVXPY) Jan 21 03:13:31 PM: A

In [6]:
# Load the cleaned Pima Indians diabetes table; the first CSV column is the
# row index.
data = pd.read_csv(
    './../../data/pima_indian_diabetes/diabetes_cleaned.csv',
    index_col=0,
)

# Target is the 'Outcome' column; every other column is a feature.
y = data['Outcome']
X = data.drop(columns=['Outcome'])

# Hold-out split controlled by the test_size / random_state settings above.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=random_state,
)
X_train

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age
275,2,100,70,52,40.5,0.677,25
555,7,124,70,33,25.5,0.161,37
699,4,118,70,0,44.5,0.904,26
73,4,129,86,20,35.1,0.231,23
170,6,102,82,0,30.8,0.180,36
...,...,...,...,...,...,...,...
87,2,100,68,25,38.5,0.324,26
128,1,117,88,24,34.5,0.403,40
315,2,112,68,22,34.1,0.315,26
505,10,75,82,0,33.3,0.263,38


In [7]:
# Score the held-out test set with the learned 'Outcome' predicate and turn
# the continuous scores into {-1, +1} labels.
p = problem_instance.predicates_dict['Outcome']

test_scores_expr = p(X_test)
y_pred = test_scores_expr.value

# Scores at or above the threshold are labelled +1, everything else -1.
decision_threshold = 0.5
is_positive = y_pred >= decision_threshold
y_pred_interpreted = np.where(is_positive, 1, -1)
y_pred_interpreted

array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])

In [8]:
# Evaluate the test-set predictions.
# NOTE: this cell reads `y_pred` / `y_pred_interpreted`, which the train
# prediction cell further down overwrites -- re-run the test prediction cell
# first if cells were executed out of order.

# Map the 0/1 labels to -1/+1 so they match the encoding of the predictions.
y_test = y_test.replace(0, -1)

# zero_division=0 states explicitly what to report when a metric is undefined
# (e.g. precision when no sample is predicted positive). The value is the same
# 0.0 scikit-learn falls back to, but without the UndefinedMetricWarning.
accuracy = accuracy_score(y_test, y_pred_interpreted)
precision = precision_score(y_test, y_pred_interpreted, zero_division=0)
recall = recall_score(y_test, y_pred_interpreted, zero_division=0)
f1 = f1_score(y_test, y_pred_interpreted, zero_division=0)
# AUC is threshold-free, so it uses the continuous scores, not the labels.
roc_auc = roc_auc_score(y_test, y_pred)

for metric_name, metric_value in (
    ('accuracy', accuracy),
    ('precision', precision),
    ('recall', recall),
    ('f1', f1),
    ('auc', roc_auc),
):
    print(f'{metric_name}: {metric_value}')

accuracy: 0.6716417910447762
precision: 0.0
recall: 0.0
f1: 0.0
auc: 0.8073232323232323


  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# Same scoring as for the test set, now on the training split.
# NOTE: this overwrites `y_pred` and `y_pred_interpreted`.
train_scores_expr = p(X_train)
y_pred = train_scores_expr.value

# Scores at or above the threshold are labelled +1, everything else -1.
decision_threshold = 0.5
is_positive = y_pred >= decision_threshold
y_pred_interpreted = np.where(is_positive, 1, -1)
y_pred_interpreted

array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1

In [10]:
# Evaluate the training-set predictions held in `y_pred` and
# `y_pred_interpreted` (as produced by the preceding train prediction cell).

# Map the 0/1 labels to -1/+1 so they match the encoding of the predictions.
y_train = y_train.replace(0, -1)

# zero_division=0 states explicitly what to report when a metric is undefined
# (e.g. precision when no sample is predicted positive). The value is the same
# 0.0 scikit-learn falls back to, but without the UndefinedMetricWarning.
accuracy = accuracy_score(y_train, y_pred_interpreted)
precision = precision_score(y_train, y_pred_interpreted, zero_division=0)
recall = recall_score(y_train, y_pred_interpreted, zero_division=0)
f1 = f1_score(y_train, y_pred_interpreted, zero_division=0)
# AUC is threshold-free, so it uses the continuous scores, not the labels.
roc_auc = roc_auc_score(y_train, y_pred)

for metric_name, metric_value in (
    ('accuracy', accuracy),
    ('precision', precision),
    ('recall', recall),
    ('f1', f1),
    ('auc', roc_auc),
):
    print(f'{metric_name}: {metric_value}')

accuracy: 0.6753731343283582
precision: 0.0
recall: 0.0
f1: 0.0
auc: 0.8016923858512733


  _warn_prf(average, modifier, msg_start, len(result))


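# The model does not perform well on the training data either: every sample is predicted as -1 (precision = recall = F1 = 0), even though the AUC is about 0.80.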