In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os 
import sys
import time

# Root of the project checkout. It can be overridden with the
# LUKASIEWICZ_PROJECT_DIR environment variable so the notebook is not tied to
# one machine; the fallback is the location that was hard-coded originally.
project_dir_path = os.environ.get('LUKASIEWICZ_PROJECT_DIR',
                                  '/home/onoue/ws/lukasiewicz_1')

# Make the project's `src` package importable. The guard keeps sys.path free of
# duplicate entries when this cell is re-run in the same kernel.
if project_dir_path not in sys.path:
    sys.path.append(project_dir_path)

# Print the effective module search path to help diagnose import problems.
for path in sys.path:
    print(path)

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


import cvxpy as cp

/home/onoue/ws/lukasiewicz_1/notebooks/pima_indian_diabetes
/usr/lib/python310.zip
/usr/lib/python3.10
/usr/lib/python3.10/lib-dynload

/home/onoue/ws/lukasiewicz_1/myenv/lib/python3.10/site-packages
/home/onoue/ws/lukasiewicz_1


# 目次

- 通常の svm (linear kernel)
- Pointwise + Logical + Consistency
  - 通常の目的関数
  - 損失関数
- Pointwise + Logical
  - 通常の目的関数
  - 損失関数
- Logical + Consistency
  - 通常の目的関数
- Pointwise + Consistency
  - 通常の目的関数
  - 損失関数
- Pointwise
  - 通常の目的関数
  - 損失関数
- Logical
  - 通常の目的関数
- Consistency
  - 通常の目的関数

# 前提
- データ： 前処理済み（離散化は無し）
- ルール： 前処理 ＋ 離散化 をしたデータを RuleFit で抽出
- 目的関数１： 通常の目的関数（主問題）
- 目的関数２： 損失関数（Pointwise 制約を交差エントロピーにして目的関数の中に入れたもの．Logical 制約と Consistency 制約は元のまま制約として扱う）

# 概要
- Pima Indian Diabetes のデータに通常の svm を適用した場合と，論文の提案アルゴリズムを適用した場合の計算結果の違いを調べたかった
- 目的関数１ ＋ 全種類の制約 の最適化では全てのテストデータに対して同じラベルを出力していることが確認されたが原因がよくわからなかった
- 最適化の際に課す制約の種類を少しずつ変えながら，その違いを観察した
- 目的関数２を実装したので，そちらでも目的関数１の場合と同じように実験を行った

# 結果
- 目的関数１の場合は，Consistency 制約を外した場合（Pointwise + Logical または Pointwise only）に学習が上手く行っていた
- 目的関数２の場合は，Pointwise 制約さえ入っていればどの組み合わせでも学習ができていた．ただし特にスコアが良かったのは，Pointwise ＋ Logical と Pointwise only の場合

<br>

```
目的関数２ (Pointwise + Logical) 

Accuracy: 
0.7910447761194029

Confusion matrix: 
[[78 12]
 [16 28]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.83      0.87      0.85        90
         1.0       0.70      0.64      0.67        44

    accuracy                           0.79       134
   macro avg       0.76      0.75      0.76       134
weighted avg       0.79      0.79      0.79       134


auc: 
0.8441919191919193
```


<br>


```
Accuracy: 
0.7910447761194029

Confusion matrix: 
[[78 12]
 [16 28]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.83      0.87      0.85        90
         1.0       0.70      0.64      0.67        44

    accuracy                           0.79       134
   macro avg       0.76      0.75      0.76       134
weighted avg       0.79      0.79      0.79       134


auc: 
0.8462121212121212
```

<br>

~~２つの違う条件での評価値が一致しているが理由はわかりません~~ auc は一致せず

# 通常の svm (linear kernel)

In [18]:
# Baseline: an ordinary linear-kernel SVM on the preprocessed (non-discretized)
# data. The first CSV column is used as the row index.
data = pd.read_csv("./data/diabetes_cleaned.csv", index_col=0)

X = data.drop(["Outcome"], axis=1)
y = data["Outcome"]

# Take the feature names from the feature matrix itself rather than slicing
# `data.columns` positionally, which silently depends on where the "Outcome"
# column sits in the CSV.
feature_names = X.columns.to_list()

random_state = 42
test_size = 0.2

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=test_size,
                                                    random_state=random_state)

# Row labels of each split, kept so the same split can be exported/reused.
train_index = X_train.index
test_index = X_test.index

# Disabled one-off export of the split indices (kept for provenance):
# import os
# project_dir_path = "/home/onoue/ws/lukasiewicz_1/"
# data_dir_path = "inputs/pima_indian_diabetes"
# save_dir_path = os.path.join(project_dir_path, data_dir_path)

# file_path_1 = os.path.join(save_dir_path, 'train_index_3.csv')
# pd.DataFrame(train_index).to_csv(file_path_1)

# file_path_2 = os.path.join(save_dir_path, 'test_index_3.csv')
# pd.DataFrame(test_index).to_csv(file_path_2)


# Continue with plain NumPy arrays from here on.
X_train = X_train.values
X_test = X_test.values
y_train = y_train.values
y_test = y_test.values

# Build the linear-kernel SVM. probability=True fits the probability model
# with internally shuffled cross-validation, so random_state must be fixed for
# predict_proba (and therefore the AUC below) to be reproducible.
svm_linear = SVC(kernel='linear', probability=True, random_state=random_state)

# Train the model.
svm_linear.fit(X_train, y_train)

# Predict labels for the held-out data.
y_pred_linear = svm_linear.predict(X_test)

# Accuracy.
accuracy_linear = accuracy_score(y_test, y_pred_linear)

# Confusion matrix.
cm_linear = confusion_matrix(y_test, y_pred_linear)

# Per-class precision / recall / F1.
report_linear = classification_report(y_test, y_pred_linear)

# Print the results.
print("SVM (Linear Kernel) Accuracy:", accuracy_linear)
print()
print()
print("SVM (Linear Kernel) Confusion Matrix:\n", cm_linear)
print()
print()
print("SVM (Linear Kernel) Classification Report:\n", report_linear)
print()
print()

# ROC AUC from the estimated probability of the positive class (column 1).
y_pred_proba_linear = svm_linear.predict_proba(X_test)
roc_auc = roc_auc_score(y_test, y_pred_proba_linear[:, 1])
print("roc auc: ")
print(roc_auc)

SVM (Linear Kernel) Accuracy: 0.7835820895522388


SVM (Linear Kernel) Confusion Matrix:
 [[77 13]
 [16 28]]


SVM (Linear Kernel) Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.86      0.84        90
           1       0.68      0.64      0.66        44

    accuracy                           0.78       134
   macro avg       0.76      0.75      0.75       134
weighted avg       0.78      0.78      0.78       134



roc auc: 
0.8578282828282827


# Pointwise + Logical + Consistency

### 通常の目的関数

In [19]:
# Train with the standard (primal) objective; per this section's heading all
# three constraint families (Pointwise + Logical + Consistency) are active.
# NOTE(review): nothing in this cell selects the constraint families, and other
# cells with identical code log different constraint sets. Presumably the
# selection is made by editing src/setup_problem_tmp.py between runs (picked up
# by %autoreload) - confirm, and ideally expose it as a parameter.
from src.setup_problem_tmp import Setup

data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort them so the files are
# handed to Setup in the same order on every run.
file_list = sorted(os.listdir(train_data_dir_path))

# Base names (extension stripped) of the L*.csv / U*.csv files.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}

problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# Time the CVXPY problem construction and solve.
start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')
# Report how the solver terminated so that degenerate predictions can be told
# apart from a failed or inaccurate solve.
print(f'solver status: {problem.status}')


print()
print()
print()


# Load the held-out data for the 'Outcome' predicate.
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': np.array(test_df)
}

p_dict = problem_instance.predicates_dict
selected_predicates = ['Outcome']
selected_p_dict = {key: value for key, value in p_dict.items() if key in selected_predicates}

# The last column holds the +1/-1 label; the remaining columns are features.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Evaluate the learned predicate on the test features.
# NOTE(review): presumably a truth value in [0, 1], hence the 0.5 cut-off - confirm.
y_pred = p_dict['Outcome'](X_test).value

y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)

accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

# zero_division=0 keeps the reported 0.00 for a class that is never predicted
# but suppresses the UndefinedMetricWarning noise.
class_report = classification_report(y_test, y_pred_interpreted, zero_division=0)
print("classification report: ")
print(class_report)
print()

# AUC is computed from the raw predicate outputs, not the thresholded labels.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.10268449783325195 seconds! 

Loading rules ...
Done in 0.0006928443908691406 seconds! 

Identifying predicates ...
Done in 0.002677440643310547 seconds! 

Constructing objective function ...
Done in 3.7401108741760254 seconds! 

Constructing constraints ...
Done in 18.876312971115112 seconds! 

All done




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:35:21 PM: Your problem has 12014 variables, 38096 constraints, and 0 parameters.
(CVXPY) Nov 27 05:35:26 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:35:26 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:35:26 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Nov 27 05:35:31 PM: Compiling problem (target solver=ECOS).
(CVXPY) Nov 27 05:35:31 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.6716417910447762

Confusion matrix: 
[[90  0]
 [44  0]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.67      1.00      0.80        90
         1.0       0.00      0.00      0.00        44

    accuracy                           0.67       134
   macro avg       0.34      0.50      0.40       134
weighted avg       0.45      0.67      0.54       134


auc: 
0.7934343434343434


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### 損失関数

In [20]:
# Train with the loss-function objective (the `setup_problem_loss_tmp` variant
# of Setup); per this section's heading all constraint families are active.
# NOTE(review): nothing in this cell selects the constraint families, and other
# cells with identical code log different constraint sets. Presumably the
# selection is made by editing src/setup_problem_loss_tmp.py between runs
# (picked up by %autoreload) - confirm, and ideally expose it as a parameter.
from src.setup_problem_loss_tmp import Setup

data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort them so the files are
# handed to Setup in the same order on every run.
file_list = sorted(os.listdir(train_data_dir_path))

# Base names (extension stripped) of the L*.csv / U*.csv files.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}

problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# Time the CVXPY problem construction and solve.
start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')
# Report how the solver terminated so that degenerate predictions can be told
# apart from a failed or inaccurate solve.
print(f'solver status: {problem.status}')


print()
print()
print()


# Load the held-out data for the 'Outcome' predicate.
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': np.array(test_df)
}

p_dict = problem_instance.predicates_dict
selected_predicates = ['Outcome']
selected_p_dict = {key: value for key, value in p_dict.items() if key in selected_predicates}

# The last column holds the +1/-1 label; the remaining columns are features.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Evaluate the learned predicate on the test features.
# NOTE(review): presumably a truth value in [0, 1], hence the 0.5 cut-off - confirm.
y_pred = p_dict['Outcome'](X_test).value

y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

# zero_division=0 avoids UndefinedMetricWarning if a class is never predicted.
class_report = classification_report(y_test, y_pred_interpreted, zero_division=0)
print("classification report: ")
print(class_report)
print()

# AUC is computed from the raw predicate outputs, not the thresholded labels.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.12933564186096191 seconds! 

Loading rules ...
Done in 0.0019431114196777344 seconds! 

Identifying predicates ...
Done in 0.015868186950683594 seconds! 

Constructing objective function ...
Done in 0.06048178672790527 seconds! 

Constructing constraints ...
Done in 11.305725574493408 seconds! 

All done
                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:37:50 PM: Your problem has 222 variables, 26304 constraints, and 0 parameters.
(CVXPY) Nov 27 05:37:51 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:37:51 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:37:51 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
------------------------------------------------------------------

  return np.log(values[0])


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.7686567164179104

Confusion matrix: 
[[80 10]
 [21 23]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.79      0.89      0.84        90
         1.0       0.70      0.52      0.60        44

    accuracy                           0.77       134
   macro avg       0.74      0.71      0.72       134
weighted avg       0.76      0.77      0.76       134


auc: 
0.8022727272727274


# Pointwise + Logical

### 通常の目的関数

In [25]:
# Train with the standard (primal) objective; per this section's heading only
# the Pointwise + Logical constraints are active.
# NOTE(review): nothing in this cell selects the constraint families, and other
# cells with identical code log different constraint sets. Presumably the
# selection is made by editing src/setup_problem_tmp.py between runs (picked up
# by %autoreload) - confirm, and ideally expose it as a parameter.
from src.setup_problem_tmp import Setup

data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort them so the files are
# handed to Setup in the same order on every run.
file_list = sorted(os.listdir(train_data_dir_path))

# Base names (extension stripped) of the L*.csv / U*.csv files.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}



problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# Time the CVXPY problem construction and solve.
start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')
# Report how the solver terminated so that degenerate predictions can be told
# apart from a failed or inaccurate solve.
print(f'solver status: {problem.status}')


print()
print()
print()

# Load the held-out data for the 'Outcome' predicate.
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': np.array(test_df)
}

p_dict = problem_instance.predicates_dict
selected_predicates = ['Outcome']
selected_p_dict = {key: value for key, value in p_dict.items() if key in selected_predicates}

# The last column holds the +1/-1 label; the remaining columns are features.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Evaluate the learned predicate on the test features.
# NOTE(review): presumably a truth value in [0, 1], hence the 0.5 cut-off - confirm.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

# zero_division=0 avoids UndefinedMetricWarning if a class is never predicted.
class_report = classification_report(y_test, y_pred_interpreted, zero_division=0)
print("classification report: ")
print(class_report)
print()

# AUC is computed from the raw predicate outputs, not the thresholded labels.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.09522533416748047 seconds! 

Loading rules ...
Done in 0.00033402442932128906 seconds! 

Identifying predicates ...
Done in 0.0014739036560058594 seconds! 

Constructing objective function ...
Done in 9.785789728164673 seconds! 

Constructing constraints ...

pointwise + logical

Done in 16.86003613471985 seconds! 

All done




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:48:03 PM: Your problem has 12014 variables, 13632 constraints, and 0 parameters.
(CVXPY) Nov 27 05:48:06 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:48:06 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:48:06 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Nov 27 05:48:10 PM: Compiling problem (target solver=ECOS).
(CVXPY) Nov 27 05:48:10 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.7686567164179104

Confusion matrix: 
[[74 16]
 [15 29]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.83      0.82      0.83        90
         1.0       0.64      0.66      0.65        44

    accuracy                           0.77       134
   macro avg       0.74      0.74      0.74       134
weighted avg       0.77      0.77      0.77       134


auc: 
0.8457070707070707


### 損失関数

In [26]:
# Train with the loss-function objective (the `setup_problem_loss_tmp` variant
# of Setup); per this section's heading only Pointwise + Logical are active.
# NOTE(review): nothing in this cell selects the constraint families, and other
# cells with identical code log different constraint sets. Presumably the
# selection is made by editing src/setup_problem_loss_tmp.py between runs
# (picked up by %autoreload) - confirm, and ideally expose it as a parameter.
from src.setup_problem_loss_tmp import Setup

data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort them so the files are
# handed to Setup in the same order on every run.
file_list = sorted(os.listdir(train_data_dir_path))

# Base names (extension stripped) of the L*.csv / U*.csv files.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}



problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# Time the CVXPY problem construction and solve.
start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')
# Report how the solver terminated so that degenerate predictions can be told
# apart from a failed or inaccurate solve.
print(f'solver status: {problem.status}')


print()
print()
print()

# Load the held-out data for the 'Outcome' predicate.
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': np.array(test_df)
}

p_dict = problem_instance.predicates_dict
selected_predicates = ['Outcome']
selected_p_dict = {key: value for key, value in p_dict.items() if key in selected_predicates}

# The last column holds the +1/-1 label; the remaining columns are features.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Evaluate the learned predicate on the test features.
# NOTE(review): presumably a truth value in [0, 1], hence the 0.5 cut-off - confirm.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

# zero_division=0 avoids UndefinedMetricWarning if a class is never predicted.
class_report = classification_report(y_test, y_pred_interpreted, zero_division=0)
print("classification report: ")
print(class_report)
print()

# AUC is computed from the raw predicate outputs, not the thresholded labels.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.11218595504760742 seconds! 

Loading rules ...
Done in 0.0003943443298339844 seconds! 

Identifying predicates ...
Done in 0.0017671585083007812 seconds! 

Constructing objective function ...
Done in 0.0294039249420166 seconds! 

Constructing constraints ...

pointwise + logical

Done in 11.045023441314697 seconds! 

All done
                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:51:00 PM: Your problem has 222 variables, 1840 constraints, and 0 parameters.
(CVXPY) Nov 27 05:51:00 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:51:00 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:51:00 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
---------------------------------------------

  return np.log(values[0])


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.7910447761194029

Confusion matrix: 
[[78 12]
 [16 28]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.83      0.87      0.85        90
         1.0       0.70      0.64      0.67        44

    accuracy                           0.79       134
   macro avg       0.76      0.75      0.76       134
weighted avg       0.79      0.79      0.79       134


auc: 
0.8441919191919193


# Logical + Consistency

これは損失関数での実装は無し（損失関数は Pointwise が目的関数に必ず含まれるため）

In [27]:
# Train with the standard (primal) objective; per this section's heading only
# the Logical + Consistency constraints are active (no Pointwise constraints).
# NOTE(review): nothing in this cell selects the constraint families, and other
# cells with identical code log different constraint sets. Presumably the
# selection is made by editing src/setup_problem_tmp.py between runs (picked up
# by %autoreload) - confirm, and ideally expose it as a parameter.
from src.setup_problem_tmp import Setup

data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort them so the files are
# handed to Setup in the same order on every run.
file_list = sorted(os.listdir(train_data_dir_path))

# Base names (extension stripped) of the L*.csv / U*.csv files.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}



problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# Time the CVXPY problem construction and solve.
start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')
# Report how the solver terminated so that degenerate predictions can be told
# apart from a failed or inaccurate solve.
print(f'solver status: {problem.status}')


print()
print()
print()

# Load the held-out data for the 'Outcome' predicate.
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': np.array(test_df)
}

p_dict = problem_instance.predicates_dict
selected_predicates = ['Outcome']
selected_p_dict = {key: value for key, value in p_dict.items() if key in selected_predicates}

# The last column holds the +1/-1 label; the remaining columns are features.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Evaluate the learned predicate on the test features.
# NOTE(review): presumably a truth value in [0, 1], hence the 0.5 cut-off - confirm.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

# zero_division=0 avoids UndefinedMetricWarning if a class is never predicted.
class_report = classification_report(y_test, y_pred_interpreted, zero_division=0)
print("classification report: ")
print(class_report)
print()

# AUC is computed from the raw predicate outputs, not the thresholded labels.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.09714531898498535 seconds! 

Loading rules ...
Done in 0.0003020763397216797 seconds! 

Identifying predicates ...
Done in 0.0010979175567626953 seconds! 

Constructing objective function ...
Done in 2.5238165855407715 seconds! 

Constructing constraints ...

logical + consistency

Done in 16.79934310913086 seconds! 

All done




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:52:07 PM: Your problem has 12014 variables, 26304 constraints, and 0 parameters.
(CVXPY) Nov 27 05:52:09 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:52:09 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:52:09 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Nov 27 05:52:11 PM: Compiling problem (target solver=ECOS).
(CVXPY) Nov 27 05:52:11 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.4925373134328358

Confusion matrix: 
[[34 56]
 [12 32]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.74      0.38      0.50        90
         1.0       0.36      0.73      0.48        44

    accuracy                           0.49       134
   macro avg       0.55      0.55      0.49       134
weighted avg       0.62      0.49      0.50       134


auc: 
0.5785353535353536


# Pointwise + Consistency

### 通常の目的関数

In [28]:
# Train with the standard (primal) objective; per this section's heading only
# the Pointwise + Consistency constraints are active.
# NOTE(review): nothing in this cell selects the constraint families, and other
# cells with identical code log different constraint sets. Presumably the
# selection is made by editing src/setup_problem_tmp.py between runs (picked up
# by %autoreload) - confirm, and ideally expose it as a parameter.
from src.setup_problem_tmp import Setup

data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort them so the files are
# handed to Setup in the same order on every run.
file_list = sorted(os.listdir(train_data_dir_path))

# Base names (extension stripped) of the L*.csv / U*.csv files.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}



problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# Time the CVXPY problem construction and solve.
start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')
# Report how the solver terminated so that degenerate predictions can be told
# apart from a failed or inaccurate solve.
print(f'solver status: {problem.status}')


print()
print()
print()

# Load the held-out data for the 'Outcome' predicate.
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': np.array(test_df)
}

p_dict = problem_instance.predicates_dict
selected_predicates = ['Outcome']
selected_p_dict = {key: value for key, value in p_dict.items() if key in selected_predicates}

# The last column holds the +1/-1 label; the remaining columns are features.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Evaluate the learned predicate on the test features.
# NOTE(review): presumably a truth value in [0, 1], hence the 0.5 cut-off - confirm.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

# zero_division=0 keeps the reported 0.00 for a class that is never predicted
# but suppresses the UndefinedMetricWarning noise.
class_report = classification_report(y_test, y_pred_interpreted, zero_division=0)
print("classification report: ")
print(class_report)
print()

# AUC is computed from the raw predicate outputs, not the thresholded labels.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.11284995079040527 seconds! 

Loading rules ...
Done in 0.0007150173187255859 seconds! 

Identifying predicates ...
Done in 0.002452373504638672 seconds! 

Constructing objective function ...
Done in 2.9850361347198486 seconds! 

Constructing constraints ...

pointwise + consistency

Done in 17.23595380783081 seconds! 

All done




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:54:13 PM: Your problem has 12014 variables, 36256 constraints, and 0 parameters.
(CVXPY) Nov 27 05:54:17 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:54:17 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:54:17 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Nov 27 05:54:22 PM: Compiling problem (target solver=ECOS).
(CVXPY) Nov 27 05:54:22 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.6716417910447762

Confusion matrix: 
[[90  0]
 [44  0]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.67      1.00      0.80        90
         1.0       0.00      0.00      0.00        44

    accuracy                           0.67       134
   macro avg       0.34      0.50      0.40       134
weighted avg       0.45      0.67      0.54       134


auc: 
0.7893939393939394


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### 損失関数

In [29]:
# Train with the loss-function objective (the `setup_problem_loss_tmp` variant
# of Setup); per this section's heading only Pointwise + Consistency are active.
# NOTE(review): nothing in this cell selects the constraint families, and other
# cells with identical code log different constraint sets. Presumably the
# selection is made by editing src/setup_problem_loss_tmp.py between runs
# (picked up by %autoreload) - confirm, and ideally expose it as a parameter.
from src.setup_problem_loss_tmp import Setup

data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort them so the files are
# handed to Setup in the same order on every run.
file_list = sorted(os.listdir(train_data_dir_path))

# Base names (extension stripped) of the L*.csv / U*.csv files.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}



problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

# Time the CVXPY problem construction and solve.
start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')
# Report how the solver terminated so that degenerate predictions can be told
# apart from a failed or inaccurate solve.
print(f'solver status: {problem.status}')


print()
print()
print()

# Load the held-out data for the 'Outcome' predicate.
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': np.array(test_df)
}

p_dict = problem_instance.predicates_dict
selected_predicates = ['Outcome']
selected_p_dict = {key: value for key, value in p_dict.items() if key in selected_predicates}

# The last column holds the +1/-1 label; the remaining columns are features.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Evaluate the learned predicate on the test features.
# NOTE(review): presumably a truth value in [0, 1], hence the 0.5 cut-off - confirm.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

# zero_division=0 avoids UndefinedMetricWarning if a class is never predicted.
class_report = classification_report(y_test, y_pred_interpreted, zero_division=0)
print("classification report: ")
print(class_report)
print()

# AUC is computed from the raw predicate outputs, not the thresholded labels.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.09220528602600098 seconds! 

Loading rules ...
Done in 0.00031447410583496094 seconds! 

Identifying predicates ...
Done in 0.0010645389556884766 seconds! 

Constructing objective function ...
Done in 0.025448083877563477 seconds! 

Constructing constraints ...

pointwise + consistency

Done in 5.41135048866272 seconds! 

All done
                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:56:26 PM: Your problem has 222 variables, 24464 constraints, and 0 parameters.
(CVXPY) Nov 27 05:56:28 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:56:28 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:56:28 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
---------------------------------------

  return np.log(values[0])


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.7611940298507462

Confusion matrix: 
[[79 11]
 [21 23]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.79      0.88      0.83        90
         1.0       0.68      0.52      0.59        44

    accuracy                           0.76       134
   macro avg       0.73      0.70      0.71       134
weighted avg       0.75      0.76      0.75       134


auc: 
0.8032828282828283


# Pointwise

### 通常の目的関数

In [30]:
# Pointwise section, standard objective: build the problem, solve it with CVXPY,
# then score the learned 'Outcome' predicate on the held-out split.
#
# `Setup` is imported inside the cell because other cells in this notebook bind the
# same name from src.setup_problem_loss_tmp; importing here keeps the cell
# independent of execution order.
# NOTE(review): nothing in this cell selects which constraint types are used. The
# recorded output prints "pointwise", so the selection presumably lives inside
# src.setup_problem_tmp -- confirm the module is in that state before re-running.
from src.setup_problem_tmp import Setup

# --- Locate training files --------------------------------------------------
data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort so every run hands the
# files to Setup in the same order.
file_list = sorted(os.listdir(train_data_dir_path))

# File names without the '.csv' extension. splitext strips only the final
# extension, whereas split('.csv')[0] would cut at the first '.csv' occurrence.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}

# --- Build and solve the optimization problem -------------------------------
problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')

# Report the solver outcome: if the solve did not succeed, the variable values
# are unset and the metrics below are meaningless (or the cell fails on `.value`).
print(f'solver status: {problem.status}')
if problem.status not in ('optimal', 'optimal_inaccurate'):
    print('WARNING: solver did not reach an optimal solution; '
          'the predictions below may be unavailable or unreliable.')


print()
print()
print()

# --- Load the test split -----------------------------------------------------
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': test_df.to_numpy()
}

p_dict = problem_instance.predicates_dict

# The last column is the label; every other column is a feature.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Raw predicate outputs are thresholded at 0.5 to obtain labels in {-1, 1}.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


# --- Metrics -----------------------------------------------------------------
accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

class_report = classification_report(y_test, y_pred_interpreted)
print("classification report: ")
print(class_report)
print()

# AUC is computed on the raw scores, so it does not depend on the 0.5 threshold.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.12300252914428711 seconds! 

Loading rules ...
Done in 0.0004725456237792969 seconds! 

Identifying predicates ...
Done in 0.0014185905456542969 seconds! 

Constructing objective function ...
Done in 3.058239221572876 seconds! 

Constructing constraints ...

pointwise

Done in 14.605812549591064 seconds! 

All done




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:58:24 PM: Your problem has 12014 variables, 11792 constraints, and 0 parameters.
(CVXPY) Nov 27 05:58:26 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:58:26 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:58:26 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Nov 27 05:58:29 PM: Compiling problem (target solver=ECOS).
(CVXPY) Nov 27 05:58:29 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.7835820895522388

Confusion matrix: 
[[77 13]
 [16 28]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.83      0.86      0.84        90
         1.0       0.68      0.64      0.66        44

    accuracy                           0.78       134
   macro avg       0.76      0.75      0.75       134
weighted avg       0.78      0.78      0.78       134


auc: 
0.8588383838383838


### 損失関数

In [31]:
# Pointwise section, loss-function objective: build the problem, solve it with
# CVXPY, then score the learned 'Outcome' predicate on the held-out split.
#
# `Setup` is imported inside the cell because other cells in this notebook bind the
# same name from src.setup_problem_tmp; importing here keeps the cell independent
# of execution order.
# NOTE(review): nothing in this cell selects which constraint types are used. The
# recorded output prints "pointwise", so the selection presumably lives inside
# src.setup_problem_loss_tmp -- confirm the module is in that state before re-running.
from src.setup_problem_loss_tmp import Setup

# --- Locate training files --------------------------------------------------
data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort so every run hands the
# files to Setup in the same order.
file_list = sorted(os.listdir(train_data_dir_path))

# File names without the '.csv' extension. splitext strips only the final
# extension, whereas split('.csv')[0] would cut at the first '.csv' occurrence.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}

# --- Build and solve the optimization problem -------------------------------
problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')

# Report the solver outcome: if the solve did not succeed, the variable values
# are unset and the metrics below are meaningless (or the cell fails on `.value`).
print(f'solver status: {problem.status}')
if problem.status not in ('optimal', 'optimal_inaccurate'):
    print('WARNING: solver did not reach an optimal solution; '
          'the predictions below may be unavailable or unreliable.')


print()
print()
print()

# --- Load the test split -----------------------------------------------------
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': test_df.to_numpy()
}

p_dict = problem_instance.predicates_dict

# The last column is the label; every other column is a feature.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Raw predicate outputs are thresholded at 0.5 to obtain labels in {-1, 1}.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


# --- Metrics -----------------------------------------------------------------
accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

class_report = classification_report(y_test, y_pred_interpreted)
print("classification report: ")
print(class_report)
print()

# AUC is computed on the raw scores, so it does not depend on the 0.5 threshold.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.0862572193145752 seconds! 

Loading rules ...
Done in 0.00027823448181152344 seconds! 

Identifying predicates ...
Done in 0.001008749008178711 seconds! 

Constructing objective function ...
Done in 0.02483987808227539 seconds! 

Constructing constraints ...

pointwise

Done in 10.369284868240356 seconds! 

All done
                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 05:59:41 PM: Your problem has 222 variables, 0 constraints, and 0 parameters.
(CVXPY) Nov 27 05:59:41 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 05:59:41 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 05:59:41 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
----------------------------------------------------------

  return np.log(values[0])


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.7910447761194029

Confusion matrix: 
[[78 12]
 [16 28]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.83      0.87      0.85        90
         1.0       0.70      0.64      0.67        44

    accuracy                           0.79       134
   macro avg       0.76      0.75      0.76       134
weighted avg       0.79      0.79      0.79       134


auc: 
0.8462121212121212


# Logical

この設定には損失関数版の実装は無い（損失関数版では Pointwise 制約が必ず目的関数に組み込まれるため，Pointwise を外した設定を作れない）

In [33]:
# Logical section, standard objective: build the problem, solve it with CVXPY,
# then score the learned 'Outcome' predicate on the held-out split.
#
# `Setup` is imported inside the cell because other cells in this notebook bind the
# same name from src.setup_problem_loss_tmp; importing here keeps the cell
# independent of execution order.
# NOTE(review): nothing in this cell selects which constraint types are used. The
# recorded output prints "logical", so the selection presumably lives inside
# src.setup_problem_tmp -- confirm the module is in that state before re-running.
from src.setup_problem_tmp import Setup

# --- Locate training files --------------------------------------------------
data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort so every run hands the
# files to Setup in the same order.
file_list = sorted(os.listdir(train_data_dir_path))

# File names without the '.csv' extension. splitext strips only the final
# extension, whereas split('.csv')[0] would cut at the first '.csv' occurrence.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}

# --- Build and solve the optimization problem -------------------------------
problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')

# Report the solver outcome: if the solve did not succeed, the variable values
# are unset and the metrics below are meaningless (or the cell fails on `.value`).
print(f'solver status: {problem.status}')
if problem.status not in ('optimal', 'optimal_inaccurate'):
    print('WARNING: solver did not reach an optimal solution; '
          'the predictions below may be unavailable or unreliable.')


print()
print()
print()

# --- Load the test split -----------------------------------------------------
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': test_df.to_numpy()
}

p_dict = problem_instance.predicates_dict

# The last column is the label; every other column is a feature.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Raw predicate outputs are thresholded at 0.5 to obtain labels in {-1, 1}.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


# --- Metrics -----------------------------------------------------------------
accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

class_report = classification_report(y_test, y_pred_interpreted)
print("classification report: ")
print(class_report)
print()

# AUC is computed on the raw scores, so it does not depend on the 0.5 threshold.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.09073853492736816 seconds! 

Loading rules ...
Done in 0.0003485679626464844 seconds! 

Identifying predicates ...
Done in 0.001280069351196289 seconds! 

Constructing objective function ...
Done in 2.1926255226135254 seconds! 

Constructing constraints ...

logical

Done in 17.985443830490112 seconds! 

All done




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 06:02:42 PM: Your problem has 12014 variables, 1840 constraints, and 0 parameters.
(CVXPY) Nov 27 06:02:43 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 06:02:43 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 06:02:43 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Nov 27 06:02:44 PM: Compiling problem (target solver=ECOS).
(CVXPY) Nov 27 06:02:44 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuff

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.3283582089552239

Confusion matrix: 
[[ 0 90]
 [ 0 44]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00        90
         1.0       0.33      1.00      0.49        44

    accuracy                           0.33       134
   macro avg       0.16      0.50      0.25       134
weighted avg       0.11      0.33      0.16       134


auc: 
0.8156565656565656


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Consistency

この設定には損失関数版の実装は無い（損失関数版では Pointwise 制約が必ず目的関数に組み込まれるため，Pointwise を外した設定を作れない）

In [34]:
# Consistency section, standard objective: build the problem, solve it with CVXPY,
# then score the learned 'Outcome' predicate on the held-out split.
#
# `Setup` is imported inside the cell because other cells in this notebook bind the
# same name from src.setup_problem_loss_tmp; importing here keeps the cell
# independent of execution order.
# NOTE(review): nothing in this cell selects which constraint types are used. The
# recorded output prints "consistency", so the selection presumably lives inside
# src.setup_problem_tmp -- confirm the module is in that state before re-running.
from src.setup_problem_tmp import Setup

# --- Locate training files --------------------------------------------------
data_dir_path = os.path.join(project_dir_path, 'inputs/pima_indian_diabetes')
train_data_dir_path = os.path.join(data_dir_path, "train_4")
# os.listdir returns entries in arbitrary order; sort so every run hands the
# files to Setup in the same order.
file_list = sorted(os.listdir(train_data_dir_path))

# File names without the '.csv' extension. splitext strips only the final
# extension, whereas split('.csv')[0] would cut at the first '.csv' occurrence.
L_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('L') and filename.endswith('.csv')]

U_files = [os.path.splitext(filename)[0] for filename in file_list
           if filename.startswith('U') and filename.endswith('.csv')]

file_names_dict = {
    'supervised': L_files,
    'unsupervised': U_files,
    'rule': ['rules']
}

# --- Build and solve the optimization problem -------------------------------
problem_instance = Setup(train_data_dir_path, file_names_dict)
objective, constraints = problem_instance.main(c1=10, c2=10)

start_time = time.time()
problem = cp.Problem(objective, constraints)
result = problem.solve(verbose=True)
end_time = time.time()
print()
print(f'学習時間: {end_time - start_time} 秒')

# Report the solver outcome: if the solve did not succeed, the variable values
# are unset and the metrics below are meaningless (or the cell fails on `.value`).
print(f'solver status: {problem.status}')
if problem.status not in ('optimal', 'optimal_inaccurate'):
    print('WARNING: solver did not reach an optimal solution; '
          'the predictions below may be unavailable or unreliable.')


print()
print()
print()

# --- Load the test split -----------------------------------------------------
test_data_dir_path = os.path.join(data_dir_path, "test_4")
file_path = os.path.join(test_data_dir_path, "L_Outcome.csv")
test_df = pd.read_csv(file_path, index_col=0)
display(test_df.head())

test_data = {
    'Outcome': test_df.to_numpy()
}

p_dict = problem_instance.predicates_dict

# The last column is the label; every other column is a feature.
X_test = test_data['Outcome'][:, :-1]
y_test = test_data['Outcome'][:, -1]

# Raw predicate outputs are thresholded at 0.5 to obtain labels in {-1, 1}.
y_pred = p_dict['Outcome'](X_test).value
y_pred_interpreted = np.where(y_pred >= 0.5, 1, -1)


# --- Metrics -----------------------------------------------------------------
accuracy = accuracy_score(y_test, y_pred_interpreted)
print("Accuracy: ")
print(accuracy)
print()

conf_matrix = confusion_matrix(y_test, y_pred_interpreted)
print("Confusion matrix: ")
print(conf_matrix)
print()

class_report = classification_report(y_test, y_pred_interpreted)
print("classification report: ")
print(class_report)
print()

# AUC is computed on the raw scores, so it does not depend on the 0.5 threshold.
roc_auc = roc_auc_score(y_test, y_pred)
print("auc: ")
print(roc_auc)

Loading data ...
Done in 0.0779719352722168 seconds! 

Loading rules ...
Done in 0.0003418922424316406 seconds! 

Identifying predicates ...
Done in 0.0010347366333007812 seconds! 

Constructing objective function ...
Done in 2.63165545463562 seconds! 

Constructing constraints ...

consistency

Done in 17.524654150009155 seconds! 

All done




                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Nov 27 06:04:34 PM: Your problem has 12014 variables, 24464 constraints, and 0 parameters.
(CVXPY) Nov 27 06:04:35 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Nov 27 06:04:35 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Nov 27 06:04:35 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Nov 27 06:04:37 PM: Compiling problem (target solver=ECOS).
(CVXPY) Nov 27 06:04:37 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuf

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,BMI,DiabetesPedigreeFunction,Age,target
419,3,129,64,29,26.4,0.219,28,1
186,8,181,68,36,30.1,0.615,60,1
556,1,97,70,40,38.1,0.218,30,-1
738,2,99,60,17,36.6,0.453,21,-1
320,4,129,60,12,27.5,0.527,31,-1


Accuracy: 
0.6716417910447762

Confusion matrix: 
[[90  0]
 [44  0]]

classification report: 
              precision    recall  f1-score   support

        -1.0       0.67      1.00      0.80        90
         1.0       0.00      0.00      0.00        44

    accuracy                           0.67       134
   macro avg       0.34      0.50      0.40       134
weighted avg       0.45      0.67      0.54       134


auc: 
0.8393939393939394


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
