In [1]:
import json
import os

import sys
project_dir_path = "/Users/keisukeonoue/ws/lukasiewicz_2"
sys.path.append(project_dir_path)

import pandas as pd
import numpy as np

version_nums = [151]

# リスト内包表記を使用して複数のファイルパスを生成する
result_file_paths = [
    os.path.join(project_dir_path, f"experiment_result/tmp/version_{version_num}/result.json")
    for version_num in version_nums
]


result_dfs = []
infos = []

for path in result_file_paths:
    with open(path, 'r') as f:
        json_data = json.load(f)
    
    infos.append(json_data['rule_thr'])
    
    tmp = []

    for fold, _ in json_data['result'].items():
        result_df = pd.DataFrame(json_data['result'][fold]).T.drop(['violation_detail'], axis=1)
        tmp.append(result_df)

    result_dfs.append(tmp)

model_name_dict = {
    'linear svm': 'l-SVM', 
    'non-linear svm': 'r-SVM', 
    'logistic regression': 'RogReg', 
    'RuleFit Classifier (disc)': 'RuleFit-d', 
    'RuleFit Classifier (conti)': 'RuleFit', 
    'tree generator (disc)': 'RF-d', 
    'tree generator (conti)': 'RF', 
    'linear svm (L)': 'l-SVM-p', 
    'non-linear svm (L)': 'r-SVM-p', 
    'logistic regression (L)': 'LogReg-p', 
    'rule_based_prediction': 'RuleBase'
}

model_name_list = list(model_name_dict.keys())

combined_dfs = [pd.concat(dfs, ignore_index=False) for dfs in result_dfs]
dfs_mean = [combined_df.groupby(combined_df.index).mean().reindex(index=model_name_list) for combined_df in combined_dfs]
dfs_std = [combined_df.groupby(combined_df.index).std().reindex(index=model_name_list) for combined_df in combined_dfs]

dfs_mean = [df.set_index(pd.Index(model_name_dict.values())) for df in dfs_mean]
dfs_std = [df.set_index(pd.Index(model_name_dict.values())) for df in dfs_std]


for df in dfs_mean:
    display(df)

for df in dfs_std:
    display(df)


df_mean = dfs_mean[0]
df_std = dfs_std[0]
result_df = df_mean.astype(float).round(3).astype(str) + ' ± ' + df_std.astype(float).round(2).astype(str)
result_df['compliance_rate'] = (1 - df_mean['violation_rate']).astype(float).round(3).astype(str) + ' ± ' + df_std['violation_rate'].astype(float).round(2).astype(str)
result_df['compliance_rate (instance)'] = (1 - df_mean['violation_rate (instance)']).astype(float).round(3).astype(str) + ' ± ' + df_std['violation_rate (instance)'].astype(float).round(2).astype(str)
display(result_df)

Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
l-SVM,0.779104,0.711436,0.551175,0.618693,0.837805,45.4,46.0,0.988235,2726.4,3148.0,0.866152
r-SVM,0.780597,0.75907,0.479714,0.587201,0.838064,46.0,46.0,1.0,2803.4,3148.0,0.889837
RogReg,0.78209,0.731143,0.530165,0.612464,0.840573,46.0,46.0,1.0,2759.6,3148.0,0.876497
RuleFit-d,0.783582,0.742431,0.521271,0.610252,0.815651,45.4,46.0,0.988235,2734.6,3148.0,0.869052
RuleFit,0.732836,0.601979,0.553471,0.571908,0.795116,46.0,46.0,1.0,2632.8,3148.0,0.836848
RF-d,0.741791,0.627843,0.557706,0.582608,0.772322,46.0,46.0,1.0,2634.2,3148.0,0.837924
RF,0.78209,0.715158,0.568106,0.62746,0.8326,45.8,46.0,0.996078,2702.4,3148.0,0.85896
l-SVM-p,0.61791,0.471546,0.883547,0.608462,0.680065,43.0,46.0,0.926789,1408.6,3148.0,0.450271
r-SVM-p,0.683582,0.525018,0.756815,0.607215,0.795545,44.6,46.0,0.970228,1398.2,3148.0,0.443859
LogReg-p,0.738806,0.766883,0.29155,0.416785,0.827454,45.8,46.0,0.995833,1478.0,3148.0,0.469113


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
l-SVM,0.053711,0.04834,0.08646,0.061671,0.045376,3.974921,4.636809,0.026307,246.743389,288.773267,0.002317
r-SVM,0.046485,0.05776,0.048936,0.0502,0.048697,4.636809,4.636809,0.0,286.699843,288.773267,0.012451
RogReg,0.045821,0.058217,0.063637,0.047569,0.047432,4.636809,4.636809,0.0,258.467213,288.773267,0.007233
RuleFit-d,0.044776,0.048551,0.064326,0.044856,0.05095,3.974921,4.636809,0.026307,237.896616,288.773267,0.008854
RuleFit,0.040326,0.076503,0.064527,0.037194,0.032516,4.636809,4.636809,0.0,229.386573,288.773267,0.019546
RF-d,0.04995,0.094845,0.077244,0.037668,0.049073,4.636809,4.636809,0.0,238.285963,288.773267,0.043514
RF,0.037089,0.071703,0.076514,0.029962,0.039969,4.38178,4.636809,0.008769,237.099768,288.773267,0.019952
l-SVM-p,0.159132,0.089797,0.073142,0.07662,0.241256,8.746428,4.636809,0.116751,62.656205,288.773267,0.043601
r-SVM-p,0.083535,0.110493,0.123362,0.083921,0.043094,4.159327,4.636809,0.016359,142.897516,288.773267,0.007642
LogReg-p,0.060166,0.087423,0.08987,0.102183,0.05342,4.549725,4.636809,0.009317,167.020957,288.773267,0.022782


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance),compliance_rate,compliance_rate (instance)
l-SVM,0.779 ± 0.05,0.711 ± 0.05,0.551 ± 0.09,0.619 ± 0.06,0.838 ± 0.05,45.4 ± 3.97,46.0 ± 4.64,0.988 ± 0.03,2726.4 ± 246.74,3148.0 ± 288.77,0.866 ± 0.0,0.012 ± 0.03,0.134 ± 0.0
r-SVM,0.781 ± 0.05,0.759 ± 0.06,0.48 ± 0.05,0.587 ± 0.05,0.838 ± 0.05,46.0 ± 4.64,46.0 ± 4.64,1.0 ± 0.0,2803.4 ± 286.7,3148.0 ± 288.77,0.89 ± 0.01,0.0 ± 0.0,0.11 ± 0.01
RogReg,0.782 ± 0.05,0.731 ± 0.06,0.53 ± 0.06,0.612 ± 0.05,0.841 ± 0.05,46.0 ± 4.64,46.0 ± 4.64,1.0 ± 0.0,2759.6 ± 258.47,3148.0 ± 288.77,0.876 ± 0.01,0.0 ± 0.0,0.124 ± 0.01
RuleFit-d,0.784 ± 0.04,0.742 ± 0.05,0.521 ± 0.06,0.61 ± 0.04,0.816 ± 0.05,45.4 ± 3.97,46.0 ± 4.64,0.988 ± 0.03,2734.6 ± 237.9,3148.0 ± 288.77,0.869 ± 0.01,0.012 ± 0.03,0.131 ± 0.01
RuleFit,0.733 ± 0.04,0.602 ± 0.08,0.553 ± 0.06,0.572 ± 0.04,0.795 ± 0.03,46.0 ± 4.64,46.0 ± 4.64,1.0 ± 0.0,2632.8 ± 229.39,3148.0 ± 288.77,0.837 ± 0.02,0.0 ± 0.0,0.163 ± 0.02
RF-d,0.742 ± 0.05,0.628 ± 0.09,0.558 ± 0.08,0.583 ± 0.04,0.772 ± 0.05,46.0 ± 4.64,46.0 ± 4.64,1.0 ± 0.0,2634.2 ± 238.29,3148.0 ± 288.77,0.838 ± 0.04,0.0 ± 0.0,0.162 ± 0.04
RF,0.782 ± 0.04,0.715 ± 0.07,0.568 ± 0.08,0.627 ± 0.03,0.833 ± 0.04,45.8 ± 4.38,46.0 ± 4.64,0.996 ± 0.01,2702.4 ± 237.1,3148.0 ± 288.77,0.859 ± 0.02,0.004 ± 0.01,0.141 ± 0.02
l-SVM-p,0.618 ± 0.16,0.472 ± 0.09,0.884 ± 0.07,0.608 ± 0.08,0.68 ± 0.24,43.0 ± 8.75,46.0 ± 4.64,0.927 ± 0.12,1408.6 ± 62.66,3148.0 ± 288.77,0.45 ± 0.04,0.073 ± 0.12,0.55 ± 0.04
r-SVM-p,0.684 ± 0.08,0.525 ± 0.11,0.757 ± 0.12,0.607 ± 0.08,0.796 ± 0.04,44.6 ± 4.16,46.0 ± 4.64,0.97 ± 0.02,1398.2 ± 142.9,3148.0 ± 288.77,0.444 ± 0.01,0.03 ± 0.02,0.556 ± 0.01
LogReg-p,0.739 ± 0.06,0.767 ± 0.09,0.292 ± 0.09,0.417 ± 0.1,0.827 ± 0.05,45.8 ± 4.55,46.0 ± 4.64,0.996 ± 0.01,1478.0 ± 167.02,3148.0 ± 288.77,0.469 ± 0.02,0.004 ± 0.01,0.531 ± 0.02


In [4]:
import json
import os

import sys
project_dir_path = "/Users/keisukeonoue/ws/lukasiewicz_2"
sys.path.append(project_dir_path)

import pandas as pd
import numpy as np

version_nums = [156]

# リスト内包表記を使用して複数のファイルパスを生成する
result_file_paths = [
    os.path.join(project_dir_path, f"experiment_result/tmp/version_{version_num}/result.json")
    for version_num in version_nums
]


result_dfs = []
infos = []

for path in result_file_paths:
    with open(path, 'r') as f:
        json_data = json.load(f)
    
    infos.append(json_data['rule_thr'])
    
    tmp = []

    for fold, _ in json_data['result'].items():
        result_df = pd.DataFrame(json_data['result'][fold]).T.drop(['violation_detail'], axis=1)
        tmp.append(result_df)

    result_dfs.append(tmp)

model_name_dict = {
    'linear svm': 'l-SVM', 
    'non-linear svm': 'r-SVM', 
    'logistic regression': 'RogReg', 
    'RuleFit Classifier (disc)': 'RuleFit-d', 
    'RuleFit Classifier (conti)': 'RuleFit', 
    'tree generator (disc)': 'RF-d', 
    'tree generator (conti)': 'RF', 
    'linear svm (L)': 'l-SVM-p', 
    'non-linear svm (L)': 'r-SVM-p', 
    'logistic regression (L)': 'LogReg-p', 
    'rule_based_prediction': 'RuleBase'
}

model_name_list = list(model_name_dict.keys())

combined_dfs = [pd.concat(dfs, ignore_index=False) for dfs in result_dfs]
dfs_mean = [combined_df.groupby(combined_df.index).mean().reindex(index=model_name_list) for combined_df in combined_dfs]
dfs_std = [combined_df.groupby(combined_df.index).std().reindex(index=model_name_list) for combined_df in combined_dfs]

dfs_mean = [df.set_index(pd.Index(model_name_dict.values())) for df in dfs_mean]
dfs_std = [df.set_index(pd.Index(model_name_dict.values())) for df in dfs_std]


for df in dfs_mean:
    display(df)

for df in dfs_std:
    display(df)


df_mean = dfs_mean[0]
df_std = dfs_std[0]
result_df = df_mean.astype(float).round(3).astype(str) + ' ± ' + df_std.astype(float).round(2).astype(str)
result_df['compliance_rate'] = (1 - df_mean['violation_rate']).astype(float).round(3).astype(str) + ' ± ' + df_std['violation_rate'].astype(float).round(2).astype(str)
result_df['compliance_rate (instance)'] = (1 - df_mean['violation_rate (instance)']).astype(float).round(3).astype(str) + ' ± ' + df_std['violation_rate (instance)'].astype(float).round(2).astype(str)
display(result_df)

Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
l-SVM,0.779104,0.711436,0.551175,0.618693,0.837805,1.2,1.8,0.85,9.0,44.6,0.164254
r-SVM,0.780597,0.75907,0.479714,0.587201,0.838064,1.8,1.8,1.0,14.2,44.6,0.280367
RogReg,0.78209,0.731143,0.530165,0.612464,0.840573,1.8,1.8,1.0,11.4,44.6,0.225741
RuleFit-d,0.783582,0.742431,0.521271,0.610252,0.815651,1.2,1.8,0.85,8.6,44.6,0.156725
RuleFit,0.732836,0.601979,0.553471,0.571908,0.795116,1.8,1.8,1.0,12.6,44.6,0.262864
RF-d,0.741791,0.627843,0.557706,0.582608,0.772322,1.8,1.8,1.0,10.0,44.6,0.237624
RF,0.78209,0.715158,0.568106,0.62746,0.8326,1.6,1.8,0.95,11.6,44.6,0.243154
l-SVM-p,0.325373,0.325373,1.0,0.489178,0.250784,0.0,1.8,0.0,0.0,44.6,0.0
r-SVM-p,0.720896,0.550099,0.781269,0.644552,0.821473,0.4,1.8,0.15,3.6,44.6,0.050752
LogReg-p,0.765672,0.76142,0.419501,0.537892,0.828,1.8,1.8,1.0,16.2,44.6,0.33439


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
l-SVM,0.053711,0.04834,0.08646,0.061671,0.045376,0.447214,1.30384,0.33541,9.300538,25.52058,0.13018
r-SVM,0.046485,0.05776,0.048936,0.0502,0.048697,1.30384,1.30384,0.0,11.924764,25.52058,0.119016
RogReg,0.045821,0.058217,0.063637,0.047569,0.047432,1.30384,1.30384,0.0,9.607289,25.52058,0.1266
RuleFit-d,0.044776,0.048551,0.064326,0.044856,0.05095,0.447214,1.30384,0.33541,8.763561,25.52058,0.124331
RuleFit,0.040326,0.076503,0.064527,0.037194,0.032516,1.30384,1.30384,0.0,9.289779,25.52058,0.096391
RF-d,0.04995,0.094845,0.077244,0.037668,0.049073,1.30384,1.30384,0.0,4.636809,25.52058,0.075719
RF,0.037089,0.071703,0.076514,0.029962,0.039969,0.894427,1.30384,0.111803,8.264381,25.52058,0.077552
l-SVM-p,0.050228,0.050228,0.0,0.059803,0.045193,0.0,1.30384,0.0,0.0,25.52058,0.0
r-SVM-p,0.046783,0.038058,0.061296,0.035952,0.047694,0.547723,1.30384,0.223607,5.128353,25.52058,0.069555
LogReg-p,0.051864,0.095985,0.059894,0.056062,0.05137,1.30384,1.30384,0.0,11.96662,25.52058,0.09149


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance),compliance_rate,compliance_rate (instance)
l-SVM,0.779 ± 0.05,0.711 ± 0.05,0.551 ± 0.09,0.619 ± 0.06,0.838 ± 0.05,1.2 ± 0.45,1.8 ± 1.3,0.85 ± 0.34,9.0 ± 9.3,44.6 ± 25.52,0.164 ± 0.13,0.15 ± 0.34,0.836 ± 0.13
r-SVM,0.781 ± 0.05,0.759 ± 0.06,0.48 ± 0.05,0.587 ± 0.05,0.838 ± 0.05,1.8 ± 1.3,1.8 ± 1.3,1.0 ± 0.0,14.2 ± 11.92,44.6 ± 25.52,0.28 ± 0.12,0.0 ± 0.0,0.72 ± 0.12
RogReg,0.782 ± 0.05,0.731 ± 0.06,0.53 ± 0.06,0.612 ± 0.05,0.841 ± 0.05,1.8 ± 1.3,1.8 ± 1.3,1.0 ± 0.0,11.4 ± 9.61,44.6 ± 25.52,0.226 ± 0.13,0.0 ± 0.0,0.774 ± 0.13
RuleFit-d,0.784 ± 0.04,0.742 ± 0.05,0.521 ± 0.06,0.61 ± 0.04,0.816 ± 0.05,1.2 ± 0.45,1.8 ± 1.3,0.85 ± 0.34,8.6 ± 8.76,44.6 ± 25.52,0.157 ± 0.12,0.15 ± 0.34,0.843 ± 0.12
RuleFit,0.733 ± 0.04,0.602 ± 0.08,0.553 ± 0.06,0.572 ± 0.04,0.795 ± 0.03,1.8 ± 1.3,1.8 ± 1.3,1.0 ± 0.0,12.6 ± 9.29,44.6 ± 25.52,0.263 ± 0.1,0.0 ± 0.0,0.737 ± 0.1
RF-d,0.742 ± 0.05,0.628 ± 0.09,0.558 ± 0.08,0.583 ± 0.04,0.772 ± 0.05,1.8 ± 1.3,1.8 ± 1.3,1.0 ± 0.0,10.0 ± 4.64,44.6 ± 25.52,0.238 ± 0.08,0.0 ± 0.0,0.762 ± 0.08
RF,0.782 ± 0.04,0.715 ± 0.07,0.568 ± 0.08,0.627 ± 0.03,0.833 ± 0.04,1.6 ± 0.89,1.8 ± 1.3,0.95 ± 0.11,11.6 ± 8.26,44.6 ± 25.52,0.243 ± 0.08,0.05 ± 0.11,0.757 ± 0.08
l-SVM-p,0.325 ± 0.05,0.325 ± 0.05,1.0 ± 0.0,0.489 ± 0.06,0.251 ± 0.05,0.0 ± 0.0,1.8 ± 1.3,0.0 ± 0.0,0.0 ± 0.0,44.6 ± 25.52,0.0 ± 0.0,1.0 ± 0.0,1.0 ± 0.0
r-SVM-p,0.721 ± 0.05,0.55 ± 0.04,0.781 ± 0.06,0.645 ± 0.04,0.821 ± 0.05,0.4 ± 0.55,1.8 ± 1.3,0.15 ± 0.22,3.6 ± 5.13,44.6 ± 25.52,0.051 ± 0.07,0.85 ± 0.22,0.949 ± 0.07
LogReg-p,0.766 ± 0.05,0.761 ± 0.1,0.42 ± 0.06,0.538 ± 0.06,0.828 ± 0.05,1.8 ± 1.3,1.8 ± 1.3,1.0 ± 0.0,16.2 ± 11.97,44.6 ± 25.52,0.334 ± 0.09,0.0 ± 0.0,0.666 ± 0.09


In [4]:
target_column_list = ['ROCAUC', '充足率（制約単位）', '充足率（インスタンス単位）']
target_index_list = ['l-SVM', 'r-SVM', 'RogReg', 'RuleFit', 'RF', 'RuleFit-d', 'RF-d', 'l-SVM-p', 'r-SVM-p', 'LogReg-p', 'RuleBase']

df_1 = df_mean.copy()
df_1['ROCAUC'] = df_1['auc']
df_1['充足率（制約単位）'] = 1 - df_1['violation_rate']
df_1['充足率（インスタンス単位）'] = 1 - df_1['violation_rate (instance)']
df_1 = df_1[target_column_list]
df_1 = df_1.T[target_index_list].T
df_1['Model'] = df_1.index

target_column_list = ['Model', 'ROCAUC', '充足率（制約単位）', '充足率（インスタンス単位）']
df_1 = df_1[target_column_list]

df_1

Unnamed: 0,Model,ROCAUC,充足率（制約単位）,充足率（インスタンス単位）
l-SVM,l-SVM,0.837805,0.011765,0.133848
r-SVM,r-SVM,0.838064,0.0,0.110163
RogReg,RogReg,0.840573,0.0,0.123503
RuleFit,RuleFit,0.795116,0.0,0.163152
RF,RF,0.8326,0.003922,0.14104
RuleFit-d,RuleFit-d,0.815651,0.011765,0.130948
RF-d,RF-d,0.772322,0.0,0.162076
l-SVM-p,l-SVM-p,0.680065,0.073211,0.549729
r-SVM-p,r-SVM-p,0.795545,0.029772,0.556141
LogReg-p,LogReg-p,0.827454,0.004167,0.530887


In [5]:
target_column_list = ['ROCAUC', '充足率（制約単位）', '充足率（インスタンス単位）']
target_index_list = ['l-SVM', 'r-SVM', 'RogReg', 'RuleFit', 'RF', 'RuleFit-d', 'RF-d', 'l-SVM-p', 'r-SVM-p', 'LogReg-p', 'RuleBase']

df_2 = df_std.copy()
df_2['ROCAUC'] = df_2['auc']
df_2['充足率（制約単位）'] = df_2['violation_rate']
df_2['充足率（インスタンス単位）'] = df_2['violation_rate (instance)']
df_2 = df_2[target_column_list]
df_2 = df_2.T[target_index_list].T
df_2['Model'] = df_2.index

target_column_list = ['Model', 'ROCAUC', '充足率（制約単位）', '充足率（インスタンス単位）']
df_2 = df_2[target_column_list]
df_2

Unnamed: 0,Model,ROCAUC,充足率（制約単位）,充足率（インスタンス単位）
l-SVM,l-SVM,0.045376,0.026307,0.002317
r-SVM,r-SVM,0.048697,0.0,0.012451
RogReg,RogReg,0.047432,0.0,0.007233
RuleFit,RuleFit,0.032516,0.0,0.019546
RF,RF,0.039969,0.008769,0.019952
RuleFit-d,RuleFit-d,0.05095,0.026307,0.008854
RF-d,RF-d,0.049073,0.0,0.043514
l-SVM-p,l-SVM-p,0.241256,0.116751,0.043601
r-SVM-p,r-SVM-p,0.043094,0.016359,0.007642
LogReg-p,LogReg-p,0.05342,0.009317,0.022782


In [7]:
df_1.melt(id_vars='Model', var_name='Metric', value_name='Mean')

Unnamed: 0,Model,Metric,Mean
0,l-SVM,ROCAUC,0.837805
1,r-SVM,ROCAUC,0.838064
2,RogReg,ROCAUC,0.840573
3,RuleFit,ROCAUC,0.795116
4,RF,ROCAUC,0.8326
5,RuleFit-d,ROCAUC,0.815651
6,RF-d,ROCAUC,0.772322
7,l-SVM-p,ROCAUC,0.680065
8,r-SVM-p,ROCAUC,0.795545
9,LogReg-p,ROCAUC,0.827454


In [8]:
import pandas as pd
import plotly.graph_objects as go

df_mean = df_1.copy()
df_std = df_2.copy()

# データフレームを整形
df_mean_melt = df_mean.melt(id_vars='Model', var_name='Metric', value_name='Mean')
df_std_melt = df_std.melt(id_vars='Model', var_name='Metric', value_name='Std')

# データフレームをマージ
df = pd.merge(df_mean_melt, df_std_melt,  how='left', left_on=['Model','Metric'], right_on = ['Model','Metric'])

# プロット作成
fig = go.Figure()

colors = {'ROCAUC': 'teal', '充足率（制約単位）': 'coral', '充足率（インスタンス単位）': 'gold'}

for metric in df['Metric'].unique():
    df_sub = df[df['Metric'] == metric]
    fig.add_trace(go.Bar(
        name=metric,
        x=df_sub['Model'],
        y=df_sub['Mean'],
        error_y=dict(type='data', array=df_sub['Std']),
        marker_color=colors[metric]
    ))

fig.update_layout(
    barmode='group', 
    autosize=False,
    width=700,
    height=500,
    legend=dict(
        yanchor="top",
        y=1.3,  # y=1.1はグラフの上部にレジェンドを配置します
        xanchor="left",
        x=0.5,  # x=0.5はレジェンドを中央に配置します
        orientation="v",  # orientation="h"はレジェンドを水平に配置します
        font=dict(size=16),  # レジェンドの文字サイズを14に設定します
    )
    )

fig.update_xaxes(
    tickangle=75,
    tickfont=dict(size=16),  # x軸のティックの文字サイズを14に設定します。
)

fig.update_yaxes(
    tickfont=dict(size=16),  # y軸のティックの文字サイズを14に設定します。
)
fig.show()

# Association Rule によるルール作成

## 一時保存した実験結果の読み込み

In [1]:
import json
import os

import sys
project_dir_path = "/Users/keisukeonoue/ws/lukasiewicz_2"
sys.path.append(project_dir_path)

import pandas as pd
import numpy as np

version_nums = [151, 152, 153, 154, 155, 156, 157, 158]

# リスト内包表記を使用して複数のファイルパスを生成する
result_file_paths = [
    os.path.join(project_dir_path, f"experiment_result/tmp/version_{version_num}/result.json")
    for version_num in version_nums
]


result_dfs = []
infos = []

for path in result_file_paths:
    with open(path, 'r') as f:
        json_data = json.load(f)
    
    infos.append(json_data['rule_thr'])
    
    tmp = []

    for fold, _ in json_data['result'].items():
        result_df = pd.DataFrame(json_data['result'][fold]).T.drop(['violation_detail'], axis=1)
        tmp.append(result_df)

    result_dfs.append(tmp)

model_name_dict = {
    'linear svm': 'l-SVM', 
    'non-linear svm': 'r-SVM', 
    'logistic regression': 'RogReg', 
    'RuleFit Classifier (disc)': 'RuleFit-d', 
    'RuleFit Classifier (conti)': 'RuleFit', 
    'tree generator (disc)': 'RF-d', 
    'tree generator (conti)': 'RF', 
    'linear svm (L)': 'l-SVM-p', 
    'non-linear svm (L)': 'r-SVM-p', 
    'logistic regression (L)': 'LogReg-p', 
    'rule_based_prediction': 'RuleBase'
}

model_name_list = list(model_name_dict.keys())

combined_dfs = [pd.concat(dfs, ignore_index=False) for dfs in result_dfs]
dfs_mean = [combined_df.groupby(combined_df.index).mean().reindex(index=model_name_list) for combined_df in combined_dfs]
dfs_std = [combined_df.groupby(combined_df.index).std().reindex(index=model_name_list) for combined_df in combined_dfs]

for df in dfs_mean:
    display(df)

Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,45.4,46.0,0.988235,2726.4,3148.0,0.866152
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,46.0,46.0,1.0,2803.4,3148.0,0.889837
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,46.0,46.0,1.0,2759.6,3148.0,0.876497
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815651,45.4,46.0,0.988235,2734.6,3148.0,0.869052
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,46.0,46.0,1.0,2632.8,3148.0,0.836848
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,46.0,46.0,1.0,2634.2,3148.0,0.837924
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,45.8,46.0,0.996078,2702.4,3148.0,0.85896
linear svm (L),0.61791,0.471546,0.883547,0.608462,0.680065,43.0,46.0,0.926789,1408.6,3148.0,0.450271
non-linear svm (L),0.683582,0.525018,0.756815,0.607215,0.795545,44.6,46.0,0.970228,1398.2,3148.0,0.443859
logistic regression (L),0.738806,0.766883,0.29155,0.416785,0.827454,45.8,46.0,0.995833,1478.0,3148.0,0.469113


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,45.4,46.0,0.988235,2726.4,3148.0,0.866152
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,46.0,46.0,1.0,2803.4,3148.0,0.889837
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,46.0,46.0,1.0,2759.6,3148.0,0.876497
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815651,45.4,46.0,0.988235,2734.6,3148.0,0.869052
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,46.0,46.0,1.0,2632.8,3148.0,0.836848
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,46.0,46.0,1.0,2634.2,3148.0,0.837924
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,45.8,46.0,0.996078,2702.4,3148.0,0.85896
linear svm (L),0.61791,0.471546,0.883547,0.608462,0.680065,43.0,46.0,0.926789,1408.6,3148.0,0.450271
non-linear svm (L),0.683582,0.525018,0.756815,0.607215,0.795545,44.6,46.0,0.970228,1398.2,3148.0,0.443859
logistic regression (L),0.738806,0.766883,0.29155,0.416785,0.827454,45.8,46.0,0.995833,1478.0,3148.0,0.469113


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,30.8,31.4,0.984615,1620.2,1918.6,0.845419
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,31.4,31.4,1.0,1673.8,1918.6,0.871507
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,31.4,31.4,1.0,1643.0,1918.6,0.856911
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815651,30.8,31.4,0.984615,1620.8,1918.6,0.846207
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,31.4,31.4,1.0,1557.4,1918.6,0.813508
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,31.4,31.4,1.0,1553.4,1918.6,0.810718
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,31.2,31.4,0.994872,1603.8,1918.6,0.836936
linear svm (L),0.616418,0.467204,0.898474,0.609567,0.69472,26.6,31.4,0.835733,794.0,1918.6,0.412356
non-linear svm (L),0.708955,0.546798,0.749184,0.621242,0.809149,30.2,31.4,0.964305,827.4,1918.6,0.430133
logistic regression (L),0.732836,0.801645,0.249993,0.376407,0.832998,31.0,31.4,0.9875,940.4,1918.6,0.486252


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,14.6,15.2,0.971429,580.2,739.6,0.780355
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,15.2,15.2,1.0,607.4,739.6,0.815294
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,15.2,15.2,1.0,592.2,739.6,0.796663
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815651,14.6,15.2,0.971429,581.2,739.6,0.782273
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,15.2,15.2,1.0,550.4,739.6,0.740708
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,15.2,15.2,1.0,548.2,739.6,0.736835
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,15.0,15.2,0.990476,573.2,739.6,0.771385
linear svm (L),0.540299,0.429574,0.868124,0.565006,0.654651,11.2,15.2,0.712711,268.6,739.6,0.354002
non-linear svm (L),0.732836,0.579031,0.697701,0.626487,0.815141,13.8,15.2,0.912711,362.8,739.6,0.490148
logistic regression (L),0.728358,0.828788,0.212169,0.335828,0.833443,14.6,15.2,0.957949,471.8,739.6,0.650522


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,4.2,4.8,0.925,98.8,175.8,0.531184
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,4.8,4.8,1.0,111.6,175.8,0.592676
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,4.8,4.8,1.0,103.8,175.8,0.555287
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815651,4.2,4.8,0.925,96.8,175.8,0.527237
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,4.8,4.8,1.0,94.0,175.8,0.515464
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,4.8,4.8,1.0,88.8,175.8,0.486147
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,4.6,4.8,0.975,100.8,175.8,0.543868
linear svm (L),0.389552,0.330053,0.862182,0.475233,0.479372,3.0,4.8,0.533333,38.4,175.8,0.147824
non-linear svm (L),0.707463,0.537475,0.79283,0.634478,0.812482,3.4,4.8,0.708333,58.2,175.8,0.313441
logistic regression (L),0.761194,0.763833,0.389945,0.514601,0.83093,4.8,4.8,1.0,122.0,175.8,0.662662


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,1.2,1.8,0.85,9.0,44.6,0.164254
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,1.8,1.8,1.0,14.2,44.6,0.280367
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,1.8,1.8,1.0,11.4,44.6,0.225741
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815651,1.2,1.8,0.85,8.6,44.6,0.156725
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,1.8,1.8,1.0,12.6,44.6,0.262864
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,1.8,1.8,1.0,10.0,44.6,0.237624
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,1.6,1.8,0.95,11.6,44.6,0.243154
linear svm (L),0.325373,0.325373,1.0,0.489178,0.250784,0.0,1.8,0.0,0.0,44.6,0.0
non-linear svm (L),0.720896,0.550099,0.781269,0.644552,0.821473,0.4,1.8,0.15,3.6,44.6,0.050752
logistic regression (L),0.765672,0.76142,0.419501,0.537892,0.828,1.8,1.8,1.0,16.2,44.6,0.33439


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,0.8,1.2,0.8,2.6,29.2,0.090093
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,1.2,1.2,1.0,6.0,29.2,0.204658
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,1.2,1.2,1.0,4.6,29.2,0.157323
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815651,0.8,1.2,0.8,2.0,29.2,0.069582
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,1.2,1.2,1.0,6.2,29.2,0.213997
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,1.2,1.2,1.0,5.2,29.2,0.18568
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,1.0,1.2,0.9,5.2,29.2,0.18141
linear svm (L),0.462687,0.399772,0.897431,0.53234,0.464363,0.4,1.2,0.4,0.8,29.2,0.030484
non-linear svm (L),0.702985,0.544513,0.746173,0.617383,0.825117,0.0,1.2,0.0,0.0,29.2,0.0
logistic regression (L),0.777612,0.756847,0.481489,0.585173,0.825971,1.2,1.2,1.0,5.8,29.2,0.200582


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,0.8,1.2,0.8,2.6,29.2,0.090093
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,1.2,1.2,1.0,6.0,29.2,0.204658
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,1.2,1.2,1.0,4.6,29.2,0.157323
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815651,0.8,1.2,0.8,2.0,29.2,0.069582
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,1.2,1.2,1.0,6.2,29.2,0.213997
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,1.2,1.2,1.0,5.2,29.2,0.18568
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,1.0,1.2,0.9,5.2,29.2,0.18141
linear svm (L),0.434328,0.386968,0.927563,0.524839,0.607319,0.2,1.2,0.2,1.2,29.2,0.046154
non-linear svm (L),0.704478,0.53671,0.768934,0.624831,0.8266,0.0,1.2,0.0,0.0,29.2,0.0
logistic regression (L),0.765672,0.709353,0.49182,0.576199,0.825963,1.2,1.2,1.0,5.2,29.2,0.178531


## 実験結果の保存（クロスバリデーション）

In [18]:
import os

for version, (dfs, info) in enumerate(zip(result_dfs, infos)):
    
    for nth_fold, df in enumerate(dfs):

        df['nth_fold'] = nth_fold
        df['n_splits'] = 5
        df['C1'] = 10
        df['C2'] = 10
        df['n_unsupervised'] = 15
        df['rule_thr'] = info

        # フォルダーを作成する
        folder_path = os.path.join(project_dir_path, f'experiment_result/results_organized/experiment_15/fold_{nth_fold}')
        os.makedirs(folder_path, exist_ok=True)

        file_path = f'{folder_path}/ver_{version}.csv'
        print(file_path)

        df.to_csv(file_path)


/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_0/ver_0.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_1/ver_0.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_2/ver_0.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_3/ver_0.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_4/ver_0.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_0/ver_1.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_1/ver_1.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_2/ver_1.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/fold_3/ver_1.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_o

## 実験結果の保存（クロスバリデーション，平均と標準偏差）

In [4]:
for version, (info, df) in enumerate(zip(infos, dfs_mean)):
    df['C1'] = 10
    df['C2'] = 10
    df['n_unsupervised'] = 15
    df['rule_thr'] = info

    file_path = os.path.join(project_dir_path, f'experiment_result/results_organized/experiment_15/ver_{version}_mean.csv')
    print(file_path)
    df.to_csv(file_path)

for version, (info, df) in enumerate(zip(infos, dfs_std)):
    df['C1'] = 10
    df['C2'] = 10
    df['n_unsupervised'] = 15
    df['rule_thr'] = info

    file_path = os.path.join(project_dir_path, f'experiment_result/results_organized/experiment_15/ver_{version}_std.csv')
    print(file_path)

    df.to_csv(file_path)



/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_0_mean.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_1_mean.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_2_mean.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_3_mean.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_4_mean.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_5_mean.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_6_mean.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_7_mean.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment_15/ver_0_std.csv
/Users/keisukeonoue/ws/lukasiewicz_2/experiment_result/results_organized/experiment

## ルールの整形と保存（csv）

In [46]:
import ast

for nth_fold in range(5):
    for version in range(len(infos)):
        dir_path = os.path.join(project_dir_path, f"experiment_result/tmp/version_15{version + 1}")
        file_path_1 = f"{dir_path}/rules/rules_{nth_fold}_original.csv"
        file_path_3 = f"{dir_path}/result.json"

        with open(file_path_3, 'r') as f:
            result = json.load(f)
        rule_result = result['result'][f'fold_{nth_fold}']['rule_based_prediction']['violation_detail']
        df_rule_result = pd.DataFrame(rule_result).T

        df = pd.read_csv(file_path_1, index_col=0)

        tmp = []
        for _, row in df.iterrows():

            row['antecedents_length'] = len(eval(row['antecedents']))
            row['antecedents'] = " ⊗ ".join(eval(row['antecedents']))
            conseq_tmp = eval(row['consequents'])
            # for item in conseq_tmp:
            #     if row['lift'] >= 1:
            #         row['consequents'] = item
            #     else:
            #         row['consequents'] = "¬ " + item 
            #     tmp.append(row.copy())
            for item in conseq_tmp:
                row['consequents'] = item
                tmp.append(row.copy())

        df = pd.concat(tmp, axis=1, ignore_index=True).T
        df = df[['antecedents', 'consequents', 'support', 'confidence', 'lift', 'antecedents_length']]
        df['n_violation'] = None
        df['n_violation (instance)'] = None
        df['n_evaluation (instance)'] = None
        
        for i, row in df[df['consequents'] == 'Outcome'].iterrows():
            row['n_violation (instance)'] = df_rule_result[1].to_list()

        print()
        print()
        print(len(df[df['consequents'] == 'Outcome']))
        print(len(df_rule_result))
        print()
        print()

        df.loc[df['consequents'] == 'Outcome', 'n_violation (instance)'] = df_rule_result[1].to_list()
        df.loc[df['consequents'] == 'Outcome', 'n_evaluation (instance)'] = df_rule_result[0].to_list()
        df.loc[df['consequents'] == 'Outcome', 'n_violation'] = df['n_violation (instance)'].apply(lambda x: 0 if x == 0 else 1)

        df['consequents'] = df['consequents'].where(df['lift'] >= 1, "¬ " + item)

   
        file_path = os.path.join(project_dir_path, f'experiment_result/results_organized/experiment_15/fold_{nth_fold}/rules_{version}.csv')
        df.to_csv(file_path)




48
48




48
48




32
32




15
15




3
3




1
1




1
1




1
1




44
44




44
44




28
28




13
13




4
4




2
2




1
1




1
1




39
39




39
39




25
25




12
12




3
3




1
1




1
1




1
1




51
51




51
51




39
39




21
21




8
8




4
4




2
2




2
2




48
48




48
48




33
33




15
15




6
6




1
1




1
1




1
1


