In [1]:
import json
import os

import sys
project_dir_path = "/Users/keisukeonoue/ws/lukasiewicz_2"
sys.path.append(project_dir_path)

import pandas as pd
import numpy as np

version_nums = [201]

# リスト内包表記を使用して複数のファイルパスを生成する
result_file_paths = [
    os.path.join(project_dir_path, f"experiment_result/tmp/version_{version_num}/result.json")
    for version_num in version_nums
]


result_dfs = []
infos = []

for path in result_file_paths:
    with open(path, 'r') as f:
        json_data = json.load(f)
    
    infos.append(json_data['rule_thr'])
    
    tmp = []

    for fold, _ in json_data['result'].items():
        result_df = pd.DataFrame(json_data['result'][fold]).T.drop(['violation_detail'], axis=1)
        tmp.append(result_df)

    result_dfs.append(tmp)

model_name_dict = {
    'linear svm': 'l-SVM', 
    'non-linear svm': 'r-SVM', 
    'logistic regression': 'RogReg', 
    'RuleFit Classifier (disc)': 'RuleFit-d', 
    'RuleFit Classifier (conti)': 'RuleFit', 
    'tree generator (disc)': 'RF-d', 
    'tree generator (conti)': 'RF', 
    'linear svm (L)': 'l-SVM-p', 
    'non-linear svm (L)': 'r-SVM-p', 
    'logistic regression (L)': 'LogReg-p', 
    'rule_based_prediction': 'RuleBase'
}

model_name_list = list(model_name_dict.keys())

combined_dfs = [pd.concat(dfs, ignore_index=False) for dfs in result_dfs]
dfs_mean = [combined_df.groupby(combined_df.index).mean().reindex(index=model_name_list) for combined_df in combined_dfs]
dfs_std = [combined_df.groupby(combined_df.index).std().reindex(index=model_name_list) for combined_df in combined_dfs]

dfs_mean = [df.set_index(pd.Index(model_name_dict.values())) for df in dfs_mean]
dfs_std = [df.set_index(pd.Index(model_name_dict.values())) for df in dfs_std]


for df in dfs_mean:
    display(df)

for df in dfs_std:
    display(df)


df_mean = dfs_mean[0]
df_std = dfs_std[0]
result_df = df_mean.astype(float).round(3).astype(str) + ' ± ' + df_std.astype(float).round(2).astype(str)
result_df['compliance_rate'] = (1 - df_mean['violation_rate']).astype(float).round(3).astype(str) + ' ± ' + df_std['violation_rate'].astype(float).round(2).astype(str)
result_df['compliance_rate (instance)'] = (1 - df_mean['violation_rate (instance)']).astype(float).round(3).astype(str) + ' ± ' + df_std['violation_rate (instance)'].astype(float).round(2).astype(str)
display(result_df)

Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
l-SVM,0.779104,0.711436,0.551175,0.618693,0.837805,31.2,35.6,0.878345,513.8,583.8,0.879052
r-SVM,0.780597,0.75907,0.479714,0.587201,0.838064,32.2,35.6,0.903858,521.6,583.8,0.892684
RogReg,0.78209,0.731143,0.530165,0.612464,0.840573,31.4,35.6,0.883473,516.4,583.8,0.883681
RuleFit-d,0.783582,0.742431,0.521271,0.610252,0.815699,28.8,35.6,0.801738,510.4,583.8,0.872475
RuleFit,0.732836,0.601979,0.553471,0.571908,0.795116,31.2,35.6,0.878702,512.2,583.8,0.87665
RF-d,0.741791,0.627843,0.557706,0.582608,0.772322,26.6,35.6,0.741115,501.2,583.8,0.856557
RF,0.78209,0.715158,0.568106,0.62746,0.8326,31.2,35.6,0.877264,516.0,583.8,0.882925
l-SVM-p,0.389552,0.361431,0.93173,0.504187,0.33561,23.2,35.6,0.652447,412.8,583.8,0.693728
r-SVM-p,0.71791,0.547749,0.761836,0.635842,0.820711,23.4,35.6,0.657063,151.0,583.8,0.258969
LogReg-p,0.770149,0.732412,0.470891,0.570208,0.826615,19.4,35.6,0.55034,94.6,583.8,0.162455


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance)
l-SVM,0.053711,0.04834,0.08646,0.061671,0.045376,3.271085,4.393177,0.030232,63.6726,63.566501,0.014256
r-SVM,0.046485,0.05776,0.048936,0.0502,0.048697,4.32435,4.393177,0.029686,62.320141,63.566501,0.010717
RogReg,0.045821,0.058217,0.063637,0.047569,0.047432,3.435113,4.393177,0.025154,62.71204,63.566501,0.011969
RuleFit-d,0.044776,0.048551,0.064326,0.044856,0.050857,6.058052,4.393177,0.089186,68.339593,63.566501,0.021341
RuleFit,0.040326,0.076503,0.064527,0.037194,0.032516,3.701351,4.393177,0.063186,61.054893,63.566501,0.011024
RF-d,0.04995,0.094845,0.077244,0.037668,0.049073,5.59464,4.393177,0.077494,68.754636,63.566501,0.024663
RF,0.037089,0.071703,0.076514,0.029962,0.039969,3.898718,4.393177,0.046222,63.863135,63.566501,0.01664
l-SVM-p,0.172483,0.11088,0.129725,0.081675,0.25796,3.768289,4.393177,0.071983,166.346626,63.566501,0.2473
r-SVM-p,0.050998,0.022689,0.097161,0.043228,0.046104,5.128353,4.393177,0.116543,26.78619,63.566501,0.038207
LogReg-p,0.047609,0.061147,0.07102,0.056079,0.050752,3.781534,4.393177,0.115921,23.786551,63.566501,0.037951


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance),compliance_rate,compliance_rate (instance)
l-SVM,0.779 ± 0.05,0.711 ± 0.05,0.551 ± 0.09,0.619 ± 0.06,0.838 ± 0.05,31.2 ± 3.27,35.6 ± 4.39,0.878 ± 0.03,513.8 ± 63.67,583.8 ± 63.57,0.879 ± 0.01,0.122 ± 0.03,0.121 ± 0.01
r-SVM,0.781 ± 0.05,0.759 ± 0.06,0.48 ± 0.05,0.587 ± 0.05,0.838 ± 0.05,32.2 ± 4.32,35.6 ± 4.39,0.904 ± 0.03,521.6 ± 62.32,583.8 ± 63.57,0.893 ± 0.01,0.096 ± 0.03,0.107 ± 0.01
RogReg,0.782 ± 0.05,0.731 ± 0.06,0.53 ± 0.06,0.612 ± 0.05,0.841 ± 0.05,31.4 ± 3.44,35.6 ± 4.39,0.883 ± 0.03,516.4 ± 62.71,583.8 ± 63.57,0.884 ± 0.01,0.117 ± 0.03,0.116 ± 0.01
RuleFit-d,0.784 ± 0.04,0.742 ± 0.05,0.521 ± 0.06,0.61 ± 0.04,0.816 ± 0.05,28.8 ± 6.06,35.6 ± 4.39,0.802 ± 0.09,510.4 ± 68.34,583.8 ± 63.57,0.872 ± 0.02,0.198 ± 0.09,0.128 ± 0.02
RuleFit,0.733 ± 0.04,0.602 ± 0.08,0.553 ± 0.06,0.572 ± 0.04,0.795 ± 0.03,31.2 ± 3.7,35.6 ± 4.39,0.879 ± 0.06,512.2 ± 61.05,583.8 ± 63.57,0.877 ± 0.01,0.121 ± 0.06,0.123 ± 0.01
RF-d,0.742 ± 0.05,0.628 ± 0.09,0.558 ± 0.08,0.583 ± 0.04,0.772 ± 0.05,26.6 ± 5.59,35.6 ± 4.39,0.741 ± 0.08,501.2 ± 68.75,583.8 ± 63.57,0.857 ± 0.02,0.259 ± 0.08,0.143 ± 0.02
RF,0.782 ± 0.04,0.715 ± 0.07,0.568 ± 0.08,0.627 ± 0.03,0.833 ± 0.04,31.2 ± 3.9,35.6 ± 4.39,0.877 ± 0.05,516.0 ± 63.86,583.8 ± 63.57,0.883 ± 0.02,0.123 ± 0.05,0.117 ± 0.02
l-SVM-p,0.39 ± 0.17,0.361 ± 0.11,0.932 ± 0.13,0.504 ± 0.08,0.336 ± 0.26,23.2 ± 3.77,35.6 ± 4.39,0.652 ± 0.07,412.8 ± 166.35,583.8 ± 63.57,0.694 ± 0.25,0.348 ± 0.07,0.306 ± 0.25
r-SVM-p,0.718 ± 0.05,0.548 ± 0.02,0.762 ± 0.1,0.636 ± 0.04,0.821 ± 0.05,23.4 ± 5.13,35.6 ± 4.39,0.657 ± 0.12,151.0 ± 26.79,583.8 ± 63.57,0.259 ± 0.04,0.343 ± 0.12,0.741 ± 0.04
LogReg-p,0.77 ± 0.05,0.732 ± 0.06,0.471 ± 0.07,0.57 ± 0.06,0.827 ± 0.05,19.4 ± 3.78,35.6 ± 4.39,0.55 ± 0.12,94.6 ± 23.79,583.8 ± 63.57,0.162 ± 0.04,0.45 ± 0.12,0.838 ± 0.04


In [14]:
target_column_list = ['ROCAUC', '充足率（制約単位）', '充足率（インスタンス単位）']
target_index_list = ['l-SVM', 'r-SVM', 'RogReg', 'RuleFit', 'RF', 'RuleFit-d', 'RF-d', 'l-SVM-p', 'r-SVM-p', 'LogReg-p', 'RuleBase']

df_1 = df_mean.copy()
df_1['ROCAUC'] = df_1['auc']
df_1['充足率（制約単位）'] = 1 - df_1['violation_rate']
df_1['充足率（インスタンス単位）'] = 1 - df_1['violation_rate (instance)']
df_1 = df_1[target_column_list]
df_1 = df_1.T[target_index_list].T
df_1

Unnamed: 0,ROCAUC,充足率（制約単位）,充足率（インスタンス単位）
l-SVM,0.837805,0.121655,0.120948
r-SVM,0.838064,0.096142,0.107316
RogReg,0.840573,0.116527,0.116319
RuleFit,0.795116,0.121298,0.12335
RF,0.8326,0.122736,0.117075
RuleFit-d,0.815699,0.198262,0.127525
RF-d,0.772322,0.258885,0.143443
l-SVM-p,0.33561,0.347553,0.306272
r-SVM-p,0.820711,0.342937,0.741031
LogReg-p,0.826615,0.44966,0.837545


In [16]:
print(df_1)

             ROCAUC 充足率（制約単位） 充足率（インスタンス単位）
l-SVM      0.837805  0.121655      0.120948
r-SVM      0.838064  0.096142      0.107316
RogReg     0.840573  0.116527      0.116319
RuleFit    0.795116  0.121298       0.12335
RF           0.8326  0.122736      0.117075
RuleFit-d  0.815699  0.198262      0.127525
RF-d       0.772322  0.258885      0.143443
l-SVM-p     0.33561  0.347553      0.306272
r-SVM-p    0.820711  0.342937      0.741031
LogReg-p   0.826615   0.44966      0.837545
RuleBase        0.5  0.412036      0.200046


In [15]:
target_column_list = ['ROCAUC', '充足率（制約単位）', '充足率（インスタンス単位）']
target_index_list = ['l-SVM', 'r-SVM', 'RogReg', 'RuleFit', 'RF', 'RuleFit-d', 'RF-d', 'l-SVM-p', 'r-SVM-p', 'LogReg-p', 'RuleBase']

df_2 = df_std.copy()
df_2['ROCAUC'] = df_2['auc']
df_2['充足率（制約単位）'] = df_2['violation_rate']
df_2['充足率（インスタンス単位）'] = df_2['violation_rate (instance)']
df_2 = df_2[target_column_list]
df_2 = df_2.T[target_index_list].T
df_2

Unnamed: 0,ROCAUC,充足率（制約単位）,充足率（インスタンス単位）
l-SVM,0.045376,0.030232,0.014256
r-SVM,0.048697,0.029686,0.010717
RogReg,0.047432,0.025154,0.011969
RuleFit,0.032516,0.063186,0.011024
RF,0.039969,0.046222,0.01664
RuleFit-d,0.050857,0.089186,0.021341
RF-d,0.049073,0.077494,0.024663
l-SVM-p,0.25796,0.071983,0.2473
r-SVM-p,0.046104,0.116543,0.038207
LogReg-p,0.050752,0.115921,0.037951


In [None]:
import pandas as pd 
import plotly.graph_objects as go



In [19]:
import pandas as pd
import plotly.graph_objects as go

# 平均値データフレーム
df_mean = pd.DataFrame({
    'Model': ['l-SVM', 'r-SVM', 'RogReg', 'RuleFit', 'RF', 'RuleFit-d', 'RF-d', 'l-SVM-p', 'r-SVM-p', 'LogReg-p', 'RuleBase'],
    'ROCAUC': [0.837805, 0.838064, 0.840573, 0.795116, 0.8326, 0.815699, 0.772322, 0.33561, 0.820711, 0.826615, 0.5],
    '充足率（制約単位）': [0.121655, 0.096142, 0.116527, 0.121298, 0.122736, 0.198262, 0.258885, 0.347553, 0.342937, 0.44966, 0.412036],
    '充足率（インスタンス単位）': [0.120948, 0.107316, 0.116319, 0.12335, 0.117075, 0.127525, 0.143443, 0.306272, 0.741031, 0.837545, 0.200046]
})

# 標準偏差データフレーム
df_std = pd.DataFrame({
    'Model': ['l-SVM', 'r-SVM', 'RogReg', 'RuleFit', 'RF', 'RuleFit-d', 'RF-d', 'l-SVM-p', 'r-SVM-p', 'LogReg-p', 'RuleBase'],
    'ROCAUC': [0.045376, 0.048697, 0.047432, 0.032516, 0.039969, 0.050857, 0.049073, 0.257960, 0.046104, 0.050752, 0.000000],
    '充足率（制約単位）': [0.030232, 0.029686, 0.025154, 0.063186, 0.046222, 0.089186, 0.077494, 0.071983, 0.116543, 0.115921, 0.037669],
    '充足率（インスタンス単位）': [0.014256, 0.010717, 0.011969, 0.011024, 0.016640, 0.021341, 0.024663, 0.247300, 0.038207, 0.037951, 0.034524]
})

# データフレームを整形
df_mean_melt = df_mean.melt(id_vars='Model', var_name='Metric', value_name='Mean')
df_std_melt = df_std.melt(id_vars='Model', var_name='Metric', value_name='Std')

# データフレームをマージ
df = pd.merge(df_mean_melt, df_std_melt,  how='left', left_on=['Model','Metric'], right_on = ['Model','Metric'])

# プロット作成
fig = go.Figure()

for model in df['Model'].unique():
    df_sub = df[df['Model'] == model]
    fig.add_trace(go.Bar(
        name=model,
        x=df_sub['Metric'],
        y=df_sub['Mean'],
        error_y=dict(type='data', array=df_sub['Std'])
    ))

fig.update_layout(barmode='group', xaxis_title='Metric', yaxis_title='Value', title='Evaluation Metrics with Error Bars')
fig.show()

In [46]:
import pandas as pd
import plotly.graph_objects as go

# 平均値データフレーム
df_mean = pd.DataFrame({
    'Model': ['l-SVM', 'r-SVM', 'RogReg', 'RuleFit', 'RF', 'RuleFit-d', 'RF-d', 'l-SVM-p', 'r-SVM-p', 'LogReg-p', 'RuleBase'],
    'ROCAUC': [0.837805, 0.838064, 0.840573, 0.795116, 0.8326, 0.815699, 0.772322, 0.33561, 0.820711, 0.826615, 0.5],
    '充足率（制約単位）': [0.121655, 0.096142, 0.116527, 0.121298, 0.122736, 0.198262, 0.258885, 0.347553, 0.342937, 0.44966, 0.412036],
    '充足率（インスタンス単位）': [0.120948, 0.107316, 0.116319, 0.12335, 0.117075, 0.127525, 0.143443, 0.306272, 0.741031, 0.837545, 0.200046]
})

# 標準偏差データフレーム
df_std = pd.DataFrame({
    'Model': ['l-SVM', 'r-SVM', 'RogReg', 'RuleFit', 'RF', 'RuleFit-d', 'RF-d', 'l-SVM-p', 'r-SVM-p', 'LogReg-p', 'RuleBase'],
    'ROCAUC': [0.045376, 0.048697, 0.047432, 0.032516, 0.039969, 0.050857, 0.049073, 0.257960, 0.046104, 0.050752, 0.000000],
    '充足率（制約単位）': [0.030232, 0.029686, 0.025154, 0.063186, 0.046222, 0.089186, 0.077494, 0.071983, 0.116543, 0.115921, 0.037669],
    '充足率（インスタンス単位）': [0.014256, 0.010717, 0.011969, 0.011024, 0.016640, 0.021341, 0.024663, 0.247300, 0.038207, 0.037951, 0.034524]
})

# データフレームを整形
df_mean_melt = df_mean.melt(id_vars='Model', var_name='Metric', value_name='Mean')
df_std_melt = df_std.melt(id_vars='Model', var_name='Metric', value_name='Std')

# データフレームをマージ
df = pd.merge(df_mean_melt, df_std_melt,  how='left', left_on=['Model','Metric'], right_on = ['Model','Metric'])

# プロット作成
fig = go.Figure()

colors = {'ROCAUC': 'teal', '充足率（制約単位）': 'coral', '充足率（インスタンス単位）': 'gold'}

for metric in df['Metric'].unique():
    df_sub = df[df['Metric'] == metric]
    fig.add_trace(go.Bar(
        name=metric,
        x=df_sub['Model'],
        y=df_sub['Mean'],
        error_y=dict(type='data', array=df_sub['Std']),
        marker_color=colors[metric]
    ))

fig.update_layout(
    barmode='group', 
    autosize=False,
    width=700,
    height=500,
    legend=dict(
        yanchor="top",
        y=1.3,  # y=1.1はグラフの上部にレジェンドを配置します
        xanchor="left",
        x=0.5,  # x=0.5はレジェンドを中央に配置します
        orientation="v",  # orientation="h"はレジェンドを水平に配置します
        font=dict(size=16),  # レジェンドの文字サイズを14に設定します
    )
    )

fig.update_xaxes(
    tickangle=75,
    tickfont=dict(size=16),  # x軸のティックの文字サイズを14に設定します。
)

fig.update_yaxes(
    tickfont=dict(size=16),  # y軸のティックの文字サイズを14に設定します。
)
fig.show()

In [2]:
print('hello')
display(result_df)

hello


Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate,n_violation (instance),n_evaluation (instance),violation_rate (instance),compliance_rate,compliance_rate (instance)
l-SVM,0.779 ± 0.05,0.711 ± 0.05,0.551 ± 0.09,0.619 ± 0.06,0.838 ± 0.05,31.2 ± 3.27,35.6 ± 4.39,0.878 ± 0.03,513.8 ± 63.67,583.8 ± 63.57,0.879 ± 0.01,0.122 ± 0.03,0.121 ± 0.01
r-SVM,0.781 ± 0.05,0.759 ± 0.06,0.48 ± 0.05,0.587 ± 0.05,0.838 ± 0.05,32.2 ± 4.32,35.6 ± 4.39,0.904 ± 0.03,521.6 ± 62.32,583.8 ± 63.57,0.893 ± 0.01,0.096 ± 0.03,0.107 ± 0.01
RogReg,0.782 ± 0.05,0.731 ± 0.06,0.53 ± 0.06,0.612 ± 0.05,0.841 ± 0.05,31.4 ± 3.44,35.6 ± 4.39,0.883 ± 0.03,516.4 ± 62.71,583.8 ± 63.57,0.884 ± 0.01,0.117 ± 0.03,0.116 ± 0.01
RuleFit-d,0.784 ± 0.04,0.742 ± 0.05,0.521 ± 0.06,0.61 ± 0.04,0.816 ± 0.05,28.8 ± 6.06,35.6 ± 4.39,0.802 ± 0.09,510.4 ± 68.34,583.8 ± 63.57,0.872 ± 0.02,0.198 ± 0.09,0.128 ± 0.02
RuleFit,0.733 ± 0.04,0.602 ± 0.08,0.553 ± 0.06,0.572 ± 0.04,0.795 ± 0.03,31.2 ± 3.7,35.6 ± 4.39,0.879 ± 0.06,512.2 ± 61.05,583.8 ± 63.57,0.877 ± 0.01,0.121 ± 0.06,0.123 ± 0.01
RF-d,0.742 ± 0.05,0.628 ± 0.09,0.558 ± 0.08,0.583 ± 0.04,0.772 ± 0.05,26.6 ± 5.59,35.6 ± 4.39,0.741 ± 0.08,501.2 ± 68.75,583.8 ± 63.57,0.857 ± 0.02,0.259 ± 0.08,0.143 ± 0.02
RF,0.782 ± 0.04,0.715 ± 0.07,0.568 ± 0.08,0.627 ± 0.03,0.833 ± 0.04,31.2 ± 3.9,35.6 ± 4.39,0.877 ± 0.05,516.0 ± 63.86,583.8 ± 63.57,0.883 ± 0.02,0.123 ± 0.05,0.117 ± 0.02
l-SVM-p,0.39 ± 0.17,0.361 ± 0.11,0.932 ± 0.13,0.504 ± 0.08,0.336 ± 0.26,23.2 ± 3.77,35.6 ± 4.39,0.652 ± 0.07,412.8 ± 166.35,583.8 ± 63.57,0.694 ± 0.25,0.348 ± 0.07,0.306 ± 0.25
r-SVM-p,0.718 ± 0.05,0.548 ± 0.02,0.762 ± 0.1,0.636 ± 0.04,0.821 ± 0.05,23.4 ± 5.13,35.6 ± 4.39,0.657 ± 0.12,151.0 ± 26.79,583.8 ± 63.57,0.259 ± 0.04,0.343 ± 0.12,0.741 ± 0.04
LogReg-p,0.77 ± 0.05,0.732 ± 0.06,0.471 ± 0.07,0.57 ± 0.06,0.827 ± 0.05,19.4 ± 3.78,35.6 ± 4.39,0.55 ± 0.12,94.6 ± 23.79,583.8 ± 63.57,0.162 ± 0.04,0.45 ± 0.12,0.838 ± 0.04


In [None]:
import os
# 実験によってパラメータが違うので注意
######################################
######################################
######################################
######################################


for version, (dfs, info) in enumerate(zip(result_dfs, infos)):
    
    for nth_fold, df in enumerate(dfs):

        df['nth_fold'] = nth_fold
        df['n_splits'] = 5
        df['C1'] = 10
        df['C2'] = 10
        df['n_unsupervised'] = info
        df['rule_thr'] = 0

        # フォルダーを作成する
        ########################################
        ########################################
        ########################################
        ########################################
        ########################################
        folder_path = os.path.join(project_dir_path, f'experiment_result/results_organized/experiment_{expt_num}/fold_{nth_fold}')
        os.makedirs(folder_path, exist_ok=True)

        file_path = f'{folder_path}/ver_{version}.csv'
        print(file_path)

        df.to_csv(file_path)


In [6]:
df_mean.to_csv("result_mean.csv")

In [7]:
df_std.to_csv("result_std.csv")

# 1

In [1]:
import json
import pandas as pd

In [2]:
result_file_path = "./experiments/version_10/result.json"

with open(result_file_path, 'r') as f:
    json_data = json.load(f)

result_dfs = []

for fold, _ in json_data["result"].items():
    result_df = pd.DataFrame(json_data['result'][fold]).T
    result_dfs.append(result_df)

In [3]:
original_idx_order = result_dfs[0].index
original_idx_order

Index(['RuleFit Classifier (disc)', 'tree generator (disc)',
       'RuleFit Classifier (conti)', 'tree generator (conti)',
       'linear svm (L)', 'non-linear svm (L)', 'logistic regression (L)',
       'linear svm', 'non-linear svm', 'logistic regression'],
      dtype='object')

In [4]:
combined_df = pd.concat(result_dfs, ignore_index=False)

In [5]:
result_df_mean = combined_df.groupby(combined_df.index).mean()
result_df_mean  = result_df_mean.reindex(original_idx_order)
result_df_mean

Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate
RuleFit Classifier (disc),0.783582,0.742431,0.521271,0.610252,0.815869,28.2,34.8,0.80488
tree generator (disc),0.741791,0.627843,0.557706,0.582608,0.772322,25.8,34.8,0.738615
RuleFit Classifier (conti),0.732836,0.601979,0.553471,0.571908,0.795116,30.6,34.8,0.88115
tree generator (conti),0.78209,0.715158,0.568106,0.62746,0.8326,30.6,34.8,0.879968
linear svm (L),0.450746,0.386806,0.955416,0.540092,0.536953,21.8,34.8,0.629842
non-linear svm (L),0.716418,0.54619,0.752745,0.631315,0.822545,23.2,34.8,0.663664
logistic regression (L),0.770149,0.732412,0.470891,0.570208,0.826615,19.2,34.8,0.552303
linear svm,0.779104,0.711436,0.551175,0.618693,0.837805,30.6,34.8,0.880463
non-linear svm,0.780597,0.75907,0.479714,0.587201,0.838064,31.6,34.8,0.908449
logistic regression,0.78209,0.731143,0.530165,0.612464,0.840573,30.8,34.8,0.886178


In [6]:
result_df_std = combined_df.groupby(combined_df.index).std()
result_df_std = result_df_std.reindex(original_idx_order)

result_df_std

Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate
RuleFit Classifier (disc),0.044776,0.048551,0.064326,0.044856,0.05152,5.167204,3.271085,0.08447
tree generator (disc),0.04995,0.094845,0.077244,0.037668,0.049073,4.086563,3.271085,0.068354
RuleFit Classifier (conti),0.040326,0.076503,0.064527,0.037194,0.032516,2.966479,3.271085,0.065019
tree generator (conti),0.037089,0.071703,0.076514,0.029962,0.039969,3.04959,3.271085,0.047223
linear svm (L),0.190744,0.108598,0.069723,0.104232,0.318435,0.83666,3.271085,0.048672
non-linear svm (L),0.051972,0.024709,0.108037,0.049436,0.045971,4.438468,3.271085,0.086825
logistic regression (L),0.047609,0.061147,0.07102,0.056079,0.050752,4.086563,3.271085,0.109578
linear svm,0.053711,0.04834,0.08646,0.061671,0.045376,2.50998,3.271085,0.029227
non-linear svm,0.046485,0.05776,0.048936,0.0502,0.048697,3.04959,3.271085,0.039147
logistic regression,0.045821,0.058217,0.063637,0.047569,0.047432,2.48998,3.271085,0.02616


In [7]:
print(result_df_mean)

                            accuracy  precision    recall        f1       auc  \
RuleFit Classifier (disc)   0.783582   0.742431  0.521271  0.610252  0.815869   
tree generator (disc)       0.741791   0.627843  0.557706  0.582608  0.772322   
RuleFit Classifier (conti)  0.732836   0.601979  0.553471  0.571908  0.795116   
tree generator (conti)      0.782090   0.715158  0.568106  0.627460  0.832600   
linear svm (L)              0.450746   0.386806  0.955416  0.540092  0.536953   
non-linear svm (L)          0.716418   0.546190  0.752745  0.631315  0.822545   
logistic regression (L)     0.770149   0.732412  0.470891  0.570208  0.826615   
linear svm                  0.779104   0.711436  0.551175  0.618693  0.837805   
non-linear svm              0.780597   0.759070  0.479714  0.587201  0.838064   
logistic regression         0.782090   0.731143  0.530165  0.612464  0.840573   

                            n_violation  n_rule  violation_rate  
RuleFit Classifier (disc)          28.2   

In [8]:
print(result_df_std)

                            accuracy  precision    recall        f1       auc  \
RuleFit Classifier (disc)   0.044776   0.048551  0.064326  0.044856  0.051520   
tree generator (disc)       0.049950   0.094845  0.077244  0.037668  0.049073   
RuleFit Classifier (conti)  0.040326   0.076503  0.064527  0.037194  0.032516   
tree generator (conti)      0.037089   0.071703  0.076514  0.029962  0.039969   
linear svm (L)              0.190744   0.108598  0.069723  0.104232  0.318435   
non-linear svm (L)          0.051972   0.024709  0.108037  0.049436  0.045971   
logistic regression (L)     0.047609   0.061147  0.071020  0.056079  0.050752   
linear svm                  0.053711   0.048340  0.086460  0.061671  0.045376   
non-linear svm              0.046485   0.057760  0.048936  0.050200  0.048697   
logistic regression         0.045821   0.058217  0.063637  0.047569  0.047432   

                            n_violation    n_rule  violation_rate  
RuleFit Classifier (disc)      5.167204 

In [9]:
# データフレームを結合して、指定された形式で値を表示する
df1 = result_df_mean
df2 = result_df_std

merged_df = df1.copy()
for col in df1.columns:
    merged_df[col] = df1[col].apply(lambda x: "{:.3f}".format(x)) + ' ± ' + df2[col].apply(lambda x: "{:.3f}".format(x))

print(merged_df)

                                 accuracy      precision         recall  \
RuleFit Classifier (disc)   0.784 ± 0.045  0.742 ± 0.049  0.521 ± 0.064   
tree generator (disc)       0.742 ± 0.050  0.628 ± 0.095  0.558 ± 0.077   
RuleFit Classifier (conti)  0.733 ± 0.040  0.602 ± 0.077  0.553 ± 0.065   
tree generator (conti)      0.782 ± 0.037  0.715 ± 0.072  0.568 ± 0.077   
linear svm (L)              0.451 ± 0.191  0.387 ± 0.109  0.955 ± 0.070   
non-linear svm (L)          0.716 ± 0.052  0.546 ± 0.025  0.753 ± 0.108   
logistic regression (L)     0.770 ± 0.048  0.732 ± 0.061  0.471 ± 0.071   
linear svm                  0.779 ± 0.054  0.711 ± 0.048  0.551 ± 0.086   
non-linear svm              0.781 ± 0.046  0.759 ± 0.058  0.480 ± 0.049   
logistic regression         0.782 ± 0.046  0.731 ± 0.058  0.530 ± 0.064   

                                       f1            auc     n_violation  \
RuleFit Classifier (disc)   0.610 ± 0.045  0.816 ± 0.052  28.200 ± 5.167   
tree generator (disc) 

In [10]:
merged_df

Unnamed: 0,accuracy,precision,recall,f1,auc,n_violation,n_rule,violation_rate
RuleFit Classifier (disc),0.784 ± 0.045,0.742 ± 0.049,0.521 ± 0.064,0.610 ± 0.045,0.816 ± 0.052,28.200 ± 5.167,34.800 ± 3.271,0.805 ± 0.084
tree generator (disc),0.742 ± 0.050,0.628 ± 0.095,0.558 ± 0.077,0.583 ± 0.038,0.772 ± 0.049,25.800 ± 4.087,34.800 ± 3.271,0.739 ± 0.068
RuleFit Classifier (conti),0.733 ± 0.040,0.602 ± 0.077,0.553 ± 0.065,0.572 ± 0.037,0.795 ± 0.033,30.600 ± 2.966,34.800 ± 3.271,0.881 ± 0.065
tree generator (conti),0.782 ± 0.037,0.715 ± 0.072,0.568 ± 0.077,0.627 ± 0.030,0.833 ± 0.040,30.600 ± 3.050,34.800 ± 3.271,0.880 ± 0.047
linear svm (L),0.451 ± 0.191,0.387 ± 0.109,0.955 ± 0.070,0.540 ± 0.104,0.537 ± 0.318,21.800 ± 0.837,34.800 ± 3.271,0.630 ± 0.049
non-linear svm (L),0.716 ± 0.052,0.546 ± 0.025,0.753 ± 0.108,0.631 ± 0.049,0.823 ± 0.046,23.200 ± 4.438,34.800 ± 3.271,0.664 ± 0.087
logistic regression (L),0.770 ± 0.048,0.732 ± 0.061,0.471 ± 0.071,0.570 ± 0.056,0.827 ± 0.051,19.200 ± 4.087,34.800 ± 3.271,0.552 ± 0.110
linear svm,0.779 ± 0.054,0.711 ± 0.048,0.551 ± 0.086,0.619 ± 0.062,0.838 ± 0.045,30.600 ± 2.510,34.800 ± 3.271,0.880 ± 0.029
non-linear svm,0.781 ± 0.046,0.759 ± 0.058,0.480 ± 0.049,0.587 ± 0.050,0.838 ± 0.049,31.600 ± 3.050,34.800 ± 3.271,0.908 ± 0.039
logistic regression,0.782 ± 0.046,0.731 ± 0.058,0.530 ± 0.064,0.612 ± 0.048,0.841 ± 0.047,30.800 ± 2.490,34.800 ± 3.271,0.886 ± 0.026


In [11]:
filtered_df = merged_df.loc[:, ['auc', 'n_violation', 'n_rule', 'violation_rate']]
filtered_df

Unnamed: 0,auc,n_violation,n_rule,violation_rate
RuleFit Classifier (disc),0.816 ± 0.052,28.200 ± 5.167,34.800 ± 3.271,0.805 ± 0.084
tree generator (disc),0.772 ± 0.049,25.800 ± 4.087,34.800 ± 3.271,0.739 ± 0.068
RuleFit Classifier (conti),0.795 ± 0.033,30.600 ± 2.966,34.800 ± 3.271,0.881 ± 0.065
tree generator (conti),0.833 ± 0.040,30.600 ± 3.050,34.800 ± 3.271,0.880 ± 0.047
linear svm (L),0.537 ± 0.318,21.800 ± 0.837,34.800 ± 3.271,0.630 ± 0.049
non-linear svm (L),0.823 ± 0.046,23.200 ± 4.438,34.800 ± 3.271,0.664 ± 0.087
logistic regression (L),0.827 ± 0.051,19.200 ± 4.087,34.800 ± 3.271,0.552 ± 0.110
linear svm,0.838 ± 0.045,30.600 ± 2.510,34.800 ± 3.271,0.880 ± 0.029
non-linear svm,0.838 ± 0.049,31.600 ± 3.050,34.800 ± 3.271,0.908 ± 0.039
logistic regression,0.841 ± 0.047,30.800 ± 2.490,34.800 ± 3.271,0.886 ± 0.026


In [12]:
print(filtered_df)

                                      auc     n_violation          n_rule  \
RuleFit Classifier (disc)   0.816 ± 0.052  28.200 ± 5.167  34.800 ± 3.271   
tree generator (disc)       0.772 ± 0.049  25.800 ± 4.087  34.800 ± 3.271   
RuleFit Classifier (conti)  0.795 ± 0.033  30.600 ± 2.966  34.800 ± 3.271   
tree generator (conti)      0.833 ± 0.040  30.600 ± 3.050  34.800 ± 3.271   
linear svm (L)              0.537 ± 0.318  21.800 ± 0.837  34.800 ± 3.271   
non-linear svm (L)          0.823 ± 0.046  23.200 ± 4.438  34.800 ± 3.271   
logistic regression (L)     0.827 ± 0.051  19.200 ± 4.087  34.800 ± 3.271   
linear svm                  0.838 ± 0.045  30.600 ± 2.510  34.800 ± 3.271   
non-linear svm              0.838 ± 0.049  31.600 ± 3.050  34.800 ± 3.271   
logistic regression         0.841 ± 0.047  30.800 ± 2.490  34.800 ± 3.271   

                           violation_rate  
RuleFit Classifier (disc)   0.805 ± 0.084  
tree generator (disc)       0.739 ± 0.068  
RuleFit Classifier (

In [13]:
model_names = [
    'linear svm',
    'non-linear svm',
    'logistic regression',
    'RuleFit Classifier (disc)',
    'tree generator (disc)',
    'RuleFit Classifier (conti)',
    'tree generator (conti)',
    'linear svm (L)',
    'non-linear svm (L)',
    'logistic regression (L)',
]

In [14]:
filtered_df = filtered_df.reindex(model_names)
filtered_df

Unnamed: 0,auc,n_violation,n_rule,violation_rate
linear svm,0.838 ± 0.045,30.600 ± 2.510,34.800 ± 3.271,0.880 ± 0.029
non-linear svm,0.838 ± 0.049,31.600 ± 3.050,34.800 ± 3.271,0.908 ± 0.039
logistic regression,0.841 ± 0.047,30.800 ± 2.490,34.800 ± 3.271,0.886 ± 0.026
RuleFit Classifier (disc),0.816 ± 0.052,28.200 ± 5.167,34.800 ± 3.271,0.805 ± 0.084
tree generator (disc),0.772 ± 0.049,25.800 ± 4.087,34.800 ± 3.271,0.739 ± 0.068
RuleFit Classifier (conti),0.795 ± 0.033,30.600 ± 2.966,34.800 ± 3.271,0.881 ± 0.065
tree generator (conti),0.833 ± 0.040,30.600 ± 3.050,34.800 ± 3.271,0.880 ± 0.047
linear svm (L),0.537 ± 0.318,21.800 ± 0.837,34.800 ± 3.271,0.630 ± 0.049
non-linear svm (L),0.823 ± 0.046,23.200 ± 4.438,34.800 ± 3.271,0.664 ± 0.087
logistic regression (L),0.827 ± 0.051,19.200 ± 4.087,34.800 ± 3.271,0.552 ± 0.110


In [15]:
print(filtered_df)

                                      auc     n_violation          n_rule  \
linear svm                  0.838 ± 0.045  30.600 ± 2.510  34.800 ± 3.271   
non-linear svm              0.838 ± 0.049  31.600 ± 3.050  34.800 ± 3.271   
logistic regression         0.841 ± 0.047  30.800 ± 2.490  34.800 ± 3.271   
RuleFit Classifier (disc)   0.816 ± 0.052  28.200 ± 5.167  34.800 ± 3.271   
tree generator (disc)       0.772 ± 0.049  25.800 ± 4.087  34.800 ± 3.271   
RuleFit Classifier (conti)  0.795 ± 0.033  30.600 ± 2.966  34.800 ± 3.271   
tree generator (conti)      0.833 ± 0.040  30.600 ± 3.050  34.800 ± 3.271   
linear svm (L)              0.537 ± 0.318  21.800 ± 0.837  34.800 ± 3.271   
non-linear svm (L)          0.823 ± 0.046  23.200 ± 4.438  34.800 ± 3.271   
logistic regression (L)     0.827 ± 0.051  19.200 ± 4.087  34.800 ± 3.271   

                           violation_rate  
linear svm                  0.880 ± 0.029  
non-linear svm              0.908 ± 0.039  
logistic regression 

In [16]:
print(filtered_df['violation_rate'].values)

['0.880 ± 0.029' '0.908 ± 0.039' '0.886 ± 0.026' '0.805 ± 0.084'
 '0.739 ± 0.068' '0.881 ± 0.065' '0.880 ± 0.047' '0.630 ± 0.049'
 '0.664 ± 0.087' '0.552 ± 0.110']


In [17]:
for val in filtered_df['violation_rate'].values:
    print(val)

0.880 ± 0.029
0.908 ± 0.039
0.886 ± 0.026
0.805 ± 0.084
0.739 ± 0.068
0.881 ± 0.065
0.880 ± 0.047
0.630 ± 0.049
0.664 ± 0.087
0.552 ± 0.110


# 2

In [47]:
result_file_paths = [
    "./experiments/version_21/result.json",
    "./experiments/version_22/result.json",
    "./experiments/version_23/result.json",
    "./experiments/version_24/result.json",
    "./experiments/version_25/result.json",
    "./experiments/version_26/result.json",
    "./experiments/version_27/result.json",
]

result_dfs = []
infos = []

for path in result_file_paths:
    with open(path, 'r') as f:
        json_data = json.load(f)

    infos.append(json_data['c2'])

    tmp = []

    for fold, _ in json_data["result"].items():
        result_df = pd.DataFrame(json_data['result'][fold]).T
        tmp.append(result_df)

    result_dfs.append(tmp)

combined_dfs = [pd.concat(dfs, ignore_index=False) for dfs in result_dfs]
dfs_mean = [combined_df.groupby(combined_df.index).mean() for combined_df in combined_dfs]
dfs_std = [combined_df.groupby(combined_df.index).std() for combined_df in combined_dfs]

In [48]:
infos

[0.1, 1, 5, 15, 50, 100, 1000]

In [49]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# グラフのデータを準備
index = ['l-SVM-p', 'r-SVM-p', 'LogReg-p']
colors_mean = ['rgba(0,176,246,1)', 'rgba(231,107,243,1)', 'rgba(21,213,69,1)']  # indexに対応する色
colors_std  = ['rgba(0,176,246,0.2)', 'rgba(231,107,243,0.2)', 'rgba(21,213,69,0.2)']  # indexに対応する色


# グラフ1: AUCのデータ
col_auc = 'auc'
tmp_mean_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_std)}
], index=index)

# グラフ2: ルール違反率のデータ
col_vr = 'violation_rate'
tmp_mean_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_std)}
], index=index)


# サブプロットの作成
# fig = make_subplots(rows=1, cols=2, subplot_titles=('AUC', 'ルール違反率'))
fig = make_subplots(rows=1, cols=2)

# グラフ1: AUC
for i in range(len(tmp_mean_auc)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_auc.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i])  # マーカーの色を設定
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] + tmp_std_auc.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] - tmp_std_auc.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=1
    )

# グラフ2: ルール違反率
for i in range(len(tmp_mean_vr)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_vr.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i]),  # マーカーの色を設定
            showlegend=False
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] + tmp_std_vr.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] - tmp_std_vr.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=2
    )

# x軸の設定
fig.update_xaxes(title_text='C2', row=1, col=1)
fig.update_xaxes(title_text='C2', row=1, col=2)

# レイアウトの設定
fig.update_layout(
    height=400,
    width=600,
    xaxis=dict(
        domain=[0, 0.49],  # 左側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    xaxis2=dict(
        domain=[0.5, 1],  # 右側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    legend=dict(
        x=0.5,
        y=1.15,
        orientation='h'  # 水平配置
    ),
)

# y軸の設定
fig.update_yaxes(title_text='AUC', range=[0, 1], row=1, col=1, side='left', title_font=dict(size=16))  # col=1のグラフのy軸を左側に配置
fig.update_yaxes(title_text='ルール違反率', range=[0, 1], row=1, col=2, side='right', title_font=dict(size=16))  # col=2のグラフのy軸を右側に配置

# グラフの表示
fig.show()


In [50]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# グラフのデータを準備
index = ['l-SVM-p', 'r-SVM-p', 'LogReg-p']
colors_mean = ['rgba(0,176,246,1)', 'rgba(231,107,243,1)', 'rgba(21,213,69,1)']  # indexに対応する色
colors_std  = ['rgba(0,176,246,0.2)', 'rgba(231,107,243,0.2)', 'rgba(21,213,69,0.2)']  # indexに対応する色


# グラフ1: AUCのデータ
col_auc = 'auc'
tmp_mean_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_std)}
], index=index)

# グラフ2: ルール違反率のデータ
col_vr = 'violation_rate'
tmp_mean_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_std)}
], index=index)


# サブプロットの作成
# fig = make_subplots(rows=1, cols=2, subplot_titles=('AUC', 'ルール違反率'))
fig = make_subplots(rows=1, cols=2)

# グラフ1: AUC
for i in range(len(tmp_mean_auc)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_auc.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i])  # マーカーの色を設定
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] + tmp_std_auc.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] - tmp_std_auc.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=1
    )

# グラフ2: ルール違反率
for i in range(len(tmp_mean_vr)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_vr.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i]),  # マーカーの色を設定
            showlegend=False
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] + tmp_std_vr.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] - tmp_std_vr.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=2
    )

# x軸の設定
fig.update_xaxes(title_text='C2', row=1, col=1, type='log')
fig.update_xaxes(title_text='C2', row=1, col=2, type='log')

# レイアウトの設定
fig.update_layout(
    height=400,
    width=600,
    xaxis=dict(
        domain=[0, 0.49],  # 左側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    xaxis2=dict(
        domain=[0.5, 1],  # 右側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    legend=dict(
        x=0.5,
        y=1.15,
        orientation='h'  # 水平配置
    ),
    annotations=[
        dict(
            xref="paper",
            yref="paper",
            x=0.02,
            y=1.11,
            text="※ x軸は対数スケール",
            showarrow=False,
            font=dict(
                size=15,
                color="black"
            )
        )
    ]
)

# y軸の設定
fig.update_yaxes(title_text='AUC', range=[0, 1], row=1, col=1, side='left', title_font=dict(size=16))  # col=1のグラフのy軸を左側に配置
fig.update_yaxes(title_text='ルール違反率', range=[0, 1], row=1, col=2, side='right', title_font=dict(size=16))  # col=2のグラフのy軸を右側に配置

# グラフの表示
fig.show()


In [51]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# グラフのデータを準備
index = ['l-SVM-p', 'r-SVM-p', 'LogReg-p']
colors_mean = ['rgba(0,176,246,1)', 'rgba(231,107,243,1)', 'rgba(21,213,69,1)']  # indexに対応する色
colors_std  = ['rgba(0,176,246,0.2)', 'rgba(231,107,243,0.2)', 'rgba(21,213,69,0.2)']  # indexに対応する色


# グラフ1: AUCのデータ
col_auc = 'auc'
tmp_mean_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_std)}
], index=index)


# サブプロットの作成
# fig = make_subplots(rows=1, cols=2, subplot_titles=('AUC', 'ルール違反率'))
fig = make_subplots(rows=1, cols=1)

# グラフ1: AUC
for i in range(len(tmp_mean_auc)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_auc.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i])  # マーカーの色を設定
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] + tmp_std_auc.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] - tmp_std_auc.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=1
    )

# x軸の設定
fig.update_xaxes(title_text='C2', row=1, col=1, type='log')

# レイアウトの設定
fig.update_layout(
    height=400,
    width=500,
    xaxis=dict(
        domain=[0, 1],  # 左側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    legend=dict(
        x=0.15,
        y=1.15,
        orientation='h'  # 水平配置
    ),
    annotations=[
        dict(
            xref="paper",
            yref="paper",
            x=0.00,
            y=1.25,
            text="※ x軸は対数スケール",
            showarrow=False,
            font=dict(
                size=15,
                color="black"
            )
        )
    ]
)


# y軸の設定
fig.update_yaxes(title_text='AUC', range=[0, 1], row=1, col=1, side='left', title_font=dict(size=16))  # col=1のグラフのy軸を左側に配置

# グラフの表示
fig.show()

In [52]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# グラフのデータを準備
index = ['l-SVM-p', 'r-SVM-p', 'LogReg-p']
colors_mean = ['rgba(0,176,246,1)', 'rgba(231,107,243,1)', 'rgba(21,213,69,1)']  # indexに対応する色
colors_std  = ['rgba(0,176,246,0.2)', 'rgba(231,107,243,0.2)', 'rgba(21,213,69,0.2)']  # indexに対応する色

# グラフ2: ルール違反率のデータ
col_vr = 'violation_rate'
tmp_mean_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_std)}
], index=index)


# サブプロットの作成
# fig = make_subplots(rows=1, cols=2, subplot_titles=('AUC', 'ルール違反率'))
fig = make_subplots(rows=1, cols=1)

# グラフ2: ルール違反率
for i in range(len(tmp_mean_vr)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_vr.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i]),  # マーカーの色を設定
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] + tmp_std_vr.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] - tmp_std_vr.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=1
    )

# レイアウトの設定
fig.update_layout(
    height=400,
    width=500,
    xaxis=dict(
        domain=[0, 1],  # 左側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    legend=dict(
        x=0.15,
        y=1.15,
        orientation='h'  # 水平配置
    ),
    annotations=[
        dict(
            xref="paper",
            yref="paper",
            x=0.00,
            y=1.25,
            text="※ x軸は対数スケール",
            showarrow=False,
            font=dict(
                size=15,
                color="black"
            )
        )
    ]
)

# x軸の設定
fig.update_xaxes(title_text='C2', row=1, col=1, type='log')


# y軸の設定
fig.update_yaxes(title_text='ルール違反率', range=[0, 1], row=1, col=1, side='left', title_font=dict(size=16))  # col=2のグラフのy軸を右側に配置

# グラフの表示
fig.show()

# 4

In [53]:
result_file_paths = [
    "./experiments/version_31/result.json",
    "./experiments/version_32/result.json",
    "./experiments/version_33/result.json",
    "./experiments/version_34/result.json",
    "./experiments/version_35/result.json"
]

result_dfs = []
infos = []

for path in result_file_paths:
    with open(path, 'r') as f:
        json_data = json.load(f)

    infos.append(json_data['n_unsupervised'])

    tmp = []

    for fold, _ in json_data["result"].items():
        result_df = pd.DataFrame(json_data['result'][fold]).T
        tmp.append(result_df)

    result_dfs.append(tmp)

combined_dfs = [pd.concat(dfs, ignore_index=False) for dfs in result_dfs]
dfs_mean = [combined_df.groupby(combined_df.index).mean() for combined_df in combined_dfs]
dfs_std = [combined_df.groupby(combined_df.index).std() for combined_df in combined_dfs]

In [23]:
infos

[1, 5, 15, 50, 100]

In [54]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# グラフのデータを準備
index = ['l-SVM-p', 'r-SVM-p', 'LogReg-p']
colors_mean = ['rgba(0,176,246,1)', 'rgba(231,107,243,1)', 'rgba(21,213,69,1)']  # indexに対応する色
colors_std  = ['rgba(0,176,246,0.2)', 'rgba(231,107,243,0.2)', 'rgba(21,213,69,0.2)']  # indexに対応する色


# グラフ1: AUCのデータ
col_auc = 'auc'
tmp_mean_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_std)}
], index=index)

# グラフ2: ルール違反率のデータ
col_vr = 'violation_rate'
tmp_mean_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_std)}
], index=index)


# サブプロットの作成
# fig = make_subplots(rows=1, cols=2, subplot_titles=('AUC', 'ルール違反率'))
fig = make_subplots(rows=1, cols=2)

# グラフ1: AUC
for i in range(len(tmp_mean_auc)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_auc.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i])  # マーカーの色を設定
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] + tmp_std_auc.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] - tmp_std_auc.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=1
    )

# グラフ2: ルール違反率
for i in range(len(tmp_mean_vr)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_vr.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i]),  # マーカーの色を設定
            showlegend=False
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] + tmp_std_vr.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] - tmp_std_vr.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=2
    )

# x軸の設定
fig.update_xaxes(title_text='教師なしデータ点数', row=1, col=1)
fig.update_xaxes(title_text='教師なしデータ点数', row=1, col=2)

# レイアウトの設定
fig.update_layout(
    height=400,
    width=600,
    xaxis=dict(
        domain=[0, 0.49],  # 左側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    xaxis2=dict(
        domain=[0.5, 1],  # 右側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    legend=dict(
        x=0.5,
        y=1.15,
        orientation='h'  # 水平配置
    ),
    
)

# y軸の設定
fig.update_yaxes(title_text='AUC', range=[0, 1], row=1, col=1, side='left', title_font=dict(size=16))  # col=1のグラフのy軸を左側に配置
fig.update_yaxes(title_text='ルール違反率', range=[0, 1], row=1, col=2, side='right', title_font=dict(size=16))  # col=2のグラフのy軸を右側に配置

# グラフの表示
fig.show()

In [25]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# グラフのデータを準備
index = ['l-SVM-p', 'r-SVM-p', 'LogReg-p']
colors_mean = ['rgba(0,176,246,1)', 'rgba(231,107,243,1)', 'rgba(21,213,69,1)']  # indexに対応する色
colors_std  = ['rgba(0,176,246,0.2)', 'rgba(231,107,243,0.2)', 'rgba(21,213,69,0.2)']  # indexに対応する色


# グラフ1: AUCのデータ
col_auc = 'auc'
tmp_mean_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_std)}
], index=index)

# グラフ2: ルール違反率のデータ
col_vr = 'violation_rate'
tmp_mean_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_std)}
], index=index)


# サブプロットの作成
# fig = make_subplots(rows=1, cols=2, subplot_titles=('AUC', 'ルール違反率'))
fig = make_subplots(rows=1, cols=2)

# グラフ1: AUC
for i in range(len(tmp_mean_auc)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_auc.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i])  # マーカーの色を設定
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] + tmp_std_auc.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] - tmp_std_auc.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=1
    )

# グラフ2: ルール違反率
for i in range(len(tmp_mean_vr)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_vr.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i]),  # マーカーの色を設定
            showlegend=False
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] + tmp_std_vr.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=2
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] - tmp_std_vr.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=2
    )

# x軸の設定
fig.update_xaxes(title_text='教師なしデータ点数', row=1, col=1, type='log')
fig.update_xaxes(title_text='教師なしデータ点数', row=1, col=2, type='log')

# レイアウトの設定
fig.update_layout(
    height=400,
    width=600,
    xaxis=dict(
        domain=[0, 0.49],  # 左側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    xaxis2=dict(
        domain=[0.5, 1],  # 右側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    legend=dict(
        x=0.5,
        y=1.15,
        orientation='h'  # 水平配置
    ),
    annotations=[
        dict(
            xref="paper",
            yref="paper",
            x=0.02,
            y=1.11,
            text="※ x軸は対数スケール",
            showarrow=False,
            font=dict(
                size=15,
                color="black"
            )
        )
    ]
)

# y軸の設定
fig.update_yaxes(title_text='AUC', range=[0, 1], row=1, col=1, side='left', title_font=dict(size=16))  # col=1のグラフのy軸を左側に配置
fig.update_yaxes(title_text='ルール違反率', range=[0, 1], row=1, col=2, side='right', title_font=dict(size=16))  # col=2のグラフのy軸を右側に配置

# x軸の設定
fig.update_xaxes(title_text='教師なしデータ点数', row=1, col=1, type='log', tickvals=[1, 10, 100, 1000], ticktext=["1", "10", "100", "1000"])
fig.update_xaxes(title_text='教師なしデータ点数', row=1, col=2, type='log', tickvals=[1, 10, 100, 1000], ticktext=["1", "10", "100", "1000"])


# グラフの表示
fig.show()

In [55]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# グラフのデータを準備
index = ['l-SVM-p', 'r-SVM-p', 'LogReg-p']
colors_mean = ['rgba(0,176,246,1)', 'rgba(231,107,243,1)', 'rgba(21,213,69,1)']  # indexに対応する色
colors_std  = ['rgba(0,176,246,0.2)', 'rgba(231,107,243,0.2)', 'rgba(21,213,69,0.2)']  # indexに対応する色


# グラフ1: AUCのデータ
col_auc = 'auc'
tmp_mean_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_auc = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_auc] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_auc] for info, df in zip(infos, dfs_std)}
], index=index)


# サブプロットの作成
# fig = make_subplots(rows=1, cols=2, subplot_titles=('AUC', 'ルール違反率'))
fig = make_subplots(rows=1, cols=1)

# グラフ1: AUC
for i in range(len(tmp_mean_auc)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_auc.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i])  # マーカーの色を設定
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] + tmp_std_auc.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_auc.columns,
            y=tmp_mean_auc.iloc[i, :] - tmp_std_auc.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=1
    )

# x軸の設定
fig.update_xaxes(title_text='教師なしデータ点数', row=1, col=1, type='log')

# レイアウトの設定
fig.update_layout(
    height=400,
    width=500,
    xaxis=dict(
        domain=[0, 1],  # 左側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    legend=dict(
        x=0.15,
        y=1.15,
        orientation='h'  # 水平配置
    ),
    annotations=[
        dict(
            xref="paper",
            yref="paper",
            x=0.00,
            y=1.25,
            text="※ x軸は対数スケール",
            showarrow=False,
            font=dict(
                size=15,
                color="black"
            )
        )
    ]
)


# y軸の設定
fig.update_yaxes(title_text='AUC', range=[0, 1], row=1, col=1, side='left', title_font=dict(size=16))  # col=1のグラフのy軸を左側に配置

# グラフの表示
fig.show()

In [56]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# グラフのデータを準備
index = ['l-SVM-p', 'r-SVM-p', 'LogReg-p']
colors_mean = ['rgba(0,176,246,1)', 'rgba(231,107,243,1)', 'rgba(21,213,69,1)']  # indexに対応する色
colors_std  = ['rgba(0,176,246,0.2)', 'rgba(231,107,243,0.2)', 'rgba(21,213,69,0.2)']  # indexに対応する色

# グラフ2: ルール違反率のデータ
col_vr = 'violation_rate'
tmp_mean_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_mean)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_mean)}
], index=index)
tmp_std_vr = pd.DataFrame([
    {info: df.loc['linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['non-linear svm (L)', col_vr] for info, df in zip(infos, dfs_std)},
    {info: df.loc['logistic regression (L)', col_vr] for info, df in zip(infos, dfs_std)}
], index=index)


# サブプロットの作成
# fig = make_subplots(rows=1, cols=2, subplot_titles=('AUC', 'ルール違反率'))
fig = make_subplots(rows=1, cols=1)

# グラフ2: ルール違反率
for i in range(len(tmp_mean_vr)):
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :],
            mode='lines+markers',
            name=tmp_mean_vr.index[i],
            line=dict(color=colors_mean[i]),  # ラインの色を設定
            marker=dict(color=colors_mean[i]),  # マーカーの色を設定
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] + tmp_std_vr.iloc[i, :],
            mode='lines',
            line=dict(color=colors_std[i]),  # ラインの色を設定
            showlegend=False
        ),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=tmp_mean_vr.columns,
            y=tmp_mean_vr.iloc[i, :] - tmp_std_vr.iloc[i, :],
            mode='lines',
            fill='tonexty',
            fillcolor=colors_std[i],
            line=dict(color='rgba(255,255,255,0)'),
            showlegend=False
        ),
        row=1, col=1
    )

# レイアウトの設定
fig.update_layout(
    height=400,
    width=500,
    xaxis=dict(
        domain=[0, 1],  # 左側のsubplotの幅を調整
        title_font=dict(size=16)
    ),
    legend=dict(
        x=0.15,
        y=1.15,
        orientation='h'  # 水平配置
    ),
    annotations=[
        dict(
            xref="paper",
            yref="paper",
            x=0.00,
            y=1.25,
            text="※ x軸は対数スケール",
            showarrow=False,
            font=dict(
                size=15,
                color="black"
            )
        )
    ]
)

# x軸の設定
fig.update_xaxes(title_text='教師なしデータ点数', row=1, col=1, type='log')


# y軸の設定
fig.update_yaxes(title_text='ルール違反率', range=[0, 1], row=1, col=1, side='left', title_font=dict(size=16))  # col=2のグラフのy軸を右側に配置

# グラフの表示
fig.show()