# 教師なしデータ点数の違いによる予測結果の変化

In [1]:
import json
import pandas as pd

In [2]:
# Result files to compare. Each file is read for its 'n_unsupervised' value
# and its per-fold 'result' tables.
result_file_paths = [
    "./experiments/version_91/result.json",
    "./experiments/version_92/result.json",
    "./experiments/version_93/result.json",
    "./experiments/version_94/result.json",
    "./experiments/version_95/result.json",
]

result_dfs = []  # one list of per-fold DataFrames per result file
infos = []       # 'n_unsupervised' of each result file, in file order

for path in result_file_paths:
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    infos.append(json_data['n_unsupervised'])

    # Transpose each fold's table so its top-level keys become the row labels,
    # and drop the 'violation_detail' column before aggregating.
    result_dfs.append([
        pd.DataFrame(fold_result).T.drop(['violation_detail'], axis=1)
        for fold_result in json_data["result"].values()
    ])

# Stack the folds of each experiment (keeping the row labels), then aggregate
# across folds by row label.
combined_dfs = [pd.concat(dfs, ignore_index=False) for dfs in result_dfs]
dfs_mean = [combined_df.groupby(combined_df.index).mean() for combined_df in combined_dfs]
dfs_std = [combined_df.groupby(combined_df.index).std() for combined_df in combined_dfs]

In [3]:
# Show the 'n_unsupervised' value read from each result file, in file order.
infos

[1, 5, 15, 50, 100]

In [4]:
import plotly.express as px

def hex_to_rgb(hex_color):
    """Convert a hexadecimal color code into a ``(red, green, blue)`` tuple.

    Args:
        hex_color: Color code such as ``'#636EFA'``. The leading ``#`` is
            optional. Shorthand codes (``'#RGB'`` / ``'#RGBA'``) are expanded
            by doubling each digit. Digits after the first six (for example
            an alpha pair) are ignored.

    Returns:
        Tuple of three integers ``(red, green, blue)``, each in ``0..255``.

    Raises:
        ValueError: If the code has too few digits or contains characters
            that are not valid hexadecimal digits.
    """
    # Drop the leading '#'.
    digits = hex_color.lstrip('#')

    # Expand shorthand notation: 'abc' -> 'aabbcc'.
    if len(digits) in (3, 4):
        digits = ''.join(ch * 2 for ch in digits)

    # Fewer than six digits would otherwise yield a truncated blue channel.
    if len(digits) < 6:
        raise ValueError(f"invalid hex color code: {hex_color!r}")

    # Parse the three two-digit channels.
    red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return red, green, blue

# px.colors.qualitative.Plotly palette converted to (r, g, b) tuples.
color_codes = list(map(hex_to_rgb, px.colors.qualitative.Plotly))
# Same palette as CSS rgba() strings: alpha 1 for colors_mean, alpha 0.2 for colors_std.
colors_mean = ['rgba({},{},{},{})'.format(r, g, b, 1) for (r, g, b) in color_codes]
colors_std = ['rgba({},{},{},{})'.format(r, g, b, 0.2) for (r, g, b) in color_codes]

In [4]:
# List the columns of the fold-averaged table of the first result file.
dfs_mean[0].columns

Index(['accuracy', 'precision', 'recall', 'f1', 'auc', 'n_violation', 'n_rule',
       'violation_rate', 'n_violation (instance)', 'n_evaluation (instance)',
       'violation_rate (instance)'],
      dtype='object')

In [6]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Rows of the aggregated result tables to compare in this figure.
index = [
    'RuleFit Classifier (disc)',
    'tree generator (disc)',
    'RuleFit Classifier (conti)',
    'tree generator (conti)',
]

# Metrics to plot; each one gets its own subplot row.
columns = ['auc', 'violation_rate', 'violation_rate (instance)']

# x-axis label; the x values are the entries of `infos`.
title_text = "n unsupervised"

# The row count must follow `columns`: one row is drawn per metric and the
# figure height below is sized per metric.
fig = make_subplots(rows=len(columns), cols=1)

for row_num, col in enumerate(columns, start=1):
    # Build (model x infos) tables of the mean and std of this metric.
    tmp_mean = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_mean)} for model_name in index],
        index=index,
    )
    tmp_std = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_std)} for model_name in index],
        index=index,
    )

    for i, model_name in enumerate(index):
        mean = tmp_mean.iloc[i, :]
        std = tmp_std.iloc[i, :]

        # Mean line; legend entries are emitted for the first row only so
        # each model is listed once.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean,
                mode='lines+markers',
                name=model_name,
                line=dict(color=colors_mean[i]),
                marker=dict(color=colors_mean[i]),
                showlegend=(row_num == 1),
            ),
            row=row_num, col=1
        )
        # Upper edge of the band (mean + std).
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean + std,
                mode='lines',
                line=dict(color=colors_std[i]),
                showlegend=False
            ),
            row=row_num, col=1
        )
        # Lower edge (mean - std); 'tonexty' fills up to the trace added just
        # before it, i.e. the upper edge.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean - std,
                mode='lines',
                fill='tonexty',
                fillcolor=colors_std[i],
                line=dict(color='rgba(255,255,255,0)'),
                showlegend=False
            ),
            row=row_num, col=1
        )

    # Axis settings; the title font is set here so every subplot row gets it.
    fig.update_xaxes(title_text=title_text, title_font=dict(size=16), row=row_num, col=1)
    fig.update_yaxes(title_text=col, range=[0, 1], row=row_num, col=1, side='right', title_font=dict(size=16))

# Figure layout: 300 px per metric row, horizontal legend above the plots.
fig.update_layout(
    height=len(columns) * 300,
    width=600,
    legend=dict(
        x=0.5,
        y=1.15,
        orientation='h'
    ),
)


# Display the figure.
fig.show()

In [8]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Rows of the aggregated result tables to compare in this figure.
index = [
    'linear svm (L)',
    'non-linear svm (L)',
    'logistic regression (L)'
]

# Metrics to plot; each one gets its own subplot row.
columns = ['auc', 'violation_rate', 'violation_rate (instance)']

# x-axis label; the x values are the entries of `infos`.
title_text = "n unsupervised"

# One subplot row per metric.
fig = make_subplots(rows=len(columns), cols=1)

for row_num, col in enumerate(columns, start=1):
    # Build (model x infos) tables of the mean and std of this metric.
    tmp_mean = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_mean)} for model_name in index],
        index=index,
    )
    tmp_std = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_std)} for model_name in index],
        index=index,
    )

    for i, model_name in enumerate(index):
        mean = tmp_mean.iloc[i, :]
        std = tmp_std.iloc[i, :]

        # Mean line; legend entries are emitted for the first row only so
        # each model is listed once.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean,
                mode='lines+markers',
                name=model_name,
                line=dict(color=colors_mean[i]),
                marker=dict(color=colors_mean[i]),
                showlegend=(row_num == 1),
            ),
            row=row_num, col=1
        )
        # Upper edge of the band (mean + std).
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean + std,
                mode='lines',
                line=dict(color=colors_std[i]),
                showlegend=False
            ),
            row=row_num, col=1
        )
        # Lower edge (mean - std); 'tonexty' fills up to the trace added just
        # before it, i.e. the upper edge.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean - std,
                mode='lines',
                fill='tonexty',
                fillcolor=colors_std[i],
                line=dict(color='rgba(255,255,255,0)'),
                showlegend=False
            ),
            row=row_num, col=1
        )

    # Axis settings; the title font is set here so every subplot row gets it
    # (not only the first two x-axes).
    fig.update_xaxes(title_text=title_text, title_font=dict(size=16), row=row_num, col=1)
    fig.update_yaxes(title_text=col, range=[0, 1], row=row_num, col=1, side='right', title_font=dict(size=16))

# Figure layout: 300 px per metric row, horizontal legend above the plots.
fig.update_layout(
    height=len(columns) * 300,
    width=600,
    legend=dict(
        x=0.5,
        y=1.15,
        orientation='h'
    ),
)


# Display the figure.
fig.show()

In [9]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Rows of the aggregated result tables to compare in this figure.
index = [
    'linear svm',
    'non-linear svm',
    'logistic regression'
]

# Metrics to plot; each one gets its own subplot row.
columns = ['auc', 'violation_rate', 'violation_rate (instance)']

# x-axis label. The x values are the entries of `infos` (the 'n_unsupervised'
# value of each result file), so the axis is labelled accordingly.
title_text = "n unsupervised"

# One subplot row per metric.
fig = make_subplots(rows=len(columns), cols=1)

for row_num, col in enumerate(columns, start=1):
    # Build (model x infos) tables of the mean and std of this metric.
    tmp_mean = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_mean)} for model_name in index],
        index=index,
    )
    tmp_std = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_std)} for model_name in index],
        index=index,
    )

    for i, model_name in enumerate(index):
        mean = tmp_mean.iloc[i, :]
        std = tmp_std.iloc[i, :]

        # Mean line; legend entries are emitted for the first row only so
        # each model is listed once.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean,
                mode='lines+markers',
                name=model_name,
                line=dict(color=colors_mean[i]),
                marker=dict(color=colors_mean[i]),
                showlegend=(row_num == 1),
            ),
            row=row_num, col=1
        )
        # Upper edge of the band (mean + std).
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean + std,
                mode='lines',
                line=dict(color=colors_std[i]),
                showlegend=False
            ),
            row=row_num, col=1
        )
        # Lower edge (mean - std); 'tonexty' fills up to the trace added just
        # before it, i.e. the upper edge.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean - std,
                mode='lines',
                fill='tonexty',
                fillcolor=colors_std[i],
                line=dict(color='rgba(255,255,255,0)'),
                showlegend=False
            ),
            row=row_num, col=1
        )

    # Axis settings; the title font is set here so every subplot row gets it
    # (not only the first two x-axes).
    fig.update_xaxes(title_text=title_text, title_font=dict(size=16), row=row_num, col=1)
    fig.update_yaxes(title_text=col, range=[0, 1], row=row_num, col=1, side='right', title_font=dict(size=16))

# Figure layout: 300 px per metric row, horizontal legend above the plots.
fig.update_layout(
    height=len(columns) * 300,
    width=600,
    legend=dict(
        x=0.5,
        y=1.15,
        orientation='h'
    ),
)


# Display the figure.
fig.show()

In [10]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Rows of the aggregated result tables to compare in this figure.
index = [
    'RuleFit Classifier (disc)',
    'tree generator (disc)',
]

# Metrics to plot; each one gets its own subplot row.
columns = ['auc', 'violation_rate', 'violation_rate (instance)']

# x-axis label. The x values are the entries of `infos` (the 'n_unsupervised'
# value of each result file), so the axis is labelled accordingly.
title_text = "n unsupervised"

# One subplot row per metric.
fig = make_subplots(rows=len(columns), cols=1)

for row_num, col in enumerate(columns, start=1):
    # Build (model x infos) tables of the mean and std of this metric.
    tmp_mean = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_mean)} for model_name in index],
        index=index,
    )
    tmp_std = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_std)} for model_name in index],
        index=index,
    )

    for i, model_name in enumerate(index):
        mean = tmp_mean.iloc[i, :]
        std = tmp_std.iloc[i, :]

        # Mean line; legend entries are emitted for the first row only so
        # each model is listed once.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean,
                mode='lines+markers',
                name=model_name,
                line=dict(color=colors_mean[i]),
                marker=dict(color=colors_mean[i]),
                showlegend=(row_num == 1),
            ),
            row=row_num, col=1
        )
        # Upper edge of the band (mean + std).
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean + std,
                mode='lines',
                line=dict(color=colors_std[i]),
                showlegend=False
            ),
            row=row_num, col=1
        )
        # Lower edge (mean - std); 'tonexty' fills up to the trace added just
        # before it, i.e. the upper edge.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean - std,
                mode='lines',
                fill='tonexty',
                fillcolor=colors_std[i],
                line=dict(color='rgba(255,255,255,0)'),
                showlegend=False
            ),
            row=row_num, col=1
        )

    # Axis settings; the title font is set here so every subplot row gets it
    # (not only the first two x-axes).
    fig.update_xaxes(title_text=title_text, title_font=dict(size=16), row=row_num, col=1)
    fig.update_yaxes(title_text=col, range=[0, 1], row=row_num, col=1, side='right', title_font=dict(size=16))

# Figure layout: 300 px per metric row, horizontal legend above the plots.
fig.update_layout(
    height=len(columns) * 300,
    width=600,
    legend=dict(
        x=0.5,
        y=1.15,
        orientation='h'
    ),
)


# Display the figure.
fig.show()

In [11]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Rows of the aggregated result tables to compare in this figure.
index = [
    'linear svm',
    'non-linear svm',
    'logistic regression',
    'RuleFit Classifier (disc)',
    'tree generator (disc)',
]

# Metrics to plot; each one gets its own subplot row (a single row here).
columns = ['violation_rate']

# x-axis label. The x values are the entries of `infos` (the 'n_unsupervised'
# value of each result file), so the axis is labelled accordingly.
title_text = "n unsupervised"

# One subplot row per metric.
fig = make_subplots(rows=len(columns), cols=1)

for row_num, col in enumerate(columns, start=1):
    # Build (model x infos) tables of the mean and std of this metric.
    tmp_mean = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_mean)} for model_name in index],
        index=index,
    )
    tmp_std = pd.DataFrame(
        [{info: df.loc[model_name, col] for info, df in zip(infos, dfs_std)} for model_name in index],
        index=index,
    )

    for i, model_name in enumerate(index):
        mean = tmp_mean.iloc[i, :]
        std = tmp_std.iloc[i, :]

        # Mean line; legend entries are emitted for the first row only so
        # each model is listed once.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean,
                mode='lines+markers',
                name=model_name,
                line=dict(color=colors_mean[i]),
                marker=dict(color=colors_mean[i]),
                showlegend=(row_num == 1),
            ),
            row=row_num, col=1
        )
        # Upper edge of the band (mean + std).
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean + std,
                mode='lines',
                line=dict(color=colors_std[i]),
                showlegend=False
            ),
            row=row_num, col=1
        )
        # Lower edge (mean - std); 'tonexty' fills up to the trace added just
        # before it, i.e. the upper edge.
        fig.add_trace(
            go.Scatter(
                x=tmp_mean.columns,
                y=mean - std,
                mode='lines',
                fill='tonexty',
                fillcolor=colors_std[i],
                line=dict(color='rgba(255,255,255,0)'),
                showlegend=False
            ),
            row=row_num, col=1
        )

    # Axis settings; the title font is set per subplot row, so no axis that
    # does not exist in this figure is configured.
    fig.update_xaxes(title_text=title_text, title_font=dict(size=16), row=row_num, col=1)
    fig.update_yaxes(title_text=col, range=[0, 1], row=row_num, col=1, side='right', title_font=dict(size=16))

# Figure layout: 300 px per metric row, horizontal legend placed above the plot.
fig.update_layout(
    height=len(columns) * 300,
    width=600,
    legend=dict(
        x=0.5,
        y=2,
        orientation='h'
    ),
)


# Display the figure.
fig.show()