In [63]:
import pandas as pd
import numpy as np
import plotly.express as px

In [64]:
def load_datasets(dataset_name, results_dir="./results"):
    """Load the per-method result CSVs of one dataset into two DataFrames.

    For each method in IBDD, CDT, IKS, WRS, BASELINE and TOPLINE this reads
    ``{results_dir}/{dataset_name}_{method}_artificial.csv`` and
    ``{results_dir}/{dataset_name}_{method}_normal.csv`` and stores the method
    name in a ``method`` column (overwriting that column if the file already
    has one).

    Args:
        dataset_name: Prefix of the CSV file names.
        results_dir: Directory containing the CSV files. Defaults to
            ``"./results"``, the location the notebook has always used.

    Returns:
        A tuple ``(df_artificial, df_normal)``. Each frame is the row-wise
        concatenation of the per-method frames, in the method order listed
        above, with a fresh ``0..n-1`` index.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when one of the
            expected files does not exist.
    """
    methods = ["IBDD", "CDT", "IKS", "WRS", "BASELINE", "TOPLINE"]

    artificial_frames = []
    normal_frames = []

    for method in methods:
        for kind, frames in (("artificial", artificial_frames),
                             ("normal", normal_frames)):
            frame = pd.read_csv(f"{results_dir}/{dataset_name}_{method}_{kind}.csv")
            frame["method"] = method
            frames.append(frame)

    # Concatenate once at the end instead of growing a frame inside the loop:
    # this avoids re-copying all accumulated rows on every iteration and does
    # not rely on concatenating with an empty seed DataFrame.
    df_artificial = pd.concat(artificial_frames, ignore_index=True)
    df_normal = pd.concat(normal_frames, ignore_index=True)

    return df_artificial, df_normal

In [None]:
# Name of the dataset whose result files are analysed in this notebook.
DATASET = "AedesQuinx"

# Read the combined per-method results ("artificial" and "normal" sets).
(df_artificial, df_normal) = load_datasets(dataset_name=DATASET)

In [66]:
# Per-method summary of df_normal: mean of 'time (s)' and 'classification',
# and the totals of 'drifs_detected' (sic, column name as it appears in the
# data) and 'false_alarms'. Named aggregation yields the final column names
# directly, so no positional renaming is needed afterwards.
method_summary = (
    df_normal
    .groupby('method')
    .agg(
        time=('time (s)', 'mean'),
        classification=('classification', 'mean'),
        drifts=('drifs_detected', 'sum'),
        false_alarms=('false_alarms', 'sum'),
    )
    .reset_index()
)

# Express the mean classification as a percentage with two decimals.
method_summary['classification'] = method_summary['classification'].mul(100).round(2)

# Print the heading, then let the frame render as the cell output.
print("\nResumo por Método:")
method_summary


Resumo por Método:


Unnamed: 0,method,time,classification,drifts,false_alarms
0,BASELINE,0.065122,73.9,0.0,0.0
1,CDT,0.227224,76.8,1.0,0.0
2,IBDD,0.0814,79.29,8.0,7.0
3,IKS,0.109803,76.89,1.0,0.0
4,TOPLINE,0.066221,76.47,1.0,0.0
5,WRS,0.088275,78.26,4.0,3.0


In [67]:
# Per-method summary of df_artificial: mean of 'time (s)' and
# 'classification', and the totals of 'drifs_detected' (sic, column name as it
# appears in the data) and 'false_alarms'. Named aggregation yields the final
# column names directly, so no positional renaming is needed afterwards.
method_summary = (
    df_artificial
    .groupby('method')
    .agg(
        time=('time (s)', 'mean'),
        classification=('classification', 'mean'),
        drifts=('drifs_detected', 'sum'),
        false_alarms=('false_alarms', 'sum'),
    )
    .reset_index()
)

# Express the mean classification as a percentage with two decimals.
method_summary['classification'] = method_summary['classification'].mul(100).round(2)

# Print the heading, then let the frame render as the cell output.
print("\nResumo por Método:")
method_summary


Resumo por Método:


Unnamed: 0,method,time,classification,drifts,false_alarms
0,BASELINE,0.063144,71.31,0.0,0.0
1,CDT,0.23432,73.86,1.0,0.0
2,IBDD,0.089696,73.9,12.0,11.0
3,IKS,0.111417,70.32,6.0,5.0
4,TOPLINE,0.066378,76.27,1.0,0.0
5,WRS,0.241631,72.27,85.0,84.0


In [68]:
# Mean 'classification' for every (class_distribution, method) pair of
# df_artificial; each box in the plot summarises these per-distribution means
# for one method.
df_agg = (
    df_artificial
    .groupby(['class_distribution', 'method'])
    .agg(classification_mean=('classification', 'mean'))
    .reset_index()
)

# Methods ordered from highest to lowest average of the per-distribution
# means. Converted to a plain list because plotly documents `category_orders`
# values as lists of strings rather than pandas Index objects.
ordered_methods = (
    df_agg
    .groupby('method')['classification_mean']
    .mean()
    .sort_values(ascending=False)
    .index
    .tolist()
)

# Box plot of the per-distribution means, one box (and colour) per method.
fig = px.box(
    df_agg,
    x='method',
    y='classification_mean',
    title="Artificial",
    category_orders={'method': ordered_methods},
    color='method',
    labels={'method': 'Método', 'classification_mean': 'Classificação Média'},
)

fig.show()

## Normal

In [69]:
# Per-method summary of df_normal: mean and median of 'time (s)' and
# 'classification', and the totals of 'drifs_detected' (sic, column name as it
# appears in the data) and 'false_alarms'. Named aggregation produces flat,
# readable column names directly instead of a two-level column index that
# has to be overwritten positionally.
method_summary = (
    df_normal
    .groupby('method')
    .agg(
        time_mean=('time (s)', 'mean'),
        time_median=('time (s)', 'median'),
        classification_mean=('classification', 'mean'),
        classification_median=('classification', 'median'),
        drifts=('drifs_detected', 'sum'),
        false_alarms=('false_alarms', 'sum'),
    )
    .reset_index()
)

# Print the heading, then let the frame render as the cell output.
print("\nResumo por Método:")
method_summary


Resumo por Método:


Unnamed: 0,method,time_mean,time_median,classification_mean,classification_median,drifts,false_alarms
0,BASELINE,0.065122,0.063129,0.738967,0.738333,0.0,0.0
1,CDT,0.227224,0.138005,0.767983,0.766667,1.0,0.0
2,IBDD,0.0814,0.070762,0.792883,0.795,8.0,7.0
3,IKS,0.109803,0.108883,0.768933,0.768333,1.0,0.0
4,TOPLINE,0.066221,0.062435,0.764683,0.766667,1.0,0.0
5,WRS,0.088275,0.075717,0.78265,0.786667,4.0,3.0


In [70]:
# Mean 'classification' for every (class_distribution, method) pair of
# df_normal; each box in the plot summarises these per-distribution means for
# one method.
df_agg = (
    df_normal
    .groupby(['class_distribution', 'method'])
    .agg(classification_mean=('classification', 'mean'))
    .reset_index()
)

# Methods ordered from highest to lowest average of the per-distribution
# means. Converted to a plain list because plotly documents `category_orders`
# values as lists of strings rather than pandas Index objects.
ordered_methods = (
    df_agg
    .groupby('method')['classification_mean']
    .mean()
    .sort_values(ascending=False)
    .index
    .tolist()
)

# Box plot of the per-distribution means, one box (and colour) per method.
fig = px.box(
    df_agg,
    x='method',
    y='classification_mean',
    title="Normal",
    category_orders={'method': ordered_methods},
    color='method',
    labels={'method': 'Método', 'classification_mean': 'Classificação Média'},
)

fig.show()