En este cuaderno se revisan las pruebas estadísticas que determinan si una combinación de pesos es mejor con respecto a otra

In [2]:
from tqdm.notebook import tqdm as tqm
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from utils.func_aux import *
from utils.func_vis import *
import plotly.express as px
import scipy.stats as st
import seaborn as sns
import pandas as pd
import numpy as np

# Opciones de matplotlib
rc = plt.rcParams
rc["figure.figsize"] = [15, 5]

# Para mostrar todas las columnas cuando se imprime un df
pd.set_option("display.max_columns", None)

# Para poner el estilo de las gráficas de matplotlib parecido al de ggplot
plt.style.use("ggplot") 

# Definimos tablas que usaremos en el cuaderno
df_todos_PI=pd.read_csv('../tablas_generadas/todos_QI.csv')
w_sorted=np.sort(df_todos_PI['w_0'].unique())

df_QI=pd.read_csv('../tablas_generadas/QI_carac.csv')
dicc_indicadores={df_QI.indicador[i]:df_QI.Meta[i] for i in range(len(df_QI))}

go_to_Assesment()


# Kruskal-Wallis

Usamos la prueba de Kruskal Wallis para ver si hay diferencia significativa entre cada una de las diferentes combinaciones de parámetros. 

Aquellos donde se rechaza la hiṕótesis nula presentan una media diferente y por lo tanto si dependen de esta elección.

In [5]:
# def get_KW(df):
#     indicador_lista=[]
#     for wi in df.w_0.unique():
#         df_w0_unico =df.query(f'(w_0=={wi})').sort_values('run')
#         indicador_lista.append(df_w0_unico.valor_indicador.values)
#     try:
#         return st.kruskal(*indicador_lista)[1]
#     except:
#         return np.nan

# # Tarda 28 segundos
# df_KW_todos=df_todos_PI.dropna().groupby(['n_objetivos','problema','indicador','hiperparam_ind_conv']).apply(get_KW).reset_index().rename(columns={0:'p-value_KW'})
# df_KW_todos['Medias_Distintas']=df_KW_todos['p-value_KW'].apply(lambda x: 1 if x <= 0.05 else 0) # Para que se puedan sumar los False y True
# df_KW_todos

df_KW_todos=pd.read_csv('../tablas_generadas/KW_todos.csv')
df_KW_todos.sample(5)

Unnamed: 0,hiperparam_ind_conv,problema,n_objetivos,indicador,Medias_Distintas
1179,R2,DTLZ5,7,hv,0.0
489,R2,DTLZ3,4,spd,0.0
420,IGD+,WFG8,3,eps+,1.0
1171,R2,DTLZ4,7,r2,0.0
707,R2,DTLZ3,5,igd+,0.0


# Prueba de Friedman

La prueba de Kruskal se realiza con la suposición de que las muestras vienen de poblaciones no relacionadas. Dado que nuestros algoritmos distintos comparten puntos iniciales (las inicializaciones de las poblaciones dadas por las semillas) entonces tenemos que usar una prueba que compare a cada una de las corridas entre sí. 

Para esto, usamos la [prueba de Friedman](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.friedmanchisquare.html), obtenemos los $p$-valores y determinamos si se rechaza la hipótesis nula:

```H0: Todos los algoritmos tienen el mismo desempeño```

In [14]:
# def get_Friedman(df):
#     indicador_lista=[]
#     for wi in df.w_0.unique():
#         df_w0_unico =df.query(f'(w_0=={wi})').sort_values('run')
#         indicador_lista.append(df_w0_unico.valor_indicador.values)
#     try:
#         return st.friedmanchisquare(*indicador_lista)[1]
#     except:
#         return np.nan

# # Tarda 28 segundos
# df_Friedman_todos=df_todos_PI.dropna().groupby(['n_objetivos','problema','indicador','hiperparam_ind_conv']).apply(get_Friedman).reset_index().rename(columns={0:'p-value_Friedman'})
# df_Friedman_todos['Medias_Distintas']=df_Friedman_todos['p-value_Friedman'].apply(lambda x: 1 if x <= 0.05 else 0) # Para que se puedan sumar los False y True
df_Friedman_todos =pd.read_csv('../tablas_generadas/Friedman_todos.csv')
df_Friedman_todos.sample(5)

Unnamed: 0,n_objetivos,problema,indicador,hiperparam_ind_conv,p-value_Friedman,Medias_Distintas
952,6,DTLZ5,eps+,IGD+,0.3462987,0
1152,7,DTLZ3,igd,IGD+,0.01805136,1
988,6,DTLZ7,r2,IGD+,2.101891e-07,1
858,5,WFG7,igd,IGD+,0.4828978,0
746,5,DTLZ6,igd,IGD+,1.089259e-07,1


In [7]:
df_KW_todos = pd.read_csv('../tablas_generadas/KW_todos.csv')
print(f"Porcentaje de Medias distintas con Friedman: {df_Friedman_todos['Medias_Distintas'].mean():2.2f}%\nPorcentaje de Medias distintas con Kruskal-Wallis {df_KW_todos['Medias_Distintas'].mean():2.2f}%")

Porcentaje de Medias distintas con Friedman: 0.25%
Porcentaje de Medias distintas con Kruskal-Wallis 0.22%


Creamos tablas de la clasificación principal de cada indicador de calidad

In [8]:
QI_carac=pd.read_csv('../tablas_generadas/QI_carac.csv')
QI_carac

Unnamed: 0,indicador,Categoría,Meta
0,igd+,Convergencia,Minimize
1,r2,Convergencia,Maximize
2,s-energy,Diversidad,Minimize
3,eps+,Convergencia,Minimize
4,igd,Convergencia,Minimize
5,spd,Diversidad,Maximize
6,hv,Convergencia,Maximize


In [17]:
ind_conv_lista=['IGD+','R2']
for ind_conv in ind_conv_lista:
    df_KW_ind=df_Friedman_todos.query(f'hiperparam_ind_conv=="{ind_conv}"')
    
    
    df_KW_ind.n_objetivos=df_KW_ind.n_objetivos.astype(str)
    df_KW_ind=df_KW_ind.merge(right=QI_carac,how='inner',on='indicador')
    df_KW_ind = df_KW_ind.rename(columns={'n_objetivos': 'n_obj','problema':'problem','indicador':'QI','hiperparam_ind_conv':"DE_conv_ind","p-value_Friedman":"p-value"})
    fig=px.bar(data_frame=df_KW_ind.groupby(['n_obj','QI']).Medias_Distintas.mean().reset_index(),x='n_obj',y='Medias_Distintas',color='QI', barmode='group')
    fig.update_layout(
        xaxis_title='n obj', yaxis_title='Avg Differences', yaxis=dict(range=[0,1]),
        height=500, width=900
    )
    fig.layout.yaxis.tickformat = ',.0%' 
    fig.update_xaxes(categoryorder='array',categoryarray=['2', '3','4', '5','6', '7', '10']) 
    fig.write_image(f'../imgs_pdf/Friedman_obj_indconv_{ind_conv}.pdf')
    fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Para igd+ en 3 dimensiones casi todas las dimensiones sufren un cambio cuando se cambia $w_0"

In [21]:
QI_carac.replace({'Convergencia':'Convergence','Diversidad':'Diversity'})

Unnamed: 0,indicador,Categoría,Meta
0,igd+,Convergence,Minimize
1,r2,Convergence,Maximize
2,s-energy,Diversity,Minimize
3,eps+,Convergence,Minimize
4,igd,Convergence,Minimize
5,spd,Diversity,Maximize
6,hv,Convergence,Maximize


In [22]:
for ind_conv in ind_conv_lista:
    df_Friedman_ind=df_Friedman_todos.query(f'hiperparam_ind_conv=="{ind_conv}"')
    df_Friedman_ind=df_Friedman_ind.merge(QI_carac,on='indicador')
    df_Friedman_ind=df_Friedman_ind.rename(columns={'Categoría':'QI_cat'}).replace({'Convergencia':'Convergence','Diversidad':'Diversity'})
    fig=px.bar(data_frame=df_Friedman_ind.groupby(['n_objetivos','QI_cat']).Medias_Distintas.mean().reset_index(),x='n_objetivos',y='Medias_Distintas',color='QI_cat', barmode='group')
    fig.update_layout(
        # title=f'Proporción de diferencias por categoría. indicador {ind_conv}',
        xaxis_title='n obj', yaxis_title='Avg Differences', yaxis=dict(range=[0,1]),
        height=500, width=900)
    fig.layout.yaxis.tickformat = ',.0%' 
    fig.update_xaxes(categoryorder='array',categoryarray=['2', '3', '4','5','6', '7', '10']) 
    fig.write_image(f'../imgs_pdf/Friedman_Diferencia_por_categoria_{ind_conv}.pdf')
    fig.show()

Hay que ver si estas diferencias se deben a los indicadores que no están

# Comparación uno a uno

## Wilcoxon

En el siguiente código se calcula el valor $p$ de la prueba intercalando para los indicadores que se quieren maximizar y los que se quieren minimizar.

Esto se guarda en un dataframe que se lee para cada llave como el valor p de la prueba estadística de que los datos i no son mejores que los datos j. Así, cuando este valor sea menor a 0.05 podremos decir que de manera significativa el renglón es mejor a la columna para cada indicador

In [9]:
import pandas as pd
pd.read_csv('./tablas_generadas/conteo_borda_todos.csv')#.n_objetivos.unique()

Unnamed: 0,n_objetivos,problema,indicador,hiperparam_ind_conv,0,1,2,3,4,5,6,7,8,9,10
0,2,DTLZ1,eps+,IGD+,0,0,0,0,0,0,0,0,0,0,0
1,2,DTLZ1,eps+,R2,0,0,1,3,0,0,0,0,0,0,0
2,2,DTLZ1,hv,IGD+,0,0,0,0,0,1,2,3,2,0,0
3,2,DTLZ1,hv,R2,0,0,1,5,0,0,0,0,1,0,0
4,2,DTLZ1,igd,IGD+,0,2,0,0,0,0,0,0,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339,7,WFG9,r2,R2,0,0,0,0,0,0,0,0,0,0,0
1340,7,WFG9,s-energy,IGD+,8,7,2,3,6,0,0,0,0,0,0
1341,7,WFG9,s-energy,R2,0,0,0,0,0,0,0,0,0,0,0
1342,7,WFG9,spd,IGD+,1,1,0,1,1,0,1,0,1,1,0


In [4]:
dicc_indicadores

{'igd+': 'Minimize',
 'r2': 'Maximize',
 's-energy': 'Minimize',
 'eps+': 'Minimize',
 'igd': 'Minimize',
 'spd': 'Maximize',
 'hv': 'Maximize'}

In [29]:
def WC_apply(df):
    n_objetivos,problema,indicador = df.name
    mat_WC=np.zeros(shape=(len(w_sorted),len(w_sorted)))
    for i,wi in enumerate(w_sorted):
        for j,wj in enumerate(w_sorted):
            datos_i=df.query(f'w_0=={wi}').valor_indicador.values
            datos_j=df.query(f'w_0=={wj}').valor_indicador.values
            if (datos_i==datos_j).all():
                mat_WC[i,j]=np.nan
            elif dicc_indicadores[indicador]=='Maximize':
                try:
                    mat_WC[i,j]=st.wilcoxon(datos_i,datos_j,alternative='greater')[1]
                except:
                    mat_WC[i,j]=np.nan
            
            elif dicc_indicadores[indicador]=='Minimize':
                try:
                    mat_WC[i,j]=st.wilcoxon(datos_i,datos_j,alternative='less')[1]
                except:
                    mat_WC[i,j]=np.nan
        
    return pd.DataFrame(mat_WC,index=w_sorted,columns=w_sorted)

w_sorted= np.sort(df_todos_PI_IGDp['w_0'].unique())
# Tarda como 3.5 minutos en acabar
df_WC_IGDp=df_todos_PI_IGDp.groupby(['n_objetivos','problema','indicador']).apply(WC_apply)
df_WC_IGDp_exp=df_WC_IGDp.applymap(format_scientific_6_digits)

# df_WC_IGDp=pd.read_csv('../tablas_generadas/WC_QI_v2.csv')
# df_WC_IGDp=df_WC_IGDp.set_index(['n_objetivos','problema','indicador']).iloc[:,1:]
pd.set_option('display.max_rows', 10)
df_WC_IGDp

  d = x - y
  d = x - y


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0.001,0.100,0.200,0.300,0.400,0.500,0.600,0.700,0.800,0.900,0.999
dimension,problema,Indicador,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,DTLZ1,eps+,0.001,,0.862305,0.500000,0.422852,0.838867,0.812500,0.883789,0.838867,0.862305,0.539062,0.753906
2,DTLZ1,eps+,0.100,0.161133,,0.312500,0.187500,0.384766,0.278320,0.577148,0.215820,0.215820,0.161133,0.215820
2,DTLZ1,eps+,0.200,0.539062,0.721680,,0.460938,0.784180,0.615234,0.919922,0.883789,0.687500,0.422852,0.753906
2,DTLZ1,eps+,0.300,0.615234,0.838867,0.577148,,0.721680,0.919922,0.862305,0.784180,0.687500,0.460938,0.753906
2,DTLZ1,eps+,0.400,0.187500,0.652344,0.246094,0.312500,,0.539062,0.652344,0.615234,0.460938,0.137695,0.384766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,WFG9,spd,0.600,0.841345,0.841345,0.281851,,0.841345,0.281851,,0.041632,0.841345,0.841345,0.078650
7,WFG9,spd,0.700,0.977250,0.977250,0.841345,0.958368,0.977250,0.841345,0.958368,,0.977250,0.977250,0.672640
7,WFG9,spd,0.800,,,0.078650,0.158655,,0.078650,0.158655,0.022750,,,0.089856
7,WFG9,spd,0.900,,,0.078650,0.158655,,0.078650,0.158655,0.022750,,,0.089856


Hacemos para el problema con los dos tipos de indicadores de convergencia `R2,IGD+` al mismo tiempo

In [24]:
def WC_apply_all(df):
    _,_,indicador,_ = df.name
    mat_WC=np.zeros(shape=(len(w_sorted),len(w_sorted)))
    for i,wi in enumerate(w_sorted):
        for j,wj in enumerate(w_sorted):
            datos_i=df.query(f'w_0=={wi}').valor_indicador.values
            datos_j=df.query(f'w_0=={wj}').valor_indicador.values

            if (datos_i==datos_j).all():
                mat_WC[i,j]=np.nan
            elif dicc_indicadores[indicador]=='Maximize':
                try:
                    mat_WC[i,j]=st.wilcoxon(datos_i,datos_j,alternative='greater')[1]
                except:
                    mat_WC[i,j]=np.nan
            
            elif dicc_indicadores[indicador]=='Minimize':
                try:
                    mat_WC[i,j]=st.wilcoxon(datos_i,datos_j,alternative='less')[1]
                except:
                    mat_WC[i,j]=np.nan
        
    return pd.DataFrame(mat_WC,index=w_sorted,columns=w_sorted)

# Tarda como 10 minutos en acabar
# df_WC_todos=df_todos_PI.groupby(['n_objetivos','problema','indicador','hiperparam_ind_conv']).apply(WC_apply_all)
# df_WC_todos_exp=df_WC_todos.applymap(format_scientific_6_digits)
# df_WC_todos_exp.to_csv('../tablas_generadas/WC_QI_todos.csv')
df_WC_todos=pd.read_csv('../tablas_generadas/WC_QI_todos.csv').drop(columns=['Unnamed: 4']).set_index(['n_objetivos','problema','indicador','hiperparam_ind_conv'])
df_WC_todos

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0.001,0.1,0.2,0.3,0.4,0.5,0.6000000000000001,0.7000000000000001,0.8,0.9,0.999
n_objetivos,problema,indicador,hiperparam_ind_conv,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,DTLZ1,eps+,IGD+,,0.862305,0.500000,0.422852,0.838867,0.812500,0.883789,0.838867,0.862305,0.539062,0.753906
2,DTLZ1,eps+,IGD+,0.161133,,0.312500,0.187500,0.384766,0.278320,0.577148,0.215820,0.215820,0.161133,0.215820
2,DTLZ1,eps+,IGD+,0.539062,0.721680,,0.460938,0.784180,0.615234,0.919922,0.883789,0.687500,0.422852,0.753906
2,DTLZ1,eps+,IGD+,0.615234,0.838867,0.577148,,0.721680,0.919922,0.862305,0.784180,0.687500,0.460938,0.753906
2,DTLZ1,eps+,IGD+,0.187500,0.652344,0.246094,0.312500,,0.539062,0.652344,0.615234,0.460938,0.137695,0.384766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,WFG9,spd,R2,0.910144,0.841345,,,,,,,0.841345,0.841345,0.910144
7,WFG9,spd,R2,0.910144,0.841345,,,,,,,0.841345,0.841345,0.910144
7,WFG9,spd,R2,0.327360,0.327360,0.158655,0.158655,0.158655,0.158655,0.158655,0.158655,,,0.672640
7,WFG9,spd,R2,0.327360,0.327360,0.158655,0.158655,0.158655,0.158655,0.158655,0.158655,,,0.672640


## Heatmaps

Se hace una función para calcular un heatmap por cada problema, dimensión e indicador

In [25]:
df_todos_PI_R2=df_todos_PI.query('hiperparam_ind_conv=="R2"')
df_todos_PI_IGDp=df_todos_PI.query('hiperparam_ind_conv=="IGD+"')

df_WC_R2=df_WC_todos.query('hiperparam_ind_conv=="R2"')
df_WC_IGDp=df_WC_todos.query('hiperparam_ind_conv=="IGD+"')

In [26]:
df_WC_todos=pd.read_csv('../tablas_generadas/WC_QI_todos.csv').drop(columns=['Unnamed: 4']).set_index(['n_objetivos','problema','indicador','hiperparam_ind_conv'])


In [4]:

def get_heatmap(df_WC, df_todos_PI, problema, n_objetivos, indicador,ind_conv='IGD+'):
    """Regresa el heatmap de wilcoxon, el boxplot para ver que efectivamente uno le está ganando al otro y un dataframe con la media para ver si coinciden los datos"""
    plt.figure(figsize=(10,5))
    # plt.title(
    #     f"$p$-value Wilcoxon. {problema} {n_objetivos} objetivos  {indicador.upper()}\n Color $\\rightarrow$ renglón mejor que columna"
    # )
    mask = df_WC.loc[(n_objetivos, problema, indicador.lower())].astype(float) < 0.05
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.heatmap(
        df_WC.loc[(n_objetivos, problema, indicador.lower())].astype(float),
        vmin=0,
        vmax=0.05,
        mask=~mask,
        ax=ax
    )
    ax.set_xlabel("$w_0$")
    ax.set_ylabel("$w_0$")
    # Set custom x-ticks and labels
    ax.set_xticks(np.arange(len(w_0)) + 0.5)
    ax.set_xticklabels([round(w0i[0], 2) for w0i in w_0], rotation=45)

    # Set custom y-ticks and labels
    ax.set_yticks(np.arange(len(w_0)) + 0.5)
    ax.set_yticklabels([round(w0i[0], 2) for w0i in w_0])


    plt.savefig(f'../imgs_pdf/Heatmap_{problema}_obj{n_objetivos}_{indicador}_indconv_{ind_conv}.pdf')
    # plt.show()
    plt.close()
    get_boxplot(problema, n_objetivos, indicador, df_PI=df_todos_PI,save_img_path=f'../imgs_pdf/Box_{problema}_obj{n_objetivos}_{indicador}_indconv_{ind_conv}.pdf')
    return (
        df_todos_PI[
            (df_todos_PI["problema"] == problema)
            & (df_todos_PI["n_objetivos"] == n_objetivos)
            & (df_todos_PI["indicador"] == indicador.lower())
        ]
        .groupby(["w_0"])
        .valor_indicador.mean()
        .reset_index()
    )


#* Para guardar las imágenes como pdf
# for hiperparam_ind_conv in df_todos_PI.hiperparam_ind_conv.unique():
#     for prob in df_todos_PI.problema.unique():
#         for n_obj in df_todos_PI.n_objetivos.unique():
#             for ind in df_todos_PI.indicador.unique():
#                 get_heatmap(df_WC_todos, df_todos_PI, prob, n_obj, ind, hiperparam_ind_conv)

In [5]:
problema='WFG4'
n_objetivos=3
indicador='hv'


get_heatmap(df_todos_PI=df_todos_PI_IGDp,df_WC=df_WC_IGDp,problema=problema,n_objetivos=n_objetivos,indicador=indicador)


Unnamed: 0,w_0,valor_indicador
0,0.001,75.828923
1,0.1,76.094108
2,0.2,76.234159
3,0.3,76.454552
4,0.4,76.524328
5,0.5,76.496631
6,0.6,76.602911
7,0.7,76.650783
8,0.8,76.612061
9,0.9,76.674197


<Figure size 1000x500 with 0 Axes>

## Conteo de borda

Se define como el número de victorias que tuvo sobre los demás

In [14]:
df_WC_todos.reset_index()

Unnamed: 0,n_objetivos,problema,indicador,hiperparam_ind_conv,0.001,0.1,0.2,0.3,0.4,0.5,0.6000000000000001,0.7000000000000001,0.8,0.9,0.999
0,2,DTLZ1,eps+,IGD+,,0.862305,0.500000,0.422852,0.838867,0.812500,0.883789,0.838867,0.862305,0.539062,0.753906
1,2,DTLZ1,eps+,IGD+,0.161133,,0.312500,0.187500,0.384766,0.278320,0.577148,0.215820,0.215820,0.161133,0.215820
2,2,DTLZ1,eps+,IGD+,0.539062,0.721680,,0.460938,0.784180,0.615234,0.919922,0.883789,0.687500,0.422852,0.753906
3,2,DTLZ1,eps+,IGD+,0.615234,0.838867,0.577148,,0.721680,0.919922,0.862305,0.784180,0.687500,0.460938,0.753906
4,2,DTLZ1,eps+,IGD+,0.187500,0.652344,0.246094,0.312500,,0.539062,0.652344,0.615234,0.460938,0.137695,0.384766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14779,7,WFG9,spd,R2,0.910144,0.841345,,,,,,,0.841345,0.841345,0.910144
14780,7,WFG9,spd,R2,0.910144,0.841345,,,,,,,0.841345,0.841345,0.910144
14781,7,WFG9,spd,R2,0.327360,0.327360,0.158655,0.158655,0.158655,0.158655,0.158655,0.158655,,,0.672640
14782,7,WFG9,spd,R2,0.327360,0.327360,0.158655,0.158655,0.158655,0.158655,0.158655,0.158655,,,0.672640


In [45]:
def conteo_borda(df):
    victorias=(df<0.05).sum(axis=1).reset_index()
    return pd.DataFrame({'w_0':victorias.index, 'victorias':victorias[0]})
# df_WC_todos = df_WC_todos.drop(columns='Unnamed: 4')
df_borda=df_WC_todos.reset_index().groupby(['n_objetivos','problema','indicador','hiperparam_ind_conv']).apply(conteo_borda).reset_index().drop(columns='level_4')
# df_borda=pd.read_csv('../tablas_generadas/conteo_borda_todos.csv').set_index(['n_objetivos','problema','indicador','hiperparam_ind_conv'])
df_borda=df_borda.pivot(values='victorias',index=['n_objetivos','problema','indicador','hiperparam_ind_conv'],columns='w_0')
# df_borda.to_csv('../tablas_generadas/conteo_borda_todos.csv')
df_borda

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,w_0,0,1,2,3,4,5,6,7,8,9,10
n_objetivos,problema,indicador,hiperparam_ind_conv,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,DTLZ1,eps+,IGD+,0,0,0,0,0,0,0,0,0,0,0
2,DTLZ1,eps+,R2,0,0,1,3,0,0,0,0,0,0,0
2,DTLZ1,hv,IGD+,0,0,0,0,0,1,2,3,2,0,0
2,DTLZ1,hv,R2,0,0,1,5,0,0,0,0,1,0,0
2,DTLZ1,igd,IGD+,0,2,0,0,0,0,0,0,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,WFG9,r2,R2,0,0,0,0,0,0,0,0,0,0,0
7,WFG9,s-energy,IGD+,8,7,2,3,6,0,0,0,0,0,0
7,WFG9,s-energy,R2,0,0,0,0,0,0,0,0,0,0,0
7,WFG9,spd,IGD+,1,1,0,1,1,0,1,0,1,1,0


In [44]:
def get_random_problem(random_state=42):
    DE_conv_ind, problem, n_obj, w_0,run,QI,value_QI = list(df_todos_PI.sample(1,random_state=random_state).iloc[0].values)
    return DE_conv_ind, problem, n_obj, QI 
    
DE_conv_ind, problem, n_obj, QI  = get_random_problem()
DE_conv_ind, problem, n_obj, QI

('IGD+', 'DTLZ7', 7, 'hv')

In [4]:
df_borda=pd.read_csv('../tablas_generadas/conteo_borda_todos.csv').set_index(['n_objetivos','problema','indicador','hiperparam_ind_conv'])
df_borda=df_borda.rename_axis(columns='w_0')

In [46]:
df_borda.sum(axis=0)

w_0
0     1057
1     1048
2     1055
3     1129
4     1058
5     1098
6     1136
7     1077
8     1207
9     1248
10    1267
dtype: int64

In [62]:
w_0 = get_w_espaciados(10)
def conteo_borda_problema(df_borda,n_objetivos,problema,indicador):
    df_vs=pd.melt(df_borda.loc[n_objetivos,problema,indicador].reset_index(),id_vars=['DE_conv_ind'])

    df_vs['w_0']=df_vs['w_0'].replace({i:w_0[i][0] for i in range(len(w_0))})
    fig=px.bar(data_frame=df_vs,x='w_0',y='value',color='DE_conv_ind',barmode='group')
    fig.update_layout(
        # title=f'Conteo de borda {problema} {n_objetivos}D {indicador.upper()}',
        xaxis_title='$w_0$', yaxis_title='Victories',
        height=500, width=900
    )
    fig.write_image(f'../imgs_pdf/conteo_borda_{problema}_obj{n_objetivos}_ind{indicador}.pdf')
    fig.show()
    return df_vs

df_borda= df_borda.reset_index().rename(columns={'hiperparam_ind_conv':'DE_conv_ind'}).set_index(['n_objetivos','problema','indicador','DE_conv_ind'])
n_objetivos,problema,indicador=3,'WFG4','hv'
df_vs=conteo_borda_problema(df_borda,n_objetivos,problema,indicador)

In [64]:
df_borda_stack=df_borda.groupby(['n_objetivos','DE_conv_ind'])[df_borda.columns].mean().unstack().reset_index()

for n_objetivos in df_borda_stack.n_objetivos.unique():
    df=df_borda_stack[df_borda_stack['n_objetivos']==n_objetivos].iloc[:,1:].unstack().reset_index().drop(columns=['level_2'])
    df['w_0']=df['w_0'].replace({f'{i}':w_0[i][0] for i in range(len(w_0))})
    fig=px.bar(df,x='w_0',y=0,color='DE_conv_ind',barmode='group')
    fig.update_layout(
        # title=f'Conteo de borda por peso.\nNúmero de objetivos {n_objetivos}',
        xaxis_title='$w_0$', yaxis_title='Avg Victories',
        height=400, width=600
    )
    fig.write_image(f'../imgs_pdf/borda_obj_{n_objetivos}.pdf')
    fig.show()

In [66]:
df_borda_stack=df_borda.groupby(['indicador','DE_conv_ind'])[df_borda.columns].mean().unstack().reset_index()

for indicador in df_borda_stack.indicador.unique():
    print(indicador)
    df=df_borda_stack[df_borda_stack['indicador']==indicador].iloc[:,1:].unstack().reset_index().drop(columns=['level_2'])
    df['w_0']=df['w_0'].replace({f'{i}':w_0[i][0] for i in range(len(w_0))})
 
    fig=px.bar(df,x='w_0',y=0,color='DE_conv_ind',barmode='group')
    fig.update_layout(
        # title=f'Conteo de borda por peso.\nindicador {indicador}',
        xaxis_title='$w_0$', yaxis_title='Avg Victories',
        height=400, width=600
    )
    fig.write_image(f'../imgs_pdf/borda_obj_ind_{indicador}.pdf')

    fig.show()

eps+


hv


igd


igd+


r2


s-energy


spd
