In [1]:
import mlflow
import pandas as pd



In [2]:
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Itaipu_Benchmarking_Vazao_Mensal_V1")

drop_columns = ['run_id','experiment_id', 'status','artifact_uri', 
                'start_time', 'end_time', 'tags.mlflow.source.type', 
                'tags.mlflow.source.name','params.label',
                'tags.mlflow.user'] # params.model

filter_columns = ['metrics.mae', 'metrics.r2', 'metrics.corr', 'metrics.rmse', 
                  'params.n_so_retro', 'params.model', 'params.f_so_pred']

In [3]:
# Initialize an empty list to store the dataframes
dfs = []

models = ["decision_tree", "gru", "linear_regression", "lstm", "random_forest"] # , "xgboost" p/ futura implementação

for model_name in models:
    df = mlflow.search_runs(filter_string=f"params.model = '{model_name}'")
    df = df.drop(columns=drop_columns)
    dfs.append(df)

# Concatenate the dataframes along the rows
df = pd.concat(dfs)

In [4]:
print("Foram realizados:", df.shape[0], "experimentos")
df

Foram realizados: 22400 experimentos


Unnamed: 0,metrics.r2,metrics.rmse,metrics.mae,metrics.corr,params.f_so_pred,params.criterion,params.min_samples_split,params.splitter,params.max_depth,params.n_so_retro,...,tags.run_name,tags.mlflow.runName,params.max_epochs,params.patience,params.activation,params.n_neurons_hl,params.monitor_metric,params.fit_intercept,params.bootstrap,params.n_estimators
0,0.334453,111224.334204,85689.506224,0.750934,8,absolute_error,7,random,7,8,...,decision_tree_n=8_f=8_criterion=absolute_error...,decision_tree_n=8_f=8_criterion=absolute_error...,,,,,,,,
1,0.417878,104020.140026,84028.935510,0.687357,8,absolute_error,7,random,7,8,...,decision_tree_n=8_f=8_criterion=absolute_error...,decision_tree_n=8_f=8_criterion=absolute_error...,,,,,,,,
2,0.453582,100779.610560,73722.152041,0.695106,8,absolute_error,7,random,7,8,...,decision_tree_n=8_f=8_criterion=absolute_error...,decision_tree_n=8_f=8_criterion=absolute_error...,,,,,,,,
3,0.681093,76991.383490,60753.753571,0.864142,8,absolute_error,5,random,7,8,...,decision_tree_n=8_f=8_criterion=absolute_error...,decision_tree_n=8_f=8_criterion=absolute_error...,,,,,,,,
4,0.265807,116819.555616,87880.073673,0.666091,8,absolute_error,5,random,7,8,...,decision_tree_n=8_f=8_criterion=absolute_error...,decision_tree_n=8_f=8_criterion=absolute_error...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3067,0.777781,62880.579741,49449.965455,0.883982,1,squared_error,,,5,1,...,random_forest_n=1_f=1_criterion=squared_error_...,random_forest_n=1_f=1_criterion=squared_error_...,,,,,,,True,50
3068,0.589558,85457.753998,67621.045189,0.775846,1,squared_error,,,3,1,...,random_forest_n=1_f=1_criterion=squared_error_...,random_forest_n=1_f=1_criterion=squared_error_...,,,,,,,False,50
3069,0.671935,76402.112139,60254.526219,0.824218,1,squared_error,,,3,1,...,random_forest_n=1_f=1_criterion=squared_error_...,random_forest_n=1_f=1_criterion=squared_error_...,,,,,,,True,50
3070,0.501450,94184.582504,78105.750758,0.766762,1,squared_error,,,,1,...,random_forest_n=1_f=1_criterion=squared_error_...,random_forest_n=1_f=1_criterion=squared_error_...,,,,,,,False,50


In [5]:
df = df[filter_columns]
df

Unnamed: 0,metrics.mae,metrics.r2,metrics.corr,metrics.rmse,params.n_so_retro,params.model,params.f_so_pred
0,85689.506224,0.334453,0.750934,111224.334204,8,decision_tree,8
1,84028.935510,0.417878,0.687357,104020.140026,8,decision_tree,8
2,73722.152041,0.453582,0.695106,100779.610560,8,decision_tree,8
3,60753.753571,0.681093,0.864142,76991.383490,8,decision_tree,8
4,87880.073673,0.265807,0.666091,116819.555616,8,decision_tree,8
...,...,...,...,...,...,...,...
3067,49449.965455,0.777781,0.883982,62880.579741,1,random_forest,1
3068,67621.045189,0.589558,0.775846,85457.753998,1,random_forest,1
3069,60254.526219,0.671935,0.824218,76402.112139,1,random_forest,1
3070,78105.750758,0.501450,0.766762,94184.582504,1,random_forest,1


In [6]:
# df_r2[df_r2['params.f_so_pred'] == '1']#.sort_values(by='metrics.r2', ascending=True)

In [7]:
# df_r2[df_r2['params.f_so_pred'] == '1']['params.model'].values #.sort_values(by='metrics.r2', ascending=True)

In [8]:
# df_r2[df_r2['params.f_so_pred'] == '1']['metrics.r2'].values # #.sort_values(by='metrics.r2', ascending=True)

In [9]:
# df_r2[df_r2['params.model'] == 'decision_tree']

In [10]:
# df_r2[df_r2['params.model'] == 'decision_tree']['metrics.r2'].values.tolist()

In [11]:
# x_array = []
# y_array = []

# for f in range(1,8+1):
#     x_array.append(df_r2[df_r2['params.f_so_pred'] == f'{f}']['params.model'].values.tolist())
#     y_array.append(df_r2[df_r2['params.f_so_pred'] == f'{f}']['metrics.r2'].values.tolist())

### Best R2-score for each "f"

In [12]:
df_r2 = ( df
    .sort_values(by=['params.model','params.f_so_pred', 'metrics.r2'], ascending=False)
    .drop_duplicates(subset=['params.model','params.f_so_pred'], keep='first')
)

df_r2.head(16)

Unnamed: 0,metrics.mae,metrics.r2,metrics.corr,metrics.rmse,params.n_so_retro,params.model,params.f_so_pred
1195,53299.598005,0.811836,0.904738,62561.180344,5,random_forest,8
857,54193.74768,0.805063,0.900044,63677.198781,6,random_forest,7
497,52711.776606,0.814122,0.907438,62179.92411,7,random_forest,6
159,54399.012292,0.808915,0.903528,63044.809736,8,random_forest,5
227,57506.241212,0.775046,0.897073,77163.510106,8,random_forest,4
2955,40810.835858,0.776121,0.883198,53171.107687,1,random_forest,3
2635,37372.465488,0.814162,0.903757,48443.515162,2,random_forest,2
1535,46824.625992,0.796353,0.903775,58307.394028,5,random_forest,1
41,43005.802721,0.840221,0.929586,56445.362752,8,lstm,8
3923,45588.411327,0.835607,0.915233,60378.722662,5,lstm,7


In [13]:
model_order = ['decision_tree', 'random_forest', 'linear_regression', 'gru', 'lstm']
df_r2['params.model'] = pd.Categorical(df_r2['params.model'], categories=model_order, ordered=True)
df_r2 = df_r2.sort_values(by=['params.model', 'params.f_so_pred'])
df_r2.head(16)

Unnamed: 0,metrics.mae,metrics.r2,metrics.corr,metrics.rmse,params.n_so_retro,params.model,params.f_so_pred
9144,50521.653946,0.78142,0.886781,62363.480205,1,decision_tree,1
5510,50817.962255,0.775536,0.887815,61214.961076,4,decision_tree,2
5417,55792.894512,0.731813,0.856666,72623.882089,4,decision_tree,3
700,64832.604437,0.771629,0.885032,77747.322215,8,decision_tree,4
8572,50674.077278,0.767115,0.876046,62352.685858,1,decision_tree,5
1477,58088.7729,0.760674,0.87387,70555.512731,7,decision_tree,6
2488,56759.1919,0.759658,0.873441,70705.215553,6,decision_tree,7
3520,59026.1337,0.763523,0.877766,70134.368919,5,decision_tree,8
1535,46824.625992,0.796353,0.903775,58307.394028,5,random_forest,1
2635,37372.465488,0.814162,0.903757,48443.515162,2,random_forest,2


In [14]:
dict_plot = []

for model in model_order:
    dict_plot.append({f'{model}': df_r2[df_r2['params.model'] == f'{model}']['metrics.r2'].values.tolist()})

In [15]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

branches = ['F=1', 'F=2', 'F=3', 'F=4', 
            'F=5', 'F=6', 'F=7', 'F=8']

decision_tree = dict_plot[0]['decision_tree']
random_forest = dict_plot[1]['random_forest']
linear_regression = dict_plot[2]['linear_regression']
gru = dict_plot[3]['gru']
lstm = dict_plot[4]['lstm']

# Define a color scale with 5 colors
color_scale = px.colors.qualitative.Plotly[:5]

# Create a dictionary with colors for each category
colors = {
    'decision_tree': color_scale[0],
    'random_forest': color_scale[1],
    'linear_regression': color_scale[2],
    'gru': color_scale[3],
    'lstm': color_scale[4]
}


# Create subplots
fig = make_subplots(rows=2, cols=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = decision_tree[:4],
   name = 'decision_tree',
   legendgroup='decision_tree',
   text=[f'{x:.3f}' for x in decision_tree[:4]],
   marker=dict(color=colors['decision_tree']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = random_forest[:4],
   name = 'random_forest',
   legendgroup='random_forest',
   text=[f'{x:.3f}' for x in random_forest[:4]],
   marker=dict(color=colors['random_forest']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = linear_regression[:4],
   name = 'linear_regression',
   legendgroup='linear_regression',
   text=[f'{x:.3f}' for x in linear_regression[:4]],
   marker=dict(color=colors['linear_regression']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = gru[:4],
   name = 'gru',
   legendgroup='gru',
   text=[f'{x:.3f}' for x in gru[:4]],
   marker=dict(color=colors['gru']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = lstm[:4],
   name = 'lstm',
   legendgroup='lstm',
   text=[f'{x:.3f}' for x in lstm[:4]],
   marker=dict(color=colors['lstm']),
), row=1, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = decision_tree[4:],
   name = 'decision_tree',
   legendgroup='decision_tree',
   text=[f'{x:.3f}' for x in decision_tree[4:]],
   marker=dict(color=colors['decision_tree']),
   showlegend=False,
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = random_forest[4:],
   name = 'random_forest',
   legendgroup='random_forest',
   text=[f'{x:.3f}' for x in random_forest[4:]],
   marker=dict(color=colors['random_forest']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = linear_regression[4:],
   name = 'linear_regression',
   legendgroup='linear_regression',
   text=[f'{x:.3f}' for x in linear_regression[4:]],
   marker=dict(color=colors['linear_regression']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = gru[4:],
   name = 'gru',
   legendgroup='gru',
   text=[f'{x:.3f}' for x in gru[4:]],
   marker=dict(color=colors['gru']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = lstm[4:],
   name = 'lstm',
   legendgroup='lstm',
   text=[f'{x:.3f}' for x in lstm[4:]],
   marker=dict(color=colors['lstm']),
   showlegend=False
), row=2, col=1)

fig.update_layout(
    title=f"Modelos com melhor R2-score para previsão da vazão no 'F'-ésimo mês", # com 'N' livre 
    barmode='group',
)

# Set the same y-axis range for both subplots
fig.update_yaxes(range=[0.68, 1.0], row=1, col=1)
fig.update_yaxes(range=[0.68, 1.0], row=2, col=1)

# Add annotations
# fig.add_annotation(text="'N' = Número de meses retroativos de vazão observada e precipitação na bacia, a serem utilizadas durante o treinamento",
#                    xref="paper", yref="paper",
#                    x=0.5, y=-0.18, showarrow=False)

# Add annotations
fig.add_annotation(text="'F' = Número do mês futuro a onde se prevê a vazão",
                   xref="paper", yref="paper",
                   x=0.5, y=-0.18, showarrow=False) # -0.25

fig.show()

### Best RMSE-score for each "f"

In [16]:
df_rmse = ( df
    .sort_values(by=['params.model','params.f_so_pred', 'metrics.rmse'], ascending=True) # pois quanto menor melhor
    .drop_duplicates(subset=['params.model','params.f_so_pred'], keep='first')
)

model_order = ['decision_tree', 'random_forest', 'linear_regression', 'gru', 'lstm']
df_rmse['params.model'] = pd.Categorical(df_rmse['params.model'], categories=model_order, ordered=True)
df_rmse = df_rmse.sort_values(by=['params.model', 'params.f_so_pred'])

dict_plot = []

for model in model_order:
    dict_plot.append({f'{model}': df_rmse[df_rmse['params.model'] == f'{model}']['metrics.rmse'].values.tolist()})

In [17]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

branches = ['F=1', 'F=2', 'F=3', 'F=4', 
            'F=5', 'F=6', 'F=7', 'F=8']

decision_tree = dict_plot[0]['decision_tree']
random_forest = dict_plot[1]['random_forest']
linear_regression = dict_plot[2]['linear_regression']
gru = dict_plot[3]['gru']
lstm = dict_plot[4]['lstm']

# Define a color scale with 5 colors
color_scale = px.colors.qualitative.Plotly[:5]

# Create a dictionary with colors for each category
colors = {
    'decision_tree': color_scale[0],
    'random_forest': color_scale[1],
    'linear_regression': color_scale[2],
    'gru': color_scale[3],
    'lstm': color_scale[4]
}


# Create subplots
fig = make_subplots(rows=2, cols=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = decision_tree[:4],
   name = 'decision_tree',
   legendgroup='decision_tree',
   text=[f'{x:.0f}' for x in decision_tree[:4]],
   marker=dict(color=colors['decision_tree']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = random_forest[:4],
   name = 'random_forest',
   legendgroup='random_forest',
   text=[f'{x:.0f}' for x in random_forest[:4]],
   marker=dict(color=colors['random_forest']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = linear_regression[:4],
   name = 'linear_regression',
   legendgroup='linear_regression',
   text=[f'{x:.0f}' for x in linear_regression[:4]],
   marker=dict(color=colors['linear_regression']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = gru[:4],
   name = 'gru',
   legendgroup='gru',
   text=[f'{x:.0f}' for x in gru[:4]],
   marker=dict(color=colors['gru']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = lstm[:4],
   name = 'lstm',
   legendgroup='lstm',
   text=[f'{x:.0f}' for x in lstm[:4]],
   marker=dict(color=colors['lstm']),
), row=1, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = decision_tree[4:],
   name = 'decision_tree',
   legendgroup='decision_tree',
   text=[f'{x:.0f}' for x in decision_tree[4:]],
   marker=dict(color=colors['decision_tree']),
   showlegend=False,
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = random_forest[4:],
   name = 'random_forest',
   legendgroup='random_forest',
   text=[f'{x:.0f}' for x in random_forest[4:]],
   marker=dict(color=colors['random_forest']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = linear_regression[4:],
   name = 'linear_regression',
   legendgroup='linear_regression',
   text=[f'{x:.0f}' for x in linear_regression[4:]],
   marker=dict(color=colors['linear_regression']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = gru[4:],
   name = 'gru',
   legendgroup='gru',
   text=[f'{x:.0f}' for x in gru[4:]],
   marker=dict(color=colors['gru']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = lstm[4:],
   name = 'lstm',
   legendgroup='lstm',
   text=[f'{x:.0f}' for x in lstm[4:]],
   marker=dict(color=colors['lstm']),
   showlegend=False
), row=2, col=1)

fig.update_layout(
    title=f"Modelos com melhor RMSE para previsão da vazão no 'F'-ésimo mês ", # com 'N' livre 
    barmode='group',
)

# Set the same y-axis range for both subplots
# fig.update_yaxes(range=[0.68, 1.0], row=1, col=1)
# fig.update_yaxes(range=[0.68, 1.0], row=2, col=1)

# Add annotations
# fig.add_annotation(text="'N' = Número de meses retroativos de vazão observada e precipitação na bacia, a serem utilizadas durante o treinamento",
#                    xref="paper", yref="paper",
#                    x=0.5, y=-0.18, showarrow=False)

# Add annotation for y-axis title
fig.add_annotation(
    text='Erro da Vazão Acumulada Mensal (m^3)/s',
    xref='paper', 
    yref='paper',
    x=-0.05, y=0.5,  # Adjust x and y position to move the title away from the y-axis
    xanchor='center', 
    yanchor='middle',
    showarrow=False,
    textangle=-90  # Rotate the text to be vertical
)

# Add annotations
fig.add_annotation(text="'F' = Número do mês futuro a onde se prevê a vazão",
                   xref="paper", yref="paper",
                   x=0.5, y=-0.18, showarrow=False)

fig.show()

#### Best MAE for each "F"

In [18]:
df_mae = ( df
    .sort_values(by=['params.model','params.f_so_pred', 'metrics.mae'], ascending=True) # pois quanto menor melhor
    .drop_duplicates(subset=['params.model','params.f_so_pred'], keep='first')
)

model_order = ['decision_tree', 'random_forest', 'linear_regression', 'gru', 'lstm']
df_mae['params.model'] = pd.Categorical(df_mae['params.model'], categories=model_order, ordered=True)
df_mae = df_mae.sort_values(by=['params.model', 'params.f_so_pred'])

dict_plot = []

for model in model_order:
    dict_plot.append({f'{model}': df_mae[df_mae['params.model'] == f'{model}']['metrics.mae'].values.tolist()})

In [19]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

branches = ['F=1', 'F=2', 'F=3', 'F=4', 
            'F=5', 'F=6', 'F=7', 'F=8']

decision_tree = dict_plot[0]['decision_tree']
random_forest = dict_plot[1]['random_forest']
linear_regression = dict_plot[2]['linear_regression']
gru = dict_plot[3]['gru']
lstm = dict_plot[4]['lstm']

# Define a color scale with 5 colors
color_scale = px.colors.qualitative.Plotly[:5]

# Create a dictionary with colors for each category
colors = {
    'decision_tree': color_scale[0],
    'random_forest': color_scale[1],
    'linear_regression': color_scale[2],
    'gru': color_scale[3],
    'lstm': color_scale[4]
}

# Create subplots
fig = make_subplots(rows=2, cols=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = decision_tree[:4],
   name = 'decision_tree',
   legendgroup='decision_tree',
   text=[f'{x:.0f}' for x in decision_tree[:4]],
   marker=dict(color=colors['decision_tree']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = random_forest[:4],
   name = 'random_forest',
   legendgroup='random_forest',
   text=[f'{x:.0f}' for x in random_forest[:4]],
   marker=dict(color=colors['random_forest']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = linear_regression[:4],
   name = 'linear_regression',
   legendgroup='linear_regression',
   text=[f'{x:.0f}' for x in linear_regression[:4]],
   marker=dict(color=colors['linear_regression']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = gru[:4],
   name = 'gru',
   legendgroup='gru',
   text=[f'{x:.0f}' for x in gru[:4]],
   marker=dict(color=colors['gru']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = lstm[:4],
   name = 'lstm',
   legendgroup='lstm',
   text=[f'{x:.0f}' for x in lstm[:4]],
   marker=dict(color=colors['lstm']),
), row=1, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = decision_tree[4:],
   name = 'decision_tree',
   legendgroup='decision_tree',
   text=[f'{x:.0f}' for x in decision_tree[4:]],
   marker=dict(color=colors['decision_tree']),
   showlegend=False,
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = random_forest[4:],
   name = 'random_forest',
   legendgroup='random_forest',
   text=[f'{x:.0f}' for x in random_forest[4:]],
   marker=dict(color=colors['random_forest']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = linear_regression[4:],
   name = 'linear_regression',
   legendgroup='linear_regression',
   text=[f'{x:.0f}' for x in linear_regression[4:]],
   marker=dict(color=colors['linear_regression']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = gru[4:],
   name = 'gru',
   legendgroup='gru',
   text=[f'{x:.0f}' for x in gru[4:]],
   marker=dict(color=colors['gru']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = lstm[4:],
   name = 'lstm',
   legendgroup='lstm',
   text=[f'{x:.0f}' for x in lstm[4:]],
   marker=dict(color=colors['lstm']),
   showlegend=False
), row=2, col=1)

fig.update_layout(
    title=f"Modelos com melhor MAE para previsão da vazão no 'F'-ésimo mês", #  com 'N' livre 
    barmode='group',
)

# Set the same y-axis range for both subplots
# fig.update_yaxes(range=[0.68, 1.0], row=1, col=1)
# fig.update_yaxes(range=[0.68, 1.0], row=2, col=1)

# # Add annotations
# fig.add_annotation(text="'N' = Número de meses retroativos de vazão observada e precipitação na bacia, a serem utilizadas durante o treinamento",
#                    xref="paper", yref="paper",
#                    x=0.5, y=-0.18, showarrow=False)

# Add annotations
fig.add_annotation(text="'F' = Número do mês futuro a onde se prevê a vazão",
                   xref="paper", yref="paper",
                   x=0.5, y=-0.25, showarrow=False)

fig.show()

### Best correlation for each "F"

In [20]:
df_corr = ( df
    .sort_values(by=['params.model','params.f_so_pred', 'metrics.corr'], ascending=False) # pois quanto menor melhor
    .drop_duplicates(subset=['params.model','params.f_so_pred'], keep='first')
)

model_order = ['decision_tree', 'random_forest', 'linear_regression', 'gru', 'lstm']
df_corr['params.model'] = pd.Categorical(df_corr['params.model'], categories=model_order, ordered=True)
df_corr = df_corr.sort_values(by=['params.model', 'params.f_so_pred'])

dict_plot = []

for model in model_order:
    dict_plot.append({f'{model}': df_corr[df_corr['params.model'] == f'{model}']['metrics.corr'].values.tolist()})

In [21]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

branches = ['F=1', 'F=2', 'F=3', 'F=4', 
            'F=5', 'F=6', 'F=7', 'F=8']

decision_tree = dict_plot[0]['decision_tree']
random_forest = dict_plot[1]['random_forest']
linear_regression = dict_plot[2]['linear_regression']
gru = dict_plot[3]['gru']
lstm = dict_plot[4]['lstm']

# Define a color scale with 5 colors
color_scale = px.colors.qualitative.Plotly[:5]

# Create a dictionary with colors for each category
colors = {
    'decision_tree': color_scale[0],
    'random_forest': color_scale[1],
    'linear_regression': color_scale[2],
    'gru': color_scale[3],
    'lstm': color_scale[4]
}


# Create subplots
fig = make_subplots(rows=2, cols=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = decision_tree[:4],
   name = 'decision_tree',
   legendgroup='decision_tree',
   text=[f'{x:.3f}' for x in decision_tree[:4]],
   marker=dict(color=colors['decision_tree']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = random_forest[:4],
   name = 'random_forest',
   legendgroup='random_forest',
   text=[f'{x:.3f}' for x in random_forest[:4]],
   marker=dict(color=colors['random_forest']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = linear_regression[:4],
   name = 'linear_regression',
   legendgroup='linear_regression',
   text=[f'{x:.3f}' for x in linear_regression[:4]],
   marker=dict(color=colors['linear_regression']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = gru[:4],
   name = 'gru',
   legendgroup='gru',
   text=[f'{x:.3f}' for x in gru[:4]],
   marker=dict(color=colors['gru']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = lstm[:4],
   name = 'lstm',
   legendgroup='lstm',
   text=[f'{x:.3f}' for x in lstm[:4]],
   marker=dict(color=colors['lstm']),
), row=1, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = decision_tree[4:],
   name = 'decision_tree',
   legendgroup='decision_tree',
   text=[f'{x:.3f}' for x in decision_tree[4:]],
   marker=dict(color=colors['decision_tree']),
   showlegend=False,
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = random_forest[4:],
   name = 'random_forest',
   legendgroup='random_forest',
   text=[f'{x:.3f}' for x in random_forest[4:]],
   marker=dict(color=colors['random_forest']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = linear_regression[4:],
   name = 'linear_regression',
   legendgroup='linear_regression',
   text=[f'{x:.3f}' for x in linear_regression[4:]],
   marker=dict(color=colors['linear_regression']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = gru[4:],
   name = 'gru',
   legendgroup='gru',
   text=[f'{x:.3f}' for x in gru[4:]],
   marker=dict(color=colors['gru']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = lstm[4:],
   name = 'lstm',
   legendgroup='lstm',
   text=[f'{x:.3f}' for x in lstm[4:]],
   marker=dict(color=colors['lstm']),
   showlegend=False
), row=2, col=1)

fig.update_layout(
    title=f"Modelos com melhor Correlação para previsão da vazão no 'F'-ésimo mês", #  com 'N' livre 
    barmode='group',
)

# Set the same y-axis range for both subplots
fig.update_yaxes(range=[0.68, 1.0], row=1, col=1)
fig.update_yaxes(range=[0.68, 1.0], row=2, col=1)

# Add annotations
# fig.add_annotation(text="'N' = Número de meses retroativos de vazão observada e precipitação na bacia, a serem utilizadas durante o treinamento",
#                    xref="paper", yref="paper",
#                    x=0.5, y=-0.18, showarrow=False)

# Add annotations
fig.add_annotation(text="'F' = Número do mês futuro a onde se prevê a vazão",
                   xref="paper", yref="paper",
                   x=0.5, y=-0.25, showarrow=False)

fig.show()

## Protótipos / Testes

#### ----------------- Protótipo 1 ----------------- 

In [None]:
import plotly.graph_objects as go

branches = ['CSE', 'Mech', 'Electronics', 'BBB']
fy = [23,17,35,14]
sy = [20, 23, 30,5]
ty = [30,20,15,18]
by = [12,14,28,24]
trace1 = go.Bar(
   x = branches,
   y = fy,
   name = 'FY'
)
trace2 = go.Bar(
   x = branches,
   y = sy,
   name = 'SY'
)
trace3 = go.Bar(
   x = branches,
   y = ty,
   name = 'TY'
)

trace4 = go.Bar(
   x = branches,
   y = by,
   name = 'BY'
)

data = [trace1, trace2, trace3,trace4]
layout = go.Layout(barmode = 'group')
fig = go.Figure(data = data, layout = layout)

fig.show()


#### ----------------- Protótipo 2 ----------------- 

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

branches = ['a','b','c','d','e','f','g','h']

fy = [23,17,35,14,27, 19, 33, 16]
sy = [20, 23, 30,10,22, 24, 32, 12]
ty = [30,20,15,18,29, 21, 17, 20]
by = [12,14,28,24,14, 16, 30, 26]
zy = [8,20,28,12,9, 12, 24, 20]

# Define a color scale with 5 colors
color_scale = px.colors.qualitative.Plotly[:5]

# Create a dictionary with colors for each category
colors = {
    'fy': color_scale[0],
    'sy': color_scale[1],
    'ty': color_scale[2],
    'by': color_scale[3],
    'zy': color_scale[4]
}


# Create subplots
fig = make_subplots(rows=2, cols=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = fy[:4],
   name = 'fy',
   legendgroup='fy',
   text=[f'{x:.2f}' for x in fy[:4]],
   marker=dict(color=colors['fy']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = sy[:4],
   name = 'sy',
   legendgroup='sy',
   text=[f'{x:.2f}' for x in sy[:4]],
   marker=dict(color=colors['sy']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = ty[:4],
   name = 'ty',
   legendgroup='ty',
   text=[f'{x:.2f}' for x in ty[:4]],
   marker=dict(color=colors['ty']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = by[:4],
   name = 'by',
   legendgroup='by',
   text=[f'{x:.2f}' for x in by[:4]],
   marker=dict(color=colors['by']),
), row=1, col=1)

# Add traces to subplot 1
fig.add_trace(go.Bar(
   x = branches[:4],
   y = zy[:4],
   name = 'zy',
   legendgroup='zy',
   text=[f'{x:.2f}' for x in zy[:4]],
   marker=dict(color=colors['zy']),
), row=1, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = fy[4:],
   name = 'fy',
   legendgroup='fy',
   text=[f'{x:.2f}' for x in fy[4:]],
   marker=dict(color=colors['fy']),
   showlegend=False,
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = sy[4:],
   name = 'sy',
   legendgroup='sy',
   text=[f'{x:.2f}' for x in sy[4:]],
   marker=dict(color=colors['sy']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = ty[4:],
   name = 'ty',
   legendgroup='ty',
   text=[f'{x:.2f}' for x in ty[4:]],
   marker=dict(color=colors['ty']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = by[4:],
   name = 'by',
   legendgroup='by',
   text=[f'{x:.2f}' for x in by[4:]],
   marker=dict(color=colors['by']),
   showlegend=False
), row=2, col=1)

# Add traces to subplot 2
fig.add_trace(go.Bar(
   x = branches[4:],
   y = zy[4:],
   name = 'zy',
   legendgroup='zy',
   text=[f'{x:.2f}' for x in zy[4:]],
   marker=dict(color=colors['zy']),
   showlegend=False
), row=2, col=1)

fig.update_layout(
    title=f"Título teste",
    barmode='group',
)

# Add annotations
fig.add_annotation(text="'n' = Número de meses retroativos de vazão observada e precipitação na bacia, a serem utilizadas durante o treinamento",
                   xref="paper", yref="paper",
                   x=0.5, y=-0.18, showarrow=False)

# Add annotations
fig.add_annotation(text="'f' = Número do mês futuro a onde se prevê a vazão",
                   xref="paper", yref="paper",
                   x=0.5, y=-0.25, showarrow=False)

fig.show()