In [165]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [166]:
# Location of the Excel workbook that holds every sheet used in this notebook.
path = "./data/data_by_concept.xlsx"
# Open the workbook once and keep the handle open: no context manager is used
# because the later cells still parse individual sheets from `excel_data`.
excel_data = pd.ExcelFile(path)

In [167]:
# Names of the workbook sheets; each one is later passed as `sheet_name`
# to read_each_sheet.
gene_sheet_name = "Gene"  # generation sheet
gen_prog_sheet_name = "GeneProgDesp"  # presumably programmed (dispatch) generation -- TODO confirm
gen_reprog_sheet_name = "GeneProgRedesp"  # presumably re-dispatch generation -- TODO confirm
gen_offer_price = "PrecOferDesp"  # presumably dispatch offer price -- TODO confirm
offer_national_price = "PrecBolsNaci"  # presumably national exchange price -- TODO confirm

In [168]:
# Name of the date column shared by every sheet.
COL_DATE = "Date"
# Names of the 24 hourly value columns: Values_Hour01 ... Values_Hour24.
COL_HOURS = [f"Values_Hour{i:02}" for i in range(1, 25)]


def read_each_sheet(sheet_name, excel_data, plant_name: str = None, min_date="2024-03-08"):
    """Load one sheet of the workbook and return its cleaned rows.

    Parameters
    ----------
    sheet_name : str
        Name of the sheet to parse.
    excel_data : pandas.ExcelFile
        Open workbook; only its ``parse`` method is used.
    plant_name : str, optional
        When given, only rows whose "Values_code" equals this value are kept
        and the "Values_code" column is dropped. When omitted, no filtering is
        done and the column (if present) is left untouched.
    min_date : str or datetime-like, optional
        Rows dated before this value are discarded. Pass ``None`` to keep
        every date. Defaults to the cutoff previously hard-coded here.

    Returns
    -------
    pandas.DataFrame
        The sheet without the "Id" column, with ``COL_DATE`` converted to
        datetime and restricted to ``COL_DATE >= min_date``.
    """
    df_data = excel_data.parse(sheet_name, index_col=0)
    if plant_name:
        # Build a new frame instead of dropping in place on the filtered
        # slice, which would raise SettingWithCopyWarning.
        df_data = df_data[df_data["Values_code"] == plant_name].drop(columns=["Values_code"])

    df_data = df_data.drop(columns=["Id"])
    # assign() keeps the column position and returns a fresh frame.
    df_data = df_data.assign(**{COL_DATE: pd.to_datetime(df_data[COL_DATE])})

    if min_date is not None:
        df_data = df_data[df_data[COL_DATE] >= pd.Timestamp(min_date)]

    return df_data

In [169]:
# Code of the plant whose rows are kept in the per-plant sheets
# (compared against the "Values_code" column by read_each_sheet).
PLANT_CODE = "3IRX"

# Load every sheet into its own dataframe, filtered to PLANT_CODE.
df_gene = read_each_sheet(gene_sheet_name, excel_data, PLANT_CODE)
df_gen_prog = read_each_sheet(gen_prog_sheet_name, excel_data, PLANT_CODE)
df_gen_reprog = read_each_sheet(gen_reprog_sheet_name, excel_data, PLANT_CODE)
df_gen_offer_price = read_each_sheet(gen_offer_price, excel_data, PLANT_CODE)
# This sheet is loaded without a plant filter.
df_offer_national_price = read_each_sheet(offer_national_price, excel_data)

In [170]:
# Name of the column that holds the sum of the hourly columns of a row.
COL_UNIFIED = "Total"


def unfied_hours(df):
    """Collapse the hourly columns of ``df`` into a single total per row.

    If ``df`` already has a ``COL_UNIFIED`` column it is returned untouched.
    Otherwise a new two-column frame (``COL_DATE``, ``COL_UNIFIED``) is
    returned, where ``COL_UNIFIED`` is the row-wise sum of ``COL_HOURS``.
    """
    already_unified = COL_UNIFIED in df.columns
    if already_unified:
        return df

    row_totals = df[COL_HOURS].sum(axis=1)
    return df[[COL_DATE]].copy().assign(**{COL_UNIFIED: row_totals})

def operation_between_df(df1, df2, operation, absolute=False, unified=False):
    """Combine two frames column by column on the dates they have in common.

    The frames are inner-joined on ``COL_DATE``; missing values in the joined
    frame are replaced by 0 before the operation is applied.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames with a ``COL_DATE`` column and either the ``COL_HOURS`` columns
        or a ``COL_UNIFIED`` column.
    operation : {"sum", "subs", "mult", "div"}
        ``df1 + df2``, ``df1 - df2``, ``df1 * df2`` or ``100 * df1 / df2``
        (note that "div" returns a percentage, not a plain ratio).
    absolute : bool, default False
        When True, the absolute value of every result column is returned.
    unified : bool, default False
        When True, both frames are first reduced with ``unfied_hours`` and the
        operation is applied to the single ``COL_UNIFIED`` column; otherwise it
        is applied to each of the ``COL_HOURS`` columns.

    Returns
    -------
    pandas.DataFrame
        ``COL_DATE`` plus one column per operated column.

    Raises
    ------
    ValueError
        If ``operation`` is not one of the supported names.

    Notes
    -----
    With "div", a zero denominator (including a missing value replaced by 0)
    is not treated specially: pandas yields ``inf`` or ``NaN`` for that entry.
    """
    operations = {
        "sum": lambda left, right: left + right,
        "subs": lambda left, right: left - right,
        "mult": lambda left, right: left * right,
        "div": lambda left, right: 100 * left / right,
    }
    # Reject an unknown operation up front, before doing any work.
    if operation not in operations:
        raise ValueError("Operation not supported")
    apply_operation = operations[operation]

    columns = COL_HOURS
    if unified:
        df1 = unfied_hours(df1)
        df2 = unfied_hours(df2)
        columns = [COL_UNIFIED]

    # Shared column names get the "_1" / "_2" suffix for df1 / df2 respectively.
    merged = pd.merge(df1, df2, on=COL_DATE, how="inner", suffixes=('_1', '_2')).fillna(0)

    result = merged[[COL_DATE]].copy()
    for column in columns:
        result[column] = apply_operation(merged[f"{column}_1"], merged[f"{column}_2"])

    if absolute:
        result[columns] = result[columns].abs()

    return result

def calculate_toleration(df, gap):
    """Zero out the ``COL_UNIFIED`` values that do not exceed the tolerance.

    The previous body printed the column and assigned the result of
    ``print`` (``None``) back to it, ignoring ``gap``. This version applies
    the same thresholding the notebook performs by hand elsewhere:
    values strictly greater than ``gap`` are kept, everything else
    (including NaN) becomes 0.
    NOTE(review): intent inferred from the unused ``gap`` parameter and the
    manual thresholding cells -- confirm.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``COL_UNIFIED`` column. It is modified in place, as the
        original implementation did.
    gap : float
        Tolerance threshold, in the same units as ``COL_UNIFIED``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for convenience.
    """
    df[COL_UNIFIED] = df[COL_UNIFIED].where(df[COL_UNIFIED] > gap, 0)
    return df

In [171]:
# Tolerances in percent: deviations that do not exceed them are counted as 0.
TOLERANCE_PROG_PCT = 15
TOLERANCE_REPROG_PCT = 8

# Absolute difference between the daily generation total and the programmed
# total, then expressed as a percentage of the programmed total ("div"
# multiplies the ratio by 100).
df_desv_prog = operation_between_df(df_gene, df_gen_prog, "subs", absolute=True, unified=True)
df_desv_prog_percentage = operation_between_df(df_desv_prog, df_gen_prog, "div", unified=True)
# Vectorised thresholding: keep values above the tolerance, zero the rest.
df_desv_prog_percentage[COL_UNIFIED] = df_desv_prog_percentage[COL_UNIFIED].where(
    df_desv_prog_percentage[COL_UNIFIED] > TOLERANCE_PROG_PCT, 0
)

# Same computation against the re-programmed totals, with its own tolerance.
df_desv_reprog = operation_between_df(df_gene, df_gen_reprog, "subs", absolute=True, unified=True)
df_desv_reprog_percentage = operation_between_df(df_desv_reprog, df_gen_reprog, "div", unified=True)
df_desv_reprog_percentage[COL_UNIFIED] = df_desv_reprog_percentage[COL_UNIFIED].where(
    df_desv_reprog_percentage[COL_UNIFIED] > TOLERANCE_REPROG_PCT, 0
)

In [172]:
# Month-end ('ME') mean of the thresholded deviation, one row per month.
monthly_avg_prog = (
    df_desv_prog_percentage
    .resample('ME', on=COL_DATE)[COL_UNIFIED]
    .mean()
    .reset_index()
)
monthly_avg_reprog = (
    df_desv_reprog_percentage
    .resample('ME', on=COL_DATE)[COL_UNIFIED]
    .mean()
    .reset_index()
)

In [173]:

def plot_results(df_daily, df_month, title, title_1, title_2):
    """Build a two-panel line figure: daily values (left), monthly values (right).

    Each panel plots ``COL_UNIFIED`` against ``COL_DATE`` and carries an
    annotation in its upper-right corner with the mean of the series and its
    standard deviation expressed as a percentage of that mean.

    Parameters
    ----------
    df_daily, df_month : pandas.DataFrame
        Frames with ``COL_DATE`` and ``COL_UNIFIED`` columns.
    title : str
        Title of the whole figure.
    title_1, title_2 : str
        Titles of the left and right subplots.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (not shown; the caller decides when to display it).
    """
    fig = make_subplots(rows=1, cols=2, subplot_titles=(title_1, title_2))

    # (frame, legend label) for each subplot column, left to right.
    # The left trace used to be labelled 'Horario' although it shows daily data.
    panels = ((df_daily, 'Diario'), (df_month, 'Mensual'))

    for col, (frame, label) in enumerate(panels, start=1):
        values = frame[COL_UNIFIED]
        mean = values.mean()
        std = values.std()

        # Standard deviation as a percentage of the mean; undefined when the
        # mean is 0 or NaN (e.g. every value zeroed, or an empty frame).
        if pd.notna(mean) and mean != 0:
            relative_std = f"{(std / mean) * 100:.2f}%"
        else:
            relative_std = "n/a"

        fig.add_trace(
            go.Scatter(x=frame[COL_DATE], y=values, mode='lines', name=label),
            row=1, col=col
        )

        # Mean / relative std annotation in the upper-right corner of the panel.
        fig.add_annotation(
            xref="x domain", yref="y domain", x=0.95, y=0.95, showarrow=False,
            text=f"X̅: {mean:.2f}<br>σ: {relative_std}",
            row=1, col=col
        )

    # Figure size and overall title.
    fig.update_layout(height=400, width=1000, title_text=title)
    # Only the left subplot gets a y-axis title.
    fig.update_yaxes(title_text="Desviación [%]", row=1, col=1)

    return fig

In [174]:

# Build and show the two-panel figure for the dispatch deviation.
fig = plot_results(
    df_daily=df_desv_prog_percentage,
    df_month=monthly_avg_prog,
    title="Desviación del Despacho",
    title_1="Desviación diaria de energía",
    title_2="Desviación promedio mensual de energía",
)
fig.show()

In [175]:
# Build and show the two-panel figure for the re-dispatch deviation.
fig = plot_results(
    df_daily=df_desv_reprog_percentage,
    df_month=monthly_avg_reprog,
    title="Desviación Redespacho",
    title_1="Desviación diaria de energía",
    title_2="Desviación promedio mensual de energía",
)
fig.show()


In [176]:
# Side-by-side box plots of the dispatch and re-dispatch deviations.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Desviación despacho", "Desviación del redespacho"))

# add_trace returns the figure, so the calls are chained; the cell still ends
# with an expression that evaluates to the figure and is rendered as output.
(
    fig
    .add_trace(
        # Box plot for the dispatch deviation (left panel).
        go.Box(y=df_desv_prog_percentage[COL_UNIFIED], name="Despacho", boxpoints='all', jitter=0.3, pointpos=-1.8),
        row=1, col=1
    )
    .add_trace(
        # Box plot for the re-dispatch deviation (right panel).
        go.Box(y=df_desv_reprog_percentage[COL_UNIFIED], name="Redespacho", boxpoints='all', jitter=0.3, pointpos=-1.8),
        row=1, col=2
    )
)


In [180]:
def remove_outliers(df, column=None, k=1.5):
    """Return the rows of ``df`` whose value lies within the Tukey fences.

    The fences are ``[Q1 - k*IQR, Q3 + k*IQR]`` and are inclusive, so a value
    sitting exactly on a fence is kept. This matters when the IQR is 0 (for
    instance when most values are the same): with strict inequalities every
    row, including the typical ones, would be discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    column : str, optional
        Column to evaluate. Defaults to ``COL_UNIFIED``.
    k : float, default 1.5
        Fence multiplier applied to the IQR.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` inside the fences. Rows with NaN in ``column`` are
        dropped, since NaN compares false against both fences.
    """
    if column is None:
        column = COL_UNIFIED

    values = df[column]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower, upper = q1 - k * iqr, q3 + k * iqr

    # Series.between is inclusive on both ends by default.
    return df[values.between(lower, upper)]

# Deviation frames with the outliers filtered out by remove_outliers.
df_desv_prog_percentage_without_outliers = remove_outliers(df_desv_prog_percentage)
df_desv_reprog_percentage_without_outliers = remove_outliers(df_desv_reprog_percentage)

# Month-end ('ME') mean of the outlier-free deviations, one row per month.
monthly_avg_prog_without_outliers = (
    df_desv_prog_percentage_without_outliers
    .resample('ME', on=COL_DATE)[COL_UNIFIED]
    .mean()
    .reset_index()
)
monthly_avg_reprog_without_outliers = (
    df_desv_reprog_percentage_without_outliers
    .resample('ME', on=COL_DATE)[COL_UNIFIED]
    .mean()
    .reset_index()
)

In [181]:
# Dispatch deviation figure built from the outlier-free data.
fig = plot_results(
    df_daily=df_desv_prog_percentage_without_outliers,
    df_month=monthly_avg_prog_without_outliers,
    title="Desviación Despacho sin outliers",
    title_1="Desviación diaria de energía",
    title_2="Desviación promedio mensual de energía",
)
fig.show()

In [182]:
# Re-dispatch deviation figure built from the outlier-free data.
fig = plot_results(
    df_daily=df_desv_reprog_percentage_without_outliers,
    df_month=monthly_avg_reprog_without_outliers,
    title="Desviación Redespacho sin outliers",
    title_1="Desviación diaria de energía",
    title_2="Desviación promedio mensual de energía",
)
fig.show()