# Import libs


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pvlib
import json
import os
from pvlib.pvsystem import PVSystem, Array, FixedMount
from pvlib.location import Location
from pvlib.modelchain import ModelChain
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
import plotly.graph_objects as go
import plotly.io as pio
from tqdm import tqdm
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, root_mean_squared_error, r2_score
from sklearn.inspection import permutation_importance
import forestci as fci
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
import threading
from sklearn.metrics import make_scorer
import dash
from dash import dcc, html
import plotly.graph_objects as go
from dash.dependencies import Input, Output
import webbrowser
from threading import Timer
import requests
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import datetime
from sklearn.utils.parallel import Parallel, delayed
from sklearn.utils.validation import (
    check_is_fitted,
)
from sklearn.ensemble._base import _partition_estimators

import pickle
import joblib
import os

# Setup


## Setup Directories


In [None]:
pio.renderers.default = "browser"  # render plotly figures in browser

# Root folder that contains the data exports. It has to come from the
# environment; an empty value is treated like a missing one, because it would
# otherwise silently resolve to a path relative to the working directory.
PARENT_DATA_DIR = os.getenv('PARENT_DATA_DIR')
if not PARENT_DATA_DIR:
    raise ValueError("PARENT_DATA_DIR environment variable is not set")

# Built with os.path.join so the same notebook works on Windows and POSIX
# (a hard-coded "\" separator only yields a real sub-directory on Windows).
# Alternative export used previously:
#   "/Applications/Documents/TM Maxime/dataExport_3400_daily"
data_dirpath = os.path.join(PARENT_DATA_DIR, "PRiOT", "dataExport_2")
cache_dirpath = os.path.join(data_dirpath, "cache")
logs_dirpath = "../logs"

# exist_ok=True makes the creation idempotent and avoids the check-then-create
# race of testing os.path.exists() first.
os.makedirs(logs_dirpath, exist_ok=True)
os.makedirs(cache_dirpath, exist_ok=True)

## Setup Parameters


In [None]:
# Run switches. `force_tune_MaxProductionNormalizer` is forwarded as `tune=`
# to the max-production normalizers and `random_state` to the train/test split.
use_cache = False
force_train_hsr, force_tune_MaxProductionNormalizer = True, True
random_state = 42

# Sizes of the training / testing windows.
min_training_days = testing_days = 14
max_training_days = None  # None = Maximum possible


# Import


## Import metadata


## Import data


# Max production Normalizer

In [None]:
# Create the data handler on the export and cache directories, then call its
# metadata loader, data loader and integrity check, in that order.
# NOTE(review): DataHandler and MaxProductionNormalizers are neither defined
# nor imported in the visible part of this file - confirm where they come from.
data_handler = DataHandler(data_dirpath, cache_dirpath)
data_handler.load_metadata()
data_handler.load_data()
data_handler.check_integrity()


# Run the max-production normalizers against the loaded handler; tuning is
# switched by `force_tune_MaxProductionNormalizer`.
max_production_normalizers = MaxProductionNormalizers()
max_production_normalizers.run(data_handler, tune=force_tune_MaxProductionNormalizer)


In [None]:
# Outlier check with thresholds 1.1 (max) and 0.01 (min).
# presumably these are fractions of the estimated max production - TODO confirm.
data_handler.check_outliers(max_threshold=1.1, min_threshold=0.01)
# Unshuffled split: `testing_days` go to the test set.
# NOTE(review): max_train_size is hard-coded to 50 here, while the
# `max_training_days` parameter is not used in the visible code - confirm which
# one is intended.
data_handler.create_train_test_set(test_size=testing_days, max_train_size=50, random_state=random_state, shuffle=False)
data_handler.check_training_size(min_training_days=min_training_days)

# App

In [None]:
# Initialize the Dash app
# no_raise_mode = True

app = dash.Dash(__name__)

tab_height = '2em'

# All tabs share one compact style; the selected tab only adds a bold font.
_tab_style = {'padding': '0px', 'lineHeight': tab_height}
_tab_selected_style = {**_tab_style, 'fontWeight': 'bold'}

# (label, value) of every tab, in display order. The values are the keys the
# `render_content` callback dispatches on.
_tab_specs = [
    ('Energy', 'tab-energy'),
    ('Normalizer Tuning', 'tab-norm-tuning'),
    ('Relative Energy', 'tab-rel-energy'),
    ('Delta Error', 'tab-delta-rel-energy'),  # plots systemsData_RelativeDelta_val
    ('All Relative Energy', 'tab-rel-energy-all'),
    ('All Missing Value', 'tab-miss-val-all'),
    ('Similar neighboring systems', 'tab-neighbors'),
]

# Materialised once so the dropdown options and its initial value agree, and so
# an empty collection yields an empty dropdown instead of an IndexError.
_system_names = list(data_handler.valid_systems)

app.layout = html.Div(
    [
        # Header row: system selector followed by the metric text.
        html.Div(
            [
                dcc.Dropdown(
                    id='system-dropdown',
                    options=[{'label': name, 'value': name} for name in _system_names],
                    value=_system_names[0] if _system_names else None,
                    style={'width': '50%'},
                ),
                # Container filled by the callback with the metric text.
                # Dash style keys are camelCased (marginLeft, not margin-left).
                html.Div(
                    id='metric-text-container',
                    style={'display': 'inline-block', 'marginLeft': '20px'},
                ),
            ],
            style={'display': 'flex', 'alignItems': 'center'},  # Align items horizontally
        ),
        dcc.Tabs(
            id='plot-tabs',
            value='tab-energy',
            children=[
                dcc.Tab(
                    label=label,
                    value=value,
                    style=_tab_style,
                    selected_style=_tab_selected_style,
                )
                for label, value in _tab_specs
            ],
        ),
        # Allow the tabs-content div to grow and take the remaining height.
        html.Div(id='tabs-content', style={'flex': '1 1 auto'}),
    ],
    # Make the outer container fill the screen height.
    style={'display': 'flex', 'flexDirection': 'column', 'height': '100vh'},
)


def _note_skipped(what):
    """Log at DEBUG level, with traceback, that an optional element was skipped."""
    import logging  # local import: keeps this cell independent of the import cell

    logging.getLogger(__name__).debug("Skipped %s", what, exc_info=True)


def _value_or_nan(supplier, what):
    """Return ``supplier()``, or NaN when the lookup fails for any reason."""
    try:
        return supplier()
    except Exception:  # best effort: the source may not have been computed yet
        _note_skipped(what)
        return np.nan


def _try_add_markers(fig, series_supplier, name, color=None):
    """Add ``series_supplier()`` to ``fig`` as a marker trace; skip it on failure.

    The series is passed as a callable so that a missing global or a missing
    system column is raised (and handled) here instead of at the call site.
    """
    try:
        series = series_supplier()
        marker_kwargs = {} if color is None else {'marker_color': color}
        fig.add_trace(go.Scatter(
            x=series.index,
            y=series,
            mode='markers',
            name=name,
            **marker_kwargs,
        ))
    except Exception:  # best effort: a missing series must not break the tab
        _note_skipped(f"trace {name!r}")


def _pin_legend_top_right(fig):
    """Place a horizontal legend in the top-right corner of ``fig``."""
    fig.update_layout(
        legend=dict(
            x=0.99,
            y=0.99,
            xanchor='right',
            yanchor='top',
            orientation='h'
        )
    )


def _full_size_graph(fig):
    """Wrap ``fig`` in a ``dcc.Graph`` that fills its container."""
    return dcc.Graph(figure=fig, style={'height': '100%', 'width': '100%'})


def _build_energy_figure(selected_system):
    """Daily energy of one system: estimated max, measured and expected values."""
    fig = go.Figure(layout_yaxis_title="Daily Energy (kWh)")
    _try_add_markers(
        fig,
        lambda: data_handler.max_production[selected_system].dropna(),
        'Estimated Max Daily Energy',
        'LightSeaGreen',
    )
    _try_add_markers(
        fig,
        lambda: data_handler.get_measures(set='all', systems_name=selected_system),
        'Measured Daily Energy',
        'blue',
    )
    _try_add_markers(
        fig,
        lambda: systemsData_ExpectedDailyEnergy_val_mean[selected_system].dropna(),
        'Expected Daily Energy',
        'red',
    )
    _pin_legend_top_right(fig)
    return fig


def _build_normalizer_tuning_figure(selected_system):
    """Measured energy together with the series used while tuning the normalizer."""
    fig = go.Figure(layout_yaxis_title="Daily Energy (kWh)")
    _try_add_markers(
        fig,
        lambda: data_handler.get_measures(set='all', systems_name=selected_system),
        'Measured Daily Energy',
        'blue',
    )
    _try_add_markers(
        fig,
        lambda: data_handler._measures[data_handler.tuner_measures_max_mask][selected_system].dropna(),
        'Max Measured Daily Energy (7 days)',
        'red',
    )
    _try_add_markers(
        fig,
        lambda: data_handler._measures[data_handler.tuner_measures_outliers_mask][selected_system].dropna(),
        'Tuning Outliers',
        'yellow',
    )
    _try_add_markers(
        fig,
        lambda: data_handler.tuner_max_productions_untuned[selected_system].dropna(),
        'Estimated Max Daily Energy (Untuned)',
        'violet',
    )
    _try_add_markers(
        fig,
        lambda: data_handler.max_production[selected_system].dropna(),
        'Estimated Max Daily Energy',
        'LightSeaGreen',
    )
    _pin_legend_top_right(fig)
    return fig


def _build_relative_energy_figure(selected_system):
    """Energy of one system as a percentage, with a reference line at 100 %.

    The reference line and the measured trace are deliberately not guarded:
    a failure there is raised to Dash instead of producing an empty plot.
    """
    fig = go.Figure(layout_yaxis_title="Proportional Daily Energy (%)")

    # Horizontal line at 100 % spanning the dates of the max-production series.
    max_production = data_handler.max_production[selected_system].dropna()
    fig.add_shape(
        type="line",
        x0=max_production.index.min(),
        y0=100,
        x1=max_production.index.max(),
        y1=100,
        name='Estimated Max Daily Energy',
        line_color='LightSeaGreen'
    )

    normalized = data_handler.normalize(
        data_handler.get_measures(set='all', systems_name=selected_system)
    )
    fig.add_trace(go.Scatter(
        x=normalized.index,
        y=normalized * 100,
        mode='markers',
        name='Measured Daily Energy',
        marker_color='blue'
    ))

    _try_add_markers(
        fig,
        lambda: systemsData_RelativeExpectedDailyEnergy_val_mean[selected_system].dropna() * 100,
        'Expected Daily Energy',
        'red',
    )
    return fig


def _build_delta_figure(selected_system):
    """Relative delta of one system (in %), with the detected errors highlighted."""
    fig = go.Figure(layout_yaxis_title="Proportional Daily Energy Error(%)")
    _try_add_markers(
        fig,
        lambda: systemsData_RelativeDelta_val[selected_system].dropna() * 100,
        'Relative Delta Energy',
    )
    _try_add_markers(
        fig,
        lambda: systemsData_RelativeDelta_val_detected[selected_system].dropna() * 100,
        'Detected Errors',
        'red',
    )
    return fig


def _build_all_relative_energy_figure(selected_system):
    """Normalized energy of every valid system; the selected one is drawn last in red."""
    fig = go.Figure(layout_yaxis_title="Proportional Daily Energy (%)")
    try:
        all_data = data_handler.normalize(data_handler.get_measures(set='all'))
        for system_name in data_handler.valid_systems:
            if system_name == selected_system:
                continue  # added after the loop so it is drawn on top
            fig.add_trace(go.Scatter(
                x=all_data[system_name].index,
                y=all_data[system_name] * 100,
                mode='markers',
                name=f'{system_name}',
                marker_color='blue'
            ))
        fig.add_trace(go.Scatter(
            x=all_data[selected_system].index,
            y=all_data[selected_system] * 100,
            mode='markers',
            name=f'{selected_system}',
            marker_color='red'
        ))
        fig.update_layout(yaxis=dict(range=[-5, 120]))
    except Exception:  # best effort: keep whatever traces were added so far
        _note_skipped("all-systems relative energy traces")
    return fig


def _build_missing_values_figure(selected_system):
    """Heatmap of the inverted missing-value table (independent of the selection)."""
    # assumes get_missing_value returns a boolean, date-indexed table - TODO confirm
    presence = (~data_handler.get_missing_value(sorted=True)).astype(int)
    # Month-end ticks, computed once and reused for positions and labels.
    month_ends = pd.date_range(start=presence.index.min(), end=presence.index.max(), freq='ME')

    fig = go.Figure(data=go.Heatmap(
        z=presence,
        x=presence.columns,
        y=presence.index,
        showscale=False,
        colorscale='Greys'  # Set colorscale to black and white
    ))
    fig.update_layout(
        yaxis=dict(
            autorange='reversed',  # Invert the y-axis
            showticklabels=True,
            tickmode='array',
            tickvals=month_ends,
            ticktext=month_ends.strftime('%b %Y')
        )
    )
    return fig


def _build_feature_importance_figure(selected_system):
    """Impurity-based and permutation importances of one system on two y-axes."""
    fig = go.Figure()
    try:
        impurity = features_importance_df.loc[selected_system]
        fig.add_trace(go.Bar(
            x=features_importance_df.columns,
            y=impurity,
            name='Impurity-based Importance',
            yaxis='y1',
            offsetgroup=1
        ))
        fig.update_layout(
            yaxis1=dict(
                title='Impurity-based Importance',
                range=[0, impurity.max()],
            )
        )
    except Exception:  # best effort: importances may not have been computed
        _note_skipped("impurity-based importance bars")
    try:
        permutation = permutation_importance_mean_df.loc[selected_system]
        fig.add_trace(go.Bar(
            x=permutation_importance_mean_df.columns,
            y=permutation,
            name='Permutation Importance',
            yaxis='y2',
            offsetgroup=2
        ))
        fig.update_layout(
            yaxis2=dict(
                title='Permutation Importance',
                overlaying='y',
                side='right',
                range=[0, permutation.max()],
            )
        )
    except Exception:  # best effort: importances may not have been computed
        _note_skipped("permutation importance bars")
    return fig


# Tab value -> function building the figure shown for that tab.
_TAB_FIGURE_BUILDERS = {
    'tab-energy': _build_energy_figure,
    'tab-norm-tuning': _build_normalizer_tuning_figure,
    'tab-rel-energy': _build_relative_energy_figure,
    'tab-delta-rel-energy': _build_delta_figure,
    'tab-rel-energy-all': _build_all_relative_energy_figure,
    'tab-miss-val-all': _build_missing_values_figure,
    'tab-neighbors': _build_feature_importance_figure,
}


@app.callback(
    [Output('tabs-content', 'children'),
     Output('metric-text-container', 'children')],
    [Input('plot-tabs', 'value'),
     Input('system-dropdown', 'value')]
)
def render_content(tab, selected_system):
    """Render the selected tab and the metric text for the selected system.

    Args:
        tab: Value of the 'plot-tabs' component.
        selected_system: Value of the 'system-dropdown' component.

    Returns:
        A ``(tab content, metric text)`` pair matching the two callback
        outputs. An unknown ``tab`` yields an empty ``html.Div`` as content.

    Optional data (metrics, expected energy, importances, ...) is looked up on
    a best-effort basis: whatever is unavailable is shown as NaN or left out
    of the figure, and the reason is logged at DEBUG level.
    """
    # Statistic text
    mae_train = _value_or_nan(
        lambda: regressorsMetrics_train.loc[selected_system], "train metric")
    mae_val = _value_or_nan(
        lambda: regressorsMetrics_val.loc[selected_system], "validation metric")
    loss = _value_or_nan(
        lambda: data_handler.get_metadata(selected_system)['metadata']['loss'], "system loss")

    mae_train_text = f"Estimator Train Error : {mae_train * 100:.2f}%"
    mae_test_text = f"Estimator Test Error  : {mae_val * 100:.2f}%"
    loss_text = f"System Loss   : {loss * 100:.2f}%"

    metric_text_div = html.Div([
        html.Div(mae_train_text),
        html.Div(mae_test_text),
        html.Div(loss_text)
    ], style={'fontSize': 16})

    build_figure = _TAB_FIGURE_BUILDERS.get(tab)
    if build_figure is None:
        # Both outputs must always receive a value; an unknown tab shows nothing.
        return html.Div(), metric_text_div

    return _full_size_graph(build_figure(selected_system)), metric_text_div


def open_browser(url="http://127.0.0.1:8060/"):
    """Open the dashboard in the default web browser.

    Args:
        url: Address to open. The default is the local address and port on
            which this file starts the Dash server.
    """
    webbrowser.open(url)


if __name__ == '__main__':
    # Open the Dash app in a new browser window. The one-second timer lets the
    # call below start the server before the browser requests the page.
    Timer(1, open_browser).start()
    # `app.run` replaces the legacy `app.run_server` alias, which recent Dash
    # releases no longer accept. The reloader stays off so the timer above is
    # not triggered a second time by a restarted process.
    app.run(debug=True, use_reloader=False, port=8060)