## Setup & Import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pvlib
import json
import os
from pvlib.pvsystem import PVSystem, Array, FixedMount
from pvlib.location import Location
from pvlib.modelchain import ModelChain
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
import plotly.graph_objects as go
import plotly.io as pio

pio.renderers.default = "browser"  # render plotly figures in browser

# Root folder that contains the PRiOT export; it must be provided through the environment.
# An empty value is rejected as well, since it would silently produce a wrong (relative) path.
PARENT_DATA_DIR = os.getenv('PARENT_DATA_DIR')
if not PARENT_DATA_DIR:
    raise ValueError("PARENT_DATA_DIR environment variable is not set")

# Build the paths with os.path.join instead of hard-coded backslashes so that the
# notebook resolves the same folders on Windows, Linux and macOS.
dataDirpath = os.path.join(PARENT_DATA_DIR, "PRiOT", "dataExport_3400_daily")
logsDirpath = os.path.join(os.pardir, "logs")


## Convert PRiOT database

Convert the metadata fields whose key name encodes the array number into a nested dictionary of arrays, for example:  
`"a001359": {"metadata": {"pv_mod1_wp": 365}}` becomes `"a001359": {"arrays": {"1": {"pv_wp": 365}}}`


## Import PRiOT data

In [None]:
# Load the metadata JSON file
metadataFilepath = os.path.join(dataDirpath, "metadata.json")

# JSON is UTF-8 encoded; do not rely on the platform default encoding.
with open(metadataFilepath, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Load all csv files from the data directory.
# The listing is sorted so that the order of the systems is the same on every run.
systemsData = {}
for file in sorted(os.listdir(dataDirpath)):
    if not file.endswith(".csv"):
        continue
    # The system name is the part of the file name that precedes the first "_"
    systemName = file.split("_")[0]
    systemData = pd.read_csv(os.path.join(dataDirpath, file))
    # 'Timestamp' is read as milliseconds since the epoch (UTC) and converted to Swiss local time
    systemData['Datetime'] = pd.to_datetime(systemData['Timestamp'], unit='ms', utc=True).dt.tz_convert('Europe/Zurich')
    # Keep only the date, as the production is the daily production.
    # The +1h handles the daylight saving time: normally PRiOT exports the data at midnight
    # (local time) of the day it belongs to (e.g. the energy for July 1st is saved at
    # July 1st 00:00 Europe/Zurich). However, it seems that the daylight saving time is not
    # always handled correctly, and sometimes the export is done at 23:00 the day before
    # (e.g. the energy for July 1st is saved at June 30th 23:00 Europe/Zurich).
    # Adding 1h makes sure that both cases end up on the correct date.
    systemData['Date'] = (systemData['Datetime'] + pd.Timedelta(hours=1)).dt.date
    # systemData['energy_daily_norm'] = systemData['tt_forward_active_energy_total_toDay'] / metadata[systemName]['metadata']['pv_kwp']
    systemsData[systemName] = systemData

systemsName = list(systemsData.keys())

df_duplicate_list = []
for systemName, systemData in systemsData.items():
    # Collect every row whose date appears more than once; they are written to a log file below
    df_duplicate_list.append(systemData[systemData['Date'].duplicated(keep=False)])

    # For duplicated dates, keep only the row with the biggest tt_forward_active_energy_total_toDay.
    # TODO maybe we should sum the energy of the duplicates instead of keeping the biggest one.
    # However, when looking in the PRiOT Portal, it seems that only the biggest value is shown
    # in the daily energy. We do the same here.
    # na_position='first' prevents a row without energy (NaN) from being sorted last and
    # therefore kept in place of a valid measurement.
    systemData.sort_values('tt_forward_active_energy_total_toDay', ascending=True, na_position='first', inplace=True)
    systemData.drop_duplicates(subset='Date', keep='last', inplace=True)

    # Set date as the index and sort the data by date
    systemData.set_index('Date', inplace=True)
    systemData.sort_index(ascending=True, inplace=True)

# Save duplicate dates to log file.
# pd.concat() raises on an empty list, so fall back to an empty frame when no csv was loaded.
df_duplicate = pd.concat(df_duplicate_list) if df_duplicate_list else pd.DataFrame()
print(f"Number of duplicate dates found: {len(df_duplicate)}")
os.makedirs(logsDirpath, exist_ok=True)  # the logs folder may not exist yet
df_duplicate.to_csv(os.path.join(logsDirpath, 'duplicateDates.csv'), index=True)



### Convert data & Filter out invalid PRiOT systems

In [None]:
def _toNumber(value):
    """Return ``value`` as an ``int`` or ``float``, or ``None`` if it cannot be converted.

    Values that already are ``int``/``float`` are returned unchanged. Any other value is
    converted with ``int()`` first and ``float()`` second.
    """
    if isinstance(value, (int, float)):
        return value
    for cast in (int, float):
        try:
            return cast(value)
        except (TypeError, ValueError):
            # ValueError: non-numeric string; TypeError: None (JSON null), list, dict, ...
            continue
    return None


# Keep only the systems that have measures and all the numeric metadata needed by the simulation.
# Numeric values stored as strings are converted in place in `metadata`.
systemsNameRemaining = systemsName.copy()
for systemName in systemsName:
    missingData = False
    if len(systemsData[systemName]) == 0:
        missingData = True
        print(f"No measures found for system {systemName}")

    # A system without any metadata entry is handled like a system with empty metadata,
    # so that it is reported and removed instead of raising a KeyError.
    systemEntry = metadata.get(systemName, {})
    systemMetadata = systemEntry.get('metadata', {})
    systemArrays = systemEntry.get('arrays', {})

    for key in ['loc_latitude', 'loc_longitude', 'pv_kwp']:
        if key not in systemMetadata:
            missingData = True
            print(f"No {key} found for {systemName}")
            continue
        # test that the value is a number
        number = _toNumber(systemMetadata[key])
        if number is None:
            missingData = True
            print(f"The key-value '{key}:{systemMetadata[key]}' is not a number for system {systemName}")
        else:
            systemMetadata[key] = number

    if not systemArrays:
        print(f"No PV arrays found for system {systemName}")
        missingData = True
    for array_num, arrayData in systemArrays.items():
        for key in ['pv_tilt', 'pv_azimut', 'pv_wp', 'pv_number']:
            if key not in arrayData:
                missingData = True
                print(f"No {key} found for array {array_num} of system {systemName}")
                continue
            # test that the value is a number
            number = _toNumber(arrayData[key])
            if number is None:
                missingData = True
                print(f"The key-value '{key}:{arrayData[key]}' is not a number for array {array_num} of system {systemName}")
            else:
                arrayData[key] = number

    if missingData:
        systemsNameRemaining.remove(systemName)
        print(f"-> Removing system {systemName} from the list of systems")

print(f"Number of systems with all the necessary data: {len(systemsNameRemaining)}/{len(systemsName)}")


## Simulate production

### Create Model chains

In [None]:
# Build one pvlib ModelChain per remaining system, keyed by the system name
modelChains = {}
for systemName in systemsNameRemaining:
    systemMetadata = metadata[systemName]['metadata']
    siteAltitude = 533 # TODO: Get the altitude from the metadata or an API
    # 'pv_kwp' is multiplied by 1000 and used as the inverter 'pdc0'
    inverterPdc0 = systemMetadata['pv_kwp'] * 1000

    # One Array per entry of the system's 'arrays' metadata: fixed mount, a single string
    # made of 'pv_number' modules of 'pv_wp' each.
    pvArrays = [
        Array(
            mount=FixedMount(
                surface_tilt=arrayMeta['pv_tilt'],
                surface_azimuth=arrayMeta['pv_azimut'],
                racking_model='open_rack',
            ),
            module_parameters={'pdc0': arrayMeta['pv_wp'], 'gamma_pdc': -0.004},
            module_type='glass_polymer',
            modules_per_string=arrayMeta['pv_number'],
            strings=1,
            temperature_model_parameters=TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_polymer'],
        )
        for arrayMeta in metadata[systemName]['arrays'].values()
    ]

    site = Location(
        latitude=systemMetadata['loc_latitude'],
        longitude=systemMetadata['loc_longitude'],
        altitude=siteAltitude,
        tz='Europe/Zurich',
        name=systemName,
    )
    pvSystem = PVSystem(
        arrays=pvArrays,
        inverter_parameters={'pdc0': inverterPdc0, 'eta_inv_nom': 0.96},
    )
    modelChains[systemName] = ModelChain(
        pvSystem,
        site,
        clearsky_model='ineichen',
        aoi_model='no_loss',
        spectral_model="no_loss",
    )

### Compute maximum expected energy production



In [None]:
def daily_energy(df_power):
    """Integrate a regularly sampled power series into daily energy.

    Each sample is multiplied by the sampling period expressed in hours (so a power
    in kW gives an energy in kWh), then the values are summed per calendar day.

    Parameters
    ----------
    df_power : pandas.Series or pandas.DataFrame
        Power values indexed by a regularly sampled ``DatetimeIndex``.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Energy per day, indexed by the start of each day.

    Raises
    ------
    ValueError
        If the index carries no frequency and none can be inferred from it.
    """
    freq = df_power.index.freq
    if freq is None:
        # The index lost its frequency (e.g. after filtering): try to recover it
        # instead of silently producing NaN energies.
        inferred = pd.infer_freq(df_power.index)
        if inferred is None:
            raise ValueError("Cannot determine the sampling frequency of the power data")
        freq = pd.tseries.frequencies.to_offset(inferred)

    # total_seconds() is the full duration; `.seconds` is only the seconds component
    # and would be wrong for sampling periods of one day or more.
    hours_per_sample = pd.Timedelta(freq).total_seconds() / 3600

    # Convert power to energy, then resample to daily frequency and sum the values
    energy = df_power * hours_per_sample
    return energy.resample('D').sum()

def simulateDailyProduction(startDate, endDate, modelChain, samplingFreq='1h'):
    """Simulate the clear-sky daily energy of a system between two dates (both included).

    The clear-sky weather is obtained from ``modelChain.location.get_clearsky`` and fed
    to ``modelChain.run_model``; the resulting AC output is divided by 1000 (W to kW,
    per the original comment) and integrated per day with ``daily_energy``.

    Parameters
    ----------
    startDate, endDate : first and last simulated day. ``endDate`` must support the
        addition of a ``pd.Timedelta``.
    modelChain : model chain to run; its ``results`` are overwritten by the run.
    samplingFreq : frequency of the simulated timestamps, as accepted by ``pd.date_range``.

    Returns
    -------
    The daily energy, indexed by date.
    """
    # TODO It is possible to take into account the horizon, using this method: https://pvlib-python.readthedocs.io/en/stable/gallery/shading/plot_simple_irradiance_adjustment_for_horizon_shading.html
    site = modelChain.location

    # To cover the whole end date (up to 23:59), the range runs to the day after the end
    # date and inclusive='left' drops that final timestamp (end date + 1 day at 00:00).
    timestamps = pd.date_range(
        start=startDate,
        end=endDate + pd.Timedelta(days=1),
        freq=samplingFreq,
        tz=site.tz,
        inclusive='left',
    )

    # Clear-sky irradiance (in W/m2) is the only weather input of the run
    modelChain.run_model(site.get_clearsky(timestamps))

    energyPerDay = daily_energy(modelChain.results.ac / 1000)  # Convert W to kW
    energyPerDay.index = energyPerDay.index.date
    return energyPerDay

In [None]:
# Run the 1h-sampled simulation for every system, over the date span of its measures.
# A system whose simulation fails is reported and gets no entry in dailyProductions.
dailyProductions = {}

for systemName, modelChain in modelChains.items():
    try:
        measuredDates = systemsData[systemName].index
        dailyProductions[systemName] = simulateDailyProduction(
            measuredDates.min(),
            measuredDates.max(),
            modelChain,
            samplingFreq='1h',
        )
    except Exception as e:
        print(f"Error for system {systemName}: {e}")

### Plot result

In [None]:
# Plot the max daily simulated energy and the measured energy for each pv system.
# A dropdown menu switches the two traces from one system to another.

energyColumn = 'tt_forward_active_energy_total_toDay'
traceNames = ['Max simulated daily energy', 'Measured daily energy']

# Only the systems that have a simulation result can be plotted: a system whose
# simulation raised an error has no entry in dailyProductions.
plottableSystems = [name for name in systemsNameRemaining if name in dailyProductions]

fig = go.Figure()

if plottableSystems:
    # Add initial traces (first system of the list)
    systemName = plottableSystems[0]
    fig.add_trace(go.Scatter(x=dailyProductions[systemName].index, y=dailyProductions[systemName], mode='markers', name=traceNames[0]))
    fig.add_trace(go.Scatter(x=systemsData[systemName].index, y=systemsData[systemName][energyColumn], mode='markers', name=traceNames[1]))

    # Create dropdown menu: one button per system, replacing the data of both traces
    buttons = [
        dict(
            label=name,
            method="update",
            args=[{"x": [dailyProductions[name].index, systemsData[name].index],
                   "y": [dailyProductions[name], systemsData[name][energyColumn]],
                   "name": traceNames}]
        )
        for name in plottableSystems
    ]

    fig.update_layout(
        updatemenus=[
            dict(
                buttons=buttons,
                direction="down",
                showactive=True,
                x=0.05,
                xanchor="left",
                y=1.15,
                yanchor="top"
            ),
        ]
    )

    fig.show()
else:
    # Avoid an IndexError on an empty list of systems
    print("No system with a simulated production to plot")



## Relative production

In [None]:
# For every remaining system, add the simulated (clear-sky) daily energy to the measured
# data and compute the ratio measured / simulated.
for systemName in systemsNameRemaining:
    # The assignment aligns the simulated values on the index of the measured data;
    # a measured date without simulated value therefore becomes NaN.
    systemsData[systemName]['simulated_energy'] = dailyProductions[systemName]
    if systemsData[systemName]['simulated_energy'].isna().any():
        # Raising a plain string is itself a TypeError: raise a real exception carrying the message
        raise ValueError(f"There are NaN values in the simulated energy for system {systemName}")

    systemsData[systemName]['relative_energy'] = systemsData[systemName]['tt_forward_active_energy_total_toDay'] / systemsData[systemName]['simulated_energy']

In [None]:
# Overlay the relative energy of every remaining system in a single figure (one trace per system)
fig = go.Figure()
for systemName in systemsNameRemaining:
    relativeEnergy = systemsData[systemName]['relative_energy']
    trace = go.Scatter(
        x=relativeEnergy.index,
        y=relativeEnergy,
        mode='markers',
        name=systemName,
    )
    fig.add_trace(trace)

fig.show()

## Compare difference between simulation with hourly and 10min sampling rate

In [None]:
# Simulate the daily production for each system with 1h and 10min sampling rate.
# NOTE: this replaces the content of dailyProductions: each entry becomes a DataFrame
# with one column per sampling rate.
dailyProductions = {}

for systemName, modelChain in modelChains.items():
    try:
        # simulateDailyProduction expects (startDate, endDate, modelChain): simulate over
        # the date span of the measures of the system, with the model chain of the system.
        firstDate = systemsData[systemName].index.min()
        lastDate = systemsData[systemName].index.max()
        dailyProduction_hour = simulateDailyProduction(firstDate, lastDate, modelChain, samplingFreq='1h')
        dailyProduction_min = simulateDailyProduction(firstDate, lastDate, modelChain, samplingFreq='10min')
        dailyProductions[systemName] = pd.DataFrame({'Simulator hour': dailyProduction_hour, 'Simulator 10min': dailyProduction_min})
    except Exception as e:
        # Best effort: report the failing system and go on with the next one
        print(f"Error for system {systemName}: {e}")
        continue


In [None]:
# Bare expression: in a notebook cell this displays the dictionary of per-system data frames
systemsData