## Setup & Import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pvlib
import json
import os
from pvlib.pvsystem import PVSystem, Array, FixedMount
from pvlib.location import Location
from pvlib.modelchain import ModelChain
from pvlib.temperature import TEMPERATURE_MODEL_PARAMETERS
import plotly.graph_objects as go

# Root directory of the datasets, supplied through the environment so that no
# machine-specific path is hard-coded in the notebook.
PARENT_DATA_DIR = os.getenv('PARENT_DATA_DIR')
if PARENT_DATA_DIR is None:
    raise ValueError("PARENT_DATA_DIR environment variable is not set")

# Paths are assembled with os.path.join instead of hard-coded backslashes so
# the notebook resolves the same directories on Windows, Linux and macOS.
dataDirpath = os.path.join(PARENT_DATA_DIR, "PRiOT", "dataExport_3400_daily")
logsDirpath = os.path.join("..", "logs")


## Convert PRiOT database

Convert the flat metadata keys, which encode the array number inside the key name, into a dictionary of arrays keyed by array number:   
`"a001359": {"metadata": {"pv_mod1_wp": 365}}` becomes `"a001359": {"arrays": {"1": {"pv_wp": 365}}}`


In [None]:
# Load the original (flat) metadata JSON file
metadataFilepath = os.path.join(dataDirpath, "metadata_origin.json")

with open(metadataFilepath, 'r') as f:
    metadataOrigin = json.load(f)

# Regroup the flat per-module keys (e.g. "pv_mod1_wp") into one dictionary per
# array, stored under systemData['arrays'][<array number>] (e.g. "pv_wp").
for systemName, systemData in metadataOrigin.items():
    arrays = {}
    keys_to_delete = []
    for key, value in systemData['metadata'].items():
        parts = key.split('_')
        # Only keys whose second token is exactly "mod<digits>" describe an
        # array. A plain substring test on "mod" would also catch unrelated
        # keys and could fail on keys without an underscore.
        if len(parts) < 2 or not parts[1].startswith('mod'):
            continue
        module_digits = parts[1][len('mod'):]
        if not module_digits.isdecimal():
            continue
        # Keep the whole number (not just its last character) so that "mod10"
        # does not collide with "mod0"; int() normalises leading zeros.
        array_num = str(int(module_digits))
        # Remove the module token from the key: "pv_mod1_wp" -> "pv_wp"
        new_key = '_'.join(parts[:1] + parts[2:])
        arrays.setdefault(array_num, {})[new_key] = value
        keys_to_delete.append(key)
    systemData['arrays'] = arrays
    # Deleting is deferred so the dictionary is not modified while iterating.
    for key in keys_to_delete:
        del systemData['metadata'][key]

# Use a context manager so the output file is flushed and closed deterministically.
with open(os.path.join(dataDirpath, "metadata.json"), 'w') as f:
    json.dump(metadataOrigin, f, indent=4)

## Import PRiOT data

In [None]:
# Load the converted metadata JSON file
metadataFilepath = os.path.join(dataDirpath, "metadata.json")

with open(metadataFilepath, 'r') as f:
    metadata = json.load(f)

# Load all csv files from the data directory, one DataFrame per system.
# The system name is the part of the file name before the first underscore.
systemsData = {}
for file in os.listdir(dataDirpath):
    if file.endswith(".csv"):
        systemName = file.split("_")[0]
        systemsData[systemName] = pd.read_csv(os.path.join(dataDirpath, file))
        # 'Timestamp' is parsed as epoch milliseconds (UTC) and converted to local time.
        systemsData[systemName]['Datetime'] = pd.to_datetime(systemsData[systemName]['Timestamp'], unit='ms', utc=True).dt.tz_convert('Europe/Zurich')
        # Keep only the date, since the value is a daily production.
        # The +1h works around daylight-saving-time handling: PRiOT normally
        # exports the energy of a day at midnight local time of that day (e.g.
        # the energy for July 1st is saved at July 1st 00:00 Europe/Zurich).
        # However, it seems that daylight saving time is not always handled
        # correctly and the export is sometimes done at 23:00 the day before
        # (e.g. June 30th 23:00 Europe/Zurich for July 1st). Adding 1h makes
        # both cases fall on the intended date.
        systemsData[systemName]['Date'] = (systemsData[systemName]['Datetime']+pd.Timedelta(hours=1)).dt.date
        # Daily energy normalised by the installed peak power of the system.
        systemsData[systemName]['energy_daily_norm'] = systemsData[systemName]['tt_forward_active_energy_total_toDay'] / metadata[systemName]['metadata']['pv_kwp']

systemsName = list(systemsData.keys())

# Save duplicate dates to log file
df_duplicate_list = list()
for systemName, systemData in systemsData.items():
    df_duplicate_list.append(systemData[systemData['Date'].duplicated(keep=False)])
# pd.concat raises on an empty list, which happens when no csv file was found.
df_duplicate = pd.concat(df_duplicate_list) if df_duplicate_list else pd.DataFrame()
print(f"Number of duplicate dates found: {len(df_duplicate)}")
# Make sure the log directory exists before writing into it.
os.makedirs(logsDirpath, exist_ok=True)
df_duplicate.to_csv(os.path.join(logsDirpath,'duplicateDates.csv'), index=True)

# Remove duplicate dates, keeping the row where tt_forward_active_energy_total_toDay is the largest
#TODO maybe we should sum the energy of the duplicates instead of removing the smallest one
for systemName, systemData in systemsData.items():
    systemData = systemData.sort_values('tt_forward_active_energy_total_toDay', ascending=False)
    systemData = systemData.drop_duplicates(subset='Date', keep='first')
    systemData = systemData.sort_values('Date', ascending=True)
    systemsData[systemName] = systemData



## Simulate production

### Create Model chains

In [None]:
# Create a pvlib model chain for each system that has complete metadata.
# Systems with missing location, peak power or array data are reported and skipped.
modelChains = {}
for systemName in systemsName:
    error = False
    # .get() keeps a system without any metadata entry on the "skip" path
    # instead of raising an uncaught KeyError further down.
    systemMetadata = metadata.get(systemName, {})

    try:
        latitude = systemMetadata['metadata']['loc_latitude']
        longitude = systemMetadata['metadata']['loc_longitude']
        altitude = 533 # TODO: Get the altitude from the metadata or an API
        Wp_Tot = systemMetadata['metadata']['pv_kwp']*1000
    except KeyError as e:
        print(f"Missing metadata {e} for system {systemName}")
        error = True

    arraysMetadata = systemMetadata.get('arrays', {})
    if not arraysMetadata:
        print(f"No arrays found for system {systemName}")
        error = True

    arrays = []
    for array_num, arrayData in arraysMetadata.items():
        try:
            array = Array(
                mount=FixedMount(surface_tilt=arrayData['pv_tilt'], surface_azimuth=arrayData['pv_azimut'], racking_model = 'open_rack'),
                module_parameters={'pdc0': arrayData['pv_wp'], 'gamma_pdc' : -0.004},
                module_type = 'glass_polymer',
                modules_per_string = arrayData['pv_number'],
                strings = 1,
                temperature_model_parameters=TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_polymer'],
            )
            arrays.append(array)
        except KeyError as e:
            print(f"Missing metadata {e} for array {array_num} of system {systemName}")
            error = True

    if error:
        print(f"Error found during creation of the model chain. Skipping system {systemName}...")
        continue

    location = Location(latitude=latitude, longitude=longitude, altitude=altitude, tz='Europe/Zurich', name=systemName)
    system = PVSystem(arrays=arrays, inverter_parameters={'pdc0': Wp_Tot, 'eta_inv_nom': 0.96})
    modelChain = ModelChain(system, location, clearsky_model = 'ineichen', aoi_model='no_loss', spectral_model="no_loss")

    modelChains[systemName] = modelChain

# Computed after the loop so the name always exists, even when every system was skipped.
systemsNameRemaining = list(modelChains.keys())

print(f"Model chains created for {len(systemsNameRemaining)} out of {len(systemsName)} systems")

In [None]:
def daily_energy(df):
    """Integrate a regularly sampled power time series into daily energy.

    Each sample is multiplied by the sampling step expressed in hours, then
    the products are summed per calendar day (e.g. kW in, kWh out).

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Power values indexed by a regularly spaced ``DatetimeIndex``.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Energy per day, same type as ``df``.

    Raises
    ------
    ValueError
        If the index carries no frequency and has fewer than two samples, so
        the sampling step cannot be determined.
    """
    step = None
    freq = df.index.freq
    if freq is not None:
        try:
            step = pd.Timedelta(freq)
        except (ValueError, TypeError):
            # Offsets without a fixed duration cannot be converted directly.
            step = None
    if step is None or pd.isna(step):
        # The index has no usable frequency (e.g. after filtering rows):
        # fall back to the typical spacing between consecutive samples.
        spacing = df.index.to_series().diff().dropna()
        if spacing.empty:
            raise ValueError("Cannot determine the sampling step of the time series")
        step = spacing.median()

    # total_seconds() is required here: the `.seconds` attribute only holds the
    # seconds component and drops whole days (a 36 h step would read as 12 h).
    step_in_hours = step.total_seconds() / 3600
    # Convert power to energy per sample
    energy_per_sample = df * step_in_hours
    # Resample to daily frequency and sum the values
    return energy_per_sample.resample('D').sum()

In [None]:
# TODO do the same for each system
# Clear-sky simulation of a single system over the period covered by its measurements.
simulatedSystem = 'a001035'
modelChain = modelChains[simulatedSystem]

startDate = systemsData[simulatedSystem]['Date'].min()
endDate = systemsData[simulatedSystem]['Date'].max()
# 'Date' holds plain dates, so ending the range at endDate would stop at 00:00
# of the last day and leave that day almost without simulated energy.
# Extend the range to the last hourly sample of that day instead.
lastHour = pd.Timestamp(endDate) + pd.Timedelta(hours=23)
times_hour = pd.date_range(start=startDate, end=lastHour, freq='1h', tz='Europe/Zurich')

weatherClearSky_hour = modelChain.location.get_clearsky(times_hour) # In W/m2

modelChain.run_model(weatherClearSky_hour)
production_hour = modelChain.results.ac/1000 # Convert W to kW
dailyProduction_hour = daily_energy(production_hour)

# Optional 10-minute resolution run, kept for reference:
# times_min = pd.date_range(start=startDate, end=pd.Timestamp(endDate) + pd.Timedelta(hours=23, minutes=50), freq='10min', tz='Europe/Zurich')
# weatherClearSky_min = modelChain.location.get_clearsky(times_min) # In W/m2
# modelChain.run_model(weatherClearSky_min)
# production_min = modelChain.results.ac
# dailyProduction_min = daily_energy(production_min)


In [None]:
# Compare the simulated clear-sky production of system a001035 with the
# daily energy measured by PRiOT, using plotly.graph_objects.
fig = go.Figure()
# fig.add_trace(go.Scatter(x=production_min.index, y=production_min, mode='markers', name='AC power min'))
fig.add_trace(go.Scatter(x=production_hour.index, y=production_hour, mode='markers', name='AC power hour'))
# fig.add_trace(go.Scatter(x=dailyProduction_min.index, y=dailyProduction_min, mode='markers', name='Daily energy min'))
fig.add_trace(go.Scatter(x=dailyProduction_hour.index, y=dailyProduction_hour, mode='markers', name='Daily energy hour'))
fig.add_trace(go.Scatter(x=systemsData['a001035']['Date'], y=systemsData['a001035']['tt_forward_active_energy_total_toDay'], mode='markers', name='Daily energy PRiOT'))

# The simulated power is converted from W to kW before plotting and the daily
# traces are energies, so the axis label must not claim watts.
# NOTE(review): the PRiOT daily energy is presumably in kWh - confirm against the export.
fig.update_layout(title='AC power of system a001035', xaxis_title='Time', yaxis_title='Power (kW) / Daily energy (kWh)')
fig.show()

## Plot result

In [None]:
# Overlay the normalised daily energy of every system on a single figure.
# Each system gets its own trace, hidden by default ('legendonly') so that the
# systems of interest can be switched on from the legend.
fig = go.Figure()
for device, df in systemsData.items():
    trace = go.Scatter(
        x=df['Timestamp'],
        y=df['energy_daily_norm'],
        name=device,
        mode='markers',
        visible='legendonly',
    )
    fig.add_trace(trace)
    # Bar alternative:
    # fig.add_trace(go.Bar(x=df['Timestamp'], y=df['energy_daily_norm'], name=device, width=24 * 60 * 60 * 1000 * 0.8, visible='legendonly'))

fig.update_layout(
    title='Superposition of Daily Data',
    xaxis_title='Timestamp',
    yaxis_title='Energy',
)
fig.show()