# Data preparation for PERSEE

This notebook creates time series in the right format to be used in PERSEE for optimization.

In [43]:
import pandas as pd
from utilities import import_excel, format_load_data
import os
import glob

First, the user needs to choose the countries of interest.

In [56]:
# Countries to process. Each name is used both to locate the input
# workbooks and to suffix the generated column names.
countries = [
    'France',
    'Spain',
    'Algeria',
    'Peru',
    'Japan',
]

# Forwarded to format_load_data together with the country name.
region_name = None

We start building the CSV file with the header rows and the time column.

In [57]:
# 365 days * 24 h * 3600 s
seconds_in_a_year = 31536000

# Hourly time stamps in seconds. The first value is 3600 (end of the first
# hour), the last one is seconds_in_a_year, i.e. 8760 values in total.
times = [3600 * hour for hour in range(1, seconds_in_a_year // 3600 + 1)]

# The column begins with three header rows (start date, unit, flag) and
# continues with the time stamps.
column_data = ["01/01/2021 00:00", "s", "true", *times]

data = {"Time": column_data}

# DataFrame that the following cells extend with one column per time series
df = pd.DataFrame(data)


Then we need to retrieve the time series for consumption and production.

In [58]:
# Root folder of the input time series, relative to the working directory.
# The per-country folders and the consumption workbooks are looked up under it.
path_input_data = '../input_time_series/'

In [59]:
# Layout shared by every column of df: three header rows (description, unit,
# flag) followed by one value per time step already present in df['Time'].
n_header_rows = 3
n_time_steps = len(df) - n_header_rows


def find_production_file(folder, technology, country):
    """Return the path of the averaged 2021 production workbook of a country.

    Parameters
    ----------
    folder : str
        Directory that is searched (not recursively).
    technology : str
        Tag used in the file name, 'pv' or 'wind'.
    country : str
        Country name as it appears in the file name.

    Returns
    -------
    str
        Path of the matching file, exactly as returned by glob.

    Raises
    ------
    FileNotFoundError
        If no file in ``folder`` matches the expected name.
    """
    pattern = os.path.join(
        folder, f'*grid_locations_averaged_{technology}_{country}_2021.xlsx*'
    )
    # glob returns matches in arbitrary order; sorting makes the choice
    # reproducible when several files match.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f'No file matches {pattern!r}')
    return matches[0]


def add_series_column(frame, column, description, series):
    """Add one column to ``frame``: header rows followed by the series values.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to extend in place; it must hold ``n_header_rows`` header rows
        plus ``n_time_steps`` data rows.
    column : str
        Name of the column to create (or overwrite on a re-run).
    description : str
        Text written in the first header row.
    series : pandas.Series
        Values to copy; only the first ``n_time_steps`` are used.

    Raises
    ------
    ValueError
        If ``series`` holds fewer than ``n_time_steps`` values.
    """
    values = series.iloc[:n_time_steps].tolist()
    if len(values) < n_time_steps:
        raise ValueError(
            f'{column}: expected at least {n_time_steps} values, got {len(values)}'
        )
    # A single whole-column assignment replaces thousands of scalar .loc writes.
    frame[column] = [description, 'MW', 'true'] + values


for country in countries:

    folder = os.path.join(path_input_data, country)

    # Consumption: the name returned by format_load_data is appended to
    # path_input_data as-is.
    file_name_conso = format_load_data(country, region_name)
    conso = pd.read_excel(path_input_data + file_name_conso)
    add_series_column(df, f'Consumption{country}', 'demande', conso.iloc[:, 0])

    # PV production: the glob match is read directly, so the lookup no longer
    # depends on how many components path_input_data has.
    pv = pd.read_excel(find_production_file(folder, 'pv', country))
    add_series_column(df, f'PVProduction{country}', 'prod pv', pv.iloc[:, 0])

    # Wind production
    wind = pd.read_excel(find_production_file(folder, 'wind', country))
    add_series_column(df, f'WindProduction{country}', 'prod eolienne', wind.iloc[:, 0])


In [60]:
# Display the first rows of the dataframe to make sure everything is right

print(df.head())

               Time ConsumptionFrance PVProductionFrance WindProductionFrance  \
0  01/01/2021 00:00           demande            prod pv        prod eolienne   
1                 s                MW                 MW                   MW   
2              true              true               true                 true   
3              3600             76517                0.0                0.006   
4              7200             76117                0.0                0.006   

  ConsumptionSpain PVProductionSpain WindProductionSpain ConsumptionAlgeria  \
0          demande           prod pv       prod eolienne            demande   
1               MW                MW                  MW                 MW   
2             true              true                true               true   
3            28706               0.0            0.231333               3872   
4            27572               0.0               0.193               3754   

  PVProductionAlgeria WindProductionAl

Finally, we choose a file name for the .csv file and save it.

In [61]:
# Output file name; it records how many countries the file contains.
save_name = f'test_countries_{len(countries)}.csv'

# The file goes to the 'persee' sub-folder of path_input_data, so changing the
# input root in the configuration cell moves the output along with it.
output_dir = os.path.join(path_input_data, 'persee')
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing folders

# Semicolon-separated, without the pandas index column.
df.to_csv(os.path.join(output_dir, save_name), index=False, sep=';')