# Microgrid in Japan - Scenario generation from real data

Data source: https://www.nature.com/articles/sdata201920

## 1. Read input data - Cleaned data per second

In [36]:
import csv
from io import TextIOWrapper
from zipfile import ZipFile
import pandas as pd
import os
import glob
import multiprocessing as mp
import timeit
#import py7zr

## 2. Extract whole zip file

### IMPORTANT: Before running this notebook, extract the zip file '2 Cleaned_data_per_second.zip' into the folder /tmp/microgrid.

In [37]:
# Folder that must hold the extracted per-second CSV files
temp_dir = '/tmp/microgrid'
# exist_ok=True keeps this cell safe to re-run and avoids the check-then-create race
os.makedirs(temp_dir, exist_ok=True)
# NOW, UNZIP THE FILE TO THE temp_dir FOLDER, USING UNZIP TOOL IN COMMAND-LINE !

## 3. Quick analysis of dataframe data

### 3.1. Read one of the CSV files and adjust the header

In [38]:
# Maps the raw CSV header codes to readable column names.
# Reference: https://www.nature.com/articles/sdata201920.pdf
column_names_dict = {
    '#': 'timestamp',
    # Battery
    '10101': 'Active power of the battery (kW)',
    '10105': 'Direct voltage of the battery (V)',
    '10106': 'Direct current of the battery (A)',
    # Purchased electricity
    '10201': 'Voltage of purchased electricity at the receiving end (V)',
    '10203': 'Active power of purchased electricity at the receiving end (kW)',
    # Aggregated PV generation, battery command and state of charge
    '10307': 'Total active power generation by all four solar arrays (kW)',
    '12144': 'Active battery power command value (kW)',
    '12152': 'State of charge of the battery (%)',
    # Irradiance and per-array PV generation
    '20104': 'Solar irradiance (W/m^2)',
    '20106': 'Active power generation by solar array 1 (kW)',
    '20109': 'Active power generation by solar array 2 (kW)',
    '20112': 'Active power generation by solar array 3 (kW)',
    '20115': 'Active power generation by solar array 4 (kW)',
}

In [73]:
# Every CSV covers one date range, encoded in its name (e.g., '20180413-20180424SecCsv.csv')
sample_csv_path = os.path.join(temp_dir, '20180413-20180424SecCsv.csv')
# File rows 0 and 2 are skipped; the header row that remains is renamed through column_names_dict
df_ = pd.read_csv(sample_csv_path, skiprows=[0, 2], encoding='mac_roman').rename(columns=column_names_dict)
df_

Unnamed: 0,timestamp,Active power of the battery (kW),Direct voltage of the battery (V),Direct current of the battery (A),Voltage of purchased electricity at the receiving end (V),Active power of purchased electricity at the receiving end (kW),Total active power generation by all four solar arrays (kW),Active battery power command value (kW),State of charge of the battery (%),Active power generation by solar array 1 (kW),Active power generation by solar array 2 (kW),Active power generation by solar array 3 (kW),Active power generation by solar array 4 (kW)
0,'2018/04/13 00:00:00,-0.825,344.536255,-1.6250,6626.25,619.200012,0.0,0.0,95.0,0.0,0.0,0.0,0.0
1,'2018/04/13 00:00:01,-0.900,344.788757,-1.0000,6633.00,603.599976,0.0,0.0,95.0,0.0,0.0,0.0,0.0
2,'2018/04/13 00:00:02,-0.975,344.915009,-0.6875,6635.25,620.400024,0.0,0.0,95.0,0.0,0.0,0.0,0.0
3,'2018/04/13 00:00:03,-0.975,344.915009,-0.6875,6635.25,620.400024,0.0,0.0,95.0,0.0,0.0,0.0,0.0
4,'2018/04/13 00:00:04,-0.975,345.293762,-0.3750,6635.25,603.599976,0.0,0.0,95.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1036795,'2018/04/24 23:59:55,-1.050,345.546265,0.2500,6612.75,650.400024,0.0,0.0,95.0,0.0,0.0,0.0,0.0
1036796,'2018/04/24 23:59:56,-1.050,346.303741,0.2500,6612.75,595.200012,0.0,0.0,95.0,0.0,0.0,0.0,0.0
1036797,'2018/04/24 23:59:57,-1.050,346.303741,0.2500,6612.75,595.200012,0.0,0.0,95.0,0.0,0.0,0.0,0.0
1036798,'2018/04/24 23:59:58,-1.050,345.420013,-2.5625,6590.25,595.200012,0.0,0.0,95.0,0.0,0.0,0.0,0.0


### 3.2. Convert the first column to timestamp and set as dataframe index

In [74]:
# Parse the textual timestamps into datetime64 values, then show the converted column
df_['timestamp'] = pd.to_datetime(df_['timestamp'])
df_['timestamp']

0         2018-04-13 00:00:00
1         2018-04-13 00:00:01
2         2018-04-13 00:00:02
3         2018-04-13 00:00:03
4         2018-04-13 00:00:04
                  ...        
1036795   2018-04-24 23:59:55
1036796   2018-04-24 23:59:56
1036797   2018-04-24 23:59:57
1036798   2018-04-24 23:59:58
1036799   2018-04-24 23:59:59
Name: timestamp, Length: 1036800, dtype: datetime64[ns]

In [75]:
# Use the parsed timestamps as the row index (required by the time-based grouping below)
df_ = df_.set_index('timestamp')
df_

Unnamed: 0_level_0,Active power of the battery (kW),Direct voltage of the battery (V),Direct current of the battery (A),Voltage of purchased electricity at the receiving end (V),Active power of purchased electricity at the receiving end (kW),Total active power generation by all four solar arrays (kW),Active battery power command value (kW),State of charge of the battery (%),Active power generation by solar array 1 (kW),Active power generation by solar array 2 (kW),Active power generation by solar array 3 (kW),Active power generation by solar array 4 (kW)
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-04-13 00:00:00,-0.825,344.536255,-1.6250,6626.25,619.200012,0.0,0.0,95.0,0.0,0.0,0.0,0.0
2018-04-13 00:00:01,-0.900,344.788757,-1.0000,6633.00,603.599976,0.0,0.0,95.0,0.0,0.0,0.0,0.0
2018-04-13 00:00:02,-0.975,344.915009,-0.6875,6635.25,620.400024,0.0,0.0,95.0,0.0,0.0,0.0,0.0
2018-04-13 00:00:03,-0.975,344.915009,-0.6875,6635.25,620.400024,0.0,0.0,95.0,0.0,0.0,0.0,0.0
2018-04-13 00:00:04,-0.975,345.293762,-0.3750,6635.25,603.599976,0.0,0.0,95.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2018-04-24 23:59:55,-1.050,345.546265,0.2500,6612.75,650.400024,0.0,0.0,95.0,0.0,0.0,0.0,0.0
2018-04-24 23:59:56,-1.050,346.303741,0.2500,6612.75,595.200012,0.0,0.0,95.0,0.0,0.0,0.0,0.0
2018-04-24 23:59:57,-1.050,346.303741,0.2500,6612.75,595.200012,0.0,0.0,95.0,0.0,0.0,0.0,0.0
2018-04-24 23:59:58,-1.050,345.420013,-2.5625,6590.25,595.200012,0.0,0.0,95.0,0.0,0.0,0.0,0.0


### 3.3. Check dataframe types and basic statistics

In [76]:
# Print the dtype of every column to confirm the measurements were parsed as numbers
print(df_.dtypes)

Active power of the battery (kW)                                   float64
Direct voltage of the battery (V)                                  float64
Direct current of the battery (A)                                  float64
Voltage of purchased electricity at the receiving end (V)          float64
Active power of purchased electricity at the receiving end (kW)    float64
Total active power generation by all four solar arrays (kW)        float64
Active battery power command value (kW)                            float64
State of charge of the battery (%)                                 float64
Active power generation by solar array 1 (kW)                      float64
Active power generation by solar array 2 (kW)                      float64
Active power generation by solar array 3 (kW)                      float64
Active power generation by solar array 4 (kW)                      float64
dtype: object


In [77]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
df_.describe()

Unnamed: 0,Active power of the battery (kW),Direct voltage of the battery (V),Direct current of the battery (A),Voltage of purchased electricity at the receiving end (V),Active power of purchased electricity at the receiving end (kW),Total active power generation by all four solar arrays (kW),Active battery power command value (kW),State of charge of the battery (%),Active power generation by solar array 1 (kW),Active power generation by solar array 2 (kW),Active power generation by solar array 3 (kW),Active power generation by solar array 4 (kW)
count,1036800.0,1036800.0,1036800.0,1036800.0,1036800.0,1036800.0,1036800.0,1036800.0,1036800.0,1036800.0,1036800.0,1036800.0
mean,-1.245432,344.9301,-0.09704891,6628.459,647.6238,11.17647,-1.908379,93.26258,7.613193,3.072817,1.001189,0.1848077
std,11.59815,11.13223,33.56057,56.17449,77.3837,18.33226,15.327,1.956565,12.08594,4.865558,1.613788,0.4184695
min,-77.325,314.7412,-196.625,6387.75,446.4,0.0,-78.36,89.3,0.0,0.0,0.0,0.0
25%,-1.05,340.7487,-2.5625,6588.0,589.2,0.0,0.0,91.09,0.0,0.0,0.0,0.0
50%,-0.975,343.6525,-0.375,6626.25,630.0,0.0,0.0,94.22,0.0,0.0,0.0,0.0
75%,-0.9,345.1675,1.5,6666.75,705.6,16.16,0.0,95.0,11.17,4.75,1.44,0.0
max,80.925,408.9237,268.0625,6806.25,961.2,90.88,84.6,100.0,57.28,28.28,10.02,2.4


### 3.4. Check for NA values

In [78]:
# Number of missing (NA) values in each column
df_.isna().sum()

Active power of the battery (kW)                                   0
Direct voltage of the battery (V)                                  0
Direct current of the battery (A)                                  0
Voltage of purchased electricity at the receiving end (V)          0
Active power of purchased electricity at the receiving end (kW)    0
Total active power generation by all four solar arrays (kW)        0
Active battery power command value (kW)                            0
State of charge of the battery (%)                                 0
Active power generation by solar array 1 (kW)                      0
Active power generation by solar array 2 (kW)                      0
Active power generation by solar array 3 (kW)                      0
Active power generation by solar array 4 (kW)                      0
dtype: int64

### 3.5. There are negative values for PV solar production. Let's replace them by zero.

In [None]:
# Overwrite every negative total-PV reading with zero
pv_total_col = 'Total active power generation by all four solar arrays (kW)'
negative_pv_rows = df_[pv_total_col] < 0
df_.loc[negative_pv_rows, pv_total_col] = 0

### 3.6. Now let's calculate the quantity of energy consumed/produced during delta=10 min

In [79]:
# Building consumption is the negated sum of PV generation, battery command and purchased power
supplied_power = (df_['Total active power generation by all four solar arrays (kW)']
                  + df_['Active battery power command value (kW)']
                  + df_['Active power of purchased electricity at the receiving end (kW)'])
df_['Building_Consumption'] = -supplied_power

In [80]:
# Length (in minutes) of each time frame. It is defined here because no earlier cell sets it,
# so the cell would otherwise fail with a NameError on a fresh kernel.
delta_size = 10
# Build a new frame instead of mutating df_ in place, so that re-running this cell is safe
df_energy = (
    df_.drop(columns=['Active power generation by solar array 1 (kW)', 'Active power generation by solar array 2 (kW)',
                      'Active power generation by solar array 3 (kW)', 'Active power generation by solar array 4 (kW)',
                      'Active power of the battery (kW)', 'Direct voltage of the battery (V)', 'Direct current of the battery (A)',
                      'State of charge of the battery (%)',
                      'Voltage of purchased electricity at the receiving end (V)'])
       .rename(columns={'Total active power generation by all four solar arrays (kW)': 'PV_Production'})
)
# Scale the power columns (kW) by 1/60 before summing them per time frame.
# NOTE(review): the samples are one per second, so /60 yields kW*min per sample; kWh would need /3600.
# Confirm the intended unit before relying on the absolute values.
energy_columns = ['Building_Consumption', 'PV_Production',
                  'Active power of purchased electricity at the receiving end (kW)',
                  'Active battery power command value (kW)']
df_energy[energy_columns] = df_energy[energy_columns] / 60.0
# Sum every column over consecutive frames of 'delta_size' minutes
df_delta = df_energy.groupby(pd.Grouper(freq=('%dmin' % delta_size))).sum()

In [81]:
# Preview rows 197 to 211 (by position) of the per-frame totals
df_delta.iloc[197:212]

Unnamed: 0_level_0,Active power of purchased electricity at the receiving end (kW),PV_Production,Active battery power command value (kW),Building_Consumption
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-04-14 08:50:00,5719.020012,55.608,0.0,-5774.628012
2018-04-14 09:00:00,5860.960011,29.434667,0.0,-5890.394677
2018-04-14 09:10:00,6139.220011,35.994667,0.0,-6175.214678
2018-04-14 09:20:00,6137.460007,43.466667,0.0,-6180.926673
2018-04-14 09:30:00,6021.320016,86.765333,0.0,-6108.08535
2018-04-14 09:40:00,5951.720029,123.725333,0.0,-6075.445362
2018-04-14 09:50:00,5832.040016,205.706668,0.0,-6037.746684
2018-04-14 10:00:00,5932.079995,159.789334,43.637333,-6135.506662
2018-04-14 10:10:00,5742.280008,430.474666,25.9955,-6198.750174
2018-04-14 10:20:00,5570.980004,396.950665,1.983833,-5969.914502


### 3.7. Then aggregate, per hour, the 10th/90th percentiles (used as min/max) and the sum of the 10-min consumption/production

In [82]:
def q10(x):
    """Return the 10th percentile of `x` (anything exposing a pandas-style `quantile`)."""
    return x.quantile(q=0.1)

def q90(x):
    """Return the 90th percentile of `x` (anything exposing a pandas-style `quantile`)."""
    return x.quantile(q=0.9)

In [83]:
# For each hour, take the 10th percentile, 90th percentile and sum of the per-frame totals
hourly_stats_spec = {'Building_Consumption': [q10, q90, 'sum'],
                     'PV_Production': [q10, q90, 'sum']}
df_pdt = df_delta.groupby(pd.Grouper(freq='60min')).agg(hourly_stats_spec)
# Preview a few daytime hours
preview_rows = (df_pdt.index >= '2018-04-14 09:00:00') & (df_pdt.index <= '2018-04-14 11:10:00')
df_pdt[preview_rows]

Unnamed: 0_level_0,Building_Consumption,Building_Consumption,Building_Consumption,PV_Production,PV_Production,PV_Production
Unnamed: 0_level_1,q10,q90,sum,q10,q90,sum
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2018-04-14 09:00:00,-6178.070676,-5964.07068,-36467.813424,32.714667,164.716,525.093334
2018-04-14 10:00:00,-6167.128418,-5849.629513,-35909.067365,270.860001,435.706666,2213.49733
2018-04-14 11:00:00,-5890.553683,-5742.522847,-34843.698571,178.170667,470.382667,1839.656003


In [84]:
# Collapse the two-level column labels into single '<column>_<statistic>' names
flat_names = []
for levels in df_pdt.columns.values:
    flat_names.append('_'.join(levels).strip())
df_pdt.columns = flat_names
df_pdt.head(2)

Unnamed: 0_level_0,Building_Consumption_q10,Building_Consumption_q90,Building_Consumption_sum,PV_Production_q10,PV_Production_q90,PV_Production_sum
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-04-13 00:00:00,-6068.110003,-5930.850018,-35929.920088,0.0,0.0,0.0
2018-04-13 01:00:00,-6786.699992,-5995.529998,-38706.239995,0.0,0.0,0.0


In [85]:
# Give the aggregated columns their final Pdt_* names
pdt_column_names = {
    'Building_Consumption_q10': 'Pdt_min_UNDS0_Building',
    'Building_Consumption_q90': 'Pdt_max_UNDS0_Building',
    'Building_Consumption_sum': 'Pdt_sum_UNDS0_Building',
    'PV_Production_q10': 'Pdt_min_UNDS1_PV',
    'PV_Production_q90': 'Pdt_max_UNDS1_PV',
    'PV_Production_sum': 'Pdt_sum_UNDS1_PV',
}
df_pdt = df_pdt.rename(columns=pdt_column_names)

### 3.8. We now have the 10th percentile (Pdt_min), 90th percentile (Pdt_max) and sum (Pdt_sum) of the 10-min consumption/production values, aggregated by hour

In [86]:
# Show the renamed hourly statistics for a few daytime hours
window_start, window_end = '2018-04-14 09:00:00', '2018-04-14 11:10:00'
in_window = (df_pdt.index >= window_start) & (df_pdt.index <= window_end)
df_pdt[in_window]

Unnamed: 0_level_0,Pdt_min_UNDS0_Building,Pdt_max_UNDS0_Building,Pdt_sum_UNDS0_Building,Pdt_min_UNDS1_PV,Pdt_max_UNDS1_PV,Pdt_sum_UNDS1_PV
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-04-14 09:00:00,-6178.070676,-5964.07068,-36467.813424,32.714667,164.716,525.093334
2018-04-14 10:00:00,-6167.128418,-5849.629513,-35909.067365,270.860001,435.706666,2213.49733
2018-04-14 11:00:00,-5890.553683,-5742.522847,-34843.698571,178.170667,470.382667,1839.656003


In [87]:
# Hour of day for the first 48 hourly rows
df_pdt.index[:48].hour

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
           dtype='int64', name='timestamp')

In [88]:
# Hourly totals of the per-frame sums. The original referenced `df3`, which no cell defines
# (NameError on a fresh kernel); df_delta is the per-frame frame built in section 3.6.
df_hour = df_delta.groupby(pd.Grouper(freq='60min')).sum()

In [89]:
# Show the hourly statistics again for the same daytime window
window_start, window_end = '2018-04-14 09:00:00', '2018-04-14 11:10:00'
in_window = (df_pdt.index >= window_start) & (df_pdt.index <= window_end)
df_pdt[in_window]

Unnamed: 0_level_0,Pdt_min_UNDS0_Building,Pdt_max_UNDS0_Building,Pdt_sum_UNDS0_Building,Pdt_min_UNDS1_PV,Pdt_max_UNDS1_PV,Pdt_sum_UNDS1_PV
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-04-14 09:00:00,-6178.070676,-5964.07068,-36467.813424,32.714667,164.716,525.093334
2018-04-14 10:00:00,-6167.128418,-5849.629513,-35909.067365,270.860001,435.706666,2213.49733
2018-04-14 11:00:00,-5890.553683,-5742.522847,-34843.698571,178.170667,470.382667,1839.656003


## 4. Define a function to generate daily scenarios from real data

In [90]:
# Each hour is a period, which is subdivided into smaller time frames (each frame of size delta - in minutes)
def generate_daily_scenarios(df, delta_size = 10):
    """Average the measurements over `delta_size`-minute frames and save them as a CSV file.

    Args:
        df: DataFrame indexed by timestamp that holds the renamed measurement columns
            (total PV generation, battery command, purchased power and the four per-array PV columns).
            It is not modified.
        delta_size: Length of each time frame, in minutes.

    Raises:
        ValueError: If `df` produces no time frames (e.g. it has no rows).

    The file is written to '<cwd>/scenarios/scenarios_<delta>m_<first date>_<last date>.csv.gz'.
    """
    out_dir = os.path.join(os.getcwd(), 'scenarios')
    # exist_ok=True: several pool workers may try to create this folder at the same time
    os.makedirs(out_dir, exist_ok=True)
    # Group all columns (average) by frequency 'freq'
    # https://stackoverflow.com/questions/24082784/pandas-dataframe-groupby-datetime-month
    df_ = df.groupby(pd.Grouper(freq=('%dmin' % delta_size))).mean()
    if df_.empty:
        raise ValueError('Cannot generate scenarios: the input dataframe has no rows.')
    # Drop unnecessary / redundant columns
    df_ = df_.drop(columns=['Active power generation by solar array 1 (kW)', 'Active power generation by solar array 2 (kW)',
                            'Active power generation by solar array 3 (kW)', 'Active power generation by solar array 4 (kW)'])
    # Negative PV production is replaced by zero
    pv_col = 'Total active power generation by all four solar arrays (kW)'
    df_.loc[df_[pv_col] < 0, pv_col] = 0
    df_['Building_Consumption'] = -(df_[pv_col]
                                    + df_['Active battery power command value (kW)']
                                    + df_['Active power of purchased electricity at the receiving end (kW)'])
    # Create a column with the scenario name (date)
    df_['scenario_name'] = df_.index.date
    # Use .iloc: plain [0] / [-1] on a datetime-indexed Series relies on the deprecated positional fallback
    initial_date = df_['scenario_name'].iloc[0]
    final_date = df_['scenario_name'].iloc[-1]
    # Save resulting scenarios to CSV
    output_path = os.path.join(out_dir, 'scenarios_%dm_%s_%s.csv.gz' % (delta_size, initial_date, final_date))
    df_.to_csv(output_path)
    print('Saved CSV file to ', output_path)

## 5. Define a function to collect hourly microgrid data statistics, concerning uncertain devices consumption/production of energy

In [91]:
def collect_hourly_microgrid_data(df, delta_size = 10):
    """Compute hourly statistics of building consumption and PV production.

    The input is copied, negative PV readings are set to zero, and the values are scaled by 1/60
    and summed over frames of `delta_size` minutes. For every hour, the 10th percentile,
    90th percentile and sum of those frame totals are returned, together with an 'hour' column.

    NOTE(review): the original comments describe the 1/60 scaling as a kW -> kWh conversion
    "during one minute"; confirm this against the sampling rate of the input data.

    Args:
        df: DataFrame indexed by timestamp with the renamed measurement columns.
        delta_size: Length of each time frame, in minutes.

    Returns:
        DataFrame indexed by hourly timestamps with the Pdt_min/Pdt_max/Pdt_sum columns
        for UNDS0_Building and UNDS1_PV, plus 'hour'.
    """
    pv_col = 'Total active power generation by all four solar arrays (kW)'
    battery_cmd_col = 'Active battery power command value (kW)'
    grid_col = 'Active power of purchased electricity at the receiving end (kW)'
    unused_columns = [
        'Active power generation by solar array 1 (kW)', 'Active power generation by solar array 2 (kW)',
        'Active power generation by solar array 3 (kW)', 'Active power generation by solar array 4 (kW)',
        'Active power of the battery (kW)', 'Direct voltage of the battery (V)', 'Direct current of the battery (A)',
        'State of charge of the battery (%)', battery_cmd_col,
        'Voltage of purchased electricity at the receiving end (V)',
    ]

    readings = df.copy()
    # Negative PV production is replaced by zero
    negative_pv = readings[pv_col] < 0
    readings.loc[negative_pv, pv_col] = 0
    # Consumption is the negated sum of PV generation, battery command and purchased power
    readings['Building_Consumption'] = -(readings[pv_col] + readings[battery_cmd_col] + readings[grid_col])
    # Keep only what is needed and shorten the PV column name (total of all 4 solar arrays)
    readings = readings.drop(columns=unused_columns).rename(columns={pv_col: 'PV_Production'})
    # Scale both uncertain quantities by 1/60 before summing them
    for scaled_col in ('Building_Consumption', 'PV_Production'):
        readings[scaled_col] = readings[scaled_col] / 60.0

    # Totals per frame of 'delta_size' minutes
    frame_totals = readings.groupby(pd.Grouper(freq=('%dmin' % delta_size))).sum()
    # Hourly 10th percentile, 90th percentile and sum of the frame totals
    hourly_spec = {'Building_Consumption': [q10, q90, 'sum'],
                   'PV_Production': [q10, q90, 'sum']}
    hourly = frame_totals.groupby(pd.Grouper(freq='60min')).agg(hourly_spec)
    # Flatten the two-level column labels into '<column>_<statistic>'
    hourly.columns = ['_'.join(levels).strip() for levels in hourly.columns.values]
    final_names = {
        'Building_Consumption_q10': 'Pdt_min_UNDS0_Building',
        'Building_Consumption_q90': 'Pdt_max_UNDS0_Building',
        'Building_Consumption_sum': 'Pdt_sum_UNDS0_Building',
        'PV_Production_q10': 'Pdt_min_UNDS1_PV',
        'PV_Production_q90': 'Pdt_max_UNDS1_PV',
        'PV_Production_sum': 'Pdt_sum_UNDS1_PV',
    }
    hourly = hourly.rename(columns=final_names)
    # Hour of day of each period, used later to group the same hour across days
    hourly['hour'] = hourly.index.hour
    return hourly

In [92]:
def read_process_dataframe(csv_filename_and_frequency):
    """Load one per-second CSV file, write its scenarios and return its hourly statistics.

    Args:
        csv_filename_and_frequency: Tuple `(filename, frequency)`, where `frequency` is the
            time-frame length in minutes. A single tuple argument lets this be used with `Pool.map`.

    Returns:
        The DataFrame produced by `collect_hourly_microgrid_data` for this file.
    """
    filename, frequency = csv_filename_and_frequency
    print('Processing CSV file %s for frequency %d...\n' % (filename, frequency))
    raw = pd.read_csv(filename, skiprows=[0, 2], encoding='mac_roman')
    readings = raw.rename(columns=column_names_dict)
    readings['timestamp'] = pd.to_datetime(readings['timestamp'])
    readings = readings.set_index('timestamp')
    # Write the scenario file for this CSV, using frames of `frequency` minutes
    generate_daily_scenarios(readings, frequency)
    # Collect hourly aggregated data to use when generating the microgrid instance for the RCCP
    hourly_stats = collect_hourly_microgrid_data(readings, frequency)
    print('Done processing CSV file %s.\n' % filename)
    return hourly_stats

In [100]:
def parallelize_dataframe_processing(file_list, func, n_cores=16):
    """Apply `func` to every item of `file_list` in a process pool and concatenate the results.

    Args:
        file_list: Items passed one by one to `func`.
        func: Picklable callable that takes one item and returns a DataFrame.
        n_cores: Maximum number of worker processes; never more workers than items are started.

    Returns:
        The concatenation (`pd.concat`) of all DataFrames returned by `func`, in input order.

    Raises:
        ValueError: If `file_list` is empty (raised before any process is started).
    """
    work_items = list(file_list)
    if not work_items:
        raise ValueError('file_list is empty: there is nothing to process.')
    # Do not start more workers than there are tasks; None keeps the Pool default
    n_workers = n_cores if n_cores is None else min(n_cores, len(work_items))
    # The context manager shuts the pool down even when a worker raises
    with mp.Pool(n_workers) as pool:
        partial_frames = pool.map(func, work_items)
    return pd.concat(partial_frames)

In [96]:
def generate_microgrid_instance(df, file_suffix):
    """Aggregate the hourly statistics by hour of day and save them as a microgrid instance CSV.

    Args:
        df: DataFrame with an 'hour' column and the Pdt_min/Pdt_max/Pdt_sum columns
            for UNDS0_Building and UNDS1_PV.
        file_suffix: Text appended to the output file name ('instance_delta<file_suffix>.csv').

    Returns:
        DataFrame indexed by hour with the Pdt_min, Pdt_max, Pmin and Pmax columns of both devices.
        The same frame is written to '<cwd>/instances/'.
    """
    out_dir = os.path.join(os.getcwd(), 'instances')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    # Per hour of day: smallest Pdt_min, largest Pdt_max, and the 10th/90th percentiles of the
    # hourly sums (which become Pmin / Pmax)
    per_hour_spec = {
        'Pdt_min_UNDS0_Building': ['min'],
        'Pdt_max_UNDS0_Building': ['max'],
        'Pdt_min_UNDS1_PV': ['min'],
        'Pdt_max_UNDS1_PV': ['max'],
        'Pdt_sum_UNDS0_Building': [q10, q90],
        'Pdt_sum_UNDS1_PV': [q10, q90],
    }
    df_hour = df.groupby(by=['hour']).agg(per_hour_spec)
    # Flatten the two-level column labels into '<column>_<statistic>'
    df_hour.columns = ['_'.join(levels).strip() for levels in df_hour.columns.values]
    final_names = {
        'Pdt_min_UNDS0_Building_min': 'Pdt_min_UNDS0_Building',
        'Pdt_max_UNDS0_Building_max': 'Pdt_max_UNDS0_Building',
        'Pdt_min_UNDS1_PV_min': 'Pdt_min_UNDS1_PV',
        'Pdt_max_UNDS1_PV_max': 'Pdt_max_UNDS1_PV',
        'Pdt_sum_UNDS0_Building_q10': 'Pmin_UNDS0_Building',
        'Pdt_sum_UNDS0_Building_q90': 'Pmax_UNDS0_Building',
        'Pdt_sum_UNDS1_PV_q10': 'Pmin_UNDS1_PV',
        'Pdt_sum_UNDS1_PV_q90': 'Pmax_UNDS1_PV',
    }
    df_hour = df_hour.rename(columns=final_names)
    # Save the resulting instance to CSV
    output_path = os.path.join(out_dir, 'instance_delta%s.csv' % file_suffix)
    df_hour.to_csv(output_path)
    print('Saved CSV file to ', output_path)
    return df_hour

In [None]:
# Every CSV in temp_dir covers one date range (e.g., '20180413-20180424SecCsv.csv').
# Pair each file with a 5-minute time frame and process all of them in parallel.
csv_paths = glob.glob(os.path.join(temp_dir, '*.csv'))
file_list = [(csv_path, 5) for csv_path in csv_paths]
df_all_hourly_5 = parallelize_dataframe_processing(file_list, read_process_dataframe)

Processing CSV file /tmp/microgrid/20170325-20170331SecCsv.csv for frequency 5...
Processing CSV file /tmp/microgrid/20150601-20150612SecCsv.csv for frequency 5...
Processing CSV file /tmp/microgrid/20151001-20151012SecCsv.csv for frequency 5...

Processing CSV file /tmp/microgrid/20150201-20150212SecCsv.csv for frequency 5...
Processing CSV file /tmp/microgrid/20150713-20150724SecCsv.csv for frequency 5...

Processing CSV file /tmp/microgrid/20150313-20150324SecCsv.csv for frequency 5...
Processing CSV file /tmp/microgrid/20150425-20150430SecCsv.csv for frequency 5...




In [None]:
# Generate hourly aggregated data to use when generating the microgrid instance for the RCCP
# (5-minute time frames; saved as 'instances/instance_delta5min.csv' under the working directory)
generate_microgrid_instance(df_all_hourly_5, '5min')

In [101]:
# Same processing as above, now with 10-minute time frames
csv_paths = glob.glob(os.path.join(temp_dir, '*.csv'))
file_list = [(csv_path, 10) for csv_path in csv_paths]
df_all_hourly_10 = parallelize_dataframe_processing(file_list, read_process_dataframe)

Processing CSV file /tmp/microgrid/20170325-20170331SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150225-20150228SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150601-20150612SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150201-20150212SecCsv.csv for frequency 10...

Processing CSV file /tmp/microgrid/20150313-20150324SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150513-20150524SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150113-20150124SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150625-20150630SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150713-20150724SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150401-20150412SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150425-20150430SecCsv.csv for frequency 10...
Processing CSV file /tmp/microgrid/20150825-20150831SecCsv.csv for frequency 10...
Pro

In [102]:
# Build and save the microgrid instance for 10-minute time frames ('instances/instance_delta10min.csv')
generate_microgrid_instance(df_all_hourly_10, '10min')

Saved CSV file to  /projetos/CZT0/doutorado_files/microgrid/instances/instance_delta10min.csv


Unnamed: 0_level_0,Pdt_min_UNDS0_Building,Pdt_max_UNDS0_Building,Pdt_min_UNDS1_PV,Pdt_max_UNDS1_PV,Pmin_UNDS0_Building,Pmax_UNDS0_Building,Pmin_UNDS1_PV,Pmax_UNDS1_PV
hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,-7886.500003,-2568.020016,-9.518667,0.0,-39597.963921,-29503.717987,0.0,0.0
1,-7735.979989,-2542.710009,-10.248667,0.0,-39406.921971,-29689.515992,0.0,0.0
2,-7821.759976,-2521.700012,-11.093333,0.0,-39324.555982,-29559.798009,0.0,0.0
3,-7696.720011,-2548.380025,-11.493333,0.0,-39269.354068,-29463.490261,0.0,0.0
4,-7600.209961,-2494.589997,-11.825333,0.0,-39372.652069,-29555.801952,0.0,0.0
5,-7781.044661,-2498.75,-11.890667,107.199332,-39912.995946,-29669.210106,0.0,84.086133
6,-8338.190011,0.0,-15.541333,260.642001,-42104.025493,-30478.89176,0.0,680.674002
7,-8686.909359,0.0,-7.546,418.568666,-44073.414455,-31169.095484,1.592133,1557.566004
8,-10858.620648,0.0,-8.964667,545.276666,-52868.332847,-32526.169548,151.589867,2373.77773
9,-11234.33798,0.0,-0.458,622.827333,-58028.915742,-32745.974239,351.872267,2964.286397
