# Microgrid in Japan - Robust instance generation from real data

Data source (Scientific Data article describing the dataset): https://www.nature.com/articles/sdata201920

In [2]:
import csv
from io import TextIOWrapper
from zipfile import ZipFile
import pandas as pd
import os
import glob
import multiprocessing as mp

In [3]:
# 10th percentile
def q10(x):
    """Return the 10th percentile of ``x``.

    ``x`` must expose a pandas-style ``quantile`` method. The function name is
    significant: when this function is passed to ``agg``, pandas uses its name
    as the label of the aggregated column.
    """
    lower_fraction = 0.1
    return x.quantile(lower_fraction)

# 90th percentile
def q90(x):
    """Return the 90th percentile of ``x``.

    ``x`` must expose a pandas-style ``quantile`` method. The function name is
    significant: when this function is passed to ``agg``, pandas uses its name
    as the label of the aggregated column.
    """
    upper_fraction = 0.9
    return x.quantile(upper_fraction)

In [5]:
# Maps the column codes found in the CSV header to descriptive column names.
# Reference for the codes: https://www.nature.com/articles/sdata201920.pdf
column_names_dict = {
    '#': 'timestamp',
    # Battery
    '10101': 'Active power of the battery (kW)',
    '10105': 'Direct voltage of the battery (V)',
    '10106': 'Direct current of the battery (A)',
    # Purchased electricity
    '10201': 'Voltage of purchased electricity at the receiving end (V)',
    '10203': 'Active power of purchased electricity at the receiving end (kW)',
    # Aggregated solar generation, battery command and state of charge
    '10307': 'Total active power generation by all four solar arrays (kW)',
    '12144': 'Active battery power command value (kW)',
    '12152': 'State of charge of the battery (%)',
    # Irradiance and individual solar arrays
    '20104': 'Solar irradiance (W/m^2)',
    '20106': 'Active power generation by solar array 1 (kW)',
    '20109': 'Active power generation by solar array 2 (kW)',
    '20112': 'Active power generation by solar array 3 (kW)',
    '20115': 'Active power generation by solar array 4 (kW)',
}

### IMPORTANT: Before running the cells below, extract the zip file '2 Cleaned_data_per_second.zip' into the folder /tmp/microgrid.

In [13]:
# Folder that must contain the extracted dataset CSV files.
temp_dir = '/tmp/microgrid'
# exist_ok=True makes the call idempotent without a separate existence check
# (no window between "check" and "create"), and it raises if the path exists
# but is not a directory instead of silently continuing.
os.makedirs(temp_dir, exist_ok=True)
# NOW, UNZIP THE FILE TO THE temp_dir FOLDER, USING UNZIP TOOL IN COMMAND-LINE !

## 1. Define a function to collect hourly microgrid statistics on the energy consumption/production of the uncertain devices

In [4]:
def collect_hourly_microgrid_data(df, delta_size=10, samples_per_hour=60.0):
    """Aggregate raw microgrid measurements into hourly energy statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Measurements indexed by timestamp (the time-based grouping below needs
        a datetime-like index). It must contain the columns
        'Total active power generation by all four solar arrays (kW)',
        'Active battery power command value (kW)' and
        'Active power of purchased electricity at the receiving end (kW)'.
        The frame is not modified.
    delta_size : int
        Length, in minutes, of the intermediate periods whose energy is summed
        before the hourly statistics are computed.
    samples_per_hour : float
        Divisor used to turn each power sample (kW) into the energy (kWh) of
        one sample interval. The default of 60.0 (one sample per minute)
        reproduces the historical behaviour of this function.
        NOTE(review): the input files are named '*SecCsv.csv' and come from
        'Cleaned_data_per_second'; if each row really is one second, 3600.0
        would be the physically correct value - confirm before changing.

    Returns
    -------
    pandas.DataFrame
        One row per hour with the columns Pdt_min_* / Pdt_max_* (10th / 90th
        percentile of the per-period energy within the hour), Pdt_sum_*
        (total energy of the hour) for the building (UNDS0) and the PV arrays
        (UNDS1), plus 'hour' (hour of day of the row).

    Raises
    ------
    ValueError
        If ``samples_per_hour`` is not strictly positive.
    KeyError
        If one of the required columns is missing from ``df``.
    """
    if samples_per_hour <= 0:
        raise ValueError('samples_per_hour must be positive, got %r' % (samples_per_hour,))

    pv_column = 'Total active power generation by all four solar arrays (kW)'
    battery_column = 'Active battery power command value (kW)'
    grid_column = 'Active power of purchased electricity at the receiving end (kW)'

    # Work on a copy restricted to the columns that are actually needed, so the
    # caller's frame is untouched and unrelated columns are never aggregated.
    df_ = df[[pv_column, battery_column, grid_column]].copy()
    #### There are negative values for PV solar production. Let's replace them by zero.
    df_[pv_column] = df_[pv_column].mask(df_[pv_column] < 0, 0)
    # Building consumption is derived as the negated sum of PV generation,
    # battery command value and purchased power, then converted from power (kW)
    # to the energy (kWh) of one sample interval.
    df_['Building_Consumption'] = -(df_[pv_column] + df_[battery_column] + df_[grid_column]) / samples_per_hour
    # Uncertain PV production (all 4 solar arrays), converted the same way.
    df_['PV_Production'] = df_[pv_column] / samples_per_hour
    df_ = df_[['Building_Consumption', 'PV_Production']]

    # First sum the energy over periods of 'delta_size' minutes ...
    df_delta = df_.groupby(pd.Grouper(freq=('%dmin' % delta_size))).sum()
    # ... then, for every hour, take the 10th percentile, the 90th percentile
    # and the sum of those per-period energies.
    df_pdt = df_delta.groupby(pd.Grouper(freq='60min')).agg({'Building_Consumption': [q10, q90, 'sum'],
                                                             'PV_Production': [q10, q90, 'sum']})
    # Flatten the (column, statistic) MultiIndex into 'column_statistic' names
    df_pdt.columns = ['_'.join(col).strip() for col in df_pdt.columns.values]
    # Rename to the names used when the instance is generated
    df_pdt.rename({'Building_Consumption_q10': 'Pdt_min_UNDS0_Building',
                   'Building_Consumption_q90': 'Pdt_max_UNDS0_Building',
                   'Building_Consumption_sum': 'Pdt_sum_UNDS0_Building',
                   'PV_Production_q10': 'Pdt_min_UNDS1_PV',
                   'PV_Production_q90': 'Pdt_max_UNDS1_PV',
                   'PV_Production_sum': 'Pdt_sum_UNDS1_PV',
                   }, axis='columns', inplace=True)
    # Create a column with the hour of the corresponding period
    df_pdt['hour'] = df_pdt.index.hour
    return df_pdt

## 2. Define a function to process each original dataframe from the microgrid dataset

In [6]:
def read_process_dataframe(csv_filename_and_frequency):
    """Load one dataset CSV file and return its hourly statistics.

    Parameters
    ----------
    csv_filename_and_frequency : tuple
        ``(filename, frequency)`` pair: the path of the CSV file and the
        aggregation period in minutes that is forwarded to
        ``collect_hourly_microgrid_data``. A single tuple argument is used so
        the function can be mapped directly over a list of work items.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``collect_hourly_microgrid_data`` for this file.
    """
    csv_path, delta_minutes = csv_filename_and_frequency
    print('Processing CSV file %s with frequency %d...\n' % (csv_path, delta_minutes))
    # The first and third lines of the file are skipped, so the second line
    # is the one used as header.
    raw = pd.read_csv(csv_path, skiprows=[0, 2], encoding='mac_roman')
    # Replace the column codes by descriptive names.
    measurements = raw.rename(columns=column_names_dict)
    # Index the measurements by parsed timestamp.
    measurements['timestamp'] = pd.to_datetime(measurements['timestamp'])
    measurements = measurements.set_index('timestamp')
    # Collect hourly aggregated data to use when generating the microgrid instance for the RCCP
    hourly_stats = collect_hourly_microgrid_data(measurements, delta_minutes)
    print('Done processing CSV file %s.\n' % csv_path)
    return hourly_stats

In [7]:
def parallelize_dataframe_processing(file_list, func, n_cores=16):
    """Apply ``func`` to every item of ``file_list`` in parallel and concatenate the results.

    Parameters
    ----------
    file_list : iterable
        Work items; each one is passed as the single argument of ``func``.
    func : callable
        Picklable callable returning a pandas object for one work item.
    n_cores : int or None
        Upper bound on the number of worker processes. No more workers than
        work items are started. ``None`` lets ``multiprocessing`` choose.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-item results, in the order of ``file_list``.

    Raises
    ------
    ValueError
        If ``file_list`` is empty (there would be nothing to concatenate).
    """
    work_items = list(file_list)
    if not work_items:
        # Fail before spawning any worker, with a message that names the cause.
        raise ValueError('No input files to process: file_list is empty.')
    n_workers = n_cores if n_cores is None else min(n_cores, len(work_items))
    # The context manager releases the worker processes even when func raises.
    with mp.Pool(n_workers) as pool:
        frames = pool.map(func, work_items)
    return pd.concat(frames)

## 3. Define a function to create a microgrid instance from the hourly statistics of a set of dataset files (the caller selects which months and years are included)

In [8]:
def generate_microgrid_instance(df, file_suffix):
    """Reduce hourly statistics to one row per hour of day and save them as CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Hourly statistics with an 'hour' column and the Pdt_min_*, Pdt_max_*
        and Pdt_sum_* columns for UNDS0_Building and UNDS1_PV.
    file_suffix : str
        Suffix inserted in the output file name 'instance_delta<suffix>.csv'.

    Returns
    -------
    pandas.DataFrame
        The per-hour-of-day frame that was written to
        '<current working directory>/instances/'.
    """
    target_dir = os.path.join(os.getcwd(), 'instances')
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Per hour of day: the smallest Pdt_min, the largest Pdt_max, and the
    # 10th / 90th percentile of the hourly sums (which become Pmin / Pmax).
    statistics = {
        'Pdt_min_UNDS0_Building': ['min'],
        'Pdt_max_UNDS0_Building': ['max'],
        'Pdt_min_UNDS1_PV': ['min'],
        'Pdt_max_UNDS1_PV': ['max'],
        'Pdt_sum_UNDS0_Building': [q10, q90],
        'Pdt_sum_UNDS1_PV': [q10, q90],
    }
    per_hour = df.groupby(by=['hour']).agg(statistics)

    # Collapse the (column, statistic) pairs into 'column_statistic' names.
    per_hour.columns = ['_'.join(parts).strip() for parts in per_hour.columns.values]

    # Give the aggregated columns their final instance names.
    final_names = {
        'Pdt_min_UNDS0_Building_min': 'Pdt_min_UNDS0_Building',
        'Pdt_max_UNDS0_Building_max': 'Pdt_max_UNDS0_Building',
        'Pdt_min_UNDS1_PV_min': 'Pdt_min_UNDS1_PV',
        'Pdt_max_UNDS1_PV_max': 'Pdt_max_UNDS1_PV',
        'Pdt_sum_UNDS0_Building_q10': 'Pmin_UNDS0_Building',
        'Pdt_sum_UNDS0_Building_q90': 'Pmax_UNDS0_Building',
        'Pdt_sum_UNDS1_PV_q10': 'Pmin_UNDS1_PV',
        'Pdt_sum_UNDS1_PV_q90': 'Pmax_UNDS1_PV',
    }
    per_hour = per_hour.rename(columns=final_names)

    # Write the instance to disk and report where it went.
    csv_path = os.path.join(target_dir, 'instance_delta%s.csv' % file_suffix)
    per_hour.to_csv(csv_path)
    print('Saved CSV file to ', csv_path)
    return per_hour

## 3. Generate microgrid instance data for delta = 5 min in [2015, 2016]

In [11]:
# Japan is in the Northern Hemisphere, so winter starts in December.
# First calendar month of each season.
start_month = dict(winter=12, spring=3, summer=6, autumn=9)
# Last calendar month of each season.
end_month = dict(winter=2, spring=5, summer=8, autumn=11)

In [17]:
# Build one microgrid instance per season, using 5-minute periods.
for season in ['summer', 'spring', 'autumn', 'winter']:
    # The months of a season do not depend on the year. Winter wraps around the
    # year end, so it is listed explicitly instead of being built with range();
    # for a given year it uses December, January and February of that same year.
    if season != 'winter':
        month_list = list(range(start_month[season], end_month[season] + 1))
    else:
        month_list = [12, 1, 2]
    # Pair every dataset file of this season with the period length (5 minutes).
    file_list = []
    for year in [2015, 2016]:
        for month in month_list:
            file_list.extend((x, 5) for x in glob.glob(os.path.join(temp_dir, '%d%02d*.csv' % (year, month))))
    print("Season %s: will process the following dataset files: " % (season), file_list)
    # Process the files in parallel, then reduce them to a single instance file.
    df_all_hourly_5 = parallelize_dataframe_processing(file_list, read_process_dataframe)
    generate_microgrid_instance(df_all_hourly_5, '5min_%s' % (season))

Pattern: 201506*.csv
Pattern: 201507*.csv
Pattern: 201508*.csv
Pattern: 201606*.csv
Pattern: 201607*.csv
Pattern: 201608*.csv
Season summer: will process the following dataset files:  [('/tmp/microgrid/20150601-20150612SecCsv.csv', 5), ('/tmp/microgrid/20150613-20150624SecCsv.csv', 5), ('/tmp/microgrid/20150625-20150630SecCsv.csv', 5), ('/tmp/microgrid/20150701-20150712SecCsv.csv', 5), ('/tmp/microgrid/20150713-20150724SecCsv.csv', 5), ('/tmp/microgrid/20150725-20150731SecCsv.csv', 5), ('/tmp/microgrid/20150801-20150812SecCsv.csv', 5), ('/tmp/microgrid/20150813-20150824SecCsv.csv', 5), ('/tmp/microgrid/20150825-20150831SecCsv.csv', 5), ('/tmp/microgrid/20160601-20160612SecCsv.csv', 5), ('/tmp/microgrid/20160613-20160624SecCsv.csv', 5), ('/tmp/microgrid/20160625-20160630SecCsv.csv', 5), ('/tmp/microgrid/20160701-20160712SecCsv.csv', 5), ('/tmp/microgrid/20160713-20160724SecCsv.csv', 5), ('/tmp/microgrid/20160725-20160731SecCsv.csv', 5), ('/tmp/microgrid/20160801-20160812SecCsv.csv', 5),

## 4. Generate microgrid instances for delta = 10 min in [2015, 2016]

In [None]:
# Build one microgrid instance per season, using 10-minute periods.
for season in ['summer', 'spring', 'autumn', 'winter']:
    # The months of a season do not depend on the year. Winter wraps around the
    # year end, so it is listed explicitly instead of being built with range();
    # for a given year it uses December, January and February of that same year.
    if season != 'winter':
        month_list = list(range(start_month[season], end_month[season] + 1))
    else:
        month_list = [12, 1, 2]
    # Pair every dataset file of this season with the period length (10 minutes).
    file_list = []
    for year in [2015, 2016]:
        for month in month_list:
            file_list.extend((x, 10) for x in glob.glob(os.path.join(temp_dir, '%d%02d*.csv' % (year, month))))
    print("Season %s: will process the following dataset files: " % (season), file_list)
    # Process the files in parallel, then reduce them to a single instance file.
    df_all_hourly_10 = parallelize_dataframe_processing(file_list, read_process_dataframe)
    generate_microgrid_instance(df_all_hourly_10, '10min_%s' % (season))

Season summer: will process the following dataset files:  [('/tmp/microgrid/20150601-20150612SecCsv.csv', 10), ('/tmp/microgrid/20150613-20150624SecCsv.csv', 10), ('/tmp/microgrid/20150625-20150630SecCsv.csv', 10), ('/tmp/microgrid/20150701-20150712SecCsv.csv', 10), ('/tmp/microgrid/20150713-20150724SecCsv.csv', 10), ('/tmp/microgrid/20150725-20150731SecCsv.csv', 10), ('/tmp/microgrid/20150801-20150812SecCsv.csv', 10), ('/tmp/microgrid/20150813-20150824SecCsv.csv', 10), ('/tmp/microgrid/20150825-20150831SecCsv.csv', 10), ('/tmp/microgrid/20160601-20160612SecCsv.csv', 10), ('/tmp/microgrid/20160613-20160624SecCsv.csv', 10), ('/tmp/microgrid/20160625-20160630SecCsv.csv', 10), ('/tmp/microgrid/20160701-20160712SecCsv.csv', 10), ('/tmp/microgrid/20160713-20160724SecCsv.csv', 10), ('/tmp/microgrid/20160725-20160731SecCsv.csv', 10), ('/tmp/microgrid/20160801-20160812SecCsv.csv', 10), ('/tmp/microgrid/20160813-20160824SecCsv.csv', 10), ('/tmp/microgrid/20160825-20160831SecCsv.csv', 10)]
Proce