# Microgrid in Japan - Exploratory Analysis

Data source (Nature Scientific Data article): https://www.nature.com/articles/sdata201920

In [106]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import matplotlib.pyplot
import os
import csv
from io import TextIOWrapper
from zipfile import ZipFile
import glob
import multiprocessing as mp

%matplotlib inline

## 1. Load pre-processed dataset files containing power values at 5-minute resolution

In [107]:
def load_dataset(path):
    """Read one scenario CSV file and index it by its ``timestamp`` column.

    Parameters
    ----------
    path : str or file-like
        Anything ``pd.read_csv`` accepts (compression is inferred by pandas
        from the file extension).

    Returns
    -------
    pandas.DataFrame
        The file contents with ``timestamp`` moved from the columns into the
        index. The timestamps are not parsed here; they keep whatever dtype
        ``pd.read_csv`` inferred.

    Raises
    ------
    KeyError
        If the file has no ``timestamp`` column.
    """
    scenario = pd.read_csv(path)
    scenario = scenario.set_index('timestamp')
    return scenario

In [108]:
def parallelize_dataframe_processing(file_list, func, n_cores=16):
    """Apply ``func`` to every item of ``file_list`` and concatenate the results.

    Parameters
    ----------
    file_list : iterable
        Items (typically file paths) handed one by one to ``func``.
    func : callable
        Takes one item and returns a DataFrame. When more than one worker
        process is used it must be picklable (e.g. a module-level function).
    n_cores : int or None, default 16
        Upper bound on the number of worker processes. ``None`` means
        "one per CPU". The effective number never exceeds ``len(file_list)``.

    Returns
    -------
    pandas.DataFrame
        ``pd.concat`` of the per-item frames, in the order of ``file_list``.

    Raises
    ------
    ValueError
        If ``file_list`` is empty or ``n_cores`` is smaller than 1.
    """
    file_list = list(file_list)
    if not file_list:
        # Fail before any process is spawned, with a message that points at
        # the real cause instead of pandas' "No objects to concatenate".
        raise ValueError("file_list is empty: there is nothing to load")

    if n_cores is None:
        n_cores = mp.cpu_count()
    if n_cores < 1:
        raise ValueError("n_cores must be at least 1")

    # Workers beyond the number of items would only sit idle.
    n_workers = min(n_cores, len(file_list))

    if n_workers == 1:
        # A single worker gains nothing from a pool; run in-process and avoid
        # the process start-up and pickling overhead.
        frames = [func(item) for item in file_list]
    else:
        # The context manager releases the worker processes even when
        # ``func`` raises inside ``pool.map``.
        with mp.Pool(n_workers) as pool:
            frames = pool.map(func, file_list)

    return pd.concat(frames)

In [124]:
# Directory with the pre-processed scenario files, resolved against the
# notebook's current working directory.
dataset_treated_dir = os.path.join(os.getcwd(), 'scenarios')
print("dataset_treated_dir: " + dataset_treated_dir)

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the row order of the concatenated frame the same on every run and machine.
file_list = sorted(glob.glob(os.path.join(dataset_treated_dir, '*_5m_*.csv.gz')))
if not file_list:
    raise FileNotFoundError(
        "No '*_5m_*.csv.gz' files found in " + dataset_treated_dir
    )

df = parallelize_dataframe_processing(file_list, load_dataset)
df.head(4)

dataset_treated_dir: /projetos/CZT0/doutorado_files/microgrid/scenarios


Unnamed: 0_level_0,Active power of the battery (kW),Direct voltage of the battery (V),Direct current of the battery (A),Voltage of purchased electricity at the receiving end (V),Active power of purchased electricity at the receiving end (kW),Total active power generation by all four solar arrays (kW),Active battery power command value (kW),State of charge of the battery (%),Building_Consumption,scenario_name,Unnamed: 13,Unnamed: 14
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-04-25 00:00:00,-0.884,344.981073,-0.53125,6528.5475,465.308003,0.0,0.0,95.0,-465.308003,2015-04-25,,
2015-04-25 00:05:00,-0.86725,344.857349,-0.402083,6583.8375,474.532002,0.0,0.0,95.0,-474.532002,2015-04-25,,
2015-04-25 00:10:00,-0.8705,344.82705,-0.430208,6589.545,486.660001,0.0,0.0,95.0,-486.660001,2015-04-25,,
2015-04-25 00:15:00,-0.86675,344.745829,-0.442708,6560.505,474.939998,0.0,0.0,95.0,-474.939998,2015-04-25,,


In [125]:
# NOTE: this cell is not idempotent. Running it a second time fails on the
# drop (the columns are already gone), and the sign flip would be applied again.

# Remove the two unnamed columns that the loaded files carry along.
unnamed_columns = ['Unnamed: 13', 'Unnamed: 14']
df = df.drop(columns=unnamed_columns)

# Invert the sign of the building consumption values.
df['Building_Consumption'] = df['Building_Consumption'] * -1

# Turn the timestamp index into a DatetimeIndex.
df.index = pd.DatetimeIndex(df.index)
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 348480 entries, 2015-04-25 00:00:00 to 2018-03-24 23:55:00
Data columns (total 10 columns):
 #   Column                                                           Non-Null Count   Dtype  
---  ------                                                           --------------   -----  
 0   Active power of the battery (kW)                                 347988 non-null  float64
 1   Direct voltage of the battery (V)                                347853 non-null  float64
 2   Direct current of the battery (A)                                341950 non-null  float64
 3   Voltage of purchased electricity at the receiving end (V)        347988 non-null  float64
 4   Active power of purchased electricity at the receiving end (kW)  347988 non-null  float64
 5   Total active power generation by all four solar arrays (kW)      347988 non-null  float64
 6   Active battery power command value (kW)                          347988 non-null  float64


## 2. Analyzing data types and their distribution

In [126]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Active power of the battery (kW),Direct voltage of the battery (V),Direct current of the battery (A),Voltage of purchased electricity at the receiving end (V),Active power of purchased electricity at the receiving end (kW),Total active power generation by all four solar arrays (kW),Active battery power command value (kW),State of charge of the battery (%),Building_Consumption
count,347988.0,347853.0,341950.0,347988.0,347988.0,347988.0,347988.0,341946.0,347988.0
mean,-1.327977,343.582773,-2.386844,6644.043147,650.560689,10.034491,-2.280293,91.780048,658.314887
std,13.853739,14.735598,41.509841,76.317432,123.801778,16.646133,18.053633,16.390221,131.558409
min,-88.7165,0.0,-245.358333,6432.2775,108.319999,-2.278933,-90.0,30.128633,108.319999
25%,-0.91875,339.88141,-1.690625,6589.26,563.575998,0.0,0.0,91.190002,566.035535
50%,-0.85275,343.653338,-0.245833,6641.1675,630.316003,0.0,0.0,94.860001,632.292003
75%,-0.7525,345.917423,-0.007292,6694.5525,719.276,15.049667,0.0,95.0,730.364469
max,90.239,421.41198,312.526042,6987.1125,1238.983996,75.885867,90.0,322.158166,1292.72513


#### There are negative values for solar PV production (minimum ≈ -2.28 kW). Let's replace them with zero.

In [127]:
# Set every negative PV generation reading to zero; NaN readings are left as they are.
pv_column = 'Total active power generation by all four solar arrays (kW)'
is_negative_generation = df[pv_column] < 0
df.loc[is_negative_generation, pv_column] = 0

# Re-display the summary to check that the column minimum is now 0.
df.describe()

Unnamed: 0,Active power of the battery (kW),Direct voltage of the battery (V),Direct current of the battery (A),Voltage of purchased electricity at the receiving end (V),Active power of purchased electricity at the receiving end (kW),Total active power generation by all four solar arrays (kW),Active battery power command value (kW),State of charge of the battery (%),Building_Consumption
count,347988.0,347853.0,341950.0,347988.0,347988.0,347988.0,347988.0,341946.0,347988.0
mean,-1.327977,343.582773,-2.386844,6644.043147,650.560689,10.035891,-2.280293,91.780048,658.314887
std,13.853739,14.735598,41.509841,76.317432,123.801778,16.645261,18.053633,16.390221,131.558409
min,-88.7165,0.0,-245.358333,6432.2775,108.319999,0.0,-90.0,30.128633,108.319999
25%,-0.91875,339.88141,-1.690625,6589.26,563.575998,0.0,0.0,91.190002,566.035535
50%,-0.85275,343.653338,-0.245833,6641.1675,630.316003,0.0,0.0,94.860001,632.292003
75%,-0.7525,345.917423,-0.007292,6694.5525,719.276,15.049667,0.0,95.0,730.364469
max,90.239,421.41198,312.526042,6987.1125,1238.983996,75.885867,90.0,322.158166,1292.72513


In [129]:
# Same summary with extra upper-tail percentiles (90% up to 99.99%) to show
# how far the largest values sit from the bulk of the data.
df.describe(percentiles=[0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 0.997, 0.998, 0.999, 0.9999])

Unnamed: 0,Active power of the battery (kW),Direct voltage of the battery (V),Direct current of the battery (A),Voltage of purchased electricity at the receiving end (V),Active power of purchased electricity at the receiving end (kW),Total active power generation by all four solar arrays (kW),Active battery power command value (kW),State of charge of the battery (%),Building_Consumption
count,347988.0,347853.0,341950.0,347988.0,347988.0,347988.0,347988.0,341946.0,347988.0
mean,-1.327977,343.582773,-2.386844,6644.043147,650.560689,10.035891,-2.280293,91.780048,658.314887
std,13.853739,14.735598,41.509841,76.317432,123.801778,16.645261,18.053633,16.390221,131.558409
min,-88.7165,0.0,-245.358333,6432.2775,108.319999,0.0,-90.0,30.128633,108.319999
25%,-0.91875,339.88141,-1.690625,6589.26,563.575998,0.0,0.0,91.190002,566.035535
50%,-0.85275,343.653338,-0.245833,6641.1675,630.316003,0.0,0.0,94.860001,632.292003
75%,-0.7525,345.917423,-0.007292,6694.5525,719.276,15.049667,0.0,95.0,730.364469
90%,5.23275,351.354845,17.304479,6744.51,828.956,39.169066,5.369477,95.0,850.127093
95%,12.866475,372.396174,42.084948,6776.157375,896.055997,48.907707,13.118383,95.235834,921.770073
99%,50.066847,393.866754,162.341198,6833.430975,990.04452,60.017437,55.443318,147.089996,1031.448502


In [128]:
# Number of missing (NaN) values in each column.
df.isna().sum()

Active power of the battery (kW)                                    492
Direct voltage of the battery (V)                                   627
Direct current of the battery (A)                                  6530
Voltage of purchased electricity at the receiving end (V)           492
Active power of purchased electricity at the receiving end (kW)     492
Total active power generation by all four solar arrays (kW)         492
Active battery power command value (kW)                             492
State of charge of the battery (%)                                 6534
Building_Consumption                                                492
scenario_name                                                         0
dtype: int64

In [132]:
# Rows where the battery active-power reading is missing. In the rows displayed,
# every measurement column is NaN, which suggests whole samples are missing
# rather than a single sensor (to be confirmed on the full set).
df[df['Active power of the battery (kW)'].isna()]

Unnamed: 0_level_0,Active power of the battery (kW),Direct voltage of the battery (V),Direct current of the battery (A),Voltage of purchased electricity at the receiving end (V),Active power of purchased electricity at the receiving end (kW),Total active power generation by all four solar arrays (kW),Active battery power command value (kW),State of charge of the battery (%),Building_Consumption,scenario_name
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-11-13 10:40:00,,,,,,,,,,2015-11-13
2015-11-13 10:45:00,,,,,,,,,,2015-11-13
2015-11-13 12:10:00,,,,,,,,,,2015-11-13
2015-11-13 12:15:00,,,,,,,,,,2015-11-13
2015-11-14 06:20:00,,,,,,,,,,2015-11-14
...,...,...,...,...,...,...,...,...,...,...
2017-11-18 18:00:00,,,,,,,,,,2017-11-18
2017-11-18 18:05:00,,,,,,,,,,2017-11-18
2017-11-18 18:10:00,,,,,,,,,,2017-11-18
2017-11-18 18:15:00,,,,,,,,,,2017-11-18


## For now, we will ignore these NaN values; they will NOT be replaced with zeros