In [43]:
# This is for Japan data source #5, Kansai Electric Power.
# Contributor(s): Sang-Won Yu

import pandas as pd
import numpy as np
from datetime import datetime
from urllib.request import Request, urlopen  # Python 3

def read_kansai_csv():
  """Download Kansai Electric Power supply/demand CSVs and reshape them.

  One CSV per year, from 2016 through the current calendar year, is fetched
  from kansai-td.co.jp and decoded as Shift-JIS. Only the first 13 columns of
  each file are kept and they are renamed to English by position; the yearly
  tables are then stacked and constant 'Region' / 'Unit' columns are added.

  Returns:
    tuple: (demand_df, supply_df)
      demand_df -- columns Date_Time, Region, Unit, Area_Demand, sorted by
        Date_Time descending.
      supply_df -- long format with columns Date_Time, Region, Unit,
        Fuel_Type, Supply, sorted by Date_Time then Fuel_Type descending.
      Rows containing NaN are dropped from both frames.

  Raises:
    urllib.error.URLError: if a yearly file cannot be downloaded.
    ValueError: if a file has fewer than 13 columns, or a Date_Time value
      does not match the '%Y/%m/%d %H:%M' format.
  """
  # English names for the 13 data columns, in the order they appear in the CSV
  column_names = ['Date_Time', 'Area_Demand', 'Nuclear', 'Thermal',
                  'Hydraulic', 'Geothermal', 'Biomass', 'Solar(Actual)',
                  'Solar(Output_Control)', 'Wind(Actual)', 'Wind(Output_Control)',
                  'Pumped_Hydro', 'Interconnector']

  yearly_frames = []
  for year in range(2016, datetime.now().year + 1):
    # Bypass 403 Forbidden error by sending a browser-like User-Agent
    req = Request('https://www.kansai-td.co.jp/denkiyoho/csv/area_jyukyu_jisseki_' +
                  str(year) + '.csv')
    req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0')

    # The context manager closes the HTTP response once the CSV is parsed
    with urlopen(req) as content:
      # header=1: the second line of the file holds the column names
      year_data = pd.read_csv(content, header=1, encoding='shift_jis')

    # Drop the NaN columns after the 13th and translate the Japanese column
    # names to English, per file, so the stacked table lines up by position
    year_data = year_data.iloc[:, :len(column_names)].copy()
    year_data.columns = column_names
    yearly_frames.append(year_data)

  # Combine multi-year CSVs into one dataframe with a single concat
  # (DataFrame.append no longer exists in pandas 2.x)
  combined_data = pd.concat(yearly_frames, ignore_index=True)

  # assign units and region
  combined_data['Region'] = 'Kansai'
  combined_data['Unit'] = 'MWh'

  # Format the datetime
  combined_data['Date_Time'] = pd.to_datetime(combined_data['Date_Time'],
                                              format='%Y/%m/%d %H:%M')

  id_columns = ['Date_Time', 'Region', 'Unit']

  # get demand data into one df; NaN rows are dropped after sorting
  demand_df = (combined_data[id_columns + ['Area_Demand']]
               .sort_values(by=['Date_Time'], ascending=False)
               .dropna())

  # get supply data into another df without mutating combined_data,
  # then pivot "wide" to "long" format (one row per timestamp and fuel type)
  supply_wide = combined_data.drop(columns='Area_Demand')
  supply_df = (pd.melt(supply_wide, id_vars=id_columns,
                       var_name='Fuel_Type', value_name='Supply')
               .sort_values(by=['Date_Time', 'Fuel_Type'], ascending=False)
               .dropna())

  return demand_df, supply_df

# Download and parse every yearly file. read_kansai_csv() returns a
# (demand_df, supply_df) tuple, so kansai_data[0] is the demand table and
# kansai_data[1] is the long-format supply table.
kansai_data = read_kansai_csv()

In [44]:
# Display the demand dataframe (first element of the tuple returned by read_kansai_csv)
kansai_data[0]

Unnamed: 0,Date_Time,Region,Unit,Area_Demand
53306,2022-04-30 23:00:00,Kansai,MWh,11211.0
53305,2022-04-30 22:00:00,Kansai,MWh,11618.0
53304,2022-04-30 21:00:00,Kansai,MWh,12286.0
53303,2022-04-30 20:00:00,Kansai,MWh,12907.0
53302,2022-04-30 19:00:00,Kansai,MWh,13311.0
...,...,...,...,...
4,2016-04-01 04:00:00,Kansai,MWh,13307.0
3,2016-04-01 03:00:00,Kansai,MWh,13591.0
2,2016-04-01 02:00:00,Kansai,MWh,13351.0
1,2016-04-01 01:00:00,Kansai,MWh,12835.0


In [45]:
# Display the long-format supply dataframe (second element of the tuple returned by read_kansai_csv)
kansai_data[1]

Unnamed: 0,Date_Time,Region,Unit,Fuel_Type,Supply
479762,2022-04-30 23:00:00,Kansai,MWh,Wind(Output_Control),0.0
426455,2022-04-30 23:00:00,Kansai,MWh,Wind(Actual),39.0
106613,2022-04-30 23:00:00,Kansai,MWh,Thermal,5327.0
373148,2022-04-30 23:00:00,Kansai,MWh,Solar(Output_Control),0.0
319841,2022-04-30 23:00:00,Kansai,MWh,Solar(Actual),0.0
...,...,...,...,...,...
0,2016-04-01 00:00:00,Kansai,MWh,Nuclear,0.0
533070,2016-04-01 00:00:00,Kansai,MWh,Interconnector,1625.0
106614,2016-04-01 00:00:00,Kansai,MWh,Hydraulic,758.0
159921,2016-04-01 00:00:00,Kansai,MWh,Geothermal,0.0
