# Get Energy data

- 자료 출처: https://www.data.go.kr/iim/api/selectAPIAcountView.do
- 선정 기준: 2017년 1월부터 2020년 7월까지의 발전량 데이터가 결측치 없이 존재하는 지역으로 선정

In [1]:
from glob import glob
import pandas as pd
import warnings
# Silence every warning for the rest of the session: no category, module or
# message filter is given, so deprecation and performance warnings are hidden too.
warnings.filterwarnings(action='ignore')

In [2]:
# Capacity figure for each site, keyed by the location name.
# NOTE(review): the unit is not stated anywhere in this notebook — confirm
# against the source data before using these values.
capacity = {
    'jinju': 905,
    'gumi': 992,
    'changwon': 77,
    'yeosu': 63,
    'dangjin': 1000,
    'ulsan': 500,
    'gangneung': 1065,
    'yecheon': 2000,
    'jangseong': 1010,
}

In [3]:
# Directories holding the raw CSV files of the two data sources
# (Korea South-East Power and Korea East-West Power, per the folder names).
namdong_path, dongseo_path = (
    '../data/한국남동발전(주)_태양광 발전 시간대별 발전량',
    '../data/한국동서발전(주)_태양광 발전량 현황',
)

In [4]:
# List every CSV in each source directory. The lists are sorted because later
# cells pick files by their position in these lists.
namdong_path_list = sorted(glob(f'{namdong_path}/*.csv'))
dongseo_path_list = sorted(glob(f'{dongseo_path}/*.csv'))

In [5]:
# Show the discovered East-West Power files; a later cell selects one of them
# by position (dongseo_path_list[-3]), so the order shown here matters.
dongseo_path_list

['../data/한국동서발전(주)_태양광 발전량 현황/태양광 발전량 현황(2017_2020).csv',
 '../data/한국동서발전(주)_태양광 발전량 현황/한국동서발전 시간대별 태양광 및 풍력 발전량 현황(2018_2019).csv',
 '../data/한국동서발전(주)_태양광 발전량 현황/한국동서발전 시간대별 태양광 발전량 현황(2015_2017).csv',
 '../data/한국동서발전(주)_태양광 발전량 현황/한국동서발전 태양광 설치현황 정보(2019).csv',
 '../data/한국동서발전(주)_태양광 발전량 현황/한국동서발전 태양광 일별 발전량 현황(2014_2018).csv',
 '../data/한국동서발전(주)_태양광 발전량 현황/한국동서발전 호남 태양광 발전량 현황(2015_2019).csv',
 '../data/한국동서발전(주)_태양광 발전량 현황/한국동서발전_태양광 발전량 현황(-2017).csv',
 '../data/한국동서발전(주)_태양광 발전량 현황/한국동서발전_태양광 발전량 현황(2016_2018).csv',
 '../data/한국동서발전(주)_태양광 발전량 현황/한국동서발전_태양광 발전량 현황(2018-).csv']

In [6]:
# Reference hourly timeline: one row per hour from 2017-01-01 01:00:00 through
# 2020-07-01 00:00:00 (both ends inclusive), in a single 'time' column.
# The lowercase 'h' alias replaces 'H', which is deprecated in recent pandas
# releases; both describe the same hourly frequency.
time_df = pd.DataFrame({
    'time': pd.date_range(start='20170101 01:00:00', end='20200701 00:00:00', freq='h'),
})

In [7]:
def make_df(path, start, location):
    """Build one plant's hourly generation column aligned to ``time_df``.

    Four consecutive CSV files are read from ``path`` (positions ``start`` to
    ``start + 3``; presumably one file per year for 2017-2020 — confirm against
    the directory listing), stacked, reshaped from one-column-per-hour into
    one-row-per-hour, and left-joined onto the module-level ``time_df``.

    Parameters
    ----------
    path : sequence of str
        Ordered list of CSV file paths. The files are decoded as cp949 and
        must contain the columns '년월일', '호기' and '구분'; every remaining
        column name must be an integer hour label.
    start : int
        Position in ``path`` of the first of the four files to read.
    location : str
        Name given to the value column of the result.

    Returns
    -------
    pandas.DataFrame
        A single column named ``location`` with one row per row of
        ``time_df`` and in the same order; hours without a reading are NaN.

    Raises
    ------
    IndexError
        If ``path`` has fewer than ``start + 4`` entries.

    Notes
    -----
    The result is meant to be placed next to ``time_df`` positionally, so the
    join is a left join: timestamps outside ``time_df`` are dropped instead of
    adding rows. A timestamp that occurs more than once in the files would
    still yield extra rows.
    """
    # Read from the `path` argument (not a global list) so the caller decides
    # which file list is used; explicit indexing keeps the IndexError on a
    # list that is too short.
    yearly_frames = [
        pd.read_csv(path[start + offset], engine='python', encoding='cp949')
        for offset in range(4)
    ]
    wide = pd.concat(yearly_frames)

    # Wide -> long: each hour column becomes its own row.
    long = wide.melt(id_vars=['년월일', '호기', '구분'], var_name='hour', value_name=location)

    # Timestamp = calendar date + hour label, computed vectorised.
    long['time'] = pd.to_datetime(long['년월일']) + pd.to_timedelta(long['hour'].astype(int), unit='h')
    long = long[['time', location]].sort_values('time').reset_index(drop=True)

    # Left join keeps exactly the reference timeline and its row order.
    aligned = pd.merge(time_df, long, on='time', how='left')
    return aligned[[location]]

In [8]:
# Build one hourly column per plant. Each pair is (position of the plant's
# first file in the sorted file list, column name).
# NOTE(review): the positions depend on the exact directory contents — confirm
# them whenever files are added to or removed from the data folder.
(energy_jinju, energy_gumi, energy_changwon,
 energy_yeosu, energy_gangneung, energy_yecheon) = (
    make_df(namdong_path_list, first_file, plant)
    for first_file, plant in (
        (0, 'jinju'),
        (4, 'gumi'),
        (8, 'changwon'),
        (24, 'yeosu'),
        (28, 'gangneung'),
        (40, 'yecheon'),
    )
)

In [9]:
# Put the timeline and the per-plant columns side by side; axis=1 aligns the
# frames on their row index, so every frame must share the same row order.
energy_df = pd.concat(
    [
        time_df,
        energy_jinju,
        energy_gumi,
        energy_changwon,
        energy_yeosu,
        energy_gangneung,
        energy_yecheon,
    ],
    axis=1,
)

In [10]:
# Preview the combined frame: a 'time' column followed by one column per plant.
energy_df

Unnamed: 0,time,jinju,gumi,changwon,yeosu,gangneung,yecheon
0,2017-01-01 01:00:00,0.00,0.0,,0.0,0.0,0.00
1,2017-01-01 02:00:00,0.00,0.0,,0.0,0.0,0.00
2,2017-01-01 03:00:00,0.00,0.0,,0.0,0.0,0.00
3,2017-01-01 04:00:00,0.00,0.0,,0.0,0.0,0.00
4,2017-01-01 05:00:00,0.00,0.0,,0.0,0.0,0.00
...,...,...,...,...,...,...,...
30643,2020-06-30 20:00:00,9.36,,0.72,0.0,0.0,2.16
30644,2020-06-30 21:00:00,0.00,,0.00,0.0,0.0,0.00
30645,2020-06-30 22:00:00,0.00,,0.00,0.0,0.0,0.00
30646,2020-06-30 23:00:00,0.00,,0.00,0.0,0.0,0.00


In [11]:
# Load the East-West Power table (first CSV column is the row index) and parse
# its 'time' column into datetimes.
# NOTE(review): the file is picked by position ([-3]) in the sorted listing. In
# the listing shown earlier that position is the "(-2017)" file, while the
# output below spans 2015-2021 — confirm the intended file and consider
# selecting it by name.
energy_dongseo = pd.read_csv(
    dongseo_path_list[-3],
    index_col=0,
    encoding='cp949',
    engine='python',
).assign(time=lambda frame: pd.to_datetime(frame['time']))
energy_dongseo

Unnamed: 0,time,dangjin_floating,dangjin_warehouse,dangjin,ulsan
0,2015-01-01 01:00:00,,0.0,0.0,0.0
1,2015-01-01 02:00:00,,0.0,0.0,0.0
2,2015-01-01 03:00:00,,0.0,0.0,0.0
3,2015-01-01 04:00:00,,0.0,0.0,0.0
4,2015-01-01 05:00:00,,0.0,0.0,0.0
...,...,...,...,...,...
25627,2021-01-31 20:00:00,0.0,0.0,0.0,0.0
25628,2021-01-31 21:00:00,0.0,0.0,0.0,0.0
25629,2021-01-31 22:00:00,0.0,0.0,0.0,0.0
25630,2021-01-31 23:00:00,0.0,0.0,0.0,0.0


In [12]:
# Keep only the hours present in both sources, then retain the time column and
# the three plant columns listed below.
# NOTE(review): this cell rebinds `energy_df`. Re-running it without first
# re-running the concat cell would merge a frame that already has 'dangjin',
# producing suffixed columns and a KeyError on the selection.
energy_df = (
    energy_df
    .merge(energy_dongseo, how='inner', on='time')
    [['time', 'gangneung', 'dangjin', 'jinju']]
)
energy_df

Unnamed: 0,time,gangneung,dangjin,jinju
0,2017-01-01 01:00:00,0.0,0.0,0.00
1,2017-01-01 02:00:00,0.0,0.0,0.00
2,2017-01-01 03:00:00,0.0,0.0,0.00
3,2017-01-01 04:00:00,0.0,0.0,0.00
4,2017-01-01 05:00:00,0.0,0.0,0.00
...,...,...,...,...
30643,2020-06-30 20:00:00,0.0,0.0,9.36
30644,2020-06-30 21:00:00,0.0,0.0,0.00
30645,2020-06-30 22:00:00,0.0,0.0,0.00
30646,2020-06-30 23:00:00,0.0,0.0,0.00


In [13]:
# Write the merged table to disk. The row index is written as the first,
# unnamed CSV column (pandas default), and the target directory must already exist.
energy_df.to_csv(path_or_buf='../energy/energy_df.csv')