# kma 관측치 Historical Data Preprocess

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime
from ipywidgets import interact, interact_manual
pd.options.mode.chained_assignment = None

from functions.kma_forecast import interval6_to_interval3, preprocess_hour_interval, get_df_temp, merge_df, preprocess_forecast

In [4]:
data_dir = os.path.abspath(os.path.join(os.getcwd(), '..', '..','data', 'raw', 'kma'))

In [3]:
name_columns = ['지점',
                '시간', 
                '기온(°C)', 
                '강수량(mm)', 
                '풍속(m/s)', 
                '풍향(16방위)', 
                '습도(%)', 
                '일사(MJ/m2)', 
                '적설(cm)', 
                '전운량(10분위)']

In [5]:
def preprocess_observation(data_dir, filename, location) :
    file_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'data', 'raw', 'kma', filename))
    print(file_dir)
    df_temp = pd.read_csv(file_dir, encoding="ISO-8859-1")
    df_temp.columns = name_columns

    df_temp = df_temp.drop(df_temp.columns[[0]], axis=1)

    df_temp = df_temp.rename({'시간':'datetime'}, axis=1)
    df_temp['datetime'] = pd.to_datetime(df_temp['datetime'])
    df_temp['date'] = df_temp['datetime'].dt.date
    df_temp['location'] = np.empty(len(df_temp))
    df_temp['location'] = df_temp['location'].apply(lambda x : location)
    
    df_temp = df_temp.reindex(columns=['datetime', 
                                       'date', 
                                       'location', 
                                       '기온(°C)', 
                                       '강수량(mm)', 
                                       '풍속(m/s)', 
                                       '풍향(16방위)',
                                       '습도(%)',
                                       '증기압(hPa)',
                                       '현지기압(hPa)',
                                       '해면기압(hPa)',
                                       '일사(MJ/m2)',
                                       '적설(cm)',
                                       '전운량(10분위)'
                                       ])
    
    df_temp['강수량(mm)'] = df_temp['강수량(mm)'].fillna(0)
    df_temp['적설(cm)'] = df_temp['적설(cm)'].fillna(0)
    df_temp['증기압(hPa)'] = df_temp['증기압(hPa)'].fillna(0)
    df_temp['현지기압(hPa)'] = df_temp['현지기압(hPa)'].fillna(0)
    df_temp['해면기압(hPa)'] = df_temp['해면기압(hPa)'].fillna(0)
    df_temp['전운량(10분위)'] = df_temp['전운량(10분위)'].fillna(0)
    df_temp['일사(MJ/m2)'] = df_temp['일사(MJ/m2)'].fillna(0)
    
    return df_temp

In [6]:
def oneshot(data_dir, filename, location, filename_tosave) :
    df = preprocess_observation(data_dir, filename, location)
    display(df.head())
    display(df.tail())
    df.to_pickle(os.path.abspath(os.path.join(os.getcwd() ,'..', 'data', filename_tosave)))
    return

## 관측치, 충남 서산시 수석동 2017~2019

In [7]:
years = [2017,2018,2019]

for year in years:
    filename = '관측치_충남_서산시_수석동_'+str(year)+'.csv'
    location = 'Korea, Chungcheongnam-do, Seosan-si, Suseok-dong'
    filename_tosave = 'df_kma_obs_Chungcheongnam-do_Seosan-si_Suseok-dong_'+str(year)+'.pkl'

    oneshot(data_dir, filename, location, filename_tosave)


/home/jeon/Desktop/kpx/data/raw/kma/관측치_충남_서산시_수석동_2017.csv


Unnamed: 0,datetime,date,location,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),현지기압(hPa),해면기압(hPa),일사(MJ/m2),적설(cm),전운량(10분위)
0,2017-01-01 01:00:00,2017-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",0.6,0.0,0.5,320.0,99.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2017-01-01 02:00:00,2017-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",0.0,0.0,0.6,360.0,99.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2017-01-01 03:00:00,2017-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",1.2,0.0,0.3,0.0,99.0,0.0,0.0,0.0,0.0,0.0,9.0
3,2017-01-01 04:00:00,2017-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",0.1,0.0,1.0,20.0,99.0,0.0,0.0,0.0,0.0,0.0,7.0
4,2017-01-01 05:00:00,2017-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",0.6,0.0,0.1,0.0,99.0,0.0,0.0,0.0,0.0,0.0,9.0


Unnamed: 0,datetime,date,location,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),현지기압(hPa),해면기압(hPa),일사(MJ/m2),적설(cm),전운량(10분위)
8719,2017-12-30 20:00:00,2017-12-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",2.2,0.0,1.3,70.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0
8720,2017-12-30 21:00:00,2017-12-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",1.8,2.5,1.5,50.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0
8721,2017-12-30 22:00:00,2017-12-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",0.9,0.0,1.0,340.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0
8722,2017-12-30 23:00:00,2017-12-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",0.4,0.0,1.9,20.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0
8723,2017-12-31 00:00:00,2017-12-31,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",0.7,2.8,2.0,360.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0


/home/jeon/Desktop/kpx/data/raw/kma/관측치_충남_서산시_수석동_2018.csv


Unnamed: 0,datetime,date,location,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),현지기압(hPa),해면기압(hPa),일사(MJ/m2),적설(cm),전운량(10분위)
0,2018-01-01 01:00:00,2018-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-3.5,0.0,1.0,90,79.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2018-01-01 02:00:00,2018-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-4.3,0.0,0.4,0,86.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2018-01-01 03:00:00,2018-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-5.1,0.0,0.2,0,89.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2018-01-01 04:00:00,2018-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-5.6,0.0,0.6,20,90.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2018-01-01 05:00:00,2018-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-5.3,0.0,0.1,0,88.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,datetime,date,location,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),현지기압(hPa),해면기압(hPa),일사(MJ/m2),적설(cm),전운량(10분위)
8707,2018-12-30 20:00:00,2018-12-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-5.4,0.0,1.1,20,64.0,0.0,0.0,0.0,0.0,0.0,9.0
8708,2018-12-30 21:00:00,2018-12-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-6.9,0.0,0.2,0,71.0,0.0,0.0,0.0,0.0,0.0,9.0
8709,2018-12-30 22:00:00,2018-12-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-8.3,0.0,0.0,0,79.0,0.0,0.0,0.0,0.0,0.0,7.0
8710,2018-12-30 23:00:00,2018-12-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-7.8,0.0,0.8,20,78.0,0.0,0.0,0.0,0.0,0.0,9.0
8711,2018-12-31 00:00:00,2018-12-31,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-7.6,0.0,1.1,20,73.0,0.0,0.0,0.0,0.0,0.0,0.0


/home/jeon/Desktop/kpx/data/raw/kma/관측치_충남_서산시_수석동_2019.csv


Unnamed: 0,datetime,date,location,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),현지기압(hPa),해면기압(hPa),일사(MJ/m2),적설(cm),전운량(10분위)
0,2019-01-01 01:00:00,2019-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-2.4,0.0,1.0,360.0,65.0,0.0,0.0,0.0,0.0,0.0,8.0
1,2019-01-01 02:00:00,2019-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-3.0,0.0,0.2,0.0,71.0,0.0,0.0,0.0,0.0,0.0,7.0
2,2019-01-01 03:00:00,2019-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-1.8,0.0,1.6,320.0,66.0,0.0,0.0,0.0,0.0,0.0,7.0
3,2019-01-01 04:00:00,2019-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-1.7,0.0,1.2,320.0,63.0,0.0,0.0,0.0,0.0,0.0,6.0
4,2019-01-01 05:00:00,2019-01-01,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",-1.7,0.0,1.6,340.0,61.0,0.0,0.0,0.0,0.0,0.0,10.0


Unnamed: 0,datetime,date,location,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),증기압(hPa),현지기압(hPa),해면기압(hPa),일사(MJ/m2),적설(cm),전운량(10분위)
4315,2019-06-29 20:00:00,2019-06-29,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",22.9,0.0,1.1,20.0,89.0,0.0,0.0,0.0,0.06,0.0,4.0
4316,2019-06-29 21:00:00,2019-06-29,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",22.3,0.0,1.8,360.0,91.0,0.0,0.0,0.0,0.0,0.0,7.0
4317,2019-06-29 22:00:00,2019-06-29,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",22.3,0.0,1.3,50.0,90.0,0.0,0.0,0.0,0.0,0.0,5.0
4318,2019-06-29 23:00:00,2019-06-29,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",21.7,0.0,1.1,360.0,93.0,0.0,0.0,0.0,0.0,0.0,4.0
4319,2019-06-30 00:00:00,2019-06-30,"Korea, Chungcheongnam-do, Seosan-si, Suseok-dong",20.9,0.0,0.8,320.0,97.0,0.0,0.0,0.0,0.0,0.0,5.0


## 관측치 제주 서귀포시 표선면 2017~2019

In [None]:
years = [2017,2018,2019]

for year in years:
    filename = '관측치_제주_서귀포시_표선면_'+str(year)+'.csv'
    location = 'Korea, Jeju-do, Seogwipo-si, Pyoseon-myeon'
    filename_tosave = 'df_kma_obs_Jeju-do_Seogwipo-si_Pyoseon-myeon_2_'+str(year)+'.pkl'

    oneshot(data_dir, filename, location, filename_tosave)

## 1분당 관측치 제주 서귀포시 표선면 2017 ~ 2019

In [8]:
filename = '2017_per_minute.csv'
file_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'data', 'per_minute_data', filename))
print(file_dir)

/home/jeon/Desktop/kpx/data/per_minute_data/2017_per_minute.csv


In [9]:
df_temp = pd.read_csv(file_dir, encoding='cp949')
df_temp.drop(df_temp.columns[[9,10]],axis=1).head()
data_dir = os.path.abspath(os.path.join(os.getcwd(), '..', '..','data', 'raw', 'per_minute_data'))
print(data_dir)

/home/jeon/Desktop/data/raw/per_minute_data


In [10]:
name_columns = ['지점',
                '시간', 
                '기온(°C)', 
                '강수량(mm)', 
                '풍향(16방위)', 
                '풍속(m/s)', 
                '현지기압(hPa)',
                '해면기압(hPa)',
                '습도(%)', 
                '일사(MJ/m2)', 
                '일조(Sec)'
               ]

In [11]:
# Per_minute
def preprocess_observation(data_dir, filename, location) :
    file_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'data', 'per_minute_data', filename))
    print(file_dir)
    df_temp = pd.read_csv(file_dir, encoding="ISO-8859-1")
    df_temp.columns = name_columns

    df_temp = df_temp.drop(df_temp.columns[[0]], axis=1)
    df_temp = df_temp.drop(df_temp.columns[[8,9]],axis=1)
    df_temp = df_temp.rename({'시간':'datetime'}, axis=1)
    df_temp['datetime'] = pd.to_datetime(df_temp['datetime'])
    df_temp['date'] = df_temp['datetime'].dt.date
    df_temp['location'] = np.empty(len(df_temp))
    df_temp['location'] = df_temp['location'].apply(lambda x : location)
    
    df_temp = df_temp.reindex(columns=['location',
                                       'datetime', 
                                       'date',  
                                       '기온(°C)', 
                                       '강수량(mm)', 
                                       '풍향(16방위)',
                                       '풍속(m/s)', 
                                       '현지기압(hPa)',
                                       '해면기압(hPa)',
                                       '습도(%)',
                                       ])
    
    df_temp['강수량(mm)'] = df_temp['강수량(mm)'].fillna(0)
    df_temp['현지기압(hPa)'] = df_temp['현지기압(hPa)'].fillna(0)
    df_temp['해면기압(hPa)'] = df_temp['해면기압(hPa)'].fillna(0)
    
    return df_temp

In [12]:
filename_tosave = 'df_kma_obs_Jeju-do_Seogwipo-si_Pyoseon-myeon_per_minute_'+str(year)+'.pkl'

In [13]:
def oneshot(data_dir, filename, location, filename_tosave) :
    df = preprocess_observation(data_dir, filename, location)
    display(df.head())
    display(df.tail())
    df.to_pickle(os.path.abspath(os.path.join(os.getcwd() ,'..', 'data', filename_tosave)))
    return

In [None]:
years = [2017,2018,2019]

for year in years:
    filename = str(year)+'_per_minute.csv'
    location = 'Korea, Jeju-do, Seogwipo-si, Pyoseon-myeon'
    filename_tosave = 'df_kma_obs_Jeju-do_Seogwipo-si_Pyoseon-myeon_per_minute_'+str(year)+'.pkl'

    oneshot(data_dir, filename, location, filename_tosave)