In [1]:
from keras.utils import np_utils, plot_model

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG

import numpy as np
import pandas as pd

from datetime import datetime,timedelta
import matplotlib.pyplot as plt

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
def addPrefixTo(cols, prefix):
    """Return a new list of column names with ``prefix`` prepended.

    Every name becomes ``"<prefix>_<name>"`` except the literal ``"time"``
    column, which is passed through unchanged.
    """
    return [
        name if name == "time" else "{0}_{1}".format(prefix, name)
        for name in cols
    ]


def addSuffixTo(cols, suffix):
    """Return a new list of column names with ``suffix`` appended.

    Every name becomes ``"<name>_<suffix>"`` except the literal ``"time"``
    column, which is passed through unchanged.
    """
    def with_suffix(name):
        if name == "time":
            return name
        return "{0}_{1}".format(name, suffix)

    return list(map(with_suffix, cols))


def searchColumn(word, cols):
    """Return, in their original order, the names in ``cols`` that contain ``word``."""
    return list(filter(lambda name: word in name, cols))

# Load data

In [3]:
# Constants
# MSM: latitude / longitude strings and variable names. They are used verbatim
# to build the csv file paths and the resulting column names.
lats = ["35.10", "35.15", "35.20", "35.25", "35.30", "35.35", "35.40", "35.45", "35.50"]
lons = ["139.1250", "139.1875", "139.2500", "139.3125", "139.3750", "139.4375", "139.5000", "139.5625", "139.6250"]
data_names = ["UGRD", "VGRD", "TMP", "RH", "PRMSL", "PRES", "APCP"]

# Hiratsuka observation tower: directory holding one csv per day, grouped by year
hiratsuka_data_path = "../data/hiratsuka/avg_1h"

# Japan Meteorological Agency (kishocho) station data.
# The four lists below are zipped together, one entry per location.
kishocho_data_path = "../data/kishocho"
locations = ["hiratsuka", "odawara", "tsujido"]
skiprow_counts = [5, 6, 6] # number of header rows to skip in each csv
# Positional indices of the csv columns to keep, and the names given to them.
colmns = [[0,1], [0,1,4,7,10,12], [0,1,4,7,10,12]]
colmn_names = [["time", "rain"], ["time", "tmp", "rain", "sun", "wind_vel", "wind_dir"], ["time", "tmp", "rain", "sun", "wind_vel", "wind_dir"]]
# Wind-direction labels; the list position is the class index used for one-hot encoding.
# NOTE(review): "Q" is presumably the label for calm / no direction -- confirm against the csv files.
wind_dir = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE", "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW", "Q"]

step = 10  # number of hourly time offsets stacked side by side for every source
msm_first = -3  # first MSM offset in hours; MSM offsets run msm_first .. msm_first + step - 1
predict_hour = 1  # the label is the tower measurement this many hours after each row's time

In [4]:
def get_msm_data(year, lat, lon, data, csv_dir_path="../data/csv"):
    """Load one MSM variable for a single grid point and year.

    The file that is read is
    ``<csv_dir_path>/<lon>/<lat>/<lon>_<lat>_<data>_<year>.csv``.

    Parameters
    ----------
    year : int or str
        Year used in the file name.
    lat, lon : str
        Latitude / longitude exactly as they appear in the directory and
        file names (e.g. ``"35.10"``, ``"139.1250"``).
    data : str
        Variable name used in the file name (e.g. ``"UGRD"``).
    csv_dir_path : str, optional
        Root directory of the csv tree. Defaults to ``"../data/csv"``, the
        location that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        1-D float array holding the second column of the csv file.
    """
    file_path = "{0}/{1}/{2}".format(csv_dir_path, lon, lat)
    file_name = "{0}/{1}_{2}_{3}_{4}.csv".format(file_path, lon, lat, data, year)
    # Use a separate name so the `data` argument is not overwritten by the array.
    values = np.genfromtxt(file_name, delimiter=",", dtype='float')
    return values[:, 1]

# Build the MSM table: one row per hour, one column per (variable, lat, lon).
# Each year's columns are gathered in a dict and turned into a frame in one
# go, and the yearly frames are concatenated once at the end. This avoids
# inserting hundreds of columns one at a time and re-copying the growing
# frame on every loop iteration.
msm_years = []
for year in range(2013, 2017 + 1):
    # 1488 hourly steps = 62 days x 24 h, starting at 09:00 on 1 July.
    # Lowercase 'h' matches the alias used for the Hiratsuka data and is the
    # spelling current pandas expects (uppercase 'H' is deprecated).
    msm_columns = {
        "time": pd.date_range('{0}-07-01 9:00:00'.format(year), periods=1488, freq='h')
    }
    for lat in lats:
        for lon in lons:
            for data_name in data_names:
                msm_columns["{0}_{1}_{2}".format(data_name, lat, lon)] = get_msm_data(year, lat, lon, data_name)
    data_msm_year = pd.DataFrame(msm_columns)
    msm_years.append(data_msm_year)

data_msm = pd.concat(msm_years, ignore_index=True)
data_msm.columns = addPrefixTo(data_msm.columns, "msm")

In [5]:
# Load the Hiratsuka tower measurements: one csv per day for July and August
# of each year. The daily frames are collected in a list and concatenated
# once per year. The old approach appended each day onto an empty
# object-dtype frame, which was quadratic and relied on pandas ignoring the
# empty frame when choosing the result dtype.
hiratsuka_years = []
for year in range(2013, 2017 + 1):
    data_path = "{0}/{1}".format(hiratsuka_data_path, year)
    daily_frames = []
    for month in range(7, 8 + 1):
        for day in range(1, 31 + 1):
            file_name = "{0}{1:02d}{2:02d}.csv".format(year, month, day)
            file_path = "{0}/{1}".format(data_path, file_name)

            data_day = pd.read_csv(file_path, header=0)
            daily_frames.append(data_day[["UGRD", "VGRD"]])

    data_hiratsuka_year = pd.concat(daily_frames, ignore_index=True)
    # Timestamps are assigned by position: 1488 hourly steps from 01:00 on 1 July.
    # A row-count mismatch raises here instead of shifting the data silently.
    data_hiratsuka_year["time"] = pd.date_range('{0}-07-01 01:00:00'.format(year), periods=1488 ,freq='1h')
    hiratsuka_years.append(data_hiratsuka_year)

data_hiratsuka = pd.concat(hiratsuka_years, ignore_index=True)
data_hiratsuka = data_hiratsuka[["time", "UGRD", "VGRD"]]
data_hiratsuka.columns = addPrefixTo(data_hiratsuka.columns, "measured")

In [6]:
def _one_hot_wind_dir(values, labels):
    """One-hot encode wind-direction labels, one output row per input row.

    Parameters
    ----------
    values : pandas.Series
        Wind-direction labels.
    labels : list of str
        All known labels; the list position is the class index and the
        label becomes the output column name.

    Returns
    -------
    pandas.DataFrame
        float32 frame with the same index as ``values`` and one column per
        label. A row whose label is missing or not in ``labels`` is all zeros.
    """
    code_of = {label: code for code, label in enumerate(labels)}
    codes = values.map(code_of).values
    known = ~pd.isnull(codes)
    one_hot = np.zeros((len(codes), len(labels)), dtype="float32")
    one_hot[np.flatnonzero(known), codes[known].astype(int)] = 1.0
    return pd.DataFrame(one_hot, columns=list(labels), index=values.index)


# Lookup table label -> class index. The encoding below no longer needs it,
# but the name is kept so any later cell that refers to it still works.
wind_dir_class = pd.DataFrame([wind_dir, np.arange(0, len(wind_dir)).tolist()]).T
wind_dir_class.columns = ["wind_dir", "wind_dir_class"]

dict_data_kishocho = {}
for loc, skiprows, cols, col_name in zip(locations, skiprow_counts, colmns, colmn_names):
    # Read all yearly files first, then concatenate once.
    yearly_frames = []
    for year in range(2013, 2017 + 1):
        file_path = "{0}/{1}/{1}_{2}.csv".format(kishocho_data_path, loc, year)
        yearly_frames.append(pd.read_csv(file_path, skiprows=skiprows, header=None)[cols])
    df = pd.concat(yearly_frames, ignore_index=True)
    df.columns = col_name
    if loc != "hiratsuka":
        df["sun"] = df["sun"].fillna(0)  # night-time sunshine duration: NaN -> 0
        # Encode the wind direction as classes, row by row. The earlier inner
        # merge dropped rows with an unmatched label, and the positional
        # concat that followed then shifted the one-hot rows against the data.
        df_wind = _one_hot_wind_dir(df["wind_dir"], wind_dir)
        df = pd.concat([df, df_wind], axis=1)
        df = df.drop("wind_dir", axis=1)
    df["time"] = pd.to_datetime(df["time"])
    df.columns = addPrefixTo(df.columns, "kishocho")
    df.columns = addSuffixTo(df.columns, loc)
    dict_data_kishocho[loc] = df

# `data` used to be the name of this dict; keep the alias for compatibility.
data = dict_data_kishocho

# Data concat

In [7]:
def _lagged_frame(frame, offsets):
    """Stack time-shifted copies of ``frame`` side by side.

    For every ``offset`` in ``offsets`` a copy of ``frame`` is made, its
    non-time columns get the suffix ``_<offset>``, and its timestamps are
    moved forward by ``offset`` hours. The ``_<offset>`` columns in the row
    for time ``t`` therefore hold the values ``frame`` had at
    ``t - offset`` hours. The copies are outer-joined on ``"time"``, so
    timestamps with no partner are kept and filled with NaN.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source table with a datetime ``"time"`` column.
    offsets : iterable of int
        Hour offsets to generate.

    Returns
    -------
    pandas.DataFrame
        Table sorted by ``"time"`` with one column group per offset.
    """
    lagged = pd.DataFrame(frame["time"])
    for offset in offsets:
        shifted = frame.copy()
        shifted.columns = addSuffixTo(shifted.columns, offset)
        shifted["time"] = shifted["time"] + timedelta(hours=offset)
        lagged = pd.merge(lagged, shifted, on="time", how="outer").sort_values(by="time")
    return lagged


# Hiratsuka tower measurements: offsets 0 .. step - 1
time_series_hiratsuka = _lagged_frame(data_hiratsuka, range(0, step))

# MSM: offsets msm_first .. msm_first + step - 1
time_series_msm = _lagged_frame(data_msm, range(msm_first, msm_first + step))

# JMA station observations: join all stations on time, then lag like the others
data_kishocho = pd.DataFrame(dict_data_kishocho["hiratsuka"]["time"])
for loc in locations:
    data_kishocho = pd.merge(data_kishocho, dict_data_kishocho[loc])
time_series_kishocho = _lagged_frame(data_kishocho, range(0, step))

# Combine every source on "time"
data = pd.merge(time_series_msm, time_series_hiratsuka, how='outer').sort_values(by="time")
data = pd.merge(data, time_series_kishocho, how='outer').sort_values(by="time")

# Attach the labels: the tower measurement `predict_hour` hours after each row.
data["time_"] = data["time"] + timedelta(hours=predict_hour)
data_hiratsuka_ = data_hiratsuka.copy()
data_hiratsuka_.columns = ["time_", "label_UGRD", "label_VGRD"]
data = pd.merge(data, data_hiratsuka_, on="time_")

# Keep only rows where every feature and both labels are present.
data = data.dropna()
data = data.reset_index(drop=True)
data.head(10)

Unnamed: 0,time,msm_UGRD_35.10_139.1250_-3,msm_VGRD_35.10_139.1250_-3,msm_TMP_35.10_139.1250_-3,msm_RH_35.10_139.1250_-3,msm_PRMSL_35.10_139.1250_-3,msm_PRES_35.10_139.1250_-3,msm_APCP_35.10_139.1250_-3,msm_UGRD_35.10_139.1875_-3,msm_VGRD_35.10_139.1875_-3,...,kishocho_SW_tsujido_9,kishocho_WSW_tsujido_9,kishocho_W_tsujido_9,kishocho_WNW_tsujido_9,kishocho_NW_tsujido_9,kishocho_NNW_tsujido_9,kishocho_Q_tsujido_9,time_,label_UGRD,label_VGRD
0,2013-07-01 15:00:00,-1.249200,2.649450,295.595,79.6818,101483.0,100672.0,0.000000,-1.319520,3.540070,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-01 16:00:00,0.359080,4.842662
1,2013-07-01 16:00:00,-1.123600,2.513320,295.345,78.8690,101482.0,100678.0,0.000000,-1.607970,3.528940,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-01 17:00:00,0.590439,4.273714
2,2013-07-01 17:00:00,-0.950352,2.533700,295.218,79.2012,101565.0,100751.0,0.000000,-1.192540,3.502450,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-01 18:00:00,0.278968,3.870992
3,2013-07-01 18:00:00,-1.102330,2.600840,294.812,80.9952,101591.0,100782.0,0.000000,-1.539830,3.569590,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-01 19:00:00,0.542161,3.329635
4,2013-07-01 19:00:00,-1.209800,2.202090,294.628,82.8290,101584.0,100771.0,0.000000,-1.561360,2.991150,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-01 20:00:00,0.834674,3.139508
5,2013-07-01 20:00:00,-1.656260,2.110900,294.661,82.9519,101635.0,100822.0,0.000000,-1.953140,2.853090,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-01 21:00:00,1.039117,2.750700
6,2013-07-01 21:00:00,-0.749887,1.285260,294.279,86.2307,101538.0,100726.0,0.000000,-1.359260,2.160260,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-01 22:00:00,2.012317,1.678111
7,2013-07-01 22:00:00,-0.709417,2.096230,294.356,85.9904,101507.0,100702.0,0.000000,-1.131290,2.869670,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-01 23:00:00,1.911910,1.154175
8,2013-07-01 23:00:00,-0.521511,2.126610,294.322,87.1652,101539.0,100727.0,0.000000,-0.982449,2.892240,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-02 00:00:00,2.321923,0.931603
9,2013-07-02 00:00:00,1.186700,0.860281,293.930,85.0152,101534.0,100727.0,0.000000,0.303883,2.125910,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2013-07-02 01:00:00,1.935121,0.217229


In [8]:
# Write the merged feature/label table to disk. With the default arguments the
# row index is written too, as the first (unnamed) csv column.
data.to_csv("rowdata.csv")