In [1]:
import pandas as pd
import sklearn.cross_validation
import sklearn.svm
from sklearn import grid_search
#from sklearn.externals.joblib import Parallel, delayed
import threading as th
 
# Load the Shikoku Electric Power consumption data, one CSV file per year.
def _read_consumption(year):
    """Read one year's consumption CSV into a frame indexed by date_hour.

    The first three rows of the file are skipped, the columns are named
    DATE / TIME / consumption, and DATE and TIME are combined and parsed
    into a single "date_hour" column that becomes the index.
    """
    csv_path = './learning_sample/07/shikoku_electricity_%d.csv' % year
    return pd.read_csv(
        csv_path,
        skiprows=3,
        names=['DATE', 'TIME', 'consumption'],
        parse_dates={'date_hour': ['DATE', 'TIME']},
        index_col='date_hour',
    )


# One frame per year, 2012 through 2016, in chronological order.
ed = list(map(_read_consumption, range(2012, 2017)))

# Stack the yearly frames into a single frame.
elec_data = pd.concat(ed)
 
# Load the weather data (Takamatsu CSV).
# The "日時" column is parsed as a datetime and used as the index under the
# name "date_hour"; the markers "×" and "--" are read as missing values.
tmp = pd.read_csv(
    u'./learning_sample/07/47891_高松.csv',
    parse_dates={'date_hour': ["日時"]},
    index_col="date_hour",
    low_memory=False,
    na_values=["×", "--"]
)

del tmp["時"]  # the "時" (hour) column is not used, so drop it

# Japanese column names are awkward to work with, so rename only the columns
# used from here on to English names.
columns = {
    "降水量(mm)": "rain",
    "気温(℃)": "temperature",
    "日照時間(h)": "sunhour",
    "湿度(％)": "humid",
}
tmp.rename(columns=columns, inplace=True)

# Fill missing values with each column's median.
# numeric_only=True is passed explicitly: since pandas 2.0 DataFrame.median()
# no longer silently skips non-numeric columns and raises TypeError instead.
# Columns without a median (non-numeric ones) are left unfilled, as before.
tmp.fillna(tmp.median(numeric_only=True), inplace=True)

# Calendar features derived from the datetime index.
tmp["month"] = tmp.index.month
tmp['day'] = tmp.index.day
tmp['dayofyear'] = tmp.index.dayofyear
tmp['hour'] = tmp.index.hour
tmp['dayofweek'] = tmp.index.dayofweek
 
# Join the weather features onto the consumption data so both share the same
# time axis, drop rows with any missing value, then split the result again
# into the target column and the feature columns.
feature_columns = ["temperature", "sunhour", "month", "hour", "dayofweek"]
merged = elec_data.join(tmp[feature_columns])
takamatsu = merged.dropna().values

# Column 0 comes from elec_data (kept 2-D as a single column);
# the remaining columns are the weather / calendar features.
takamatsu_elec = takamatsu[:, :1]
takamatsu_wthr = takamatsu[:, 1:]
 
# Training and evaluation: grid search over SVR hyper-parameters.
# NOTE(review): sklearn.cross_validation and sklearn.grid_search are legacy
# modules that were removed in scikit-learn 0.20; their replacement is
# sklearn.model_selection (where grid_scores_ became cv_results_).
import sklearn.cross_validation
import sklearn.svm

# Candidate values tried for each SVR hyper-parameter (3 x 3 combinations).
param_grid = {
    'C': [18, 20, 22],
    'epsilon': [18, 20, 22],
}

# Hold out 20% of the rows. In the visible code x_test / y_test are not
# scored afterwards; only the training split is used by the search below.
x_train, x_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    takamatsu_wthr, takamatsu_elec, test_size=0.2)

# The targets were sliced as a single column; flatten them to 1-D arrays.
y_train = y_train.flatten()
y_test = y_test.flatten()

# Method: support vector regression with the default kernel. The fitted
# estimator reports kernel='rbf', so this is not a linear SVM.
model = sklearn.svm.SVR()
classifier = grid_search.GridSearchCV(model, param_grid, n_jobs=4)

# Fit every parameter combination, using up to 4 parallel jobs.
classifier.fit(x_train, y_train)
print("最適なパラメーター =", classifier.best_params_)
print("精度 =", classifier.best_score_)

# Show the grid-search results: mean score and spread for each combination.
print("Best Estimator:\n%s\n" % classifier.best_estimator_)
for params, mean_score, all_scores in classifier.grid_scores_:
    print("{:.3f} (+/- {:.3f}) for {}".format(mean_score, all_scores.std() / 2, params))

# Training and evaluation with cross-validation.
# Number of samples = number of rows in the target array.
data_count = takamatsu_elec.shape[0]

# Prepare the 5-fold cross-validation splitter over the row indices.
# NOTE(review): sklearn.cross_validation is a legacy module (removed in
# scikit-learn 0.20); sklearn.model_selection.KFold has a different signature.
kf = sklearn.cross_validation.KFold(data_count, n_folds=5)
 
def train_test(num, train, test):
    """Fit an SVR on one fold and print its training and validation scores.

    The rows are taken from the module-level arrays ``takamatsu_wthr``
    (features) and ``takamatsu_elec`` (target). The SVR is created with its
    default hyper-parameters.

    Args:
        num: Fold number; used only to label the printed line.
        train: Indexer selecting the training rows.
        test: Indexer selecting the validation rows.
    """
    features_fit, features_val = takamatsu_wthr[train], takamatsu_wthr[test]
    # The target is stored as a single column; flatten it to 1-D.
    target_fit = takamatsu_elec[train].flatten()
    target_val = takamatsu_elec[test].flatten()

    # -- SVR --
    svr = sklearn.svm.SVR()
    svr.fit(features_fit, target_fit)

    fit_score = svr.score(features_fit, target_fit)
    val_score = svr.score(features_val, target_val)
    print("SVR[%d]: Training Score = %f, Testing(Validate) Score = %f" %
          (num, fit_score, val_score))


# Run the cross-validation for every fold, one thread per fold.
# Folds are numbered from 1 in the order the splitter yields them.
workers = []
for fold_no, (train, test) in enumerate(kf, start=1):
    worker = th.Thread(target=train_test, args=(fold_no, train, test))
    print("Start Thread%d" % fold_no)
    worker.start()
    workers.append(worker)

# Wait for every thread to finish, in start order, before continuing.
for worker in workers:
    worker.join()



最適なパラメーター = {'C': 22, 'epsilon': 18}
精度 = 0.8823914604700764
Best Estimator:
SVR(C=22, cache_size=200, coef0=0.0, degree=3, epsilon=18, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

0.880 (+/- 0.001) for {'C': 18, 'epsilon': 18}
0.877 (+/- 0.001) for {'C': 18, 'epsilon': 20}
0.873 (+/- 0.001) for {'C': 18, 'epsilon': 22}
0.881 (+/- 0.001) for {'C': 20, 'epsilon': 18}
0.878 (+/- 0.001) for {'C': 20, 'epsilon': 20}
0.874 (+/- 0.001) for {'C': 20, 'epsilon': 22}
0.882 (+/- 0.001) for {'C': 22, 'epsilon': 18}
0.879 (+/- 0.001) for {'C': 22, 'epsilon': 20}
0.876 (+/- 0.001) for {'C': 22, 'epsilon': 22}
Start Thread1
Start Thread2
Start Thread3
Start Thread4
Start Thread5
SVR[3]: Training Score = 0.787727, Testing(Validate) Score = 0.774663
SVR[5]: Training Score = 0.799062, Testing(Validate) Score = 0.745446
SVR[1]: Training Score = 0.779099, Testing(Validate) Score = 0.717941
SVR[2]: Training Score = 0.781787, Testing(Validate) Score = 0.720783
SVR[4