In [2]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import math
from random import random
import matplotlib.pyplot as plt
import seaborn as sns
import operator
from functools import reduce
from tqdm import tqdm
import matplotlib.animation as ani
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
from sklearn.metrics import explained_variance_score
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
import random
random.seed(12345)

In [3]:
# Print NumPy floats with exactly three digits after the decimal point.
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
# Request Times New Roman for figure text.
# NOTE(review): this is set BEFORE the style sheets below are applied; if any of
# those sheets defines font.family it would replace this value -- confirm the order.
plt.rcParams["font.family"] = "Times New Roman"
# Three style sheets are applied one after another; where their settings overlap,
# the sheet applied last is the one that remains in effect.
# NOTE(review): newer matplotlib releases ship these sheets under 'seaborn-v0_8-*'
# names -- confirm these names exist in the installed version.
plt.style.use('seaborn-whitegrid')
plt.style.use('seaborn-poster')
plt.style.use('seaborn-dark-palette')
# Render math text with the Computer Modern font set.
plt.rcParams["mathtext.fontset"] = "cm"

In [4]:
def accuracy_plot(true, pred):
    """Scatter predictions against observations with a +/-10 % band.

    Parameters
    ----------
    true : array-like of numbers
        Observed values, drawn on the x axis. Their negation is also used as
        the colour value of each point.
    pred : array-like of numbers
        Predicted values, drawn on the y axis; same length as ``true``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Notes
    -----
    Both inputs are converted with ``np.asarray`` first, so plain Python lists
    (such as the lists returned by ``drop_altitude``) work; negating a list
    directly raises ``TypeError``.
    """
    true = np.asarray(true)
    pred = np.asarray(pred)

    # Colour by the negated observation so that the colormap runs in reverse
    # with respect to the observed value.
    plt.scatter(true, pred, s=10, c=-true, cmap='viridis')
    plt.ylabel('predicted migration intensity(birds km-3)')
    plt.xlabel('actual migration intensity(birds km-3)')

    # Shade the region between 0.9*x and 1.1*x for integer x below max(true).
    x = np.arange(0, int(max(true)))
    plt.fill_between(x, x * 0.9, x * 1.1, alpha=0.3)
    plt.show()

In [5]:
def hyperparameter_tuning(df_forecasting, coef, radar, group_size=30):
    """Grid-search the forest size on the head of the data and predict the tail.

    Column 0 of ``df_forecasting`` is used as the target and every remaining
    column as a feature. Rows are split by position (no shuffling): the first
    ``int(coef * len(df) / group_size) * group_size`` rows are used for the
    cross-validated grid search, the rest are predicted with the best model.

    Parameters
    ----------
    df_forecasting : pandas.DataFrame
        Target in the first column, features in the others.
    coef : float
        Fraction of the row groups assigned to the training part.
    radar : str
        Not used by this function; kept so existing callers keep working.
    group_size : int, default 30
        Number of consecutive rows that must stay together; the split point is
        always a multiple of this value. The default matches the chunk length
        ``drop_altitude`` sums over.

    Returns
    -------
    pred : pandas.Series
        Predictions for the held-out rows, indexed like those rows.
    best_params : dict
        ``best_params_`` of the fitted grid search.
    """
    features = df_forecasting.iloc[:, 1:]
    target = df_forecasting.iloc[:, 0]

    # Compute the split point once so that features and target cannot drift apart.
    split = int(coef * (len(features) / group_size)) * group_size
    x_train, x_valid = features.iloc[:split], features.iloc[split:]
    y_train, y_valid = target.iloc[:split], target.iloc[split:]

    # Only the number of trees is searched; random_state is pinned through the
    # grid so that every candidate forest is built with the same seed.
    param_grid = [{'n_estimators': range(30, 101, 10), 'random_state': [100]}]

    forest_reg = RandomForestRegressor(n_jobs=-1)
    grid_search = GridSearchCV(forest_reg, param_grid, cv=5,
                               scoring='neg_mean_squared_error')
    grid_search.fit(x_train, y_train)
    print(grid_search.best_params_)

    # Keep the original row labels so callers can align predictions with truth.
    pred = grid_search.best_estimator_.predict(x_valid)
    pred = pd.Series(pred, index=y_valid.index)

    return pred, grid_search.best_params_

def drop_altitude(data, group_size=30):
    """Sum ``data`` over consecutive, non-overlapping chunks.

    Parameters
    ----------
    data : numpy.ndarray or pandas.Series
        Sliceable sequence whose slices provide ``.sum()``.
        NOTE(review): presumably each chunk holds the altitude bins of one
        time step -- confirm against the data layout.
    group_size : int, default 30
        Number of consecutive elements summed into one output value.

    Returns
    -------
    list
        One sum per chunk, in order. If ``len(data)`` is not a multiple of
        ``group_size`` the last value covers the shorter remainder.
    """
    chunk_starts = np.arange(0, len(data), group_size)
    return [data[start:start + group_size].sum() for start in tqdm(chunk_starts)]


### Weather with altitude and rolling mean

In [10]:
# Read the radar table and flatten all of its cells, row by row, into a single
# one-dimensional array.
radars = pd.read_csv('../dataset/radar.csv').values.reshape(-1)
print(radars)

['KABR' 'KABX' 'KAKQ' 'KAMA' 'KAMX' 'KAPX' 'KARX' 'KATX' 'KBBX' 'KBGM'
 'KBHX' 'KBIS' 'KBLX' 'KBMX' 'KBOX' 'KBRO' 'KBUF' 'KBYX' 'KCAE' 'KCBW'
 'KCBX' 'KCCX' 'KCLE' 'KCLX' 'KCRP' 'KCXX' 'KCYS' 'KDAX' 'KDDC' 'KDFX'
 'KDGX' 'KDIX' 'KDLH' 'KDMX' 'KDOX' 'KDTX' 'KDVN' 'KDYX' 'KEAX' 'KEMX'
 'KENX' 'KEPZ' 'KESX' 'KEVX' 'KEWX' 'KEYX' 'KFCX' 'KFDR' 'KFDX' 'KFFC'
 'KFSD' 'KFSX' 'KFTG' 'KFWS' 'KGGW' 'KGJX' 'KGLD' 'KGRB' 'KGRK' 'KGRR'
 'KGSP' 'KGWX' 'KGYX' 'KHDX' 'KHGX' 'KHNX' 'KHTX' 'KICT' 'KICX' 'KILN'
 'KILX' 'KIND' 'KINX' 'KIWA' 'KIWX' 'KJAX' 'KJGX' 'KJKL' 'KLBB' 'KLCH'
 'KLIX' 'KLNX' 'KLOT' 'KLRX' 'KLSX' 'KLTX' 'KLVX' 'KLWX' 'KLZK' 'KMAF'
 'KMAX' 'KMHX' 'KMKX' 'KMLB' 'KMOB' 'KMPX' 'KMQT' 'KMRX' 'KMSX' 'KMTX'
 'KMUX' 'KMVX' 'KNKX' 'KNQA' 'KOAX' 'KOHX' 'KOKX' 'KOTX' 'KPAH' 'KPBZ'
 'KPDT' 'KPUX' 'KRAX' 'KRGX' 'KRIW' 'KRLX' 'KRTX' 'KSFX' 'KSGF' 'KSHV'
 'KSJT' 'KSOX' 'KSRX' 'KTBW' 'KTFX' 'KTLH' 'KTLX' 'KTWX' 'KTYX' 'KUDX'
 'KUEX' 'KVNX' 'KVTX' 'KVWX' 'KYUX']


In [None]:
# Fraction of the data used for the grid search; the rest is predicted and scored.
coef = 0.5
# Placeholder value only: the loop variable below rebinds `radar` on every iteration.
radar = 'KABR'
# Explicit list of radars to process in this run.
# NOTE(review): presumably the radars still missing from the results file, since
# new rows are added to an existing CSV below -- confirm.
radars = ['KFDX', 'KFFC', 'KFSD', 'KFSX', 'KFTG', 'KFWS', 
          'KGGW', 'KGJX', 'KGLD', 'KGRB', 'KGRK', 'KGRR',
          'KGSP', 'KGWX', 'KGYX', 'KHDX', 'KHGX', 'KHNX',
          'KHTX', 'KICT', 'KICX', 'KILN', 'KILX', 'KIND',
          'KINX', 'KIWA', 'KIWX', 'KJAX', 'KJGX', 'KJKL', 
          'KLBB', 'KLCH', 'KLIX', 'KLNX', 'KLOT', 'KLRX', 
          'KLSX', 'KLTX', 'KLVX', 'KLWX', 'KLZK', 'KMAF',
          'KMAX', 'KMHX', 'KMKX', 'KMLB', 'KMOB', 'KMPX',
          'KMQT', 'KMRX', 'KMSX', 'KMTX', 'KMUX', 'KMVX', 
          'KNKX', 'KNQA', 'KOAX', 'KOHX', 'KOKX', 'KOTX',
          'KPAH', 'KPBZ', 'KPDT', 'KPUX', 'KRAX', 'KRGX',
          'KRIW', 'KRLX', 'KRTX', 'KSFX', 'KSGF', 'KSHV',
          'KSJT', 'KSOX', 'KSRX', 'KTBW', 'KTFX', 'KTLH',
          'KTLX', 'KTWX', 'KTYX', 'KUDX', 'KUEX', 'KVNX',
          'KVTX', 'KVWX', 'KYUX']
order_lags = 3
# To start from scratch instead of extending the saved results, use:
#result = pd.DataFrame(columns=('radar', 'mse', 'r2', 'explained_variance', 'n_estimators'))
result = pd.read_csv('../dataset/result_mean_estimator.csv')
for radar in tqdm(radars):
    df_forecasting = pd.read_csv('../dataset/regressor_mean/regressor_mean%i_%s.csv'%(order_lags, radar))
    pred, par = hyperparameter_tuning(df_forecasting, coef, radar)

    # Collapse both series to one summed value per chunk before scoring.
    pred = drop_altitude(pred)
    true = drop_altitude(df_forecasting['linear_eta'].values)
    # The predictions cover the tail of the data, so align truth from the end.
    test_y = true[(len(true)-len(pred)):]
    train_y = true[:(len(true)-len(pred))]

    var = explained_variance_score(test_y, pred)
    mse = mean_squared_error(test_y, pred)
    r2 = r2_score(test_y, pred)
    print(order_lags, var, mse, r2)

    # DataFrame.append no longer exists in pandas 2.x; pd.concat is the
    # equivalent that works on old and new versions alike.
    row = pd.DataFrame({'radar':[radar],
                        'mse':[np.round(mse, 3)],
                        'r2':[np.round(r2, 3)],
                        'explained_variance':[np.round(var, 3)],
                        'n_estimators':[par['n_estimators']]})
    result = pd.concat([result, row], ignore_index = True)

    # Save after every radar so an interrupted run keeps the finished rows.
    result.to_csv('../dataset/result_mean_estimator.csv', index = False)
    pd.DataFrame(pred).to_csv('../dataset/pred_mean/pred_mean_%i_opt_%s.csv'%(order_lags, radar), index = False)

  0%|          | 0/87 [00:00<?, ?it/s]
100%|██████████| 455/455 [00:00<00:00, 7027.76it/s]

100%|██████████| 910/910 [00:00<00:00, 159233.07it/s]
  1%|          | 1/87 [05:25<7:46:55, 325.77s/it]

{'n_estimators': 30, 'random_state': 100}
3 0.6620715780506712 216437790.6935835 0.6613583093014013


### Weather with altitude (rolling mean dropped)

In [76]:
# Fraction of the data used for the grid search; the rest is predicted and scored.
coef = 0.5
# Placeholder value only: the loop variable below rebinds `radar` on every iteration.
radar = 'KABR'
result_columns = ['radar', 'mse', 'r2', 'explained_variance', 'n_estimators']
result = pd.DataFrame(columns=result_columns)
result_rows = []
order_lags = 3
# NOTE(review): `radars` is not assigned in this cell; it must already exist
# from an earlier cell when this one runs.
for radar in tqdm(radars):
    df_forecasting = pd.read_csv('../dataset/regressor_mean/regressor_mean%i_%s.csv'%(order_lags, radar))
    # Same input files as the "with mean" run, minus the rolling-mean feature.
    df_forecasting = df_forecasting.drop('rolling_mean', axis = 1)
    pred, par = hyperparameter_tuning(df_forecasting, coef, radar)

    # Collapse both series to one summed value per chunk before scoring.
    pred_y = drop_altitude(pred.values)
    true = drop_altitude(df_forecasting['linear_eta'].values)
    # The predictions cover the tail of the data, so align truth from the end.
    test_y = true[(len(true)-len(pred_y)):]
    train_y = true[:(len(true)-len(pred_y))]

    var = explained_variance_score(test_y, pred_y)
    mse = mean_squared_error(test_y, pred_y)
    r2 = r2_score(test_y, pred_y)
    # Report the MSE that was just computed (the previous name `mae` was never defined).
    print(order_lags, var, mse, r2)

    # DataFrame.append no longer exists in pandas 2.x; collect plain records
    # and rebuild the frame instead.
    result_rows.append({'radar': radar,
                        'mse': np.round(mse, 3),
                        'r2': np.round(r2, 3),
                        'explained_variance': np.round(var, 3),
                        'n_estimators': par['n_estimators']})
    result = pd.DataFrame(result_rows, columns=result_columns)

    # Save after every radar so an interrupted run keeps the finished rows.
    result.to_csv('../dataset/result_estimator.csv', index = False)
    pd.DataFrame(pred_y).to_csv('../dataset/pred/pred_%i_opt_%s.csv'%(order_lags, radar), index = False)

100%|██████████| 455/455 [00:00<00:00, 121299.71it/s]
100%|██████████| 910/910 [00:00<00:00, 190774.06it/s]

{'n_estimators': 70, 'random_state': 0} -1414679.4944869713
3 0.3540279401266294 515748359.4937152 0.34905566463298865





### configuration

In [None]:
import json
def ReadJson(JsonFile):
    """Load a file holding one JSON document per line into a DataFrame.

    Parameters
    ----------
    JsonFile : str or path-like
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        One row per non-empty line. When lines are JSON objects with different
        keys, the columns are the union of all keys and absent values are NaN.

    Raises
    ------
    json.JSONDecodeError
        If a non-empty line is not valid JSON.

    Notes
    -----
    NOTE(review): the earlier draft selected positions 0, 3 and 6 of each
    parsed line; which fields that was meant to pick cannot be determined
    from this notebook, so every field is kept.
    """
    records = []
    # GBK is the encoding WriteJson opens its output file with.
    with open(JsonFile, 'r', encoding='GBK') as f:
        for jsonstr in f:
            jsonstr = jsonstr.strip()
            if not jsonstr:
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            records.append(json.loads(jsonstr))
    return pd.DataFrame(records)

    
def WriteJson(JsonFile, data):
    """Write per-radar forest settings to a file, one JSON object per line.

    The first line is ``{"lags": "3"}``. It is followed by one object per
    entry of the module-level ``radars`` sequence, holding the radar name and
    its ``n_estimators``, ``max_depth`` and ``max_features`` values. All
    values are written as strings.

    Parameters
    ----------
    JsonFile : str or path-like
        Output path; an existing file is overwritten.
    data : pandas.DataFrame
        Looked up with ``data.loc[radar, column]``, so it must be indexed by
        radar name and contain the three columns listed above.

    Raises
    ------
    KeyError
        If a radar or one of the columns is missing from ``data``.
    """
    with open(JsonFile, 'w', encoding='GBK') as f1:
        header = {'lags': str(3)}
        f1.write(json.dumps(header) + '\n')
        for radar in radars:
            record = {'radar': str(radar),
                      'n_estimators': str(data.loc[radar, 'n_estimators']),
                      'max_depth': str(data.loc[radar, 'max_depth']),
                      'max_features': str(data.loc[radar, 'max_features'])}
            f1.write(json.dumps(record) + '\n')


In [None]:
# Write the settings to config.json, then read the same file back.
# NOTE(review): `data` is not assigned in any cell shown in this notebook; it
# has to exist in the kernel before this cell runs. WriteJson looks up the
# columns 'n_estimators', 'max_depth' and 'max_features' per radar in it.
WriteJson('config.json', data)
# JsonPara receives whatever ReadJson returns for that file.
JsonPara = ReadJson('config.json')