In [2]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import math
from random import random
import matplotlib.pyplot as plt
import seaborn as sns
import operator
from functools import reduce
from tqdm import tqdm
import matplotlib.animation as ani
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
from sklearn.metrics import explained_variance_score
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
import random
random.seed(12345)

In [3]:
# Print numpy floats with three decimals.
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

# Matplotlib 3.6 renamed the bundled seaborn style sheets from 'seaborn-<name>'
# to 'seaborn-v0_8-<name>'; pick whichever spelling this installation provides
# so the cell runs on both old and new releases.
for _style in ('whitegrid', 'poster', 'dark-palette'):
    _modern = 'seaborn-v0_8-' + _style
    plt.style.use(_modern if _modern in plt.style.available else 'seaborn-' + _style)

# Font settings go after the style sheets: a style sheet overwrites every
# rcParam it defines, so a font chosen before plt.style.use() can be reset.
# NOTE(review): the seaborn sheets appear to define font.family themselves - confirm.
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["mathtext.fontset"] = "cm"

In [4]:
def accuracy_plot(true, pred):
    """Scatter predicted against actual values with a +/-10 % band.

    Parameters
    ----------
    true : array-like
        Observed values; also used (negated) to colour the points.
    pred : array-like
        Predicted values, same length as ``true``.

    The figure is drawn on the current pyplot figure and shown immediately;
    nothing is returned.
    """
    # Coerce to arrays so plain lists work too (unary minus is not defined
    # on a Python list).
    true = np.asarray(true)
    pred = np.asarray(pred)

    plt.scatter(true, pred, s=10, c=-true, cmap='viridis')
    plt.ylabel('predicted migration intensity(birds km-3)')
    plt.xlabel('actual migration intensity(birds km-3)')

    # Shaded band between 0.9*x and 1.1*x, i.e. predictions within 10 % of
    # the diagonal, drawn over the integer range [0, int(max(true))).
    x = np.arange(0, int(max(true)))
    plt.fill_between(x, x * 0.9, x * 1.1, alpha=0.3)
    plt.show()

In [5]:
def hyperparameter_tuning(df_forecasting, coef, radar):
    """Grid-search the number of trees of a random forest and predict the hold-out part.

    Parameters
    ----------
    df_forecasting : DataFrame
        First column is the target, all remaining columns are the features.
    coef : float
        Fraction of the rows used for training. The split point is rounded
        down to a multiple of 30 rows.
    radar : str
        Not used inside the function.

    Returns
    -------
    (Series, dict)
        Predictions for the validation rows (indexed like the validation
        target) and the best parameter combination found by the search.
    """
    features = df_forecasting.iloc[:, 1:]
    target = df_forecasting.iloc[:, 0]

    # Training rows come first; the boundary always falls on a whole
    # 30-row group.
    split = int(coef * (len(features) / 30)) * 30
    x_train, x_valid = features[:split], features[split:]
    y_train, y_valid = target[:split], target[split:]

    search = GridSearchCV(
        RandomForestRegressor(n_jobs=-1),
        [{'n_estimators': range(30, 151, 10), 'random_state': [100]}],
        cv=5,
        scoring='neg_mean_squared_error',
    )
    search.fit(x_train, y_train)

    best_params = search.best_params_
    print(best_params)

    predictions = search.best_estimator_.predict(x_valid)
    return pd.Series(predictions, index=y_valid.index), best_params

def drop_altitude(data, group_size=30):
    """Sum consecutive, non-overlapping groups of ``group_size`` entries.

    Parameters
    ----------
    data : sliceable sequence whose slices have ``.sum()``
        For example a numpy array or a pandas Series.
    group_size : int, default 30
        Number of consecutive entries collapsed into one value.
        (Presumably the number of altitude bins per time step - TODO confirm.)

    Returns
    -------
    list
        One sum per group, in order. A trailing group shorter than
        ``group_size`` is summed as-is.
    """
    return [
        data[start:start + group_size].sum()
        for start in tqdm(np.arange(0, len(data), group_size))
    ]


In [17]:
def hyperparameter_tuning1(df_forecasting, coef, radar, n_estimators, max_depth):
    """Grid-search a random forest over caller-supplied tree counts and depths.

    Parameters
    ----------
    df_forecasting : DataFrame
        First column is the target, all remaining columns are the features.
    coef : float
        Fraction of the rows used for training. The split point is rounded
        down to a multiple of 30 rows.
    radar : str
        Not used inside the function.
    n_estimators, max_depth : sequences
        Candidate values placed directly into the parameter grid.

    Returns
    -------
    (Series, dict)
        Predictions for the validation rows (indexed like the validation
        target) and the best parameter combination found by the search.
    """
    features = df_forecasting.iloc[:, 1:]
    target = df_forecasting.iloc[:, 0]

    # Training rows come first; the boundary always falls on a whole
    # 30-row group.
    split = int(coef * (len(features) / 30)) * 30
    x_train, x_valid = features[:split], features[split:]
    y_train, y_valid = target[:split], target[split:]

    candidates = [{'n_estimators': n_estimators,
                   'max_depth': max_depth,
                   'random_state': [100]}]
    search = GridSearchCV(
        RandomForestRegressor(n_jobs=-1),
        candidates,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    search.fit(x_train, y_train)

    best_params = search.best_params_
    print(best_params)

    predictions = search.best_estimator_.predict(x_valid)
    return pd.Series(predictions, index=y_valid.index), best_params

# weather with altitude and mean

## n_estimators

In [31]:
# Flatten the radar table into a one-dimensional array of its cell values.
radars = pd.read_csv('../dataset/radar.csv').values.ravel()
print(radars)

['KABR' 'KABX' 'KAKQ' 'KAMA' 'KAMX' 'KAPX' 'KARX' 'KATX' 'KBBX' 'KBGM'
 'KBHX' 'KBIS' 'KBLX' 'KBMX' 'KBOX' 'KBRO' 'KBUF' 'KBYX' 'KCAE' 'KCBW'
 'KCBX' 'KCCX' 'KCLE' 'KCLX' 'KCRP' 'KCXX' 'KCYS' 'KDAX' 'KDDC' 'KDFX'
 'KDGX' 'KDIX' 'KDLH' 'KDMX' 'KDOX' 'KDTX' 'KDVN' 'KDYX' 'KEAX' 'KEMX'
 'KENX' 'KEPZ' 'KESX' 'KEVX' 'KEWX' 'KEYX' 'KFCX' 'KFDR' 'KFDX' 'KFFC'
 'KFSD' 'KFSX' 'KFTG' 'KFWS' 'KGGW' 'KGJX' 'KGLD' 'KGRB' 'KGRK' 'KGRR'
 'KGSP' 'KGWX' 'KGYX' 'KHDX' 'KHGX' 'KHNX' 'KHTX' 'KICT' 'KICX' 'KILN'
 'KILX' 'KIND' 'KINX' 'KIWA' 'KIWX' 'KJAX' 'KJGX' 'KJKL' 'KLBB' 'KLCH'
 'KLIX' 'KLNX' 'KLOT' 'KLRX' 'KLSX' 'KLTX' 'KLVX' 'KLWX' 'KLZK' 'KMAF'
 'KMAX' 'KMHX' 'KMKX' 'KMLB' 'KMOB' 'KMPX' 'KMQT' 'KMRX' 'KMSX' 'KMTX'
 'KMUX' 'KMVX' 'KNKX' 'KNQA' 'KOAX' 'KOHX' 'KOKX' 'KOTX' 'KPAH' 'KPBZ'
 'KPDT' 'KPUX' 'KRAX' 'KRGX' 'KRIW' 'KRLX' 'KRTX' 'KSFX' 'KSGF' 'KSHV'
 'KSJT' 'KSOX' 'KSRX' 'KTBW' 'KTFX' 'KTLH' 'KTLX' 'KTWX' 'KTYX' 'KUDX'
 'KUEX' 'KVNX' 'KVTX' 'KVWX' 'KYUX']


In [None]:
coef = 0.5
order_lags = 3
result_path = '../dataset/result_mean_estimator1.csv'

# Resume from an earlier (possibly interrupted) run when its results file
# exists; otherwise start with an empty table.
try:
    result = pd.read_csv(result_path)
except FileNotFoundError:
    result = pd.DataFrame(columns=['radar', 'mse', 'r2', 'explained_variance', 'n_estimators'])
rows = result.to_dict('records')
done = set(result['radar'])

for radar in tqdm(radars):
    # Each radar takes minutes to tune; skip those already stored so a
    # restart neither repeats the work nor writes duplicate rows.
    # Delete the results file to force a full re-run.
    if radar in done:
        continue

    df_forecasting = pd.read_csv('../dataset/regressor_mean1/regressor_mean%i_%s.csv'%(order_lags, radar))
    pred, par = hyperparameter_tuning(df_forecasting, coef, radar)

    # Collapse every 30 consecutive rows into one value, for predictions and
    # observations alike, then align the observed tail with the predictions.
    pred = drop_altitude(pred)
    true = drop_altitude(df_forecasting['linear_eta'].values)
    test_y = true[(len(true)-len(pred)):]
    train_y = true[:(len(true)-len(pred))]

    var = explained_variance_score(test_y, pred)
    mse = mean_squared_error(test_y, pred)
    r2 = r2_score(test_y, pred)
    print(order_lags, var, mse, r2)

    # DataFrame.append no longer exists in pandas 2.x; accumulate plain
    # records and rebuild the (small) table instead.
    rows.append({'radar': radar,
                 'mse': np.round(mse, 3),
                 'r2': np.round(r2, 3),
                 'explained_variance': np.round(var, 3),
                 'n_estimators': par['n_estimators']})
    result = pd.DataFrame(rows)

    # Checkpoint after every radar so an interruption loses at most one fit.
    result.to_csv(result_path, index = False)
    pd.DataFrame(pred).to_csv('../dataset/pred_mean1/pred_mean_%i_opt_%s.csv'%(order_lags, radar), index = False)

  0%|          | 0/135 [00:00<?, ?it/s]
100%|██████████| 455/455 [00:00<00:00, 7775.62it/s]

100%|██████████| 910/910 [00:00<00:00, 214620.82it/s]
  1%|          | 1/135 [04:43<10:32:44, 283.32s/it]

{'n_estimators': 30, 'random_state': 100}
3 0.42650765773949595 441756767.03292763 0.42347545931953545



100%|██████████| 455/455 [00:00<00:00, 9665.62it/s]

100%|██████████| 910/910 [00:00<00:00, 199686.96it/s]
  1%|▏         | 2/135 [09:51<11:01:05, 298.23s/it]

{'n_estimators': 50, 'random_state': 100}
3 0.5486403669561486 126356754.68635319 0.5485417103542828



100%|██████████| 455/455 [00:00<00:00, 7243.86it/s]

100%|██████████| 910/910 [00:00<00:00, 120131.46it/s]

{'n_estimators': 90, 'random_state': 100}
3 0.5905917940255563 1319145283.2864063 0.5904229690681113



  2%|▏         | 3/135 [14:55<11:01:22, 300.63s/it]
100%|██████████| 455/455 [00:00<00:00, 9211.31it/s]

100%|██████████| 910/910 [00:00<00:00, 144379.51it/s]

{'n_estimators': 100, 'random_state': 100}
3 0.467604555117037 247008796.33681613 0.4603653297918737



  3%|▎         | 4/135 [20:09<11:07:31, 305.73s/it]
100%|██████████| 455/455 [00:00<00:00, 8633.57it/s]

100%|██████████| 910/910 [00:00<00:00, 190336.44it/s]
  4%|▎         | 5/135 [24:35<10:31:36, 291.51s/it]

{'n_estimators': 50, 'random_state': 100}
3 -0.8794702512757382 526698282.85636026 -0.8827318715548562



100%|██████████| 455/455 [00:00<00:00, 7910.30it/s]

100%|██████████| 910/910 [00:00<00:00, 139254.14it/s]
  4%|▍         | 6/135 [29:10<10:14:35, 285.86s/it]

{'n_estimators': 40, 'random_state': 100}
3 0.5737514502314829 805864402.5376662 0.5718825514464303



100%|██████████| 455/455 [00:00<00:00, 8274.98it/s]

100%|██████████| 910/910 [00:00<00:00, 203032.96it/s]
  5%|▌         | 7/135 [33:24<9:47:53, 275.57s/it] 

{'n_estimators': 100, 'random_state': 100}
3 0.6455881960389958 1697775984.3959892 0.6452881018004137



100%|██████████| 455/455 [00:00<00:00, 9271.54it/s]

100%|██████████| 910/910 [00:00<00:00, 191060.55it/s]
  6%|▌         | 8/135 [37:07<9:07:36, 258.71s/it]

{'n_estimators': 90, 'random_state': 100}
3 0.6325339277657254 527492640.8615583 0.6276158071441942



100%|██████████| 455/455 [00:00<00:00, 9484.71it/s]

100%|██████████| 910/910 [00:00<00:00, 182195.65it/s]
  7%|▋         | 9/135 [40:34<8:29:25, 242.58s/it]

{'n_estimators': 90, 'random_state': 100}
3 0.7530712081431821 545113157.3321265 0.7518483180109174



100%|██████████| 455/455 [00:00<00:00, 9227.52it/s]

100%|██████████| 910/910 [00:00<00:00, 215700.29it/s]
  7%|▋         | 10/135 [44:46<8:31:14, 245.40s/it]

{'n_estimators': 80, 'random_state': 100}
3 0.6401454938832194 548029361.2041845 0.6298428338091693



100%|██████████| 455/455 [00:00<00:00, 9064.61it/s]

100%|██████████| 910/910 [00:00<00:00, 216201.24it/s]
  8%|▊         | 11/135 [49:05<8:35:57, 249.65s/it]

{'n_estimators': 100, 'random_state': 100}
3 0.7462951795301018 207414373.6274604 0.7438869798687758



100%|██████████| 455/455 [00:00<00:00, 8769.57it/s]

100%|██████████| 910/910 [00:00<00:00, 193276.11it/s]
  9%|▉         | 12/135 [53:48<8:52:48, 259.91s/it]

{'n_estimators': 100, 'random_state': 100}
3 0.6664042411448591 334766221.0842351 0.6599712344319206



100%|██████████| 455/455 [00:00<00:00, 8960.21it/s]

100%|██████████| 910/910 [00:00<00:00, 244088.80it/s]
 10%|▉         | 13/135 [58:13<8:51:13, 261.26s/it]

{'n_estimators': 90, 'random_state': 100}
3 0.18037838342546242 631610363.7446053 0.1748493240913206



100%|██████████| 455/455 [00:00<00:00, 9387.62it/s]

100%|██████████| 910/910 [00:00<00:00, 224809.56it/s]
 10%|█         | 14/135 [1:01:57<8:24:24, 250.12s/it]

{'n_estimators': 80, 'random_state': 100}
3 0.5964653656562195 1397401481.3364596 0.5961768953074932



100%|██████████| 455/455 [00:00<00:00, 8838.91it/s]

100%|██████████| 910/910 [00:00<00:00, 226934.81it/s]
 11%|█         | 15/135 [1:06:36<8:37:41, 258.84s/it]

{'n_estimators': 90, 'random_state': 100}
3 0.49263826754544915 911783870.0483294 0.48948832780166696



100%|██████████| 455/455 [00:00<00:00, 8760.44it/s]

100%|██████████| 910/910 [00:00<00:00, 230136.67it/s]
 12%|█▏        | 16/135 [1:10:15<8:09:52, 246.99s/it]

{'n_estimators': 30, 'random_state': 100}
3 0.6587368548541839 490373951.1506026 0.6559414087400888



100%|██████████| 455/455 [00:00<00:00, 8831.18it/s]

100%|██████████| 910/910 [00:00<00:00, 209152.10it/s]
 13%|█▎        | 17/135 [1:14:30<8:10:05, 249.20s/it]

{'n_estimators': 100, 'random_state': 100}
3 0.5899345258554558 933787016.5811658 0.5883018289664594



100%|██████████| 455/455 [00:00<00:00, 9704.59it/s]

100%|██████████| 910/910 [00:00<00:00, 187254.90it/s]
 13%|█▎        | 18/135 [1:18:39<8:06:05, 249.28s/it]

{'n_estimators': 50, 'random_state': 100}
3 -0.09166954695792517 76968964.35690773 -0.09344979603126502



100%|██████████| 455/455 [00:00<00:00, 9139.84it/s]

100%|██████████| 910/910 [00:00<00:00, 179176.45it/s]
 14%|█▍        | 19/135 [1:22:55<8:05:27, 251.10s/it]

{'n_estimators': 100, 'random_state': 100}
3 0.7864312747992677 724295871.3244724 0.7802279333124462



100%|██████████| 455/455 [00:00<00:00, 9146.28it/s]

100%|██████████| 910/910 [00:00<00:00, 235402.53it/s]
 15%|█▍        | 20/135 [1:27:13<8:05:35, 253.35s/it]

{'n_estimators': 100, 'random_state': 100}
3 0.34402990162799896 1934524413.7460332 0.3415726241184739



100%|██████████| 455/455 [00:00<00:00, 9868.54it/s]

100%|██████████| 910/910 [00:00<00:00, 225860.50it/s]
 16%|█▌        | 21/135 [1:31:17<7:55:45, 250.40s/it]

{'n_estimators': 60, 'random_state': 100}
3 0.6317973294378293 141950822.78354567 0.6294583730311982



100%|██████████| 455/455 [00:00<00:00, 9593.03it/s]

100%|██████████| 910/910 [00:00<00:00, 224136.28it/s]
 16%|█▋        | 22/135 [1:35:22<7:48:29, 248.76s/it]

{'n_estimators': 30, 'random_state': 100}
3 0.7065253612385956 637683004.37263 0.686353262192235



100%|██████████| 455/455 [00:00<00:00, 9547.58it/s]

100%|██████████| 910/910 [00:00<00:00, 227557.18it/s]
 17%|█▋        | 23/135 [1:39:07<7:30:59, 241.60s/it]

{'n_estimators': 30, 'random_state': 100}
3 0.7293940196015289 960377346.7510754 0.7293020107174987



100%|██████████| 455/455 [00:00<00:00, 6316.64it/s]

100%|██████████| 910/910 [00:00<00:00, 132985.49it/s]

{'n_estimators': 60, 'random_state': 100}
3


 18%|█▊        | 24/135 [1:43:49<7:49:21, 253.71s/it]
100%|██████████| 455/455 [00:00<00:00, 10024.68it/s]

100%|██████████| 910/910 [00:00<00:00, 222282.72it/s]
 19%|█▊        | 25/135 [1:47:59<7:43:31, 252.83s/it]

 0.79273208135504 533677438.18989 0.7886006503378536
{'n_estimators': 60, 'random_state': 100}
3 0.4316064447383289 796667007.7486355 0.4220989830471702



100%|██████████| 455/455 [00:00<00:00, 8705.85it/s]

100%|██████████| 910/910 [00:00<00:00, 222373.38it/s]
 19%|█▉        | 26/135 [1:52:13<7:39:34, 252.97s/it]

{'n_estimators': 30, 'random_state': 100}
3 0.5864421337637824 809931740.9270011 0.5856290892340146



100%|██████████| 455/455 [00:00<00:00, 8109.91it/s]

100%|██████████| 910/910 [00:00<00:00, 170851.24it/s]
 20%|██        | 27/135 [1:56:47<7:46:43, 259.30s/it]

{'n_estimators': 100, 'random_state': 100}
3 0.6765193714677382 194364730.24691176 0.6662824129681655


## max_depth

In [None]:
order_lags = 3
# `coef` is not set in this cell; it must already exist from the
# n_estimators cell.
# NOTE(review): this cell reads 'result_mean_estimator.csv' and
# 'regressor_mean/' and writes to 'pred_mean/', whereas the n_estimators cell
# uses 'result_mean_estimator1.csv', 'regressor_mean1/' and 'pred_mean1/' -
# confirm which set of paths is intended.
n_estimators = pd.read_csv('../dataset/result_mean_estimator.csv')
result_columns = ['radar', 'mse', 'r2', 'explained_variance', 'n_estimators', 'max_depth']
rows = []
result = pd.DataFrame(columns=result_columns)
for radar in tqdm(radars):
    df_forecasting = pd.read_csv('../dataset/regressor_mean/regressor_mean%i_%s.csv'%(order_lags, radar))

    # One-element array holding the tree count stored for this radar (first
    # matching row); used as the only n_estimators candidate of the grid.
    n_esti = n_estimators[n_estimators['radar']==radar][['n_estimators']].values[0]
    pred, par = hyperparameter_tuning1(df_forecasting, coef, radar, n_esti, range(8, 16))

    # Collapse every 30 consecutive rows into one value, for predictions and
    # observations alike, then align the observed tail with the predictions.
    pred = drop_altitude(pred)
    true = drop_altitude(df_forecasting['linear_eta'].values)
    test_y = true[(len(true)-len(pred)):]
    train_y = true[:(len(true)-len(pred))]

    var = explained_variance_score(test_y, pred)
    mse = mean_squared_error(test_y, pred)
    r2 = r2_score(test_y, pred)
    print(order_lags, var, mse, r2)

    # DataFrame.append no longer exists in pandas 2.x; accumulate plain
    # records and rebuild the (small) table instead.
    rows.append({'radar': radar,
                 'mse': np.round(mse, 3),
                 'r2': np.round(r2, 3),
                 'explained_variance': np.round(var, 3),
                 'n_estimators': n_esti[0],
                 'max_depth': par['max_depth']})
    result = pd.DataFrame(rows, columns=result_columns)

    # Checkpoint after every radar so an interruption loses at most one fit.
    result.to_csv('../dataset/result_mean_depth.csv', index = False)
    pd.DataFrame(pred).to_csv('../dataset/pred_mean/pred_mean_%i_opt_%s.csv'%(order_lags, radar), index = False)

# Configuration

In [22]:
# Generate the configuration file with best performance parameters
import json
def WriteJson(JsonFile, data, radars, lags=3, random_state=100):
    """Write the tuned parameters as one JSON object per line.

    The first line holds the global settings ``lags`` and ``random_state``;
    each following line holds one row of ``data`` as
    ``{"radar": ..., "n_estimators": ...}``. All values except the radar
    name are written as strings.

    Parameters
    ----------
    JsonFile : str
        Output path; the file is overwritten and encoded as GBK.
    data : DataFrame
        Column 0 is read as the radar name and column 4 as the number of
        estimators (both by position, not by column label).
    radars : sequence
        Not used inside the function.
    lags : int, default 3
        Value stored under ``'lags'`` in the header line.
    random_state : int, default 100
        Value stored under ``'random_state'`` in the header line.
    """
    header = {'lags': str(lags), 'random_state': str(random_state)}
    with open(JsonFile, 'w', encoding = 'GBK') as f1:
        f1.write(json.dumps(header, ensure_ascii=False) + '\n')
        for i in range(len(data)):
            entry = {'radar': data.iloc[i, 0], 'n_estimators': str(data.iloc[i, 4])}
            f1.write(json.dumps(entry, ensure_ascii=False) + '\n')


In [23]:
# Reload the radar list (flattened to one dimension) and the n_estimators
# results, then dump them to the configuration file.
radars = pd.read_csv('../dataset/radar.csv').values.ravel()
result = pd.read_csv('../dataset/result_mean_estimator.csv')
WriteJson('config.json', result, radars)