In [1]:
import pandas as pd
from prophet import Prophet
from pymongo import MongoClient
from prophet.plot import plot_plotly, plot_components_plotly
import os
import time
import pickle
from datetime import datetime
from dask_kubernetes import KubeCluster

Importing plotly failed. Interactive plots will not work.


# Global Model

In [3]:
total_records = 1064950

db = MongoClient("lattice-100", 27018)
collection = 'covid_county_formatted'
pipeline = [
    {
        "$sample": {"size": total_records//100}
    }
]
cursor = db.sustaindb[collection].aggregate(pipeline)
df_original = pd.DataFrame(list(cursor))

In [4]:
df = df_original
covid_features = ['cases', 'deaths']
unique_dates = df['date'].unique()
df_map = {}

for selected_feature in covid_features:
    df_s = df[['date', selected_feature]]
    sums = []
    for t in unique_dates:
        sum_ = df_s[df_s['date'] == t][selected_feature].sum()
        sums.append(sum_)
    df_sums = pd.DataFrame(list(zip(unique_dates, sums)), columns=['ds', 'y'])
#     df_means.index.name = 'ds'
    df_map[selected_feature] = df_sums
    
df_map['cases'].head()

Unnamed: 0,ds,y
0,2021-01-06,4306
1,2020-06-25,1334
2,2021-02-02,1767
3,2020-07-09,750
4,2020-11-10,1277


In [11]:
pickle.dump(df_map, open('pickles/covid/df_map.pkl', 'wb'))

In [5]:
# Start by loading pickled 'df_map'
df_map = pickle.load(open('pickles/covid/df_map.pkl', 'rb'))

In [6]:
# select one feature
df0 = df_map['cases']
df0.head()
df0.columns

Index(['ds', 'y'], dtype='object')

## Build Global Model

In [7]:
def predict(df_train):
    m = Prophet(
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )
    # model.fit(df, algorithm='LBFGS')
    fit_m = m.fit(df_train, algorithm='LBFGS')
    df_train_future = m.make_future_dataframe(periods=300)
    df_train_forecast = m.predict(df_train_future)

    return fit_m, df_train_future, df_train_forecast

fit_m, df_train, df_train_forecast = predict(df0)

Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99        856.93   4.23956e-06       100.084      0.9923      0.9923      121   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     110       856.932   1.65885e-05       103.203   1.623e-07       0.001      192  LS failed, Hessian reset 
     136       856.934   3.36273e-08       100.163      0.2146      0.9771      225   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance


In [8]:
global_model = fit_m

In [9]:
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics

df0_cv = cross_validation(fit_m, initial= '100 days', period='100 days', horizon='100')
df0_p = performance_metrics(df0_cv)
df0_p.head()

INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


  0%|          | 0/3 [00:00<?, ?it/s]

Initial log joint probability = -4.06139
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      63       282.705    9.9467e-09        100.48       0.168       0.168       85   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance
Initial log joint probability = -3.1626
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      74       612.982   6.55743e-09       100.557      0.2472      0.2472      103   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance
Initial log joint probability = -3.1832
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       854.235   7.01577e-07       99.5517      0.5651      0.5651      130   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     131       854.235   3.46827e-08    

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,0 days 00:00:00.000000100,2100069.0,1449.161391,1001.159585,1.415313,0.587237,0.733411,0.666667


In [10]:
model = fit_m
print(f'seasonality_prior_scale: {model.seasonality_prior_scale}')
print(f'changepoint_prior_scale: {model.changepoint_prior_scale}')
print(f'uncertainty_samples: {model.uncertainty_samples}')
print(f'seasonlity_mode: {model.seasonality_mode}')
print(f'interval_width: {model.interval_width}')
print(f'growth: {model.growth}')

seasonality_prior_scale: 10.0
changepoint_prior_scale: 0.05
uncertainty_samples: 1000
seasonlity_mode: additive
interval_width: 0.8
growth: linear


## GridSearch on Global Model (Bare-Metal Dask)

In [11]:
import itertools
from dask.distributed import Client
from dask_kubernetes import KubeCluster

USE_KUBERNETES = False

if USE_KUBERNETES:
    client = Client('localhost:9000')  
else:
    client = Client('lattice-150:8786')    

def grid_search(df):
    param_grid = {  
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }

    # Generate all combinations of parameters
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
    rmses = []  # Store the RMSEs for each params here

    time1 = time.monotonic()
    # Use cross validation to evaluate all parameters
    for params in all_params:
        m = Prophet(**params).fit(df)  # Fit model with given params
        df_cv = cross_validation(m, initial= '100 days', period='100 days', horizon='100', parallel="dask")
        df_p = performance_metrics(df_cv, rolling_window=1)
        rmses.append(df_p['rmse'].values[0])

    # Find the best parameters
    tuning_results = pd.DataFrame(all_params)
    tuning_results['rmse'] = rmses

    print(f'Time: {time.monotonic() - time1}')
    
    optimal_params = tuning_results[tuning_results['rmse'] == min(tuning_results['rmse'])]
    rmse = optimal_params['rmse'].iloc[0]
    changepoint_prior_scale = optimal_params['changepoint_prior_scale'].iloc[0]
    seasonality_prior_scale = optimal_params['seasonality_prior_scale'].iloc[0]
    
    return rmse, changepoint_prior_scale, seasonality_prior_scale


time1 = time.monotonic()
rmse, changepoint_prior_scale, seasonality_prior_scale = grid_search(df0)
print(f'Time to complete Grid Search on Global Model: {time.monotonic() - time1} s')

# Optimal values
print(f'rmse: {rmse}')
print(f'changepoint_prior_scale: {changepoint_prior_scale}')
print(f'seasonality_prior_scale: {seasonality_prior_scale}')

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      62       806.397   4.00199e-09       5030.43      0.1458      0.1458       77   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       811.427   2.00898e-06       5000.87           1           1      125   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     105        811.44   1.41032e-06       4656.05   2.678e-10       0.001      172  LS failed, Hessian reset 
     122       811.445   9.36789e-09       4758.17      0.3158      0.3158      196   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      92       810.936   2.66325e-06       4967.84   5.612e-10       0.001      159  LS failed, Hessian reset 
      99       810.947   8.69794e-07       4891.53           1           1      168   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     120       810.952   2.25599e-07       5079.27   4.441e-11       0.001      244  LS failed, Hessian reset 
     132       810.953   6.29935e-09       4902.75      0.4915      0.4915      260   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       810.892   5.54362e-05       4951.63           1           1      126   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     137       810.949   7.57771e-09        4812.5      0.5136      0.5136      173   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      74       824.468   9.17283e-05       479.339   1.878e-07       0.001      139  LS failed, Hessian reset 
      99       824.493   1.80661e-07        501.77       0.514       0.514      170   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     105        824.51   3.63466e-05       491.967   6.737e-08       0.001      212  LS failed, Hessian reset 
     154       824.533   4.67361e-06       452.291   9.713e-09       0.001      315  LS failed, Hessian reset 
     189       824.539   9.41874e-09       452.771      0.3045      0.3045      354   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       825.369   1.01675e-06       482.762           1           1      135   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     117       825.371   7.34346e-07       490.355    1.52e-09       0.001      192  LS failed, Hessian reset 
     134       825.371   6.70705e-09       435.591      0.4174      0.4174      214   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       824.222   2.20651e-05       537.163           1           1      134   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     102       824.288   0.000128774       502.206   2.393e-07       0.001      188  LS failed, Hessian reset 
     148       824.329   9.78578e-09       429.633      0.3811      0.3811      247   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       824.233   2.82186e-07       450.878      0.7254      0.7254      136   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     132       824.294   8.64723e-05       397.734   1.465e-07       0.001      223  LS failed, Hessian reset 
     176       824.323   1.38468e-07       471.861   2.617e-10       0.001      319  LS failed, Hessian reset 
     181       824.323   2.83265e-09       393.384      0.1791      0.1791      325   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      65       836.226   0.000432276       64.6011   6.372e-06       0.001      114  LS failed, Hessian reset 
      99       836.926    0.00014467       34.0437           1           1      158   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     135       837.141   0.000594224       79.9412   1.253e-05       0.001      230  LS failed, Hessian reset 
     199       837.414   0.000246931       38.0865      0.8171      0.8171      307   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     276       838.173   0.000330166       35.4948   7.461e-06       0.001      433  LS failed, Hessian reset 
     299       838.187    1.6855e-05       39.6944      0.9653     0.09653      465   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes

INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      68       836.716    0.00175951       102.742    2.03e-05       0.001      113  LS failed, Hessian reset 
      99       837.259   0.000144873       45.0044      0.2404      0.2404      149   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       838.685   7.40736e-06       40.4284           1           1      275   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     239       838.686   9.84509e-08       33.3462           1           1      328   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       837.513     0.0007721       41.7448      0.8241      0.8241      130   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     189       838.674   1.18651e-07       38.1845        1.09      0.3779      246   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance


INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       837.361    0.00151852       43.0247           1           1      124   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       838.585   0.000133768       40.3343      0.2437           1      250   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     262       838.603   0.000174922       31.5939   3.597e-06       0.001      374  LS failed, Hessian reset 
     299       838.621   6.63169e-06       39.5633      0.4389      0.4389      420   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     325       838.624   4.81453e-05       48.3816   1.164e-06       0.001      490  LS failed, Hessian reset 
     394       838.666    0.00063198       44.5587   1.313e-05       0.001      613  LS failed, Hessian reset

INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       847.855    0.00343583       57.1956           1           1      122   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     193       849.571   0.000231619       24.7055   1.164e-05       0.001      287  LS failed, Hessian reset 
     199       849.607    0.00335516       28.9583           1           1      293   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     240       849.733    0.00110678       17.4783   0.0001055       0.001      380  LS failed, Hessian reset 
     299       850.023    0.00499694       13.4124           1           1      448   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399       850.472   0.000489891       24.7422      0.8564      0.8564      570   
    Iter      log prob

INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       848.752    0.00366129       15.5414       1.458      0.1458      126   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     157       849.617   0.000307224       35.3031   9.844e-06       0.001      228  LS failed, Hessian reset 
     199       850.214    0.00110307       16.5534      0.5194      0.5194      274   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       850.605    0.00412871       80.4802           1           1      394   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     395       851.007   0.000358175       21.7618   2.591e-05       0.001      538  LS failed, Hessian reset 
     399       851.009   0.000155607       15.2653           1           1      542   
    Iter      log prob

INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       848.372    0.00912565       28.9317           1           1      114   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       849.929    0.00427142       32.5392      0.7994      0.7994      243   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       850.735    0.00376087       33.0263       0.391           1      363   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     364       851.002   9.87863e-05       14.8234   8.335e-06       0.001      498  LS failed, Hessian reset 
     399       851.027    0.00120007       16.9904           1           1      538   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     419       851.033   9.45736e-05     

INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:Making 3 forecasts with cutoffs between 2020-08-18 23:59:59.999999900 and 2021-03-06 23:59:59.999999900


Initial log joint probability = -3.1737
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99         848.6    0.00297262       26.3962           1           1      118   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       850.048    0.00231131       13.3927           1           1      228   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       850.699    0.00867182       32.4437           1           1      351   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     311       850.807   0.000422346       31.7506   1.939e-05       0.001      403  LS failed, Hessian reset 
     399       851.027   0.000728181       9.46524       3.245      0.3245      509   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     458       851.046   1.31902e-05     

INFO:prophet:Applying in parallel with <Client: 'tcp://129.82.208.160:8786' processes=24 threads=384, memory=1.47 TiB>


Time: 201.9827385660028
Time to complete Grid Search on Global Model: 201.98487584700342 s
rmse: 1365.082761731964
changepoint_prior_scale: 0.5
seasonality_prior_scale: 10.0


In [12]:
gis_joins = df_original['GISJOIN'].unique()
print(f'#GISJOINs: {len(gis_joins)}')

#GISJOINs: 2993


## Dictionary of GISJOIN --> DF

In [34]:
import dask
selected_feature = 'cases'
child_dfs = {}

def get_df_by_gis_join(gis_join):
    print(gis_join, end=' ')
    cursor = db.sustaindb[collection].aggregate([{"$match": {"GISJOIN": gis_join}}])
    df = pd.DataFrame(list(cursor))[['date', selected_feature]]
    df.columns = ['ds','y']
    return df

futures = []
for gis_join in gis_joins:
    try:
        child_dfs[gis_join] = get_df_by_gis_join(gis_join)
    except:
        print(f'Error on {gis_join}')

G5000030 G5501210 G2100750 G4100490 G4500150 G2900510 G3700710 G5101250 G4803370 G2400450 G2901810 G5400050 G2400150 G5400090 G2601570 G2902090 G4000190 G3900330 G3200070 G4900190 G0201500 G2800570 G3100190 G1800050 G4100250 G2300250 G3800590 G3100430 G2500010 G4802030 G4802550 G5107400 G2800910 G3100770 G1800510 G4802070 G4700990 G0101150 G1701690 G5106000 G4701890 G4201090 G3901010 G2000910 G1701950 G4601370 G2100710 G4200050 G2800830 G2600290 G3701990 G3200150 G5300570 G3701270 G2600070 G3900350 G2700210 G1700270 G4801170 G3000210 G2200910 G2300210 G5400910 G4600950 G2200710 G2100890 G2200470 G2900690 G5100250 G0100450 G2701490 G4701730 G2100230 G2200970 G4500030 G5400570 G3000770 G3101210 G3000910 G3900630 G2801250 G1701170 G3000710 G1901350 G5101950 G4601270 G4800770 G3300010 G4800030 G2600330 G3600250 G3400090 G3900870 G4803090 G4701630 G1700930 G2101330 G3900550 G5101130 G4800510 G2900810 G1302110 G3100670 G3500470 G3701830 G1901650 G4804710 G2400430 G4500590 G3800090 G1701890 G

In [14]:
# pickle.dump(child_dfs, open('pickles/covid/child_dfs.pkl', 'wb'))
child_dfs = pickle.load(open('pickles/covid/child_dfs.pkl', 'rb'))

In [15]:
child_dfs

{'G5000030':              ds   y
 0    2020-03-24   5
 1    2020-03-31   1
 2    2020-04-01   0
 3    2020-03-19   1
 4    2020-04-22   0
 ..          ...  ..
 361  2021-01-11  11
 362  2021-01-15  24
 363  2021-01-18  14
 364  2021-02-15  22
 365  2021-02-20   8
 
 [366 rows x 2 columns],
 'G5501210':              ds    y
 0    2020-04-06    0
 1    2020-06-12    0
 2    2020-06-14    1
 3    2020-10-06   20
 4    2020-10-13   20
 ..          ...  ...
 335  2020-11-17  120
 336  2020-11-26   32
 337  2021-01-07   17
 338  2021-01-20    9
 339  2021-02-04    6
 
 [340 rows x 2 columns],
 'G2100750':              ds  y
 0    2020-04-21  0
 1    2020-05-10  0
 2    2020-05-01  0
 3    2020-04-20  0
 4    2020-04-27  0
 ..          ... ..
 318  2021-02-03  3
 319  2021-02-08  0
 320  2021-02-17  0
 321  2021-02-25  2
 322  2021-03-07  0
 
 [323 rows x 2 columns],
 'G4100490':              ds  y
 0    2020-04-03  1
 1    2020-04-13  0
 2    2020-04-15  0
 3    2020-04-23  0
 4    2020-05-0

In [16]:
class TLModel:    
    def __init__(self, gis_join, time, model):
        self.rmse = rmse
        self.gis_join = gis_join
        self.time = time
        self.model = model
        
    def __str__(self):
        return f'{self.gis_join}: TLModel(time={self.time})'
    
class NonTLModel:
    def __init__(self, gis_join, time):
        self.gis_join = gis_join
        self.time = time
        
    def __str__(self):
        return f'{self.gis_join}: NonTLModel(time={self.time})'

# Child Models (Non-TL)

In [36]:
import time

non_tl_models = {}
counter = 1
for gis_join in list(child_dfs.keys())[:200]:
    time1 = time.monotonic()
    predict(child_dfs[gis_join])
    time2 = time.monotonic() - time1
    print(f'{counter} - {gis_join}: {time2} seconds')
    non_tl_models[gis_join] = NonTLModel(gis_join, time2)
    counter += 1

Initial log joint probability = -7.58523
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       698.183   6.77796e-06       98.9018           1           1      129   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     110       698.191   5.22915e-05       91.2124    5.17e-07       0.001      179  LS failed, Hessian reset 
     156       698.194   7.83914e-09       100.763      0.2164      0.2164      237   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance
1 - G5000030: 4.2993900801520795 seconds
Initial log joint probability = -5.69869
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       727.912   2.09887e-06        99.463           1           1      132   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     100       727.912   4.27019

16 - G2902090: 4.386342637008056 seconds
Initial log joint probability = -3.95016
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      71       779.905   2.66543e-05        99.977   2.634e-07       0.001      128  LS failed, Hessian reset 
      94       779.908   5.66569e-09        101.07      0.1214      0.1214      160   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance
17 - G4000190: 4.44537535100244 seconds
Initial log joint probability = -11.5151
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      93       638.468    8.3532e-05       101.906   8.147e-07       0.001      158  LS failed, Hessian reset 
      99       638.474   1.00394e-05       99.8791      0.5769      0.5769      164   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     153       638.495   2.82494e-08       101.288      0.30

34 - G3100770: 4.39449835778214 seconds
Initial log joint probability = -11.4212
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      89       660.928   4.82441e-07       102.805   4.524e-09       0.001      153  LS failed, Hessian reset 
      99       660.928   3.07782e-08       97.3162      0.4116           1      166   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     102       660.928   2.98019e-08       84.8771      0.5698      0.5698      169   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
35 - G1800510: 4.341004014015198 seconds
Initial log joint probability = -2.65176
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      65       687.456   1.48187e-08       104.159      0.3005           1       83   
Optimization terminated normally: 
  Convergence detected: relative gradient magnit

52 - G3200150: 4.389165328815579 seconds
Initial log joint probability = -3.43913
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      97       827.814   8.39166e-06       104.665   8.027e-08       0.001      198  LS failed, Hessian reset 
      99       827.814   7.47719e-06       104.367           1           1      200   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     154       827.822   2.00251e-08       93.0911      0.2825           1      269   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
53 - G5300570: 4.15404160390608 seconds
Initial log joint probability = -3.94271
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      83       811.471   1.04905e-05       99.8162   1.036e-07       0.001      146  LS failed, Hessian reset 
      99       811.472   1.83011e-07       99.2204      0.3

66 - G2100890: 4.362809994025156 seconds
Initial log joint probability = -6.12012
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      62         556.4   2.60101e-08         96.19      0.2879           1       84   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
67 - G2200470: 3.9751546517945826 seconds
Initial log joint probability = -4.56044
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      79       711.279   1.31491e-08       99.5743      0.2902           1      104   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
68 - G2900690: 4.288648869143799 seconds
Initial log joint probability = -4.64126
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      71       645.823   9.30779e-09        102.96       0.336       0.336       93   
Opti

86 - G4601270: 4.324560853186995 seconds
Initial log joint probability = -6.25133
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      67       568.102   6.05395e-09       98.5522      0.2204           1       92   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance
87 - G4800770: 4.290536785963923 seconds
Initial log joint probability = -10.4436
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      73       646.313   2.27737e-08         97.49      0.2887           1       98   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
88 - G3300010: 4.164143719011918 seconds
Initial log joint probability = -11.5699
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      65       394.693   8.69869e-09       100.395       0.459           1       92   
Optimi

# Child Models (TL)

In [17]:
def predict_transfer(df_train):
    time1 = time.monotonic()
    # initilaize model with hyperparameters from parent model
    m = Prophet(
        seasonality_prior_scale = 10.0,
        changepoint_prior_scale = 0.5,
    )
    m.fit(df_train, algorithm='LBFGS')
    df_train_future = m.make_future_dataframe(periods=300, freq='H')
    df_train_forecast = m.predict(df_train_future)
    
    time2 = time.monotonic()

    return m, df_train_future, df_train_forecast, (time2 - time1)


def predict_transfer_task(df_train, gis_join):
    m, df_train_future, df_train_forecast, time_taken = predict_transfer(df_train)
    return gis_join, time_taken



# tl_models = {}
# counter = 1
# for gis_join in list(child_dfs.keys())[:200]:
#     time1 = time.monotonic()
#     m, df_train_future, df_train_forecast = predict_transfer(child_dfs[gis_join])
#     time2 = time.monotonic() - time1
#     print(f'{counter} - {gis_join}: {time2} seconds')
#     tl_models[gis_join] = TLModel(gis_join, time2, m)
#     counter += 1

import dask
client = Client('lattice-150:8786')

counter = 1
lazy_results = []
for gis_join, df_ in child_dfs.items():
    try:
        lazy_result = dask.delayed(predict_transfer_task)(df_, gis_join)
        lazy_results.append(lazy_result)
    except:
        print(f'Error on {gis_join}')
    if counter % 100 == 0:
        print(counter, end=', ')
    counter += 1
#     break

futures = dask.persist(*lazy_results)  # trigger computation in the background
results = dask.compute(*futures)
results[:5]

100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 

(('G5000030', 126.54635917698033),
 ('G5501210', 127.66774946311489),
 ('G2100750', 124.80110791604966),
 ('G4100490', 127.3607460160274),
 ('G4500150', 127.69638833892532))

In [20]:
gis_joins = []
tl_times = []

for r, t in results:
    gis_joins.append(r)
    tl_times.append(t)
    
df_tl_times = pd.DataFrame(zip(gis_joins, tl_times), columns=['GISJOIN', 'tl-time'])
df_tl_times

Unnamed: 0,GISJOIN,tl-time
0,G5000030,126.546359
1,G5501210,127.667749
2,G2100750,124.801108
3,G4100490,127.360746
4,G4500150,127.696388
...,...,...
2876,G0600170,128.917248
2877,G3300190,128.618757
2878,G4200690,128.865729
2879,G1301210,126.154915


## Collect Times taken to build Models

In [31]:
import pandas as pd

non_tl_times = [x.time for x in non_tl_models.values()]

df_non_tl = pd.DataFrame(list(zip(non_tl_models.keys(), non_tl_times)), columns=['GISJOIN', 'Time'])
df_non_tl.to_csv('covid_non_tl_times.csv', index=False)

In [18]:
tl_times = [x.time for x in tl_models.values()]

df_non_tl = pd.DataFrame(list(zip(tl_models.keys(), tl_times)), columns=['GISJOIN', 'Time'])
df_non_tl.to_csv('covid_tl_times_all.csv', index=False)

NameError: name 'tl_models' is not defined