In [1]:
import pandas as pd
import numpy as np
import csv
import statsmodels.api as sm
import warnings
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from itertools import combinations
from scipy import stats
from datetime import datetime
from sklearn.metrics import mean_absolute_error
from datetime import datetime
from dateutil.relativedelta import relativedelta
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Locations of the two CSV inputs.
file = '../data/manipulated_data/rainfalldata.csv'
file2 = '../data/manipulated_data/ncrainfalldata.csv'

# Read each CSV, convert its 'Date' column to datetimes and make it the index.
rd = pd.read_csv(file)
rd = rd.assign(Date=pd.to_datetime(rd['Date'])).set_index('Date')
ncrd = pd.read_csv(file2)
ncrd = ncrd.assign(Date=pd.to_datetime(ncrd['Date'])).set_index('Date')

In [8]:
# Restore the `exogen` dictionary from IPython's %store database.
# Per the original note it was stored from the Data_Wrangling_CAP1 notebook, so
# that notebook presumably has to be run (and `exogen` stored there) before this
# cell can succeed; nothing visible in this notebook creates `exogen`.
%store -r exogen


In [2]:
def sarima_model_creation(data, p, d, q, P, D, Q, m, exog=None):
    """Build a SARIMAX model for ``data`` and return its fitted results.

    ``p, d, q`` are passed as the model's ``order`` and ``P, D, Q, m`` as its
    ``seasonal_order``; ``exog`` (optional) is forwarded as the exogenous
    regressors. Stationarity and invertibility are not enforced and the state
    is initialised with ``'approximate_diffuse'``.
    """
    model = sm.tsa.statespace.SARIMAX(
        data,
        exog,
        order=[p, d, q],
        seasonal_order=[P, D, Q, m],
        enforce_stationarity=False,
        enforce_invertibility=False,
        initialization='approximate_diffuse',
    )
    return model.fit()

In [6]:
def hyperparameter_find(training_data, comb, testing_data, search = False, exogtr = None, exogtest = None):
    """Score SARIMA order combinations by walk-forward one-step-ahead MAE.

    For every ``com`` in ``comb`` the model is refitted once per test
    observation on all data seen so far, the next value is forecast, and the
    true test observation is then appended to the history. ``com`` is read as
    ``(p, q, P, Q)``; ``d`` and ``D`` are fixed at 0 and the seasonal period
    at 12.

    Parameters
    ----------
    training_data : pandas Series
        Initial history (it is copied and extended with ``pd.concat``).
    comb : iterable
        Order combinations to evaluate, each indexable at positions 0..3.
    testing_data : pandas Series
        Hold-out observations, forecast one at a time.
    search : bool
        When ``True`` each combination is printed with its MAE and the best
        combination is returned instead of an MAE.
    exogtr, exogtest : pandas DataFrame, optional
        Exogenous regressors covering the training / testing periods. The
        exogenous branch is used only when ``exogtr`` is not ``None``.

    Returns
    -------
    tuple of str or float
        With ``search=True``: ``('AR: p', 'MA: q', 'SAR: P', 'SMA: Q')`` for
        the combination with the lowest MAE. Otherwise the MAE of the last
        combination in ``comb``.

    Raises
    ------
    ValueError
        If ``comb`` is empty, or if ``search=True`` and no combination
        produced a comparable (non-NaN) MAE.
    """
    best_mae = float('inf')  # was 1000, which silently rejected any MAE >= 1000
    best_com = None
    mae = None
    for com in tqdm(comb):
        # Every combination starts again from the untouched training data.
        history = training_data.copy()
        exog_history = exogtr.copy() if exogtr is not None else None
        li_one_step = []
        for i in tqdm(range(len(testing_data))):
            if exog_history is not None:
                mod_1 = sarima_model_creation(history, com[0], 0, com[1], com[2], 0,
                                              com[3], 12, exog=exog_history)
                # The regressor values for the forecast step are taken to be the
                # most recent known row, not the (unknown) future values.
                one_step_pred = mod_1.forecast(exog=exog_history.iloc[[-1]])
                exog_history = pd.concat([exog_history, exogtest.iloc[[i]]])
            else:
                mod_1 = sarima_model_creation(history, com[0], 0, com[1], com[2], 0, com[3], 12)
                one_step_pred = mod_1.forecast()
            # Positional access: the forecast is indexed by date, so `[0]` on the
            # Series itself would depend on pandas' deprecated integer fallback.
            li_one_step.append(np.asarray(one_step_pred)[0])
            history = pd.concat([history, testing_data.iloc[[i]]])
        mae = mean_absolute_error(testing_data, li_one_step)
        if search is True:
            if mae < best_mae:
                best_mae = mae
                best_com = com
            print(com,mae)
    if mae is None:
        raise ValueError("comb is empty: no order combination to evaluate")
    if search is True:
        if best_com is None:
            raise ValueError("no combination produced a finite MAE")
        return('AR: '+ str(best_com[0]), 'MA: ' +str(best_com[1]),
               'SAR: '+str(best_com[2]), 'SMA: '+str(best_com[3]))
    return(mae)

In [6]:
def exog_combinations(df, exoe):
    """Return one sub-DataFrame of ``df`` per non-empty subset of ``exoe``.

    Parameters
    ----------
    df : pandas DataFrame
        Source table; every name in ``exoe`` must be one of its columns.
    exoe : sequence of column labels
        Candidate exogenous columns.

    Returns
    -------
    list of pandas DataFrame
        Empty when ``exoe`` is empty. Otherwise, in this order: the frame with
        all columns, then (only when there is more than one column) each
        single column, then every combination of size 2 .. len(exoe) - 1 in
        ``itertools.combinations`` order.
    """
    cols = list(exoe)
    if not cols:
        return []
    lo_dfs = [df.loc[:, cols]]
    if len(cols) > 1:
        lo_dfs.extend(df.loc[:, [ex]] for ex in cols)
        for size in range(2, len(cols)):
            # combinations() yields tuples; .loc expects a list for
            # "several labels" (a tuple is a single key on MultiIndex columns).
            lo_dfs.extend(df.loc[:, list(c)] for c in combinations(cols, size))
    return(lo_dfs)


In [7]:
# Stations to process in this run. Each string (leading spaces included) is
# used verbatim as a key into `exogen`.
todokeys = (
    'TAPOCO, NC',
    'TRYON, NC',
    'WAYNESVILLE 1 E, NC',
    'Boone 1 SE, NC',
    'DANBURY, NC',
    'EDEN, NC',
    ' MOUNT AIRY 2 W, NC',
    'REIDSVILLE 2 NW, NC',
    'HAYESVILLE 1 NE, NC',
    'MURPHY 4ESE, NC',
    ' KING, NC',
)
sub_exogen = dict((station, exogen[station]) for station in todokeys)

In [8]:
from collections import defaultdict

# For every selected station, build the list of candidate regressor frames
# (one per subset of that station's exogenous columns, taken from `rd`).
l_o_dfs = defaultdict(list)
for key, value in tqdm(sub_exogen.items()):
    l_o_dfs[key] = exog_combinations(rd, value)

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))




In [9]:
def exogenous_var(data, ncloc, l_exoloc, best_comb):
    """Compare a station's baseline forecast error with exogenous variants.

    The series ``data[ncloc]`` is split chronologically (last 20% held out).
    ``hyperparameter_find`` is run once without regressors to get the baseline
    MAE, then once per frame in ``l_exoloc`` (each split the same way). Every
    MAE is printed.

    Parameters
    ----------
    data : pandas DataFrame
        Table containing the target column ``ncloc``.
    ncloc : column label
        Target station.
    l_exoloc : iterable of pandas DataFrame
        Candidate exogenous regressor frames.
    best_comb : list
        Order combinations forwarded unchanged to ``hyperparameter_find``.

    Returns
    -------
    tuple or None
        Column names of the last frame evaluated (unchanged from the original
        behaviour), or ``None`` when ``l_exoloc`` is empty.
    """
    dat = data[ncloc]
    tr, test = train_test_split(dat, test_size = 0.2, shuffle=False)
    keymae = hyperparameter_find(tr, best_comb, test)
    # Use the function's own argument: the original read the notebook-level
    # loop variable `key`, which only worked when called from that loop.
    print('keymae of: '+ str(ncloc) +' = '+str(keymae))
    # NOTE(review): regressor sets that beat the baseline are collected here but
    # not returned; returning {ncloc: bettermae} is probably what was intended.
    # Confirm before changing the return value.
    bettermae = {}
    co = None  # stays None when there is nothing to evaluate
    for exog in tqdm(l_exoloc):
        extr, extest = train_test_split(exog, test_size = 0.2, shuffle=False)
        exmae = hyperparameter_find(tr, best_comb, test, exogtr=extr, exogtest = extest)
        co = tuple(exog.columns)
        print('exmae = {}'.format(co) + ' '+ str(exmae))
        if exmae < keymae:
            bettermae[co] = exmae
    return(co)

In [10]:
# Single order combination to evaluate; hyperparameter_find reads each entry
# positionally as (p, q, P, Q).
best_comb = [[4,3,3,4]]
warnings.filterwarnings("ignore")
# Run the baseline-vs-exogenous comparison for every station in l_o_dfs.
# NOTE: the saved output of this cell ends with a MemoryError part-way through.
for key,value in tqdm(l_o_dfs.items()):
    exogenous_var(rd, key, value, best_comb)

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

keymae of: TAPOCO, NC = 0.9800673566131274


HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('GATLINBURG 2 SW, TN', 'NEWFOUND GAP, TN', ' TOWNSEND 5S, TN') 1.5764560295913925


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('GATLINBURG 2 SW, TN',) 1.4972163335360191


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('NEWFOUND GAP, TN',) 1.5501456737516661


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = (' TOWNSEND 5S, TN',) 2.1402191932997416


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('GATLINBURG 2 SW, TN', 'NEWFOUND GAP, TN') 1.5489398080891763


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('GATLINBURG 2 SW, TN', ' TOWNSEND 5S, TN') 1.616671690504988


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('NEWFOUND GAP, TN', ' TOWNSEND 5S, TN') 2.436971933451168


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

keymae of: TRYON, NC = 2.6823507498407078


HBox(children=(IntProgress(value=0, max=31), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.731134415701127


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC',) 2.986931810013167


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC',) 3.1072872524201767


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CHESNEE 7 WSW, SC',) 2.815911182694172


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CLEVELAND 3S, SC',) 3.077446176795452


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('SPARTANBURG 3 SSE, SC',) 2.812649001967126


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC') 2.942549753368086


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CHESNEE 7 WSW, SC') 2.8608751566191


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CLEVELAND 3S, SC') 3.049632259088924


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'SPARTANBURG 3 SSE, SC') 2.9144776099202763


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC') 2.9762863578022682


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CLEVELAND 3S, SC') 3.20189310778487


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'SPARTANBURG 3 SSE, SC') 3.096819228892839


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC') 2.9903458656920554


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CHESNEE 7 WSW, SC', 'SPARTANBURG 3 SSE, SC') 2.890486851482266


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.0436085745720485


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC') 2.9033859279655085


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CLEVELAND 3S, SC') 3.154525441526979


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'SPARTANBURG 3 SSE, SC') 2.9314747667962813


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC') 2.9713486547381884


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CHESNEE 7 WSW, SC', 'SPARTANBURG 3 SSE, SC') 2.86854756656046


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.0161890393875477


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC') 2.99567340879987


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'SPARTANBURG 3 SSE, SC') 2.9370572008592566


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.277737214007516


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 2.9667226323555127


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC') 3.190416268016882


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'SPARTANBURG 3 SSE, SC') 2.8771958771274124


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.0292999192163945


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 2.975167876273045


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.258738835983667


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

keymae of: WAYNESVILLE 1 E, NC = 1.747482139423062


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('MT LECONTE, TN', 'NEWFOUND GAP, TN') 2.2339993455520246


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('MT LECONTE, TN',) 2.067047382901454


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('NEWFOUND GAP, TN',) 2.262076438940648


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

MemoryError: 

In [3]:
# Columns removed from the NC table before the plain (no-regressor) forecasts;
# these are the same stations that appear as keys of `exo_var_dict2`.
with_exogs = [
    'WHITEVILLE 7 NW, NC',
    'CASAR, NC',
    'FOREST CITY 8 W, NC',
    'GASTONIA, NC',
    'LAKE LURE 2, NC',
    'ELIZABETHTOWN, NC',
    ' MOUNT HOLLY 4 NE, NC',
    'GRANDFATHER MTN, NC',
]
ncrd2 = ncrd.copy()
ncrd_less = ncrd2.drop(columns=with_exogs)

In [10]:
def prediction_fx(data, begin, end, order=(4, 0, 3), seasonal_order=(3, 0, 4, 12), alpha=0.5):
    """Fit one SARIMA model per column of ``data`` and predict ``begin``..``end``.

    Parameters
    ----------
    data : pandas DataFrame
        One series per column; each column is modelled independently.
    begin, end : str
        First and last date to predict, forwarded to the fitted model's
        ``get_prediction(start=..., end=...)``.
    order : tuple, optional
        ``(p, d, q)``; defaults to the values previously hard-coded here.
    seasonal_order : tuple, optional
        ``(P, D, Q, m)``; defaults to the values previously hard-coded here.
    alpha : float, optional
        Significance level passed to ``conf_int``. The default 0.5 keeps the
        original behaviour, i.e. a 50% interval.
        NOTE(review): 0.05 (a 95% interval) may have been intended; confirm.

    Returns
    -------
    pandas DataFrame
        For every input column: the point prediction (named after the column)
        followed by the interval columns produced by ``conf_int``. The index is
        the prediction index itself, so every requested date is present. The
        previous version inner-merged onto a fixed 600-month index, which
        dropped the final month of a 2019-05..2069-05 request.
        An empty DataFrame is returned when ``data`` has no columns.
    """
    p, d, q = order
    P, D, Q, m = seasonal_order
    frames = []
    for col in tqdm(data.columns):
        mod_fit1 = sarima_model_creation(data[col], p, d, q, P, D, Q, m)
        # One prediction call supplies both the mean and the interval.
        future_pred1 = mod_fit1.get_prediction(start=begin, end=end)
        # Set the name explicitly: DataFrame(series, columns=[col]) selects by
        # name and yields NaN when the series is already named.
        point_predictions = pd.Series(future_pred1.predicted_mean, name=col).to_frame()
        future_pred1_ci = future_pred1.conf_int(alpha=alpha)
        frames.append(pd.concat([point_predictions, future_pred1_ci], axis=1))
    if not frames:
        return pd.DataFrame()
    # A single concat instead of re-merging the growing frame on every column.
    return pd.concat(frames, axis=1)

In [11]:
# Model every column of ncrd_less on its own and predict 2019-05-01 .. 2069-05-01,
# then preview the first ten rows.
pre_df = prediction_fx(ncrd_less, '2019-05-01', '2069-05-01')
pre_df.head(10)

HBox(children=(IntProgress(value=0, max=104), HTML(value='')))




Unnamed: 0,"Raleigh, NC","lower Raleigh, NC","upper Raleigh, NC","Fayetteville, NC","lower Fayetteville, NC","upper Fayetteville, NC","Albemarle, NC","lower Albemarle, NC","upper Albemarle, NC","Arcola, NC",...,"upper YADKINVILLE 6 E, NC","HAYESVILLE 1 NE, NC","lower HAYESVILLE 1 NE, NC","upper HAYESVILLE 1 NE, NC","MURPHY 4ESE, NC","lower MURPHY 4ESE, NC","upper MURPHY 4ESE, NC","KING, NC","lower KING, NC","upper KING, NC"
2019-05-01,3.366287,1.881736,4.850837,3.111657,1.523134,4.700181,3.461614,1.960157,4.963072,4.101742,...,5.865633,6.052865,4.681224,7.424506,6.054696,4.605734,7.503659,4.555015,3.238458,5.871571
2019-06-01,4.311068,2.826038,5.796097,4.639369,3.038618,6.24012,4.748512,3.229404,6.267621,4.453031,...,5.482044,5.717889,4.3323,7.103479,6.150096,4.690559,7.609633,3.477236,2.154377,4.800094
2019-07-01,6.314961,4.828354,7.801568,4.736273,3.135275,6.33727,5.211461,3.682058,6.740863,4.030026,...,6.198524,6.071343,4.673333,7.469353,5.999118,4.534753,7.463484,4.060046,2.734256,5.385835
2019-08-01,4.145673,2.653229,5.638117,5.171562,3.569744,6.77338,4.538334,3.002113,6.074555,3.948713,...,6.188571,5.126533,3.712651,6.540416,4.852656,3.379151,6.326161,4.301428,2.974102,5.628753
2019-09-01,4.875324,3.382859,6.36779,4.613723,3.009782,6.217664,3.479564,1.943112,5.016015,4.547796,...,5.482877,5.06393,3.647114,6.480747,4.718284,3.234249,6.202319,4.442798,3.114995,5.770601
2019-10-01,4.620644,3.125386,6.115903,3.449842,1.84566,5.054025,3.654706,2.117471,5.191941,4.270212,...,4.877054,4.244699,2.824788,5.664609,3.892131,2.403399,5.380862,4.220072,2.891967,5.548176
2019-11-01,2.836573,1.338815,4.334331,2.833024,1.22844,4.437609,3.578847,2.041373,5.116321,4.145416,...,4.565123,5.128604,3.708378,6.548829,4.46293,2.971968,5.953892,4.280093,2.951938,5.608247
2019-12-01,3.028145,1.52985,4.52644,3.043657,1.438876,4.648439,3.945386,2.407895,5.482877,4.089217,...,5.244915,5.843728,4.423051,7.264404,5.297449,3.806008,6.78889,4.52121,3.193,5.849421
2020-01-01,3.99628,2.495186,5.497373,2.90744,1.302569,4.512311,3.020206,1.482598,4.557815,3.895951,...,4.858702,5.472307,4.051615,6.892998,4.975608,3.483947,6.467269,4.132021,2.803813,5.460228
2020-02-01,2.822907,1.321267,4.324546,2.760683,1.155748,4.365618,3.227472,1.689957,4.764987,3.833445,...,4.400556,5.590564,4.16983,7.011297,5.443202,3.951571,6.934832,4.021765,2.693549,5.349981


In [5]:
# Target station -> DataFrame holding the columns of `rd` used as its
# exogenous regressors.
exo_var_dict2 = {
    station: rd[regressors]
    for station, regressors in {
        'WHITEVILLE 7 NW, NC': [' LORIS 2 S, SC'],
        'CASAR, NC': ['GAFFNEY 6 E, SC'],
        'FOREST CITY 8 W, NC': ['GAFFNEY 6 E, SC'],
        'GASTONIA, NC': ['FORT MILL 4 NW, SC', 'GAFFNEY 6 E, SC'],
        'LAKE LURE 2, NC': ['CHESNEE 7 WSW, SC'],
        ' MOUNT HOLLY 4 NE, NC': ['CHESNEE 7 WSW, SC', 'GAFFNEY 6 E, SC'],
        'ELIZABETHTOWN, NC': [' LORIS 2 S, SC'],
        'GRANDFATHER MTN, NC': ['ELIZABETHTON, TN'],
    }.items()
}

In [6]:
def prediction_exog_fx2(data, exog_dict, begin, end, order=(4, 0, 3), seasonal_order=(3, 0, 4, 12), alpha=0.5):
    """Predict each target series using SARIMA with forecast exogenous regressors.

    For every ``key -> value`` in ``exog_dict`` a model is fitted on
    ``data[key]`` with ``value`` as regressors. Because the regressors' future
    values are unknown, each regressor column is itself forecast over
    ``begin``..``end`` with its own SARIMA model (same orders, no regressors)
    and those forecasts are fed to the target model's prediction.

    Parameters
    ----------
    data : pandas DataFrame
        Table containing every target column named in ``exog_dict``.
    exog_dict : dict
        Target column label -> DataFrame of regressor columns.
    begin, end : str
        First and last date to predict, forwarded as ``start`` / ``end``.
    order : tuple, optional
        ``(p, d, q)``; defaults to the values previously hard-coded here.
    seasonal_order : tuple, optional
        ``(P, D, Q, m)``; defaults to the values previously hard-coded here.
    alpha : float, optional
        Significance level for ``conf_int``; the default 0.5 keeps the original
        50% interval.

    Returns
    -------
    pandas DataFrame
        For every target: the point prediction (named after the target)
        followed by the interval columns from ``conf_int``, indexed by the
        prediction dates. The previous version inner-merged onto a fixed
        600-month index and so dropped any requested dates beyond it. An empty
        DataFrame is returned when ``exog_dict`` is empty.
    """
    p, d, q = order
    P, D, Q, m = seasonal_order
    frames = []
    for key, value in tqdm(exog_dict.items()):
        mod_fit1 = sarima_model_creation(data[key], p, d, q, P, D, Q, m, exog=value)
        # Forecast each regressor column separately; one loop covers both the
        # single- and multi-column cases (the old code used `i is 0`, an
        # identity test on an int literal, to special-case the first column).
        exog_forecasts = []
        for i in range(value.shape[1]):
            exog_mod_fit = sarima_model_creation(value.iloc[:, i], p, d, q, P, D, Q, m)
            exog_forecasts.append(
                pd.Series(exog_mod_fit.predict(start=begin, end=end), name=value.columns[i])
            )
        exog_predictions_df = pd.concat(exog_forecasts, axis=1)
        future_pred = mod_fit1.get_prediction(exog=exog_predictions_df, start=begin, end=end)
        # Name the series explicitly so the column is the target label.
        future_pred_val = pd.Series(future_pred.predicted_mean, name=key).to_frame()
        future_pred_ci = future_pred.conf_int(alpha=alpha)
        frames.append(pd.concat([future_pred_val, future_pred_ci], axis=1))
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=1)

In [7]:
# Predict the stations listed in exo_var_dict2 (targets taken from `rd`) for
# 2019-05-01 .. 2069-05-01.
e_ci_df = prediction_exog_fx2(rd, exo_var_dict2, '2019-05-01', '2069-05-01')

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))




In [8]:
# Preview the first ten rows of the exogenous-model predictions.
e_ci_df.head(10)

Unnamed: 0,"WHITEVILLE 7 NW, NC","lower WHITEVILLE 7 NW, NC","upper WHITEVILLE 7 NW, NC","CASAR, NC","lower CASAR, NC","upper CASAR, NC","FOREST CITY 8 W, NC","lower FOREST CITY 8 W, NC","upper FOREST CITY 8 W, NC","GASTONIA, NC",...,"upper LAKE LURE 2, NC","MOUNT HOLLY 4 NE, NC","lower MOUNT HOLLY 4 NE, NC","upper MOUNT HOLLY 4 NE, NC","ELIZABETHTOWN, NC","lower ELIZABETHTOWN, NC","upper ELIZABETHTOWN, NC","GRANDFATHER MTN, NC","lower GRANDFATHER MTN, NC","upper GRANDFATHER MTN, NC"
2019-05-01,5.753516,4.268563,7.23847,3.591515,2.521395,4.661635,3.899437,2.865787,4.933087,4.178921,...,8.308546,3.718346,2.804238,4.632454,4.539367,3.042719,6.036016,7.329324,5.403461,9.255187
2019-06-01,3.724518,2.227311,5.221724,4.899144,3.793062,6.005226,4.043647,2.995293,5.092002,3.854757,...,6.631163,3.893037,2.974888,4.811186,4.742576,3.212933,6.272219,6.904584,4.970852,8.838316
2019-07-01,5.557662,4.057042,7.058282,4.932119,3.824471,6.039766,4.229915,3.177496,5.282334,3.854248,...,6.500716,3.564906,2.645156,4.484657,5.759743,4.228032,7.291454,7.503359,5.569007,9.43771
2019-08-01,6.962032,5.457507,8.466557,4.510153,3.402346,5.617961,4.273738,3.220887,5.32659,3.864488,...,7.107185,3.462391,2.535286,4.389496,6.681779,5.150062,8.213496,6.57049,4.636118,8.504862
2019-09-01,5.759614,4.254405,7.264822,4.223978,3.115213,5.332743,4.151798,3.098847,5.20475,3.730619,...,7.046454,3.215256,2.288001,4.14251,7.964787,6.432967,9.496607,5.825237,3.890625,7.759848
2019-10-01,3.492725,1.984399,5.001051,3.624053,2.515007,4.733099,3.761492,2.708524,4.814459,3.63001,...,6.591657,3.428503,2.50108,4.355927,5.165368,3.63351,6.697226,5.788173,3.853509,7.722838
2019-11-01,3.845209,2.334046,5.356373,4.54598,3.436555,5.655404,4.187163,3.134137,5.240188,3.811119,...,6.136522,3.288401,2.356889,4.219913,4.493145,2.961288,6.025003,5.711345,3.776455,7.646235
2019-12-01,3.647705,2.136544,5.158867,4.362686,3.252763,5.472608,3.789722,2.7367,4.842743,3.485347,...,6.744548,3.782379,2.850735,4.714022,4.593129,3.061223,6.125036,6.099219,4.164232,8.034206
2020-01-01,2.551145,1.037041,4.06525,4.507149,3.396863,5.617435,4.227263,3.174245,5.280282,3.782411,...,6.266513,3.369102,2.43746,4.300745,4.532317,3.000391,6.064244,5.944489,4.009276,7.879702
2020-02-01,4.253857,2.738272,5.769442,4.019727,2.909019,5.130436,3.890641,2.83761,4.943671,3.878353,...,6.474912,3.404534,2.470614,4.338455,5.310876,3.778936,6.842817,6.137058,4.201706,8.07241


In [12]:
# Combine both prediction tables side by side on their index. pd.merge defaults
# to an inner join, so only index values present in both tables are kept.
merged_ci_vals = pd.merge(pre_df, e_ci_df, left_index=True, right_index=True)

In [13]:
# Write the combined predictions (index included) to CSV.
merged_ci_vals.to_csv('../data/manipulated_data/predictions.csv')

In [14]:
# Preview the first ten rows of the combined table.
merged_ci_vals.head(10)

Unnamed: 0,"Raleigh, NC","lower Raleigh, NC","upper Raleigh, NC","Fayetteville, NC","lower Fayetteville, NC","upper Fayetteville, NC","Albemarle, NC","lower Albemarle, NC","upper Albemarle, NC","Arcola, NC",...,"upper LAKE LURE 2, NC","MOUNT HOLLY 4 NE, NC","lower MOUNT HOLLY 4 NE, NC","upper MOUNT HOLLY 4 NE, NC","ELIZABETHTOWN, NC","lower ELIZABETHTOWN, NC","upper ELIZABETHTOWN, NC","GRANDFATHER MTN, NC","lower GRANDFATHER MTN, NC","upper GRANDFATHER MTN, NC"
2019-05-01,3.366287,1.881736,4.850837,3.111657,1.523134,4.700181,3.461614,1.960157,4.963072,4.101742,...,8.308546,3.718346,2.804238,4.632454,4.539367,3.042719,6.036016,7.329324,5.403461,9.255187
2019-06-01,4.311068,2.826038,5.796097,4.639369,3.038618,6.24012,4.748512,3.229404,6.267621,4.453031,...,6.631163,3.893037,2.974888,4.811186,4.742576,3.212933,6.272219,6.904584,4.970852,8.838316
2019-07-01,6.314961,4.828354,7.801568,4.736273,3.135275,6.33727,5.211461,3.682058,6.740863,4.030026,...,6.500716,3.564906,2.645156,4.484657,5.759743,4.228032,7.291454,7.503359,5.569007,9.43771
2019-08-01,4.145673,2.653229,5.638117,5.171562,3.569744,6.77338,4.538334,3.002113,6.074555,3.948713,...,7.107185,3.462391,2.535286,4.389496,6.681779,5.150062,8.213496,6.57049,4.636118,8.504862
2019-09-01,4.875324,3.382859,6.36779,4.613723,3.009782,6.217664,3.479564,1.943112,5.016015,4.547796,...,7.046454,3.215256,2.288001,4.14251,7.964787,6.432967,9.496607,5.825237,3.890625,7.759848
2019-10-01,4.620644,3.125386,6.115903,3.449842,1.84566,5.054025,3.654706,2.117471,5.191941,4.270212,...,6.591657,3.428503,2.50108,4.355927,5.165368,3.63351,6.697226,5.788173,3.853509,7.722838
2019-11-01,2.836573,1.338815,4.334331,2.833024,1.22844,4.437609,3.578847,2.041373,5.116321,4.145416,...,6.136522,3.288401,2.356889,4.219913,4.493145,2.961288,6.025003,5.711345,3.776455,7.646235
2019-12-01,3.028145,1.52985,4.52644,3.043657,1.438876,4.648439,3.945386,2.407895,5.482877,4.089217,...,6.744548,3.782379,2.850735,4.714022,4.593129,3.061223,6.125036,6.099219,4.164232,8.034206
2020-01-01,3.99628,2.495186,5.497373,2.90744,1.302569,4.512311,3.020206,1.482598,4.557815,3.895951,...,6.266513,3.369102,2.43746,4.300745,4.532317,3.000391,6.064244,5.944489,4.009276,7.879702
2020-02-01,2.822907,1.321267,4.324546,2.760683,1.155748,4.365618,3.227472,1.689957,4.764987,3.833445,...,6.474912,3.404534,2.470614,4.338455,5.310876,3.778936,6.842817,6.137058,4.201706,8.07241
