In [4]:
import pandas as pd
import numpy as np
import csv
import statsmodels.api as sm
import warnings
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from itertools import combinations
from scipy import stats
from datetime import datetime
from sklearn.metrics import mean_absolute_error
from datetime import datetime
from dateutil.relativedelta import relativedelta
warnings.filterwarnings("ignore")

# Locations of the two input CSV files.
file = 'data/manipulated_data/rainfalldata.csv'
file2 = 'data/manipulated_data/ncrainfalldata.csv'

# Read each file, parse its 'Date' column to datetimes and use it as the index.
rd = (
    pd.read_csv(file)
    .assign(Date=lambda frame: pd.to_datetime(frame.Date))
    .set_index('Date')
)
ncrd = (
    pd.read_csv(file2)
    .assign(Date=lambda frame: pd.to_datetime(frame.Date))
    .set_index('Date')
)

In [8]:
# Restore the `exogen` dictionary from IPython's %store database.
# According to the original note it was saved by the Data_Wrangling_CAP1
# notebook; nothing visible in this notebook creates it, so that notebook
# presumably has to be run (and `%store exogen` executed) beforehand.
%store -r exogen


In [5]:
def sarima_model_creation(data, p, d, q, P, D, Q, m, exog=None):
    """Build a SARIMAX model for ``data`` and return its fitted results.

    Parameters
    ----------
    data : endogenous observations, passed to ``SARIMAX`` unchanged.
    p, d, q : non-seasonal order, forwarded as ``order=[p, d, q]``.
    P, D, Q, m : seasonal order, forwarded as ``seasonal_order=[P, D, Q, m]``.
    exog : optional exogenous regressors, passed to ``SARIMAX`` unchanged.

    Returns
    -------
    Whatever ``SARIMAX(...).fit()`` returns (called with default arguments).
    """
    # Stationarity and invertibility are not enforced, and the state vector
    # uses the 'approximate_diffuse' initialisation.
    model_options = dict(
        order=[p, d, q],
        seasonal_order=[P, D, Q, m],
        enforce_stationarity=False,
        enforce_invertibility=False,
        initialization='approximate_diffuse',
    )
    model = sm.tsa.statespace.SARIMAX(data, exog, **model_options)
    return model.fit()

In [6]:
def hyperparameter_find(training_data, comb, testing_data, search = False, exogtr = None, exogtest = None):
    """Score SARIMA orders with a walk-forward, one-step-ahead evaluation.

    For each combination in ``comb`` the model is refit once per test
    observation: fit on the history so far, forecast one step ahead, then
    append the actual test observation to the history. The collected
    forecasts are scored against ``testing_data`` with mean absolute error.
    Differencing orders are fixed at 0 and the seasonal period at 12.

    Parameters
    ----------
    training_data : pandas.Series
        Initial history used for the first fit.
    comb : iterable of sequences
        Each item supplies ``(p, q, P, Q)`` at positions 0-3.
    testing_data : pandas.Series
        Hold-out observations, consumed in order.
    search : bool
        When ``True`` the MAE of every combination is printed and the best
        combination is returned; otherwise only an MAE is returned.
    exogtr, exogtest : pandas.DataFrame or None
        Exogenous regressors for the training / test periods. When ``exogtr``
        is given the model is fit with it and ``exogtest`` rows are appended
        as the walk advances.

    Returns
    -------
    tuple of str
        ``('AR: p', 'MA: q', 'SAR: P', 'SMA: Q')`` for the lowest MAE when
        ``search is True``.
    float
        Otherwise the MAE of the *last* combination in ``comb``.

    Raises
    ------
    ValueError
        If ``comb`` is empty, if ``exogtr`` is given without ``exogtest``, or
        if no combination yields a comparable MAE during a search.
    """
    comb = list(comb)
    if not comb:
        raise ValueError('comb must contain at least one (p, q, P, Q) combination')
    use_exog = exogtr is not None
    if use_exog and exogtest is None:
        raise ValueError('exogtest is required when exogtr is given')

    # Start from +inf rather than an arbitrary cap so that a best combination
    # is always recorded, however large the errors are.
    leastmae = float('inf')
    best_com = None
    mae = None
    for com in tqdm(comb):
        p, q, P, Q = com[0], com[1], com[2], com[3]
        history = training_data.copy()
        exog_history = exogtr.copy() if use_exog else None
        li_one_step = []
        for i in tqdm(range(len(testing_data))):
            if use_exog:
                mod_1 = sarima_model_creation(history, p, 0, q, P, 0, Q, 12, exog=exog_history)
                # The forecast is driven by the most recent exogenous row
                # already in the history (the period before the one being
                # forecast); the row for the forecast period is only appended
                # afterwards.
                # NOTE(review): the original comment described this as "the
                # year before" - confirm which lag is intended.
                one_step_pred = mod_1.forecast(exog=exog_history.iloc[[-1]])
                exog_history = pd.concat([exog_history, exogtest.iloc[[i]]])
            else:
                mod_1 = sarima_model_creation(history, p, 0, q, P, 0, Q, 12)
                one_step_pred = mod_1.forecast()
            # Explicit positional access: the forecast and the test data are
            # date-indexed, so plain [0] / [[i]] would be label lookups.
            li_one_step.append(one_step_pred.iloc[0])
            history = pd.concat([history, testing_data.iloc[[i]]])
        mae = mean_absolute_error(testing_data, li_one_step)
        if search is True:
            if mae < leastmae:
                leastmae = mae
                best_com = (p, q, P, Q)
            print(com,mae)
    if search is True:
        if best_com is None:
            raise ValueError('no combination produced a comparable MAE')
        H_AR, H_MA, H_SAR, H_SMA = best_com
        return ('AR: '+ str(H_AR), 'MA: ' +str(H_MA), 'SAR: '+str(H_SAR), 'SMA: '+str(H_SMA))
    return mae

In [6]:
def exog_combinations(df, exoe):
    """Return one DataFrame per non-empty subset of the columns in ``exoe``.

    The result is ordered as: the full column set first, then every subset of
    size 1, size 2, ... up to ``len(exoe) - 1`` (each size in
    ``itertools.combinations`` order).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame the columns are selected from.
    exoe : sequence of column labels
        Candidate exogenous columns.

    Returns
    -------
    list of pandas.DataFrame
        Empty when ``exoe`` is empty.
    """
    names = list(exoe)
    if not names:
        return []
    # Full set first; for a single column this is the only entry because
    # range(1, 1) below is empty.
    lo_dfs = [df.loc[:, names]]
    for size in range(1, len(names)):
        for subset in combinations(names, size):
            # Select with a list: a tuple passed to .loc can be read as a
            # single (MultiIndex) key rather than a list of labels.
            lo_dfs.append(df.loc[:, list(subset)])
    return lo_dfs


In [7]:
# Stations (keys of `exogen`) still to be processed.
todokeys = (
    'TAPOCO, NC',
    'TRYON, NC',
    'WAYNESVILLE 1 E, NC',
    'Boone 1 SE, NC',
    'DANBURY, NC',
    'EDEN, NC',
    ' MOUNT AIRY 2 W, NC',
    'REIDSVILLE 2 NW, NC',
    'HAYESVILLE 1 NE, NC',
    'MURPHY 4ESE, NC',
    ' KING, NC',
)
# Restrict `exogen` to those stations, keeping the order above.
sub_exogen = dict((station, exogen[station]) for station in todokeys)

In [8]:
# Map every station in sub_exogen to the list of candidate exogenous
# DataFrames returned by exog_combinations(rd, <its exogenous columns>).
# NOTE(review): this import sits mid-notebook rather than in the import cell,
# and the defaultdict's list default is never exercised here because every
# key is assigned explicitly.
from collections import defaultdict
l_o_dfs = defaultdict(list)
for key,value in tqdm(sub_exogen.items()):
    lo_dfs2 = exog_combinations(rd, value)
    l_o_dfs[key] = lo_dfs2
# l_o_dfs['ROBBINSVILLE AG 5 NE, NC']

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))




In [9]:
def exogenous_var(data, ncloc, l_exoloc, best_comb, return_improvements=False):
    """Check whether exogenous regressors improve the forecast for one station.

    The column ``data[ncloc]`` is split 80/20 in time order. A baseline MAE is
    obtained from ``hyperparameter_find`` without regressors, then the same
    evaluation is repeated for every candidate frame in ``l_exoloc`` (each
    split 80/20 the same way). Both the baseline and every candidate MAE are
    printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the target column ``ncloc``.
    ncloc : column label
        Target station.
    l_exoloc : iterable of pandas.DataFrame
        Candidate exogenous regressor frames.
    best_comb : list
        Order combinations forwarded to ``hyperparameter_find``.
    return_improvements : bool, default False
        When ``True`` return ``{ncloc: {columns_tuple: mae}}`` holding only the
        candidates whose MAE is below the baseline.

    Returns
    -------
    tuple or None or dict
        By default the column tuple of the last candidate evaluated (``None``
        if ``l_exoloc`` is empty), as before; the improvements mapping when
        ``return_improvements`` is true.
    """
    dat = data[ncloc]
    tr, test = train_test_split(dat, test_size = 0.2, shuffle=False)
    keymae = hyperparameter_find(tr, best_comb, test)
    # Report against the station passed in, not a module-level loop variable.
    print('keymae of: '+ str(ncloc) +' = '+str(keymae))
    bettermae = {}
    co = None
    for exog in tqdm(l_exoloc):
        extr, extest = train_test_split(exog, test_size = 0.2, shuffle=False)
        exmae = hyperparameter_find(tr, best_comb, test, exogtr=extr, exogtest = extest)
        co = tuple(exog.columns)
        print('exmae = {}'.format(co) + ' '+ str(exmae))
        if exmae < keymae:
            bettermae[co] = exmae
    if return_improvements:
        return {ncloc: bettermae}
    return co

In [10]:
# One order combination, read by hyperparameter_find as (p, q, P, Q).
best_comb = [[4,3,3,4]]
warnings.filterwarnings("ignore")
# For every station, print the baseline MAE and the MAE obtained with each
# candidate set of exogenous regressors. The return value is discarded.
# NOTE(review): the saved output of this cell stops with a MemoryError while
# processing the third station, so the loop did not complete in that run.
for key,value in tqdm(l_o_dfs.items()):
    exogenous_var(rd, key, value, best_comb)

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

keymae of: TAPOCO, NC = 0.9800673566131274


HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('GATLINBURG 2 SW, TN', 'NEWFOUND GAP, TN', ' TOWNSEND 5S, TN') 1.5764560295913925


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('GATLINBURG 2 SW, TN',) 1.4972163335360191


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('NEWFOUND GAP, TN',) 1.5501456737516661


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = (' TOWNSEND 5S, TN',) 2.1402191932997416


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('GATLINBURG 2 SW, TN', 'NEWFOUND GAP, TN') 1.5489398080891763


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('GATLINBURG 2 SW, TN', ' TOWNSEND 5S, TN') 1.616671690504988


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('NEWFOUND GAP, TN', ' TOWNSEND 5S, TN') 2.436971933451168


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

keymae of: TRYON, NC = 2.6823507498407078


HBox(children=(IntProgress(value=0, max=31), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.731134415701127


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC',) 2.986931810013167


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC',) 3.1072872524201767


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CHESNEE 7 WSW, SC',) 2.815911182694172


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CLEVELAND 3S, SC',) 3.077446176795452


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('SPARTANBURG 3 SSE, SC',) 2.812649001967126


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC') 2.942549753368086


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CHESNEE 7 WSW, SC') 2.8608751566191


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CLEVELAND 3S, SC') 3.049632259088924


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'SPARTANBURG 3 SSE, SC') 2.9144776099202763


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC') 2.9762863578022682


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CLEVELAND 3S, SC') 3.20189310778487


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'SPARTANBURG 3 SSE, SC') 3.096819228892839


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC') 2.9903458656920554


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CHESNEE 7 WSW, SC', 'SPARTANBURG 3 SSE, SC') 2.890486851482266


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.0436085745720485


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC') 2.9033859279655085


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CLEVELAND 3S, SC') 3.154525441526979


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'SPARTANBURG 3 SSE, SC') 2.9314747667962813


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC') 2.9713486547381884


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CHESNEE 7 WSW, SC', 'SPARTANBURG 3 SSE, SC') 2.86854756656046


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.0161890393875477


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC') 2.99567340879987


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'SPARTANBURG 3 SSE, SC') 2.9370572008592566


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.277737214007516


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 2.9667226323555127


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC') 3.190416268016882


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'SPARTANBURG 3 SSE, SC') 2.8771958771274124


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CAESARS HEAD, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.0292999192163945


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('Greenville-Spartanburg Area, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 2.975167876273045


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('CAESARS HEAD, SC', 'CHESNEE 7 WSW, SC', 'CLEVELAND 3S, SC', 'SPARTANBURG 3 SSE, SC') 3.258738835983667


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

keymae of: WAYNESVILLE 1 E, NC = 1.747482139423062


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('MT LECONTE, TN', 'NEWFOUND GAP, TN') 2.2339993455520246


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('MT LECONTE, TN',) 2.067047382901454


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

exmae = ('NEWFOUND GAP, TN',) 2.262076438940648


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))

MemoryError: 

In [7]:
# Columns that are removed from the copy of `ncrd` below.
with_exogs = [
    'WHITEVILLE 7 NW, NC',
    'CASAR, NC',
    'FOREST CITY 8 W, NC',
    'GASTONIA, NC',
    'LAKE LURE 2, NC',
    'ELIZABETHTOWN, NC',
    ' MOUNT HOLLY 4 NE, NC',
    'GRANDFATHER MTN, NC',
]
# Work on a copy so `ncrd` itself is left untouched.
ncrd2 = ncrd.copy()
ncrd_less = ncrd2.drop(columns=with_exogs)

In [8]:
def prediction_fx(data, begin, end, order=(4, 0, 3), seasonal_order=(3, 0, 4, 12), alpha=0.2):
    """Fit one SARIMA model per column and predict ``begin``..``end``.

    Parameters
    ----------
    data : pandas.DataFrame
        One series per column.
    begin, end : str
        First and last date to predict; forwarded to ``get_prediction`` as
        ``start`` / ``end``.
    order : tuple, default (4, 0, 3)
        ``(p, d, q)`` passed to ``sarima_model_creation``.
    seasonal_order : tuple, default (3, 0, 4, 12)
        ``(P, D, Q, m)`` passed to ``sarima_model_creation``.
    alpha : float, default 0.2
        Significance level passed to ``conf_int``.

    Returns
    -------
    pandas.DataFrame
        For every input column: the point prediction (named after the column)
        followed by the interval columns produced by ``conf_int``. Rows cover
        the full predicted range, including ``end``. Empty if ``data`` has no
        columns.
    """
    frames = []
    for col in tqdm(data.columns):
        mod_fit1 = sarima_model_creation(data[col], *order, *seasonal_order)
        # One prediction object supplies both the mean and its interval.
        future_pred1 = mod_fit1.get_prediction(start=begin, end=end)
        # to_frame(name=...) renames the series; building a DataFrame with
        # columns=[col] would instead *select* a column called `col` and yield
        # NaN whenever the prediction series carries its own name.
        point_predictions = future_pred1.predicted_mean.to_frame(name=col)
        future_pred1_ci = future_pred1.conf_int(alpha=alpha)
        frames.append(pd.merge(point_predictions, future_pred1_ci,
                               left_index=True, right_index=True))
    if not frames:
        return pd.DataFrame()
    # Align on the predicted dates themselves instead of a fixed 600-month
    # scaffold, so the requested `end` is never silently cut off.
    return pd.concat(frames, axis=1, join='inner')

In [106]:
# Predictions and intervals for every column of ncrd_less (the stations
# modelled without exogenous regressors), then a ten-row preview.
pre_df = prediction_fx(ncrd_less, '2019-05-01', '2069-05-01')
pre_df.head(10)

HBox(children=(IntProgress(value=0, max=104), HTML(value='')))

Unnamed: 0,"Raleigh, NC","lower Raleigh, NC","upper Raleigh, NC","Fayetteville, NC","lower Fayetteville, NC","upper Fayetteville, NC","Albemarle, NC","lower Albemarle, NC","upper Albemarle, NC","Arcola, NC",...,"upper YADKINVILLE 6 E, NC","HAYESVILLE 1 NE, NC","lower HAYESVILLE 1 NE, NC","upper HAYESVILLE 1 NE, NC","MURPHY 4ESE, NC","lower MURPHY 4ESE, NC","upper MURPHY 4ESE, NC","KING, NC","lower KING, NC","upper KING, NC"
2019-05-01,3.366287,-0.94759,7.680163,3.111657,-1.504349,7.727664,3.461614,-0.901392,7.824621,4.101742,...,8.403074,6.052865,2.067085,10.038645,6.054696,1.844234,10.265159,4.555015,0.729303,8.380726
2019-06-01,4.311068,-0.004202,8.626337,4.639369,-0.012169,9.290906,4.748512,0.334216,9.162809,4.453031,...,8.033802,5.717889,1.691579,9.7442,6.150096,1.908906,10.391286,3.477236,-0.366788,7.32126
2019-07-01,6.314961,1.995109,10.634814,4.736273,0.084019,9.388527,5.211461,0.767251,9.655671,4.030026,...,8.752115,6.071343,2.008941,10.133745,5.999118,1.743897,10.25434,4.060046,0.207505,7.912587
2019-08-01,4.145673,-0.191141,8.482487,5.171562,0.516924,9.826201,4.538334,0.074311,9.002357,3.948713,...,8.744874,5.126533,1.018007,9.235059,4.852656,0.570876,9.134435,4.301428,0.444422,8.158433
2019-09-01,4.875324,0.538447,9.212201,4.613723,-0.047083,9.27453,3.479564,-0.985128,7.944255,4.547796,...,8.041144,5.06393,0.946878,9.180982,4.718284,0.405907,9.030662,4.442798,0.584406,8.301189
2019-10-01,4.620644,0.275651,8.965637,3.449842,-1.211666,8.111351,3.654706,-0.812263,8.121676,4.270212,...,7.43548,4.244699,0.118656,8.370741,3.892131,-0.433895,8.218157,4.220072,0.360803,8.079341
2019-11-01,2.836573,-1.515683,7.188829,2.833024,-1.829652,7.4957,3.578847,-0.888818,8.046511,4.145416,...,7.125765,5.128604,1.001646,9.255561,4.46293,0.130424,8.795437,4.280093,0.420679,8.139506
2019-12-01,3.028145,-1.32567,7.38196,3.043657,-1.619592,7.706907,3.945386,-0.522327,8.413099,4.089217,...,7.805577,5.843728,1.715459,9.971997,5.297449,0.96355,9.631349,4.52121,0.661633,8.380788
2020-01-01,3.99628,-0.365668,8.358228,2.90744,-1.75607,7.57095,3.020206,-1.447848,7.488261,3.895951,...,7.420529,5.472307,1.343995,9.600618,4.975608,0.641069,9.310147,4.132021,0.272453,7.991588
2020-02-01,2.822907,-1.540627,7.18644,2.760683,-1.903013,7.424379,3.227472,-1.240312,7.695256,3.833445,...,6.962277,5.590564,1.46213,9.718997,5.443202,1.108752,9.777651,4.021765,0.162172,7.881357


In [9]:
# Target station -> names of the `rd` columns used as its exogenous regressors.
_exog_columns = {
    'WHITEVILLE 7 NW, NC': [' LORIS 2 S, SC'],
    'CASAR, NC': ['GAFFNEY 6 E, SC'],
    'FOREST CITY 8 W, NC': ['GAFFNEY 6 E, SC'],
    'GASTONIA, NC': ['FORT MILL 4 NW, SC', 'GAFFNEY 6 E, SC'],
    'LAKE LURE 2, NC': ['CHESNEE 7 WSW, SC'],
    ' MOUNT HOLLY 4 NE, NC': ['CHESNEE 7 WSW, SC', 'GAFFNEY 6 E, SC'],
    'ELIZABETHTOWN, NC': [' LORIS 2 S, SC'],
    'GRANDFATHER MTN, NC': ['ELIZABETHTON, TN'],
}
# Target station -> DataFrame holding those regressor columns.
exo_var_dict2 = {station: rd[columns] for station, columns in _exog_columns.items()}

In [10]:
def prediction_exog_fx2(data, exog_dict, begin, end, order=(4, 0, 3), seasonal_order=(3, 0, 4, 12), alpha=0.2):
    """Predict stations that are modelled with exogenous regressors.

    For every ``key -> regressors`` pair a SARIMA model of ``data[key]`` is
    fit with the regressors. Each regressor column is then forecast on its own
    with a SARIMA model of the same order, and those forecasts are supplied as
    the exogenous values for the station's prediction.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every key of ``exog_dict`` as a column.
    exog_dict : dict
        Maps a column label of ``data`` to a DataFrame of its regressors.
    begin, end : str
        First and last date to predict (``start`` / ``end`` of the
        prediction calls).
    order : tuple, default (4, 0, 3)
        ``(p, d, q)`` used for the station model and the regressor models.
    seasonal_order : tuple, default (3, 0, 4, 12)
        ``(P, D, Q, m)`` used for the station model and the regressor models.
    alpha : float, default 0.2
        Significance level passed to ``conf_int``.

    Returns
    -------
    pandas.DataFrame
        For every key: the point prediction (named after the key) followed by
        the interval columns produced by ``conf_int``. Rows cover the full
        predicted range, including ``end``. Empty if ``exog_dict`` is empty.
    """
    frames = []
    for key, value in tqdm(exog_dict.items()):
        mod_fit1 = sarima_model_creation(data[key], *order, *seasonal_order, exog=value)
        # Forecast each regressor separately; one loop covers the single- and
        # multi-column cases alike.
        exog_forecasts = []
        for i in range(value.shape[1]):
            exog_mod_fit = sarima_model_creation(value.iloc[:, i], *order, *seasonal_order)
            forecast = exog_mod_fit.predict(start=begin, end=end)
            # Keep the regressor's own name so the columns stay distinct.
            exog_forecasts.append(forecast.rename(value.columns[i]))
        exog_predictions_df = pd.concat(exog_forecasts, axis=1)
        # One prediction object supplies both the mean and its interval.
        future_pred = mod_fit1.get_prediction(exog=exog_predictions_df, start=begin, end=end)
        future_pred_ci = future_pred.conf_int(alpha=alpha)
        # to_frame(name=...) renames the series; DataFrame(..., columns=[key])
        # would select a column called `key` and give NaN for a named series.
        future_pred_val = future_pred.predicted_mean.to_frame(name=key)
        frames.append(pd.merge(future_pred_val, future_pred_ci,
                               left_index=True, right_index=True))
    if not frames:
        return pd.DataFrame()
    # Align on the predicted dates rather than a fixed 600-month scaffold so
    # the requested `end` is not cut off.
    return pd.concat(frames, axis=1, join='inner')

In [11]:
# Predictions and intervals for the stations listed in exo_var_dict2, each
# modelled with its exogenous regressors taken from `rd`.
e_ci_df = prediction_exog_fx2(rd, exo_var_dict2, '2019-05-01', '2069-05-01')

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))




In [12]:
# Preview the first ten predicted months.
e_ci_df.head(10)

Unnamed: 0,"WHITEVILLE 7 NW, NC","lower WHITEVILLE 7 NW, NC","upper WHITEVILLE 7 NW, NC","CASAR, NC","lower CASAR, NC","upper CASAR, NC","FOREST CITY 8 W, NC","lower FOREST CITY 8 W, NC","upper FOREST CITY 8 W, NC","GASTONIA, NC",...,"upper LAKE LURE 2, NC","MOUNT HOLLY 4 NE, NC","lower MOUNT HOLLY 4 NE, NC","upper MOUNT HOLLY 4 NE, NC","ELIZABETHTOWN, NC","lower ELIZABETHTOWN, NC","upper ELIZABETHTOWN, NC","GRANDFATHER MTN, NC","lower GRANDFATHER MTN, NC","upper GRANDFATHER MTN, NC"
2019-05-01,5.753516,2.932058,8.574975,3.591515,1.558254,5.624777,3.899437,1.93547,5.863404,4.178921,...,9.529583,3.718346,1.981513,5.45518,4.539367,1.695688,7.383046,7.329324,3.670124,10.988524
2019-06-01,3.724518,0.879779,6.569256,4.899144,2.797554,7.000734,4.043647,2.051741,6.035554,3.854757,...,7.859595,3.893037,2.148525,5.637549,4.742576,1.836207,7.648946,6.904584,3.230433,10.578735
2019-07-01,5.557662,2.706438,8.408886,4.932119,2.827554,7.036683,4.229915,2.230285,6.229544,3.854248,...,7.729423,3.564906,1.817351,5.312461,5.759743,2.849444,8.670042,7.503359,3.82803,11.178687
2019-08-01,6.962032,4.103387,9.820676,4.510153,2.405284,6.615022,4.273738,2.273288,6.274189,3.864488,...,8.337127,3.462391,1.700863,5.22392,6.681779,3.771469,9.592089,6.57049,2.895123,10.245857
2019-09-01,5.759614,2.899671,8.619557,4.223978,2.11729,6.330666,4.151798,2.151158,6.152439,3.730619,...,8.277638,3.215256,1.453443,4.977068,7.964787,5.054281,10.875293,5.825237,2.149414,9.501059
2019-10-01,3.492725,0.626859,6.358591,3.624053,1.516832,5.731275,3.761492,1.760821,5.762163,3.63001,...,7.823008,3.428503,1.66637,5.190637,5.165368,2.25479,8.075946,5.788173,2.11225,9.464097
2019-11-01,3.845209,0.973952,6.716466,4.54598,2.438038,6.653921,4.187163,2.186382,6.187944,3.811119,...,7.367885,3.288401,1.518499,5.058303,4.493145,1.582568,7.403722,5.711345,2.034994,9.387696
2019-12-01,3.647705,0.776451,6.518959,4.362686,2.253797,6.471574,3.789722,1.788948,5.790495,3.485347,...,7.975982,3.782379,2.012226,5.552531,4.593129,1.682459,7.503799,6.099219,2.422684,9.775755
2020-01-01,2.551145,-0.3257,5.427991,4.507149,2.397572,6.616726,4.227263,2.226496,6.228031,3.782411,...,7.497958,3.369102,1.598952,5.139252,4.532317,1.62161,7.443025,5.944489,2.267524,9.621454
2020-02-01,4.253857,1.374198,7.133516,4.019727,1.909347,6.130108,3.890641,1.88985,5.891431,3.878353,...,7.706397,3.404534,1.630056,5.179013,5.310876,2.400142,8.22161,6.137058,2.459829,9.814287


In [120]:
# Combine both prediction tables side by side, aligned on the date index
# (pd.merge defaults to an inner join, so only dates present in both remain).
# NOTE(review): in the saved outputs the e_ci_df preview and the merged
# preview show different interval bounds for the same point forecasts (e.g.
# ' MOUNT HOLLY 4 NE, NC' on 2019-05-01), and the execution counts are out of
# order - presumably the cells were run at different times with different
# settings. Restart and run all cells before relying on this result.
merged_ci_vals = pd.merge(pre_df, e_ci_df, left_index=True, right_index=True)

In [121]:
# Write the combined predictions to 'predictions.csv' (relative path, so it
# lands in the kernel's current working directory); the date index is kept.
merged_ci_vals.to_csv('predictions.csv')

In [122]:
# Preview the first ten rows of the combined table.
merged_ci_vals.head(10)

Unnamed: 0,"Raleigh, NC","lower Raleigh, NC","upper Raleigh, NC","Fayetteville, NC","lower Fayetteville, NC","upper Fayetteville, NC","Albemarle, NC","lower Albemarle, NC","upper Albemarle, NC","Arcola, NC",...,"upper LAKE LURE 2, NC","MOUNT HOLLY 4 NE, NC","lower MOUNT HOLLY 4 NE, NC","upper MOUNT HOLLY 4 NE, NC","ELIZABETHTOWN, NC","lower ELIZABETHTOWN, NC","upper ELIZABETHTOWN, NC","GRANDFATHER MTN, NC","lower GRANDFATHER MTN, NC","upper GRANDFATHER MTN, NC"
2019-05-01,3.366287,-0.94759,7.680163,3.111657,-1.504349,7.727664,3.461614,-0.901392,7.824621,4.101742,...,10.894134,3.718346,1.062089,6.374604,4.539367,0.190336,8.888398,7.329324,1.73306,12.925588
2019-06-01,4.311068,-0.004202,8.626337,4.639369,-0.012169,9.290906,4.748512,0.334216,9.162809,4.453031,...,9.23241,3.893037,1.225036,6.561039,4.742576,0.297668,9.187485,6.904584,1.285455,12.523713
2019-07-01,6.314961,1.995109,10.634814,4.736273,0.084019,9.388527,5.211461,0.767251,9.655671,4.030026,...,9.102545,3.564906,0.892252,6.23756,5.759743,1.308825,10.210661,7.503359,1.882429,13.124288
2019-08-01,4.145673,-0.191141,8.482487,5.171562,0.516924,9.826201,4.538334,0.074311,9.002357,3.948713,...,9.711629,3.462391,0.768366,6.156417,6.681779,2.230843,11.132715,6.57049,0.949501,12.191479
2019-09-01,4.875324,0.538447,9.212201,4.613723,-0.047083,9.27453,3.479564,-0.985128,7.944255,4.547796,...,9.653529,3.215256,0.520796,5.909716,7.964787,3.513552,12.416022,5.825237,0.203551,11.446923
2019-10-01,4.620644,0.275651,8.965637,3.449842,-1.211666,8.111351,3.654706,-0.812263,8.121676,4.270212,...,9.199086,3.428503,0.733553,6.123454,5.165368,0.714024,9.616713,5.788173,0.166334,11.410013
2019-11-01,2.836573,-1.515683,7.188829,2.833024,-1.829652,7.4957,3.578847,-0.888818,8.046511,4.145416,...,8.743976,3.288401,0.581569,5.995233,4.493145,0.041802,8.944489,5.711345,0.088851,11.333839
2019-12-01,3.028145,-1.32567,7.38196,3.043657,-1.619592,7.706907,3.945386,-0.522327,8.413099,4.089217,...,9.352152,3.782379,1.075164,6.489593,4.593129,0.141644,9.044615,6.099219,0.476443,11.721995
2020-01-01,3.99628,-0.365668,8.358228,2.90744,-1.75607,7.57095,3.020206,-1.447848,7.488261,3.895951,...,8.874139,3.369102,0.661891,6.076313,4.532317,0.080774,8.983861,5.944489,0.321057,11.567922
2020-02-01,2.822907,-1.540627,7.18644,2.760683,-1.903013,7.424379,3.227472,-1.240312,7.695256,3.833445,...,9.082625,3.404534,0.690703,6.118366,5.310876,0.859292,9.76246,6.137058,0.513221,11.760895
