In [2]:
import pandas as pd
import numpy as np
import scipy.stats as st
import pmdarima as pm

In [3]:
Location = f'C:\pothole\Pothole_Map.csv'
df = pd.read_csv(Location)
df.dropna(subset=['Created Date', 'Latitude', 'Longitude'], inplace=True)

In [4]:
df['Created Date'] = pd.to_datetime(df['Created Date'])
df['Closed Date'] = pd.to_datetime(df['Closed Date'])
sortdf = df[df['Resolution Description'] ==  'The Department of Transportation inspected this complaint and repaired the problem.'].copy()

In [5]:
#getting rid of outlier
sortdf['Created Day'] = sortdf['Created Date'].dt.date
sortdf['xOutlier'] = (abs(sortdf['Latitude'] - sortdf['Latitude'].mean()) > 1.96*sortdf['Latitude'].std())
sortdf['yOutlier'] = (abs(sortdf['Longitude'] - sortdf['Longitude'].mean()) > 1.96*sortdf['Longitude'].std())
sortdf = sortdf[sortdf['xOutlier']==False]
sortdf = sortdf[sortdf['yOutlier']==False]

In [6]:
sortdf['month'] = sortdf['Created Date'].dt.to_period('M')
sortdf['month'] = sortdf['month'].dt.strftime('%Y-%m')
sortdf['Date'] = pd.to_datetime(sortdf['Created Day']).dt.to_period('m')
sortdf['value'] =1
date = sortdf[['Date', 'value']].groupby(['Date'],as_index=False).sum()
date['year'] = date['Date'].dt.year
date['month'] = date['Date'].dt.month
#print(date)

In [7]:
x = sortdf['Latitude'].to_numpy()
y = sortdf['Longitude'].to_numpy()
deltaX = (max(x) - min(x))/10
deltaY = (max(y) - min(y))/10
xmin = min(x) - deltaX
xmax = max(x) + deltaX
ymin = min(y) - deltaY
ymax = max(y) + deltaY
ydiff2 = ymax - ymin
xdiff2 = xmax - xmin
#print(xmin, xmax, ymin, ymax)
sq = ydiff2*xdiff2/(10000)
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([xx.ravel(), yy.ravel()])

In [8]:
def densev(input):
    m1 = sortdf[sortdf['month'] == input]
    sx = m1['Latitude'].to_numpy()
    sy = m1['Longitude'].to_numpy()
    values = np.vstack([sx, sy])
    kernel = st.gaussian_kde(values)
    sf = np.reshape(kernel(positions).T, xx.shape)
    return sx, sy, sf

In [9]:
denset = []
for index, row in date.iterrows():
    tempx, tempy, tempf = densev(str(row['Date']))
    denset.append(tempf)
#
block = [[[0 for x in range(len(denset))] for x in range(100)] for y in range(100)]
for i in range(0,len(denset[0])):
    for j in range(0,len(denset[0])):
        for k in range(0,len(denset)):
            block[i][j][k] = denset[k][i][j]

In [29]:
def timeseries(cordx, cordy, n_period):
    data1 = {'Month': date.Date, 'values': block[cordx][cordy]}
    df2 = pd.DataFrame(data1)
    #print(df2)      
    df3 = df2.drop(range(60,66))
    df3['Month'] = df3['Month'].dt.to_timestamp('s').dt.strftime('%Y-%m-%d %H:%M:%S.000')
    df3['Month'] = pd.to_datetime(df3['Month'], format='%Y-%m-%d %H:%M:%S.%f')
    df3 = df3.set_index('Month')
    if df3['values'].mean() > 10:
        data = df3[['values']]
        smodel = pm.auto_arima(data, start_p=1, start_q=1,
                           test='adf',
                           max_p=3, max_q=3, m=12,
                           start_P=0, seasonal=True,
                           d=None, D=1, trace=True,
                           error_action='ignore',
                           suppress_warnings=True,
                           stepwise=True)

        smodel.summary()
        n_periods = 24
        fitted, confint = smodel.predict(n_periods=n_periods, return_conf_int=True)
        #print(fitted)
        index_of_fc = pd.date_range(data.index[-1], periods = n_periods, freq='MS')

        fitted_series = pd.Series(fitted, index=index_of_fc)
        lower_series = pd.Series(confint[:, 0], index=index_of_fc)
        upper_series = pd.Series(confint[:, 1], index=index_of_fc)
        fitted_series.to_pickle('./forecast/fittedseries_'+str(cordx)+'_'+str(cordy)+'_'+str(n_period)+'.pkl')
        lower_series.to_pickle('./forecast/lowerseries_'+str(cordx)+'_'+str(cordy)+'_'+str(n_period)+'.pkl')
        upper_series.to_pickle('./forecast/upperseries_'+str(cordx)+'_'+str(cordy)+'_'+str(n_period)+'.pkl')

    

In [11]:
forecastperiod = 12

In [12]:
#for i in range(0,100):
#    for j in range(0, 100):
#        print(str(i) +' -- '+str(j))
#        timeseries(i,j,forecastperiod)


In [30]:
timeseries(44,83,forecastperiod)

Performing stepwise search to minimize aic
 ARIMA(1,2,1)(0,1,1)[12]             : AIC=inf, Time=0.13 sec
 ARIMA(0,2,0)(0,1,0)[12]             : AIC=541.651, Time=0.02 sec
 ARIMA(1,2,0)(1,1,0)[12]             : AIC=514.870, Time=0.11 sec
 ARIMA(0,2,1)(0,1,1)[12]             : AIC=inf, Time=0.17 sec
 ARIMA(1,2,0)(0,1,0)[12]             : AIC=512.907, Time=0.02 sec
 ARIMA(1,2,0)(0,1,1)[12]             : AIC=514.881, Time=0.17 sec
 ARIMA(1,2,0)(1,1,1)[12]             : AIC=inf, Time=0.33 sec
 ARIMA(2,2,0)(0,1,0)[12]             : AIC=497.885, Time=0.03 sec
 ARIMA(2,2,0)(1,1,0)[12]             : AIC=499.876, Time=0.12 sec
 ARIMA(2,2,0)(0,1,1)[12]             : AIC=499.877, Time=0.12 sec
 ARIMA(2,2,0)(1,1,1)[12]             : AIC=inf, Time=0.68 sec
 ARIMA(3,2,0)(0,1,0)[12]             : AIC=495.105, Time=0.05 sec
 ARIMA(3,2,0)(1,1,0)[12]             : AIC=497.047, Time=0.12 sec
 ARIMA(3,2,0)(0,1,1)[12]             : AIC=497.058, Time=0.16 sec
 ARIMA(3,2,0)(1,1,1)[12]             : AIC=499.04

In [32]:
tt = [1,2,3,4,5]
tt

[1, 2, 3, 4, 5]

In [16]:
data1 = {'Month': date.Date, 'values': block[0][83]}
df2 = pd.DataFrame(data1)
df2

Unnamed: 0,Month,values
0,2016-01,3.723715e-18
1,2016-02,2.638328e-17
2,2016-03,1.249277e-27
3,2016-04,2.421516e-30
4,2016-05,1.206509e-15
...,...,...
61,2021-02,5.077575e-08
62,2021-03,6.874665e-15
63,2021-04,3.482807e-16
64,2021-05,1.105408e-29


In [17]:
df2['values'].mean()

8.127109370227034e-08

In [26]:
df2.to_pickle('./forecast/fittedseries_example.pkl')

In [27]:
tempdata = pd.read_pickle("./forecast/fittedseries_example.pkl")

In [28]:
tempdata

Unnamed: 0,Month,values
0,2016-01,3.723715e-18
1,2016-02,2.638328e-17
2,2016-03,1.249277e-27
3,2016-04,2.421516e-30
4,2016-05,1.206509e-15
...,...,...
61,2021-02,5.077575e-08
62,2021-03,6.874665e-15
63,2021-04,3.482807e-16
64,2021-05,1.105408e-29


In [23]:
for i in range(0, len(tempdata)):
    print(tempdata[i])

490.18640855045254
305.6773414776327
566.779623203207
344.77174855677083
364.90153030319397
1447.6218586344523
764.7770343232318
182.92173794384905
390.0654741127428
433.9755935911893
643.711299123748
1186.3046295814415
621.3889606891679
321.9651028703164
480.47948887627217
368.45729708151475
320.5449211119378
1651.8806611332213
774.6085794826718
225.63182295777676
470.68978207700604
352.19508075728
775.5704272592084
1247.388380009882
