<a href="https://colab.research.google.com/github/ooeyad/AutoArima-Time-Series-Blog/blob/master/MOI_Accidents_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import numpy as np
from datetime import datetime as dt,timedelta
from matplotlib import pyplot as plt
from fbprophet import Prophet
import datetime
    
def fix_hour(time):
    time_parts = time.split(':')
    hour = int(time_parts[0])
    if hour == 0:
        time_parts[0] = '12'
    elif hour > 12:
        time_parts[0] = str(hour - 12)
    time_parts = ':'.join(time_parts)
    return time_parts

def get_time_shift(time):
    hour = time.hour
    if hour >= 0 and hour < 6:
        return 1
    elif hour >= 6 and hour < 12:
        return 2
    elif hour >= 12 and hour < 18:
        return 3
    else:
        return 4
  
def predict_for_area_shift(area , shift,last_date):

    #read the accidents files
    df  = pd.read_csv('/content/acc.csv')
    df2019 = pd.read_csv('/content/acc_2019.csv')

    # correct date and time columns
    df2019['AGI_AREACDE'] = df2019['AGI_AREACDE'].astype(np.object).str.strip()
    df2019['AGI_AREACDE'] = pd.to_numeric(df2019['AGI_AREACDE'], errors='coerce').fillna(0)
    df2019['AGI_AREACDE'] = df2019['AGI_AREACDE'].astype(np.int)
    df2019 = df2019[df2019['AGI_AREACDE'] > 0 ]

    #no need for coordinates 
    del df2019['AGI_XCOORD']
    del df2019['AGI_YCOORD']

    # fix hours to be between 1-12, and change the column to datetime with 24 hour format
    df2019['AGI_ACCTIME'] = df2019['AGI_ACCTIME'].apply(fix_hour)
    df2019['FULL_DATE'] = df2019['AGI_ACCDATE'] + ' ' + df2019['AGI_ACCTIME'] + ' ' + (np.where(df2019['AGI_TMETYP'] == 1 , 'AM' , 'PM'))
    df2019['FULL_DATE'] = pd.to_datetime(df2019['FULL_DATE'],format = '%Y-%m-%d %I:%M:%S %p')

    # fix hours to be between 1-12, and change the column to datetime with 24 hour format
    df['AGI_ACCTIME'] = df['AGI_ACCTIME'].apply(fix_hour)
    df['FULL_DATE'] = df['AGI_ACCDATE'] + ' ' + df['AGI_ACCTIME'] + ' ' + (np.where(df['AGI_TMETYP'] == 1 , 'AM' , 'PM'))
    df['FULL_DATE'] = pd.to_datetime(df['FULL_DATE'],format = '%m/%d/%Y %I:%M:%S %p')

    df = pd.concat([df,df2019])

    df = df.sort_values('FULL_DATE').reset_index(drop = True)


    # create a time shift column
    df['TIME_SHIFT'] = df['FULL_DATE'].apply(get_time_shift)

    #drop null area codes.
    df.drop(df[df.AGI_AREACDE.isna()].index, inplace=True)

    #sort by full_date
    df = df.sort_values('FULL_DATE')

    from scipy.stats import boxcox
    from scipy.special import inv_boxcox

    if shift == 0:
      df_area57_ts = df[df['AGI_AREACDE']==area]
    else:
      df_area57_ts = df[(df['AGI_AREACDE']==area) & (df['TIME_SHIFT']==shift)]
    df_area57_ts['NO_OF_ACCIDENTS'] = np.zeros(len(df_area57_ts['FULL_DATE']))
    df_area57_ts = df_area57_ts[['FULL_DATE','NO_OF_ACCIDENTS']].set_index('FULL_DATE').groupby(pd.Grouper(freq='D')).count().reset_index()
    df_area57_ts = df_area57_ts[df_area57_ts['NO_OF_ACCIDENTS'] > 0]
    df_area57_ts = df_area57_ts.sort_values('FULL_DATE')
    #df_area57_ts2 = df_area57_ts[(df_area57_ts['FULL_DATE'] >= '2018-01-01') & (df_area57_ts['FULL_DATE'] <= '2019-02-28')]
    df_area57_ts2 = df_area57_ts.set_index('FULL_DATE')
    df_area57_ts2 = df_area57_ts2.reset_index()
    
    df_orig = df_area57_ts2.copy()
    df_area57_ts2 = df_area57_ts2[df_area57_ts2['FULL_DATE'] < last_date]
    
    national_day = pd.DataFrame({'holiday': 'national','ds': pd.to_datetime(['2018-12-18','2017-12-18','2016-12-18','2015-12-18']),'lower_window': 0,'upper_window': 10,})
    df_area57_ts2 = df_area57_ts2.rename(columns={'FULL_DATE': 'ds', 'NO_OF_ACCIDENTS': 'y'})
    
    #transform data using boxcox
    df_area57_ts2['y'] = np.log(df_area57_ts2['y'])
    
    m = Prophet(weekly_seasonality = True)
    m.add_seasonality(name="monthly", period=30.5, fourier_order=5, prior_scale=0.02)
    
    m.fit(df_area57_ts2)
    
    future = m.make_future_dataframe(periods=90)
    
    forecast = m.predict(future)
    
    #m.plot_components(forecast);
    
    forecast[['yhat','yhat_upper','yhat_lower']] = forecast[['yhat','yhat_upper','yhat_lower']].apply(lambda x: np.exp(x))
    forecast[['yhat','yhat_upper','yhat_lower']] = forecast[['yhat','yhat_upper','yhat_lower']].apply(lambda x: np.round(x))
    return df_orig,forecast

#get_predictions receives list of areas
def get_predictions(areas,from_date,to_date):

  df_result = pd.DataFrame(columns = ['ACC_DATE','NO_OF_ACCIDENTS'])

  for a_area in areas:
    for a_shift in range(5):    
      ret_actual , acc_forecast = predict_for_area_shift(a_area , a_shift,from_date)
      if a_shift == 0:
        col_str = 'area_%s'%str(a_area)
      else:
        col_str = 'area_%s_shift%s'%(str(a_area),str(a_shift))
      if df_result['ACC_DATE'].count() == 0:
        df_result['ACC_DATE'] = acc_forecast['ds']
        df_result[col_str] = acc_forecast['yhat']
      else:
        df_result[col_str] = acc_forecast['yhat']
  df_result = df_result[(df_result['ACC_DATE'] >= from_date) & (df_result['ACC_DATE'] < to_date) ]
  del df_result['NO_OF_ACCIDENTS']
  print('Shifts : ----------------------')
  print('Shift1 : 0 - 6am')
  print('Shift2 : 6am - 12pm')
  print('Shift3 : 12pm - 6pm')
  print('Shift2 : 6pm - 0')
  
  return df_result


In [21]:
from_date = datetime.date(2019, 4, 1)
to_date = datetime.date(2019, 5, 1)
areas = [57,56,55]
get_predictions(areas,from_date,to_date)


Columns (11) have mixed types. Specify dtype option on import or set low_memory=False.


Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Comparing Series of datetimes with 'datetime.date'.  Currently, the
'datetime.date' is coerced to a datetime. In the future pandas will
not coerce, and a TypeError will be raised. To retain the current
behavior, convert the 'datetime.date' to a datetime with
'pd.Timestamp'.

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Columns (11) have mixed types. Specify dtype option on import or set low_memory=False.


C

Shifts : ----------------------
Shift1 : 0 - 6am
Shift2 : 6am - 12pm
Shift3 : 12pm - 6pm
Shift2 : 6pm - 0



Comparing Series of datetimes with 'datetime.date'.  Currently, the
'datetime.date' is coerced to a datetime. In the future pandas will
not coerce, and a TypeError will be raised. To retain the current
behavior, convert the 'datetime.date' to a datetime with
'pd.Timestamp'.



Unnamed: 0,ACC_DATE,area_57,area_57_shift1,area_57_shift2,area_57_shift3,area_57_shift4,area_56,area_56_shift1,area_56_shift2,area_56_shift3,area_56_shift4,area_55,area_55_shift1,area_55_shift2,area_55_shift3,area_55_shift4
438,2019-04-01,7.0,12.0,13.0,8.0,2.0,4.0,6.0,4.0,4.0,4.0,5.0,6.0,7.0,4.0,4.0
439,2019-04-02,6.0,1.0,14.0,8.0,3.0,4.0,5.0,4.0,6.0,4.0,4.0,6.0,2.0,5.0,3.0
440,2019-04-03,7.0,2.0,13.0,7.0,3.0,2.0,5.0,1.0,8.0,3.0,4.0,3.0,4.0,6.0,4.0
441,2019-04-04,7.0,14.0,12.0,6.0,3.0,3.0,4.0,2.0,7.0,3.0,2.0,4.0,8.0,6.0,5.0
442,2019-04-05,2.0,11.0,2.0,6.0,2.0,5.0,4.0,5.0,7.0,2.0,3.0,6.0,6.0,5.0,4.0
443,2019-04-06,5.0,10.0,9.0,6.0,2.0,4.0,2.0,4.0,7.0,3.0,4.0,5.0,6.0,5.0,4.0
444,2019-04-07,8.0,13.0,12.0,3.0,3.0,4.0,3.0,4.0,7.0,3.0,4.0,5.0,6.0,5.0,3.0
445,2019-04-08,7.0,13.0,11.0,7.0,2.0,4.0,5.0,4.0,3.0,3.0,4.0,5.0,7.0,3.0,3.0
446,2019-04-09,6.0,1.0,10.0,7.0,3.0,4.0,4.0,4.0,5.0,3.0,4.0,6.0,2.0,4.0,3.0
447,2019-04-10,6.0,2.0,9.0,6.0,3.0,2.0,5.0,1.0,6.0,3.0,4.0,3.0,4.0,4.0,4.0
