In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go#visualization
import plotly.offline as py

In [28]:
def plot_line(x,y,color,name) :
    tracer = go.Scatter(x = x,y = y,mode = "lines",
                        marker = dict(color = color,
                                      line = dict(width =1)),
                       name = name)
    return tracer

In [29]:
def plot_layout(title) :
    layout = go.Layout(dict(title = title,
                            plot_bgcolor  = "rgb(243,243,243)",
                            paper_bgcolor = "rgb(243,243,243)",
                            xaxis = dict(gridcolor = 'rgb(255, 255, 255)',
                                         zerolinewidth=1,ticklen=5,gridwidth=2),
                            yaxis = dict(gridcolor = 'rgb(255, 255, 255)',
                                            zerolinewidth=1,ticklen=5,gridwidth=2),
                        margin = dict(b = 100)
                       )
                  )
    return layout

In [40]:
data = pd.read_csv("extracted_data.csv", nrows = 100000)

In [41]:
data.head()

Unnamed: 0.1,Unnamed: 0,Trip End Timestamp,Trip Seconds,Trip Miles,Pickup Community Area,Dropoff Community Area,Fare,Tips,Tolls,Extras,...,Payment Type,Pickup Centroid Latitude,Pickup Centroid Longitude,Dropoff Centroid Latitude,Dropoff Centroid Longitude,Tripstart_month,Tripstart_year,Tripstart_day,hour,Tripstart_time
0,0,10-08-2013 12:15,180.0,1.2,76.0,76.0,5.85,0.0,0.0,0.0,...,Cash,41.979071,-87.90304,41.979071,-87.90304,10,2013,8,21,2013-10-08
1,1,12/14/2013 10:00:00 PM,240.0,0.6,32.0,8.0,4.84,0.0,0.0,1.0,...,Cash,41.880994,-87.632746,41.893216,-87.637844,12,2013,14,21,2013-12-14
2,2,11/30/2013 09:30:00 PM,480.0,1.4,8.0,32.0,6.85,0.0,0.0,1.0,...,Cash,41.899156,-87.626211,41.877406,-87.621972,11,2013,30,21,2013-11-30
3,3,12/20/2013 05:15:00 PM,540.0,0.12,32.0,8.0,6.65,0.0,0.0,0.0,...,Cash,41.884987,-87.620993,41.893216,-87.637844,12,2013,20,17,2013-12-20
4,4,12/29/2013 07:45:00 PM,600.0,0.21,8.0,32.0,8.45,0.0,0.0,0.0,...,Cash,41.891972,-87.612945,41.870607,-87.622173,12,2013,29,19,2013-12-29


In [42]:
# data["pickup_datetime"] = data["pickup_datetime"].str.replace(" UTC","")
# data["pickup_datetime"] = pd.to_datetime(data["pickup_datetime"],format="%Y-%m-%d %H:%M:%S")
# data.head()

In [44]:
# data["year"]  = pd.DatetimeIndex(data["pickup_datetime"]).year
# #extract month
# data["month"] = pd.DatetimeIndex(data["pickup_datetime"]).month
# data["month_name"] = data["month"].map({1:"JAN",2:"FEB",3:"MAR",
#                                                 4:"APR",5:"MAY",6:"JUN",
#                                                 7:"JUL",8:"AUG",9:"SEP",
#                                                 10:"OCT",11:"NOV",12:"DEC"
#                                                })
# #merge year month
# data["month_year"] = data["year"].astype(str) + " - " + data["month_name"]
# #extract week day 
# data["week_day"]   = data["pickup_datetime"].dt.weekday_name
# #extract day 
# data["day"]        = data["pickup_datetime"].dt.day
# #extract hour
# data["hour"]        = data["pickup_datetime"].dt.hour 
data = data.sort_values(by = "Tripstart_time",ascending = False)

In [61]:
total_fare = (data.groupby(["Tripstart_year","Tripstart_month","Tripstart_day"])["Fare"].sum().reset_index())

In [91]:
ts_fare = total_fare.copy()
ts_fare.head()

Unnamed: 0,Tripstart_year,Tripstart_month,Tripstart_day,Fare
0,2013,10,1,15724.02
1,2013,10,2,8922.84
2,2013,10,3,7560.67
3,2013,10,4,8016.19
4,2013,10,5,7894.05


In [92]:
ts_fare["date"] = ts_fare["Tripstart_year"].astype(str) + "-" + ts_fare["Tripstart_month"].astype(str) + "-" + ts_fare["Tripstart_day"].astype(str)
#selecting columns
ts_fare = ts_fare[["date","Fare"]]
# convert to date format
# ts_fare["date"] = pd.to_datetime(ts_fare["date"],format = "%Y-%m-%d")
ts_fare=ts_fare.set_index('date')
# ts_fare = ts_fare.drop(columns  = ["date"],axis = 1)
ts_fare.head(10)

Unnamed: 0_level_0,Fare
date,Unnamed: 1_level_1
2013-10-1,15724.02
2013-10-2,8922.84
2013-10-3,7560.67
2013-10-4,8016.19
2013-10-5,7894.05
2013-10-6,7313.32
2013-10-7,6065.06
2013-10-8,5871.85
2013-10-9,7323.02
2013-10-10,12253.11


In [110]:
log_ts_fare = np.log(ts_fare)

In [111]:
ts_fare_diff = log_ts_fare - log_ts_fare.shift()
ts_fare_diff.dropna(inplace = True)

In [112]:
from statsmodels.tsa.arima_model import ARIMA

#ARIMA model
def arima_model(time_series,p,d,q) :
    arima_model   = ARIMA(time_series , order = (p,d,q))
    results_arima = arima_model.fit(disp = -1)
    fitted_values = results_arima.fittedvalues
    
    trace1 = plot_line(fitted_values.index,
                       fitted_values.values,
                       "blue","fitted values")
    
    trace2 = plot_line(ts_fare_diff.index,
                       ts_fare_diff["Fare"],
                       "red","log differenced values")

    layout = plot_layout(("ARIMA model p = " + str(p) + 
                          ", d = " + str(d) + ", q = " + str(q)))
    data  = [trace2,trace1]
    fig   = go.Figure(data = data,layout = layout)
    py.iplot(fig)
    
    print (results_arima.summary())

In [113]:
import warnings
warnings.filterwarnings("ignore")

In [114]:
arima_model(log_ts_fare,3,2,1)

                             ARIMA Model Results                              
Dep. Variable:                D2.Fare   No. Observations:                  197
Model:                 ARIMA(3, 2, 1)   Log Likelihood               -1820.873
Method:                       css-mle   S.D. of innovations           2460.783
Date:                Mon, 11 Nov 2019   AIC                           3653.746
Time:                        19:24:10   BIC                           3673.445
Sample:                             2   HQIC                          3661.721
                                                                              
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.2831      2.068      0.137      0.891      -3.770       4.336
ar.L1.D2.Fare    -0.1612      0.071     -2.262      0.025      -0.301      -0.022
ar.L2.D2.Fare    -0.1459      0.071     