In [None]:
import pandas as pd
# Read the raw CSV, then convert the `timestamp` column to datetime values.
csv_path = '../input/new-york-taxi/nyc_taxi.csv'
data = pd.read_csv(csv_path).assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'])
)
data.head()

In [None]:
# Add rolling means of `value` as columns MA60 and MA365.
# The window sizes are row counts (integer windows), not calendar units.
for window in (60, 365):
    data[f'MA{window}'] = data['value'].rolling(window).mean()
data.tail()

In [None]:
# Plot the raw series together with both moving averages against time.
import plotly.express as px

series_to_plot = ['value', 'MA60', 'MA365']
fig = px.line(
    data,
    x="timestamp",
    y=series_to_plot,
    title='NYC Taxi Trips',
    template='plotly_dark',
)
fig.show()

In [None]:
# The moving averages were only needed for the plot; remove them again.
data = data.drop(columns=['MA60', 'MA365'])
data.head()

In [None]:
# Move `timestamp` out of the columns and use it as the frame's index.
data = data.set_index('timestamp')
data.head()

In [None]:
# Group the rows into hourly bins on the datetime index and sum the values in each bin.
hourly_bins = data.resample('H')
data = hourly_bins.sum()
data.head()


In [None]:
# create features from date
# Calendar features are derived from the DatetimeIndex with vectorized accessors
# rather than a Python-level loop over every timestamp. Integer columns are cast
# to int64 so the dtypes match what a list of Python ints would produce.
ts_index = data.index
data['day'] = ts_index.day.astype('int64')
data['day_name'] = ts_index.day_name()
data['day_of_year'] = ts_index.dayofyear.astype('int64')
# ISO week number; isocalendar() yields a nullable UInt32 column, so convert explicitly.
data['week_of_year'] = ts_index.isocalendar().week.to_numpy(dtype='int64')
data['hour'] = ts_index.hour.astype('int64')
# NOTE(review): despite its name this column is not a boolean flag; it holds the ISO
# weekday number (1 = Monday ... 7 = Sunday). The name is kept because the setup
# cell refers to it via numeric_features=['is_weekday'].
data['is_weekday'] = (ts_index.dayofweek + 1).astype('int64')
data.head()

In [None]:
# install slim version (default)
!pip install pycaret

In [None]:
# init setup
# The star import is kept on purpose: later cells call models(), create_model(),
# assign_model() and plot_model() unqualified.
from pycaret.anomaly import *

# Ordinal encoding follows the order of this list, so the day names must be in
# calendar order (Saturday before Sunday).
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
             'Friday', 'Saturday', 'Sunday']

s = setup(
    data,
    session_id=42,  # fixed seed for reproducible results
    ordinal_features={'day_name': day_order},
    # `is_weekday` holds the weekday number 1-7, so it is declared numeric explicitly.
    numeric_features=['is_weekday'],
)

In [None]:
# check list of available models
# `models` is brought into scope by the `from pycaret.anomaly import *` in the
# setup cell; calling it as the last expression displays its result.
models()

In [None]:
# train model
# Create the model registered under the id 'mcd'
# (presumably fitted on the data passed to setup() — confirm against the PyCaret docs).
mcd = create_model('mcd')
# Attach the model's output to the data; the returned frame carries an `Anomaly`
# column, which the following cells filter on (`Anomaly == 1`).
mcd_results = assign_model(mcd)
# Preview the first rows of the labelled frame.
mcd_results.head()

In [None]:
# Preview the first rows that the model labelled as anomalous (Anomaly == 1).
is_anomaly = mcd_results['Anomaly'].eq(1)
mcd_results.loc[is_anomaly].head()

In [None]:
import plotly.graph_objects as go

# plot value on y-axis and date on x-axis
# (`px` is plotly.express, imported in the earlier plotting cell)
fig = px.line(
    mcd_results,
    x=mcd_results.index,
    y="value",
    title='NYC TAXI TRIPS - UNSUPERVISED ANOMALY DETECTION',
    template='plotly_dark',
)

# Select the flagged rows once: the resulting Series supplies both the x
# positions (its index) and the y values, with no per-row lookup loop.
anomalies = mcd_results.loc[mcd_results['Anomaly'] == 1, 'value']
outlier_dates = anomalies.index
y_values = anomalies.to_list()

# overlay the anomalies as red markers on top of the line
fig.add_trace(
    go.Scatter(
        x=outlier_dates,
        y=y_values,
        mode='markers',
        name='Anomaly',
        marker=dict(color='red', size=5),
    )
)

fig.show()

In [None]:
# Visualise the trained model with plot_model's default plot type (none is given here).
plot_model(mcd)

In [None]:
# Visualise the same model again, this time explicitly requesting the 'umap' plot type.
plot_model(mcd, plot = 'umap')