# Preprocess Data

In [None]:
import pandas as pd
import os

# Directory of the Kaggle input dataset used by this notebook.
DataSets_PATH = "/kaggle/input/uber-dataset"
# Read the CSV and replace every missing value with 0 in one chained step.
DataFrame = pd.read_csv(
    os.path.join(DataSets_PATH, "taxi_pickups_area.csv")
).fillna(0)

## Mean Absolute Error Function

In [None]:
def get_MAE(df1, df2):
    """Print and return the mean absolute error between two DataFrames.

    The absolute differences are averaged per column first and then across
    columns.

    Args:
        df1: First DataFrame (for example the observed values).
        df2: Second DataFrame; must have the same shape as ``df1``.

    Returns:
        The mean absolute error. Earlier versions only printed the value,
        so returning it is backward-compatible.

    Raises:
        AssertionError: If the two frames do not have the same shape.
    """
    # Explicit raise instead of ``assert`` so the check is not stripped under
    # ``python -O``; the exception type stays the same for existing callers.
    if df1.shape != df2.shape:
        raise AssertionError("DataFrames must have the same shape!")
    mae = (df1 - df2).abs().mean().mean()
    print(f'Mean Absolute Error (MAE): {mae}')
    return mae

## Preprocessing the TimeStamp

In [None]:
# Convert the raw timestamp column to pandas datetimes.
DataFrame["Trip Start Timestamp"] = DataFrame["Trip Start Timestamp"].pipe(pd.to_datetime)

In [None]:
# Guarded so the cell can be re-run: once the timestamp is the index the
# column no longer exists and ``set_index`` would fail.
if DataFrame.index.name != "Trip Start Timestamp":
    DataFrame = DataFrame.set_index('Trip Start Timestamp').astype(int)

In [None]:
## Plotting the Trend/Seasonality/Noise

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt


# Re-index to daily frequency and decompose one column additively.
# NOTE(review): ``asfreq('D')`` keeps the sample at each daily timestamp
# rather than aggregating the day - confirm that this is intended.
DataFrame_plot = DataFrame.asfreq('D')
decomposition = seasonal_decompose(DataFrame_plot['Pickup Community Area_0'], model='additive')

# One stacked panel per component; each entry is (series, plot keyword args).
plt.figure(figsize=(10, 8))
for position, (series, style) in enumerate(
    [
        (DataFrame_plot['Pickup Community Area_0'], {"label": 'Original Data'}),
        (decomposition.trend, {"label": 'Trend', "color": 'green'}),
        (decomposition.seasonal, {"label": 'Seasonality', "color": 'orange'}),
        (decomposition.resid, {"label": 'Residual (Noise)', "color": 'red'}),
    ],
    start=1,
):
    plt.subplot(4, 1, position)
    plt.plot(series, **style)
    plt.legend()

plt.tight_layout()
plt.show()

# Drop the reference to the daily frame once the figure is drawn.
DataFrame_plot = None

## Splitting Data To train models

In [None]:
# Hold out the last `num_intervals` rows as the test set; everything before
# them is used for training.
num_intervals = 673
DataFrame_train = DataFrame.iloc[:-num_intervals]
DataFrame_test = DataFrame.iloc[-num_intervals:]

# Naive Average

In [None]:
import numpy as np

# Per-column mean of the training data: the constant forecast.
naive_avg = DataFrame_train.mean()

# Build a frame shaped like the test set, fill every row with the column
# means, then round to whole numbers.
naive_avg_df = (
    pd.DataFrame(index=DataFrame_test.index, columns=DataFrame_test.columns, dtype=int)
    .fillna(naive_avg)
    .round()
    .astype(int)
)

print("Naïve Average:", naive_avg)
naive_avg_df.head()

## MAE Naive

In [None]:
# Score the constant-mean forecast against the held-out rows.
get_MAE(df1=DataFrame_test, df2=naive_avg_df)

# Moving Averages

In [None]:
# Recursive moving-average forecast: every test row is the mean of the
# `num_intervals` rows before it, so later windows mix training rows with
# earlier forecasts.
mv_avg_df_train = DataFrame_train[-num_intervals:]
# Explicit float dtype: an object-dtype placeholder makes the dtype of the
# concatenated frame depend on how pandas treats all-NA columns, and
# `DataFrame.round()` leaves object columns unrounded.
empty_next_mv_avg_df = pd.DataFrame(index=DataFrame_test.index, columns=DataFrame_test.columns, dtype=float)
mv_avg_df = pd.concat([mv_avg_df_train, empty_next_mv_avg_df], axis=0)
# A plain counter is all that is needed; the rows themselves are not read.
for key in range(len(empty_next_mv_avg_df)):
    split_df = mv_avg_df.iloc[key:key + num_intervals].mean()
    mv_avg_df.iloc[key + num_intervals] = split_df

mv_avg_df = mv_avg_df[-num_intervals:].astype(float).round().astype(int)
mv_avg_df

## MAE Moving_AVG

In [None]:
# Score the moving-average forecast against the held-out rows.
get_MAE(df1=DataFrame_test, df2=mv_avg_df)

# Exponential Smoothing Algorithms

## Simple Exponential Smoothing (SES)

- Used for time series data with no trend or seasonality.

SES function : U(Xt) = a * Xt + (1 - a) * U(Xt-1).

a = 2/(N+1)

where a is the smoothing factor and N is the span (number of periods).

In [None]:
def EMA_func(value, prev_ema, span=4):
    """Return one exponential-moving-average update.

    Args:
        value: The newest observation.
        prev_ema: The smoothed value from the previous step.
        span: N in the smoothing factor ``2 / (N + 1)``.

    Returns:
        ``a * value + (1 - a) * prev_ema`` with ``a = 2 / (span + 1)``.
    """
    smoothing = 2 / (span + 1)
    return smoothing * value + (1 - smoothing) * prev_ema


In [None]:
# Calculate the EMA of every training row.
# Float dtype so the smoothed values are stored as numbers, not objects.
EMA_df_train = pd.DataFrame(index=DataFrame_train.index, columns=DataFrame_train.columns, dtype=float)
if len(DataFrame_train) > 0:
    # The first EMA is seeded with the first observation.
    EMA_df_train.iloc[0] = DataFrame_train.iloc[0]
for key in range(1, len(DataFrame_train)):
    # The recursion must use the previous *smoothed* row. Passing the previous
    # raw observation would reduce the result to a two-point weighted average.
    EMA_df_train.iloc[key] = EMA_func(DataFrame_train.iloc[key], EMA_df_train.iloc[key - 1])

EMA_df_train

In [None]:
# Forecast the test period with SES. No new observations arrive, so each step
# feeds the previous EMA back in as the "value"; mathematically this keeps the
# forecast flat at the last training EMA.
next_EMA_df = pd.DataFrame(index=DataFrame_test.index, columns=DataFrame_test.columns)

for key in range(len(next_EMA_df)):
    # First step starts from the last training EMA, later steps from the
    # previous forecast.
    ema = EMA_df_train.iloc[-1] if key == 0 else next_EMA_df.iloc[key - 1]
    next_EMA_df.iloc[key] = EMA_func(ema, ema)

# The frame is object dtype; `DataFrame.round()` skips non-numeric columns, so
# cast to float first. Otherwise `astype(int)` truncates instead of rounding.
next_EMA_df = next_EMA_df.astype(float).round().astype(int)
next_EMA_df

### MAE

In [None]:
# Score the simple-exponential-smoothing forecast against the held-out rows.
get_MAE(df1=DataFrame_test, df2=next_EMA_df)

## Holt’s Exponential Smoothing (Double Exponential Smoothing)

- Used for time series data with trend but no seasonality.

Math func:
```
Level : L(Xt) = a * Xt + (1 - a) * (L(Xt-1) + B(Xt-1))
Trend : B(Xt) = b *(L(Xt) - L(Xt-1)) + (1 - b) * B(Xt-1)
Forecast : F(Xt+h) = L(Xt) + h*B(Xt)
```

with:
- Xt : the actual value of the time series at time t
- L(Xt) : estimated level at time t
- B(Xt) : estimated trend at time t
- F(Xt+h) : forecast for time t + h
- a, b : smoothing parameters
- h : steps ahead to forecast

In [None]:
def Level(df, level_df, trend_df, t, alpha):
    """Return the Holt level estimate for row ``t``.

    Blends the observation at ``t`` with the level-plus-trend estimate
    carried over from row ``t - 1``, weighted by ``alpha``.
    """
    observation = df.iloc[t]
    carried_estimate = level_df.iloc[t - 1] + trend_df.iloc[t - 1]
    return alpha * observation + (1 - alpha) * carried_estimate

def Trend(level_df, trend_df, t, beta):
    """Return the Holt trend estimate for row ``t``.

    Blends the latest change in level with the trend from row ``t - 1``,
    weighted by ``beta``.
    """
    level_change = level_df.iloc[t] - level_df.iloc[t - 1]
    previous_trend = trend_df.iloc[t - 1]
    return beta * level_change + (1 - beta) * previous_trend

def Forecast(level_df, trend_df, t, h):
    """Return the Holt forecast for row ``t`` made ``h`` steps earlier.

    Uses the level and trend at row ``t - h`` and extrapolates the trend
    over ``h`` steps.

    Raises:
        ValueError: If ``t - h`` is negative.
    """
    origin = t - h
    if origin < 0:
        raise ValueError("Forecast: Cannot index negative.")
    return level_df.iloc[origin] + (h * trend_df.iloc[origin])

In [None]:
# Smoothing parameters and forecast horizon for Holt's method.
alpha = 0.1
beta = 0.1
h = 1

# Empty frames shaped like the training data, one per component.
DES_level = pd.DataFrame(index=DataFrame_train.index, columns=DataFrame_train.columns)
DES_trend = DES_level.copy()
DES_forcast = DES_level.copy()

# Initial state: level and forecast start at the first observation, the trend
# at the first difference.
DES_forcast.iloc[0] = DataFrame_train.iloc[0]
DES_level.iloc[0] = DataFrame_train.iloc[0]
DES_trend.iloc[0] = DataFrame_train.iloc[1] - DataFrame_train.iloc[0]

# The trend update reads the level written in the same iteration, so the
# order of the statements inside the loop matters.
for t in range(1, len(DataFrame_train)):
    DES_level.iloc[t] = Level(DataFrame_train, DES_level, DES_trend, t, alpha)
    DES_trend.iloc[t] = Trend(DES_level, DES_trend, t, beta)
    if t >= h:
        DES_forcast.iloc[t] = Forecast(DES_level, DES_trend, t, h)

# Only the forecasts are used afterwards; drop the component frames.
DES_level = None
DES_trend = None

DES_forcast


In [None]:
# Repeat the last in-sample Holt forecast for every test row.
# The frame is built from explicit float values: the previous
# `iloc[:] = Series` on an object frame depended on Series alignment during
# assignment, and `DataFrame.round()` skips object columns, so `astype(int)`
# truncated instead of rounding.
# NOTE(review): this is a flat forecast; an h-step Holt forecast would be
# L(T) + k * B(T), but the level and trend frames are already discarded when
# this cell runs - confirm that the flat forecast is intended.
next_DES_df = pd.DataFrame(
    [DES_forcast.iloc[-1].astype(float).to_numpy()] * len(DataFrame_test),
    index=DataFrame_test.index,
    columns=DataFrame_test.columns,
)
next_DES_df = next_DES_df.round().astype(int)

# The in-sample forecasts are no longer needed.
DES_forcast = None

next_DES_df

### MAE

In [None]:
# Score the double-exponential-smoothing forecast against the held-out rows.
get_MAE(df1=DataFrame_test, df2=next_DES_df)

## Holt-Winters Exponential Smoothing (Triple Exponential Smoothing)

- Used for time series data with trend and seasonality.

Math func:
```
Level : L(Xt) = a * (Xt - S(Xt-m)) + (1 - a) * (L(Xt-1) + B(Xt-1))
Trend : B(Xt) = b *(L(Xt) - L(Xt-1)) + (1 - b) * B(Xt-1)
Seasonal : S(Xt) = g * ( Xt - L(Xt-1) - B(Xt-1) ) + (1 - g) * S(Xt-m)
Forecast : F(Xt+h) = L(Xt) + h*B(Xt) + S(Xt+h-m)
```

with:
- m : season length (e.g., 96 for daily seasonality with 15min intervals)
- g : seasonal smoothing parameter
- Xt : the actual value of the time series at time t
- S(Xt) : seasonal component
- L(Xt) : estimated level at time t
- B(Xt) : estimated trend at time t
- F(Xt+h) : forecast for time t + h
- a, b : smoothing parameters
- h : steps ahead to forecast

In [None]:
def Level(df, level_df, trend_df, Seasonal_df, t, m, alpha):
    """Return the Holt-Winters (additive) level estimate for row ``t``.

    The observation at ``t`` is de-seasonalised with the seasonal value from
    one season (``m`` rows) earlier and blended with the level-plus-trend
    estimate from row ``t - 1``, weighted by ``alpha``.

    Raises:
        ValueError: If ``t - m`` is negative.
    """
    season_pos = t - m
    if season_pos < 0:
        raise ValueError("Level: Cannot index negative.")
    deseasonalised = df.iloc[t] - Seasonal_df.iloc[season_pos]
    carried_estimate = level_df.iloc[t - 1] + trend_df.iloc[t - 1]
    return alpha * deseasonalised + (1 - alpha) * carried_estimate

def Trend(level_df, trend_df, t, beta):
    """Return the Holt-Winters trend estimate for row ``t``.

    Weighted by ``beta``, mixes the change in level between rows ``t - 1``
    and ``t`` with the trend stored at row ``t - 1``.
    """
    newest_slope = level_df.iloc[t] - level_df.iloc[t - 1]
    return beta * newest_slope + (1 - beta) * trend_df.iloc[t - 1]

def Seasonal(df, level_df, trend_df, Seasonal_df, t, m, gamma):
    """Return the Holt-Winters (additive) seasonal estimate for row ``t``.

    Returns 0 while no value from one full season (``m`` rows) earlier
    exists. Otherwise blends the part of the observation not explained by
    the previous level and trend with the seasonal value from row ``t - m``,
    weighted by ``gamma``.
    """
    season_pos = t - m
    if season_pos < 0:
        return 0
    unexplained = df.iloc[t] - level_df.iloc[t - 1] - trend_df.iloc[t - 1]
    return gamma * unexplained + (1 - gamma) * Seasonal_df.iloc[season_pos]

def Forecast(level_df, trend_df, Seasonal_df, t, m, h):
    """Return the Holt-Winters (additive) forecast for row ``t``.

    ``t`` is the row being forecast and the forecast origin is ``t - h``:
    level and trend are read at the origin and the trend is extrapolated over
    ``h`` steps. The seasonal term comes from the same phase as ``t`` in the
    most recent season that is complete at the origin, i.e. row
    ``t - m * ((h - 1) // m + 1)``, which is ``t - m`` whenever ``h <= m``.

    The previous version read the seasonal term at ``t + h - m``. That mixed
    the "``t`` is the origin" and "``t`` is the target" conventions, shifted
    the seasonal phase by ``h`` rows and could read seasonal values that are
    not yet known at the origin.

    Args:
        level_df: Level estimates, indexed by row position.
        trend_df: Trend estimates, indexed by row position.
        Seasonal_df: Seasonal estimates, indexed by row position.
        t: Row position being forecast.
        m: Season length in rows.
        h: Number of steps between the forecast origin and ``t``.

    Raises:
        ValueError: If the origin or the seasonal row would be negative.
    """
    origin = t - h
    # Step back whole seasons until the seasonal row is at or before the origin.
    seasonal_pos = t - m * ((h - 1) // m + 1)
    if origin < 0 or seasonal_pos < 0:
        raise ValueError("Forecast: Cannot index negative.")
    return level_df.iloc[origin] + (h * trend_df.iloc[origin]) + Seasonal_df.iloc[seasonal_pos]

In [None]:
# Horizon, season length and smoothing parameters for Holt-Winters.
h = 1
m = 4
alpha = 0.1
beta = 0.1
gamma = 0.1

# Empty frames shaped like the training data, one per component.
TES_level = pd.DataFrame(index=DataFrame_train.index, columns=DataFrame_train.columns)
TES_trend = TES_level.copy()
TES_seasonal = TES_level.copy()
TES_forcast = TES_level.copy()

# Seed the first season: level and forecast copy the observation, the trend is
# the next first difference and the seasonal component is set to 1.
# NOTE(review): the formulas above are additive, where a neutral seasonal
# value would be 0 - confirm that 1 is intended.
for i in range(m):
    TES_forcast.iloc[i] = DataFrame_train.iloc[i]
    TES_level.iloc[i] = DataFrame_train.iloc[i]
    TES_trend.iloc[i] = DataFrame_train.iloc[i+1] - DataFrame_train.iloc[i]
    TES_seasonal.iloc[i] = 1

# Each update reads values written earlier in the same iteration, so the
# order of the four statements matters.
for t in range(m, len(DataFrame_train)):
    TES_level.iloc[t] = Level(DataFrame_train, TES_level, TES_trend, TES_seasonal, t, m, alpha)
    TES_trend.iloc[t] = Trend(TES_level, TES_trend, t, beta)
    TES_seasonal.iloc[t] = Seasonal(DataFrame_train, TES_level, TES_trend, TES_seasonal, t, m, gamma)
    TES_forcast.iloc[t] = Forecast(TES_level, TES_trend, TES_seasonal, t, m, h)

TES_forcast

In [None]:
# Extend every component frame with empty rows for the test period.
empty_df = pd.DataFrame(index=DataFrame_test.index, columns=DataFrame_test.columns)
next_TES_lvl_df = pd.concat([TES_level, empty_df], axis=0)
next_TES_trd_df = pd.concat([TES_trend, empty_df], axis=0)
next_TES_season_df = pd.concat([TES_seasonal, empty_df], axis=0)
next_TES_forc_df = pd.concat([TES_forcast, empty_df], axis=0)

# Position of the first test row in the extended frames.
last_index = len(TES_forcast)

# Roll the model forward over the test period. No observations exist there,
# so each forecast is fed back as the pseudo-observation for the state update.
# The helpers are called with the row they write to (`t`) and the forecast is
# produced first. The previous version passed `t - 1` while storing at `t`,
# which made every state and forecast lag one step behind.
for t in range(last_index, last_index + len(empty_df)):
    next_TES_forc_df.iloc[t] = Forecast(next_TES_lvl_df, next_TES_trd_df, next_TES_season_df, t, m, h)
    next_TES_lvl_df.iloc[t] = Level(next_TES_forc_df, next_TES_lvl_df, next_TES_trd_df, next_TES_season_df, t, m, alpha)
    next_TES_trd_df.iloc[t] = Trend(next_TES_lvl_df, next_TES_trd_df, t, beta)
    next_TES_season_df.iloc[t] = Seasonal(next_TES_forc_df, next_TES_lvl_df, next_TES_trd_df, next_TES_season_df, t, m, gamma)

next_TES_df = next_TES_forc_df.iloc[-len(empty_df):]
# The frames are object dtype; `DataFrame.round()` skips non-numeric columns,
# so cast to float first or `astype(int)` truncates instead of rounding.
next_TES_df = next_TES_df.astype(float).round().astype(int)

# Drop the references to the intermediate frames.
TES_level = None
TES_trend = None
TES_seasonal = None
TES_forcast = None
next_TES_lvl_df = None
next_TES_trd_df = None
next_TES_season_df = None
next_TES_forc_df = None

next_TES_df

### MAE

In [None]:
# Score the triple-exponential-smoothing forecast against the held-out rows.
get_MAE(df1=DataFrame_test, df2=next_TES_df)

# ARIMA

### TODO: predict Next predictions using this func

In [None]:
# `num_intervals` timestamps at 15-minute spacing, starting one step after
# the last training timestamp.
future_timestamps = pd.date_range(
    start=DataFrame_train.index[-1] + pd.Timedelta("15min"),
    periods=num_intervals,
    freq="15min",
)