In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import prophet
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, GRU, RepeatVector, TimeDistributed
from keras.callbacks import EarlyStopping

In [None]:
# Load the daily receipts CSV. The raw header names the date column '# Date';
# parse it as datetimes while reading and rename it to 'Date', so the observed
# series shares a datetime axis with the pd.date_range-based forecasts below.
data = pd.read_csv('data_daily.csv', parse_dates=['# Date'])
data = data.rename(columns={'# Date': 'Date'})

In [None]:
# Squash the receipt counts into [0, 1]. The scaler expects a 2-D
# (n_samples, n_features) array, hence the reshape to a single column.
receipt_counts = data['Receipt_Count'].to_numpy().reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(receipt_counts)

# Convert data to appropriate shape for LSTM
def create_dataset(dataset, look_back=30, forecast_horizon=30):
    """Cut a series into paired input/target windows for multi-step forecasting.

    Only column 0 of ``dataset`` is read. For every valid start position the
    input window holds ``look_back`` consecutive values and the target window
    holds the ``forecast_horizon`` values that immediately follow it.

    Returns:
        (X, Y): two arrays with one row per window. When ``dataset`` is too
        short to yield a single window, both arrays are empty.
    """
    starts = range(len(dataset) - look_back - forecast_horizon + 1)
    inputs = [dataset[s:s + look_back, 0] for s in starts]
    targets = [
        dataset[s + look_back:s + look_back + forecast_horizon, 0]
        for s in starts
    ]
    return np.array(inputs), np.array(targets)

# Window the scaled series using create_dataset's defaults
# (30 values in, the following 30 values out).
X, y = create_dataset(scaled_data)

# Keras recurrent layers take 3-D tensors: [samples, time steps, features].
X = np.reshape(X, (X.shape[0], X.shape[1], 1))
y = np.reshape(y, (y.shape[0], y.shape[1], 1))

# Encoder-decoder LSTM: the first LSTM condenses the input window into a single
# vector, RepeatVector copies that vector once per output step, and the second
# LSTM plus a per-step Dense layer turn the copies into a 30-step forecast.
model = Sequential([
    LSTM(50, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    RepeatVector(30),
    LSTM(50, activation='relu', return_sequences=True),
    TimeDistributed(Dense(1)),
])
model.compile(optimizer='adam', loss='mse')

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen so far.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

model.fit(
    X,
    y,
    epochs=100,
    batch_size=1,
    verbose=1,
    callbacks=[early_stopping],
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100


ValueError: ignored

In [None]:
# Recursive multi-step forecast covering every day of 2022.
#
# The model maps a window of scaled values to a block of future scaled values.
# Each predicted block is appended to the rolling window and the most recent
# `window_size` values become the next input, until enough days are produced.
# The number of passes and the final truncation are derived from the date
# range instead of being hard-coded.
forecast_dates = pd.date_range(start="2022-01-01", end="2022-12-31", freq='D')
n_forecast_days = len(forecast_dates)

window_size = 30  # look-back length used when the model above was trained

window = scaled_data[-window_size:]  # shape (window_size, 1), scaled units
scaled_chunks = []
n_predicted = 0

while n_predicted < n_forecast_days:
    # predict() returns (1, horizon, 1); drop the batch axis -> (horizon, 1).
    # verbose=0 avoids printing a progress bar on every pass.
    chunk = model.predict(window.reshape(1, window_size, 1), verbose=0)[0]
    scaled_chunks.append(chunk)
    n_predicted += len(chunk)

    # Keep the window in scaled units: no inverse_transform/transform round
    # trip is needed just to feed the forecast back into the model.
    window = np.concatenate([window, chunk])[-window_size:]

# Drop the overshoot of the final block, then map back to receipt counts once.
scaled_forecast = np.concatenate(scaled_chunks)[:n_forecast_days]
predictions_2022 = scaler.inverse_transform(scaled_forecast).flatten()

# Ensure we have exactly one prediction per forecast date
assert len(predictions_2022) == n_forecast_days

# Convert to DataFrame
forecast_df = pd.DataFrame({'Date': forecast_dates, 'Predictions': predictions_2022})

print(forecast_df)

# If you want monthly aggregates:
monthly_predictions = forecast_df.resample('M', on='Date').sum()
print(monthly_predictions)

          Date  Predictions
0   2022-01-01   10830395.0
1   2022-01-02    9989262.0
2   2022-01-03   10379571.0
3   2022-01-04   10257825.0
4   2022-01-05   10277565.0
..         ...          ...
360 2022-12-27   10597984.0
361 2022-12-28    9903116.0
362 2022-12-29   10227574.0
363 2022-12-30   10137037.0
364 2022-12-31   10152232.0

[365 rows x 2 columns]
            Predictions
Date                   
2022-01-31  312716608.0
2022-02-28  280440288.0
2022-03-31  310449920.0
2022-04-30  300502880.0
2022-05-31  311062368.0
2022-06-30  300448640.0
2022-07-31  310347168.0
2022-08-31  310669888.0
2022-09-30  300441696.0
2022-10-31  310578528.0
2022-11-30  300441408.0
2022-12-31  310593632.0


In [None]:
# Re-fit a fresh [0, 1] scaler on the receipt counts so this cell does not
# depend on scaler state left over from the earlier experiment.
receipt_column = data[['Receipt_Count']].to_numpy()  # 2-D: (n_samples, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(receipt_column)

# Convert data to appropriate shape for LSTM
def create_dataset(dataset, look_back=1):
    """Cut a series into (window, next value) pairs for one-step forecasting.

    Only column 0 of ``dataset`` is read. Each row of ``X`` holds ``look_back``
    consecutive values and the matching entry of ``Y`` is the single value
    that follows that window.

    Returns:
        (X, Y): arrays with one entry per window; both are empty when
        ``dataset`` has no more than ``look_back`` rows.
    """
    # NOTE(review): this reuses the name of the multi-step create_dataset
    # defined earlier in the notebook and replaces it from here on.
    n_samples = len(dataset) - look_back
    windows = [dataset[k:k + look_back, 0] for k in range(n_samples)]
    next_values = [dataset[k + look_back, 0] for k in range(n_samples)]
    return np.array(windows), np.array(next_values)

look_back = 360  # Each sample uses the previous 360 days to predict the next day
# NOTE(review): create_dataset yields only len(scaled_data) - look_back samples,
# so a series of roughly one year leaves very few windows to train and
# validate on - confirm that 360 (rather than e.g. 30) is intended.
X, y = create_dataset(scaled_data, look_back)

# Reshape to [samples, 1 time step, look_back features]: the whole window is
# presented to the network as one feature vector at a single time step.
X = np.reshape(X, (X.shape[0], 1, X.shape[1]))

# LSTM model
model = Sequential()
model.add(LSTM(25, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
# The last recurrent layer returns only its final output, so the network emits
# one value per sample with shape (batch, 1), matching the scalar target in y.
# With return_sequences=True it emitted a (batch, 1, 1) sequence instead and
# relied on implicit shape handling in the loss.
model.add(LSTM(10))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Stop after 3 epochs without validation improvement and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

model.fit(X, y, epochs=100, batch_size=1, verbose=1, callbacks=[early_stopping], validation_split=0.2)

# Predict daily data for 2022 by rolling the model forward one day at a time:
# each prediction is appended to the window and the oldest value is dropped.
forecast_dates = pd.date_range(start="2022-01-01", end="2022-12-31", freq='D')

start_values = scaled_data[-look_back:].flatten()  # most recent look_back scaled values
predictions = []

for _ in range(len(forecast_dates)):  # one step per forecast day
    # verbose=0 avoids printing a progress bar for each of the daily calls.
    pred = model.predict(start_values.reshape(1, 1, look_back), verbose=0)
    # Take the single predicted value regardless of whether the model output
    # is shaped (1, 1) or (1, 1, 1).
    next_value = pred.ravel()[0]
    predictions.append(next_value)
    start_values = np.append(start_values[1:], next_value)

# Invert predictions to original scale
predictions_original = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

# Convert to DataFrame to resample monthly
forecast_df = pd.DataFrame({'Date': forecast_dates, 'Predictions': predictions_original.flatten()})

# Aggregate to monthly data
monthly_predictions = forecast_df.resample('M', on='Date').sum()

print(monthly_predictions)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
            Predictions
Date                   
2022-01-31  315194144.0
2022-02-28  285877280.0
2022-03-31  317354048.0
2022-04-30  307747424.0
2022-05-31  318503488.0
2022-06-30  308559296.0
2022-07-31  319075008.0
2022-08-31  319274432.0
2022-09-30  309104992.0
2022-10-31  319514560.0
2022-11-30  309281600.0
2022-12-31  319634464.0


In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Base line plot of the observed daily receipts.
# The overlay below draws forecast_df, which holds one prediction per day, so
# the title and legend describe it as daily (not monthly) predictions.
fig = px.line(data, x='Date', y='Receipt_Count',
              title='Daily Receipts for 2021 and Predicted Daily Receipts for 2022',
              labels={'Receipt_Count': 'Receipt Count'},
              template="plotly_dark")

# Overlay the predicted daily data for 2022 as a red line
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Predictions'], mode='lines',
                         line=dict(color='red', width=2),
                         name='Predicted Daily Receipts for 2022'))

# Enhance the layout
fig.update_layout(showlegend=True,
                  xaxis_title="Date",
                  yaxis_title="Receipt Count",
                  hovermode="x unified")

fig.show()
