In [218]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [219]:
# Mount Google Drive into the Colab filesystem at /content/drive so files
# stored there can be read with ordinary file paths.
# NOTE(review): this import only resolves inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [220]:
# Seed both NumPy and TensorFlow with one shared value so that the random
# initialisation is repeatable from a fresh kernel.
SEED = 6
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [221]:
# Read the daily data CSV from the mounted Drive folder into a DataFrame.
DATA_PATH = '/content/drive/MyDrive/fetch/data_daily.csv'
df = pd.read_csv(DATA_PATH)

In [222]:
# Target series as a flat 1-D float32 vector (one entry per row of the CSV).
# Converting the column directly avoids the spurious leading axis that
# np.array([series]) creates (shape (1, N)), which invites silent
# broadcasting mistakes when combined with other 1-D per-day arrays.
daily_receipts = df['Receipt_Count'].to_numpy(dtype=np.float32)

In [223]:
# Standardise the receipt counts to zero mean and unit variance.
# The mean and standard deviation are kept so that model outputs can be
# mapped back to the original scale later.
receipts_mean = np.mean(daily_receipts)
receipts_std = np.std(daily_receipts)
receipts_scaled = (daily_receipts - receipts_mean) / receipts_std

In [224]:
# Day-of-year indices 1..365 as float32, standardised to zero mean and
# unit variance. The statistics are kept for scaling other day indices
# consistently.
N_DAYS = 365
days = np.arange(1, N_DAYS + 1, dtype=np.float32)

days_mean = np.mean(days)
days_std = np.std(days)

days_scaled = (days - days_mean) / days_std

In [225]:
# Training hyperparameters: optimizer step size and number of full passes
# over the data.
learning_rate = 1e-2
epochs = 200

In [226]:
# Trainable parameters of the linear model, initialised from the standard
# normal distribution. The weight is drawn before the bias; keep that order
# so the seeded initial values stay the same.
initial_weight = np.random.randn()
initial_bias = np.random.randn()

W_day = tf.Variable(initial_weight, name="weight_day")
b = tf.Variable(initial_bias, name="bias")

In [227]:
# Model
def linear_regression(receipts):
    """Apply the linear model ``receipts * W_day + b`` element-wise.

    Args:
        receipts: Input values fed to the model. The name is historical;
            the function simply scales whatever array it is given by the
            global weight ``W_day`` and shifts it by the global bias ``b``.

    Returns:
        The result of the multiply-then-add, as produced by TensorFlow.
    """
    weighted = tf.multiply(receipts, W_day)
    return tf.add(weighted, b)

In [228]:
# Loss function (mean squared error)
def mean_squared_error(y_pred, y_true):
    """Return the mean of the squared element-wise differences.

    Args:
        y_pred: Model outputs.
        y_true: Target values, broadcast-compatible with ``y_pred``.

    Returns:
        A scalar tensor: the average of ``(y_pred - y_true) ** 2``.
    """
    residuals = y_pred - y_true
    return tf.reduce_mean(tf.square(residuals))

In [229]:
# Plain stochastic gradient descent using the configured step size.
optimizer = tf.optimizers.SGD(learning_rate=learning_rate)

In [230]:
# Training
for epoch in range(epochs):
    with tf.GradientTape() as tape:
        predictions = linear_regression(receipts_scaled)
        loss = mean_squared_error(predictions, receipts_scaled)

    gradients = tape.gradient(loss, [W_day, b])
    optimizer.apply_gradients(zip(gradients, [W_day, b]))

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.numpy()}")

Epoch 10/200, Loss: 1.5655995607376099
Epoch 20/200, Loss: 1.0452067852020264
Epoch 30/200, Loss: 0.697788417339325
Epoch 40/200, Loss: 0.46584925055503845
Epoch 50/200, Loss: 0.3110046088695526
Epoch 60/200, Loss: 0.20762912929058075
Epoch 70/200, Loss: 0.13861484825611115
Epoch 80/200, Loss: 0.09254040569067001
Epoch 90/200, Loss: 0.061780642718076706
Epoch 100/200, Loss: 0.0412452332675457
Epoch 110/200, Loss: 0.0275356974452734
Epoch 120/200, Loss: 0.018383048474788666
Epoch 130/200, Loss: 0.01227265689522028
Epoch 140/200, Loss: 0.008193323388695717
Epoch 150/200, Loss: 0.00546992989256978
Epoch 160/200, Loss: 0.003651759820058942
Epoch 170/200, Loss: 0.0024379456881433725
Epoch 180/200, Loss: 0.00162759970407933
Epoch 190/200, Loss: 0.0010865948861464858
Epoch 200/200, Loss: 0.0007254189113155007


In [231]:
# Make predictions
predictions = linear_regression(receipts_scaled)

In [232]:
# Map the model outputs from the standardised scale back to raw receipt
# counts by undoing the (x - mean) / std transform.
# Note: this overwrites `predictions`, so running the cell twice rescales twice.
rescaled = predictions * receipts_std
predictions = rescaled + receipts_mean

In [233]:
# Convert the tensor to a NumPy array and collapse it to one dimension.
predictions_array = predictions.numpy()
predictions = predictions_array.flatten()

In [234]:
# Build a daily calendar index covering all of 2022 and attach the predicted
# values to it, one row per day.
date_index = pd.date_range('2022-01-01', '2022-12-31')
predictions_2022 = pd.DataFrame({'Predicted_Receipts': predictions}, index=date_index)

In [235]:
# Aggregate the daily predictions into calendar-month sums
# (each row is labelled with the month's last day).
monthly_resampler = predictions_2022.resample('M')
monthly_totals = monthly_resampler.sum()

In [236]:
# Display the per-month predicted totals as the cell's output.
monthly_totals

Unnamed: 0,Predicted_Receipts
2022-01-31,237898128.0
2022-02-28,220939296.0
2022-03-31,249496160.0
2022-04-30,251271728.0
2022-05-31,263703744.0
2022-06-30,261052752.0
2022-07-31,275059808.0
2022-08-31,284015552.0
2022-09-30,281069344.0
2022-10-31,295760128.0
