In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Colab-only step: mount Google Drive at /content/drive so files stored there
# can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the daily receipts CSV on the mounted Drive
data_path = '/content/drive/MyDrive/fetch/data_daily.csv'

# Load the raw daily data into a DataFrame
df = pd.read_csv(data_path)

In [None]:
# Extract the observed receipt counts as a flat 1-D float32 vector.
# Wrapping the Series in a list would instead produce a (1, N) array that only
# lines up with the 1-D `days` feature through implicit broadcasting in the loss.
daily_receipts = df['Receipt_Count'].to_numpy(dtype=np.float32)

In [None]:
# Feature scaling: standardise the receipt counts to zero mean / unit variance.
# The statistics are kept because the denormalisation cell needs them to map
# model outputs back to real counts.
# Caution: this cell rebinds `daily_receipts`, so executing it a second time
# recomputes the statistics from already-scaled values.
receipts_mean = np.mean(daily_receipts)
receipts_std = np.std(daily_receipts)

centered_receipts = daily_receipts - receipts_mean
daily_receipts = centered_receipts / receipts_std

In [None]:
# Day indices and scaling
# Raw feature: consecutive day numbers 1..365 as float32
day_numbers = np.arange(1, 366, dtype=np.float32)

# Standardisation statistics of the raw day numbers
days_mean = np.mean(day_numbers)
days_std = np.std(day_numbers)

# Standardised day feature fed to the model
days = (day_numbers - days_mean) / days_std

In [None]:
# Model parameters (training hyperparameters)
epochs = 1_000          # number of passes of the training loop
learning_rate = 1e-2    # step size handed to the SGD optimizer

In [None]:
# Model variables
# The initial slope and intercept come from a locally seeded generator so that
# a fresh "Restart & Run All" reproduces the same training trace, without
# touching NumPy's global random state.
_init_rng = np.random.default_rng(42)
W_day = tf.Variable(_init_rng.standard_normal(), dtype=tf.float32, name="weight_day")
b = tf.Variable(_init_rng.standard_normal(), dtype=tf.float32, name="bias")

In [None]:
# Model
def linear_regression(day):
    """Evaluate the univariate linear model ``day * W_day + b``.

    Args:
        day: Day feature value(s) to evaluate the model at.

    Returns:
        The result of ``tf.add(tf.multiply(day, W_day), b)``, using the
        module-level variables ``W_day`` (slope) and ``b`` (intercept).
    """
    slope_term = tf.multiply(day, W_day)
    return tf.add(slope_term, b)

In [None]:
# Loss function (mean squared error)
def mean_squared_error(y_pred, y_true):
    """Return the mean of the squared differences between predictions and targets.

    Args:
        y_pred: Model outputs.
        y_true: Target values; subtracted element-wise from ``y_pred``.

    Returns:
        ``tf.reduce_mean`` of the squared residuals (reduced over all elements).
    """
    residuals = y_pred - y_true
    squared_residuals = tf.square(residuals)
    return tf.reduce_mean(squared_residuals)

In [None]:
# Optimizer: stochastic gradient descent using the configured step size
optimizer = tf.optimizers.SGD(learning_rate=learning_rate)

In [None]:
# Training: repeatedly fit the slope and intercept on the full set of days
trainable_vars = [W_day, b]
log_every = 100

for epoch in range(epochs):
    # Record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
        predictions = linear_regression(days)
        loss = mean_squared_error(predictions, daily_receipts)

    # One optimizer update of the slope and intercept
    grads = tape.gradient(loss, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars))

    # Periodic progress report; the loss shown was computed before this update
    completed = epoch + 1
    if completed % log_every == 0:
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.numpy()}")

Epoch 100/1000, Loss: 0.09472983330488205
Epoch 200/1000, Loss: 0.07744564861059189
Epoch 300/1000, Loss: 0.0771416500210762
Epoch 400/1000, Loss: 0.07713630795478821
Epoch 500/1000, Loss: 0.07713621109724045
Epoch 600/1000, Loss: 0.07713621109724045
Epoch 700/1000, Loss: 0.07713621109724045
Epoch 800/1000, Loss: 0.07713621109724045
Epoch 900/1000, Loss: 0.07713621109724045
Epoch 1000/1000, Loss: 0.07713621109724045


In [None]:
# Make predictions
# NOTE(review): `days` is the same scaled 1..365 index the model was fitted on,
# so these are in-sample fitted values. If they are meant to be a forecast for a
# later year than the training data, the inputs should be the following day
# indices scaled with days_mean/days_std — confirm which year data_daily.csv covers.
predictions = linear_regression(days)

In [None]:
# Denormalize predictions
# Recompute from the model instead of rescaling `predictions` in place, so that
# re-running this cell cannot apply the inverse scaling twice. `.numpy()` hands
# the downstream pandas code a plain ndarray rather than a TensorFlow tensor.
predictions = linear_regression(days).numpy() * receipts_std + receipts_mean

In [None]:
# Build a daily DatetimeIndex covering 2022-01-01 through 2022-12-31 and attach
# the predictions to it as a single-column DataFrame
date_index = pd.date_range('2022-01-01', '2022-12-31', freq='D')
predictions_2022 = pd.DataFrame({'Predicted_Receipts': predictions}, index=date_index)

In [None]:
# Get monthly totals: bucket the daily rows by month, then sum each bucket
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of 'ME' —
# confirm against the pandas version this notebook is pinned to.
monthly_bins = predictions_2022.resample('M')
monthly_totals = monthly_bins.sum()

In [None]:
# Display the monthly totals table as this cell's output
monthly_totals

Unnamed: 0,Predicted_Receipts
2022-01-31,236763008.0
2022-02-28,219731632.0
2022-03-31,249785600.0
2022-04-30,248242848.0
2022-05-31,263249632.0
2022-06-30,261272560.0
2022-07-31,276713664.0
2022-08-31,283556032.0
2022-09-30,280923904.0
2022-10-31,297020064.0
