Let's load the Autoformer model to check what format our data should be in.

In [None]:
from huggingface_hub import hf_hub_download
import torch
from transformers import AutoformerForPrediction
import os

# Request CPU fallback for operations that are not implemented on the MPS backend.
# NOTE(review): this is set after `import torch` has already run above; confirm the
# variable is still honoured at this point, otherwise move it before the import.
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Download one pre-built training batch from the Hugging Face Hub (dataset repo).
file = hf_hub_download(repo_id="hf-internal-testing/tourism-monthly-batch",filename="train-batch.pt", repo_type="dataset")
# batch has type dict
# NOTE(review): torch.load unpickles the file, so only load files from a trusted source.
batch = torch.load(file)

# Pretrained Autoformer checkpoint whose expected input format we want to inspect.
model = AutoformerForPrediction.from_pretrained("huggingface/autoformer-tourism-monthly")

In [None]:
batch.keys()

In [None]:
print(f"past_values shape: {batch['past_values'].shape}")
print(f"past_time_features shape: {batch['past_time_features'].shape}")
print(f"future_values shape: {batch['future_values'].shape}")
print(f"future_time_features shape: {batch['future_time_features'].shape}")
print(f"past_observed_mask shape: {batch['past_observed_mask'].shape}")
print(f"static_categorical_features shape: {batch['static_categorical_features'].shape}")
# we see the data is all torch tensors as we would expect

If MPS (Apple's Metal backend) is available, move the model and all of the data used for inference / training to it to speed up our calculations; otherwise stay on the CPU.

In [None]:
# Use the MPS backend when it is actually usable, otherwise fall back to the CPU.
# `is_available` is a function and has to be called: the bare attribute is a
# function object, which is always truthy, so 'mps' would be selected everywhere.
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

# Move every tensor of the example batch to the selected device.
past_values = batch['past_values'].to(device)
past_time_features = batch['past_time_features'].to(device)
future_values = batch['future_values'].to(device)
future_time_features = batch['future_time_features'].to(device)
past_observed_mask = batch['past_observed_mask'].to(device)
static_categorical_features = batch['static_categorical_features'].to(device)

# Move the model as well so that model and inputs live on the same device.
model.to(device)

In [None]:
def forward_with_cpu_fallback(model, device, **inputs):
    """Run a gradient-free forward pass on the CPU and return the results on `device`.

    Args:
        model: callable model exposing ``.to``; it is moved to the CPU and is
            not moved back afterwards.
        device: device the tensor outputs are transferred to.
        **inputs: keyword tensors forwarded to the model after being moved to the CPU.

    Returns:
        A plain ``dict`` built from ``outputs.items()``: tensor values are moved
        to `device`, every other value is passed through unchanged.
    """
    cpu_model = model.to('cpu')

    cpu_inputs = {}
    for name, tensor in inputs.items():
        cpu_inputs[name] = tensor.to('cpu')

    # Inference only: no autograd graph is recorded for this pass.
    with torch.no_grad():
        result = cpu_model(**cpu_inputs)

    moved = {}
    for name, value in result.items():
        moved[name] = value.to(device) if isinstance(value, torch.Tensor) else value
    return moved

def generate_with_cpu_fallback(model, device, **inputs):
    """Call ``model.generate`` on the CPU without gradients and return the result.

    Args:
        model: object exposing ``.to`` and ``.generate``; it is moved to the CPU
            and is not moved back afterwards.
        device: device that tensor results are transferred to.
        **inputs: keyword tensors forwarded to ``generate`` after being moved to the CPU.

    Returns:
        * a tuple with every element moved to `device` if ``generate`` returned a tuple,
        * the tensor moved to `device` if it returned a single tensor,
        * otherwise the result object exactly as returned (not moved).
    """
    cpu_model = model.to('cpu')
    cpu_inputs = {name: tensor.to('cpu') for name, tensor in inputs.items()}

    with torch.no_grad():
        generated = cpu_model.generate(**cpu_inputs)

    if isinstance(generated, tuple):
        return tuple(item.to(device) for item in generated)
    if isinstance(generated, torch.Tensor):
        return generated.to(device)
    return generated


In [None]:
# inference to generate last hidden states output
outputs = forward_with_cpu_fallback(
    model,
    device,
    past_values=past_values,
    past_time_features=past_time_features,
    future_values=future_values,
    future_time_features=future_time_features,
    past_observed_mask=past_observed_mask,
    static_categorical_features=static_categorical_features
)


In [None]:
# inference to generate time series output
predictions = generate_with_cpu_fallback(
    model,
    device,
    past_values=past_values,
    past_time_features=past_time_features,
    past_observed_mask=past_observed_mask,
    static_categorical_features=static_categorical_features,
    future_time_features=future_time_features)


In [None]:
# the generated predictions are 100 sequences for each of the 64 output sequences. Each sequence has length 24
# we average along the 2nd dimension (get the mean of the 100 sequences for each time step in the output) leaving us 
# with shape [64,24]
print(predictions.sequences.shape)
print(predictions.sequences.mean(dim=1))
print(predictions.sequences.mean(dim=1).shape)

During training, one provides both past and future values as well as possible additional features
\
\
In particular let's supply past_values, past_time_features, past_observed_mask, static_categorical_features, future_values, and future_time_features

In [None]:
import pandas as pd

trans_df = pd.read_csv('/Users/tarikrashada/Downloads/Transactions - Sheet1.csv')
trans_df.head()

Let's experiment with sequence lengths. The underlying question is: given transaction data covering the previous n months, with a frequency of a few transactions per day, can we predict the transactions that will occur in the next k days? In particular, we want to know the approximate aggregate amount that will be spent.
\
\
It is possible that this is easier to predict but will require some model alterations. Let's start with what we have.

In [None]:
# e.g. how this will work
'''
obj = [1,2,3,4,5,6,7,8,9,10]
past_length = 3
future_length = 4
[1,2,3] -> [4,5,6,7]
[2,3,4] -> [5,6,7,8]
[3,4,5] -> [6,7,8,9]
[4,5,6] -> [7,8,9,10]

len of dataset = 4 = len(obj) - past_length - future_length + 1
'''


In [None]:
v = [1,2,3,4,5,6,7,8,9,10]
past_start = 0
past_length = 3
future_length = 4
past_end = past_start + past_length
future_end = past_end + future_length

print(v[past_start:past_end])
print(v[past_end:future_end])

In [None]:
# Sort the transactions in ascending 'Transaction Date' order and renumber the
# rows 0..n-1 (the old index is dropped).
# NOTE(review): at this point the column has not yet been converted with
# pd.to_datetime (that happens in a later cell), so the raw CSV values are what
# gets sorted here - confirm that this ordering is chronological.

trans_df.sort_values(by='Transaction Date',inplace=True)
trans_df.reset_index(inplace=True,drop=True)
trans_df

In [None]:
trans_df = trans_df.dropna(subset=['Debit'])
trans_df.reset_index(inplace=True,drop=True)

In [None]:
trans_df['Debit'].count()

In [None]:
trans_df.head()

In [None]:
trans_df.iloc[0:10]['Debit'].values

In [None]:
trans_df['Transaction Date']

In [None]:
trans_df['Transaction Date'] = pd.to_datetime(trans_df['Transaction Date'], format='%Y-%m-%d')

In [None]:
import numpy as np
days_in_months = np.array([0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])

days_in_months[trans_df['Transaction Date'].dt.month]


Use sin and cos functions to generate day of the month cyclical time features. These help the model to understand month to month periods.

In [None]:
import numpy as np

days_in_months = np.array([0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])

def generate_day_of_month_features(dates):
    """Encode the day of the month as a point on the unit circle.

    Args:
        dates: pandas Series of datetimes (anything supporting the ``.dt`` accessor).

    Returns:
        A pair ``(features, date_strings)``:
        * ``features``: array of shape ``(len(dates), 2)`` with columns
          ``[cos, sin]`` of ``2 * pi * day / days_in_that_month``;
        * ``date_strings``: array of shape ``(1, len(dates))`` holding each date
          formatted as ``'YYYY-MM-DD'``.
    """
    # Calendar-aware month length: a fixed 28-day February would push Feb 29 of a
    # leap year past a full cycle (29/28 > 1).
    month_lengths = dates.dt.days_in_month

    # Compute the angle once and reuse it for both components.
    angle = 2 * np.pi * dates.dt.day / month_lengths
    day_cos = np.cos(angle)
    day_sin = np.sin(angle)

    date_strings = dates.dt.date.apply(lambda x: x.strftime('%Y-%m-%d'))
    return np.vstack([day_cos, day_sin]).T, np.vstack([date_strings])

def generate_age_features(length):
    """Return the relative position of each step in a window of `length` steps.

    The result is a column vector of shape ``(length, 1)`` holding
    ``0/length, 1/length, ..., (length - 1)/length``.
    """
    positions = np.arange(length)
    fractions = positions / length
    # Add a trailing axis so the values form a single feature column.
    return fractions[:, np.newaxis]

def generate_time_features(dates):
    """Build the time features used for one window of dates.

    Only the day-of-month (cos, sin) features are returned; age features are
    not part of the output, so nothing is computed for them here.

    Args:
        dates: pandas Series of datetimes for the window.

    Returns:
        ``(day_features, date_data)`` exactly as produced by
        ``generate_day_of_month_features``.
    """
    day_features, date_data = generate_day_of_month_features(dates)
    return day_features, date_data

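Day-of-month and age features are both generated above, but `generate_time_features` currently returns only the day-of-month features (the concatenation with the age features is commented out).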

In [None]:
day_of_month_features = generate_day_of_month_features(trans_df['Transaction Date'])
print(day_of_month_features)

In [None]:
generate_time_features(trans_df['Transaction Date'])

In [None]:
from torch.utils.data import Dataset,DataLoader
import torch

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over a transactions DataFrame.

    Sample ``i`` uses rows ``i .. i + past_length - 1`` as the context window and
    the following ``future_length`` rows as the prediction target. The frame
    must provide the columns ``'Debit'`` and ``'Transaction Date'``.
    """

    def __init__(self, df, past_length, future_length):
        self.df = df
        self.past_length = past_length
        self.future_length = future_length

    def __len__(self):
        """Number of complete (past, future) windows that fit in the frame."""
        # Clamp at zero: a frame shorter than one window would otherwise give a
        # negative value, which makes len() raise ValueError.
        return max(0, len(self.df) - (self.past_length + self.future_length) + 1)

    def __getitem__(self, idx):
        """Return the model inputs for window `idx` as a dict.

        Raises:
            IndexError: if `idx` does not address a complete window. Without
                this check ``iloc`` slicing would silently return truncated
                windows, and iterating the dataset directly would never stop.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows  # usual Python negative-index convention
        if not 0 <= idx < n_windows:
            raise IndexError(f"window index out of range for dataset of length {n_windows}")

        past_start = idx
        past_end = idx + self.past_length
        future_end = past_end + self.future_length

        past_seq = self.df.iloc[past_start:past_end]
        fut_seq = self.df.iloc[past_end:future_end]

        past_values = past_seq['Debit'].values
        future_values = fut_seq['Debit'].values

        # True wherever a past value is actually present (not NaN).
        past_observed_mask = ~np.isnan(past_values)

        # A single constant category id: every window belongs to the same series.
        static_categorical_features = np.ones((1,))

        past_time_features, dates_past = generate_time_features(past_seq['Transaction Date'])
        future_time_features, dates_future = generate_time_features(fut_seq['Transaction Date'])

        # Inputs are plain data, so they are created without requires_grad:
        # gradients are only needed for the model parameters, and tensors that
        # require grad cannot be collated with an `out=` buffer in worker processes.
        return {
            'past_values': torch.tensor(past_values, dtype=torch.float32),
            'past_time_features': torch.tensor(past_time_features, dtype=torch.float32),
            'past_observed_mask': torch.tensor(past_observed_mask, dtype=torch.bool),
            'static_categorical_features': torch.tensor(static_categorical_features, dtype=torch.long),
            'future_values': torch.tensor(future_values, dtype=torch.float32),
            'future_time_features': torch.tensor(future_time_features, dtype=torch.float32),
            'dates_past': dates_past,
            'dates_future': dates_future
        }

# Window sizes: number of rows used as context and number of rows to predict.
past_length = 61
future_length = 24 

dataset = TimeSeriesDataset(trans_df,past_length,future_length)

# 80 / 20 split of the windows into train and test subsets.
# NOTE(review): consecutive windows overlap almost entirely, so a random split
# presumably places near-duplicate windows in both subsets - consider a
# chronological split if the test loss is meant to measure generalisation.
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

batch_size = 64

def custom_collate_fn(batch):
    """Collate dataset samples, handling the date arrays separately.

    The ``'dates_past'`` / ``'dates_future'`` entries of each sample are stacked
    with NumPy and reshaped to ``(len(batch), -1)``; every other entry goes
    through ``torch.utils.data.default_collate``.

    Args:
        batch: list of sample dicts, each containing ``'dates_past'`` and
            ``'dates_future'`` next to the entries to be collated by default.

    Returns:
        The default-collated dict with the two date arrays added back under
        their original keys.
    """
    n_samples = len(batch)
    date_keys = ['dates_past', 'dates_future']

    # Stack the per-sample date arrays into one row per sample.
    stacked_dates = {}
    for key in date_keys:
        per_sample = [sample[key] for sample in batch]
        stacked_dates[key] = np.array(per_sample).reshape(n_samples, -1)

    # Everything that is not a date array is collated the standard way.
    remaining = []
    for sample in batch:
        remaining.append({name: value for name, value in sample.items() if name not in date_keys})
    collated = torch.utils.data.default_collate(remaining)

    for key in date_keys:
        collated[key] = stacked_dates[key]
    return collated

train_loader = DataLoader(train_dataset, batch_size, shuffle=False, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False, collate_fn=custom_collate_fn)

for batch in train_loader:
    print(f"past_values shape: {batch['past_values'].shape}")
    print(f"future_values shape: {batch['future_values'].shape}")
    print(f"past_time_features shape: {batch['past_time_features'].shape}")
    print(f"future_time_features shape: {batch['future_time_features'].shape}")
    print(f"static_categorical_features shape: {batch['static_categorical_features'].shape}")
    print(f"past_observed_mask shape: {batch['past_observed_mask'].shape}")
    print(f"dates_past shape: {batch['dates_past'].shape}")
    print(f"dates_future shape: {batch['dates_future'].shape}")
    break

num_time_features = batch['past_time_features'].shape[-1]
        

In [None]:
batch['dates_future'][4]

Initially tried get_scheduler and copied arguments from hugging face's fine-tune a model article.\
Other things we can try - changing the optimizer, adjusting batch_size or other parts of config

In [None]:
import os
import torch
from transformers import get_scheduler, AutoformerForPrediction
from torch.optim.lr_scheduler import ReduceLROnPlateau

model_ckpt = "huggingface/autoformer-tourism-monthly"
model = AutoformerForPrediction.from_pretrained(model_ckpt)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay = 1e-5)
num_epochs = 10
num_training_steps = num_epochs * len(train_loader)

lr_scheduler = ReduceLROnPlateau(optimizer = optimizer, mode = 'min', factor = 0.1, patience = 2, verbose = True)

device = torch.device('cpu')


In [None]:
for k,v in batch.items():
    print(k)

In [None]:
from tqdm.auto import tqdm

progress_bar = tqdm(range(num_training_steps))

def evaluate(model, test_loader):
    """Return the mean per-batch loss of `model` over `test_loader`.

    The model is switched to eval mode (and left in it) and the batches are run
    without gradient tracking. Only the six model inputs are read from each
    batch; any other entries are ignored.
    """
    model.eval()
    model_inputs = (
        'past_values',
        'past_time_features',
        'past_observed_mask',
        'static_categorical_features',
        'future_values',
        'future_time_features',
    )

    total_loss = 0.0
    with torch.no_grad():
        for batch in test_loader:
            kwargs = {name: batch[name] for name in model_inputs}
            total_loss += model(**kwargs).loss.item()

    return total_loss / len(test_loader)

for epoch in range(num_epochs):
    # evaluate() at the end of every epoch puts the model in eval mode, so
    # training mode must be re-enabled at the start of each epoch; calling
    # model.train() only once before the loop leaves epochs 2+ in eval mode.
    model.train()
    epoch_loss = 0.0
    running_loss = 0.0
    for step, batch in enumerate(train_loader):
        outputs = model(
            past_values=batch['past_values'],
            past_time_features=batch['past_time_features'],
            past_observed_mask=batch['past_observed_mask'],
            static_categorical_features=batch['static_categorical_features'],
            future_values=batch['future_values'],
            future_time_features=batch['future_time_features'])

        loss = outputs.loss
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        running_loss += loss.item()
        epoch_loss += loss.item()
        progress_bar.update(1)

        # The loss is logged after every step, so the running total is reset each time.
        tqdm.write(f"Epoch [{epoch + 1}/{num_epochs}], Step[{step + 1}/{len(train_loader)}], Loss: {running_loss}")
        running_loss = 0.0

    # Validation drives the plateau scheduler; the model stays in eval mode
    # afterwards (and therefore also after the final epoch).
    val_avg_loss = evaluate(model,test_loader)
    lr_scheduler.step(val_avg_loss)
    tqdm.write(f"Epoch [{epoch + 1}/{num_epochs}] completed, Validation Loss: {val_avg_loss}")

progress_bar.close()
    
            
        

In [None]:
# Per-batch collections: inputs, targets, dates and summary statistics of the
# sampled forecasts. Each list gets one entry per batch.
past_vals = []
future_vals = []
predictions_mean_all = []
predictions_min_all = []
predictions_max_all = []
predictions_median_all = []
predictions_65th_all = []
predictions_75th_all = []
predictions_85th_all = []
past_dates = []
future_dates = []

# Make eval mode explicit so sampling does not depend on the mode the model
# happened to be left in by an earlier cell.
model.eval()
for batch in train_loader:
    outputs = model.generate(
        past_values=batch['past_values'],
        past_time_features=batch['past_time_features'],
        past_observed_mask=batch['past_observed_mask'],
        static_categorical_features=batch['static_categorical_features'],
        future_time_features=batch['future_time_features'],
    )
    samples = outputs.sequences

    past_vals.append(batch['past_values'])
    future_vals.append(batch['future_values'])
    past_dates.append(batch['dates_past'])
    future_dates.append(batch['dates_future'])

    # Reduce over dim 1 (the sampled sequences) to get one value per time step.
    predictions_85th = torch.quantile(samples, 0.85, dim=1)
    predictions_75th = torch.quantile(samples, 0.75, dim=1)
    predictions_65th = torch.quantile(samples, 0.65, dim=1)
    predictions_min = samples.min(dim=1)[0]
    predictions_mean = samples.mean(dim=1)
    predictions_max = samples.max(dim=1)[0]
    predictions_median = samples.median(dim=1)[0]

    predictions_65th_all.append(predictions_65th)
    predictions_75th_all.append(predictions_75th)
    predictions_85th_all.append(predictions_85th)
    predictions_min_all.append(predictions_min)
    predictions_mean_all.append(predictions_mean)
    predictions_max_all.append(predictions_max)
    predictions_median_all.append(predictions_median)
    



In [None]:
# For every batch: residual = (sum of predicted values over the horizon)
# - (sum of actual future values), one number per sequence in the batch.
batch_residuals_min = []
batch_residuals_mean = []
batch_residuals_max = []
batch_residuals_median = []

# The per-batch lists are parallel, so walk them together instead of indexing.
for batch_target, pred_min, pred_mean, pred_max, pred_median in zip(
    future_vals,
    predictions_min_all,
    predictions_mean_all,
    predictions_max_all,
    predictions_median_all,
):
    sum_target = batch_target.sum(dim=1)

    # detach() so the stored residuals carry no autograd history.
    batch_residuals_min.append((pred_min.sum(dim=1) - sum_target).detach())
    batch_residuals_mean.append((pred_mean.sum(dim=1) - sum_target).detach())
    batch_residuals_max.append((pred_max.sum(dim=1) - sum_target).detach())
    batch_residuals_median.append((pred_median.sum(dim=1) - sum_target).detach())

There are 12 batches in train_dataloader. Each batch has size 64.

The future vals has elements that are the input_batch['future_values'] for each batch, that is the targets for each of the train_loader batches

These histograms depict the residual per predicted sequence in each batch from the train_dataloader. So if there are 64 sequences in a batch, then for each sequence we sum up all of the predicted transaction values and take the difference between this value and the sum of that sequence's future values.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

for residual in batch_residuals_mean:
    plt.figure(figsize=(10,6))
    plt.hist(residual, bins=30, edgecolor='k', alpha=0.7)
    plt.title('Histogram of Residuals')
    plt.xlabel('Residual')
    plt.ylabel('Frequency')
    plt.show()


Here we want to take an input sequence and get the corresponding future sequence for it. Then plot these for any pair of input sequence, future value sequence - for one sequence in one batch. 

There are 12 batches. Each batch has size 64. The past lengths are 61 and the future lengths are 24

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def prediction_plot(sequence_index, batch_index):
    """Plot cumulative past spending followed by actual and predicted future spending.

    Reads the module-level per-batch lists (``past_vals``, ``future_vals``,
    ``past_dates``, ``future_dates`` and the ``predictions_*_all`` lists).

    Args:
        sequence_index: position of the sequence inside the batch.
        batch_index: position of the batch inside the collected lists.

    Returns:
        Dict with keys ``"Mean"``, ``"75th"`` and ``"85th"``: the total of the
        past values plus the total of the corresponding predicted values.
    """
    past_vals_plot = past_vals[batch_index][sequence_index].detach().numpy()
    past_dates_plot = past_dates[batch_index][sequence_index]

    future_vals_plot = future_vals[batch_index][sequence_index].detach().numpy()
    future_dates_plot = future_dates[batch_index][sequence_index]

    predictions_mean_plot = predictions_mean_all[batch_index][sequence_index].detach().numpy()
    predictions_75_plot = predictions_75th_all[batch_index][sequence_index].detach().numpy()
    predictions_85_plot = predictions_85th_all[batch_index][sequence_index].detach().numpy()

    # Cumulative past spending is computed once; every future curve continues
    # from its final value.
    past_cumulative = np.cumsum(past_vals_plot)
    past_total = past_cumulative[-1]

    plt.figure(figsize=(10, 6))

    plt.plot(past_dates_plot, past_cumulative, label="Actual Past Transactions", color='blue')

    future_curves = [
        (predictions_mean_plot, "Predicted Future Transactions (Mean)", 'orange'),
        # This curve shows the real future values, so it is labelled as actual.
        (future_vals_plot, "Actual Future Transactions", 'blue'),
        (predictions_75_plot, "Predicted Future Transactions (75th Percentile)", 'purple'),
        (predictions_85_plot, "Predicted Future Transactions (85th Percentile)", 'black'),
    ]
    for values, label, color in future_curves:
        plt.plot(future_dates_plot, past_total + np.cumsum(values), label=label, color=color, linestyle='--')

    plt.xlabel('Date')
    plt.ylabel('Cumulative Transaction Amount')
    plt.title('Actual and Predicted Transactions (Mean, 75th and 85th Percentiles)')
    plt.legend()
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.show()

    result_dict = {
        "Mean": past_total + np.cumsum(predictions_mean_plot)[-1],
        "75th": past_total + np.cumsum(predictions_75_plot)[-1],
        "85th": past_total + np.cumsum(predictions_85_plot)[-1],
    }
    return result_dict

In [None]:
transact_expense_predictions = prediction_plot(sequence_index=0, batch_index=0)

We can take the total amount we expect a customer to spend over the whole time period (starting from past_values through to the end of the prediction window) and subtract this from the income over this period or the balance at the beginning of the period

For simplicity let's say this amount is 3k NGN then we can make a dictionary for amount left over for each prediction scenario

In [None]:
income_over_period = 3000
curr_bal = 700
thresh = 0.90

In [None]:
savings_potential = {key : income_over_period - value for (key,value) in transact_expense_predictions.items()}
savings_potential

In [None]:
withdraw_amount = min(savings_potential['75th'], curr_bal * thresh)
withdraw_amount

Let's see if we can change the configuration and make this work

In [None]:
def generate_time_features_(dates):
    """Return day-of-month and age features side by side.

    Args:
        dates: pandas Series of datetimes for the window.

    Returns:
        Array with one row per date: the day-of-month feature columns followed
        by one age column.
    """
    # generate_day_of_month_features returns (features, date_strings); only the
    # numeric features can be stacked, so the date strings are dropped here.
    day_features, _date_strings = generate_day_of_month_features(dates)
    age_features = generate_age_features(len(dates))
    return np.hstack([day_features, age_features])

In [None]:
from torch.utils.data import Dataset,DataLoader
import torch

past_length = 100
future_length = 80

dataset = TimeSeriesDataset(trans_df,past_length,future_length)

train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

batch_size = 32

train_loader = DataLoader(train_dataset, batch_size, shuffle=False, collate_fn=custom_collate_fn)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False, collate_fn=custom_collate_fn)

for batch in train_loader:
    print(f"past_values shape: {batch['past_values'].shape}")
    print(f"future_values shape: {batch['future_values'].shape}")
    print(f"past_time_features shape: {batch['past_time_features'].shape}")
    print(f"future_time_features shape: {batch['future_time_features'].shape}")
    print(f"static_categorical_features shape: {batch['static_categorical_features'].shape}")
    print(f"past_observed_mask shape: {batch['past_observed_mask'].shape}")
    print(f"dates_past shape: {batch['dates_past'].shape}")
    print(f"dates_future shape: {batch['dates_future'].shape}")
    break

num_time_features = batch['past_time_features'].shape[-1]
        

In [None]:
# reconfigure for longer context and prediction windows
from transformers import get_scheduler, AutoformerForPrediction, AutoformerConfig
from torch.optim.lr_scheduler import ReduceLROnPlateau
from huggingface_hub import hf_hub_download
import torch

config2 = AutoformerConfig.from_pretrained('huggingface/autoformer-tourism-monthly')
# future values has length 80
# past values has length 100

# sequence length of past values must be larger than the context_length of the model
# and the past sequence length = context_length + max(lags_sequence)
config2.context_length = 63
config2.prediction_length = 80

model2 = AutoformerForPrediction.from_pretrained('huggingface/autoformer-tourism-monthly',
                                                config=config2,
                                                ignore_mismatched_sizes=True)

# num_features (length of time_features) = num_time_features + num_dynamic_real_features


In [None]:
import os
import torch
from transformers import get_scheduler, AutoformerForPrediction
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm.auto import tqdm

optimizer = torch.optim.Adam(model2.parameters(), lr=1e-4, weight_decay = 1e-5)
num_epochs = 10
num_training_steps = num_epochs * len(train_loader)

lr_scheduler = ReduceLROnPlateau(optimizer = optimizer, mode = 'min', factor = 0.1, patience = 2, verbose = True)

device = torch.device('cpu')


progress_bar = tqdm(range(num_training_steps))

def evaluate(model, test_loader):
    """Return the mean per-batch loss of `model` over `test_loader`.

    The model is switched to eval mode (and left in it) and the batches are run
    without gradient tracking. Only the six model inputs are read from each
    batch; any other entries are ignored.
    """
    model.eval()
    model_inputs = (
        'past_values',
        'past_time_features',
        'past_observed_mask',
        'static_categorical_features',
        'future_values',
        'future_time_features',
    )

    total_loss = 0.0
    with torch.no_grad():
        for batch in test_loader:
            kwargs = {name: batch[name] for name in model_inputs}
            total_loss += model(**kwargs).loss.item()

    return total_loss / len(test_loader)

for epoch in range(num_epochs):
    # evaluate() at the end of every epoch puts the model in eval mode, so
    # training mode must be re-enabled at the start of each epoch; calling
    # model2.train() only once before the loop leaves epochs 2+ in eval mode.
    model2.train()
    epoch_loss = 0.0
    running_loss = 0.0
    for step, batch in enumerate(train_loader):
        outputs = model2(
            past_values=batch['past_values'],
            past_time_features=batch['past_time_features'],
            past_observed_mask=batch['past_observed_mask'],
            static_categorical_features=batch['static_categorical_features'],
            future_values=batch['future_values'],
            future_time_features=batch['future_time_features'])

        loss = outputs.loss
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        running_loss += loss.item()
        epoch_loss += loss.item()
        progress_bar.update(1)

        # Report the mean loss of the last 10 steps, then start a new running window.
        if (step + 1) % 10 == 0:
            avg_loss = running_loss / 10
            tqdm.write(f"Epoch [{epoch + 1}/{num_epochs}], Step[{step + 1}/{len(train_loader)}], Loss: {avg_loss}")
            running_loss = 0.0

    # Validation drives the plateau scheduler; the model stays in eval mode
    # afterwards (and therefore also after the final epoch).
    val_avg_loss = evaluate(model2,test_loader)
    lr_scheduler.step(val_avg_loss)

    epoch_avg_loss = epoch_loss / len(train_loader)
    tqdm.write(f"Epoch [{epoch + 1}/{num_epochs}] completed, Training Loss: {epoch_avg_loss}, Validation Loss: {val_avg_loss}")

progress_bar.close()
    
            
        


In [None]:
# Per-batch collections: inputs, targets, dates and summary statistics of the
# sampled forecasts. Each list gets one entry per batch.
past_vals = []
future_vals = []
predictions_mean_all = []
predictions_min_all = []
predictions_max_all = []
predictions_median_all = []
predictions_65th_all = []
predictions_75th_all = []
predictions_85th_all = []
past_dates = []
future_dates = []

# Make eval mode explicit so sampling does not depend on the mode the model
# happened to be left in by an earlier cell.
model2.eval()
for batch in train_loader:
    outputs = model2.generate(
        past_values=batch['past_values'],
        past_time_features=batch['past_time_features'],
        past_observed_mask=batch['past_observed_mask'],
        static_categorical_features=batch['static_categorical_features'],
        future_time_features=batch['future_time_features'],
    )
    samples = outputs.sequences

    past_vals.append(batch['past_values'])
    future_vals.append(batch['future_values'])
    past_dates.append(batch['dates_past'])
    future_dates.append(batch['dates_future'])

    # Reduce over dim 1 (the sampled sequences) to get one value per time step.
    predictions_85th = torch.quantile(samples, 0.85, dim=1)
    predictions_75th = torch.quantile(samples, 0.75, dim=1)
    predictions_65th = torch.quantile(samples, 0.65, dim=1)
    predictions_min = samples.min(dim=1)[0]
    predictions_mean = samples.mean(dim=1)
    predictions_max = samples.max(dim=1)[0]
    predictions_median = samples.median(dim=1)[0]

    predictions_65th_all.append(predictions_65th)
    predictions_75th_all.append(predictions_75th)
    predictions_85th_all.append(predictions_85th)
    predictions_min_all.append(predictions_min)
    predictions_mean_all.append(predictions_mean)
    predictions_max_all.append(predictions_max)
    predictions_median_all.append(predictions_median)
    



In [None]:
len(past_dates)

In [None]:
past_dates[0][0]

In [None]:
future_dates[0][3]

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def prediction_plot(sequence_index, batch_index):
    """Draw two charts for one sequence and return the predicted future totals.

    Chart 1 shows cumulative past spending continued by the actual and
    predicted future spending; chart 2 shows the future window on its own.
    Reads the module-level per-batch lists (``past_vals``, ``future_vals``,
    ``past_dates``, ``future_dates`` and the ``predictions_*_all`` lists).

    Args:
        sequence_index: position of the sequence inside the batch.
        batch_index: position of the batch inside the collected lists.

    Returns:
        Dict with keys ``"Mean"``, ``"65th"``, ``"75th"`` and ``"85th"``: the
        total predicted amount over the future window for each statistic.
    """
    past_vals_plot = past_vals[batch_index][sequence_index].detach().numpy()
    past_dates_plot = past_dates[batch_index][sequence_index]

    future_vals_plot = future_vals[batch_index][sequence_index].detach().numpy()
    future_dates_plot = future_dates[batch_index][sequence_index]

    predictions_mean_plot = predictions_mean_all[batch_index][sequence_index].detach().numpy()
    predictions_65_plot = predictions_65th_all[batch_index][sequence_index].detach().numpy()
    predictions_75_plot = predictions_75th_all[batch_index][sequence_index].detach().numpy()
    predictions_85_plot = predictions_85th_all[batch_index][sequence_index].detach().numpy()

    # Cumulative curves are computed once and shared by both charts.
    past_cumulative = np.cumsum(past_vals_plot)
    past_total = past_cumulative[-1]
    cum_mean = np.cumsum(predictions_mean_plot)
    cum_actual = np.cumsum(future_vals_plot)
    cum_65 = np.cumsum(predictions_65_plot)
    cum_75 = np.cumsum(predictions_75_plot)
    cum_85 = np.cumsum(predictions_85_plot)

    # Chart 1: past window followed by the future window.
    plt.figure(figsize=(10, 6))
    plt.plot(past_dates_plot, past_cumulative, label="Actual Past Transactions", color='blue')
    plt.plot(future_dates_plot, past_total + cum_mean, label="Predicted Future Transactions (Mean)", color='orange', linestyle='--')
    # This curve shows the real future values, so it is labelled as actual.
    plt.plot(future_dates_plot, past_total + cum_actual, label="Actual Future Transactions", color='blue', linestyle='--')
    plt.plot(future_dates_plot, past_total + cum_75, label="Predicted Future Transactions (75th Percentile)", color='purple', linestyle='--')
    plt.plot(future_dates_plot, past_total + cum_85, label="Predicted Future Transactions (85th Percentile)", color='black', linestyle='--')

    plt.xlabel('Date')
    plt.ylabel('Cumulative Transaction Amount')
    plt.title('Actual and Predicted Transactions Past and Future Window')
    plt.legend()
    plt.xticks(rotation=90)
    plt.grid(True)
    plt.show()

    # Chart 2: future window only, on its own figure sized like the first one.
    plt.figure(figsize=(10, 6))
    plt.plot(future_dates_plot, cum_mean, label="Predicted Future Transactions (Mean)", color='orange', linestyle='--')
    plt.plot(future_dates_plot, cum_actual, label="Actual Future Transactions", color='blue', linestyle='--')
    plt.plot(future_dates_plot, cum_65, label="Predicted Future Transactions (65th Percentile)", color='green', linestyle='--')
    plt.plot(future_dates_plot, cum_75, label="Predicted Future Transactions (75th Percentile)", color='purple', linestyle='--')
    plt.plot(future_dates_plot, cum_85, label="Predicted Future Transactions (85th Percentile)", color='black', linestyle='--')

    plt.xlabel('Date')
    plt.ylabel('Cumulative Predicted Transaction Amount')
    plt.title('Actual and Predicted Transactions Future Window')
    plt.legend()
    plt.xticks(rotation=90)
    plt.grid(True)
    plt.show()

    result_dict = {"Mean": cum_mean[-1], "65th": cum_65[-1], "75th": cum_75[-1], "85th": cum_85[-1]}
    return result_dict

In [None]:
prediction_cum_sums = prediction_plot(1,5)

In [None]:
prediction_cum_sums

In [None]:
savings_potential = {key : 3100 - value for key,value in prediction_cum_sums.items()}

In [None]:
savings_potential

So let's conclude that, depending on how aggressive this customer wants to be with their saving, they can save between $600 and $1000 this month. We will adjust these values down by some buffer. Each week we will take the transaction data, cumulatively sum it up, and adjust the estimate down or up based on the current trend in their spending and how it compares to what our model is predicting.

Next we will format the savings tab, move this into some Python file to create a Flask app containing the model so that we can make an API call to the endpoint that serves the model and send the predictions as a response to the frontend.