In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tabpfn import TabPFNRegressor
import torch

In [3]:

# Load the invoice export. errors='coerce' turns unparseable dates into NaT and
# unparseable amounts into NaN instead of raising.
df = pd.read_csv('C:/Kenil Ramani/QB/GoalBot/CSV/dataset_v4.05.csv')
df['invoiceDate'] = pd.to_datetime(df['invoiceDate'], errors='coerce')
df['amount'] = pd.to_numeric(df['amount'], errors='coerce')

product_id = 246
# Keep one product and drop rows whose date or amount failed to parse, so a
# NaN target can never reach model.fit() or the metrics. The .copy() makes the
# feature assignments below write to an independent frame rather than to a
# slice of the full dataset (avoids SettingWithCopyWarning).
df = df[df['productId'] == product_id].dropna(subset=['invoiceDate', 'amount']).copy()

# Create features
# Whole seconds since the Unix epoch. Timedelta arithmetic is used instead of
# astype(int64) // 10**9 so the value does not depend on the datetime64 unit.
df['date_num'] = (df['invoiceDate'] - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
df['month'] = df['invoiceDate'].dt.month
df['day'] = df['invoiceDate'].dt.day
df['dow'] = df['invoiceDate'].dt.dayofweek

# Half-open windows [start, next_start): rows stamped later than midnight on
# the last day of a window are still included.
train_df = df[(df['invoiceDate'] >= '2021-01-01') & (df['invoiceDate'] < '2024-01-01')].copy()
test_df = df[(df['invoiceDate'] >= '2024-01-01') & (df['invoiceDate'] < '2025-01-01')].copy()

# Fail early with a readable message instead of an opaque error from the model.
if train_df.empty or test_df.empty:
    raise ValueError(
        f"No usable rows for product {product_id}: "
        f"{len(train_df)} training rows, {len(test_df)} test rows"
    )

features = ['date_num', 'month', 'day', 'dow']
X_train = train_df[features].values
y_train = train_df['amount'].values
X_test = test_df[features].values
y_true = test_df['amount'].values

# Train TabPFNRegressor
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TabPFNRegressor(device=device)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Metrics
def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error, in percent.

    Each point contributes ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)``;
    points where both values are zero contribute 0.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        The mean of the per-point ratios multiplied by 100, or NaN when the
        input is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = (np.abs(y_true) + np.abs(y_pred)) / 2
    if denom.size == 0:
        return np.nan
    abs_err = np.abs(y_true - y_pred)
    # Divide only where the denominator is non-zero; the remaining slots keep
    # the 0 they were initialised with, so 0/0 is never evaluated.
    ratio = np.divide(abs_err, denom, out=np.zeros_like(denom), where=denom != 0)
    return np.mean(ratio) * 100

def safe_mape(y_true, y_pred):
    """Mean absolute percentage error over the non-zero targets, in percent.

    Points whose actual value is exactly zero are excluded, because their
    percentage error is undefined.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values with the same shape as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero targets,
        or NaN when there are none (including empty input).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mask = y_true != 0
    if not mask.any():
        # Nothing to average; return NaN explicitly instead of taking the
        # mean of an empty slice.
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# Evaluate both error metrics once, then report them as percentages with two
# decimal places.
smape_pct = smape(y_true, y_pred)
mape_pct = safe_mape(y_true, y_pred)
print(f"SMAPE: {smape_pct:.2f}%")
print(f"MAPE: {mape_pct:.2f}%")

# Plot
# plt.figure(figsize=(14, 6))
# plt.plot(train_df['invoiceDate'], train_df['amount'], label='Historical Sales (2021–2023)', alpha=0.5)
# plt.plot(test_df['invoiceDate'], y_true, label='Actual Sales (2024)', color='green')
# plt.plot(test_df['invoiceDate'], y_pred, label='Predicted Sales (2024)', color='red', linestyle='--')
# plt.xlabel("Date")
# plt.ylabel("Amount")
# plt.title(f"Sales Prediction for Product ID: {product_id}")
# plt.legend()
# plt.tight_layout()
# plt.show()


SMAPE: 79.24%
MAPE: 168.93%


Prediction for the first 6 months of 2024 (test window 2024-01-01 to 2024-06-30)

In [None]:
# Load the invoice export. errors='coerce' turns unparseable dates into NaT and
# unparseable amounts into NaN instead of raising.
df = pd.read_csv('C:/Kenil Ramani/QB/GoalBot/CSV/dataset_v4.05.csv')
df['invoiceDate'] = pd.to_datetime(df['invoiceDate'], errors='coerce')
df['amount'] = pd.to_numeric(df['amount'], errors='coerce')

product_id = 50
# Keep one product and drop rows whose date or amount failed to parse, so a
# NaN target can never reach model.fit() or the metrics. The .copy() makes the
# feature assignments below write to an independent frame rather than to a
# slice of the full dataset (avoids SettingWithCopyWarning).
df = df[df['productId'] == product_id].dropna(subset=['invoiceDate', 'amount']).copy()

# Create features
# Whole seconds since the Unix epoch. Timedelta arithmetic is used instead of
# astype(int64) // 10**9 so the value does not depend on the datetime64 unit.
df['date_num'] = (df['invoiceDate'] - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
df['month'] = df['invoiceDate'].dt.month
df['day'] = df['invoiceDate'].dt.day
df['dow'] = df['invoiceDate'].dt.dayofweek

# Half-open windows [start, next_start): rows stamped later than midnight on
# the last day of a window (2023-12-31, 2024-06-30) are still included.
train_df = df[(df['invoiceDate'] >= '2021-01-01') & (df['invoiceDate'] < '2024-01-01')].copy()
test_df = df[(df['invoiceDate'] >= '2024-01-01') & (df['invoiceDate'] < '2024-07-01')].copy()

# Fail early with a readable message instead of an opaque error from the model.
if train_df.empty or test_df.empty:
    raise ValueError(
        f"No usable rows for product {product_id}: "
        f"{len(train_df)} training rows, {len(test_df)} test rows"
    )

features = ['date_num', 'month', 'day', 'dow']
X_train = train_df[features].values
y_train = train_df['amount'].values
X_test = test_df[features].values
y_true = test_df['amount'].values

# Train TabPFNRegressor
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TabPFNRegressor(device=device)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Metrics
def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error, in percent.

    Each point contributes ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)``;
    points where both values are zero contribute 0.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        The mean of the per-point ratios multiplied by 100, or NaN when the
        input is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = (np.abs(y_true) + np.abs(y_pred)) / 2
    if denom.size == 0:
        return np.nan
    abs_err = np.abs(y_true - y_pred)
    # Divide only where the denominator is non-zero; the remaining slots keep
    # the 0 they were initialised with, so 0/0 is never evaluated.
    ratio = np.divide(abs_err, denom, out=np.zeros_like(denom), where=denom != 0)
    return np.mean(ratio) * 100

def safe_mape(y_true, y_pred):
    """Mean absolute percentage error over the non-zero targets, in percent.

    Points whose actual value is exactly zero are excluded, because their
    percentage error is undefined.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values with the same shape as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero targets,
        or NaN when there are none (including empty input).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mask = y_true != 0
    if not mask.any():
        # Nothing to average; return NaN explicitly instead of taking the
        # mean of an empty slice.
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# Evaluate both error metrics once, then report them as percentages with two
# decimal places.
smape_pct = smape(y_true, y_pred)
mape_pct = safe_mape(y_true, y_pred)
print(f"SMAPE: {smape_pct:.2f}%")
print(f"MAPE: {mape_pct:.2f}%")

# Plot
# plt.figure(figsize=(14, 6))
# plt.plot(train_df['invoiceDate'], train_df['amount'], label='Historical Sales (2021–2023)', alpha=0.5)
# plt.plot(test_df['invoiceDate'], y_true, label='Actual Sales (2024)', color='green')
# plt.plot(test_df['invoiceDate'], y_pred, label='Predicted Sales (2024)', color='red', linestyle='--')
# plt.xlabel("Date")
# plt.ylabel("Amount")
# plt.title(f"Sales Prediction for Product ID: {product_id}")
# plt.legend()
# plt.tight_layout()
# plt.show()

SMAPE: 79.51%
MAPE: 90.21%


Dynamic prediction horizon (test window length set by `months_to_predict`)

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tabpfn import TabPFNRegressor
import torch
from dateutil.relativedelta import relativedelta

months_to_predict = 3  # Change this to 1, 3, 6, or 9 as needed
if months_to_predict < 1:
    raise ValueError("months_to_predict must be at least 1")

# Load the invoice export. errors='coerce' turns unparseable dates into NaT and
# unparseable amounts into NaN instead of raising.
df = pd.read_csv('C:/Kenil Ramani/QB/GoalBot/CSV/dataset_v4.05.csv')
df['invoiceDate'] = pd.to_datetime(df['invoiceDate'], errors='coerce')
df['amount'] = pd.to_numeric(df['amount'], errors='coerce')

product_id = 283
# Keep one product and drop rows whose date or amount failed to parse, so a
# NaN target can never reach model.fit() or the metrics. The .copy() makes the
# feature assignments below write to an independent frame rather than to a
# slice of the full dataset (avoids SettingWithCopyWarning).
df = df[df['productId'] == product_id].dropna(subset=['invoiceDate', 'amount']).copy()

# Whole seconds since the Unix epoch. Timedelta arithmetic is used instead of
# astype(int64) // 10**9 so the value does not depend on the datetime64 unit.
df['date_num'] = (df['invoiceDate'] - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
df['month'] = df['invoiceDate'].dt.month
df['day'] = df['invoiceDate'].dt.day
df['dow'] = df['invoiceDate'].dt.dayofweek

# Half-open training window [2021-01-01, 2024-01-01): rows stamped later than
# midnight on 2023-12-31 are still included.
train_df = df[(df['invoiceDate'] >= '2021-01-01') & (df['invoiceDate'] < '2024-01-01')].copy()

start_test = pd.to_datetime('2024-01-01')
# test_stop is the exclusive upper bound used for filtering; end_test is the
# inclusive last calendar day, kept for reporting.
test_stop = start_test + relativedelta(months=months_to_predict)
end_test = test_stop - pd.Timedelta(days=1)

test_df = df[(df['invoiceDate'] >= start_test) & (df['invoiceDate'] < test_stop)].copy()

# Fail early with a readable message instead of an opaque error from the model.
if train_df.empty or test_df.empty:
    raise ValueError(
        f"No usable rows for product {product_id}: "
        f"{len(train_df)} training rows, {len(test_df)} test rows"
    )

features = ['date_num', 'month', 'day', 'dow']
X_train = train_df[features].values
y_train = train_df['amount'].values
X_test = test_df[features].values
y_true = test_df['amount'].values

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TabPFNRegressor(device=device)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error, in percent.

    Each point contributes ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)``;
    points where both values are zero contribute 0.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        The mean of the per-point ratios multiplied by 100, or NaN when the
        input is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = (np.abs(y_true) + np.abs(y_pred)) / 2
    if denom.size == 0:
        return np.nan
    abs_err = np.abs(y_true - y_pred)
    # Divide only where the denominator is non-zero; the remaining slots keep
    # the 0 they were initialised with, so 0/0 is never evaluated.
    ratio = np.divide(abs_err, denom, out=np.zeros_like(denom), where=denom != 0)
    return np.mean(ratio) * 100

def safe_mape(y_true, y_pred):
    """Mean absolute percentage error over the non-zero targets, in percent.

    Points whose actual value is exactly zero are excluded, because their
    percentage error is undefined.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values with the same shape as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero targets,
        or NaN when there are none (including empty input).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mask = y_true != 0
    if not mask.any():
        # Nothing to average; return NaN explicitly instead of taking the
        # mean of an empty slice.
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# Report the evaluated window, then both error metrics as percentages with two
# decimal places.
window_start = start_test.date()
window_end = end_test.date()
print(f"Predicting from {window_start} to {window_end}")
smape_pct = smape(y_true, y_pred)
print(f"SMAPE: {smape_pct:.2f}%")
mape_pct = safe_mape(y_true, y_pred)
print(f"MAPE: {mape_pct:.2f}%")


Predicting from 2024-01-01 to 2024-03-31
SMAPE: 86.59%
MAPE: 186.64%


In [2]:
# Put actual and predicted amounts side by side and print the first ten rows
# as a quick sanity check.
df_check = pd.DataFrame(dict(Actual=y_true, Predicted=y_pred))
preview = df_check.iloc[:10]
print(preview)

     Actual    Predicted
0   2196.40  3864.175049
1    109.82  3655.125000
2   2196.40  3585.794434
3    439.28  3655.125000
4      0.00  3655.125000
5   5271.36  3683.448242
6   3514.24  3683.448242
7      0.00  3493.676270
8  14056.96  3920.161377
9   1976.76  3655.125000


In [20]:
# Show the range of the actual test amounts.
lowest_amount = y_true.min()
highest_amount = y_true.max()
print(f"Min amount: {lowest_amount}, Max amount: {highest_amount}")

Min amount: 0.0, Max amount: 7714.5


AutoTabPFN

In [None]:
# !git clone https://github.com/priorlabs/tabpfn-extensions.git
# !pip install -e tabpfn-extensions
# Clone and install the repository
# !pip install "tabpfn-extensions[all] @ git+https://github.com/PriorLabs/tabpfn-extensions.git"

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# from tabpfn import AutoTabPFNRegressor
from tabpfn_extensions import AutoTabPFNRegressor
import torch
from dateutil.relativedelta import relativedelta

  import pkg_resources


In [2]:
months_to_predict = 1  # Change this to 3, 6, or 9 as needed
if months_to_predict < 1:
    raise ValueError("months_to_predict must be at least 1")

# Load the invoice export. errors='coerce' turns unparseable dates into NaT and
# unparseable amounts into NaN instead of raising.
df = pd.read_csv('C:/Kenil Ramani/QB/GoalBot/CSV/dataset_v4.05.csv')
df['invoiceDate'] = pd.to_datetime(df['invoiceDate'], errors='coerce')
df['amount'] = pd.to_numeric(df['amount'], errors='coerce')

product_id = 283
# Keep one product and drop rows whose date or amount failed to parse, so a
# NaN target can never reach model.fit() or the metrics. The .copy() makes the
# feature assignments below write to an independent frame rather than to a
# slice of the full dataset (avoids SettingWithCopyWarning).
df = df[df['productId'] == product_id].dropna(subset=['invoiceDate', 'amount']).copy()

# Whole seconds since the Unix epoch. Timedelta arithmetic is used instead of
# astype(int64) // 10**9 so the value does not depend on the datetime64 unit.
df['date_num'] = (df['invoiceDate'] - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
df['month'] = df['invoiceDate'].dt.month
df['day'] = df['invoiceDate'].dt.day
df['dow'] = df['invoiceDate'].dt.dayofweek

# Half-open training window [2021-01-01, 2024-01-01): rows stamped later than
# midnight on 2023-12-31 are still included.
train_df = df[(df['invoiceDate'] >= '2021-01-01') & (df['invoiceDate'] < '2024-01-01')].copy()

start_test = pd.to_datetime('2024-01-01')
# test_stop is the exclusive upper bound used for filtering; end_test is the
# inclusive last calendar day, kept for reporting.
test_stop = start_test + relativedelta(months=months_to_predict)
end_test = test_stop - pd.Timedelta(days=1)

test_df = df[(df['invoiceDate'] >= start_test) & (df['invoiceDate'] < test_stop)].copy()

# Fail early with a readable message instead of an opaque error from the model.
if train_df.empty or test_df.empty:
    raise ValueError(
        f"No usable rows for product {product_id}: "
        f"{len(train_df)} training rows, {len(test_df)} test rows"
    )

features = ['date_num', 'month', 'day', 'dow']
X_train = train_df[features].values
y_train = train_df['amount'].values
X_test = test_df[features].values
y_true = test_df['amount'].values

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = AutoTabPFNRegressor(device=device)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

def smape(y_true, y_pred):
    """Symmetric mean absolute percentage error, in percent.

    Each point contributes ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)``;
    points where both values are zero contribute 0.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        The mean of the per-point ratios multiplied by 100, or NaN when the
        input is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = (np.abs(y_true) + np.abs(y_pred)) / 2
    if denom.size == 0:
        return np.nan
    abs_err = np.abs(y_true - y_pred)
    # Divide only where the denominator is non-zero; the remaining slots keep
    # the 0 they were initialised with, so 0/0 is never evaluated.
    ratio = np.divide(abs_err, denom, out=np.zeros_like(denom), where=denom != 0)
    return np.mean(ratio) * 100

def safe_mape(y_true, y_pred):
    """Mean absolute percentage error over the non-zero targets, in percent.

    Points whose actual value is exactly zero are excluded, because their
    percentage error is undefined.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values with the same shape as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the non-zero targets,
        or NaN when there are none (including empty input).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mask = y_true != 0
    if not mask.any():
        # Nothing to average; return NaN explicitly instead of taking the
        # mean of an empty slice.
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# Report the evaluated window, then both error metrics as percentages with two
# decimal places.
window_start = start_test.date()
window_end = end_test.date()
print(f"Predicting from {window_start} to {window_end}")
smape_pct = smape(y_true, y_pred)
print(f"SMAPE: {smape_pct:.2f}%")
mape_pct = safe_mape(y_true, y_pred)
print(f"MAPE: {mape_pct:.2f}%")


INFO:tabpfn_extensions.post_hoc_ensembles.greedy_weighted_ensemble:Order of selections: [np.int64(0), np.int64(1), np.int64(0), np.int64(1), np.int64(0), np.int64(0), np.int64(1), np.int64(0), np.int64(1), np.int64(0), np.int64(0), np.int64(1), np.int64(0), np.int64(1), np.int64(0), np.int64(0), np.int64(1), np.int64(0), np.int64(1), np.int64(0), np.int64(0), np.int64(1), np.int64(0), np.int64(1), np.int64(0)]
INFO:tabpfn_extensions.post_hoc_ensembles.greedy_weighted_ensemble:Val loss over iterations: [np.float64(28022752.79749122), np.float64(27752688.386326306), np.float64(27745570.939858213), np.float64(27745570.939858213), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27736761.248422693), np.float64(27

Predicting from 2024-01-01 to 2024-01-31
SMAPE: 95.57%
MAPE: 389.43%
