In [1]:
import mlflow
# Connect to Databricks workspace with submitted credentials or use stored credentials
mlflow.login()
# Set tracking URI to Databricks -  tell MLflow to send the data into Databricks Workspace
mlflow.set_tracking_uri("databricks")

2025/10/22 19:27:55 INFO mlflow.utils.credentials: Successfully connected to MLflow hosted tracking server! Host: https://dbc-7d1169bb-4536.cloud.databricks.com.


In [2]:
import os;
import mlflow
from datetime import datetime
from dotenv import load_dotenv

#Do not use .env file in shared databricks environment
#https://medium.com/@generative_ai/environment-variables-setting-in-databricks-dde16e3c3888 
load_dotenv()

experiment_name = f"/Users/{os.environ['USER_EMAIL']}/test-experiment"
try:
    mlflow.create_experiment(experiment_name)
except Exception as e:
    print(f"Experiment {experiment_name} already exists.")

mlflow.set_experiment(experiment_name)


with mlflow.start_run(run_name=f'run-{datetime.now()}') as run:
    mlflow.log_param("param1", 5)
    mlflow.log_metric("metric1", 0.85)

Experiment /Users/marijo.maracic@gmail.com/test-experiment already exists.
🏃 View run run-2025-10-22 19:27:58.860990 at: https://dbc-7d1169bb-4536.cloud.databricks.com/ml/experiments/3044475861801323/runs/590cb7f29976470197079bc27f32d252
🧪 View experiment at: https://dbc-7d1169bb-4536.cloud.databricks.com/ml/experiments/3044475861801323


In [3]:
import pandas as pd
df = pd.read_csv('data/train.csv')
df.head()

Unnamed: 0,Date,store,product,number_sold
0,2010-01-01,0,0,801
1,2010-01-02,0,0,810
2,2010-01-03,0,0,818
3,2010-01-04,0,0,796
4,2010-01-05,0,0,808


In [4]:
# Count unique combinations of store and product
unique_combinations = df[['store','product']].drop_duplicates().shape[0]
print(f"Number of unique (store, product) combinations: {unique_combinations}")

# Count rows per (store, product) combination, sort descending
counts = df.groupby(['store', 'product']).size().reset_index(name='count')
counts_sorted = counts.sort_values('count', ascending=False)
display(counts_sorted)

# Show min and max counts
min_count = counts_sorted['count'].min()
max_count = counts_sorted['count'].max()
print(f"Min rows per (store, product): {min_count}")
print(f"Max rows per (store, product): {max_count}")

Number of unique (store, product) combinations: 70


Unnamed: 0,store,product,count
0,0,0,3287
1,0,1,3287
2,0,2,3287
3,0,3,3287
4,0,4,3287
...,...,...,...
65,6,5,3287
66,6,6,3287
67,6,7,3287
68,6,8,3287


Min rows per (store, product): 3287
Max rows per (store, product): 3287


In [5]:
# Show minimum and maximum date in the dataset
print('Min date:', df['Date'].min())
print('Max date:', df['Date'].max())

Min date: 2010-01-01
Max date: 2018-12-31


In [None]:
#train and log models
import mlflow
from statsmodels.tsa.arima.model import ARIMA
from mlflow.models.signature import infer_signature
from mlflow.statsmodels import log_model
from tqdm import tqdm
from quality import calculate_metrics

mlflow.set_experiment(f"/Users/{os.environ['USER_EMAIL']}/timeseries-forecasting-experiment-{datetime.now()}")
models = {}
groups = list(df.groupby(['store', 'product']))
for (store, product), group in tqdm(groups, desc='Training ARIMA models'):
    with mlflow.start_run(run_name=f'{store}-{product}') as run:
        order = (1, 1, 1)
        mlflow.log_param("store", store)
        mlflow.log_param("product", product)
        mlflow.log_param("order", order)
        
        group_sorted = group.sort_values('Date')
        group_sorted = group_sorted.set_index('Date')
        try:
            input = group_sorted['number_sold']
            model = ARIMA(input, order=order)
            fit = model.fit()
            models[(store, product)] = fit
            log_model(statsmodels_model=model, name="ARIMA", signature=infer_signature(input))

            metrics = calculate_metrics(store, product, model, input)
            mlflow.log_metric("MSE", metrics.mse)
            mlflow.log_metric("MAE", metrics.mae)
            mlflow.log_metric("RMSE", metrics.rmse)
        except Exception as e:
            print(f"Failed to train model for store {store}, product {product}: {e}")
            break


2025/10/22 19:28:51 INFO mlflow.tracking.fluent: Experiment with name '/Users/marijo.maracic@gmail.com/timeseries-forecasting-experiment-2025-10-22 19:28:51.403488' does not exist. Creating a new experiment.
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Failed to train model for store 0, product 0: 'ARIMA' object has no attribute 'save'
🏃 View run 0-0 at: https://dbc-7d1169bb-4536.cloud.databricks.com/ml/experiments/1692052176608893/runs/ed780e3ac1e140cb927bb09bac7adfbf
🧪 View experiment at: https://dbc-7d1169bb-4536.cloud.databricks.com/ml/experiments/1692052176608893


Training ARIMA models:   0%|          | 0/70 [00:02<?, ?it/s]


In [None]:
import pandas as pd
test_df = pd.read_csv('data/test.csv')
test_df.head()

In [None]:
# Statistics for store, product combinations in test data
unique_combinations_test = test_df[['store','product']].drop_duplicates().shape[0]
print(f"Number of unique (store, product) combinations in test data: {unique_combinations_test}")

counts_test = test_df.groupby(['store', 'product']).size().reset_index(name='count')
counts_test_sorted = counts_test.sort_values('count', ascending=False)
display(counts_test_sorted)

min_count_test = counts_test_sorted['count'].min()
max_count_test = counts_test_sorted['count'].max()
print(f"Min points per (store, product) in test: {min_count_test}")
print(f"Max points per (store, product) in test: {max_count_test}")

In [None]:
# Generate 365 predictions for each (store, product) using trained models and compare with test data
import numpy as np
predictions = {}
for key, model in models.items():
    if model is not None:
        try:
            forecast = model.forecast(steps=365)
            predictions[key] = forecast
        except Exception as e:
            predictions[key] = f'Error: {e}'
    else:
        predictions[key] = None

# Compare predictions with test data and calculate error metrics
comparison_results = []
for (store, product), forecast in predictions.items():
    test_points = test_df[(test_df['store'] == store) & (test_df['product'] == product)].sort_values('Date')
    if isinstance(forecast, (list, pd.Series)) and len(test_points) > 0:
        actual = test_points['number_sold'].values[:365]
        pred = forecast[:len(actual)]
        mse = np.mean((actual - pred) ** 2)
        mae = np.mean(np.abs(actual - pred))
        rmse = np.sqrt(mse)
        comparison_results.append({
            'store': store,
            'product': product,
            'actual': actual,
            'predicted': pred,
            'mse': mse,
            'mae': mae,
            'rmse': rmse
        })

# Sort by descending MSE
comparison_results_sorted = sorted(comparison_results, key=lambda x: x['mse'], reverse=True)

# Show best and worst MSE value
if comparison_results_sorted:
    best = comparison_results_sorted[-1]
    worst = comparison_results_sorted[0]
    print(f"Best MSE: {best['mse']:.2f} (Store: {best['store']}, Product: {best['product']})")
    print(f"Worst MSE: {worst['mse']:.2f} (Store: {worst['store']}, Product: {worst['product']})")

# Show comparison and error metrics for first few combinations (highest MSE)
for result in comparison_results_sorted[:5]:
    print(f"Store: {result['store']}, Product: {result['product']}")
    print("Actual:", result['actual'])
    print("Predicted:", result['predicted'])
    print(f"MSE: {result['mse']:.2f}, MAE: {result['mae']:.2f}, RMSE: {result['rmse']:.2f}")
    print()

In [None]:
# Calculate ARIMA model quality metrics on training data
import numpy as np
train_metrics = []
for (store, product), model in models.items():
    if model is not None:
        train_points = df[(df['store'] == store) & (df['product'] == product)].sort_values('Date')
        actual = train_points['number_sold'].values
        pred = model.fittedvalues.values[:len(actual)]
        mse = np.mean((actual - pred) ** 2)
        mae = np.mean(np.abs(actual - pred))
        rmse = np.sqrt(mse)
        train_metrics.append({
            'store': store,
            'product': product,
            'mse': mse,
            'mae': mae,
            'rmse': rmse
        })

# Sort by descending MSE
train_metrics_sorted = sorted(train_metrics, key=lambda x: x['mse'], reverse=True)

# Show best and worst MSE value
if train_metrics_sorted:
    best = train_metrics_sorted[-1]
    worst = train_metrics_sorted[0]
    print(f"Best training MSE: {best['mse']:.2f} (Store: {best['store']}, Product: {best['product']})")
    print(f"Worst training MSE: {worst['mse']:.2f} (Store: {worst['store']}, Product: {worst['product']})")

# Show metrics for first few combinations (highest MSE)
for result in train_metrics_sorted[:5]:
    print(f"Store: {result['store']}, Product: {result['product']}")
    print(f"Training MSE: {result['mse']:.2f}, MAE: {result['mae']:.2f}, RMSE: {result['rmse']:.2f}")
    print()