# Moaaz Trend Pipeline
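Run training and prediction for the Moaaz Trend forecasting model, then compute per-machine inventory scores from the predictions. Start the notebook from a direct sub-directory of `functions/`: project imports and the service-account key are resolved relative to the parent of the current working directory.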


In [1]:
# Imports and Setup
import sys
import os
from pathlib import Path
import pandas as pd

# Resolve the /functions directory as the parent of the current working
# directory; the notebook therefore has to be started from a direct
# sub-directory of /functions.
FUNCTIONS_DIR = Path.cwd().parent

# Put /functions on the import path so the project packages imported further
# down resolve. The membership check keeps this cell idempotent: re-running it
# no longer piles up duplicate sys.path entries.
if str(FUNCTIONS_DIR) not in sys.path:
    sys.path.insert(0, str(FUNCTIONS_DIR))

# Firebase setup
from firebase_admin import initialize_app, credentials, get_app
from google.cloud import firestore as google_firestore
import logging

# Show INFO-level log records in the notebook output.
logging.basicConfig(level=logging.INFO)

# Use direct path to service account key inside /functions
service_account_path = FUNCTIONS_DIR / 'serviceAccountKey.json'
# Fail early with an explicit message instead of an error raised from deep
# inside the Firebase / Firestore client constructors.
if not service_account_path.is_file():
    raise FileNotFoundError(f"Service account key not found: {service_account_path}")

# Initialize Firebase Admin SDK. get_app() raises ValueError when no default
# app has been initialized yet, so re-running this cell reuses the existing
# app instead of trying to initialize a second one.
try:
    app = get_app()
except ValueError:
    # Pass the key path as a plain string, matching the Firestore client call
    # below. NOTE(review): older firebase_admin releases appear to accept only
    # str paths (or a dict) here, not pathlib.Path -- confirm against the
    # pinned version.
    cred = credentials.Certificate(str(service_account_path))
    app = initialize_app(cred, {'storageBucket': 'uno-y-b48fb.appspot.com'})

# Create Firestore client using the same credentials
firestore_client = google_firestore.Client.from_service_account_json(str(service_account_path))

# planogram imports
from planogram import data_loader
# Load data
print("Loading data...")
firebase_data = data_loader.load_firebase_collections(firestore_client)  # Loads all collections
sales_data = data_loader.load_all_sales_data(firestore_client) # Loads and combines historical + latest sales
# Purchase prices are looked up with .get(), so a missing collection is passed
# on as None rather than raising here.
sales_data = data_loader.enrich_with_purchase_prices(sales_data, firebase_data.get('product_purchase_prices'))
print("Data loaded.")

products_df = firebase_data.get('products')
machines_df = firebase_data.get('machines')

# Later cells call len() on both tables and hand them to training/prediction,
# so a missing collection should stop the notebook here with a clear message
# rather than as a TypeError further down.
_missing_collections = [
    name
    for name, table in (('products', products_df), ('machines', machines_df))
    if table is None
]
if _missing_collections:
    raise LookupError(f"Firebase collections missing from loaded data: {_missing_collections}")




Loading data...
Loading collection: app_machines
app_machines slots cleaned: 8193 -> 8172 (removed 21, 0.26%)
app_machines: 234 rows (machines themselves not filtered)
Loading collection: products
products (filtered unspecific rows): 2410 -> 1961 rows (removed 449, 18.63%)
Loading collection: product_purchase_prices
product_purchase_prices: 1370 -> 1370 rows (removed 0, 0.00%)
Loading historical sales from Parquet files...
Historical sales (after filtering): 6979402 -> 5390446 rows (removed 1588956, 22.77%)
No latest sales provided; only historical sales will be used.
Data loaded.


In [3]:
# Import Moaaz Trend functions
from planogram.moaaz_trend import moaaz_train, moaaz_predict, map_ean_to_product_name

# Report the size of each loaded table before any modelling starts.
print(f"Machines: {len(machines_df)}")
print(f"Products: {len(products_df)}")
print(f"Sales: {len(sales_data)}")

# Model artifacts are kept under planogram/moaaz_trend/artifact inside the
# /functions directory; stop right away when that directory is absent.
artifact_dir = FUNCTIONS_DIR.joinpath('planogram', 'moaaz_trend', 'artifact')
if artifact_dir.exists():
    # The train/predict calls receive the artifact directory as `model_path`.
    model_path = artifact_dir
else:
    raise FileNotFoundError(f"Artifact directory does not exist: {artifact_dir}")



Machines: 234
Products: 1961
Sales: 5390446


In [None]:
# Train Model
print("Starting Training...")

# Collect the keyword arguments first so the inputs of the training run are
# visible at a glance, then hand them to moaaz_train in one go.
_train_inputs = {
    "sales_df": sales_data,
    "products_df": products_df,
    "machines_df": machines_df,
    "model_path": model_path,
}
forecaster = moaaz_train(**_train_inputs)
print(f"Training Complete. Model saved to {model_path}")


INFO:planogram.moaaz_trend.train_predict:Starting moaaz_train pipeline...


Starting Training...


INFO:planogram.moaaz_trend.train_predict:Processing sales data...


üöÄ STARTING ETL PIPELINE
Input: 5,390,446 raw transactions
Input columns: 32 - address, card_brand, card_type, category, currency, customer_id, ean, is_ICA_refiller, latitude, local_timestamp, longitude, machine_eva_group, machine_group_tag, machine_id, machine_key, machine_model, machine_name, machine_sub_group, moms, nayax_name, position, price, product_name, provider, purchase_price_kr, pusher_friendly, refiller, sielaff_id, spiral, subcategory, too_unspecific, width
Date range: 2018-06-29 14:18:33+00:00 to 2025-11-18 23:59:50+00:00
Machines: 1,930
Products: 1,358

üîß STEP 1: DATA CLEANING
üßπ CLEANING SALES DATA
Starting with 5,390,446 transactions
‚úì Removed 250,969 rows with missing essential fields
‚úì Removed 627 test transactions (price ‚â§ 2 SEK)
‚úì Removed 444 transactions from 127 test machines
‚úì Removed 39,488 rows with missing provider
‚úì Kept 14 relevant columns
‚úÖ CLEANING COMPLETE
   Original: 5,390,446 transactions
   Removed:  291,528 transactions (5.4%)
 

Processing swaps:   0%|          | 0/15501 [00:00<?, ?positions/s]

‚úì Added 30,747 outgoing product entries
üì∏ CREATING COMPLETE SNAPSHOTS (OPTIMIZED)
‚úì Generating week_start and week_end from year/week
‚úì Input: 1,594,697 records
‚úì Built grid: 15,997,910 potential slots
‚úì Merged with data: 16,037,750 records
‚úì Forward-filled 4 columns
‚úì Removed empty slots: 6,158,263 records remain
‚úì Removed 3,353,476 stale records (‚â•8 weeks no sales)
‚ö†Ô∏è  Fixed 612 positions with >2 entries in same week
‚úì Removed 628 duplicate entries
‚úì Final snapshots: 2,804,159 records
‚úì Expansion ratio: 1.76x
üßπ REMOVING STALE PRODUCTS AND MACHINES
Step 1: Identifying stale products (8+ weeks zero sales)...
‚úì Removed 477,976 stale entries from 40,738 products
Step 2: Identifying decommissioned machines (3+ weeks total zero sales)...
‚úì Removed 0 entries from 281 machines with decommissioned periods
‚úÖ REMOVAL COMPLETE
   Original records: 2,804,159
   Removed:          477,976 (17.0%)
   Final records:    2,326,183




‚úÖ MACHINE SNAPSHOTS COMPLETE
‚úì After snapshot creation: 2,326,183 records
‚úì Columns: 27 - _is_zero, _streak_group, category_first, customer_id_first, date_key, ean, first_sale_timestamp, machine_eva_group_first, machine_key, machine_sub_group_first, position, prev_ean, price_mean, product_name_first, provider_first, purchase_price_kr_first, refiller_first, subcategory_first, was_replaced, week, week_end, week_end_first, week_share, week_start, week_start_first, weekly_sales, year
‚úì Total sales: 5,098,747 (expected: 5,098,918, diff: 171)

üìÖ STEP 3: CALENDAR INTEGRATION
üìÖ ADDING CALENDAR INFORMATION
Processing 2,326,183 weekly records
‚úì Initializing Swedish holiday calendar
‚úì Found 383 unique weeks
‚úì Pre-calculating working days and holidays...


Calculating calendar:   0%|          | 0/383 [00:00<?, ?weeks/s]

‚úì Pre-calculated calendar info for 383 unique weeks
‚úì Mapping calendar info (vectorized)...
‚úÖ CALENDAR INTEGRATION COMPLETE
   Records processed: 2,326,183
   Unique weeks:      383
‚úì After calendar: 29 columns (added 2)

üéâ ETL PIPELINE COMPLETE!
üìà FINAL RESULTS:
   Records:            2,326,183
   Columns:            29 - _is_zero, _streak_group, category, customer_id, date_key, ean, first_sale_timestamp, holidays, machine_eva_group, machine_key, machine_sub_group, position, prev_ean, price_mean, product_name, provider, purchase_price_kr, refiller, subcategory, was_replaced, week, week_end, week_end_first, week_share, week_start, week_start_first, weekly_sales, working_days, year
   Total sales:        5,098,747
   Expected sales:     5,098,918
   Sales difference:   171 (0.00%)
   Date range:         2018-06-25 00:00:00 to 2025-11-17 00:00:00
   Unique machines:    1,606
   Unique products:    1,253
   Unique positions:   150
   Compression:        2.3x


INFO:planogram.moaaz_trend.train_predict:Generating features...



üìä Creating 7 feature group(s): BASE, TEMPORAL, PRODUCT, MACHINE, HISTORICAL_SALES, PRODUCT_LIFECYCLE, BRAND


Feature Groups: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [03:27<00:00, 29.70s/it]
INFO:planogram.moaaz_trend.train_predict:Training model...



Training: RECURSIVE MULTI
‚úì Identified 120 features
  - Categorical: 6
  - Numerical: 114
‚ö† Ignored 3 columns with unsupported dtypes
‚úì Prepared 2,150,450 samples (92.4% of input)

Training 4 chained models...
  Base input: 117 features
  Using OOF predictions to simulate recursive features

  Model 1 (week +1): 117 features
    Train MAE: 1.406

  Model 2 (week +2): 118 features (base + prev_sales)
    Train MAE: 1.446

  Model 3 (week +3): 118 features (base + prev_sales)
    Train MAE: 1.467

  Model 4 (week +4): 118 features (base + prev_sales)


INFO:planogram.moaaz_trend.train_predict:Model saved to /Users/sam/Documents/Code/Planogram/uno/functions/planogram/moaaz_trend/artifact


    Train MAE: 1.482
‚úì Training complete

‚úì Models saved to /Users/sam/Documents/Code/Planogram/uno/functions/planogram/moaaz_trend/artifact
Training Complete. Model saved to /Users/sam/Documents/Code/Planogram/uno/functions/planogram/moaaz_trend/artifact


In [4]:
# Generate Predictions
print("Starting Prediction...")

# Note the keyword spelling: moaaz_predict is called with `product_df`
# (singular), unlike the `products_df` keyword used for training.
_predict_inputs = {
    "sales_df": sales_data,
    "machines_df": machines_df,
    "product_df": products_df,
    "model_path": model_path,
}
predictions = moaaz_predict(**_predict_inputs)

print(f"Generated {len(predictions)} predictions.")
# Preview only the first rows instead of rendering the whole result.
display(predictions.head())


Starting Prediction...
‚úì Models loaded from /Users/sam/Documents/Code/Planogram/uno/functions/planogram/moaaz_trend/artifact


INFO:planogram.moaaz_trend.predict:Filtering raw sales data: 2024-11-19 23:59:50+00:00 onwards


üöÄ STARTING ETL PIPELINE
Input: 773,231 raw transactions
Input columns: 32 - address, card_brand, card_type, category, currency, customer_id, ean, is_ICA_refiller, latitude, local_timestamp, longitude, machine_eva_group, machine_group_tag, machine_id, machine_key, machine_model, machine_name, machine_sub_group, moms, nayax_name, position, price, product_name, provider, purchase_price_kr, pusher_friendly, refiller, sielaff_id, spiral, subcategory, too_unspecific, width
Date range: 2024-11-20 00:02:37+00:00 to 2025-11-18 23:59:50+00:00
Machines: 227
Products: 817

üîß STEP 1: DATA CLEANING
üßπ CLEANING SALES DATA
Starting with 773,231 transactions
‚úì Removed 27,171 rows with missing essential fields
‚úì Removed 37 test transactions (price ‚â§ 2 SEK)
‚úì Removed 3 transactions from 1 test machines
‚úì Removed 5,898 rows with missing provider
‚úì Kept 14 relevant columns
‚úÖ CLEANING COMPLETE
   Original: 773,231 transactions
   Removed:  33,109 transactions (4.3%)
   Final:    740,12

INFO:planogram.moaaz_trend.utils.calendar:Loaded 180 Swedish holidays


‚úì Removed 0 entries from 25 machines with decommissioned periods
‚úÖ REMOVAL COMPLETE
   Original records: 290,324
   Removed:          34,032 (11.7%)
   Final records:    256,292
‚úÖ MACHINE SNAPSHOTS COMPLETE
‚úì After snapshot creation: 256,292 records
‚úì Columns: 27 - _is_zero, _streak_group, category_first, customer_id_first, date_key, ean, first_sale_timestamp, machine_eva_group_first, machine_key, machine_sub_group_first, position, prev_ean, price_mean, product_name_first, provider_first, purchase_price_kr_first, refiller_first, subcategory_first, was_replaced, week, week_end, week_end_first, week_share, week_start, week_start_first, weekly_sales, year
‚úì Total sales: 740,083 (expected: 740,122, diff: 39)

üìÖ STEP 3: CALENDAR INTEGRATION
üìÖ ADDING CALENDAR INFORMATION
Processing 256,292 weekly records
‚úì Initializing Swedish holiday calendar
‚úì Found 53 unique weeks
‚úì Pre-calculating working days and holidays...
‚úì Pre-calculated calendar info for 53 unique weeks
‚ú

INFO:planogram.moaaz_trend.predict:Preparing prediction data for target week: 2025-11-24 (derived from max history: 2025-11-17)
INFO:planogram.moaaz_trend.predict:Using 234 machines from machines_df
INFO:planogram.moaaz_trend.predict:Using 1416 products from product_df
INFO:planogram.moaaz_trend.predict:Created 331,344 (machine, product) combinations
INFO:planogram.moaaz_trend.predict:Created 331,344 future prediction rows for 2025-11-24



üìä Creating 7 feature group(s): BASE, TEMPORAL, PRODUCT, MACHINE, HISTORICAL_SALES, PRODUCT_LIFECYCLE, BRAND


Feature Groups: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [00:38<00:00,  5.45s/it]
INFO:planogram.moaaz_trend.predict:Generating predictions for 331344 items for week of 2025-11-24



Generating Predictions: RECURSIVE MULTI
‚úì Prepared 331,344 samples for prediction
‚úì Generated 331,344 predictions

Generated 331344 predictions.


Unnamed: 0,machine_key,ean,week_start,trusted_prediction,pred_week_1,pred_week_2,pred_week_3,pred_week_4
0,1. Viadidakt Katrineholm_230547890,2200121649944.0,2025-11-24,False,5.638758,5.766668,4.349124,3.882371
1,1. Viadidakt Katrineholm_230547890,4030300022682.0,2025-11-24,False,6.255793,5.786046,3.689594,3.552881
2,1. Viadidakt Katrineholm_230547890,7350144680037.0,2025-11-24,False,4.249914,3.327425,4.167014,4.049806
3,1. Viadidakt Katrineholm_230547890,7350144680068.0,2025-11-24,False,4.249914,3.327425,4.167014,4.049806
4,1. Viadidakt Katrineholm_230547890,7350144680099.0,2025-11-24,False,6.374361,6.488276,3.88306,3.770433


In [5]:
# Resolve each prediction's EAN to a product name using the products table.
predictions_names = map_ean_to_product_name(df=predictions, product_df=products_df)

# Cell output: the named predictions ordered by the first forecast week,
# largest value first.
predictions_names.sort_values(
    by="pred_week_1",
    ascending=False,
)




Unnamed: 0,machine_key,ean,week_start,trusted_prediction,pred_week_1,pred_week_2,pred_week_3,pred_week_4,product_name
11393,PRO M - Bilbolaget Uppsala_874347336,5760466913133.0,2025-11-24,False,10.988876,8.779390,8.995770,3.695104,Kelda Toscansk Tomatsoppa
157562,Mat - Dahl Ulvs√§tra v√§nster_587343290,7350112800078.0,2025-11-24,False,9.997738,12.272857,14.238729,15.666771,IQ Fuel Hydrate Lemon Yuzu 33cl
133238,Mat - G√•shaga 1 H√∂ger_260016462,7318690501718.0,2025-11-24,False,9.719573,11.326602,10.632500,9.538171,ICA Caesarsallad Kyckling
134199,Mat - IAR Systems_155510204,7318690132684.0,2025-11-24,False,9.579480,9.733576,7.539760,7.785613,ICA R√∂d Curry Kyckling
133281,Mat - G√•shaga 2 Mitten_257032889,4318020172413.0,2025-11-24,False,9.268907,12.461085,16.371387,14.667116,ICA Mandlar Eco 100g
...,...,...,...,...,...,...,...,...,...
242195,Mat - SMP Ilsbo_548337221,4011800563516.0,2025-11-24,True,0.346284,1.698241,2.581617,3.820620,Corny Big Chocolate
305915,Conapto nya_619402560,5000159376655.0,2025-11-24,False,0.152919,2.033530,3.852900,3.400677,Skittles 38g
325739,Pro M - Billerud Sk√§rblacka_342051139,7317731501205.0,2025-11-24,False,0.152919,2.033530,3.843585,3.400677,Salt√• Kvarn Russin
29795,Pro M - Ahlsells Karlstad_256226127,7310401003290.0,2025-11-24,False,0.152919,2.031733,3.843585,3.445266,Schweppes Indian Tonic 50cl


In [6]:
from inventory_score import calculate_inventory_scores

# Compute inventory scores from the machines table, the products table and the
# raw predictions (the frame without the product_name column).
inventory_scores = calculate_inventory_scores(machines_df=machines_df, products_df=products_df, predictions_df=predictions)



In [7]:
# Display the result of calculate_inventory_scores as the cell output.
inventory_scores

Unnamed: 0,machine_key,inventory_score
0,PRO M - W√§m√∂_594015928,0.600440
1,Pro - L√§nsstyrelsen V√§ster√•s V√§nster_639546040,0.549767
2,PRO S - Verisure Link√∂ping_72327866,0.697071
3,Mat - Vasakronan_219574714,0.594431
4,912. Postnord Mat liten_521675443,0.676619
...,...,...
225,Pro Custom - SCA Ny_204330420,0.780353
226,Mat - Nordlock Mattmar 2_585332045,0.593319
227,Mat - Hangon Hillerstorp_877492288,0.672930
228,PRO M 8 - F√∂rsvarsmakten Enk√∂ping_580593846,0.667466


In [8]:
# Display the machines table taken from the loaded Firebase collections.
# NOTE(review): the rendered output includes site addresses and refiller
# names -- consider clearing it before sharing the notebook.
machines_df

Unnamed: 0,slots,machine_sub_group,sielaff_id,machine_eva_group,n_sales,notes,machine_name,product_wishes,last_sale,machine_id,machine_group_tag,machine_model,machine_key,refillers,location,sales_counter,doc_id,lastUpdated,createdAt,id
0,"[{'is_discount': False, 'price': 75, 'stock_cu...",Sjukhus,90548319,WAITING ROOM,1750,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,PRO M - W√§m√∂,[],2025-02-16,594015928,,PRO M,PRO M - W√§m√∂_594015928,[Region Blekinge],"{'address': 'B√§ttringsv√§gen 2, Byggnad 4, 371 ...",0.0,0I7EsuVLPWhooW6JFch9,,,0I7EsuVLPWhooW6JFch9
1,"[{'is_discount': False, 'image_url': 'https://...",Kontor,,WORK,802,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,Pro - L√§nsstyrelsen V√§ster√•s V√§nster,"[My new product wish, Test, Test, Test]",2025-02-14,639546040,,PRO M,Pro - L√§nsstyrelsen V√§ster√•s V√§nster_639546040,[Ozer Foods AB],"{'address': 'V√§stra Ringv√§gen 1, 721 86 V√§ster...",0.0,0UFPqnnadAqS8Xz7HPzH,,,0UFPqnnadAqS8Xz7HPzH
2,"[{'is_discount': False, 'category': 'Mat', 'st...",Kontor,90543705,WORK,11070,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,PRO S - Verisure Link√∂ping,,2025-02-16,72327866,,PRO S,PRO S - Verisure Link√∂ping_72327866,[Tornby Minimarknad AB],"{'address': 'Gumpekullav√§gen 8, 582 78 Link√∂pi...",0.0,1Ek0S7N3BoYqhL0lCRFq,,,1Ek0S7N3BoYqhL0lCRFq
3,"[{'product_name': 'Gooh! Korv Stroganoff', 'ca...",Kontor,0,WORK,8447,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,Mat - Vasakronan,[],2025-02-16,219574714,,PRO M,Mat - Vasakronan_219574714,[ICA N√§ra Stabby],"{'address': 'Ljusb√§rargatan 2, 754 23 Uppsala,...",0.0,1MPyIWSuen3Znb6KpapR,,,1MPyIWSuen3Znb6KpapR
4,"[{'product_name': 'Gooh! Korv Stroganoff', 'ca...",Industri,90548296,WORK,3226,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,912. Postnord Mat liten,,2025-02-14,521675443,,PRO M,912. Postnord Mat liten_521675443,[Magnus Jeppsson Livs AB],"{'address': 'Industrigatan 5, 641 34 Katrineho...",0.0,1VV42yepdPw5peDOIFC2,2025-11-11,,1VV42yepdPw5peDOIFC2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,"[{'is_discount': False, 'category': None, 'sto...",Industri,90543340,WORK,5417,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,Pro Custom - SCA Ny,,2025-02-16,204330420,,PRO M,Pro Custom - SCA Ny_204330420,[Kvantum Nacksta Fasand AB],"{'address': 'Tegelv√§gen 3a, 853 50 Sundsvall, ...",0.0,yvU8JwBkt8Rkwp2MQO4e,,,yvU8JwBkt8Rkwp2MQO4e
230,"[{'is_discount': False, 'category': 'Mat', 'st...",Industri,90539188,WORK,13626,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,Mat - Nordlock Mattmar 2,,2025-02-16,585332045,,PRO M,Mat - Nordlock Mattmar 2_585332045,[E Bernhardssons Matmarknad AB],"{'address': 'Halabacken 180, 837 91 Mattmar, S...",0.0,z9HDtuXr00P9uR1kqewJ,,,z9HDtuXr00P9uR1kqewJ
231,"[{'is_discount': False, 'category': '√ñvrigt', ...",Industri,90504601,WORK,24683,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,Mat - Hangon Hillerstorp,,2025-02-15,877492288,,PRO M,Mat - Hangon Hillerstorp_877492288,"[ICA N√§ra HillerstorpJJ Anner Mat AB, Ekbacken...","{'address': 'Industriv√§gen 7, 335 73 Hillersto...",0.0,zUMpU2iEVJiOblHGZKu5,,,zUMpU2iEVJiOblHGZKu5
232,"[{'is_discount': False, 'category': 'Mat', 'st...",Milit√§r,90552814,WORK,4680,[{'text': 'Anv√§nd detta f√§lt f√∂r att notera pr...,PRO M 8 - F√∂rsvarsmakten Enk√∂ping,,2025-02-14,580593846,,PRO M,PRO M 8 - F√∂rsvarsmakten Enk√∂ping_580593846,[Wems Mat AB],"{'address': 'Garnisonsv√§gen, 749 40 Enk√∂ping, ...",0.0,zUrR4cRLQ76A0eEC1dTV,,,zUrR4cRLQ76A0eEC1dTV
