In [3]:
import pandas as pd
from pathlib import Path

In [4]:
# Set up paths
data_dir = Path('/workspaces/demand-forecasting/data').expanduser()
# Load sales data
sales = pd.read_csv(data_dir / 'sales.csv', index_col=0)
online = pd.read_csv(data_dir / 'online.csv', index_col=0)
# Combine sales and online
sales['channel'] = 'offline'
online['channel'] = 'online'
all_sales = pd.concat([sales, online], ignore_index=True)

In [5]:
# Parse and filter data for specified date range
all_sales['date'] = pd.to_datetime(sales['date'])
mask = (all_sales['date'] >= '2023-12-01') & (all_sales['date'] <= '2024-09-30')
sales_filtered = all_sales.loc[mask].copy()  # Create copy to avoid SettingWithCopyWarning

In [6]:
# 2. Compute monthly averages
# Convert to monthly period for grouping
sales_filtered['year_month'] = sales_filtered['date'].dt.to_period('M')

In [7]:
sales_filtered.head()

Unnamed: 0,date,item_id,quantity,price_base,sum_total,store_id,channel,year_month
535529,2023-12-01,a36b00204cbe,1.0,129.9,129.9,1,offline,2023-12
535530,2023-12-01,b568135541ea,1.0,325.0,325.0,1,offline,2023-12
535531,2023-12-01,57f6f314c80c,1.0,149.9,149.9,1,offline,2023-12
535532,2023-12-01,3549414407b4,1.0,199.9,199.9,1,offline,2023-12
535533,2023-12-01,3dd6b77c8dd3,13.0,54.9,713.7,1,offline,2023-12


In [8]:
monthly_avgs = (
    sales_filtered.groupby(['item_id', 'store_id'])['quantity']
    .mean()
    .reset_index()
    .rename(columns={'quantity': 'monthly_avg'})
)

print(f"Computed averages for {len(monthly_avgs)} item-store combinations")

Computed averages for 18696 item-store combinations


In [17]:
# 3. Read the test CSV and prepare for predictions
print("Loading test data...")
# Read test data with semicolon separator and split columns
test = pd.read_csv(data_dir / 'test.csv', sep=';')

Loading test data...


In [18]:
# Convert date format (DD.MM.YYYY to YYYY-MM-DD)
test['date'] = pd.to_datetime(test['date'], format='%d.%m.%Y').dt.strftime('%Y-%m-%d')
test['quantity'] = 0.0  # default value for missing combinations

In [19]:
test.head()

Unnamed: 0,row_id,item_id,store_id,date,quantity
0,0,c578da8e8841,1,2024-09-27,0.0
1,1,c578da8e8841,1,2024-09-28,0.0
2,2,c578da8e8841,1,2024-09-29,0.0
3,3,c578da8e8841,1,2024-09-30,0.0
4,4,c578da8e8841,1,2024-10-01,0.0


In [20]:
# Merge test with monthly averages
merged = test.merge(monthly_avgs, on=['item_id', 'store_id'], how='left')
merged['quantity'] = merged['monthly_avg'].fillna(0.0)

In [21]:
# merged.head()

Unnamed: 0,row_id,item_id,store_id,date,quantity,monthly_avg
0,0,c578da8e8841,1,2024-09-27,7.628763,7.628763
1,1,c578da8e8841,1,2024-09-28,7.628763,7.628763
2,2,c578da8e8841,1,2024-09-29,7.628763,7.628763
3,3,c578da8e8841,1,2024-09-30,7.628763,7.628763
4,4,c578da8e8841,1,2024-10-01,7.628763,7.628763


In [22]:
# 4. Format and save submission
submission = merged[['row_id', 'quantity']]
submission.head()

Unnamed: 0,row_id,quantity
0,0,7.628763
1,1,7.628763
2,2,7.628763
3,3,7.628763
4,4,7.628763


In [24]:
submission.to_csv(data_dir / 'submission_avg_20250106.csv', index=False)