# Random Forest Model
Train and evaluate a Random Forest regressor for sales forecasting, using a synthetic demo dataset.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Demo feature set: a synthetic daily series `y` plus calendar, lag and
# rolling-mean features derived from it.
dates = pd.date_range('2020-01-01', '2023-12-31')
np.random.seed(42)  # fixed seed so the synthetic series is reproducible
df = pd.DataFrame({'date': dates})
# Target: Gaussian noise (mean 5000, std 1500), floored at 0.
df['y'] = np.random.normal(5000, 1500, len(dates)).clip(0)

# Calendar features; the month is additionally encoded on the unit circle so
# that December and January end up adjacent.
df['month'] = df['date'].dt.month
df['dayofweek'] = df['date'].dt.dayofweek
df['sin_month'] = np.sin(2 * np.pi * df['month'] / 12)
df['cos_month'] = np.cos(2 * np.pi * df['month'] / 12)

# Lagged targets: the value of y observed 7 and 30 rows (days) earlier.
for lag in [7, 30]:
    df[f'y_lag_{lag}'] = df['y'].shift(lag)

# Rolling means over the w days *before* each row. The shift(1) keeps the
# current row's y out of its own window; without it the target leaks into
# the features and evaluation scores are inflated.
for w in [7, 30]:
    df[f'y_roll_{w}'] = df['y'].shift(1).rolling(w).mean()

# The first 30 rows lack a full lag/rolling history; drop them.
df = df.dropna().reset_index(drop=True)

# Features are every engineered column; the raw date and the target itself
# are excluded.
X = df.drop(columns=['y', 'date'])
y = df['y']

# shuffle=False keeps row order, so the last 20% of rows form the test set
# and the model is never trained on rows that come after the ones it is
# evaluated on.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

rf = RandomForestRegressor(n_estimators=300, max_depth=None, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)
pred = rf.predict(X_test)

mae = mean_absolute_error(y_test, pred)
# RMSE is computed as sqrt(MSE) explicitly: the `squared=False` keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, so passing it raises a
# TypeError on current releases. This form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, pred))
r2 = r2_score(y_test, pred)
print({'MAE': mae, 'RMSE': rmse, 'R2': r2})

# Impurity-based feature importances, largest first.
importances = pd.Series(rf.feature_importances_, index=X.columns).sort_values(ascending=False)
print(importances.head(10))
