In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load the raw sales table and convert its `date` column to datetimes
# in one chained expression.
df = (
    pd.read_csv('../data/sales.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [3]:
# Derive calendar features from the parsed `date` column.
date_parts = df['date'].dt
df['month'] = date_parts.month            # 1-12
df['day_of_week'] = date_parts.dayofweek  # Monday=0 ... Sunday=6

In [4]:
# Build the modelling table from rows where every feature AND the target
# are present. Missing revenue is dropped rather than filled with 0:
# a zero-filled target would teach the model (and score it against)
# revenue values that were never observed.
feature_columns = ['cost', 'month', 'day_of_week']
model_rows = df[feature_columns + ['revenue']].dropna()
features = model_rows[feature_columns]
target = model_rows['revenue']

# 80/20 random split with a fixed seed so the partition is reproducible.
# NOTE(review): rows carry dates; if the goal is forecasting future sales,
# a chronological split may be more appropriate than a random one — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

In [5]:
# Construct and fit the forest in one step (`fit` returns the estimator
# itself), then echo it so the cell still displays the fitted model.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)
model

In [6]:
# Evaluate the fitted model on the held-out split; for a regressor,
# `score` reports the coefficient of determination (R^2).
score = model.score(X=X_test, y=y_test)
print(f'R-squared: {score:.4f}')

In [7]:
# Per-feature importances from the fitted forest, printed one per line.
# Labels come from the training frame's own columns rather than a second
# hand-typed list, so they cannot drift out of sync with the columns the
# model was actually fitted on.
importances = model.feature_importances_
for name, imp in zip(X_train.columns, importances):
    print(f'{name}: {imp:.4f}')