# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# --- Load raw inputs ---------------------------------------------------------
# Paths are relative to the current working directory.
sales_data = pd.read_csv('data/sales_data.csv')
# NOTE: inventory_data is loaded but not referenced anywhere in this section.
inventory_data = pd.read_csv('data/inventory_data.csv')

# --- Derive calendar columns -------------------------------------------------
# Parse 'date' once, store the parsed values back on the frame, and expose the
# month and year components as their own columns for grouping.
sale_dates = pd.to_datetime(sales_data['date'])
sales_data['date'] = sale_dates
sales_data['month'] = sale_dates.dt.month
sales_data['year'] = sale_dates.dt.year

# --- Monthly totals per product ----------------------------------------------
# One row per (year, month, product_id) with quantity_sold and revenue summed;
# reset_index() turns the group keys back into ordinary columns.
monthly_sales = (
    sales_data
    .groupby(['year', 'month', 'product_id'])
    .agg(
        quantity_sold=('quantity_sold', 'sum'),
        revenue=('revenue', 'sum'),
    )
    .reset_index()
)

# Single seed shared by the split and the model so that a re-run reproduces
# the same train/test partition, the same fitted forest, and the same metric.
RANDOM_STATE = 42

# Prepare Features and Target
# The model predicts the summed monthly quantity_sold from the calendar period
# and the product identifier.
# NOTE(review): product_id is fed to the forest as-is; if it is a numeric ID it
# will be treated as an ordered number rather than a category - confirm that
# this is intended.
X = monthly_sales[['year', 'month', 'product_id']]
y = monthly_sales['quantity_sold']

# Split Data
# 80/20 random hold-out.
# NOTE(review): a random split mixes earlier and later months between train and
# test; if the goal is forecasting future periods, a chronological split may be
# the more honest evaluation - confirm with the author.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Model Training
# random_state is set explicitly: without it the forest's bootstrap sampling
# and feature selection are re-drawn on every run, so the predictions and the
# reported error would change even though the split itself is seeded.
model = RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# --- Score the held-out rows -------------------------------------------------
y_pred = model.predict(X_test)

# --- Report the error --------------------------------------------------------
# Mean squared error between the held-out targets and the model's predictions.
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# --- Actual vs predicted scatter ---------------------------------------------
# Each point is one held-out row: x is the observed value, y is the prediction.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred)
ax.set_xlabel('Actual Sales')
ax.set_ylabel('Predicted Sales')
ax.set_title('Actual vs Predicted Sales')
plt.show()