# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# Generate synthetic monthly sales: Poisson counts (lam=200) plus a linear
# ramp from 0 to 50 across the date range, indexed by month-end dates.
np.random.seed(0)  # fixed seed so the sample data is reproducible
# Use an explicit MonthEnd offset instead of the string alias 'M': the alias is
# deprecated in pandas 2.2+ (renamed 'ME'), whereas the offset object produces
# the same month-end dates on both older and newer pandas versions.
date_rng = pd.date_range(
    start='2020-01-01',
    end='2023-12-31',
    freq=pd.offsets.MonthEnd(),
)
n_periods = len(date_rng)
sales_data = (
    np.random.poisson(lam=200, size=n_periods)
    + np.linspace(0, 50, n_periods)
)
sales_df = pd.DataFrame(sales_data, index=date_rng, columns=['Sales'])

# Draw the full monthly sales series on a single 12x6 figure
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(sales_df, label='Monthly Sales')
ax.set_title('Sales Data')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
plt.show()

# Chronological hold-out split: the first 80% of rows form the training set
# and the remaining rows form the test set (row order is preserved).
train_size = int(len(sales_df) * 0.8)
train = sales_df.iloc[:train_size]
test = sales_df.iloc[train_size:]

# Fit an ARIMA model on the training window.
# The (5, 1, 0) order is a starting point; tune it based on ACF and PACF plots.
arima_order = (5, 1, 0)
model = ARIMA(train, order=arima_order)
model_fit = model.fit()

# Forecast one step for every observation in the test set
forecast = model_fit.forecast(steps=len(test))
# Attach the test index positionally. Passing a Series to
# pd.Series(..., index=...) reindexes by label, which silently yields NaN
# wherever the forecast's own labels differ from test.index; converting to a
# plain array first guarantees the values are kept in order.
predictions = pd.Series(
    np.asarray(forecast),
    index=test.index,
    name=getattr(forecast, 'name', None),  # keep the forecast's name, if any
)

# Overlay training data, held-out actuals and the forecast on one figure
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train, label='Training Data')
ax.plot(test, label='Actual Sales', color='orange')
ax.plot(predictions, label='Predicted Sales', color='green')
ax.set_title('Sales Forecasting')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
plt.show()

# Score the forecast against the held-out test set with mean squared error
mse = mean_squared_error(y_true=test, y_pred=predictions)
print(f'Mean Squared Error: {mse}')