In [None]:
import pandas as pd
from datetime import datetime
from pmdarima.arima import auto_arima
from pmdarima.arima import ADFTest
from sklearn.metrics import r2_score, mean_squared_error
import math
import matplotlib.pyplot as plt

In [None]:
# Load the monthly sales table and index it by month so that slicing,
# plotting and forecasting further down all work on a time-indexed frame.
data = pd.read_csv("monthly_sales.csv")
# Vectorised parse of the "YYYY-MM" strings; the explicit format keeps the
# parsing strict instead of relying on format inference.
data["month"] = pd.to_datetime(data["month"], format="%Y-%m")
# Rebind rather than using inplace=True: the statement stays side-effect free
# and chainable.
data = data.set_index("month")

In [None]:
# Sanity check: (number of rows, number of columns) of the loaded frame.
data.shape

In [None]:
# Column dtypes of the loaded frame; "month" was moved into the index above,
# so it is not listed among the columns.
data.dtypes

In [None]:
# Render the month-indexed frame for a visual check of the raw values.
data

In [None]:
# Plot every column against the month index to eyeball trend and seasonality.
data.plot()

In [None]:
# Augmented Dickey-Fuller stationarity check using pmdarima's default settings.
# Per the pmdarima docs, should_diff reports the test p-value together with a
# flag saying whether differencing is recommended.
# NOTE(review): the outcome is only displayed; the model-fitting cell below
# hard-codes d=1 regardless of what this test reports.
adf_test = ADFTest()
adf_test.should_diff(data)

In [None]:
# Chronological hold-out split: the first `training_sample_count` rows are used
# for fitting, every row after them is kept for evaluation.
training_sample_count = 96
# Fail early, with a readable message, if the split would leave either side empty.
assert 0 < training_sample_count < len(data), (
    f"training_sample_count={training_sample_count} must be between 1 and {len(data) - 1}"
)
# .iloc makes the positional intent explicit; .copy() gives each split its own
# data so that adding columns to `test` later does not write into a slice of
# `data` (which triggers SettingWithCopyWarning).
train = data.iloc[:training_sample_count].copy()
test = data.iloc[training_sample_count:].copy()
# Visual check of where the split falls.
plt.plot(train)
plt.plot(test)

In [None]:
# Search for a seasonal ARIMA model on the training split.
# - d=1 / D=1 pin the regular and seasonal differencing orders, so only the
#   AR/MA orders are searched (max_d / max_D are kept, but should be inert with
#   the orders pinned — TODO confirm against the pmdarima docs).
# - m=12 declares a seasonal cycle of 12 observations.
# - max_P has to be greater than start_P for any seasonal AR term to be tried;
#   it is aligned with the other order bounds.
arima_model = auto_arima(
    train,
    start_p=0, d=1, start_q=0,
    max_p=5, max_d=5, max_q=5,
    start_P=0, D=1, start_Q=0,
    max_P=5, max_D=5, max_Q=5,
    m=12, seasonal=True,
)

In [None]:
# Display the summary report of the model selected by auto_arima.
arima_model.summary()

In [None]:
# Forecast one step for every row of the hold-out split.
prediction_periods = len(test)
forecast = arima_model.predict(n_periods=prediction_periods)
# Depending on the pmdarima version, predict() returns either a bare array or a
# Series carrying its own index. Passing a Series together with index= makes
# pandas re-align by label, which silently turns every value into NaN when the
# labels differ from test.index. Stripping the index first guarantees the
# forecasts are attached to the hold-out rows positionally.
prediction = pd.DataFrame(pd.Series(forecast).to_numpy(), index=test.index)
prediction

In [None]:
# Overlay the training data, the hold-out data and the forecast on one wide
# figure so the quality of the fit can be judged visually.
fig, ax = plt.subplots(figsize=(15, 5))
for series, series_label in (
    (train, "Train"),
    (test, "Test"),
    (prediction, "Prediction"),
):
    ax.plot(series, label=series_label)
ax.legend(loc="upper right")
plt.show()

In [None]:
# Render the hold-out split for a visual check before scoring the forecast.
test

In [None]:
# Attach the forecasts next to the actuals. assign() builds a new frame instead
# of writing a column into `test` in place, which would trigger
# SettingWithCopyWarning because `test` is a slice of `data`. The single
# forecast column is selected explicitly and matched to rows by index label.
test = test.assign(predicted_sales=prediction.iloc[:, 0])
# Coefficient of determination (R^2) of the forecast on the hold-out rows.
r2_score(test["sales"], test["predicted_sales"])

In [None]:
# Root-mean-squared error of the forecast on the hold-out rows, expressed in
# the same units as the "sales" column. Relies on the "predicted_sales" column
# added to `test` in the previous cell.
math.sqrt(mean_squared_error(test["sales"], test["predicted_sales"]))