Load the dataset using pandas.

In [None]:
import pandas as pd
# Read the raw CSV from the working directory into the DataFrame that the
# rest of the notebook operates on.
data = pd.read_csv(filepath_or_buffer='dataset.csv')

Preprocess the data by removing missing values.

In [None]:
# Drop, in place, every row that has at least one missing value.
data.dropna(axis=0, how='any', inplace=True)

Convert the date column to datetime format.

In [None]:
# Parse the 'date' column into pandas datetimes so it can be compared,
# sorted, and used as a plotting axis.
data['date'] = data['date'].pipe(pd.to_datetime)

Filter the data to keep only rows dated on or after 2020-01-01.

In [None]:
# Keep only the rows whose date is on or after 1 January 2020.
filtered_data = data.loc[data['date'].ge('2020-01-01')]

Sort the values of the dataset by date.

In [None]:
# Order the filtered rows chronologically (oldest first).
# NOTE(review): the cells that follow read from `data`, not `sorted_data` --
# confirm which frame the downstream analysis is meant to use.
sorted_data = filtered_data.sort_values('date', ascending=True)

Rename columns for clarity.

In [None]:
# Rename the column in place; labels absent from the mapping are left as-is.
data.rename(mapper={'old_name': 'new_name'}, axis='columns', inplace=True)

Convert the Ibex values to floats, replacing decimal commas with points, for numerical analysis.

In [None]:
# Swap every comma for a point, then cast the column to float.
# NOTE(review): assumes the values carry no thousands separators (a value
# such as "9.500,25" would not survive the float cast) -- confirm against
# the raw file.
data['ibex'] = (
    data['ibex']
    .replace(to_replace=',', value='.', regex=True)
    .astype('float64')
)

Plot the time series data.

In [None]:
import matplotlib.pyplot as plt
# Line plot of the 'value' column against the 'date' column.
x_dates = data['date']
y_values = data['value']
plt.plot(x_dates, y_values)
plt.show()

Apply log transformation to the value column.

In [None]:
import numpy as np
# Natural logarithm of 'value', stored as a new column.
data['log_value'] = data['value'].pipe(np.log)

Calculate the log difference to check for trends.

In [None]:
# First difference of the logged series (row minus previous row); the first
# row has no predecessor and is therefore NaN.
data['log_diff'] = data['log_value'].diff(periods=1)

Check for stationarity in the dataset.

In [None]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the log-differenced series. NaNs (such as
# the first row left empty by differencing) are dropped before testing.
result = adfuller(data['log_diff'].dropna())

# Report the outcome so the check is actually visible: the first two entries
# of the returned tuple are the test statistic and its p-value.
print(f"ADF statistic: {result[0]:.4f}")
print(f"p-value: {result[1]:.4f}")

Visualize ACF and PACF plots.

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Autocorrelation and partial-autocorrelation plots of the log-differenced
# series, with missing values removed.
log_diff_clean = data['log_diff'].dropna()
plot_acf(log_diff_clean)
plot_pacf(log_diff_clean)
plt.show()

Split the data into train and test sets, holding out the last 12 rows for testing.

In [None]:
# Hold out the 12 most recent observations as the test set. The split is
# positional, so the rows must be in chronological order first; a stable
# sort leaves data that is already ordered by date exactly as it was.
chronological = data.sort_values(by='date', kind='stable')
train = chronological.iloc[:-12]
test = chronological.iloc[-12:]

Optimize the model hyperparameters using auto ARIMA (seasonal, with a period of 12).

In [None]:
from pmdarima import auto_arima
# Search for the ARIMA orders on the training 'value' series, with a seasonal
# component enabled and a seasonal period of 12.
model = auto_arima(
    y=train['value'],
    m=12,
    seasonal=True,
)

Fit the SARIMAX model to the training data.

In [None]:
# Fit the selected model on the training 'value' series.
# NOTE(review): auto_arima is documented to return an already-fitted model,
# so this refit may be redundant -- confirm before removing.
model.fit(y=train['value'])

Make predictions for the test dataset.

In [None]:
# Forecast one step per row of the held-out test set.
predictions = model.predict(n_periods=test.shape[0])

Plot the actual values against the predictions.

In [None]:
# Overlay the held-out actual values and the forecasts on the test dates.
test_dates = test['date']
plt.plot(test_dates, test['value'], label='Actual')
plt.plot(test_dates, predictions, label='Predictions')
plt.legend()
plt.show()