In [None]:
import pandas as pd

# Share of the series (taken from the start) that is used for model fitting;
# the remainder is held out for evaluation.
TRAIN_FRACTION = 0.7

# Load the dataset with the 'date' column as index, then rebuild that index as
# a PeriodIndex with one-minute frequency.
# (The former `data.index.freq = '1min'` assignment was dropped: it was made on
# the index object that the conversion below replaces, so it had no effect.)
data = pd.read_csv('dataset.csv', index_col='date')
data.index = pd.DatetimeIndex(data.index).to_period('1min')

# Univariate target series for the forecasting cells.
co2_data = data['CO2']

# Positional split that preserves row order: the first TRAIN_FRACTION of the
# rows become `train`, the rest become `test`. No shuffling is applied.
split_point = int(TRAIN_FRACTION * len(co2_data))
train, test = co2_data.iloc[:split_point], co2_data.iloc[split_point:]


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise scatterplot grid built from the columns of `data`.
sns.pairplot(data=data)
plt.show()

# Pairwise correlation of the columns, drawn as a heatmap on its own axes
# with the coefficient written inside every cell (annot=True).
corr_matrix = data.corr()
heatmap_fig, heatmap_ax = plt.subplots()
sns.heatmap(data=corr_matrix, annot=True, ax=heatmap_ax)
plt.show()


In [None]:
from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima

# Augmented Dickey-Fuller test on the training series. The null hypothesis is
# that the series has a unit root (is non-stationary), so a small p-value
# suggests stationarity.
adf_result = adfuller(train)
adf_statistic, adf_pvalue = adf_result[0], adf_result[1]
# Print explicitly: a bare expression in the middle of a cell is not displayed,
# only the cell's last expression is.
print(f'ADF statistic: {adf_statistic:.4f}')
print(f'ADF p-value:   {adf_pvalue:.4f}')

# Search for the best non-seasonal ARIMA order; trace=True logs each candidate.
# Optional auto_arima arguments left disabled here:
#   error_action='ignore', suppress_warnings=True
auto_model = auto_arima(train, seasonal=False, trace=True)
auto_model.summary()


In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Walk-forward (rolling-origin) evaluation: for every held-out observation,
# refit the ARIMA on everything observed so far, forecast one step ahead, then
# reveal the true value before moving on.
#
# `history` is a separate working series so that `train` keeps holding only
# the original training split. Rebinding `train` inside the loop would leave it
# containing the whole dataset afterwards, which makes this cell non-idempotent
# on re-run and mislabels any later use of `train`.
history = train.copy()
forecast = []

for step in range(len(test)):
    # Refit on all data available up to (but excluding) the current test point.
    model_fit = ARIMA(history, order=auto_model.order).fit()

    # One-step-ahead prediction; keep just the scalar value.
    forecast.append(model_fit.forecast(steps=1).iloc[0])

    # Extend the history with the actual observation. A one-row slice of
    # `test` carries its own index label, so the index type is preserved.
    history = pd.concat([history, test.iloc[step:step + 1]])


In [None]:
import numpy as np

# Put the forecasts on the same index as `test` so that plotting and the error
# computation line up label-by-label instead of relying on list position.
forecast_series = pd.Series(forecast, index=test.index, name='Forecast')

# Plot the predictions against the actual values.
# The series are indexed by a PeriodIndex; convert the x values to timestamps
# so matplotlib can draw them without depending on pandas' Period unit
# converters having been registered.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(train.index.to_timestamp(), train.to_numpy(), label='Train')
ax.plot(test.index.to_timestamp(), test.to_numpy(), label='Test')
ax.plot(forecast_series.index.to_timestamp(), forecast_series.to_numpy(), label='Forecast')
ax.set_title('CO2 Forecast vs Actual (Rolling Forecast)')
ax.set_xlabel('Date')
ax.set_ylabel('CO2 Levels')
ax.legend()
plt.show()

# Mean Absolute Error (MAE) between the one-step forecasts and the held-out values.
MAE = np.mean(np.abs(forecast_series - test))
print('Mean Absolute Error (MAE): ' + str(np.round(MAE, 2)))
