In [15]:
%pip install torch
%pip install pmdarima

Note: you may need to restart the kernel to use updated packages.




In [1]:
DATA_TRAIN_PROCESSED = '../data/prepocessed/train.csv'

# Import

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [179]:
from statsmodels.tsa.forecasting.theta import ThetaModel
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima

from sklearn.linear_model import LinearRegression
import xgboost as xgb


from sklearn.preprocessing import MinMaxScaler, StandardScaler

import torch
import torch.nn as nn

# Define

## Preprocessing

In [17]:
def normalize_data(data, method="minmax"):
    """Chuẩn hóa dữ liệu theo phương pháp MinMax hoặc StandardScaler."""
    if method == "minmax":
        scaler = MinMaxScaler(feature_range=(0, 1))
    elif method == "standard":
        scaler = StandardScaler()
    else:
        return data, None

    data_scaled = scaler.fit_transform(np.array(data).reshape(-1, 1))
    return data_scaled, scaler


def create_sequences(data, lookback):
    """Tạo dữ liệu theo dạng (X, y) cho các model dùng sliding window."""
    X, y = [], []
    for i in range(len(data) - lookback):
        X.append(data[i:i+lookback])
        y.append(data[i+lookback])
    return np.array(X), np.array(y)

## Model

In [121]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        return self.fc(lstm_out[:, -1, :])

In [122]:
def train_lstm(X_train, y_train, epochs=50):
    """Huấn luyện mô hình LSTM."""
    model = LSTMModel(input_dim=1, hidden_dim=50, num_layers=2, output_dim=1)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32)

    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()

    return model

In [123]:
def train_arima(train_data):
    """Huấn luyện mô hình ARIMA."""
    model = auto_arima(train_data, seasonal=False, stepwise=True, suppress_warnings=True, trace=True)
    return model

In [124]:
def train_sarima(train_data, seasonality=12):
    """Huấn luyện mô hình SARIMA."""
    model = auto_arima(train_data, seasonal=True, m=seasonality, stepwise=True, suppress_warnings=True, trace=True)
    return model

In [125]:
def train_theta(train_data, seasonality=12):
    """Huấn luyện mô hình Theta."""
    return ThetaModel(train_data, period=seasonality).fit()

In [126]:
def train_xgboost(X_train, y_train):
    """Huấn luyện mô hình XGBoost."""
    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100)
    model.fit(X_train.reshape(X_train.shape[0], -1), y_train)
    return model

In [127]:
def train_linear(X_train, y_train):
    """Huấn luyện mô hình Linear Regression."""
    model = LinearRegression()
    model.fit(X_train.reshape(X_train.shape[0], -1), y_train)
    return model

In [None]:
# ---------------- Base Class ---------------- #
class BaseTimeSeriesModel:
    def __init__(self, train_data):
        self.train_data = train_data
        self.model = None
    
    def train(self):
        raise NotImplementedError("train() must be implemented in subclass")
    
    def predict(self, test_data):
        raise NotImplementedError("predict() must be implemented in subclass")


# ---------------- LSTM Model ---------------- #
class LSTMModel(BaseTimeSeriesModel):
    def __init__(self, train_data, lookback):
        super().__init__(train_data)
        self.lookback = lookback
        self.model = None

    def train(self, X_train, y_train, epochs=50, lr=0.001):
        input_dim = X_train.shape[2]
        self.model = nn.LSTM(input_size=input_dim, hidden_size=50, num_layers=2, batch_first=True)
        optimizer = optim.Adam(self.model.parameters(), lr=lr)
        loss_fn = nn.MSELoss()
        
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
        y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

        for epoch in range(epochs):
            optimizer.zero_grad()
            output, _ = self.model(X_train_tensor)
            loss = loss_fn(output[:, -1], y_train_tensor)
            loss.backward()
            optimizer.step()

    def predict(self, X_test):
        self.model.eval()
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
        with torch.no_grad():
            y_pred = self.model(X_test_tensor)[0][:, -1].numpy()
        return y_pred


# ---------------- ARIMA Model ---------------- #
class ARIMAModel(BaseTimeSeriesModel):
    def __init__(self, train_data, order=(1,1,1)):
        super().__init__(train_data)
        self.order = order

    def train(self):
        self.model = ARIMA(self.train_data, order=self.order).fit()

    def predict(self, test_data):
        history = list(self.train_data)
        predictions, conf_int = [], []

        for t in range(len(test_data)):
            model = ARIMA(history, order=self.order).fit()
            y_pred = model.forecast(steps=1)[0]
            predictions.append(y_pred)

            residuals = self.train_data - model.fittedvalues
            std_error = np.std(residuals, ddof=1)
            conf_int.append([y_pred - 1.96 * std_error, y_pred + 1.96 * std_error])

            history.append(test_data.iloc[t])

        return np.array(predictions), np.array(conf_int)


# ---------------- SARIMA Model ---------------- #
class SARIMAModel(BaseTimeSeriesModel):
    def __init__(self, train_data, seasonal_order, order=(1,1,1)):
        super().__init__(train_data)
        self.order = order
        self.seasonal_order = seasonal_order

    def train(self):
        self.model = SARIMAX(self.train_data, order=self.order, seasonal_order=self.seasonal_order).fit()

    def predict(self, test_data):
        history = list(self.train_data)
        predictions, conf_int = [], []

        for t in range(len(test_data)):
            model = SARIMAX(history, order=self.order, seasonal_order=self.seasonal_order).fit()
            y_pred = model.forecast(steps=1)[0]
            predictions.append(y_pred)

            residuals = self.train_data - model.fittedvalues
            std_error = np.std(residuals, ddof=1)
            conf_int.append([y_pred - 1.96 * std_error, y_pred + 1.96 * std_error])

            history.append(test_data.iloc[t])

        return np.array(predictions), np.array(conf_int)


# ---------------- Theta Model ---------------- #
class ThetaForecastModel(BaseTimeSeriesModel):
    def __init__(self, train_data, seasonality):
        super().__init__(train_data)
        self.seasonality = seasonality

    def train(self):
        self.model = ThetaModel(self.train_data, period=self.seasonality).fit()

    def predict(self, test_data):
        history = list(self.train_data)
        predictions, conf_int = [], []

        for t in range(len(test_data)):
            model = ThetaModel(history, period=self.seasonality).fit()
            y_pred = model.forecast(steps=1)[0]
            predictions.append(y_pred)

            residuals = self.train_data - model.fittedvalues
            std_error = np.std(residuals, ddof=1)
            conf_int.append([y_pred - 1.96 * std_error, y_pred + 1.96 * std_error])

            history.append(test_data.iloc[t])

        return np.array(predictions), np.array(conf_int)




In [200]:
class ARIMAModel:
    def __init__(self,train_data):
        self.train_data = train_data
    def fit(self):
        self.model = auto_arima(
            self.train_data, 
            max_p=5, max_d=2, max_q=5,
            stepwise=True, suppress_warnings=True
        )
        self.order = self.model.order  

    def predict(self, steps):
        history = list(self.train_data)
        predictions, conf_int = [], []

        for t in range(steps):
            model = ARIMA(history, order=self.order).fit()
            y_pred = model.forecast(steps=1)[0]
            predictions.append(y_pred)

            residuals = self.train_data - model.fittedvalues
            std_error = np.std(residuals, ddof=1)
            conf_int.append([y_pred - 1.96 * std_error, y_pred + 1.96 * std_error])

            history.append(y_pred)

        return np.array(predictions), np.array(conf_int)



In [201]:
class SARIMAXModel:
    def __init__(self,train_data, seasonality):
        self.train_data = train_data
        self.seasonality = seasonality
    def fit(self):
        self.model = auto_arima(
            self.train_data, 
            seasonal=True, m=self.seasonality,  
            max_p=5, max_d=2, max_q=5,
            max_P=2, max_D=1, max_Q=2,
            stepwise=True, suppress_warnings=True
        )
        self.order = self.model.order
        self.seasonal_order = self.model.seasonal_order

    def predict(self, steps):
        forecast_result = self.model.predict(n_periods=steps, return_conf_int=True)
        predictions, conf_int = forecast_result[0], forecast_result[1]

        return np.array(predictions), (conf_int[:, 0], conf_int[:, 1])


In [202]:
class THETAModel:
    def __init__(self,train_data, seasonality):
        self.train_data = train_data
        self.seasonality = seasonality
    def fit(self):
        self.model = ThetaModel(self.train_data, period=self.seasonality).fit()
    def predict(self, steps):
        predictions = self.model.forecast(steps=steps)
        return predictions.values, None

## Pipeline

In [150]:
def forecast_pipeline(data, model_type, normalize="minmax", lookback=10, **kwargs):
    """
    Pipeline huấn luyện và dự báo chuỗi thời gian với nhiều model.
    
    Parameters:
        - data: pd.DataFrame (chuỗi thời gian, gồm cột 'Date' và 'temperatures')
        - model_type: str (['LSTM', 'ARIMA', 'SARIMA', 'Theta', 'XGBoost', 'Linear'])
        - normalize: str (['minmax', 'standard', 'none'])
        - lookback: int (số bước nhìn lại cho mô hình LSTM, XGBoost, Linear Regression)
        - kwargs: Các tham số bổ sung cho từng loại model
            + seasonality: chỉ yêu cầu cho SARIMA và Theta
    """
    # ________________________________________ CHECK INPUT _______________________________________ #
    if not isinstance(data, pd.DataFrame) or "Date" not in data.columns or "temperatures" not in data.columns:
        raise ValueError("Input data must be a DataFrame with columns ['Date', 'temperatures']")

    data_copy = data.copy()
    data_copy["Date"] = pd.to_datetime(data_copy["Date"])
    data_copy.set_index("Date", inplace=True)
    
    # _________________________________________ PREPROCESS ________________________________________ #
    data_scaled, scaler = normalize_data(data_copy, method=normalize)
    split_idx = int(len(data_copy) * 0.8)
    train_data = data_scaled[:split_idx]
    val_data = data_scaled[:split_idx]
    steps = len(val_data)
    
    #___________________________________________TRAIN MODEL_________________________________________#
    if model_type in ["ARIMA","SARIMA","Theta"]:
        if model_type == "ARIMA":
            model = ARIMAModel(train_data)
        elif model_type == "SARIMA":
            model = SARIMAXModel(train_data, seasonality)
        elif model_type == "Theta":
            model = THETAModel(train_data, seasonality)
        
        model.fit()
        prediction, conf_int = model.predict(steps)
        
        
    elif model_type in ["LSTM", "XGBoost", "Linear"]:
        X_train, y_train = create_sequences(train_data, lookback)
        X_val, y_val = create_sequences(val_data, lookback)
        if model_type == "LSTM":
            model = LSTMModel(train_data, lookback)
            model.train(X_train, y_train)
            X_val = torch.tensor(X_val, dtype=torch.float32)
            with torch.no_grad():
                y_pred = model.model(X_val)[0][:, -1].numpy()

                

# class LinearRegressionModel(BaseTimeSeriesModel):
#     def __init__(self, train_data):
#         super().__init__(train_data)

#     def train(self, X_train, y_train):
#         self.model = LinearRegression()
#         self.model.fit(X_train, y_train)

#     def predict(self, X_test):
#         return self.model.predict(X_test)
    
#         elif model_type == "XGBoost":
#             model = xgb.XGBRegressor(objective="reg:squarederror")
#             model.fit(X_train, y_train)
#             y_pred = model.predict(X_val)

#         elif model_type == "Linear":
#             model = LinearRegression()
#             model.train(X_train, y_train)
#             y_pred = model.predict(X_val)

    
    else:
        raise ValueError("Unsupported model type. Choose from ['LSTM', 'ARIMA', 'SARIMA', 'Theta', 'XGBoost', 'Linear'].")
    
    # _______________________________________ EVALUATE ___________________________________________ #
    if scaler:
        y_pred_actual, y_val_actual = val_data[:len(y_pred)].copy(), val_data[:len(y_pred)].copy()
        y_pred_actual["temperatures"] = scaler.inverse_transform(y_pred.reshape(-1, 1))
        y_val_actual["temperatures"] = scaler.inverse_transform(y_val_actual["temperatures"].values.reshape(-1, 1))
        if conf_int is not None:
            lower_bound = scaler.inverse_transform(conf_int[0].reshape(-1, 1))
            upper_bound = scaler.inverse_transform(conf_int[1].reshape(-1, 1))
            conf_int = (lower_bound.flatten(), upper_bound.flatten())
    else:
        y_pred_actual, y_val_actual = y_pred, val_data[:len(y_pred)]
    
    return data_copy, y_val_actual, y_pred_actual, model_type, normalize, conf_int


In [None]:
class StatisticModelPipeline:
    def __init__(self, model_class, train_data, scaler=None, **model_params):
        """
        Pipeline cho các mô hình thống kê (ARIMA, SARIMA, Theta) với tùy chọn scaler.
        
        :param model_class: Lớp mô hình (ARIMAModel, SARIMAModel, ThetaForecastModel)
        :param train_data: Dữ liệu train (Series hoặc DataFrame)
        :param scaler: Bộ scaler để chuẩn hóa dữ liệu (nếu có), mặc định là None
        :param model_params: Các tham số khởi tạo cho mô hình (order, seasonality,...)
        """
        self.scaler = scaler
        self.train_data_original = train_data  
        self.train_data = self._scale_data(train_data) if scaler else train_data
        self.model = model_class(self.train_data, **model_params)

    def _scale_data(self, data):
        """Chuẩn hóa dữ liệu nếu có scaler"""
        data = np.array(data).reshape(-1, 1)  
        return self.scaler.fit_transform(data).flatten() 

    def _inverse_scale(self, data):
        """Khôi phục dữ liệu về giá trị gốc nếu có scaler"""
        return self.scaler.inverse_transform(np.array(data).reshape(-1, 1)).flatten() if self.scaler else data

    def run(self, steps):
        """
        Huấn luyện mô hình và dự báo.
        
        :param steps: Số bước cần dự đoán
        :return: Kết quả dự đoán (có thể đã được inverse transform nếu có scaler) và khoảng tin cậy (nếu có)
        """
        self.model.train()
        predictions, conf_int = self.model.predict(steps)

        # Nếu có scaler, khôi phục dữ liệu về giá trị gốc
        if self.scaler:
            predictions = self._inverse_scale(predictions)
            if conf_int is not None:
                lower_bound = self._inverse_scale(conf_int[:, 0])
                upper_bound = self._inverse_scale(conf_int[:, 1])
                conf_int = np.column_stack((lower_bound, upper_bound))

        return predictions, conf_int


In [191]:
def forecast_pipeline(data, model_type, normalize="minmax", **kwargs):
    """
    Pipeline huấn luyện và dự báo chuỗi thời gian với nhiều model.
    
    Parameters:
        - data: pd.DataFrame (chuỗi thời gian, gồm cột 'Date' và 'temperatures')
        - model_type: str (['ARIMA', 'SARIMA', 'Theta'])
        - normalize: str (['minmax', 'standard', 'none'])
        - kwargs: Các tham số bổ sung cho từng loại model
            + seasonality: chỉ yêu cầu cho SARIMA và Theta
    """
    # ________________________________________ CHECK INPUT _______________________________________ #
    if not isinstance(data, pd.DataFrame) or "Date" not in data.columns or "temperatures" not in data.columns:
        raise ValueError("Input data must be a DataFrame with columns ['Date', 'temperatures']")

    data_copy = data.copy()
    data_copy["Date"] = pd.to_datetime(data_copy["Date"])
    data_copy.set_index("Date", inplace=True)
    
    # _________________________________________ PREPROCESS ________________________________________ #
    data_scaled, scaler = normalize_data(data_copy, method=normalize)
    split_idx = int(len(data_copy) * 0.8)
    train_data, val_data = data_scaled[:split_idx], data_scaled[split_idx:]
    
    # ________________________________________ TRAIN MODEL _______________________________________ #
    conf_int = None  
    model = None
    
    # ------------------------- STATISTICAL MODELS --------------------------- #
    if model_type == "ARIMA":
        model = ARIMAModel(train_data)
    
    elif model_type == "SARIMA":
        seasonality = kwargs.get("seasonality", 12)  
        model = SARIMAXModel(train_data, m=seasonality)
    
    elif model_type == "Theta":
        seasonality = kwargs.get("seasonality", 12) 
        model = THETAModel(train_data, seasonality)
    model.train()
    y_pred, conf_int = model.predict(len(val_data))
        
    # _______________________________________ EVALUATE ___________________________________________ #
    if scaler:
        y_pred_actual, y_val_actual = val_data.copy(), val_data.copy()
        print(y_pred_actual)
        y_pred_actual["temperatures"] = scaler.inverse_transform(y_pred.reshape(-1, 1))
        if conf_int is not None:
            lower_bound = scaler.inverse_transform(conf_int[0].reshape(-1, 1))
            upper_bound = scaler.inverse_transform(conf_int[1].reshape(-1, 1))
            conf_int = (lower_bound.flatten(), upper_bound.flatten())
    else:
        y_pred_actual, y_val_actual = y_pred, val_data
    
    return data_copy, y_val_actual, y_pred_actual, model_type, normalize, conf_int


In [167]:
def plot_forecast(data_original, y_val_actual, y_pred_actual, model_type, normalize, conf_int=None):
    plt.figure(figsize=(12, 6))

    original_index = data_original.index
    val_index = y_val_actual.index
    
    plt.plot(original_index, data_original, label="Original Data", color='gray', alpha=0.5)
    
    # Vẽ dữ liệu thực tế
    plt.plot(val_index, y_val_actual, label="Actual", color='blue')
    
    # Vẽ dữ liệu dự báo
    plt.plot(val_index[:len(y_pred_actual)], y_pred_actual, label="Predicted", color='red', linestyle='dashed')

    # Vẽ vùng tin cậy (nếu có)
    if conf_int is not None:
        conf_int = np.array(conf_int)
        if conf_int.shape[1] == 2:
            lower_bound, upper_bound = conf_int[:, 0], conf_int[:, 1]
            plt.fill_between(val_index[:len(y_pred_actual)], lower_bound, upper_bound, 
                             color='pink', alpha=0.3, label="Confidence Interval")

    # Thiết lập đồ thị
    plt.title(f"{model_type} Forecast (Normalization: {normalize})")
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.legend(ncol=4)
    plt.show()


# Run

In [168]:
data = pd.read_csv(DATA_TRAIN_PROCESSED)

In [132]:
data_copy, y_test_actual, y_pred_actual, model_type, normalize, conf_int = forecast_pipeline(data=data, 
                                                                                             model_type="SARIMA",
                                                                                             normalize="standard", 
                                                                                             seasonality=7)
plot_forecast(data_copy, y_test_actual, y_pred_actual, model_type, normalize, conf_int)

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[7] intercept   : AIC=4315.708, Time=2.93 sec
 ARIMA(0,0,0)(0,0,0)[7] intercept   : AIC=6684.166, Time=0.02 sec
 ARIMA(1,0,0)(1,0,0)[7] intercept   : AIC=4598.305, Time=0.45 sec
 ARIMA(0,0,1)(0,0,1)[7] intercept   : AIC=5158.407, Time=0.37 sec
 ARIMA(0,0,0)(0,0,0)[7]             : AIC=6682.174, Time=0.02 sec
 ARIMA(2,0,2)(0,0,1)[7] intercept   : AIC=4305.306, Time=2.96 sec
 ARIMA(2,0,2)(0,0,0)[7] intercept   : AIC=4303.736, Time=1.70 sec
 ARIMA(2,0,2)(1,0,0)[7] intercept   : AIC=4305.294, Time=2.07 sec
 ARIMA(1,0,2)(0,0,0)[7] intercept   : AIC=4321.407, Time=1.05 sec
 ARIMA(2,0,1)(0,0,0)[7] intercept   : AIC=4337.347, Time=1.41 sec
 ARIMA(3,0,2)(0,0,0)[7] intercept   : AIC=4329.140, Time=1.90 sec
 ARIMA(2,0,3)(0,0,0)[7] intercept   : AIC=4304.064, Time=2.77 sec
 ARIMA(1,0,1)(0,0,0)[7] intercept   : AIC=4586.156, Time=0.34 sec
 ARIMA(1,0,3)(0,0,0)[7] intercept   : AIC=4309.280, Time=1.01 sec
 ARIMA(3,0,1)(0,0,0)[7] intercept

AttributeError: 'numpy.ndarray' object has no attribute 'index'

<Figure size 1200x600 with 0 Axes>

In [192]:
data_copy, y_test_actual, y_pred_actual, model_type, normalize, conf_int = forecast_pipeline(data=data, 
                                                                                             model_type="Theta",
                                                                                             normalize="minmax", 
                                                                                             seasonality=365)
plot_forecast(data_copy, y_test_actual, y_pred_actual, model_type, normalize, conf_int)

<class 'numpy.ndarray'>
[[0.19011407]
 [0.1634981 ]
 [0.24334601]
 [0.41064639]
 [0.29657795]
 [0.32319392]
 [0.36882129]
 [0.38022814]
 [0.41825095]
 [0.3878327 ]
 [0.25095057]
 [0.23193916]
 [0.2243346 ]
 [0.33840304]
 [0.49429658]
 [0.47908745]
 [0.20532319]
 [0.22813688]
 [0.29657795]
 [0.34220532]
 [0.15969582]
 [0.11406844]
 [0.17110266]
 [0.23574144]
 [0.45247148]
 [0.4486692 ]
 [0.35741445]
 [0.36501901]
 [0.35741445]
 [0.2661597 ]
 [0.33840304]
 [0.35361217]
 [0.25855513]
 [0.2851711 ]
 [0.30418251]
 [0.31558935]
 [0.1026616 ]
 [0.14828897]
 [0.15589354]
 [0.19011407]
 [0.22053232]
 [0.16730038]
 [0.15589354]
 [0.22053232]
 [0.3460076 ]
 [0.30038023]
 [0.19011407]
 [0.10646388]
 [0.17870722]
 [0.33840304]
 [0.20532319]
 [0.26996198]
 [0.34220532]
 [0.35741445]
 [0.23954373]
 [0.2661597 ]
 [0.24334601]
 [0.25475285]
 [0.05703422]
 [0.11026616]
 [0.18250951]
 [0.23954373]
 [0.21673004]
 [0.2661597 ]
 [0.33460076]
 [0.33079848]
 [0.34220532]
 [0.36501901]
 [0.30418251]
 [0.319391

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [None]:
forecast_pipeline(data=data, model_type="Theta", normalize="standard", seasonality=7)
forecast_pipeline(data=data, model_type="Linear", normalize="minmax", lookback=24)

In [117]:
type(y_pred_actual)

pandas.core.series.Series

In [149]:
y_test_actual.values

array([[ 5. ],
       [ 4.3],
       [ 6.4],
       [10.8],
       [ 7.8],
       [ 8.5],
       [ 9.7],
       [10. ],
       [11. ],
       [10.2],
       [ 6.6],
       [ 6.1],
       [ 5.9],
       [ 8.9],
       [13. ],
       [12.6],
       [ 5.4],
       [ 6. ],
       [ 7.8],
       [ 9. ],
       [ 4.2],
       [ 3. ],
       [ 4.5],
       [ 6.2],
       [11.9],
       [11.8],
       [ 9.4],
       [ 9.6],
       [ 9.4],
       [ 7. ],
       [ 8.9],
       [ 9.3],
       [ 6.8],
       [ 7.5],
       [ 8. ],
       [ 8.3],
       [ 2.7],
       [ 3.9],
       [ 4.1],
       [ 5. ],
       [ 5.8],
       [ 4.4],
       [ 4.1],
       [ 5.8],
       [ 9.1],
       [ 7.9],
       [ 5. ],
       [ 2.8],
       [ 4.7],
       [ 8.9],
       [ 5.4],
       [ 7.1],
       [ 9. ],
       [ 9.4],
       [ 6.3],
       [ 7. ],
       [ 6.4],
       [ 6.7],
       [ 1.5],
       [ 2.9],
       [ 4.8],
       [ 6.3],
       [ 5.7],
       [ 7. ],
       [ 8.8],
       [ 8.7],
       [ 9

In [133]:
y_test_actual

array([[ 5. ],
       [ 4.3],
       [ 6.4],
       [10.8],
       [ 7.8],
       [ 8.5],
       [ 9.7],
       [10. ],
       [11. ],
       [10.2],
       [ 6.6],
       [ 6.1],
       [ 5.9],
       [ 8.9],
       [13. ],
       [12.6],
       [ 5.4],
       [ 6. ],
       [ 7.8],
       [ 9. ],
       [ 4.2],
       [ 3. ],
       [ 4.5],
       [ 6.2],
       [11.9],
       [11.8],
       [ 9.4],
       [ 9.6],
       [ 9.4],
       [ 7. ],
       [ 8.9],
       [ 9.3],
       [ 6.8],
       [ 7.5],
       [ 8. ],
       [ 8.3],
       [ 2.7],
       [ 3.9],
       [ 4.1],
       [ 5. ],
       [ 5.8],
       [ 4.4],
       [ 4.1],
       [ 5.8],
       [ 9.1],
       [ 7.9],
       [ 5. ],
       [ 2.8],
       [ 4.7],
       [ 8.9],
       [ 5.4],
       [ 7.1],
       [ 9. ],
       [ 9.4],
       [ 6.3],
       [ 7. ],
       [ 6.4],
       [ 6.7],
       [ 1.5],
       [ 2.9],
       [ 4.8],
       [ 6.3],
       [ 5.7],
       [ 7. ],
       [ 8.8],
       [ 8.7],
       [ 9

In [91]:
data_copy, y_test_actual, y_pred_actual, model_type, normalize, conf_int = forecast_pipeline(data=data, model_type="SARIMA", normalize="none", seasonality=365)
plot_forecast(data_copy, y_test_actual, y_pred_actual, model_type, normalize, conf_int)

Performing stepwise search to minimize aic


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_19672\506921167.py", line 1, in <module>
    data_copy, y_test_actual, y_pred_actual, model_type, normalize, conf_int = forecast_pipeline(data=data, model_type="SARIMA", normalize="none", seasonality=365)
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_19672\2251155891.py", line 47, in forecast_pipeline
    model = train_sarima(train_data, seasonality=kwargs["seasonality"])
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_19672\2851338018.py", line 8, in train_sarima
  File "C:\Users\Admin\anaconda3\lib\site-packages\pmdarima\arima\auto.py", line 701, in auto_arima
    sorted_res = search.solve()
  File "C:\Users\Admin\anaconda3\lib\site-packages\pmdarima\arima\_auto_solvers.py", line 288, in solve
    self._do_fit((p, d, q), (P, D, 

TypeError: object of type 'NoneType' has no len()

In [72]:
data

Unnamed: 0,Date,temperatures
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8
...,...,...
2915,1988-12-26,9.5
2916,1988-12-27,12.9
2917,1988-12-28,12.9
2918,1988-12-29,14.8
