In [2]:
import numpy as np
import modin.pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

def prepare_data(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps), 0])
        y.append(data[i + time_steps, 0])
    return np.array(X), np.array(y)

def hybrid_model(data, time_steps=60):
    # Ensure data is numpy array
    if isinstance(data, pd.DataFrame):
        df = data.values
    else:
        df = np.array(data)
    
    df = df.reshape(-1, 1)

    # ARIMA model
    model_auto = auto_arima(df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,
                            d=None, seasonal=False, start_P=0, D=0, trace=True,
                            error_action='ignore', suppress_warnings=True, stepwise=True)

    arima_model = ARIMA(df, order=model_auto.order)
    arima_results = arima_model.fit()

    # Get ARIMA residuals
    arima_residuals = df - arima_results.fittedvalues.values.reshape(-1, 1)

    # Prepare data for LSTM
    scaler = MinMaxScaler()
    residuals_scaled = scaler.fit_transform(arima_residuals)

    X, y = prepare_data(residuals_scaled, time_steps)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    # LSTM model
    lstm_model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
        LSTM(units=50),
        Dense(units=1)
    ])
    lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)

    # Make hybrid prediction
    last_60_days = residuals_scaled[-60:]
    X_test = np.array([last_60_days])
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    lstm_prediction = lstm_model.predict(X_test)
    lstm_prediction = scaler.inverse_transform(lstm_prediction)

    arima_forecast = arima_results.forecast(steps=1)

    hybrid_prediction = arima_forecast + lstm_prediction[0][0]

    return hybrid_prediction[0]

# Example usage with custom data
# Assuming you have a CSV file named 'my_stock_data.csv' with a 'Close' column
custom_data = pd.read_csv('../Data/SBI Train data.csv')
close_prices = custom_data['Close']

prediction = hybrid_model(close_prices)
print(f"Hybrid model prediction for next day closing price: ${prediction:.2f}")

Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=30202.237, Time=0.62 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=30233.097, Time=0.05 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=30205.175, Time=0.16 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=30203.304, Time=0.22 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=30231.827, Time=0.04 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=30196.421, Time=0.84 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=30201.003, Time=0.20 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=30198.399, Time=1.24 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=30198.353, Time=1.94 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=30196.789, Time=1.17 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=30202.291, Time=0.30 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=30199.886, Time=1.15 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=30195.213, Time=0.28 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=30200.893, Time=0.38 sec
 ARIMA(2,1,0)(0,0,0

AttributeError: 'numpy.ndarray' object has no attribute 'values'

In [3]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

def prepare_data(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps), 0])
        y.append(data[i + time_steps, 0])
    return np.array(X), np.array(y)

def hybrid_model(data, time_steps=60):
    # Ensure data is numpy array
    if isinstance(data, pd.Series):
        df = data.values
    elif isinstance(data, pd.DataFrame):
        df = data.values
    else:
        df = np.array(data)
    
    df = df.reshape(-1, 1)

    # ARIMA model
    model_auto = auto_arima(df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,
                            d=None, seasonal=False, start_P=0, D=0, trace=True,
                            error_action='ignore', suppress_warnings=True, stepwise=True)

    arima_model = ARIMA(df, order=model_auto.order)
    arima_results = arima_model.fit()

    # Get ARIMA residuals
    arima_residuals = df - arima_results.fittedvalues.reshape(-1, 1)

    # Prepare data for LSTM
    scaler = MinMaxScaler()
    residuals_scaled = scaler.fit_transform(arima_residuals)

    X, y = prepare_data(residuals_scaled, time_steps)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    # LSTM model
    lstm_model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
        LSTM(units=50),
        Dense(units=1)
    ])
    lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)

    # Make hybrid prediction
    last_60_days = residuals_scaled[-60:]
    X_test = np.array([last_60_days])
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    lstm_prediction = lstm_model.predict(X_test)
    lstm_prediction = scaler.inverse_transform(lstm_prediction)

    arima_forecast = arima_results.forecast(steps=1)

    hybrid_prediction = arima_forecast + lstm_prediction[0][0]

    return hybrid_prediction[0]

# Example usage with custom data
custom_data = pd.read_csv('../Data/SBI Train data.csv')
close_prices = custom_data['Close']

prediction = hybrid_model(close_prices)
print(f"Hybrid model prediction for next day closing price: ${prediction:.2f}")

Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=30202.237, Time=1.22 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=30233.097, Time=0.09 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=30205.175, Time=0.55 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=30203.304, Time=0.46 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=30231.827, Time=0.06 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=30196.421, Time=1.17 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=30201.003, Time=0.20 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=30198.399, Time=1.24 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=30198.353, Time=1.94 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=30196.789, Time=1.18 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=30202.291, Time=0.31 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=30199.886, Time=1.17 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=30195.213, Time=0.31 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=30200.893, Time=0.39 sec
 ARIMA(2,1,0)(0,0,0

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
Hybrid model prediction for next day closing price: $245.77


In [4]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error, mean_squared_error

def prepare_data(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps), 0])
        y.append(data[i + time_steps, 0])
    return np.array(X), np.array(y)

def hybrid_model(train_data, test_data, time_steps=60):
    # Ensure data is numpy array
    if isinstance(train_data, pd.Series):
        train_df = train_data.values
    elif isinstance(train_data, pd.DataFrame):
        train_df = train_data.values
    else:
        train_df = np.array(train_data)
    
    train_df = train_df.reshape(-1, 1)

    # ARIMA model
    model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,
                            d=None, seasonal=False, start_P=0, D=0, trace=True,
                            error_action='ignore', suppress_warnings=True, stepwise=True)

    arima_model = ARIMA(train_df, order=model_auto.order)
    arima_results = arima_model.fit()

    # Get ARIMA residuals
    arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)

    # Prepare data for LSTM
    scaler = MinMaxScaler()
    residuals_scaled = scaler.fit_transform(arima_residuals)

    X, y = prepare_data(residuals_scaled, time_steps)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    # LSTM model
    lstm_model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
        LSTM(units=50),
        Dense(units=1)
    ])
    lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)

    # Make predictions for test data
    predictions = []
    test_data = np.array(test_data).reshape(-1, 1)
    combined_data = np.vstack((train_df, test_data))

    for i in range(len(test_data)):
        # ARIMA prediction
        arima_forecast = arima_results.forecast(steps=1)

        # LSTM prediction
        last_60_days = scaler.transform(combined_data[-(time_steps+1):-1])
        X_test = np.array([last_60_days])
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
        lstm_prediction = lstm_model.predict(X_test)
        lstm_prediction = scaler.inverse_transform(lstm_prediction)

        # Combine predictions
        hybrid_prediction = arima_forecast + lstm_prediction[0][0]
        predictions.append(hybrid_prediction[0])

        # Update ARIMA model
        arima_results = arima_model.append(test_data[i]).fit()

    return np.array(predictions)

# Load and prepare data
train_data = pd.read_csv('../Data/SBI Train data.csv')
test_data = pd.read_csv('../Data/SBI Test data.csv')

train_close_prices = train_data['Close']
test_close_prices = test_data['Close']

# Make predictions
predictions = hybrid_model(train_close_prices, test_close_prices)

# Calculate accuracy metrics
mae = mean_absolute_error(test_close_prices, predictions)
rmse = np.sqrt(mean_squared_error(test_close_prices, predictions))

print(f"Mean Absolute Error: ${mae:.2f}")
print(f"Root Mean Squared Error: ${rmse:.2f}")

# You can also calculate percentage error
mape = np.mean(np.abs((test_close_prices - predictions) / test_close_prices)) * 100
print(f"Mean Absolute Percentage Error: {mape:.2f}%")

# Plot actual vs predicted prices
import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(test_data['Date'], test_close_prices, label='Actual Prices')
plt.plot(test_data['Date'], predictions, label='Predicted Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=30202.237, Time=0.62 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=30233.097, Time=0.05 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=30205.175, Time=0.15 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=30203.304, Time=0.21 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=30231.827, Time=0.03 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=30196.421, Time=0.85 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=30201.003, Time=0.21 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=30198.399, Time=1.25 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=30198.353, Time=1.96 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=30196.789, Time=1.21 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=30202.291, Time=0.32 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=30199.886, Time=1.15 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=30195.213, Time=0.29 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=30200.893, Time=0.43 sec
 ARIMA(2,1,0)(0,0,0

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step


AttributeError: 'ARIMA' object has no attribute 'append'

In [5]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error, mean_squared_error

def prepare_data(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps), 0])
        y.append(data[i + time_steps, 0])
    return np.array(X), np.array(y)

def hybrid_model(train_data, test_data, time_steps=60):
    # Ensure data is numpy array
    if isinstance(train_data, pd.Series):
        train_df = train_data.values
    elif isinstance(train_data, pd.DataFrame):
        train_df = train_data.values
    else:
        train_df = np.array(train_data)
    
    train_df = train_df.reshape(-1, 1)

    # ARIMA model
    model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,
                            d=None, seasonal=False, start_P=0, D=0, trace=True,
                            error_action='ignore', suppress_warnings=True, stepwise=True)

    arima_model = ARIMA(train_df, order=model_auto.order)
    arima_results = arima_model.fit()

    # Get ARIMA residuals
    arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)

    # Prepare data for LSTM
    scaler = MinMaxScaler()
    residuals_scaled = scaler.fit_transform(arima_residuals)

    X, y = prepare_data(residuals_scaled, time_steps)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    # LSTM model
    lstm_model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
        LSTM(units=50),
        Dense(units=1)
    ])
    lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)

    # Make predictions for test data
    predictions = []
    test_data = np.array(test_data).reshape(-1, 1)
    combined_data = np.vstack((train_df, test_data))

    for i in range(len(test_data)):
        # ARIMA prediction
        arima_forecast = arima_results.forecast(steps=1)

        # LSTM prediction
        last_60_days = scaler.transform(combined_data[-(time_steps+1):-1])
        X_test = np.array([last_60_days])
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
        lstm_prediction = lstm_model.predict(X_test)
        lstm_prediction = scaler.inverse_transform(lstm_prediction)

        # Combine predictions
        hybrid_prediction = arima_forecast + lstm_prediction[0][0]
        predictions.append(hybrid_prediction[0])

        # Update ARIMA model
        arima_results = arima_model.append(test_data[i]).fit()

    return np.array(predictions)

# Load and prepare data
train_data = pd.read_csv('../Data/SBI Train data.csv')
test_data = pd.read_csv('../Data/SBI Test data.csv')

train_close_prices = train_data['Close']
test_close_prices = test_data['Close']

# Make predictions
predictions = hybrid_model(train_close_prices, test_close_prices)

# Calculate accuracy metrics
mae = mean_absolute_error(test_close_prices, predictions)
rmse = np.sqrt(mean_squared_error(test_close_prices, predictions))

print(f"Mean Absolute Error: ${mae:.2f}")
print(f"Root Mean Squared Error: ${rmse:.2f}")

# You can also calculate percentage error
mape = np.mean(np.abs((test_close_prices - predictions) / test_close_prices)) * 100
print(f"Mean Absolute Percentage Error: {mape:.2f}%")

# Plot actual vs predicted prices
import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(test_data['Date'], test_close_prices, label='Actual Prices')
plt.plot(test_data['Date'], predictions, label='Predicted Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=30202.237, Time=0.63 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=30233.097, Time=0.05 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=30205.175, Time=0.17 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=30203.304, Time=0.24 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=30231.827, Time=0.04 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=30196.421, Time=0.93 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=30201.003, Time=0.23 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=30198.399, Time=1.30 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=30198.353, Time=1.94 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=30196.789, Time=1.22 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=30202.291, Time=0.30 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=30199.886, Time=1.15 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=30195.213, Time=0.31 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=30200.893, Time=0.42 sec
 ARIMA(2,1,0)(0,0,0

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step


AttributeError: 'ARIMA' object has no attribute 'append'

In [6]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib
import os

def prepare_data(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps), 0])
        y.append(data[i + time_steps, 0])
    return np.array(X), np.array(y)

def create_hybrid_model(train_data, time_steps=60):
    # Ensure data is numpy array
    if isinstance(train_data, pd.Series):
        train_df = train_data.values
    elif isinstance(train_data, pd.DataFrame):
        train_df = train_data.values
    else:
        train_df = np.array(train_data)
    
    train_df = train_df.reshape(-1, 1)

    # ARIMA model
    model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,
                            d=None, seasonal=False, start_P=0, D=0, trace=True,
                            error_action='ignore', suppress_warnings=True, stepwise=True)

    arima_model = ARIMA(train_df, order=model_auto.order)
    arima_results = arima_model.fit()

    # Get ARIMA residuals
    arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)

    # Prepare data for LSTM
    scaler = MinMaxScaler()
    residuals_scaled = scaler.fit_transform(arima_residuals)

    X, y = prepare_data(residuals_scaled, time_steps)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    # LSTM model
    lstm_model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
        LSTM(units=50),
        Dense(units=1)
    ])
    lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=0)

    return arima_results, lstm_model, scaler

def save_model(arima_results, lstm_model, scaler, folder_path):
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    
    # Save ARIMA model
    joblib.dump(arima_results, os.path.join(folder_path, 'arima_model.pkl'))
    
    # Save LSTM model
    lstm_model.save(os.path.join(folder_path, 'lstm_model.h5'))
    
    # Save scaler
    joblib.dump(scaler, os.path.join(folder_path, 'scaler.pkl'))

def load_model(folder_path):
    # Load ARIMA model
    arima_results = joblib.load(os.path.join(folder_path, 'arima_model.pkl'))
    
    # Load LSTM model
    lstm_model = load_model(os.path.join(folder_path, 'lstm_model.h5'))
    
    # Load scaler
    scaler = joblib.load(os.path.join(folder_path, 'scaler.pkl'))
    
    return arima_results, lstm_model, scaler

def make_predictions(arima_results, lstm_model, scaler, test_data, time_steps=60):
    predictions = []
    test_data = np.array(test_data).reshape(-1, 1)

    for i in range(len(test_data)):
        # ARIMA prediction
        arima_forecast = arima_results.forecast(steps=1)

        # LSTM prediction
        last_60_days = scaler.transform(test_data[i:i+time_steps])
        X_test = np.array([last_60_days])
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
        lstm_prediction = lstm_model.predict(X_test)
        lstm_prediction = scaler.inverse_transform(lstm_prediction)

        # Combine predictions
        hybrid_prediction = arima_forecast + lstm_prediction[0][0]
        predictions.append(hybrid_prediction[0])

        # Update ARIMA model
        arima_results = arima_results.append(test_data[i])

    return np.array(predictions)

# Example usage
if __name__ == "__main__":
    # Load data
    train_data = pd.read_csv('../Data/SBI Train data.csv')
    test_data = pd.read_csv('../Data/SBI Test data.csv')

    train_close_prices = train_data['Close']
    test_close_prices = test_data['Close']

    # Create and save the model
    arima_results, lstm_model, scaler = create_hybrid_model(train_close_prices)
    save_model(arima_results, lstm_model, scaler, 'saved_model')

    # Later, load the model and make predictions
    loaded_arima, loaded_lstm, loaded_scaler = load_model('saved_model')
    predictions = make_predictions(loaded_arima, loaded_lstm, loaded_scaler, test_close_prices)

    # Calculate accuracy metrics
    mae = mean_absolute_error(test_close_prices, predictions)
    rmse = np.sqrt(mean_squared_error(test_close_prices, predictions))
    mape = np.mean(np.abs((test_close_prices - predictions) / test_close_prices)) * 100

    print(f"Mean Absolute Error: ${mae:.2f}")
    print(f"Root Mean Squared Error: ${rmse:.2f}")
    print(f"Mean Absolute Percentage Error: {mape:.2f}%")

    # Plot results
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12,6))
    plt.plot(test_data['Date'], test_close_prices, label='Actual Prices')
    plt.plot(test_data['Date'], predictions, label='Predicted Prices')
    plt.title('Actual vs Predicted Stock Prices')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=30202.237, Time=0.59 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=30233.097, Time=0.06 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=30205.175, Time=0.16 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=30203.304, Time=0.25 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=30231.827, Time=0.04 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=30196.421, Time=0.85 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=30201.003, Time=0.20 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=30198.399, Time=1.28 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=30198.353, Time=2.09 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=30196.789, Time=1.29 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=30202.291, Time=0.33 sec
 ARIMA(3,1,2)(0,0,0)[0] intercept   : AIC=30199.886, Time=1.23 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=30195.213, Time=0.30 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=30200.893, Time=0.38 sec
 ARIMA(2,1,0)(0,0,0

  super().__init__(**kwargs)


FileNotFoundError: [Errno 2] No such file or directory: 'saved_model\\lstm_model.h5\\arima_model.pkl'

In [9]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib  # For saving the ARIMA model
import os

# Data preparation
def prepare_data(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:(i + time_steps), 0])
        y.append(data[i + time_steps, 0])
    return np.array(X), np.array(y)

# Hybrid ARIMA-LSTM Model
def hybrid_model(train_data, test_data, time_steps=60, model_dir='./model'):
    # Ensure data is a numpy array
    train_df = np.array(train_data).reshape(-1, 1)

    # Create a directory to save models if it doesn't exist
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # ARIMA Model
    arima_model_path = os.path.join(model_dir, 'arima_model.pkl')
    if not os.path.exists(arima_model_path):
        model_auto = auto_arima(train_df, start_p=1, start_q=1, max_p=3, max_q=3, m=1,
                                d=None, seasonal=False, start_P=0, D=0, trace=True,
                                error_action='ignore', suppress_warnings=True, stepwise=True)

        arima_model = ARIMA(train_df, order=model_auto.order)
        arima_results = arima_model.fit()
        # Save ARIMA model
        joblib.dump(arima_results, arima_model_path)
    else:
        arima_results = joblib.load(arima_model_path)

    # Get ARIMA residuals
    arima_residuals = train_df - arima_results.fittedvalues.reshape(-1, 1)

    # Prepare data for LSTM
    scaler = MinMaxScaler()
    residuals_scaled = scaler.fit_transform(arima_residuals)

    X, y = prepare_data(residuals_scaled, time_steps)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    # LSTM Model
    lstm_model_path = os.path.join(model_dir, 'lstm_model.keras')  # Updated file extension
    if not os.path.exists(lstm_model_path):
        lstm_model = Sequential([
            LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
            LSTM(units=50),
            Dense(units=1)
        ])
        lstm_model.compile(optimizer='adam', loss='mean_squared_error')

        # Early stopping and model checkpoint
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model_checkpoint = ModelCheckpoint(lstm_model_path, save_best_only=True, monitor='loss')

        # Train LSTM model
        lstm_model.fit(X, y, epochs=50, batch_size=32, verbose=1, callbacks=[early_stopping, model_checkpoint])

    else:
        lstm_model = load_model(lstm_model_path)

    # Make predictions for test data
    predictions = []
    test_data = np.array(test_data).reshape(-1, 1)
    combined_data = np.vstack((train_df, test_data))

    for i in range(len(test_data)):
        # ARIMA prediction
        arima_forecast = arima_results.forecast(steps=1)

        # LSTM prediction
        last_60_days = scaler.transform(combined_data[-(time_steps+1):-1])
        X_test = np.array([last_60_days])
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
        lstm_prediction = lstm_model.predict(X_test)
        lstm_prediction = scaler.inverse_transform(lstm_prediction)

        # Combine predictions
        hybrid_prediction = arima_forecast + lstm_prediction[0][0]
        predictions.append(hybrid_prediction[0])

        # Update ARIMA model with test data
        arima_results = arima_model.append(test_data[i]).fit()

    return np.array(predictions)

# Load and prepare data
train_data = pd.read_csv('../Data/SBI Train data.csv')
test_data = pd.read_csv('../Data/SBI Test data.csv')

train_close_prices = train_data['Close']
test_close_prices = test_data['Close']

# Make predictions
predictions = hybrid_model(train_close_prices, test_close_prices)

# Calculate accuracy metrics
mae = mean_absolute_error(test_close_prices, predictions)
rmse = np.sqrt(mean_squared_error(test_close_prices, predictions))

print(f"Mean Absolute Error: ${mae:.2f}")
print(f"Root Mean Squared Error: ${rmse:.2f}")

# Calculate Mean Absolute Percentage Error (MAPE)
mape = np.mean(np.abs((test_close_prices - predictions) / test_close_prices)) * 100
print(f"Mean Absolute Percentage Error: {mape:.2f}%")

# Plot actual vs predicted prices
import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(test_data['Date'], test_close_prices, label='Actual Prices')
plt.plot(test_data['Date'], predictions, label='Predicted Prices')
plt.title('Actual vs Predicted Stock Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()






[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step


UnboundLocalError: cannot access local variable 'arima_model' where it is not associated with a value