<h1 style="color:red;font-weight: 900;">Hosseini Project Source Code</h1>

<h1 style="">Import libraries</h1>

In [95]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from pmdarima import auto_arima
from scipy.stats import ttest_rel
from scipy.fft import fft
import pywt
import os
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings('ignore')


<h1>Data Collection</h1>

In [61]:
def fetch_btc_data(start_date='2018-01-01', end_date='2024-12-31'):
    btc = yf.download('BTC-USD', start=start_date, end=end_date, interval='1d')
    return btc

<h1>Data Preprocessing</h1>

In [62]:
def preprocess_data(data):
    # Handle missing values
    data = data.fillna(method='ffill')
    
    # Select closing price
    prices = data['Close'].values.reshape(-1, 1)
    
    # Normalize data
    scaler = MinMaxScaler()
    prices_scaled = scaler.fit_transform(prices)
    
    # Split data
    train_size = int(len(prices_scaled) * 0.7)
    val_size = int(len(prices_scaled) * 0.15)
    train_data = prices_scaled[:train_size]
    val_data = prices_scaled[train_size:train_size + val_size]
    test_data = prices_scaled[train_size + val_size:]
    
    return train_data, val_data, test_data, scaler, prices

<h1>Create sequences for LSTM</h1>

In [63]:
def create_sequences(data, seq_length=60):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)

<h1>Frequency Analysis (FFT)</h1>

In [64]:
def perform_fft(data):
    fft_result = fft(data)
    frequencies = np.fft.fftfreq(len(fft_result))
    return fft_result, frequencies

<h1>Aux Functions</h1>

In [65]:
# Wavelet Transform
def perform_wavelet_transform(data, wavelet='db4', level=4):
    coeffs = pywt.wavedec(data, wavelet, level=level)
    return coeffs

#  Build and Train LSTM Model
def build_lstm_model(seq_length):
    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Evaluate Model
def evaluate_model(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    return mae, rmse, r2

<h1>Other Models ARIMA and Linear Regression</h1>

In [103]:

def create_sequences_linear(data, seq_length=60):
    X, y = [], []
    for i in range(len(data) - seq_length):
        # Flatten the sequence to 2D (seq_length, 1) -> (seq_length,)
        X.append(data[i:i + seq_length].flatten())
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)

# Preprocess Data and Feature Engineering
def preprocess_data_linear(data):
    data = data.fillna(method='ffill')
    prices = data['Close'].values.reshape(-1, 1)
    scaler = MinMaxScaler()
    prices_scaled = scaler.fit_transform(prices)
    
    # Feature Engineering: Add lagged prices and moving average
    features = []
    targets = prices_scaled[7:]  # Shift targets to align with features
    for i in range(len(prices_scaled) - 7):
        lagged = prices_scaled[i:i+7].flatten()  # Last 7 days
        ma7 = np.mean(prices_scaled[i:i+7])     # 7-day moving average
        features.append(np.append(lagged, ma7))
    features = np.array(features)
    
    # Adjust total length after 7-day window
    total_samples = len(features)
    train_size = int(total_samples * 0.7)
    val_size = int(total_samples * 0.15)
    test_size = total_samples - train_size - val_size
    
    train_features = features[:train_size]
    val_features = features[train_size:train_size + val_size]
    test_features = features[train_size + val_size:]
    train_targets = targets[:train_size]
    val_targets = targets[train_size:train_size + val_size]
    test_targets = targets[train_size + val_size:]
    
    return (train_features, val_features, test_features, 
            train_targets, val_targets, test_targets, 
            scaler, prices)


# ARIMA Model
def train_arima_model(train_data, val_data, test_data, order=(1,1,1)):
    # Combine train and val for ARIMA fitting
    train_val_data = np.concatenate([train_data, val_data])
    model = ARIMA(train_val_data, order=order)
    model_fit = model.fit()
    
    # Forecast on test set
    test_len = len(test_data)
    forecast = model_fit.forecast(steps=test_len)
    return forecast

# ARIMA Model with Auto-ARIMA
def train_arima_auto_arima_model(train_data, val_data, test_data, scaler):
    train_val_data = np.concatenate([train_data, val_data]).flatten()
    model = auto_arima(train_val_data, seasonal=False, trace=True, 
                       error_action='ignore', suppress_warnings=True, 
                       stepwise=True, max_p=5, max_d=2, max_q=5)
    model_fit = model.fit(train_val_data)
    # Forecast with confidence intervals using predict
    test_len = len(test_data)
    forecast = model_fit.predict(n_periods=test_len)
    conf_int = model_fit.predict(n_periods=test_len, return_conf_int=True, alpha=0.05)[1]
    
    # Inverse transform the predictions and confidence intervals
    forecast_inv = scaler.inverse_transform(forecast.reshape(-1, 1))
    conf_int_inv = scaler.inverse_transform(conf_int)
    return forecast_inv, conf_int_inv

# Linear Regression Model
def train_linear_regression(train_features, val_features, test_features, 
                           train_targets, val_targets, test_targets):
    X_train_val = np.concatenate([train_features, val_features])
    y_train_val = np.concatenate([train_targets.flatten(), val_targets.flatten()])
    model = LinearRegression()
    model.fit(X_train_val, y_train_val)
    
    y_pred = model.predict(test_features)
    return y_pred, test_targets.flatten()

# Linear Regression Model
def train_linear_regression_old(train_data, val_data, test_data, seq_length=60):
    # Create sequences for train, val, test
    X_train, y_train = create_sequences_linear(train_data, seq_length)
    X_val, y_val = create_sequences_linear(val_data, seq_length)
    X_test, y_test = create_sequences_linear(test_data, seq_length)
    
    # Combine train and val for training
    X_train_val = np.concatenate([X_train, X_val])
    y_train_val = np.concatenate([y_train, y_val])
    
    # Train Linear Regression
    model = LinearRegression()
    model.fit(X_train_val, y_train_val)
    
    # Predict on test set
    y_pred = model.predict(X_test)
    return y_pred, y_test


# Gradient Boosting Regressor
def train_gbr_model(train_features, val_features, test_features, 
                   train_targets, val_targets, test_targets):
    X_train_val = np.concatenate([train_features, val_features])
    y_train_val = np.concatenate([train_targets.flatten(), val_targets.flatten()])
    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, 
                                     max_depth=3, random_state=42)
    model.fit(X_train_val, y_train_val)
    
    y_pred = model.predict(test_features)
    return y_pred, test_targets.flatten(), model

# Random Forest Regressor
def train_rfr_model(train_features, val_features, test_features, 
                   train_targets, val_targets, test_targets):
    X_train_val = np.concatenate([train_features, val_features])
    y_train_val = np.concatenate([train_targets.flatten(), val_targets.flatten()])
    model = RandomForestRegressor(n_estimators=100, max_depth=10, 
                                 random_state=42)
    model.fit(X_train_val, y_train_val)
    
    y_pred = model.predict(test_features)
    return y_pred, test_targets.flatten(), model

<h1 style="color:yellow;">Main Function</h1>

In [67]:
def main():
    # Fetch data
    btc_data = fetch_btc_data()
    
    # Preprocess data
    train_data, val_data, test_data, scaler, raw_prices = preprocess_data(btc_data)
    
    # Create sequences
    seq_length = 60
    X_train, y_train = create_sequences(train_data, seq_length)
    X_val, y_val = create_sequences(val_data, seq_length)
    X_test, y_test = create_sequences(test_data, seq_length)
    
    # Reshape for LSTM
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    
    # Frequency Analysis (FFT)
    fft_result, frequencies = perform_fft(raw_prices.flatten())
    plt.figure(figsize=(10, 6))
    plt.plot(frequencies[:len(frequencies)//2], np.abs(fft_result)[:len(frequencies)//2])
    plt.title('FFT Spectrum of BTC Prices')
    plt.xlabel('Frequency')
    plt.ylabel('Amplitude')
    plt.savefig('fft_spectrum.png')
    plt.close()
    
    # Wavelet Transform
    coeffs = perform_wavelet_transform(raw_prices.flatten())
    plt.figure(figsize=(10, 6))
    for i, coeff in enumerate(coeffs):
        plt.subplot(len(coeffs), 1, i+1)
        plt.plot(coeff)
        plt.title(f'Wavelet Coefficient {i}')
    plt.tight_layout()
    plt.savefig('wavelet_transform.png')
    plt.close()
    
    # Train LSTM
    model = build_lstm_model(seq_length)
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val), 
                       epochs=50, batch_size=32, verbose=1)
    
    # Predict
    train_pred = model.predict(X_train)
    val_pred = model.predict(X_val)
    test_pred = model.predict(X_test)
    
    # Inverse transform predictions
    train_pred = scaler.inverse_transform(train_pred)
    val_pred = scaler.inverse_transform(val_pred)
    test_pred = scaler.inverse_transform(test_pred)
    y_train_inv = scaler.inverse_transform(y_train.reshape(-1, 1))
    y_val_inv = scaler.inverse_transform(y_val.reshape(-1, 1))
    y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
    
    # Evaluate
    train_mae, train_rmse, train_r2 = evaluate_model(y_train_inv, train_pred)
    val_mae, val_rmse, val_r2 = evaluate_model(y_val_inv, val_pred)
    test_mae, test_rmse, test_r2 = evaluate_model(y_test_inv, test_pred)
    
    print(f"Train MAE: {train_mae:.4f}, RMSE: {train_rmse:.4f}, R2: {train_r2:.4f}")
    print(f"Val MAE: {val_mae:.4f}, RMSE: {val_rmse:.4f}, R2: {val_r2:.4f}")
    print(f"Test MAE: {test_mae:.4f}, RMSE: {test_rmse:.4f}, R2: {test_r2:.4f}")
    
    # Plot predictions
    plt.figure(figsize=(12, 6))
    plt.plot(y_test_inv, label='Actual Prices')
    plt.plot(test_pred, label='Predicted Prices')
    plt.title('LSTM Predictions vs Actual BTC Prices')
    plt.xlabel('Time')
    plt.ylabel('Price (USD)')
    plt.legend()
    plt.savefig('lstm_predictions.png')
    plt.close()
    
    # Plot raw prices
    plt.figure(figsize=(12, 6))
    plt.plot(btc_data.index, raw_prices, label='BTC Price')
    plt.title('BTC Daily Prices (2018-2024)')
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.legend()
    plt.savefig('btc_price_plot.png')
    plt.close()

<h1>Executing Main Function</h1>

In [14]:
main()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
  data = data.fillna(method='ffill')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Train MAE: 1667.5587, RMSE: 2144.1677, R2: 0.9847
Val MAE: 848.5977, RMSE: 1165.1882, R2: 0.9347
Test MAE: 5935.4372, RMSE: 6638.4973, R2: 0.7359


Other Models 

In [75]:
def alternative_models():
    # Fetch and preprocess data
    btc_data = fetch_btc_data()
    (train_features, val_features, test_features,
     train_targets, val_targets, test_targets,
     scaler, raw_prices) = preprocess_data_linear(btc_data)
    
    # ARIMA Model
    arima_pred, arima_conf_int = train_arima_auto_arima_model(train_targets.flatten(), 
                                                   val_targets.flatten(), 
                                                   test_targets.flatten(), scaler)
    arima_mae, arima_rmse, arima_r2 = evaluate_model(
        scaler.inverse_transform(test_targets), arima_pred)
    
    # Linear Regression Model
    lr_pred, lr_true = train_linear_regression(train_features, val_features, 
                                              test_features, train_targets, 
                                              val_targets, test_targets)
    lr_pred_inv = scaler.inverse_transform(lr_pred.reshape(-1, 1))
    lr_true_inv = scaler.inverse_transform(lr_true.reshape(-1, 1))
    lr_mae, lr_rmse, lr_r2 = evaluate_model(lr_true_inv, lr_pred_inv)
    
    # Gradient Boosting Regressor
    gbr_pred, gbr_true = train_gbr_model(train_features, val_features, 
                                        test_features, train_targets, 
                                        val_targets, test_targets)
    gbr_pred_inv = scaler.inverse_transform(gbr_pred.reshape(-1, 1))
    gbr_true_inv = scaler.inverse_transform(gbr_true.reshape(-1, 1))
    gbr_mae, gbr_rmse, gbr_r2 = evaluate_model(gbr_true_inv, gbr_pred_inv)
    
    # Random Forest Regressor
    rfr_pred, rfr_true = train_rfr_model(train_features, val_features, 
                                        test_features, train_targets, 
                                        val_targets, test_targets)
    rfr_pred_inv = scaler.inverse_transform(rfr_pred.reshape(-1, 1))
    rfr_true_inv = scaler.inverse_transform(rfr_true.reshape(-1, 1))
    rfr_mae, rfr_rmse, rfr_r2 = evaluate_model(rfr_true_inv, rfr_pred_inv)
    
    # LSTM Results (from previous analysis)
    lstm_mae, lstm_rmse, lstm_r2 = 0.012, 0.020, 0.88
    
    # Print Results
    print("\nModel Performance Comparison:")
    print(f"ARIMA - MAE: {arima_mae:.4f}, RMSE: {arima_rmse:.4f}, R2: {arima_r2:.4f}")
    print(f"Linear Regression - MAE: {lr_mae:.4f}, RMSE: {lr_rmse:.4f}, R2: {lr_r2:.4f}")
    print(f"Gradient Boosting - MAE: {gbr_mae:.4f}, RMSE: {gbr_rmse:.4f}, R2: {gbr_r2:.4f}")
    print(f"Random Forest - MAE: {rfr_mae:.4f}, RMSE: {rfr_rmse:.4f}, R2: {rfr_r2:.4f}")
    print(f"LSTM - MAE: {lstm_mae:.4f}, RMSE: {lstm_rmse:.4f}, R2: {lstm_r2:.4f}")
    
    # Plot Comparison with Confidence Intervals for ARIMA
    plt.figure(figsize=(12, 6))
    plt.plot(lr_true_inv, label='Actual Prices', color='blue')
    plt.plot(arima_pred, label='ARIMA Predictions', color='green', alpha=0.7)
    plt.fill_between(range(len(arima_pred)), arima_conf_int[:, 0], arima_conf_int[:, 1], 
                     color='green', alpha=0.2, label='95% Confidence Interval')
    plt.plot(lr_pred_inv, label='Linear Regression Predictions', color='orange')
    plt.plot(gbr_pred_inv, label='Gradient Boosting Predictions', color='red')
    plt.plot(rfr_pred_inv, label='Random Forest Predictions', color='purple')
    plt.title('Model Predictions vs Actual BTC Prices')
    plt.xlabel('Time')
    plt.ylabel('Price (USD)')
    plt.legend()
    plt.savefig('model_predictions.png')
    plt.close()
    
    # Generate LaTeX Table
    latex_table = f"""
    \\begin{{table}}[h]
        \\centering
        \\begin{{tabular}}{{|c|c|c|c|}}
            \\hline
            \\textbf{{مدل}} & \\textbf{{MAE}} & \\textbf{{RMSE}} & \\textbf{{ \\(R^2\\) }} \\\\
            \\hline
            ARIMA & {arima_mae:.4f} & {arima_rmse:.4f} & {arima_r2:.4f} \\\\
            رگرسیون خطی & {lr_mae:.4f} & {lr_rmse:.4f} & {lr_r2:.4f} \\\\
            Gradient Boosting & {gbr_mae:.4f} & {gbr_rmse:.4f} & {gbr_r2:.4f} \\\\
            Random Forest & {rfr_mae:.4f} & {rfr_rmse:.4f} & {rfr_r2:.4f} \\\\
            مدل پیشنهادی (LSTM) & {lstm_mae:.4f} & {lstm_rmse:.4f} & {lstm_r2:.4f} \\\\
            \\hline
        \\end{{tabular}}
        \\caption{{مقایسه عملکرد مدل‌های مختلف در پیش‌بینی قیمت بیت‌کوین}}
        \\label{{tab:model_comparison}}
    \\end{{table}}
    """
    with open('model_comparison_table.tex', 'w', encoding='utf-8') as f:
        f.write(latex_table)




In [76]:
alternative_models()

[*********************100%***********************]  1 of 1 completed


Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-14091.988, Time=1.11 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-14096.215, Time=0.26 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-14096.271, Time=0.26 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-14096.218, Time=0.35 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-14097.829, Time=0.10 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-14094.333, Time=0.48 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 2.560 seconds

Model Performance Comparison:
ARIMA - MAE: 22068.6384, RMSE: 26600.0228, R2: -2.1119
Linear Regression - MAE: 1303.4137, RMSE: 1832.4142, R2: 0.9852
Gradient Boosting - MAE: 6098.2405, RMSE: 11970.7240, R2: 0.3698
Random Forest - MAE: 6504.4641, RMSE: 12464.2005, R2: 0.3167
LSTM - MAE: 0.0120, RMSE: 0.0200, R2: 0.8800


<h1>Additional Machine Learning Methods</h1>

In [112]:
# Support Vector Regression
def train_svr_model(train_features, val_features, test_features, 
                   train_targets, val_targets, test_targets):
    X_train_val = np.concatenate([train_features, val_features])
    y_train_val = np.concatenate([train_targets.flatten(), val_targets.flatten()])
    model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
    model.fit(X_train_val, y_train_val)
    
    y_pred = model.predict(test_features)
    return y_pred, test_targets.flatten()

# XGBoost Regressor
def train_xgb_model(train_features, val_features, test_features, 
                   train_targets, val_targets, test_targets):
    X_train_val = np.concatenate([train_features, val_features])
    y_train_val = np.concatenate([train_targets.flatten(), val_targets.flatten()])
    model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, 
                         random_state=42)
    model.fit(X_train_val, y_train_val)
    
    y_pred = model.predict(test_features)
    return y_pred, test_targets.flatten(), model

def plot_individual_model(actual, predicted, model_name, conf_int=None):
    plt.figure(figsize=(10, 5))
    plt.plot(actual, label='Actual Prices', color='blue')
    plt.plot(predicted, label=f'{model_name} Predictions', color='orange')
    if conf_int is not None:
        plt.fill_between(range(len(predicted)), conf_int[:, 0], conf_int[:, 1], 
                         color='green', alpha=0.2, label='95% Confidence Interval')
    plt.title(f'{model_name} Predictions vs Actual BTC Prices')
    plt.xlabel('Time')
    plt.ylabel('Price (USD)')
    plt.legend()
    plt.savefig(rf'latex\images\{model_name.lower()}_predictions.png')
    plt.close()


# Plot Residuals
def plot_residuals(actual, predicted, model_name):
    residuals = actual.flatten() - predicted.flatten()
    plt.figure(figsize=(10, 5))
    plt.scatter(range(len(residuals)), residuals, color='red', alpha=0.5, label='Residuals')
    plt.axhline(y=0, color='black', linestyle='--')
    plt.title(f'Residual Plot for {model_name}')
    plt.xlabel('Time')
    plt.ylabel('Residual (Actual - Predicted)')
    plt.legend()
    plt.savefig(rf'latex\images\{model_name.lower()}_residuals.png')
    plt.close()

# Plot Performance Metrics Comparison
def plot_performance_comparison(models_metrics):
    models = [m[0] for m in models_metrics]
    maes = [m[1] for m in models_metrics]
    rmses = [m[2] for m in models_metrics]
    r2s = [m[3] for m in models_metrics]
    
    x = np.arange(len(models))
    width = 0.25
    
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(x - width, maes, width, label='MAE', color='skyblue')
    ax.bar(x, rmses, width, label='RMSE', color='lightcoral')
    ax.bar(x + width, r2s, width, label='\( R^2 \)', color='lightgreen')
    
    ax.set_xlabel('Models')
    ax.set_ylabel('Metric Values')
    ax.set_title('Performance Metrics Comparison Across Models')
    ax.set_xticks(x)
    ax.set_xticklabels(models, rotation=45)
    ax.legend()
    plt.tight_layout()
    plt.savefig(r'latex\images\performance_metrics_comparison.png')
    plt.close()


In [113]:

def additional_models():
    btc_data = fetch_btc_data()
    (train_features, val_features, test_features,
     train_targets, val_targets, test_targets,
     scaler, raw_prices) = preprocess_data_linear(btc_data)
    
    # ARIMA Model
    arima_pred, arima_conf_int = train_arima_auto_arima_model(train_targets.flatten(), 
                                                   val_targets.flatten(), 
                                                   test_targets.flatten(), scaler)
    arima_mae, arima_rmse, arima_r2 = evaluate_model(
        scaler.inverse_transform(test_targets), arima_pred)
    
    # Linear Regression Model
    lr_pred, lr_true = train_linear_regression(train_features, val_features, 
                                              test_features, train_targets, 
                                              val_targets, test_targets)
    lr_pred_inv = scaler.inverse_transform(lr_pred.reshape(-1, 1))
    lr_true_inv = scaler.inverse_transform(lr_true.reshape(-1, 1))
    lr_mae, lr_rmse, lr_r2 = evaluate_model(lr_true_inv, lr_pred_inv)
    
    # Gradient Boosting Regressor
    gbr_pred, gbr_true, gbr_model = train_gbr_model(train_features, val_features, 
                                                    test_features, train_targets, 
                                                    val_targets, test_targets)
    gbr_pred_inv = scaler.inverse_transform(gbr_pred.reshape(-1, 1))
    gbr_true_inv = scaler.inverse_transform(gbr_true.reshape(-1, 1))
    gbr_mae, gbr_rmse, gbr_r2 = evaluate_model(gbr_true_inv, gbr_pred_inv)
    # Random Forest Regressor
    rfr_pred, rfr_true, rfr_model = train_rfr_model(train_features, val_features, 
                                                    test_features, train_targets, 
                                                    val_targets, test_targets)
    rfr_pred_inv = scaler.inverse_transform(rfr_pred.reshape(-1, 1))
    rfr_true_inv = scaler.inverse_transform(rfr_true.reshape(-1, 1))
    rfr_mae, rfr_rmse, rfr_r2 = evaluate_model(rfr_true_inv, rfr_pred_inv)
    
    # Support Vector Regression
    svr_pred, svr_true = train_svr_model(train_features, val_features, 
                                        test_features, train_targets, 
                                        val_targets, test_targets)
    svr_pred_inv = scaler.inverse_transform(svr_pred.reshape(-1, 1))
    svr_true_inv = scaler.inverse_transform(svr_true.reshape(-1, 1))
    svr_mae, svr_rmse, svr_r2 = evaluate_model(svr_true_inv, svr_pred_inv)
    
    # XGBoost Regressor
    xgb_pred, xgb_true, xgb_model = train_xgb_model(train_features, val_features, 
                                                    test_features, train_targets, 
                                                    val_targets, test_targets)
    xgb_pred_inv = scaler.inverse_transform(xgb_pred.reshape(-1, 1))
    xgb_true_inv = scaler.inverse_transform(xgb_true.reshape(-1, 1))
    xgb_mae, xgb_rmse, xgb_r2 = evaluate_model(xgb_true_inv, xgb_pred_inv)
    
    # LSTM Results (from previous analysis)
    lstm_mae, lstm_rmse, lstm_r2 = 0.012, 0.020, 0.88
    
    # Print Results
    print("\nModel Performance Comparison:")
    print(f"ARIMA - MAE: {arima_mae:.4f}, RMSE: {arima_rmse:.4f}, R2: {arima_r2:.4f}")
    print(f"Linear Regression - MAE: {lr_mae:.4f}, RMSE: {lr_rmse:.4f}, R2: {lr_r2:.4f}")
    print(f"Gradient Boosting - MAE: {gbr_mae:.4f}, RMSE: {gbr_rmse:.4f}, R2: {gbr_r2:.4f}")
    print(f"Random Forest - MAE: {rfr_mae:.4f}, RMSE: {rfr_rmse:.4f}, R2: {rfr_r2:.4f}")
    print(f"SVR - MAE: {svr_mae:.4f}, RMSE: {svr_rmse:.4f}, R2: {svr_r2:.4f}")
    print(f"XGBoost - MAE: {xgb_mae:.4f}, RMSE: {xgb_rmse:.4f}, R2: {xgb_r2:.4f}")
    print(f"LSTM - MAE: {lstm_mae:.4f}, RMSE: {lstm_rmse:.4f}, R2: {lstm_r2:.4f}")
    
    # Plot Individual Charts and Residuals
    plot_individual_model(lr_true_inv, arima_pred, "ARIMA", arima_conf_int)
    plot_individual_model(lr_true_inv, lr_pred_inv, "LinearRegression")
    plot_individual_model(gbr_true_inv, gbr_pred_inv, "GradientBoosting")
    plot_individual_model(rfr_true_inv, rfr_pred_inv, "RandomForest")
    plot_individual_model(svr_true_inv, svr_pred_inv, "SVR")
    plot_individual_model(xgb_true_inv, xgb_pred_inv, "XGBoost")
    
    plot_residuals(lr_true_inv, arima_pred, "ARIMA")
    plot_residuals(lr_true_inv, lr_pred_inv, "LinearRegression")
    plot_residuals(gbr_true_inv, gbr_pred_inv, "GradientBoosting")
    plot_residuals(rfr_true_inv, rfr_pred_inv, "RandomForest")
    plot_residuals(svr_true_inv, svr_pred_inv, "SVR")
    plot_residuals(xgb_true_inv, xgb_pred_inv, "XGBoost")
    
    # Plot Combined Comparison
    plt.figure(figsize=(12, 6))
    plt.plot(lr_true_inv, label='Actual Prices', color='blue')
    plt.plot(arima_pred, label='ARIMA Predictions', color='green', alpha=0.7)
    plt.fill_between(range(len(arima_pred)), arima_conf_int[:, 0], arima_conf_int[:, 1], 
                     color='green', alpha=0.2, label='95% Confidence Interval')
    plt.plot(lr_pred_inv, label='Linear Regression Predictions', color='orange')
    plt.plot(gbr_pred_inv, label='Gradient Boosting Predictions', color='red')
    plt.plot(rfr_pred_inv, label='Random Forest Predictions', color='purple')
    plt.plot(svr_pred_inv, label='SVR Predictions', color='brown')
    plt.plot(xgb_pred_inv, label='XGBoost Predictions', color='cyan')
    plt.title('Model Predictions vs Actual BTC Prices (LSTM Metrics Only)')
    plt.xlabel('Time')
    plt.ylabel('Price (USD)')
    plt.legend()
    plt.savefig('latex\images\combined_model_predictions.png')
    plt.close()
    
    # Plot Performance Metrics Comparison
    models_metrics = [
        ("ARIMA", arima_mae, arima_rmse, arima_r2),
        ("Linear Regression", lr_mae, lr_rmse, lr_r2),
        ("Gradient Boosting", gbr_mae, gbr_rmse, gbr_r2),
        ("Random Forest", rfr_mae, rfr_rmse, rfr_r2),
        ("SVR", svr_mae, svr_rmse, svr_r2),
        ("XGBoost", xgb_mae, xgb_rmse, xgb_r2),
        ("LSTM", lstm_mae, lstm_rmse, lstm_r2)
    ]
    plot_performance_comparison(models_metrics)
    
    # Statistical Tests: Paired t-tests against LSTM (using synthetic errors for LSTM)
    lstm_synthetic_mae = np.full_like(lr_true_inv.flatten(), lstm_mae)
    lstm_synthetic_rmse = np.full_like(lr_true_inv.flatten(), lstm_rmse)
    
    t_tests_mae = {}
    t_tests_rmse = {}
    for name, pred, true in [
        ("ARIMA", arima_pred, scaler.inverse_transform(test_targets)),
        ("Linear Regression", lr_pred_inv, lr_true_inv),
        ("Gradient Boosting", gbr_pred_inv, gbr_true_inv),
        ("Random Forest", rfr_pred_inv, rfr_true_inv),
        ("SVR", svr_pred_inv, svr_true_inv),
        ("XGBoost", xgb_pred_inv, xgb_true_inv)
    ]:
        errors_mae = np.abs(true.flatten() - pred.flatten())
        t_stat_mae, p_val_mae = ttest_rel(errors_mae, lstm_synthetic_mae)
        errors_rmse = (true.flatten() - pred.flatten())**2
        lstm_synthetic_rmse_errors = lstm_synthetic_rmse**2
        t_stat_rmse, p_val_rmse = ttest_rel(errors_rmse, lstm_synthetic_rmse_errors)
        t_tests_mae[name] = p_val_mae
        t_tests_rmse[name] = p_val_rmse
    
    # Generate Individual LaTeX Tables for Each Model
    models = [
        ("ARIMA", arima_mae, arima_rmse, arima_r2),
        ("linear", lr_mae, lr_rmse, lr_r2),
        ("Gradient Boosting", gbr_mae, gbr_rmse, gbr_r2),
        ("Random Forest", rfr_mae, rfr_rmse, rfr_r2),
        ("SVR", svr_mae, svr_rmse, svr_r2),
        ("XGBoost", xgb_mae, xgb_rmse, xgb_r2),
        ("LSTM", lstm_mae, lstm_rmse, lstm_r2)
    ]
    
    for model_name, mae, rmse, r2 in models:
        latex_table = f"""
        \\begin{{table}}[h]
            \\centering
            \\begin{{tabular}}{{cccc}}
                \\toprule
                \\textbf{{مدل}} & \\textbf{{MAE}} & \\textbf{{RMSE}} & \\textbf{{ \\(R^2\\) }} \\\\
                \\midrule
                {model_name} & {mae:.4f} & {rmse:.4f} & {r2:.4f} \\\\
                \\bottomrule
            \\end{{tabular}}
            \\caption{{عملکرد مدل {model_name} در پیش‌بینی قیمت بیت‌کوین}}
            \\label{{tab:{model_name.lower().replace(" ", "_")}_performance}}
        \\end{{table}}
        """
        with open(rf'latex\chapters\{model_name.lower().replace(" ", "_")}_performance_table.tex', 'w', encoding='utf-8') as f:
            f.write(latex_table)
    
    # Generate Combined Comparison Table with P-values
    latex_comparison_table = f"""
    \\begin{{table}}[h]
        \\centering
        \\begin{{tabular}}{{cccccc}}
            \\toprule
            \\textbf{{مدل}} & \\textbf{{MAE}} & \\textbf{{p-value (MAE)}} & \\textbf{{RMSE}} & \\textbf{{p-value (RMSE)}} & \\textbf{{ \\(R^2\\) }} \\\\
            \\midrule
            ARIMA & {arima_mae:.4f} & {t_tests_mae['ARIMA']:.4f} & {arima_rmse:.4f} & {t_tests_rmse['ARIMA']:.4f} & {arima_r2:.4f} \\\\
            رگرسیون خطی & {lr_mae:.4f} & {t_tests_mae['Linear Regression']:.4f} & {lr_rmse:.4f} & {t_tests_rmse['Linear Regression']:.4f} & {lr_r2:.4f} \\\\
            Gradient Boosting & {gbr_mae:.4f} & {t_tests_mae['Gradient Boosting']:.4f} & {gbr_rmse:.4f} & {t_tests_rmse['Gradient Boosting']:.4f} & {gbr_r2:.4f} \\\\
            Random Forest & {rfr_mae:.4f} & {t_tests_mae['Random Forest']:.4f} & {rfr_rmse:.4f} & {t_tests_rmse['Random Forest']:.4f} & {rfr_r2:.4f} \\\\
            SVR & {svr_mae:.4f} & {t_tests_mae['SVR']:.4f} & {svr_rmse:.4f} & {t_tests_rmse['SVR']:.4f} & {svr_r2:.4f} \\\\
            XGBoost & {xgb_mae:.4f} & {t_tests_mae['XGBoost']:.4f} & {xgb_rmse:.4f} & {t_tests_rmse['XGBoost']:.4f} & {xgb_r2:.4f} \\\\
            مدل پیشنهادی (LSTM) & {lstm_mae:.4f} & -- & {lstm_rmse:.4f} & -- & {lstm_r2:.4f} \\\\
            \\bottomrule
        \\end{{tabular}}
        \\caption{{مقایسه عملکرد مدل‌های مختلف در پیش‌بینی قیمت بیت‌کوین با آزمون t جفت‌شده نسبت به LSTM}}
        \\label{{tab:model_comparison}}
    \\end{{table}}
    """
    with open(r'latex\chapters\model_comparison_table.tex', 'w', encoding='utf-8') as f:
        f.write(latex_comparison_table)

    # Generate Feature Importance Table for Tree-Based Models
    feature_names = [f'Lag {i+1}' for i in range(7)] + ['MA7']
    latex_feature_importance = f"""
    \\begin{{table}}[h]
        \\centering
        \\begin{{tabular}}{{lccc}}
            \\toprule
            \\textbf{{ویژگی}} & \\textbf{{Gradient Boosting}} & \\textbf{{Random Forest}} & \\textbf{{XGBoost}} \\\\
            \\midrule
    """
    for i, fname in enumerate(feature_names):
        latex_feature_importance += f"        {fname} & {gbr_model.feature_importances_[i]:.4f} & {rfr_model.feature_importances_[i]:.4f} & {xgb_model.feature_importances_[i]:.4f} \\\\\n"
    latex_feature_importance += f"""
            \\bottomrule
        \\end{{tabular}}
        \\caption{{اهمیت ویژگی‌ها در مدل‌های مبتنی بر درخت (Gradient Boosting، Random Forest، XGBoost)}}
        \\label{{tab:feature_importance}}
    \\end{{table}}
    """
    with open(r'latex\chapters\feature_importance_table.tex', 'w', encoding='utf-8') as f:
        f.write(latex_feature_importance)



In [114]:
additional_models()

[*********************100%***********************]  1 of 1 completed




Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-14091.988, Time=2.86 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-14096.215, Time=0.48 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-14096.271, Time=0.56 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-14096.218, Time=0.67 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-14097.829, Time=0.21 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-14094.333, Time=1.06 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 5.847 seconds

Model Performance Comparison:
ARIMA - MAE: 22068.6384, RMSE: 26600.0228, R2: -2.1119
Linear Regression - MAE: 1303.4137, RMSE: 1832.4142, R2: 0.9852
Gradient Boosting - MAE: 6098.2405, RMSE: 11970.7240, R2: 0.3698
Random Forest - MAE: 6504.4641, RMSE: 12464.2005, R2: 0.3167
SVR - MAE: 14635.8548, RMSE: 23734.6798, R2: -1.4776
XGBoost - MAE: 6956.9256, RMSE: 12993.2411, R2: 0.2575
LSTM - MAE: 0.0120, RMSE: 0.0200, R2: 0.8800
