# In [None]:
import time
start_time = time.time()
#!pip install --upgrade tensorflow -q
import numpy as np
import pandas as pd
import tensorflow as tf
import itertools
import os
import shutil
import openpyxl
import matplotlib.pyplot as plt
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from math import sqrt
from openpyxl.styles import Alignment
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
# Mount Google Drive; the input workbooks and the results copy both live under /content/drive
drive.mount('/content/drive')
tf.keras.backend.set_floatx('float32') # Setting the default data type for TensorFlow tensors

# --- Experiment configuration ---
# Each *_range list is one axis of the hyperparameter grid: the training loop
# below runs once per asset for every combination of these values.
Assets = ["BTC", "BIST", "NASDAQ", "GOLD"]  # Assets to train on; each is read from "<Asset>_<Time_interval>.xlsx"
Time_interval = "D"  # Time interval tag used in the input file name and in the results table
Lstm_units_range = [512]  # Units per LSTM layer
Epochs_range = [500]  # Maximum training epochs (early stopping may end training sooner)
Sequence_length_range = [60]  # Number of consecutive rows per window
Train_test_ratio_range = [0.95]  # Share of windows used for training; the rest form the test set
Dropout_ratio_range = [0.1]  # Dropout rate applied after each LSTM layer
TI_window_range = [7]  # Window/span shared by the EMA, RSI and Bollinger Band calculations
Batch_size = 32  # Batch size for training the model
learning_rate = 0.0001  # Learning rate for the model
Loss_function = 'mean_squared_error'  # Loss function for the model
Activation_function = 'tanh'  # Activation function for LSTM layers
filename = 'LSTM evaluation_results.xlsx'  # Local results workbook; one row is appended per run
# Destination on Google Drive for a copy of the results workbook.
# NOTE(review): this path spells "LTSM" while `filename` spells "LSTM" — confirm the mismatch is intended.
google_drive_path = '/content/drive/My Drive/Code_Results/LTSM evaluation_results.xlsx'

# Outer loop: one pass per asset listed in Assets
for Asset in Assets:

    # Inner loop: every combination of the hyperparameter value lists
    hyperparameter_grid = itertools.product(
        Lstm_units_range, Epochs_range, Sequence_length_range,
        Train_test_ratio_range, Dropout_ratio_range, TI_window_range)
    for lstm_units, epochs, sequence_length, train_test_ratio, dropout_ratio, ti_window in hyperparameter_grid:
        # Announce the configuration that is about to be trained
        print(f"\nTraining with Asset={Asset}, Time Int={Time_interval}, LSTM_units={lstm_units}, Epochs={epochs}...")

        # A new Adam optimizer instance is created for each configuration
        Optimizer = Adam(learning_rate=learning_rate)

        # Read the workbook for this asset/interval from Google Drive,
        # parse the 'Date' column and put the rows in chronological order
        xlsx_file_path = f"/content/drive/My Drive/Master_Data/Last/{Asset}_{Time_interval}.xlsx"
        data = pd.read_excel(xlsx_file_path)
        data = data.assign(Date=pd.to_datetime(data['Date'])).sort_values('Date')

        # Exponential Moving Average (EMA) indicator
        def calculate_ema(data, column_name, window=ti_window):
            """Return the exponential moving average of one column.

            Args:
                data: DataFrame holding the series.
                column_name: Name of the column to smooth.
                window: EMA span; defaults to the ti_window value in effect
                    when this function is defined.

            Returns:
                Series of EMA values computed with ``adjust=False``.
            """
            series = data[column_name]
            smoother = series.ewm(span=window, adjust=False)
            return smoother.mean()

        # Attach the EMA of the closing price as a feature column
        data['EMA'] = calculate_ema(data, 'Close')

        # Relative Strength Index (RSI) indicator
        def calculate_rsi(data, column_name, window=ti_window):
            """Compute an RSI based on simple rolling means of gains and losses.

            Args:
                data: DataFrame holding the series.
                column_name: Name of the column the RSI is computed from.
                window: Rolling window length; defaults to the ti_window value
                    in effect when this function is defined.

            Returns:
                Series with ``100 - 100 / (1 + RS)``, where RS is the rolling
                mean gain divided by the rolling mean loss.
            """
            delta = data[column_name].diff(1)

            # Split the row-to-row change into its upward and downward parts;
            # everything that is not a gain (or not a loss) is replaced by 0
            upward = delta.where(delta > 0, 0)
            downward = -delta.where(delta < 0, 0)

            # min_periods=1 lets the averages start at the very first row
            mean_up = upward.rolling(window=window, min_periods=1).mean()
            mean_down = downward.rolling(window=window, min_periods=1).mean()

            relative_strength = mean_up / mean_down
            return 100 - (100 / (1 + relative_strength))

        # Store the RSI of the closing price as a feature column
        data['RSI'] = calculate_rsi(data, 'Close')

        # Bollinger Bands indicator
        def calculate_bollinger_bands(data, column_name='Close', window=ti_window):
            """Return a copy of *data* extended with Bollinger Band columns.

            The input frame is left untouched; the added columns exist only
            on the returned frame.

            Args:
                data: DataFrame holding the series.
                column_name: Column the bands are computed from.
                window: Rolling window length; defaults to the ti_window value
                    in effect when this function is defined.

            Returns:
                New DataFrame with the columns 'MA' (rolling mean), 'std'
                (rolling standard deviation), 'UpperBand' and 'LowerBand'
                (mean plus/minus two standard deviations). Rows that contain
                a NaN in any column are dropped.
            """
            rolling = data[column_name].rolling(window=window)
            moving_average = rolling.mean()
            deviation = rolling.std()

            # assign() builds a new frame, so the caller's DataFrame is not
            # modified as a side effect of computing the bands
            with_bands = data.assign(
                MA=moving_average,
                std=deviation,
                UpperBand=moving_average + 2 * deviation,
                LowerBand=moving_average - 2 * deviation,
            )

            # The first window-1 rows have no rolling statistics yet
            return with_bands.dropna()

        # Add the Bollinger Band columns to the working frame
        data = calculate_bollinger_bands(data)

        # Treat zeros as missing values and drop every incomplete row.
        # NOTE(review): this also removes rows whose RSI is exactly 0, which the
        # RSI formula can legitimately produce — confirm that is intended.
        data = data.replace(0, np.nan).dropna()
        data = data[['Date', 'EMA', 'RSI', 'UpperBand', 'LowerBand', 'Close']]

        # Scale every feature to [0, 1]. 'Close' must stay the last column:
        # the target below is read from the last column of each window.
        # NOTE(review): the scaler is fitted on all rows, including the ones that
        # end up in the test set — consider fitting on the training rows only.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(data[['EMA', 'RSI', 'UpperBand', 'LowerBand', 'Close']])

        # Build overlapping windows of `sequence_length` consecutive rows.
        # The "+ 1" includes the window that ends on the final row. Without it
        # the last row is never a target, and the evaluation code (which takes
        # the last len(forecast) rows of `data` as actual values and dates)
        # would be shifted one step ahead of the forecasts.
        n_windows = len(scaled_data) - sequence_length + 1
        if n_windows < 1:
            raise ValueError(
                f"Not enough rows ({len(scaled_data)}) to build a window of length {sequence_length}")
        sequences = np.array([scaled_data[i:i + sequence_length, :] for i in range(n_windows)])

        # Inputs are all rows of a window except the last; the target is the
        # scaled 'Close' of the window's last row. shuffle=False keeps the
        # chronological order, so the test set is the most recent windows.
        X_train, X_test, y_train, y_test = train_test_split(
            sequences[:, :-1, :], sequences[:, -1, -1],
            test_size=1-train_test_ratio, shuffle=False)

        # Reshape y_train and y_test to match the Dense layer output
        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        # Network: two LSTM layers, each followed by dropout, then one output unit
        n_timesteps, n_features = X_train.shape[1], X_train.shape[2]
        model = Sequential()
        model.add(LSTM(lstm_units, return_sequences=True, input_shape=(n_timesteps, n_features), activation=Activation_function))
        model.add(Dropout(dropout_ratio))
        model.add(LSTM(lstm_units, activation=Activation_function))
        model.add(Dropout(dropout_ratio))
        model.add(Dense(1))
        model.compile(loss=Loss_function, optimizer=Optimizer)

        # Fit with the last 10% of the training windows held out for validation;
        # training stops after 50 epochs without val_loss improvement and the
        # best weights are restored
        early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
        history = model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=Batch_size,
            validation_split=0.1,
            shuffle=False,
            callbacks=[early_stopping],
            verbose=0)

        # Predict on the test windows. The scaler expects all feature columns,
        # so the prediction is placed in the last ('Close') column next to
        # zero-filled placeholders before the scaling is undone.
        scaled_forecast = model.predict(X_test)
        placeholder_features = np.zeros_like(X_test[:, -1, :-1])
        padded_forecast = np.column_stack((placeholder_features, scaled_forecast))
        forecast = scaler.inverse_transform(padded_forecast)[:, -1]

        # Error metrics against the last len(scaled_forecast) unscaled closes
        n_forecast = len(scaled_forecast)
        y_test_original = data['Close'].values[-n_forecast:]
        mape = mean_absolute_percentage_error(y_test_original, forecast)*100
        mse = mean_squared_error(y_test_original, forecast)
        rmse = sqrt(mse)
        mae = mean_absolute_error(y_test_original, forecast)
        print(f'Results are MAPE: {mape:.2f}%, RMSE: {rmse:.0f}, MAE: {mae:.0f}\n')

        # Dates of the last len(forecast) rows, used as the x axis
        forecast_dates = data['Date'].values[-len(forecast):]

        # Actual vs forecast chart
        plt.figure(figsize=(14, 7))
        plt.plot(forecast_dates, y_test_original, label='Actual', color='blue')
        plt.plot(forecast_dates, forecast, color='red', label='Forecast')
        plt.title(f'LSTM {Asset} Actual vs Forecast')
        plt.xlabel('Date')
        plt.ylabel('Close Value')
        plt.legend()
        plt.grid(True)
        plt.show()

        # Persist the settings and metrics of this run
        def save_results_to_excel(filename, Asset, Time_interval, lstm_units, epochs, sequence_length, ti_window,
                                  learning_rate, dropout_ratio, train_test_ratio, Batch_size, mape, rmse, mae):
            """Append one result row to the results workbook and copy it to Google Drive.

            Args:
                filename: Path of the local Excel workbook; created when missing.
                Asset, Time_interval, lstm_units, epochs, sequence_length,
                ti_window, learning_rate, dropout_ratio, train_test_ratio,
                Batch_size: Run settings, written to the row unchanged.
                mape: Written with two decimals and a decimal comma.
                rmse, mae: Written rounded to whole numbers.

            Side effects:
                Rewrites ``filename`` and copies it to the module-level
                ``google_drive_path``.
            """
            # Metrics are stored as pre-formatted text
            results_df = pd.DataFrame({
                'Model': ['LSTM'],
                'Asset': [Asset],
                'Time Interval': [Time_interval],
                'LSTM Units': [lstm_units],
                'Epochs': [epochs],
                'Seq. Length': [sequence_length],
                'TI Window': [ti_window],
                'LR': [learning_rate],
                'Dropout': [dropout_ratio],
                'T/T Ratio': [train_test_ratio],
                'Batch Size': [Batch_size],
                'MAPE': ["{:.2f}".format(mape).replace('.', ',')],
                'RMSE': [f"{rmse:.0f}"],
                'MAE': [f"{mae:.0f}"]
            })

            # Only the read can raise FileNotFoundError, so only it is guarded
            try:
                existing_df = pd.read_excel(filename)
            except FileNotFoundError:
                combined_df = results_df
            else:
                combined_df = pd.concat([existing_df, results_df], ignore_index=True)
            combined_df.to_excel(filename, index=False)

            # Re-open the workbook to apply formatting: centre every cell and
            # give every column the same fixed width. One pass over the cells;
            # the width is set once per column rather than once per cell.
            workbook = openpyxl.load_workbook(filename)
            worksheet = workbook.active
            centered = Alignment(horizontal='center')
            column_width = 12
            for column_cells in worksheet.columns:
                for cell in column_cells:
                    cell.alignment = centered
                column_letter = openpyxl.utils.get_column_letter(column_cells[0].column)
                worksheet.column_dimensions[column_letter].width = column_width
            workbook.save(filename)

            # Mirror the local workbook to Google Drive
            if os.path.exists(filename):
                shutil.copy(filename, google_drive_path)
            else:
                print(f"Local file {filename} not found. Check the saving path.")

        save_results_to_excel(filename, Asset, Time_interval, lstm_units, epochs, sequence_length, ti_window,
                              learning_rate, dropout_ratio, train_test_ratio, Batch_size, mape, rmse, mae)


# Report completion, then the wall-clock time elapsed since start_time
print('Execution completed and results saved to local and drive')
end_time = time.time()
duration_seconds = end_time - start_time
duration_minutes = duration_seconds / 60
print(f'Execution duration: {duration_minutes:.2f} minutes')
