In [3]:
from datetime import date, timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [21]:
# List of stocks to predict
stocks = ['BTC-USD']
stock = stocks[0]
today = date.today()

# Create an empty dictionary to store prediction values
predictions = {}
stock_data = yf.download(stock, start='2018-01-01', end='2024-05-18')
ohlc = stock_data[['Open', 'High', 'Low', 'Close']].values


[*********************100%%**********************]  1 of 1 completed


In [22]:
# Scale the data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_ohlc = scaler.fit_transform(ohlc)

In [23]:
# Create sequences of length 60
seq_length = 60
X = []
y = []
for i in range(seq_length, len(scaled_ohlc)):
    X.append(scaled_ohlc[i-seq_length:i, :])
    y.append(scaled_ohlc[i, :])

X = np.array(X)
y = np.array(y)

In [24]:
# Reshape the input data for LSTM
X = np.reshape(X, (X.shape[0], X.shape[1], 4))

In [25]:
# Define the LSTM model
model = Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X.shape[1], 4)))
model.add(LSTM(units=100))
model.add(Dense(units=25))
model.add(Dense(units=4))

In [26]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

In [27]:
# Train the model
model.fit(X, y, epochs=1, batch_size=32)



<keras.src.callbacks.History at 0x7b0c423d1e10>

In [41]:
predicted_prices = []
last_seq = ohlc[-seq_length:]
for _ in range(7):
    last_seq_scaled = scaler.transform(last_seq)
    next_day_scaled = model.predict(np.array([last_seq_scaled]))
    next_day = scaler.inverse_transform(next_day_scaled)[0]
    predicted_prices.append(next_day)
    last_seq = np.append(last_seq[1:], [next_day], axis=0)



In [42]:
# Convert predicted prices to DataFrame with dates
date_range = pd.date_range(start=stock_data.index[-1] + timedelta(days=1), periods=7)
predicted_df = pd.DataFrame(predicted_prices, columns=['Open', 'High', 'Low', 'Close'], index=date_range)

# Print the predictions DataFrame
print(predicted_df)

                    Open          High           Low         Close
2024-05-18  63219.039062  63732.089844  60926.117188  62305.535156
2024-05-19  63402.746094  63961.113281  61095.355469  62504.433594
2024-05-20  63524.242188  64117.195312  61202.941406  62635.894531
2024-05-21  63602.375000  64213.613281  61266.593750  62714.546875
2024-05-22  63649.593750  64266.191406  61299.976562  62756.089844
2024-05-23  63674.976562  64287.859375  61312.675781  62771.746094
2024-05-24  63684.937500  64288.585938  61311.433594  62769.613281


In [2]:
from tensorflow.keras.models import load_model
model.save('model.h5')

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
model = load_model("/content/drive/MyDrive/Code/models/model(current).h5")

In [7]:
stock = 'BTC-USD'
start_date = '2018-01-01'
prediction_start_date = '2024-04-01'
prediction_end_date = '2024-04-07'

In [8]:
# Download historical stock data up to the start of the prediction period
historical_data = yf.download(stock, start=start_date, end=prediction_start_date)

# Extract the OHLC values
ohlc = historical_data[['Open', 'High', 'Low', 'Close']].values

[*********************100%%**********************]  1 of 1 completed


In [9]:
# Scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_ohlc = scaler.fit_transform(ohlc)

# Prepare the sequence for prediction
seq_length = 60
last_seq = scaled_ohlc[-seq_length:]

# Calculate the number of days to predict
num_days_to_predict = (pd.to_datetime(prediction_end_date) - pd.to_datetime(prediction_start_date)).days + 1

# Make predictions
predicted_prices = []

In [10]:
for _ in range(num_days_to_predict):
    next_day_scaled = model.predict(np.array([last_seq]))
    next_day = scaler.inverse_transform(next_day_scaled)[0]
    predicted_prices.append(next_day)
    last_seq = np.append(last_seq[1:], [next_day], axis=0)

# Convert predicted prices to DataFrame with dates
date_range = pd.date_range(start=prediction_start_date, periods=num_days_to_predict)
predicted_df = pd.DataFrame(predicted_prices, columns=['Open', 'High', 'Low', 'Close'], index=date_range)

# Print predicted prices
print("Predicted Prices:")
print(predicted_df)

Predicted Prices:
                    Open          High           Low         Close
2024-04-01  68264.835938  70009.929688  66185.281250  68946.234375
2024-04-02  72738.609375  71197.773438  73976.960938  66372.507812
2024-04-03  74958.539062  69599.007812  81222.031250  65315.175781
2024-04-04  72207.781250  67633.726562  80342.523438  63458.785156
2024-04-05  69934.703125  66757.125000  79763.726562  61487.171875
2024-04-06  68465.625000  64587.250000  80433.632812  60211.335938
2024-04-07  66885.468750  65116.199219  81624.085938  60170.421875


In [11]:
# Fetch actual data from Yahoo Finance for the comparison period
actual_stock_data = yf.download(stock, start=prediction_start_date, end=(pd.to_datetime(prediction_end_date) + timedelta(days=1)).strftime('%Y-%m-%d'))
actual_ohlc = actual_stock_data[['Open', 'High', 'Low', 'Close']]

# Ensure the indices are comparable
actual_ohlc.index = actual_ohlc.index.strftime('%Y-%m-%d')
predicted_df.index = predicted_df.index.strftime('%Y-%m-%d')

# Calculate performance metrics (assuming actual_ohlc is available)
comparison_df = predicted_df.join(actual_ohlc, lsuffix='_pred', rsuffix='_actual')

print("\nComparison of Predicted vs Actual Prices:")
print(comparison_df)

[*********************100%%**********************]  1 of 1 completed


Comparison of Predicted vs Actual Prices:
               Open_pred     High_pred      Low_pred    Close_pred  \
2024-04-01  68264.835938  70009.929688  66185.281250  68946.234375   
2024-04-02  72738.609375  71197.773438  73976.960938  66372.507812   
2024-04-03  74958.539062  69599.007812  81222.031250  65315.175781   
2024-04-04  72207.781250  67633.726562  80342.523438  63458.785156   
2024-04-05  69934.703125  66757.125000  79763.726562  61487.171875   
2024-04-06  68465.625000  64587.250000  80433.632812  60211.335938   
2024-04-07  66885.468750  65116.199219  81624.085938  60170.421875   

             Open_actual   High_actual    Low_actual  Close_actual  
2024-04-01  71333.484375  71342.093750  68110.695312  69702.148438  
2024-04-02  69705.023438  69708.382812  64586.593750  65446.972656  
2024-04-03  65446.671875  66914.320312  64559.898438  65980.812500  
2024-04-04  65975.695312  69291.257812  65113.796875  68508.843750  
2024-04-05  68515.757812  68725.757812  66011.47656




In [12]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Ensure actual_ohlc is in DataFrame format and has the same index as predicted_df
actual_ohlc = actual_ohlc.loc[predicted_df.index]

# Compute MAE
mae = mean_absolute_error(actual_ohlc, predicted_df)

# Compute MSE
mse = mean_squared_error(actual_ohlc, predicted_df)

# Compute RMSE
rmse = np.sqrt(mse)

# Compute MAPE
mape = np.mean(np.abs((actual_ohlc - predicted_df) / actual_ohlc)) * 100

# Compute R-squared
r2 = r2_score(actual_ohlc, predicted_df)

# Print metrics
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Percentage Error (MAPE): {mape}%")
print(f"R-squared (R²): {r2}")


Mean Absolute Error (MAE): 5697.949776785715
Mean Squared Error (MSE): 56183828.567770824
Root Mean Squared Error (RMSE): 7495.587273040774
Mean Absolute Percentage Error (MAPE): 8.486913213454574%
R-squared (R²): -20.59381478510946
