In [None]:
import pandas as pd
import os
import numpy as np


# Paths (relative to the directory the notebook is run from)
MODEL_PATH = '../models/stock_price_predictor_model.keras'
SCALER_X_PATH = '../models/scaler_X.pkl'
SCALER_Y_PATH = '../models/scaler_y.pkl'
PREDICTIONS_DIR = '../data/predictions'

# Columns handed to the model, in the order they are selected into X below.
# NOTE(review): presumably this must match the column order scaler_X was
# fitted with -- confirm against the training notebook.
FEATURE_COLUMNS = [
    "month_sin", "month_cos", "day_of_week_sin", "day_of_week_cos", "days_since_start",
    "GDP growth rate (%)", "Unemployment rate (%)", "Real interest rate (%)",
    "Inflation rate (%)", "Population growth (%)", "Export growth (%)", "Import growth (%)",
    "SentimentNum",
]

# Load the rows to predict for; 'Date' is parsed as datetime and used as the index.
df = pd.read_csv(os.path.join(PREDICTIONS_DIR, 'X_predictions.csv'), parse_dates=['Date'], index_col='Date')

# Map sentiment strings to numerical values
sentiment_map = {'Bearish': -1, 'Neutral': 0, 'Bullish': 1}
df['SentimentNum'] = df['Sentiment'].map(sentiment_map)

# Series.map yields NaN for any label that is not a key of sentiment_map
# (including a missing value). SentimentNum is one of the model features, so
# stop here with a clear message instead of passing NaN on to the scaler/model.
unmapped_rows = df['SentimentNum'].isna()
if unmapped_rows.any():
    unknown_labels = df.loc[unmapped_rows, 'Sentiment'].unique().tolist()
    raise ValueError(
        f"Unrecognised 'Sentiment' value(s) {unknown_labels}; "
        f"expected one of {list(sentiment_map)}"
    )

# Extract calendar features from the datetime index
df['date'] = df.index
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['day_of_week'] = df['date'].dt.dayofweek

# Cyclical (sin/cos) encoding of month (period 12) and day of week (period 7)
df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
df['day_of_week_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
df['day_of_week_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)

# Whole days elapsed since 2000-01-01
df['days_since_start'] = (df['date'] - pd.Timestamp('2000-01-01')).dt.days

# Select the model input features
X = df[FEATURE_COLUMNS]

X.head()


Unnamed: 0_level_0,month_sin,month_cos,day_of_week_sin,day_of_week_cos,days_since_start,GDP growth rate (%),Unemployment rate (%),Real interest rate (%),Inflation rate (%),Population growth (%),Export growth (%),Import growth (%),SentimentNum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-12-31,-2.449294e-16,1.0,-0.781831,0.62349,8765,3.2,4.5,2.2,4.2,1,1.15,1.2,1
2026-09-01,-1.0,-1.83697e-16,0.781831,0.62349,9740,3.2,4.5,2.2,4.2,1,1.15,1.2,0
2027-09-01,-1.0,-1.83697e-16,0.974928,-0.222521,10105,3.2,4.5,2.2,4.2,1,1.15,1.2,1


In [None]:
import joblib
from tensorflow import keras
import tensorflow as tf


# Restore the two persisted scalers (feature scaler first, then target scaler).
# joblib.load unpickles the files, so only point these paths at trusted artifacts.
scaler_X, scaler_y = (
    joblib.load(scaler_path) for scaler_path in (SCALER_X_PATH, SCALER_Y_PATH)
)

# Scale the feature frame with the feature scaler before feeding it to the model.
X_scaled = scaler_X.transform(X)

# Load model and make predictions

def masked_mse(y_true, y_pred):
    """Mean squared error computed only over the non-NaN entries of ``y_true``.

    Entries where ``y_true`` is NaN are treated as missing targets: they add
    nothing to the squared-error sum and are not counted in the denominator.

    Args:
        y_true: Target tensor; NaN marks entries to ignore.
        y_pred: Prediction tensor to compare against ``y_true``.

    Returns:
        A scalar tensor: the sum of squared errors over valid entries divided
        by the number of valid entries, or 0 when there are no valid entries.
    """
    # Work in the prediction dtype so the arithmetic below never mixes dtypes.
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    mask = ~tf.math.is_nan(y_true)

    # Zero both operands at masked positions so those entries contribute
    # exactly 0 to the squared-error sum (and NaN never enters the subtraction).
    y_true = tf.where(mask, y_true, tf.zeros_like(y_true))
    y_pred = tf.where(mask, y_pred, tf.zeros_like(y_pred))

    squared_error_sum = tf.reduce_sum(tf.square(y_true - y_pred))
    valid_count = tf.reduce_sum(tf.cast(mask, y_pred.dtype))

    # divide_no_nan returns 0 when valid_count is 0, instead of the 0/0 = NaN
    # a plain division would produce for a batch whose targets are all NaN.
    return tf.math.divide_no_nan(squared_error_sum, valid_count)

# Deserialize the saved network; masked_mse is passed through custom_objects
# so Keras can resolve the loss by its name.
model = keras.models.load_model(MODEL_PATH, custom_objects={'masked_mse': masked_mse})

# Predict on the scaled features, then undo the target scaling with scaler_y.
predictions = scaler_y.inverse_transform(model.predict(X_scaled))

# One output column per ticker, rows keyed by the same dates as the input frame.
ticker_columns = ['AAPL', 'BTC-USD', 'GOOGL', 'MSFT']
output_path = os.path.join(PREDICTIONS_DIR, 'predictions.csv')

pred_df = pd.DataFrame(predictions, index=df.index, columns=ticker_columns)
pred_df.to_csv(output_path)

print(f"Predictions saved to {output_path}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
Predictions saved to ../data/predictions/predictions.csv
