In [None]:
#@title Packages

import random
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from scipy import stats
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score, mean_absolute_error, r2_score, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, LSTM, GRU, Dense, LeakyReLU, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adagrad, Adadelta
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError, MeanAbsolutePercentageError
from tensorflow.keras.backend import sqrt, mean, square
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.backend as K

In [None]:
#@title Read and Prepare Data

def read_prepare_data(symbol, data_dir='/Users/pedroalexleite/Desktop/Tese/Dados'):
    """Load one symbol's closing prices and min-max scale them.

    Reads ``dataset4.csv``, ``train.csv`` and ``test.csv`` from ``data_dir``,
    keeps the rows whose ``Symbol`` equals ``symbol``, and reduces each frame
    to a single ``Close`` column indexed by ``Date``.

    The scaler is fitted on the train split only and then applied to the
    test split and to the full series.

    Args:
        symbol: value to match in the ``Symbol`` column.
        data_dir: directory containing the three CSV files. Defaults to the
            path this notebook has always read from.

    Returns:
        Tuple ``(scaler, data, train, test)``: the fitted ``MinMaxScaler``
        followed by the scaled full, train and test DataFrames.

    Raises:
        ValueError: if one of the files has no rows for ``symbol``.
    """
    def load_close(filename):
        # One file -> this symbol's rows, 'Close' only, indexed by 'Date'.
        frame = pd.read_csv(f'{data_dir}/{filename}')
        frame = frame.loc[frame['Symbol'] == symbol, ['Date', 'Close']]
        if frame.empty:
            # Fail here with a readable message instead of deep inside the scaler.
            raise ValueError(f"no rows for symbol {symbol!r} in {filename}")
        return frame.set_index('Date')

    data = load_close('dataset4.csv')
    train = load_close('train.csv')
    test = load_close('test.csv')

    #normalize
    scaler = MinMaxScaler(feature_range=(0, 1))

    def as_frame(values, like):
        # Scaler output is a bare array; restore the column name and date index.
        return pd.DataFrame(values, columns=like.columns, index=like.index)

    # Fit on train only; test and the full series reuse the train min/max.
    train = as_frame(scaler.fit_transform(train), train)
    test = as_frame(scaler.transform(test), test)
    data = as_frame(scaler.transform(data), data)

    return scaler, data, train, test

In [None]:
#@title Create Dataset

def create_dataset(dataframe, look_back):
    """Turn a series into (window, next value) pairs for supervised training.

    Sample ``k`` pairs rows ``k .. k+look_back-1`` (the window) with row
    ``k+look_back`` (the target). ``len(dataframe) - look_back - 1`` samples
    are produced, so the final row is never used as a target.

    Args:
        dataframe: object exposing ``.values`` (rows are time steps).
        look_back: number of consecutive rows in each window.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays holding the windows and the targets.
        Both are empty when the input is too short to yield a sample.
    """
    values = dataframe.values
    n_samples = len(values) - look_back - 1  # a negative count yields no samples

    windows = [values[start:start + look_back] for start in range(n_samples)]
    targets = [values[start + look_back] for start in range(n_samples)]

    return np.array(windows), np.array(targets)

In [None]:
#@title Reshape

def reshape(train, test, look_back):
    """Build the windowed train and test arrays fed to the network.

    Each split is passed through ``create_dataset`` and its windows are
    reshaped to ``(shape[0], shape[1], shape[2])`` of themselves, which
    requires the windows array to be three-dimensional.

    Args:
        train: train split, passed to ``create_dataset``.
        test: test split, passed to ``create_dataset``.
        look_back: window length.

    Returns:
        Tuple ``(trainX, trainY, testX, testY)``.
    """
    def windowed(frame):
        features, targets = create_dataset(frame, look_back)
        # Index each axis explicitly so a non-3-D result fails loudly here.
        target_shape = (features.shape[0], features.shape[1], features.shape[2])
        return np.reshape(features, target_shape), targets

    trainX, trainY = windowed(train)
    testX, testY = windowed(test)

    return trainX, trainY, testX, testY

In [None]:
#@title Forecast

def forecast_values(testY, look_back, horizon, model):
    """Recursively forecast ``horizon`` steps beyond the end of ``testY``.

    Each step feeds the latest ``look_back`` values (observed values first,
    then earlier forecasts) to ``model.predict`` and appends the prediction
    to the history used by the next step.

    Args:
        testY: array of shape ``(n, 1)`` holding the known series; not modified.
        look_back: number of most recent values given to the model per step.
        horizon: number of future steps to produce.
        model: object whose ``predict(x, verbose=0)`` accepts an array of
            shape ``(1, look_back, 1)`` and returns the next value first.

    Returns:
        Array of shape ``(horizon, 1)`` with the forecast values, in order.

    Raises:
        ValueError: if ``look_back`` is not positive or exceeds ``len(testY)``.
    """
    history = np.array(testY)  # copy, so the caller's array is left untouched
    if look_back < 1 or len(history) < look_back:
        raise ValueError(
            f"look_back must be between 1 and len(testY)={len(history)}, got {look_back}"
        )

    for _ in range(horizon):
        # The window must end at the newest value; stopping one short of it
        # would make every forecast lag the series by one step.
        window = np.reshape(history[len(history) - look_back:], (1, look_back, 1))
        next_value = np.reshape(model.predict(window, verbose=0)[0], (1, 1))
        history = np.concatenate((history, next_value), axis=0)

    # Everything appended after the original series is the forecast.
    forecast = history[len(testY):]
    return forecast

In [None]:
#@title Calculate All Metrics

def calculate_all_metrics(actual, predicted):
    """Compute regression and direction-of-change metrics for one series.

    Regression metrics compare the values directly. The classification
    metrics compare the direction of each step-to-step change, where a
    change of zero or more counts as class 1 and a drop as class 0.

    Args:
        actual: numpy array of true values.
        predicted: numpy array of predicted values.

    Returns:
        Tuple ``(rmse, mse, mae, mape, r2, accuracy, precision, recall, f1)``.
        MAPE and the four classification metrics are percentages.
    """
    #regression
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    # Zero actual values are divided by 1 instead, to avoid dividing by zero.
    safe_denominator = np.where(actual != 0, actual, 1)
    mape = np.mean(np.abs((actual - predicted) / safe_denominator)) * 100
    r2 = r2_score(actual, predicted)

    #classification (auxiliary)
    actual_binary = (np.diff(actual.flatten()) >= 0).astype(int)
    predicted_binary = (np.diff(predicted.flatten()) >= 0).astype(int)

    #classification
    accuracy = accuracy_score(actual_binary, predicted_binary) * 100
    precision, recall, f1 = (
        scorer(actual_binary, predicted_binary, zero_division=0) * 100
        for scorer in (precision_score, recall_score, f1_score)
    )

    return rmse, mse, mae, mape, r2, accuracy, precision, recall, f1

In [None]:
#@title Auxiliary Function

def predict_forecast_plot(data, train, test, trainX, trainY, testX, testY, nepochs, look_back, horizon, plot_predictions, model):
    """Score a fitted model on the test windows, forecast ahead and optionally plot.

    Scaled values are mapped back with ``scaler``, which is NOT a parameter:
    it is read from the notebook's global namespace, so it must be bound
    (``read_prepare_data`` returns it) before this function is called.

    Args:
        data: scaled full series; used as the plot baseline and to size the
            plot arrays.
        train, test, trainY, nepochs: kept for call compatibility; unused.
        trainX, testX: model inputs for the train and test windows.
        testY: scaled test targets; also the starting history for the forecast.
        look_back: window length, used for forecasting and plot offsets.
        horizon: passed through to ``forecast_values``.
        plot_predictions: when truthy, draw the real, predicted and forecast curves.
        model: fitted object exposing ``predict``.

    Returns:
        Tuple of the nine test-set metrics (rmse, mse, mae, mape, r2,
        accuracy, precision, recall, f1) followed by the flattened test
        predictions, test targets and forecast, all after inverse scaling.
    """
    #make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    #forecast
    forecast = forecast_values(testY, look_back, horizon, model)

    #invert predictions (uses the notebook-level scaler)
    trainPredict = scaler.inverse_transform(trainPredict)
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform(testY)
    forecast = scaler.inverse_transform(forecast)

    #calculate all metrics for test set
    test_rmse, test_mse, test_mae, test_mape, test_r2, test_accuracy, test_precision, test_recall, test_f1 = calculate_all_metrics(testY, testPredict)

    print('Evaluation:')
    print('- Regression:')
    print(f'RMSE: {test_rmse:.2f}')
    print(f'MSE: {test_mse:.2f}')
    print(f'MAE: {test_mae:.2f}')
    print(f'MAPE: {test_mape:.2f}%')
    print(f'R²: {test_r2:.2f}')
    print('- Classification:')
    print(f'Accuracy: {test_accuracy:.2f}%')
    print(f'Precision: {test_precision:.2f}%')
    print(f'Recall: {test_recall:.2f}%')
    print(f'F1-Score: {test_f1:.2f}%')

    #plot predictions
    if plot_predictions:
        # NOTE(review): the offsets below assume `data` is the train rows
        # followed by the test rows, and that each split loses look_back+1
        # rows when windowed — confirm before changing create_dataset.

        #shift train predictions for plotting
        trainPredictPlot = np.empty_like(data)
        trainPredictPlot[:, :] = np.nan
        trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

        #shift test predictions for plotting
        testPredictPlot = np.empty_like(data)
        testPredictPlot[:, :] = np.nan
        testPredictPlot[len(trainPredict)+(look_back*2)+1:len(data)-1, :] = testPredict

        #shift forecast for plotting: NaN over the known series, forecast appended after it
        forecastPlot = np.empty_like(pd.concat([data, pd.DataFrame(forecast)]))
        forecastPlot[:, :] = np.nan
        forecastPlot[len(data):len(forecastPlot),:] = forecast

        #plot baseline, predictions and forecast
        plt.figure(figsize=(15,7))
        plt.plot(scaler.inverse_transform(data), label='real')
        plt.plot(trainPredictPlot, label='train set prediction')
        plt.plot(testPredictPlot, label='test set prediction')
        plt.plot(forecastPlot, label='forecast')
        plt.legend()
        plt.title('Price Predictions and Forecast')
        plt.show()

    return (test_rmse, test_mse, test_mae, test_mape, test_r2, test_accuracy, test_precision, test_recall, test_f1, 
            testPredict.flatten(), testY.flatten(), forecast.flatten())

In [9]:
#@title Train and Predict

def model(data, train, test, look_back=30, nepochs=50, horizon=7, plot_predictions=False):
    """Train a single-layer LSTM on one symbol and evaluate it.

    Windows the train and test splits, fits an LSTM(16) -> Dense(1) network
    with mean-squared-error loss and the Adam optimizer, then hands the
    fitted network to ``predict_forecast_plot``.

    Args:
        data: scaled full series, forwarded for plotting.
        train: scaled train split.
        test: scaled test split.
        look_back: window length fed to the network.
        nepochs: number of training epochs.
        horizon: forwarded to ``predict_forecast_plot``.
        plot_predictions: forwarded to ``predict_forecast_plot``.

    Returns:
        Whatever ``predict_forecast_plot`` returns for the fitted network.
    """
    #reshape
    trainX, trainY, testX, testY = reshape(train, test, look_back)

    #create the network
    n_steps, n_features = trainX.shape[1], trainX.shape[2]
    inputs = Input(shape=(n_steps, n_features))
    hidden = LSTM(16, activation='relu')(inputs)
    prediction = Dense(1, activation='linear')(hidden)
    network = Model(inputs=inputs, outputs=prediction)
    network.compile(loss='mean_squared_error', optimizer='adam')

    #fit (one sample per gradient step)
    network.fit(trainX, trainY, epochs=nepochs, batch_size=1, verbose=1)

    #predict, forecast and plot
    return predict_forecast_plot(
        data, train, test, trainX, trainY, testX, testY,
        nepochs, look_back, horizon, plot_predictions, network,
    )

# Single-symbol run: load and scale AAPL, then train, evaluate and plot.
# `scaler` has to stay a notebook-level name because predict_forecast_plot reads it as a global.
scaler, data, train, test = read_prepare_data('AAPL')
# model() returns 12 values; the last three (test predictions, test targets, forecast) are discarded here.
rmse, mse, mae, mape, r2, accuracy, precision, recall, f1, _, _, _ = model(data, train, test, look_back=30, nepochs=50, horizon=7, plot_predictions=True)

In [None]:
#@title Evaluate each Symbol

# Train and evaluate one model per symbol, collecting the metrics and the raw
# prediction arrays, then write both tables to disk.
results = []
risk_return = []
# Read the symbol list straight from the dataset file so this cell does not
# depend on a variable created by some other (possibly unexecuted) cell.
symbols = pd.read_csv('/Users/pedroalexleite/Desktop/Tese/Dados/dataset4.csv', usecols=['Symbol'])['Symbol'].unique()

for symbol in symbols:
    try:
        # `scaler` is rebound at notebook level on purpose: predict_forecast_plot
        # reads it as a global and needs the scaler fitted for this symbol.
        scaler, data, train, test = read_prepare_data(symbol)
        
        full_results = model(data, train, test, look_back=30, nepochs=50, horizon=7, plot_predictions=False)
        
        # The forecast array is named `forecast`, not `forecast_values`: the
        # latter would overwrite the forecast_values() function and make every
        # later symbol fail with "'numpy.ndarray' object is not callable".
        (rmse, mse, mae, mape, r2, accuracy, precision, recall, f1, 
         test_predictions, test_actual, forecast) = full_results
        
        results.append({
            "Symbol": symbol,
            "RMSE": rmse,
            "MSE": mse,
            "MAE": mae,
            "MAPE": mape,
            "R2": r2,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1": f1
        })
        
        risk_return.append({
            "Symbol": symbol,
            "Test Predictions": test_predictions,
            "Test Actual": test_actual,
            "Forecast Values": forecast
        })
        
    except Exception as e:
        # Best effort: report the failing symbol and continue with the rest.
        print(f"Error processing {symbol}: {e}")

results_df = pd.DataFrame(results)
risk_return_df = pd.DataFrame(risk_return)
results_df.to_csv('/Users/pedroalexleite/Desktop/Tese/Dados/final_results.csv', index=False)
risk_return_df.to_csv('/Users/pedroalexleite/Desktop/Tese/Dados/risk_return.csv', index=False)

In [9]:
#@title DUMMY DFs !!!!!!!!!!!!!!!!!!

# Stand-in results: random metric and price tables built without training any model.
# NOTE(review): this cell rebinds `data`, `symbols`, `results_df` and `risk_return_df`
# and rewrites final_results.csv with random numbers — the same names and file the
# evaluation loop produces. Skip it when real results are wanted.
data = pd.read_csv('/Users/pedroalexleite/Desktop/Tese/Dados/dataset4.csv')
symbols = data["Symbol"].unique()

# Fixed seed so the dummy tables are identical on every run.
np.random.seed(42)  

# One random value per symbol for each metric column.
# NOTE(review): Accuracy/Precision/Recall/F1 are drawn on a 0.5-1.0 scale here,
# whereas calculate_all_metrics reports them multiplied by 100.
dummy_1 = {
    "Symbol": symbols,
    "RMSE": np.random.uniform(0.5, 2.0, len(symbols)),
    "MSE": np.random.uniform(0.2, 4.0, len(symbols)),
    "MAE": np.random.uniform(0.3, 1.5, len(symbols)),
    "MAPE": np.random.uniform(5, 20, len(symbols)),
    "R2": np.random.uniform(0.0, 1.0, len(symbols)),
    "Accuracy": np.random.uniform(0.5, 1.0, len(symbols)),
    "Precision": np.random.uniform(0.5, 1.0, len(symbols)),
    "Recall": np.random.uniform(0.5, 1.0, len(symbols)),
    "F1": np.random.uniform(0.5, 1.0, len(symbols)),
}

results_df = pd.DataFrame(dummy_1)
results_df.to_csv('/Users/pedroalexleite/Desktop/Tese/Dados/final_results.csv', index=False)

# One random array per symbol and column: 335 values for the two test series, 7 for the forecast
# (presumably chosen to match the real test length and the horizon=7 used in the model() calls — confirm).
dummy_2 = {
    "Symbol": symbols,
    "Test Predictions": [np.random.uniform(50, 300, 335) for _ in range(len(symbols))],
    "Test Actual": [np.random.uniform(50, 300, 335) for _ in range(len(symbols))],    
    "Forecast Values": [np.random.uniform(50, 300, 7) for _ in range(len(symbols))], 
}

# Kept in memory only; unlike results_df this table is not written to disk here.
risk_return_df = pd.DataFrame(dummy_2)

In [11]:
#@title Risk-Return Calculation

def calculate_risk_return_tradeoff(predicted_prices, actual_prices=None):
    """Summarise a price path as its mean step return and the spread of those returns.

    The return of each step is the change from the previous price divided
    by that previous price.

    Args:
        predicted_prices: sequence of prices, in time order.
        actual_prices: accepted but not used.

    Returns:
        Tuple ``(expected_return, risk)``: the mean and the standard
        deviation (``np.std`` defaults) of the step returns.
    """
    price_changes = np.diff(predicted_prices)
    previous_prices = predicted_prices[:-1]
    step_returns = price_changes / previous_prices

    return np.mean(step_returns), np.std(step_returns)

# Per-symbol expected return and risk, computed from each row's test-set predictions.
daily_returns = []
daily_risks = []

for idx, row in risk_return_df.iterrows():
    try:
        # A string cell is parsed back into a Python object (presumably for tables reloaded from CSV);
        # anything else is used as-is.
        # NOTE(review): eval() executes whatever code the cell contains — only safe on trusted input.
        test_predictions = eval(row['Test Predictions']) if isinstance(row['Test Predictions'], str) else row['Test Predictions']
        expected_return, risk = calculate_risk_return_tradeoff(test_predictions)
        daily_returns.append(expected_return)
        daily_risks.append(risk)
        
    except Exception as e:
        print(f"Error processing {row['Symbol']}: {e}")
        # Append NaN so both lists stay aligned with the rows of risk_return_df.
        daily_returns.append(np.nan)
        daily_risks.append(np.nan)

risk_return_df_2 = pd.DataFrame({
    'Symbol': risk_return_df['Symbol'],
    'Daily Return': daily_returns,
    'Daily Risk': daily_risks
})

# Attach each symbol's sector; the left join keeps symbols that have no match in sp500.csv.
sp500 = pd.read_csv("/Users/pedroalexleite/Desktop/Tese/Dados/sp500.csv")
merged_df_3 = pd.merge(risk_return_df_2, sp500[['Symbol', 'Sector']], on='Symbol', how='left')
# NOTE(review): this is the same path the evaluation loop writes its raw predictions table to,
# so that file is replaced by this summary table.
merged_df_3.to_csv('/Users/pedroalexleite/Desktop/Tese/Dados/risk_return.csv', index=False)