In [22]:
# Import necessary libraries
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from finta import TA
import tensorflow as tf
from tensorflow.keras.models import Sequential
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from keras.regularizers import l2
from sklearn.model_selection import KFold
from tensorflow.keras.models import clone_model


In [23]:
# Load data
TSLA_path = Path("../files/TSLA.csv")
VIX_path = Path("../files/^VIX.csv")
FEDFUNDS_path = Path("../files/FEDFUNDS (1).csv")
SPY_index = Path("../files/SPY.csv")

In [24]:
tesla_df = pd.read_csv(TSLA_path, index_col="Date")
fear_index_df = pd.read_csv(VIX_path, index_col="Date").rename(columns={"Close": "Fear_index"})
spy_index_df = pd.read_csv(SPY_index, index_col="Date").rename(columns={"Close": "SPY_index"})
fedfunds_df = pd.read_csv(FEDFUNDS_path, index_col="DATE")

In [25]:
# Convert index to datetime index
fedfunds_df.index = pd.to_datetime(fedfunds_df.index)

# Resample the fed_funds_df to have daily frequency and forward fill the values
fedfunds_df_monthly = fedfunds_df.resample('D').ffill()


In [26]:
# Concatenate dataframes
concatenated_df = pd.concat([tesla_df, fear_index_df['Fear_index'], spy_index_df['SPY_index']], axis=1)
concatenated_df.index = pd.to_datetime(concatenated_df.index)


In [27]:
# Merge with fedfunds_df
concatenated_df = pd.merge(concatenated_df, fedfunds_df_monthly, left_index=True, right_index=True)


In [28]:
# Drop rows with NaN values
concatenated_df = concatenated_df.dropna()


In [29]:
# Shift target variable
concatenated_df['Target'] = concatenated_df['Close'].shift(-5)
concatenated_df = concatenated_df.dropna()

In [30]:
concatenated_df.head(10)

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Fear_index,SPY_index,FEDFUNDS,Target
2014-04-30,13.573333,13.877333,13.418667,13.859333,13.859333,66609000.0,13.41,188.309998,0.09,13.423333
2014-05-01,13.805333,14.268,13.712667,13.848667,13.848667,81598500.0,13.25,188.330002,0.09,11.906
2014-05-02,13.906667,14.090667,13.768,14.060667,14.060667,61302000.0,12.91,188.059998,0.09,12.150667
2014-05-05,13.965333,14.512667,13.901333,14.440667,14.440667,77205000.0,13.29,188.419998,0.09,12.311333
2014-05-06,14.44,14.577333,13.79,13.818667,13.818667,84550500.0,13.8,186.779999,0.09,12.677333
2014-05-07,13.976,14.013333,13.15,13.423333,13.423333,152689500.0,13.4,187.880005,0.09,12.708
2014-05-08,12.133333,12.96,11.866667,11.906,11.906,300849000.0,13.43,187.679993,0.09,12.572667
2014-05-09,11.990667,12.226667,11.814667,12.150667,12.150667,127428000.0,12.92,187.960007,0.09,12.770667
2014-05-12,12.258,12.479333,11.992,12.311333,12.311333,105034500.0,12.23,189.789993,0.09,13.072667
2014-05-13,12.250667,12.756,12.2,12.677333,12.677333,106458000.0,12.13,189.960007,0.09,13.02


In [31]:
# Calculate technical indicators using finta
data = concatenated_df.copy()  # Use the existing DataFrame concatenated_df
data['MA'] = TA.SMA(data, 20)  # 20-period Simple Moving Average
data['RSI'] = TA.RSI(data, 14)  # 14-period RSI

# Calculate Bollinger Bands correctly
bb_bands = TA.BBANDS(data, 20, 2)

# Assign Bollinger Bands values to DataFrame columns
data['BB_UPPER'] = bb_bands['BB_UPPER']
data['BB_MIDDLE'] = bb_bands['BB_MIDDLE']
data['BB_LOWER'] = bb_bands['BB_LOWER']

# Convert index to datetime
data.index = pd.to_datetime(data.index)

# Display the calculated technical indicators
data.tail()


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Fear_index,SPY_index,FEDFUNDS,Target,MA,RSI,BB_UPPER,BB_MIDDLE,BB_LOWER
2024-03-18,170.020004,174.720001,165.899994,173.800003,173.800003,108214400.0,14.33,512.859985,5.33,172.630005,185.383499,40.373166,211.96241,185.383499,158.804588
2024-03-19,172.360001,172.820007,167.419998,171.320007,171.320007,77271400.0,13.82,515.710022,5.33,177.669998,184.2615,38.755841,211.243059,184.2615,157.279941
2024-03-20,173.0,176.25,170.820007,175.660004,175.660004,83846700.0,13.04,520.47998,5.33,179.830002,183.306,43.054999,210.073297,183.306,156.538703
2024-03-21,176.389999,178.179993,171.800003,172.820007,172.820007,73178000.0,12.92,522.200012,5.33,175.789993,182.0765,41.025514,208.37086,182.0765,155.782139
2024-03-22,166.690002,171.199997,166.300003,170.830002,170.830002,75454700.0,13.06,521.210022,5.33,175.220001,181.0195,39.616361,207.338898,181.0195,154.700101


In [32]:
data = data.dropna()
data.head(10)

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Fear_index,SPY_index,FEDFUNDS,Target,MA,RSI,BB_UPPER,BB_MIDDLE,BB_LOWER
2014-05-28,14.001333,14.184667,13.684,14.016,14.016,82426500.0,11.68,191.380005,0.09,13.599333,13.2768,60.825644,14.745372,13.2768,11.808228
2014-05-29,14.038,14.166,13.848,14.016,14.016,55387500.0,11.57,192.369995,0.09,13.793333,13.284633,60.825644,14.767886,13.284633,11.801381
2014-05-30,14.02,14.32,13.801333,13.851333,13.851333,83716500.0,11.4,192.679993,0.09,13.878,13.284767,57.035166,14.768234,13.284767,11.8013
2014-06-02,13.822,13.956667,13.444667,13.646667,13.646667,70021500.0,11.58,192.899994,0.1,13.687333,13.264067,52.643999,14.713101,13.264067,11.815032
2014-06-03,13.566,13.866667,13.506,13.662667,13.662667,57912000.0,11.87,192.800003,0.1,13.486667,13.225167,52.948975,14.57991,13.225167,11.870424
2014-06-04,13.623333,13.750667,13.36,13.599333,13.599333,51411000.0,12.08,193.190002,0.1,13.631333,13.2142,51.534199,14.552161,13.2142,11.876239
2014-06-05,13.631333,13.946667,13.603333,13.793333,13.793333,60819000.0,11.68,194.449997,0.1,13.568,13.2327,55.460011,14.592884,13.2327,11.872516
2014-06-06,13.983333,14.054,13.812,13.878,13.878,46107000.0,10.73,195.380005,0.1,13.761333,13.3313,57.093493,14.566726,13.3313,12.095874
2014-06-09,13.863333,13.999333,13.613333,13.687333,13.687333,42085500.0,11.15,195.580002,0.1,14.974,13.408133,52.430209,14.519284,13.408133,12.296983
2014-06-10,13.628667,13.798,13.436667,13.486667,13.486667,52720500.0,10.99,195.600006,0.1,15.444667,13.4669,47.987795,14.450849,13.4669,12.482951


In [33]:
# Define features and target
X = concatenated_df.drop("Close", axis=1)
y = concatenated_df["Close"]

In [34]:
data.drop(columns=['Open', 'High', 'Low', 'Close', 'Adj Close'], inplace=True)


In [35]:
# Display the modified DataFrame
data.head()

Unnamed: 0,Volume,Fear_index,SPY_index,FEDFUNDS,Target,MA,RSI,BB_UPPER,BB_MIDDLE,BB_LOWER
2014-05-28,82426500.0,11.68,191.380005,0.09,13.599333,13.2768,60.825644,14.745372,13.2768,11.808228
2014-05-29,55387500.0,11.57,192.369995,0.09,13.793333,13.284633,60.825644,14.767886,13.284633,11.801381
2014-05-30,83716500.0,11.4,192.679993,0.09,13.878,13.284767,57.035166,14.768234,13.284767,11.8013
2014-06-02,70021500.0,11.58,192.899994,0.1,13.687333,13.264067,52.643999,14.713101,13.264067,11.815032
2014-06-03,57912000.0,11.87,192.800003,0.1,13.486667,13.225167,52.948975,14.57991,13.225167,11.870424


In [36]:
data_clean = data.dropna()
data_clean.index.rename('date', inplace=True)
data_clean.to_csv('../clean_data/TSLA_prepared_data.csv', index=True)

In [37]:
data_clean.head()

Unnamed: 0_level_0,Volume,Fear_index,SPY_index,FEDFUNDS,Target,MA,RSI,BB_UPPER,BB_MIDDLE,BB_LOWER
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-05-28,82426500.0,11.68,191.380005,0.09,13.599333,13.2768,60.825644,14.745372,13.2768,11.808228
2014-05-29,55387500.0,11.57,192.369995,0.09,13.793333,13.284633,60.825644,14.767886,13.284633,11.801381
2014-05-30,83716500.0,11.4,192.679993,0.09,13.878,13.284767,57.035166,14.768234,13.284767,11.8013
2014-06-02,70021500.0,11.58,192.899994,0.1,13.687333,13.264067,52.643999,14.713101,13.264067,11.815032
2014-06-03,57912000.0,11.87,192.800003,0.1,13.486667,13.225167,52.948975,14.57991,13.225167,11.870424


In [38]:
# Define date cutoff for data split
date_cutoff = "2022-04-30"

# Split data
X_train = X[X.index <= date_cutoff]
X_test = X[X.index > date_cutoff]
y_train = y[y.index <= date_cutoff]
y_test = y[y.index > date_cutoff]

In [39]:
# Scale data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [40]:
# Define the number of folds
k = 10

# Initialize lists to store R-squared scores
train_r2_scores = []
test_r2_scores = []

# Initialize KFold
kf = KFold(n_splits=k, shuffle=True)

# Define the model architecture
model = Sequential([
    Dense(units=16, activation='relu', kernel_regularizer=l2(0.0005), input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.6),
    Dense(units=8, activation='relu', kernel_regularizer=l2(0.0005)),
    Dropout(0.6),
    Dense(units=1)
])

# Example: Train with a smaller learning rate
from keras.optimizers import Adam
adam = Adam(learning_rate=0.0001)  # Adjust learning rate as needed
model.compile(optimizer=adam, loss='mean_squared_error')
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

# Compile the model
model.compile(optimizer=adam, loss='mean_squared_error')

# Perform k-fold cross-validation
for train_index, test_index in kf.split(X_train_scaled):
    X_train_cv, X_test_cv = X_train_scaled[train_index], X_train_scaled[test_index]
    y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
    
    
    # Evaluate the model on training data
    train_predictions = model.predict(X_train_cv)
    train_r2 = r2_score(y_train_cv, train_predictions)
    train_r2_scores.append(train_r2)
    
    # Evaluate the model on test data
    test_predictions = model.predict(X_test_cv)
    test_r2 = r2_score(y_test_cv, test_predictions)
    test_r2_scores.append(test_r2)

# Calculate average R-squared scores
avg_train_r2 = np.mean(train_r2_scores)
avg_test_r2 = np.mean(test_r2_scores)

print("Average R-squared (Train):", avg_train_r2)
print("Average R-squared (Test):", avg_test_r2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 648us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 521us/step
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 508us/step
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 338us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 569us/step
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 555us/step
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 365us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 452us/step
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 328us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 512us/step
[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━

In [41]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Initialize lists to store metrics for selected models
selected_train_r2 = []
selected_test_r2 = []
selected_train_mae = []
selected_train_mse = []
selected_test_mae = []
selected_test_mse = []

# Perform k-fold cross-validation
for train_index, test_index in kf.split(X_train_scaled):
    X_train_cv, X_test_cv = X_train_scaled[train_index], X_train_scaled[test_index]
    y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
    
    # Define a new model for each fold
    model_fold = Sequential([
        Dense(32, activation='relu', input_shape=(X_train_cv.shape[1],)),
        Dropout(0.5),  # Dropout layer with a dropout rate of 0.5
        Dense(16, activation='relu'),
        Dropout(0.5),  # Dropout layer with a dropout rate of 0.5
        Dense(1)  # Output layer
    ])
    
    # Compile the model with Adam optimizer and mean squared error loss
    model_fold.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    
    # Train the model
    model_fold.fit(X_train_cv, y_train_cv, epochs=50, batch_size=32, verbose=0)
    
    # Evaluate the model on training data
    train_predictions = model_fold.predict(X_train_cv)
    train_r2 = r2_score(y_train_cv, train_predictions)
    train_mae = mean_absolute_error(y_train_cv, train_predictions)
    train_mse = mean_squared_error(y_train_cv, train_predictions)
    
    # Evaluate the model on test data
    test_predictions = model_fold.predict(X_test_cv)
    test_r2 = r2_score(y_test_cv, test_predictions)
    test_mae = mean_absolute_error(y_test_cv, test_predictions)
    test_mse = mean_squared_error(y_test_cv, test_predictions)
    
    # Append metrics to the lists
    selected_train_r2.append(train_r2)
    selected_test_r2.append(test_r2)
    selected_train_mae.append(train_mae)
    selected_train_mse.append(train_mse)
    selected_test_mae.append(test_mae)
    selected_test_mse.append(test_mse)

# Print metrics for selected models
for idx, (train_r2, test_r2, train_mae, train_mse, test_mae, test_mse) in enumerate(zip(selected_train_r2, selected_test_r2, selected_train_mae, selected_train_mse, selected_test_mae, selected_test_mse), start=1):
    print(f"Model {idx} - Train R-squared: {train_r2}, Test R-squared: {test_r2}, Train MAE: {train_mae}, Train MSE: {train_mse}, Test MAE: {test_mae}, Test MSE: {test_mse}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 631us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 494us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 670us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 507us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 610us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 537us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 591us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 546us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 604us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 525us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 592us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 479us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 628us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 599us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 471us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 632us/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 562us/step
Model 1 - Train R-squared: 0.9735178439274009, Test R-squared: 0.9713198151492701, Train MAE: 9.440885823238016, Train MSE: 255.0529146381123, Test MAE: 11.115029290121551, Test MSE: 328.80001128343457
Model 2 - Train R-squared: 0.9820473707844283, Test R-squared: 0.9851945426133194, Train MAE: 8.194734358221927, Train MSE: 181.02417626188685, Test MAE: 6.456981169180691, Test MSE: 109.40806546799456
Model 3 - Train R-squared: 0.9843166489432036, Test R-squared: 0.9818053987901124, Train MAE: 7.550651866900895, Train MSE: 154.61724623825654, Test MAE: 7.651003080317129, Test MSE: 173.47123236059227
Model 4 - Train R-squared: 0.9604403075370131, Test R-squared: 0.9629576565206602, Train MAE: 11.393357352834984, Train MSE: 393.1223350400241, Test MAE: 9.886669066860389, Test MSE: 324.94439129625914
Model 5 - Train R-squared: 0.977757392993650

In [42]:
# Filter models where both Train R-squared and Test R-squared are less than 0.96
filtered_indices = [i for i, (train_r2, test_r2) in enumerate(zip(selected_train_r2, selected_test_r2)) if train_r2 < 0.96 and test_r2 < 0.96]

# Calculate the absolute difference between train R-squared and test R-squared values for filtered models
abs_diff_r2_filtered = np.abs(np.array(selected_train_r2)[filtered_indices] - np.array(selected_test_r2)[filtered_indices])

# Find the index of the model with the smallest absolute difference among filtered models
best_model_index = filtered_indices[np.argmin(abs_diff_r2_filtered)]

# Retrieve the metrics for the best model
best_train_r2 = selected_train_r2[best_model_index]
best_test_r2 = selected_test_r2[best_model_index]
best_train_mae = selected_train_mae[best_model_index]
best_train_mse = selected_train_mse[best_model_index]
best_test_mae = selected_test_mae[best_model_index]
best_test_mse = selected_test_mse[best_model_index]

# Print metrics for the best model
print(f"Best Model - Train R-squared: {best_train_r2}, Test R-squared: {best_test_r2}, Train MAE: {best_train_mae}, Train MSE: {best_train_mse}, Test MAE: {best_test_mae}, Test MSE: {best_test_mse}")


Best Model - Train R-squared: 0.9550239176803, Test R-squared: 0.9563573664920262, Train MAE: 12.543204233341974, Train MSE: 456.40711436621336, Test MAE: 9.216528997543714, Test MSE: 288.08285981686663


In [43]:
# Scale data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the model architecture
model = Sequential([
    Dense(units=16, activation='relu', kernel_regularizer=l2(0.0005), input_shape=(X_scaled.shape[1],)),
    Dropout(0.6),
    Dense(units=8, activation='relu', kernel_regularizer=l2(0.0005)),
    Dropout(0.6),
    Dense(units=1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model on the entire dataset
model.fit(X_scaled, y, epochs=50, batch_size=32, verbose=0)

# Predict sequentially on each data point
all_predictions = model.predict(X_scaled)

# Ensure the number of predictions matches the original dataset
assert len(all_predictions) == len(X_scaled)

# Create a DataFrame to store the actual and predicted values
predictions_df = pd.DataFrame({'Actual': y, 'Predicted': all_predictions.flatten()}, index=X.index)

# Ensure index uniqueness in both the original dataset and predictions DataFrame
data_clean_unique_index = data_clean.index.drop_duplicates()
predictions_df = predictions_df.loc[data_clean_unique_index]

# Display the DataFrame
predictions_df


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 523us/step


Unnamed: 0_level_0,Actual,Predicted
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-05-28,14.016000,15.873261
2014-05-29,14.016000,15.803020
2014-05-30,13.851333,15.935019
2014-06-02,13.646667,15.836580
2014-06-03,13.662667,15.758624
...,...,...
2024-03-18,173.800003,149.040070
2024-03-19,171.320007,149.107040
2024-03-20,175.660004,151.939743
2024-03-21,172.820007,152.055710


In [44]:
predictions_df.to_csv('../predicted_data/TSLA_predicted_data.csv', index=True)

In [None]:
# Check for NaN values in the predictions DataFrame
nan_values = predictions_df.isnull().sum().sum()

if nan_values == 0:
    print("No NaN values found in the predictions DataFrame.")
    print(predictions_df)
else:
    print(f"Found {nan_values} NaN values in the predictions DataFrame. Please check your data or model.")


In [None]:
# Calculate the percentage difference between actual and predicted values
predictions_df['Percentage Difference (%)'] = ((predictions_df['Predicted'] - predictions_df['Actual']) / predictions_df['Actual']) * 100

# Display the DataFrame with percentage difference
predictions_df


In [None]:
# Calculate the absolute percentage difference for each data point
predictions_df['Abs_Percentage_Diff'] = abs((predictions_df['Actual'] - predictions_df['Predicted']) / predictions_df['Actual']) * 100

# Calculate the average percentage difference
avg_percentage_diff = predictions_df['Abs_Percentage_Diff'].mean()

print("Average Percentage Difference (%):", avg_percentage_diff)
