In [1]:
import os
import sys

import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join('..')))

from helpers.helpers import load_and_prepare_csv

In [2]:
TICKER = 'SQQQ'
DATA_PATH = os.path.join('outputs', 'data', f'{TICKER}.csv')
df = load_and_prepare_csv(DATA_PATH)


In [9]:
# Features and target
X = df.drop(f'{TICKER}_CLOSE', axis=1)
y = df[f'{TICKER}_CLOSE']

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initializing the Random Forest Regressor
rf = RandomForestRegressor(n_estimators=100, random_state=42)

# Training the model
rf.fit(X_train, y_train)

# Making predictions
y_pred = rf.predict(X_test)

# Evaluating the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Predicting future prices
# Ensure future_data is correctly formatted
future_data = {col_name: [value] for col_name, value in df.iloc[-1].items()}

# Create a DataFrame with the appropriate shape
future_df = pd.DataFrame(future_data)

# Make sure the future_df has the same columns as the training data
future_df = future_df[X.columns]

# Predict the future price
future_price_pred = rf.predict(future_df)

print(f"Predicted Future Price: {future_price_pred[0]}")


Mean Squared Error: 1215651.7844064785
R^2 Score: 0.9998575133100885
Predicted Future Price: 9.809600057601928
