In [132]:
import yfinance as yf
import seaborn as sns
import pandas  as pd
import numpy as np
import pandas_market_calendars as mcal
import pytz

from datetime import datetime
import plotly.graph_objects as go
import plotly.express as px
from FMNM.BS_pricer import BS_pricer
from FMNM.Parameters import Option_param
from FMNM.Processes import Diffusion_process

import scipy.stats as ss
from scipy.integrate import quad
from typing import List, Optional
from scipy.stats import lognorm
from scipy.stats import norm
from functools import partial

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

%matplotlib inline

from utils.gbm import GBMAnalyzer
from utils.dataFetcher import OptionsDataFetcher as odf

In [133]:
fetcher = odf('TSLA')
stock_data = fetcher.fetch_stock_data("2022-01-01", "2025-05-18")
opts_data = fetcher.fetch_options_chain()
opts_data_p = fetcher.fetch_options_chain(puts_only = True)
opts_data_c = fetcher.fetch_options_chain(calls_only = True)
# fetcher.save(stock_data, opts_data, path="retrivedData", save_stock=True, save_options=True)

In [134]:
# Create target: 1 if next day's Close is higher, else 0
stock_data['Target'] = (stock_data['Close'].shift(-1) > stock_data['Close']).astype(int)

# Features: Use today's Open, High, Low, Close, Volume
features = ['Open', 'High', 'Low', 'Close', 'Volume']
stock_data.dropna(inplace=True)
X = stock_data[features]
y = stock_data['Target']

In [135]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [136]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.54


In [99]:
nyse = mcal.get_calendar('NYSE')
ny_tz = pytz.timezone('America/New_York')

def trading_day_after_n_days(start_date, n_days):
    """
    Returns the first NYSE trading day on or after (or on or before, if n_days < 0)
    the date shifted by n_days. Always returns a trading day >= or <= the shifted date.
    """
    # Shift the date by n_days
    target_date = (start_date + pd.Timedelta(days=n_days)).normalize()

    # Define search window (inclusive)
    if n_days >= 0:
        search_start = target_date
        search_end = target_date + pd.Timedelta(days=15)
    else:
        search_start = target_date - pd.Timedelta(days=15)
        search_end = target_date

    # Get valid trading days in the range
    schedule = nyse.valid_days(start_date=search_start, end_date=search_end)

    if not schedule.empty:
        selected_date = schedule[0] if n_days >= 0 else schedule[-1]
        return selected_date.tz_convert(ny_tz)
    else:
        raise ValueError(f"No trading day found in the expected window around {target_date}")

In [100]:
end_date_train = pd.Timestamp(2023, 1, 3, tz="America/New_York")
t = 1
days = 365
start_date_train = trading_day_after_n_days(end_date_train, -t*days)
start_date_test = trading_day_after_n_days(end_date_train,0)
end_date_test = trading_day_after_n_days(end_date_train,t*days + 1)

In [101]:
train_data = stock_data.loc[start_date_train:end_date_train]
test_data = stock_data.loc[end_date_train:end_date_test]

In [104]:
prices = np.array(train_data['Close'])
actual = np.array(test_data['Close'])
dates = np.array(test_data.index)
pred = predicted(prices, t)
sims = simulate(prices, t, 1000)

In [112]:
train_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-01-03 00:00:00-05:00,382.583344,400.356659,378.679993,399.926666,103931400,0.0,0.0
2022-01-04 00:00:00-05:00,396.516663,402.666656,374.350006,383.196655,100248300,0.0,0.0
2022-01-05 00:00:00-05:00,382.216675,390.113342,360.336670,362.706665,80119800,0.0,0.0
2022-01-06 00:00:00-05:00,359.000000,362.666656,340.166656,354.899994,90336600,0.0,0.0
2022-01-07 00:00:00-05:00,360.123322,360.309998,336.666656,342.320007,84164700,0.0,0.0
...,...,...,...,...,...,...,...
2022-12-27 00:00:00-05:00,117.500000,119.669998,108.760002,109.099998,208643400,0.0,0.0
2022-12-28 00:00:00-05:00,110.349998,116.269997,108.239998,112.709999,221070500,0.0,0.0
2022-12-29 00:00:00-05:00,120.389999,123.570000,117.500000,121.820000,221923300,0.0,0.0
2022-12-30 00:00:00-05:00,119.949997,124.480003,119.750000,123.180000,157777300,0.0,0.0


In [129]:
stock_data = stock_data.reset_index()

In [None]:
stock_data

In [131]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = np.array(stock_data[['Date', 'Open', 'High', 'Low', 'Volume', 'Dividends',]])
y = np.array(stock_data['Close'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


TypeError: float() argument must be a string or a real number, not 'Timestamp'