In [9]:
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd
import requests
import QuantLib as ql
import numpy as np
import matplotlib.pyplot as plt
import json
from sklearn.metrics import mean_squared_error
import scipy.optimize as opt
from scipy.optimize import differential_evolution
import concurrent.futures


with open("config.json", "r") as config_file:
    config = json.load(config_file)

api_key = config.get("api_key")
secret_key = config.get("secret_key")


In [10]:
NASDAQ = pd.read_csv('Indexes/NASDAQ.csv')
DOWJ = pd.read_csv('Indexes/DOWJ.csv')
SP = pd.read_csv('Indexes/S&P500.csv')

def clean_data(df):
    df = df[['Company', 'Symbol']]
    df = pd.DataFrame(df).dropna()
    return df

NASDAQ = clean_data(NASDAQ)
DOWJ = clean_data(DOWJ)
SP = clean_data(SP)


In [11]:
    
# SCREENS STOCKS WITH 1-DAY CHANGE OF -2%, ROLLING AVG PRICE CHANGE OVER 2 WEEKS OF > 0%, COST IS LESs THAN $150
def filter_stocks(): 
    filtered_stocks = set()
    # for index, stock in SP.iterrows():
    #     today_change, rolling_avg = get_price_change_and_rolling_avg(stock['Symbol'])
    #     if today_change is not None and rolling_avg is not None:  # Only process if valid data is returned
    #         if (today_change <= -2.00) and (rolling_avg > 0.00) and (get_current_stock_price(stock['Symbol']) < 150): 
    #             filtered_stocks.add(stock['Symbol'])

    for index, stock in DOWJ.iterrows():
        today_change, rolling_avg = get_price_change_and_rolling_avg(stock['Symbol'])
        if today_change is not None and rolling_avg is not None:
            if (today_change <= -2.00) and (rolling_avg > 0.00) and (get_current_stock_price(stock['Symbol']) < 150): 
                filtered_stocks.add(stock['Symbol'])

    for index, stock in NASDAQ.iterrows():
        today_change, rolling_avg = get_price_change_and_rolling_avg(stock['Symbol'])
        if today_change is not None and rolling_avg is not None:
            if (today_change <= -2.00) and (rolling_avg > 0.00) and (get_current_stock_price(stock['Symbol']) < 150): 
                filtered_stocks.add(stock['Symbol'])

    return filtered_stocks

def get_price_change_and_rolling_avg(ticker: str):
    try:
        # Define the date range
        end_date = datetime.now()
        start_date = end_date - timedelta(days=24)
        
        # Download stock data
        data = yf.download(ticker, start=start_date, end=end_date, progress=False)
        
        # If data is empty, return None
        if data.empty:
            return None, None
        
        data = data.sort_index()

        # Calculate the daily price change
        data['Price_Change'] = (data['Close'].diff() / data['Close'].shift(1)) * 100
        today_price_change = data['Price_Change'].iloc[-1]

        # Calculate the rolling average of the price change
        rolling_avg = data['Price_Change'].rolling(window=min(14, len(data))).mean().iloc[-1]

        return today_price_change, rolling_avg
    
    except Exception as e:
        print(f"Error occurred for ticker {ticker}: {e}")
        return None, None

def get_current_stock_price(symbol: str):

    url = "https://data.alpaca.markets/v2/stocks/trades/latest"

    headers = {
        "accept": "application/json",
        "APCA-API-KEY-ID": api_key,
        "APCA-API-SECRET-KEY": secret_key,
    }

    params = {
        "symbols": symbol,  
        "feed": "iex" 
    }

    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()  

        data = response.json()
        return data.get("trades", {}).get(symbol, {}).get("p") 

    except requests.exceptions.RequestException as e:
        print(f"Error fetching stock price: {e}")


def fetch_option_chain(api_key: str, secret_key: str, underlying_symbol: str):

    url = f"https://data.alpaca.markets/v1beta1/options/snapshots/{underlying_symbol}?feed=indicative&limit=100&expiration_date=2025-02-07"
    headers = {
        "accept": "application/json",
        "APCA-API-KEY-ID": api_key,
        "APCA-API-SECRET-KEY": secret_key,
    }
    try:
        response = requests.get(url, headers=headers)
        data = response.json()
        snapshots = data.get('snapshots', {})
        return snapshots
    except requests.exceptions.RequestException as e:
        print(f"Error fetching option chain: {e}")
        return None

def parse_option_chain(option_chain):

    parsed_data = []
    
    for symbol, details in option_chain.items():
        expiration_start = len(symbol) - 15
        ticker = symbol[:expiration_start]
        expiration_date = f"20{symbol[expiration_start:expiration_start+2]}-{symbol[expiration_start+2:expiration_start+4]}-{symbol[expiration_start+4:expiration_start+6]}"
        option_type = "Call" if symbol[expiration_start+6] == "C" else "Put"
        strike_price = int(symbol[expiration_start+7:]) / 1000  
        
        greeks = details.get("greeks", {}) or {}
        implied_volatility = details.get("impliedVolatility", None)
        latest_quote = details.get("latestQuote", {})
        
        parsed_data.append({
            "ticker": ticker,
            "expiration_date": expiration_date,
            "option_type": option_type,
            "strike_price": strike_price,
            "delta": greeks.get("delta"),
            "gamma": greeks.get("gamma"),
            "rho": greeks.get("rho"),
            "theta": greeks.get("theta"),
            "vega": greeks.get("vega"),
            "implied_volatility": implied_volatility,
            "ask_price": latest_quote.get("ap"),
            "ask_size": latest_quote.get("as"),
            "bid_price": latest_quote.get("bp"),
            "bid_size": latest_quote.get("bs")
        })
    
    return pd.DataFrame(parsed_data)

def get_option_chain(api_key: str, secret_key: str, underlying_symbol: str):
    option_chain = fetch_option_chain(api_key, secret_key, underlying_symbol)
    if option_chain:
        return parse_option_chain(option_chain)
    else:
        return None
    

def gbm(s0, mu, sigma, deltaT, dt):
    """
    Models a stock price S(t) using the Wiener process W(t) as
    `S(t) = S(0).exp{(mu-(sigma^2/2).t)+sigma.W(t)}`
    
    Arguments:
        s0: Initial stock price, default 100
        mu: 'Drift' of the stock (upwards or downwards), default 0.2
        sigma: 'Volatility' of the stock, default 0.68
        deltaT: The time period for which the future prices are computed, default 52 (as in 52 weeks)
        dt: The granularity of the time-period, default 0.1
    
    Returns:
        time_vector: array of time steps
        s: array with the simulated stock prices over the time-period deltaT
    """
    n_step = int(deltaT / dt)  # Number of time steps
    time_vector = np.linspace(0, deltaT, num=n_step)  # Time vector
    
    # Wiener process: cumulative sum of random normal increments
    random_increments = np.random.normal(0, np.sqrt(dt), size=n_step)
    weiner_process = np.cumsum(random_increments)
    
    # Stock price simulation
    stock_var = (mu - (sigma**2 / 2)) * time_vector
    s = s0 * np.exp(stock_var + sigma * weiner_process)
    
    return s


def objective(params, real_prices, s0):
    """Objective function for optimization."""
    mu, sigma = params  # Unpack parameters
    gbm_prices = gbm(s0, mu, sigma, deltaT=len(real_prices), dt=1)
    return mean_squared_error(real_prices, gbm_prices)

def optimize_bin(bin_prices, bin_size, weights, i):
    s0 = bin_prices[0]

    # Define the bounds for optimization
    bounds = [(-0.3, 0.3), (0.001, 0.30)]

    # Run the optimizer for the bin
    result = differential_evolution(objective, bounds, args=(bin_prices, s0))
    best_mu, best_sigma = result.x
    best_mse = result.fun

    print(f"Bin {i+1}: μ = {best_mu:.4f}, σ = {best_sigma:.4f}, MSE = {best_mse:.4f}")
    return best_mu, best_sigma, best_mse

def optimize_gbm(symbol):
    """
    Optimize μ and σ over multiple time bins, weighting recent periods more.
    """
    # Fetch real stock data (past 2 years)
    stock_data = yf.download(symbol, period="2y", interval="1d")
    real_prices = stock_data["Close"].dropna().values

    # Split into bins of 20 trading days
    bin_size = 20
    num_bins = len(real_prices) // bin_size
    weights = np.linspace(1, 2, num_bins)  # Increasing weights for recent bins

    # Initialize containers for results
    mu_values, sigma_values, mses = [], [], []

    # Use concurrent.futures for parallel processing of bins
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for i in range(num_bins):
            bin_prices = real_prices[i * bin_size : (i + 1) * bin_size]
            futures.append(executor.submit(optimize_bin, bin_prices, bin_size, weights, i))
        
        for future in concurrent.futures.as_completed(futures):
            best_mu, best_sigma, best_mse = future.result()
            mu_values.append(best_mu)
            sigma_values.append(best_sigma)
            mses.append(best_mse)

    # Compute weighted averages
    weight_sum = np.sum(weights)
    avg_mu = np.sum(np.array(mu_values) * weights) / weight_sum
    avg_sigma = np.sum(np.array(sigma_values) * weights) / weight_sum

    print(f"\nFinal Weighted Averages: μ = {avg_mu:.4f}, σ = {avg_sigma:.4f}")

    return avg_mu, avg_sigma


def archive_objective(params, real_prices, s0):
    """Objective function for optimization."""
    mu, sigma = params  # Unpack parameters
    gbm_prices = gbm(s0, mu, sigma, deltaT=len(real_prices), dt=1)
    return mean_squared_error(real_prices, gbm_prices)

def archive_optimize_gbm(symbol):
    """
    Optimize μ and σ over multiple time bins, weighting recent periods more.
    """
    # Fetch real stock data (past 5 years)
    stock_data = yf.download(symbol, period="2y", interval="1d")
    real_prices = stock_data["Close"].dropna().values

    # Split into bins of 20 trading days
    bin_size = 20
    num_bins = len(real_prices) // bin_size
    weights = np.linspace(1, 2, num_bins)  # Increasing weights for recent bins

    mu_values, sigma_values, mses = [], [], []

    # Optimize each bin
    for i in range(num_bins):
        bin_prices = real_prices[i * bin_size : (i + 1) * bin_size]
        s0 = bin_prices[0]

        # Define the bounds for optimization
        bounds = [(-0.3, 0.3), (0.001, 0.35)]

        # Run the optimizer
        result = differential_evolution(objective, bounds, args=(bin_prices, s0))
        best_mu, best_sigma = result.x
        best_mse = result.fun

        mu_values.append(best_mu)
        sigma_values.append(best_sigma)
        mses.append(best_mse)

        print(f"Bin {i+1}/{num_bins}: μ = {best_mu:.4f}, σ = {best_sigma:.4f}, MSE = {best_mse:.4f}")

    # Compute weighted averages
    weight_sum = np.sum(weights)
    avg_mu = np.sum(np.array(mu_values) * weights) / weight_sum
    avg_sigma = np.sum(np.array(sigma_values) * weights) / weight_sum

    print(f"\nFinal Weighted Averages: μ = {avg_mu:.4f}, σ = {avg_sigma:.4f}")

    return avg_mu, avg_sigma

def gbm_vs_real_graph(symbol, mu, sigma, period):
    stock_data = yf.download(symbol, period=period, interval="1d")
    real_prices = stock_data["Adj Close"].dropna().values
    time_steps = np.arange(len(real_prices))


    gbm_path = gbm(s0 = real_prices[0], mu=mu, sigma=sigma, deltaT=len(real_prices), dt=1)
    plt.figure(figsize=(10, 5))
    plt.plot(time_steps, real_prices, label="Real Prices", color="blue")
    plt.plot(time_steps, gbm_path, label="GBM Simulated", linestyle="dashed", color="red")
    
    plt.xlabel("Time (Days)")
    plt.ylabel("Price")
    plt.title(f"GBM vs Real Prices for {symbol}")
    plt.legend()
    plt.grid()
    plt.show()

# Example usage
# avg_mu, avg_sigma = optimize_gbm("AAPL")
#  gbm_vs_real_graph('AAPL', mu=0.0011, sigma=0.0130, period='2y')

In [12]:
candidates = filter_stocks()
candidates = list(candidates)[:2]  # Convert to list, take first two elements
print(len(candidates))
simulation_attempts = 100
risk_free_rate = (((1 + 0.0419) / (1 + 0.029)) - 1 ) * 100
all_options = pd.DataFrame(columns=['ticker', 'expiration_date', 'option_type', 'strike_price', 'delta', 'gamma', 'rho', 'theta', 'vega', 'implied_volatility', 'ask_price', 'ask_size', 'bid_price', 'bid_size'])
print(candidates)

for symbol in candidates:
    option_chain = get_option_chain(api_key, secret_key, symbol)
    put_chain = option_chain[(option_chain['option_type'] == 'Put') & (option_chain['rho'].notna())].sort_values(by='strike_price', ascending=True)

    if option_chain is None or option_chain.empty:
        continue 

    price = get_current_stock_price(symbol)
    optimized_mu, optimized_sigma = optimize_gbm(symbol)

    profitability_chances = []
    percent_returns = []

    for index, contract in put_chain.iterrows():
        count = 0
        strike_price = contract['strike_price']

        for i in range(simulation_attempts):
            prices = gbm(s0=price, mu=optimized_mu, sigma=optimized_sigma, 
                deltaT=np.busday_count(datetime.today().date(), datetime.strptime(contract['expiration_date'], "%Y-%m-%d").date()), dt=1)
        
            if prices[-1] > strike_price:
                count += 1

        profitability_chance = (count / simulation_attempts) * 100
        profit = (contract['bid_price']*contract['bid_size'] + contract['ask_price']*contract['ask_size']) / (contract['ask_size'] + contract['bid_size'])
        percent_return = (profit / (strike_price)) * 100

        profitability_chances.append(profitability_chance)
        percent_returns.append(percent_return)

    put_chain['profitability_percent'] = profitability_chances
    put_chain['percent_return'] = percent_returns

    if put_chain['percent_return'].std() != 0:
        put_chain['sharpe_ratio'] = (put_chain['percent_return'] - risk_free_rate) / put_chain['percent_return'].std()
    else:
        put_chain['sharpe_ratio'] = 0  # Avoid division by zero

    # filename = f"/Options/{symbol}_put_options.csv"
    # put_chain.to_csv(filename, index=False)

    # print(f"Saved {symbol} data to {filename}")

    all_options = pd.concat([all_options, put_chain], ignore_index=True, copy=False)



2
['INTC', 'PDD']


[*********************100%***********************]  1 of 1 completed


KeyError: 'Adj Close'

In [None]:
# TODO
"""
- Figure out way to normalize stock price, whether that is min max of the range of the price(shoudl help optimizer
- Find better metric for optimizer
- Try binning, so like get past 10 years of AAPL, seperate into bins of 20 or n trading days, train optimzer on each one. Then the hyperparameters can be weighted to have more bias towards more recent bins
"""


'\n- Figure out way to normalize stock price, whether that is min max of the range of the price(shoudl help optimizer\n- Find better metric for optimizer\n- Try binning, so like get past 10 years of AAPL, seperate into bins of 20 or n trading days, train optimzer on each one. Then the hyperparameters can be weighted to have more bias towards more recent bins\n'

In [14]:
# Fetch real stock data (past 5 years)
stock_data = yf.download('INTC', period="2y", interval="1d")
print(stock_data.columns)
real_prices = stock_data["Close"].dropna().values
real_prices

[*********************100%***********************]  1 of 1 completed

MultiIndex([( 'Close', 'INTC'),
            (  'High', 'INTC'),
            (   'Low', 'INTC'),
            (  'Open', 'INTC'),
            ('Volume', 'INTC')],
           names=['Price', 'Ticker'])





array([[28.03155136],
       [29.11154556],
       [29.23690033],
       [28.00222588],
       [28.35359383],
       [27.56301117],
       [27.06524086],
       [27.133564  ],
       [27.86558342],
       [27.95342636],
       [28.15838814],
       [27.52397537],
       [26.94811821],
       [25.43527222],
       [24.85941505],
       [24.99606323],
       [24.53732872],
       [24.30308342],
       [24.33236504],
       [24.72277451],
       [25.57191658],
       [25.76712227],
       [25.3669529 ],
       [24.9179821 ],
       [25.35719299],
       [25.80616379],
       [26.56746483],
       [26.30393982],
       [27.33852959],
       [27.72893906],
       [29.45650864],
       [29.09537888],
       [28.46095848],
       [27.77774048],
       [27.45565033],
       [28.33407784],
       [28.65616608],
       [28.48048019],
       [28.58784294],
       [30.76438522],
       [31.32072067],
       [31.88681412],
       [32.10154343],
       [32.30650711],
       [32.04298019],
       [32