In [219]:
import yfinance as yf
import psycopg2 as pg2
import pandas as pd
import pandas_market_calendars as mcal
import traceback
import sys
import os
from datetime import date, timedelta

# Make the project's src/ directory (two levels above the current working
# directory) importable so the local `database` module can be found.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'src')))
import database as db  # project-local module; used below through db.get_conn()

# NYSE trading calendar shared by the functions below to look up trading sessions.
nyse = mcal.get_calendar("NYSE")




In [257]:
def get_dates_for_ticker(ticker):
    """
    Retrieve every earnings-report row stored for a single ticker.

    Args:
        ticker (str): Ticker symbol to look up in the ``earnings_reports`` table.

    Returns:
        list: One ``(ticker, date, time_of_report)`` tuple per matching row,
            exactly as returned by ``cursor.fetchall()``.

    Raises:
        Exception: Any error raised while running the query is printed together
            with its traceback and then re-raised unchanged.
    """

    conn = db.get_conn() # returns existing db connection, or creates one if needed

    with conn.cursor() as curs:
        try:
            # Bind the ticker as a query parameter instead of interpolating it
            # into the SQL text: the driver handles quoting, so a ticker that
            # contains a quote can neither break the statement nor inject SQL.
            # NOTE(review): "%s" is the psycopg2 placeholder style; this assumes
            # db.get_conn() hands back a psycopg2 connection - confirm.
            curs.execute(
                "SELECT ticker, date, time_of_report FROM earnings_reports WHERE ticker = %s",
                (ticker,),
            )
            dates = curs.fetchall()
        except Exception:
            print(f"Failed to get dates from database for ticker {ticker}", traceback.format_exc())
            raise  # bare raise keeps the original traceback intact

    print(f"Retrieved {len(dates)} dates for ticker {ticker}")

    return dates

In [123]:
def get_closing_prices(dates):
    """
    Debug helper: print every report and, for pre-market reports, the prior day.

    Args:
        dates: Iterable of ``(ticker, date, time_of_report)`` tuples.

    Returns:
        None. The function only prints; the shifted date is not stored anywhere.
    """
    for ticker, report_date, report_time in dates:
        print(f"\n{ticker} - {report_date} - {report_time}")
        if report_time != "Before Open":
            continue
        # A "Before Open" report is moved back one calendar day.
        report_date = report_date - timedelta(days=1)
        print(f"new date = {report_date}")


        
            

In [259]:
def find_relative_dates(date):
    """
    Map a fixed set of trading-day offsets around ``date`` to NYSE session dates.

    Positive offsets count trading sessions strictly after ``date`` (+1 is the
    first session after it). Negative offsets count sessions on or before
    ``date`` (-1 is ``date`` itself when it is a trading day, otherwise the last
    session before it).

    For an after-close report, pass the report date: -1 is the report day and
    +1 is the next session. For a pre-market report, the intent is to pass the
    report date moved back one day, so that +1 is the report day itself (the
    first session after the report). That shifted date may fall on a weekend or
    holiday, which is why the forward window starts the day after ``date``
    rather than on ``date``: the offsets stay correct for non-trading anchors.

    Args:
        date (datetime.date): Anchor date.

    Returns:
        list: ``(offset, datetime.date)`` tuples, positive offsets
            (1, 2, 3, 4, 5, 10, 20, 30) first, then the negative ones
            (-1, -2, -3, -4, -5, -10, -20, -30).

    Raises:
        IndexError: If a 50-calendar-day window contains fewer sessions than
            the largest offset requires.
    """
    dstring = "%Y-%m-%d"
    window = timedelta(days=50)  # calendar days searched on each side of `date`
    rel_dates = []

    # Forward: sessions strictly after `date`, so row 0 is offset +1.
    first_day_after = date + timedelta(days=1)
    sched = nyse.schedule(
        start_date=first_day_after.strftime(dstring),
        end_date=(date + window).strftime(dstring),
    )
    for o in [1, 2, 3, 4, 5, 10, 20, 30]:
        # The session date is read from the first column of the schedule row.
        odate = sched.iloc[o - 1].iloc[0].date()
        rel_dates.append((o, odate))

    # Backward: sessions up to and including `date`, so row -1 is offset -1.
    sched = nyse.schedule(
        start_date=(date - window).strftime(dstring),
        end_date=date.strftime(dstring),
    )
    for o in [-1, -2, -3, -4, -5, -10, -20, -30]:
        odate = sched.iloc[o].iloc[0].date()
        rel_dates.append((o, odate))

    return rel_dates

In [260]:
def populate_prices(ticker):
    """
    Populates relative stock prices for the earnings report records of a ticker.

    Work in progress: only the first report returned by
    ``get_dates_for_ticker`` is processed at the moment.

    Args:
        ticker (str): Ticker symbol whose earnings reports should be processed.

    Returns:
        list: The ``(offset, date)`` tuples computed for the first report, after
            ``insert_rel_dates`` has sorted them in place by offset. An empty
            list when the ticker has no earnings reports.
    """
    def update_date(report):
        """Compute the relative dates for one report and pass them to insert_rel_dates."""
        print("updating date: ", report)
        rel_dates = find_relative_dates(report[1])  # report[1] is the report date
        print(f"Adding {len(rel_dates)} relative dates to {report}. First rel_date is {rel_dates[0]}")
        insert_rel_dates(report, rel_dates)
        return rel_dates

    dates = get_dates_for_ticker(ticker)
    # dates includes the date of each earnings report for the given ticker
    # for each date in dates, find all 'relative_dates'.
    # add the closing price for each relative_date to the database WHERE ticker=ticker AND date=date (undo the -1 day for pre-market!)
    # column names will be minus_1_day, minus_5_day, plus_1_day, plus_30_day, etc...

    # Nothing stored for this ticker: avoid an IndexError on dates[0].
    if not dates:
        print(f"No earnings reports found for ticker {ticker}; nothing to update")
        return []

    #below is just for logging/debugging
    rel_dates = update_date(dates[0]) #updating a single earnings report

    return rel_dates

In [299]:
def get_prices_for_dates(ticker, dates):
    """
    Fetch the daily price history spanning a list of relative dates and print it.

    Work in progress: rows are only printed for inspection; nothing is stored yet.

    Args:
        ticker (str): Symbol passed to ``yf.Ticker``.
        dates (list): ``(days_relative, datetime.date)`` tuples. The first and
            last entries define the requested range, so the list is expected to
            be sorted with the earliest date first.

    Returns:
        None.
    """
    print(f"[get_prices_for_dates] ticker={ticker}, len(dates)={len(dates)}")
    if not dates:
        # No range can be derived from an empty list; skip the download.
        return

    dstring = "%Y-%m-%d"
    yt = yf.Ticker(ticker)
    start = dates[0][1].strftime(dstring)
    print("start_date=", start)
    print("end_date=", dates[-1][1].strftime(dstring))

    # yfinance treats `end` as exclusive, so ask for one day past the last
    # relative date; otherwise that final session is missing from the result.
    end_exclusive = (dates[-1][1] + timedelta(days=1)).strftime(dstring)

    prices = yt.history(start=start, end=end_exclusive)
    for index, row in prices.iterrows():
        # I AM HERE: pull the datetime.date, open, close, high, low, and volume from each row.
        # match the values pulled from each row with the dates(days_relative, datetime.date) tuple
        # create a list of tuples with (days_relative, datetime.date, open, close, high, low, volume)
        # insert a row into the price_history table with:
        # ticker, earnings_report_date, open..., close..., high..., low..., volume...
        # NOTE :: Add a stock_splits column while we're at it?? Research this.

        # Look the open price up by label (integer keys on a labelled Series are
        # deprecated) and round AFTER scaling to cents, so binary float error
        # cannot truncate e.g. 1.15 down to 114.
        open_cents = int(round(float(row["Open"]) * 100))
        print(f"\n\nindex: {index} - {open_cents}\n{row}")
    

In [300]:
def insert_rel_dates(report, rel):
    """
    Order a report's relative dates by offset and hand them to the price lookup.

    Args:
        report: Earnings-report tuple; ``report[0]`` is used as the ticker.
        rel (list): ``(offset, date)`` tuples. Sorted IN PLACE by offset, so the
            caller's list is reordered as a side effect.

    Returns:
        None.
    """
    rel.sort(key=lambda pair: pair[0])  # ascending offset, most negative first
    print(f"\nRetrieving and inserting relative date prices for {report}")
    symbol = report[0]
    get_prices_for_dates(symbol, rel)
    # TODO: for each entry in rel, store the closing price of `symbol` on that
    # date against this report (not implemented yet).

In [301]:
# Run the pipeline for ticker "f"; despite its name, `dates` holds the
# (offset, date) tuples that populate_prices returns for the first report.
dates = populate_prices("f")

Retrieved 47 dates for ticker f
updating date:  ('f', datetime.date(2024, 10, 28), 'After Close')
Adding 16 relative dates to ('f', datetime.date(2024, 10, 28), 'After Close'). First rel_date is (1, datetime.date(2024, 10, 29))

Retrieving and inserting relative date prices for ('f', datetime.date(2024, 10, 28), 'After Close')
[get_prices_for_dates] ticker=f, len(dates)=16
start_date= 2024-09-17
end_date= 2024-12-10


index: 2024-09-17 00:00:00-04:00 - 1074
Open            1.074402e+01
High            1.099067e+01
Low             1.067496e+01
Close           1.075389e+01
Volume          5.206860e+07
Dividends       0.000000e+00
Stock Splits    0.000000e+00
Name: 2024-09-17 00:00:00-04:00, dtype: float64


index: 2024-09-18 00:00:00-04:00 - 1078
Open            1.078349e+01
High            1.110906e+01
Low             1.077362e+01
Close           1.083281e+01
Volume          5.501730e+07
Dividends       0.000000e+00
Stock Splits    0.000000e+00
Name: 2024-09-18 00:00:00-04:00, dtype: fl

  print(f"\n\nindex: {index} - {int(round(float(row[0]), 2) * 100)}\n{row}")


In [204]:
# Order the (offset, date) tuples by offset (in place) and display them.
dates.sort(key=lambda d: d[0])
dates

[(-30, datetime.date(2024, 9, 17)),
 (-20, datetime.date(2024, 10, 1)),
 (-10, datetime.date(2024, 10, 15)),
 (-5, datetime.date(2024, 10, 22)),
 (-4, datetime.date(2024, 10, 23)),
 (-3, datetime.date(2024, 10, 24)),
 (-2, datetime.date(2024, 10, 25)),
 (-1, datetime.date(2024, 10, 28)),
 (1, datetime.date(2024, 10, 29)),
 (2, datetime.date(2024, 10, 30)),
 (3, datetime.date(2024, 10, 31)),
 (4, datetime.date(2024, 11, 1)),
 (5, datetime.date(2024, 11, 4)),
 (10, datetime.date(2024, 11, 11)),
 (20, datetime.date(2024, 11, 25)),
 (30, datetime.date(2024, 12, 10))]

In [200]:
# yfinance handle for ticker 'f', reused by the scratch cells below.
yt = yf.Ticker('f')

In [207]:
# Show the date range spanned by the sorted relative dates (first and last entry).
print("start_date=", dates[0][1].strftime("%Y-%m-%d"))
print("end_date=", dates[-1][1].strftime("%Y-%m-%d"))

start_date= 2024-09-17
end_date= 2024-12-10


In [225]:
# Download the daily history between the first and last relative date and dump every row.
# NOTE(review): yfinance documents `end` as exclusive, so the last relative date
# itself is presumably missing from `prices` - confirm and add one day if needed.
prices = yt.history(start=dates[0][1].strftime("%Y-%m-%d"), end=dates[-1][1].strftime("%Y-%m-%d"))
for index, row in prices.iterrows():
    print(f"\n\nindex: {index}\n{row}")



index: 2024-09-17 00:00:00-04:00
Open            1.074402e+01
High            1.099067e+01
Low             1.067496e+01
Close           1.075389e+01
Volume          5.206860e+07
Dividends       0.000000e+00
Stock Splits    0.000000e+00
Name: 2024-09-17 00:00:00-04:00, dtype: float64


index: 2024-09-18 00:00:00-04:00
Open            1.078349e+01
High            1.110906e+01
Low             1.077362e+01
Close           1.083281e+01
Volume          5.501730e+07
Dividends       0.000000e+00
Stock Splits    0.000000e+00
Name: 2024-09-18 00:00:00-04:00, dtype: float64


index: 2024-09-19 00:00:00-04:00
Open            1.106960e+01
High            1.115839e+01
Low             1.072429e+01
Close           1.077362e+01
Volume          5.243640e+07
Dividends       0.000000e+00
Stock Splits    0.000000e+00
Name: 2024-09-19 00:00:00-04:00, dtype: float64


index: 2024-09-20 00:00:00-04:00
Open            1.071442e+01
High            1.073416e+01
Low             1.054670e+01
Close           1.07

In [239]:
# Map each index entry of `prices` to the value of its .date() call, so rows can
# be matched against the datetime.date values in the relative-date tuples.
index_map = {}
for i in prices.index:
    index_map[i] = i.date()

index_map


{Timestamp('2024-09-17 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 17),
 Timestamp('2024-09-18 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 18),
 Timestamp('2024-09-19 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 19),
 Timestamp('2024-09-20 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 20),
 Timestamp('2024-09-23 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 23),
 Timestamp('2024-09-24 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 24),
 Timestamp('2024-09-25 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 25),
 Timestamp('2024-09-26 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 26),
 Timestamp('2024-09-27 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 27),
 Timestamp('2024-09-30 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 9, 30),
 Timestamp('2024-10-01 00:00:00-0400', tz='America/New_York'): datetime.date(2024, 10, 1),