# PHASE 2: PROFIT

In [1]:
import utils.dir_utils as dir_utils
import utils.dict_utils as dict_utils
import utils.ptr_utils as ptr_utils
import utils.constants as constants 
import helpers.official as official
from datetime import datetime,timedelta
from dateutil.parser import parse
import requests
import statistics
import copy

# Load the three data sets through the project helper; the first element of
# each returned pair is discarded.
# NOTE(review): presumably pandas DataFrames (input_df is iterated with
# .iterrows() in setup()) — confirm against dir_utils.get_data.
_, input_df = dir_utils.get_data(combined=True)
_, house_input_df = dir_utils.get_data(house=True)
_, senate_input_df = dir_utils.get_data(senate=True)

In [2]:
def get_unix_timestamp(date):
    """Return the Unix timestamp of 23:59:59 on the day before ``date``.

    Args:
        date: Date string in ``constants.DATE_FORMAT``.

    Returns:
        Integer number of seconds since the epoch. The cutoff is built as a
        naive ``datetime``, so ``timestamp()`` interprets it in the local
        timezone of the machine running the code.

    Raises:
        ValueError: If ``date`` does not match ``constants.DATE_FORMAT``.
    """
    parsed = datetime.strptime(date, constants.DATE_FORMAT)
    day_before = (parsed - timedelta(days=1)).date()
    cutoff = datetime(
        day_before.year, day_before.month, day_before.day,
        hour=23, minute=59, second=59,
    )
    return int(cutoff.timestamp())

def _next_cell(html, opener, closer='</span>'):
    """Return ``(cell_text, remainder)`` for the first cell following ``opener``.

    An empty ``opener`` means the cell starts at the beginning of ``html``.
    Raises ``ValueError`` (from ``str.index``) when a marker is missing, so a
    changed page layout is detected instead of producing a garbage slice.
    """
    start = html.index(opener) + len(opener) if opener else 0
    end = html.index(closer, start)
    return html[start:end], html[end + len(closer):]


def get_stock_price(ticker, date):
    """Scrape Yahoo Finance for the mid price of ``ticker`` on ``date``.

    The historical-data page is requested for a narrow window around the
    date and the first table row is parsed. For a Saturday or Sunday the
    preceding Friday is used as the trading day.

    Args:
        ticker: Ticker symbol; remapped through
            ``constants.SOME_WRONG_TICKERS`` when it appears there.
        date: Date string in ``constants.DATE_FORMAT``.

    Returns:
        The mean of the row's high and low prices, or ``None`` when the
        request fails, the page does not have the expected layout, the row
        is not for the expected trading day, or any numeric cell cannot be
        parsed. This function never raises; it is a best-effort lookup.
    """
    row_marker = '<th class="Fw(400) Py(6px)"><span>Volume</span></th></tr></thead><tbody><tr class="BdT Bdc($seperatorColor) Ta(end) Fz(s) Whs(nw)"><td class="Py(10px) Ta(start) Pend(10px)"><span>'
    table_end_marker = 'Close price adjusted for splits.'
    numeric_cell = '</td><td class="Py(10px) Pstart(10px)"><span>'
    # (column, marker preceding its value) in the order they appear in a row.
    columns = (
        ('open', numeric_cell),
        ('high', numeric_cell),
        ('low', numeric_cell),
        ('close', numeric_cell),
        ('adj_close', '(10px)"><span>'),
        ('volume', numeric_cell),
    )

    def as_date_string(day):
        # Drop any time-of-day component before formatting.
        return datetime(day.year, day.month, day.day).strftime(constants.DATE_FORMAT)

    try:
        if ticker in constants.SOME_WRONG_TICKERS:
            ticker = constants.SOME_WRONG_TICKERS[ticker]

        requested = datetime.strptime(date, constants.DATE_FORMAT)
        weekday = requested.isoweekday()

        if weekday == 5:
            # Friday: extend the window to the following Monday.
            trading_day = requested
            window_end = requested + timedelta(days=3)
        elif weekday == 6:
            # Saturday: fall back to Friday, window runs to Sunday.
            trading_day = requested - timedelta(days=1)
            window_end = trading_day + timedelta(days=2)
        elif weekday == 7:
            # Sunday: fall back to Friday, window runs to Saturday.
            trading_day = requested - timedelta(days=2)
            window_end = trading_day + timedelta(days=1)
        else:
            trading_day = requested
            window_end = requested + timedelta(days=1)

        # The row must be for the trading day actually queried. Comparing
        # against the raw weekend date could never match a Friday row.
        expected_date = as_date_string(trading_day)
        period1 = get_unix_timestamp(expected_date)
        period2 = get_unix_timestamp(as_date_string(window_end))

        url = 'https://finance.yahoo.com/quote/{ticker}/history?period1={period1}&period2={period2}&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true'.format(ticker=ticker, period1=period1, period2=period2)

        agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'
        headers = {'User-Agent': agent}

        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=headers, timeout=30)
        page = response.text

        row_start = page.find(row_marker)
        if row_start == -1:
            return None
        row = page[row_start + len(row_marker):]

        table_end = row.find(table_end_marker)
        if table_end == -1:
            return None
        row = row[:table_end]

        date_text, row = _next_cell(row, '')
        date_received = as_date_string(parse(date_text))
        if date_received != expected_date:
            return None

        # Every column is parsed, although only high/low are used, so a row
        # with any unparsable numeric cell is rejected.
        values = {}
        for column, opener in columns:
            cell_text, row = _next_cell(row, opener)
            values[column] = float(cell_text.replace(",", ''))

        return statistics.mean([values['high'], values['low']])

    except Exception:
        # Deliberate best-effort: network errors, layout changes and
        # malformed dates all mean "no price available".
        return None

### Match Purchases and Sales

In [3]:
def setup():
    """Collect the distinct purchase and sale records from ``input_df``.

    Rows whose ticker fails ``ptr_utils.isvalid`` are skipped. Each kept row
    is reduced to a ``(transaction date, official name, ticker)`` tuple.

    Returns:
        ``(purchases, sales)``: two sets of those tuples. Rows of type
        ``'Purchase'`` go to the first set; rows of type ``'Sale (Full)'``
        or ``'Sale (Partial)'`` go to the second. Other types are ignored.
    """
    sale_types = ('Sale (Full)', 'Sale (Partial)')
    purchases, sales = set(), set()

    for _, row in input_df.iterrows():
        if not ptr_utils.isvalid(row[constants.TICKER]):
            continue

        record = (row[constants.TDATE], official.get_name(row), row[constants.TICKER])
        kind = row[constants.TYPE]
        if kind == 'Purchase':
            purchases.add(record)
        elif kind in sale_types:
            sales.add(record)

    return purchases, sales

def share_diff(ticker, tdate_sale, tdate_purch):
    """Return the purchase-day price minus the sale-day price for ``ticker``.

    Both prices come from ``get_stock_price``; the sale date is looked up
    first. The difference is rounded to two decimals.

    Returns:
        The rounded difference, or ``None`` when either price is falsy
        (``None`` or ``0``).
    """
    price_on_sale_day = get_stock_price(ticker, tdate_sale)
    price_on_purchase_day = get_stock_price(ticker, tdate_purch)

    if not (price_on_purchase_day and price_on_sale_day):
        return None

    # NOTE(review): a positive result means the purchase price was higher
    # than the sale price — confirm this is the intended sign convention.
    return round(price_on_purchase_day - price_on_sale_day, 2)

        
# Build the purchase and sale sets once and report their sizes.
p, s = setup()
print(f'len(sales): {len(s)}')
print(f'len(purchases): {len(p)}')

def match(purchases, sales):
    """Find, for every sale, the purchases it could be paired with.

    A purchase is a candidate for a sale when it has the same name and
    ticker, ``ptr_utils.is_this_date_after(tdate_sale, tdate_purch)`` holds
    (presumably: the sale is dated after the purchase — confirm against
    ptr_utils), and ``share_diff`` can price both dates.

    Args:
        purchases: Set of ``(tdate, name, ticker)`` tuples.
        sales: Set of ``(tdate, name, ticker)`` tuples.

    Returns:
        ``(possible_matches, purchases, sales)``. ``possible_matches`` is
        built through ``dict_utils.increment_list_in_dictionary`` with keys
        ``(name, ticker, tdate_sale)`` and values ``(diff, tdate_purch)``;
        the two input sets are returned unchanged.
    """
    # Index purchase dates by (name, ticker) so each sale only visits its
    # own candidates instead of scanning every purchase.
    purchase_dates = {}
    for tdate_purch, name_purch, ticker_purch in purchases:
        purchase_dates.setdefault((name_purch, ticker_purch), []).append(tdate_purch)

    # { (name, ticker, tdate_sale) : [(share_diff, tdate_purch), ... , ... ]  }
    possible_matches = {}
    for i, (tdate_sale, name_sale, ticker_sale) in enumerate(sales, start=1):
        print(i)  # progress indicator: one line per sale processed
        for tdate_purch in purchase_dates.get((name_sale, ticker_sale), ()):
            if not ptr_utils.is_this_date_after(tdate_sale, tdate_purch):
                continue
            diff = share_diff(ticker_sale, tdate_sale, tdate_purch)
            # Only a missing price (None) disqualifies the pair; a
            # difference of exactly 0.0 is a valid candidate.
            if diff is not None:
                possible_matches = dict_utils.increment_list_in_dictionary(possible_matches, (name_sale, ticker_sale, tdate_sale), (diff, tdate_purch))

    return possible_matches, purchases, sales

# Pair every sale with its candidate purchases. Each candidate pair triggers
# price lookups over the network (via share_diff), so this call can run for
# a long time.
pm, p, s = match(p, s)

len(sales): 8170
len(purchases): 8563
1


KeyboardInterrupt: 

In [None]:
def get_instances(pm):
    """Regroup candidate matches by ``(name, ticker)``.

    Args:
        pm: Mapping ``(name, ticker, tdate_sale) -> [(diff, tdate_purch), ...]``.

    Returns:
        Mapping ``(name, ticker) -> {tdate_sale: [(diff, tdate_purch), ...]}``.
        The candidate lists are the same objects as in ``pm``, not copies.
    """
    grouped = {}
    for (name, ticker, tdate_sale), candidates in pm.items():
        grouped.setdefault((name, ticker), {})[tdate_sale] = candidates
    return grouped


def stableMarriage(instance_value):
    """Assign each purchase date to at most one sale date.

    Sales "propose" to their candidate purchases in order of decreasing
    diff. A purchase accepts a proposal when it is free, or when the new
    diff is strictly greater than the diff of the sale currently holding it.
    A displaced sale goes back into the queue and continues with its next
    candidate, so it is not left unmatched while it still has candidates.

    Args:
        instance_value: Mapping ``tdate_sale -> [(diff, tdate_purch), ...]``
            for a single (name, ticker) pair. It is not modified.

    Returns:
        Mapping ``tdate_purch -> (diff, tdate_sale)`` for matched purchases
        and ``tdate_purch -> -1`` for purchases left unmatched.
    """
    # Each sale ranks its candidates by diff, highest first. sorted() is
    # stable, so equal diffs keep their input order.
    preferences = {
        sale_date: sorted(candidates, key=lambda pair: pair[0], reverse=True)
        for sale_date, candidates in instance_value.items()
    }

    availablePurchases = {}
    for candidates in preferences.values():
        for _, purch_date in candidates:
            availablePurchases[purch_date] = -1

    # Index of the next candidate each sale has not proposed to yet.
    next_choice = {sale_date: 0 for sale_date in preferences}

    # Sales waiting to propose; displaced sales are appended at the end.
    pending = list(preferences)
    position = 0
    while position < len(pending):
        sale_date = pending[position]
        position += 1
        candidates = preferences[sale_date]

        while next_choice[sale_date] < len(candidates):
            diff, purch_date = candidates[next_choice[sale_date]]
            next_choice[sale_date] += 1

            holder = availablePurchases[purch_date]
            if holder == -1:
                availablePurchases[purch_date] = (diff, sale_date)
                break

            holder_diff, holder_sale = holder
            if holder_diff < diff:
                # The new sale wins this purchase; the previous holder
                # resumes proposing from where it left off.
                availablePurchases[purch_date] = (diff, sale_date)
                pending.append(holder_sale)
                break

    return availablePurchases

# Group the candidate matches by (name, ticker) and run the matching once per
# group. Note that p is rebound here: it no longer holds the purchases set.
p = get_instances(pm)
res = {}
for i in p:
    # m maps each purchase date to (diff, sale date), or -1 when unmatched.
    m = stableMarriage(p[i])
    # NOTE(review): presumably stores m under key i in res — confirm against
    # dict_utils.increment_dictionary (not_math=True).
    res = dict_utils.increment_dictionary(res, i, m, not_math=True)    


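# Example stableMarriage result for one (name, ticker) pair. Keys are purchase
# dates; each value is (diff, sale date), or -1 when the purchase is unmatched:
# {'2017/04/05': (143.2, '2019/11/05'), '2015/07/09': (28.95, '2020/03/26'), '2017/12/07': -1, '2021/01/11': (-2.02, '2021/01/15')}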


In [None]:
# def get_sprice(matches):
#     for (tdate_sale, tdate_purch, name, ticker) in matches:
#             sale_price = get_stock_price(ticker, tdate_sale)
#             purch_price = get_stock_price(ticker, tdate_purch)
            
#             if not purch_price or not sale_price:
#                 pass 
#             else: 
#                 print(round(purch_price - sale_price, 2), tdate_sale, tdate_purch, name, ticker)
#                 pass 


# get_sprice(m)

                # matches.add((tdate_sale, tdate_purch, name_purch, ticker_purch))
                # sales.remove((tdate_sale, name_sale, ticker_sale))
                # purchases.remove((tdate_purch, name_purch, ticker_purch))
                # break 
                
                
                # def maximize(possible_matches):
#     # { (name, ticker, tdate_sale) : (share_diff, tdate_purch), ....  }
#     finalized = {}
    
#     for (name, ticker, tdate_sale) in possible_matches.keys():
#         best_diff = 0
#         best_date = None 
#         for (diff, tdate_purch) in possible_matches[(name, ticker, tdate_sale)]:
#             if diff > best_diff:
#                 best_diff = diff
#                 best_date = tdate_purch
                
#         finalized[(name, ticker, tdate_sale)] = (best_diff, best_date)
    
#     return finalized
        

