# PHASE 2: PROFIT

In [1]:
import utils.csv_utils as csv_utils 
import utils.dir_utils as dir_utils
import utils.dict_utils as dict_utils 
import utils.ptr_utils as ptr_utils
import utils.constants as constants 
import helpers.official as official
import helpers.search as search
import helpers.congress as congress
import pandas as pd 
from scipy.stats.mstats import gmean
import random 

In [2]:
_, input_df = dir_utils.get_data(combined=True)
_, house_input_df = dir_utils.get_data(house=True)
_, senate_input_df = dir_utils.get_data(senate=True)

sector_df = dir_utils.get_mapping(sector=True)
industry_df = dir_utils.get_mapping(industry=True)

# {canonical_name_input_based : link, ...}
input_all_officials_name = {}

# {link : canonical_name_input_based, ....}
input_all_officials_link = {}
input_house_officials_link = {}
input_senate_officials_link = {}

# (canonical_name_input_based, ...)
names = set()

for _,t in input_df.iterrows():        
    name = official.get_name(t)
        
    if name not in names:    
        link = search.get_wiki_link(name)
        
        if ptr_utils.isvalid(t[constants.REPRESENTATIVE]) and link not in input_house_officials_link:
            input_house_officials_link = dict_utils.increment_dictionary(input_house_officials_link, link, name, not_math=True)
        if ptr_utils.isvalid(t[constants.SENATOR]) and link not in input_senate_officials_link:
            input_senate_officials_link = dict_utils.increment_dictionary(input_senate_officials_link, link, name, not_math=True)
        
        input_all_officials_link = dict_utils.increment_dictionary(input_all_officials_link, link, name, not_math=True)
        input_all_officials_name = dict_utils.increment_dictionary(input_all_officials_name, name, link, not_math=True)

        names.add(name)

# {link : (canonical_name_input_based, official_object), ... }
input_house_officials_objects = {}
for link, person in input_house_officials_link.items(): 
    off = search.wiki_search(person)        
    input_house_officials_objects[link] = (person, off)
        
# {link : (canonical_name_input_based, official_object), ... }
input_senate_officials_objects = {}
for link, person in input_senate_officials_link.items():
    off = search.wiki_search(person)        
    input_senate_officials_objects[link] = (person, off)

# {link : (canonical_name_input_based, official_object) ... }
input_officials_objects = {**input_house_officials_objects, **input_senate_officials_objects}

def t_to_obj(t):
    name = official.get_name(t)
    link = input_all_officials_name[name]
    _, obj = input_officials_objects[link]
    return obj

In [130]:

from datetime import timedelta, date
import requests
import statistics


def get_unix_timestamp(date):
    date = datetime.strptime(date, constants.DATE_FORMAT) - timedelta(days = 1)
    dt = datetime( date.year, date.month, date.day, 23, 59, 59)
    return int( dt.timestamp() )

def get_stock_price(ticker, date):
    try: 
        period1 = get_unix_timestamp(date)
        date = datetime.strptime(date, constants.DATE_FORMAT)
        
        # Friday
        if date.isoweekday() == 5:
            # Monday
            date = date + timedelta(days = 3)
            
        # Saturday
        elif date.isoweekday() == 6:
            date = date - timedelta(days = 1)
            period1 = get_unix_timestamp(datetime.strptime(str(date.date()), "%Y-%m-%d").strftime(constants.DATE_FORMAT))

            # Monday
            date = date + timedelta(days = 2)

        # Sunday
        elif date.isoweekday() == 7:
            date = date - timedelta(days = 2)
            period1 = get_unix_timestamp(datetime.strptime(str(date.date()), "%Y-%m-%d").strftime(constants.DATE_FORMAT))
            date = date + timedelta(days = 1)
            
        else:
            date = date + timedelta(days = 1)

            
        period2 = get_unix_timestamp(datetime.strptime(str(date.date()), "%Y-%m-%d").strftime(constants.DATE_FORMAT))

        url = 'https://finance.yahoo.com/quote/{ticker}/history?period1={period1}&period2={period2}&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true'.format(ticker=ticker, period1=period1, period2=period2)

        
        # print(url)
        agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'

        headers = {'User-Agent': agent}

        response = requests.get(url, headers=headers)
        
        find = '<th class="Fw(400) Py(6px)"><span>Volume</span></th></tr></thead><tbody><tr class="BdT Bdc($seperatorColor) Ta(end) Fz(s) Whs(nw)"><td class="Py(10px) Ta(start) Pend(10px)"><span>'
        
        if find in response.text:
            res =     response.text[ response.text.find(find) + len(find) : ] 
        else:
            print(ticker, date)
            return None 
        
        find = 'Close price adjusted for splits.'
        if find in res:
            res = res[ : res.find(find)]
        else:
            print(ticker, date)
            return None 
        
        date = open = high = low = close = adj_close = volume =  0 

        # print(res)

    # </td><td class="Py(10px) Pstart(10px)"><span>25,872,900</span></td></tr></tbody><tfoot><tr class="BdT Bdc($seperatorColor) C($tertiaryColor) H(36px)"><td class="Fz(xs)" colSpan="7"><span>*

        find = '</span>'
        date = res[ : res.find(find)]
        res = res [res.find(find) + len(find) : ]
        
        
        find = '</td><td class="Py(10px) Pstart(10px)"><span>'
        res = res [res.find(find) + len(find) : ] 
        find = '</span>'
        open = float(res [ : res.find(find)].replace(",", ''))
        res = res [res.find(find) + len(find) : ]

        # print(res)
        
        find = '</td><td class="Py(10px) Pstart(10px)"><span>'
        res = res [res.find(find) + len(find) : ] 
        find = '</span>'
        high = float(res [ : res.find(find)].replace(",", ''))
        res = res [res.find(find) + len(find) : ]
        
        find = '</td><td class="Py(10px) Pstart(10px)"><span>'
        res = res [res.find(find) + len(find) : ] 
        find = '</span>'
        low = float(res [ : res.find(find)].replace(",", ''))
        res = res [res.find(find) + len(find) : ]

        find = '</td><td class="Py(10px) Pstart(10px)"><span>'
        res = res [res.find(find) + len(find) : ] 
        find = '</span>'
        close = float(res [ : res.find(find)].replace(",", ''))
        res = res [res.find(find) + len(find) : ]

        find = '(10px)"><span>'
        res = res [res.find(find) + len(find) : ] 
        find = '</span>'
        adj_close = float(res [ : res.find(find)].replace(",", ''))
        res = res [res.find(find) + len(find) : ]

        find = '</td><td class="Py(10px) Pstart(10px)"><span>'
        res = res [res.find(find) + len(find) : ] 
        find = '</span>'
        volume = float(res [ : res.find(find)].replace(",", ''))
        res = res [res.find(find) + len(find) : ]

        return statistics.mean([high, low]) 
    
    except Exception:
        return None 


print( get_stock_price('SNAP', "2019/01/01") )


SNAP 2019-01-02 00:00:00
None


### Match Purchases and Sales

In [132]:
from datetime import datetime
import copy

purchases = set()
sales = set()
i = 0

def is_this_date_after(tdate_sale, tdate_purch):
    return datetime.strptime(tdate_sale, '%Y/%m/%d').date() > datetime.strptime(tdate_purch, '%Y/%m/%d').date()

for _,t in input_df.iterrows():
    if ptr_utils.isvalid(t[constants.TICKER]):
        core =  (t[constants.TDATE], official.get_name(t), t[constants.TICKER])
        
        if t[constants.TYPE] == 'Purchase': 
            purchases.add(core)
        elif t[constants.TYPE] == 'Sale (Full)' or t[constants.TYPE] == 'Sale (Partial)':
            sales.add(core)


matches = set()
sales_copy = copy.deepcopy(sales)

for (tdate_sale, name_sale, ticker_sale) in sales_copy:    
    for (tdate_purch, name_purch, ticker_purch)  in purchases:
        if name_purch == name_sale and ticker_purch == ticker_sale and is_this_date_after(tdate_sale, tdate_purch):
            matches.add((tdate_sale, tdate_purch, name_purch, ticker_purch))
            sales.remove((tdate_sale, name_sale, ticker_sale))
            purchases.remove((tdate_purch, name_purch, ticker_purch))
            break 


# print(len(sales))
# print(len(purchases))
# print(len(matches))

for (tdate_sale, tdate_purch, name, ticker) in matches:
    # print(" sale : " + tdate_sale, " purch : " + tdate_purch, name, ticker)
    if ticker != 'UTX' and ticker != 'FDC' and ticker != 'FEYE':
        sale_price = get_stock_price(ticker, tdate_sale)
        purch_price = get_stock_price(ticker, tdate_purch)
        
        if not purch_price or not sale_price:
            print(tdate_sale, tdate_purch, name, ticker)
#         if purch_price and sale_price:
#             print(round(purch_price - sale_price, 2))


# 2020/08/07 2019/12/31 Courtney, Joe ANTM
# https://finance.yahoo.com/quote/ANTM/history?period1=1596859199&period2=1597118399&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true
# https://finance.yahoo.com/quote/ANTM/history?period1=1577854799&period2=1577941199&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true
# ANTM 2020-01-01 00:00:00                

AXLL 2016-07-07 00:00:00
AXLL 2015-06-04 00:00:00
2016/07/06 2015/06/03 Perdue Jr., David A. AXLL
AM 2015-08-06 00:00:00
2019/01/09 2015/08/05 Perdue Jr., David A. AM
HYH 2016-07-12 00:00:00
HYH 2016-01-25 00:00:00
2016/07/11 2016/01/22 Perdue Jr., David A. HYH
KORS 2014-12-19 00:00:00
KORS 2014-07-08 00:00:00
2014/12/18 2014/07/07 Whitehouse, Sheldon KORS
TDDXX 2021-07-05 00:00:00
TDDXX 2020-04-30 00:00:00
2021/07/02 2020/04/29 Beyer, Donald S. TDDXX
2018/12/19 2018/12/06 Moran, Jerry LM09.SG
ENBL 2021-02-23 00:00:00
ENBL 2020-07-03 00:00:00
2021/02/22 2020/07/02 Green, Mark ENBL
CWEN.A 2020-12-08 00:00:00
CWEN.A 2020-09-17 00:00:00
2020/12/07 2020/09/16 Foxx, Virginia CWEN.A
CELG 2019-03-01 00:00:00
CELG 2014-08-20 00:00:00
2019/02/28 2014/08/19 Reed, John F. CELG
TDDXX 2021-11-03 00:00:00
TDDXX 2020-10-29 00:00:00
2021/11/02 2020/10/28 Beyer, Donald S. TDDXX
CATM 2020-12-18 00:00:00
CATM 2020-06-25 00:00:00
2020/12/17 2020/06/24 Gianforte, Greg CATM
CELG 2019-10-02 00:00:00
CELG 201

KeyboardInterrupt: 