# PHASE 2: PROFIT

In [1]:
import utils.csv_utils as csv_utils 
import utils.dir_utils as dir_utils
import utils.dict_utils as dict_utils 
import utils.ptr_utils as ptr_utils
import utils.constants as constants 
import helpers.official as official
import helpers.search as search
import helpers.congress as congress
import pandas as pd 
from scipy.stats.mstats import gmean
import random 

In [2]:
# Load the three transaction tables. get_data hands back a pair and only the
# second element is kept.
_, input_df = dir_utils.get_data(combined=True)
_, house_input_df = dir_utils.get_data(house=True)
_, senate_input_df = dir_utils.get_data(senate=True)

# Mapping tables requested with sector=True / industry=True.
sector_df = dir_utils.get_mapping(sector=True)
industry_df = dir_utils.get_mapping(industry=True)

# Lookup tables filled in by the loop below:
#   input_all_officials_name      {canonical_name_input_based: link, ...}
#   input_all_officials_link      {link: canonical_name_input_based, ...}
#   input_house_officials_link    same shape, rows with a valid REPRESENTATIVE
#   input_senate_officials_link   same shape, rows with a valid SENATOR
input_all_officials_name = {}
input_all_officials_link = {}
input_house_officials_link = {}
input_senate_officials_link = {}

# Canonical names already handled; each name is resolved to a link only once.
names = set()

for _, t in input_df.iterrows():
    name = official.get_name(t)
    if name in names:
        continue

    link = search.get_wiki_link(name)

    # Chamber membership is decided from the first row seen for this name.
    # NOTE(review): increment_dictionary(..., not_math=True) presumably stores
    # the value under the key rather than summing - confirm in dict_utils.
    if ptr_utils.isvalid(t[constants.REPRESENTATIVE]) and link not in input_house_officials_link:
        input_house_officials_link = dict_utils.increment_dictionary(
            input_house_officials_link, link, name, not_math=True
        )
    if ptr_utils.isvalid(t[constants.SENATOR]) and link not in input_senate_officials_link:
        input_senate_officials_link = dict_utils.increment_dictionary(
            input_senate_officials_link, link, name, not_math=True
        )

    input_all_officials_link = dict_utils.increment_dictionary(
        input_all_officials_link, link, name, not_math=True
    )
    input_all_officials_name = dict_utils.increment_dictionary(
        input_all_officials_name, name, link, not_math=True
    )

    names.add(name)

# {link : (canonical_name_input_based, official_object), ... } for the House.
input_house_officials_objects = {
    link: (person, search.wiki_search(person))
    for link, person in input_house_officials_link.items()
}

# {link : (canonical_name_input_based, official_object), ... } for the Senate.
input_senate_officials_objects = {
    link: (person, search.wiki_search(person))
    for link, person in input_senate_officials_link.items()
}

# Union of both chambers; a link present in both keeps its Senate entry.
input_officials_objects = dict(input_house_officials_objects)
input_officials_objects.update(input_senate_officials_objects)

def t_to_obj(t):
    """Return the official object associated with transaction row ``t``.

    The row's canonical name (``official.get_name``) is mapped to a link via
    ``input_all_officials_name``; the link then selects a
    ``(name, official_object)`` pair in ``input_officials_objects`` whose
    second element is returned.
    """
    wiki_link = input_all_officials_name[official.get_name(t)]
    return input_officials_objects[wiki_link][1]

In [60]:
from datetime import timedelta, date
import requests
import statistics


def get_unix_timestamp(year, month, day):
    """Return the Unix timestamp, in whole seconds, of 23:59:59 on a date.

    Args:
        year, month, day: integer calendar components.

    Returns:
        int: seconds since the epoch. The datetime is naive, so
        ``timestamp()`` interprets it in the machine's local timezone.

    Raises:
        ValueError: if the components do not form a valid date.
    """
    # Imported locally: the enclosing cell only imports ``timedelta`` and
    # ``date``, so on a fresh kernel the bare name ``datetime`` is undefined.
    from datetime import datetime

    end_of_day = datetime(year, month, day, 23, 59, 59)
    return int(end_of_day.timestamp())

def _read_span_text(html, opener):
    """Return ``(text, remainder)`` for the first ``opener`` ... ``</span>`` run.

    ``text`` is what sits between ``opener`` and the next ``</span>``;
    ``remainder`` is everything after that ``</span>``. Returns
    ``(None, html)`` when either delimiter is missing.
    """
    closer = '</span>'
    start = html.find(opener)
    if start == -1:
        return None, html
    start += len(opener)
    end = html.find(closer, start)
    if end == -1:
        return None, html
    return html[start:end], html[end + len(closer):]


def _parse_price(text):
    """Convert a table cell such as ``'1,234.56'`` to float; ``None`` if not numeric."""
    if text is None:
        return None
    try:
        # Values of 1,000 and above carry thousands separators that float() rejects.
        return float(text.replace(',', ''))
    except ValueError:
        return None


def get_stock_price(ticker, year, month, day):
    """Scrape Yahoo Finance's history page and return the day's mid price.

    Requests the history page for ``ticker`` over the window between
    ``get_unix_timestamp`` of the given day and of the following day, reads the
    first row of the history table, and averages its High and Low cells.

    Args:
        ticker: symbol inserted into the Yahoo Finance URL.
        year, month, day: calendar date; anything ``int()`` accepts.

    Returns:
        float: ``statistics.mean([high, low])`` of the first table row, or
        ``None`` when the expected table markup is absent or the High/Low
        cells are not numeric.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # Local import: the enclosing cell never imports the ``datetime`` class.
    from datetime import datetime, timedelta

    period1 = get_unix_timestamp(int(year), int(month), int(day))
    next_day = datetime(int(year), int(month), int(day)) + timedelta(days=1)
    period2 = get_unix_timestamp(next_day.year, next_day.month, next_day.day)

    url = 'https://finance.yahoo.com/quote/{ticker}/history?period1={period1}&period2={period2}&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true'.format(ticker=ticker, period1=period1, period2=period2)

    # A browser-like User-Agent is sent with the request.
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'
    headers = {'User-Agent': agent}

    # Without a timeout, requests can block indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=30)
    page = response.text

    # Everything after this marker starts with the date cell of the first row.
    header_marker = '<th class="Fw(400) Py(6px)"><span>Volume</span></th></tr></thead><tbody><tr class="BdT Bdc($seperatorColor) Ta(end) Fz(s) Whs(nw)"><td class="Py(10px) Ta(start) Pend(10px)"><span>'
    header_at = page.find(header_marker)
    if header_at == -1:
        return None
    row = page[header_at + len(header_marker):]

    # The table footnote bounds the region that is parsed.
    footer_marker = 'Close price adjusted for splits.'
    footer_at = row.find(footer_marker)
    if footer_at == -1:
        return None
    row = row[:footer_at]

    cell_open = '</td><td class="Py(10px) Pstart(10px)"><span>'

    # Skip the Date cell (no opener: the row already starts inside its span)
    # and the Open cell; neither contributes to the result.
    for opener in ('', cell_open):
        skipped, row = _read_span_text(row, opener)
        if skipped is None:
            return None

    high_text, row = _read_span_text(row, cell_open)
    low_text, row = _read_span_text(row, cell_open)

    high = _parse_price(high_text)
    low = _parse_price(low_text)
    if high is None or low is None:
        return None

    return statistics.mean([high, low])


# Smoke test: get_stock_price issues a live HTTP request to finance.yahoo.com,
# so this line needs network access; it prints whatever the function returns.
print( get_stock_price('SNAP', 2021, 1, 20) )


53.93


### Match Purchases and Sales

In [61]:
from datetime import datetime

def partial_match_set(s, core):
    """Remove the entry of ``s`` that matches ``core`` and return its amount.

    Args:
        s: set of ``(date, name, ticker, amount)`` tuples; mutated in place.
        core: ``(date, name, ticker)`` triple identifying the entry.

    Returns:
        tuple: ``(amount, s)`` where ``amount`` is the removed entry's fourth
        field, or ``0`` when no entry matches. ``s`` is the same set object
        that was passed in.
    """
    key = tuple(core)
    for entry in s:
        if tuple(entry[:3]) == key:
            # Mutating during iteration is safe here only because we return
            # immediately and never resume the loop.
            s.remove(entry)
            return entry[3], s

    return 0, s

# Sum the estimated amount of every ticker-bearing transaction per
# (transaction date, official name, ticker) key, separately for purchases and
# sales. Dicts give an O(1) lookup per row and keep this cell independent of
# any helper that may not be defined on a fresh kernel.
purchase_totals = {}
sale_totals = {}

for _, t in input_df.iterrows():
    if not ptr_utils.isvalid(t[constants.TICKER]):
        continue

    name = official.get_name(t)
    core = (t[constants.TDATE], name, t[constants.TICKER])
    new_amount = ptr_utils.get_gmean(t[constants.AMOUNT])

    if t[constants.TYPE] == 'Purchase':
        totals = purchase_totals
    elif t[constants.TYPE] in ('Sale (Full)', 'Sale (Partial)'):
        totals = sale_totals
    else:
        # Any other transaction type is ignored.
        continue

    totals[core] = totals.get(core, 0) + new_amount

# Same shape as before: sets of (tdate, name, ticker, total_amount) tuples.
purchases = {key + (total,) for key, total in purchase_totals.items()}
sales = {key + (total,) for key, total in sale_totals.items()}

for purchase in purchases:
    print(purchase)
# for (tdate_sale, name_sale, ticker_sale, amount_sale) in sales:
    # print(name_sale, ticker_sale, tdate_sale, amount_sale)
    
    
    # for (tdate_purch, name_purch, ticker_purch, amount_purch, _)  in purchases:
    #     if name_purch == name_sale and ticker_purch == ticker_sale:
    #         if datetime.strptime(tdate_sale, '%Y/%m/%d').date() > datetime.strptime(tdate_purch, '%Y/%m/%d').date():
    #             # 23501
    #             # print(tdate_sale, name_sale, ticker_sale, amount_sale, tdate_purch, name_purch, ticker_purch, amount_purch)
                
    #             # 14359
    #             # 2021/04/30 Manning, Kathy JNJ $1,001 - $15,000 2021/01/15 Manning, Kathy JNJ $1,001 - $15,000
    #             # if amount_sale == amount_purch:    
    #             #     print(tdate_sale, name_sale, ticker_sale, amount_sale, tdate_purch, name_purch, ticker_purch, amount_purch)

    #             # 9142
    #             if amount_sale != amount_purch:    
    #                 print(name_sale, ticker_sale, tdate_sale, tdate_purch, amount_purch, amount_sale,)


('2020/11/20', 'Carper, Thomas R.', 'GBIL', 6696)
('2017/09/29', 'Perdue Jr., David A.', 'MRK', 6696)
('2017/03/16', 'Murray, Patty', 'BAC', 6696)
('2016/04/13', 'Inhofe, James M.', 'C', 73576)
('2021/01/05', 'Foxx, Virginia', 'PPL', 6696)
('2020/10/13', 'Lowenthal, Alan S.', 'RUN', 6696)
('2020/02/05', 'Lee, Susie', 'QSR', 6696)
('2014/02/19', 'Whitehouse, Sheldon', 'V', 30815)
('2018/09/06', 'McConnell Jr., A. M.', 'WFC', 6696)
('2017/02/08', 'Perdue Jr., David A.', 'PFE', 6696)
('2015/08/20', 'Perdue Jr., David A.', 'DISCA', 13392)
('2021/05/20', 'Hern, Kevin', 'IQV', 20088)
('2017/03/16', 'Murray, Patty', 'SLB', 6696)
('2016/01/29', 'Perdue Jr., David A.', 'LYV', 6696)
('2021/09/14', 'Green, Mark', 'NS', 73576)
('2018/02/28', 'Perdue Jr., David A.', 'UHAL', 6696)
('2020/08/21', 'Moran, Jerry', 'INTC', 6696)
('2020/08/13', 'Frankel, Lois', 'BURL', 6696)
('2017/06/15', 'Collins, Susan M.', 'CFR', 6696)
('2020/02/24', 'Langevin, James R.', 'DIA', 6696)
('2020/08/14', 'Green, Mark', 'W