# PHASE 2: PROFIT

In [None]:
import utils.csv_utils as csv_utils 
import utils.dir_utils as dir_utils
import utils.dict_utils as dict_utils 
import utils.ptr_utils as ptr_utils
import utils.constants as constants 
import helpers.official as official
import helpers.search as search
import helpers.congress as congress
import pandas as pd 
from scipy.stats.mstats import gmean
import random 

In [None]:
# Transaction tables requested via the combined / house / senate flags.
_, input_df = dir_utils.get_data(combined=True)
_, house_input_df = dir_utils.get_data(house=True)
_, senate_input_df = dir_utils.get_data(senate=True)

# Mapping tables requested via the sector / industry flags.
sector_df = dir_utils.get_mapping(sector=True)
industry_df = dir_utils.get_mapping(industry=True)

# Name -> link index: {canonical_name_input_based : link, ...}
input_all_officials_name = {}

# Link -> name indexes: {link : canonical_name_input_based, ...}
# One for every official, plus one per chamber.
input_all_officials_link = {}
input_house_officials_link = {}
input_senate_officials_link = {}

# Canonical names already processed: (canonical_name_input_based, ...)
names = set()

for _, row in input_df.iterrows():
    name = official.get_name(row)

    # Each official is resolved only once, on the first row that mentions them.
    if name in names:
        continue

    wiki_link = search.get_wiki_link(name)

    # Register the official per chamber, keeping the first name seen for a link.
    if ptr_utils.isvalid(row[constants.REPRESENTATIVE]):
        if wiki_link not in input_house_officials_link:
            input_house_officials_link = dict_utils.increment_dictionary(
                input_house_officials_link, wiki_link, name, not_math=True
            )
    if ptr_utils.isvalid(row[constants.SENATOR]):
        if wiki_link not in input_senate_officials_link:
            input_senate_officials_link = dict_utils.increment_dictionary(
                input_senate_officials_link, wiki_link, name, not_math=True
            )

    # Chamber-independent indexes in both directions.
    input_all_officials_link = dict_utils.increment_dictionary(
        input_all_officials_link, wiki_link, name, not_math=True
    )
    input_all_officials_name = dict_utils.increment_dictionary(
        input_all_officials_name, name, wiki_link, not_math=True
    )

    names.add(name)

# House officials: {link : (canonical_name_input_based, official_object), ... }
# The official object is whatever search.wiki_search returns for the name.
input_house_officials_objects = {
    house_link: (house_name, search.wiki_search(house_name))
    for house_link, house_name in input_house_officials_link.items()
}

# Senate officials: {link : (canonical_name_input_based, official_object), ... }
input_senate_officials_objects = {
    senate_link: (senate_name, search.wiki_search(senate_name))
    for senate_link, senate_name in input_senate_officials_link.items()
}

# Both chambers: {link : (canonical_name_input_based, official_object) ... }
# Senate entries are applied last, so they win when a link is in both chambers.
input_officials_objects = dict(input_house_officials_objects)
input_officials_objects.update(input_senate_officials_objects)

def t_to_obj(t):
    """Return the official object that belongs to the transaction row ``t``.

    The row's canonical name (``official.get_name``) is mapped to its link
    through ``input_all_officials_name``; the link then selects the
    ``(canonical_name, official_object)`` pair in ``input_officials_objects``,
    of which the second element is returned.

    A name or link missing from those lookups propagates the lookup error
    (``KeyError`` for the plain dict ``input_officials_objects``).
    """
    canonical_name, official_obj = input_officials_objects[
        input_all_officials_name[official.get_name(t)]
    ]
    return official_obj

### Match Purchases and Sales

In [28]:
from collections import Counter
from datetime import datetime

# Aggregated transactions. Every entry is a 4-tuple
#   (transaction_date, official_name, ticker, amount)
# e.g. ('2014/09/04', 'Whitehouse, Sheldon', 'APA', 73576)
# where amount is ptr_utils.get_gmean of the row's amount field. Rows that are
# identical in all four fields are merged into a single entry whose amount is
# the per-row amount multiplied by the number of such rows.
purchases = set()
sale = set()
i = 0  # NOTE(review): not used in this cell; kept in case a later cell reads it

_SALE_TYPES = ('Sale (Full)', 'Sale (Partial)')

# Count identical rows first and scale afterwards. Removing an entry and
# re-adding it doubled only works for exactly two identical rows: a third one
# would be stored as a separate un-scaled entry and a fourth would collapse
# back onto the doubled entry, silently dropping amounts.
purchase_counts = Counter()
sale_counts = Counter()

for _, t in input_df.iterrows():
    # Rows without a usable ticker cannot be matched and are skipped.
    if not ptr_utils.isvalid(t[constants.TICKER]):
        continue

    name = official.get_name(t)
    amount = ptr_utils.get_gmean(t[constants.AMOUNT])
    core = (t[constants.TDATE], name, t[constants.TICKER], amount)

    if t[constants.TYPE] == 'Purchase':
        purchase_counts[core] += 1
    elif t[constants.TYPE] in _SALE_TYPES:
        sale_counts[core] += 1

# Scale each distinct row by its multiplicity (x1 for unique rows, x2 for a
# pair, x3 for a triple, ...).
for key, count in purchase_counts.items():
    purchases.add(key[:3] + (key[3] * count,))
for key, count in sale_counts.items():
    sale.add(key[:3] + (key[3] * count,))


for (tdate_sale, name_sale, ticker_sale, amount_sale) in sale:
    print(name_sale, ticker_sale, tdate_sale, amount_sale)

    # Draft of the purchase/sale matching step, currently disabled.
    # NOTE(review): it unpacks five fields per purchase, but entries in
    # `purchases` have four; adjust before re-enabling.
    # for (tdate_purch, name_purch, ticker_purch, amount_purch, _)  in purchases:
    #     if name_purch == name_sale and ticker_purch == ticker_sale:
    #         if datetime.strptime(tdate_sale, '%Y/%m/%d').date() > datetime.strptime(tdate_purch, '%Y/%m/%d').date():
    #             # 23501
    #             # print(tdate_sale, name_sale, ticker_sale, amount_sale, tdate_purch, name_purch, ticker_purch, amount_purch)

    #             # 14359
    #             # 2021/04/30 Manning, Kathy JNJ $1,001 - $15,000 2021/01/15 Manning, Kathy JNJ $1,001 - $15,000
    #             # if amount_sale == amount_purch:
    #             #     print(tdate_sale, name_sale, ticker_sale, amount_sale, tdate_purch, name_purch, ticker_purch, amount_purch)

    #             # 9142
    #             if amount_sale != amount_purch:
    #                 print(name_sale, ticker_sale, tdate_sale, tdate_purch, amount_purch, amount_sale,)


Whitehouse, Sheldon APA 2014/09/04 73576
Cisneros, Gilbert HRB 2020/04/29 6696
Cassidy, William YACKX 2016/03/29 6696
Perdue Jr., David A. AAN 2015/08/25 6696
Collins, Susan M. VR 2014/05/07 6696
Brooks, Mo CDNS 2020/01/02 6696
Whitehouse, Sheldon IBM 2019/07/05 13392
King Jr., Angus S. AXP 2019/01/10 6696
Malinowski, Tom DASH 2020/12/22 6696
Inhofe, James M. ST 2018/06/28 73576
Perdue Jr., David A. AAPL 2019/10/28 30815
Garbarino, Andrew ORMP 2021/10/01 6696
Crenshaw, Daniel TSLA 2021/12/29 6696
Lee, Susie O 2021/02/25 6696
Perdue Jr., David A. GM 2017/10/02 6696
Cisneros, Gilbert VCYT 2020/10/29 6696
Courtney, Joe AAPL 2020/08/07 6696
Loeffler, Kelly BMY 2020/04/14 367879
Lowenthal, Alan S. OLED 2020/10/19 6696
Frankel, Lois BAX 2020/05/27 6696
Moran, Jerry CVS 2019/09/25 6696
Carper, Thomas R. HASI 2020/07/09 6696
Perdue Jr., David A. PFE 2018/11/19 13392
Perdue Jr., David A. LNG 2020/04/15 30815
Booker, Cory A. IRM 2014/08/08 13392
Gottheimer, Josh LRCX 2021/11/08 6696
Lowenthal, A