# PHASE 3: INSIDER TRADING

In [1]:
import utils.csv_utils as csv_utils 
import utils.dir_utils as dir_utils
import utils.dict_utils as dict_utils 
import utils.ptr_utils as ptr_utils
import utils.constants as constants 
import helpers.official as official
import helpers.search as search
import helpers.congress as congress
import pandas as pd 
from scipy.stats.mstats import gmean

In [3]:
# Load the three transaction tables (combined, House-only, Senate-only).
# Only the second element of each get_data() result is kept.
_, input_df = dir_utils.get_data(combined=True)
_, house_input_df = dir_utils.get_data(house=True)
_, senate_input_df = dir_utils.get_data(senate=True)

# Row counts of each table.
num_of_transactions = input_df.shape[0]
num_of_house_transactions = house_input_df.shape[0]
num_of_senate_transactions = senate_input_df.shape[0]

# Lookup tables later used to map a ticker to its sector / industry.
sector_df = dir_utils.get_mapping(sector=True)
industry_df = dir_utils.get_mapping(industry=True)

# {canonical_name_input_based : link, ...}
input_all_officials_name = {}

# {link : canonical_name_input_based, ....}
input_all_officials_link = {}
input_house_officials_link = {}
input_senate_officials_link = {}

# (canonical_name_input_based, ...) -- names already resolved to a link
names = set()

for _, t in input_df.iterrows():
    name = official.get_name(t)

    # Each distinct name is resolved only once; later rows are skipped.
    if name in names:
        continue

    link = search.get_wiki_link(name)

    # A row is attributed to a chamber when the matching column is valid;
    # only the first name seen for a given link is recorded per chamber.
    if ptr_utils.isvalid(t[constants.REPRESENTATIVE]):
        if link not in input_house_officials_link:
            input_house_officials_link = dict_utils.increment_dictionary(
                input_house_officials_link, link, name, not_math=True)
    if ptr_utils.isvalid(t[constants.SENATOR]):
        if link not in input_senate_officials_link:
            input_senate_officials_link = dict_utils.increment_dictionary(
                input_senate_officials_link, link, name, not_math=True)

    # NOTE(review): presumably not_math=True stores the value under the key
    # rather than adding to a counter -- confirm in dict_utils.
    input_all_officials_link = dict_utils.increment_dictionary(
        input_all_officials_link, link, name, not_math=True)
    input_all_officials_name = dict_utils.increment_dictionary(
        input_all_officials_name, name, link, not_math=True)

    names.add(name)

# Debug dump: size and contents of the name set and of each name/link map.
print("==========================================================================================================================================================================")
print('len(names)')
print(len(names))

print('\n\n\n')
print('for i in names:')
for i in names:
    print(i)

# (size label, loop label, mapping) for every dictionary dumped below.
for len_label, loop_label, mapping in (
    ('len(input_all_officials_name)',
     'for i, k in input_all_officials_name.items():',
     input_all_officials_name),
    ('len(input_all_officials_link)',
     'for i, k in input_all_officials_link.items():',
     input_all_officials_link),
    ('len(input_house_officials_link)',
     'for i, k in input_house_officials_link.items():',
     input_house_officials_link),
    ('len(input_senate_officials_link)',
     'for i, k in input_senate_officials_link.items():',
     input_senate_officials_link),
):
    print('\n\n\n')
    print(len_label)
    print(len(mapping))

    print('\n\n\n')
    print(loop_label)
    for i, k in mapping.items():
        print(i, k)
print("==========================================================================================================================================================================")

# {link : (canonical_name_input_based, official_object), ... }
# One wiki_search() call per House official found in the input data.
input_house_officials_objects = {
    link: (person, search.wiki_search(person))
    for link, person in input_house_officials_link.items()
}

# {link : (canonical_name_input_based, official_object), ... }
# Same lookup for the Senate officials.
input_senate_officials_objects = {
    link: (person, search.wiki_search(person))
    for link, person in input_senate_officials_link.items()
}

# {link : (canonical_name_input_based, official_object) ... }
# Union of both chambers; for a link present in both, the Senate entry wins.
input_officials_objects = dict(input_house_officials_objects)
input_officials_objects.update(input_senate_officials_objects)

# Debug dump: size and contents of the per-chamber and merged object maps.
print("==========================================================================================================================================================================")
for len_label, loop_label, mapping in (
    ('len(input_house_officials_objects)',
     'for i, (k, v) in input_house_officials_objects.items():',
     input_house_officials_objects),
    ('len(input_senate_officials_objects)',
     'for i, (k, v) in input_senate_officials_objects.items():',
     input_senate_officials_objects),
    ('len(input_officials_objects)',
     'for i, (k, v) in input_officials_objects.items():',
     input_officials_objects),
):
    print(len_label)
    print(len(mapping))

    print(loop_label)
    # Each value is a (canonical_name, official_object) pair.
    for i, (k, v) in mapping.items():
        print(i, k, v)

print("==========================================================================================================================================================================")


# {link : canonical_name_wiki_based, ... }
# Reference rosters obtained from the congress helper (not from the input data).
all_officials = congress.get_all_officials()
house_officials = congress.get_house_officials()
senate_officials = congress.get_senate_officials()

# Debug dump: size and contents of each roster.
print("==========================================================================================================================================================================")
for len_label, loop_label, mapping in (
    ('len(all_officials)',
     'for i, k in all_officials.items():',
     all_officials),
    ('len(house_officials)',
     'for i, k in house_officials.items():',
     house_officials),
    ('len(senate_officials)',
     'for i, k in senate_officials.items():',
     senate_officials),
):
    print('\n\n\n')
    print(len_label)
    print(len(mapping))

    print('\n\n\n')
    print(loop_label)
    for i, k in mapping.items():
        print(i, k)

print("==========================================================================================================================================================================")
# {link : gender, ...} -- gender is looked up from each official's name.
officials_gender = {
    link: official.get_gender(name)
    for link, name in all_officials.items()
}

# Debug dump: size and contents of the gender map.
print("==========================================================================================================================================================================")
print('\n\n\n')
print('len(officials_gender)')
print(len(officials_gender))

print('\n\n\n')
print('for i, k in officials_gender.items():')
for entry in officials_gender.items():
    # Prints "<link> <gender>" separated by a single space.
    print(*entry)
print("==========================================================================================================================================================================")

# {'California' :  #_of_representatives_from_112_to_117, ...}
all_officials_state_count = congress.get_officials_state(
    everyone=list(all_officials.values()),
)
house_officials_state_count = congress.get_officials_state(
    house=list(house_officials.values()),
)
# NOTE(review): the senate list is passed through `everyone=` while the house
# call above uses `house=`. This may be a copy-paste slip (a `senate=` keyword
# might be intended) -- confirm against congress.get_officials_state.
senate_officials_state_count = congress.get_officials_state(
    everyone=list(senate_officials.values()),
)

# Debug dump: number of entries in each per-state count.
print("==========================================================================================================================================================================")
for position, (len_label, counts) in enumerate((
    ('len(all_officials_state_count)', all_officials_state_count),
    ('len(house_officials_state_count)', house_officials_state_count),
    ('len(senate_officials_state_count)', senate_officials_state_count),
)):
    print('\n\n\n')
    print(len_label)
    print(len(counts))
print("==========================================================================================================================================================================")

# Collect one congress object per session number in range(112, 118), i.e.
# 112 through 117, and accumulate the party maps each one reports.
congress_objects = []
house_officials_party = {}
senate_officials_party = {}
for i in range(112, 118):
    c = search.get_congress(i)
    congress_objects.append(c)
    # dict.update: entries from a later session overwrite earlier ones that
    # share the same key.
    house_officials_party.update(c.get_house_party())
    senate_officials_party.update(c.get_senate_party())

# Merge of both chambers; Senate entries win on duplicate keys.
all_officials_party = dict(house_officials_party)
all_officials_party.update(senate_officials_party)

# Debug dump: the congress objects themselves.
print("==========================================================================================================================================================================")
print('\n\n\n')

print("print(congress_objects)")
print(congress_objects)

print('\n\n\n')
print("len(congress_objects))")
print(len(congress_objects))
print("==========================================================================================================================================================================")

# Debug dump: size and contents of each party map, each in its own banner.
for len_label, loop_label, mapping in (
    ("len(house_officials_party)",
     'for i, k in house_officials_party.items():',
     house_officials_party),
    ("len(senate_officials_party)",
     'for i, k in senate_officials_party.items():',
     senate_officials_party),
    ("len(all_officials_party)",
     'for i, k in all_officials_party.items():',
     all_officials_party),
):
    print("==========================================================================================================================================================================")
    print('\n\n\n')

    print(len_label)
    print(len(mapping))
    print('\n\n\n')

    print(loop_label)
    for i, k in mapping.items():
        print(i, k)
    print("==========================================================================================================================================================================")

# {link : canonical_name_wiki_based, ... }
# Officials from the reference roster whose link never appears in the input
# data. Built by filtering rather than deleting key by key, so an input link
# that is absent from all_officials no longer raises KeyError.
all_officials_not_in_input = {
    link: name
    for link, name in all_officials.items()
    if link not in input_all_officials_link
}

# Debug dump: size of the remainder (label first, then value, matching the
# other dumps in this notebook).
print("==========================================================================================================================================================================")
print('\n\n\n')

print("len(all_officials_not_in_input)")
print(len(all_officials_not_in_input))
print("==========================================================================================================================================================================")


# Debug dump: contents of the remainder.
print("==========================================================================================================================================================================")
print('\n\n\n')

print("for k,v in all_officials_not_in_input.items():")
for k, v in all_officials_not_in_input.items():
    print(k, v)
print("==========================================================================================================================================================================")

len(names)
221




for i in names:
Cruz, Rafael E.
Huizenga, Bill
Rutherford, John
Kelly, Mike
Suozzi, Thomas
Garcia, Mike
O'Halleran, Tom
Wagner, Ann
Manning, Kathy
Arenholz, Ashley H.
Udall, Thomas S.
Guest, Michael P.
Pallone, Frank
Diaz-Balart, Mario
Manchin III, Joseph
Scott, Robert C.
Trahan, Lori
Bennet, Michael F.
DelBene, Suzan K.
Evans, Dwight
Peters, Scott H.
Timmons, William R.
Green, Mark
Waltz, Michael
Comer, James
Keating, William R.
Feinstein, Dianne
Courtney, Joe
Daines, Steve
Dingell, Debbie
Cochran, Thad
Garbarino, Andrew
Lamborn, Doug
Conaway, K. M.
Kennedy, Joseph P.
Capito, Shelley M.
Tillis, Thomas R.
Morelle, Joseph D.
Yarmuth, John A.
Flores, Bill
Meng, Grace
Castor, Kathy
Malinowski, Tom
Sherrill, Mikie
Schneider, Bradley S.
Warren, Elizabeth
Schiff, Adam B.
Moulton, Seth
Jacobs, Christopher L.
Wittman, Robert J.
McCaul, Michael T.
Mast, Brian
Spartz, Victoria
Carper, Thomas R.
Cohen, Steve
Visclosky, Peter J.
Enzi, Michael B.
Maloney, Sean P.
Chu, Judy
Sanche

Unknown: There is either a bug in the code or you do not catch the necessary exception.

In [2]:
# Load the three transaction tables (combined, House-only, Senate-only).
# Only the second element of each get_data() result is kept.
_, input_df = dir_utils.get_data(combined=True)
_, house_input_df = dir_utils.get_data(house=True)
_, senate_input_df = dir_utils.get_data(senate=True)

# Lookup tables used to map a ticker to its sector / industry.
sector_df = dir_utils.get_mapping(sector=True)
industry_df = dir_utils.get_mapping(industry=True)

# {canonical_name_input_based : link, ...}
input_all_officials_name = {}

# {link : canonical_name_input_based, ....}
input_all_officials_link = {}
input_house_officials_link = {}
input_senate_officials_link = {}

# (canonical_name_input_based, ...) -- names already resolved to a link
names = set()

for _, t in input_df.iterrows():
    name = official.get_name(t)

    # Each distinct name is resolved only once; later rows are skipped.
    if name in names:
        continue

    link = search.get_wiki_link(name)

    # A row is attributed to a chamber when the matching column is valid;
    # only the first name seen for a given link is recorded per chamber.
    if ptr_utils.isvalid(t[constants.REPRESENTATIVE]):
        if link not in input_house_officials_link:
            input_house_officials_link = dict_utils.increment_dictionary(
                input_house_officials_link, link, name, not_math=True)
    if ptr_utils.isvalid(t[constants.SENATOR]):
        if link not in input_senate_officials_link:
            input_senate_officials_link = dict_utils.increment_dictionary(
                input_senate_officials_link, link, name, not_math=True)

    # NOTE(review): presumably not_math=True stores the value under the key
    # rather than adding to a counter -- confirm in dict_utils.
    input_all_officials_link = dict_utils.increment_dictionary(
        input_all_officials_link, link, name, not_math=True)
    input_all_officials_name = dict_utils.increment_dictionary(
        input_all_officials_name, name, link, not_math=True)

    names.add(name)

# {link : (canonical_name_input_based, official_object), ... }
# One wiki_search() call per House official found in the input data.
input_house_officials_objects = {
    link: (person, search.wiki_search(person))
    for link, person in input_house_officials_link.items()
}

# {link : (canonical_name_input_based, official_object), ... }
# Same lookup for the Senate officials.
input_senate_officials_objects = {
    link: (person, search.wiki_search(person))
    for link, person in input_senate_officials_link.items()
}

# {link : (canonical_name_input_based, official_object) ... }
# Union of both chambers; for a link present in both, the Senate entry wins.
input_officials_objects = dict(input_house_officials_objects)
input_officials_objects.update(input_senate_officials_objects)

def t_to_obj(t):
    """Return the official object for the official named in transaction *t*.

    The name extracted from *t* is mapped to a link through the module-level
    ``input_all_officials_name`` dictionary, and the link is then looked up
    in ``input_officials_objects``, whose values are
    ``(canonical_name, official_object)`` pairs.

    Raises:
        KeyError: if the name or its link is missing from those dictionaries.
    """
    wiki_link = input_all_officials_name[official.get_name(t)]
    _canonical_name, official_obj = input_officials_objects[wiki_link]
    return official_obj

### Committee and Sector Overlap

In [3]:
## Committee and Industry Check
def committee_and_industry(group):
    """Collect tickers whose sector matches a committee the trader sat on.

    For every row of *group* with a valid ticker, the ticker's sector is
    looked up in ``sector_df`` and compared against each committee
    assignment (``asgts``) of the trading official. A ticker is recorded
    when the sector is contained in the committee value and the transaction
    year is contained in the result of ``congress.get_committee_year``
    for that committee.

    Args:
        group: object exposing ``iterrows()`` that yields ``(index, row)``
            pairs -- presumably a pandas DataFrame of transactions.

    Returns:
        dict shaped as ``{name: {committee: set(tickers)}}``; empty when no
        row qualifies.
    """
    overlaps = {}

    for _, row in group.iterrows():
        ticker = row[constants.TICKER]
        # Rows without a usable ticker cannot be mapped to a sector.
        if not ptr_utils.isvalid(ticker):
            continue

        name = official.get_name(row)
        year = ptr_utils.get_year((row[constants.TDATE]))
        sector = dir_utils.search_mapping(sector_df, row[constants.TICKER], sector=True)
        assignments = t_to_obj(row).asgts

        for comm in assignments:
            comm_year = congress.get_committee_year(comm)
            # Both conditions are containment tests: sector inside the
            # committee value, and year inside the committee's year value.
            if sector in comm and year in comm_year:
                overlaps = dict_utils.increment_set_in_inner_dictionary(
                    overlaps, name, comm, row[constants.TICKER])
    return overlaps
    
# Committee/sector overlap for House, Senate and combined transactions,
# evaluated in that order.
# NOTE(review): only d1 (House) is written out below; d2 and d3 are not used
# in the visible code.
d1, d2, d3 = (
    committee_and_industry(frame)
    for frame in (house_input_df, senate_input_df, input_df)
)


# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because later code outside this view may read it.
dir = dir_utils.makesubdir(constants.path_csv, constants.INSIDER)
wd = csv_utils.make_csv_breakdown(dir, "committee_and_sector2", d1, [])

'/Users/marinabeshai/OneDrive/Senior/Thesis/publicinterest/src/../results/csv/insider_trading/committee_and_sector2.csv'