# PHASE 3: INSIDER TRADING

In [1]:
import utils.csv_utils as csv_utils
import utils.dir_utils as dir_utils
import utils.dict_utils as dict_utils
import utils.ptr_utils as ptr_utils
import utils.constants as constants 
import helpers.official as official
import helpers.search as search

In [2]:
# Transaction tables: combined, House-only and Senate-only.
_, input_df = dir_utils.get_data(combined=True)
_, house_input_df = dir_utils.get_data(house=True)
_, senate_input_df = dir_utils.get_data(senate=True)

# Ticker mappings; sector_df is passed to dir_utils.search_mapping further down.
sector_df = dir_utils.get_mapping(sector=True)
industry_df = dir_utils.get_mapping(industry=True)

# {canonical_name_input_based : link, ...}
input_all_officials_name = {}

# {link : canonical_name_input_based, ...}
input_all_officials_link = {}
input_house_officials_link = {}
input_senate_officials_link = {}

# (canonical_name_input_based, ...) -- names that already went through the link lookup
names = set()

for _, row in input_df.iterrows():
    name = official.get_name(row)

    # Each distinct name is resolved once; later rows for the same name are skipped.
    if name in names:
        continue

    link = search.get_wiki_link(name)

    # Register the link per chamber, keeping the first name seen for a given link.
    # NOTE(review): not_math=True presumably stores the value instead of counting --
    # confirm against dict_utils.increment_dictionary.
    if ptr_utils.isvalid(row[constants.REPRESENTATIVE]) and link not in input_house_officials_link:
        input_house_officials_link = dict_utils.increment_dictionary(
            input_house_officials_link, link, name, not_math=True
        )
    if ptr_utils.isvalid(row[constants.SENATOR]) and link not in input_senate_officials_link:
        input_senate_officials_link = dict_utils.increment_dictionary(
            input_senate_officials_link, link, name, not_math=True
        )

    # Both directions of the name <-> link mapping for the combined data.
    input_all_officials_link = dict_utils.increment_dictionary(
        input_all_officials_link, link, name, not_math=True
    )
    input_all_officials_name = dict_utils.increment_dictionary(
        input_all_officials_name, name, link, not_math=True
    )

    names.add(name)

# {link : (canonical_name_input_based, official_object), ... } for every official
input_officials_objects = {}

# Same shape as above, restricted to one chamber each.
input_house_officials_objects = {}
input_senate_officials_objects = {}

# House is processed first, then Senate. A link present in both chamber maps is
# searched once per chamber, and the Senate entry is the one left in
# input_officials_objects.
for chamber_links, chamber_objects in (
    (input_house_officials_link, input_house_officials_objects),
    (input_senate_officials_link, input_senate_officials_objects),
):
    for link, person in chamber_links.items():
        entry = (person, search.wiki_search(person))
        chamber_objects[link] = entry
        input_officials_objects[link] = entry
    
def t_to_obj(t):
    """Return the official object for the filer of transaction row ``t``.

    The row is turned into a canonical name with ``official.get_name``, the
    name into a link via ``input_all_officials_name``, and the link into the
    second element of the ``(name, official_object)`` pair stored in
    ``input_officials_objects``. Raises ``KeyError`` if either lookup misses.
    """
    canonical_name = official.get_name(t)
    wiki_link = input_all_officials_name[canonical_name]
    return input_officials_objects[wiki_link][1]

### Committee and Sector Overlap

In [3]:
# {'name (d-ia) committee' : tickers}
# The key is the official's label and the matching committee joined by a space;
# the value is accumulated by dict_utils.increment_list_in_dictionary.
d = {}
# Number of transactions whose ticker sector matched a committee assignment.
c = 0

for _, t in input_df.iterrows():
    # Optional extra filter left disabled:  and  'Purchase' in t[constants.TYPE]
    if not ptr_utils.isvalid(t[constants.TICKER]):
        continue

    ticker = t[constants.TICKER]
    year = str(ptr_utils.get_year(t[constants.TDATE]))
    sector = dir_utils.search_mapping(sector_df, ticker, sector=True)
    obj = t_to_obj(t)

    # NOTE(review): the recorded run of this cell failed with
    # "AttributeError: 'Official' object has no attribute 'asgts'".
    # Confirm the current name of the committee-assignments attribute on
    # Official before relying on this cell.
    committees = obj.asgts

    for comm in committees:
        comm_year = official.get_committee_year(comm)
        # Count a transaction at most once: stop at the first assignment whose
        # text contains the sector and whose year text contains the trade year.
        if sector in comm and year in comm_year:
            c += 1
            d = dict_utils.increment_list_in_dictionary(d, obj.get_label() + ' ' + comm, ticker)
            break

print(c)

AttributeError: 'Official' object has no attribute 'asgts'

In [None]:
def committee_and_industry(group):
    """Collect trades whose ticker sector overlaps the trader's committees.

    Despite the name, the lookup uses the sector mapping (``sector_df``).
    For each row of ``group`` with a valid ticker, the sector is compared
    against the official's committee assignments; the row is counted once,
    for the first assignment whose text contains the sector and whose
    committee year contains the transaction year.

    NOTE(review): an earlier cell's recorded output shows
    ``AttributeError: 'Official' object has no attribute 'asgts'`` -- confirm
    the attribute name on Official.

    Returns:
        A tuple ``(overlaps, rows, count)`` where ``overlaps`` is
        ``{name : {committee : tickers}}`` as built by
        ``dict_utils.increment_set_in_inner_dictionary``, ``rows`` is a list
        of ``[label, frequency, color]`` lists (one per matching official),
        and ``count`` is the total number of matching rows.
    """
    overlaps = {}
    # name : freq
    hits_per_name = {}
    total = 0

    for _, row in group.iterrows():
        if not ptr_utils.isvalid(row[constants.TICKER]):
            continue

        name = official.get_name(row)
        year = str(ptr_utils.get_year(row[constants.TDATE]))
        sector = dir_utils.search_mapping(sector_df, row[constants.TICKER], sector=True)
        assignments = t_to_obj(row).asgts

        for comm in assignments:
            comm_year = official.get_committee_year(comm)
            if not (sector in comm and year in comm_year):
                continue
            overlaps = dict_utils.increment_set_in_inner_dictionary(
                overlaps, name, comm, row[constants.TICKER]
            )
            hits_per_name = dict_utils.increment_dictionary(hits_per_name, name)
            total += 1
            # Only the first matching assignment is recorded for a row.
            break

    # Swap each name for its official object so label and color can be emitted.
    officials_with_freq = (
        (input_officials_objects[input_all_officials_name[who]][1], freq)
        for who, freq in hits_per_name.items()
    )
    rows = [[person.get_label(), freq, person.get_color()] for person, freq in officials_with_freq]

    return overlaps, rows, total
    
# Run the overlap analysis on the House, Senate and combined tables, printing
# the number of matching transactions after each run.
d1, d2, c = committee_and_industry(house_input_df)
print(c)
d3, d4, c = committee_and_industry(senate_input_df)
print(c)
d5, people_doing_this, c = committee_and_industry(input_df)
print(c)


# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept here because other cells may depend on it.
dir = dir_utils.makesubdir(constants.path_csv, constants.INSIDER)

# Per-committee ticker breakdowns, one CSV per dataset.
for filename, breakdown in (
    ("committee_and_sector_house", d1),
    ("committee_and_sector_senate", d3),
    ("committee_and_sector_input", d5),
):
    wd = csv_utils.make_csv_breakdown(dir, filename, breakdown, "")

# Per-official frequency tables, one CSV per dataset.
for filename, rows in (
    ("committee_and_sector_house_names", d2),
    ("committee_and_sector_senate_names", d4),
    ("committee_and_sector_input_names", people_doing_this),
):
    wd = csv_utils.make_csv_base(dir, filename, ["name", "count", "color"], rows)

In [None]:
# Bare names of the officials found by the combined overlap analysis: each
# label is cut at its first " (" (labels look like 'name (d-ia)').
# str.partition returns the whole label when " (" is absent; slicing with
# str.find would use its -1 result and silently drop the last character.
people = [label.partition(" (")[0] for label, _, _ in people_doing_this]

In [None]:
# {'25-44': 4, '45-64': 18, '65-84': 25, '85-100': 0}
# Start from a copy of the predefined age groups so every group appears in the
# result even when nobody falls into it.
ages = dict(constants.age_groups)
for person in people:
    obj = input_officials_objects[input_all_officials_name[person]][1]
    ages = dict_utils.increment_dictionary(ages, ptr_utils.which_age_group(obj.get_age()))

# Share of `people` per age group. Because `ages` is pre-seeded, this loop runs
# even when `people` is empty, so the division is guarded against zero.
n_people = len(people)
ages_normalized = {
    group: (round(count / n_people, 2) if n_people else 0.0)
    for group, count in ages.items()
}

ages = dict_utils.sort_dictionary_by_keys(ages)
ages_normalized = dict_utils.sort_dictionary_by_keys(ages_normalized)

print(ages)
print(ages_normalized)

In [None]:
# {'Republican, male': 0.47, 'Democratic, male': 0.34, 'Democratic, female': 0.11, 'Republican, female': 0.06, 'Independent, male': 0.02}
# Tally of 'party, gender' combinations among the flagged officials.
genderandparty = {}

for person in people:
    obj = input_officials_objects[input_all_officials_name[person]][1]
    combo = ", ".join((obj.party, official.get_gender(person)))
    genderandparty = dict_utils.increment_dictionary(genderandparty, combo)

# Same tally expressed as a share of all flagged officials.
genderandparty_normalized = {
    combo: round(count / len(people), 2) for combo, count in genderandparty.items()
}

genderandparty = dict_utils.sort_dictionary_by_keys(genderandparty)
genderandparty_normalized = dict_utils.sort_dictionary_by_keys(genderandparty_normalized)

print(genderandparty)
print(genderandparty_normalized)

In [None]:
# {'BA': 0.66, 'JD': 0.45, 'BS': 0.26, 'MBA': 0.19, 'MA': 0.17, 'MS': 0.09, 'MD': 0.04, 'AB': 0.04, 'MPP': 0.02, 'MEd': 0.02, 'DVM': 0.02, 'BEng': 0.02, 'MSc': 0.02, 'MPA': 0.02, 'BBA': 0.02}
# Number of flagged officials holding each degree.
degrees = {}

for person in people:
    obj = input_officials_objects[input_all_officials_name[person]][1]
    # The loop variable is deliberately not called `d`: that name is the
    # notebook-level committee/ticker dictionary and must not be overwritten.
    for degree in obj.get_degrees():
        degrees = dict_utils.increment_dictionary(degrees, degree)

# Normalized by the number of people, not the number of degrees, so the shares
# can add up to more than 1 when officials hold several degrees.
degrees_normalized = {
    degree: round(count / len(people), 2) for degree, count in degrees.items()
}

degrees = dict_utils.sort_dictionary_by_keys(degrees)
degrees_normalized = dict_utils.sort_dictionary_by_keys(degrees_normalized)

print(degrees)
print(degrees_normalized)

In [None]:
# {'male': 0.83, 'female': 0.17}
# Gender tally of the flagged officials. Gender is derived from the name alone
# via official.get_gender, so no official-object lookup is needed here.
gender = {}

for person in people:
    gender = dict_utils.increment_dictionary(gender, official.get_gender(person))

# Same tally expressed as a share of all flagged officials.
gender_normalized = {
    label: round(count / len(people), 2) for label, count in gender.items()
}

gender = dict_utils.sort_dictionary_by_keys(gender)
gender_normalized = dict_utils.sort_dictionary_by_keys(gender_normalized)

print(gender)
print(gender_normalized)

In [None]:
# {'Republican': 0.53, 'Democratic': 0.45, 'Independent': 0.02}
# Party tally of the flagged officials.
party = {}

for person in people:
    obj = input_officials_objects[input_all_officials_name[person]][1]
    party = dict_utils.increment_dictionary(party, obj.party)

# Same tally expressed as a share of all flagged officials.
party_normalized = {
    affiliation: round(count / len(people), 2) for affiliation, count in party.items()
}

party = dict_utils.sort_dictionary_by_keys(party)
party_normalized = dict_utils.sort_dictionary_by_keys(party_normalized)

print(party)
print(party_normalized)

In [None]:
# {1: 0.81, 13: 0.04, 31: 0.04, 25: 0.02, 41: 0.02, 23: 0.02, 21: 0.02, 5: 0.02}
# Seniority tally of the flagged officials.
seniority = {}

for person in people:
    obj = input_officials_objects[input_all_officials_name[person]][1]

    # Debug aid: print details for officials whose seniority is 1 even though
    # get_congress() reports more than one entry.
    if obj.get_seniority() == 1 and len(obj.get_congress()) > 1:
        # Non-empty chamber term strings, Senate first.
        terms = [term for term in (obj._senate, obj._house) if term]

        # The substring test must look inside the term string itself; testing
        # the list would only match an element that is exactly "-".
        if len(terms) == 1 and "-" in terms[0]:
            parts = terms[0].split("-")
            if parts[0] == parts[1]:
                print("hi")
        print(terms)
        if len(terms) == 1 and "-" not in terms[0]:
            print("bye")
        # print(obj.check())

    seniority = dict_utils.increment_dictionary(seniority, obj.get_seniority())

# Same tally expressed as a share of all flagged officials.
seniority_normalized = {
    level: round(count / len(people), 2) for level, count in seniority.items()
}

seniority = dict_utils.sort_dictionary_by_keys(seniority)
seniority_normalized = dict_utils.sort_dictionary_by_keys(seniority_normalized)

print(seniority)
print(seniority_normalized)

In [None]:
# {'New Jersey': 0.09, 'Michigan': 0.06, 'Texas': 0.06, 'Kansas': 0.04, 'Tennessee': 0.04, 'Alabama': 0.04, 'Colorado': 0.04, 'Rhode Island': 0.04, 'Maine': 0.04, 'Oregon': 0.04, 'Georgia': 0.04, 'North Carolina': 0.04, 'California': 0.04, 'Louisiana': 0.02, 'North Dakota': 0.02, 'Washington': 0.02, 'West Virginia': 0.02, 'Delaware': 0.02, 'Ohio': 0.02, 'Virginia': 0.02, 'Missouri': 0.02, 'Minnesota': 0.02, 'Iowa': 0.02, 'New York': 0.02, 'Montana': 0.02, 'Vermont': 0.02, 'Utah': 0.02, 'South Carolina': 0.02, 'Indiana': 0.02, 'Massachusetts': 0.02}
# State tally of the flagged officials.
state = {}

for person in people:
    obj = input_officials_objects[input_all_officials_name[person]][1]
    state = dict_utils.increment_dictionary(state, obj.state)

# Same tally expressed as a share of all flagged officials; built before
# sorting so both dictionaries enter the sort in the same insertion order.
state_normalized = {
    home_state: round(count / len(people), 2) for home_state, count in state.items()
}

state = dict_utils.sort_dictionary_by_values(state)
state_normalized = dict_utils.sort_dictionary_by_values(state_normalized)

print(state)
print(state_normalized)

In [None]:
# {'1, male': 0.68, 
#  '1, female': 0.13, 
#  '13, male': 0.04, 
#  '31, male': 0.04, 
#  '25, male': 0.02, 
#  '41, male': 0.02, 
#  '23, female': 0.02, 
#  '21, female': 0.02, 
#  '5, male': 0.02}
# Tally of 'seniority, gender' combinations among the flagged officials.
seniority_gender = {}

for person in people:
    obj = input_officials_objects[input_all_officials_name[person]][1]
    combo = ", ".join((str(obj.get_seniority()), official.get_gender(person)))
    seniority_gender = dict_utils.increment_dictionary(seniority_gender, combo)

# Same tally expressed as a share of all flagged officials; built before
# sorting so both dictionaries enter the sort in the same insertion order.
seniority_gender_normalized = {
    combo: round(count / len(people), 2) for combo, count in seniority_gender.items()
}

seniority_gender = dict_utils.sort_dictionary_by_values(seniority_gender)
seniority_gender_normalized = dict_utils.sort_dictionary_by_values(seniority_gender_normalized)

print(seniority_gender)
print(seniority_gender_normalized)

In [None]:
# TODO: look at where they buy/purchase/sell, etc.

In [None]:
# Share of flagged officials holding each committee assignment.
a = {}

for person in people:
    obj = input_officials_objects[input_all_officials_name[person]][1]

    # NOTE(review): an earlier cell's recorded output shows
    # "AttributeError: 'Official' object has no attribute 'asgts'" -- confirm
    # the attribute name on Official.
    for asgt in obj.asgts:
        # Keep the text before the first " (". partition leaves an assignment
        # without " (" intact, whereas slicing with str.find's -1 result would
        # drop its last character.
        a = dict_utils.increment_dictionary(a, asgt.partition(" (")[0])

# Convert the raw counts to shares of all flagged officials.
a = {assignment: round(count / len(people), 2) for assignment, count in a.items()}
print(dict_utils.sort_dictionary_by_values(a))


# print(dict_utils.sort_dictionary_by_values(a))


(buy - sale)/(sale)


# LATEX IT
# for k, v in (dict_utils.sort_dictionary_by_keys(d)).items():
#     print("\item " + k)
#     print("\\begin{itemize}")
#     print("\item ", end="" )
#     # ticker = [] 
#     for item in sorted(set(v)): 
#         print(item + ", ",  end="")
#     print("    \end{itemize}")