In [1]:
import utils.csv_utils as csv_utils 
import utils.dir_utils as dir_utils
import utils.dict_utils as dict_utils 
import utils.ptr_utils as ptr_utils
import utils.constants as constants 
import helpers.official_u as official
import helpers.search_u as search
import pandas as pd 
from scipy.stats.mstats import gmean

# Load the transaction table. `title` is the column key under which each row
# stores the official's name (the cells below read it as t[title]).
title, input_df = dir_utils.get_data(senate=True)
# Lookup tables handed to dir_utils.search_mapping to resolve a ticker to its
# sector / industry.
sector_df = dir_utils.get_mapping(sector=True)
industry_df = dir_utils.get_mapping(industry=True)

## Transaction Date (transaction_date)

### The most popular transaction_date for each sector.

In [2]:
def transaction_date_wrt_sector():
    """Find, per sector, the transaction date with the most transactions.

    Walks ``input_df``, skips rows whose ticker fails ``ptr_utils.isvalid``,
    resolves each ticker to a sector through ``sector_df`` and tallies
    transactions per (sector, formatted date).  The tally is flattened and
    sorted with the ``dict_utils`` helpers, written to a CSV in the
    transaction-date sub-directory, and the first five CSV rows are printed.

    Returns:
        The flattened, sorted dictionary that was written to the CSV.
    """
    # counts_by_sector = {'sector': {'date': #_of_transactions, ...}, 'sector2': ...}
    counts_by_sector = {}

    for _, row in input_df.iterrows():
        ticker = row[constants.TICKER]
        if not ptr_utils.isvalid(ticker):
            continue
        sector = dir_utils.search_mapping(sector_df, ticker, sector=True)
        trade_date = ptr_utils.format_date(row[constants.TDATE])
        counts_by_sector = dict_utils.increment_dictionary_in_dictionary(counts_by_sector, sector, trade_date)

    # presumably {'sector': {'best_date': #_of_transactions}, ...} -- confirm against dict_utils.flatten
    best_dates = dict_utils.sort_dictionary_by_inner_values(
        dict_utils.flatten(counts_by_sector), reverse=True
    )

    headers = [constants.SECTOR, constants.TDATE, constants.NUMT]
    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    csv_path = csv_utils.make_csv(out_dir, "most_popular_td_fe_sector", best_dates, headers)
    print(pd.read_csv(csv_path).head(5))
    return best_dates

transaction_date_wrt_sector_res = transaction_date_wrt_sector()

                   sector transaction_date  number_of_transactions
0      Financial Services       2020/04/14                      27
1                    Fund       2020/04/02                      26
2                     NaN       2015/02/13                      19
3  Communication Services       2020/04/14                      18
4              Technology       2020/04/14                      18


### The most popular transaction_date for each sector controlling for each official.

In [None]:
def transaction_date_wrt_sector_controlled():
    """Find, per sector, the date on which the most distinct officials traded.

    Same pipeline as the uncontrolled sector report, except that every
    (sector, formatted date) pair collects the set of canonical official
    names; the set sizes are then used as the counts, so an official trading
    many times on one day is counted once.

    Returns:
        The flattened, sorted dictionary that was written to the CSV.
    """
    # officials_by_sector = {'sector': {'date': set(people_who_traded_on_that_day), ...}, ...}
    officials_by_sector = {}

    for _, row in input_df.iterrows():
        ticker = row[constants.TICKER]
        if not ptr_utils.isvalid(ticker):
            continue
        sector = dir_utils.search_mapping(sector_df, ticker, sector=True)
        officials_by_sector = dict_utils.increment_set_in_dictionary(
            officials_by_sector,
            sector,
            ptr_utils.format_date(row[constants.TDATE]),
            official.get_canonical_name(row[title]),
        )

    # Replace each set of officials with its size (a head-count per date).
    for sector in officials_by_sector:
        per_date = officials_by_sector[sector]
        for trade_date in per_date:
            per_date[trade_date] = len(per_date[trade_date])

    # presumably {'sector': {'best_date': #_of_officials}, ...} -- confirm against dict_utils.flatten
    best_dates = dict_utils.flatten(officials_by_sector)
    best_dates = dict_utils.sort_dictionary_by_inner_values(best_dates, reverse=True)

    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    csv_path = csv_utils.make_csv(
        out_dir,
        "most_popular_td_fe_sector_controlled",
        best_dates,
        [constants.SECTOR, constants.TDATE, constants.NUMT],
    )
    print(pd.read_csv(csv_path).head(5))
    return best_dates

transaction_date_wrt_sector_controlled_res = transaction_date_wrt_sector_controlled()

### The most popular transaction_date for each industry.

In [None]:
def transaction_date_wrt_industry():
    """Report, for every industry, the date on which it was traded most often.

    Rows of ``input_df`` whose ticker fails ``ptr_utils.isvalid`` are ignored.
    Each remaining ticker is mapped to an industry through ``industry_df`` and
    a per-(industry, formatted date) transaction count is accumulated.  The
    counts are flattened and sorted via ``dict_utils``, saved as a CSV in the
    transaction-date sub-directory, and the CSV's first five rows are printed.

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # tally = {'industry': {'date': #_of_transactions, ...}, 'industry2': ...}
    tally = {}

    for _, row in input_df.iterrows():
        symbol = row[constants.TICKER]
        if ptr_utils.isvalid(symbol):
            tally = dict_utils.increment_dictionary_in_dictionary(
                tally,
                dir_utils.search_mapping(industry_df, symbol),
                ptr_utils.format_date(row[constants.TDATE]),
            )

    # presumably {'industry': {'best_date': #_of_transactions}, ...} -- confirm against dict_utils.flatten
    flattened = dict_utils.flatten(tally)
    ranked = dict_utils.sort_dictionary_by_inner_values(flattened, reverse=True)

    column_names = [constants.INDUSTRY, constants.TDATE, constants.NUMT]
    target_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    written = csv_utils.make_csv(target_dir, "most_popular_td_fe_industry", ranked, column_names)
    preview = pd.read_csv(written)
    print(preview.head(5))
    return ranked

transaction_date_wrt_industry_res = transaction_date_wrt_industry()

### The most popular transaction_date for each industry controlling for official.

In [None]:
def transaction_date_wrt_industry_controlled():
    """Report, per industry, the date on which the most distinct officials traded.

    For every row with a valid ticker the canonical official name is added to
    the set stored under (industry, formatted date).  Set sizes then replace
    the sets, so repeated trades by one official on one day count once.  The
    result is flattened, sorted, written to CSV and previewed.

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # traders = {'industry': {'date': set(people_who_traded_on_that_day), ...}, ...}
    traders = {}

    for _, row in input_df.iterrows():
        symbol = row[constants.TICKER]
        if not ptr_utils.isvalid(symbol):
            continue
        industry = dir_utils.search_mapping(industry_df, symbol)
        trade_date = ptr_utils.format_date(row[constants.TDATE])
        who = official.get_canonical_name(row[title])
        traders = dict_utils.increment_set_in_dictionary(traders, industry, trade_date, who)

    # Swap every set for its cardinality.
    for industry in traders:
        for trade_date in traders[industry]:
            traders[industry][trade_date] = len(traders[industry][trade_date])

    # presumably {'industry': {'best_date': #_of_officials}, ...} -- confirm against dict_utils.flatten
    ranked = dict_utils.sort_dictionary_by_inner_values(dict_utils.flatten(traders), reverse=True)

    target_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    written = csv_utils.make_csv(
        target_dir,
        "most_popular_td_fe_industry_controlled",
        ranked,
        [constants.INDUSTRY, constants.TDATE, constants.NUMT],
    )
    print(pd.read_csv(written).head(5))
    return ranked

transaction_date_wrt_industry_controlled_res = transaction_date_wrt_industry_controlled()

### The most popular transaction_date for each ticker. 

In [None]:
def transaction_date_wrt_ticker():
    """Find, per ticker, the transaction date with the most transactions.

    Counts transactions per (ticker, formatted date) for rows whose ticker
    passes ``ptr_utils.isvalid``; flattens and sorts the counts with
    ``dict_utils``; writes them to a CSV and prints its first five rows.

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # per_ticker = {'ticker': {'date': #_of_transactions, ...}, 'ticker2': ...}
    per_ticker = {}

    for _, row in input_df.iterrows():
        ticker = row[constants.TICKER]
        if not ptr_utils.isvalid(ticker):
            continue
        per_ticker = dict_utils.increment_dictionary_in_dictionary(
            per_ticker, ticker, ptr_utils.format_date(row[constants.TDATE])
        )

    # presumably {'ticker': {'best_date': #_of_transactions}, ...} -- confirm against dict_utils.flatten
    result = dict_utils.flatten(per_ticker)
    result = dict_utils.sort_dictionary_by_inner_values(result, reverse=True)

    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    headers = [constants.TICKER, constants.TDATE, constants.NUMT]
    csv_path = csv_utils.make_csv(out_dir, "most_popular_td_fe_ticker", result, headers)
    print(pd.read_csv(csv_path).head(5))
    return result

transaction_date_wrt_ticker_res = transaction_date_wrt_ticker()

### The most popular transaction_date for each ticker controlling for official.

In [None]:
def transaction_date_wrt_ticker_controlled():
    """Find, per ticker, the date on which the most distinct officials traded it.

    For every row of ``input_df`` with a valid ticker, the canonical official
    name is added to the set stored under (ticker, formatted date).  The sets
    are then replaced by their sizes, so an official trading the same ticker
    several times on one day is counted once.  The result is flattened,
    sorted, written to a CSV and the first five CSV rows are printed.

    Rows whose ticker fails ``ptr_utils.isvalid`` are skipped, matching the
    uncontrolled ticker report and the other controlled reports; without the
    guard, invalid/missing tickers would show up as keys of the result.

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # officials_by_ticker = {'ticker': {'date': set(people_who_traded_on_that_day), ...}, ...}
    officials_by_ticker = {}

    for _, t in input_df.iterrows():
        if not ptr_utils.isvalid(t[constants.TICKER]):
            continue
        officials_by_ticker = dict_utils.increment_set_in_dictionary(
            officials_by_ticker,
            t[constants.TICKER],
            ptr_utils.format_date(t[constants.TDATE]),
            official.get_canonical_name(t[title]),
        )

    # Replace each set of officials with its size.
    for ticker in officials_by_ticker:
        for date in officials_by_ticker[ticker]:
            officials_by_ticker[ticker][date] = len(officials_by_ticker[ticker][date])

    # presumably {'ticker': {'best_date': #_of_officials}, ...} -- confirm against dict_utils.flatten
    d = dict_utils.flatten(officials_by_ticker)
    d = dict_utils.sort_dictionary_by_inner_values(d, reverse=True)

    filename = "most_popular_td_fe_ticker_controlled"
    headers = [constants.TICKER, constants.TDATE, constants.NUMT]

    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    wd = csv_utils.make_csv(out_dir, filename, d, headers)
    print(pd.read_csv(wd).head(5))
    return d

transaction_date_wrt_ticker_controlled_res = transaction_date_wrt_ticker_controlled()

### The most popular transaction_date for type.

In [None]:
def transaction_date_wrt_type():
    """Find, per transaction type, the date with the most transactions.

    Only rows whose ticker passes ``ptr_utils.isvalid`` are counted.  Counts
    are kept per (type, formatted date), flattened and sorted via
    ``dict_utils``, written to a CSV and previewed with the first five rows.

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # per_type = {'type': {'date': #_of_transactions, ...}, 'type2': ...}
    per_type = {}

    for _, row in input_df.iterrows():
        if not ptr_utils.isvalid(row[constants.TICKER]):
            continue
        per_type = dict_utils.increment_dictionary_in_dictionary(
            per_type, row[constants.TYPE], ptr_utils.format_date(row[constants.TDATE])
        )

    # presumably {'type': {'best_date': #_of_transactions}, ...} -- confirm against dict_utils.flatten
    ranked = dict_utils.sort_dictionary_by_inner_values(dict_utils.flatten(per_type), reverse=True)

    # NOTE(review): the file name is spelled "popuar"; kept as-is because it is
    # the on-disk artifact name.
    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    csv_path = csv_utils.make_csv(
        out_dir,
        "most_popuar_td_for_type",
        ranked,
        [constants.TYPE, constants.TDATE, constants.NUMT],
    )
    print(pd.read_csv(csv_path).head(5))
    return ranked

transaction_date_wrt_type_res = transaction_date_wrt_type()

### The most popular transaction_date for type controlling for official. 

In [None]:
def transaction_date_wrt_type_controlled():
    """Find, per transaction type, the date on which the most distinct officials traded.

    For rows with a valid ticker, the canonical official name is added to the
    set stored under (type, formatted date); sets are then replaced by their
    sizes.  The result is flattened, sorted, written to CSV and previewed.

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # officials_by_type = {'type': {'date': set(people_who_traded_on_that_day), ...}, ...}
    officials_by_type = {}

    for _, row in input_df.iterrows():
        if not ptr_utils.isvalid(row[constants.TICKER]):
            continue
        officials_by_type = dict_utils.increment_set_in_dictionary(
            officials_by_type,
            row[constants.TYPE],
            ptr_utils.format_date(row[constants.TDATE]),
            official.get_canonical_name(row[title]),
        )

    # Turn every set of officials into a head-count.
    for kind in officials_by_type:
        per_date = officials_by_type[kind]
        for trade_date in per_date:
            per_date[trade_date] = len(per_date[trade_date])

    # presumably {'type': {'best_date': #_of_officials}, ...} -- confirm against dict_utils.flatten
    ranked = dict_utils.flatten(officials_by_type)
    ranked = dict_utils.sort_dictionary_by_inner_values(ranked, reverse=True)

    headers = [constants.TYPE, constants.TDATE, constants.NUMT]
    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    csv_path = csv_utils.make_csv(out_dir, "most_popular_td_fe_type_controlled", ranked, headers)
    print(pd.read_csv(csv_path).head(5))
    return ranked

transaction_date_wrt_type_controlled_res = transaction_date_wrt_type_controlled()

### The most popular transaction_date for amount.

In [None]:
def transaction_date_wrt_amount():
    """Find, per amount bracket, the transaction date with the most transactions.

    Counts transactions per (amount, formatted date) for rows whose ticker
    passes ``ptr_utils.isvalid``, flattens the counts, orders them with the
    amount sort-key helpers of ``dict_utils``, writes a breakdown CSV and
    prints its first five rows.

    The CSV key column is labelled with ``constants.AMOUNT`` because the
    outer keys are amounts (the previous ``constants.TYPE`` label was wrong
    and disagreed with the controlled variant of this report).

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # per_amount = {'amount': {'date': #_of_transactions, ...}, 'amount1': ...}
    per_amount = {}

    for _, t in input_df.iterrows():
        if ptr_utils.isvalid(t[constants.TICKER]):
            per_amount = dict_utils.increment_dictionary_in_dictionary(
                per_amount, t[constants.AMOUNT], ptr_utils.format_date(t[constants.TDATE])
            )

    # presumably {'amount': {'best_date': #_of_transactions}, ...} -- confirm against dict_utils.flatten
    d = dict_utils.flatten(per_amount)

    d = dict_utils.add_sort_key_for_amount(d)
    d = dict_utils.sort_dictionary_by_sort_key(d)

    # NOTE(review): the file name is spelled "popuar"; kept as-is because it is
    # the on-disk artifact name.
    filename = "most_popuar_td_for_amount"
    key_header = constants.AMOUNT

    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    wd = csv_utils.make_csv_breakdown(out_dir, filename, d, key_header)
    print(pd.read_csv(wd).head(5))
    return d

transaction_date_wrt_amount_res = transaction_date_wrt_amount()

### The most popular transaction_date for amount controlling for official.

In [None]:
def transaction_date_wrt_amount_controlled():
    """Find, per amount bracket, the date on which the most distinct officials traded.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name), skips rows whose ticker fails ``ptr_utils.isvalid`` like the
    uncontrolled amount report does, and collects the set of canonical
    official names per (amount, formatted date).  Sets are replaced by their
    sizes, the result is flattened and ordered with the amount sort-key
    helpers, written to a breakdown CSV and previewed.

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # officials_by_amount = {'amount': {'date': set(people_who_traded_on_that_day), ...}, ...}
    officials_by_amount = {}

    for _, t in input_df.iterrows():
        if not ptr_utils.isvalid(t[constants.TICKER]):
            continue
        officials_by_amount = dict_utils.increment_set_in_dictionary(
            officials_by_amount,
            t[constants.AMOUNT],
            ptr_utils.format_date(t[constants.TDATE]),
            official.get_canonical_name(t[title]),
        )

    # Replace each set of officials with its size.
    for amount in officials_by_amount:
        for date in officials_by_amount[amount]:
            officials_by_amount[amount][date] = len(officials_by_amount[amount][date])

    # presumably {'amount': {'best_date': #_of_officials}, ...} -- confirm against dict_utils.flatten
    d = dict_utils.flatten(officials_by_amount)

    d = dict_utils.add_sort_key_for_amount(d)
    d = dict_utils.sort_dictionary_by_sort_key(d)

    filename = "most_popular_td_fe_amount_controlled"
    key_header = constants.AMOUNT

    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    wd = csv_utils.make_csv_breakdown(out_dir, filename, d, key_header)
    print(pd.read_csv(wd).head(5))
    return d

transaction_date_wrt_amount_controlled_res = transaction_date_wrt_amount_controlled()

### The most popular transaction_date for each official.

In [None]:
def transaction_date_wrt_official():
    """Find, per official, the transaction date with the most transactions.

    Counts transactions per (canonical official name, formatted date) for
    rows whose ticker passes ``ptr_utils.isvalid``; flattens and sorts the
    counts with ``dict_utils``; writes a CSV and prints its first five rows.

    Returns:
        The flattened, sorted dictionary written to the CSV.
    """
    # per_person = {'person1': {'date': #_of_transactions, ...}, 'person2': ...}
    per_person = {}

    for _, row in input_df.iterrows():
        if not ptr_utils.isvalid(row[constants.TICKER]):
            continue
        person = official.get_canonical_name(row[title])
        trade_date = ptr_utils.format_date(row[constants.TDATE])
        per_person = dict_utils.increment_dictionary_in_dictionary(per_person, person, trade_date)

    # presumably {'person': {'best_date': #_of_transactions}, ...} -- confirm against dict_utils.flatten
    ranked = dict_utils.sort_dictionary_by_inner_values(dict_utils.flatten(per_person), reverse=True)

    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    csv_path = csv_utils.make_csv(
        out_dir,
        "most_popular_td_fe_official",
        ranked,
        [title, constants.TDATE, constants.NUMT],
    )
    print(pd.read_csv(csv_path).head(5))
    return ranked

transaction_date_wrt_official_res = transaction_date_wrt_official()

### The average amount size of transactions (i.e., activity) for each transaction_date

In [None]:
def dates_and_size_of_amount():
    """Compute a per-date size figure for the transactions made on that date.

    For rows whose amount passes ``ptr_utils.isvalid``, counts how often each
    ``ptr_utils.average_amount`` value occurs per formatted date.  For every
    date the products ``count * amount`` are collected and their geometric
    mean (``gmean``), truncated to ``int``, becomes the date's value.  The
    values are sorted, passed through ``dict_utils.commify``, written to a
    CSV and the first five CSV rows are printed.

    NOTE(review): the geometric mean of per-amount totals is not the mean
    transaction size; confirm this is the intended statistic.

    Returns:
        The sorted, commified ``{date: value}`` dictionary written to the CSV.
    """
    # occurrences = {'date': {amount: #_of_transactions_with_that_amount, ...}, ...}
    occurrences = {}

    for _, row in input_df.iterrows():
        raw_amount = row[constants.AMOUNT]
        if not ptr_utils.isvalid(raw_amount):
            continue
        occurrences = dict_utils.increment_dictionary_in_dictionary(
            occurrences,
            ptr_utils.format_date(row[constants.TDATE]),
            ptr_utils.average_amount(raw_amount),
        )

    size_by_date = {}
    for trade_date, per_amount in occurrences.items():
        totals = [per_amount[amount] * amount for amount in per_amount]
        size_by_date[trade_date] = int(gmean(totals))

    size_by_date = dict_utils.commify(dict_utils.sort_dictionary_by_values(size_by_date))

    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    csv_path = csv_utils.make_csv(
        out_dir,
        "dates_and_size_of_amount",
        size_by_date,
        ["date", "average_size_of_transactions"],
    )
    print(pd.read_csv(csv_path).head(5))

    return size_by_date

dates_and_size_of_amount_res = dates_and_size_of_amount()

### Number of Transactions per Person by Date 

In [None]:
def num_of_trans_per_person_per_date():
    """Write a per-official breakdown of transaction counts by date.

    Every row of ``input_df`` increments the counter stored under
    (canonical official name, formatted date).  The nested dictionary is
    written as a breakdown CSV keyed by ``title`` and the first five CSV
    rows are printed.  Nothing is returned.
    """
    per_person = {}

    for _, row in input_df.iterrows():
        person = official.get_canonical_name(row[title])
        trade_date = ptr_utils.format_date(row[constants.TDATE])
        per_person = dict_utils.increment_dictionary_in_dictionary(per_person, person, trade_date)

    # NOTE(review): the original author flagged "some sort of error" at this
    # point without further detail.
    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    csv_path = csv_utils.make_csv_breakdown(out_dir, "num_of_trans_per_person_per_date", per_person, title)
    preview = pd.read_csv(csv_path)
    print(preview.head(5))

num_of_trans_per_person_per_date()


### Number of Transactions per Date 

In [None]:
def num_of_trans_per_person_per_date():
    """Count the total number of transactions on each date.

    Every row of ``input_df`` increments the counter of its formatted
    transaction date.  The counts are sorted by value, written to a CSV and
    the first five CSV rows are printed.

    The CSV is written as ``num_of_trans_per_date`` so it no longer
    overwrites the per-person breakdown file ``num_of_trans_per_person_per_date``
    produced in the same directory by the earlier cell.

    NOTE(review): this definition reuses the name of the per-person breakdown
    function defined in an earlier cell and therefore shadows it; the name is
    kept so the existing call below keeps working -- consider renaming to
    ``num_of_trans_per_date``.

    Returns:
        The sorted ``{date: number_of_transactions}`` dictionary.
    """
    per_date = {}

    for _, t in input_df.iterrows():
        per_date = dict_utils.increment_dictionary(per_date, ptr_utils.format_date(t[constants.TDATE]))

    filename = "num_of_trans_per_date"
    key_header = "date"
    value_header = "number_of_transactions"

    per_date = dict_utils.sort_dictionary_by_values(per_date)

    out_dir = dir_utils.makesubdir(constants.path_csv, constants.TDATE)
    wd = csv_utils.make_csv(out_dir, filename, per_date, [key_header, value_header])
    # fig = graph_csv(dir, path_html, filename, key_header, value_header)
    # fig.show()
    print(pd.read_csv(wd).head(5))
    return per_date

trans_per_person_per_date_res = num_of_trans_per_person_per_date()

### Frequency of Transactions per Date Controlled
_Number of transactions per date controlled by official. E.g. if Ted Baker made 40 transactions on 1/1/02 and Sam Wall made 2 transactions on 1/1/02, we conclude that there were two transactions on 1/1/02._


In [None]:
def num_of_trans_per_date_controlled():
    """Count, per date, how many distinct officials made a transaction.

    Builds ``{date: {official: count}}`` from every row of ``input_df``,
    derives ``{date: number_of_officials}`` from it, sorts that by value,
    writes it to a CSV and prints the first five CSV rows.

    Returns:
        The nested ``{date: {official: count}}`` dictionary (not the
        per-date totals), which the tax-date cells consume.
    """
    by_date = {}

    for _, row in input_df.iterrows():
        by_date = dict_utils.increment_dictionary_in_dictionary(
            by_date,
            ptr_utils.format_date(row[constants.TDATE]),
            official.get_canonical_name(row[title]),
        )

    # One "transaction" per official per date.
    unique_counts = {trade_date: len(by_date[trade_date]) for trade_date in by_date}
    unique_counts = dict_utils.sort_dictionary_by_values(unique_counts)

    out_dir = dir_utils.makesubdir(constants.path_csv, "transaction_date")
    csv_path = csv_utils.make_csv(
        out_dir,
        "num_of_trans_per_date_controlled",
        unique_counts,
        ["date", "number_of_transactions_unique"],
    )
    # fig = graph_csv(dir, path_html, filename, key_header, value_header, scatter=True)
    # fig.show()
    print(pd.read_csv(csv_path).head(5))
    return by_date
    
num_of_trans_per_date_controlled_res = num_of_trans_per_date_controlled()


### Tax (not touched.)

#### Number of Transactions Within 2 Weeks Prior to Quarterly Tax Date 

In [None]:
# i should like type of transactions @TODO. 

def num_of_trans_within_tax_date(rows):
    """Compute the share of transactions that fall near a quarterly tax date.

    Args:
        rows: mapping of date -> number of transactions on that date
            (presumably the result of the per-date count cell).

    Both the numerator and the denominator are counted in transactions;
    previously the denominator counted dates, so the percentage could exceed
    100.  An empty mapping yields 0.0 instead of a ZeroDivisionError.

    Returns:
        The percentage (0-100) of transactions whose date satisfies
        ``ptr_utils.within_tax_date``.
    """
    total = within = 0

    for date, count in rows.items():
        total += count
        if ptr_utils.within_tax_date(date):
            within += count

    percent = (within / total) * 100 if total else 0.0
    print("Percent of transactions posted within two weeks of quarterly tax deadline: {percent}%".format(percent=str(percent)[:5]))
    return percent

frac = num_of_trans_within_tax_date(trans_per_person_per_date_res)


#### Number of Transactions Within 2 Weeks Prior to Quarterly Tax Date Semi-Controlled 

_Given dict='09/03/2021': {'Thomas H Tuberville': 1, 'Cynthia M Lummis': 1, 'A. Mitchell Mcconnell, Jr.': 1}... I only increment the number of within (tax date) once per date per official. So, if an official does 100 transactions on a date within two weeks of a quarterly deadline, then I only count it as one transaction._

_A Note: total === number of transactions per person per date (so not really all transactions), because someone could have potentially made 60 transactions on one date, and the extra transactions are included in neither total nor within, if applicable._

In [None]:
def num_of_trans_within_tax_date_controlled(rows):
    """Collect the officials who traded near a quarterly tax date.

    Args:
        rows: mapping of date -> {official: count}.  Each official is counted
            once per date, regardless of the inner count.

    Prints the percentage of (date, official) pairs whose date satisfies
    ``ptr_utils.within_tax_date``; an empty mapping prints 0.0 instead of
    raising ZeroDivisionError.

    Returns:
        The set of officials with at least one such date.
    """
    total = within = 0
    people = set()

    for date, inner_dict in rows.items():
        officials_that_day = len(inner_dict)
        total += officials_that_day
        if ptr_utils.within_tax_date(date):
            within += officials_that_day
            people.update(inner_dict)

    percent = (within / total) * 100 if total else 0.0
    print("Percent of transactions posted within two weeks of quarterly tax deadline: {percent}%".format(percent=str(percent)[:5]))
    return people

num_of_trans_within_tax_date_controlled_res = num_of_trans_within_tax_date_controlled(num_of_trans_per_date_controlled_res)


In [None]:
from utils import sort_dictionary_by_keys

def people_and_within_tax_date(people):
    """Write and print who traded near a tax deadline, plus a party breakdown.

    Args:
        people: iterable of official names (the set returned by the
            controlled tax-date count is passed in below).

    Writes two CSVs under ``transaction_date/tax``: the alphabetically sorted
    list of officials, and the number of those officials per party as
    reported by ``search.wiki_search(...).get_party()``.  The output
    directory is derived from ``constants.path_csv``; the bare ``path_csv``
    used before is not defined in this notebook at this point.
    """
    # todo get number of senators.
    # todo is the monetary value of that equal!!!!
    # Names map to an empty string so the CSV has a single "Officials" column.
    officials = dict_utils.sort_dictionary_by_keys(dict.fromkeys(people, ""))

    out_dir = dir_utils.makesubdir(constants.path_csv, "transaction_date/tax")
    wd = csv_utils.make_csv(out_dir, "people_and_within_tax_date_list", officials, ["Officials"])
    df = pd.read_csv(wd)
    print("People who posted transactions within two weeks of quarterly tax deadline:\n {}\n".format(df.head(5)))

    print("Number of people who posted transactions within two weeks of quarterly tax deadline: {}\n".format(len(people)))

    party = {}
    for p in people:
        party = dict_utils.increment_dictionary(party, search.wiki_search(p).get_party())

    party = dict_utils.sort_dictionary_by_values(party)

    wd = csv_utils.make_csv(out_dir, "people_and_within_tax_date_list_w_aff", party, ["party", "number_of_filing_within_tax_date"])
    df = pd.read_csv(wd)
    print("Party breakdown of people who posted transactions within two weeks of quarterly tax deadline:\n {}\n".format(df.head(5)))

people_and_within_tax_date(num_of_trans_within_tax_date_controlled_res)

In [None]:
def people_and_within_tax_date_how_often(people):
    """Count how often each listed official traded near a tax deadline.

    Args:
        people: collection of canonical official names to consider.

    Two tallies are written to CSV under ``transaction_date/tax`` and
    previewed: the number of qualifying transactions per official, and the
    number of distinct qualifying dates per official.

    Officials are keyed by their canonical name throughout (previously the
    membership test used the canonical name but the tallies used the raw
    name, so spelling variants of one person were counted separately).  The
    date is passed through ``ptr_utils.format_date`` before
    ``ptr_utils.within_tax_date``, as the other callers in this notebook do.
    NOTE(review): confirm ``within_tax_date`` expects the formatted date.
    """
    per_person = {}
    per_date = {}

    for _, t in input_df.iterrows():
        name = official.get_canonical_name(t[title])
        if name not in people:
            continue
        trade_date = ptr_utils.format_date(t[constants.TDATE])
        if not ptr_utils.within_tax_date(trade_date):
            continue
        per_person = dict_utils.increment_dictionary(per_person, name)
        per_date = dict_utils.increment_dictionary_in_dictionary(per_date, trade_date, name)

    # One increment per (date, official) pair: distinct qualifying dates per official.
    per_person_by_date = {}
    for trade_date in per_date:
        for person in per_date[trade_date]:
            per_person_by_date = dict_utils.increment_dictionary(per_person_by_date, person)

    per_person = dict_utils.sort_dictionary_by_values(per_person)
    per_person_by_date = dict_utils.sort_dictionary_by_values(per_person_by_date)

    out_dir = dir_utils.makesubdir(constants.path_csv, "transaction_date/tax")
    wd = csv_utils.make_csv(out_dir, "people_and_within_tax_date_how_often", per_person, [title, "number_of_filing_within_tax_date"])
    df = pd.read_csv(wd)
    print("People who posted transactions within two weeks of quarterly tax deadline and the number of transactions posted:\n {}\n".format(df.head(5)))

    wd = csv_utils.make_csv(out_dir, "people_and_within_tax_date_how_often_date_controlled", per_person_by_date, [title, "number_of_filing_within_tax_date_date_controlled"])
    df = pd.read_csv(wd)
    print("People who posted transactions within two weeks of quarterly tax deadline and the number of transactions posted controlled by date:\n {}\n".format(df.head(5)))

          
people_and_within_tax_date_how_often(num_of_trans_within_tax_date_controlled_res)



# Main

## Owner (owner)

## Ticker (ticker)

### Frequency of Ticker per Year

In [None]:
def frequency_of_ticker_breakdown_ticker():
    """Write a per-ticker breakdown of transaction counts by year plus a total.

    For every row of ``input_df`` with a valid ticker, increments the
    ticker's counter for the transaction year and its "Total" counter.  The
    result is sorted by ticker, written as a breakdown CSV under ``ticker``
    and the first two CSV rows are printed.

    Helpers are referenced through the modules imported at the top of the
    notebook; the bare names used before are not imported ahead of this cell.
    """
    per_ticker = {}

    for _, transaction in input_df.iterrows():
        ticker = transaction['ticker']
        if ptr_utils.isvalid(ticker):
            # NOTE(review): get_year is only imported by a later cell
            # (`from utils import get_year`); confirm which module provides it.
            year = get_year(transaction['transaction_date'])
            per_ticker = dict_utils.increment_dictionary_in_dictionary(per_ticker, ticker, year)
            per_ticker = dict_utils.increment_dictionary_in_dictionary(per_ticker, ticker, "Total")

    per_ticker = dict_utils.sort_dictionary_by_keys(per_ticker)

    filename = "trans_per_year_breakdown"
    key_header = "ticker"

    out_dir = dir_utils.makesubdir(constants.path_csv, "ticker")
    wd = csv_utils.make_csv_breakdown(out_dir, filename, per_ticker, key_header)
    print(pd.read_csv(wd).head(2))

frequency_of_ticker_breakdown_ticker()

### Frequency of Ticker per Date

In [None]:
from utils import get_year, increment_dictionary_in_dictionary, path_csv
from csv_utils import make_csv_breakdown

def frequency_of_ticker_by_date():
    """Write a per-ticker breakdown of transaction counts by transaction date.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name) and, for rows with a valid ticker, increments the counter stored
    under (ticker, raw transaction date).  The result is written as a
    breakdown CSV under ``ticker`` and the first two CSV rows are printed.
    """
    per_ticker = {}

    for _, transaction in input_df.iterrows():
        ticker = transaction['ticker']
        if ptr_utils.isvalid(ticker):
            per_ticker = dict_utils.increment_dictionary_in_dictionary(
                per_ticker, ticker, transaction['transaction_date']
            )

    filename = "frequency_of_ticker_by_date"
    key_header = "ticker"

    out_dir = dir_utils.makesubdir(constants.path_csv, "ticker")
    wd = csv_utils.make_csv_breakdown(out_dir, filename, per_ticker, key_header)
    print(pd.read_csv(wd).head(2))

frequency_of_ticker_by_date()

### Industry

#### Number of Transactions per Industry
_Not controlled in any way._

In [None]:
def number_of_transactions_per_indusry():
    """Count transactions per industry.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name), resolves each ticker to an industry through the notebook-level
    ``industry_df`` mapping and counts the rows for which a truthy industry
    is found.  The counts are sorted by value, written to a CSV under
    ``ticker/industry`` and the first five CSV rows are printed.

    Returns:
        The sorted ``{industry: number_of_transactions}`` dictionary.
    """
    per_industry = {}

    for _, transaction in input_df.iterrows():
        industry = dir_utils.search_mapping(industry_df, transaction['ticker'])
        if industry:
            per_industry = dict_utils.increment_dictionary(per_industry, industry)

    filename = "number_of_transactions_per_indusry"
    key_header = "industry"
    value_header = "number_of_transactions"

    per_industry = dict_utils.sort_dictionary_by_values(per_industry)

    out_dir = dir_utils.makesubdir(constants.path_csv, "ticker/industry")
    wd = csv_utils.make_csv(out_dir, filename, per_industry, [key_header, value_header])
    print(pd.read_csv(wd).head(5))

    return per_industry

number_of_transactions_per_indusry_res = number_of_transactions_per_indusry()

#### Industry Breakdown per Official

In [None]:
def frequency_of_industry_breakdown_official():
    """Write a per-official breakdown of transaction counts by industry.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name), resolves each ticker to an industry through ``industry_df`` and,
    when a truthy industry is found, increments the counter stored under
    (canonical official name, industry).  The result is sorted by official,
    written as a breakdown CSV under ``ticker/industry`` and the first two
    CSV rows are printed.
    """
    per_official = {}

    for _, transaction in input_df.iterrows():
        industry = dir_utils.search_mapping(industry_df, transaction['ticker'])
        if industry:
            per_official = dict_utils.increment_dictionary_in_dictionary(
                per_official, official.get_canonical_name(transaction[title]), industry
            )

    per_official = dict_utils.sort_dictionary_by_keys(per_official)

    filename = "frequency_of_industry_per_official"
    out_dir = dir_utils.makesubdir(constants.path_csv, "ticker/industry")

    wd = csv_utils.make_csv_breakdown(out_dir, filename, per_official, title)
    print(pd.read_csv(wd).head(2))

frequency_of_industry_breakdown_official()

#### Frequency of Industry per Year

In [None]:
def frequency_of_industry_breakdown():
    """Write a per-industry breakdown of transaction counts by year.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name).  Rows with a valid ticker are resolved to an industry through
    ``industry_df``; when a truthy industry is found, the counter stored
    under (industry, transaction year) is incremented.  The result is sorted
    by industry, written as a breakdown CSV under ``ticker/industry`` and the
    first two CSV rows are printed.
    """
    per_industry = {}

    for _, transaction in input_df.iterrows():
        ticker = transaction['ticker']
        if not ptr_utils.isvalid(ticker):
            continue
        industry = dir_utils.search_mapping(industry_df, ticker)
        if industry:
            # NOTE(review): get_year comes from a cell-level
            # `from utils import get_year`; confirm which module provides it.
            per_industry = dict_utils.increment_dictionary_in_dictionary(
                per_industry, industry, get_year(transaction['transaction_date'])
            )

    per_industry = dict_utils.sort_dictionary_by_keys(per_industry)

    filename = "frequency_of_industry_breakdown"
    key_header = "industry"

    out_dir = dir_utils.makesubdir(constants.path_csv, "ticker/industry")
    wd = csv_utils.make_csv_breakdown(out_dir, filename, per_industry, key_header)
    print(pd.read_csv(wd).head(2))

frequency_of_industry_breakdown()

## Asset Description (asset_description)

## Asset Type (asset_type)

### Frequency of Asset Type

In [None]:
def frequency_of_asset_type():
    """Count transactions per asset type.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name) and counts rows whose ``asset_type`` passes ``ptr_utils.isvalid``.
    The counts are sorted by value, written to a CSV under ``asset_type`` and
    the first five CSV rows are printed.
    """
    per_asset_type = {}

    for _, transaction in input_df.iterrows():
        asset_type = transaction['asset_type']
        if ptr_utils.isvalid(asset_type):
            per_asset_type = dict_utils.increment_dictionary(per_asset_type, asset_type)

    per_asset_type = dict_utils.sort_dictionary_by_values(per_asset_type)

    filename = "frequency_of_asset_type"
    key_header = "asset_type"
    value_header = "number_of_transactions"

    out_dir = dir_utils.makesubdir(constants.path_csv, "asset_type")
    wd = csv_utils.make_csv(out_dir, filename, per_asset_type, [key_header, value_header])
    print(pd.read_csv(wd).head(5))


frequency_of_asset_type()

## Amount

### Frequency of Amount by Person

In [None]:
def frequency_of_amount_by_persom():
    """Write a per-official breakdown of transaction counts by amount bracket.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name) and increments the counter stored under (canonical official name,
    amount).  The result is sorted by official, written as a breakdown CSV
    under ``amount`` and the first two CSV rows are printed.

    The key column is labelled with ``title`` because the outer keys are
    officials, as in the other per-official breakdowns (it was labelled
    "amount" before).
    """
    per_official = {}

    for _, transaction in input_df.iterrows():
        per_official = dict_utils.increment_dictionary_in_dictionary(
            per_official, official.get_canonical_name(transaction[title]), transaction['amount']
        )

    per_official = dict_utils.sort_dictionary_by_keys(per_official)

    filename = "frequency_of_amount_by_persom"
    key_header = title

    out_dir = dir_utils.makesubdir(constants.path_csv, "amount")
    wd = csv_utils.make_csv_breakdown(out_dir, filename, per_official, key_header)
    print(pd.read_csv(wd).head(2))
   
    
frequency_of_amount_by_persom()

### Frequency of Transactions by Amount

In [None]:
def frequency_of_amount_total():
    """Count transactions per amount bracket.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name) and counts rows per ``amount`` value.  The counts are ordered with
    the amount sort-key helpers, written as a breakdown CSV under ``amount``
    and the first two CSV rows are printed.
    """
    per_amount = {}

    for _, transaction in input_df.iterrows():
        per_amount = dict_utils.increment_dictionary(per_amount, transaction['amount'])

    # NOTE(review): keyword arguments are carried over unchanged; confirm that
    # dict_utils.add_sort_key_for_amount accepts normal_header / normal.
    per_amount = dict_utils.add_sort_key_for_amount(per_amount, normal_header="num_of_transactions", normal=True)
    per_amount = dict_utils.sort_dictionary_by_sort_key(per_amount)

    filename = "frequency_of_amount_total"
    key_header = "amount"

    out_dir = dir_utils.makesubdir(constants.path_csv, "amount")
    wd = csv_utils.make_csv_breakdown(out_dir, filename, per_amount, key_header)
    print(pd.read_csv(wd).head(2))
   
    
frequency_of_amount_total()

### Frequency of Amount by Gender 

In [None]:
def frequency_of_amount_by_gender():
    """Write a per-amount breakdown of transaction counts by official's gender.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name).  Each official's gender is looked up once through
    ``search.wiki_search(...).get_gender()`` and cached; every row then
    increments the counter stored under (amount, gender).  The result is
    ordered with the amount sort-key helpers, written as a breakdown CSV
    under ``amount`` and the first two CSV rows are printed.
    """
    per_amount = {}

    # Cache so each official triggers only one lookup.
    gender = {}
    for _, transaction in input_df.iterrows():
        person = transaction[title]

        if person not in gender:
            gender[person] = search.wiki_search(person).get_gender()

        per_amount = dict_utils.increment_dictionary_in_dictionary(
            per_amount, transaction['amount'], gender[person]
        )

    per_amount = dict_utils.add_sort_key_for_amount(per_amount)
    per_amount = dict_utils.sort_dictionary_by_sort_key(per_amount)

    filename = "frequency_of_amount_by_gender"
    key_header = "amount"

    out_dir = dir_utils.makesubdir(constants.path_csv, "amount")
    wd = csv_utils.make_csv_breakdown(out_dir, filename, per_amount, key_header)
    print(pd.read_csv(wd).head(2))
   
    
frequency_of_amount_by_gender()

### Frequency of Transactions by Political Affiliation and Amount

In [None]:


def frequency_of_amount_by_aff():
    """Write a per-amount breakdown of transaction counts by party affiliation.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name).  Each official's party is looked up once through
    ``search.wiki_search(...)`` and cached; every row then increments the
    counter stored under (amount, party).  The result is ordered with the
    amount sort-key helpers, written as a breakdown CSV under ``amount`` and
    the first two CSV rows are printed.
    """
    per_amount = {}

    # Cache so each official triggers only one lookup.
    affiliations = {}
    for _, transaction in input_df.iterrows():
        person = transaction[title]

        if person not in affiliations:
            # get_party() is the accessor the tax-date cell uses on the same
            # lookup result.  NOTE(review): the earlier code read `.party`
            # directly -- confirm the two are equivalent.
            affiliations[person] = search.wiki_search(person).get_party()

        per_amount = dict_utils.increment_dictionary_in_dictionary(
            per_amount, transaction['amount'], affiliations[person]
        )

    per_amount = dict_utils.add_sort_key_for_amount(per_amount)
    per_amount = dict_utils.sort_dictionary_by_sort_key(per_amount)

    filename = "frequency_of_amount_by_aff"
    key_header = "amount"

    out_dir = dir_utils.makesubdir(constants.path_csv, "amount")
    wd = csv_utils.make_csv_breakdown(out_dir, filename, per_amount, key_header)
    print(pd.read_csv(wd).head(2))
   
    
frequency_of_amount_by_aff()

### Average For Buys and Sells per Official 

In [None]:
def average_per_person():
    """Compute a per-official size figure for that official's transactions.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name).  For rows whose amount passes ``ptr_utils.isvalid``, counts how
    often each ``ptr_utils.average_amount`` value occurs per canonical
    official name.  For every official the products ``count * amount`` are
    collected and their geometric mean (``gmean``), truncated to ``int``,
    becomes the official's value.  The values are sorted, passed through
    ``dict_utils.commify``, written to a CSV under ``amount`` and the first
    five CSV rows are printed.

    NOTE(review): the geometric mean of per-amount totals is not the mean
    transaction size; confirm this is the intended statistic.

    Returns:
        The sorted, commified ``{official: value}`` dictionary.
    """
    # occurrences = {'official': {amount: #_of_transactions_with_that_amount, ...}, ...}
    occurrences = {}

    for _, transaction in input_df.iterrows():
        if ptr_utils.isvalid(transaction['amount']):
            # if 'Purchase' in transaction['type']:

            # if 'Sale' in transaction['type']:
            occurrences = dict_utils.increment_dictionary_in_dictionary(
                occurrences,
                official.get_canonical_name(transaction[title]),
                ptr_utils.average_amount(transaction['amount']),
            )

    d2 = {}
    for person, per_amount in occurrences.items():
        totals = [per_amount[amount] * amount for amount in per_amount]
        d2[person] = int(gmean(totals))

    filename = "average_per_person"
    key_header = title
    value_header = "average_size_of_transactions"

    d2 = dict_utils.sort_dictionary_by_values(d2)
    d2 = dict_utils.commify(d2)

    out_dir = dir_utils.makesubdir(constants.path_csv, "amount")
    wd = csv_utils.make_csv(out_dir, filename, d2, [key_header, value_header])
    print(pd.read_csv(wd).head(5))

    return d2

average_per_person_res = average_per_person()

## Types (type)

### Types of Actions Total

In [None]:
def frequency_of_act():
    """Count transactions per transaction type.

    Iterates ``input_df`` (the previous code iterated an undefined ``rows``
    name) and counts rows whose ``type`` passes ``ptr_utils.isvalid``.  The
    counts are sorted by value, written to a CSV under ``type`` and the first
    five CSV rows are printed.
    """
    per_type = {}

    for _, transaction in input_df.iterrows():
        kind = transaction['type']
        if ptr_utils.isvalid(kind):
            per_type = dict_utils.increment_dictionary(per_type, kind)

    per_type = dict_utils.sort_dictionary_by_values(per_type)

    filename = "frequency_of_act"
    key_header = "type"
    value_header = "number_of_transactions"

    out_dir = dir_utils.makesubdir(constants.path_csv, "type")
    wd = csv_utils.make_csv(out_dir, filename, per_type, [key_header, value_header])
    print(pd.read_csv(wd).head(5))

     
frequency_of_act()

### Types of Transactions per Person

In [None]:
from utils import increment_dictionary_in_dictionary, sort_dictionary_by_keys, get_data, path_csv,makesubdir
from csv_utils import make_csv_breakdown
import pandas as pd 
from official import get_canonical_name


def types_of_transactions_per_person():
    """Break down each official's transactions by 'type', write the breakdown to CSV, and print a preview.

    Rows whose 'type' does not pass isvalid are skipped, matching how
    frequency_of_act treats the same column, so missing types do not become
    their own bucket in the breakdown.

    Returns:
        {'official': {'type': #_of_transactions, ...}, ...} after sort_dictionary_by_keys.
    """
    d = {}

    for _, transaction in rows:
        transaction_type = transaction['type']
        if not isvalid(transaction_type):
            continue
        d = increment_dictionary_in_dictionary(
            d, get_canonical_name(transaction[title]), transaction_type)

    filename = "types_of_transactions_per_person"

    d = sort_dictionary_by_keys(d)

    out_dir = makesubdir(path_csv, "type")
    wd = make_csv_breakdown(out_dir, filename, d, title)
    df = pd.read_csv(wd)
    print(df.head(5))

    return d

types_of_transactions_per_person_res = types_of_transactions_per_person()

    

## Comment (comment)

## Frequency

### Number of Transactions per Year

In [None]:

def num_of_trans_per_year():
    """Tally transactions by the year of 'transaction_date', save the tally as CSV, and print a preview.

    Returns:
        The year -> count dictionary as returned by sort_dictionary_by_values.
    """
    per_year = {}
    for _, record in rows:
        year = get_year(record['transaction_date'])
        per_year = increment_dictionary(per_year, year)

    per_year = sort_dictionary_by_values(per_year)

    headers = ["year", "number_of_transactions"]
    out_dir = makesubdir(path_csv, "frequency")
    csv_path = make_csv(out_dir, "num_of_trans_per_year", per_year, headers)
    preview = pd.read_csv(csv_path)
    print(preview.head(5))

    return per_year

num_of_trans_per_year_res = num_of_trans_per_year()

### Number of Transactions per Person 
_Not controlling for number of years in position or size of transaction._ For each official, we want their total number of transactions, e.g. `{'Sam': 5, 'Alex': 2424, ...}`.

In [None]:

def num_of_trans_per_person():
    """Tally transactions per canonical official name, save the tally as CSV, and print a preview.

    Returns:
        The official -> count dictionary as returned by sort_dictionary_by_values.
    """
    per_person = {}
    for _, record in rows:
        name = get_canonical_name(record[title])
        per_person = increment_dictionary(per_person, name)

    per_person = sort_dictionary_by_values(per_person)

    headers = [title, "number_of_transactions"]
    out_dir = makesubdir(path_csv, "frequency")
    csv_path = make_csv(out_dir, "num_of_trans_per_person", per_person, headers)
    preview = pd.read_csv(csv_path)
    print(preview.head(5))

    return per_person

trans_per_person_res = num_of_trans_per_person()


### Number of Transactions per Person Controlled
_Divide number of transactions by number of years in official position.  Not controlling for size of transaction._

In [None]:
def num_of_trans_per_person_controlled(rows):
    """Normalise each official's transaction count by their years in office, write to CSV, and print a preview.

    Args:
        rows: mapping of official name -> total number of transactions
            (the notebook passes the result of num_of_trans_per_person).

    Returns:
        official -> ceil(transactions / years in office), as returned by
        sort_dictionary_by_values.
    """
    import math  # local import: math is not part of the notebook's import cell

    d = {}

    for person, num_of_trans in rows.items():
        profile = wiki_search(person)
        # A tenure of 0 (or a missing value) is counted as one year so the
        # division cannot fail and abort the whole cell.
        years_in_office = profile.get_num_of_years() or 1
        d[person] = math.ceil(num_of_trans / years_in_office)

    filename = "num_of_trans_per_person_controlled"
    value_header = "avg_number_of_transactions"

    d = sort_dictionary_by_values(d)

    out_dir = makesubdir(path_csv, "frequency")
    wd = make_csv(out_dir, filename, d, [title, value_header])
    print(pd.read_csv(wd).head(5))

    return d

num_of_trans_per_person_controlled_res = num_of_trans_per_person_controlled(trans_per_person_res)
    

### Number of Transactions per Person Controlled w/Number of Years in Congress

In [None]:
def num_of_trans_per_person_controlled_w_seniority(rows):
    """Pair each official's per-year transaction rate with their years in office, write to CSV, and print a preview.

    Args:
        rows: mapping of official name -> total number of transactions
            (the notebook passes the result of num_of_trans_per_person).

    Returns:
        official -> (ceil(transactions / years in office), years in office),
        as returned by sort_dictionary_by_values.
    """
    import math  # local import: math is not part of the notebook's import cell

    d = {}

    for person, num_of_trans in rows.items():
        profile = wiki_search(person)
        # Query the tenure once and reuse it for both the rate and the seniority column.
        years_in_office = profile.get_num_of_years()
        # A tenure of 0 (or a missing value) is counted as one year for the
        # division only; the seniority column keeps the value as reported.
        rate = math.ceil(num_of_trans / (years_in_office or 1))
        d[person] = rate, years_in_office

    filename = "num_of_trans_per_person_controlled_w_seniority"
    value_header = "avg_number_of_transactions"

    d = sort_dictionary_by_values(d)

    out_dir = makesubdir(path_csv, "frequency")
    wd = make_csv(out_dir, filename, d, [title, value_header, "seniority"])
    print(pd.read_csv(wd).head(5))

    return d

num_of_trans_per_person_controlled_w_seniority_res = num_of_trans_per_person_controlled_w_seniority(trans_per_person_res)
    

### Number of Transactions per Person Controlled w/Degrees

In [None]:
def num_of_trans_per_person_controlled_w_degrees(rows):
    """Pair each official's per-year transaction rate with their number of degrees, write to CSV, and print a preview.

    Args:
        rows: mapping of official name -> total number of transactions
            (the notebook passes the result of num_of_trans_per_person).

    Returns:
        official -> (ceil(transactions / years in office), number of degrees),
        as returned by sort_dictionary_by_values.
    """
    import math  # local import: math is not part of the notebook's import cell

    d = {}

    for person, num_of_trans in rows.items():
        profile = wiki_search(person)
        # A tenure of 0 (or a missing value) is counted as one year so the
        # division cannot fail and abort the whole cell.
        years_in_office = profile.get_num_of_years() or 1
        d[person] = math.ceil(num_of_trans / years_in_office), profile.get_num_of_degrees()

    filename = "num_of_trans_per_person_controlled_w_degrees"
    value_header = "avg_number_of_transactions"

    d = sort_dictionary_by_values(d)

    out_dir = makesubdir(path_csv, "frequency")
    wd = make_csv(out_dir, filename, d, [title, value_header, "num_of_degrees"])
    print(pd.read_csv(wd).head(5))

    return d

num_of_trans_per_person_controlled_w_degrees_res = num_of_trans_per_person_controlled_w_degrees(trans_per_person_res)
    

### Number of Transactions per Person Controlled w/Affiliation 

In [None]:

def num_of_trans_per_person_controlled_w_aff(rows):
    """Attach each official's party to their per-year transaction rate, write to CSV, and print a preview.

    Args:
        rows: mapping of official name -> value to report; the notebook passes
            the result of num_of_trans_per_person_controlled.

    Returns:
        A pair: the official -> (value, party) dictionary as returned by
        sort_dictionary_by_values, and the first 10 rows of the written CSV
        as a DataFrame.
    """
    d = {}

    for person, val in rows.items():
        # Named `profile` so the `official` module alias imported at the top
        # of the notebook is not shadowed.
        profile = wiki_search(person)
        d[person] = val, profile.get_party()

    filename = "num_of_trans_per_person_controlled_w_aff"
    value_header = "avg_number_of_transactions"

    d = sort_dictionary_by_values(d)

    out_dir = makesubdir(path_csv, "frequency")
    wd = make_csv(out_dir, filename, d, [title, value_header, "party"])

    # Read the CSV back once and slice it for both the preview and the return value.
    df = pd.read_csv(wd)
    print(df.head(5))

    return d, df.head(10)

num_of_trans_per_person_controlled_w_aff_res, top_10 = num_of_trans_per_person_controlled_w_aff(num_of_trans_per_person_controlled_res)
    

### Transaction Date (transaction_date)

#### Frequency of Differences between Transaction and Disclosure Date

In [None]:
def frequency_of_differences():
    """Histogram the gap between disclosure and transaction dates, write it to CSV, and print the mean gap.

    For every row, difference_between_dates(disclosure_date, transaction_date)
    is tallied (truncated to int) into a difference -> count dictionary, which
    is ordered with sort_dictionary_by_keys and written under
    "disclosure_date/transaction_date". The floor-divided average of the
    differences is printed; nothing is returned.
    """
    d = {}
    total = num = 0

    for _, transaction in rows:
        transaction_date = transaction['transaction_date']
        disclosure_date = transaction['disclosure_date']

        # Per the original note: negative means X days BEFORE, positive means Y days AFTER.
        diff = difference_between_dates(disclosure_date, transaction_date)
        total += 1
        num += diff

        d = increment_dictionary(d, int(diff))

    d = sort_dictionary_by_keys(d)

    filename = "frequency_of_differences"
    key_header = "difference_in_days"
    value_header = "#_of_transactions_with_that_diff"

    out_dir = makesubdir(path_csv, "disclosure_date/transaction_date")
    make_csv(out_dir, filename, d, [key_header, value_header])
    # fig = graph_csv(dir, path_html, filename, key_header, value_header)
    # fig.show()

    # Without rows there is no average to report; avoid dividing by zero.
    if total == 0:
        print("Average difference in days: n/a (no transactions)")
    else:
        print("Average difference in days: {}".format(num//total))

frequency_of_differences()