In [37]:
import pandas as pd
import numpy as np
import requests as req
import sys
import time

from datetime import datetime
from datetime import timedelta

# change the following filepath, file, and key name to your own
sys.path.insert(0, '../../API_Keys/')
from vantage_api import premium_av_key

import pickle

from lists import error_list2 #this serves as a way to import the list of bad ticker names if one
                                #doesn't have time to run the whole notebook.

# Function to Pull Stock Data and Create Error List
This function pulls stock data and creates a list of tickers that can't be found in Alpha Vantage's API, to use for further cleaning. This means that the stock data and the error list must each be pulled from the the result of the function after running.

In [38]:
#below, be sure to change the name of 'apikey' to your key's name

def pull_all_stocks(df):
    '''
    Takes a dateframe of congress people w/ their positions in various stocks and returns all
    the time series data of relevant stocks.
    '''
    stock_dict = {}
    error_list = []
    for ticker in list(df['ticker'].unique()):
        json = req.get(url = "https://www.alphavantage.co/query",
                  params = {
                      'function': 'TIME_SERIES_DAILY_ADJUSTED',
                      'outputsize': 'full',
                      'symbol': ticker,
#--------------------->
                      'apikey':premium_av_key #<-----CHANGE KEY NAME HERE
                  }).json()
        try:
            ticker_df = pd.DataFrame(json['Time Series (Daily)']).T
            ticker_df.drop(columns = ['1. open', '2. high', '3. low', '4. close', '7. dividend amount', '8. split coefficient'], inplace = True)
            ticker_df.rename(columns = {'5. adjusted close': 'adj_close',
                                        '6. volume': 'volume'
                                       }, inplace = True)
            if ticker_df.shape[0] > 2500:
                ticker_df = ticker_df.iloc[range(2500)] #Data beyond 2012 is unnecessary, this helps reduce file size
            stock_dict[f'{ticker}'] = ticker_df.sort_index()
        except:
            error_list.append(ticker)
            print(f'Ticker {ticker} appears to be missing "Time Series (Daily)" or another string used to call data.')
            pass
        time.sleep(0.8)
    return stock_dict, error_list

# Importing Data

The data we use throughout our analysis is available at [this link](https://drive.google.com/drive/folders/1LG5bKuLBJXHF2HL9jAuEtyCQbmk8m3lS?usp=sharing). 

To replicate the analysis, please download the files from the `'data'` folder at the above link and put it into the `'data'` folder in this repository on your computer.

In [39]:
reps = pd.read_csv('data/cleaned_complete_congress_data.csv')
reps.head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY
1,2022-05-31,X,"United States Steel Corporation <div class=""te...",sale_partial,50000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
2,2022-05-31,X,"United States Steel Corporation <div class=""te...",sale_full,50000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
3,2022-05-31,X,United States Steel Corporation,purchase,250000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
4,2022-05-20,PYPL,"PayPal Holdings, Inc. -",purchase,100000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL


# Additional Cleaning
We examined the value counts of the years in which these transactions were happening and decided to drop 2012 and 2013 because they had so many fewer data points. We project sales for which we don't have purchases in the records back to the first date of our records, and as it seems likely that this is incomplete data for those years, for now, we're going to start in 2014.

In [40]:
#add a transaction year

reps['transaction_year'] = reps['transaction_date'].map(lambda x: x[:4])

reps['transaction_year'].value_counts()

2020    7690
2021    5249
2019    1784
2022    1762
2018    1147
2017    1116
2015     903
2016     795
2014     594
2013       5
2012       2
Name: transaction_year, dtype: int64

In [41]:
reps = reps[(reps['transaction_year'] != '2012') & (reps['transaction_year'] != '2013')]
reps['transaction_year'].value_counts()

2020    7690
2021    5249
2019    1784
2022    1762
2018    1147
2017    1116
2015     903
2016     795
2014     594
Name: transaction_year, dtype: int64

At this point, we dropped the transaction year, as we don't anticipate needing that again.

In [42]:
reps.drop(columns = ['transaction_year'], inplace = True)

A little EDA on transaction dates.

In [43]:
reps['transaction_date'].value_counts()[:20]

2020-03-18    305
2019-06-24    208
2020-04-02    186
2021-02-16    184
2020-04-14    162
2020-02-20    136
2020-04-07    134
2020-11-13    133
2021-02-11    119
2020-11-05    107
2015-02-13     99
2020-10-22     97
2020-03-23     97
2020-11-20     89
2017-06-15     87
2020-01-13     84
2020-11-19     83
2020-03-27     80
2017-03-16     79
2020-04-29     78
Name: transaction_date, dtype: int64

# Creating Data Frame to For Feature Engineering

In [44]:
data = reps.copy()
data.head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY
1,2022-05-31,X,"United States Steel Corporation <div class=""te...",sale_partial,50000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
2,2022-05-31,X,"United States Steel Corporation <div class=""te...",sale_full,50000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
3,2022-05-31,X,United States Steel Corporation,purchase,250000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
4,2022-05-20,PYPL,"PayPal Holdings, Inc. -",purchase,100000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL


In [45]:
data.shape

(21040, 14)

# Saved Stock and Bad `'ticker'`Information
Running the stock function takes a long time. In order to save time and enable offline work, we saved our `stocks` data and an `error_list` of bad tickers from the dataframes. Both are available as pickle files. The stock data is also saved as a csv, `stocks.csv`. The error list is also in the `error_list2` that was imported at the top of this notebook. Please note that if you open `stocks.csv`, the data in the second row won't be immediately visible, but it's there if you expand the cells.

In order to use the pickled files, you should please pull them from the `'pickled_files'` folder available at [this link](https://drive.google.com/drive/folders/1LG5bKuLBJXHF2HL9jAuEtyCQbmk8m3lS?usp=sharing), the same one that has the data files used for our analysis.

In [44]:
# to import the pickeld stocks file

with open('stocks_file', 'rb') as f:
    stocks2 = pickle.load(f)

In [59]:
# to import the pickled errors_list file
with open('errors_file', 'rb') as f:
    error_list2 = pickle.load(f)

If you do the above, please run the below lines, as well, as the rest of the code refers to '`stocks`' and `'error_list2'`.

In [None]:
stocks = stocks2

In [29]:
error_list = error_list2

# Running Function to Pull Stock Data
## NOTE: this function is very time consuming to run.

In [46]:
t0 = time.time()
stocks = pull_all_stocks(data)
print("Time to run", time.time()-t0) 

Ticker PHLD appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker FB appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker RDSA appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker CNR appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker TRIT appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker FEYE appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker BLL appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker ANTM appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker AGN appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker CBS appears to be missing "Time Series (Daily)" or another string used to call data.
Ticker 3V64.TI appears to be missing "Time Series (Daily)" or another string

In [47]:
#pull the error_list out of the result

error_list = stocks[1]
print(error_list)

['PHLD', 'FB', 'RDSA', 'CNR', 'TRIT', 'FEYE', 'BLL', 'ANTM', 'AGN', 'CBS', '3V64.TI', '0QZI.IL', 'MYL', 'RDSA.AS', 'RHT', 'TOT', 'LM09.SG', 'APC', 'IBM.MX', 'ZIOP', 'NPSND', 'SCG', 'CSI', 'ZNGA.SW', 'LB', 'NEE-PC', 'TMK', 'DPM', 'SYY.SG', 'ADT.F', 'SPY160219P00180000', 'LTD', 'ESV', 'CPN', 'CVC', 'LINE', 'SXL', 'GAS', 'SBUX.SW', 'BUNT-RP', 'XLS-WI', 'TF', 'CV', 'JCOM', 'PLT', 'MFRM', 'CMN', 'BMR', 'AKRX', 'ACE', 'COH', 'COF-PP', 'MDP', 'LGP', 'BTE', 'HUB-B', 'PGH', 'NTI', 'HFC', 'VRX', 'LDR', 'CLNY', 'WIN', 'JPJQL', 'VAR', 'BRKS', 'FXM', 'FLX', 'APPL', 'APCD', 'LCRX', 'LINK-USD', 'RLGY', 'ADDDYY', 'ADS', 'IDXXX', 'RDS.B', 'FNRVGX', 'BXS', 'LBDAV', 'CTRCF', 'CRYX', 'AXHE', 'AXTE', 'KRR', 'DUK$A', 'RF$A', 'WRK.V', 'WFC$V', 'ALFY', 'BPHSPX', 'CRY', 'SPDR', 'IQVA', 'MGNA', 'MSBHY', 'RDS.A', 'HZD', 'GSS', 'UHG', 'BTC-USD', 'ETWO.W', 'GFN', 'SPYS', 'CWEN.A', 'DESY', 'BF.B', 'AAM$A', 'COF-CD', 'HTA', 'EBJ', 'DOGE-USD', 'ETH-USD', 'KRYLL-USD', 'KYRLL-USD', 'LGB-USD', 'REQUEST-USD', 'SOL-USD', 

In [48]:
#pull the stock data out of the result

stocks = stocks[0]

## Pickling the Stock and Error List Data
We pickled the file for efficiency. That way one can reopen the pickled file and not need to run the function again, which is time consuming (and also requires internet connectivity).  [This site](https://www.geeksforgeeks.org/save-a-dictionary-to-a-file/) walked us through the steps to pickle a file. Wee used pd.to_csv() but opted to use this new approach to practice what we learned in class. The downside is the data is less available (i.e. can't be opened in Excel). The upside is we practiced this approach that can be used in other situations. We'll `.to_csv()` as well.

In [49]:
try:
    stocks_file = open('stocks_file', 'wb')
    pickle.dump(stocks, stocks_file)
    stocks_file.close()
  
except:
    print("Something went wrong")

In [50]:
error_list

['PHLD',
 'FB',
 'RDSA',
 'CNR',
 'TRIT',
 'FEYE',
 'BLL',
 'ANTM',
 'AGN',
 'CBS',
 '3V64.TI',
 '0QZI.IL',
 'MYL',
 'RDSA.AS',
 'RHT',
 'TOT',
 'LM09.SG',
 'APC',
 'IBM.MX',
 'ZIOP',
 'NPSND',
 'SCG',
 'CSI',
 'ZNGA.SW',
 'LB',
 'NEE-PC',
 'TMK',
 'DPM',
 'SYY.SG',
 'ADT.F',
 'SPY160219P00180000',
 'LTD',
 'ESV',
 'CPN',
 'CVC',
 'LINE',
 'SXL',
 'GAS',
 'SBUX.SW',
 'BUNT-RP',
 'XLS-WI',
 'TF',
 'CV',
 'JCOM',
 'PLT',
 'MFRM',
 'CMN',
 'BMR',
 'AKRX',
 'ACE',
 'COH',
 'COF-PP',
 'MDP',
 'LGP',
 'BTE',
 'HUB-B',
 'PGH',
 'NTI',
 'HFC',
 'VRX',
 'LDR',
 'CLNY',
 'WIN',
 'JPJQL',
 'VAR',
 'BRKS',
 'FXM',
 'FLX',
 'APPL',
 'APCD',
 'LCRX',
 'LINK-USD',
 'RLGY',
 'ADDDYY',
 'ADS',
 'IDXXX',
 'RDS.B',
 'FNRVGX',
 'BXS',
 'LBDAV',
 'CTRCF',
 'CRYX',
 'AXHE',
 'AXTE',
 'KRR',
 'DUK$A',
 'RF$A',
 'WRK.V',
 'WFC$V',
 'ALFY',
 'BPHSPX',
 'CRY',
 'SPDR',
 'IQVA',
 'MGNA',
 'MSBHY',
 'RDS.A',
 'HZD',
 'GSS',
 'UHG',
 'BTC-USD',
 'ETWO.W',
 'GFN',
 'SPYS',
 'CWEN.A',
 'DESY',
 'BF.B',
 'AAM$A',
 'C

In [51]:
try:
    errors_file = open('errors_file', 'wb')
    pickle.dump(error_list, errors_file)
    errors_file.close()
  
except:
    print("Something went wrong")

We tested the pickle out, verifying it works, running the following, opening the code, and comparing stocks2 to stocks. They matched. We eliminated the tests, themselves, for space.

# Creating `'stocks.csv'` As An Alternative Way to Import That Data

In [52]:
stocksdf = pd.DataFrame([stocks])

In [53]:
stocksdf.head()

Unnamed: 0,WFC,X,PYPL,ABYIX,GSMYX,FCPIX,JCI,LUBYX,IVV,GLD,...,AMCR,EVRG,MGDDY,PWCDF,DNBBY,TRI,UGI,XYIGY,OWCIX,JFAMX
0,adj_close volume 2012-08-21...,adj_close volume 2012-08-21...,adj_close volume 2015-07-20 ...,adj_close volume 2014-07-03 8...,adj_close volume 2012-08-20 6...,adj_close volume 2012-08-20 1...,adj_close volume 2012-08-21 ...,adj_close volume 2016-10-27 9...,adj_close volume 2012-08-21 ...,adj_close volume 2012-08-21 1...,...,adj_close volume 2019-06-1...,adj_close volume 2018-06-04...,adj_close volume 2012-08-10 ...,adj_close volume 2012-08-20 1...,adj_close volume 2021-07-06 ...,adj_close volume 2012-08-21 ...,adj_close volume 2012-08-21 ...,adj_close volume 2012-08-15 5...,adj_close volume 2020-10-14 9...,adj_close volume 2012-08-20 1...


In [54]:
stocksdf.to_csv('data/stocks.csv', index = False)

Double checking data types

# Correcting Some `'ticker'` Errors
A couple of tickers from the error_list jumped out at us as being likely to be transcription errors. We checked and fixed them here.

In [55]:
data[data['ticker'] == 'DUKE']

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
13777,2022-03-18,DUKE,Duke Energy Corp Fxd to 09/2024 thraftr 4.875%,purchase,15000,Mo Brooks,2022-03-21,house,Mo,Brooks,Republican,1954-04-29,M,AL05


In [56]:
data.loc[data['ticker'] == 'DUKE', 'ticker'] = 'DUK'

In [57]:
data[data['ticker'] == 'DUKE']

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents


In [58]:
data[data['ticker'] == 'DUK'].head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
181,2022-01-21,DUK,Duke Energy Corporation (Holding Company) Comm...,sale_full,50000,Thomas H Tuberville,2022-02-14,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
3769,2018-04-17,DUK,Duke Energy Corporation,sale_full,50000,David A Perdue,2018-05-01,senate,David,Perdue,Republican,1949-12-10,M,GA
4052,2017-12-26,DUK,Duke Energy Corporation,purchase,15000,David A Perdue,2018-01-04,senate,David,Perdue,Republican,1949-12-10,M,GA
4693,2017-06-15,DUK,Duke Energy Corporation,sale_full,15000,Patty Murray,2017-06-22,senate,Patty,Murray,Democrat,1950-10-11,F,WA
4953,2017-03-16,DUK,Duke Energy Corporation,purchase,15000,Patty Murray,2017-03-21,senate,Patty,Murray,Democrat,1950-10-11,F,WA


In [59]:
data[data['ticker'] == 'APPL']

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
7907,2020-06-26,APPL,Apple Inc. This transaction was reported late ...,sale_full,15000,Sean Patrick Maloney,2021-04-02,house,Sean,Maloney,Democrat,1966-07-30,M,NY18
10165,2021-03-04,APPL,Apple Inc,purchase,15000,James R. Langevin,2021-04-09,house,James,Langevin,Democrat,1964-04-22,M,RI02
10166,2021-03-09,APPL,Apple Inc,sale_full,15000,James R. Langevin,2021-04-09,house,James,Langevin,Democrat,1964-04-22,M,RI02
10167,2021-03-08,APPL,APPLE INC,purchase,15000,James R. Langevin,2021-04-09,house,James,Langevin,Democrat,1964-04-22,M,RI02
12915,2021-03-31,APPL,Apple Inc,sale_full,15000,None Ashley Hinson Arenholz,2021-04-05,house,,Hinson,Republican,1983-06-27,F,IA01
13738,2020-09-10,APPL,"Apple, Inc.",sale_full,15000,Francis Rooney,2020-10-05,house,Francis,Rooney,Republican,1953-12-04,M,FL19
13928,2020-11-19,APPL,Apple,purchase,50000,Kenny Marchant,2020-12-29,house,Kenny,Marchant,Republican,1951-02-23,M,TX24
14005,2020-11-19,APPL,Apple,purchase,50000,Kenny Marchant,2020-12-29,house,Kenny,Marchant,Republican,1951-02-23,M,TX24
16586,2020-01-02,APPL,Apple Inc,sale_full,50000,Adam B. Schiff,2020-01-08,house,Adam,Schiff,Democrat,1960-06-22,M,CA28


In [60]:
data.loc[data['ticker'] == 'APPL', 'ticker'] = 'AAPL'

In [61]:
data[data['ticker'] == 'APPL']

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents


In [62]:
data[data['ticker'] == 'AAPL'].head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
109,2022-02-24,AAPL,Apple Inc. -,sale_partial,100000,Ron L Wyden,2022-03-23,senate,Ron,Wyden,Democrat,1949-05-03,M,OR
110,2022-02-24,AAPL,Apple Inc. -,purchase,100000,Ron L Wyden,2022-03-23,senate,Ron,Wyden,Democrat,1949-05-03,M,OR
116,2022-02-24,AAPL,Apple Inc. -,sale_partial,15000,Shelley M Capito,2022-03-11,senate,Shelley,Capito,Republican,1953-11-26,F,WV
205,2022-01-14,AAPL,Apple Inc. -,sale_full,50000,Thomas H Tuberville,2022-02-14,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
263,2022-01-18,AAPL,Apple Inc. -,sale_partial,15000,Thomas R Carper,2022-02-03,senate,Thomas,Carper,Democrat,1947-01-23,M,DE


# Drop Remaining Ticker Errors
This eliminates about 4% of the data.

In [63]:
data.shape

(21040, 14)

In [64]:
len(error_list)

187

In [65]:
data[data['ticker'] == 'PHLD']

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
139,2022-02-18,PHLD,PHLD - Phillips Edison Grocery Center REIT I,sale_full,50000,Patrick J Toomey,2022-03-04,senate,Patrick,Toomey,Republican,1961-11-17,M,PA
140,2022-01-20,PHLD,PHLD - Phillips Edison Grocery Center REIT I,sale_partial,50000,Patrick J Toomey,2022-03-04,senate,Patrick,Toomey,Republican,1961-11-17,M,PA
1068,2019-07-31,PHLD,PHLD - Phillips Edison Grocery Center REIT I,sale_partial,15000,Patrick J Toomey,2020-08-13,senate,Patrick,Toomey,Republican,1961-11-17,M,PA
4102,2017-08-03,PHLD,PHILLIPS EDISON GR,purchase,100000,Patrick J Toomey,2017-12-21,senate,Patrick,Toomey,Republican,1961-11-17,M,PA


In [66]:
for name in error_list:
    data = data[data['ticker'] != name]

In [67]:
data[data['ticker'] == 'PHLD']

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents


In [68]:
data.shape

(20261, 14)

In [69]:
21040-20261

779

In [70]:
(21040-20261)/21040

0.03702471482889734

# Pulling Stock Prices, and Some Assumptions We Made

## Partial Sales
We eliminated partial sales from our data set. We were able to sketch out methods for including them, but they were fairly complex because they require tracking an estimate for the total number of shares a given legislator holds of any one equity on any given day, and we didn't have time to implement it before our deadline.

## Transaction Date = Day Market Was Closed
Congresspeople sometimes entered transaction dates for which the market was closed. We presumed that means they entered the trade with their broker (or otherwise) on that date, and that it would be executed the next trading day. Our code advances the date from the transaction date until the first trading day is found.

In some cases, transactions were entered months or years before a ticker was on the stock market. Presumably, this was a clerical error of some kind. For those trades, we advanced the start date until we found the first date the stock was trading.

[This site](https://docs.python.org/2/library/datetime.html#datetime.timedeltahttps://docs.python.org/2/library/datetime.html#datetime.timedelta) showed us timedelta, which helped with advancing the dates.

[This site](https://www.geeksforgeeks.org/python-iterating-through-a-range-of-dates/) showed us the date_range method.

## Start and End Dates
We matched purchases with sales that occurred after them, all of which were full sales. We identified them with the `'purchase-sale'` column. Ultimately, we eliminated the transaction with the corresponding sale. For these transactions, accounted for in the purchase, we used the first trading day after the purchase transaction date as the start date and the first trading day after the sale transaction date as the end date.

(**Note:** In full disclosure, this is one place where we didn't have time to sanity check the results as much as we would have liked. We also didn't have a chance to write could that would check if any congresspeople went in and out of the same stock and make sure that was accounted for with multiple remaining transactions.)

For sales unassociated with a purchase, we treated the first day of our analysis, 2014-01-02, or the first day the stock was traded after 2014-01-02 as the start date. We used a variable `'start_date'` near the beginning of our code set to find the first date in the `'reps'` data brought in at the beginning of this notebook. This variable is overwritten several times, but makes the code adaptable to new datasets.

For purchases unassociated with a sale, we treated the last day of our analysis, a variable 'the_final_date' set to 2022-07-15, as the end date.

(**Note:** Some stocks stopped trading before our end date. With more time we could have written code to account for that. As we ran out of time, we had to eliminate these transactions as they created errors later in our analysis. This appears to be 247 transactions getting lost, a relatively small number.)

## `'the_final_date'`
In order to make our code more versatile, we established a variable, `'the_final_date'`, to allow easy adaptation if someone would like to run an analysis on newer data. It was manually set to 2022-07-15 based on the date the disclosure data was pulled.

## Start and End Prices
These are the prices associated with the start and end dates for each transaction.

## Shares
Number of shares is estimated from the `'amount'` and either the start or end price, based on whether the transaction was a purchase or a sale respectively.

## Start and End Values
For a purchase, the start value is equal to the amount. The end value was estimated by multiplying the number of shares by the end price.

For a sale, the end value is equal to the amount. The start value was estimated by multiplying the number of shares by the start price.

In [71]:
# change this date if you use data pulled at a different time.

the_final_date = '2022-07-15'

In [72]:
data.shape

(20261, 14)

In [73]:
#DROPPING PARTIAL SALES HERE

data = data[data['type'] != 'sale_partial']

In [74]:
#establishing columns for pruchase-sale, start_date, end_date, start_price, end_price,
#shares, start_value, and end_value,

data.loc[:, 'purchase-sale'] = 0
data.loc[:, 'start_date'] = ''
data.loc[:, 'end_date'] = ''
data.loc[:, 'start_price'] = 0.0
data.loc[:, 'end_price'] = 0.0
data.loc[:, 'shares'] = 0.0
data.loc[:, 'start_value'] = 0.0
data.loc[:, 'end_value'] = 0.0

#NOTE: This (is one of many places that) throws copy warnings. We think the way around these warnings
#would be to make an array of 0s of the same length as the dataframe, 
#then concatenate those, but we ran out of time to experiment with that.

In [75]:
#establishing all the dates we cover in this period

start_date = pd.to_datetime(reps["transaction_date"].min())
end_date = pd.to_datetime(the_final_date)+timedelta(days = 1) #advancing the final date a day ensures the final date is in the end list
date_list = [start_date + timedelta(days = num) for num in range((end_date-start_date).days)]
date_list = [str(date)[:10] for date in date_list]

# creating columns for each and filling them in with 0.0 to start

for i in range(len(date_list)):
    data.loc[:, date_list[i]] = 0.0

  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, date_list[i]] = 0.0
  data.loc[:, 

In [76]:
data.shape

(16511, 3139)

In [77]:
#this code finds and fills in the start and end dates and the start and end prices

t0 = time.time()

to_drop = [] #making a list of rows that are sales accounted for in purchase-sale rows
missing_prices = [] #identifying indices that are missing prices at start, end, or both

for i in data.index:
    print(i) #this takes a long time to run and seeing the index advance helps to track it
    
    #establish ticker
    ticker = data.loc[i, 'ticker']
    start_date = 0
    end_date = 0
    
    #this accounts for purchases
    if data.loc[i, 'type'] == 'purchase':
        
        start_date = data.loc[i, 'transaction_date']
        
        for n in data.index:
            
            #determine if there's a full sale after the purchase:
            if (data.loc[n, 'ticker'] == ticker) and (data.loc[n, 'type'] == 'sale_full') and (pd.to_datetime(data.loc[n, 'transaction_date']) > pd.to_datetime(start_date)):
                
                #NOTE: THIS CODE DOESN'T YET ACCOUNT FOR A REP BUYING, SELLING, THEN DOING IT AGAIN LATER
                #WITH THE SAME TICKER
                
                #mark this row as a purchase --> sale:
                data.loc[i, 'purchase-sale'] = 1
                
                end_date = data.loc[n, 'transaction_date']
                
                to_drop.append(n) #saving indices to drop                

        #if the end_date hasn't been changed by the first by an if in the loop-->        
        #end_date is a misnomer here but is used to simplify the code
        if end_date == 0:
            
            end_date = the_final_date
    
    #this accounts for sales that aren't associated with purchases
    else:
        
        if data.loc[i, 'purchase-sale'] == 0:
            
            #assume purchase date is the first date for which we're tracking
            #this is very rough, but will at least let us track price changes
            
            start_date = str(pd.to_datetime(reps["transaction_date"].min()))[:10]
            
            end_date = data.loc[i, 'transaction_date']
    
                            #at this point, we've established the dates from which we'll work to find prices,
                            #which may not be the same because some transaction dates are days the market is closed

                            #next, we get actual day of purchase, which is the day of transaction (current 'start_date'),
                            #or the first day of our records, or the first day of trading after either of those
    
    count = 0
    while start_date not in stocks[ticker].index:
        start_date = str(pd.to_datetime(start_date) + timedelta(days = 1))[:10]
        count += 1
        if pd.to_datetime(start_date) == pd.to_datetime(the_final_date): #tried making this ==
            print(f'For {ticker}, index {i} purchase date reached {start_date} without finding a price.')
            break
                 
    
    #next, we get actual day of sale, which is the day of transaction (current 'end_date'),
    #or the first day of trading after that, or the last day of our records   
    
    count = 0
    while (end_date not in stocks[ticker].index) & (end_date != the_final_date):
        end_date = str(pd.to_datetime(end_date) + timedelta(days = 1))[:10]
        count += 1
        if pd.to_datetime(end_date) == pd.to_datetime(the_final_date): #tried making this ==
            print(f'For {ticker}, index {i} sale date reached {end_date} without finding a price.')
            missing_prices.append(i)
            break
 
    
    data.loc[i, 'start_date'] = start_date
    data.loc[i, 'end_date'] = end_date
    
    #now we get the start_price and the end_price
    
    try:
        data.loc[i, 'start_price'] = float(stocks[ticker].loc[start_date]['adj_close']) #changed these with adjusted close
    except:
        missing_prices.append(i)
        pass
    try:
        data.loc[i, 'end_price'] = float(stocks[ticker].loc[end_date]['adj_close']) #changed these with adjusted close
    except:
        missing_prices.append(i)
        pass


    
print('')
print("Time to run", time.time()-t0)  

0
2
3
4
5
6
9
11
15
27
28
29
30
31
32
33
34
35
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
100
101
102
103
110
111
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
214
215
216
217
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
238
241
246
247
255
258
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
296
302
303
304
305
306
307
308
312
321
322
323
324
325
326
327
328
330
331
332
333
334
335
337
338
339
340
341
342
347
348
349
350
351
353
354
355
356
357
358
360

With more time, we could likely figure out how to find end prices for these data (`'missing_prices'`), but in order to hit our deadline, they ultimately get dropped because the lack of end prices throws off our cumulative calculations, later on. 'VIAC' is explored below, to illustrate.

In [78]:
missing_prices

[391,
 391,
 471,
 705,
 718,
 718,
 779,
 779,
 859,
 880,
 1246,
 1246,
 1282,
 1282,
 1519,
 1758,
 1758,
 2032,
 2032,
 2032,
 2040,
 2040,
 2040,
 2046,
 2046,
 2046,
 2049,
 2049,
 2049,
 2084,
 2084,
 2084,
 2639,
 2639,
 2857,
 2857,
 3192,
 3193,
 3305,
 3590,
 3655,
 3828,
 4042,
 4354,
 4354,
 4415,
 4416,
 4438,
 4438,
 4438,
 4546,
 4631,
 4632,
 4684,
 4684,
 4684,
 4859,
 4908,
 4908,
 4943,
 4943,
 4943,
 5173,
 5281,
 5281,
 5353,
 5523,
 5529,
 5726,
 5779,
 5780,
 6102,
 6121,
 6126,
 6129,
 6132,
 6134,
 6136,
 6139,
 6140,
 6141,
 6142,
 6144,
 6407,
 6422,
 6982,
 7298,
 7339,
 7404,
 7451,
 7451,
 7452,
 7452,
 7452,
 7711,
 7711,
 7714,
 7714,
 7716,
 7716,
 7889,
 7889,
 8089,
 8089,
 8112,
 8112,
 8451,
 8451,
 8626,
 8626,
 8829,
 8829,
 8829,
 8978,
 8978,
 8991,
 8991,
 8991,
 9071,
 9072,
 9129,
 9130,
 9131,
 9138,
 9139,
 9140,
 9205,
 9205,
 9210,
 9211,
 9599,
 9599,
 9612,
 9612,
 9717,
 9718,
 9900,
 9958,
 10263,
 10263,
 10263,
 10478,
 10616,
 106

In [134]:
len(missing_prices)

317

In [79]:
stocks['VIAC']

Unnamed: 0,adj_close,volume
2009-12-30,12.6953497586,4600400
2009-12-31,12.4734030845,4260000
2010-01-04,12.6509604238,6710900
2010-01-05,12.588815355,5441000
2010-01-06,12.1893113416,14566200
...,...,...
2019-11-27,40.4,5298091
2019-11-29,40.38,1876412
2019-12-02,39.86,6128601
2019-12-03,39.34,8777653


In [80]:
len(data[data['ticker'] == 'VIAC'])

24

VIAC, for example, is in the stocks dataframe, but it stops trading 20129-12-04. Our code currently doesn't account for stocks that stopped trading before the end of our analysis, though it does account for those that started after the period of the analysis began. Unfortunately, we ran out of time to run it.

## Drop the Sale Reflected in Purchase-Sale Columns
Dropping the sales that are already reflected in the purchase sale columns.

In [81]:
for i in set(to_drop):
    try:
        data.drop(index = i, inplace = True)
    except:
        pass

## Fill In Start Value, End Value, and Number of Shares

In [82]:
bad_indices = []
for i in data.index:    

    try:
        if data.loc[i, 'type'] == 'purchase':
            data.loc[i, 'start_value'] = data.loc[i, 'amount']
            data.loc[i, 'shares'] = data.loc[i, 'amount']/data.loc[i,'start_price']
            data.loc[i, 'end_value'] = round(data.loc[i,'shares']*data.loc[i, 'end_price'], 2)

        else:
            data.loc[i, 'end_value'] = data.loc[i, 'amount']
            data.loc[i, 'shares'] = data.loc[i, 'amount']/data.loc[i, 'end_price']
            data.loc[i, 'start_value'] = round(data.loc[i, 'shares']*data.loc[i, 'start_price'], 2)

        for date in pd.date_range(pd.to_datetime(data.loc[i, 'start_date'])+timedelta(days=1), pd.to_datetime(data.loc[i, 'end_date'])):
            try:
                price = float(stocks[data.loc[i, 'ticker']].loc[str(date)[:10]]['adj_close'])
                data.loc[i, str(date)[:10]] = round(((price-data.loc[i, 'start_price'])/data.loc[i, 'start_price'])*data.loc[i, 'start_value'], 2)
            except:
                pass
    except:
        bad_indices.append(i)
        pass

  data.loc[i, 'shares'] = data.loc[i, 'amount']/data.loc[i, 'end_price']
  data.loc[i, str(date)[:10]] = round(((price-data.loc[i, 'start_price'])/data.loc[i, 'start_price'])*data.loc[i, 'start_value'], 2)
  data.loc[i, 'shares'] = data.loc[i, 'amount']/data.loc[i,'start_price']
  data.loc[i, 'end_value'] = round(data.loc[i,'shares']*data.loc[i, 'end_price'], 2)
  data.loc[i, 'start_value'] = round(data.loc[i, 'shares']*data.loc[i, 'start_price'], 2)


In [83]:
bad_indices

[]

In [84]:
data.shape

(11650, 3139)

In [85]:
len(data[data['purchase-sale'] == 1])

8098

# Dealing with Nulls
It's not clear how the start or end value became null, but we ran out of time to figure it out and so eliminated the nulls in order to move forward with our analysis.

In [86]:
data.isnull().sum()[:20]

transaction_date     0
ticker               0
asset_description    0
type                 0
amount               0
name                 0
disclosure_date      0
chamber              0
first_name           0
last_name            0
party                0
birthday             0
gender               0
represents           0
purchase-sale        0
start_date           0
end_date             0
start_price          0
end_price            0
shares               0
dtype: int64

In [87]:
data[data['end_value'].isnull()]

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,...,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,2022-07-11,2022-07-12,2022-07-13,2022-07-14,2022-07-15
1246,2020-05-01,CA,"Surgical Information Sciences <div class=""text...",purchase,50000,Tina Smith,2020-05-31,senate,Tina,Smith,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2032,2020-02-26,VIAC,ViacomCBS Inc.,purchase,15000,David A Perdue,2020-03-03,senate,David,Perdue,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2040,2020-02-25,VIAC,ViacomCBS Inc.,purchase,15000,David A Perdue,2020-03-03,senate,David,Perdue,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2046,2020-02-25,VIAC,ViacomCBS Inc.,purchase,15000,David A Perdue,2020-03-03,senate,David,Perdue,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2049,2020-02-24,VIAC,ViacomCBS Inc.,purchase,15000,David A Perdue,2020-03-03,senate,David,Perdue,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2084,2020-02-21,VIAC,ViacomCBS Inc.,purchase,15000,David A Perdue,2020-03-03,senate,David,Perdue,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2639,2018-12-06,BRKB,Berkshire Hathaway Inc.,purchase,50000,Jerry Moran,2019-05-15,senate,Jerry,Moran,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4438,2017-08-31,ATK,Oribtal ATK Inc,purchase,15000,Thomas R Carper,2017-09-06,senate,Thomas,Carper,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4943,2017-03-16,GSK,GlaxoSmithKline plc,purchase,15000,Patty Murray,2017-03-21,senate,Patty,Murray,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7452,2021-10-08,GSK,GlaxoSmithKline PLC,purchase,15000,Virginia Foxx,2021-11-02,house,Virginia,Foxx,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [88]:
data.dropna(inplace = True)

In [89]:
data.isnull().sum()

transaction_date     0
ticker               0
asset_description    0
type                 0
amount               0
                    ..
2022-07-11           0
2022-07-12           0
2022-07-13           0
2022-07-14           0
2022-07-15           0
Length: 3139, dtype: int64

### Eliminating Infinite Values
Because some prices were left at zero do to an error that we didn't have time to troubleshoot yet, some positions were calculated as being infitely large (amount/$0 = infinite shares).

We had to eliminate these in order to build our `'daily'` dataframes, so we did so here. This starts with a check of the rows that have infinite shares. Presuming there are some, they're eliminated below.

[This site](https://devenum.com/how-to-find-infinity-values-in-pandas-dataframe/) helped with understanding how to find the infinite values.

In [90]:
data[data['shares'].isin([np.inf, -np.inf])]

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,...,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,2022-07-11,2022-07-12,2022-07-13,2022-07-14,2022-07-15
718,2021-02-16,CTAA,Qwest Corp. NT 7% 2056,sale_full,50000,William F Hagerty,2021-02-19,senate,William,Hagerty,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7451,2021-10-07,DUKH,Duke Energy Corporation 5.125% Junior Subordin...,sale_full,100000,Virginia Foxx,2021-11-02,house,Virginia,Foxx,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7889,2021-01-15,INTL,Intel Corporation,sale_full,15000,None Kathy Manning,2021-02-04,house,,Manning,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8626,2020-12-21,NGLS,Targa Resources Partners LP Common Unites Repr...,sale_full,50000,Dwight Evans,2021-02-09,house,Dwight,Evans,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9599,2021-06-23,BHI,Baker Hughes Incorporated,sale_full,15000,Susie Lee,2021-07-13,house,Susie,Lee,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13335,2022-06-14,CTRX,Citrix Systems Cloud computing,sale_full,15000,Diana Harshbarger,2022-06-29,house,Diana,Harshbarger,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13337,2022-06-15,CTRX,Citrix Systems Cloud computing,sale_full,15000,Diana Harshbarger,2022-06-29,house,Diana,Harshbarger,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13839,2019-12-27,TSS,Total System Services Inc,sale_full,50000,Richard W. Allen,2021-11-08,house,Richard,Allen,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16311,2020-12-07,BGG,Briggs & Stratton Corp,sale_full,15000,David E. Price,2021-01-03,house,David,Price,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20479,2020-04-02,HCN,Welltower Inc.,sale_full,15000,Dean Phillips,2020-05-07,house,Dean,Phillips,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [91]:
for i in data[data['shares'].isin([np.inf, -np.inf])].index:
    data.drop(i, inplace = True)

In [92]:
data[data['shares'].isin([np.inf, -np.inf])]

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,...,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,2022-07-11,2022-07-12,2022-07-13,2022-07-14,2022-07-15


# Sanity Check
Making sure this is looking the way we expect it to, with values appearing where we would expect them to appear.

In [93]:
pd.set_option('display.max_columns', 200)

In [94]:
data.head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents,purchase-sale,start_date,end_date,start_price,end_price,shares,start_value,end_value,2014-01-02,2014-01-03,2014-01-04,2014-01-05,2014-01-06,2014-01-07,2014-01-08,2014-01-09,2014-01-10,2014-01-11,2014-01-12,2014-01-13,2014-01-14,2014-01-15,2014-01-16,2014-01-17,2014-01-18,2014-01-19,2014-01-20,2014-01-21,2014-01-22,2014-01-23,2014-01-24,2014-01-25,2014-01-26,2014-01-27,2014-01-28,2014-01-29,2014-01-30,2014-01-31,2014-02-01,2014-02-02,2014-02-03,2014-02-04,2014-02-05,2014-02-06,2014-02-07,2014-02-08,2014-02-09,2014-02-10,2014-02-11,2014-02-12,2014-02-13,2014-02-14,2014-02-15,2014-02-16,2014-02-17,2014-02-18,2014-02-19,2014-02-20,2014-02-21,2014-02-22,2014-02-23,2014-02-24,2014-02-25,2014-02-26,2014-02-27,2014-02-28,2014-03-01,2014-03-02,2014-03-03,2014-03-04,2014-03-05,2014-03-06,2014-03-07,2014-03-08,2014-03-09,2014-03-10,2014-03-11,2014-03-12,2014-03-13,2014-03-14,2014-03-15,2014-03-16,2014-03-17,2014-03-18,2014-03-19,2014-03-20,...,2022-04-07,2022-04-08,2022-04-09,2022-04-10,2022-04-11,2022-04-12,2022-04-13,2022-04-14,2022-04-15,2022-04-16,2022-04-17,2022-04-18,2022-04-19,2022-04-20,2022-04-21,2022-04-22,2022-04-23,2022-04-24,2022-04-25,2022-04-26,2022-04-27,2022-04-28,2022-04-29,2022-04-30,2022-05-01,2022-05-02,2022-05-03,2022-05-04,2022-05-05,2022-05-06,2022-05-07,2022-05-08,2022-05-09,2022-05-10,2022-05-11,2022-05-12,2022-05-13,2022-05-14,2022-05-15,2022-05-16,2022-05-17,2022-05-18,2022-05-19,2022-05-20,2022-05-21,2022-05-22,2022-05-23,2022-05-24,2022-05-25,2022-05-26,2022-05-27,2022-05-28,2022-05-29,2022-05-30,2022-05-31,2022-06-01,2022-06-02,2022-06-03,2022-06-04,2022-06-05,2022-06-06,2022-06-07,2022-06-08,2022-06-09,2022-06-10,2022-06-11,2022-06-12,2022-06-13,2022-06-14,2022-06-15,2022-06-16,2022-06-17,2022-06-18,2022-06-19,2022-06-20,2022-06-21,2022-06-22,2022-06-23,2022-06-24,2022-06-25,2022-06-26,2022-06-27,2022-06-28,2022-06-29,2022-06-30,2022-07-01,2022-07-02,2022-07-03,2022-07-04,2022-07-05,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,2022-07-11,2022-07-12,2022-07-13,2022-07-14,2022-07-15
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,0,2022-06-03,2022-07-15,44.82,41.13,334.672021,15000.0,13765.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.4,217.54,-63.59,-719.54,-1586.35,0.0,0.0,-1951.14,-2473.23,-2225.57,-2399.6,-2121.82,0.0,0.0,0.0,-1951.14,-2081.66,-2315.93,-1358.77,0.0,0.0,-1532.8,-1552.88,-1710.17,-1890.9,-1639.89,0.0,0.0,0.0,-1579.65,-1780.46,-1572.96,-1552.88,0.0,0.0,-1683.4,-1746.99,-1924.36,-2034.81,-1234.94
3,2022-05-31,X,United States Steel Corporation,purchase,250000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-31,2022-07-15,25.07,17.73,9972.078181,250000.0,176804.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-698.05,897.49,-1296.37,0.0,0.0,4287.99,7279.62,-7578.78,-23833.27,-29118.47,0.0,0.0,-48165.14,-49960.11,-44774.63,-54746.71,-51655.36,0.0,0.0,0.0,-54547.27,-59832.47,-60430.79,-52153.97,0.0,0.0,-56142.8,-60530.51,-65117.67,-71400.08,-71400.08,0.0,0.0,0.0,-80275.23,-79976.07,-71998.4,-74291.98,0.0,0.0,-75488.63,-75488.63,-69804.55,-77084.16,-73195.05
4,2022-05-20,PYPL,"PayPal Holdings, Inc. -",purchase,100000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-20,2022-07-15,80.54,73.91,1241.619071,100000.0,91768.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,794.64,-2346.66,-521.48,-148.99,5798.36,0.0,0.0,0.0,5798.36,2408.74,9659.8,7437.3,0.0,0.0,7784.95,9995.03,8654.08,4432.58,-1539.61,0.0,0.0,-8467.84,-10032.28,-5848.03,-11559.47,-9485.97,0.0,0.0,0.0,-10144.03,-9399.06,-8356.1,-3551.03,0.0,0.0,-5711.45,-10826.92,-11261.48,-13285.32,-11348.4,0.0,0.0,0.0,-7623.54,-9063.82,-6828.9,-8827.91,0.0,0.0,-12503.1,-11311.15,-11398.06,-13645.39,-8231.93
5,2022-05-19,PYPL,"PayPal Holdings, Inc. -",purchase,500000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-19,2022-07-15,81.28,73.91,6151.574803,500000.0,454662.89,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4552.17,0.0,0.0,-615.16,-16178.64,-7135.83,-5290.35,24175.69,0.0,0.0,0.0,24175.69,7381.89,43307.09,32295.77,0.0,0.0,34018.21,44968.01,38324.31,17408.96,-12180.12,0.0,0.0,-46505.91,-54256.89,-33526.08,-61823.33,-51550.2,0.0,0.0,0.0,-54810.53,-51119.59,-45952.26,-22145.67,0.0,0.0,-32849.41,-58193.9,-60346.95,-70374.02,-60777.56,0.0,0.0,0.0,-42322.83,-49458.66,-38385.83,-48289.86,0.0,0.0,-66498.52,-60593.01,-61023.62,-72157.97,-45337.11
6,2022-05-16,ABYIX,Abbey Capital Futures Strategy Fund- Class I Sha,purchase,15000,Thomas R Carper,2022-06-03,senate,Thomas,Carper,Democrat,1947-01-23,M,DE,0,2022-05-16,2022-07-15,13.35,13.71,1123.595506,15000.0,15404.49,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-22.47,-33.71,-123.6,-146.07,0.0,0.0,-168.54,-247.19,-247.19,-213.48,-269.66,0.0,0.0,0.0,-168.54,-44.94,-89.89,0.0,0.0,0.0,134.83,112.36,202.25,325.84,438.2,0.0,0.0,640.45,696.63,573.03,483.15,528.09,0.0,0.0,528.09,494.38,426.97,292.13,213.48,0.0,0.0,235.96,325.84,337.08,258.43,247.19,0.0,0.0,0.0,269.66,303.37,292.13,303.37,0.0,0.0,415.73,370.79,359.55,483.15,404.49


# Checking Data Types and Putting Them In Preferred Form
We preferred birthday as a datetime for EDA and start and end prices as floats.

In [95]:
data.dtypes

transaction_date      object
ticker                object
asset_description     object
type                  object
amount                 int64
                      ...   
2022-07-11           float64
2022-07-12           float64
2022-07-13           float64
2022-07-14           float64
2022-07-15           float64
Length: 3139, dtype: object

In [96]:
data['birthday']=pd.to_datetime(data['birthday'])
data['start_price']=data['start_price'].astype(float)
data['end_price']=data['end_price'].astype(float)

In [97]:
data.dtypes[:40]

transaction_date             object
ticker                       object
asset_description            object
type                         object
amount                        int64
name                         object
disclosure_date              object
chamber                      object
first_name                   object
last_name                    object
party                        object
birthday             datetime64[ns]
gender                       object
represents                   object
purchase-sale                 int64
start_date                   object
end_date                     object
start_price                 float64
end_price                   float64
shares                      float64
start_value                 float64
end_value                   float64
2014-01-02                  float64
2014-01-03                  float64
2014-01-04                  float64
2014-01-05                  float64
2014-01-06                  float64
2014-01-07                  

Exporting this data.

In [98]:
data.to_csv('data/filled_reps_data.csv')

# Building Daily Dataframe
This builds the data frame that tracks positions currently in the portfolio on a given day.
## Build Basis Dataframe
We started by building a data frame the provides the desired basis on each day for each position, the original value from which percent change will be calculated using the profit/loss for each day.

In [99]:
basis = data.copy()

start_date = pd.to_datetime(reps["transaction_date"].min())
end_date = pd.to_datetime(the_final_date)+timedelta(days=1) #advancing the final date a day ensures that the final date is in the end list
date_list = [start_date + timedelta(days = num) for num in range((end_date-start_date).days)]

date_list = [str(date)[:10] for date in date_list]

for i in range(len(date_list)):
    basis.loc[:, date_list[i]] = 0.0

In [100]:
data.shape

(11591, 3139)

In [101]:
basis.shape

(11591, 3139)

## Filling In the Change in Value From Start For Each Position For Each Position on Each Day

In [102]:
for i in basis.index:
    for date in pd.date_range(pd.to_datetime(basis.loc[i, 'start_date'])+timedelta(days=1), pd.to_datetime(basis.loc[i, 'end_date'])):
        basis.loc[i, str(date)[:10]] = basis.loc[i, 'start_value']
        

### Sanity Check
Is shape right? Do we see what we expect in the data?

In [103]:
basis.shape

(11591, 3139)

In [104]:
basis

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents,purchase-sale,start_date,end_date,start_price,end_price,shares,start_value,end_value,2014-01-02,2014-01-03,2014-01-04,2014-01-05,2014-01-06,2014-01-07,2014-01-08,2014-01-09,2014-01-10,2014-01-11,2014-01-12,2014-01-13,2014-01-14,2014-01-15,2014-01-16,2014-01-17,2014-01-18,2014-01-19,2014-01-20,2014-01-21,2014-01-22,2014-01-23,2014-01-24,2014-01-25,2014-01-26,2014-01-27,2014-01-28,2014-01-29,2014-01-30,2014-01-31,2014-02-01,2014-02-02,2014-02-03,2014-02-04,2014-02-05,2014-02-06,2014-02-07,2014-02-08,2014-02-09,2014-02-10,2014-02-11,2014-02-12,2014-02-13,2014-02-14,2014-02-15,2014-02-16,2014-02-17,2014-02-18,2014-02-19,2014-02-20,2014-02-21,2014-02-22,2014-02-23,2014-02-24,2014-02-25,2014-02-26,2014-02-27,2014-02-28,2014-03-01,2014-03-02,2014-03-03,2014-03-04,2014-03-05,2014-03-06,2014-03-07,2014-03-08,2014-03-09,2014-03-10,2014-03-11,2014-03-12,2014-03-13,2014-03-14,2014-03-15,2014-03-16,2014-03-17,2014-03-18,2014-03-19,2014-03-20,...,2022-04-07,2022-04-08,2022-04-09,2022-04-10,2022-04-11,2022-04-12,2022-04-13,2022-04-14,2022-04-15,2022-04-16,2022-04-17,2022-04-18,2022-04-19,2022-04-20,2022-04-21,2022-04-22,2022-04-23,2022-04-24,2022-04-25,2022-04-26,2022-04-27,2022-04-28,2022-04-29,2022-04-30,2022-05-01,2022-05-02,2022-05-03,2022-05-04,2022-05-05,2022-05-06,2022-05-07,2022-05-08,2022-05-09,2022-05-10,2022-05-11,2022-05-12,2022-05-13,2022-05-14,2022-05-15,2022-05-16,2022-05-17,2022-05-18,2022-05-19,2022-05-20,2022-05-21,2022-05-22,2022-05-23,2022-05-24,2022-05-25,2022-05-26,2022-05-27,2022-05-28,2022-05-29,2022-05-30,2022-05-31,2022-06-01,2022-06-02,2022-06-03,2022-06-04,2022-06-05,2022-06-06,2022-06-07,2022-06-08,2022-06-09,2022-06-10,2022-06-11,2022-06-12,2022-06-13,2022-06-14,2022-06-15,2022-06-16,2022-06-17,2022-06-18,2022-06-19,2022-06-20,2022-06-21,2022-06-22,2022-06-23,2022-06-24,2022-06-25,2022-06-26,2022-06-27,2022-06-28,2022-06-29,2022-06-30,2022-07-01,2022-07-02,2022-07-03,2022-07-04,2022-07-05,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,2022-07-11,2022-07-12,2022-07-13,2022-07-14,2022-07-15
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,0,2022-06-03,2022-07-15,44.820000,41.130000,334.672021,15000.00,13765.06,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0
3,2022-05-31,X,United States Steel Corporation,purchase,250000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-31,2022-07-15,25.070000,17.730000,9972.078181,250000.00,176804.95,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0
4,2022-05-20,PYPL,"PayPal Holdings, Inc. -",purchase,100000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-20,2022-07-15,80.540000,73.910000,1241.619071,100000.00,91768.07,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
5,2022-05-19,PYPL,"PayPal Holdings, Inc. -",purchase,500000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-19,2022-07-15,81.280000,73.910000,6151.574803,500000.00,454662.89,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0
6,2022-05-16,ABYIX,Abbey Capital Futures Strategy Fund- Class I Sha,purchase,15000,Thomas R Carper,2022-06-03,senate,Thomas,Carper,Democrat,1947-01-23,M,DE,0,2022-05-16,2022-07-15,13.350000,13.710000,1123.595506,15000.00,15404.49,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21030,2020-05-07,VIAV,Viavi Solutions Inc.,purchase,15000,Josh Gottheimer,2020-06-10,house,Josh,Gottheimer,Democrat,1975-03-08,M,NJ05,1,2020-05-07,2021-01-25,11.710000,16.000000,1280.956447,15000.00,20495.30,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21031,2020-05-04,USFD,US Foods Holding Corp.,purchase,15000,Adam Kinzinger,2020-06-10,house,Adam,Kinzinger,Republican,1978-02-27,M,IL16,1,2020-05-04,2020-05-26,19.940000,19.330000,752.256770,15000.00,14541.12,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21033,2020-05-26,AAL,"American Airlines Group, Inc.",purchase,15000,Adam Kinzinger,2020-06-10,house,Adam,Kinzinger,Republican,1978-02-27,M,IL16,1,2020-05-26,2020-06-01,11.140000,11.110000,1346.499102,15000.00,14959.61,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21039,2020-03-18,COST,Costco Wholesale Corporation,purchase,15000,Ed Perlmutter,2020-06-10,house,Ed,Perlmutter,Democrat,1953-05-01,M,CO07,1,2020-03-18,2020-04-02,294.208810,278.881249,50.984197,15000.00,14218.54,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [105]:
basis['start_value'].dtypes

dtype('float64')

In [106]:
basis['2022-07-15'].dtypes

dtype('float64')

In [107]:
basis.isnull().sum()[:40]

transaction_date     0
ticker               0
asset_description    0
type                 0
amount               0
name                 0
disclosure_date      0
chamber              0
first_name           0
last_name            0
party                0
birthday             0
gender               0
represents           0
purchase-sale        0
start_date           0
end_date             0
start_price          0
end_price            0
shares               0
start_value          0
end_value            0
2014-01-02           0
2014-01-03           0
2014-01-04           0
2014-01-05           0
2014-01-06           0
2014-01-07           0
2014-01-08           0
2014-01-09           0
2014-01-10           0
2014-01-11           0
2014-01-12           0
2014-01-13           0
2014-01-14           0
2014-01-15           0
2014-01-16           0
2014-01-17           0
2014-01-18           0
2014-01-19           0
dtype: int64

In [108]:
basis.dropna(inplace = True)

## Building the Data Frame to Take the Daily Sums

In [109]:
# this establishes the index and the column headers

start_date = pd.to_datetime(reps["transaction_date"].min())
end_date = pd.to_datetime(the_final_date)+timedelta(days=1) #advancing the final date a day ensures that the final date is in the end list
date_list = [start_date + timedelta(days = num) for num in range((end_date-start_date).days)]

daily = pd.DataFrame(index = date_list, columns = ['portfolio_basis', 'portfolio_delta', 'percent_change' ])

daily.head()

Unnamed: 0,portfolio_basis,portfolio_delta,percent_change
2014-01-02,,,
2014-01-03,,,
2014-01-04,,,
2014-01-05,,,
2014-01-06,,,


### Checking the Data Type of the Index
[This stackoverflow answer](https://stackoverflow.com/a/29218694) explains that, in fact, <M8[ns] is a type of datetime.

In [110]:
daily.index.dtype

dtype('<M8[ns]')

### Filling In The Cumulative Daily Values

In [111]:
for i in daily.index:
    daily.loc[i, 'portfolio_basis'] = basis[str(i)[:10]].sum()
    daily.loc[i, 'portfolio_delta'] = data[str(i)[:10]].sum()
    daily.loc[i, 'percent_change'] = daily.loc[i, 'portfolio_delta']/daily.loc[i, 'portfolio_basis']

  daily.loc[i, 'percent_change'] = daily.loc[i, 'portfolio_delta']/daily.loc[i, 'portfolio_basis']


### Sanity Check

In [112]:
daily.head(20)

Unnamed: 0,portfolio_basis,portfolio_delta,percent_change
2014-01-02,0.0,0.0,
2014-01-03,139308037.43,-1091328.82,-0.007834
2014-01-04,139308037.43,0.0,0.0
2014-01-05,139308037.43,0.0,0.0
2014-01-06,139308037.43,-5225084.05,-0.037507
2014-01-07,139308037.43,-4624395.85,-0.033195
2014-01-08,139308037.43,-7190905.5,-0.051619
2014-01-09,139308037.43,-7082280.55,-0.050839
2014-01-10,139308037.43,-7112277.26,-0.051054
2014-01-11,139308037.43,0.0,0.0


In [113]:
len(daily)

3117

In [114]:
daily.tail()

Unnamed: 0,portfolio_basis,portfolio_delta,percent_change
2022-07-11,92040000.0,5755498.33,0.062533
2022-07-12,92040000.0,4834463.44,0.052526
2022-07-13,92040000.0,4238653.96,0.046052
2022-07-14,92040000.0,3277519.45,0.03561
2022-07-15,92040000.0,4837063.4,0.052554


In [115]:
daily.dtypes

portfolio_basis    object
portfolio_delta    object
percent_change     object
dtype: object

In [116]:
daily['portfolio_basis'] = daily['portfolio_basis'].astype(float)
daily['portfolio_delta'] = daily['portfolio_delta'].astype(float)
daily['percent_change'] = daily['percent_change'].astype(float)

In [117]:
daily.dtypes

portfolio_basis    float64
portfolio_delta    float64
percent_change     float64
dtype: object

In [118]:
daily.to_csv('data/complete_daily.csv')

# Building Daily2 Dataframe
This dataframe is designed to calculate legislator success over time. Therefore, once a position is added, the basis of that position is included through the end of our analysis, and when a position is sold, the value of the profit is carried forward through the end of the analysis. As a result, we can see the total profit/loss the legislators have accumulated over time, for easier comparison to other measures, like percent change in the S&P 500 over time.

In [119]:
#making the bones of the dataframe
basis2 = data.copy()

#resetting the date columns to 0
start_date = pd.to_datetime(reps["transaction_date"].min())
end_date = pd.to_datetime(the_final_date)+timedelta(days=1) #advancing the final date here ensures that the final date is in the end list
date_list = [start_date + timedelta(days = num) for num in range((end_date-start_date).days)]

date_list = [str(date)[:10] for date in date_list]

for i in range(len(date_list)):
    basis2.loc[:, date_list[i]] = 0.0

for i in basis2.index:
    for date in pd.date_range(pd.to_datetime(basis2.loc[i, 'start_date'])+timedelta(days=1), pd.to_datetime('2022-7-15')):
        basis2.loc[i, str(date)[:10]] = basis2.loc[i, 'start_value']


In [120]:
basis2.head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents,purchase-sale,start_date,end_date,start_price,end_price,shares,start_value,end_value,2014-01-02,2014-01-03,2014-01-04,2014-01-05,2014-01-06,2014-01-07,2014-01-08,2014-01-09,2014-01-10,2014-01-11,2014-01-12,2014-01-13,2014-01-14,2014-01-15,2014-01-16,2014-01-17,2014-01-18,2014-01-19,2014-01-20,2014-01-21,2014-01-22,2014-01-23,2014-01-24,2014-01-25,2014-01-26,2014-01-27,2014-01-28,2014-01-29,2014-01-30,2014-01-31,2014-02-01,2014-02-02,2014-02-03,2014-02-04,2014-02-05,2014-02-06,2014-02-07,2014-02-08,2014-02-09,2014-02-10,2014-02-11,2014-02-12,2014-02-13,2014-02-14,2014-02-15,2014-02-16,2014-02-17,2014-02-18,2014-02-19,2014-02-20,2014-02-21,2014-02-22,2014-02-23,2014-02-24,2014-02-25,2014-02-26,2014-02-27,2014-02-28,2014-03-01,2014-03-02,2014-03-03,2014-03-04,2014-03-05,2014-03-06,2014-03-07,2014-03-08,2014-03-09,2014-03-10,2014-03-11,2014-03-12,2014-03-13,2014-03-14,2014-03-15,2014-03-16,2014-03-17,2014-03-18,2014-03-19,2014-03-20,...,2022-04-07,2022-04-08,2022-04-09,2022-04-10,2022-04-11,2022-04-12,2022-04-13,2022-04-14,2022-04-15,2022-04-16,2022-04-17,2022-04-18,2022-04-19,2022-04-20,2022-04-21,2022-04-22,2022-04-23,2022-04-24,2022-04-25,2022-04-26,2022-04-27,2022-04-28,2022-04-29,2022-04-30,2022-05-01,2022-05-02,2022-05-03,2022-05-04,2022-05-05,2022-05-06,2022-05-07,2022-05-08,2022-05-09,2022-05-10,2022-05-11,2022-05-12,2022-05-13,2022-05-14,2022-05-15,2022-05-16,2022-05-17,2022-05-18,2022-05-19,2022-05-20,2022-05-21,2022-05-22,2022-05-23,2022-05-24,2022-05-25,2022-05-26,2022-05-27,2022-05-28,2022-05-29,2022-05-30,2022-05-31,2022-06-01,2022-06-02,2022-06-03,2022-06-04,2022-06-05,2022-06-06,2022-06-07,2022-06-08,2022-06-09,2022-06-10,2022-06-11,2022-06-12,2022-06-13,2022-06-14,2022-06-15,2022-06-16,2022-06-17,2022-06-18,2022-06-19,2022-06-20,2022-06-21,2022-06-22,2022-06-23,2022-06-24,2022-06-25,2022-06-26,2022-06-27,2022-06-28,2022-06-29,2022-06-30,2022-07-01,2022-07-02,2022-07-03,2022-07-04,2022-07-05,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,2022-07-11,2022-07-12,2022-07-13,2022-07-14,2022-07-15
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,0,2022-06-03,2022-07-15,44.82,41.13,334.672021,15000.0,13765.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0
3,2022-05-31,X,United States Steel Corporation,purchase,250000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-31,2022-07-15,25.07,17.73,9972.078181,250000.0,176804.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0,250000.0
4,2022-05-20,PYPL,"PayPal Holdings, Inc. -",purchase,100000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-20,2022-07-15,80.54,73.91,1241.619071,100000.0,91768.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
5,2022-05-19,PYPL,"PayPal Holdings, Inc. -",purchase,500000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-19,2022-07-15,81.28,73.91,6151.574803,500000.0,454662.89,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0
6,2022-05-16,ABYIX,Abbey Capital Futures Strategy Fund- Class I Sha,purchase,15000,Thomas R Carper,2022-06-03,senate,Thomas,Carper,Democrat,1947-01-23,M,DE,0,2022-05-16,2022-07-15,13.35,13.71,1123.595506,15000.0,15404.49,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0


In [121]:
data2 = data.copy()

#adding final difference at end(sale) date to remaining days
for i in data2.index:
    for date in pd.date_range(pd.to_datetime(data2.loc[i, 'end_date'])+timedelta(days=1), pd.to_datetime(the_final_date)):
                if data2.loc[i, str(date)[:10]] != 1: #so we don't overwrite weekend and holiday dates, those remain 0
                    data2.loc[i, str(date)[:10]] = data2.loc[i, data2.loc[i, 'end_date']]

In [122]:
data2['birthday'] = pd.to_datetime(data2['birthday'])

In [123]:
data2['start_price'] = data2['start_price'].astype(float)
data2['end_price'] = data2['end_price'].astype(float)

In [124]:
data2.dtypes[:30]

transaction_date             object
ticker                       object
asset_description            object
type                         object
amount                        int64
name                         object
disclosure_date              object
chamber                      object
first_name                   object
last_name                    object
party                        object
birthday             datetime64[ns]
gender                       object
represents                   object
purchase-sale                 int64
start_date                   object
end_date                     object
start_price                 float64
end_price                   float64
shares                      float64
start_value                 float64
end_value                   float64
2014-01-02                  float64
2014-01-03                  float64
2014-01-04                  float64
2014-01-05                  float64
2014-01-06                  float64
2014-01-07                  

In [125]:
data2.head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents,purchase-sale,start_date,end_date,start_price,end_price,shares,start_value,end_value,2014-01-02,2014-01-03,2014-01-04,2014-01-05,2014-01-06,2014-01-07,2014-01-08,2014-01-09,2014-01-10,2014-01-11,2014-01-12,2014-01-13,2014-01-14,2014-01-15,2014-01-16,2014-01-17,2014-01-18,2014-01-19,2014-01-20,2014-01-21,2014-01-22,2014-01-23,2014-01-24,2014-01-25,2014-01-26,2014-01-27,2014-01-28,2014-01-29,2014-01-30,2014-01-31,2014-02-01,2014-02-02,2014-02-03,2014-02-04,2014-02-05,2014-02-06,2014-02-07,2014-02-08,2014-02-09,2014-02-10,2014-02-11,2014-02-12,2014-02-13,2014-02-14,2014-02-15,2014-02-16,2014-02-17,2014-02-18,2014-02-19,2014-02-20,2014-02-21,2014-02-22,2014-02-23,2014-02-24,2014-02-25,2014-02-26,2014-02-27,2014-02-28,2014-03-01,2014-03-02,2014-03-03,2014-03-04,2014-03-05,2014-03-06,2014-03-07,2014-03-08,2014-03-09,2014-03-10,2014-03-11,2014-03-12,2014-03-13,2014-03-14,2014-03-15,2014-03-16,2014-03-17,2014-03-18,2014-03-19,2014-03-20,...,2022-04-07,2022-04-08,2022-04-09,2022-04-10,2022-04-11,2022-04-12,2022-04-13,2022-04-14,2022-04-15,2022-04-16,2022-04-17,2022-04-18,2022-04-19,2022-04-20,2022-04-21,2022-04-22,2022-04-23,2022-04-24,2022-04-25,2022-04-26,2022-04-27,2022-04-28,2022-04-29,2022-04-30,2022-05-01,2022-05-02,2022-05-03,2022-05-04,2022-05-05,2022-05-06,2022-05-07,2022-05-08,2022-05-09,2022-05-10,2022-05-11,2022-05-12,2022-05-13,2022-05-14,2022-05-15,2022-05-16,2022-05-17,2022-05-18,2022-05-19,2022-05-20,2022-05-21,2022-05-22,2022-05-23,2022-05-24,2022-05-25,2022-05-26,2022-05-27,2022-05-28,2022-05-29,2022-05-30,2022-05-31,2022-06-01,2022-06-02,2022-06-03,2022-06-04,2022-06-05,2022-06-06,2022-06-07,2022-06-08,2022-06-09,2022-06-10,2022-06-11,2022-06-12,2022-06-13,2022-06-14,2022-06-15,2022-06-16,2022-06-17,2022-06-18,2022-06-19,2022-06-20,2022-06-21,2022-06-22,2022-06-23,2022-06-24,2022-06-25,2022-06-26,2022-06-27,2022-06-28,2022-06-29,2022-06-30,2022-07-01,2022-07-02,2022-07-03,2022-07-04,2022-07-05,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,2022-07-11,2022-07-12,2022-07-13,2022-07-14,2022-07-15
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,0,2022-06-03,2022-07-15,44.82,41.13,334.672021,15000.0,13765.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.4,217.54,-63.59,-719.54,-1586.35,0.0,0.0,-1951.14,-2473.23,-2225.57,-2399.6,-2121.82,0.0,0.0,0.0,-1951.14,-2081.66,-2315.93,-1358.77,0.0,0.0,-1532.8,-1552.88,-1710.17,-1890.9,-1639.89,0.0,0.0,0.0,-1579.65,-1780.46,-1572.96,-1552.88,0.0,0.0,-1683.4,-1746.99,-1924.36,-2034.81,-1234.94
3,2022-05-31,X,United States Steel Corporation,purchase,250000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-31,2022-07-15,25.07,17.73,9972.078181,250000.0,176804.95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-698.05,897.49,-1296.37,0.0,0.0,4287.99,7279.62,-7578.78,-23833.27,-29118.47,0.0,0.0,-48165.14,-49960.11,-44774.63,-54746.71,-51655.36,0.0,0.0,0.0,-54547.27,-59832.47,-60430.79,-52153.97,0.0,0.0,-56142.8,-60530.51,-65117.67,-71400.08,-71400.08,0.0,0.0,0.0,-80275.23,-79976.07,-71998.4,-74291.98,0.0,0.0,-75488.63,-75488.63,-69804.55,-77084.16,-73195.05
4,2022-05-20,PYPL,"PayPal Holdings, Inc. -",purchase,100000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-20,2022-07-15,80.54,73.91,1241.619071,100000.0,91768.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,794.64,-2346.66,-521.48,-148.99,5798.36,0.0,0.0,0.0,5798.36,2408.74,9659.8,7437.3,0.0,0.0,7784.95,9995.03,8654.08,4432.58,-1539.61,0.0,0.0,-8467.84,-10032.28,-5848.03,-11559.47,-9485.97,0.0,0.0,0.0,-10144.03,-9399.06,-8356.1,-3551.03,0.0,0.0,-5711.45,-10826.92,-11261.48,-13285.32,-11348.4,0.0,0.0,0.0,-7623.54,-9063.82,-6828.9,-8827.91,0.0,0.0,-12503.1,-11311.15,-11398.06,-13645.39,-8231.93
5,2022-05-19,PYPL,"PayPal Holdings, Inc. -",purchase,500000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL,0,2022-05-19,2022-07-15,81.28,73.91,6151.574803,500000.0,454662.89,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4552.17,0.0,0.0,-615.16,-16178.64,-7135.83,-5290.35,24175.69,0.0,0.0,0.0,24175.69,7381.89,43307.09,32295.77,0.0,0.0,34018.21,44968.01,38324.31,17408.96,-12180.12,0.0,0.0,-46505.91,-54256.89,-33526.08,-61823.33,-51550.2,0.0,0.0,0.0,-54810.53,-51119.59,-45952.26,-22145.67,0.0,0.0,-32849.41,-58193.9,-60346.95,-70374.02,-60777.56,0.0,0.0,0.0,-42322.83,-49458.66,-38385.83,-48289.86,0.0,0.0,-66498.52,-60593.01,-61023.62,-72157.97,-45337.11
6,2022-05-16,ABYIX,Abbey Capital Futures Strategy Fund- Class I Sha,purchase,15000,Thomas R Carper,2022-06-03,senate,Thomas,Carper,Democrat,1947-01-23,M,DE,0,2022-05-16,2022-07-15,13.35,13.71,1123.595506,15000.0,15404.49,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-22.47,-33.71,-123.6,-146.07,0.0,0.0,-168.54,-247.19,-247.19,-213.48,-269.66,0.0,0.0,0.0,-168.54,-44.94,-89.89,0.0,0.0,0.0,134.83,112.36,202.25,325.84,438.2,0.0,0.0,640.45,696.63,573.03,483.15,528.09,0.0,0.0,528.09,494.38,426.97,292.13,213.48,0.0,0.0,235.96,325.84,337.08,258.43,247.19,0.0,0.0,0.0,269.66,303.37,292.13,303.37,0.0,0.0,415.73,370.79,359.55,483.15,404.49


In [126]:
data2.to_csv('data/filled_reps_data2.csv')

## Build the New Dataframe

In [127]:
start_date = pd.to_datetime(reps["transaction_date"].min())
end_date = pd.to_datetime(the_final_date)+timedelta(days=1) #advancing the final date here ensures that the final date is in the end list
date_list = [start_date + timedelta(days = num) for num in range((end_date-start_date).days)]

daily2 = pd.DataFrame(index = date_list, columns = ['portfolio_basis', 'portfolio_delta', 'percent_change' ])

for i in daily2.index:
    daily2.loc[i, 'portfolio_basis'] = basis2[str(i)[:10]].sum()
    daily2.loc[i, 'portfolio_delta'] = data2[str(i)[:10]].sum()
    daily2.loc[i, 'percent_change'] = daily2.loc[i, 'portfolio_delta']/daily2.loc[i, 'portfolio_basis']

  daily2.loc[i, 'percent_change'] = daily2.loc[i, 'portfolio_delta']/daily2.loc[i, 'portfolio_basis']


In [128]:
daily2.head()

Unnamed: 0,portfolio_basis,portfolio_delta,percent_change
2014-01-02,0.0,0.0,
2014-01-03,139358037.43,-1091328.82,-0.007831
2014-01-04,139358037.43,0.0,0.0
2014-01-05,139358037.43,0.0,0.0
2014-01-06,139358037.43,-5225084.05,-0.037494


In [129]:
daily2.tail()

Unnamed: 0,portfolio_basis,portfolio_delta,percent_change
2022-07-11,877030871.1,51007410.68,0.058159
2022-07-12,877030871.1,50086375.79,0.057109
2022-07-13,877030871.1,49490566.31,0.05643
2022-07-14,877030871.1,48529431.8,0.055334
2022-07-15,877030871.1,50088975.75,0.057112


In [130]:
daily2.dtypes

portfolio_basis    object
portfolio_delta    object
percent_change     object
dtype: object

In [131]:
daily2['portfolio_basis'] = daily2['portfolio_basis'].astype(float)
daily2['portfolio_delta'] = daily2['portfolio_delta'].astype(float)
daily2['percent_change'] = daily2['percent_change'].astype(float)

In [132]:
daily2.dtypes

portfolio_basis    float64
portfolio_delta    float64
percent_change     float64
dtype: object

In [133]:
daily2.to_csv('data/complete_daily2_cumulative.csv')