In [5]:
import pandas as pd
# import numpy as np
import requests as req
import sys
import time

# pip install holidays if you don't have the package already
# import holidays

from datetime import datetime
from datetime import timedelta
# change the following filepath, file, and key name to your own
sys.path.insert(0, '../../API_Keys/')
from vantage_api import premium_av_key

import pickle

In [6]:
#below, be sure to change the name of 'apikey' to your key's name

def pull_all_stocks(df):
    '''
    Takes a dateframe of congress people w/ their positions in various stocks and returns all
    the time series data of relevant stocks.
    '''
    stock_dict = {}
    for ticker in list(df['ticker'].unique()):
        json = req.get(url = "https://www.alphavantage.co/query",
                  params = {
                      'function': 'TIME_SERIES_DAILY_ADJUSTED',
                      'outputsize': 'full',
                      'symbol': ticker,
#--------------------->
                      'apikey':premium_av_key #<-----CHANGE KEY NAME HERE
                  }).json()
        ticker_df = pd.DataFrame(json['Time Series (Daily)']).T
        ticker_df.drop(columns = ['1. open', '2. high', '3. low', '4. close', '7. dividend amount', '8. split coefficient'], inplace = True)
        ticker_df.rename(columns = {'5. adjusted close': 'adj_close',
                                    '6. volume': 'volume'
                                   }, inplace = True)
        if ticker_df.shape[0] > 2500:
            ticker_df = ticker_df.iloc[range(2500)] #Data beyond 2012 is unnecessary, this helps reduce file size
        stock_dict[f'{ticker}'] = ticker_df.sort_index()
        time.sleep(2)
    return stock_dict

In [7]:
reps = pd.read_csv('data/cleaned_complete_congress_data.csv')
reps.head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY
1,2022-05-31,X,"United States Steel Corporation <div class=""te...",sale_partial,50000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
2,2022-05-31,X,"United States Steel Corporation <div class=""te...",sale_full,50000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
3,2022-05-31,X,United States Steel Corporation,purchase,250000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL
4,2022-05-20,PYPL,"PayPal Holdings, Inc. -",purchase,100000,Thomas H Tuberville,2022-06-13,senate,Thomas,Tuberville,Republican,1954-09-18,M,AL


We examined the value counts of the years in which these transactions were happening and decided to drop 2012 and 2013 because they had so many fewer data points. We project sales for which we don't have purchases in the records back to the first date of our records, and as it seems likely that this is incomplete data for those years, for now, we're going to start in 2014.

In [8]:
#add a transaction year

reps['transaction_year'] = reps['transaction_date'].map(lambda x: x[:4])

reps['transaction_year'].value_counts()

2020    7690
2021    5249
2019    1784
2022    1762
2018    1147
2017    1116
2015     903
2016     795
2014     594
2013       5
2012       2
Name: transaction_year, dtype: int64

In [9]:
reps = reps[(reps['transaction_year'] != '2012') & (reps['transaction_year'] != '2013')]
reps['transaction_year'].value_counts()

2020    7690
2021    5249
2019    1784
2022    1762
2018    1147
2017    1116
2015     903
2016     795
2014     594
Name: transaction_year, dtype: int64

At this point, we dropped the transaction year, as we don't anticipate needing that again.

In [10]:
reps.drop(columns = ['transaction_year'], inplace = True)

In [11]:
mcconnell = reps[reps['last_name'] == 'McConnell']
mcconnell.head()

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY
111,2022-03-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-03-14,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY
355,2021-12-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2021-12-16,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY
455,2021-09-03,WFC,WFC - Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2021-09-15,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY
669,2021-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2021-06-07,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY


In [12]:
mcconnell.shape

(23, 14)

In [14]:
stocks = pull_all_stocks(mcconnell)

Because I'm traveling and won't always have internet access, I wanted to save the stocks dictionary on my computer. [This site](https://www.geeksforgeeks.org/save-a-dictionary-to-a-file/) walked me through how to do that. I opted for pickle since we just learned that yesterday.

In [15]:
try:
    stocks_file = open('stocks_file', 'wb')
    pickle.dump(stocks, stocks_file)
    stocks_file.close()
  
except:
    print("Something went wrong")

We tested the pickle out, verifying it works, running the following, opening the code, and comparing stocks2 to stocks. They matched. We eliminated the tests, themselves, for space.

In [16]:
#testing this out

with open('stocks_file', 'rb') as f:
    stocks2 = pickle.load(f)

Double checking data types

In [17]:
mcconnell.dtypes

transaction_date     object
ticker               object
asset_description    object
type                 object
amount                int64
name                 object
disclosure_date      object
chamber              object
first_name           object
last_name            object
party                object
birthday             object
gender               object
represents           object
dtype: object

# Code to Pull Stock Prices
[Stackoverflow](https://stackoverflow.com/a/61426301) on mean in multipe columns. helped to get this written.[Stackoverflow](https://stackoverflow.com/a/61426301) on mean in multipe columns. helped to get this written.

## Transaction Date = Day Market Was Closed
Congresspeople sometimes entered transaction dates for which the market was closed. We presumed that means they entered the trade with their broker (or otherwise) on that date, and that it would be executed the next trading day. That's accounted for within a couple of try/except pairs.

[This stackoverflow](https://stackoverflow.com/a/61964191) helped with setting up the try/except. [This medium article](https://medium.com/analytics-vidhya/python-how-to-automate-bank-holidays-767de0d42d44) showed how us the holidays module. [This article](https://www.geeksforgeeks.org/python-holidays-library/) helped understand how to use it. [This](https://pynative.com/python-get-the-day-of-week/) showed how to get day of week to check if saturday or sunday so we can adjust day of transaction accordingly.

## Market Closed non-Holidays
2018-12-05 the market was closed for national day of mourning to mark [the passing of George Bush](https://heavy.com/news/2018/12/stock-market-closed-national-day-of-mourning/). Need to add it to the list of holidays.# Market Closed non-Holidays
2018-12-05 the market was closed for national day of mourning to mark [the passing of George Bush](https://heavy.com/news/2018/12/stock-market-closed-national-day-of-mourning/). Need to add it to the list of holidays.

We presume the holiday transaction date means that the orders were executed the next market day.

## Trades When Stocks Weren't Trading AKA Mitch McConnell Sells Something Not On the Exchange?
Some trades are registered as happening when stocks weren't trading. Specifically Ingersoll-Rand (IR) partial_sales by Mitch McConnell 2017-02-02 and 2016-06-09. 

The data on IR doesn't start until May 8, 2017. [This Motley Fool article](https://www.fool.com/investing/general/2016/01/04/is-ingersoll-rand-a-buy-for-2016.aspxhttps://www.fool.com/investing/general/2016/01/04/is-ingersoll-rand-a-buy-for-2016.aspx) makes it appear that Ingersoll-Rand was trading under the ticker TT (or perhaps that the part that wasn't spun into IR is trading as TT now).

Gardner Denver went public in May 2017. In May 2020, it combined with Ingersoll-Rand's HVAC business -- together they started trading under IR. See [this article](https://seekingalpha.com/article/4350361-new-ingersoll-rand-debuts-under-challenging-circumstanceshttps://seekingalpha.com/article/4350361-new-ingersoll-rand-debuts-under-challenging-circumstances).

This may be a noteworthy trade. The disclosure wasn't made until May 15, 2017 -- about the time of the appearance of IR on the stock exchange, and the time of Gardner Denver going public, but before they merged (at least, that it was announced).

Weird.

He sold it in June of 2019... we should look at this more closely. In the meantime, I'll estimate it as being from the first IR price.

[This site](https://docs.python.org/2/library/datetime.html#datetime.timedeltahttps://docs.python.org/2/library/datetime.html#datetime.timedelta) showed us timedelta

[This stackoverflow answer](https://stackoverflow.com/a/42380166) (finally) showed us how to use `.loc()` to create a new column without getting a copy warning. (in this case filled with a default value). It still seems to throw a warning, but maybe not as many of them. Though we seem to be doing what the warning tells us to do... YEAH, ACTUALLY THIS DOESN'T SEEM TO MAKE ANY DIFFERENCE TO THE WARNING. Weird.

### A note on price column names

We've included end_price for EDA purposes, though we don't use it in the below calculations. One can determine if it's associated with a sale or not from `'type'` column.

We've incuded start_price instead of purchase price because sometimes the start price is day 1 of the data set and sometimes it's the price on a day of purchse. One can determine which from the `'type'` column.

We've used the same nomenclature for the prices, 'start' and 'end'.

In [18]:
#establishing columns for shares, which won't change for any one transaction row

mcconnell.loc[:, 'shares'] = 0

#establishing columns for start_value and end_value

mcconnell.loc[:, 'start_value'] = 0
mcconnell.loc[:, 'end_value'] = 0

#NOTE
#I think the way around these warnings would be to make an array of 0s 
#of the same length as the dataframe, then concatenate those.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, 'shares'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, 'start_value'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, 'end_value'] = 0


In [19]:
t0 = time.time()

#revising to get 'start_price' and 'end_price'... use the piece from below

#drop partial sales for this version because we can't account for how much
#was sold

mcconnell = mcconnell[mcconnell['type'] != 'sale_partial']

#establish columns for purchase date and end date,
#which is either the sale date or the last date we're keeping records for

mcconnell.loc[:, 'start_date'] = ''
mcconnell.loc[:, 'end_date'] = ''

#establish columns for purchase price and end price,
#which is either the sale price or the price on the last date we're keeping records for

mcconnell.loc[:, 'start_price'] = 0
mcconnell.loc[:, 'end_price'] = 0

#establish purchase-sale column and setting default to 0
mcconnell.loc[:, 'purchase-sale'] = 0

#establishing all the dates we cover in this period

start_date = pd.to_datetime(reps["transaction_date"].min())
end_date = pd.to_datetime('2022-07-16') #the data was pulled on 2022-07-15. 2022-07-16 here ensures that date is in the end list
date_list = [start_date + timedelta(days = num) for num in range((end_date-start_date).days)]

date_list = [str(date)[:10] for date in date_list]

for i in range(len(date_list)):
    mcconnell.loc[:, date_list[i]] = 0

print('*'*100)
print('')
print('Finished filling in the dates with 0s')
print('')
print("Time to run", time.time()-t0) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, 'start_date'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, 'end_date'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, 'start_price'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_i

****************************************************************************************************

Finished filling in the dates with 0s

Time to run 22.50003933906555


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, date_list[i]] = 0
  mcconnell.loc[:, date_list[i]] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, date_list[i]] = 0
  mcconnell.loc[:, date_list[i]] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mcconnell.loc[:, date_list[i]] = 0
  mcconnell.loc[:, date_li

In [35]:
start_date

Timestamp('2014-01-02 00:00:00')

[This site](https://www.geeksforgeeks.org/python-iterating-through-a-range-of-dates/) showed us the date_range method.

In [26]:
t0 = time.time()

for i in mcconnell.index:
    print(i)
    #establish ticker
    ticker = mcconnell.loc[i, 'ticker']
    start_date = 0
    end_date = 0
    
    #this accounts for purchases
    if mcconnell.loc[i, 'type'] == 'purchase':
        
        start_date = mcconnell.loc[i, 'transaction_date']
        print(f'Index {i} is a purchase and the purchase date is {start_date}')
        
        for n in mcconnell.index:
            
            #determine if there's a full sale after the purchase:
            if (mcconnell.loc[n, 'ticker'] == ticker) and (mcconnell.loc[n, 'type'] == 'sale_full') and (pd.to_datetime(mcconnell.loc[n, 'transaction_date']) > pd.to_datetime(start_date)):
                print(f'Index {i} is a purchase that is later sold')
                #NOTE: THIS CODE DOESN'T YET ACCOUNT FOR A REP BUYING, SELLING, THEN DOING IT AGAIN LATER
                #WITH THE SAME TICKER
                
                #mark this row as a purchase --> sale:
                mcconnell.loc[i, 'purchase-sale'] = 1
                
                end_date = mcconnell.loc[n, 'transaction_date']
                
                mcconnell.drop(index = n, inplace = true)
                #include this to enable sanity check
                print(f"Dropped sale at index {n} because it's accounted for in purchase at index {i}")                

        #if the end_date hasn't been changed by the first by an if in the loop-->        
        #end_date is a misnomer here but is used to simplify the code
        if end_date == 0:
            print(f'Index {i} is a purchase that is never sold')
            end_date = '2022-07-15'
    
    #this accounts for sales that aren't associated with purchases
    else:
        
        if mcconnell.loc[i, 'purchase-sale'] == 0:
            print(f'Index {i} is a sale with no associated purchase in the data')
            #assume purchase date is the first date for which we're tracking
            #this is very rough, but will at least let us track price changes
            
            start_date = str(pd.to_datetime(reps["transaction_date"].min()))[:10]
            
            end_date = mcconnell.loc[i, 'transaction_date']
    
                            #at this point, we've established the dates from which we'll work to find prices,
                            #which may not be the same because some transaction dates are days the market is closed

                            #next, we get actual day of purchase, which is the day of transaction (current 'start_date'),
                            #or the first day of our records, or the first day of trading after either of those
    
    print(f'Index {i} starting start (or purchase) date is {start_date}')
    count = 0
    while start_date not in stocks[ticker].index:
        start_date = str(pd.to_datetime(start_date) + timedelta(days = 1))[:10]
        count += 1
        if pd.to_datetime(start_date) > pd.to_datetime('2022-07-15'):
            print(f'For {ticker}, index {i} purchase date reached {date} without finding a price.')
            break
            
    print(f'Index {i} final start (or purchase) date is {start_date}')       
    
    #next, we get actual day of sale, which is the day of transaction (current 'end_date'),
    #or the first day of trading after that, or the last day of our records   
    
    print(f'Index {i} starting end (or sale) date is {end_date}')
    count = 0
    while end_date not in stocks[ticker].index:
        end_date = str(pd.to_datetime(end_date) + timedelta(days = 1))[:10]
        count += 1
        if pd.to_datetime(end_date) > pd.to_datetime('2022-07-15'):
            print(f'For {ticker}, index {i} sale date reached {date} without finding a price.')
            break
    print(f'Index {i} final end (or sale) date is {end_date}')
    
    mcconnell.loc[i, 'start_date'] = start_date
    mcconnell.loc[i, 'end_date'] = end_date
    
    #now we get the start_price and the end_price
    
    mcconnell.loc[i, 'start_price'] = stocks[ticker].loc[start_date].drop('volume').astype(float).mean()
    mcconnell.loc[i, 'end_price'] = stocks[ticker].loc[end_date].drop('volume').astype(float).mean()

    #REMOVED THE FILLING IN FROM HERE TO PUT BELOW THE ADDITION OF PRICES, WITH CHANGE IN VALUE
    
print('')
print("Time to run", time.time()-t0)  

0
Index 0 is a purchase and the purchase date is 2022-06-03
Index 0 is a purchase that is never sold
Index 0 starting start (or purchase) date is 2022-06-03
Index 0 final start (or purchase) date is 2022-06-03
Index 0 starting end (or sale) date is 2022-07-15
Index 0 final end (or sale) date is 2022-07-15
111
Index 111 is a purchase and the purchase date is 2022-03-03
Index 111 is a purchase that is never sold
Index 111 starting start (or purchase) date is 2022-03-03
Index 111 final start (or purchase) date is 2022-03-03
Index 111 starting end (or sale) date is 2022-07-15
Index 111 final end (or sale) date is 2022-07-15
355
Index 355 is a purchase and the purchase date is 2021-12-03
Index 355 is a purchase that is never sold
Index 355 starting start (or purchase) date is 2021-12-03
Index 355 final start (or purchase) date is 2021-12-03
Index 355 starting end (or sale) date is 2022-07-15
Index 355 final end (or sale) date is 2022-07-15
455
Index 455 is a purchase and the purchase date i

In [71]:
pd.set_option('display.max_row', None)

In [27]:
mcconnell.dtypes

transaction_date      object
ticker                object
asset_description     object
type                  object
amount                 int64
                      ...   
2022-07-11           float64
2022-07-12           float64
2022-07-13           float64
2022-07-14           float64
2022-07-15           float64
Length: 3139, dtype: object

Now setting the shares and values based on whether whether the transaction is a purchase, a sale, or a purchase-sale comination.

because the start and end values are already taken care of above, and thus accounted
for in transactions that start with a purchase (type == 'purchase') 
and end with a sale (purchase-sale == 1) we can use the same code for 
all type == 'purchase' transactions
type == 'sale' that matched a purchase were eliminated after matching,
so they're already accounted for

In [50]:
#this is close, but at least for Vulcan Metal Company (VMC), 
#the calculation of start and end prices and values aren't lining up with what I'm seeing for percent changes
for i in mcconnell.index:    
        
        if mcconnell.loc[i, 'type'] == 'purchase':
            mcconnell.loc[i, 'start_value'] = mcconnell.loc[i, 'amount']
            mcconnell.loc[i, 'shares'] = mcconnell.loc[i, 'amount']/mcconnell.loc[i,'start_price']
            mcconnell.loc[i, 'end_value'] = round(mcconnell.loc[i,'shares']*mcconnell.loc[i, 'end_price'], 2)
        
        else: # i.e. it's a sale that isn't a purchase-sale
            mcconnell.loc[i, 'end_value'] = mcconnell.loc[i, 'amount']
            mcconnell.loc[i, 'shares'] = mcconnell.loc[i, 'amount']/mcconnell.loc[i, 'end_price']
            mcconnell.loc[i, 'start_value'] = round(mcconnell.loc[i, 'shares']*mcconnell.loc[i, 'start_price'], 2)
        
        for date in pd.date_range(pd.to_datetime(mcconnell.loc[i, 'start_date'])+timedelta(days=1), pd.to_datetime(mcconnell.loc[i, 'end_date'])):
            try:
                price = stocks[mcconnell.loc[i, 'ticker']].loc[str(date)[:10]].drop('volume').astype(float).mean()
                mcconnell.loc[i, str(date)[:10]] = round(((price-mcconnell.loc[i, 'start_price'])/mcconnell.loc[i, 'start_price'])*mcconnell.loc[i, 'start_value'], 2)
            except:
                pass

        

#### Sanity check with Vulcan metals.

In [39]:
pd.set_option('display.max_columns', 150)

In [None]:
VMC_1 = stocks['VMC'].loc['2014-01-02'].drop('volume').astype(float).mean()

In [None]:
VMC_2 = stocks['VMC'].loc['2014-01-03'].drop('volume').astype(float).mean()

In [None]:
VMC_delta = (VMC_2 - VMC_1)/(VMC_1)
VMC_delta

-0.0015337423307745614

In [None]:
VMC_delta*220130.0

-337.62269927340424

In [51]:
mcconnell

Unnamed: 0,transaction_date,ticker,asset_description,type,amount,name,disclosure_date,chamber,first_name,last_name,party,birthday,gender,represents,shares,start_value,end_value,start_date,end_date,start_price,end_price,purchase-sale,2014-01-02,2014-01-03,2014-01-04,2014-01-05,2014-01-06,2014-01-07,2014-01-08,2014-01-09,2014-01-10,2014-01-11,2014-01-12,2014-01-13,2014-01-14,2014-01-15,2014-01-16,2014-01-17,2014-01-18,2014-01-19,2014-01-20,2014-01-21,2014-01-22,2014-01-23,2014-01-24,2014-01-25,2014-01-26,2014-01-27,2014-01-28,2014-01-29,2014-01-30,2014-01-31,2014-02-01,2014-02-02,2014-02-03,2014-02-04,2014-02-05,2014-02-06,2014-02-07,2014-02-08,2014-02-09,2014-02-10,2014-02-11,2014-02-12,2014-02-13,2014-02-14,2014-02-15,2014-02-16,2014-02-17,2014-02-18,2014-02-19,2014-02-20,2014-02-21,2014-02-22,2014-02-23,...,2022-05-02,2022-05-03,2022-05-04,2022-05-05,2022-05-06,2022-05-07,2022-05-08,2022-05-09,2022-05-10,2022-05-11,2022-05-12,2022-05-13,2022-05-14,2022-05-15,2022-05-16,2022-05-17,2022-05-18,2022-05-19,2022-05-20,2022-05-21,2022-05-22,2022-05-23,2022-05-24,2022-05-25,2022-05-26,2022-05-27,2022-05-28,2022-05-29,2022-05-30,2022-05-31,2022-06-01,2022-06-02,2022-06-03,2022-06-04,2022-06-05,2022-06-06,2022-06-07,2022-06-08,2022-06-09,2022-06-10,2022-06-11,2022-06-12,2022-06-13,2022-06-14,2022-06-15,2022-06-16,2022-06-17,2022-06-18,2022-06-19,2022-06-20,2022-06-21,2022-06-22,2022-06-23,2022-06-24,2022-06-25,2022-06-26,2022-06-27,2022-06-28,2022-06-29,2022-06-30,2022-07-01,2022-07-02,2022-07-03,2022-07-04,2022-07-05,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,2022-07-11,2022-07-12,2022-07-13,2022-07-14,2022-07-15
0,2022-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-06-13,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,334.672021,15000.0,13765.06,2022-06-03,2022-07-15,44.82,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,-466.05,-302.97,302.75,-20.08,-90.36,0,0,-287.82,-582.33,-753.01,-1007.36,-729.59,0,0,-873.49,-371.49,-906.96,-943.78,-1054.22,0,0,-334.67,-512.05,-234.27,261.04,358.1,0,0,0,317.94,113.79,170.68,0.0,0,0,100.4,217.54,-63.59,-719.54,-1586.35,0,0,-1951.14,-2473.23,-2225.57,-2399.6,-2121.82,0,0,0,-1951.14,-2081.66,-2315.93,-1358.77,0,0,-1532.8,-1552.88,-1710.17,-1890.9,-1639.89,0,0,0,-1579.65,-1780.46,-1572.96,-1552.88,0,0,-1683.4,-1746.99,-1924.36,-2034.81,-1234.94
111,2022-03-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2022-03-14,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,294.375101,15000.0,12107.65,2022-03-03,2022-07-15,50.955397,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,-2216.04,-2072.6,-1539.81,-1823.77,-1885.59,0,0,-2059.27,-2318.32,-2468.45,-2692.18,-2447.85,0,0,-2574.43,-2132.86,-2603.86,-2636.25,-2733.39,0,0,-2100.48,-2256.5,-2012.17,-1576.5,-1491.13,0,0,0,-1526.45,-1706.02,-1655.98,-1806.11,0,0,-1717.8,-1614.76,-1862.04,-2439.01,-3201.45,0,0,-3522.31,-3981.54,-3763.7,-3916.78,-3672.45,0,0,0,-3522.31,-3637.12,-3843.18,-3001.27,0,0,-3154.35,-3172.01,-3310.36,-3469.33,-3248.55,0,0,0,-3195.56,-3372.18,-3189.67,-3172.01,0,0,-3286.81,-3342.75,-3498.76,-3595.91,-2892.35
355,2021-12-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2021-12-16,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,317.322126,15000.0,13051.46,2021-12-03,2022-07-15,47.270577,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,-1219.51,-1064.89,-490.57,-796.66,-863.3,0,0,-1050.52,-1329.76,-1491.6,-1732.76,-1469.38,0,0,-1605.83,-1129.85,-1637.57,-1672.47,-1777.19,0,0,-1094.94,-1263.13,-999.75,-530.11,-438.09,0,0,0,-476.17,-669.73,-615.79,-777.62,0,0,-682.43,-571.36,-837.91,-1459.86,-2281.73,0,0,-2627.61,-3122.63,-2887.81,-3052.82,-2789.44,0,0,0,-2627.61,-2751.37,-2973.49,-2065.95,0,0,-2230.96,-2250.0,-2399.14,-2570.49,-2332.5,0,0,0,-2275.38,-2465.78,-2269.04,-2250.0,0,0,-2373.75,-2434.04,-2602.22,-2706.94,-1948.54
455,2021-09-03,WFC,WFC - Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2021-09-15,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,344.396598,15000.0,14165.03,2021-09-03,2022-07-15,43.554437,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,-43.74,124.08,747.4,415.19,342.87,0,0,139.67,-163.39,-339.04,-600.78,-314.93,0,0,-463.02,53.58,-497.46,-535.34,-648.99,0,0,91.46,-91.07,194.78,704.48,804.36,0,0,0,763.03,552.95,611.5,435.86,0,0,539.17,659.71,370.42,-304.6,-1196.58,0,0,-1571.98,-2109.24,-1854.38,-2033.47,-1747.62,0,0,0,-1571.98,-1706.29,-1947.37,-962.39,0,0,-1141.48,-1162.14,-1324.01,-1509.99,-1251.69,0,0,0,-1189.7,-1396.33,-1182.81,-1162.14,0,0,-1296.46,-1361.89,-1544.42,-1658.08,-834.97
669,2021-06-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2021-06-07,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,328.814968,15000.0,13524.16,2021-06-03,2022-07-15,45.618361,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,-720.41,-560.18,34.94,-282.24,-351.29,0,0,-545.29,-834.65,-1002.35,-1252.25,-979.33,0,0,-1120.72,-627.5,-1153.6,-1189.77,-1298.28,0,0,-591.33,-765.6,-492.68,-6.04,89.32,0,0,0,49.86,-150.72,-94.82,-262.51,0,0,-163.87,-48.78,-324.99,-969.47,-1821.1,0,0,-2179.5,-2692.46,-2449.13,-2620.12,-2347.2,0,0,0,-2179.5,-2307.74,-2537.91,-1597.5,0,0,-1768.49,-1788.21,-1942.76,-2120.32,-1873.71,0,0,0,-1814.52,-2011.81,-1807.94,-1788.21,0,0,-1916.45,-1978.93,-2153.2,-2261.71,-1475.84
709,2021-03-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2021-03-16,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,406.19541,15000.0,16706.82,2021-03-03,2022-07-15,36.928039,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,2640.03,2837.96,3573.13,3181.31,3096.01,0,0,2856.35,2498.9,2291.74,1983.03,2320.17,0,0,2145.51,2754.8,2104.89,2060.21,1926.16,0,0,2799.48,2584.2,2921.34,3522.51,3640.31,0,0,0,3591.56,3343.78,3412.84,3205.68,0,0,3327.54,3469.71,3128.5,2332.36,1280.31,0,0,837.56,203.89,504.48,293.26,630.4,0,0,0,837.56,679.14,394.81,1556.52,0,0,1345.3,1320.93,1130.02,910.67,1215.32,0,0,0,1288.44,1044.72,1296.56,1320.93,0,0,1162.52,1085.34,870.05,736.01,1706.82
828,2020-12-03,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2020-12-07,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,526.624843,15000.0,21660.08,2020-12-03,2022-07-15,28.483275,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,7869.97,8126.58,9079.72,8571.73,8461.14,0,0,8150.43,7687.0,7418.42,7018.18,7455.28,0,0,7228.83,8018.77,7176.17,7118.24,6944.46,0,0,8076.7,7797.59,8234.69,9014.09,9166.81,0,0,0,9103.62,8782.38,8871.9,8603.33,0,0,8761.31,8945.63,8503.27,7471.08,6107.12,0,0,5533.1,4711.57,5101.27,4827.43,5264.52,0,0,0,5533.1,5327.72,4959.08,6465.23,0,0,6191.38,6159.79,5912.27,5627.9,6022.86,0,0,0,6117.66,5801.68,6128.19,6159.79,0,0,5954.4,5854.34,5575.23,5401.45,6660.08
966,2020-09-02,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2020-09-11,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,627.804257,15000.0,25821.59,2020-09-02,2022-07-15,23.892798,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,12263.93,12569.85,13706.11,13100.52,12968.68,0,0,12598.28,12045.81,11725.63,11248.5,11769.57,0,0,11499.62,12441.32,11436.84,11367.78,11160.6,0,0,12510.38,12177.65,12698.72,13627.87,13809.94,0,0,0,13734.6,13351.64,13458.37,13138.19,0,0,13326.53,13546.26,13018.9,11788.41,10162.39,0,0,9478.09,8498.71,8963.29,8636.83,9157.91,0,0,0,9478.09,9233.24,8793.78,10589.3,0,0,10262.84,10225.18,9930.11,9591.09,10061.95,0,0,0,10174.95,9798.27,10187.51,10225.18,0,0,9980.33,9861.05,9528.31,9321.14,10821.59
1996,2020-03-04,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2020-03-11,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,381.687838,15000.0,15698.82,2020-03-04,2022-07-15,39.29913,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,1575.73,1761.71,2452.53,2084.35,2004.19,0,0,1779.0,1443.11,1248.45,958.37,1275.17,0,0,1111.04,1683.58,1072.87,1030.89,904.93,0,0,1725.56,1523.27,1840.07,2404.97,2515.65,0,0,0,2469.85,2237.02,2301.91,2107.25,0,0,2221.76,2355.35,2034.73,1286.62,298.05,0,0,-117.99,-713.42,-430.98,-629.45,-312.65,0,0,0,-117.99,-266.85,-534.03,557.6,0,0,359.12,336.22,156.82,-49.29,236.98,0,0,0,305.68,76.67,313.32,336.22,0,0,187.36,114.84,-87.46,-213.41,698.82
2196,2019-12-04,WFC,Wells Fargo Company,purchase,15000,A. Mitchell Mcconnell,2019-12-12,senate,Mitchell,McConnell,Republican,1942-02-20,M,KY,299.959456,15000.0,12337.33,2019-12-04,2022-07-15,50.006758,41.13,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,...,-1973.53,-1827.36,-1284.47,-1573.81,-1636.81,0,0,-1813.78,-2077.75,-2230.73,-2458.7,-2209.73,0,0,-2338.71,-1888.77,-2368.71,-2401.7,-2500.69,0,0,-1855.78,-2014.76,-1765.79,-1321.85,-1234.86,0,0,0,-1270.86,-1453.83,-1402.84,-1555.82,0,0,-1465.83,-1360.84,-1612.81,-2200.73,-2977.63,0,0,-3304.58,-3772.52,-3550.55,-3706.53,-3457.56,0,0,0,-3304.58,-3421.57,-3631.54,-2773.65,0,0,-2929.63,-2947.63,-3088.61,-3250.59,-3025.62,0,0,0,-2971.63,-3151.6,-2965.63,-2947.63,0,0,-3064.61,-3121.61,-3280.58,-3379.57,-2662.67


#### Looks good

### Little EDA on Mitch McConnell's portfolio

In [52]:
mcconnell_profit = mcconnell['end_value'].sum() - mcconnell['start_value'].sum()
mcconnell_profit

460543.5499999998

In [53]:
mcconnell['end_value'].sum()

1278173.5499999998

In [54]:
mcconnell['start_value'].sum()

817630.0

In [60]:
mcconnell.columns

Index(['transaction_date', 'ticker', 'asset_description', 'type', 'amount',
       'name', 'disclosure_date', 'chamber', 'first_name', 'last_name',
       ...
       '2022-07-06', '2022-07-07', '2022-07-08', '2022-07-09', '2022-07-10',
       '2022-07-11', '2022-07-12', '2022-07-13', '2022-07-14', '2022-07-15'],
      dtype='object', length=3612)

In [61]:
#maybe explore further -- should we start our count at a later date?
#also for EDA, could be interesting to look at these outlier dates. What was happening?
#for example, 2020-03-18 was right at the start of lockdown. Did it fall on? after?

reps['transaction_date'].value_counts()

2020-03-18    305
2019-06-24    208
2020-04-02    186
2021-02-16    184
2020-04-14    162
             ... 
2017-08-25      1
2018-03-22      1
2021-06-26      1
2015-12-30      1
2018-04-06      1
Name: transaction_date, Length: 1896, dtype: int64

Verifying there's no nulls still.

In [75]:
mcconnell.isnull().sum()[:20]

transaction_date     0
ticker               0
asset_description    0
type                 0
amount               0
name                 0
disclosure_date      0
chamber              0
first_name           0
last_name            0
party                0
birthday             0
gender               0
represents           0
shares               0
start_value          0
end_value            0
start_date           0
end_date             0
start_price          0
dtype: int64

In [76]:
mcconnell['type'].value_counts()

purchase     19
sale_full     2
Name: type, dtype: int64

# Building a dataframe to take the sums of net profit for each day.

In [69]:
# get the index

start_date = pd.to_datetime(reps["transaction_date"].min())
end_date = pd.to_datetime('2022-07-16') #the data was pulled on 2022-07-15. 2022-07-16 here ensures that date is in the end list
date_list = [start_date + timedelta(days = num) for num in range((end_date-start_date).days)]

daily = pd.DataFrame(index = date_list, columns = ['value_delta'])

daily.head()

Unnamed: 0,value_delta
2014-01-02,
2014-01-03,
2014-01-04,
2014-01-05,
2014-01-06,


[This stackoverflow answer](https://stackoverflow.com/a/29218694) explains that, in fact, <M8[ns] is a type of datetime.

In [62]:
daily.index.dtype

dtype('<M8[ns]')

In [70]:
for i in daily.index:
    daily.loc[i, 'value_delta'] = mcconnell[str(i)[:10]].sum()

In [71]:
daily.head(20)

Unnamed: 0,value_delta
2014-01-02,0.0
2014-01-03,-337.62
2014-01-04,0.0
2014-01-05,0.0
2014-01-06,-2325.85
2014-01-07,600.22
2014-01-08,2513.41
2014-01-09,4276.55
2014-01-10,2776.01
2014-01-11,0.0


In [72]:
len(daily)

3117

In [73]:
daily.tail()

Unnamed: 0,value_delta
2022-07-11,200982.99
2022-07-12,196119.0
2022-07-13,182551.16
2022-07-14,174103.25
2022-07-15,235286.59
