This is for the QuiverQuant data set

In [203]:
#packages

import pandas as pd
import numpy as np
from pandas.tseries.offsets import BDay
import yfinance as yf
from tqdm import tqdm

In [244]:
#Import Data
trade_df = pd.read_excel('../data/quiverquant_congress_trading_all.xlsx')

In [245]:
trade_df['Traded'] = pd.to_datetime(trade_df['Traded'])

In [246]:
trade_df

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,BioGuideID,Filed,Party,District,Chamber,Comments,Quiver_Upload_Time,excess_return,State,last_modified
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,S000250,2024-11-01,R,TX17,House,,2024-11-04,0.697227,Texas,2024-11-04
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,G000590,2024-10-29,R,TN07,House,,2024-10-30,-3.609834,Tennessee,2024-10-30
2,BRK.B,ST,BERKSHIRE HATHAWAY INC. NEW COMMON STOCK,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,G000596,2024-10-22,R,GA14,House,,2024-10-22,-2.339083,Georgia,2024-10-22
3,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,G000596,2024-10-22,R,GA14,House,,2024-10-22,-0.292208,Georgia,2024-10-22
4,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,G000596,2024-10-22,R,GA14,House,,2024-10-22,13.334500,Georgia,2024-10-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48314,CAT,,"CATERPILLAR, INC.",2012-07-26,Purchase,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,D000622,2014-07-08,D,IL08,House,,2020-07-26,31.417039,Illinois,2023-11-16
48315,PG,,PROCTER & GAMBLE COMPANY,2012-07-24,Sale,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,D000622,2014-07-08,D,IL08,House,,2020-07-26,-169.316461,Illinois,2023-11-16
48316,KSU,,KANSAS CITY SOUTHERN,2012-06-06,Purchase,"$1,001 - $15,000",NEW,,,Mr. Alan S. Lowenthal,L000579,2014-05-15,D,CA47,House,,2020-07-26,89.238230,California,2023-11-16
48317,EP$C,,EL PASO CORPORATION PREFERRED STOCK,2012-03-20,Sale,"$1,001 - $15,000",NEW,,,Mr. Alan S. Lowenthal,L000579,2014-05-15,D,CA47,House,,2020-07-26,,California,2023-11-16


In [273]:
# trade_df_backup = trade_df
trade_df = trade_df_backup

In [274]:
# Clean invalid tickers
trade_df = trade_df[trade_df['Ticker'].str.match(r'^[A-Za-z]+$')]

# Reset index if necessary
trade_df.reset_index(drop=True, inplace=True)

In [276]:
def get_adjusted_closing_price(
    trade_df, 
    traded_date_col, 
    ticker_col, 
    offset_days=0, 
    new_col_name="Adjusted_Closing_Price"
):
    """
    Retrieves the closing price on a specified traded date with an optional offset in days.
    Adjusts to the nearest trading day if necessary, using yfinance to pull historical data.

    Parameters:
    - trade_df (pd.DataFrame): DataFrame with trade information.
    - traded_date_col (str): Name of the column in trade_df that contains the traded date.
    - ticker_col (str): Name of the column in trade_df that contains the ticker symbol.
    - offset_days (int): Number of days after the traded date to pull data for (can be negative).
    - new_col_name (str): Name of the new column to store the result in trade_df.

    Returns:
    - pd.DataFrame: trade_df with a new column containing the adjusted closing price.
    """

    # Ensure 'traded_date' column is in datetime format
    trade_df[traded_date_col] = pd.to_datetime(trade_df[traded_date_col])

    # Adjust the date by the given offset and snap to the nearest business day
    trade_df['target_date'] = trade_df[traded_date_col] + BDay(offset_days)
    trade_df['target_date'] = pd.to_datetime(trade_df['target_date'])
    trade_df['target_date'] = trade_df['target_date'].dt.tz_localize(None)

    # Initialize new column in trade_df with np.nan
    trade_df[new_col_name] = np.nan

    # Group by ticker to minimize API calls
    grouped = trade_df.groupby(ticker_col)

    for ticker, group in tqdm(grouped, desc="Processing tickers"):
        # Get all unique target dates for the ticker
        dates_needed = group['target_date'].unique()

        # Define start and end dates for historical data download
        start_date = min(dates_needed) - pd.Timedelta(days=5)
        end_date = max(dates_needed) + pd.Timedelta(days=5)

        # Download historical data for the ticker
        try:
            ticker_data = yf.download(
                ticker, 
                start=start_date.strftime('%Y-%m-%d'), 
                end=(end_date + pd.Timedelta(days=1)).strftime('%Y-%m-%d'),
                progress=False
            )
        except Exception as e:
            print(f"Failed to download data for ticker '{ticker}': {e}")
            trade_df.loc[group.index, new_col_name] = np.nan
            continue

        # Check if ticker_data is empty
        if ticker_data.empty:
            trade_df.loc[group.index, new_col_name] = np.nan
            continue

        # Prepare the historical data
        ticker_data = ticker_data.reset_index()
        ticker_data['Date'] = pd.to_datetime(ticker_data['Date'])
        ticker_data['Date'] = ticker_data['Date'].dt.tz_localize(None)
        ticker_data = ticker_data.set_index('Date')

        # Ensure the index is sorted
        ticker_data.sort_index(inplace=True)

        # Map the closing prices to the target dates
        def get_closing_price(date):
            # Ensure date is timezone-naive
            date = date.tz_localize(None)

            if date in ticker_data.index:
                return float(ticker_data.loc[date, 'Close'])
            else:
                # Find the previous available trading day
                previous_dates = ticker_data.index[ticker_data.index < date]
                if len(previous_dates) > 0:
                    closest_date = previous_dates[-1]
                    return float(ticker_data.loc[closest_date, 'Close'])
                else:
                    return np.nan  # np.nan is a float

        # Apply the function to each row in the group using .map()
        trade_df.loc[group.index, new_col_name] = group['target_date'].map(get_closing_price)

    # Drop the temporary 'target_date' column
    trade_df.drop(columns=['target_date'], inplace=True)

    # Ensure the 'price' column is of type float
    trade_df[new_col_name] = trade_df[new_col_name].astype(float)

    return trade_df


In [None]:
trade_df = get_adjusted_closing_price(trade_df,traded_date_col='Traded', ticker_col='Ticker',offset_days=0,new_col_name='price')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trade_df[traded_date_col] = pd.to_datetime(trade_df[traded_date_col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trade_df['target_date'] = trade_df[traded_date_col] + BDay(offset_days)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trade_df['target_date'] = pd.to_datetime(trade_df['target_date'

In [280]:
trade_df_test

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,Filed,Party,District,Chamber,Comments,Quiver_Upload_Time,excess_return,State,last_modified,price
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,2024-11-01,R,TX17,House,,2024-11-04,0.697227,Texas,2024-11-04,135.399994
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,2024-10-29,R,TN07,House,,2024-10-30,-3.609834,Tennessee,2024-10-30,4.200000
2,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,-0.292208,Georgia,2024-10-22,406.399994
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,13.334500,Georgia,2024-10-22,218.850006
4,V,ST,VISA INC.,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,4.115725,Georgia,2024-10-22,286.850006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47720,DD,Stock,E. I. du Pont de Nemours and Company (NYSE),2012-09-13,Purchase,"$1,001 - $15,000",New,,,"Carper, Thomas R.",...,2015-05-13,D,,Senate,,2020-07-26,-336.952251,Delaware,2023-08-28,44.733368
47721,PG,,PROCTER & GAMBLE COMPANY,2012-08-16,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,2014-07-08,D,IL08,House,,2020-07-26,-156.969402,Illinois,2023-11-16,67.000000
47722,CAT,,"CATERPILLAR, INC.",2012-07-26,Purchase,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,...,2014-07-08,D,IL08,House,,2020-07-26,31.417039,Illinois,2023-11-16,83.300003
47723,PG,,PROCTER & GAMBLE COMPANY,2012-07-24,Sale,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,...,2014-07-08,D,IL08,House,,2020-07-26,-169.316461,Illinois,2023-11-16,64.019997


In [281]:
trade_df = trade_df_test

In [8]:
# Drop rows where 'price' is NaN or None
trade_df = trade_df.dropna(subset=['price']).reset_index(drop=True)


In [9]:
trade_df.head()

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,Filed,Party,District,Chamber,Comments,Quiver_Upload_Time,excess_return,State,last_modified,price
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,2024-11-01,R,TX17,House,,2024-11-04,0.697227,Texas,2024-11-04,Ticker NVDA 135.399994 Name: 2024-11-01 00:...
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,2024-10-29,R,TN07,House,,2024-10-30,-3.609834,Tennessee,2024-10-30,"Ticker NGL 4.2 Name: 2024-10-24 00:00:00, d..."
2,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,-0.292208,Georgia,2024-10-22,Ticker HD 406.399994 Name: 2024-10-21 00:00...
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,13.3345,Georgia,2024-10-22,Ticker TSLA 218.850006 Name: 2024-10-21 00:...
4,V,ST,VISA INC.,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,4.115725,Georgia,2024-10-22,Ticker V 286.850006 Name: 2024-10-21 00:00:...


In [283]:
trade_df = get_adjusted_closing_price(trade_df,'Traded', 'Ticker',offset_days=365,new_col_name='price_plus_365')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trade_df[traded_date_col] = pd.to_datetime(trade_df[traded_date_col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trade_df['target_date'] = trade_df[traded_date_col] + BDay(offset_days)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trade_df['target_date'] = pd.to_datetime(trade_df['target_date'

In [285]:
trade_df_backup = trade_df

In [286]:
# Drop rows where 'price' is NaN or None
trade_df = trade_df.dropna(subset=['price']).reset_index(drop=True)

In [287]:
trade_df

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,Party,District,Chamber,Comments,Quiver_Upload_Time,excess_return,State,last_modified,price,price_plus_365
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,R,TX17,House,,2024-11-04,0.697227,Texas,2024-11-04,135.399994,138.250000
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,R,TN07,House,,2024-10-30,-3.609834,Tennessee,2024-10-30,4.200000,4.820000
2,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,R,GA14,House,,2024-10-22,-0.292208,Georgia,2024-10-22,406.399994,429.130005
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,R,GA14,House,,2024-10-22,13.334500,Georgia,2024-10-22,218.850006,345.160004
4,V,ST,VISA INC.,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,R,GA14,House,,2024-10-22,4.115725,Georgia,2024-10-22,286.850006,315.079987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41312,LMT,,LOCKHEED MARTIN CORPORATION,2012-11-13,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,D,IL08,House,,2020-07-26,187.052900,Illinois,2023-11-16,90.180000,156.649994
41313,DD,Stock,E. I. du Pont de Nemours and Company (NYSE),2012-09-13,Purchase,"$1,001 - $15,000",New,,,"Carper, Thomas R.",...,D,,Senate,,2020-07-26,-336.952251,Delaware,2023-08-28,44.733368,64.886871
41314,PG,,PROCTER & GAMBLE COMPANY,2012-08-16,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,D,IL08,House,,2020-07-26,-156.969402,Illinois,2023-11-16,67.000000,80.419998
41315,CAT,,"CATERPILLAR, INC.",2012-07-26,Purchase,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,...,D,IL08,House,,2020-07-26,31.417039,Illinois,2023-11-16,83.300003,87.540001


In [288]:
# Store trade df
trade_df.to_excel('../data/qq_trade_df.xlsx',index=False)

In [63]:
# Recover trade df
trade_df = pd.read_excel('../data/qq_trade_df.xlsx')

In [289]:
trade_df['price_plus_365'] 

0        138.250000
1          4.820000
2        429.130005
3        345.160004
4        315.079987
            ...    
41312    156.649994
41313     64.886871
41314     80.419998
41315     87.540001
41316     80.910004
Name: price_plus_365, Length: 41317, dtype: float64

In [290]:
trade_df['price_change']=trade_df['price_plus_365']-trade_df['price']

In [291]:
trade_df

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,District,Chamber,Comments,Quiver_Upload_Time,excess_return,State,last_modified,price,price_plus_365,price_change
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,TX17,House,,2024-11-04,0.697227,Texas,2024-11-04,135.399994,138.250000,2.850006
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,TN07,House,,2024-10-30,-3.609834,Tennessee,2024-10-30,4.200000,4.820000,0.620000
2,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,-0.292208,Georgia,2024-10-22,406.399994,429.130005,22.730011
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,13.334500,Georgia,2024-10-22,218.850006,345.160004,126.309998
4,V,ST,VISA INC.,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,4.115725,Georgia,2024-10-22,286.850006,315.079987,28.229980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41312,LMT,,LOCKHEED MARTIN CORPORATION,2012-11-13,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,IL08,House,,2020-07-26,187.052900,Illinois,2023-11-16,90.180000,156.649994,66.469994
41313,DD,Stock,E. I. du Pont de Nemours and Company (NYSE),2012-09-13,Purchase,"$1,001 - $15,000",New,,,"Carper, Thomas R.",...,,Senate,,2020-07-26,-336.952251,Delaware,2023-08-28,44.733368,64.886871,20.153503
41314,PG,,PROCTER & GAMBLE COMPANY,2012-08-16,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,IL08,House,,2020-07-26,-156.969402,Illinois,2023-11-16,67.000000,80.419998,13.419998
41315,CAT,,"CATERPILLAR, INC.",2012-07-26,Purchase,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,...,IL08,House,,2020-07-26,31.417039,Illinois,2023-11-16,83.300003,87.540001,4.239998


In [292]:
# Store trade df
trade_df.to_excel('../data/qq_trade_df.xlsx',index=False)

In [4]:
# Restore trade df
trade_df = pd.read_excel('../data/qq_trade_df.xlsx')
trade_df.head()

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,District,Chamber,Comments,Quiver_Upload_Time,excess_return,State,last_modified,price,price_plus_365,price_change
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,TX17,House,,2024-11-04,0.697227,Texas,2024-11-04,135.399994,147.452301,12.052307
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,TN07,House,,2024-10-30,-3.609834,Tennessee,2024-10-30,4.2,4.15,-0.05
2,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,-0.292208,Georgia,2024-10-22,406.399994,404.345001,-2.054993
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,13.3345,Georgia,2024-10-22,218.850006,317.140015,98.290009
4,V,ST,VISA INC.,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,4.115725,Georgia,2024-10-22,286.850006,310.769989,23.919983


In [293]:
ticker_df = pd.read_csv('../data/ticker_sector_data.csv')
ticker_df.head()

Unnamed: 0,Symbol,Name,Country,IPO Year,Volume,Sector,Industry
0,A,Agilent Technologies Inc. Common Stock,United States,1999.0,950947,Industrials,Biotechnology: Laboratory Analytical Instruments
1,AA,Alcoa Corporation Common Stock,United States,2016.0,1785157,Industrials,Aluminum
2,AACG,ATA Creativity Global American Depositary Shares,China,2008.0,24438,Real Estate,Other Consumer Services
3,AACT,Ares Acquisition Corporation II Class A Ordina...,,2023.0,95056,Finance,Blank Checks
4,AADI,Aadi Bioscience Inc. Common Stock,United States,,44392,Health Care,Biotechnology: Pharmaceutical Preparations


In [294]:
merged_trades = pd.merge(trade_df, ticker_df[['Symbol','Sector','Industry']], left_on='Ticker', right_on='Symbol', how='left')
merged_trades

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,Quiver_Upload_Time,excess_return,State,last_modified,price,price_plus_365,price_change,Symbol,Sector,Industry
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,2024-11-04,0.697227,Texas,2024-11-04,135.399994,138.250000,2.850006,NVDA,Technology,Semiconductors
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,2024-10-30,-3.609834,Tennessee,2024-10-30,4.200000,4.820000,0.620000,NGL,Utilities,Natural Gas Distribution
2,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,-0.292208,Georgia,2024-10-22,406.399994,429.130005,22.730011,HD,Consumer Discretionary,RETAIL: Building Materials
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,13.334500,Georgia,2024-10-22,218.850006,345.160004,126.309998,TSLA,Consumer Discretionary,Auto Manufacturing
4,V,ST,VISA INC.,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,4.115725,Georgia,2024-10-22,286.850006,315.079987,28.229980,V,Consumer Discretionary,Business Services
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41312,LMT,,LOCKHEED MARTIN CORPORATION,2012-11-13,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,2020-07-26,187.052900,Illinois,2023-11-16,90.180000,156.649994,66.469994,LMT,Industrials,Military/Government/Technical
41313,DD,Stock,E. I. du Pont de Nemours and Company (NYSE),2012-09-13,Purchase,"$1,001 - $15,000",New,,,"Carper, Thomas R.",...,2020-07-26,-336.952251,Delaware,2023-08-28,44.733368,64.886871,20.153503,DD,Industrials,Major Chemicals
41314,PG,,PROCTER & GAMBLE COMPANY,2012-08-16,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,2020-07-26,-156.969402,Illinois,2023-11-16,67.000000,80.419998,13.419998,PG,Consumer Discretionary,Package Goods/Cosmetics
41315,CAT,,"CATERPILLAR, INC.",2012-07-26,Purchase,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,...,2020-07-26,31.417039,Illinois,2023-11-16,83.300003,87.540001,4.239998,CAT,Industrials,Construction/Ag Equipment/Trucks


In [295]:
# Download data
start_date = '2012-01-01'
end_date = '2024-11-28'
sp500_data = yf.download('^GSPC', start=start_date, end=end_date)
sp500_data.columns = ['_'.join(col) for col in sp500_data.columns]
sp500_data.reset_index(inplace=True)

# Rename columns
sp500_data.columns = [
    'Date',
    'Adj Close',
    'Close',
    'High',
    'Low',
    'Open',
    'Volume'
    ]

# Ensure 'Date' is of datetime type
sp500_data['Date'] = pd.to_datetime(sp500_data['Date'])

# Create 'Date_Plus_365' by adding 365 days
sp500_data['Date_Plus_365'] = sp500_data['Date'] + pd.Timedelta(days=365)

# Sort dataframes by 'Date'
sp500_data.sort_values('Date', inplace=True)

# Prepare future data
future_data = sp500_data[['Date', 'Close']]
future_data.rename(columns={'Date': 'Future_Date', 'Close': 'Future_Close'}, inplace=True)

# Merge using merge_asof with suffixes to prevent overlapping column names
merged_data = pd.merge_asof(
    sp500_data,
    future_data,
    left_on='Date_Plus_365',
    right_on='Future_Date',
    direction='forward',
    suffixes=('', '_future')  # This prevents multi-level columns
)

# Drop rows without future data
merged_data.dropna(inplace=True)

# Calculate percentage change
merged_data['S&P500 Change (%)'] = ((merged_data['Future_Close'] - merged_data['Close']) / merged_data['Close']) * 100

# Prepare results DataFrame by selecting only necessary columns
sp500_df = merged_data[['Date', 'Date_Plus_365', 'S&P500 Change (%)']].copy()
sp500_df['Date_Plus_365'] = merged_data['Future_Date'] # Use 'Future_Date' after merge
sp500_df['S&P500 Change (%)'] = sp500_df['S&P500 Change (%)'].round(2)

# Display results
print(sp500_df)

[*********************100%***********************]  1 of 1 completed

                          Date             Date_Plus_365  S&P500 Change (%)
0    2012-01-03 00:00:00+00:00 2013-01-02 00:00:00+00:00              14.51
1    2012-01-04 00:00:00+00:00 2013-01-03 00:00:00+00:00              14.25
2    2012-01-05 00:00:00+00:00 2013-01-04 00:00:00+00:00              14.47
3    2012-01-06 00:00:00+00:00 2013-01-07 00:00:00+00:00              14.41
4    2012-01-09 00:00:00+00:00 2013-01-08 00:00:00+00:00              13.78
...                        ...                       ...                ...
2991 2023-11-21 00:00:00+00:00 2024-11-20 00:00:00+00:00              30.38
2992 2023-11-22 00:00:00+00:00 2024-11-21 00:00:00+00:00              30.55
2993 2023-11-24 00:00:00+00:00 2024-11-25 00:00:00+00:00              31.32
2994 2023-11-27 00:00:00+00:00 2024-11-26 00:00:00+00:00              32.33
2995 2023-11-28 00:00:00+00:00 2024-11-27 00:00:00+00:00              31.70

[2996 rows x 3 columns]



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  future_data.rename(columns={'Date': 'Future_Date', 'Close': 'Future_Close'}, inplace=True)


In [296]:
sp500_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2996 entries, 0 to 2995
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   Date               2996 non-null   datetime64[ns, UTC]
 1   Date_Plus_365      2996 non-null   datetime64[ns, UTC]
 2   S&P500 Change (%)  2996 non-null   float64            
dtypes: datetime64[ns, UTC](2), float64(1)
memory usage: 93.6 KB


In [297]:
trades_sub = merged_trades
trades_sub['Traded'] = pd.to_datetime(trades_sub['Traded'])
trades_sub['Traded'] = pd.to_datetime(trades_sub['Traded'], utc=True)
merged_trades = pd.merge(merged_trades, sp500_df, left_on='Traded', right_on='Date', how='left')
merged_trades

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,last_modified,price,price_plus_365,price_change,Symbol,Sector,Industry,Date,Date_Plus_365,S&P500 Change (%)
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,2024-11-04,135.399994,138.250000,2.850006,NVDA,Technology,Semiconductors,NaT,NaT,
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24 00:00:00+00:00,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,2024-10-30,4.200000,4.820000,0.620000,NGL,Utilities,Natural Gas Distribution,NaT,NaT,
2,HD,ST,"HOME DEPOT, INC.",2024-10-21 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,406.399994,429.130005,22.730011,HD,Consumer Discretionary,RETAIL: Building Materials,NaT,NaT,
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,218.850006,345.160004,126.309998,TSLA,Consumer Discretionary,Auto Manufacturing,NaT,NaT,
4,V,ST,VISA INC.,2024-10-21 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,286.850006,315.079987,28.229980,V,Consumer Discretionary,Business Services,NaT,NaT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41312,LMT,,LOCKHEED MARTIN CORPORATION,2012-11-13 00:00:00+00:00,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,2023-11-16,90.180000,156.649994,66.469994,LMT,Industrials,Military/Government/Technical,2012-11-13 00:00:00+00:00,2013-11-13 00:00:00+00:00,29.64
41313,DD,Stock,E. I. du Pont de Nemours and Company (NYSE),2012-09-13 00:00:00+00:00,Purchase,"$1,001 - $15,000",New,,,"Carper, Thomas R.",...,2023-08-28,44.733368,64.886871,20.153503,DD,Industrials,Major Chemicals,2012-09-13 00:00:00+00:00,2013-09-13 00:00:00+00:00,15.62
41314,PG,,PROCTER & GAMBLE COMPANY,2012-08-16 00:00:00+00:00,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,2023-11-16,67.000000,80.419998,13.419998,PG,Consumer Discretionary,Package Goods/Cosmetics,2012-08-16 00:00:00+00:00,2013-08-16 00:00:00+00:00,16.98
41315,CAT,,"CATERPILLAR, INC.",2012-07-26 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,...,2023-11-16,83.300003,87.540001,4.239998,CAT,Industrials,Construction/Ag Equipment/Trucks,2012-07-26 00:00:00+00:00,2013-07-26 00:00:00+00:00,24.38


In [298]:
merged_trades['price_change_pct'] = (merged_trades['price_plus_365'] - merged_trades['price']) / merged_trades['price']*100
merged_trades['365Day_excess_return'] = merged_trades['price_change_pct'] - merged_trades['S&P500 Change (%)']

In [299]:
merged_trades.head()

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,price_plus_365,price_change,Symbol,Sector,Industry,Date,Date_Plus_365,S&P500 Change (%),price_change_pct,365Day_excess_return
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,138.25,2.850006,NVDA,Technology,Semiconductors,NaT,NaT,,2.104879,
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24 00:00:00+00:00,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,4.82,0.62,NGL,Utilities,Natural Gas Distribution,NaT,NaT,,14.761914,
2,HD,ST,"HOME DEPOT, INC.",2024-10-21 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,429.130005,22.730011,HD,Consumer Discretionary,RETAIL: Building Materials,NaT,NaT,,5.593015,
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,345.160004,126.309998,TSLA,Consumer Discretionary,Auto Manufacturing,NaT,NaT,,57.715327,
4,V,ST,VISA INC.,2024-10-21 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,315.079987,28.22998,V,Consumer Discretionary,Business Services,NaT,NaT,,9.841373,


In [300]:
output_df = merged_trades
output_df = output_df.dropna(subset=['365Day_excess_return'])
output_df

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,price_plus_365,price_change,Symbol,Sector,Industry,Date,Date_Plus_365,S&P500 Change (%),price_change_pct,365Day_excess_return
2313,CORP,ST,PIMCO INVESTMENT GRADE CORPORATE BOND INDEX EX...,2023-11-28 00:00:00+00:00,Sale,"$1,001 - $15,000",NEW,UNITED IRA,11/28/23 SOLD 383.695 SHARES OF PIMCO INVESTME...,Carol Devine Miller,...,97.629997,3.919998,,,,2023-11-28 00:00:00+00:00,2024-11-27 00:00:00+00:00,31.70,4.183116,-27.516884
2314,TMO,ST,THERMO FISHER SCIENTIFIC INC,2023-11-28 00:00:00+00:00,Sale,"$1,001 - $15,000",NEW,MORGAN STANLEY - SELECT UMA ACCOUNT # 1,,Josh Gottheimer,...,529.630005,43.709991,TMO,Industrials,Industrial Machinery/Components,2023-11-28 00:00:00+00:00,2024-11-27 00:00:00+00:00,31.70,8.995306,-22.704694
2315,CRT,ST,CROSS TIMBERS ROYALTY TRUST,2023-11-28 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,,,Virginia Foxx,...,10.940000,-9.160001,CRT,Energy,Oil & Gas Production,2023-11-28 00:00:00+00:00,2024-11-27 00:00:00+00:00,31.70,-45.572142,-77.272142
2316,HCA,ST,"HCA HEALTHCARE, INC.",2023-11-28 00:00:00+00:00,Sale,"$1,001 - $15,000",NEW,MORGAN STANLEY - SELECT UMA ACCOUNT # 1,,Josh Gottheimer,...,327.220001,80.570007,HCA,Health Care,Hospital/Nursing Management,2023-11-28 00:00:00+00:00,2024-11-27 00:00:00+00:00,31.70,32.665724,0.965724
2317,PANW,ST,"PALO ALTO NETWORKS, INC.",2023-11-28 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,MORGAN STANLEY - SELECT UMA ACCOUNT # 1,,Josh Gottheimer,...,387.820007,109.990021,PANW,Technology,Computer peripheral equipment,2023-11-28 00:00:00+00:00,2024-11-27 00:00:00+00:00,31.70,39.588967,7.888967
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41312,LMT,,LOCKHEED MARTIN CORPORATION,2012-11-13 00:00:00+00:00,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,156.649994,66.469994,LMT,Industrials,Military/Government/Technical,2012-11-13 00:00:00+00:00,2013-11-13 00:00:00+00:00,29.64,73.708132,44.068132
41313,DD,Stock,E. I. du Pont de Nemours and Company (NYSE),2012-09-13 00:00:00+00:00,Purchase,"$1,001 - $15,000",New,,,"Carper, Thomas R.",...,64.886871,20.153503,DD,Industrials,Major Chemicals,2012-09-13 00:00:00+00:00,2013-09-13 00:00:00+00:00,15.62,45.052506,29.432506
41314,PG,,PROCTER & GAMBLE COMPANY,2012-08-16 00:00:00+00:00,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,80.419998,13.419998,PG,Consumer Discretionary,Package Goods/Cosmetics,2012-08-16 00:00:00+00:00,2013-08-16 00:00:00+00:00,16.98,20.029848,3.049848
41315,CAT,,"CATERPILLAR, INC.",2012-07-26 00:00:00+00:00,Purchase,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,...,87.540001,4.239998,CAT,Industrials,Construction/Ag Equipment/Trucks,2012-07-26 00:00:00+00:00,2013-07-26 00:00:00+00:00,24.38,5.090033,-19.289967


In [308]:
# remove timezones
# output_df['Traded'] = output_df['Traded'].dt.tz_localize()
# output_df['Traded'] = output_df['Traded'].dt.date
output_df['Date'] = output_df['Date'].dt.date
output_df['Date_Plus_365'] = output_df['Date_Plus_365'].dt.date
# Store output df
output_df.to_excel('../data/qq_trade_df.xlsx',index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['Date'] = output_df['Date'].dt.date
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['Date_Plus_365'] = output_df['Date_Plus_365'].dt.date


In [311]:
def get_congress_number(date):
    start_year = 1789  # Start year of the 1st Congress
    congress_number = ((date.year - start_year) // 2) + 1
    if date.month == 1 and date.day < 3:  # Adjust if before January 3
        congress_number -= 1
    return congress_number

output_df['congress']=output_df['Traded'].apply(get_congress_number)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df['congress']=output_df['Traded'].apply(get_congress_number)


In [313]:
output_df.to_excel('../data/qq_trade_df.xlsx',index=False)


In [None]:
output_df