This is for the QuiverQuant data set

In [2]:
#packages

import pandas as pd
from pandas.tseries.offsets import BDay
import yfinance as yf
from tqdm import tqdm

In [3]:
#Import Data
trade_df = pd.read_excel('../data/quiverquant_congress_trading_all.xlsx')

In [4]:
trade_df['Traded'] = pd.to_datetime(trade_df['Traded'])

In [6]:
import pandas as pd
import yfinance as yf
from pandas.tseries.offsets import BDay

def get_adjusted_closing_price(trade_df, traded_date_col, ticker_col, offset_days=0, new_col_name="Adjusted_Closing_Price"):
    """
    Retrieves the closing price on a specified traded date with an optional offset in days.
    Adjusts to the nearest trading day if necessary, using yfinance to pull historical data.
    
    Parameters:
    - trade_df (pd.DataFrame): DataFrame with trade information.
    - traded_date_col (str): Name of the column in trade_df that contains the traded date.
    - ticker_col (str): Name of the column in trade_df that contains the ticker symbol.
    - offset_days (int): Number of days after the traded date to pull data for (can be negative).
    - new_col_name (str): Name of the new column to store the result in trade_df.
    
    Returns:
    - pd.DataFrame: trade_df with a new column containing the adjusted closing price.
    """
    
    # Ensure 'traded_date' column is in datetime format
    trade_df[traded_date_col] = pd.to_datetime(trade_df[traded_date_col])
    
    # Initialize new column in trade_df
    trade_df[new_col_name] = None
    
    # Cache for already pulled tickers to avoid redundant API calls
    ticker_data_cache = {}
    
    # Iterate through each row in trade_df with a progress bar
    for idx, row in tqdm(trade_df.iterrows(), total=len(trade_df), desc="Processing trades"):
        traded_date = row[traded_date_col]
        ticker = row[ticker_col]
        
        # Adjust the date by the given offset and snap to the nearest business day
        target_date = (traded_date + BDay(offset_days)).normalize()  # Normalize to ensure consistency
        
        # Check if we've already downloaded data for this ticker
        if ticker not in ticker_data_cache:
            # Download historical data for the ticker using yfinance
            ticker_data = yf.download(ticker, start=traded_date - pd.Timedelta(days=30), end=traded_date + pd.Timedelta(days=30))
            ticker_data_cache[ticker] = ticker_data
        
        # Access the ticker data from cache
        ticker_data = ticker_data_cache[ticker]
        
        # Check if ticker_data is empty
        if ticker_data.empty:
            trade_df.at[idx, new_col_name] = None  # No data available, set to None or a default value
            continue
        
        # Ensure the data has a datetime index and is tz-naive
        ticker_data.index = ticker_data.index.tz_localize(None)
        
        # Check if target_date exists in the price data; otherwise, find the closest available date
        if target_date in ticker_data.index:
            closing_price = ticker_data.loc[target_date, 'Close']
        else:
            # If target_date is not available, find the closest available date
            time_diffs = abs((ticker_data.index - target_date).days)  # Get absolute day differences
            closest_date = ticker_data.index[time_diffs.argmin()] if not time_diffs.empty else None
            closing_price = ticker_data.loc[closest_date, 'Close'] if closest_date is not None else None
        
        # Set the adjusted closing price in the new column
        trade_df.at[idx, new_col_name] = closing_price
    
    return trade_df

# Example usage
# Assuming you have trade_df with 'Traded' and 'Ticker' columns
# trade_df = pd.DataFrame(...)  # Your dataset

# trade_df = get_adjusted_closing_price(trade_df, traded_date_col='Traded', ticker_col='Ticker', offset_days=1, new_col_name='Next_Day_Closing_Price')


In [7]:
trade_df = get_adjusted_closing_price(trade_df,'Traded', 'Ticker',offset_days=0,new_col_name='price')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed/s]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1

In [8]:
# Drop rows where 'price' is NaN or None
trade_df = trade_df.dropna(subset=['price']).reset_index(drop=True)


In [9]:
trade_df.head()

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,Filed,Party,District,Chamber,Comments,Quiver_Upload_Time,excess_return,State,last_modified,price
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,2024-11-01,R,TX17,House,,2024-11-04,0.697227,Texas,2024-11-04,Ticker NVDA 135.399994 Name: 2024-11-01 00:...
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,2024-10-29,R,TN07,House,,2024-10-30,-3.609834,Tennessee,2024-10-30,"Ticker NGL 4.2 Name: 2024-10-24 00:00:00, d..."
2,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,-0.292208,Georgia,2024-10-22,Ticker HD 406.399994 Name: 2024-10-21 00:00...
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,13.3345,Georgia,2024-10-22,Ticker TSLA 218.850006 Name: 2024-10-21 00:...
4,V,ST,VISA INC.,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,2024-10-22,R,GA14,House,,2024-10-22,4.115725,Georgia,2024-10-22,Ticker V 286.850006 Name: 2024-10-21 00:00:...


In [10]:
trade_df = get_adjusted_closing_price(trade_df,'Traded', 'Ticker',offset_days=365,new_col_name='price_plus_365')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completeds]
[*********************100%***********************]  1 of 1 completed/s]
[*********************100%***********************]  1 of 1 completed/s]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed/s]
[**********

In [None]:
# Store trade df
trade_df.to_excel('../data/qq_trade_df.xlsx',index=False)

In [63]:
# Recover trade df
trade_df = pd.read_excel('../data/qq_trade_df.xlsx')

In [77]:
def extract_price(value):
    # Try extracting the second part, assuming it’s a string that includes other text
    try:
        return float(value.split()[2])
    except (IndexError, ValueError):
        return None  # Use np.nan if preferred

In [78]:
trade_df['price'] = trade_df['price'].apply(extract_price)

In [80]:
trade_df['price_plus_365'] = trade_df['price_plus_365'].apply(extract_price)



In [81]:
trade_df['price_plus_365'] 

0        147.452301
1          4.150000
2        404.345001
3        317.140015
4        310.769989
            ...    
41450    545.380005
41451     76.379997
41452    168.059998
41453    371.170013
41454    168.059998
Name: price_plus_365, Length: 41455, dtype: float64

In [82]:
trade_df['price_change']=trade_df['price_plus_365']-trade_df['price']

In [83]:
trade_df

Unnamed: 0,Ticker,TickerType,Company,Traded,Transaction,Trade_Size_USD,Status,Subholding,Description,Name,...,District,Chamber,Comments,Quiver_Upload_Time,excess_return,State,last_modified,price,price_plus_365,price_change
0,NVDA,ST,NVIDIA CORPORATION - COMMON STOCK,2024-11-01,Purchase,"$1,001 - $15,000",NEW,IRA ONE,,Pete Sessions,...,TX17,House,,2024-11-04,0.697227,Texas,2024-11-04,135.399994,147.452301,12.052307
1,NGL,ST,NGL ENERGY PARTNERS LP COMMON UNITS REPRESENTI...,2024-10-24,Sale,"$50,001 - $100,000",NEW,EQUITABLE ADVISORS INVESTMENT ACCOUNT,,Mark Dr Green,...,TN07,House,,2024-10-30,-3.609834,Tennessee,2024-10-30,4.200000,4.150000,-0.050000
2,HD,ST,"HOME DEPOT, INC.",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,-0.292208,Georgia,2024-10-22,406.399994,404.345001,-2.054993
3,TSLA,ST,"TESLA, INC. - COMMON STOCK",2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,13.334500,Georgia,2024-10-22,218.850006,317.140015,98.290009
4,V,ST,VISA INC.,2024-10-21,Purchase,"$1,001 - $15,000",NEW,,,Marjorie Taylor Mrs Greene,...,GA14,House,,2024-10-22,4.115725,Georgia,2024-10-22,286.850006,310.769989,23.919983
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41450,LMT,,LOCKHEED MARTIN CORPORATION,2012-11-13,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,IL08,House,,2020-07-26,187.052900,Illinois,2023-11-16,545.380005,545.380005,0.000000
41451,DD,Stock,E. I. du Pont de Nemours and Company (NYSE),2012-09-13,Purchase,"$1,001 - $15,000",New,,,"Carper, Thomas R.",...,,Senate,,2020-07-26,-336.952251,Delaware,2023-08-28,76.379997,76.379997,0.000000
41452,PG,,PROCTER & GAMBLE COMPANY,2012-08-16,Sale,"$1,001 - $15,000",NEW,BROKERAGE #1 - EDJ 2718,,Ms. Tammy Duckworth,...,IL08,House,,2020-07-26,-156.969402,Illinois,2023-11-16,168.059998,168.059998,0.000000
41453,CAT,,"CATERPILLAR, INC.",2012-07-26,Purchase,"$1,001 - $15,000",NEW,BROKERAGE #2 USAA 8425,,Ms. Tammy Duckworth,...,IL08,House,,2020-07-26,31.417039,Illinois,2023-11-16,371.170013,371.170013,0.000000


In [84]:
# Store trade df
trade_df.to_excel('../data/qq_trade_df.xlsx',index=False)