In [1]:
import os
import pandas as pd
import numpy as np
import yfinance as yf
import gspread

SPREADSHEET_KEY=os.environ['SPREADSHEET_KEY']
ROBINHOOD_BROKERAGE_FILE_PATH='/Users/jaredyu/Desktop/finances/finance_tracker_app/data/investing_statements/robinhood/brokerage'
ROBINHOOD_TRADITIONAL_IRA_FILE_PATH='/Users/jaredyu/Desktop/finances/finance_tracker_app/data/investing_statements/robinhood/traditional_ira/'

In [2]:
# need to open with Numbers, remove the hanging cell, and then export as CSV with utf-8 encoding
brokerage_df = pd.read_csv(
    os.path.join(
        ROBINHOOD_BROKERAGE_FILE_PATH,
        '20201201_20240526.csv'
    )
)
traditional_ira_df = pd.read_csv(
    os.path.join(
        ROBINHOOD_TRADITIONAL_IRA_FILE_PATH,
        '20230116_20240526.csv'
    )
)

In [3]:
# get latest prices for all instruments
df_list = [
    brokerage_df,
    traditional_ira_df,
]
instrument_list = []
for df in df_list:
    instrument_list.extend(list(set(df['Instrument'])))
instrument_list = [i for i in instrument_list if str(i) != 'nan']
instrument_list = list(set(instrument_list))

latest_bid_list = []
for symbol in instrument_list:
    latest_bid = yf.Ticker(symbol).info['bid']
    latest_bid_list.append(latest_bid)
instrument_df = pd.DataFrame(list(zip(instrument_list, latest_bid_list)))
instrument_df.columns = ['SYMBOL', 'PRICE']

In [4]:
def clean_price_col(df):
    df = df.copy()
    df['Price'] = df['Price'].fillna('')
    df['Price'] = df['Price'].apply(lambda x: x.replace('$', '').replace(',', ''))
    df.loc[df['Price'] == '', 'Price'] = np.NaN
    return df
    
def dollar_string_to_float(df, col):
    """
    Convert the $100.00 string to a float.
    """
    return df[col] \
        .replace('[\$,]', '', regex=True) \
        .replace('\(', '-', regex=True) \
        .replace('\)', '', regex=True) \
        .astype(float)

def clean_brokerage_account_table(df):
    brokerage_df = df.copy(deep=True)
    df['Amount'] = dollar_string_to_float(
        df=df,
        col='Amount'
    )

    # calculate net deposits (ACH transfers)
    ach_df = df.loc[
        (df['Description'].str.contains('ACH')) &
        (df['Trans Code'] == 'ACH'),
    ].copy(deep=True)
    net_deposits = round(ach_df['Amount'].sum(), 2)

    # calculate the net interest payments
    interest_payments_df = df.loc[
        (df['Description'].str.contains('Interest Payment')) &
        (df['Trans Code'].str.contains('INT'))
    ].copy(deep=True)
    net_interest_payments = round(interest_payments_df['Amount'].sum(), 2)

    # Robinhood Gold fee
    gold_fee = df.loc[df['Description'].str.contains('Gold Fee'), 'Amount'].sum()

    # calculate the stock lending
    total_stock_lending = df.loc[
        (df['Description'] == 'Stock Lending') &
        (df['Trans Code'] == 'SLIP'),
        'Amount'
    ].sum().round(2)

    # get the stock purchases/sales -> returns
    stock_purchases = df.loc[
        ~(df['Description'].str.contains('Dividend')) &
        ~(df['Description'].str.contains('Option')) &
        (df['Trans Code'] == 'Buy'),
        'Amount'
    ].sum().round(2)
    stock_sales = df.loc[
        ~(df['Description'].str.contains('Dividend')) &
        ~(df['Description'].str.contains('Option')) &
        (df['Trans Code'] == 'Sell'),
        'Amount'
    ].sum().round(2)
    investment_returns = round(stock_sales + stock_purchases, 2)

    # dividend payments
    total_reinvested_dividends = df.loc[df['Description'].str.contains('Dividend Reinvestment'), 'Amount'].sum().round(2)
    total_dividends = df.loc[df['Trans Code'].str.contains('DIV'), 'Amount'].sum().round(2)
    net_dividend_payments = total_dividends + total_reinvested_dividends

    # subset the options data
    options_df = df.loc[
        df['Description'] \
            .str \
            .contains(
                '|'.join(
                    [
                        'Call',
                        'Put',
                        'Option'
                    ]
                )
            )   
    ].copy(deep=True)

    # sort the data from earliest to latest
    options_df['Datetime'] = pd.to_datetime(options_df['Process Date'])
    options_df.sort_values(by='Datetime', ascending=True, inplace=True)
    options_df.reset_index(drop=True, inplace=True)

    # clean the description
    options_df['description_split'] = options_df['Description'].str.split('\n')
    options_df['description_split_length'] = options_df['description_split'].apply(lambda x: len(x))

    option_assigned_df = options_df.loc[
        options_df['description_split_length'] == 3,
        'Description'
    ].str.split('\n').apply(lambda x: x[2])
    option_assigned_list = option_assigned_df.values.tolist()

    # get the total options return
    options_df.loc[
        option_assigned_df.index.tolist(),
        'Description'
    ] = option_assigned_list
    options_return = options_df['Amount'].sum().round(2)

    # clean the activity column
    options_trans_code_mapping_dict = {
        'STO': 'Sell Premium',
        'OEXP': 'Option Expired',
        'OASGN': 'Option Assigned',
        'Buy': 'Buy',
        'Sell': 'Sell',
    }
    options_df['Trans Code'] = options_df['Trans Code'].map(options_trans_code_mapping_dict)
    options_df = options_df[['Process Date', 'Instrument', 'Description', 'Trans Code', 'Quantity', 'Price', 'Amount']].copy(deep=True)
    options_df.rename(columns={'Trans Code': 'Activity'}, inplace=True)

    options_df['Quantity'] = options_df['Quantity'].astype(int)
    total_premium_sold = options_df.loc[options_df['Activity'] == 'Sell Premium', 'Amount'].sum()

    # clean up nulls and save
    # options_df.fillna('', inplace=True)
    options_df = clean_price_col(options_df)
    options_df.replace({np.nan: None}, inplace=True)

    # subset out the options data
    brokerage_df = df.loc[
        ~df['Description'] \
            .str \
            .contains(
                '|'.join(
                    [
                        'Call',
                        'Put',
                        'Option'
                    ]
                )
            )   
    ].copy(deep=True)

    # sort the data from earliest to latest
    brokerage_df['Datetime'] = pd.to_datetime(brokerage_df['Process Date'])
    brokerage_df.sort_values(by='Datetime', ascending=True, inplace=True)
    brokerage_df.reset_index(drop=True, inplace=True)
    brokerage_df.drop('Datetime', axis=1, inplace=True)

    # clean dividend descriptions
    brokerage_df.loc[
        brokerage_df['Description'].str.contains('Cash Div'),
        'Description'
    ] = 'Cash Dividend'
    brokerage_df.loc[
        brokerage_df['Description'].str.contains('Manufactured Div'),
        'Description'
    ] = 'Manufactured Dividend'
    brokerage_df.loc[
        brokerage_df['Description'].str.contains('Dividend Reinvestment'),
        'Description'
    ] = 'Dividend Reinvestment'

    # clean the remaining \nCUSIP descriptions
    cusip_df = brokerage_df.loc[
        brokerage_df['Description'].str.contains('\nCUSIP')
    ]
    cusip_clean_description_list = cusip_df['Description'] \
        .apply(lambda x: x.split('\nCUSIP')) \
        .apply(lambda y: y[0]) \
        .values \
        .tolist()
    brokerage_df.loc[
        cusip_df.index,
        'Description'
    ] = cusip_clean_description_list

    # clean the Trans Code and the other columns
    brokerage_code_mapping_dict = {
        'ACH': 'ACH',
        'CDIV': 'Cash Dividend',
        'MDIV': 'Manufactured Dividend',
        'SLIP': 'Stock Lending',
        'Buy': 'Buy',
        'Sell': 'Sell',
        'GOLD': 'Robinhood Gold',
        'INT': 'Interest Payment',
    }
    brokerage_df['Trans Code'] = brokerage_df['Trans Code'].map(brokerage_code_mapping_dict)
    brokerage_df = brokerage_df[
        ['Process Date', 'Instrument', 'Description', 'Trans Code', 'Quantity', 'Price', 'Amount']
    ].copy(deep=True)
    brokerage_df.rename(columns={'Trans Code': 'Activity'}, inplace=True)

    brokerage_df = clean_price_col(brokerage_df)
    brokerage_df.replace({np.nan: None}, inplace=True)

    return brokerage_df, options_df

In [5]:
def clean_traditional_ira_account_table(df):
    traditional_ira_df = df.copy()
    traditional_ira_df['Amount'] = dollar_string_to_float(
        df=traditional_ira_df,
        col='Amount'
    )

    # clean the Trans Code
    traditional_ira_code_mapping_dict = {
        'ACH': 'ACH',
        'CDIV': 'Cash Dividend',
        'MDIV': 'Manufactured Dividend',
        'SLIP': 'Stock Lending',
        'Buy': 'Buy',
        'Sell': 'Sell',
        'GOLD': 'Robinhood Gold',
        'INT': 'Interest Payment',
        'MTCH': 'IRA Match',
        'DRFRO': 'Direct Rollover',
        'CFIR': 'Contribution',
        'ACATI': 'Asset Transfer',
    }
    traditional_ira_df['Trans Code'] = traditional_ira_df['Trans Code'].map(traditional_ira_code_mapping_dict)

    # clean dividend descriptions
    traditional_ira_df.loc[
        traditional_ira_df['Description'].str.contains('Cash Div'),
        'Description'
    ] = 'Cash Dividend'
    traditional_ira_df.loc[
        traditional_ira_df['Description'].str.contains('Manufactured Div'),
        'Description'
    ] = 'Manufactured Dividend'
    traditional_ira_df.loc[
        traditional_ira_df['Description'].str.contains('Dividend Reinvestment'),
        'Description'
    ] = 'Dividend Reinvestment'

    # clean the remaining \nCUSIP descriptions
    cusip_df = traditional_ira_df.loc[
        traditional_ira_df['Description'].str.contains('\nCUSIP')
    ]
    cusip_clean_description_list = cusip_df['Description'] \
        .apply(lambda x: x.split('\nCUSIP')) \
        .apply(lambda y: y[0]) \
        .values \
        .tolist()
    traditional_ira_df.loc[
        cusip_df.index,
        'Description'
    ] = cusip_clean_description_list

    # clean the asset transfer descriptions
    traditional_ira_df.loc[
        (traditional_ira_df['Trans Code'].str.contains('Asset')) &
        (traditional_ira_df['Description'].str.contains('ACAT IN')),
        'Description'
    ] = 'Asset Transfer'

    # clean direct rollover
    traditional_ira_df.loc[
        traditional_ira_df['Description'].str.contains('Direct Rollover Check Received'),
        'Description'
    ] = 'Direct Rollover Check'

    # clean IRA interest description
    traditional_ira_df.loc[
        traditional_ira_df['Description'].str.contains('Interest on Contribution \(IRA Match\)'),
        'Description'
    ] = 'Robinhood IRA Match'

    # clean IRA contribution description
    traditional_ira_df.loc[
        traditional_ira_df['Description'].str.contains('Current Year Contribution'),
        'Description'
    ] = 'IRA Contribution'

    # sort the data from earliest to latest
    traditional_ira_df['Datetime'] = pd.to_datetime(traditional_ira_df['Process Date'])
    traditional_ira_df.sort_values(by='Datetime', ascending=True, inplace=True)
    traditional_ira_df.reset_index(drop=True, inplace=True)

    # clean the columns
    traditional_ira_df = traditional_ira_df[
        ['Process Date', 'Instrument', 'Description', 'Trans Code', 'Quantity', 'Price', 'Amount']
    ].copy(deep=True)
    traditional_ira_df.rename(columns={'Trans Code': 'Activity'}, inplace=True)

    traditional_ira_df = clean_price_col(traditional_ira_df)
    traditional_ira_df.replace({np.nan: None}, inplace=True)

    return traditional_ira_df

In [6]:
brokerage_df, options_df = clean_brokerage_account_table(brokerage_df)

In [36]:
set(brokerage_df['Activity'])

{'ACH',
 'Buy',
 'Cash Dividend',
 'Interest Payment',
 'Manufactured Dividend',
 'Robinhood Gold',
 'Sell',
 'Stock Lending'}

In [38]:
brokerage_df.head(10)

Unnamed: 0,Process Date,Instrument,Description,Activity,Quantity,Price,Amount
0,1/7/2021,,ACH CANCEL,ACH,,,-600.0
1,1/7/2021,,ACH Deposit,ACH,,,600.0
2,7/8/2021,SPY,SPDR S&P 500 ETF,Buy,2.323825,430.32,-999.99
3,7/8/2021,,ACH Deposit,ACH,,,2000.0
4,7/8/2021,SPY,SPDR S&P 500 ETF,Buy,2.3e-05,430.32,-0.01
5,7/14/2021,QQQ,Invesco QQQ,Buy,2.74104,364.82,-1000.0
6,7/29/2021,SPY,SPDR S&P 500 ETF,Sell,0.323848,441.6,143.01
7,7/29/2021,QQQ,Invesco QQQ,Sell,2.0,367.09,734.17
8,7/29/2021,QQQ,Invesco QQQ,Sell,0.74104,367.1,272.03
9,7/29/2021,VOO,Vanguard S&P 500 ETF,Buy,0.004801,406.09,-1.95


In [7]:
traditional_ira_df = clean_traditional_ira_account_table(traditional_ira_df)

In [96]:
gc = gspread.service_account()
finance_tracker_db_spreadsheet = gc.open_by_key(SPREADSHEET_KEY)

robinhood_brokerage_worksheet = finance_tracker_db_spreadsheet.worksheet('robinhood_brokerage')
robinhood_brokerage_worksheet.update([brokerage_df.columns.values.tolist()] + brokerage_df.values.tolist())
robinhood_brokerage_worksheet.format("A:A", {"numberFormat": {"type": "DATE_TIME"}})

robinhood_options_trading_worksheet = finance_tracker_db_spreadsheet.worksheet('robinhood_options_trading')
robinhood_options_trading_worksheet.update([options_df.columns.values.tolist()] + options_df.values.tolist())
robinhood_options_trading_worksheet.format("A:A", {"numberFormat": {"type": "DATE_TIME"}})

robinhood_traditional_ira_worksheet = finance_tracker_db_spreadsheet.worksheet('robinhood_traditional_ira')
robinhood_traditional_ira_worksheet.update([traditional_ira_df.columns.values.tolist()] + traditional_ira_df.values.tolist())
robinhood_traditional_ira_worksheet.format("A:A", {"numberFormat": {"type": "DATE_TIME"}})

{'spreadsheetId': '1CAyyf2kr-pS7LNX1a_0ithw6niL3Js3K4ZEOlwDViZY',
 'replies': [{}]}

In [24]:
options_df['Amount'].sum()

-38464.73000000026

In [28]:
options_df.tail(20)

Unnamed: 0,Process Date,Instrument,Description,Activity,Quantity,Price,Amount
201,5/13/2024,QQQ,QQQ 5/13/2024 Call $446.00,Sell Premium,1,0.04,3.95
202,5/13/2024,QQQ,Option Expiration for QQQ 5/13/2024 Call $446.00,Option Expired,1,,
203,5/14/2024,QQQ,QQQ 5/14/2024 Call $447.00,Sell Premium,1,0.09,8.95
204,5/14/2024,QQQ,Option Expiration for QQQ 5/14/2024 Call $447.00,Option Expired,1,,
205,5/15/2024,QQQ,QQQ 5/15/2024 Call $452.00,Sell Premium,1,0.16,15.95
206,5/15/2024,QQQ,1 QQQ Option Assigned,Sell,100,452.0,45199.61
207,5/15/2024,QQQ,QQQ 5/15/2024 Call $452.00,Option Assigned,1,,
208,5/16/2024,QQQ,QQQ 5/16/2024 Put $454.00,Option Assigned,1,,
209,5/16/2024,QQQ,QQQ 5/16/2024 Put $454.00,Sell Premium,1,1.01,100.95
210,5/16/2024,QQQ,1 QQQ Option Assigned,Buy,100,454.0,-45400.0


In [21]:
options_df.loc[options_df['Activity'] == 'Sell Premium', 'Amount'].sum()

2988.139999999991

In [34]:
instrument_df

Unnamed: 0,SYMBOL,PRICE
0,VOO,481.46
1,AAPL,191.54
2,QQQ,453.52
3,SPY,523.8


In [11]:
# brokerage_df.head(30)

In [17]:
set(traditional_ira_df['Description'])

{'Asset Transfer',
 'Cash Dividend',
 'Direct Rollover Check',
 'Dividend Reinvestment',
 'IRA Contribution',
 'Invesco QQQ',
 'Robinhood IRA Match',
 'Stock Lending'}

In [16]:
set(traditional_ira_df['Activity'])

{'Asset Transfer',
 'Buy',
 'Cash Dividend',
 'Contribution',
 'Direct Rollover',
 'IRA Match',
 'Stock Lending'}

In [30]:
traditional_ira_df['Amount'].sum()

0.12000000000047549

In [31]:
set(traditional_ira_df['Activity'])

{'Asset Transfer',
 'Buy',
 'Cash Dividend',
 'Contribution',
 'Direct Rollover',
 'IRA Match',
 'Stock Lending'}

In [33]:
traditional_ira_df.loc[traditional_ira_df['Activity'] == 'Buy']

Unnamed: 0,Process Date,Instrument,Description,Activity,Quantity,Price,Amount
1,2/6/2023,QQQ,Invesco QQQ,Buy,0.999967,304.03,-304.02
3,2/6/2023,QQQ,Invesco QQQ,Buy,0.661074,304.02,-200.98
7,3/29/2023,QQQ,Invesco QQQ,Buy,1.615308,312.64,-505.01
11,4/6/2023,QQQ,Invesco QQQ,Buy,0.074784,314.24,-23.5
13,4/11/2023,QQQ,Invesco QQQ,Buy,0.057645,317.74,-18.32
18,4/24/2023,QQQ,Invesco QQQ,Buy,0.000695,316.33,-0.22
19,4/24/2023,QQQ,Invesco QQQ,Buy,1.595727,316.33,-504.78
21,5/1/2023,QQQ,Invesco QQQ,Buy,0.002424,321.71,-0.78
24,5/2/2023,QQQ,Invesco QQQ,Buy,0.034117,322.11,-10.99
27,5/23/2023,QQQ,Invesco QQQ,Buy,14.999791,336.29,-5044.28


In [29]:
traditional_ira_df.head(10)

Unnamed: 0,Process Date,Instrument,Description,Activity,Quantity,Price,Amount
0,2/6/2023,,Robinhood IRA Match,IRA Match,,,5.0
1,2/6/2023,QQQ,Invesco QQQ,Buy,0.999967,304.03,-304.02
2,2/6/2023,,IRA Contribution,Contribution,,,500.0
3,2/6/2023,QQQ,Invesco QQQ,Buy,0.661074,304.02,-200.98
4,3/6/2023,QQQ,Stock Lending,Stock Lending,,,0.01
5,3/29/2023,,IRA Contribution,Contribution,,,500.0
6,3/29/2023,,Robinhood IRA Match,IRA Match,,,5.0
7,3/29/2023,QQQ,Invesco QQQ,Buy,1.615308,312.64,-505.01
8,4/5/2023,,Asset Transfer,Asset Transfer,,,-49.39
9,4/5/2023,QQQ,Invesco QQQ,Asset Transfer,23.0,,


In [18]:
traditional_ira_df.tail(10)

Unnamed: 0,Process Date,Instrument,Description,Activity,Quantity,Price,Amount
42,1/3/2024,QQQ,Dividend Reinvestment,Buy,0.088318,399.35,-35.27
43,1/8/2024,QQQ,Stock Lending,Stock Lending,,,0.01
44,1/16/2024,QQQ,Cash Dividend,Cash Dividend,,,9.42
45,1/17/2024,QQQ,Dividend Reinvestment,Buy,0.023295,404.37,-9.42
46,2/7/2024,QQQ,Stock Lending,Stock Lending,,,0.01
47,3/7/2024,QQQ,Stock Lending,Stock Lending,,,0.01
48,4/5/2024,QQQ,Stock Lending,Stock Lending,,,0.01
49,4/30/2024,QQQ,Cash Dividend,Cash Dividend,,,25.08
50,5/1/2024,QQQ,Dividend Reinvestment,Buy,0.059369,422.44,-25.08
51,5/6/2024,QQQ,Stock Lending,Stock Lending,,,0.01


In [9]:
# df = brokerage_df.copy()
# df['Date'] = df['Process Date']
# df['Symbol'] = df['Instrument']

In [100]:
df = brokerage_df.copy()

# Ensure that the data is sorted by date
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date')

# Initialize a dictionary to keep track of holdings and P/L
holdings = {}
cash = 0
pl = 0

# Process each row in the DataFrame
for index, row in df.iterrows():
    symbol = row['Symbol']
    action = row['Action'].lower()
    quantity = row['Quantity']
    price = row['Price']

    if action == 'buy':
        cost = quantity * price
        cash -= cost

        if symbol in holdings:
            holdings[symbol]['quantity'] += quantity
            holdings[symbol]['cost'] += cost
        else:
            holdings[symbol] = {'quantity': quantity, 'cost': cost}
    elif action == 'sell':
        if symbol in holdings and holdings[symbol]['quantity'] >= quantity:
            revenue = quantity * price
            cash += revenue
            holdings[symbol]['quantity'] -= quantity
            avg_cost_per_share = holdings[symbol]['cost'] / holdings[symbol]['quantity']
            profit = (price - avg_cost_per_share) * quantity
            pl += profit

            if holdings[symbol]['quantity'] == 0:
                del holdings[symbol]

# Calculate current holdings value
current_value = sum([holdings[symbol]['quantity'] * holdings[symbol]['cost'] / holdings[symbol]['quantity'] for symbol in holdings])

# Total account balance
account_balance = cash + current_value

print(f"Total P/L: ${pl:.2f}")
print(f"Current Account Balance: ${account_balance:.2f}")


KeyError: 'Date'