# The bagholder project: Measuring stock purchase price of apes

## 1. Get daily GME price data since December 2019

The price data comes from Yahoo Finance. The cell below also saves a copy to `data/GME.csv` so the prices can be inspected outside the notebook; comment out the `to_csv` line if you don't want the file.

In [1]:
import hashlib
import time
from bisect import *
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from urllib.parse import urlencode

import pandas as pd
import requests
from yahoo_fin.stock_info import get_data, get_live_price

In [2]:
# First day of price history to download (MM/DD/YYYY).
START = '12/04/2019'
current_time = datetime.now()
today = f"{current_time.month}/{current_time.day}/{current_time.year}"
# Daily GME rows from START through today, indexed by date.
stock_prices = get_data('GME', start_date=START, end_date=today, index_as_date=True, interval='1d')
# to_csv does not create missing directories, so make sure data/ exists on a fresh checkout.
Path('data').mkdir(parents=True, exist_ok=True)
stock_prices.to_csv('data/GME.csv')
stock_prices

Unnamed: 0,open,high,low,close,adjclose,volume,ticker
2019-12-04,1.542500,1.622500,1.530000,1.607500,1.607500,11489600,GME
2019-12-05,1.580000,1.630000,1.560000,1.600000,1.600000,17525600,GME
2019-12-06,1.597500,1.670000,1.597500,1.670000,1.670000,19029200,GME
2019-12-09,1.662500,1.730000,1.547500,1.582500,1.582500,25002400,GME
2019-12-10,1.572500,1.635000,1.530000,1.627500,1.627500,38379600,GME
...,...,...,...,...,...,...,...
2022-07-21,39.930000,40.000000,37.290001,38.367500,38.367500,9277600,GME
2022-07-22,36.880001,38.700001,34.930000,35.779999,35.779999,10039800,GME
2022-07-25,35.000000,35.509998,33.250000,33.980000,33.980000,5686300,GME
2022-07-26,32.869999,33.310001,32.099998,32.430000,32.430000,4866100,GME


## 2. Scrape the apes' purchase data from their own API =)

I have neither the desire nor the financial resources to build my own scraper, so I used the one the apes made :)

In [3]:
# Scrap API and get a df of name -> (total_cost, share_count)
API_URL = 'https://5o7q0683ig.execute-api.us-west-2.amazonaws.com/prod/computershared/posts'

# The API returns at most 500 records per request. To fetch the next batch, pass the
# user (u) and post id (id) of the last record of the previous batch as the resume point.
def make_url(u=None, pid=None):
    """Build the request URL for one page of the posts API.

    Args:
        u: User to resume after; ``None`` requests the first page.
        pid: Post id to resume after (sent together with ``u``).

    Returns:
        str: ``API_URL`` unchanged when ``u`` is ``None``; otherwise ``API_URL`` with
        ``resumeUser`` and ``resumeId`` query parameters.
    """
    # Identity check: `== None` can be fooled by objects that override __eq__.
    if u is None:
        return API_URL
    # urlencode escapes characters such as '&', '#' or spaces that would otherwise
    # corrupt the query string.
    return f"{API_URL}?{urlencode({'resumeUser': u, 'resumeId': pid})}"

def get_day_str(epoch_time):
    """Format a Unix timestamp as a ``YYYY-MM-DD`` day string in local time."""
    local_struct = time.localtime(epoch_time)
    day_format = '%Y-%m-%d'
    return time.strftime(day_format, local_struct)

# Will find the day nearest time posted and get stock prices from that day
# Ex: If post posted on Sunday, will get closing price on Friday.
def get_stock_price(epoch_time):
    """Return the closing price of the last ``stock_prices`` row at or before a post time.

    Relies on the module-level ``stock_prices`` frame having an ascending date index.

    Args:
        epoch_time: Post time as a Unix timestamp in seconds; converted to a naive
            local datetime before being compared with the index.

    Returns:
        float: The ``close`` value of the latest row whose index is not after the
        post time, or of the first row when the post predates every row.
    """
    posted_at = pd.Timestamp(datetime.fromtimestamp(epoch_time))
    # side='right' yields the number of rows dated at or before the post, so the row
    # just before that position is the most recent one available at post time.
    idx = stock_prices.index.searchsorted(posted_at, side='right')
    # Clamp to the first row for posts older than the price history.
    row = max(int(idx) - 1, 0)
    # Always hand back a plain Python float, whichever row was picked.
    return float(stock_prices['close'].iloc[row])
    
# Return a df of name -> (total_cost, share_count)
# A note: The ape API is either unreliable or does not have data before a certain point. Tread with caution
# The parameter you expected might not be there...
def scrap_api_data(limit=1000):
    """Page through the ape API and total up each user's purchases.

    Every item is priced with ``get_stock_price`` at its post time and added to the
    running totals of the posting user. Items missing an expected field, or carrying
    an unparsable value, are skipped after printing "FUD".

    Args:
        limit: Approximate number of records to fetch. Whole pages are always
            processed, so the final number can overshoot.

    Returns:
        tuple: ``(purchased_data, earliest_recorded_date)``. ``purchased_data`` is a
        DataFrame indexed by user with float columns ``Total Cost`` and
        ``Share Count``. ``earliest_recorded_date`` is the day string (as produced by
        ``get_day_str``) of the oldest post processed, or today's day string when no
        post was processed.
    """
    u, pid = None, None
    # Seed with today's date rather than a hard-coded day so the running minimum
    # stays meaningful whenever the notebook is re-run.
    earliest_recorded_date = get_day_str(time.time())
    # user -> [total cost, share count]. The DataFrame is built once at the end
    # because growing it row by row through .loc is slow and yields object columns.
    totals = {}
    while limit > 0:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(make_url(u, pid), timeout=30)
        json_data = response.json()
        if 'Items' not in json_data:
            # Requesting the same page again cannot make progress (u, pid and limit
            # are unchanged), so stop and return what has been collected so far.
            print("No 'Items' in response, stopping")
            break
        items = json_data['Items']
        # 'Count' might be missing; fall back to the page size so limit still shrinks.
        count = int(json_data['Count']) if 'Count' in json_data else len(items)
        limit -= count

        for item in items:
            # Get data for each post. If a parameter isn't there (or is garbage), skip it.
            try:
                posted_time = int(float(item['timestamp']['N']))  # the API sends the timestamp as a float
                day = get_day_str(posted_time)
                purchased_price = get_stock_price(posted_time)
                user = item['u']['S']
                share_count = float(item['shares']['N'])
            except (KeyError, TypeError, ValueError, OverflowError, OSError):
                # Only data problems are skipped; KeyboardInterrupt and real bugs still surface.
                print("FUD")
                continue

            earliest_recorded_date = min(earliest_recorded_date, day)

            user_totals = totals.setdefault(user, [0.0, 0.0])
            user_totals[0] += share_count * purchased_price
            user_totals[1] += share_count

        # Go to the next page.
        # Without a resume key there is nowhere left to go / no more data to scrape.
        if 'LastEvaluatedKey' not in json_data:
            print("FUDDDDD")
            break
        u = json_data['LastEvaluatedKey']['u']['S']
        pid = json_data['LastEvaluatedKey']['id']['S']
        print(f"Next 500, {limit} left")

    purchased_data = pd.DataFrame(
        list(totals.values()),
        index=list(totals.keys()),
        columns=['Total Cost', 'Share Count'],
        dtype=float,
    )
    return purchased_data, earliest_recorded_date

In [None]:
# Scrape up to 50,000 records, report the oldest post day seen, then display the totals.
purchased_data, earliest_recorded_date = scrap_api_data(50_000)
print(f"Data recorded since {earliest_recorded_date}")
purchased_data

Next 500, 49500 left
Next 500, 49000 left
Next 500, 48500 left
Next 500, 48000 left
Next 500, 47500 left
Next 500, 47000 left
Next 500, 46500 left
Next 500, 46000 left
Next 500, 45500 left
Next 500, 45000 left
Next 500, 44500 left
Next 500, 44000 left
Next 500, 43500 left
Next 500, 43000 left
Next 500, 42500 left
Next 500, 42000 left
Next 500, 41500 left
Next 500, 41000 left
Next 500, 40500 left
Next 500, 40000 left
Next 500, 39500 left
Next 500, 39000 left
Next 500, 38500 left
Next 500, 38000 left
Next 500, 37500 left
Next 500, 37000 left
Next 500, 36500 left
Next 500, 36000 left
Next 500, 35500 left
Next 500, 35000 left
Next 500, 34500 left
Next 500, 34000 left
Next 500, 33500 left
Next 500, 33000 left
Next 500, 32500 left
Next 500, 32000 left
Next 500, 31500 left
Next 500, 31000 left
Next 500, 30500 left
Next 500, 30000 left
Next 500, 29500 left
Next 500, 29000 left
Next 500, 28500 left
Next 500, 28000 left
Next 500, 27500 left
Next 500, 27000 left
Next 500, 26500 left
Next 500, 260

# Who's the biggest bagholder?

## Get user with the largest average price

In [None]:
# Per-user average purchase price = total cost / shares held.
# Users with a non-positive recorded share count get NaN instead of a division error
# or an infinite price, so they cannot top the ranking (sort_values puts NaN last).
share_counts = purchased_data['Share Count'].astype(float)
total_costs = purchased_data['Total Cost'].astype(float)
purchased_data['Average Price'] = total_costs / share_counts.where(share_counts > 0)
purchased_data.sort_values(by='Average Price', ascending=False)

# Who's the biggest whale?

## Get user with the largest share count and position

In [None]:
# Rank holders by the total amount spent, largest first.
purchased_data.sort_values('Total Cost', ascending=False)

In [None]:
# Rank holders by the number of shares held, largest first.
purchased_data.sort_values('Share Count', ascending=False)

# Price is right: Comparing GME hodlers' numbers to the current share price

In [None]:
# Summary over every scraped holder. `df` is an alias of purchased_data, not a copy,
# and this cell expects its 'Average Price' column to exist already.
df = purchased_data
current_price = get_live_price('GME')
total_share = df['Share Count'].sum()
total_cost = df['Total Cost'].sum()
# Share-weighted average purchase price across all holders.
average_price = total_cost / total_share
print(f"Total share purchased: {round(total_share, 2):,} shares")
print(f"Total cost: ${round(total_cost, 2):,}")
print(f"Average price of ape: ${round(average_price, 2):,}")
# Gain/loss is measured against the cost basis (the average purchase price):
# bought at $100 and now at $50 is -50%; dividing by the current price would say -100%.
print(f"Are they up? {round((current_price-average_price)/average_price * 100, 2):,}%")
print(f"Average cost: ${round(df['Total Cost'].mean(), 2):,}")
print(f"Average holding: {round(df['Share Count'].mean(), 2):,} shares")
print(f"Median cost: ${round(df['Total Cost'].median(), 2):,}")
print(f"Median holding: {round(df['Share Count'].median(), 2):,} shares")
print(f"Median Average Price: ${round(df['Average Price'].median(), 2):,}")
# A holder is "down" when their average purchase price is above the current price.
print(f"Percentage of apes who are down: {round(df.loc[df['Average Price'] > current_price, 'Average Price'].count() / df['Average Price'].count() * 100, 2):,}%")