In [1]:
! pip3 install yfinance --upgrade --no-cache-dir
! pip3 install yahoo_fin

Requirement already up-to-date: yfinance in /home/michael/.local/lib/python3.8/site-packages (0.1.59)


In [1]:
import json
import pandas as pd
import numpy as np
from yahoo_fin import stock_info
from multiprocessing import Pool
import datetime
from collections import Counter

In [2]:
# we have transactions data
all_transactions_parquet_file_name = '../collected_data/house_stock_watcher_data_all_transactions.parquet'
all_transactions = pd.read_parquet(all_transactions_parquet_file_name)

In [21]:
end_date = datetime.datetime.now().date()
start_date = end_date - datetime.timedelta(days=365 * 2)

def get_historical_data(ticker, start_date=start_date, end_date=end_date):
    try:
        return stock_info.get_data(
            ticker, start_date=start_date, end_date=end_date, index_as_date = False, interval="1d")
    except:
        # really don't do this, it's hacky but whatever
        return ('ERROR', ticker)

# now we have some way of getting stock info from multiple tickers
def get_historical_data_batch(tickers, start_date, end_date):
    historical_data = []
    with Pool(32) as p:
        historical_data = p.map(get_historical_data, tickers)
    return historical_data


In [22]:
%%time

# a couple of hard-coded company->ticker values
company_to_ticker = {
    'Broadcom Inc.': 'AVGO',
    'AD': 'ADM',
    'ADDDYY': 'ADDYY'
}
all_transactions['cleaned_ticker'] = all_transactions['ticker'].apply(lambda ticker: company_to_ticker.get(ticker, ticker))
# replace '.' w/ '-' to make things compatible
all_transactions['cleaned_ticker'] = all_transactions['cleaned_ticker']

# get the set of tickers
tickers = sorted(set(all_transactions['cleaned_ticker']))
# '--' is the null ticker, remove it
if '--' in tickers:
    tickers.remove('--')

historical_data = get_historical_data_batch(tickers, start_date, end_date)


CPU times: user 1.38 s, sys: 238 ms, total: 1.62 s
Wall time: 1min 2s


In [33]:
Counter([isinstance(el, tuple) and 'ERROR' == el[0] for el in historical_data])

Counter({False: 1660, True: 104})

In [34]:
lemon_tickers = [el[1] for el in historical_data if isinstance(el, tuple)]

In [35]:
lemon_tickers

['AD',
 'ADDDYY',
 'AGN',
 'AMTD',
 'AOBC',
 'APC',
 'APCD',
 'APPL',
 'AZSEY',
 'BAC$K',
 'BAMXY',
 'BBCBX',
 'BGG',
 'BOA',
 'BPHSPX',
 'BRK.A',
 'BRK.B',
 'BXS$A',
 'CBS',
 'CCC',
 'CCXX',
 'CELG',
 'CELO',
 'CHL',
 'CHV',
 'CTL',
 'CTRCF',
 'CWEN.A',
 'DCMYY',
 'DEACU',
 'DESY',
 'DNKN',
 'DWDP',
 'EBJ',
 'EQUIX',
 'ETFC',
 'ETP',
 'EVGB',
 'FFHRX',
 'FII',
 'FMCMF',
 'FNRVGX',
 'FXM',
 'GLIBA',
 'HCN',
 'HDS',
 'HS',
 'HTZ',
 'HZD',
 'IDXXX',
 'INTL',
 'JPM$G',
 'LAM',
 'LBDAV',
 'LCRX',
 'LEN.B',
 'LLL',
 'LPT',
 'LTD',
 'MINI',
 'MNTA',
 'MRCK',
 'MSBHY',
 'MYL',
 'MZOR',
 'NBL',
 'NGLS',
 'NLG',
 'ORLT',
 'PEGI',
 'POL',
 'QCHR',
 'RBS',
 'RDS.A',
 'RDS.B',
 'RF$A',
 'RTN',
 'SERV',
 'SKVKY',
 'SPDR',
 'SPN',
 'SPYS',
 'SVCBY',
 'SZEVY',
 'TDDC',
 'TF',
 'TIF',
 'TMK',
 'TMUSR',
 'URGO',
 'UTX',
 'VIAB',
 'VMN',
 'VRTU',
 'VSLR',
 'WAIR',
 'WCG',
 'WFC$V',
 'WLTL',
 'WMGI',
 'WPX',
 'WYND',
 'XMO',
 'ZOOM']

In [42]:
lemon_tickers[0]

index = 2
print(lemon_tickers[index])


all_transactions[all_transactions['cleaned_ticker'] == lemon_tickers[index]]

AGN


Unnamed: 0,disclosure_year,disclosure_date,transaction_date,owner,ticker,asset_description,type,amount,representative,district,ptr_link,cap_gains_over_200_usd,cleaned_ticker
1956,2020,04/16/2020,2020-02-28,self,AGN,Allergan plc Ordinary Shares,sale_full,"$1,001 - $15,000",Hon. Earl Blumenauer,OR03,https://disclosures-clerk.house.gov/public_dis...,False,AGN
5921,2020,04/27/2020,2019-02-15,--,AGN,Allergan plc Ordinary Shares,purchase,"$1,001 - $15,000",Hon. Donna Shalala,FL27,https://disclosures-clerk.house.gov/public_dis...,False,AGN
5922,2020,04/27/2020,2019-06-24,--,AGN,Allergan plc Ordinary Shares,sale_full,"$1,001 - $15,000",Hon. Donna Shalala,FL27,https://disclosures-clerk.house.gov/public_dis...,False,AGN
8235,2020,12/09/2020,2020-05-11,joint,AGN,Allergan plc Ordinary Shares,exchange,"$1,001 - $15,000",Hon. Joseph D. Morelle,NY25,https://disclosures-clerk.house.gov/public_dis...,False,AGN
8685,2020,06/16/2020,2020-05-18,,AGN,Allergan plc Ordinary Shares,sale_full,"$1,001 - $15,000",Hon. Gilbert Cisneros,CA39,https://disclosures-clerk.house.gov/public_dis...,False,AGN
8740,2020,04/13/2020,2020-03-30,--,AGN,Allergan plc Ordinary Shares,sale_full,"$1,001 - $15,000",Hon. Robert J. Wittman,VA01,https://disclosures-clerk.house.gov/public_dis...,False,AGN
9069,2020,01/31/2020,2019-12-18,,AGN,Allergan Inc,sale_full,"$1,001 - $15,000",Hon. John B. Larson,CT01,https://disclosures-clerk.house.gov/public_dis...,False,AGN
9815,2020,05/13/2020,2020-04-29,,AGN,Allergan plc Ordinary Shares,purchase,"$1,001 - $15,000",Hon. Gilbert Cisneros,CA39,https://disclosures-clerk.house.gov/public_dis...,False,AGN


In [40]:
stock_info.get_data('ADDYY')

Unnamed: 0,open,high,low,close,adjclose,volume,ticker
2006-05-31,24.962500,24.975000,24.962500,24.962500,19.252100,28000,ADDYY
2006-06-01,24.812500,24.812500,24.812500,24.812500,19.136414,1600,ADDYY
2006-06-02,25.287500,25.287500,25.250000,25.287500,19.502756,1600,ADDYY
2006-06-05,25.250000,25.250000,25.250000,25.250000,19.473835,2000,ADDYY
2006-06-06,24.437500,24.650000,24.075001,24.437500,18.847198,10400,ADDYY
...,...,...,...,...,...,...,...
2021-05-17,180.360001,181.889999,179.360001,181.539993,181.539993,441600,ADDYY
2021-05-18,180.800003,181.100006,178.389999,179.100006,179.100006,138800,ADDYY
2021-05-19,178.000000,180.779999,177.339996,179.710007,179.710007,44900,ADDYY
2021-05-20,181.619995,183.149994,181.330002,182.630005,182.630005,53800,ADDYY


In [16]:
Counter(['.' in ticker for ticker in set(all_transcations['ticker'])])

Counter({False: 1754, True: 12})

In [17]:
[ticker for ticker in set(all_transcations['ticker']) if '.' in ticker]

['CWEN.A',
 'RY.TO',
 'WXA.F',
 'BRK.B',
 'LEN.B',
 'VFC.VI',
 'Broadcom Inc.',
 'RDS.A',
 '35G.SG',
 'BRK.A',
 'WB2.SG',
 'RDS.B']

In [14]:
??stock_info.tickers_sp500

In [20]:
stock_info.get_data('Broadcom Inc')

AssertionError: {'chart': {'result': None, 'error': {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}}}