In [2]:
! pip3 install yfinance --upgrade --no-cache-dir
! pip3 install yahoo_fin
! pip3 install multiprocess

Requirement already up-to-date: yfinance in c:\users\katri\anaconda3\lib\site-packages (0.1.59)


In [3]:
import json
import pandas as pd
import numpy as np
from yahoo_fin import stock_info
import yahoo_fin
from multiprocessing import Pool
import multiprocess as mp
import datetime
from collections import Counter


In [4]:
# we have transactions data
all_transactions_parquet_file_name = '../collected_data/house_stock_watcher_data_all_transactions.parquet'
all_transactions = pd.read_parquet(all_transactions_parquet_file_name)

In [5]:
end_date = datetime.datetime.now().date()
start_date = end_date - datetime.timedelta(days=365 * 2)

# def get_historical_data(ticker, start_date=start_date, end_date=end_date):
# #     try:
#         return stock_info.get_data(
#             ticker, start_date=start_date, end_date=end_date, index_as_date = False, interval="1d")
# #     except:
# #         # really don't do this, it's hacky but whatever
# #         return ('ERROR', ticker)

# # now we have some way of getting stock info from multiple tickers

from get_historical_data import get_historical_data

def get_historical_data_batch(tickers, start_date, end_date):
    historical_data = []
    with Pool(32) as p:
        historical_data = p.map(get_historical_data, tickers)
    return historical_data


In [6]:
%%time
# stock_info.get_data('amzn')
get_historical_data_batch(['amzn'],start_date, end_date)

Wall time: 3.61 s


[          date         open         high          low        close  \
 0   2019-05-28  1832.750000  1849.270020  1827.349976  1836.430054   
 1   2019-05-29  1823.119995  1830.000000  1807.530029  1819.189941   
 2   2019-05-30  1825.489990  1829.469971  1807.829956  1816.319946   
 3   2019-05-31  1790.010010  1795.589966  1772.699951  1775.069946   
 4   2019-06-03  1760.010010  1766.290039  1672.000000  1692.689941   
 ..         ...          ...          ...          ...          ...   
 500 2021-05-20  3244.399902  3259.679932  3236.179932  3247.679932   
 501 2021-05-21  3250.000000  3256.689941  3197.010010  3203.080078   
 502 2021-05-24  3215.500000  3257.949951  3210.500000  3244.989990   
 503 2021-05-25  3266.669922  3279.820068  3213.760010  3259.050049   
 504 2021-05-26  3274.590088  3295.729980  3258.510010  3265.159912   
 
         adjclose   volume ticker  
 0    1836.430054  3200000   AMZN  
 1    1819.189941  4279000   AMZN  
 2    1816.319946  3146900   AMZN  
 3

In [7]:
%%time

# a couple of hard-coded company->ticker values
company_to_ticker = {
    'Broadcom Inc.': 'AVGO',
    'AD': 'ADM',
    'ADDDYY': 'ADDYY'
}
all_transactions['cleaned_ticker'] = all_transactions['ticker'].apply(lambda ticker: company_to_ticker.get(ticker, ticker))
# replace '.' w/ '-' to make things compatible
all_transactions['cleaned_ticker'] = all_transactions['cleaned_ticker']

# get the set of tickers
tickers = sorted(set(all_transactions['cleaned_ticker']))
# '--' is the null ticker, remove it
if '--' in tickers:
    tickers.remove('--')

historical_data = get_historical_data_batch(tickers, start_date, end_date)


Wall time: 1min 20s


In [8]:
Counter([isinstance(el, tuple) and 'ERROR' == el[0] for el in historical_data])

Counter({False: 1656, True: 106})

In [9]:
lemon_tickers = [el[1] for el in historical_data if isinstance(el, tuple)]

In [10]:
# AGN: company purchased by Abbvie (ABBV) in May 2020
# AMTD: company purchased by Charles Schwab (SCHW)
lemon_tickers

['AGN',
 'AMTD',
 'AOBC',
 'APC',
 'APCD',
 'APPL',
 'AZSEY',
 'BAC$K',
 'BAMXY',
 'BBCBX',
 'BGG',
 'BOA',
 'BPHSPX',
 'BRK.A',
 'BRK.B',
 'BXS$A',
 'CBS',
 'CCC',
 'CCXX',
 'CELG',
 'CELO',
 'CHL',
 'CHV',
 'CTL',
 'CTRCF',
 'CWEN.A',
 'DCMYY',
 'DEACU',
 'DESY',
 'DNKN',
 'DWDP',
 'EBJ',
 'EQUIX',
 'ETFC',
 'ETP',
 'EVGB',
 'FFHRX',
 'FII',
 'FMCMF',
 'FNRVGX',
 'FXM',
 'GEAGY',
 'GLIBA',
 'HCN',
 'HDS',
 'HS',
 'HTZ',
 'HZD',
 'IDXXX',
 'INTL',
 'JPM$G',
 'LAM',
 'LBDAV',
 'LCRX',
 'LEN.B',
 'LLL',
 'LPT',
 'LTD',
 'MINI',
 'MNTA',
 'MRCK',
 'MSBHY',
 'MYL',
 'MZOR',
 'NBL',
 'NGLS',
 'NLG',
 'ORLT',
 'PEGI',
 'POL',
 'QCHR',
 'RBS',
 'RDS.A',
 'RDS.B',
 'RF$A',
 'RTN',
 'SERV',
 'SKVKY',
 'SNOXX',
 'SPDR',
 'SPN',
 'SPYS',
 'SVCBY',
 'SZEVY',
 'TDDC',
 'TDDXX',
 'TF',
 'TFDXX',
 'TIF',
 'TMK',
 'TMUSR',
 'URGO',
 'UTX',
 'VIAB',
 'VMN',
 'VRTU',
 'VSLR',
 'WAIR',
 'WCG',
 'WFC$V',
 'WLTL',
 'WMGI',
 'WPX',
 'WYND',
 'XMO',
 'ZOOM']

In [36]:
lemon_tickers[0]

index = 9
print(lemon_tickers[index])


all_transactions[all_transactions['cleaned_ticker'] == lemon_tickers[index]]

BBCBX


Unnamed: 0,disclosure_year,disclosure_date,transaction_date,owner,ticker,asset_description,type,amount,representative,district,ptr_link,cap_gains_over_200_usd,cleaned_ticker
7974,2021,04/01/2021,2021-03-25,,BBCBX,Bridge Builder Core Plus Bond @ 10.29,purchase,"$1,001 - $15,000",Hon. Harold Dallas Rogers,KY05,https://disclosures-clerk.house.gov/public_dis...,False,BBCBX


In [37]:
stock_info.get_data('bbcbx')

AssertionError: {'chart': {'result': None, 'error': {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}}}