### This notebook demonstrates how to download the last 20 years' worth of share price data for all Wilshire5000 Index constituents.



#### Import Libraries

In [12]:
# Provides ways to work with large multidimensional arrays
import numpy as np
# Allows for further data manipulation and analysis
import pandas as pd
# Download time series data for S&P500 or Wilshire5000 via alpha vantage API
from alpha_vantage.timeseries import TimeSeries

import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates #styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance
import time

#getting data from directory
import os
from os import listdir
from os.path import isfile, join

# API key from alpha vantage
# Read the key inside a context manager so the file handle is closed, and strip
# surrounding whitespace so a trailing newline in the text file does not end up
# as part of the key passed to the API client.
with open(r"C:/Users/User/Documents/PM Tools/US Share Market Data/Alpha_vantage_api_key.txt") as key_file:
    key = key_file.read().strip()

#### List of Shares Not Downloaded

In [3]:
# shares_not_downloaded: tickers whose download failed (appended to by save_shares_data_to_csv).
# missing_shares: separate list that later receives a copy of the failures for a retry pass.
shares_not_downloaded, missing_shares = [], []

#### Save Data to CSV

In [4]:
# download the last 20 years of daily adjusted prices for Wilshire5000 constituents
def save_shares_data_to_csv(folder, ticker):
    """Download the full daily adjusted price history for one ticker and save it as CSV.

    The data is requested through the Alpha Vantage ``TimeSeries`` client using
    the module-level ``key`` and written to ``folder + ticker + ".csv"``, so
    ``folder`` must already end with a path separator.

    Args:
        folder: Destination directory path, including the trailing separator.
        ticker: Share symbol to download.

    Returns:
        None. If anything fails, the ticker is recorded (once) in the
        module-level ``shares_not_downloaded`` list and a message with the
        failure reason is printed; the exception is not re-raised, so a batch
        loop keeps going.
    """
    print("Get data for:", ticker)
    try:
        api_call = TimeSeries(key, output_format='pandas')  # api call (alpha vantage)
        # get_daily_adjusted returns a tuple; element 0 is the price DataFrame.
        prices = api_call.get_daily_adjusted(ticker, outputsize='full')[0]
        prices.to_csv(folder + ticker + ".csv")  # save ticker data to csv
    except Exception as ex:
        # Record each failed ticker only once so a retry pass over the failures
        # does not add duplicates to the list.
        if ticker not in shares_not_downloaded:
            shares_not_downloaded.append(ticker)
        # Include the reason so throttling can be told apart from unknown symbols.
        print("Couldn't get data for:", ticker, "-", ex)
    finally:
        # Pause after every attempt, failed ones included, so an error response
        # is not followed immediately by another request.
        time.sleep(1)

#### Return Dataframe from CSV

In [5]:
def get_shares_df_from_csv(folder, ticker):
    """Load the saved CSV for ``ticker`` from ``folder`` into a DataFrame.

    The file path is built as ``folder + ticker + ".csv"``.

    Returns:
        The DataFrame read from the file, or None (after printing a message)
        when the file does not exist.
    """
    try:
        frame = pd.read_csv(folder + ticker + ".csv")
    except FileNotFoundError:
        print("File doesn't exist.")
        return None
    return frame

#### Return a Named Column of Data from CSV

In [6]:
def get_column_from_csv(file, column_name):
    """Read the CSV at ``file`` and return the column named ``column_name``.

    Returns:
        The selected column as a pandas Series, or None (after printing a
        message) when the file does not exist.
    """
    try:
        table = pd.read_csv(file)
    except FileNotFoundError:
        print("File doesn't exist.")
        return None
    return table[column_name]

#### Test receiving Tickers List

In [8]:
# Load the Wilshire 5000 tickers from the "Ticker" column of the CSV file.
# Search online for an updated tickers list for the Wilshire5000 or S&P500 indices.
tickers = get_column_from_csv(
    r"C:/Users/User/Documents/PM Tools/US Share Market Data/wilshire5000_tickers.csv",
    "Ticker",
)
tickers

0          A
1         AA
2        AAC
3        AAL
4       AAME
        ... 
3213    PFLT
3214     PRT
3215    PSEC
3216     SBR
3217    SUNS
Name: Ticker, Length: 3218, dtype: object

#### TEST: Get 20 years of data for the first 10 shares from tickers list

In [10]:
# Folder path that every downloaded csv file is saved into (note the trailing slash).
folder = r"C:/Users/User/Documents/PM Tools/US Share Market Data/Wilshire5000/"
# Trial run: fetch only the tickers at positions 0-9.
for position in range(10):
    save_shares_data_to_csv(folder, tickers[position])
print("Finished")

Get data for: A
Get data for: AA
Get data for: AAC
Get data for: AAL
Get data for: AAME
Get data for: AAN
Get data for: AAOI
Get data for: AAON
Get data for: AAP
Get data for: AAPL
Finished


#### Download all Wilshire5000 Shares in Blocks

The code below assumes free API access with a maximum of 500 calls per day, so the tickers are downloaded in blocks. With a premium API key, the whole data set for the Wilshire5000 or S&P500 can be downloaded with a single function call.
When using a premium API key, change the end of the range in the "for" loop from 500 to the length of the tickers list (in my case that is 3218).  


In [9]:
# Block 1: ticker positions 10-499, then show the tickers that have failed so far.
for position in range(10, 500):
    save_shares_data_to_csv(folder, tickers[position])
print("Finished")
shares_not_downloaded

Get data for: AAT
Get data for: AAWW
Get data for: ABBV
Get data for: ABC
Get data for: ABCB
Get data for: ABEO
Get data for: ABG
Get data for: ABIO
Get data for: ABM
Get data for: ABMD
Get data for: ABR
Get data for: ABT
Get data for: ABTX
Get data for: AC
Get data for: ACA
Get data for: ACAD
Get data for: ACBI
Get data for: ACC
Get data for: ACCO
Get data for: ACER
Get data for: ACGL
Get data for: ACHC
Get data for: ACHN
Get data for: ACHV
Get data for: ACIA
Get data for: ACIW
Get data for: ACLS
Get data for: ACM
Get data for: ACMR
Get data for: ACN
Get data for: ACNB
Get data for: ACOR
Get data for: ACRE
Get data for: ACRS
Get data for: ACRX
Get data for: ACTG
Get data for: ACU
Get data for: ADBE
Get data for: ADC
Get data for: ADES
Get data for: ADI
Get data for: ADM
Get data for: ADMA
Get data for: ADMP
Get data for: ADMS
Get data for: ADNT
Get data for: ADP
Get data for: ADSK
Get data for: ADT
Get data for: ADTN
Get data for: ADUS
Get data for: ADVM
Get data for: ADXS
Get data fo

['AFI',
 'AGMA',
 'AHC',
 'ALNA',
 'ANIX',
 'ANTM',
 'ARA',
 'BFB',
 'BLL',
 'BOMN',
 'BRG',
 'CAI']

In [11]:
# Block 2: ticker positions 500-999, then show the tickers that have failed so far.
for position in range(500, 1000):
    save_shares_data_to_csv(folder, tickers[position])
print("Finished")
shares_not_downloaded

Get data for: CASH
Get data for: CASI
Get data for: CASS
Get data for: CASY
Get data for: CAT
Get data for: CATC
Get data for: CATM
Get data for: CATO
Get data for: CATY
Get data for: CB
Get data for: CBAN
Get data for: CBAY
Get data for: CBB
Couldn't get data for: CBB
Get data for: CBFV
Get data for: CBIO
Get data for: CBL
Get data for: CBMG
Get data for: CBOE
Get data for: CBRE
Get data for: CBRL
Get data for: CBSH
Get data for: CBT
Get data for: CBTX
Couldn't get data for: CBTX
Get data for: CBU
Get data for: CBZ
Get data for: CC
Get data for: CCBG
Get data for: CCF
Get data for: CCI
Get data for: CCK
Get data for: CCL
Get data for: CCMP
Get data for: CCNE
Get data for: CCO
Get data for: CCOI
Get data for: CCRN
Get data for: CCS
Get data for: CCXI
Get data for: CDAY
Get data for: CDE
Get data for: CDEV
Couldn't get data for: CDEV
Get data for: CDK
Get data for: CDLX
Get data for: CDMO
Get data for: CDNA
Get data for: CDNS
Get data for: CDR
Couldn't get data for: CDR
Get data for: CD

['AFI',
 'AGMA',
 'AHC',
 'ALNA',
 'ANIX',
 'ANTM',
 'ARA',
 'BFB',
 'BLL',
 'BOMN',
 'BRG',
 'CAI',
 'CBB',
 'CBTX',
 'CDEV',
 'CDR',
 'CECE',
 'CLBS',
 'CLR',
 'CLSN',
 'CLVS',
 'CMD',
 'CMO',
 'CNR',
 'COG',
 'CRDB',
 'CTT',
 'CXO',
 'CYTR',
 'DRE',
 'ELY']

In [12]:
# Block 3: ticker positions 1000-1499, then show the tickers that have failed so far.
for position in range(1000, 1500):
    save_shares_data_to_csv(folder, tickers[position])
print("Finished")
shares_not_downloaded

Get data for: ESTE
Get data for: ESXB
Get data for: ETN
Get data for: ETR
Get data for: ETRN
Get data for: ETSY
Get data for: EV
Get data for: EVBG
Get data for: EVBN
Get data for: EVC
Get data for: EVER
Get data for: EVFM
Get data for: EVH
Get data for: EVI
Get data for: EVOK
Get data for: EVOL
Get data for: EVOP
Get data for: EVR
Get data for: EVRG
Get data for: EVRI
Get data for: EVTC
Get data for: EW
Get data for: EWBC
Get data for: EXAS
Get data for: EXC
Get data for: EXEL
Get data for: EXLS
Get data for: EXP
Get data for: EXPD
Get data for: EXPE
Get data for: EXPI
Get data for: EXPO
Get data for: EXPR
Get data for: EXR
Get data for: EXTN
Get data for: EXTR
Get data for: EYE
Get data for: EYEG
Couldn't get data for: EYEG
Get data for: EYES
Couldn't get data for: EYES
Get data for: EYPT
Get data for: EZPW
Get data for: F
Get data for: FAF
Get data for: FANG
Get data for: FARM
Get data for: FARO
Get data for: FAST
Get data for: FATE
Get data for: FB
Couldn't get data for: FB
Get dat

['AFI',
 'AGMA',
 'AHC',
 'ALNA',
 'ANIX',
 'ANTM',
 'ARA',
 'BFB',
 'BLL',
 'BOMN',
 'BRG',
 'CAI',
 'CBB',
 'CBTX',
 'CDEV',
 'CDR',
 'CECE',
 'CLBS',
 'CLR',
 'CLSN',
 'CLVS',
 'CMD',
 'CMO',
 'CNR',
 'COG',
 'CRDB',
 'CTT',
 'CXO',
 'CYTR',
 'DRE',
 'ELY',
 'EYEG',
 'EYES',
 'FB',
 'FBHS',
 'FFG',
 'FNHC',
 'FRAN',
 'GBL',
 'GEFB',
 'GFN',
 'GRBK',
 'GTT',
 'GWGH',
 'HEIA',
 'HIL',
 'HPR',
 'HTA',
 'HTBX',
 'IEC',
 'IIVI',
 'INS']

In [13]:
# Block 4: ticker positions 1500-1999, then show the tickers that have failed so far.
for position in range(1500, 2000):
    save_shares_data_to_csv(folder, tickers[position])
print("Finished")
shares_not_downloaded

Get data for: INUV
Get data for: INVA
Get data for: INVE
Get data for: INVH
Get data for: IO
Couldn't get data for: IO
Get data for: IONS
Get data for: IOR
Get data for: IOSP
Get data for: IOVA
Get data for: IP
Get data for: IPAR
Get data for: IPG
Get data for: IPGP
Get data for: IPHI
Get data for: IPI
Get data for: IPWR
Get data for: IQV
Get data for: IR
Get data for: IRBT
Get data for: IRDM
Get data for: IRET
Get data for: IRIX
Get data for: IRM
Get data for: IRMD
Get data for: IROQ
Get data for: IRT
Get data for: IRTC
Get data for: IRWD
Get data for: ISBC
Get data for: ISDR
Get data for: ISEE
Get data for: ISIG
Get data for: ISNS
Get data for: ISR
Get data for: ISRG
Get data for: ISSC
Get data for: ISTR
Get data for: IT
Get data for: ITCI
Get data for: ITGR
Get data for: ITI
Get data for: ITIC
Get data for: ITRI
Get data for: ITT
Get data for: ITW
Get data for: IVAC
Get data for: IVC
Get data for: IVR
Get data for: IVZ
Get data for: IZEA
Get data for: JACK
Get data for: JAGX
Get dat

['AFI',
 'AGMA',
 'AHC',
 'ALNA',
 'ANIX',
 'ANTM',
 'ARA',
 'BFB',
 'BLL',
 'BOMN',
 'BRG',
 'CAI',
 'CBB',
 'CBTX',
 'CDEV',
 'CDR',
 'CECE',
 'CLBS',
 'CLR',
 'CLSN',
 'CLVS',
 'CMD',
 'CMO',
 'CNR',
 'COG',
 'CRDB',
 'CTT',
 'CXO',
 'CYTR',
 'DRE',
 'ELY',
 'EYEG',
 'EYES',
 'FB',
 'FBHS',
 'FFG',
 'FNHC',
 'FRAN',
 'GBL',
 'GEFB',
 'GFN',
 'GRBK',
 'GTT',
 'GWGH',
 'HEIA',
 'HIL',
 'HPR',
 'HTA',
 'HTBX',
 'IEC',
 'IIVI',
 'INS',
 'IO',
 'JCS',
 'KIN',
 'LB',
 'LENB',
 'LGFB',
 'LJPC',
 'LLNW',
 'LNDC',
 'MDLY',
 'MDP',
 'MLHR',
 'NAV',
 'NBEV',
 'NETE']

In [14]:
# Block 5: ticker positions 2000-2499, then show the tickers that have failed so far.
for position in range(2000, 2500):
    save_shares_data_to_csv(folder, tickers[position])
print("Finished")
shares_not_downloaded

Get data for: NHI
Get data for: NHTC
Get data for: NI
Get data for: NICK
Get data for: NINE
Get data for: NJR
Get data for: NKE
Get data for: NKSH
Get data for: NKTR
Get data for: NL
Get data for: NLS
Get data for: NLSN
Get data for: NLTX
Get data for: NLY
Get data for: NMIH
Get data for: NMRK
Get data for: NNBR
Get data for: NNI
Get data for: NNN
Get data for: NNVC
Get data for: NOC
Get data for: NODK
Get data for: NOG
Get data for: NOV
Get data for: NOVT
Get data for: NOW
Get data for: NP
Get data for: NPK
Get data for: NPO
Get data for: NPTN
Get data for: NR
Get data for: NRC
Get data for: NRG
Get data for: NRIM
Get data for: NRZ
Couldn't get data for: NRZ
Get data for: NSA
Get data for: NSC
Get data for: NSEC
Get data for: NSIT
Get data for: NSP
Get data for: NSSC
Get data for: NSTG
Get data for: NTAP
Get data for: NTCT
Get data for: NTGR
Get data for: NTIC
Get data for: NTIP
Get data for: NTLA
Get data for: NTNX
Get data for: NTRA
Get data for: NTRS
Get data for: NTUS
Get data for

['AFI',
 'AGMA',
 'AHC',
 'ALNA',
 'ANIX',
 'ANTM',
 'ARA',
 'BFB',
 'BLL',
 'BOMN',
 'BRG',
 'CAI',
 'CBB',
 'CBTX',
 'CDEV',
 'CDR',
 'CECE',
 'CLBS',
 'CLR',
 'CLSN',
 'CLVS',
 'CMD',
 'CMO',
 'CNR',
 'COG',
 'CRDB',
 'CTT',
 'CXO',
 'CYTR',
 'DRE',
 'ELY',
 'EYEG',
 'EYES',
 'FB',
 'FBHS',
 'FFG',
 'FNHC',
 'FRAN',
 'GBL',
 'GEFB',
 'GFN',
 'GRBK',
 'GTT',
 'GWGH',
 'HEIA',
 'HIL',
 'HPR',
 'HTA',
 'HTBX',
 'IEC',
 'IIVI',
 'INS',
 'IO',
 'JCS',
 'KIN',
 'LB',
 'LENB',
 'LGFB',
 'LJPC',
 'LLNW',
 'LNDC',
 'MDLY',
 'MDP',
 'MLHR',
 'NAV',
 'NBEV',
 'NETE',
 'NRZ',
 'NXTD',
 'ODT',
 'OMED',
 'PEI',
 'REV',
 'RVI']

In [15]:
# Block 6: ticker positions 2500-2999, then show the tickers that have failed so far.
for position in range(2500, 3000):
    save_shares_data_to_csv(folder, tickers[position])
print("Finished")
shares_not_downloaded

Get data for: SAH
Get data for: SAIA
Get data for: SAIC
Get data for: SAIL
Get data for: SAL
Get data for: SALM
Get data for: SAM
Get data for: SAMG
Get data for: SANM
Get data for: SANW
Get data for: SASR
Get data for: SATS
Get data for: SAVA
Get data for: SAVE
Get data for: SBAC
Get data for: SBCF
Get data for: SBFG
Get data for: SBGI
Get data for: SBH
Get data for: SBNY
Get data for: SBOW
Get data for: SBRA
Get data for: SBSI
Get data for: SBT
Get data for: SBUX
Get data for: SC
Get data for: SCHL
Get data for: SCHN
Get data for: SCHW
Get data for: SCI
Get data for: SCL
Get data for: SCON
Couldn't get data for: SCON
Get data for: SCOR
Get data for: SCPL
Get data for: SCS
Get data for: SCSC
Get data for: SCVL
Get data for: SCWX
Get data for: SCX
Get data for: SCYX
Get data for: SD
Get data for: SDPI
Get data for: SEAC
Get data for: SEAS
Get data for: SEB
Get data for: SEDG
Get data for: SEE
Get data for: SEEL
Get data for: SEIC
Get data for: SELB
Get data for: SELF
Get data for: SEM


['AFI',
 'AGMA',
 'AHC',
 'ALNA',
 'ANIX',
 'ANTM',
 'ARA',
 'BFB',
 'BLL',
 'BOMN',
 'BRG',
 'CAI',
 'CBB',
 'CBTX',
 'CDEV',
 'CDR',
 'CECE',
 'CLBS',
 'CLR',
 'CLSN',
 'CLVS',
 'CMD',
 'CMO',
 'CNR',
 'COG',
 'CRDB',
 'CTT',
 'CXO',
 'CYTR',
 'DRE',
 'ELY',
 'EYEG',
 'EYES',
 'FB',
 'FBHS',
 'FFG',
 'FNHC',
 'FRAN',
 'GBL',
 'GEFB',
 'GFN',
 'GRBK',
 'GTT',
 'GWGH',
 'HEIA',
 'HIL',
 'HPR',
 'HTA',
 'HTBX',
 'IEC',
 'IIVI',
 'INS',
 'IO',
 'JCS',
 'KIN',
 'LB',
 'LENB',
 'LGFB',
 'LJPC',
 'LLNW',
 'LNDC',
 'MDLY',
 'MDP',
 'MLHR',
 'NAV',
 'NBEV',
 'NETE',
 'NRZ',
 'NXTD',
 'ODT',
 'OMED',
 'PEI',
 'REV',
 'RVI',
 'SCON',
 'SQBG',
 'SWM',
 'SYN',
 'SYNL',
 'TBK',
 'TCF',
 'TCO',
 'TLGT',
 'TSC',
 'VAR',
 'VCTR',
 'VEC']

In [16]:
# Final block: ticker position 3000 through the end of the list.
# len(tickers) replaces the hard-coded 3218 so the loop still covers every
# ticker (and does not index past the end) when the tickers CSV is updated.
for x in range(3000, len(tickers)):
    save_shares_data_to_csv(folder, tickers[x])
print("Finished")
shares_not_downloaded

Get data for: VHC
Get data for: VIA
Get data for: VIAV
Get data for: VICI
Get data for: VICR
Get data for: VIRC
Get data for: VIRT
Get data for: VISL
Get data for: VIVO
Get data for: VKTX
Get data for: VLGEA
Get data for: VLO
Get data for: VLY
Get data for: VMC
Get data for: VMI
Get data for: VMW
Get data for: VNCE
Get data for: VNDA
Get data for: VNE
Get data for: VNO
Get data for: VNRX
Get data for: VNTR
Get data for: VOXX
Get data for: VOYA
Get data for: VPG
Get data for: VRA
Get data for: VRAY
Get data for: VREX
Get data for: VRNS
Get data for: VRNT
Get data for: VRRM
Get data for: VRS
Couldn't get data for: VRS
Get data for: VRSK
Get data for: VRSN
Get data for: VRTS
Get data for: VRTU
Get data for: VRTV
Get data for: VRTX
Get data for: VSAT
Get data for: VSEC
Get data for: VSH
Get data for: VST
Get data for: VSTM
Get data for: VSTO
Get data for: VTNR
Get data for: VTR
Get data for: VTVT
Get data for: VUZI
Get data for: VVI
Get data for: VVV
Get data for: VXRT
Get data for: VYGR
G

['AFI',
 'AGMA',
 'AHC',
 'ALNA',
 'ANIX',
 'ANTM',
 'ARA',
 'BFB',
 'BLL',
 'BOMN',
 'BRG',
 'CAI',
 'CBB',
 'CBTX',
 'CDEV',
 'CDR',
 'CECE',
 'CLBS',
 'CLR',
 'CLSN',
 'CLVS',
 'CMD',
 'CMO',
 'CNR',
 'COG',
 'CRDB',
 'CTT',
 'CXO',
 'CYTR',
 'DRE',
 'ELY',
 'EYEG',
 'EYES',
 'FB',
 'FBHS',
 'FFG',
 'FNHC',
 'FRAN',
 'GBL',
 'GEFB',
 'GFN',
 'GRBK',
 'GTT',
 'GWGH',
 'HEIA',
 'HIL',
 'HPR',
 'HTA',
 'HTBX',
 'IEC',
 'IIVI',
 'INS',
 'IO',
 'JCS',
 'KIN',
 'LB',
 'LENB',
 'LGFB',
 'LJPC',
 'LLNW',
 'LNDC',
 'MDLY',
 'MDP',
 'MLHR',
 'NAV',
 'NBEV',
 'NETE',
 'NRZ',
 'NXTD',
 'ODT',
 'OMED',
 'PEI',
 'REV',
 'RVI',
 'SCON',
 'SQBG',
 'SWM',
 'SYN',
 'SYNL',
 'TBK',
 'TCF',
 'TCO',
 'TLGT',
 'TSC',
 'VAR',
 'VCTR',
 'VEC',
 'VRS',
 'WBT',
 'WPG',
 'WRE',
 'WSTG',
 'XSPA',
 'ZIOP',
 'ZS']

In [20]:
# Snapshot the failed tickers into "missing_shares" as an independent list, so the
# retry loop can iterate over it while shares_not_downloaded keeps being appended to.
missing_shares = list(shares_not_downloaded)

In [None]:
# Second attempt: request every ticker that failed during the first pass.
for failed_ticker in missing_shares:
    save_shares_data_to_csv(folder, failed_ticker)
print("Finished")

#### Function to Download individual share data

In [8]:
def save_individual_share_to_csv(ticker, folder=r"C:/Users/User/Documents/PM Tools/US Share Market Data/Wilshire5000/"):
    """Download one ticker's full daily adjusted history, save it as CSV and return it.

    Unlike the batch helper, errors are not caught here: any exception raised by
    the API call or the file write propagates to the caller.

    Args:
        ticker: Share symbol to download.
        folder: Destination directory path, including the trailing separator.
            Defaults to the Wilshire5000 data folder that was previously
            hard-coded in the body.

    Returns:
        The value returned by ``get_daily_adjusted`` unchanged. This is a tuple
        whose first element is the price DataFrame (the part written to disk).
    """
    api_call = TimeSeries(key, output_format='pandas')
    result = api_call.get_daily_adjusted(ticker, outputsize='full')
    # Only element 0 (the DataFrame) is saved; the whole tuple is handed back.
    result[0].to_csv(folder + ticker + ".csv")
    return result

In [9]:
# Fetch and save the history for the single ticker 'META', keeping the returned value.
META = save_individual_share_to_csv(ticker='META')

In [10]:
# Display the value returned by save_individual_share_to_csv (a tuple whose first element is the price DataFrame).
META

(            1. open  2. high    3. low  4. close  5. adjusted close  \
 date                                                                  
 2022-12-28   116.25  118.150  115.5100  115.6200           115.6200   
 2022-12-27   117.93  118.600  116.0501  116.8800           116.8800   
 2022-12-23   116.03  118.175  115.5350  118.0400           118.0400   
 2022-12-22   117.20  118.620  114.3800  117.1200           117.1200   
 2022-12-21   116.70  120.340  115.6200  119.7600           119.7600   
 ...             ...      ...       ...       ...                ...   
 2012-05-24    32.95   33.210   31.7700   33.0300            33.0300   
 2012-05-23    31.37   32.500   31.3600   32.0000            32.0000   
 2012-05-22    32.61   33.590   30.9400   31.0000            31.0000   
 2012-05-21    36.53   36.660   33.0000   34.0300            34.0300   
 2012-05-18    42.05   45.000   38.0000   38.2318            38.2318   
 
               6. volume  7. dividend amount  8. split coeffic