In [1]:
# Install into the running kernel's environment (%pip, not !pip) and pin the
# version this notebook was originally run with.
%pip install alpha_vantage==2.3.1

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting alpha_vantage
  Downloading alpha_vantage-2.3.1-py3-none-any.whl (31 kB)
Collecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-win_amd64.whl (551 kB)
     -------------------------------------- 551.8/551.8 KB 8.7 MB/s eta 0:00:00
Collecting charset-normalizer<3.0,>=2.0
  Downloading charset_normalizer-2.0.12-py3-none-any.whl (39 kB)
Collecting async-timeout<5.0,>=4.0.0a3
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Collecting yarl<2.0,>=1.0
  Downloading yarl-1.7.2-cp37-cp37m-win_amd64.whl (121 kB)
     ---------------------------------------- 121.3/121.3 KB ? eta 0:00:00
Collecting aiosignal>=1.1.2
  Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)
Collecting asynctest==0.13.0
  Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)
Collecting multidict<7.0,>=4.5
  Downloading multidict-6.0.2-cp37-cp37m-win_amd64.whl (27 kB)
Collecting frozenlist>=1.1.1
  Downloading frozenlist-1

You should consider upgrading via the 'C:\Users\jr101\AppData\Local\Programs\Python\Python37\python.exe -m pip install --upgrade pip' command.


In [29]:
import pandas as pd
import os
import time
from dotenv import load_dotenv
from alpha_vantage.timeseries import TimeSeries

In [4]:
# Load variables from the local .env file into the process environment,
# then read the Alpha Vantage API key from it.
load_dotenv()
MY_KEY = os.environ.get('ALPHA_VANTAGE_KEY')

In [13]:
# Alpha Vantage time-series client; results are requested in pandas format.
ts = TimeSeries(
    output_format='pandas',
    key=MY_KEY,
)

In [6]:
# Read the news dataset directly from the zipped CSV and preview the first rows.
df = pd.read_csv(filepath_or_buffer='data/archive.zip')
df.head(n=5)

Unnamed: 0,id,ticker,title,category,content,release_date,provider,url,article_id
0,221515,NIO,Why Shares of Chinese Electric Car Maker NIO A...,news,What s happening\nShares of Chinese electric c...,2020-01-15,The Motley Fool,https://invst.ly/pigqi,2060327
1,221516,NIO,NIO only consumer gainer Workhorse Group amon...,news,Gainers NIO NYSE NIO 7 \nLosers MGP Ingr...,2020-01-18,Seeking Alpha,https://invst.ly/pje9c,2062196
2,221517,NIO,NIO leads consumer gainers Beyond Meat and Ma...,news,Gainers NIO NYSE NIO 14 Village Farms In...,2020-01-15,Seeking Alpha,https://invst.ly/pifmv,2060249
3,221518,NIO,NIO NVAX among premarket gainers,news,Cemtrex NASDAQ CETX 85 after FY results \n...,2020-01-15,Seeking Alpha,https://invst.ly/picu8,2060039
4,221519,NIO,PLUG NIO among premarket gainers,news,aTyr Pharma NASDAQ LIFE 63 on Kyorin Pharm...,2020-01-06,Seeking Alpha,https://seekingalpha.com/news/3529772-plug-nio...,2053096


In [7]:
# Count rows per ticker and keep the 25 most frequent ones; these are the
# tickers whose historical data gets downloaded below.
most_popular = df['ticker'].value_counts().head(25)

In [10]:
# Display the 25 most frequent tickers with their row counts.
most_popular

AAPL     20231
MSFT      8110
BAC       7409
AMZN      6330
NWSA      5914
BA        5879
GOOGL     5171
GS        4513
TSLA      4283
NFLX      3806
TGT       3689
INTC      3188
DIS       2875
XOM       2831
JPM       2600
MS        2498
GM        2089
C         2082
GE        2045
MU        1927
TM        1822
WMB       1482
KO        1367
WFC       1359
WMT       1267
Name: ticker, dtype: int64

In [17]:
# Trial request: fetch daily AAPL data to see what TimeSeries hands back
# (a data object plus a metadata dict).
data, meta_data = ts.get_daily(symbol='AAPL')

In [19]:
# Inspect the metadata returned alongside the AAPL trial request.
meta_data

{'1. Information': 'Daily Prices (open, high, low, close) and Volumes',
 '2. Symbol': 'AAPL',
 '3. Last Refreshed': '2022-05-16',
 '4. Output Size': 'Compact',
 '5. Time Zone': 'US/Eastern'}

In [26]:
# Text before the first whitespace of the first index entry, i.e. its date part.
str(data.index[0]).split(maxsplit=1)[0]

'2022-05-16'

In [32]:
# Now to create a function to download and save the stock data
def download_stock(symbol, output_dir='data', client=None):
    """Download the full daily history for ``symbol`` and save it as a CSV.

    The file is written to ``<output_dir>/<symbol>_<first>_<last>.csv``, where
    ``<first>`` and ``<last>`` are the date parts of the smallest and largest
    index values of the returned data. Using min/max keeps the name stable
    regardless of the row order the API returns.

    Parameters
    ----------
    symbol : str
        Ticker symbol to request, e.g. ``'AAPL'``.
    output_dir : str, optional
        Directory the CSV is written to; created if it does not exist.
        Defaults to ``'data'``.
    client : object, optional
        Object exposing ``get_daily(symbol=..., outputsize=...)`` that returns
        a ``(data, meta_data)`` pair. Defaults to the notebook-level ``ts``
        client.

    Raises
    ------
    ValueError
        If the response contains no rows, so no date range can be derived.
    """
    if client is None:
        # Fall back to the TimeSeries client created earlier in the notebook.
        client = ts

    data, _ = client.get_daily(symbol=symbol, outputsize='full')
    if len(data.index) == 0:
        raise ValueError('No daily data returned for symbol: {}'.format(symbol))

    # str() of a timestamp looks like 'YYYY-MM-DD HH:MM:SS'; keep the date part.
    first_date = str(data.index.min()).split()[0]
    last_date = str(data.index.max()).split()[0]
    filename = '{}_{}_{}.csv'.format(symbol, first_date, last_date)

    # Make sure the target directory exists before writing.
    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)
    data.to_csv(filepath)

In [33]:
# Download the daily history for every popular ticker, pausing between
# requests so consecutive API calls are spaced out.
# NOTE(review): 20 s is presumably chosen to stay under the API's rate limit --
# confirm against the limits of the key in use.
WAIT_SECONDS = 20
symbols = list(most_popular.keys())

for position, symbol in enumerate(symbols, start=1):
    print("Starting download for:", symbol)
    download_stock(symbol)
    print("Finished download for:", symbol)
    # No request follows the last symbol, so skip the final pause.
    if position < len(symbols):
        print("waiting for", WAIT_SECONDS, "seconds before next iteration...")
        time.sleep(WAIT_SECONDS)

print("Done downloading all the files!")

Starting download for: AAPL
Finished download for: AAPL
waiting for 20 seconds before next iteration...
Starting download for: MSFT
Finished download for: MSFT
waiting for 20 seconds before next iteration...
Starting download for: BAC
Finished download for: BAC
waiting for 20 seconds before next iteration...
Starting download for: AMZN
Finished download for: AMZN
waiting for 20 seconds before next iteration...
Starting download for: NWSA
Finished download for: NWSA
waiting for 20 seconds before next iteration...
Starting download for: BA
Finished download for: BA
waiting for 20 seconds before next iteration...
Starting download for: GOOGL
Finished download for: GOOGL
waiting for 20 seconds before next iteration...
Starting download for: GS
Finished download for: GS
waiting for 20 seconds before next iteration...
Starting download for: TSLA
Finished download for: TSLA
waiting for 20 seconds before next iteration...
Starting download for: NFLX
Finished download for: NFLX
waiting for 20 s

In [34]:
# Also download the daily history for SPY, which is not one of the loop's
# tickers (presumably wanted as a market benchmark -- confirm).
download_stock('SPY')

In [45]:
# Keep only the news rows whose ticker is among the most popular ones and
# write them out for later use.
# NOTE(review): to_csv also writes the index, and df already carries an
# 'Unnamed: 0' column, so the saved file likely has two index-like columns --
# confirm whether downstream readers expect that.
is_top_ticker = df['ticker'].isin(most_popular.index)
to_save = df.loc[is_top_ticker]
to_save.to_csv('data/top25-headlines.csv')