In [None]:
# Move the working directory up one level so the relative paths used by later
# cells resolve from the parent folder.
# NOTE: this is not idempotent; re-running the cell moves up one more level.
%cd ../

In [41]:
import yfinance as yf
import pandas as pd
import requests
from tvDatafeed import TvDatafeed, Interval
import os
import tqdm
import dotenv
import logging

# Populate os.environ from a local .env file; the API key and TradingView
# credentials read via os.getenv() further down are expected to come from there.
dotenv.load_dotenv()

# Turn on debug-level messages for the tvDatafeed client so each request is
# visible in the cell output.
tv_logger = logging.getLogger("tvDatafeed.main")
tv_logger.setLevel(logging.DEBUG)

In [47]:
# Fetch one month of 5-minute AMZN bars from the Alpha Vantage REST API.
# The parsed JSON payload ends up in `data`; the raw response is kept in `res`.
base_url = "https://www.alphavantage.co/query"

# Fail early with a clear message instead of sending a request without a key.
api_key = os.getenv("ALPHAVANTAGE_KEY")
if not api_key:
    raise RuntimeError("ALPHAVANTAGE_KEY is not set; add it to the environment or the .env file")

req = requests.Request("GET", base_url, params={
    "apikey": api_key,
    "function": "TIME_SERIES_INTRADAY",
    "symbol": "AMZN",
    "interval": "5min",
    "adjusted": "true",
    "extended_hours": "false",
    "outputsize": "full",
    "month": "2022-10"
}).prepare()

with requests.Session() as s:
    # Without a timeout a stalled connection would block the kernel forever.
    res = s.send(req, timeout=30)
    # Surface HTTP-level failures here rather than as a confusing JSON/KeyError later.
    res.raise_for_status()
    data = res.json()

# The downstream cell indexes data["Time Series (5min)"]; if the API answered
# with a payload that lacks it (e.g. an error or rate-limit message), stop here
# and show what was returned.
if "Time Series (5min)" not in data:
    raise RuntimeError(f"Alpha Vantage response has no 'Time Series (5min)' entry: {data}")

In [42]:
# Load the 5-minute AMZN bars accumulated so far (the same file is rewritten
# at the end of this section).
# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced locally by this notebook.
all_data = pd.read_pickle("AMZN_May_Aug_2022_5min.pkl")

In [48]:
# Reshape the Alpha Vantage payload into one row per timestamp with the five
# OHLCV columns, typed the same way as the accumulated frame.
month_bars = (
    pd.DataFrame(data["Time Series (5min)"])
    .T
    .set_axis(["open", "high", "low", "close", "volume"], axis=1)
    .astype(float)
)
month_bars.index = pd.to_datetime(month_bars.index)

# Make sure the existing rows are keyed by timestamps too, so duplicates can be
# detected regardless of how the pickle was stored.
existing_bars = all_data.set_axis(pd.to_datetime(all_data.index), axis=0)

all_data = pd.concat([existing_bars, month_bars], axis=0)
# Re-running this cell (or fetching an overlapping month) must not duplicate
# rows: keep only the most recently appended row for each timestamp.
all_data = all_data[~all_data.index.duplicated(keep="last")]

In [53]:
# Normalise the combined frame in one pass: parse the index labels into
# timestamps and cast every column to float.
all_data = all_data.set_axis(pd.to_datetime(all_data.index), axis=0).astype(float)

In [54]:
# Display the combined frame as a visual sanity check of the appended rows.
all_data

Unnamed: 0,open,high,low,close,volume
2022-05-02 09:30:00,122.243,123.000,120.626,120.902,4069920.0
2022-05-02 09:35:00,120.881,120.950,118.541,119.729,3203900.0
2022-05-02 09:40:00,119.743,120.950,118.375,120.621,2442160.0
2022-05-02 09:45:00,120.601,121.340,120.050,120.112,1839720.0
2022-05-02 09:50:00,120.113,121.171,119.629,121.172,1343420.0
...,...,...,...,...,...
2022-10-03 09:50:00,113.680,113.755,112.870,113.200,788333.0
2022-10-03 09:45:00,113.230,113.900,113.140,113.699,574130.0
2022-10-03 09:40:00,113.455,113.490,113.050,113.220,740424.0
2022-10-03 09:35:00,113.940,113.970,113.450,113.470,730441.0


In [55]:
# Persist the bars in chronological order, overwriting the pickle that was
# loaded at the start of this section.
# NOTE(review): the file name says May-Aug 2022, but the frame displayed above
# also contains October 2022 rows; consider renaming the file.
all_data.sort_index().to_pickle("AMZN_May_Aug_2022_5min.pkl")

In [17]:
# Create the TradingView client with credentials taken from the environment
# (never hard-coded in the notebook). os.getenv returns None when a variable
# is missing, in which case None is passed straight to TvDatafeed.
username = os.getenv("TRADINGVIEW_USERNAME")
password = os.getenv("TRADINGVIEW_PW")
tv = TvDatafeed(username, password)

In [None]:
# Debug aid: show every logger registered so far.
# NOTE(review): presumably used to discover the "tvDatafeed.main" logger name;
# can be removed once the logging setup is settled.
logging.Logger.manager.loggerDict

In [83]:
# Single-symbol probe: request the latest 1000 five-minute TSLA bars to check
# that the TradingView client works before looping over the whole index.
tsla_probe_args = {
    "symbol": "TSLA",
    "exchange": "NASDAQ",
    "interval": Interval.in_5_minute,
    "n_bars": 1000,
}
tv.get_hist(**tsla_probe_args)

DEBUG:tvDatafeed.main:getting data for NMD:TSLA...
ERROR:tvDatafeed.main:The read operation timed out
ERROR:tvDatafeed.main:no data, please check the exchange and symbol


In [None]:
# Look up TSLA on TradingView; the loops below read the "exchange" field of the
# first search hit to decide which exchange code to pass to get_hist.
tv.search_symbol("TSLA")

In [11]:
# Download price history for several symbols from Yahoo Finance.
# NOTE(review): `symbols`, `start_date` and `end_date` are not defined in any
# visible cell, so this fails on a fresh kernel; define them before this call.
# NOTE(review): this rebinds `data`, which earlier held the Alpha Vantage JSON.
data = yf.download(symbols, start=start_date, end=end_date)


[*********************100%%**********************]  3 of 3 completed


In [17]:
# Scrape the S&P 500 constituents page; the first HTML table on that page is
# the one used (its "Symbol" column feeds the loops below).
sp500_wiki_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
sp500_wiki_tables = pd.read_html(sp500_wiki_url)
sp500_tickers = sp500_wiki_tables[0]

In [25]:
# Collect Yahoo Finance metadata for every S&P 500 constituent and build a
# frame indexed by symbol, restricted to companies with a known market cap and
# de-duplicated by company name (which collapses multiple share classes).

# Descriptive/contact fields dropped from each info payload. A set gives O(1)
# membership tests and is built once instead of once per key.
excluded_info_fields = {
    "companyOfficers",
    "address1",
    "address2",
    "zip",
    "fax",
    "phone",
    "website",
    "city",
    "state",
    "fullTimeEmployees",
    "country",
    "industry",
    "industryDisp",
    "sector",
    "sectorDisp",
    "longBusinessSummary",
}

sp500_records = []
for ticker in sp500_tickers["Symbol"]:
    try:
        raw_info = yf.Ticker(ticker).info
    except Exception as exc:
        # Keep going on a per-ticker failure, but say so: a bare `except: pass`
        # hid dropped tickers and also swallowed KeyboardInterrupt.
        # NOTE(review): symbols written with a dot on Wikipedia (e.g. BRK.B)
        # may need a different spelling on Yahoo; confirm against this output.
        print(f"Could not fetch info for {ticker}: {exc!r}")
        continue
    sp500_records.append(
        {key: value for key, value in raw_info.items() if key not in excluded_info_fields}
    )

sp500_data = (
    pd.DataFrame(sp500_records)
    .set_index("symbol")
    .dropna(subset=["marketCap"])
    .drop_duplicates(subset=["longName"])
)

In [None]:
# Show the 50 largest companies by market capitalisation together with the
# exchange code reported in their info payload.
sp500_data.sort_values("marketCap", ascending=False)[["marketCap", "exchange"]].head(50)

# 5m Candlesticks

In [None]:
# Download the latest 1000 five-minute bars for every ticker in sp500_data.
# Result: sp500_candlesticks[ticker] == {"5m": <DataFrame>} for each success.
sp500_candlesticks = {}
for ticker in tqdm.tqdm(sp500_data.index):
    try:
        # Use the exchange of the first search hit; an empty result raises
        # IndexError, which is handled below like any other lookup failure.
        info = tv.search_symbol(ticker)
        exchange = info[0]["exchange"]
        candlesticks_5m = tv.get_hist(
            symbol=ticker,
            exchange=exchange,
            interval=Interval.in_5_minute,
            n_bars=1000,
        )
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C can still stop the loop.
        print(f"Could not find {ticker}")
        continue

    # get_hist signals "no data" by returning None rather than raising, so a
    # failed download must not be stored as if it were a result.
    if candlesticks_5m is None:
        print(f"Could not find {ticker}")
        continue

    sp500_candlesticks[ticker] = {"5m": candlesticks_5m}

# 15m Candlesticks

In [None]:
# Download the latest 334 fifteen-minute bars for every ticker and store them
# under the "15m" key of that ticker's entry in sp500_candlesticks.
for ticker in tqdm.tqdm(sp500_data.index):
    try:
        # Use the exchange of the first search hit; an empty result raises
        # IndexError, which is handled below like any other lookup failure.
        info = tv.search_symbol(ticker)
        exchange = info[0]["exchange"]
        candlesticks_15m = tv.get_hist(
            symbol=ticker,
            exchange=exchange,
            interval=Interval.in_15_minute,
            n_bars=334,
        )
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C can still stop the loop.
        print(f"Could not find {ticker}")
        continue

    # get_hist signals "no data" by returning None rather than raising.
    if candlesticks_15m is None:
        print(f"Could not find {ticker}")
        continue

    # Add to the ticker's existing dict instead of assigning a fresh one:
    # a plain assignment here would throw away the "5m" bars already stored.
    sp500_candlesticks.setdefault(ticker, {})["15m"] = candlesticks_15m

In [16]:
# Resolve the TradingView exchange code for every ticker once, so the download
# loop does not have to repeat the search for each interval.
# Tickers that cannot be resolved are mapped to an empty string.
sp500_exchanges = {}
for ticker in tqdm.tqdm(sp500_data.index):
    try:
        info = tv.search_symbol(ticker)
        # First search hit wins; an empty result raises IndexError.
        exchange = info[0]["exchange"]
    except Exception:
        # `except Exception` rather than a bare except, so this multi-minute
        # loop can still be interrupted from the keyboard.
        print(f"Could not find {ticker}")
        exchange = ""

    sp500_exchanges[ticker] = exchange

100%|██████████| 498/498 [04:38<00:00,  1.79it/s]


In [19]:
# Check how get_hist reports failure: for this symbol/exchange pair the request
# errors out in the log and the call returns None instead of raising, which is
# why the download loop tests the result against None.
tv.get_hist("AAPL", "NYSE", Interval.in_5_minute, 1000) is None

DEBUG:tvDatafeed.main:getting data for NYSE:AAPL...
ERROR:tvDatafeed.main:The read operation timed out
ERROR:tvDatafeed.main:no data, please check the exchange and symbol


True

In [None]:
# Download candlesticks at seven resolutions for every ticker.
# Result: sp500_candlesticks[ticker][label] holds the frame for each resolution
# that was fetched successfully, with label in 5m/15m/30m/1h/2h/4h/1d.

# (label, TradingView interval, number of bars) per resolution, finest first.
# For the intraday rows, n_bars times the bar length is roughly 25,000 minutes,
# so they all span about the same window.
candlestick_specs = [
    ("5m", Interval.in_5_minute, 5000),
    ("15m", Interval.in_15_minute, 1667),
    ("30m", Interval.in_30_minute, 834),
    ("1h", Interval.in_1_hour, 418),
    ("2h", Interval.in_2_hour, 210),
    ("4h", Interval.in_4_hour, 106),
    ("1d", Interval.in_daily, 53),
]

for ticker in tqdm.tqdm(sp500_data.index):
    # Start from a clean entry so results of earlier runs do not linger.
    sp500_candlesticks[ticker] = {}
    exchange = sp500_exchanges[ticker]

    for label, tv_interval, n_bars in candlestick_specs:
        try:
            candlesticks = tv.get_hist(
                symbol=ticker,
                exchange=exchange,
                interval=tv_interval,
                n_bars=n_bars,
            )
        except Exception as exc:
            # `except Exception` (not a bare except) keeps Ctrl-C working.
            print(f"Failed to fetch {label} candlesticks for {ticker}: {exc!r}")
            break

        # get_hist returns None when it has no data for the request.
        if candlesticks is None:
            print(f"No {label} candlesticks for {ticker}")
            break

        sp500_candlesticks[ticker][label] = candlesticks
    # A failure at one resolution abandons the remaining (coarser) resolutions
    # for that ticker and moves on to the next ticker.

In [43]:
# Validate the download: every ticker must have all seven resolutions, each
# with exactly the number of bars that was requested. A missing resolution
# raises KeyError; a short frame raises AssertionError.
expected_bar_counts = {
    "5m": 5000,
    "15m": 1667,
    "30m": 834,
    "1h": 418,
    "2h": 210,
    "4h": 106,
    "1d": 53,
}
for frames_by_interval in sp500_candlesticks.values():
    for label, expected_count in expected_bar_counts.items():
        assert len(frames_by_interval[label]) == expected_count

In [49]:
# Remove the redundant "symbol" column from every downloaded frame (the ticker
# is already the dictionary key).
for frames_by_interval in sp500_candlesticks.values():
    for label, frame in frames_by_interval.items():
        # errors="ignore" keeps the cell re-runnable: on a second run the
        # column is already gone and a plain drop would raise KeyError.
        # Rebinding an existing key while iterating is safe (dict size is unchanged).
        frames_by_interval[label] = frame.drop(columns="symbol", errors="ignore")

In [50]:
# Spot-check the cleaned 5-minute TSLA frame (OHLCV columns, datetime index).
sp500_candlesticks["TSLA"]["5m"]

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-06-08 14:50:00,232.61,232.90,232.56,232.66,63810.0
2023-06-08 14:55:00,232.66,233.12,232.63,233.10,90398.0
2023-06-08 15:00:00,233.12,233.19,232.77,233.02,95037.0
2023-06-08 15:05:00,233.03,233.50,232.83,233.46,148088.0
2023-06-08 15:10:00,233.44,233.77,233.09,233.14,167837.0
...,...,...,...,...,...
2023-09-12 11:40:00,272.55,272.96,272.12,272.40,122499.0
2023-09-12 11:45:00,272.40,272.45,271.77,271.90,133778.0
2023-09-12 11:50:00,271.94,272.50,271.89,272.00,114490.0
2023-09-12 11:55:00,272.00,272.02,271.05,271.21,100188.0


In [51]:
# Spot-check the cleaned 5-minute AAPL frame (OHLCV columns, datetime index).
sp500_candlesticks["AAPL"]["5m"]

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-06-08 14:05:00,179.65,179.68,179.490,179.610,47753.0
2023-06-08 14:10:00,179.61,179.78,179.485,179.770,49404.0
2023-06-08 14:15:00,179.77,179.97,179.760,179.970,85641.0
2023-06-08 14:20:00,179.97,179.98,179.830,179.860,48027.0
2023-06-08 14:25:00,179.86,179.97,179.860,179.955,45956.0
...,...,...,...,...,...
2023-09-12 10:55:00,176.24,176.66,176.230,176.350,129789.0
2023-09-12 11:00:00,176.34,176.65,176.290,176.510,118397.0
2023-09-12 11:05:00,176.51,176.67,176.250,176.340,71141.0
2023-09-12 11:10:00,176.32,176.45,176.250,176.410,80565.0
