In [1]:
import requests
import pandas as pd
import numpy as np
import io
from datetime import date, timedelta, datetime, timezone
import time
import pytz
import json
from os import path
import os
from urllib.parse import unquote
import base64

In [2]:
with open("./apikeys.key") as APIfp:
    obj = json.load(APIfp)
    # financialKey = obj['financialAPI']
    SchwabConsumer = obj["Schwab_consumer"]
    SchwabSecret = obj["Schwab_secret"]
    SchwabRefresh = obj['Schwab_refresh']

In [3]:
def retrieveTDAuthToken():
    """Get auth token using refresh token"""
    client_encoded = base64.b64encode(f"{SchwabConsumer}:{SchwabSecret}".encode("utf-8")).decode("utf-8")
    r = requests.post("https://api.schwabapi.com/v1/oauth/token",
    headers={
        "Authorization": f"Basic {client_encoded}",
        "Content-Type": 'application/x-www-form-urlencoded',},
    data={
        'grant_type': 'refresh_token',
        'refresh_token': SchwabRefresh,
        'redirect_uri':'https://127.0.0.1'

    })
    if r.status_code != 200:
        print("Error getting TD auth code")
        print(r.content)
        return
    else:
        print("success retrieving token")
    robj = json.loads(r.text)
    print(f"Refresh token: {robj['refresh_token']}")
    global TDAuthToken, TDAuthExpire
    TDAuthToken = robj["access_token"]
    TDAuthExpire = datetime.now() + timedelta(minutes=29)


In [4]:
retrieveTDAuthToken()
print(TDAuthToken)

success retrieving token
Refresh token: RbgKxJKviDNvfOiRekFdWB3vcR0rhWEE_1wA11QHSYPn5hTYeAHREW6hC4InkJTcq818W8I3Ab3jk_8h1XOB8NTXMhQEMyht
I0.b2F1dGgyLmJkYy5zY2h3YWIuY29t.4NMkZihrSvoiNjs7bJwPvQeJ0wgNLo4amrCqxz9oOhU@


# Downloading data

In [9]:
def getSymbolsTD(arr: list, directory="data", freqType="minute", periodType="day", freq=1, period=5, endDate=None, daysBack=0, save=True,
                 disableDelay=False, datedFileName=False):
    if endDate is None:
        # Calculate end time as now, or previous close if it is past close
        prevclose = datetime.now(tz=pytz.timezone("America/New_York"))
        if prevclose.hour < 4:
            prevclose -= timedelta(days=1)
        endDateStamp = 1000 * int(min(time.time(), prevclose.timestamp()))
        endDate = datetime.fromtimestamp(endDateStamp / 1e3)
    else:
        endDateStamp = 1000 * int(endDate.timestamp())
    if daysBack > 0 and endDate.weekday() < daysBack:
        daysBack += 2
    startDateStamp = endDateStamp - (daysBack * 86400000)

    res = {}
    count = 0
    errors = 0
    for sym in arr:
        count += 1
        if not disableDelay:
            print(f"Downloading  {sym}", end=" ")
        fileName = f"{directory}/{sym}{endDate.strftime('%m-%d-%Y') if datedFileName else ''}.csv"
        if save and path.exists(fileName):
            print("File exists")
            continue
        if not disableDelay:
            print("")
        nextRun = time.time() + 0.51
        r = requests.get("https://api.schwabapi.com/marketdata/v1/pricehistory",
                         headers={"Authorization": f"Bearer {TDAuthToken}"},
                         params={
                             "symbol":sym,
                             "periodType": periodType,
                             "frequencyType": freqType,
                             "frequency": freq,
                             # In place of end start
                             "period": period
                             #  "needExtendedHoursData": "true",
                             #  "endDate": endDateStamp,
                             #  "startDate": startDateStamp,
                         })
        if r.status_code == 200:
            try:
                obj = json.loads(r.text)
                df = pd.DataFrame(obj["candles"])
                df["datetime"] = pd.to_datetime(df['datetime'], unit='ms').dt.tz_localize(
                    'UTC').dt.tz_convert('America/New_York')
                if save:
                    df.to_csv(fileName, index=False)
                else:
                    res[sym] = df
            except Exception as e:
                print("Download Error ", sym)
                print(e)
                errors += 1
        else:
            print(f"Download Error {sym} \n {r.text}")
            errors += 1
        sleepDur = nextRun - time.time()
        if sleepDur > 0 and not disableDelay:
            time.sleep(sleepDur)
    print(f"Total #errors {errors}")
    if not save:
        return res

In [10]:
df = pd.read_csv("fortune500.csv")
syms = df['Symbol'].to_numpy()
print(len(syms))
getSymbolsTD(syms, directory="./data_minutely_10d", freqType="minute", periodType="day", period=10)
# getSymbolsTD(syms, directory="./data_daily_5y", freqType="daily", periodType="year", period=5)

505
Downloading  MMM 
Downloading  AOS 
Download Error  AOS
'datetime'
Downloading  ABT 
Downloading  ABBV 
Downloading  ABMD 
Download Error  ABMD
'datetime'
Downloading  ACN 
Downloading  ATVI 
Download Error  ATVI
'datetime'
Downloading  ADM 
Downloading  ADBE 
Downloading  AAP 
Downloading  AMD 
Downloading  AES 
Downloading  AFL 
Downloading  A 
Downloading  APD 
Downloading  AKAM 
Downloading  ALK 
Downloading  ALB 
Downloading  ARE 
Downloading  ALGN 
Downloading  ALLE 
Downloading  LNT 
Downloading  ALL 
Downloading  GOOGL 
Downloading  GOOG 
Downloading  MO 
Downloading  AMZN 
Downloading  AMCR 
Downloading  AEE 
Downloading  AAL 
Downloading  AEP 
Downloading  AXP 
Downloading  AIG 
Downloading  AMT 
Downloading  AWK 
Downloading  AMP 
Downloading  ABC 
Download Error  ABC
'datetime'
Downloading  AME 
Downloading  AMGN 
Downloading  APH 
Downloading  ADI 
Downloading  ANSS 
Downloading  ANTM 
Download Error  ANTM
'datetime'
Downloading  AON 
Downloading  APA 
Downloading  AAP

# Preprocessing

In [11]:
target_dir = "./data/1m-10d"
target_out = "1m-10d.csv"

syms = os.listdir(target_dir)
syms = list(map(lambda x: x.split(".csv")[0],syms))
data = []
for name in syms:
    df = pd.read_csv(f"{target_dir}/{name}.csv")
    if df.shape[0] < 1000:
        continue
    df['name']=name
    data.append(df)
data = pd.concat(data)
data.to_csv(target_out,index=False)

In [14]:
pd.read_csv("1m-10d.csv")

Unnamed: 0,open,high,low,close,volume,datetime,name
0,169.750,169.800,169.610,169.790,1193,2024-04-08 07:00:00-04:00,AMD
1,169.820,169.820,169.820,169.820,248,2024-04-08 07:01:00-04:00,AMD
2,169.870,169.870,169.870,169.870,806,2024-04-08 07:02:00-04:00,AMD
3,169.850,169.850,169.850,169.850,1438,2024-04-08 07:03:00-04:00,AMD
4,169.700,169.800,169.700,169.800,1614,2024-04-08 07:04:00-04:00,AMD
...,...,...,...,...,...,...,...
1799257,54.725,54.760,54.710,54.760,12662,2024-04-19 15:56:00-04:00,CTVA
1799258,54.750,54.785,54.730,54.745,18371,2024-04-19 15:57:00-04:00,CTVA
1799259,54.740,54.760,54.725,54.725,20232,2024-04-19 15:58:00-04:00,CTVA
1799260,54.730,54.840,54.730,54.800,71350,2024-04-19 15:59:00-04:00,CTVA
