In [188]:
import requests
import pandas as pd
from env import api_key
from datetime import timedelta, datetime

### /v8/finance/spark - historical data for various intervals and ranges

In [None]:
# Grabbing 5 years of close prices for Microsoft and Apple
headers = {
    'x-api-key': api_key
    }

response = requests.request("GET", 'https://yfapi.net/v8/finance/spark?interval=1d&range=5y&symbols=AAPL,MSFT', headers=headers)

In [3]:
# Saving response to data 
data = response.json()

In [187]:
def extract_date_and_price(data):
    '''
    This function takes in JSON file,
    creates a series for price and date,
    converts timestamp into a proper date,
    combines both price and date series into one temp dataframe,
    convert data to datetime64 and set as index
    merge to the main dataframe
    returns the main dataframe
    '''
    # Creating an empty main DataFrame
    df = pd.DataFrame()

    # Looping through each symbol in data
    for symbol in data:
        
        # Creating an empty temporary dataframe
        temp = pd.DataFrame()
        
        # Creating a series for timestamp and closing price
        timestamp = pd.Series(data[symbol]['timestamp'])
        price = pd.Series(data[symbol]['close'])

        # Converting timestamp to year/month/day format
        timestamp = timestamp.apply(lambda x : datetime.utcfromtimestamp(x).strftime('%Y-%m-%d'))

        # Saving series into a the empty DataFrame I created earlier
        temp[symbol + '_price'] = price
        temp['date'] = timestamp

        # Converting type object to datetime64
        temp['date'] = pd.to_datetime(temp.date)
        
        # Using the date as the index
        temp = temp.set_index('date').sort_index()
        
        # Saving the temporary DataFrame to the main DataFrame
        df = pd.merge(df, temp, how='outer', left_index=True, right_index=True)
        
    return df

In [29]:
# Example output of just two symbols using the extract function
df

Unnamed: 0,MSFT_price,MSFT_date,AAPL_price,AAPL_date
0,62.30,2017-01-04,29.005,2017-01-04
1,62.30,2017-01-05,29.153,2017-01-05
2,62.84,2017-01-06,29.478,2017-01-06
3,62.64,2017-01-09,29.747,2017-01-09
4,62.62,2017-01-10,29.778,2017-01-10
...,...,...,...,...
1254,341.25,2021-12-28,179.290,2021-12-28
1255,341.95,2021-12-29,179.380,2021-12-29
1256,339.32,2021-12-30,178.200,2021-12-30
1257,336.32,2021-12-31,177.570,2021-12-31


###  Grabbing the top 250 day_gainers

In [189]:
url = 'https://yfapi.net/ws/screeners/v1/finance/screener/predefined/saved?count=250&scrIds=day_gainers'

headers = {
    'x-api-key': api_key
    }

response = requests.request("GET", url, headers=headers)
data = response.json()

In [190]:
# Inspecting the Data
data

{'finance': {'result': [{'id': 'ec5bebb9-b7b2-4474-9e5c-3e258b61cbe6',
    'title': 'Day Gainers - US',
    'description': 'Stocks ordered in descending order by price percent change greater than 3% with respect to the previous close',
    'canonicalName': 'DAY_GAINERS',
    'criteriaMeta': {'size': 250,
     'offset': 0,
     'sortField': 'percentchange',
     'sortType': 'DESC',
     'quoteType': 'EQUITY',
     'topOperator': 'AND',
     'criteria': [{'field': 'percentchange',
       'operators': ['GT'],
       'values': [3.0],
       'labelsSelected': []},
      {'field': 'region',
       'operators': ['EQ'],
       'values': [],
       'labelsSelected': [52]},
      {'field': 'intradaymarketcap',
       'operators': ['EQ'],
       'values': [],
       'labelsSelected': [1, 2, 3]},
      {'field': 'dayvolume',
       'operators': ['GT'],
       'values': [15000],
       'labelsSelected': []}]},
    'rawCriteria': '{"offset":0,"size":250,"sortField":"percentchange","sortType":"DESC",

In [191]:
# Creating an empty list to grab the symbols 
list_of_symbols = []

# Going to be looping through the list of quotes
quotes = data['finance']['result'][0]['quotes']

# Loop through quotes and save symbol to list
for quote in quotes:
    list_of_symbols.append(quote['symbol'])

In [194]:
# Trying to automate grabbing the price of the most popular 250 stocks for the last 5 years
headers = {
    'x-api-key': api_key
    }


# Testing out only the first 10 stocks of list_of_stocks
for y in range(10, 250, 10):
    
    # Setting x to 10 less than y
    x = y-10
    
    # Created an emtpy string
    placeholder = ""
    
    # Creating a placeholder string from list of symbols
    for index, symbol in enumerate(list_of_symbols[x:y]):
        
        # if index is equal to nine then this is last symbol and doesn't require a comma at the end
        if index == 9:
            placeholder += symbol
            
        # All other index's will require a comma
        else:
            placeholder += symbol + ","
            
    # Using requests to get the last 5 years of the 5 stocks saved under placeholder
    response = requests.request("GET", f'https://yfapi.net/v8/finance/spark?interval=1d&range=5y&symbols={placeholder}', headers=headers)
    data = response.json()
    
    # Using the function defined above to create a Data Frame
    df = extract_date_and_price(data)

In [195]:
# Example out of 10 stocks, ran into a problem of NaNs
df

Unnamed: 0_level_0,JWN_price,VALN_price,HWC_price,EBKDY_price,UMBF_price,TRMK_price,TTM_price,MED_price,BANR_price,APTV_price
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-01-06,45.470,,43.95,15.55,76.700,34.91,36.34,41.430,56.71,57.980
2017-01-09,45.050,,43.25,15.43,74.860,34.29,37.26,41.100,55.90,57.049
2017-01-10,45.670,,44.05,15.22,75.640,34.72,38.15,41.570,57.17,59.070
2017-01-11,44.900,,44.25,15.19,75.460,35.13,38.51,41.170,57.55,59.966
2017-01-12,45.030,,43.40,15.24,73.820,34.24,37.94,40.500,56.56,59.564
...,...,...,...,...,...,...,...,...,...,...
2021-12-31,22.620,55.51,50.02,23.90,106.110,32.46,32.09,209.430,60.67,164.950
2022-01-03,23.980,50.99,50.85,23.25,107.430,33.08,33.48,215.830,61.29,166.030
2022-01-04,24.220,39.47,52.36,24.39,108.130,33.84,32.23,214.050,62.36,172.210
2022-01-05,22.870,38.57,51.94,23.64,106.320,33.84,31.95,211.520,61.98,168.280


### Ran into the issue of some stocks not being at least 5 years old
    - Need to use the date as the index and line up stocks prices to that index/date.

# Fixing Bugs

In [127]:
# Grabbing 5 years of close prices for Microsoft and Apple
headers = {
    'x-api-key': api_key
    }

response = requests.request("GET", 'https://yfapi.net/v8/finance/spark?interval=1d&range=5y&symbols=AAPL,STNE', headers=headers)

In [129]:
response.json().keys()

dict_keys(['AAPL', 'STNE'])

In [130]:
data = response.json()

In [172]:
# Creating an empty DataFrame
df = pd.DataFrame()
stne = pd.DataFrame()
aapl = pd.DataFrame()

### Testing out two mismatched symbols

In [173]:
# Looping through each symbol in data
for symbol in data:
    
    # Creating a series for timestamp and closing price
    timestamp = pd.Series(data[symbol]['timestamp'])
    price = pd.Series(data[symbol]['close'])

    # Converting timestamp to year/month/day format
    timestamp = timestamp.apply(lambda x : datetime.utcfromtimestamp(x).strftime('%Y-%m-%d'))

    # Saving series into a the empty DataFrame I created earlier
    
    if(symbol == 'AAPL'):
        aapl[symbol + '_price'] = price
        aapl['date'] = timestamp
        
        # Converting type object to datetime64
        aapl['date'] = pd.to_datetime(aapl.date)
        aapl = aapl.set_index('date').sort_index()
        
    else:
        stne
        stne[symbol + '_price'] = price
        stne['date'] = timestamp
        stne['date'] = pd.to_datetime(stne.date)
        stne = stne.set_index('date').sort_index()

In [175]:
# this merge worked
df =pd.merge(aapl, stne, how='outer', left_index=True, right_index=True)

### Final build of bug fix, changed above extract function to this
### Tested it, bug fixed 

In [185]:
# Creating an empty DataFrame
df = pd.DataFrame()

# Looping through each symbol in data
for symbol in data:
    
    temp = pd.DataFrame()
    # Creating a series for timestamp and closing price
    timestamp = pd.Series(data[symbol]['timestamp'])
    price = pd.Series(data[symbol]['close'])

    # Converting timestamp to year/month/day format
    timestamp = timestamp.apply(lambda x : datetime.utcfromtimestamp(x).strftime('%Y-%m-%d'))

    # Saving series into a the empty DataFrame I created earlier
    temp[symbol + '_price'] = price
    temp['date'] = timestamp

    # Converting type object to datetime64
    temp['date'] = pd.to_datetime(temp.date)
    temp = temp.set_index('date').sort_index()
    df = pd.merge(df, temp, how='outer', left_index=True, right_index=True)

In [186]:
df

Unnamed: 0_level_0,AAPL_price,STNE_price
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-06,29.478,
2017-01-09,29.747,
2017-01-10,29.778,
2017-01-11,29.938,
2017-01-12,29.813,
...,...,...
2021-12-31,177.570,16.86
2022-01-03,182.010,19.32
2022-01-04,179.700,17.07
2022-01-05,174.920,15.81


In [201]:
def get_data():   
    url = 'https://yfapi.net/ws/screeners/v1/finance/screener/predefined/saved?count=250&scrIds=day_gainers'

    headers = {
        'x-api-key': api_key
        }

    response = requests.request("GET", url, headers=headers)
    data = response.json()

    # Creating an empty list to grab the symbols 
    list_of_symbols = []

    # Going to be looping through the list of quotes
    quotes = data['finance']['result'][0]['quotes']

    # Loop through quotes and save symbol to list
    for quote in quotes:
        list_of_symbols.append(quote['symbol'])

    # Grabbing the price of the most popular 250 stocks for the last 5 years

    df = pd.DataFrame()
    # Testing out only the first 10 stocks of list_of_stocks
    for y in range(10, 260, 10):
        
        # Resetting temp as an empty Data Frame
        temp = pd.DataFrame()
        
        # Setting x to 10 less than y
        x = y-10

        # Created an emtpy string
        placeholder = ""

        # Creating a placeholder string from list of symbols
        for index, symbol in enumerate(list_of_symbols[x:y]):

            # if index is equal to nine then this is last symbol and doesn't require a comma at the end
            if index == 9:
                placeholder += symbol

            # All other index's will require a comma
            else:
                placeholder += symbol + ","

        # Using requests to get the last 5 years of the 5 stocks saved under placeholder
        response = requests.request("GET", f'https://yfapi.net/v8/finance/spark?interval=1d&range=5y&symbols={placeholder}', headers=headers)
        data = response.json()

        # Using the function defined above to create a Data Frame
        temp = extract_date_and_price(data)
        df = pd.concat([df, temp], axis = 1)
    return df

In [199]:
df

Unnamed: 0_level_0,FUTU_price,OGCP_price,BEKE_price,BYND_price,VET_price,VCRA_price,SFIX_price,DWAC_price,BIRD_price,GCPEF_price,PDD_price,SIVB_price,YMM_price,PTEN_price,CLR_price,SAGE_price,LW_price,DASH_price,RLX_price,AXNX_price
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2017-01-09,,19.75,,,42.46,19.75,,,,0.13,,174.33,,28.08,50.02,53.37,36.89,,,
2017-01-10,,19.83,,,42.58,19.85,,,,0.13,,175.95,,28.26,49.88,53.85,36.84,,,
2017-01-11,,,,,42.74,19.71,,,,0.12,,177.53,,28.22,51.17,52.85,36.89,,,
2017-01-12,,,,,42.36,19.35,,,,,,174.51,,28.32,51.37,53.88,36.96,,,
2017-01-13,,19.83,,,42.69,19.43,,,,,,176.59,,27.53,51.40,55.36,37.22,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-31,43.30,8.88,20.12,65.16,12.59,64.84,18.92,51.43,15.08,0.30,58.30,678.24,8.37,8.45,44.76,42.54,63.38,148.90,3.90,56.00
2022-01-03,42.06,9.16,19.72,64.87,12.98,65.89,19.40,51.70,16.22,,56.10,688.17,8.30,9.07,46.20,43.59,63.92,145.34,4.04,58.71
2022-01-04,37.40,9.61,17.60,61.62,12.62,67.03,18.95,52.23,15.59,0.30,49.82,706.15,7.80,9.57,47.87,42.21,63.30,135.91,3.75,58.61
2022-01-05,36.09,9.10,17.09,58.49,12.56,62.52,17.63,50.37,13.57,0.25,48.53,681.93,7.37,9.02,46.54,39.45,63.12,127.97,3.32,54.80


In [202]:
df = get_data()

In [203]:
df

Unnamed: 0_level_0,FUTU_price,OGCP_price,BEKE_price,BYND_price,SFIX_price,VET_price,VCRA_price,DWAC_price,BIRD_price,GCPEF_price,...,VORB_price,OGZPY_price,CRHKY_price,ONB_price,NWSA_price,FBC_price,HTLF_price,CVBF_price,LUKOY_price,SFNC_price
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-09,,19.75,,,,42.46,19.75,,,0.13,...,,5.14,3.960,17.60,12.00,26.63,46.10,22.48,56.20,30.325
2017-01-10,,19.83,,,,42.58,19.85,,,0.13,...,,5.20,4.100,17.90,12.00,26.89,46.75,22.81,56.20,30.575
2017-01-11,,,,,,42.74,19.71,,,0.12,...,,5.27,3.910,17.90,12.04,26.84,47.80,23.05,57.00,30.800
2017-01-12,,,,,,42.36,19.35,,,,...,,5.23,3.910,17.55,12.02,26.52,46.70,22.51,55.80,30.050
2017-01-13,,19.83,,,,42.69,19.43,,,,...,,5.31,3.990,17.80,12.05,26.32,47.15,22.75,54.92,30.425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-31,43.30,8.88,20.12,65.16,18.92,12.59,64.84,51.43,15.08,0.30,...,8.04,9.19,16.420,18.12,22.31,47.94,50.61,21.41,89.80,29.580
2022-01-03,42.06,9.16,19.72,64.87,19.40,12.98,65.89,51.70,16.22,,...,7.30,9.42,15.880,18.37,22.77,48.71,51.24,21.58,89.23,30.210
2022-01-04,37.40,9.61,17.60,61.62,18.95,12.62,67.03,52.23,15.59,0.30,...,6.83,9.31,15.170,18.82,22.59,49.39,51.74,21.89,89.22,30.880
2022-01-05,36.09,9.10,17.09,58.49,17.63,12.56,62.52,50.37,13.57,0.25,...,6.29,8.73,14.650,18.91,21.83,49.52,51.68,21.92,85.05,30.620
