In [152]:
import pandas as pd
import numpy as np
import pandas_datareader.data as web
from datetime import datetime
from pathlib import Path
import requests
import nasdaqdatalink
import json
import tqdm
import time
import os
from tqdm import tqdm
from eodhd import APIClient

In [4]:
from twelvedata import TDClient
# Prefer the TWELVEDATA_API_KEY environment variable so the credential does not have to live in
# the notebook; the literal is kept only as a fallback so existing runs behave exactly as before.
# NOTE(review): a key that has been committed should be rotated and this fallback removed.
td = TDClient(apikey = os.environ.get('TWELVEDATA_API_KEY', '1166574b73ce408ca939ca15595f719e'))

In [103]:
# Create the "Data/Financial data" output folder under the current working directory.
# parents=True also creates the intermediate "Data" folder when it is missing (the original call
# raised FileNotFoundError in that case); exist_ok=True makes re-running this cell a no-op.
financial_data = Path.cwd()/"Data/Financial data"
financial_data.mkdir(parents=True, exist_ok=True)

### Define functions

In [156]:
def convert_json(response):
    """
    Convert an API response into a pandas DataFrame with a parsed 'datetime' column.

    The response body is decoded once with ``response.json()``; the list stored under
    its 'values' key becomes the rows of the DataFrame and the 'datetime' column is
    converted from strings to pandas datetimes.

    Args:
        response: object exposing a ``json()`` method that returns a dict
            (e.g. the result of ``requests.get``).

    Returns:
        pandas.DataFrame built from ``payload['values']``.

    Raises:
        KeyError: if the payload has no (or an empty) 'values' entry. The exception
            text includes the payload's 'message' field when present, so the reason
            for the failure is visible instead of a bare ``KeyError('values')``.
    """
    payload = response.json()  # decode the body only once

    values = payload.get('values')
    if not values:
        # Same exception type the original lookup raised, but with a useful explanation.
        reason = payload.get('message', 'no message in payload')
        raise KeyError(f"response contains no 'values': {reason}")

    df = pd.DataFrame(values)
    df['datetime'] = pd.to_datetime(df['datetime'])

    return df

def stock_data(ticker, start_date="2020-01-01 00:00:00", end_date="2023-07-31 23:59:59",
               api_key=None, timeout=30):
    """
    Download daily price data and three indicators for one stock from the Twelve Data REST API.

    Four endpoints are queried in this order: ``time_series`` (with the previous close),
    ``mom`` (momentum), ``rsi`` (relative strength index) and ``mfi`` (money flow index).
    Each response is turned into a DataFrame with ``convert_json`` and the four frames
    are outer-merged on 'datetime'.

    Args:
        ticker (str): ticker symbol of the stock, e.g. "AAPL".
        start_date (str): first timestamp requested; defaults to the original fixed window.
        end_date (str): last timestamp requested; defaults to the original fixed window.
        api_key (str | None): Twelve Data API key. When None, the TWELVEDATA_API_KEY
            environment variable is used, falling back to the key that used to be
            embedded in the URLs so existing calls keep working.
        timeout (float): seconds to wait for each HTTP request before giving up.

    Returns:
        pandas.DataFrame: the four frames outer-merged on 'datetime'.

    Raises:
        requests.RequestException: on connection problems, timeouts or HTTP error statuses.
        KeyError: propagated from ``convert_json`` when a payload has no 'values'.
    """
    if api_key is None:
        # NOTE(review): the literal fallback is a committed credential; rotate it and drop it.
        api_key = os.environ.get("TWELVEDATA_API_KEY", "1166574b73ce408ca939ca15595f719e")

    base_url = "https://api.twelvedata.com"

    # Query parameters shared by every endpoint.
    common_params = {
        "apikey": api_key,
        "interval": "1day",
        "symbol": ticker,
        "timezone": "America/New_York",
        "start_date": start_date,
        "end_date": end_date,
        "format": "JSON",
        "dp": 2,
    }

    # (endpoint, endpoint-specific parameters); the order fixes the column order of the result.
    endpoints = [
        ("time_series", {"type": "stock", "exchange": "NASDAQ", "previous_close": "true"}),
        ("mom", {}),
        ("rsi", {}),
        ("mfi", {}),
    ]

    stock_df = None
    for endpoint, extra_params in endpoints:
        response = requests.get(
            f"{base_url}/{endpoint}",
            params={**common_params, **extra_params},  # requests URL-encodes the values
            timeout=timeout,  # without a timeout a stalled connection would block forever
        )
        response.raise_for_status()
        frame = convert_json(response)

        # Join on "datetime", how = "outer", so dates missing from one endpoint are kept.
        if stock_df is None:
            stock_df = frame
        else:
            stock_df = stock_df.merge(frame, on = "datetime", how = "outer")

    return stock_df

def df_to_csv(df, name):
    """
    Save a pandas DataFrame as ``<cwd>/Data/Financial data/<name>.csv`` without the index.

    The target folder (including "Data") is created first when it does not exist, so
    the function no longer depends on the folder-creation cell having been run.

    Args:
        df (pd.DataFrame): dataframe to write.
        name (str): file name without extension. When working with stock data this
            should be the ticker of the stock.

    Returns:
        None. The only effect is the CSV file written to disk.
    """
    pathname = Path.cwd()/f"Data/Financial data/{name}.csv"

    # Make sure the destination folder exists; a no-op when it is already there.
    pathname.parent.mkdir(parents=True, exist_ok=True)

    df.to_csv(pathname, index = False)

    return None

def log(ticker, df, logfile, output_path=os.getcwd()):
    """
    Append one semicolon-separated line describing a download to a log file.

    When ``logfile`` does not exist yet it is created and a header line
    ``timestamp;Status;length;output_file`` is written before the entry.

    Args:
        ticker (str): ticker the entry refers to; embedded in the status text.
        df: object whose ``len()`` is logged (the downloaded dataframe, i.e. its row count).
        logfile (str | Path): path of the log file.
        output_path: value written to the 'output_file' column. Note that the default
            is the working directory at the time the function was defined.

    Returns:
        None.
    """
    # Decide about the header before opening: opening in append mode creates the file.
    write_header = not os.path.isfile(logfile)

    # Gather log information
    status_code = f"last call made happened in ticker {ticker}"
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) #local time
    length = len(df) #number of rows in the dataframe

    # A single context-managed handle: the original opened the file a first time without
    # ever closing it, which leaked the handle and left the header write unflushed.
    with open(logfile, 'a') as log_file:
        if write_header:
            header = ['timestamp', 'Status', 'length', 'output_file']
            log_file.write(";".join(header)+"\n")
        log_file.write(f'{timestamp};{status_code};{length};{output_path}' + "\n")

### Call data automatically
Errors are caught rather than raised, and the tickers that could not be retrieved are saved in a list so they can be obtained manually later.

In [205]:
# Load the semicolon-separated constituents file from <cwd>/Data and keep the
# "Symbol" column as a NumPy array; `tickers` and `tickers_list` refer to the same array.
NASDAQ100 = pd.read_csv(Path.cwd() / "Data" / "NASDAQ 100.csv", sep=";")
tickers = NASDAQ100["Symbol"].values
tickers_list = tickers
tickers_list

array(['AZN', 'ON', 'EXC', 'BIIB', 'ROST', 'MRNA', 'SGEN', 'BKR', 'CTSH',
       'CEG', 'VRSK', 'WBD', 'EA', 'CRWD', 'TTD', 'CSGP', 'XEL', 'FAST',
       'GEHC', 'DLTR', 'GFS', 'TEAM', 'ILMN', 'FANG', 'ALGN', 'DDOG',
       'ANSS', 'WBA', 'EBAY', 'ZS', 'ENPH', 'SIRI', 'ZM', 'JD', 'LCID'],
      dtype=object)

In [206]:
logfile = Path.cwd()/"Data/financial_data_logs.csv"
# NOTE(review): these look like tickers (and their positions) that failed in an earlier batch - confirm.
missing = ['ASML', 'PDD']
index_missing = [45,49]

# `tqdm` is the progress-bar class here: `from tqdm import tqdm` in the import cell rebinds the
# name after `import tqdm`, so `tqdm.tqdm(...)` raises AttributeError on a fresh kernel.
for i, ticker in tqdm(enumerate(tickers_list), total=len(tickers_list)):
    try: #get and save data for stock i in list if working
        df = stock_data(ticker) #save data of a single stock in dataframe
        df_to_csv(df, ticker) #save dataframe to csv
        log(ticker, df, logfile, output_path =  Path.cwd()/f"Data/Financial data/{ticker}.csv") #create logfile reporting each call
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C / kernel interrupt still stops the
        # hour-long loop, and report why the ticker failed so it can be retrieved later.
        print(f"Company {ticker} was not retrieved ({type(exc).__name__}: {exc})")
        missing.append(ticker)
        # NOTE(review): 66 is presumably the offset of this batch within the full list - confirm.
        index_missing.append(i+66)

    time.sleep(60) #wait for 60 seconds as compliance to API regulation from server (success or failure)


0it [00:00, ?it/s]

Company AZN was not retrieved


33it [57:26, 128.62s/it]

Company JD was not retrieved


35it [59:27, 101.93s/it]


### Merge into one large dataframe
Now it is time to read all the downloaded files and merge them in a wide-dataset.

After that, this should be converted to a long-format dataframe.

Here the full list of 100 stocks is required in order to get all tickers. I will also delete the four stocks that could not be retrieved.

In [243]:
def read_csv(name):
    """
    Load ``<cwd>/Data/Financial data/<name>.csv`` into a pandas DataFrame.

    Args(str): file name without the ".csv" extension (the ticker for stock data).

    Returns a pandas dataframe with the file's contents.
    """
    return pd.read_csv(Path.cwd() / "Data" / "Financial data" / f"{name}.csv")



Unnamed: 0,datetime,open,high,low,close,volume,previous_close,mom,rsi,mfi
0,2023-07-31,153.86,154.95,151.55,152.19,4457300,153.33,5.66,72.27,81.96
1,2023-07-28,150.23,154.49,150.21,153.33,4162300,148.50,7.97,75.15,83.00
2,2023-07-27,151.89,153.18,147.55,148.50,5426000,150.17,5.16,70.53,82.85
3,2023-07-26,147.83,150.27,147.09,150.17,4055500,148.65,10.08,75.00,82.23
4,2023-07-25,147.61,150.78,147.46,148.65,4121300,147.62,11.63,73.59,77.69
...,...,...,...,...,...,...,...,...,...,...
658,2020-12-15,126.69,127.60,121.50,124.80,10914400,130.00,0.00,0.00,0.00
659,2020-12-14,135.00,135.30,125.16,130.00,16966100,139.25,0.00,0.00,0.00
660,2020-12-11,146.55,151.50,135.10,139.25,26980800,144.71,0.00,0.00,0.00
661,2020-12-10,146.00,165.00,141.25,144.71,70447500,144.71,0.00,0.00,0.00
