# JSON Fundamentals ingest

- Experiment: load JSON fundamentals files into pandas DataFrames


In [1]:
import pandas as pd
import numpy as np
import pickle
import os
import requests
import json
from io import StringIO
import yaml
from datetime import date
import requests
from sklearn.metrics import classification_report
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

# For reading stock data from yahoo
#import pandas_datareader as pdr
from pandas_datareader.data import DataReader
# import yahoo_fin.stock_info as si

# For time stamps
from datetime import datetime

# for LSTM
from keras.models import Sequential
from keras.layers import Dense, LSTM, Input
from keras.models import Model
# from tensorflow.keras.layers import Input, Dropout, Dense, BatchNormalization, Activation, concatenate, GRU, Embedding, Flatten, BatchNormalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
# access datasets from quandl.com - need to pip install Quandl to use
import quandl
# Name of the YAML config file; it is resolved against the current working directory when loaded.
config_file = 'json_ingest_config.yml'

Using TensorFlow backend.


In [2]:
# load config file
# The YAML file is looked up in the current working directory.
current_path = os.getcwd()
print("current directory is: "+current_path)

path_to_yaml = os.path.join(current_path, config_file)
print("path_to_yaml "+path_to_yaml)
try:
    with open(path_to_yaml, 'r') as c_file:
        config = yaml.safe_load(c_file)
except Exception:
    print('Error reading the config file')
    # Re-raise so the real cause (missing file, bad YAML) is visible here;
    # swallowing it leaves `config` undefined and later cells fail with a
    # misleading NameError instead.
    raise

current directory is: C:\personal\karma_stocks_2021\stock_investigation\notebooks
path_to_yaml C:\personal\karma_stocks_2021\stock_investigation\notebooks\json_ingest_config.yml


In [3]:
# load config parms
general_cfg = config['general']

# tokens and data location
eod_token = general_cfg['eod_token']
eod_historical_token = general_cfg['eod_historical_token']
fundamentals_directory = config['files']['fundamentals_directory']

# master date range
from_date = general_cfg['master_start']
to_date = general_cfg['master_end']

parms = {
    'master_date_mode': general_cfg['master_date_mode'],
    'master_start': general_cfg['master_start'],
    'master_end': general_cfg['master_end'],
}
# set_start_end() reads parms['years_window'] when master_date_mode is off;
# copy it across when the config provides it so that branch can run.
if 'years_window' in general_cfg:
    parms['years_window'] = general_cfg['years_window']


# Helper functions

In [4]:
# DataReader: https://riptutorial.com/pandas/topic/1912/pandas-datareader
# https://pandas-datareader.readthedocs.io/en/latest/remote_data.html
# 
# Set up End and Start times for data grab
# check to see if start and end dates are hard-coded with master dates
def set_start_end():
    """Return the (start, end) date range used for data grabs.

    When parms['master_date_mode'] is truthy the range is taken verbatim from
    parms['master_start'] and parms['master_end']. Otherwise end is the current
    time and start is the same calendar day parms['years_window'] years earlier
    (parms['years_window'] must be populated before this branch runs).

    Side effect: pulls IBM data from stooq over the range as a test dataset
    and prints its shape.

    Returns:
        tuple: (start, end)

    Raises:
        KeyError: if a required key is missing from parms.
    """
    if parms['master_date_mode']:  # start and end hardcoded by parameters
        start = parms['master_start']
        end = parms['master_end']
    else:  # end is current date; start is current date minus years_window
        end = datetime.now()
        start_year = end.year - parms['years_window']
        try:
            start = datetime(start_year, end.month, end.day)
        except ValueError:
            # Feb 29 has no counterpart in a non-leap start year; use Feb 28.
            # Any other ValueError (e.g. year out of range) is a real error.
            if (end.month, end.day) != (2, 29):
                raise
            start = datetime(start_year, 2, 28)
    # output a test dataset
    tester = DataReader('IBM', 'stooq', start, end)
    print(tester.shape)
    return (start, end)

In [5]:
def get_path():
    """Return the absolute path of the fundamentals data directory.

    The directory named by the module-level fundamentals_directory is resolved
    as a sibling of the current working directory (i.e. one level up, then into
    fundamentals_directory).
    """
    sibling_dir = os.path.join(os.getcwd(), '..', fundamentals_directory)
    return os.path.abspath(sibling_dir)

# Load a sample JSON file into a pandas DataFrame

In [7]:
# Sample fundamentals file, read straight into a DataFrame:
# top-level JSON keys become columns, nested keys become the index.
json_filename = "ABX_2021-10-18.json"
fundamentals_path = get_path()
json_file = os.path.join(fundamentals_path, json_filename)
df = pd.read_json(json_file)
df.head()

Unnamed: 0,General,Highlights,Valuation,SharesStats,Technicals,SplitsDividends,Holders,InsiderTransactions,ESGScores,outstandingShares,Earnings,Financials
Code,ABX,,,,,,,,,,,
Type,Common Stock,,,,,,,,,,,
Name,Barrick Gold Corporation,,,,,,,,,,,
Exchange,TO,,,,,,,,,,,
CurrencyCode,CAD,,,,,,,,,,,


In [8]:
# Widen the preview to the first 30 rows of the raw frame.
df.head(30)

Unnamed: 0,General,Highlights,Valuation,SharesStats,Technicals,SplitsDividends,Holders,InsiderTransactions,ESGScores,outstandingShares,Earnings,Financials
Code,ABX,,,,,,,,,,,
Type,Common Stock,,,,,,,,,,,
Name,Barrick Gold Corporation,,,,,,,,,,,
Exchange,TO,,,,,,,,,,,
CurrencyCode,CAD,,,,,,,,,,,
CurrencyName,Canadian Dollar,,,,,,,,,,,
CurrencySymbol,C$,,,,,,,,,,,
CountryName,Canada,,,,,,,,,,,
CountryISO,CA,,,,,,,,,,,
ISIN,CA0679011084,,,,,,,,,,,


In [9]:
# Widen the preview to the first 60 rows of the raw frame.
df.head(60)

Unnamed: 0,General,Highlights,Valuation,SharesStats,Technicals,SplitsDividends,Holders,InsiderTransactions,ESGScores,outstandingShares,Earnings,Financials
Code,ABX,,,,,,,,,,,
Type,Common Stock,,,,,,,,,,,
Name,Barrick Gold Corporation,,,,,,,,,,,
Exchange,TO,,,,,,,,,,,
CurrencyCode,CAD,,,,,,,,,,,
CurrencyName,Canadian Dollar,,,,,,,,,,,
CurrencySymbol,C$,,,,,,,,,,,
CountryName,Canada,,,,,,,,,,,
CountryISO,CA,,,,,,,,,,,
ISIN,CA0679011084,,,,,,,,,,,


In [10]:
# Inspect the last rows of the raw frame.
df.tail()

Unnamed: 0,General,Highlights,Valuation,SharesStats,Technicals,SplitsDividends,Holders,InsiderTransactions,ESGScores,outstandingShares,Earnings,Financials
Trend,,,,,,,,,,,"{'2022-12-31': {'date': '2022-12-31', 'period'...",
Annual,,,,,,,,,,,"{'2021-09-30': {'date': '2021-09-30', 'epsActu...",
Balance_Sheet,,,,,,,,,,,,"{'currency_symbol': 'USD', 'quarterly': {'2021..."
Cash_Flow,,,,,,,,,,,,"{'currency_symbol': 'USD', 'quarterly': {'2021..."
Income_Statement,,,,,,,,,,,,"{'currency_symbol': 'USD', 'quarterly': {'2021..."


# Bring sample JSON into a dictionary

In [13]:
# Load the sample JSON file into a plain dictionary.
# The context manager closes the file even if parsing fails, and the explicit
# encoding avoids mis-decoding UTF-8 JSON under a platform default such as cp1252.
with open(json_file, encoding="utf-8") as f:
    json_dict = json.load(f)

# Iterating over a dict yields its keys: list the top-level sections,
# then the fields inside the 'General' section.
print("top level")
for i in json_dict:
    print(i)
print("General")
for i in json_dict['General']:
    print(i)

top level
General
Highlights
Valuation
SharesStats
Technicals
SplitsDividends
Holders
InsiderTransactions
ESGScores
outstandingShares
Earnings
Financials
General
Code
Type
Name
Exchange
CurrencyCode
CurrencyName
CurrencySymbol
CountryName
CountryISO
ISIN
CIK
EmployerIdNumber
FiscalYearEnd
IPODate
InternationalDomestic
Sector
Industry
GicSector
GicGroup
GicIndustry
GicSubIndustry
Description
Address
AddressData
Listings
Officers
Phone
WebURL
LogoURL
FullTimeEmployees
UpdatedAt
