yfinance API Documentation: https://ranaroussi.github.io/yfinance/reference/index.html 


In [None]:
import sys 
import os

import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime
from datetime import timezone

import importlib

# Add root path so other subfolders are accessible
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# Reload it every time the cell is run
import utils.db_azure
importlib.reload(utils.db_azure)

from utils.db_azure import get_analytics_azure_engine
from utils.db_azure import azure_upsert



In [None]:
# get general stock data based on screener query
from yfinance import EquityQuery

q = EquityQuery('and', [
    EquityQuery('is-in', ['exchange', 'TOR']),
    EquityQuery('is-in', ['sector',"Energy"])
])

response = yf.screen(q, size = 250) # 250 is max at one time

# extract to a df
df = pd.json_normalize(response["quotes"])

In [94]:
# get industry& sector for each company

def get_industry(symbol):
    stock = yf.Ticker(symbol)
    info = stock.get_info()
    return info.get('industry',None) # get industry if in dict keys, return None if it doesn't exist


def get_sector(symbol):
    stock = yf.Ticker(symbol)
    info = stock.get_info()
    return info.get('sector',None) # get sector if in dict keys, return None if it doesn't exist


df['industry'] = df['symbol'].apply(get_industry)
df['sector'] = df['symbol'].apply(get_sector)


In [137]:
header_dict = {
    "symbol": "symbol",
    "shortName": "asset_name",
    "currency": "currency",
    "exchange": "exchange",
    "marketCap": "market_cap",
    "sharesOutstanding": "shares_outstanding",   
    "sector" : "sector",
    'industry' : 'industry'
}


In [138]:
# keep only needed columns & rename
df_new = df[header_dict.keys()]

df_new.rename(columns=header_dict, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new.rename(columns=header_dict, inplace=True)


In [139]:
# filter out non O&G companies and pref shares
df_new = df_new[df_new['sector']=='Energy']
df_new = df_new[df_new['industry'].str.contains('Oil & Gas',na=False)]
df_new = df_new[~df_new['asset_name'].str.contains('PREF SERIES',na=False)] # get rid of pref share tickers
df_new = df_new[~df_new['asset_name'].str.contains('PREF SER',na=False)] 
df_new = df_new[~df_new['asset_name'].str.contains('PREF',na=False)] 



In [140]:
# add meta data columns
df_new['unit'] = 'share'
df_new['asset_type'] = 'stock'
df_new['updated_at'] = datetime.now()

In [145]:
# Upsert header
azure_engine = get_analytics_azure_engine()
azure_upsert(df_new,azure_engine,'asset_header')