In [3]:

import requests
import pandas as pd
import bs4 
import numpy as np

Get wiki ticker

In [4]:

def get_wiki_ticker():
    wiki_page = requests.get("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies").text
    soup = bs4.BeautifulSoup(wiki_page, 'html.parser')
    table = soup.find("table", {"class":"wikitable sortable"})

    ticker = []
    Sector = []
    SubSector = []
    HqLocation = []
    Founded = []

    # First row is header
    for row in table.findAll('tr')[1:]:
        ticker.append(row.findAll('td')[0].text)
        Sector.append(row.findAll('td')[3].text)
        SubSector.append(row.findAll('td')[4].text)
        HqLocation.append(row.findAll('td')[5].text)
        Founded.append(row.findAll('td')[8].text)

    ticker = list(map(lambda s: s.strip(), ticker))
    Founded = list(map(lambda s: s.strip(), Founded))

    ticker = pd.DataFrame(ticker,columns=['ticker'])
    sector = pd.DataFrame(Sector,columns=['sector'])
    subSector = pd.DataFrame(SubSector,columns=['subSector'])
    hqLocation = pd.DataFrame(HqLocation,columns=['hqLocation'])
    founded = pd.DataFrame(Founded,columns=['founded'])



    return  pd.concat([ticker, sector, subSector, hqLocation,founded],axis=1)

In [5]:
ticker_data = get_wiki_ticker()
# df["sector"] = row['sector']
# df["subSector"] = row['subSector']
# df["hqLocation"] = row['hqLocation']
# df["founded"] = row['founded']

# Get Price data

In [6]:
def get_yh_esg(ticker_list):
    
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

    
    #url_esg = "https://query2.finance.yahoo.com/v1/finance/esgChart"
    dataframes_esg = []
    dataframes_price = []

    for ticker in ticker_list:
        # ESG historical data (only changes yearly)
        url_esg = f"https://query2.finance.yahoo.com/v1/finance/esgChart?symbol={ticker}"
        response = requests.get(url_esg, headers=headers)
        if response.ok:
            try:
                tmp_df_esg = pd.DataFrame(response.json()["esgChart"]["result"][0]["symbolSeries"])
            except TypeError:
                continue
            except KeyError:
                continue

            tmp_df_esg["ticker"] = ticker
            tmp_df_esg["timestamp"] = pd.to_datetime(tmp_df_esg["timestamp"], unit="s")
            dataframes_esg.append(tmp_df_esg)




    df_esg = pd.concat(dataframes_esg)


    return df_esg






In [7]:

def get_yh_price(ticker_list, interval = '1wk'):
    
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    
    dataframes_price = []

    for ticker in ticker_list:

        # Price data
        url_price = f"http://query2.finance.yahoo.com/v8/finance/chart/{ticker}?symbol={ticker}&period1=1090618879&period2=9999999999&interval={interval}"
        response = requests.get(url_price, headers=headers)
        if response.ok:
            try:
                dates = pd.DataFrame({'timestamp':response.json()["chart"]["result"][0]["timestamp"]})

                #low, open ,volume, close, high
                df_quote = pd.DataFrame(response.json()["chart"]["result"][0]["indicators"]["quote"][0])
                # ad_close
                df_adjclose = pd.DataFrame(response.json()["chart"]["result"][0]["indicators"]["adjclose"][0])

                tmp_df_price = pd.concat([dates, df_quote,df_adjclose], axis=1)
                tmp_df_price['timestamp'] = pd.to_datetime(tmp_df_price['timestamp'], unit = "s")

            except TypeError:
                continue
            except KeyError:
                continue

            tmp_df_price["ticker"] = ticker
            dataframes_price.append(tmp_df_price)

    df_price = pd.concat(dataframes_price)

    return  df_price




In [8]:
def get_yh_asset_esg(ticker_list):

    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    
    dataframes_asset_profile = []
    dataframes_company_officers = []
    dataframes_esg_info = []

    for ticker in ticker_list:

        # Price data
        url = f"https://query2.finance.yahoo.com/v10/finance/quoteSummary/{ticker}?modules=assetProfile%2CesgScores"
        response = requests.get(url, headers=headers)
        if response.ok:
            try:
                assetProfile = response.json()["quoteSummary"]["result"][0]["assetProfile"]
                esg_scores_dict = response.json()["quoteSummary"]["result"][0]["esgScores"]
            except TypeError:
                continue
            except KeyError:
                continue

            
            df_asset_profile = pd.DataFrame({'address':assetProfile.get('address1', None),
                                            'city':assetProfile.get('city', None),
                                            'state':assetProfile.get('state', None),
                                            'zip':assetProfile.get('zip', None),
                                            'country':assetProfile.get('country', None),
                                            'industry':assetProfile.get('industry', None),
                                            'sector':assetProfile.get('sector', None),
                                            'boardRisk':assetProfile.get('boardRisk', None),
                                            'compensationRisk':assetProfile.get('compensationRisk', None),
                                            'shareHolderRightsRisk':assetProfile.get('shareHolderRightsRisk', None),
                                            'overallRisk':assetProfile.get('overallRisk', None),
                                            'ticker':ticker}, 
                                        index = [0])

            dataframes_asset_profile.append(df_asset_profile)

            officers = {'maxAge':list(),
                        'name':list(),
                        'age':list(),
                        'title':list(),
                        'yearBorn':list(),
                        'fiscalYear':list(),
                        'totalPay':list(),
                        'exercisedValue':list(),
                        'unexercisedValue':list()}

            for item in assetProfile['companyOfficers']:
                officers['maxAge'].append(item.get('maxAge', None))
                officers['name'].append(item.get('name', None))
                officers['age'].append(item.get('age', None))
                officers['title'].append(item.get('title', None))
                officers['yearBorn'].append(item.get('yearBorn', None))
                officers['fiscalYear'].append(item.get('fiscalYear', None))

                if not item.get('totalPay', None) is None:
                    officers['totalPay'].append(item['totalPay'].get('raw', None))
                else:
                    officers['totalPay'].append(None)

                if not item.get('exercisedValue', None) is None:
                    officers['exercisedValue'].append(item['exercisedValue'].get('raw', None))
                else:
                    officers['exercisedValue'].append(None)
                    
                if not item.get('unexercisedValue', None) is None:
                    officers['unexercisedValue'].append(item['unexercisedValue'].get('raw', None))
                else:
                    officers['unexercisedValue'].append(None)

        

            officers = pd.DataFrame(officers)#index = list(range(len(assetProfile['companyOfficers'])))
            officers['ticker'] = ticker


            dataframes_company_officers.append(officers)


            dataframes_esg_info.append( pd.DataFrame({'adult':esg_scores_dict.get('adult', None),
                'alcoholic':esg_scores_dict.get('alcoholic', None),
                'animalTesting':esg_scores_dict.get('animalTesting', None),
                'catholic':esg_scores_dict.get('catholic', None),
                'controversialWeapons':esg_scores_dict.get('controversialWeapons', None),
                'smallArms':esg_scores_dict.get('smallArms', None),
                'furLeather':esg_scores_dict.get('furLeather', None),
                'gambling':esg_scores_dict.get('gambling', None),
                'gmo':esg_scores_dict.get('gmo', None),
                'militaryContract':esg_scores_dict.get('militaryContract', None),
                'nuclear':esg_scores_dict.get('nuclear', None),
                'pesticides':esg_scores_dict.get('pesticides', None),
                'palmOil':esg_scores_dict.get('palmOil', None),
                'coal':esg_scores_dict.get('coal', None),
                'tobacco':esg_scores_dict.get('tobacco',None),
                'ticker':ticker}, index = [0])
            )

    df_asset_profiles = pd.concat(dataframes_asset_profile)
    df_company_officers = pd.concat(dataframes_company_officers)
    df_esg_info = pd.concat(dataframes_esg_info)

    return  df_asset_profiles, df_company_officers, df_esg_info
    

In [9]:
df_esg = get_yh_esg(ticker_data['ticker'])
df_asset_profiles, df_company_officers, df_esg_info = get_yh_asset_esg(ticker_data['ticker'])
df_price = get_yh_price(ticker_data['ticker'])

In [10]:
df_asset_profiles, df_company_officers, df_esg_info = get_yh_asset_esg(ticker_data['ticker'])

In [12]:
writer = pd.ExcelWriter('YAHOO_PRICE_ESG.xlsx', engine='xlsxwriter')

# Write each dataframe to a different worksheet.
df_esg.to_excel(writer, sheet_name='ESG')
df_esg_info.to_excel(writer, sheet_name='esg_info')
df_price.to_excel(writer, sheet_name='PRICE')
df_asset_profiles.to_excel(writer, sheet_name='asset_profiles')
df_company_officers.to_excel(writer, sheet_name='company_officers')

# Close the Pandas Excel writer and output the Excel file.
writer.save()
writer.close()

  warn("Calling close() on already closed file.")
