## ESG Research 
### Yfinance, Pandas, NumPy


# Installation
* pip install pandas
* pip install numpy
* pip3 install tqdm
* pip3 install seaborn
* pip install yfinance | pip3 install yfinance
* pip install git+https://github.com/rodrigobercini/yfinance.git


In [2]:
#Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
from pandas import Series, DataFrame
from pandas.tseries import offsets
import csv
import datetime
import csv
from csv import writer
from tqdm import tqdm
import os
import time
from yahoofinancials import YahooFinancials
from sys import platform

dt = datetime.datetime.today()

YEAR = dt.year
MONTH = dt.month
CURRENT_CSV_FILE = bool

if platform == "linux" or platform == "linux2":
       op_sys = "linux"
elif platform == "darwin":
       op_sys = "Mac"
elif platform == "win32":
       op_sys = "Windows"
    
ESG_COLUMNS = {'palmOil', 'controversialWeapons', 'gambling', 'socialScore', 'nuclear',
       'furLeather', 'alcoholic', 'gmo', 'catholic', 'socialPercentile',
       'peerCount', 'governanceScore', 'environmentPercentile',
       'animalTesting', 'tobacco', 'totalEsg', 'highestControversy',
       'esgPerformance', 'coal', 'pesticides', 'adult', 'percentile',
       'peerGroup', 'smallArms', 'environmentScore', 'governancePercentile',
       'militaryContract', 
       #'Stock'
}

ALL_COLUMNS = {'palmOil', 'peerCount', 'environmentScore', 'militaryContract',
       'esgPerformance', 'coal', 'peerGroup', 'furLeather', 'gambling',
       'animalTesting', 'catholic', 'nuclear', 'totalEsg', 'adult',
       'environmentPercentile', 'highestControversy', 'socialScore',
       'percentile', 'alcoholic', 'socialPercentile', 'pesticides',
       'governancePercentile', 'controversialWeapons', 'gmo', 'smallArms',
       'tobacco', 'governanceScore', 'Stock Ticker', 'Sector', 'Name'
}


USER_PATH = '/Users/MichaelCalmette/Documents/Finance Research/ESG Data Files/'
DATE_PATH = "esg-{}-{}.csv".format(dt.month, dt.year)
#DATE_PATH = "esg-2-2022.csv"

FILE_PATH = os.path.join(USER_PATH,DATE_PATH)

Create functions to see if file exists / if there is data in there

In [2]:
f = open(FILE_PATH, 'w')
f.close
### Check if DF is empty

<function TextIOWrapper.close()>

In [69]:
def send_notification(title, t):
    if op_sys == "Mac":
        message = ("Time taken: {} minutes".format(t))
        command = f'''
        osascript -e 'display notification "{message}" with title "{title}"'
        '''
        os.system(command)

Need to compare data in esg-csv to all tickers

In [13]:
df = pd.read_csv('tickers.csv')
all_data = pd.DataFrame(columns=ESG_COLUMNS)
# Only do if month is partially filled
all_data = pd.read_csv(FILE_PATH)
empty_stocks = pd.DataFrame(columns= ['Ticker'])

In [None]:
tic = df.sample(n=1)
ticker_sym = tic.loc[tic.index[0],'Symbol'] # get ticker name ex: 'AAPL'
print(ticker_sym)
ticker = yf.Ticker(ticker_sym).history(period="1mo")
ticker

Yahoo Financials to get additional stock data

In [None]:
yahoo_financials = YahooFinancials('AAPL')
print(yahoo_financials.get_summary_data())

Test pulling a random ticker and getting sustainability information

In [None]:
tic = df.sample(n=1)
ticker_sym = tic.loc[tic.index[0],'Symbol'] # get ticker name ex: 'AAPL'
ticker = yf.Ticker(ticker_sym)
y = ticker.sustainability 
y

In [25]:
from reader import feed

In [117]:
df1_transposed = pd.DataFrame()
no_data_stocks = 0
start_time = time.time()
temp = all_data[['Stock Ticker','Sector','Name']].copy()

for x in tqdm(range(63)):
    df_test = df.merge(temp, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='left_only']
    tic = df_test.sample(n=1)
    ticker_sym = tic.loc[tic.index[0],'Symbol'] # get ticker name ex: 'AAPL'
    ticker = yf.Ticker(ticker_sym)
    y = ticker.sustainability 
    df1 = y
    end_time = time.perf_counter()
    print(f"Downloaded and stored ESG data in {end_time - start_time:0.4f} seconds")

    if df1 is None:
        no_data_stocks += 1
        empty_stocks = empty_stocks.append({"Ticker": ticker_sym}, ignore_index = True)
    else:
        df1_transposed = df1.transpose()
        #print(df1_transposed.columns)
        df1_transposed['Stock Ticker'] = ticker_sym
        df1_transposed['Sector'] = tic.loc[tic.index[0],'Sector']
        df1_transposed['Name'] = tic.loc[tic.index[0],'Name']

        #all_data = df1_transposed
        all_data = all_data.append(df1_transposed, ignore_index = True)
    temp = temp[temp['Stock Ticker'] != ticker_sym]

all_shape = len(all_data)
empty_shape = len(empty_stocks)
time_taken = round((time.time() - start_time) / 60,2)

print("There are: {} stocks left in the database".format(len(df_test)))
print("There are: {} stocks with no ESG data".format(no_data_stocks))

print("Dataframe shape: {}".format(all_shape))
print("Missing stocks: {}".format(empty_shape))

send_notification("Completed ESG Data Pull",time_taken)

  2%|▏         | 1/63 [00:11<12:19, 11.93s/it]

Downloaded and stored ESG data in -1653105648.6582 seconds


  3%|▎         | 2/63 [00:24<12:34, 12.37s/it]

Downloaded and stored ESG data in -1653105635.9690 seconds


  5%|▍         | 3/63 [00:36<11:59, 11.99s/it]

Downloaded and stored ESG data in -1653105624.4571 seconds


  6%|▋         | 4/63 [00:48<11:57, 12.15s/it]

Downloaded and stored ESG data in -1653105612.0362 seconds


  8%|▊         | 5/63 [00:59<11:09, 11.54s/it]

Downloaded and stored ESG data in -1653105601.5752 seconds


 10%|▉         | 6/63 [01:10<11:02, 11.62s/it]

Downloaded and stored ESG data in -1653105589.7991 seconds


 11%|█         | 7/63 [01:23<11:08, 11.93s/it]

Downloaded and stored ESG data in -1653105577.2385 seconds


 13%|█▎        | 8/63 [01:35<11:06, 12.12s/it]

Downloaded and stored ESG data in -1653105564.6990 seconds


 14%|█▍        | 9/63 [01:46<10:35, 11.78s/it]

Downloaded and stored ESG data in -1653105553.6878 seconds


 16%|█▌        | 10/63 [01:58<10:16, 11.64s/it]

Downloaded and stored ESG data in -1653105542.3570 seconds


 17%|█▋        | 11/63 [02:09<09:58, 11.52s/it]

Downloaded and stored ESG data in -1653105531.1216 seconds


 19%|█▉        | 12/63 [02:21<09:49, 11.57s/it]

Downloaded and stored ESG data in -1653105519.4372 seconds


 21%|██        | 13/63 [02:34<10:00, 12.00s/it]

Downloaded and stored ESG data in -1653105506.4378 seconds


 22%|██▏       | 14/63 [02:45<09:43, 11.91s/it]

Downloaded and stored ESG data in -1653105494.7477 seconds


 24%|██▍       | 15/63 [02:57<09:23, 11.73s/it]

Downloaded and stored ESG data in -1653105483.4262 seconds


 25%|██▌       | 16/63 [03:08<09:08, 11.67s/it]

Downloaded and stored ESG data in -1653105471.9012 seconds


 27%|██▋       | 17/63 [03:20<08:57, 11.69s/it]

Downloaded and stored ESG data in -1653105460.1642 seconds


 29%|██▊       | 18/63 [03:33<08:58, 11.97s/it]

Downloaded and stored ESG data in -1653105447.5440 seconds


 30%|███       | 19/63 [03:44<08:42, 11.86s/it]

Downloaded and stored ESG data in -1653105435.9207 seconds


 32%|███▏      | 20/63 [03:56<08:27, 11.80s/it]

Downloaded and stored ESG data in -1653105424.2641 seconds


 33%|███▎      | 21/63 [04:08<08:14, 11.78s/it]

Downloaded and stored ESG data in -1653105412.5223 seconds


 35%|███▍      | 22/63 [04:19<08:02, 11.77s/it]

Downloaded and stored ESG data in -1653105400.7816 seconds


 37%|███▋      | 23/63 [04:31<07:55, 11.88s/it]

Downloaded and stored ESG data in -1653105388.6462 seconds


 38%|███▊      | 24/63 [04:43<07:44, 11.91s/it]

Downloaded and stored ESG data in -1653105376.6742 seconds


 40%|███▉      | 25/63 [04:55<07:33, 11.93s/it]

Downloaded and stored ESG data in -1653105364.6903 seconds


 41%|████▏     | 26/63 [05:02<06:18, 10.22s/it]

Downloaded and stored ESG data in -1653105358.4639 seconds


 43%|████▎     | 27/63 [05:12<06:11, 10.33s/it]

Downloaded and stored ESG data in -1653105347.8833 seconds


 44%|████▍     | 28/63 [05:26<06:39, 11.43s/it]

Downloaded and stored ESG data in -1653105333.8995 seconds


 46%|████▌     | 29/63 [05:39<06:38, 11.71s/it]

Downloaded and stored ESG data in -1653105321.5325 seconds


 48%|████▊     | 30/63 [05:50<06:28, 11.77s/it]

Downloaded and stored ESG data in -1653105309.6272 seconds


 49%|████▉     | 31/63 [05:57<05:28, 10.26s/it]

Downloaded and stored ESG data in -1653105302.8955 seconds


 51%|█████     | 32/63 [06:09<05:32, 10.74s/it]

Downloaded and stored ESG data in -1653105291.0443 seconds


 52%|█████▏    | 33/63 [06:16<04:45,  9.53s/it]

Downloaded and stored ESG data in -1653105284.3295 seconds


 54%|█████▍    | 34/63 [06:29<05:10, 10.72s/it]

Downloaded and stored ESG data in -1653105270.8196 seconds


 56%|█████▌    | 35/63 [06:41<05:07, 10.99s/it]

Downloaded and stored ESG data in -1653105259.2012 seconds


 57%|█████▋    | 36/63 [06:53<05:02, 11.21s/it]

Downloaded and stored ESG data in -1653105247.4701 seconds


 59%|█████▊    | 37/63 [07:05<05:01, 11.60s/it]

Downloaded and stored ESG data in -1653105234.9595 seconds


 60%|██████    | 38/63 [07:18<04:59, 11.99s/it]

Downloaded and stored ESG data in -1653105222.0582 seconds


 62%|██████▏   | 39/63 [07:31<04:57, 12.41s/it]

Downloaded and stored ESG data in -1653105208.6664 seconds


 63%|██████▎   | 40/63 [07:38<04:02, 10.52s/it]

Downloaded and stored ESG data in -1653105202.5501 seconds


 65%|██████▌   | 41/63 [07:50<04:01, 10.95s/it]

Downloaded and stored ESG data in -1653105190.5886 seconds


 67%|██████▋   | 42/63 [08:02<03:57, 11.32s/it]

Downloaded and stored ESG data in -1653105178.4004 seconds


 68%|██████▊   | 43/63 [08:14<03:49, 11.49s/it]

Downloaded and stored ESG data in -1653105166.5761 seconds


 70%|██████▉   | 44/63 [08:27<03:47, 11.99s/it]

Downloaded and stored ESG data in -1653105153.3826 seconds


 71%|███████▏  | 45/63 [08:39<03:35, 11.97s/it]

Downloaded and stored ESG data in -1653105141.4437 seconds


 73%|███████▎  | 46/63 [08:50<03:19, 11.75s/it]

Downloaded and stored ESG data in -1653105130.2111 seconds


 75%|███████▍  | 47/63 [09:02<03:11, 11.95s/it]

Downloaded and stored ESG data in -1653105117.7984 seconds


 76%|███████▌  | 48/63 [09:14<02:58, 11.90s/it]

Downloaded and stored ESG data in -1653105106.0263 seconds


 78%|███████▊  | 49/63 [09:26<02:47, 11.93s/it]

Downloaded and stored ESG data in -1653105094.0056 seconds


 79%|███████▉  | 50/63 [09:38<02:34, 11.86s/it]

Downloaded and stored ESG data in -1653105082.3303 seconds


 81%|████████  | 51/63 [09:49<02:21, 11.76s/it]

Downloaded and stored ESG data in -1653105070.7972 seconds


 83%|████████▎ | 52/63 [10:01<02:10, 11.82s/it]

Downloaded and stored ESG data in -1653105058.8314 seconds


 84%|████████▍ | 53/63 [10:13<01:57, 11.78s/it]

Downloaded and stored ESG data in -1653105047.1407 seconds


 86%|████████▌ | 54/63 [10:25<01:46, 11.89s/it]

Downloaded and stored ESG data in -1653105035.0212 seconds


 87%|████████▋ | 55/63 [10:37<01:34, 11.85s/it]

Downloaded and stored ESG data in -1653105023.2608 seconds


 89%|████████▉ | 56/63 [10:48<01:21, 11.60s/it]

Downloaded and stored ESG data in -1653105012.2212 seconds


 90%|█████████ | 57/63 [10:59<01:09, 11.54s/it]

Downloaded and stored ESG data in -1653105000.8206 seconds


 92%|█████████▏| 58/63 [11:12<00:58, 11.75s/it]

Downloaded and stored ESG data in -1653104988.5789 seconds


 94%|█████████▎| 59/63 [11:23<00:46, 11.72s/it]

Downloaded and stored ESG data in -1653104976.9412 seconds


 95%|█████████▌| 60/63 [11:32<00:32, 10.72s/it]

Downloaded and stored ESG data in -1653104968.5509 seconds


 97%|█████████▋| 61/63 [11:43<00:21, 10.91s/it]

Downloaded and stored ESG data in -1653104957.2157 seconds


 98%|█████████▊| 62/63 [11:54<00:11, 11.03s/it]

Downloaded and stored ESG data in -1653104945.9103 seconds


100%|██████████| 63/63 [12:06<00:00, 11.53s/it]

Downloaded and stored ESG data in -1653104934.0164 seconds
There are: 63 stocks left in the database
There are: 62 stocks with no ESG data
Dataframe shape: 466
Missing stocks: 413





Make sure there are no duplicates

In [112]:
all_data['Stock Ticker'].value_counts()

GWW     3
C       3
BBWI    3
DISH    2
QRVO    2
       ..
TPR     1
ROL     1
EXR     1
PFG     1
WHR     1
Name: Stock Ticker, Length: 442, dtype: int64

In [47]:
s = len(all_data)
print(all_data.shape)
all_data = all_data.drop_duplicates()
dropped=len(all_data) - s
print("{} duplicates have been dropped".format(dropped))

(263, 30)
0 duplicates have been dropped


In [113]:
all_data.shape

(465, 30)

In [None]:
all_data['Stock Ticker'].value_counts()

In [118]:
if os.stat(FILE_PATH).st_size == 0:
    CURRENT_CSV_FILE = 0
    print('ESG File is empty')
    f = open(FILE_PATH, 'w')
    f.close

    with open(FILE_PATH, 'a', newline='') as f_object: 
        writer_object = writer(f_object) # Pass the CSV  file object to the writer() function
        writer_object.writerow(all_data) # Pass the data in the list as an argument into the writerow() function
        f_object.close()
    print('Column names have been entered.')

else:
    CURRENT_CSV_FILE = 1
    print('ESG File has column headers.')
    all_data.to_csv(FILE_PATH, mode='a', index=False, header=False)
    print('ESG Data has been entered.')

ESG File has column headers.
ESG Data has been entered.
