## ESG Research 
### Yfinance, Pandas, NumPy


# Installation
* pip install pandas
* pip install numpy
* pip3 install tqdm
* pip3 install seaborn
* pip install yfinance (or: pip3 install yfinance)
* pip install git+https://github.com/rodrigobercini/yfinance.git
* pip install yahoofinancials


In [37]:
#Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
from pandas import Series, DataFrame
from pandas.tseries import offsets
import csv
import datetime
import csv
from csv import writer
from tqdm import tqdm
import os
import time
from yahoofinancials import YahooFinancials
from sys import platform

# Timestamp captured once so every date-derived value below is consistent.
dt = datetime.datetime.today()

YEAR = dt.year
MONTH = dt.month
# NOTE(review): this binds the built-in `bool` type itself, not a boolean
# value; it only acts as a placeholder until a later cell assigns 0 or 1.
CURRENT_CSV_FILE = bool

# Map sys.platform onto a short OS label. A fallback is always assigned so
# that unsupported platforms (e.g. "cygwin", "freebsd") do not leave `op_sys`
# undefined and trigger a NameError when it is read later.
if platform == "linux" or platform == "linux2":
    op_sys = "linux"
elif platform == "darwin":
    op_sys = "Mac"
elif platform == "win32":
    op_sys = "Windows"
else:
    op_sys = "unknown"

# Field names of the sustainability (ESG) record collected for each ticker.
ESG_COLUMNS = {
    'palmOil', 'controversialWeapons', 'gambling', 'socialScore', 'nuclear',
    'furLeather', 'alcoholic', 'gmo', 'catholic', 'socialPercentile',
    'peerCount', 'governanceScore', 'environmentPercentile',
    'animalTesting', 'tobacco', 'totalEsg', 'highestControversy',
    'esgPerformance', 'coal', 'pesticides', 'adult', 'percentile',
    'peerGroup', 'smallArms', 'environmentScore', 'governancePercentile',
    'militaryContract',
}

# ESG fields plus the identifying columns added to every collected row.
# Derived from ESG_COLUMNS so the two sets cannot drift apart.
ALL_COLUMNS = ESG_COLUMNS | {'Stock Ticker', 'Sector', 'Name'}

# NOTE(review): absolute, user-specific directory -- adjust before running on
# another machine.
USER_PATH = '/Users/MichaelCalmette/Documents/Finance Research/ESG Data Files/'
# One CSV per calendar month, e.g. "esg-2-2022.csv"; assign a literal file
# name here instead to work on a different month.
DATE_PATH = "esg-{}-{}.csv".format(MONTH, YEAR)

FILE_PATH = os.path.join(USER_PATH, DATE_PATH)

Create this month's ESG CSV file. (TODO: add helper functions that check whether the file exists and whether it already contains data.)

In [2]:
# Make sure this month's CSV exists. Append mode creates a missing file but
# never truncates one that already holds collected rows, and the `with` block
# guarantees the handle is closed.
with open(FILE_PATH, 'a') as f:
    pass
### Check if DF is empty

<function TextIOWrapper.close()>

In [40]:
def _applescript_string(text):
    """Return `text` as a double-quoted AppleScript string literal."""
    # Inside an AppleScript literal, backslash and double quote must be escaped.
    escaped = str(text).replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'


def send_notification(title, t):
    """Show a macOS desktop notification reporting how long a run took.

    Does nothing on any platform other than macOS (``sys.platform`` equal to
    ``"darwin"``).

    Parameters
    ----------
    title : str
        Title of the notification.
    t : int or float
        Elapsed time in minutes; embedded in the notification body.

    Returns
    -------
    None
    """
    # Local import so this cell does not depend on an extra top-level import.
    import subprocess

    if platform != "darwin":
        return

    message = "Time taken: {} minutes".format(t)
    script = "display notification {} with title {}".format(
        _applescript_string(message), _applescript_string(title)
    )
    try:
        # Argument list (no shell): quotes or shell metacharacters in `title`
        # cannot break or inject into the command line.
        subprocess.run(["osascript", "-e", script], check=False)
    except OSError as exc:
        # A notification is best-effort; report the problem but keep going.
        print("Could not show notification: {}".format(exc))
    

Load the full ticker list and the ESG data already collected this month, so the two can be compared to find the tickers that are still missing.

In [26]:
# Universe of tickers to collect ESG data for.
# presumably tickers.csv provides at least 'Symbol', 'Sector' and 'Name'
# columns, since later cells read them -- verify against the file.
df = pd.read_csv('tickers.csv')

# Resume from this month's file when it already holds data. When the file is
# missing or completely empty, start from an empty frame that carries the
# expected columns instead of failing.
try:
    all_data = pd.read_csv(FILE_PATH)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # sorted(): pandas needs an ordered sequence of column labels, not a set.
    all_data = pd.DataFrame(columns=sorted(ALL_COLUMNS))

# Tickers for which no ESG data was available.
empty_stocks = pd.DataFrame(columns=['Ticker'])

In [None]:
# Spot check: draw one random row from the ticker list and pull one month of
# price history for it.
tic = df.sample(n=1)
ticker_sym = tic['Symbol'].iloc[0]  # symbol of the single sampled row, e.g. 'AAPL'
print(ticker_sym)
ticker = yf.Ticker(ticker_sym).history(period="1mo")
ticker

Use YahooFinancials to get additional stock data

In [None]:
# Fetch the Yahoo Finance summary data for a fixed example ticker and print it.
yahoo_financials = YahooFinancials('AAPL')
aapl_summary = yahoo_financials.get_summary_data()
print(aapl_summary)

Test pulling a random ticker and getting sustainability information

In [None]:
# Spot check: draw one random row from the ticker list and display the
# sustainability data yfinance returns for it.
tic = df.sample(n=1)
ticker_sym = tic['Symbol'].iloc[0]  # symbol of the single sampled row, e.g. 'AAPL'
ticker = yf.Ticker(ticker_sym)
y = ticker.sustainability
y

In [43]:
# Number of tickers requested per run of this cell.
BATCH_SIZE = 15


def _remaining_tickers(tickers, collected):
    """Return the rows of `tickers` that have no matching row in `collected`.

    The two frames are outer-merged on the columns they share and only the
    rows found exclusively on the `tickers` side are kept.
    """
    already = collected[['Stock Ticker', 'Sector', 'Name']].copy()
    merged = tickers.merge(already, how='outer', indicator=True)
    return merged.loc[merged['_merge'] == 'left_only']


df1_transposed = pd.DataFrame()
no_data_stocks = 0
start_time = time.time()

df_test = _remaining_tickers(df, all_data)
# Do not request tickers already recorded as having no ESG data.
candidates = df_test[~df_test['Symbol'].isin(empty_stocks['Ticker'])]
# Never ask for more rows than exist: sample() raises when n exceeds the
# population, which would happen once fewer than BATCH_SIZE tickers remain.
n_fetch = min(BATCH_SIZE, len(candidates))
# Sampling the whole batch up front (without replacement) guarantees that no
# ticker is requested twice within one run.
batch = candidates.sample(n=n_fetch) if n_fetch else candidates.iloc[0:0]

for _, tic in tqdm(batch.iterrows(), total=len(batch)):
    ticker_sym = tic['Symbol']  # ticker symbol, e.g. 'AAPL'
    ticker = yf.Ticker(ticker_sym)
    df1 = ticker.sustainability

    # Treat both "no result" and an empty result as "no ESG data".
    if df1 is None or getattr(df1, 'empty', False):
        no_data_stocks += 1
        empty_stocks = pd.concat(
            [empty_stocks, pd.DataFrame({"Ticker": [ticker_sym]})],
            ignore_index=True,
        )
    else:
        # Transpose so the ESG fields become columns of the row(s) to store.
        df1_transposed = df1.transpose()
        df1_transposed['Stock Ticker'] = ticker_sym
        df1_transposed['Sector'] = tic['Sector']
        df1_transposed['Name'] = tic['Name']

        # pd.concat replaces DataFrame.append, which newer pandas no longer
        # provides. Appending inside the loop keeps partial progress in
        # `all_data` if a later request fails.
        all_data = pd.concat([all_data, df1_transposed], ignore_index=True)

# Recompute so the count below reflects the rows added during this run.
df_test = _remaining_tickers(df, all_data)

all_shape = len(all_data)
empty_shape = len(empty_stocks)
time_taken = round((time.time() - start_time) / 60, 2)

print("There are: {} stocks left in the database".format(len(df_test)))
print("There are: {} stocks with no ESG data".format(no_data_stocks))

print("Dataframe shape: {}".format(all_shape))
print("Missing stocks: {}".format(empty_shape))

send_notification("Completed ESG Data Pull", time_taken)

100%|██████████| 15/15 [03:10<00:00, 12.73s/it]


There are: 478 stocks left in the database
There are: 3 stocks with no ESG data
Dataframe shape: 27
Missing stocks: 3


Make sure there are no duplicates

In [25]:
# Display the (rows, columns) shape of the collected ESG frame.
all_data.shape

(23, 30)

In [None]:
# Remove rows that are exact duplicates across every column and report how
# many were removed.
s = len(all_data)  # row count before de-duplication
print(all_data.shape)
all_data = all_data.drop_duplicates()
# Rows before minus rows after, so the reported count is never negative.
dropped = s - len(all_data)
print("{} duplicates have been dropped".format(dropped))

In [None]:
# Display the (rows, columns) shape of all_data again.
all_data.shape

In [None]:
# Count rows per ticker; any count above 1 means that ticker appears more than once.
all_data['Stock Ticker'].value_counts()

In [None]:
# Persist the collected ESG data to this month's CSV.
#
# The file is always rewritten in full (header + rows) rather than appended
# to: `all_data` normally already contains the rows loaded from this same
# file, so appending would duplicate them, and writing only a header for an
# empty file would silently drop the freshly collected rows.

# A missing file is treated the same as an empty one.
file_has_content = os.path.exists(FILE_PATH) and os.stat(FILE_PATH).st_size > 0

if file_has_content:
    CURRENT_CSV_FILE = 1
    print('ESG File has column headers.')
    # Safety net: keep rows on disk whose ticker is not present in `all_data`
    # (e.g. if `all_data` was started from an empty frame), so the rewrite
    # below never discards previously saved tickers.
    on_disk = pd.read_csv(FILE_PATH)
    unseen = on_disk[~on_disk['Stock Ticker'].isin(all_data['Stock Ticker'])]
    if len(unseen):
        to_save = pd.concat([unseen, all_data], ignore_index=True)
    else:
        to_save = all_data
else:
    CURRENT_CSV_FILE = 0
    print('ESG File is empty')
    to_save = all_data

to_save.to_csv(FILE_PATH, mode='w', index=False, header=True)
if not file_has_content:
    print('Column names have been entered.')
print('ESG Data has been entered.')