In [1]:
# https://docs.google.com/document/d/1e9oakyEftdHp4zGz2F0WP-_X7GEzEKYGaPvu1SguuCE/edit

import pandas as pd
import numpy as np
import matplotlib as plt
from datetime import date as dt


# https://pypi.org/project/yfinance/#description
import yfinance as yf

# Notebook-wide pandas display settings.
pd.set_option("display.max_columns", 100)  # render up to 100 columns
pd.set_option("display.width", 100)  # display width, in characters

# Transformations:

### Transform 1: Group by industry

Calculate:
* Market value per industry (dollar amount) for the day
* % change of market value from previous day for each industry

(market value = adjusted close * volume)

In [2]:
# Full list of tickers.
portfolio2 = [
    'MSFT', 'AAPL', 'IBM', 'WMT', 'SHOP',
    'LWLG', 'ALB', 'LYV', 'GOOGL', 'TTGT',
    'TSLA', 'GME', 'AMZN', 'TGT', 'COST',
    'COKE', 'TPL', 'BX', 'MORN', 'CBRE',
    'NVDA', 'AMD', 'NEE',
]

# Reduced portfolio to make python quicker (pre prod); this is the list the cells below use.
portfolio = ['MSFT', 'AAPL']

In [3]:
# Download price history for every ticker in `portfolio` starting 2022-2-1 (columns grouped
# per ticker), then flatten the two-level columns into one row per (Date, Ticker).
# Reshape recipe: Trenton McKinney, https://stackoverflow.com/questions/63107594/how-to-deal-with-multi-level-column-names-downloaded-with-yfinance/63107801#63107801
df = (
    yf.download(portfolio, start='2022-2-1', group_by='Ticker')
    .stack(level=0)  # move the outer column level into the row index
    .rename_axis(['Date', 'Ticker'])
    .reset_index(level=1)  # Date stays as the index, Ticker becomes a column
)

[*********************100%***********************]  2 of 2 completed


### Pull industry for each ticker (for eventual aggregation) as well as current market cap (updates daily)

In [None]:
# %%time
# sector_cap = [['Date', 'Company', 'Sector', 'MarketCap']] # list of rows (header row first) to be used for market cap data (changes daily)
# for stock in portfolio:
#     temp = []
#     info = yf.Ticker(stock).info # api call for data for respective stock
#     temp.append(dt.today().strftime("%d/%m/%Y"))
#     temp.append(stock)
#     temp.append(info['sector'])
#     temp.append(info['marketCap'])   

#     sector_cap.append(temp)

In [40]:
%%time

# hash the entirety of the api call data for each stock
info = {i: yf.Ticker(i).info for i in portfolio}

# pull the respective data we'd like from the hash (instead of calling the api twice for each stock)
sector_dict = {i: [dt.today().strftime("%d/%m/%Y"), info[i]['sector'], info[i]['marketCap']] for i in portfolio}
    

sector_dict

CPU times: user 707 ms, sys: 16 ms, total: 723 ms
Wall time: 21.1 s


{'MSFT': ['25/02/2022', 'Technology', 2218683269120],
 'AAPL': ['25/02/2022', 'Technology', 2693133500416]}

In [137]:
# One row per ticker (the dict keys become the index); the list values land in
# positional columns 0, 1, 2 = date, sector, market cap.
sector_df = pd.DataFrame.from_dict(data=sector_dict, orient="index")
sector_df

Unnamed: 0,0,1,2
MSFT,25/02/2022,Technology,2218683269120
AAPL,25/02/2022,Technology,2693133500416


In [138]:
# Reshape the ticker-indexed frame (positional columns 0/1/2 = date, sector, market cap)
# into a date-indexed table with named columns: company, sector, market_cap.
#
# The result is assigned back rather than calling `.rename(..., inplace=True)` on a
# `pd.DataFrame(sector_df)` wrapper: whether a rename on such a wrapper propagates to
# `sector_df` depends on the pandas version, so the column names could silently stay 1 and 2.
sector_df = (
    sector_df
    .rename_axis('company')  # name the ticker index ...
    .reset_index()  # ... and turn it into the first column
    .set_index(0)  # column 0 (the date string) becomes the index
    .rename_axis(None)  # leave the index unnamed
    .rename(columns={1: 'sector', 2: 'market_cap'})
)


sector_df

Unnamed: 0,company,sector,market_cap
25/02/2022,MSFT,Technology,2218683269120
25/02/2022,AAPL,Technology,2693133500416


In [42]:
# Copy the current index values into the 'company' column.
# NOTE(review): in top-to-bottom order the index holds the date strings at this point
# (the previous cell sets the index from column 0), so this overwrites the tickers in
# 'company' with dates. The out-of-sequence execution count and the later output
# (columns 0, 1, 2, company) suggest it was run against the raw from_dict frame, whose
# index is the tickers — confirm whether this cell is still needed.
sector_df['company'] = sector_df.index

In [68]:
# Swap the index for a default 0..n-1 range; the old index values are discarded,
# not kept as a column.
sector_df = sector_df.reset_index(drop=True)

In [69]:
# Display the current state of the sector table.
sector_df

Unnamed: 0,0,1,2,company
0,25/02/2022,Technology,2218683269120,MSFT
1,25/02/2022,Technology,2693133500416,AAPL


In [10]:
# mkt_cap_df2 = mkt_cap_df

In [11]:
# # timestamp the market capitalization table
# mkt_cap_df2['Date'] = dt.today().strftime("%m/%d/%y")

In [12]:
# mkt_cap_df3 = mkt_cap_df2


# mkt_cap_df3 = pd.merge(mkt_cap_df2, sector_df, left_index=True, right_index=True) # Combine sector table with daily financial info table


In [13]:
# mkt_cap_df3

In [14]:
# # merge Sector df with the core financial info dataframe
# df_merged = pd.merge(df, sector_df, left_on='Ticker', right_index=True) # Combine sector table with daily financial info table

# # shift column 'Sector' to second position
# first_column = df_merged.pop('Sector')

# # insert column using insert(position,column_name,first_column) function
# df_merged.insert(1, 'Sector', first_column)

In [15]:
# df_merged

### Transform 1: % change of previous day close price vs current day close price

In [16]:
# # Below we'll calculate the percent change of Adj Close price for each stock and, if the price change for one exceeds a predetermined threshold, an email notification will be sent out.
# last_two_days = df['Adj Close'].iloc[-2:].pct_change()
# percent_change = last_two_days.iloc[-1]
# percent_change

### Transform 1: % change of previous day close price vs current day close price