In [1]:
# https://docs.google.com/document/d/1e9oakyEftdHp4zGz2F0WP-_X7GEzEKYGaPvu1SguuCE/edit

import pandas as pd
import numpy as np
import matplotlib as plt
from datetime import date as dt


# https://pypi.org/project/yfinance/#description
import yfinance as yf

# Notebook-wide pandas display settings: render up to 100 columns and
# allow 100 characters of output width.
pd.options.display.max_columns = 100
pd.options.display.width = 100

# Transformations:

### Transform 1: Group by industry

Calculate:
* Market value per industry (dollar amount) for the day
* % change of market value from previous day for each industry

(market value = adjusted close * volume)

In [13]:
# Full list of tickers to track.
portfolio2 = [
    'MSFT', 'AAPL', 'IBM', 'WMT', 'SHOP',
    'LWLG', 'ALB', 'LYV', 'GOOGL', 'TTGT',
    'TSLA', 'GME', 'AMZN', 'TGT', 'COST',
    'COKE', 'TPL', 'BX', 'MORN', 'CBRE',
    'NVDA', 'AMD', 'NEE',
]

# Reduced list used while developing (pre prod) so the per-ticker
# API calls further down finish quickly.
portfolio = ['MSFT', 'AAPL']

In [5]:
# Download price history for every ticker in `portfolio`, starting 2022-02-01.
df = yf.download(portfolio, start='2022-2-1', group_by='Ticker')

# Move the outer column level into the row index, label the two index levels
# Date/Ticker, then turn Ticker back into an ordinary column so that each row
# is one (Date, Ticker) observation.
# Approach by Trenton McKinney:
# https://stackoverflow.com/questions/63107594/how-to-deal-with-multi-level-column-names-downloaded-with-yfinance/63107801#63107801
df = (
    df.stack(level=0)
      .rename_axis(['Date', 'Ticker'])
      .reset_index(level=1)
)

[*********************100%***********************]  23 of 23 completed


### Pull the sector for each ticker (for eventual aggregation) as well as its current market cap (updates daily)

In [6]:
%%time
# Look up, for every ticker in `portfolio`, its sector (does not change) and its
# current market capitalization (changes daily). Each lookup is one API call.
sector_dict = {}   # ticker -> [sector]
mkt_cap_dict = {}  # ticker -> [market cap]
for stock in portfolio:
    info = yf.Ticker(stock).info  # api call for data for respective stock
    # Plain assignment (instead of setdefault + append) keeps exactly one value
    # per ticker even if `portfolio` lists the same symbol twice.
    sector_dict[stock] = [info['sector']]
    mkt_cap_dict[stock] = [info['marketCap']]

# orient='index' puts the tickers on the row index and the single value in a
# column labelled 0, which is then given a readable name. Building the frame
# with pd.DataFrame(dict) instead would put the tickers on the COLUMNS, making
# the rename a no-op and leaving no 'Sector' / 'Market_Capitalization' column
# for the index-based merges further down.
sector_df = (
    pd.DataFrame.from_dict(sector_dict, orient='index')
      .rename(columns={0: 'Sector'})
)

mkt_cap_df = (
    pd.DataFrame.from_dict(mkt_cap_dict, orient='index')
      .rename(columns={0: 'Market_Capitalization'})
)


CPU times: user 8.18 s, sys: 234 ms, total: 8.41 s
Wall time: 4min 10s


In [32]:
# Display the market-capitalization table.
mkt_cap_df # market cap as of today (4pm EST)

Unnamed: 0,Market_Capitalization
MSFT,2178665545728
AAPL,2639215722496
IBM,108625469440
WMT,372385611776
SHOP,82728058880
LWLG,722648704
ALB,21372686336
LYV,27665754112
GOOGL,1748066893824
TTGT,2188279808


In [34]:
# Take an independent copy: a plain `mkt_cap_df2 = mkt_cap_df` only creates a
# second name for the same object, so columns later added to mkt_cap_df2 would
# silently appear in mkt_cap_df as well.
mkt_cap_df2 = mkt_cap_df.copy()

In [36]:
# Stamp every row of the market capitalization table with today's date,
# rendered as MM/DD/YY.
mkt_cap_df2['Date'] = format(dt.today(), "%m/%d/%y")

In [39]:
# Attach each ticker's sector to the date-stamped market-cap table. The two
# frames are matched row-to-row on their indexes (expected to hold the ticker
# symbols); pd.merge defaults to an inner join, so only tickers present in
# both frames are kept.
mkt_cap_df3 = pd.merge(mkt_cap_df2, sector_df, left_index=True, right_index=True)


In [40]:
# Display the market-cap table after the Date and Sector columns were added.
mkt_cap_df3

Unnamed: 0,Market_Capitalization,Date,Sector
MSFT,2178665545728,02/24/22,Technology
AAPL,2639215722496,02/24/22,Technology
IBM,108625469440,02/24/22,Technology
WMT,372385611776,02/24/22,Consumer Defensive
SHOP,82728058880,02/24/22,Technology
LWLG,722648704,02/24/22,Basic Materials
ALB,21372686336,02/24/22,Basic Materials
LYV,27665754112,02/24/22,Communication Services
GOOGL,1748066893824,02/24/22,Communication Services
TTGT,2188279808,02/24/22,Communication Services


In [33]:
# Add a 'Sector' column to the daily price table by matching its 'Ticker'
# column against the index of the sector table.
df_merged = df.merge(sector_df, left_on='Ticker', right_index=True)

# Relocate 'Sector' so it sits in the second position (index 1): pop() removes
# the column and hands it back, insert() puts it in at the requested position.
df_merged.insert(1, 'Sector', df_merged.pop('Sector'))

In [31]:
# Display the merged price/sector table.
df_merged

Unnamed: 0_level_0,Ticker,Sector,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-02-01,AAPL,Technology,174.610001,174.610001,174.839996,172.309998,174.009995,86213900
2022-02-02,AAPL,Technology,175.839996,175.839996,175.880005,173.330002,174.750000,84914300
2022-02-03,AAPL,Technology,172.899994,172.899994,176.240005,172.119995,174.479996,89418100
2022-02-04,AAPL,Technology,172.389999,172.389999,174.100006,170.679993,171.679993,82391400
2022-02-07,AAPL,Technology,171.660004,171.660004,173.949997,170.949997,172.860001,77251200
...,...,...,...,...,...,...,...,...
2022-02-17,WMT,Consumer Defensive,138.880005,138.880005,139.470001,133.779999,134.000000,17943800
2022-02-18,WMT,Consumer Defensive,137.990005,137.990005,139.589996,137.160004,138.100006,10364700
2022-02-22,WMT,Consumer Defensive,136.449997,136.449997,138.240005,135.940002,137.770004,8460300
2022-02-23,WMT,Consumer Defensive,135.050003,135.050003,137.190002,134.839996,136.860001,7010300


### Transform 1: % change of previous day close price vs current day close price

In [12]:
# # Below we'll calculate the percent change of the Adj Close price for each stock; if the price change for any stock exceeds a predetermined threshold, an email notification will be sent out.
# last_two_days = df['Adj Close'].iloc[-2:].pct_change()
# percent_change = last_two_days.iloc[-1]
# percent_change

### Transform 1: % change of previous day close price vs current day close price