In [1]:
# https://docs.google.com/document/d/1e9oakyEftdHp4zGz2F0WP-_X7GEzEKYGaPvu1SguuCE/edit

import pandas as pd
import numpy as np
import matplotlib as plt
from datetime import date as dt


# https://pypi.org/project/yfinance/#description
import yfinance as yf

# Let pandas show up to 100 columns and use a 100-character display width.
pd.options.display.max_columns = 100
pd.options.display.width = 100

In [2]:
# portfolio = ['MSFT', 'AAPL', 'IBM', 'WMT', 'SHOP']
# df = yf.download(portfolio,'2022-1-1') # Pulling the daily closing stock price for the aforementioned stocks from the start of 2022-onwards.

In [3]:
# print(df.tail())

# Transformations:

### Transform 1: Group by industry

Calculate:
* Market value per industry (dollar amount) for the day
* % change of market value from previous day for each industry

(Here, market value = adjusted close × volume, i.e. the dollar value traded that day; this is not the same as market capitalization.)

In [13]:
# Full list of tickers to track.
portfolio2 = [
    'MSFT', 'AAPL', 'IBM', 'WMT', 'SHOP', 'LWLG', 'ALB', 'LYV',
    'GOOGL', 'TTGT', 'TSLA', 'GME', 'AMZN', 'TGT', 'COST', 'COKE',
    'TPL', 'BX', 'MORN', 'CBRE', 'NVDA', 'AMD', 'NEE',
]
# Reduced list (first two tickers only) so the notebook runs quickly (pre prod).
portfolio = portfolio2[:2]

In [5]:
# Download daily price data from 2022-02-01 onwards, grouped per ticker, then
# reshape the two-level column header into long form: the ticker level is
# stacked into the index, the index levels are named Date/Ticker, and Ticker is
# moved back out into an ordinary column (Date stays as the index).
# Reshape recipe: Trenton McKinney,
# https://stackoverflow.com/questions/63107594/how-to-deal-with-multi-level-column-names-downloaded-with-yfinance/63107801#63107801
df = (
    yf.download(portfolio, start='2022-2-1', group_by='Ticker')
    .stack(level=0)
    .rename_axis(['Date', 'Ticker'])
    .reset_index(level=1)
)

[*********************100%***********************]  23 of 23 completed


### Pull industry for each ticker (for eventual aggregation) as well as current market cap (updates daily)

In [6]:
%%time
# Look up each ticker's sector (does not change) and current market cap
# (changes daily). Both values are read from the same `.info` result, so each
# symbol needs only one `.info` lookup.
sector_by_ticker = {}
mkt_cap_by_ticker = {}
for stock in dict.fromkeys(portfolio):  # drop duplicate symbols, keep order
    info = yf.Ticker(stock).info  # API call for this ticker's metadata
    # `.get` records a missing value instead of raising KeyError, so one symbol
    # without a 'sector' / 'marketCap' entry cannot abort the whole (slow) loop.
    sector_by_ticker[stock] = info.get('sector')
    mkt_cap_by_ticker[stock] = info.get('marketCap')

# One row per ticker (index = ticker symbol), one named column per frame.
sector_df = pd.Series(sector_by_ticker, name='Sector', dtype='object').to_frame()
mkt_cap_df = pd.Series(mkt_cap_by_ticker, name='Market_Capitalization').to_frame()


CPU times: user 8.18 s, sys: 234 ms, total: 8.41 s
Wall time: 4min 10s


In [7]:
# Show the market-cap table (one row per ticker, column 'Market_Capitalization').
mkt_cap_df

Unnamed: 0,Market_Capitalization
MSFT,2178665545728
AAPL,2639215722496
IBM,108625469440
WMT,372385611776
SHOP,82728058880
LWLG,722648704
ALB,21372686336
LYV,27665754112
GOOGL,1748066893824
TTGT,2188279808


Combine the two tables: join each ticker's sector onto the daily price rows.

In [14]:
# Attach the sector to every price row by matching the 'Ticker' column of the
# price table against the ticker index of the sector table.
df6 = df.merge(sector_df, left_on='Ticker', right_index=True)

In [16]:
# Show the merged frame: the price rows with the 'Sector' column added.
df6

Unnamed: 0_level_0,Ticker,Adj Close,Close,High,Low,Open,Volume,Sector
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-02-01,AAPL,174.610001,174.610001,174.839996,172.309998,174.009995,86213900,Technology
2022-02-02,AAPL,175.839996,175.839996,175.880005,173.330002,174.750000,84914300,Technology
2022-02-03,AAPL,172.899994,172.899994,176.240005,172.119995,174.479996,89418100,Technology
2022-02-04,AAPL,172.389999,172.389999,174.100006,170.679993,171.679993,82391400,Technology
2022-02-07,AAPL,171.660004,171.660004,173.949997,170.949997,172.860001,77251200,Technology
...,...,...,...,...,...,...,...,...
2022-02-17,WMT,138.880005,138.880005,139.470001,133.779999,134.000000,17943800,Consumer Defensive
2022-02-18,WMT,137.990005,137.990005,139.589996,137.160004,138.100006,10364700,Consumer Defensive
2022-02-22,WMT,136.449997,136.449997,138.240005,135.940002,137.770004,8460300,Consumer Defensive
2022-02-23,WMT,135.050003,135.050003,137.190002,134.839996,136.860001,7010300,Consumer Defensive


### Transform 1: % change of previous day close price vs current day close price

In [12]:
# # Below we'll calculate the percent change of the Adj Close price for each stock and, if the price change for any stock exceeds a predetermined threshold, an email notification will be sent out.
# last_two_days = df['Adj Close'].iloc[-2:].pct_change()
# percent_change = last_two_days.iloc[-1]
# percent_change

### Transform 1: % change of previous day close price vs current day close price