DS4200 A5 Tehani Cabour

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import yfinance as yf
from pandas_datareader import data as pdr
import datetime
import altair as alt

In [2]:
# Read the stock info CSV, then index the resulting table by each row's ticker symbol
constituents = pd.read_csv('constituents_csv.csv')
info = constituents.set_index('Symbol')

In [3]:
# Clean data by making the CSV file's ticker names correspond to Yahoo Finance ticker names.
# Each mapping below is written once and applied to both `info` and `ticker`, so the
# dataframe index and the download list cannot drift apart.

# tickers that have been renamed (old symbol -> new symbol)
renamed_tickers = {'NLOK': 'GEN', 'FB': 'META', 'ANTM': 'ELV'}
# tickers of delisted companies, which are left out of the download list
delisted_tickers = {
    'KSU', 'XLNX', 'BLL', 'INFO', 'CTXS', 'PBCT', 'VIAC', 'DISCK',
    'NLSN', 'CERN', 'DRE', 'FBHS', 'ABMD', 'DISCA', 'TWTR', 'WLTW',
}

# Replace '.' with '-' in every symbol with a single pass over the index
# (instead of one index-wide rename per ticker), then apply the renames.
info = info.rename(index=lambda symbol: symbol.replace('.', '-'))
info = info.rename(index=renamed_tickers)

# The download list is the cleaned index minus the delisted companies;
# `info` itself still keeps the delisted rows.
ticker = [symbol for symbol in info.index if symbol not in delisted_tickers]

In [4]:
# load data from yf using the same tickers as in the CSV file
# yfinance treats `end` as exclusive, so the data dataframe has stock price
# information from 03-03 up to, but not including, 03-17
start = datetime.datetime(2023, 3, 3)
end = datetime.datetime(2023, 3, 17)
# auto_adjust=False is passed explicitly so the download keeps a separate
# 'Adj Close' column, which the transformation cell selects; newer yfinance
# releases default to auto_adjust=True, which omits that column.
data = yf.download(ticker, start=start, end=end, auto_adjust=False)

[*********************100%***********************]  489 of 489 completed


In [5]:
# load same companies' stock prices from start of the month for comparison purposes
# yfinance treats `end` as exclusive, so this window requests 03-01 only
initialStart = datetime.datetime(2023, 3, 1)
initialEnd = datetime.datetime(2023, 3, 2)
# auto_adjust=False is passed explicitly so the download keeps a separate
# 'Adj Close' column, which the transformation cell selects; newer yfinance
# releases default to auto_adjust=True, which omits that column.
initial = yf.download(ticker, start=initialStart, end=initialEnd, auto_adjust=False)

[*********************100%***********************]  489 of 489 completed


In [6]:
# keep only the adjusted closing prices from each download
data = data['Adj Close']
initial = initial['Adj Close']

# transform ticker prices to the change in closing price relative to the first
# row of `initial`, expressed as a fraction of that baseline price (the charts
# format it as a percentage). The baseline Series is aligned to the columns of
# `data` by ticker label, so every column is converted in one vectorised step
# rather than by assigning column by column in a Python loop.
baseline = initial.iloc[0]
data = (data - baseline) / baseline

In [7]:
# Transform dataframe to make it easier to use with Altair:
# reshape from one column per ticker to one row per (Date, Ticker) pair,
# naming the value column with its final label directly
df = data.reset_index().melt(
    id_vars=['Date'],
    value_vars=ticker,
    var_name='Ticker',
    value_name='Percent Change in Stock Price',
)
# Join the CSV and yf dataframes
# The index of `info` is named "Ticker" so it can be matched against df's "Ticker" column;
# the key is listed once (the join is the default inner join, so tickers missing
# from either side are dropped).
info.index.name = "Ticker"
df = pd.merge(df, info, on="Ticker")

In [8]:
# Verify the clean-up is complete: evaluates to True if any cell of df is missing
df.isna().to_numpy().any()

False

In [9]:
# Display the df table as the cell output
df

Unnamed: 0,Date,Ticker,Percent Change in Stock Price,Name,Sector
0,2023-03-03,MMM,0.009527,3M,Industrials
1,2023-03-06,MMM,-0.002541,3M,Industrials
2,2023-03-07,MMM,-0.028310,3M,Industrials
3,2023-03-08,MMM,-0.027674,3M,Industrials
4,2023-03-09,MMM,-0.042555,3M,Industrials
...,...,...,...,...,...
4885,2023-03-10,ZTS,-0.036045,Zoetis,Health Care
4886,2023-03-13,ZTS,-0.024825,Zoetis,Health Care
4887,2023-03-14,ZTS,-0.017963,Zoetis,Health Care
4888,2023-03-15,ZTS,-0.023871,Zoetis,Health Care


In [10]:
# Chart 1 - Average Percent Change in Stock Price for Each Sector Across Time
# interactive feature: the legend is bound to a selection on Sector, and
# lines outside the selection are drawn faded
selection = alt.selection_single(fields=['Sector'], bind='legend')

chart_title = "Average Percent Change in Daily Stock Prices for Each Sector Across Start of March 2023"
mean_change_axis = alt.Axis(format='%', title='Average Percent Change in Stock Price')

(
    alt.Chart(df, title=chart_title)
    .mark_line()
    .encode(
        x=alt.X('Date:T'),
        y=alt.Y('mean(Percent Change in Stock Price):Q', axis=mean_change_axis),
        color=alt.Color('Sector:N'),
        # full opacity for the selected sector, 0.2 for the others
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
    )
    .add_selection(selection)
)

In [19]:
# Chart 2 - Percent Change in Stock Prices Across Time for Financial Companies
# interactive feature: tooltip label on hover, interactive average bar
# (the average rule is computed over the date range selected with the brush)
brush = alt.selection_interval(encodings=['x'])

# One line (with point markers) per company name.
# `.interactive()` is intentionally not chained onto this chart: it binds
# click-and-drag to panning the scales, which is the same gesture the brush
# uses, so every drag would both pan the chart and move the brush.
lineChart = alt.Chart(title="Percent Change in Daily Stock Prices Across Start of March 2023 for Financial Companies").mark_line(point=True).encode(
    x='Date:T',
    y=alt.Y('Percent Change in Stock Price:Q', axis=alt.Axis(format='%', title='Percent Change in Stock Price')),
    color=alt.Color('Name:N', legend=None),
    # full opacity inside the brush, 0.7 outside it
    opacity=alt.condition(brush, alt.value(1), alt.value(0.7)),
    tooltip='Name:N'
).add_selection(
    brush
)

# Horizontal rule at the mean of the rows that fall inside the brush
line = alt.Chart().mark_rule(color='firebrick').encode(
    y='mean(Percent Change in Stock Price):Q',
    size=alt.value(3)
).transform_filter(
    brush
)

# Both layers share the same data: only the rows whose Sector is 'Financials'
alt.layer(lineChart, line, data=df.loc[df["Sector"] == 'Financials'])





In [12]:
# Chart 3 - Error Bars using Confidence Intervals for Average Stock Prices for Each Sector Across Time
# The errorbar mark computes the interval from the raw values itself
# (extent='ci'), so its x channel is given the raw field rather than a
# pre-aggregated mean.
error_bars = alt.Chart(df, title="Error Bars using Confidence Intervals for Daily Average Stock Prices for Each Sector Across Start of March 2023").mark_errorbar(extent='ci').encode(
    x=alt.X('Percent Change in Stock Price:Q', scale=alt.Scale(zero=False)),
    y=alt.Y('Sector:N')
)

# One black point per sector at the mean; the aggregate is stated once, in the shorthand
points = alt.Chart(df).mark_point(filled=True, color='black').encode(
    x=alt.X('mean(Percent Change in Stock Price):Q', axis=alt.Axis(format='%', title='Average Percent Change in Stock Price')),
    y=alt.Y('Sector:N'),
)

# Layer the points on top of the error bars
error_bars + points