# Set up

In [None]:
# infinity constant (compared against ratio columns to spot division by zero)
from numpy import inf

# time operations
from datetime import timedelta

# for numerical analysis
import numpy as np

# to store and process data in dataframe
import pandas as pd

# basic visualization package
import matplotlib.pyplot as plt

# advanced plotting
import seaborn as sns

# interactive visualization
import plotly.express as px
import plotly.graph_objs as go
# import plotly.figure_factory as ff
#from plotly.subplots import make_subplots

# for offline plotting
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

# hide warnings
# NOTE(review): 'ignore' silences every warning category for the rest of the
# session, including pandas chained-assignment warnings raised by later cells.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Colour palette (hex codes) kept as four short module-level names.
# Presumably confirmed / deaths / recovered / active - confirm against the charts.
cnf = '#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'

In [None]:
# list files
# ==========
# IPython shell escape (`!`): runs `ls` on the dataset directory that the
# read_csv calls below load from, so the available file names are visible.

!ls ../input/corona-virus-report

# Wrangle Data

In [None]:
# Full data
# =========
# Load the complete cleaned case table into `full_table`.
full_table_path = '../input/corona-virus-report/covid_19_clean_complete.csv'
full_table = pd.read_csv(full_table_path)

# Print the frame's structure (index range, column names, non-null counts,
# dtypes, memory usage).
full_table.info()

# Further ad-hoc inspections, left disabled:
# type(full_table)
# full_table.shape
# full_table.columns
# full_table.dtypes
# full_table.head(20)

In [None]:
# Country wise
# ============
# Latest per-country snapshot. Empty strings are first mapped to NaN so that a
# single fillna(0) then zero-fills every missing value.
country_wise = (
    pd.read_csv('../input/corona-virus-report/country_wise_latest.csv')
    .replace('', np.nan)
    .fillna(0)
)

# Structure first (printed), then the first ten rows as the cell's output.
country_wise.info()
country_wise.head(10)

In [None]:
# Grouped by day, country
# =======================
# One row per (Date, Country/Region).

full_grouped = pd.read_csv('../input/corona-virus-report/full_grouped.csv')

# Structure as loaded: 'Date' is still dtype "object" (string) at this point.
full_grouped.info()

# Convert Date from Dtype "Object" (or String) to Dtype "Datetime"
full_grouped['Date'] = pd.to_datetime(full_grouped['Date'])
full_grouped.info()

# Preview goes last: a notebook cell only renders its final expression, so the
# original mid-cell `head(10)` was evaluated and silently thrown away.
full_grouped.head(10)

In [None]:
# Day wise
# ========
# Daily totals; 'Date' is parsed to datetime in the same expression that
# loads the file.
day_wise = (
    pd.read_csv('../input/corona-virus-report/day_wise.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

# Structure (printed), then the first ten rows as the cell's output.
day_wise.info()
day_wise.head(10)

In [None]:
# Worldometer data
# ================
# Builds the country-level table used by the bar/histogram helpers: loads the
# Worldometer snapshot, derives two ratio columns and a positivity tercile, then
# inner-joins the population age-structure table.

worldometer_data = pd.read_csv('../input/corona-virus-report/worldometer_data.csv')

# Replace missing values '' with NAN and then 0
# What are the alternatives? Drop or impute. Do they make sense in this context?
# Consequence of zero-filling: the ratios below evaluate to inf (x/0) or
# NaN (0/0) wherever TotalTests or TotalCases was missing.
worldometer_data = worldometer_data.replace('', np.nan).fillna(0)
worldometer_data['Case Positivity'] = round(worldometer_data['TotalCases']/worldometer_data['TotalTests'],2)
worldometer_data['Case Fatality'] = round(worldometer_data['TotalDeaths']/worldometer_data['TotalCases'],2)

# Case Positivity is infinity when there is zero TotalTests due to division by zero
# The boolean mask is used without a column selector, so EVERY column of the
# matching rows (including 'Country/Region') is overwritten with 0, not only
# 'Case Positivity'. The `!= 0` filter further down relies on this.
worldometer_data[worldometer_data["Case Positivity"] == inf] = 0

# Qcut is quantile cut. Here we specify three equally sized bins and label them low, medium, and high, respectively.
# NOTE(review): the zeroed rows are still present here, so they take part in
# the quantile edges; rows later removed by the inner merge do too - confirm
# this is intended.
worldometer_data ['Case Positivity Bin']= pd.qcut(worldometer_data['Case Positivity'], q=3, labels=["low", "medium", "high"])

# Population Structure
worldometer_pop_struc = pd.read_csv('../input/covid19-worldometer-snapshots-since-april-18/population_structure_by_age_per_contry.csv')

# Replace missing values with zeros
worldometer_pop_struc = worldometer_pop_struc.fillna(0)
#worldometer_pop_struc.info()

# Merge worldometer_data with worldometer_pop_struc
# Inner means keep only common key values in both datasets
# (countries whose names differ between the two files are dropped here).
worldometer_data = worldometer_data.merge(worldometer_pop_struc,how='inner',left_on='Country/Region', right_on='Country')

# Keep observations where column "Country/Region" is not 0
# i.e. discard any rows that were zeroed out by the inf handling above.
worldometer_data = worldometer_data[worldometer_data["Country/Region"] != 0]

# Inspect worldometer_data's metadata
worldometer_data.info()

# Inspect Data
# worldometer_data.info()
# worldometer_data.tail(20)
# worldometer_data["Case Positivity"].describe()


# How does Canada's pandemic curve look? How does it compare to a country similar to yours?

In [None]:
def gt_n(n, countries=('Canada', 'US')):
    """Plot confirmed cases against days elapsed since each country passed ``n`` cases.

    Reads the module-level ``full_table`` (columns 'Country/Region', 'Date',
    'Confirmed'), sums confirmed cases per country and date, keeps only the
    observations above the threshold and re-bases every country's timeline so
    that day 0 is its first date with more than ``n`` confirmed cases.

    Parameters
    ----------
    n : int or float
        Threshold on confirmed cases (strictly greater than).
    countries : str or iterable of str, optional
        'Country/Region' values to compare. Defaults to Canada and the US,
        which is what this function previously had hard-coded.

    Returns
    -------
    None
        The plotly line chart is shown as a side effect.
    """
    # A bare string would otherwise be split into characters by list().
    if isinstance(countries, str):
        countries = [countries]

    # Work on an explicit copy of just the needed columns so the shared
    # full_table is never modified.
    in_scope = full_table['Country/Region'].isin(list(countries))
    temp = full_table.loc[in_scope, ['Country/Region', 'Date', 'Confirmed']].copy()

    # Parse dates before grouping so ordering and min() are chronological
    # rather than dependent on the string format of the date column.
    temp['Date'] = pd.to_datetime(temp['Date'])

    # Aggregate (i.e., sum up) confirmed cases by Country/Region and Date.
    temp = temp.groupby(['Country/Region', 'Date'])['Confirmed'].sum().reset_index()

    # Keep only observations with more than n confirmed cases.
    temp = temp[temp['Confirmed'] > n]

    # First date above the threshold, per country.
    min_date = temp.groupby('Country/Region')['Date'].min().reset_index()
    min_date.columns = ['Country/Region', 'Min Date']

    # Attach each country's start date to all of its rows.
    from_nth_case = pd.merge(temp, min_date, on='Country/Region')

    # Days elapsed since the country first exceeded n confirmed cases.
    from_nth_case['N days'] = (from_nth_case['Date'] - from_nth_case['Min Date']).dt.days

    # One line per country, colours chosen by plotly.
    fig = px.line(from_nth_case, x='N days', y='Confirmed', color='Country/Region',
                  title='N days from '+ str(n) +' case', height=600)
    fig.show()

In [None]:
# Call function gt_n with argument 5000 (i.e., align each country's curve on the first day it exceeded 5,000 confirmed cases)
gt_n(5000)

Canada seems to have already flattened the curve, while the US's curve is still rising sharply.

# Are the reported confirmed cases and deaths reliable? Why? 

In [None]:
# Draw horizontal bar plot with three arguments
# 1. variable of interest
# 2. top n countries
# 3. minimum population size (default value is 1000000)
def plot_hbar_wm(col, n, min_pop=1000000):
    """Show a horizontal bar chart of the ``n`` countries with the largest ``col``.

    Reads the module-level ``worldometer_data``. Only countries whose
    'Population' is strictly greater than ``min_pop`` are considered. Bars are
    coloured by 'WHO Region' and labelled with the value of ``col``.

    Parameters
    ----------
    col : str
        Column of ``worldometer_data`` to rank and plot.
    n : int
        Number of top-ranked countries to show.
    min_pop : int, optional
        Minimum population (exclusive) for a country to be included.

    Returns
    -------
    None
        The plotly figure is shown as a side effect.
    """
    df = worldometer_data[worldometer_data['Population']>min_pop]
    # Ascending sort + tail(n) keeps the n largest values of col.
    df = df.sort_values(col, ascending=True).tail(n)
    fig = px.bar(df,
                 x=col, y="Country/Region", color='WHO Region',
                 text=col, orientation='h', width=700,
                 color_discrete_sequence = px.colors.qualitative.Dark2)
    # The title previously opened a parenthesis that was never closed.
    fig.update_layout(title=col+' (Only countries with Population > ' + str(min_pop) + ')',
                      xaxis_title="", yaxis_title="",
                      yaxis_categoryorder = 'total ascending',
                      uniformtext_minsize=8, uniformtext_mode='hide')
    fig.show()
    
# Draw histogram with two arguments
# 1. variable of interest
# 2. the number of bins
def plot_histogram_wm(col, bins):
    """Show a histogram of one ``worldometer_data`` column.

    Parameters
    ----------
    col : str
        Column of the module-level ``worldometer_data`` to plot.
    bins : int
        Passed to plotly as ``nbins``.
    """
    values = worldometer_data[col]
    histogram = px.histogram(values, x=col, nbins=bins)
    histogram.show()

In [None]:
# Draw bar chart for case fatality of the top 15 countries with the highest case fatality rate (with the minimum population of 10 million)
plot_hbar_wm('Case Fatality', 15, 10000000)

Why does Canada have higher death rates? An aging population? Health infrastructure?

In [None]:
# Histogram of the case fatality rate across countries, using 50 bins.
plot_histogram_wm(col="Case Fatality", bins=50)

Same virus, but why is there such a large dispersion of fatality rates among countries?

# True fatality rate 

In [None]:
# Show the summary statistics of column case positivity.
# Printed explicitly: this cell ends with an assignment, so a bare
# `.describe()` expression here would be evaluated and never displayed.
print(worldometer_data["Case Positivity"].describe())

# Filter countries with Case Positivity of at most 1% (i.e., 1 confirmed case out of 100 tests)
# These are countries that go for rigorous testing regime
# NOTE(review): 'Case Positivity' is rounded to two decimals where it is
# created, so `<= 0.01` also admits unrounded ratios up to roughly 1.5% -
# confirm that is the intended cut-off.
# .copy() makes the benchmark frame independent of worldometer_data.
benchmark_countries = worldometer_data[worldometer_data["Case Positivity"]<=0.01].copy()
#benchmark_countries.info()
#benchmark_countries.head(20)

In [None]:
# Working assumption: in countries with very thorough testing, confirmed cases
# are close to true infections, so their pooled case fatality rate approximates
# the infection fatality rate.
benchmark_deaths = benchmark_countries['TotalDeaths']
infection_fatality_rate = benchmark_deaths.sum() / benchmark_countries['TotalCases'].sum()

# Death-weighted average of each benchmark country's population share aged 65+
# (weights = TotalDeaths). Note this is a population share, not the share of
# deaths that occurred among people aged 65+.
weighted_share_65 = benchmark_deaths * benchmark_countries['Fraction age 65+ years']
benchmark_death_65y_pct = sum(weighted_share_65) / sum(benchmark_deaths)

print(infection_fatality_rate)
print(benchmark_death_65y_pct)

# Both values are reported as percentages; '%s' receives a literal '%'.
summary_template = ('Estimated Infection Fatality Rate for a benchmark country with %.1f%s of population older than 65 years old '
                    'is %.2f%s')
print(summary_template % (100 * benchmark_death_65y_pct, '%', 100 * infection_fatality_rate, '%'))

Approximately 16% of Canada's population is older than 65.

# 2b 

In [None]:
# for numerical analysis
import numpy as np

# to store and process data in dataframe
import pandas as pd

# to interface with operating system
import os

# for basic visualization
import matplotlib.pyplot as plt

# for advanced visualization
import seaborn as sns; sns.set()

# for interactive visualization
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go

# for offline interactive visualization
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

# for trendlines
import statsmodels

# data manipulation
from datetime import datetime as dt
from scipy.stats.mstats import winsorize

In [None]:
# Collect the full path of every file under the input directory (recursively)
# and keep them in sorted order; the cell's output is the resulting list.
files = sorted(
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('../input/ca1234')
    for filename in filenames
)
files

In [None]:
# Short keys for the loaded tables, paired with `files` purely by position.
# NOTE(review): this relies on the sorted file list lining up with the order
# below; zip stops at the shorter of the two without raising - verify against
# the directory listing.
series_name = ['btc', 'cst', 'gold', 'jnj', 'bond', 'ca_bs', 'ca_stock', 'employment', 'oil']

# Read every file; a lone '.' in a field is treated as a missing value.
series = []
for csv_path in files:
    series.append(pd.read_csv(csv_path, na_values=['.']))

series_dict = {key: frame for key, frame in zip(series_name, series)}

# wrangle data

In [None]:
# Grouped by day, country
# =======================

full_grouped = pd.read_csv('../input/corona-virus-report/full_grouped.csv')
full_grouped.info()

# Convert Date from Dtype "Object" (or String) to Dtype "Datetime"
full_grouped['Date'] = pd.to_datetime(full_grouped['Date'])

# Canada-only rows. .copy() gives ca_covid its own data instead of a slice of
# full_grouped. (A mid-cell `full_grouped.head(10)` was dropped: only a cell's
# final expression is rendered, so it had no visible effect.)
ca_covid = full_grouped[full_grouped['Country/Region']=="Canada"].copy()
ca_covid.info()
ca_covid.tail()

In [None]:
# Suppress all warnings for the remainder of the session.
# NOTE(review): this also hides pandas chained-assignment warnings from the
# wrangling cells below.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Air Canada
# Daily price table -> returns, rolling volatility and a forward-looking
# cumulative return, restricted to 2020 onwards.
air_ca = pd.read_csv('../input/air-ca/AC.csv')
air_ca['Date'] = pd.to_datetime(air_ca['Date'])
air_ca = air_ca.rename(columns={'Adj Close': 'ac'})

# Row-over-row percentage change of the adjusted close.
air_ca['ac_return'] = air_ca['ac'].pct_change()

# Rolling standard deviation of daily returns scaled by sqrt(window length):
# 20 rows for the one-month figure, 252 rows for the one-year figure.
air_ca['ac_volatility_1m'] = (air_ca['ac_return'].rolling(20).std())*(20)**(1/2)
air_ca['ac_volatility_1y'] = (air_ca['ac_return'].rolling(252).std())*(252)**(1/2)

# Sum of daily returns over the current row and the following 19 rows
# (a rolling sum on the reversed series, flipped back). min_periods=1 means
# the last rows sum over however many rows remain.
air_ca['one_month_forward_ac_return'] = air_ca['ac_return'][::-1].rolling(window=20, min_periods=1).sum()[::-1]

# Select the final columns and the 2020+ rows in one step and take an explicit
# copy: a later cell adds a column to air_ca, which should not happen on a
# slice of the full-history frame.
air_ca = air_ca.loc[
    air_ca['Date'] >= '2020-01-01',
    ['Date', 'ac', 'ac_return', 'ac_volatility_1m', 'ac_volatility_1y',
     'one_month_forward_ac_return'],
].copy()
air_ca.info()
air_ca.tail(10)





In [None]:
def _with_return_features(raw, name, price_col='Adj Close', date_col='Date'):
    """Build the per-asset feature frame from a raw price table.

    Parameters
    ----------
    raw : pandas.DataFrame
        Source table; it is read but not modified.
    name : str
        Short asset key; used as the price column name and as the prefix or
        infix of the derived column names.
    price_col : str, optional
        Column of ``raw`` holding the price level.
    date_col : str, optional
        Column of ``raw`` holding the date.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: 'Date', ``name``, ``<name>_return``,
        ``<name>_volatility_1m``, ``<name>_volatility_1y``,
        ``one_month_forward_<name>_return``. The index of ``raw`` is kept.
    """
    return_col = name + '_return'
    out = pd.DataFrame({
        'Date': pd.to_datetime(raw[date_col]),
        name: raw[price_col],
    })

    # Row-over-row percentage change of the price.
    out[return_col] = out[name].pct_change()
    daily_return = out[return_col]

    # Rolling std of daily returns scaled by sqrt(window length). With a
    # 20-row window this is a one-month-horizon figure (it is NOT annualised);
    # the 252-row window (252 trading days per year) gives the one-year figure.
    out[name + '_volatility_1m'] = daily_return.rolling(20).std() * 20 ** (1 / 2)
    out[name + '_volatility_1y'] = daily_return.rolling(252).std() * 252 ** (1 / 2)

    # Sum of daily returns over the current row and the following 19 rows:
    # a rolling sum on the reversed series, flipped back. min_periods=1 means
    # the final rows sum over however many rows remain.
    out['one_month_forward_' + name + '_return'] = (
        daily_return[::-1].rolling(window=20, min_periods=1).sum()[::-1]
    )
    return out


# The raw frames in series_dict are no longer renamed or extended in place;
# each asset gets its own freshly built frame.

#ca_stock
ca_stock = _with_return_features(series_dict['ca_stock'], 'ca_stock')

#Bitcoin
btc = _with_return_features(series_dict['btc'], 'btc')

#Gold
# Fill a missing price from the previous row's original value, or failing that
# from the original value two rows back; longer gaps stay missing.
gold_raw = series_dict['gold']
gold_price = gold_raw['GOLDPMGBD228NLBM']
gold_price = (
    gold_price
    .fillna(gold_price.shift(1))
    .fillna(gold_price.shift(2))
    .astype('float64')
)
gold = _with_return_features(gold_raw.assign(gold=gold_price), 'gold',
                             price_col='gold', date_col='DATE')

#bond
bond = _with_return_features(series_dict['bond'], 'bond', price_col='Price')

#oil
oil = _with_return_features(series_dict['oil'], 'oil')

#Employment
employment = (
    series_dict['employment'][['DATE', 'employment']]
    .assign(DATE=lambda frame: pd.to_datetime(frame['DATE']))
    .rename(columns={'DATE': 'Date'})
)

#Canada Balance Sheet
ca_bs = (
    series_dict['ca_bs']
    .rename(columns={'date': 'Date', 'bs': 'ca_bs'})[['Date', 'ca_bs']]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

#Johnson & Johnson
# Features are computed on the full history first, then only 2020+ rows are
# kept. .copy() because a later cell adds a column to jnj.
jnj = _with_return_features(series_dict['jnj'], 'jnj')
jnj = jnj[jnj['Date'] >= '2020-01-01'].copy()

#Constellation Software Inc
# (the table is stored under the key 'cst' but exposed with the 'csu' prefix)
csu = _with_return_features(series_dict['cst'], 'csu')



In [None]:
# Daily recession indicator: parse the date, cast the CANRECDM flag to bool,
# and expose the two columns as 'Date' and 'recession'.
nber_recession_indicator_day = pd.read_csv('../input/ca-recession-daily/ca_recession.csv')

nber_recession_indicator_day = (
    nber_recession_indicator_day
    .assign(
        DATE=lambda frame: pd.to_datetime(frame["DATE"]),
        CANRECDM=lambda frame: frame["CANRECDM"].astype('bool'),
    )
    .rename(columns={'DATE': 'Date', 'CANRECDM': 'recession'})
    [["Date", "recession"]]
)

In [None]:
# Start from the ca_stock rows and left-join every other table on 'Date', in
# this fixed order; dates missing from a table yield NaN in its columns.
baseline = ca_stock
for right_frame in (nber_recession_indicator_day, btc, csu, gold, bond, oil,
                    air_ca, employment, ca_bs):
    baseline = pd.merge(baseline, right_frame, how='left', on='Date')

# Flag everything from 2020-03-01 onwards as recession, then treat dates with
# no indicator value as non-recession.
# NOTE(review): the column can end up mixing booleans with the integers 1/0
# written here; the int cast below is disabled - confirm before enabling it.
in_covid_period = baseline.Date >= '2020-03-01'
baseline.loc[in_covid_period, "recession"] = 1
baseline["recession"] = baseline["recession"].fillna(0)
#baseline["recession"] = baseline["recession"].astype(int)

baseline.info()

#2020 covid19 period
# Rows from 2020-01-01 onwards, with Canada's daily COVID-19 figures joined on;
# dates without a COVID-19 row count as zero new cases.
baseline2020 = pd.merge(baseline[baseline['Date'] >= '2020-01-01'], ca_covid,
                        how='left', on='Date')
baseline2020['New cases'] = baseline2020['New cases'].fillna(0)
baseline2020.info()

# What is the impact of COVID19 on the Canadian stock market (TSX)?

In [None]:
# One plot area with two y-axes: the TSX level on the primary (left) axis and
# daily new COVID-19 cases on the secondary (right) axis, sharing the date axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

tsx_trace = go.Scatter(x=baseline2020['Date'], y=baseline2020['ca_stock'], name = 'TSX')
new_cases_trace = go.Scatter(x=baseline2020['Date'], y=baseline2020['New cases'], name = 'New COVID19 Cases')

fig.add_trace(tsx_trace, secondary_y=False)
fig.add_trace(new_cases_trace, secondary_y=True)

# Figure title and axis titles (each y-axis is labelled to match its trace).
fig.update_layout(title_text="TSX and New COVID19 Cases")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="<b>TSX</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>New COVID19 Cases</b>", secondary_y=True)

fig.show()

comment?

In [None]:
def plot_chart(series):
    """Scatter one ``baseline`` column against 'Date', coloured by 'recession'.

    Parameters
    ----------
    series : str
        Column of the module-level ``baseline`` frame to plot; rows where it
        is null are left out. The same name is used as the chart title.
    """
    observed = baseline[baseline[series].notnull()]
    chart = px.scatter(observed, x="Date", y=series, color='recession', width=1000)
    chart.update_traces(mode='markers', marker_size=5)
    chart.update_layout(title=series, xaxis_title="", yaxis_title="")
    chart.show()

In [None]:
# Summary statistics of daily ca_stock returns for the 2020 rows only.
baseline2020['ca_stock_return'].describe()

The worst single-day decline is -12.34%. How bad is it relative to the TSX's history?

In [None]:
# Same summary over every row of baseline, as the historical yardstick.
baseline['ca_stock_return'].describe()

The historical standard deviation of daily returns for the TSX is 0.98%.

In [None]:
# Express the worst 2020 daily return as a multiple of the standard deviation
# of daily returns over the whole baseline sample.
worst_2020_return = baseline2020['ca_stock_return'].min()
historical_return_std = baseline['ca_stock_return'].std()
std_multiple = round(abs(worst_2020_return / historical_return_std), 2)

print("The worst single-day return in 2020 is ", str(std_multiple),
      " X standard deviations of TSX historical returns!")

-12.3% is a historical low.

In [None]:
# Report the first and last date that have a non-null daily return, then draw
# the distribution of all daily returns.
dates_with_return = baseline.loc[baseline['ca_stock_return'].notnull(), 'Date']
first_day = dates_with_return.min().date()
last_day = dates_with_return.max().date()
print("TSX historical daily returns from " + str(first_day) + ' to '
       + str(last_day))

fig = px.histogram(baseline, x="ca_stock_return")
fig.show()

# What is the impact of Covid19 on employment?

In [None]:
# Scatter of the 'employment' column over time, coloured by the recession flag.
plot_chart("employment")

Almost 2 million jobs were lost in April 2020.

In [None]:
# Size of the smallest 'employment' value relative to the column's standard
# deviation, reported as an absolute multiple.
employment_values = baseline['employment']
employment_multiple = round(abs(employment_values.min() / employment_values.std()), 2)

print("This is ", str(employment_multiple),
      " X standard deviations of the historical monthly change in employment!")

Number of unemployed people

In [None]:
#Unemployment
# Load the unemployment table and keep a parsed date plus the value column,
# exposed as 'Date' and 'unemployment'.
# (A mid-cell `unemployment.tail()` was dropped: only a cell's final expression
# is rendered, so it had no visible effect.)
unemployment = pd.read_csv('../input/0914bc2b/unemployment.csv')
unemployment['Date'] = pd.to_datetime(unemployment['REF_DATE'])
unemployment = unemployment.rename(columns={'VALUE': 'unemployment'})[['Date', 'unemployment']]

# Attach the recession flag; as for the baseline frame, everything from
# 2020-03-01 onwards is flagged and dates without an indicator count as 0.
baseline1 = pd.merge(unemployment,nber_recession_indicator_day, how='left', on='Date')
baseline1.loc[baseline1.Date >= '2020-03-01', "recession"] = 1
baseline1["recession"] = baseline1["recession"].fillna(0)
baseline1.info()

# Scatter of unemployment over time, coloured by the recession flag.
fig = px.scatter(baseline1[baseline1['unemployment'].notnull()], x="Date", y='unemployment', color='recession', width=1000)
fig.update_traces(mode='markers', marker_size=5)
fig.update_layout(title='unemployment', xaxis_title="", yaxis_title="")
fig.show()



# Do new Covid19 cases and deaths correlate with asset returns?







In [None]:
# Daily new cases vs. daily ca_stock return in 2020, with a fitted regression line.
sns.jointplot(data=baseline2020, x='New cases', y='ca_stock_return', kind='reg')

does not look right 

In [None]:
# Daily new deaths vs. daily ca_stock return in 2020, with a fitted regression line.
sns.jointplot(data=baseline2020, x='New deaths', y='ca_stock_return', kind='reg')

does not look right 

In [None]:
# Pairwise scatter matrix of 2020 asset returns together with the daily
# COVID-19 new deaths and new cases.
return_columns = [
    "ca_stock_return", "btc_return", "gold_return", "bond_return",
    "oil_return", "ac_return", "New deaths", "New cases",
]
baseline_returns = baseline2020[return_columns]

sns.pairplot(baseline_returns)

In [None]:
# Correlation matrix over the 2020 rows: each asset's daily return and one-year
# rolling volatility, plus daily new deaths and new cases, drawn as an
# annotated heatmap.
corr_columns = [
    'ca_stock_return', 'ca_stock_volatility_1y', 'btc_return', 'btc_volatility_1y',
    'gold_return', 'gold_volatility_1y', 'ac_return', 'ac_volatility_1y',
    'oil_return', 'oil_volatility_1y', 'csu_return', 'csu_volatility_1y',
    'New deaths', 'New cases',
]
baseline_corr = baseline2020[corr_columns].corr()

fig, ax = plt.subplots(figsize=(16,5))
sns.heatmap(baseline_corr, annot=True, ax = ax)

# What is going on in the financial market?

Helicopter Money?

In [None]:
# Let's see how the Canadian balance-sheet series ('ca_bs') has changed over time.
plot_chart('ca_bs')

In million

At its peak during the COVID19 pandemic, the increase in the Canadian central bank's balance sheet was around 546 billion, much higher than during the global financial crisis in 2008. Is it necessary? https://www.bankofcanada.ca/2020/05/bridge-recovery-banks-covid-19-pandemic-response/



# Optimism about the coming economic recovery?

In [None]:
# Key milestone dates in JNJ's vaccine development.
dates = pd.to_datetime(['2020-7-16', '2020-5-6', '2020-5-1', '2020-4-27', '2020-4-16', '2020-3-30', '2020-1-13'])

# Add a boolean column to both frames: True on rows whose 'Date' is one of the
# milestone dates.
for flagged_frame in (jnj, baseline2020):
    flagged_frame['vaccine_milestone_announced'] = flagged_frame['Date'].isin(dates)

In [None]:
# Let's inspect the JNJ dataset
# (rendered as the cell's output, now including the milestone flag column)
jnj

In [None]:
# Plotting helper: time series scatter with vaccine-milestone days highlighted.
def plot_return_vaccine_milestone(data, asset):
    """Scatter ``asset`` against 'Date', coloured by 'vaccine_milestone_announced'.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Date', ``asset`` and 'vaccine_milestone_announced'.
    asset : str
        Column to plot on the y-axis; also used as the title and y-axis label.
    """
    label = str(asset)
    chart = px.scatter(data, x='Date', y=asset, color='vaccine_milestone_announced', width=1000)
    chart.update_traces(mode='markers', marker_size=4)
    chart.update_layout(title=label, xaxis_title='Date', yaxis_title=label)
    chart.show()

In [None]:
# JNJ daily returns, with vaccine-milestone days highlighted.
plot_return_vaccine_milestone(data=jnj, asset='jnj_return')

March 30 2020: human testing of its coronavirus vaccine to begin by September
https://www.cnbc.com/2020/03/30/johnson-johnson-to-begin-clinical-trials-on-coronavirus-vaccine-candidate.html


https://www.cnbc.com/2020/07/16/johnson-johnson-expects-to-begin-late-stage-coronavirus-vaccine-trial-ahead-of-schedule-in-late-september.html


In [None]:
# TSX daily returns in 2020, with JNJ vaccine-milestone days highlighted.
plot_return_vaccine_milestone(data=baseline2020, asset='ca_stock_return')

# Impact of the travel restrictions on the travel industry

In [None]:
# Plotting helper: time series scatter with travel-restriction announcement
# days highlighted.
def plot_travel_restriction_policy_announced(data, asset):
    """Scatter ``asset`` against 'Date', coloured by 'travel_restriction_policy_announced'.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Date', ``asset`` and 'travel_restriction_policy_announced'.
    asset : str
        Column to plot on the y-axis; also used as the title and y-axis label.
    """
    label = str(asset)
    chart = px.scatter(data, x='Date', y=asset, color='travel_restriction_policy_announced', width=1000)
    chart.update_traces(mode='markers', marker_size=4)
    chart.update_layout(title=label, xaxis_title='Date', yaxis_title=label)
    chart.show()

In [None]:
# Key dates relating to social distancing / travel restrictions.
# These mirror the event list in the notes below the Air Canada chart
# (Mar 16, Mar 18, Mar 23, May 14, Jun 7). The list previously had
# '2020-3-06' where that event list says March 16.
# NOTE(review): 2020-06-07 fell on a Sunday, so it will only be flagged if the
# price data contains a row for that date.
dates = pd.to_datetime(['2020-6-07', '2020-5-14', '2020-3-23', '2020-3-18', '2020-3-16'])


# Boolean flag on both frames: True on rows whose 'Date' is one of the dates above.
air_ca['travel_restriction_policy_announced']  = air_ca['Date'].isin(dates)
baseline2020['travel_restriction_policy_announced'] = baseline2020['Date'].isin(dates)

In [None]:
# Air Canada daily returns (2020 onwards), with travel-restriction
# announcement days highlighted.
plot_travel_restriction_policy_announced(data=air_ca, asset='ac_return')


Mar. 16: Canada closes borders

Mar. 18: Canada-US border closes 

Mar. 23: Social distancing enforcement and non-essential workplace closures (share price dropped almost 20%)

May 14: Ontario to start reopening May 19

June 7: Canada allows families of citizens to cross border


and the stock is very volatile in 2020


In [None]:
# Draw a scatterplot of the TSX's 2020 daily returns with travel-restriction announcement days highlighted
plot_travel_restriction_policy_announced(baseline2020, 'ca_stock_return')

In [None]:
# Draw a scatterplot of Constellation Software's 2020 daily returns with travel-restriction announcement days highlighted
plot_travel_restriction_policy_announced(baseline2020, 'csu_return')