# Happiness vs COVID 19

In [636]:
import pandas as pd
import altair as alt

In [637]:
# Johns Hopkins CSSE COVID-19 global time series: one CSV each for confirmed
# cases, deaths and recoveries, all served from the same repository folder.
_jhuTimeSeriesRoot = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
)
covidGlobalCasesURL = _jhuTimeSeriesRoot + "time_series_covid19_confirmed_global.csv"
covidGlobalDeathsURL = _jhuTimeSeriesRoot + "time_series_covid19_deaths_global.csv"
covidGlobalRecoveredURL = _jhuTimeSeriesRoot + "time_series_covid19_recovered_global.csv"

# download each table into its own dataframe
covidGlobalCasesDF = pd.read_csv(filepath_or_buffer=covidGlobalCasesURL)
covidGlobalDeathsDF = pd.read_csv(filepath_or_buffer=covidGlobalDeathsURL)
covidGlobalRecoveredDF = pd.read_csv(filepath_or_buffer=covidGlobalRecoveredURL)

In [638]:
# tidy covid data
def _tidyCovidSeries(wideDF):
    """Reshape one wide JHU table into long form.

    The per-date columns are melted into a 'Date' column (parsed to datetime)
    and a value column named 'cases' (the same name is used for the deaths and
    recovered tables too). Three country labels are rewritten, and
    'Country/Region' is renamed to 'Country' so the frame can be joined with
    the happiness dataframe, namely to use population data.
    """
    longDF = wideDF.melt(
        id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
        var_name="Date",
        value_name="cases",
    )
    longDF['Date'] = pd.to_datetime(longDF['Date'])
    # one mapping instead of three successive single-value replacements
    longDF['Country/Region'] = longDF['Country/Region'].replace({
        'US': 'United States',
        'Taiwan*': 'Taiwan',
        'Korea, South': 'South Korea',
    })
    return longDF.rename(columns={'Country/Region': 'Country'})


covidGlobalCasesTidyDF = _tidyCovidSeries(covidGlobalCasesDF)
covidGlobalDeathTidyDF = _tidyCovidSeries(covidGlobalDeathsDF)
covidGlobalRecoveredTidyDF = _tidyCovidSeries(covidGlobalRecoveredDF)

In [639]:
# Gallup happiness data (*this has been copied to a public github).
# We use the pre-COVID 2019 edition, which can be considered the baseline for
# each country: the state the countries were in when they had to deal with COVID.
happinessURL = 'https://raw.githubusercontent.com/nfuruya/COVID19-analysis/1a1e241f6dde9ba0f3b9bb5cd47e3ed1bb994e8e/2019.csv'
happinessDF = pd.read_csv(happinessURL).rename(
    columns={
        'Score': 'Happiness score',
        'Country or region': 'Country',
    }
)

In [640]:
# Country -> region lookup, e.g. Denmark -> Western Europe
# (*this has been copied to a public github)
regionsURL = 'https://raw.githubusercontent.com/nfuruya/COVID19-analysis/main/continents2.csv'
regionsDF = (
    pd.read_csv(regionsURL)
    .rename(columns={'name': 'Country', 'sub-region': 'Region'})
    # only the join key and the region label are needed downstream
    .loc[:, ['Country', 'Region']]
)

In [641]:
# Population by country, 2020 (*this has been copied to a public github)
populationsURL = 'https://raw.githubusercontent.com/nfuruya/COVID19-analysis/main/population_by_country_2020.csv'
populationsDF = pd.read_csv(populationsURL).rename(
    columns={
        'Country (or dependency)': 'Country',
        'Population (2020)': 'Population',
    }
)

In [642]:
# The combined dataframe will be the only data structure for analysis

def _peakCountByCountry(tidyDF, label):
    """Return each country's peak national count as a Series named `label`.

    The tidy frames hold one row per Province/State per date, so a plain
    per-country max() would pick the largest single province rather than the
    national figure. Rows are therefore summed per (Country, Date) first and
    the maximum over dates is taken afterwards. The result is indexed by
    'Country', in order of first appearance in `tidyDF`.
    """
    nationalByDate = tidyDF.groupby(['Country', 'Date'], sort=False)['cases'].sum()
    return nationalByDate.groupby(level='Country', sort=False).max().rename(label)


# merge population data
combinedDF = happinessDF.merge(populationsDF, how='outer', on='Country')

# merge in country to region
combinedDF = combinedDF.merge(regionsDF, how='right', on='Country')

# merge in peak number of cases, deaths and recovered; the right joins keep
# every country present in the COVID tables
combinedDF = combinedDF.merge(_peakCountByCountry(covidGlobalCasesTidyDF, 'Cases'), how='right', on='Country')
combinedDF['Cases/population'] = combinedDF.Cases / combinedDF.Population

combinedDF = combinedDF.merge(_peakCountByCountry(covidGlobalDeathTidyDF, 'Deaths'), how='right', on='Country')
combinedDF['Death/cases'] = combinedDF.Deaths / combinedDF.Cases

combinedDF = combinedDF.merge(_peakCountByCountry(covidGlobalRecoveredTidyDF, 'Recovered'), how='right', on='Country')
combinedDF['Recovered/cases'] = combinedDF.Recovered / combinedDF.Cases

# Manual snapshot values for Canada, kept as they were.
# NOTE(review): these were presumably added because the per-province max()
# under-reported Canada; now that provinces are summed they are probably
# redundant (and will go stale) - confirm and remove.
combinedDF.loc[combinedDF['Country'] == 'Canada', 'Cases'] = 1540317
combinedDF.loc[combinedDF['Country'] == 'Canada', 'Deaths'] = 27189
combinedDF.loc[combinedDF['Country'] == 'Canada', 'Recovered'] = 1472824
combinedDF.loc[combinedDF['Country'] == 'Canada', 'Cases/population'] = 1540317 / 37855702
combinedDF.loc[combinedDF['Country'] == 'Canada', 'Death/cases'] = 27189 / 1540317
combinedDF.loc[combinedDF['Country'] == 'Canada', 'Recovered/cases'] = 1472824 / 1540317
combinedDF.loc[combinedDF['Country'] == 'Canada', 'Region'] = 'Northern America'

# assign regions by hand for these two countries
combinedDF.loc[combinedDF['Country'] == 'Brunei', 'Region'] = 'South-eastern Asia'
combinedDF.loc[combinedDF['Country'] == 'Burma', 'Region'] = 'South-eastern Asia'

In [643]:
# Scatter plots of happiness score against four factors from the happiness
# report, each with a linear trend line. Factor descriptions are quoted from
# https://worldhappiness.report/ed/2019/changing-world-happiness/

# clicking a region in the legend highlights it and fades the other regions
selection = alt.selection_multi(fields=['Region'], bind='legend')


def _scoreScatter(xField):
    """Scatter of 'Happiness score' (y) against `xField` (x), coloured by Region."""
    return alt.Chart(combinedDF).mark_point().encode(
        x=xField,
        y='Happiness score',
        color=alt.Color('Region', scale=alt.Scale(scheme='tableau20')),
        tooltip=['Country'],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.1)),
    )


def _scoreTrend(scatter, xField):
    """Regression line predicting 'Happiness score' from `xField`.

    transform_regression takes the independent field first (`on`) and the
    dependent field second (`regression`), matching the x and y axes.
    """
    return scatter.transform_regression(xField, 'Happiness score').mark_line()


# GDP per capita is in terms of Purchasing Power Parity (PPP) adjusted to constant 2011
# international dollars, taken from the World Development Indicators (WDI) released by
# the World Bank on November 14, 2018. See Statistical Appendix 1 for more details. GDP
# data for 2018 are not yet available, so we extend the GDP time series from 2017 to
# 2018 using country-specific forecasts of real GDP growth from the OECD Economic Outlook
# No. 104 (Edition November 2018) and the World Bank’s Global Economic Prospects
# (Last Updated: 06/07/2018), after adjustment for population growth. The equation uses
# the natural log of GDP per capita, as this form fits the data significantly better
# than GDP per capita.
ScoreByGDP = _scoreScatter('GDP per capita')
ScoreByGDPLine = _scoreTrend(ScoreByGDP, 'GDP per capita')

# The time series of healthy life expectancy at birth are constructed based on data
# from the World Health Organization (WHO) Global Health Observatory data repository,
# with data available for 2005, 2010, 2015, and 2016. To match this report’s sample
# period, interpolation and extrapolation are used. See Statistical Appendix 1 for more details.
ScoreByLifeExpectancy = _scoreScatter('Healthy life expectancy')
ScoreByLifeExpectancyLine = _scoreTrend(ScoreByLifeExpectancy, 'Healthy life expectancy')

# Social support is the national average of the binary responses (either 0 or 1) to
# the Gallup World Poll (GWP) question “If you were in trouble, do you have relatives
# or friends you can count on to help you whenever you need them, or not?”
ScoreBySocialSupport = _scoreScatter('Social support')
ScoreBySocialSupportLine = _scoreTrend(ScoreBySocialSupport, 'Social support')

# Freedom to make life choices is the national average of binary responses to the
# GWP question “Are you satisfied or dissatisfied with your freedom to choose what
# you do with your life?”
ScoreByFreedom = _scoreScatter('Freedom to make life choices')
ScoreByFreedomLine = _scoreTrend(ScoreByFreedom, 'Freedom to make life choices')

In [644]:
# interactive (pan/zoom) scatter with the legend selection, trend line layered on top
alt.layer(ScoreByGDP.interactive().add_selection(selection), ScoreByGDPLine)

In [645]:
# interactive (pan/zoom) scatter with the legend selection, trend line layered on top
alt.layer(ScoreByLifeExpectancy.interactive().add_selection(selection), ScoreByLifeExpectancyLine)

In [646]:
# interactive (pan/zoom) scatter with the legend selection, trend line layered on top
alt.layer(ScoreBySocialSupport.interactive().add_selection(selection), ScoreBySocialSupportLine)

In [647]:
# interactive (pan/zoom) scatter with the legend selection, trend line layered on top
alt.layer(ScoreByFreedom.interactive().add_selection(selection), ScoreByFreedomLine)

In [648]:
# These plots show happiness against COVID 19 cases/population, deaths/cases
# and recovered/cases by country, each with a linear trend line.

# encoding pieces shared by the three scatter plots
_regionColor = alt.Color('Region', scale=alt.Scale(scheme='tableau20'))
# full opacity for regions picked in the legend, faded otherwise
_legendFade = alt.condition(selection, alt.value(1), alt.value(0.1))

ScoreByCases = alt.Chart(combinedDF).mark_point().encode(
    x='Cases/population',
    y='Happiness score',
    color=_regionColor,
    tooltip=['Country', 'Cases'],
    opacity=_legendFade,
)
# transform_regression(on, regression): the independent (x) field goes first
# and the dependent (y) field second, so the line predicts happiness from x
ScoreByCasesLine = ScoreByCases.transform_regression('Cases/population', 'Happiness score').mark_line()

ScoreByDeaths = alt.Chart(combinedDF).mark_point().encode(
    x='Death/cases',
    y='Happiness score',
    color=_regionColor,
    tooltip=['Country', 'Deaths'],
    opacity=_legendFade,
)
ScoreByDeathsLine = ScoreByDeaths.transform_regression('Death/cases', 'Happiness score').mark_line()

ScoreByRecovered = alt.Chart(combinedDF).mark_point().encode(
    x='Recovered/cases',
    y='Happiness score',
    color=_regionColor,
    tooltip=['Country', 'Recovered'],
    opacity=_legendFade,
)
ScoreByRecoveredLine = ScoreByRecovered.transform_regression('Recovered/cases', 'Happiness score').mark_line()

In [649]:
# interactive (pan/zoom) scatter with the legend selection, trend line layered on top
alt.layer(ScoreByCases.interactive().add_selection(selection), ScoreByCasesLine)

In [650]:
# interactive (pan/zoom) scatter with the legend selection, trend line layered on top
alt.layer(ScoreByDeaths.interactive().add_selection(selection), ScoreByDeathsLine)

In [651]:
# interactive (pan/zoom) scatter with the legend selection, trend line layered on top
alt.layer(ScoreByRecovered.interactive().add_selection(selection), ScoreByRecoveredLine)