<center> <img src="http://sbpress.com/wp-content/uploads/2019/09/fd4a335311d225bdc2e44d780aea9b55.jpg" alt="banner" width=800> </center>

# World Time use, work hours and GDP


This notebook visualizes the datasets on working hours, GDP and time use, and highlights some of the insights they reveal.

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import json
from urllib.request import urlopen
import seaborn as sns
import matplotlib.pyplot as plt

## TOC:
* [Avg. Hours Worked](#1)
* [Yearly RGDPO](#2)
* [RGDPO vs Avg. Hours Worked](#vs)
* [Time use](#3)
    * [How do people spend their time?](#4)
    * [Gender gap in leisure time](#5)
    * [Gender gap in paid work/study](#6)
    * [Gender gap in personal care time](#7)

## Avg. Hours Worked <a class="anchor" id="1"></a>

In [None]:
# Load the average hours worked per country, indexed by year.
hours_worked = pd.read_csv('../input/time-use-employment-and-gdp-per-country/Avg_hours_worked_(1950-2017).csv', index_col='Year')

# Read reference table from website to append continent and country names
url = "https://cloford.com/resources/codes/index.htm"
continents = pd.read_html(url, attrs={'class':'outlinetable'})[0]

# Add two missing countries to reference table.
# DataFrame.append was removed in pandas 2.0, so the rows are added with pd.concat.
missing_countries = pd.DataFrame({'Country':['Hong Kong', 'Romania'],'ISO (3)':['HKG','ROU'],'Continent':['Asia', 'Europe']})
continents = pd.concat([continents, missing_countries], ignore_index=True)

# Merge reference table and hours worked: match each RegionCode against the
# ISO-3 code, drop the now-redundant join column and restore the Year index.
hours_worked = (
    hours_worked
    .reset_index()
    .merge(right=continents[['Continent','Country', 'ISO (3)']], how='left', left_on='RegionCode', right_on='ISO (3)')
    .drop(columns='ISO (3)')
    .set_index('Year')
)
hours_worked

In [None]:
# Verify there are no more missing values: info() lists the non-null count and
# dtype of every column, so a count below the row total reveals unmatched rows.
hours_worked.info()

In [None]:
# One line per country (grouped by 'Country'), coloured by continent; the x axis
# is the frame's Year index.
fig = px.line(hours_worked, y='AvgHoursWorked', 
              color='Continent', 
              hover_name = 'RegionCode', 
              line_group = 'Country', 
              template='plotly_white',
              height=650,
              title ='<span style="color:#012888;font-weight:bold">Yearly Avg. Working hours per Country</span>'+'<br>'+
                    '<span style="font-size: 13px;color:#444444;">Asian countries have higher working hours. Korea stands out as having a experienced a large decrease since 1970. <br>On the other hand, Europe clearly has the lowest Avg. working hours, with Norway, Denmark and Germany at the bottom. </span>'
             )

# Calculate yearly averages.
# numeric_only=True is required on pandas >= 2.0, where mean() no longer drops
# the text columns (Continent, Country, RegionCode) silently and raises instead.
avg_hours_by_year = hours_worked.groupby('Year').mean(numeric_only=True)

# Overall mean across every country and year, computed once and reused for both
# the reference line position and its label.
overall_avg_hours = hours_worked.AvgHoursWorked.mean()

fig.add_hline(y=overall_avg_hours, 
              line_dash="dot", 
              line_color='#000000',
              annotation_text=f"Avg: {overall_avg_hours:.0f} hrs", 
              annotation_position="right",
              annotation_font_size=13,
              line_width=5,
              opacity=0.8)

fig.show()

In [None]:
# Download the world country outlines (GeoJSON) that the map is drawn from.
with urlopen('https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json') as geojson_file:
    countries = json.load(geojson_file)

# Animated world map: one frame per year, countries shaded with the "Reds"
# colour scale according to their average hours worked.
fig = px.choropleth(
    data_frame=hours_worked.reset_index(),
    geojson=countries,
    locations='RegionCode',
    color='AvgHoursWorked',
    color_continuous_scale="Reds",
    animation_frame="Year",
    labels={'RegionCode':'Region Code','AvgHoursWorked':'Avg. Hours worked' },
    title='<span style="color:#012888;font-weight:bold">Yearly Avg. Working hours per Country</span>',
    width=1000,
    height=650,
)

# Hide the map frame and coastlines, and trim the right/left/bottom margins.
fig.update_layout(
    geo={
        'showframe': False,
        'showcoastlines': False,
        'projection_type': 'equirectangular',
    },
    margin={"r":0,"l":0,"b":0},
)
fig.show()

## Yearly RGDPO <a class="anchor" id="2"></a>

In [None]:
#Load data and add continent classification for the countries
rgdpo = pd.read_csv('../input/time-use-employment-and-gdp-per-country/Yearly_RGDPO_(1950-2017).csv')
rgdpo = rgdpo.merge(right=continents[['Continent','Country', 'ISO (3)']], how='left', left_on='RegionCode', right_on='ISO (3)').drop(columns='ISO (3)')
rgdpo.rename(columns={'YearCode':'Year'},inplace=True)
rgdpo.set_index('Year',inplace=True)
rgdpo.dropna(axis='index',inplace=True)
rgdpo

In [None]:
# One line per country (grouped by 'Country'), coloured by continent; the x axis
# is the frame's Year index.
fig = px.line(
    data_frame=rgdpo,
    y='rgdpo',
    line_group='Country',
    color='Continent',
    hover_name='RegionCode',
    labels={'rgdpo':'RGDPO (in mil. 2017US$)'},
    title='<span style="color:#012888;font-weight:bold">Yearly RGDPO (in mil. 2017US$) per Country</span>',
    template='plotly_white',
    height=650,
)
fig.show()

In [None]:
# Download the world country outlines (GeoJSON) that the map is drawn from.
with urlopen('https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json') as geojson_file:
    countries = json.load(geojson_file)

# Animated world map: one frame per year, countries shaded with the "Blues"
# colour scale according to their RGDPO.
fig = px.choropleth(
    data_frame=rgdpo.reset_index(),
    geojson=countries,
    locations='RegionCode',
    color='rgdpo',
    color_continuous_scale="Blues",
    animation_frame="Year",
    labels={'RegionCode':'Region Code','rgdpo':'RGDPO (in mil. 2017US$)' },
    title='<span style="color:#012888;font-weight:bold">Yearly RGDPO (in mil. 2017US$) per Country</span>',
    width=1000,
    height=650,
)

# Hide the map frame and coastlines, and trim the right/left/bottom margins.
fig.update_layout(
    geo={
        'showframe': False,
        'showcoastlines': False,
        'projection_type': 'equirectangular',
    },
    margin={"r":0,"l":0,"b":0},
)
fig.show()

## RGDPO vs Avg. Hours Worked <a class="anchor" id="vs"></a>

In [None]:
# Pair every (RegionCode, Year) present in both tables. Columns that exist on
# both sides get a '_y' suffix on the hours-worked copy, which is then dropped
# so only the RGDPO-side version remains; finally flatten the index and round
# all numeric values to whole numbers.
m_rgdpo_work = (
    rgdpo
    .merge(right=hours_worked, how='inner', left_on=['RegionCode','Year'], right_on=['RegionCode','Year'], suffixes=['','_y'])
    .pipe(lambda merged: merged.drop(merged.filter(regex='_y$').columns.tolist(), axis=1))
    .reset_index()
    .round(0)
)

In [None]:
# Animated bubble chart: one frame per year, one bubble per country, sized by
# the 'pop' column and coloured by continent.
fig = px.scatter(
    data_frame=m_rgdpo_work,
    x='AvgHoursWorked',
    y='rgdpo',
    size='pop',
    size_max=130,
    color='Continent',
    hover_name='Country',
    animation_frame='Year',
    width=900,
    height=600,
    title='<span style="color:#012888;font-weight:bold">RGDPO vs Avg. Hous Worked</span>'+'<br>'+'<span style="font-size: 13px;color:#444444;">There doesnt seem to be a stron relationship between RGDPO and Avg. HW.<br>However, there does seem to be a trend for both variables over time, where RGDPO increases and Avg.WH. decrease.</span>',
)

# Extremes over all years, used to pin both axes to fixed ranges.
x0, x1 = m_rgdpo_work['AvgHoursWorked'].min(), m_rgdpo_work['AvgHoursWorked'].max()
y0, y1 = m_rgdpo_work['rgdpo'].min(), m_rgdpo_work['rgdpo'].max()

# 5% padding on each side of the x range; fixed absolute padding on the y range.
fig.update_xaxes(range=[x0-x0*0.05, x1+x1*0.05])
fig.update_yaxes(range=[y0-2000000, y1+100000])

fig.show()

In [None]:
# Pairwise plots of the columns of the merged RGDPO / hours-worked frame,
# coloured by continent. corner=True draws only the lower triangle of the grid.
sns.pairplot(m_rgdpo_work, hue='Continent', corner=True, height=2.3)
plt.show()

## Time use <a class="anchor" id="3"></a>

In [None]:
# Load the OECD time-use table. The plotting helper defined further down reads
# its 'Sex', 'Minutes_per_day', 'Country_Code', 'Country' and 'Description' columns.
time_use = pd.read_csv('../input/time-use-employment-and-gdp-per-country/Time_use_OECD.csv')
time_use

<a class="anchor" id="4"></a>

<img src="https://ourworldindata.org/uploads/2020/11/Time-Use-by-Country-OECD-732x550.png" alt="Time use OWID">

In [None]:
def create_scatter_time_use(time_use_filter, annotation_x, annotation_y, title, data=None):
    """Build a women-vs-men scatter plot of daily minutes for one activity.

    The time-use table is pivoted so that each country has one 'Men' and one
    'Women' value (mean of 'Minutes_per_day', rounded to whole minutes) per
    'Description'. The rows of the requested activity are plotted with men on
    the x axis and women on the y axis, together with a dotted diagonal
    "gender parity" line on which both values are equal.

    Parameters
    ----------
    time_use_filter : str
        Value of the 'Description' column selecting the activity to plot.
    annotation_x, annotation_y : float
        Data coordinates at which the 'Gender parity line' label is placed.
    title : str
        Figure title (may contain HTML markup).
    data : pandas.DataFrame, optional
        Table with 'Sex', 'Minutes_per_day', 'Country_Code', 'Country' and
        'Description' columns. Defaults to the notebook-level ``time_use`` frame.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure; it is not shown, so call ``.show()`` on the result.

    Raises
    ------
    ValueError
        If no row has ``Description == time_use_filter``.
    """
    source = time_use if data is None else data

    gender_time_use = pd.pivot_table(source, columns= 'Sex', values= 'Minutes_per_day', index= ['Country_Code', 'Country','Description'] ).reset_index().round(0)

    # Filter once and reuse the selection for both the plot and the axis limits.
    activity = gender_time_use[gender_time_use['Description']==time_use_filter]
    if activity.empty:
        raise ValueError(f"No time-use rows found for Description == {time_use_filter!r}")

    fig = px.scatter(activity, x= 'Men', y= 'Women', 
                        template= 'plotly_white',
                        width= 700,
                        height= 700,
                        hover_name= 'Country',
                        title= title,
                        labels={
                         "Men": "Men [min]", "Women":"Women [min]"
                        }
                    )

    # Both axes share one range so the parity line is the true diagonal: the
    # smallest and largest value over both sexes, padded by 10 minutes.
    # pandas reductions skip NaN (a country missing one sex), unlike the builtin
    # min()/max(), whose result depends on where the NaN sits in the sequence.
    minutes = activity[['Men', 'Women']]
    lower = float(minutes.min().min()) - 10
    upper = float(minutes.max().max()) + 10

    fig.add_shape(type="line",
        xref="x", yref="y",
        x0=lower, y0=lower,
        x1=upper, y1=upper,
        opacity=0.4,
        fillcolor="grey",
        line=dict(
            color="grey",
            width=3,
            dash="dot",
        )
    )

    # The label is rotated -45 degrees to run along the diagonal of the square plot.
    fig.add_annotation(
                x=annotation_x,
                y=annotation_y,
                text='Gender parity line',
                xref="x",
                yref="y",
                textangle = -45,
                showarrow=False,
                font=dict(
                    size=13,
                    color= 'grey'
                )
    )

    fig.update_xaxes(range=[lower, upper])
    fig.update_yaxes(range=[lower, upper])

    return fig

### Gender gap in leisure time <a class="anchor" id="5"></a>

In [None]:
# Heading plus explanatory subtitle, joined into a single HTML title string.
title = (
    '<span style="color:#012888;font-weight:bold">Gender gap in Leisure time</span>'
    '<br>'
    '<span style="font-size: 13px;color:#444444;">The data is spread out and slightly biased towards men, being below the parity line. <br>Mexico and Norway stand out as being the lowes and highest with regards to time spent leisurely.</span>'
)
# The annotation coordinates position the 'Gender parity line' label.
create_scatter_time_use(
    time_use_filter='Leisure',
    annotation_x=181,
    annotation_y=189,
    title=title,
).show()

### Gender gap in paid work/study time <a class="anchor" id="6"></a>

In [None]:
# Heading plus explanatory subtitle, joined into a single HTML title string.
title = (
    '<span style="color:#012888;font-weight:bold">Gender gap in Paid work/study time</span>'
    '<br>'
    '<span style="font-size: 13px;color:#444444;">The data is biasaed towards men below the parity line. Indicating that men spend more time working/studying.</span>'
)
# The annotation coordinates position the 'Gender parity line' label.
create_scatter_time_use(
    time_use_filter='Paid work or study',
    annotation_x=175,
    annotation_y=187,
    title=title,
).show()

### Gender gap in unpaid work <a class="anchor" id="unpaid"></a>

In [None]:
# Heading plus explanatory subtitle, joined into a single HTML title string.
title = (
    '<span style="color:#012888;font-weight:bold">Gender gap in Unpaid work time</span>'
    '<br>'
    '<span style="font-size: 13px;color:#444444;">The data is less dispersed and biased towards women. <br>This makes sense, as traditionally, women perform unpaid activities such as house keeping.</span>'
)
# The annotation coordinates position the 'Gender parity line' label.
create_scatter_time_use(
    time_use_filter='Unpaid work',
    annotation_x=75,
    annotation_y=87,
    title=title,
).show()

### Gender gap in personal care time <a class="anchor" id="7"></a>

In [None]:
# Heading plus explanatory subtitle, joined into a single HTML title string.
title = (
    '<span style="color:#012888;font-weight:bold">Gender gap in personal care time</span>'
    '<br>'
    '<span style="font-size: 13px;color:#444444;">Time spent in personal care is relatively uniform, being slightly higher in women. <br>France stands out as both men and women spend larger amounts of time.</span>'
)
# The annotation coordinates position the 'Gender parity line' label.
create_scatter_time_use(
    time_use_filter='Personal care',
    annotation_x=610,
    annotation_y=615,
    title=title,
).show()