In [None]:
import os
import pandas as pd
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import utils

In [None]:
# Raw inputs, all read from the notebook-relative ./data folder.
# dataset1 feeds the GDP chart; dataset2 feeds the social-spending deep-dive.
dataset1 = pd.read_excel("./data/GDP_final_clean.xlsx")
dataset2 = pd.read_csv("./data/Social_spend_agg.csv")

# dataset3 / dataset4 are filtered by 'Year' and 'Entity' further down
# (social expenditure and total government expenditure columns respectively).
dataset3 = pd.read_csv("./data/social-spending-oecd-longrun.csv")
dataset4 = pd.read_csv("./data/historical-gov-spending-gdp.csv")

# dataset5 / dataset6 are looked up by 'Country Name' and a year-named column;
# later cells use them as the defence and agriculture series.
dataset5 = pd.read_csv("./data/API_MS.MIL.XPND.GD.ZS_DS2_en_csv_v2_3358366.csv")
dataset6 = pd.read_csv("./data/API_NV.AGR.TOTL.csv")

In [None]:
# Re-key the GDP table on its "Year" column and make the index label explicit.
df_gdp = dataset1.set_index("Year")
df_gdp.index.name = "Year"

# Preview the first rows.
df_gdp.head()

In [None]:
# Base GDP figure for four countries, produced by the project helper in `utils`.
gdp_fig = utils.get_graphs_from_df_of_specific_countries(
    df_gdp,
    start_yr=0,
    allowed_countries=['United States', 'United Kingdom', 'France', 'Germany'],
    x_title="Year",
    y_title="GDP(USD in Trillions)",
    plot_title="<b>GDP per Country [1960-2020]</b>",
)

# China is added as a separate trace with its own colour and line width.
# NOTE(review): x is taken from the 'id' column, not the "Year" index —
# confirm that 'id' actually holds the years.
china_trace = go.Scatter(
    x=df_gdp['id'],
    y=df_gdp['China'],
    mode='lines',
    line_shape='spline',
    name='China' + '\n',
    line=dict(color='#440f2b', width=3),
)
gdp_fig.add_trace(china_trace)
gdp_fig.show()

In [None]:
# Quick look at the first two rows of dataset6 to check its column layout.
dataset6.head(n=2)

In [None]:
# Countries compared in the spending breakdowns; this order is also the order
# in which per-country values are collected in the cells below.
countries = [
    'United States',
    'United Kingdom',
    'Japan',
    'Germany',
    'France',
]

# Spending categories; the long-form frames below are built in this order.
categories = [
    'Social Spending',
    'Defence',
    'Agriculture',
    'Other',
]

In [None]:
# Long-form "2012" spending table: one row per (category, country) holding the
# share of GDP, stored in `df`.
#
# NOTE(review): the frame is labelled 2012, yet social spending is read for 2013
# and total expenditure for 2011 — presumably the nearest years present in those
# files; confirm.
# NOTE(review): the 1.2 and 0.8 factors below are unexplained scalings carried
# over unchanged — document their origin or remove them.
#
# Every lookup is restricted to `countries`, indexed by country name and then
# reindexed in `countries` order. A country with no matching row therefore gets
# NaN in its own slot instead of silently shifting the remaining values onto the
# wrong country, and a duplicated country row makes `reindex` raise instead of
# lengthening the list.
y_2012 = dataset3[dataset3['Year']==2013]
y_social = (
    y_2012[y_2012['Entity'].isin(countries)]
    .set_index('Entity')['Social Expenditure %GDP - (OWID extrapolated series)']
    .reindex(countries)
    .tolist()
)

y_2012 = dataset4[dataset4['Year']==2011]
y_total = (
    y_2012[y_2012['Entity'].isin(countries)]
    .set_index('Entity')['Government Expenditure (IMF based on Mauro et al. (2015))']
    .reindex(countries)
    .tolist()
)

y_defence = [
    item*1.2
    for item in (
        dataset5[dataset5['Country Name'].isin(countries)]
        .set_index('Country Name')['2012']
        .reindex(countries)
        .tolist()
    )
]

y_agriculture = [
    item*1.2
    for item in (
        dataset6[dataset6['Country Name'].isin(countries)]
        .set_index('Country Name')['2012']
        .reindex(countries)
        .tolist()
    )
]

# "Other" is the residual of total expenditure after the three named sectors.
y_other = [
    (total - social - defence - agriculture)*0.8
    for total, social, defence, agriculture in zip(y_total, y_social, y_defence, y_agriculture)
]

# Flatten to long form. The value lists are matched to `categories` by position.
values = []
categories_df = []
countries_df = []
for category, series in zip(categories, [y_social, y_defence, y_agriculture, y_other]):
    values.extend(series)
    categories_df.extend([category] * len(countries))
    countries_df.extend(countries)

df = pd.DataFrame({'Countries':countries_df,'category':categories_df,'% of GDP':values})
df['year'] = 2012
print("average = ",np.mean(y_social))

In [None]:
# Long-form 1980 spending table: one row per (category, country) holding the
# share of GDP, stored in `df2`.
#
# NOTE(review): the 1.2 and 0.8 factors below are unexplained scalings carried
# over unchanged — document their origin or remove them.
#
# Every lookup is restricted to `countries`, indexed by country name and then
# reindexed in `countries` order. A country with no matching row therefore gets
# NaN in its own slot instead of silently shifting the remaining values onto the
# wrong country, and a duplicated country row makes `reindex` raise instead of
# lengthening the list.
y_1980 = dataset3[dataset3['Year']==1980]
y_social2 = (
    y_1980[y_1980['Entity'].isin(countries)]
    .set_index('Entity')['Social Expenditure %GDP - (OWID extrapolated series)']
    .reindex(countries)
    .tolist()
)

y_1980 = dataset4[dataset4['Year']==1980]
y_total2 = (
    y_1980[y_1980['Entity'].isin(countries)]
    .set_index('Entity')['Government Expenditure (IMF based on Mauro et al. (2015))']
    .reindex(countries)
    .tolist()
)

y_defence2 = [
    item*1.2
    for item in (
        dataset5[dataset5['Country Name'].isin(countries)]
        .set_index('Country Name')['1980']
        .reindex(countries)
        .tolist()
    )
]

y_agriculture2 = [
    item*1.2
    for item in (
        dataset6[dataset6['Country Name'].isin(countries)]
        .set_index('Country Name')['1980']
        .reindex(countries)
        .tolist()
    )
]

# "Other" is the residual of total expenditure after the three named sectors.
y_other2 = [
    (total - social - defence - agriculture)*0.8
    for total, social, defence, agriculture in zip(y_total2, y_social2, y_defence2, y_agriculture2)
]

# Flatten to long form. The value lists are matched to `categories` by position.
values2 = []
categories_df2 = []
countries_df2 = []
for category, series in zip(categories, [y_social2, y_defence2, y_agriculture2, y_other2]):
    values2.extend(series)
    categories_df2.extend([category] * len(countries))
    countries_df2.extend(countries)

df2 = pd.DataFrame({'Countries':countries_df2,'category':categories_df2,'% of GDP':values2})
df2['year'] = 1980

In [None]:
# Long-form "1940" spending table: one row per (category, country) holding the
# share of GDP, stored in `df3`.
#
# NOTE(review): the frame is labelled 1940, but the inputs are social spending
# for 1930, total expenditure for 1950 and defence/agriculture for 1960 —
# presumably the nearest years present in those files; confirm.
# NOTE(review): the 1.2 factor and the 0.5 residual factor (0.8 in the other two
# year cells) are unexplained scalings carried over unchanged — document or
# remove them.
#
# Every lookup is restricted to `countries`, indexed by country name and then
# reindexed in `countries` order. A country with no matching row therefore gets
# NaN in its own slot instead of silently shifting the remaining values onto the
# wrong country, and a duplicated country row makes `reindex` raise instead of
# lengthening the list.
y_1940 = dataset3[dataset3['Year']==1930]
y_social3 = (
    y_1940[y_1940['Entity'].isin(countries)]
    .set_index('Entity')['Social Expenditure %GDP - (OWID extrapolated series)']
    .reindex(countries)
    .tolist()
)

y_1940 = dataset4[dataset4['Year']==1950]
y_total3 = (
    y_1940[y_1940['Entity'].isin(countries)]
    .set_index('Entity')['Government Expenditure (IMF based on Mauro et al. (2015))']
    .reindex(countries)
    .tolist()
)

y_defence3 = [
    item*1.2
    for item in (
        dataset5[dataset5['Country Name'].isin(countries)]
        .set_index('Country Name')['1960']
        .reindex(countries)
        .tolist()
    )
]

y_agriculture3 = [
    item*1.2
    for item in (
        dataset6[dataset6['Country Name'].isin(countries)]
        .set_index('Country Name')['1960']
        .reindex(countries)
        .tolist()
    )
]

# "Other" is the residual of total expenditure after the three named sectors.
y_other3 = [
    (total - social - defence - agriculture)*0.5
    for total, social, defence, agriculture in zip(y_total3, y_social3, y_defence3, y_agriculture3)
]

# Flatten to long form. The value lists are matched to `categories` by position.
values3 = []
categories_df3 = []
countries_df3 = []
for category, series in zip(categories, [y_social3, y_defence3, y_agriculture3, y_other3]):
    values3.extend(series)
    categories_df3.extend([category] * len(countries))
    countries_df3.extend(countries)

df3 = pd.DataFrame({'Countries':countries_df3,'category':categories_df3,'% of GDP':values3})
df3['year'] = 1940

In [None]:
# Stacked bars for the `df` frame: one bar per country, coloured by category.
fig = px.bar(
    df,
    x="Countries",
    y="% of GDP",
    color="category",
    barmode='stack',
    title="Government spending sector-wise for year 2012",
)
fig.update_layout(plot_bgcolor='#FFF')  # white plotting area

#fig.write_image("./gov_spending_sector_2012.jpg")
fig.show()


In [None]:
# Stacked bars for the `df2` frame: one bar per country, coloured by category.
fig = px.bar(
    df2,
    x="Countries",
    y="% of GDP",
    color="category",
    barmode='stack',
    title="Government spending sector-wise for year 1980",
)
fig.update_layout(plot_bgcolor='#FFF')  # white plotting area
#fig.write_image("./gov_spending_sector_1980.jpg")
fig.show()


In [None]:
# Stacked bars for the `df3` frame: one bar per country, coloured by category.
fig = px.bar(
    df3,
    x="Countries",
    y="% of GDP",
    color="category",
    barmode='stack',
    title="Government spending sector-wise for year 1940",
)
fig.update_layout(plot_bgcolor='#FFF')  # white plotting area
#fig.write_image("./gov_spending_sector_1940.jpg")
fig.show()


In [None]:
# Stack the three per-year frames (df, df2, df3) into a single long table.
yearly_frames = [df, df2, df3]
total_frames = pd.concat(yearly_frames, sort=False)

In [None]:
# Order rows by their 'year' label, ascending.
total_frames = total_frames.sort_values('year')

In [None]:
# One stacked bar per (year, country) pair: each category contributes one Bar
# trace, and the two-level x (year, then country) groups the bars by year.
categories = ['Social Spending', 'Defence', 'Agriculture', 'Other']
colors = ["#9467bd", "#ff7f0e", "#17becf", '#7f7f7f']

fig = go.Figure()
fig.update_layout(
    template="simple_white",
    xaxis={"title_text": "Years"},
    yaxis={"title_text": "% of GDP"},
    barmode="stack",
)

for category_name, bar_colour in zip(categories, colors):
    category_rows = total_frames.loc[total_frames['category'] == category_name]
    category_bar = go.Bar(
        x=[category_rows['year'], category_rows['Countries']],
        y=category_rows['% of GDP'],
        name=category_name,
        marker_color=bar_colour,
    )
    fig.add_trace(category_bar)

fig

In [None]:
# Keep only the rows of dataset4 for the compared countries and give the long
# expenditure column a short name for plotting.
filtered_df = (
    dataset4[dataset4['Entity'].isin(countries)]
    .rename(columns={"Government Expenditure (IMF based on Mauro et al. (2015))": "Government Expenditure"})
)

In [None]:
# One line per country ('Entity') of government expenditure over the years.
fig = px.line(
    filtered_df,
    x="Year",
    y="Government Expenditure",
    color='Entity',
    title='Historical Government Spending',
)
fig.update_layout(plot_bgcolor='#FFF')  # white plotting area
#fig.write_image("./historical_gov_spending.jpg")
fig.show()

### Social Spending: A Deep Dive

In [None]:
# Sub-sector view of social spending for the five compared countries.
# NOTE(review): this rebinds `df`, which earlier cells use for the 2012
# spending frame — re-running those cells after this one will see this table.
country = ['United States', 'United Kingdom', 'Germany', 'France', 'Japan']

# The existing "Health"/"Education"/... columns are moved to short names so
# the '%'-prefixed columns can take over the readable labels; only the
# relabelled '%' columns are kept afterwards.
column_labels = {
    "Health": "health",
    "Active labour market programmes": "Almp",
    "Other social services": "Others",
    "Education": "Edu",
    "%health": "Health",
    "%almp": "Active labour market programmes",
    "%oss": "Other social services",
    "%edu": "Education",
}
kept_columns = ["Country", "Health", "Education", "Active labour market programmes", "Other social services"]

df = (
    dataset2[dataset2['Country'].isin(country)]
    .rename(columns=column_labels)
    [kept_columns]
)

df

In [None]:
# Bar chart per country; every column after 'Country' is plotted as one
# sub-sector series.
sub_sector_columns = df.columns[1:]
axis_labels = {
    'Country': 'Country',
    'value': 'Percentage of Social Spending',
    'variable': 'Sub-sectors',
}

fig = px.bar(df, x="Country", y=sub_sector_columns, labels=axis_labels)
fig.update_layout(plot_bgcolor='rgba(0,0,0,0)')  # transparent plotting area
fig.show()