In [51]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings
warnings.filterwarnings("ignore")

# Read the raw country-level dataset from the working directory.
country_data = pd.read_csv(filepath_or_buffer="world-data-2023.csv")

# Print the column names, non-null counts and dtypes so missing values and
# numeric columns that were parsed as text (dtype `object`) are visible
# before any cleaning is done.
country_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 35 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   Country                                    195 non-null    object 
 1   Density
(P/Km2)                            195 non-null    object 
 2   Abbreviation                               188 non-null    object 
 3   Agricultural Land( %)                      188 non-null    object 
 4   Land Area(Km2)                             194 non-null    object 
 5   Armed Forces size                          171 non-null    object 
 6   Birth Rate                                 189 non-null    float64
 7   Calling Code                               194 non-null    float64
 8   Capital/Major City                         192 non-null    object 
 9   Co2-Emissions                              188 non-null    object 
 10  CPI                       

# Data Cleaning

In [52]:
# Keep only the countries that have a value in every column.
# NOTE: this is a strict filter -- per the info() output it shrinks the frame
# from 195 rows to 110 -- so every ranking and chart further down is computed
# on that subset only.
# Rebinding to a copy (instead of `inplace=True`) gives an independent frame
# that the column assignments below can safely write to.
country_data = country_data.dropna().copy()

# Columns that hold numbers but may have been read as text because the raw
# values carry thousands separators, currency signs or percent signs.
column_to_float=['Density\n(P/Km2)', 'Agricultural Land( %)','Land Area(Km2)',
                 'Birth Rate', 'Co2-Emissions', 'Forested Area (%)',
                 'CPI', 'CPI Change (%)', 'Fertility Rate', 'Gasoline Price','GDP',
                 'Gross primary education enrollment (%)', "Armed Forces size",
                 'Gross tertiary education enrollment (%)', 'Infant mortality',
                 'Life expectancy', 'Maternal mortality ratio','Minimum wage', 
                 'Out of pocket health expenditure','Physicians per thousand', 
                 'Population','Population: Labor force participation (%)', 
                 'Tax revenue (%)','Total tax rate', 'Unemployment rate', 'Urban_population']

for column in column_to_float:
    # `regex=False` forces literal matching. This matters for "$": interpreted
    # as a regular expression it is the end-of-string anchor, so the dollar
    # sign would be left in place and the float conversion would fail. The
    # default for `regex` has differed between pandas releases, so it is
    # spelled out here rather than relied upon.
    country_data[column] = (
        country_data[column]
        .astype(str)  # already-numeric columns go through the same path
        .str.replace(",", "", regex=False)
        .str.replace("$", "", regex=False)
        .str.replace("%", "", regex=False)
        .astype(float)
    )

# Confirm that the converted columns are now float64 and contain no nulls.
country_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 110 entries, 0 to 193
Data columns (total 35 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   Country                                    110 non-null    object 
 1   Density
(P/Km2)                            110 non-null    float64
 2   Abbreviation                               110 non-null    object 
 3   Agricultural Land( %)                      110 non-null    float64
 4   Land Area(Km2)                             110 non-null    float64
 5   Armed Forces size                          110 non-null    float64
 6   Birth Rate                                 110 non-null    float64
 7   Calling Code                               110 non-null    float64
 8   Capital/Major City                         110 non-null    object 
 9   Co2-Emissions                              110 non-null    float64
 10  CPI                       

# Feature Engineering

In [53]:
# Derive per-person figures by dividing each national total by the
# country's population, so countries of different sizes can be compared.
population = country_data['Population']
country_data['GDP per capita'] = country_data['GDP'].div(population)
country_data['Co2-Emissions per capita'] = country_data['Co2-Emissions'].div(population)

# Data Visualization of the World by GDP

In [54]:
# World map shaded by total GDP. Countries are matched to map regions by
# the text in the "Country" column (locationmode='country names').
fig = px.choropleth(
    data_frame=country_data,
    locations="Country",
    locationmode='country names',
    color="GDP",
    color_continuous_scale='OrRd',
    hover_name="Country",
    hover_data=['GDP'],
    title='GDP Distribution around the world',
)
fig.show()

> Without considering population, the USA, Canada and China are the top 3 countries in terms of GDP.

In [55]:
# World map shaded by GDP per capita (the engineered column), using the
# same country-name matching and colour scale as the total-GDP map.
fig = px.choropleth(
    data_frame=country_data,
    locations="Country",
    locationmode='country names',
    color="GDP per capita",
    color_continuous_scale='OrRd',
    hover_name="Country",
    hover_data=['GDP per capita'],
    title='GDP per capita distribution around the world',
)
fig.show()

>In terms of GDP per capita, the USA, Canada and Australia are among the top 3.

# Analysing Top 20 countries in terms of GDP

> # CO2 Emissions

In [56]:
# Rank the cleaned data by total GDP and keep the 20 largest economies.
country_by_gdp = country_data.sort_values(by='GDP', ascending=False).head(20)

# (legend label, source column) for each panel, top to bottom.
gdp_co2_panels = [
    ("GDP", "GDP"),
    ("CO2-Emissions", "Co2-Emissions"),
]

# One bar chart per panel in a two-row, single-column grid; both panels
# share the same country order on the x axis.
fig = make_subplots(rows=2, cols=1)
for panel_row, (trace_label, source_column) in enumerate(gdp_co2_panels, start=1):
    bars = go.Bar(
        name=trace_label,
        x=country_by_gdp["Country"],
        y=country_by_gdp[source_column],
        showlegend=True,
        marker=dict(opacity=0.5, showscale=False),
    )
    fig.add_trace(bars, row=panel_row, col=1)

# Country names are only printed under the bottom panel.
fig.update_xaxes(showticklabels=False, row=1, col=1)

fig.show()

> Countries with higher GDP produce higher CO2 emissions.

In [57]:
# (legend label, source column) for each panel, top to bottom.
per_capita_co2_panels = [
    ("GDP per capita", "GDP per capita"),
    ("CO2-Emissions", "Co2-Emissions"),
]

# GDP per capita on top, total CO2 emissions below, for the same 20
# countries in the same order.
fig = make_subplots(rows=2, cols=1)
for panel_row, (trace_label, source_column) in enumerate(per_capita_co2_panels, start=1):
    bars = go.Bar(
        name=trace_label,
        x=country_by_gdp["Country"],
        y=country_by_gdp[source_column],
        showlegend=True,
        marker=dict(opacity=0.5, showscale=False),
    )
    fig.add_trace(bars, row=panel_row, col=1)

# Country names are only printed under the bottom panel.
fig.update_xaxes(showticklabels=False, row=1, col=1)

fig.show()

In [58]:
# One marker per country: GDP per capita on x, total CO2 emissions on y,
# with marker size also driven by GDP per capita.
fig = px.scatter(
    data_frame=country_by_gdp,
    x="GDP per capita",
    y="Co2-Emissions",
    size="GDP per capita",
    color="Country",
    title='GDP per capita vs Co2-Emissions',
)
# Clicking a legend entry isolates that country instead of hiding it.
fig.update_layout(legend={"itemclick": 'toggleothers'})
fig.show()

> # Health factors

In [59]:
# Column shown in each panel, top to bottom; the column name doubles as
# the legend label.
health_metrics = ["Infant mortality", "Maternal mortality ratio", "GDP per capita"]

# Three bar charts in a single column, all using the same country order.
fig = make_subplots(rows=3, cols=1)
for panel_row, metric in enumerate(health_metrics, start=1):
    bars = go.Bar(
        name=metric,
        x=country_by_gdp["Country"],
        y=country_by_gdp[metric],
        showlegend=True,
        marker=dict(opacity=0.5, showscale=False),
    )
    fig.add_trace(bars, row=panel_row, col=1)

# Country names are only printed under the bottom panel.
for unlabeled_row in (1, 2):
    fig.update_xaxes(showticklabels=False, row=unlabeled_row, col=1)

fig.show()

> Lower GDP per capita goes with higher infant mortality and a higher maternal mortality ratio, as in the case of India and Indonesia.

In [60]:
# Column shown in each panel, top to bottom; the column name doubles as
# the legend label.
demographic_metrics = ["Fertility Rate", "Life expectancy", "GDP per capita"]

# Three bar charts in a single column, all using the same country order.
fig = make_subplots(rows=3, cols=1)
for panel_row, metric in enumerate(demographic_metrics, start=1):
    bars = go.Bar(
        name=metric,
        x=country_by_gdp["Country"],
        y=country_by_gdp[metric],
        showlegend=True,
        marker=dict(opacity=0.5, showscale=False),
    )
    fig.add_trace(bars, row=panel_row, col=1)

# Country names are only printed under the bottom panel.
for unlabeled_row in (1, 2):
    fig.update_xaxes(showticklabels=False, row=unlabeled_row, col=1)

# Render explicitly, like every other plotting cell. Without this call the
# figure only appears because the last expression of the cell happens to
# return it, and nothing is shown when the code runs outside a notebook.
fig.show()



> Countries with higher GDP per capita tend to have higher life expectancy and a lower fertility rate.

> # Unemployment 

In [61]:
# Column shown in each panel, top to bottom; the column name doubles as
# the legend label.
labor_metrics = [
    'Unemployment rate',
    'Population: Labor force participation (%)',
    "GDP per capita",
]

# Three bar charts in a single column, all using the same country order.
fig = make_subplots(rows=3, cols=1)
for panel_row, metric in enumerate(labor_metrics, start=1):
    bars = go.Bar(
        name=metric,
        x=country_by_gdp["Country"],
        y=country_by_gdp[metric],
        showlegend=True,
        marker=dict(opacity=0.5, showscale=False),
    )
    fig.add_trace(bars, row=panel_row, col=1)

# Country names are only printed under the bottom panel.
for unlabeled_row in (1, 2):
    fig.update_xaxes(showticklabels=False, row=unlabeled_row, col=1)

fig.show()

> Countries with higher GDP per capita show a lower unemployment rate and vice versa, with the exception of the USA.


> Countries with higher GDP per capita show higher labor force participation and vice versa.

> # Taxes

In [62]:
# One marker per country: total tax rate on x, tax revenue (%) on y,
# with marker size driven by GDP per capita.
fig = px.scatter(
    data_frame=country_by_gdp,
    x="Total tax rate",
    y="Tax revenue (%)",
    size="GDP per capita",
    color="Country",
    title='Total tax rates vs Tax revenue (%)',
)
# Clicking a legend entry isolates that country instead of hiding it.
fig.update_layout(legend={"itemclick": 'toggleothers'})
fig.show()

>Countries like the USA, UK, Canada and South Korea, which have higher GDP, have higher tax revenue with low tax rates.

In [63]:
# Column shown in each panel, top to bottom; the column name doubles as
# the legend label.
tax_metrics = ["Tax revenue (%)", "Total tax rate"]

# Two bar charts in a single column, both using the same country order.
fig = make_subplots(rows=2, cols=1)
for panel_row, metric in enumerate(tax_metrics, start=1):
    bars = go.Bar(
        name=metric,
        x=country_by_gdp["Country"],
        y=country_by_gdp[metric],
        showlegend=True,
        marker=dict(opacity=0.5, showscale=False),
    )
    fig.add_trace(bars, row=panel_row, col=1)

# Country names are only printed under the bottom panel.
fig.update_xaxes(showticklabels=False, row=1, col=1)

fig.show()

> # Prices

In [64]:
# Column shown in each panel, top to bottom; the column name doubles as
# the legend label.
price_metrics = ["CPI", "GDP per capita"]

# Two bar charts in a single column, both using the same country order.
fig = make_subplots(rows=2, cols=1)
for panel_row, metric in enumerate(price_metrics, start=1):
    bars = go.Bar(
        name=metric,
        x=country_by_gdp["Country"],
        y=country_by_gdp[metric],
        showlegend=True,
        marker=dict(opacity=0.5, showscale=False),
    )
    fig.add_trace(bars, row=panel_row, col=1)

# Country names are only printed under the bottom panel.
fig.update_xaxes(showticklabels=False, row=1, col=1)

fig.show()

>Countries with higher GDP have lower CPI, and vice versa.

In [65]:
# (legend label, source column) for each panel, top to bottom. The
# gasoline label differs from its column name, so both are listed.
cpi_gasoline_panels = [
    ("CPI", "CPI"),
    ("Gasoline Price ($)", "Gasoline Price"),
]

# Two bar charts in a single column, both using the same country order.
fig = make_subplots(rows=2, cols=1)
for panel_row, (trace_label, source_column) in enumerate(cpi_gasoline_panels, start=1):
    bars = go.Bar(
        name=trace_label,
        x=country_by_gdp["Country"],
        y=country_by_gdp[source_column],
        showlegend=True,
        marker=dict(opacity=0.5, showscale=False),
    )
    fig.add_trace(bars, row=panel_row, col=1)

# Country names are only printed under the bottom panel.
fig.update_xaxes(showticklabels=False, row=1, col=1)

fig.show()

>Countries with lower CPI are most likely to have higher gasoline prices.