## Loading required libraries

In [37]:
from gapminder import gapminder
import pandas as pd

import plotly.express as px
from plotly.offline import init_notebook_mode
# Initialise plotly's offline notebook mode before any figure is shown.
init_notebook_mode(connected=True)

# Render every float in DataFrame output with two decimal places.
pd.set_option("display.float_format", "{:.2f}".format)

## Loading and Inspecting the data

In [2]:
# Preview the first five rows of the dataset.
gapminder.head(n=5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106


In [32]:
# Column dtypes, non-null counts and memory footprint of the dataset.
gapminder.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
dtypes: float64(2), int64(2), object(2)
memory usage: 80.0+ KB


In [38]:
# Summary statistics of the numeric columns, one row per column.
gapminder.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
year,1704.0,1979.5,17.27,1952.0,1965.75,1979.5,1993.25,2007.0
lifeExp,1704.0,59.47,12.92,23.6,48.2,60.71,70.85,82.6
pop,1704.0,29601212.32,106157896.74,60011.0,2793664.0,7023595.5,19585221.75,1318683096.0
gdpPercap,1704.0,7215.33,9857.45,241.17,1202.06,3531.85,9325.46,113523.13


In [30]:
# Report the first and last year covered by the dataset.
years = gapminder["year"]
print(f"We have data from {years.min()} to {years.max()}.")

We have data from 1952 to 2007.


## Exploring the data

We have life expectancy, population and GDP per capita data from 1952 to 2007. Let's filter the data to the year 2007 and explore it.

In [4]:
# Keep only the rows recorded for the year 2007.
gapminder_2007 = gapminder.loc[gapminder["year"].eq(2007)]

In [31]:
# 2007: life expectancy against GDP per capita, one colour per continent.
fig = px.scatter(
    data_frame=gapminder_2007,
    x="gdpPercap",
    y="lifeExp",
    color="continent",
)
fig.update_layout(
    title="2007 data: Life Expectancy vs GDP Per Capita",
    xaxis_title="GDP Per Capita",
    yaxis_title="Life Expectancy",
)
fig.show()

In [6]:
# Same scatter as before, with marker size taken from the "pop" column and
# the country name shown as the hover title.
fig = px.scatter(
    data_frame=gapminder_2007,
    x="gdpPercap",
    y="lifeExp",
    color="continent",
    size="pop",
    hover_name="country",
)
fig.update_layout(
    title="2007 data: Life Expectancy vs GDP Per Capita",
    xaxis_title="GDP Per Capita",
    yaxis_title="Life Expectancy",
)
fig.show()

In [39]:
# 2007: life expectancy against population, with a logarithmic x axis.
fig = px.scatter(
    data_frame=gapminder_2007,
    x="pop",
    y="lifeExp",
    color="continent",
    log_x=True,
    hover_name="country",
)
fig.update_layout(
    title="2007 data: Life Expectancy vs Population",
    xaxis_title="Population",
    yaxis_title="Life Expectancy",
)
fig.show()

The GDP-per-capita scatter plots above show that countries in **Oceania, Europe and the Americas** tend to have higher life expectancy and GDP per capita than countries in **Asia and Africa**.

Now, let's explore how life expectancy varies between continents. We will use the same data, filtered to the year 2007.

In [8]:
# Mean 2007 life expectancy per continent, with "continent" kept as a column.
mean_life_exp = (
    gapminder_2007
    .groupby("continent")["lifeExp"]
    .mean()
    .reset_index()
)

In [9]:
# Display the per-continent mean life expectancy computed in the previous cell.
mean_life_exp

Unnamed: 0,continent,lifeExp
0,Africa,54.806038
1,Americas,73.60812
2,Asia,70.728485
3,Europe,77.6486
4,Oceania,80.7195


In [10]:
# Bar chart of mean life expectancy per continent, bars ordered by descending total.
fig = px.bar(
    data_frame=mean_life_exp,
    x="continent",
    y="lifeExp",
    color="continent",
)
fig.update_layout(
    title="2007 data: Mean Life Expectancy of continents",
    xaxis_title="Continent",
    yaxis_title="Mean Life Expectancy",
    xaxis={"categoryorder": "total descending"},
)
fig.show()

**Oceania** has the highest mean life expectancy, followed by **Europe**, the **Americas** and **Asia**. **Africa** has the lowest mean life expectancy of all.

In [11]:
# Countries with the worst life expectancy in each continent (2007)

# idxmin returns index *labels* of gapminder_2007, so the rows must be selected
# with label-based .loc on that same frame. The previous positional .iloc on
# `gapminder` only gave the right rows because the index is a default RangeIndex.
min_life_exp_ind = gapminder_2007.groupby("continent")["lifeExp"].idxmin().to_list()
gapminder_2007.loc[min_life_exp_ind, :].reset_index(drop=True)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Swaziland,Africa,2007,39.613,1133066,4513.480643
1,Haiti,Americas,2007,60.916,8502814,1201.637154
2,Afghanistan,Asia,2007,43.828,31889923,974.580338
3,Turkey,Europe,2007,71.777,71158647,8458.276384
4,New Zealand,Oceania,2007,80.204,4115771,25185.00911


In [12]:
# Countries with the best life expectancy in each continent (2007)

# idxmax returns index *labels* of gapminder_2007, so the rows must be selected
# with label-based .loc on that same frame. The previous positional .iloc on
# `gapminder` only gave the right rows because the index is a default RangeIndex.
max_life_exp_ind = gapminder_2007.groupby("continent")["lifeExp"].idxmax().to_list()
gapminder_2007.loc[max_life_exp_ind, :].reset_index(drop=True)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Reunion,Africa,2007,76.442,798094,7670.122558
1,Canada,Americas,2007,80.653,33390141,36319.23501
2,Japan,Asia,2007,82.603,127467972,31656.06806
3,Iceland,Europe,2007,81.757,301931,36180.78919
4,Australia,Oceania,2007,81.235,20434176,34435.36744


In [13]:
# Distribution of life expectancy within each continent (2007)

fig = px.box(
    data_frame=gapminder_2007,
    x="continent",
    y="lifeExp",
    color="continent",
)
fig.update_layout(
    title="2007 data: Life Expectancy of continents",
    xaxis_title="Continent",
    yaxis_title="Life Expectancy",
)
fig.show()

Life expectancy varies more in **Africa** than in the other continents, and Africa's median life expectancy is also the lowest of all.

In [14]:
# Distribution of GDP per capita within each continent (2007)

fig = px.box(
    data_frame=gapminder_2007,
    x="continent",
    y="gdpPercap",
    color="continent",
)
fig.update_layout(
    title="2007 data: GDP Per Capita of continents",
    xaxis_title="Continent",
    yaxis_title="GDP Per Capita",
)
fig.show()

GDP per capita varies more among **Asian and European** countries than among countries in the other continents.

Let's look at life expectancy, population and GDP per capita data of India.

In [16]:
# Keep only India's rows and display them.
gapminder_india = gapminder.loc[gapminder["country"].eq("India")]
gapminder_india

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
696,India,Asia,1952,37.373,372000000,546.565749
697,India,Asia,1957,40.249,409000000,590.061996
698,India,Asia,1962,43.605,454000000,658.347151
699,India,Asia,1967,47.193,506000000,700.770611
700,India,Asia,1972,50.651,567000000,724.032527
701,India,Asia,1977,54.208,634000000,813.337323
702,India,Asia,1982,56.596,708000000,855.723538
703,India,Asia,1987,58.553,788000000,976.512676
704,India,Asia,1992,60.223,872000000,1164.406809
705,India,Asia,1997,61.765,959000000,1458.817442


In [17]:
# India's life expectancy over time, with a marker at each data point.
fig = px.line(
    data_frame=gapminder_india,
    x="year",
    y="lifeExp",
    markers=True,
)
fig.update_layout(
    title="Life Expectancy of India",
    xaxis_title="Year",
    yaxis_title="Life Expectancy",
)
fig.show()

In [18]:
# India's population over time, with a marker at each data point.
fig = px.line(
    data_frame=gapminder_india,
    x="year",
    y="pop",
    markers=True,
)
fig.update_layout(
    title="Population of India",
    xaxis_title="Year",
    yaxis_title="Population",
)
fig.show()

In [19]:
# India's GDP per capita over time, with a marker at each data point.
fig = px.line(
    data_frame=gapminder_india,
    x="year",
    y="gdpPercap",
    markers=True,
)
fig.update_layout(
    title="GDP Per Capita of India",
    xaxis_title="Year",
    yaxis_title="GDP Per Capita",
)
fig.show()

Between 1952 and 2007, the life expectancy of India increased from 37.373 years to 64.698 years.

The population of India has increased dramatically, from 372 million to 1.11 billion.

GDP per capita has increased from 546.57 to 2452.21.

In [20]:
# Pairwise correlation matrix of the three numeric measures, over all rows.
corr = gapminder.loc[:, ["lifeExp", "pop", "gdpPercap"]].corr()
corr

Unnamed: 0,lifeExp,pop,gdpPercap
lifeExp,1.0,0.064955,0.583706
pop,0.064955,1.0,-0.0256
gdpPercap,0.583706,-0.0256,1.0


In [21]:
# Heatmap of the correlation matrix: colour scale pinned to [-1, 1],
# each cell annotated with its value to two decimals.
fig = px.imshow(
    corr,
    zmin=-1,
    zmax=1,
    text_auto=".2f",
)
fig.show()

**GDP Per Capita and Life Expectancy** have a moderate positive correlation (about 0.58), while population is almost uncorrelated with either of them.

In [22]:
# Countries compared in the GDP-per-capita trend plot.
country_list = [
    "United States",
    "United Kingdom",
    "Canada",
    "France",
    "Spain",
    "Australia",
    "Japan",
]

In [23]:
# Rows belonging to any of the countries named in country_list.
select_country = gapminder.loc[gapminder["country"].isin(country_list)]

In [24]:
# GDP per capita over time, one line per selected country (ColorBrewer Set2 palette).
fig = px.line(
    data_frame=select_country,
    x="year",
    y="gdpPercap",
    color="country",
    color_discrete_sequence=px.colors.colorbrewer.Set2,
)
fig.update_layout(
    title="GDP Per Capita",
    xaxis_title="Year",
    yaxis_title="GDP Per Capita",
)
fig.show()

Among the selected countries, the **United States** has the highest GDP per capita from 1952 to 2007, followed by **Canada**. GDP per capita follows a similar trend in the European countries.