1. Import libraries

In [12]:
# Standard library
import warnings

# Numerical / tabular tooling
import numpy as np
import pandas as pd

# Plotting. `plt` must be the pyplot submodule: the top-level `matplotlib`
# package has no `subplots()` / `tight_layout()`, which later cells call.
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Suppress every warning category for the rest of the session.
# NOTE: this also hides genuine warnings, not only deprecation noise.
warnings.filterwarnings('ignore')

2. Load dataset

In [13]:
# Read the world-population CSV (path is relative to the working directory)
# into `df`, the frame explored by the following cells.
df = pd.read_csv(filepath_or_buffer='world_population.csv')

3. Basic exploration

In [14]:
# Report the (rows, columns) tuple of the loaded frame.
print('Shape of the dataset: {}'.format(df.shape))

Shape of the dataset: (234, 17)


In [15]:
# Heading (preceded by a blank line), then the first five rows rendered as a
# styled table: dark-green cells, gold text, black borders.
print('\nGlimpse of the dataset:')
(
    df.head()
    .style
    .set_properties(
        **{
            'background-color': '#006837',
            'color': '#e9c46a',
            'border': '1.5px solid black',
        }
    )
)


Glimpse of the dataset:


Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [16]:
# Heading, then the per-column dtype / non-null summary.
# `DataFrame.info()` writes its report straight to stdout and returns None,
# so it is called bare: wrapping it in print() would append a stray "None".
print('Information of the dataset:\n')
df.info()

Information of the dataset:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Rank                         234 non-null    int64  
 1   CCA3                         234 non-null    object 
 2   Country/Territory            234 non-null    object 
 3   Capital                      234 non-null    object 
 4   Continent                    234 non-null    object 
 5   2022 Population              234 non-null    int64  
 6   2020 Population              234 non-null    int64  
 7   2015 Population              234 non-null    int64  
 8   2010 Population              234 non-null    int64  
 9   2000 Population              234 non-null    int64  
 10  1990 Population              234 non-null    int64  
 11  1980 Population              234 non-null    int64  
 12  1970 Population              234 non-null    int6

In [17]:
# Heading, then count/mean/std/quantiles of the numeric columns, shown as a
# styled table: dark-green cells, gold text, black borders.
print('Summary of the dataset:')
(
    df.describe()
    .style
    .set_properties(
        **{
            'background-color': '#006837',
            'color': '#e9c46a',
            'border': '1.5px solid black',
        }
    )
)

Summary of the dataset:


Unnamed: 0,Rank,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
count,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0,234.0
mean,117.5,34074414.709402,33501070.952991,31729956.24359,29845235.034188,26269468.816239,22710220.790598,18984616.970085,15786908.807692,581449.384615,452.127044,1.009577,0.427051
std,67.694165,136766424.804763,135589876.924439,130404992.75176,124218487.632998,111698206.71907,97832173.346751,81785186.084201,67795091.643236,1761840.864063,2066.121904,0.013385,1.714977
min,1.0,510.0,520.0,564.0,596.0,651.0,700.0,733.0,752.0,1.0,0.0261,0.912,0.0
25%,59.25,419738.5,415284.5,404676.0,393149.0,327242.0,264115.75,229614.25,155997.0,2650.0,38.417875,1.001775,0.01
50%,117.5,5559944.5,5493074.5,5307400.0,4942770.5,4292907.0,3825409.5,3141145.5,2604830.0,81199.5,95.34675,1.0079,0.07
75%,175.75,22476504.75,21447979.5,19730853.75,19159567.5,15762301.0,11869231.0,9826053.75,8817329.0,430425.75,238.93325,1.01695,0.28
max,234.0,1425887337.0,1424929781.0,1393715448.0,1348191368.0,1264099069.0,1153704252.0,982372466.0,822534450.0,17098242.0,23172.2667,1.0691,17.88


In [18]:
# count / unique / top / freq for the object-dtype columns, transposed so
# that each source column becomes one row of the styled table.
(
    df.describe(include=object)
    .transpose()
    .style
    .set_properties(
        **{
            'background-color': '#006837',
            'color': '#e9c46a',
            'border': '1.5px solid black',
        }
    )
)

Unnamed: 0,count,unique,top,freq
CCA3,234,234,AFG,1
Country/Territory,234,234,Afghanistan,1
Capital,234,234,Kabul,1
Continent,234,6,Africa,57


In [19]:
# Heading, then the number of missing values per column laid out as a single
# styled row (one cell per column).
print('Null values of the dataset:')
(
    df.isna()
    .sum()
    .to_frame()
    .transpose()
    .style
    .set_properties(
        **{
            'background-color': '#006837',
            'color': '#e9c46a',
            'border': '1.5px solid black',
        }
    )
)

Null values of the dataset:


Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Insights:
- There are no missing values in this dataset.
- We will encode the categorical features into numerical form later.

4. Custom palette

In [20]:
# Global seaborn look for the figures that follow.
# `sns.set()` re-applies seaborn's default "darkgrid" style whenever no style
# is passed, so a separate `sns.set_style('white')` issued before it is
# silently discarded. Passing the style in the same call keeps "white" in
# effect; the rc overrides are applied after the style, so the tan
# backgrounds still take precedence over the style's own face colours.
sns.set(style='white', rc={'axes.facecolor': '#d5ce98', 'figure.facecolor': '#d5ce98'})
# Must come after sns.set(), which would otherwise reset the context.
sns.set_context('poster', font_scale=0.7)

# Eleven-step ramp running from dark green to dark red.
palette = [
    "#006837", "#1A9850", "#66BD63", "#A6D96A", "#D9EF8B", "#FFFFBF",
    "#FEE08B", "#FDAE61", "#F46D43", "#D73027", "#A50026",
]
# The same colours in reverse order (dark red to dark green).
palette_cmap = palette[::-1]

5. Analysis

In [None]:
# Start of the population figure cell: print a heading and create the canvas.
print(f"Let's have a look on the population:")
# `plt` has to be `matplotlib.pyplot` for the two calls below; the bare
# `matplotlib` package exposes neither `subplots` nor `tight_layout`.
# Two stacked axes (2 rows x 1 column) on a 20x16-inch figure; the Figure
# handle is discarded and only the axes array `axs` is kept.
_, axs = plt.subplots(2, 1, figsize = (20, 16))
# Padding around the figure edge and between the subplots
# (expressed as a multiple of the font size).
plt.tight_layout(pad = 7.0)