In [130]:
# load all librires pandas, seaborn, matplotlib, numpy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [131]:
# load data set worldpop
df = pd.read_csv('worldpop.csv')

In [132]:
# quick look into data
df.head()

Unnamed: 0,Rank,Country,Population (2024),Yearly Change,Net Change,Density (P/Km²),Land Area (Km²),Migrants (net),Fert. Rate,Med. Age,Urban Pop %
0,1,India,1450935791,0.0089,12866195,488,2973190,-630830,2.0,28,37%
1,2,China,1419321278,-0.0023,-3263655,151,9388211,-318992,1.0,40,66%
2,3,United States,345426571,0.0057,1949236,38,9147420,1286132,1.6,38,82%
3,4,Indonesia,283487931,0.0082,2297864,156,1811570,-38469,2.1,30,59%
4,5,Pakistan,251269164,0.0152,3764669,326,770880,-1401173,3.5,20,34%


In [133]:
# quick analysis
df.describe()

Unnamed: 0,Rank,Yearly Change,Fert. Rate,Med. Age
count,234.0,234.0,234.0,234.0
mean,117.5,0.009424,2.332051,31.679487
std,67.694165,0.013671,1.163002,9.810427
min,1.0,-0.0504,0.7,14.0
25%,59.25,0.0001,1.5,23.0
50%,117.5,0.0086,2.0,32.5
75%,175.75,0.01875,2.975,40.0
max,234.0,0.0507,6.0,59.0


## Basic Population Insights

1. Find the country with the largest population in 2024.
2. Find the country with the smallest population in 2024.
3. Rank the top 10 most populated countries.
4. Rank the bottom 10 least populated countries.
5. Calculate the average population across all countries.
6. Check the total world population (sum of all countries).
7. Identify the median population (middle country when sorted).

In [134]:
# quick look into data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Rank               234 non-null    int64  
 1   Country            234 non-null    object 
 2   Population (2024)  234 non-null    object 
 3   Yearly Change      234 non-null    float64
 4   Net Change         234 non-null    object 
 5   Density (P/Km²)    234 non-null    object 
 6   Land Area (Km²)    234 non-null    object 
 7   Migrants (net)     234 non-null    object 
 8   Fert. Rate         234 non-null    float64
 9   Med. Age           234 non-null    int64  
 10  Urban Pop %        234 non-null    object 
dtypes: float64(2), int64(2), object(7)
memory usage: 20.2+ KB


In [135]:
# 1. Find the country with the largest population in 2024.
df.loc[df['Population (2024)'].idxmax()]


Rank                                 199
Country              Antigua and Barbuda
Population (2024)                 93,772
Yearly Change                     0.0049
Net Change                           456
Density (P/Km²)                      213
Land Area (Km²)                      440
Migrants (net)                         9
Fert. Rate                           1.6
Med. Age                              36
Urban Pop %                          28%
Name: 198, dtype: object

In [136]:
# 2.Find the country with the smallest population in 2024.
df.loc[df['Population (2024)'].idxmin()]

Rank                       160
Country               Djibouti
Population (2024)    1,168,722
Yearly Change           0.0137
Net Change              15,778
Density (P/Km²)             50
Land Area (Km²)         23,180
Migrants (net)             -11
Fert. Rate                 2.6
Med. Age                    25
Urban Pop %                71%
Name: 159, dtype: object

In [137]:
# 3. Rank the top 10 most populated countries
df.nlargest(10,'Population (2024)')[['Country','Population (2024)']]

TypeError: Column 'Population (2024)' has dtype object, cannot use method 'nlargest' with this dtype

In [None]:
# 4. Rank the bottom 10 least populated countries.
df.nsmallest(10,'Population (2024)')[['Country','Population (2024)']]

Unnamed: 0,Country,Population (2024)
233,Holy See,496
232,Niue,1819
231,Tokelau,2506
230,Falkland Islands,3470
229,Montserrat,4389
228,Saint Helena,5237
227,Saint Pierre & Miquelon,5628
226,Tuvalu,9646
225,Saint Barthelemy,11258
224,Wallis & Futuna,11277


In [None]:
# 5. Calculate the average population across all countries.
aveg = df['Population (2024)'].mean()
print('The average population across all countries is',aveg)    

The average population across all countries is 34874074.45726496


In [None]:
# 6. Check the total world population (sum of all countries)
world_pop = df['Population (2024)'].sum()
print('The total world population is',world_pop)

The total world population is 8160533423


In [None]:
# convert figure into billions
world_pop = world_pop/1000000000
print('The total world population is',world_pop,'billions')


The total world population is 8.160533423 billions


In [None]:
# 7.Identify the median population (middle country when sorted).
median = df['Population (2024)'].median()
print('The median population is',median)

The median population is 5.6150635e-23


## 2. Yearly Change & Growth
8. Find the country with the highest yearly growth (%).
9. Find the country with the lowest yearly growth (%) (could even be negative).
10. Compare absolute net change in population across countries.
11. Identify countries with population decline.
12. Check if large populations always have higher net change.
13. Calculate the average yearly growth (%) across all countries.

In [None]:
# 8. Find the country with the highest yearly growth (%)
df.loc[df['Yearly Change'].idxmax()]

Country                  Chad
Population (2024)         0.0
Yearly Change           5.07%
Net Change           0.980059
Density (P/Km²)            16
Land Area (Km²)       1259200
Migrants (net)         204040
Fert. Rate                6.0
Med. Age                   16
Urban Pop %               22%
Name: 64, dtype: object

In [None]:
# 9. Find the country with the lowest yearly growth (%)
df.loc[df['Yearly Change'].idxmin()]

Country              Isle of Man
Population (2024)            0.0
Yearly Change             -0.01%
Net Change             -0.000005
Density (P/Km²)              148
Land Area (Km²)              570
Migrants (net)               177
Fert. Rate                   1.5
Med. Age                      46
Urban Pop %                  56%
Name: 200, dtype: object

In [None]:
# 10. Compare absolute net change in population across countries.
top1 = df.loc[df['Net Change'].idxmin()]

Rank                         32
Country                   Spain
Population (2024)    47,910,526
Yearly Change               0.0
Net Change               -1,053
Density (P/Km²)              96
Land Area (Km²)         498,800
Migrants (net)          111,674
Fert. Rate                  1.2
Med. Age                     45
Urban Pop %                 79%
Name: 31, dtype: object

In [157]:
# 11. Identify countries with population decline.
df.loc[df['Net Change'].idxmin()]

Rank                         32
Country                   Spain
Population (2024)    47,910,526
Yearly Change               0.0
Net Change               -1,053
Density (P/Km²)              96
Land Area (Km²)         498,800
Migrants (net)          111,674
Fert. Rate                  1.2
Med. Age                     45
Urban Pop %                 79%
Name: 31, dtype: object

In [None]:
# 12. Calculate the average yearly growth (%) across all countries.
aveg = df['Yearly Change'].mean()
print('The average yearly growth (%) across all countries is',aveg)

The average yearly growth (%) across all countries is 0.009423931623931623
