In [1]:
import pandas as pd
import math
# Lift the row cap on DataFrame display (None means "no limit"), so frames
# shown in this notebook are rendered in full.
pd.options.display.max_rows = None

In [2]:
# Relative path of the raw world-population CSV that is read into population_df.
population_path = "Resources/world_population.csv"

In [3]:
# Read the raw population CSV into a DataFrame and preview the first five rows.
population_df = pd.read_csv(filepath_or_buffer=population_path)
population_df.head(n=5)

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [4]:
# Remove the descriptive columns that this notebook does not use, keeping the
# country name, the per-year population figures, growth rate and world share.
unused_columns = ["Rank", "CCA3", "Capital", "Continent", "Area (km²)", "Density (per km²)"]
population_df = population_df.drop(columns=unused_columns)
population_df.head()

Unnamed: 0,Country/Territory,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Growth Rate,World Population Percentage
0,Afghanistan,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,1.0257,0.52
1,Albania,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,0.9957,0.04
2,Algeria,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,1.0164,0.56
3,American Samoa,44273,46189,51368,54849,58230,47818,32886,27075,0.9831,0.0
4,Andorra,79824,77700,71746,71519,66097,53569,35611,19860,1.01,0.0


In [5]:
# Show the dtype of each remaining column.
population_df.dtypes

Country/Territory               object
2022 Population                  int64
2020 Population                  int64
2015 Population                  int64
2010 Population                  int64
2000 Population                  int64
1990 Population                  int64
1980 Population                  int64
1970 Population                  int64
Growth Rate                    float64
World Population Percentage    float64
dtype: object

In [6]:
# Shorten the "Country/Territory" header to "Country".
# The renamed frame is re-bound to the name instead of using inplace=True, so
# the object previously bound to population_df is never mutated in place.
population_df = population_df.rename(columns={"Country/Territory": "Country"})
population_df.head()

Unnamed: 0,Country,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Growth Rate,World Population Percentage
0,Afghanistan,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,1.0257,0.52
1,Albania,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,0.9957,0.04
2,Algeria,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,1.0164,0.56
3,American Samoa,44273,46189,51368,54849,58230,47818,32886,27075,0.9831,0.0
4,Andorra,79824,77700,71746,71519,66097,53569,35611,19860,1.01,0.0


In [7]:
# Keep only the countries whose 2022 population is at least 20 million.
POPULATION_THRESHOLD = 20 * 10**6
# .copy() gives the filtered result its own data, so the cell-level edit made
# to cleanpopulation afterwards acts on an independent frame rather than on a
# boolean-mask selection that pandas may still associate with population_df.
cleanpopulation = population_df.loc[
    population_df["2022 Population"] >= POPULATION_THRESHOLD
].copy()
cleanpopulation

Unnamed: 0,Country,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Growth Rate,World Population Percentage
0,Afghanistan,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,1.0257,0.52
2,Algeria,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,1.0164,0.56
5,Angola,35588987,33428485,28127721,23364185,16394062,11828638,8330047,6029700,1.0315,0.45
8,Argentina,45510318,45036032,43257065,41100123,37070774,32637657,28024803,23842803,1.0052,0.57
11,Australia,26177413,25670051,23820236,22019168,19017963,17048003,14706322,12595034,1.0099,0.33
16,Bangladesh,171186372,167420951,157830000,148391139,129193327,107147651,83929765,67541860,1.0108,2.15
27,Brazil,215313498,213196304,205188205,196353492,175873720,150706446,122288383,96369875,1.0046,2.7
31,Burkina Faso,22673762,21522626,18718019,16116845,11882888,9131361,6932967,5611666,1.0259,0.28
34,Cameroon,27914536,26491087,23012646,19878036,15091594,11430520,8519891,6452787,1.0263,0.35
35,Canada,38454327,37888705,35732126,33963412,30683313,27657204,24511510,21434577,1.0078,0.48


In [8]:
# Rename "United States" to "USA" in order to merge with the other DataFrame.
# The row is matched by its country name instead of a hard-coded index label
# (previously 221), so the edit still hits the right row if the source file is
# re-ordered, and cannot append a stray row when that label is missing.
# assign() returns a new frame, so the filtered selection is not edited in place.
# NOTE(review): assumes the CSV spells the country exactly "United States" — confirm.
cleanpopulation = cleanpopulation.assign(
    Country=cleanpopulation["Country"].replace({"United States": "USA"})
)
cleanpopulation

Unnamed: 0,Country,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Growth Rate,World Population Percentage
0,Afghanistan,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,1.0257,0.52
2,Algeria,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,1.0164,0.56
5,Angola,35588987,33428485,28127721,23364185,16394062,11828638,8330047,6029700,1.0315,0.45
8,Argentina,45510318,45036032,43257065,41100123,37070774,32637657,28024803,23842803,1.0052,0.57
11,Australia,26177413,25670051,23820236,22019168,19017963,17048003,14706322,12595034,1.0099,0.33
16,Bangladesh,171186372,167420951,157830000,148391139,129193327,107147651,83929765,67541860,1.0108,2.15
27,Brazil,215313498,213196304,205188205,196353492,175873720,150706446,122288383,96369875,1.0046,2.7
31,Burkina Faso,22673762,21522626,18718019,16116845,11882888,9131361,6932967,5611666,1.0259,0.28
34,Cameroon,27914536,26491087,23012646,19878036,15091594,11430520,8519891,6452787,1.0263,0.35
35,Canada,38454327,37888705,35732126,33963412,30683313,27657204,24511510,21434577,1.0078,0.48


In [9]:
# Write the filtered table to CSV. With to_csv's defaults the DataFrame index
# is written as the first (unnamed) column of the file.
cleanpopulation.to_csv(path_or_buf="Resources/UpdatedWorldPopulation.csv")