In [1]:
import pandas as pd
import plotly.express as px
import pycountry_convert as pc

In [2]:
def country_to_continent(country):
    try:
        country_code = pc.country_name_to_country_alpha2(country, cn_name_format="default")
        continent_name = pc.convert_continent_code_to_continent_name(pc.country_alpha2_to_continent_code(country_code))
    except:
        continent_name="Unknown"
    
    return continent_name

In [3]:
df=pd.read_csv('population_total.csv',encoding="Windows-1252")
df.head()

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
0,Afghanistan,3.28M,3.28M,3.28M,3.28M,3.28M,3.28M,3.28M,3.28M,3.28M,...,76.6M,76.4M,76.3M,76.1M,76M,75.8M,75.6M,75.4M,75.2M,74.9M
1,Angola,1.57M,1.57M,1.57M,1.57M,1.57M,1.57M,1.57M,1.57M,1.57M,...,168M,170M,172M,175M,177M,179M,182M,184M,186M,188M
2,Albania,400k,402k,404k,405k,407k,409k,411k,413k,414k,...,1.33M,1.3M,1.27M,1.25M,1.22M,1.19M,1.17M,1.14M,1.11M,1.09M
3,Andorra,2650,2650,2650,2650,2650,2650,2650,2650,2650,...,63k,62.9k,62.9k,62.8k,62.7k,62.7k,62.6k,62.5k,62.5k,62.4k
4,United Arab Emirates,40.2k,40.2k,40.2k,40.2k,40.2k,40.2k,40.2k,40.2k,40.2k,...,12.3M,12.4M,12.5M,12.5M,12.6M,12.7M,12.7M,12.8M,12.8M,12.9M


In [4]:
df=df.set_index("country")

In [5]:
df=df.iloc[:, ::10]


In [6]:
df=df.iloc[:, :-8]
df=df.replace({"k":"*1e3", "M":"*1e6", "B":"*1e9"}, regex=True).map(pd.eval).astype(int)


In [7]:

df["Continent"]=df.index.map(country_to_continent)


In [8]:
df.drop(df.loc[df['Continent']=="Unknown"].index, inplace=True)
df

Unnamed: 0_level_0,1800,1810,1820,1830,1840,1850,1860,1870,1880,1890,...,1940,1950,1960,1970,1980,1990,2000,2010,2020,Continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,3280000,3280000,3290000,3430000,3590000,3750000,3940000,4139999,4350000,4580000,...,6700000,7750000,9000000,11200000,13400000,12400000,20800000,29200000,38900000,Asia
Angola,1570000,1570000,1580000,1810000,2100000,2420000,2640000,2870000,3060000,3240000,...,3810000,4550000,5450000,5890000,8340000,11800000,16399999,23400000,32900000,Africa
Albania,400000,418000,437000,457000,478000,501000,551000,606000,667000,735000,...,1110000,1260000,1640000,2150000,2680000,3290000,3130000,2950000,2880000,Europe
Andorra,2650,2650,2670,2850,3060,3280,3510,3760,4030,4320,...,5510,6200,13400,24300,36100,54500,65400,84500,77300,Europe
United Arab Emirates,40200,40200,40200,40200,40200,40200,39900,39800,41800,44100,...,62900,69600,92400,235000,1020000,1830000,3130000,8550000,9890000,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Samoa,47300,47200,47100,47100,47000,46700,43700,40700,37900,35300,...,67700,82100,109000,143000,156000,163000,174000,186000,198000,Oceania
Yemen,2590000,2590000,2600000,2640000,2690000,2740000,2780000,2810000,2850000,2890000,...,4220000,4660000,5320000,6190000,7940000,11700000,17400000,23200000,29800000,Asia
South Africa,1450000,1500000,1580000,1930000,2410000,2990000,3370000,3780000,4240000,4750000,...,11200000,13600000,17100000,22100000,28600000,36800000,45000000,51200000,59300000,Africa
Zambia,747000,869000,1010000,1180000,1370000,1580000,1600000,1600000,1610000,1640000,...,1930000,2310000,3070000,4179999,5850000,8039999,10400000,13600000,18400000,Africa


In [9]:
df.reset_index().to_csv('pop.csv', index=False) 

In [10]:

fig = px.pie(df, names='Continent', values='2020', title="Population Distribution by Continent in 2020",hole=0.5)

fig.show()


In [11]:

df_grouped = df.groupby("Continent")[["1950", "2020"]].sum()

fig = px.bar(df_grouped, x=df_grouped.index, y=["1950","2020"], barmode="group",
            title=f"Continent Population: 1950 vs 2020",height=400)
fig.show()

In [12]:
import plotly.express as px

fig = px.choropleth(df, locations=df.index, locationmode='country names', 
                    color='2020', hover_name=df.index, 
                    title="World Population Distribution")
fig.show()

In [13]:
latest_years = df.columns[-10:-1]  

top_5_countries = df[latest_years[-1]].nlargest(5).index  

df_filtered = df.loc[top_5_countries, latest_years]

df_long = df_filtered.reset_index().melt(id_vars="country", var_name="Year", value_name="Population")

fig = px.line(df_long, x="Year", y="Population", color="country", markers=True,title="Population Trends of Top 5 Countries Over the Last Few Years")

fig.show()

In [14]:
top_10_countries = df["2020"].nlargest(10).index  

df_filtered = df.loc[top_10_countries, ["2020"]]
df_filtered.index
fig = px.bar(df_filtered, x="2020", y=df_filtered.index, orientation="h", text="2020", title=f"Top 10 Most Populated Countries in {"2020"}", color=df_filtered.index)
fig.update_layout(yaxis=dict(showticklabels=False))

fig.show()