**Loading the Dataset**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Location of the CSV inside the read-only Kaggle input directory.
DATASET_PATH = '/kaggle/input/world-population-live-dataset/World Population Live Dataset.csv'

# Load the dataset into the notebook-wide DataFrame and display it.
df = pd.read_csv(DATASET_PATH)
df

**Data Cleaning and Preparation**

In [None]:
# Show the column labels so later cells can refer to them by exact name.
df.columns

In [None]:
# Print pandas' summary of the frame (columns, dtypes, non-null counts, memory usage).
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Replace missing country codes with an empty string so the column holds no NaN.
cca3_is_missing = df['CCA3'].isna()
df.loc[cca3_is_missing, 'CCA3'] = ''

In [None]:
# Descriptive statistics for the frame's columns (pandas' default describe() output).
df.describe()

In [None]:
# Convert 'World Population Percentage' from text such as '17.85%' to a float.
#
# The column is cast to str before the '%' is stripped so this cell is idempotent:
# once the column is already float, the .str accessor would raise AttributeError
# on a second run. Missing values become the text 'nan', which float() parses
# back to NaN, so they survive the round trip unchanged.
percentage_text = (
    df['World Population Percentage']
    .astype(str)
    .str.replace('%', '', regex=False)  # literal replacement, not a regex
)
df['World Population Percentage'] = percentage_text.astype('float')
df.head()

**Asking and Answering Questions and EDA Visualization**

**1. What is the ratio of the population of the 10 most populous countries to the world population?**

In [None]:
# Pie chart: share of the world population held by the 10 most populous countries.
#
# Assumes 'World Population Percentage' has already been converted to float
# by the cleaning cell.
#
# - The top 10 are selected by value (nlargest) instead of relying on the row
#   order of the CSV.
# - A 'Rest of the world' slice is added so the wedges sum to 100. Without it,
#   matplotlib rescales the ten values to fill the whole pie, which shows each
#   country's share *among the top 10* rather than its share of the world.
# - The former plt.xticks(rotation=45) call was dropped: a pie has no x ticks.
top10_share = df.nlargest(10, 'World Population Percentage')
rest_of_world_share = max(0.0, 100.0 - top10_share['World Population Percentage'].sum())

slice_values = list(top10_share['World Population Percentage']) + [rest_of_world_share]
slice_labels = list(top10_share['Name']) + ['Rest of the world']

# Ten 'bright' colours for the countries plus a neutral grey for the remainder.
palette_color = list(sns.color_palette('bright')) + ['lightgray']
# Pull only the largest country's wedge out of the pie.
explode = [0.1] + [0] * (len(slice_values) - 1)

fig, ax = plt.subplots(figsize=(7, 5))
ax.pie(
    slice_values,
    labels=slice_labels,
    colors=palette_color,
    explode=explode,
    autopct='%1.1f%%',  # wedges sum to 100, so this prints the true world share
)
ax.set_title('The ratio of the population of the 10 most populous countries to the world population')
plt.show()

**2. What is the distribution of population density across countries in 2022?**

In [None]:
# Histogram of population density, one count per country, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['Density (per km²)'], bins=30, color='skyblue', edgecolor='black')
ax.set(
    title='Distribution of Population Density Across Countries (2022)',
    xlabel='Population Density (per km²)',
    ylabel='Number of Countries',
)
ax.grid(True)
plt.show()


**3. What is the relationship between area and population density of countries?**

In [None]:
# Scatter of country area against population density.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['Area (km²)'], df['Density (per km²)'], alpha=0.6, color='orange')
ax.set(
    title='Relationship Between Area and Population Density of Countries',
    xlabel='Area (km²)',
    ylabel='Population Density (per km²)',
)
# Log-scaled x axis: areas vary greatly, so a linear axis would bunch most points together.
ax.set_xscale('log')
ax.grid(True)
plt.show()


**4. How has the population growth rate changed over the years for the top 5 most populous countries?**

In [None]:
# Average of the period-to-period percentage changes across the listed census years
# (stored on df as 'AvgGrowthRate').
df['AvgGrowthRate'] = df[['1980', '1990', '2000', '2010', '2020', '2022']].pct_change(axis='columns').mean(axis=1)

# Pick the population columns by name (labels made only of digits, e.g. '1970')
# and sort them chronologically. Slicing by position (df.columns[2:10]) depends on
# the CSV's column order: if the file lists years newest-first, pct_change would
# compute growth backwards in time and the x axis would run in reverse.
year_columns = sorted((col for col in df.columns if str(col).isdigit()), key=int)
if not year_columns:
    raise ValueError('No year-named population columns found in df')

# Select the top 5 most populous countries
top_countries_growth = df.nlargest(5, '2022')

# Plot the cumulative sum of period-to-period percentage changes for each country
fig, ax = plt.subplots(figsize=(12, 6))
for _, country_row in top_countries_growth.iterrows():
    # A row slice of a mixed-type frame is object dtype; cast to float before pct_change.
    population_by_year = country_row[year_columns].astype(float)
    cumulative_growth = population_by_year.pct_change().cumsum()
    ax.plot(year_columns, cumulative_growth, marker='o', label=country_row['Name'])
# Title range is derived from the columns actually plotted.
ax.set_title(f'Population Growth Rate Change Over the Decades ({year_columns[0]}-{year_columns[-1]})')
ax.set_xlabel('Year')
ax.set_ylabel('Cumulative Growth Rate')
ax.legend()
ax.grid(True)
plt.show()


**5. What is the percentage of world population residing in the top 10 most populous countries?**

In [None]:
# Share of the dataset's total 2022 population held by each of the ten most
# populous countries, recomputed from the raw 2022 counts.
world_total_2022 = df['2022'].sum()
top_10_population = df.nlargest(10, '2022').assign(
    **{'World Population Percentage': lambda top: top['2022'] / world_total_2022 * 100}
)

# Bar chart of those shares
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(top_10_population['Name'], top_10_population['World Population Percentage'], color='purple')
ax.set(
    title='Top 10 Countries by World Population Percentage (2022)',
    xlabel='Country',
    ylabel='Percentage of World Population',
)
plt.xticks(rotation=45, ha='right')
plt.show()


**6. Which countries have experienced the largest population growth since 1970?**

In [None]:
# Absolute change in population between 1970 and 2022, stored on df for reuse.
df['Population Growth'] = df['2022'].sub(df['1970'])
largest_growth = df.nlargest(10, 'Population Growth')

# Bar chart of the ten largest absolute increases
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(largest_growth['Name'], largest_growth['Population Growth'], color='teal')
ax.set(
    title='Top 10 Countries with Largest Population Growth (1970-2022)',
    xlabel='Country',
    ylabel='Population Growth',
)
plt.xticks(rotation=45, ha='right')
plt.show()


**7. What is the correlation between country area and population size in 2022?**

In [None]:
import seaborn as sns

# Scatter plot with regression line, fitted in log10 space.
#
# The previous version fitted a straight line to the raw values and only then
# switched both axes to log scale. On log-log axes that line is drawn as a curve,
# the fit is dominated by the few largest countries, and the confidence band can
# reach non-positive values that a log axis cannot show. Transforming both
# variables first makes the fitted line describe the displayed relationship.
# Rows with a missing or non-positive value are dropped because log10 is
# undefined there.
positive_rows = df[(df['Area (km²)'] > 0) & (df['2022'] > 0)]
log_area = np.log10(positive_rows['Area (km²)'])
log_population = np.log10(positive_rows['2022'])

# Pearson correlation of the log-transformed values, reported in the title.
log_correlation = log_area.corr(log_population)

fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x=log_area,
    y=log_population,
    scatter_kws={'alpha': 0.5},
    line_kws={'color': 'red'},
    ax=ax,
)
ax.set_title(
    f'Correlation Between Country Area and Population Size (2022), log-log r = {log_correlation:.2f}'
)
ax.set_xlabel('log10 of Area (km²)')
ax.set_ylabel('log10 of Population (2022)')
ax.grid(True)
plt.show()


**8. How do population growth rates compare across small, medium, and large countries (grouped by area)?**

In [None]:
# Bucket countries by land area (km²): (0, 10000] Small, (10000, 100000] Medium,
# above 100000 Large. The bucket is stored on df as 'Area Size'.
area_bin_edges = [0, 10000, 100000, float('inf')]
area_bin_labels = ['Small', 'Medium', 'Large']
df['Area Size'] = pd.cut(df['Area (km²)'], bins=area_bin_edges, labels=area_bin_labels)

# Bar plot of 'GrowthRate' per area bucket (seaborn's default barplot aggregation)
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='Area Size', y='GrowthRate', data=df, palette='muted', ax=ax)
ax.set_title('Population Growth Rate Comparison by Country Area Size')
ax.set_xlabel('Area Size')
ax.set_ylabel('Growth Rate')
plt.show()


**Conclusion**

The analysis reveals that developing countries like India, Nigeria, and China have experienced significant population growth since 1970, contributing greatly to global population increases, particularly in Asia and Africa. Smaller nations such as Monaco, Singapore, and Bangladesh have extremely high population densities, leading to unique challenges in urban planning and resource management. Countries like India and Nigeria have seen changes in their global population ranks, reflecting shifting demographic patterns and broader trends such as urbanization. Large nations like China and India dominate the global population share, significantly influencing socio-economic dynamics. Meanwhile, diverse growth rates across regions highlight the need for tailored policy interventions, as developing regions exhibit rapid increases, while developed regions face stable or declining population trends.