**What are the basic statistics and structure of the dataset?**

In [1]:
import numpy as np
import pandas as pd
import plotly as px

# Load the dataset
df = pd.read_csv('/kaggle/input/world-population-statistics-2023/world_population_data.csv')

# Display basic information about the dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
print("Basic Statistics and Structure of the Dataset:")
df.info()

# Display summary statistics of numerical columns
print("\nSummary Statistics:")
print(df.describe())


Basic Statistics and Structure of the Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   rank              234 non-null    int64  
 1   cca3              234 non-null    object 
 2   country           234 non-null    object 
 3   continent         234 non-null    object 
 4   2023 population   234 non-null    int64  
 5   2022 population   234 non-null    int64  
 6   2020 population   234 non-null    int64  
 7   2015 population   234 non-null    int64  
 8   2010 population   234 non-null    int64  
 9   2000 population   234 non-null    int64  
 10  1990 population   234 non-null    int64  
 11  1980 population   234 non-null    int64  
 12  1970 population   234 non-null    int64  
 13  area (km²)        234 non-null    float64
 14  density (km²)     234 non-null    int64  
 15  growth rate       234 non-null    object 
 1

**How many countries are represented in the dataset, and what continents do they belong to?**

In [2]:
# Tally the distinct country names and collect the distinct continent labels
num_countries = df['country'].nunique()
unique_continents = df['continent'].unique()

# Build the comma-separated continent list once, then report both figures
continent_list = ', '.join(unique_continents)
print(f"\nNumber of Countries: {num_countries}")
print(f"Continents Represented: {continent_list}")



Number of Countries: 234
Continents Represented: Asia, North America, Africa, South America, Europe, Oceania


**What are the top 5 countries with the highest population in 2023?**

In [3]:
# Order every row by 2023 population (largest first) and keep the first five
ranked_by_2023 = df.sort_values(by='2023 population', ascending=False)
top_countries_2023 = ranked_by_2023.head(5)

# Only the rank, name and 2023 population are shown in the report
columns_to_show = ['rank', 'country', '2023 population']
print("\nTop 5 Countries with Highest Population in 2023:")
print(top_countries_2023[columns_to_show])



Top 5 Countries with Highest Population in 2023:
   rank        country  2023 population
0     1          India       1428627663
1     2          China       1425671352
2     3  United States        339996563
3     4      Indonesia        277534122
4     5       Pakistan        240485658


**Create a histogram for the 2023 population distribution**

In [4]:
import plotly.express as px

# Histogram of the '2023 population' column of `df`, requested with nbins=20
fig = px.histogram(
    data_frame=df,
    x='2023 population',
    nbins=20,
    title='2023 World Population Distribution',
)
fig.show()

**How has the population of the top 5 most populous countries changed from 1970 to 2023?**

In [5]:
# Population snapshot columns in chronological order; the leading token of each
# column name is the year it refers to.
population_columns = ['1970 population', '1980 population', '1990 population',
                      '2000 population', '2010 population', '2015 population',
                      '2020 population', '2023 population']

# Extract data for the top 5 most populous countries.
# nlargest selects by value, so the result does not depend on the CSV row order.
top_5_countries = df.nlargest(5, '2023 population')

# Reshape wide -> long so that time can go on the x-axis: one row per
# (country, year) pair instead of one column per year.
population_long = top_5_countries.melt(
    id_vars='country',
    value_vars=population_columns,
    var_name='year',
    value_name='population',
)
# '1970 population' -> 1970, giving a numeric, correctly spaced time axis
population_long['year'] = population_long['year'].str.split().str[0].astype(int)

# Create a line plot to show population changes over time: one line per country
fig = px.line(population_long, x='year', y='population', color='country',
              title='Population Changes in Top 5 Most Populous Countries (1970-2023)')
fig.show()


**What is the relationship between the population growth rate and the population density?**

In [6]:
# 'growth rate' is loaded with dtype object (text), so plotting it directly would
# give a categorical axis. Convert it to numbers on a copy so `df` is untouched.
# NOTE(review): assumes values look like "0.81%" -- confirm against the CSV; any
# value that still cannot be parsed becomes NaN and is left out of the plot.
scatter_data = df.copy()
scatter_data['growth rate'] = pd.to_numeric(
    scatter_data['growth rate'].astype(str).str.replace('%', '', regex=False).str.strip(),
    errors='coerce',
)

# Create a scatter plot to show the relationship between population growth rate and density
fig = px.scatter(scatter_data, x='growth rate', y='density (km²)', title='Population Growth Rate vs Density')
fig.show()


**How does the world population percentage vary by continent in 2023?**

In [7]:
# Convert 'world percentage' to numbers before aggregating.
# NOTE(review): the column's dtype is not visible in the captured output; this
# presumes text such as "17.85%" but also works if the column is already numeric.
world_percentage = pd.to_numeric(
    df['world percentage'].astype(str).str.replace('%', '', regex=False).str.strip(),
    errors='coerce',
)

# Sum the per-country shares within each continent so the chart has exactly one
# bar per continent instead of one stacked segment per country.
continent_share = (
    pd.DataFrame({'continent': df['continent'], 'world percentage': world_percentage})
    .groupby('continent', as_index=False)['world percentage']
    .sum()
)

# Create a bar chart to show world population percentage by continent in 2023
fig = px.bar(continent_share, x='continent', y='world percentage', title='World Population Percentage by Continent (2023)')
fig.show()


**What is the correlation between the country's area and its population density in 2023?**

In [8]:
# Scatter of every row in `df`: area on the x-axis, population density on the y-axis
fig = px.scatter(
    data_frame=df,
    x='area (km²)',
    y='density (km²)',
    title='Correlation: Area vs Population Density (2023)',
)
fig.show()


**How does the population density vary among continents in 2023?**

In [9]:
# One box per continent summarising the 'density (km²)' values of its rows
fig = px.box(
    data_frame=df,
    x='continent',
    y='density (km²)',
    title='Population Density Distribution by Continent (2023)',
)
fig.show()

**What is the relationship between a country's area and its population size in 2023?**

In [10]:
# Scatter of every row in `df`: area on the x-axis, 2023 population on the y-axis
fig = px.scatter(
    data_frame=df,
    x='area (km²)',
    y='2023 population',
    title='Relationship: Area vs Population Size (2023)',
)
fig.show()


**What is the proportion of each continent's population to the total world population in 2023?**

In [11]:
# Total 2023 population per continent, returned as a two-column frame
continent_population_2023 = (
    df.groupby('continent')['2023 population']
    .sum()
    .reset_index()
)

# Pie chart: one slice per continent, sized by its summed 2023 population
fig = px.pie(
    continent_population_2023,
    values='2023 population',
    names='continent',
    title='Continent-wise Proportion of World Population (2023)',
)
fig.show()


**How has the population density changed over time for the top 5 most populous countries?**

In [12]:
# Population snapshot columns in chronological order; the leading token of each
# column name is the year it refers to.
population_columns = ['1970 population', '1980 population', '1990 population',
                      '2000 population', '2010 population', '2015 population',
                      '2020 population', '2022 population', '2023 population']

# Extract data for the top 5 most populous countries (selected by value, so the
# result does not depend on the CSV row order).
top_5_countries = df.nlargest(5, '2023 population')

# The dataset holds a single density column, so a change over time has to be
# derived: reshape to one row per (country, year) and divide population by area.
density_long = top_5_countries.melt(
    id_vars=['country', 'area (km²)'],
    value_vars=population_columns,
    var_name='year',
    value_name='population',
)
density_long['year'] = density_long['year'].str.split().str[0].astype(int)
# NOTE(review): derived from 'area (km²)'; values may differ from the dataset's
# own 'density (km²)' column if that one uses a different area definition.
density_long['density (km²)'] = density_long['population'] / density_long['area (km²)']

# Create a line plot to show the changes in population density over time: one line per country
fig = px.line(density_long, x='year', y='density (km²)', color='country',
              title='Population Density Changes in Top 5 Most Populous Countries (1970-2023)')
fig.show()


**What is the distribution of the population growth rate for the year 2023?**

In [13]:
# 'growth rate' is loaded with dtype object (text); a histogram of text only counts
# distinct labels, so convert to numbers first.
# NOTE(review): assumes values look like "0.81%" -- confirm against the CSV; any
# value that still cannot be parsed becomes NaN and is left out of the plot.
growth_rate_numeric = pd.to_numeric(
    df['growth rate'].astype(str).str.replace('%', '', regex=False).str.strip(),
    errors='coerce',
)
histogram_data = pd.DataFrame({'growth rate': growth_rate_numeric})

# Create a histogram for the distribution of population growth rate in 2023
fig = px.histogram(histogram_data, x='growth rate', nbins=20, title='Distribution of Population Growth Rate (2023)')
fig.show()


**How has the world population percentage of the top 10 most populous countries changed in the last decade?**

In [14]:
# Population snapshots covering the most recent decade available. The dataset has
# no 2013 column, so the 2010 snapshot is used as the starting point.
snapshot_columns = ['2010 population', '2015 population', '2020 population',
                    '2022 population', '2023 population']

# Extract data for the top 10 most populous countries
top_10_countries = df.nlargest(10, '2023 population')

# The single 'world percentage' column cannot show change over time, so compute
# each country's share (in percent) of the total across all rows of `df` for
# every snapshot year.
snapshot_totals = df[snapshot_columns].sum()
share_wide = top_10_countries[snapshot_columns].div(snapshot_totals, axis='columns') * 100
share_wide.insert(0, 'country', top_10_countries['country'])

# Reshape wide -> long: one row per (country, year) so time can go on the x-axis
share_long = share_wide.melt(id_vars='country', var_name='year', value_name='world percentage')
share_long['year'] = share_long['year'].str.split().str[0].astype(int)

# Create a line plot to show the changes in world population percentage: one line per country
fig = px.line(share_long, x='year', y='world percentage', color='country',
              title='World Population Percentage Changes for Top 10 Countries (2010-2023)')
fig.show()


**What is the average population density for each continent in 2023?**

In [15]:
# Mean of 'density (km²)' within each continent, returned as a two-column frame
average_density_by_continent = (
    df.groupby('continent')['density (km²)']
    .mean()
    .reset_index()
)

# Bar chart: one bar per continent, height = mean density
fig = px.bar(
    average_density_by_continent,
    x='continent',
    y='density (km²)',
    title='Average Population Density by Continent (2023)',
)
fig.show()


**What is the relationship between a country's population size and its growth rate in 2023?**

In [16]:
# 'growth rate' is loaded with dtype object (text), so plotting it directly would
# give a categorical axis. Convert it to numbers on a copy so `df` is untouched.
# NOTE(review): assumes values look like "0.81%" -- confirm against the CSV; any
# value that still cannot be parsed becomes NaN and is left out of the plot.
scatter_data = df.copy()
scatter_data['growth rate'] = pd.to_numeric(
    scatter_data['growth rate'].astype(str).str.replace('%', '', regex=False).str.strip(),
    errors='coerce',
)

# Create a scatter plot to visualize the relationship between a country's population size and growth rate in 2023
fig = px.scatter(scatter_data, x='2023 population', y='growth rate', title='Relationship: Population Size vs Growth Rate (2023)')
fig.show()


**How does the population density correlate with the country's area in 2023?**

In [17]:
# Area (x) against population density (y) for every row in `df`, with an
# ordinary-least-squares trendline requested via trendline='ols'
fig = px.scatter(
    data_frame=df,
    x='area (km²)',
    y='density (km²)',
    trendline='ols',
    title='Correlation: Area vs Population Density (2023)',
)
fig.show()


**What is the overall growth rate distribution for the year 2023?**

In [18]:
# 'growth rate' is loaded with dtype object (text); a violin plot needs numbers
# to estimate a distribution, so convert first.
# NOTE(review): assumes values look like "0.81%" -- confirm against the CSV; any
# value that still cannot be parsed becomes NaN and is left out of the plot.
growth_rate_numeric = pd.to_numeric(
    df['growth rate'].astype(str).str.replace('%', '', regex=False).str.strip(),
    errors='coerce',
)
violin_data = pd.DataFrame({'growth rate': growth_rate_numeric})

# Create a violin plot to show the distribution of growth rates in 2023
fig = px.violin(violin_data, y='growth rate', box=True, title='Distribution of Growth Rates in 2023')
fig.show()


**How has the population density changed over time for the top 5 most densely populated countries?**

In [19]:
# Population snapshot columns in chronological order; the leading token of each
# column name is the year it refers to.
population_columns = ['1970 population', '1980 population', '1990 population',
                      '2000 population', '2010 population', '2015 population',
                      '2020 population', '2022 population', '2023 population']

# Extract data for the top 5 most densely populated countries
top_5_dense_countries = df.nlargest(5, 'density (km²)')

# The dataset holds a single density column, so a change over time has to be
# derived: reshape to one row per (country, year) and divide population by area.
density_long = top_5_dense_countries.melt(
    id_vars=['country', 'area (km²)'],
    value_vars=population_columns,
    var_name='year',
    value_name='population',
)
density_long['year'] = density_long['year'].str.split().str[0].astype(int)
# NOTE(review): derived from 'area (km²)'; values may differ from the dataset's
# own 'density (km²)' column if that one uses a different area definition.
density_long['density (km²)'] = density_long['population'] / density_long['area (km²)']

# Create a line plot to show the changes in population density over time: one line per country
fig = px.line(density_long, x='year', y='density (km²)', color='country',
              title='Population Density Changes in Top 5 Most Densely Populated Countries (1970-2023)')
fig.show()


**What is the correlation between a country's area and its population size for the year 2023?**

In [20]:
# Scatter of every row in `df`: area on the x-axis, 2023 population on the y-axis
fig = px.scatter(
    data_frame=df,
    x='area (km²)',
    y='2023 population',
    title='Correlation: Area vs Population Size (2023)',
)
fig.show()


**How does the world population growth rate vary by continent in the last decade (2013-2023)?**

In [21]:
# 'growth rate' is loaded with dtype object (text); box statistics need numbers,
# so convert on a copy and leave `df` untouched.
# NOTE(review): assumes values look like "0.81%" -- confirm against the CSV; any
# value that still cannot be parsed becomes NaN and is left out of the plot.
# NOTE(review): the dataset carries a single growth-rate figure per country, not a
# 2013-2023 series -- confirm which period that figure covers.
box_data = df.copy()
box_data['growth rate'] = pd.to_numeric(
    box_data['growth rate'].astype(str).str.replace('%', '', regex=False).str.strip(),
    errors='coerce',
)

# Create a box plot to show the variation of world population growth rate by continent
fig = px.box(box_data, x='continent', y='growth rate', title='World Population Growth Rate Variation by Continent (2013-2023)')
fig.show()


**What is the proportion of each continent's area to the total world area?**

In [22]:
# Each continent's summed area divided by the summed area of all rows
world_area = df['area (km²)'].sum()
continent_area_proportion = df.groupby('continent')['area (km²)'].sum() / world_area

# Pie chart: slice labels come from the continent index, slice sizes from the proportions
fig = px.pie(
    continent_area_proportion,
    names=continent_area_proportion.index,
    values=continent_area_proportion.values,
    title='Continent-wise Proportion of World Area',
)
fig.show()


**What is the relationship between population size, area, and density for the year 2023?**

In [23]:
# Keep only the three columns compared in the scatter matrix
pair_plot_columns = ['2023 population', 'area (km²)', 'density (km²)']
pair_plot_data = df[pair_plot_columns]

# Scatter matrix: every selected column plotted against every other one
fig = px.scatter_matrix(
    pair_plot_data,
    title='Pair Plot: Population Size, Area, and Density (2023)',
)
fig.show()


**How do the growth rate, area, and population density vary for the top 10 most populous countries in 2023?**

In [24]:
# Extract data for the top 10 most populous countries
top_10_countries = df.nlargest(10, '2023 population')

# Select relevant columns for the pair plot (copied so the conversion below does
# not write into `df`)
pair_plot_top_10_data = top_10_countries[['growth rate', 'area (km²)', 'density (km²)']].copy()

# 'growth rate' is loaded with dtype object (text); convert it to numbers so it is
# drawn on a numeric axis like the other two columns.
# NOTE(review): assumes values look like "0.81%" -- confirm against the CSV; any
# value that still cannot be parsed becomes NaN and is left out of the plot.
pair_plot_top_10_data['growth rate'] = pd.to_numeric(
    pair_plot_top_10_data['growth rate'].astype(str).str.replace('%', '', regex=False).str.strip(),
    errors='coerce',
)

# Create a pair plot to show the relationships between growth rate, area, and population density for the top 10 countries
fig = px.scatter_matrix(pair_plot_top_10_data, title='Pair Plot: Growth Rate, Area, and Density for Top 10 Countries (2023)')
fig.show()
