---
title: "Economic status of citizens by country as of 2023, based on mean daily household income per capita"
format: dashboard
---

In [None]:
# Import necessary packages
import pandas as pd
import plotly.express as px
from itables import show
import matplotlib.pyplot as plt
import re
import numpy as np
import country_converter as coco

In [None]:
# Number of top-ranked countries shown in the "Top N" charts and their headings
TOP = 20

In [None]:
# Load the raw population and average-daily-income tables from the data/ directory.
# NOTE(review): the later melt assumes each file has a "country" column plus one
# column per year — confirm against the CSV files.
pop_data = pd.read_csv("data/pop.csv")
average_daily_income = pd.read_csv("data/mincpcap_cppp.csv")

In [None]:
# Define a function to convert a string to numeric, replacing the letters with their appropriate values (thousands(k), million(m) and billion(b))
def replace_letters_in_pop(string: str) -> float:
    """Convert an abbreviated figure such as ``"1.5M"`` into a plain float.

    A trailing ``K``, ``M`` or ``B`` (case-insensitive) is read as thousands,
    millions or billions respectively, so ``"1k"`` becomes ``1000.0``.
    Surrounding whitespace is ignored. Values that are not strings (for
    example cells pandas already parsed as numbers) are simply passed
    through ``float``.

    Parameters
    ----------
    string : str
        The figure to convert, e.g. ``"850k"``, ``"1.4B"`` or ``"1200"``.

    Returns
    -------
    float
        The expanded numeric value.

    Raises
    ------
    ValueError
        If the numeric part of the text cannot be parsed by ``float``.
    """
    # Already-numeric cells carry no suffix to expand.
    if not isinstance(string, str):
        return float(string)

    text = string.strip().upper()
    multipliers = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}
    suffix = re.search("[BKM]$", text)
    if suffix is None:
        return float(text)
    # Remove only the trailing suffix character; a stray letter elsewhere in
    # the text is a malformed value and should fail loudly in float().
    return float(text[:-1]) * multipliers[suffix.group(0)]


# Element-wise version for whole columns. otypes is fixed to float so the
# wrapper also works on empty input and does not probe the first element twice.
replace_letters_in_pop_vec = np.vectorize(replace_letters_in_pop, otypes=[float])

In [None]:
# Reshape both wide tables to long format (one row per country and year) and
# keep only the 2023 rows. After melting, the year labels are compared as text,
# hence the quoted '2023'.
# .copy() makes each result an independent frame, so the column assignments in
# later cells cannot trigger pandas' SettingWithCopyWarning on a filtered slice.

# 1. population dataset
pop_data_long = (
    pd.melt(
        pop_data,
        id_vars="country",
        var_name="year",
        value_name="population",
    )
    .query("year == '2023'")
    .copy()
)

# 2. average_daily_income dataset
average_daily_income_long = (
    pd.melt(
        average_daily_income,
        id_vars="country",
        var_name="year",
        value_name="average_daily_income",
    )
    .query("year == '2023'")
    .copy()
)

In [None]:
# Add an ISO3 country code column to each dataset; the choropleth maps use it
# as their location key.
# NOTE(review): with not_found=None, country_converter presumably leaves names it
# cannot match unchanged rather than inserting a placeholder — confirm in its docs.
pop_data_long["country_code"] = coco.convert(
    names=pop_data_long["country"], to="ISO3", not_found=None
)
average_daily_income_long["country_code"] = coco.convert(
    names=average_daily_income_long["country"], to="ISO3", not_found=None
)

In [None]:
# Drop every row that has a missing value in any column.
# dropna() returns the data unchanged when nothing is missing, so a separate
# isnull() check beforehand is unnecessary.
average_daily_income_long = average_daily_income_long.dropna()
pop_data_long = pop_data_long.dropna()

In [None]:
# The melted "year" values are text; cast them to numbers in both long tables
# (income table first, then population).
for long_table in (average_daily_income_long, pop_data_long):
    long_table["year"] = pd.to_numeric(long_table["year"])

In [None]:
# Sort countries by average daily income, highest first.
# Only the ordering changes here; the top-TOP subset is taken later, where the
# chart is built.
average_daily_income_long = average_daily_income_long.sort_values(
    by="average_daily_income", ascending=False
)

In [None]:
# The population values in pop_data_long are not numeric: they use short forms
# with K, M and B suffixes. Expand them to plain numbers element-wise with the
# vectorised helper.
pop_data_long["population"] = replace_letters_in_pop_vec(pop_data_long["population"])

In [None]:
# Both tables were filtered to a single year, so the "year" column carries no
# further information; remove it from each.
average_daily_income_long = average_daily_income_long.drop(columns=["year"])
pop_data_long = pop_data_long.drop(columns=["year"])

In [None]:
# Combine income and population into one table, keeping only the countries
# present in both datasets (inner join on country name and ISO3 code).
final_dataset = pd.merge(
    left=average_daily_income_long,
    right=pop_data_long,
    on=["country", "country_code"],
    how="inner",
)

# Total daily income of a country: the average daily income per person
# multiplied by the population.
final_dataset["total_income"] = (
    final_dataset["population"] * final_dataset["average_daily_income"]
)
# All merged countries are kept: the scatter plots compare every country,
# not just the top TOP.

# Home

Economic status, for this purpose, is defined as the ability of a household to access household necessities with financial implications.

Therefore, the higher the household income per capita, the higher the economic status, and vice versa.

# Country populations
## Row {height="30%"}
### Column {width="50%"}
::: {.valuebox title="Highest population" color="red" icon="arrow-up"}
`{python} max(pop_data_long["population"])`

`{python} pop_data_long.loc[pop_data_long["population"].idxmax()]["country"]`
:::

### Column {width="50%"}
::: {.valuebox title="Lowest population" color="blue" icon="arrow-down"}

`{python} min(pop_data_long["population"])`

:::
## Row {height="70%"}
### column {.tabset}
#### Top `{python} TOP` Countries with highest population


In [None]:
# Rank countries by population (largest first) and keep the first TOP of them.
most_populated = pop_data_long.sort_values(by="population", ascending=False).head(TOP)

# One coloured bar per country, with the value printed on each bar.
px.bar(
    most_populated,
    x="country",
    y="population",
    color="country",
    text_auto=True,
    title=f"Top {TOP} countries with highest population",
    labels={"country": "Country", "population": "Population (Billion)"},
)

#### A map of Countries with their population
##### Row
###### Column {width="80%"}


In [None]:
# World map shaded by population: countries are located through their ISO3
# code and the hover label shows the country name.
px.choropleth(
    pop_data_long,
    locations="country_code",
    color="population",
    hover_name="country",
    color_continuous_scale="reds",
)

###### Column {width="20%"}

India and China are extremely populated compared to other countries.

# Average Daily Income
## Row {height="30%"}
### Column
::: {.valuebox title="Highest Average Daily Income (USD)" color="red" icon="arrow-up"}

`{python} max(average_daily_income_long["average_daily_income"])`

`{python} average_daily_income_long.loc[average_daily_income_long["average_daily_income"].idxmax()]["country"]`

:::

### Column
::: {.valuebox title="Lowest Average Daily Income (USD)" color="blue" icon="arrow-down"}

`{python} min(average_daily_income_long["average_daily_income"])`

:::
## Row {height="70%"}
### column {.tabset} 
#### A map showing average daily income


In [None]:
# World map shaded by average daily income, located through the ISO3 code.
# hover_name shows the country name on hover, matching the population map;
# without it only the three-letter code is displayed.
px.choropleth(
    average_daily_income_long,
    locations="country_code",
    color="average_daily_income",
    hover_name="country",
    color_continuous_scale="reds",
    title="A map showing mean daily household per capita (USD)",
)

#### Top `{python} TOP` countries with highest mean daily household per capita


In [None]:
# Keep the TOP countries with the highest average daily income.
top_income = average_daily_income_long.sort_values(
    by="average_daily_income", ascending=False
).head(TOP)

# Each country has exactly one value, so a bar chart is the right fit.
# px.histogram would aggregate and label the axis "sum of average_daily_income".
px.bar(
    top_income,
    x="country",
    y="average_daily_income",
    color="country",
    text_auto=True,
    labels={
        "country": "Country",
        "average_daily_income": "Average Daily Income ($)",
    },
    title=f"Top {TOP} countries with highest mean daily household per capita (USD)",
)

# Does the size of the population have an effect on average daily income?

## Column 
### Relationship between population and household per capita
#### Row {height="90%"}
##### Column {width="60%"}
###### The size of the population and mean daily household per capita

In [None]:
# Axis captions for the income-versus-population scatter plot.
income_vs_population_labels = {
    "average_daily_income": "Average Daily Income ($)",
    "population": "Population in Billions",
}

# One point per country: average daily income on x, population on y.
px.scatter(
    final_dataset,
    x="average_daily_income",
    y="population",
    color="country",
    title="Relationship between the size of the population and daily household per capita",
    labels=income_vs_population_labels,
)

##### Column {width="40%"}
**Explanation**

The size of the population is not a significant factor that affects the average daily income.

###### The size of the population and total daily income


In [None]:
# Axis captions for the population-versus-total-income scatter plot.
total_income_labels = {
    "total_income": "Total Daily Income ($)",
    "population": "Population in Billions",
}

# One point per country: population on x, total daily income on y.
px.scatter(
    final_dataset,
    x="population",
    y="total_income",
    color="country",
    title="Relationship between the size of the population and total daily household per capita",
    labels=total_income_labels,
)

# Data

## Row
### Column {.tabset}
#### Population Data
##### Column {.tabset}
###### Original Data


In [None]:
# Show the original population table with export buttons.
# "donwload" was a typo and is not a DataTables button name; use the
# copy/CSV/Excel export buttons instead.
show(pop_data, buttons=["copyHtml5", "csvHtml5", "excelHtml5"], footer=True)

###### Cleaned Data


In [None]:
# Show the cleaned population table with export buttons.
# "donwload" was a typo and is not a DataTables button name; use the
# copy/CSV/Excel export buttons instead.
show(pop_data_long, buttons=["copyHtml5", "csvHtml5", "excelHtml5"], footer=True)

#### Average Daily Income Data
##### Column {.tabset}
###### Original Data


In [None]:
# Show the original average-daily-income table with export buttons.
# "donwload" was a typo and is not a DataTables button name; use the
# copy/CSV/Excel export buttons instead.
show(average_daily_income, buttons=["copyHtml5", "csvHtml5", "excelHtml5"], footer=True)

###### Cleaned Data


In [None]:
# Show the cleaned average-daily-income table with export buttons.
# "download" is not a built-in DataTables button name; use the
# copy/CSV/Excel export buttons instead.
show(average_daily_income_long, buttons=["copyHtml5", "csvHtml5", "excelHtml5"], footer=True)