In [1]:
import pydytuesday

pydytuesday.get_date('2025-08-05')

Trying to fetch README from: https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2025/2025-08-05/readme.md
Successfully fetched README from: https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/main/data/2025/2025-08-05/readme.md
Downloading income_inequality_processed.csv...
Successfully saved income_inequality_processed.csv to /home/serban/Documents/Training/nicegui/datasets/20250805/income_inequality_processed.csv
Downloading income_inequality_raw.csv...
Successfully saved income_inequality_raw.csv to /home/serban/Documents/Training/nicegui/datasets/20250805/income_inequality_raw.csv
Downloading meta.yaml...
Successfully saved meta.yaml to /home/serban/Documents/Training/nicegui/datasets/20250805/meta.yaml
Downloading reduction-in-income-inequality.png...
Successfully saved reduction-in-income-inequality.png to /home/serban/Documents/Training/nicegui/datasets/20250805/reduction-in-income-inequality.png


In [2]:
import polars as pl
# import pandas as pd

# raw_data: pl.DataFrame = pl.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-08-05/income_inequality_raw.csv")
raw_data = pl.scan_csv("income_inequality_raw.csv", infer_schema_length=None, null_values='NA')


raw_data.describe()

raw_data.collect_schema().names()

['Entity',
 'Code',
 'Year',
 'gini_disposable__age_total',
 'gini_market__age_total',
 'population_historical',
 'owid_region']

In [3]:
# Country codes

country_data = pl.scan_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/refs/heads/master/all/all.csv")

# print(country_data.collect())

country_data.collect_schema().names()

['name',
 'alpha-2',
 'alpha-3',
 'country-code',
 'iso_3166-2',
 'region',
 'sub-region',
 'intermediate-region',
 'region-code',
 'sub-region-code',
 'intermediate-region-code']

In [4]:
country_data.head(10).collect()

name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
str,str,str,i64,str,str,str,str,str,str,str
"""Afghanistan""","""AF""","""AFG""",4,"""ISO 3166-2:AF""","""Asia""","""Southern Asia""","""""","""142""","""034""",""""""
"""Åland Islands""","""AX""","""ALA""",248,"""ISO 3166-2:AX""","""Europe""","""Northern Europe""","""""","""150""","""154""",""""""
"""Albania""","""AL""","""ALB""",8,"""ISO 3166-2:AL""","""Europe""","""Southern Europe""","""""","""150""","""039""",""""""
"""Algeria""","""DZ""","""DZA""",12,"""ISO 3166-2:DZ""","""Africa""","""Northern Africa""","""""","""002""","""015""",""""""
"""American Samoa""","""AS""","""ASM""",16,"""ISO 3166-2:AS""","""Oceania""","""Polynesia""","""""","""009""","""061""",""""""
"""Andorra""","""AD""","""AND""",20,"""ISO 3166-2:AD""","""Europe""","""Southern Europe""","""""","""150""","""039""",""""""
"""Angola""","""AO""","""AGO""",24,"""ISO 3166-2:AO""","""Africa""","""Sub-Saharan Africa""","""Middle Africa""","""002""","""202""","""017"""
"""Anguilla""","""AI""","""AIA""",660,"""ISO 3166-2:AI""","""Americas""","""Latin America and the Caribbea…","""Caribbean""","""019""","""419""","""029"""
"""Antarctica""","""AQ""","""ATA""",10,"""ISO 3166-2:AQ""","""""","""""","""""","""""","""""",""""""
"""Antigua and Barbuda""","""AG""","""ATG""",28,"""ISO 3166-2:AG""","""Americas""","""Latin America and the Caribbea…","""Caribbean""","""019""","""419""","""029"""


In [8]:
country_data_dimensions = ["alpha-3", "region", "sub-region"]

country_data_slim = country_data.select(country_data_dimensions)

country_data_slim.head().collect()

alpha-3,region,sub-region
str,str,str
"""AFG""","""Asia""","""Southern Asia"""
"""ALA""","""Europe""","""Northern Europe"""
"""ALB""","""Europe""","""Southern Europe"""
"""DZA""","""Africa""","""Northern Africa"""
"""ASM""","""Oceania""","""Polynesia"""


In [9]:
data = raw_data.cast({"Year": pl.Int32})\
.rename(lambda column_name: column_name.lower())\
.rename({"gini_disposable__age_total": "gini_after_tax"})\
.rename({"gini_market__age_total": "gini_pre_tax"})\
.filter(pl.col("gini_after_tax").is_not_null() & pl.col("gini_pre_tax").is_not_null())\
.join(other=country_data_slim, left_on="code", right_on="alpha-3", how="left")

data.collect().describe

<bound method DataFrame.describe of shape: (596, 9)
┌─────────────┬──────┬──────┬─────────────┬───┬─────────────┬─────────────┬──────────┬─────────────┐
│ entity      ┆ code ┆ year ┆ gini_after_ ┆ … ┆ population_ ┆ owid_region ┆ region   ┆ sub-region  │
│ ---         ┆ ---  ┆ ---  ┆ tax         ┆   ┆ historical  ┆ ---         ┆ ---      ┆ ---         │
│ str         ┆ str  ┆ i32  ┆ ---         ┆   ┆ ---         ┆ str         ┆ str      ┆ str         │
│             ┆      ┆      ┆ f64         ┆   ┆ i64         ┆             ┆          ┆             │
╞═════════════╪══════╪══════╪═════════════╪═══╪═════════════╪═════════════╪══════════╪═════════════╡
│ Australia   ┆ AUS  ┆ 2012 ┆ 0.326       ┆ … ┆ 22852597    ┆ null        ┆ Oceania  ┆ Australia   │
│             ┆      ┆      ┆             ┆   ┆             ┆             ┆          ┆ and New     │
│             ┆      ┆      ┆             ┆   ┆             ┆             ┆          ┆ Zealand     │
│ Australia   ┆ AUS  ┆ 2014 ┆ 0.337    

In [10]:
z = data.collect()

In [11]:
# get each entity owid region
entity_owid_region = data.select(pl.col("entity"), pl.col("owid_region"))\
.drop_nulls()\
.collect()\


print(entity_owid_region)


shape: (2, 2)
┌───────────────┬───────────────┐
│ entity        ┆ owid_region   │
│ ---           ┆ ---           │
│ str           ┆ str           │
╞═══════════════╪═══════════════╡
│ Costa Rica    ┆ North America │
│ United States ┆ North America │
└───────────────┴───────────────┘


In [12]:
owid_regions = data.unique("code").collect()
print(owid_regions)

shape: (45, 9)
┌─────────────┬──────┬──────┬─────────────┬───┬─────────────┬─────────────┬──────────┬─────────────┐
│ entity      ┆ code ┆ year ┆ gini_after_ ┆ … ┆ population_ ┆ owid_region ┆ region   ┆ sub-region  │
│ ---         ┆ ---  ┆ ---  ┆ tax         ┆   ┆ historical  ┆ ---         ┆ ---      ┆ ---         │
│ str         ┆ str  ┆ i32  ┆ ---         ┆   ┆ ---         ┆ str         ┆ str      ┆ str         │
│             ┆      ┆      ┆ f64         ┆   ┆ i64         ┆             ┆          ┆             │
╞═════════════╪══════╪══════╪═════════════╪═══╪═════════════╪═════════════╪══════════╪═════════════╡
│ Denmark     ┆ DNK  ┆ 2011 ┆ 0.251       ┆ … ┆ 5570801     ┆ null        ┆ Europe   ┆ Northern    │
│             ┆      ┆      ┆             ┆   ┆             ┆             ┆          ┆ Europe      │
│ Germany     ┆ DEU  ┆ 2008 ┆ 0.285       ┆ … ┆ 81110734    ┆ null        ┆ Europe   ┆ Western     │
│             ┆      ┆      ┆             ┆   ┆             ┆             ┆ 

**Which countries have the highest Gini coefficient before taxes?**

**Which countries have the highest Gini coefficient after taxes?**

- compute the country rank for each year for the pre0 and after-tax Gini coefficient respectively

In [13]:
gini_by_country_by_year: pl.LazyFrame = data.with_columns(pl.col("gini_pre_tax").rank("max").over(partition_by="year").alias("rank_gini_pre_tax"))\
    .with_columns(pl.col("gini_after_tax").rank("max").over(partition_by="year").alias("rank_gini_after_tax"))\
        .collect()

print(gini_by_country_by_year)

shape: (596, 11)
┌─────────────┬──────┬──────┬─────────────┬───┬──────────┬─────────────┬─────────────┬─────────────┐
│ entity      ┆ code ┆ year ┆ gini_after_ ┆ … ┆ region   ┆ sub-region  ┆ rank_gini_p ┆ rank_gini_a │
│ ---         ┆ ---  ┆ ---  ┆ tax         ┆   ┆ ---      ┆ ---         ┆ re_tax      ┆ fter_tax    │
│ str         ┆ str  ┆ i32  ┆ ---         ┆   ┆ str      ┆ str         ┆ ---         ┆ ---         │
│             ┆      ┆      ┆ f64         ┆   ┆          ┆             ┆ u32         ┆ u32         │
╞═════════════╪══════╪══════╪═════════════╪═══╪══════════╪═════════════╪═════════════╪═════════════╡
│ Australia   ┆ AUS  ┆ 2012 ┆ 0.326       ┆ … ┆ Oceania  ┆ Australia   ┆ 13          ┆ 17          │
│             ┆      ┆      ┆             ┆   ┆          ┆ and New     ┆             ┆             │
│             ┆      ┆      ┆             ┆   ┆          ┆ Zealand     ┆             ┆             │
│ Australia   ┆ AUS  ┆ 2014 ┆ 0.337       ┆ … ┆ Oceania  ┆ Australia   ┆ 1

**Which countries have the highest shifts in Gini coefficient?**

**Which countries have the lowest shifts in Gini coefficient?**

In [14]:
gini_shifts = data.group_by(pl.col("entity"))\
    .agg(pl.col("gini_pre_tax").min().alias("min_gini_pre_tax"),\
         pl.col("gini_pre_tax").max().alias("max_gini_pre_tax"),\
          pl.col("gini_after_tax").min().alias("min_gini_after_tax"),\
            pl.col("gini_after_tax").max().alias("max_gini_after_tax"))\
    .with_columns((pl.col("max_gini_pre_tax") - pl.col("min_gini_pre_tax")).alias("pre_tax_shift"), (pl.col("max_gini_after_tax") - pl.col("min_gini_after_tax")).alias("after_tax_shift"))\
    .with_columns(pl.col("pre_tax_shift").rank(method="max", descending=True).alias("pre_tax_shift_rank"))\
    .with_columns(pl.col("after_tax_shift").rank(method="max", descending=True).alias("after_tax_shift_rank"))\
    .collect()

print(gini_shifts)

shape: (45, 9)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ entity    ┆ min_gini_ ┆ max_gini_ ┆ min_gini_ ┆ … ┆ pre_tax_s ┆ after_tax ┆ pre_tax_s ┆ after_ta │
│ ---       ┆ pre_tax   ┆ pre_tax   ┆ after_tax ┆   ┆ hift      ┆ _shift    ┆ hift_rank ┆ x_shift_ │
│ str       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ rank     │
│           ┆ f64       ┆ f64       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ u32       ┆ ---      │
│           ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆ u32      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ United    ┆ 0.505     ┆ 0.521     ┆ 0.375     ┆ … ┆ 0.016     ┆ 0.021     ┆ 39        ┆ 33       │
│ States    ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ Sweden    ┆ 0.424     ┆ 0.44      ┆ 0.267     ┆ … ┆ 0.016     ┆ 0.023     