# Income Inequality Before and After Taxes

Let's load some libraries:

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Default size (width, height in inches) for every figure created in this notebook.
plt.rcParams["figure.figsize"] = (10, 6)

## Load data

In [2]:
# Load both income-inequality CSVs from the TidyTuesday 2025-08-05 folder on GitHub.
# The URLs are named first so the read calls below stay short and easy to scan.
PROCESSED_CSV_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-08-05/income_inequality_processed.csv'
RAW_CSV_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-08-05/income_inequality_raw.csv'

income_inequality_processed = pd.read_csv(PROCESSED_CSV_URL)
income_inequality_raw = pd.read_csv(RAW_CSV_URL)

In [3]:
# Preview the first five rows to check the columns that the cells below rely on.
income_inequality_processed.head(n=5)

Unnamed: 0,Entity,Code,Year,gini_mi_eq,gini_dhi_eq
0,Australia,AUS,1989,0.431,0.304
1,Australia,AUS,1995,0.47,0.311
2,Australia,AUS,2001,0.481,0.32
3,Australia,AUS,2003,0.469,0.316
4,Australia,AUS,2004,0.467,0.316


In [4]:
# Which countries have the highest Gini coefficients before taxes?
# Take each country's mean `gini_mi_eq`, then keep the ten largest means.

pre_tax_col = 'gini_mi_eq'
mean_pre_tax_gini = income_inequality_processed.pivot_table(
    index='Entity',
    values=pre_tax_col,
    aggfunc='mean',
)
mean_pre_tax_gini.sort_values(by=pre_tax_col, ascending=False).head(10)

Unnamed: 0_level_0,gini_mi_eq
Entity,Unnamed: 1_level_1
South Africa,0.7308
Brazil,0.587714
Italy,0.553667
Ireland,0.530476
Dominican Republic,0.523
Lithuania,0.514923
Greece,0.513375
Bulgaria,0.496625
Spain,0.495579
Austria,0.48865


In [5]:
# Which countries have the highest Gini coefficients after taxes?
# Take each country's mean `gini_dhi_eq`, then keep the ten largest means.

post_tax_col = 'gini_dhi_eq'
mean_post_tax_gini = income_inequality_processed.pivot_table(
    index='Entity',
    values=post_tax_col,
    aggfunc='mean',
)
mean_post_tax_gini.sort_values(by=post_tax_col, ascending=False).head(10)

Unnamed: 0_level_0,gini_dhi_eq
Entity,Unnamed: 1_level_1
South Africa,0.6364
Cote d'Ivoire,0.561
Dominican Republic,0.515
Brazil,0.511395
Paraguay,0.4995
Colombia,0.499348
India,0.4955
Chile,0.489
Peru,0.473294
Guatemala,0.472667


In [6]:
# Which countries have the highest shifts in Gini coefficient?
# "Shift" here is the absolute gap between the before-tax (`gini_mi_eq`) and
# after-tax (`gini_dhi_eq`) value in a row; each country is ranked by its
# single largest gap.

largest_gini_gap = (
    income_inequality_processed
        .assign(diff=lambda rows: (rows['gini_mi_eq'] - rows['gini_dhi_eq']).abs())
        .pivot_table(index='Entity', values='diff', aggfunc='max')
)
largest_gini_gap.sort_values(by='diff', ascending=False).head(10)

Unnamed: 0_level_0,diff
Entity,Unnamed: 1_level_1
Ireland,0.279
Belgium,0.248
Sweden,0.246
Germany,0.231
Italy,0.228
Austria,0.225
Denmark,0.222
Greece,0.216
Czechia,0.211
Netherlands,0.209


In [7]:
# Which countries have the lowest shifts in Gini coefficient?
# "Shift" here is the absolute gap between the before-tax (`gini_mi_eq`) and
# after-tax (`gini_dhi_eq`) value in a row; each country is ranked by its
# single smallest gap.

smallest_gini_gap = (
    income_inequality_processed
        .assign(diff=lambda rows: (rows['gini_mi_eq'] - rows['gini_dhi_eq']).abs())
        .pivot_table(index='Entity', values='diff', aggfunc='min')
)
smallest_gini_gap.sort_values(by='diff', ascending=True).head(10)

Unnamed: 0_level_0,diff
Entity,Unnamed: 1_level_1
Dominican Republic,0.008
Brazil,0.049
Iceland,0.05
Switzerland,0.064
United States,0.075
Canada,0.079
United Kingdom,0.087
South Africa,0.09
Israel,0.095
Japan,0.099
