# Income inequality in China: World Inequality Report

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np



In [3]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
today = pd.to_datetime("today").strftime("%Y-%m-%d")

---

#### Datawrapper creds

In [6]:
from datawrapper import Datawrapper

dw = Datawrapper(
    access_token="FtIwtvFtoGLaRT9a3gjX69PLu4wSuRyKddoOz6SOPw3k9wWyNICMHTkcPhOGCR5Z"
)

---

## Read data from [report](https://wir2022.wid.world/methodology/)

#### First, get the income gap for countries in the report

In [7]:
countries = [
    "A1T1-Algeria",
    "A1T1-Argentina",
    "A1T1-Australia",
    "A1T1-Brazil",
    "A1T1-Canada",
    "A1T1-Chile",
    "A1T1-China",
    "A1T1-Germany",
    "A1T1-Spain",
    "A1T1-France",
    "A1T1-United Kingdom",
    "A1T1-Indonesia",
    "A1T1-Israel",
    "A1T1-India",
    "A1T1-Italy",
    "A1T1-Japan",
    "A1T1-Korea",
    "A1T1-Morocco",
    "A1T1-Mexico",
    "A1T1-Nigeria",
    "A1T1-Poland",
    "A1T1-Russian Federation",
    "A1T1-Sweden",
    "A1T1-Turkey",
    "A1T1-USA",
    "A1T1-South Africa",
]

#### Definition

The top 10% to bottom 50% average income gap is the ratio between the income shares of the top 10% and the bottom 50%. **It measures the average income difference between the poorest half and the highest earners within a population**. The higher the ratio, the higher the inequality.

#### Cycle through all the Excel reports to extract actual dataframe

In [8]:
dataframes = []

for c in countries:
    country = (
        pd.read_excel(
            "/Users/stiles/Desktop/WIR2022TablesFigures-CountryAppendix/WIR2022TablesFigures-CountryAppendix-A1T1.xlsx",
            sheet_name=f"{c}",
            skiprows=10,
            skipfooter=4,
            header=0,
            names=["drop", "measure", "drop1", "drop2", "ratio", "drop3"],
        )
        .assign(name=c.replace("A1T1-", ""))
        .drop(["measure", "drop", "drop1", "drop2", "drop3"], axis=1)
    )
    dataframes.append(pd.DataFrame(country))

In [9]:
ratio_df = pd.concat(dataframes)

In [10]:
ratio_df[["ration_one", "ratio_figure"]] = ratio_df["ratio"].str.split(
    " to ", expand=True
)

In [11]:
ratio_df

Unnamed: 0,ratio,name,ration_one,ratio_figure
0,1 to 10,Algeria,1,10
0,1 to 13,Argentina,1,13
0,1 to 10,Australia,1,10
0,1 to 29,Brazil,1,29
0,1 to 13,Canada,1,13
0,1 to 29,Chile,1,29
0,1 to 14,China,1,14
0,1 to 10,Germany,1,10
0,1 to 8,Spain,1,8
0,1 to 7,France,1,7


---

#### Get income group share over time for select countries

In [12]:
src = pd.read_excel(
    "data/raw/WIR2022TablesFigures-CountryAppendix-A1F1.xlsx", sheet_name="data-F1"
)

In [13]:
src_slim = src[
    [
        "year",
        "p0p50CN",
        "p90p100CN",
        "p0p50FR",
        "p90p100FR",
        "p0p50IN",
        "p90p100IN",
        "p0p50JP",
        "p90p100JP",
        "p0p50US",
        "p90p100US",
        "p0p50ZA",
        "p90p100ZA",
    ]
]

In [14]:
src_slim.columns = src_slim.columns.str.replace("p0p50", "bottom50_").str.replace(
    "p90p100", "top10_"
)

In [15]:
src_slim[src_slim["year"] == 2021]

Unnamed: 0,year,bottom50_CN,top10_CN,bottom50_FR,top10_FR,bottom50_IN,top10_IN,bottom50_JP,top10_JP,bottom50_US,top10_US,bottom50_ZA,top10_ZA
121,2021,0.144,0.417,0.227,0.322,0.131,0.571,0.168,0.449,0.133,0.455,0.053,0.665


#### Melt the dataframe wide to long for charting and because we're not savages

In [16]:
src_melt = pd.melt(
    src_slim.ffill(axis=0),
    id_vars=["year"],
    value_vars=[
        "bottom50_CN",
        "top10_CN",
        "bottom50_FR",
        "top10_FR",
        "bottom50_IN",
        "top10_IN",
        "bottom50_JP",
        "top10_JP",
        "bottom50_US",
        "top10_US",
        "bottom50_ZA",
        "top10_ZA",
    ],
    var_name="variable",
    value_name="value",
)

#### Fake a data

In [17]:
src_melt["date"] = pd.to_datetime(src_melt["year"].astype(str) + "-" + "-01" + "-01")

#### Country names

In [18]:
src_melt[["variable", "country"]] = src_melt["variable"].str.split("_", expand=True)

In [19]:
isos = {
    "DZ": "Algeria",
    "AR": "Argentina",
    "AU": "Australia",
    "BR": "Brazil",
    "CA": "Canada",
    "CL": "Chile",
    "CN": "China",
    "EG": "Egypt",
    "FR": "France",
    "DE": "Germany",
    "GB": "United Kingdom",
    "ID": "Indonesia",
    "IN": "India",
    "IT": "Italy",
    "JP": "Japan",
    "KR": "Korea",
    "MX": "Mexico",
    "NG": "Nigeria",
    "PL": "Poland",
    "RU": "Russian Federation",
    "SA": "Saudi Arabia",
    "SP": "Spain",
    "SE": "Sweden",
    "TU": "Turkey",
    "US": "USA",
    "ZA": "South Africa",
}

In [20]:
src_melt["country_name"] = src_melt["country"].map(isos)

In [21]:
src_melt[src_melt["country_name"] == "China"].tail()

Unnamed: 0,year,variable,value,date,country,country_name
239,2017,top10,0.417,2017-01-01,CN,China
240,2018,top10,0.417,2018-01-01,CN,China
241,2019,top10,0.417,2019-01-01,CN,China
242,2020,top10,0.417,2020-01-01,CN,China
243,2021,top10,0.417,2021-01-01,CN,China


---

## Line chart faceted by country

#### Change height, width and number of columns — depending on screen size you're exporting

In [22]:
# Mobile

alt.Chart(src_melt, padding={"left": -20, "top": 0, "right": 0, "bottom": 0}).mark_line(
    interpolate="monotone", strokeWidth=2
).encode(
    x=alt.X("date:T", axis=alt.Axis(format="%Y", tickCount=1), title=""),
    y=alt.Y("value", title="", axis=alt.Axis(format="%", tickCount=3)),
    color=alt.Color("variable", legend=None),
    facet=alt.Facet(
        "country_name",
        columns=2,
        title="",
        header=alt.Header(labelFontSize=15, labelFont="Summit Sans"),
    ),
).properties(
    width=135,
    height=100,
).configure_legend(
    orient="top", symbolType="stroke"
)

---

## Exports

#### Ration dataframe to Datawrapper

In [23]:
dw.add_data(chart_id="FAbK2", data=ratio_df)

<Response [204]>