In [1]:
#HIDE
# Optional bootstrap: import a local `setup` module when one is importable.
# NOTE(review): the bare `except` silently swallows *every* failure raised by
# that import, not only a missing module — confirm this best-effort behaviour
# is intended before narrowing it.
try:
    import setup
except:
    pass
# The star import supplies `notebook_setup`; presumably it also provides the
# `pd` and `Path` names used by later cells — TODO confirm against
# notebook_helper.
from notebook_helper import *
notebook_setup()
# `la` provides get_la_with_leagues() and is used via the `.la` DataFrame
# accessor in later cells.
from modules import la

# Index of Multiple Deprivation approach v3

This creates a basic score for each local authority based on the overall deprivation score for an authority. 

This uses a GB-wide approach, as Northern Ireland councils do not overlap in the same table. It calculates population quintiles, so the number of councils in each quintile is not exactly even. 

This is based on the [composite 2020 IMD scoring](https://github.com/mysociety/composite_uk_imd), in turn based on each national IMD ranking.

Based on previous feedback, this does not split by authority population, but produces sets of clean, even quintiles for three different leagues. 

### Distribution in number of councils

The above table shows the distribution of a *single* league over both district and single tiers.

The alternative approach would be to calculate the distribution for two different sets of quintiles.

In [8]:
# Population-weighted deprivation score per local authority.
# This follows methodology in https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/833947/IoD2019_Research_Report.pdf p. 69

source_dir = Path("data", "source")

ldf = la.get_la_with_leagues().set_index("local-authority-code")

# Three LSOA-keyed inputs: the LSOA -> local authority lookup, the IMD scores,
# and the population counts (written with "," as a thousands separator).
lookup = pd.read_csv(source_dir / "lsoa_la_2021.csv").set_index("lsoa")
imd = pd.read_csv(source_dir / "UK_IMD_E.csv").set_index("lsoa")
pop = pd.read_csv(source_dir / "2019_population.csv", thousands=",").set_index("lsoa")

# Attach the authority lookup and the population to every LSOA, then weight
# each LSOA's score by its population.
df = imd.join(lookup).join(pop)
df["pop_score"] = df["UK_IMD_E_score"] * df["pop"]

# Sum the weighted scores and the populations up to local authority level,
# keeping the authority code as an ordinary column.
pt = df.pivot_table(
    ["pop_score", "pop"], index="local-authority-code", aggfunc="sum"
).reset_index()

# Combine the authority-level sums for the higher level authorities.
higher_df = pt.la.to_higher(aggfunc="sum")

# The authority score is the summed weighted score over the summed population.
for totals in (pt, higher_df):
    totals["la_deprivation_score"] = totals["pop_score"] / totals["pop"]

# One frame holding both sets of authorities, indexed by authority code.
df = pd.concat([pt, higher_df]).set_index("local-authority-code")


In [9]:
# Preview the first rows of the combined authority-level score table.
df.head()



Unnamed: 0_level_0,pop,pop_score,la_deprivation_score
local-authority-code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABC,214281,6040000.0,28.2
ABD,261210,3150000.0,12.04
ABE,228670,3460000.0,15.14
ADU,64301,1130000.0,17.64
AGB,85870,1490000.0,17.35


In [37]:
# League groups that receive a deprivation band; every other row produced by
# the outer join (including rows with no league group) is discarded.
scored_leagues = ["District councils", "Single tier", "County councils"]

# Attach the deprivation scores to the league table, matching on the
# authority code, and keep only the authorities in the scored leagues.
ldf = (
    la.get_la_with_leagues()
    .set_index("local-authority-code")
    .join(df, how="outer")
    .loc[lambda frame: frame["league-group"].isin(scored_leagues)]
)

def make_decile(df):
    """
    Band the rows of ``df`` by their ``la_deprivation_score``.

    Despite the name, no deciles are produced: a frame with 50 or more rows
    is cut into five equal-count bands, a smaller frame into two. Bands are
    labelled ``n`` down to ``1``, so the lowest scores receive the highest
    label and the highest scores receive ``1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric ``la_deprivation_score`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added ``council-quintile`` column. The input
        frame is left untouched.

    Raises
    ------
    ValueError
        Propagated from ``pd.qcut`` when the quantile edges are not unique.
    """
    n = 5 if len(df) >= 50 else 2

    # Work on a copy: functions handed to groupby.apply must not mutate the
    # group they receive, and callers keep their original frame intact.
    banded = df.copy()
    banded["council-quintile"] = pd.qcut(
        banded["la_deprivation_score"],
        q=n,
        labels=list(range(n, 0, -1)),
    )
    return banded

# Band every league group on its own so that each group gets its own set of
# bands. The groups are iterated explicitly instead of going through
# groupby.apply: depending on the pandas version, apply either adds the group
# key to the index (so reset_index() collides with the existing
# "league-group" column) or drops the grouping column from the frame handed
# to the function. Iterating keeps every column in every version.
banded_groups = [make_decile(group) for _, group in ldf.groupby("league-group")]

# Stitch the groups back together, restore the original row order, and turn
# the authority code index back into a column.
ldf = pd.concat(banded_groups).reindex(ldf.index).reset_index()
ldf = ldf[["local-authority-code", "league-group", "la_deprivation_score", "council-quintile"]]
ldf.head()

Unnamed: 0,local-authority-code,league-group,la_deprivation_score,council-quintile
0,ABD,Single tier,12.04,5
1,ABE,Single tier,15.14,5
2,ADU,District councils,17.64,2
3,AGB,Single tier,17.35,4
4,AGY,Single tier,22.21,3


In [38]:
# Count the councils in each band for every league group; combinations with
# no councils are displayed as 0.
quintile_counts = ldf.pivot_table(
    values="local-authority-code",
    index="league-group",
    columns="council-quintile",
    aggfunc="count",
)
quintile_counts.fillna(0)

council-quintile,1,2,3,4,5
league-group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
County councils,12.0,12.0,0.0,0.0,0.0
District councils,36.0,36.0,36.0,36.0,37.0
Single tier,37.0,36.0,36.0,36.0,37.0


In [40]:
# Write the banded table out without the positional index.
output_path = Path("data", "outputs", "imd_v3.csv")
ldf.to_csv(output_path, index=False)