This notebook:
1. Converts the Digital Propensity Index (DPI) data from 2011 LSOA codes to 2021 LSOA codes.
2. Normalizes the DPI scores to the range 0 to 10.

This allows the data to be integrated with other LSOA21 datasets to visualize the regions of the UK where the probability of poverty is higher.

In [12]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
def normalize_to_range(x, new_min=0, new_max=10):
    """
    Linearly rescale ``x`` so its minimum maps to ``new_min`` and its maximum
    to ``new_max`` (min-max normalization).

    Parameters
    ----------
    x : object exposing ``.min()``, ``.max()`` and elementwise arithmetic
        (for example a pandas Series or a NumPy array).
    new_min : lower bound of the target range (default 0).
    new_max : upper bound of the target range (default 10).

    Returns
    -------
    An object of the same kind as ``x`` holding the rescaled values.
    If every value of ``x`` is identical there is no spread to rescale, so
    every element is mapped to ``new_min`` instead of the NaN that a 0/0
    division would produce.
    """
    x_min, x_max = x.min(), x.max()
    spread = x_max - x_min

    # Only a scalar spread can be compared to zero unambiguously; a
    # vector-valued spread (e.g. per-column minima of a DataFrame) takes the
    # regular formula below.
    spread_is_scalar = getattr(spread, "ndim", 0) == 0
    if spread_is_scalar and spread == 0:
        # Multiplying by 0.0 keeps the result floating point and lets any
        # missing values in x remain missing.
        return (x - x_min) * 0.0 + new_min

    return ((x - x_min) / spread) * (new_max - new_min) + new_min


# Load the raw Digital Propensity Index (DPI) scores; normalization to the
# 0-10 range happens in a later cell, after the LSOA 2011 -> 2021 mapping.
dpi_csv_path = "./digitalpropensityindexlsoasv3.csv"
data = pd.read_csv(dpi_csv_path)

# Preview the first rows to confirm the file loaded as expected.
data.head()


Unnamed: 0,LSOAcode,Region,Local Authority name,Score
0,E01000001,London,City of London,97.0
1,E01000002,London,City of London,97.8
2,E01000003,London,City of London,94.6
3,E01000005,London,City of London,93.8
4,E01000006,London,Barking and Dagenham,97.5


In [13]:

# Load the lookup table that maps LSOA 2011 codes to LSOA 2021 codes.
df2 = pd.read_csv(
    "./LSOA_(2011)_to_LSOA_(2021)_to_Local_Authority_District_(2022)_Lookup_for_England_and_Wales.csv",
    encoding='ISO-8859-1',
)

# Force the header of the first column to 'LSOA11CD', whatever it was read as.
# NOTE(review): presumably the raw header is garbled (e.g. a byte-order mark
# decoded as ISO-8859-1) - confirm against the file.
first_column = df2.columns[0]
df2 = df2.rename(columns={first_column: 'LSOA11CD'})

df2.head()

Unnamed: 0,LSOA11CD,LSOA11NM,LSOA21CD,LSOA21NM,LAD22CD,LAD22NM,LAD22NMW,ObjectId
0,E01000155,Barnet 030D,E01033916,Barnet 042B,E09000003,Barnet,,1
1,E01000305,Barnet 036B,E01000305,Barnet 036B,E09000003,Barnet,,2
2,E01000001,City of London 001A,E01000001,City of London 001A,E09000001,City of London,,3
3,E01000156,Barnet 022A,E01000156,Barnet 022A,E09000003,Barnet,,4
4,E01000157,Barnet 022B,E01000157,Barnet 022B,E09000003,Barnet,,5


In [14]:
print("--- Map 2011 LSOA to 2021 LSOA")

# Only the 2011 code (join key) and the 2021 code/name are needed from the lookup.
lookup_columns = ['LSOA11CD', 'LSOA21CD', 'LSOA21NM']
df2 = df2.loc[:, lookup_columns]

# Attach the 2021 code and name to every DPI row via its 2011 code.
# An inner join (the pandas default) keeps only codes present in both tables.
merged_df = data.merge(
    df2,
    how='inner',
    left_on='LSOAcode',
    right_on='LSOA11CD',
)

merged_df.head()

--- Map 2011 LSOA to 2021 LSOA


Unnamed: 0,LSOAcode,Region,Local Authority name,Score,LSOA11CD,LSOA21CD,LSOA21NM
0,E01000001,London,City of London,97.0,E01000001,E01000001,City of London 001A
1,E01000002,London,City of London,97.8,E01000002,E01000002,City of London 001B
2,E01000003,London,City of London,94.6,E01000003,E01000003,City of London 001C
3,E01000005,London,City of London,93.8,E01000005,E01000005,City of London 001E
4,E01000006,London,Barking and Dagenham,97.5,E01000006,E01000006,Barking and Dagenham 016A


In [15]:

# Rescale the raw DPI scores onto the 0-10 range.
normalized_scores = merged_df.assign(
    score_normalized=normalize_to_range(merged_df['Score'])
)

# The 2011 -> 2021 lookup is not guaranteed to be one-to-one: when several
# 2011 LSOAs map to the same 2021 LSOA, the merge yields repeated LSOA21CD
# rows. Collapse them to one row per 2021 code by averaging the normalized
# score, so the output can be joined to other LSOA21 data without duplicating
# rows. Codes that appear only once keep their value unchanged.
# sort=False preserves the order in which codes first appear.
merged_df = normalized_scores.groupby(
    'LSOA21CD', sort=False, as_index=False
).agg(
    LSOA21NM=('LSOA21NM', 'first'),
    score_normalized=('score_normalized', 'mean'),
)

# Round the normalized scores to 2 decimal places
merged_df['score_normalized'] = merged_df['score_normalized'].round(2)

# Keep only the 2021 identifiers and the normalized score.
merged_df = merged_df[['LSOA21CD', 'LSOA21NM', 'score_normalized']]

# Print the first few rows to verify
print(merged_df.head())

# The DataFrame index is written as an unnamed first column (pandas default);
# kept as-is so the layout of the exported file does not change.
merged_df.to_csv("./noramlized_lsoa21.csv")

    LSOA21CD                   LSOA21NM  score_normalized
0  E01000001        City of London 001A              8.81
1  E01000002        City of London 001B              9.13
2  E01000003        City of London 001C              7.86
3  E01000005        City of London 001E              7.54
4  E01000006  Barking and Dagenham 016A              9.01
