# Combine English and Welsh region lists

## Notebook setup:

In [1]:
import pandas as pd

## Import English regions and LSOAs

In [2]:
df_eng = pd.read_csv('../data_tabular/ons_data/LSOA11_LOC22_ICB22_LAD22_EN_LU.csv')

In [3]:
# Reduce the columns:
df_eng = df_eng[[
    'LSOA11CD',
    'LSOA11NM',
    'LOC22CD',
    'LOC22NM',
    'ICB22CD',
    'ICB22NM',
]]

In [4]:
df_eng.head()

Unnamed: 0,LSOA11CD,LSOA11NM,LOC22CD,LOC22NM,ICB22CD,ICB22NM
0,E01012367,Halton 007A,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board
1,E01012368,Halton 003A,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board
2,E01012369,Halton 005A,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board
3,E01012370,Halton 007B,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board
4,E01012371,Halton 016A,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board


## Import Welsh regions and LSOAs

In [5]:
df_wal_oa = pd.read_csv('../data_tabular/ons_data/Output_Areas_(2011)_to_Local_Health_Boards_(December_2020)_Lookup_in_Wales.csv')
df_wal_lsoa = pd.read_csv('../data_tabular/ons_data/Output_Area_to_LSOA_to_MSOA_to_Local_Authority_District_(December_2017)_Lookup_with_Area_Classifications_in_Great_Britain.csv')

In [6]:
df_wal_oa.columns

Index(['FID', 'OA11CD', 'LHB20CD', 'LHB20NM', 'LHB20NMW'], dtype='object')

In [7]:
df_wal_lsoa.columns

Index(['OA11CD', 'OAC11CD', 'OAC11NM', 'LSOA11CD', 'LSOA11NM', 'SOAC11CD',
       'SOAC11NM', 'MSOA11CD', 'MSOA11NM', 'LAD17CD', 'LAD17NM', 'LACCD',
       'LACNM', 'RGN11CD', 'RGN11NM', 'CTRY11CD', 'CTRY11NM', 'FID'],
      dtype='object')

In [8]:
df_wal = pd.merge(
    df_wal_lsoa[['OA11CD', 'LSOA11CD', 'LSOA11NM', ]],
    df_wal_oa[['OA11CD', 'LHB20CD', 'LHB20NM', 'LHB20NMW']],
    left_on='OA11CD', right_on='OA11CD', how='right'
)
# Drop the OA column:
df_wal = df_wal.drop('OA11CD', axis='columns')
# Drop duplicates:
df_wal = df_wal.drop_duplicates()

In [9]:
df_wal

Unnamed: 0,LSOA11CD,LSOA11NM,LHB20CD,LHB20NM,LHB20NMW
0,W01000012,Isle of Anglesey 006B,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
1,W01000021,Isle of Anglesey 002C,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
9,W01000022,Isle of Anglesey 004A,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
10,W01000013,Isle of Anglesey 005B,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
20,W01000023,Isle of Anglesey 007D,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
...,...,...,...,...,...
9889,W01001865,Cardiff 022C,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
9890,W01001890,Cardiff 021D,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
9927,W01001893,Cardiff 010B,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
9938,W01001892,Cardiff 020D,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro


## Combine England and Wales

In [10]:
# Set both to have a shared axis type:
df_eng = df_eng.set_index(['LSOA11NM', 'LSOA11CD'])
df_wal = df_wal.set_index(['LSOA11NM', 'LSOA11CD'])

In [18]:
df_combo = pd.concat((df_eng, df_wal), axis='columns')
df_combo = df_combo.reset_index()

In [19]:
df_combo

Unnamed: 0,LSOA11NM,LSOA11CD,LOC22CD,LOC22NM,ICB22CD,ICB22NM,LHB20CD,LHB20NM,LHB20NMW
0,Halton 007A,E01012367,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
1,Halton 003A,E01012368,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
2,Halton 005A,E01012369,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
3,Halton 007B,E01012370,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
4,Halton 016A,E01012371,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
...,...,...,...,...,...,...,...,...,...
34748,Cardiff 022C,W01001865,,,,,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
34749,Cardiff 021D,W01001890,,,,,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
34750,Cardiff 010B,W01001893,,,,,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
34751,Cardiff 020D,W01001892,,,,,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro


## Merge in ISDNs

From a file made in another notebook:

In [20]:
df_sicbl_to_icb_to_isdn = pd.read_csv('../data_tabular/SICBL_ICB_ISDN.csv')

In [21]:
df_sicbl_to_icb_to_isdn.head()

Unnamed: 0,LOC22CD,LOC22CDH,LOC22NM,ICB22CD,ICB22CDH,ICB22NM,NHSER22CD,NHSER22CDH,NHSER22NM,ISDN
0,E38000240,93C,NHS North Central London ICB - 93C,E54000028,QMJ,NHS North Central London Integrated Care Board,E40000003,Y56,London,London
1,E38000255,A3A8R,NHS North East London ICB - A3A8R,E54000029,QMF,NHS North East London Integrated Care Board,E40000003,Y56,London,London
2,E38000256,W2U3Z,NHS North West London ICB - W2U3Z,E54000027,QRV,NHS North West London Integrated Care Board,E40000003,Y56,London,London
3,E38000244,72Q,NHS South East London ICB - 72Q,E54000030,QKK,NHS South East London Integrated Care Board,E40000003,Y56,London,London
4,E38000245,36L,NHS South West London ICB - 36L,E54000031,QWE,NHS South West London Integrated Care Board,E40000003,Y56,London,London


In [22]:
df_combo = pd.merge(
    df_combo, df_sicbl_to_icb_to_isdn[['LOC22NM', 'ISDN']],
    left_on='LOC22NM', right_on='LOC22NM', how='left'
)

In [23]:
df_combo.to_csv('../data_tabular/regions_lsoa_ew.csv', index=False)

## Drop the LSOA

In [17]:
df_combo_small = df_combo_small.drop(['LSOA11NM', 'LSOA11CD'], axis='columns')
df_combo_small = df_combo_small.drop_duplicates()

KeyError: "['LSOA11NM', 'LSOA11CD'] not found in axis"

In [None]:
df_combo_small

In [None]:
df_combo.to_csv('../data_tabular/regions_ew.csv', index=False)