# Combine English and Welsh region lists

## Notebook setup:

In [1]:
import pandas as pd

## Import English regions and LSOAs

In [2]:
# Load the English lookup file (LSOA11 / LOC22 / ICB22 / LAD22 columns):
df_eng = pd.read_csv(
    '../data_tabular/ons_data/LSOA11_LOC22_ICB22_LAD22_EN_LU.csv'
)

In [3]:
# Keep only the LSOA identifiers plus the LOC22 and ICB22 codes and names;
# every other column in the lookup file is discarded.
df_eng = df_eng.loc[:, [
    'LSOA11CD', 'LSOA11NM',
    'LOC22CD', 'LOC22NM',
    'ICB22CD', 'ICB22NM',
]]

In [4]:
# Preview the first few rows of the reduced English lookup.
df_eng.head()

Unnamed: 0,LSOA11CD,LSOA11NM,LOC22CD,LOC22NM,ICB22CD,ICB22NM
0,E01012367,Halton 007A,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board
1,E01012368,Halton 003A,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board
2,E01012369,Halton 005A,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board
3,E01012370,Halton 007B,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board
4,E01012371,Halton 016A,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board


## Import Welsh regions and LSOAs

In [5]:
# Output Area (OA) -> Local Health Board lookup, Wales only:
df_wal_oa = pd.read_csv(
    '../data_tabular/ons_data/Output_Areas_(2011)_to_Local_Health_Boards_(December_2020)_Lookup_in_Wales.csv'
)
# OA -> LSOA lookup. This file covers all of Great Britain, not just Wales:
df_wal_lsoa = pd.read_csv(
    '../data_tabular/ons_data/Output_Area_to_LSOA_to_MSOA_to_Local_Authority_District_(December_2017)_Lookup_with_Area_Classifications_in_Great_Britain.csv'
)

In [6]:
# List the columns available in the OA -> Local Health Board lookup.
df_wal_oa.columns

Index(['FID', 'OA11CD', 'LHB20CD', 'LHB20NM', 'LHB20NMW'], dtype='object')

In [7]:
# List the columns available in the OA -> LSOA lookup.
df_wal_lsoa.columns

Index(['OA11CD', 'OAC11CD', 'OAC11NM', 'LSOA11CD', 'LSOA11NM', 'SOAC11CD',
       'SOAC11NM', 'MSOA11CD', 'MSOA11NM', 'LAD17CD', 'LAD17NM', 'LACCD',
       'LACNM', 'RGN11CD', 'RGN11NM', 'CTRY11CD', 'CTRY11NM', 'FID'],
      dtype='object')

In [8]:
# Attach LSOA codes and names to each Welsh Output Area. The right join keeps
# exactly the OAs listed in the Welsh health board lookup, which filters the
# Great Britain-wide OA -> LSOA table. The OA column is then dropped and the
# repeated rows removed, leaving the unique LSOA / health board combinations.
df_wal = (
    df_wal_lsoa[['OA11CD', 'LSOA11CD', 'LSOA11NM']]
    .merge(
        df_wal_oa[['OA11CD', 'LHB20CD', 'LHB20NM', 'LHB20NMW']],
        on='OA11CD',
        how='right',
    )
    .drop(columns='OA11CD')
    .drop_duplicates()
)

In [9]:
# Display the de-duplicated Welsh LSOA -> Local Health Board table.
df_wal

Unnamed: 0,LSOA11CD,LSOA11NM,LHB20CD,LHB20NM,LHB20NMW
0,W01000012,Isle of Anglesey 006B,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
1,W01000021,Isle of Anglesey 002C,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
9,W01000022,Isle of Anglesey 004A,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
10,W01000013,Isle of Anglesey 005B,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
20,W01000023,Isle of Anglesey 007D,W11000023,Betsi Cadwaladr University Health Board,Bwrdd Iechyd Prifysgol Betsi Cadwaladr
...,...,...,...,...,...
9889,W01001865,Cardiff 022C,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
9890,W01001890,Cardiff 021D,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
9927,W01001893,Cardiff 010B,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
9938,W01001892,Cardiff 020D,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro


## Combine England and Wales

In [10]:
# Index both tables by (LSOA name, LSOA code) so that they can be aligned
# on the same keys when combined.
# NOTE: this overwrites df_eng and df_wal, so the cell cannot be run twice.
df_eng = df_eng.set_index(keys=['LSOA11NM', 'LSOA11CD'])
df_wal = df_wal.set_index(keys=['LSOA11NM', 'LSOA11CD'])

In [11]:
# Place the English and Welsh columns side by side, aligned on the shared
# (LSOA name, LSOA code) index. An index entry present in only one of the
# tables gets NaN in the other table's columns.
df_combo = pd.concat([df_eng, df_wal], axis=1)

In [12]:
# Display the combined table. In the rows shown, English LSOAs have NaN in
# the LHB20 columns and Welsh LSOAs have NaN in the LOC22/ICB22 columns.
df_combo

Unnamed: 0_level_0,Unnamed: 1_level_0,LOC22CD,LOC22NM,ICB22CD,ICB22NM,LHB20CD,LHB20NM,LHB20NMW
LSOA11NM,LSOA11CD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Halton 007A,E01012367,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
Halton 003A,E01012368,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
Halton 005A,E01012369,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
Halton 007B,E01012370,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
Halton 016A,E01012371,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
...,...,...,...,...,...,...,...,...
Cardiff 022C,W01001865,,,,,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
Cardiff 021D,W01001890,,,,,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
Cardiff 010B,W01001893,,,,,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro
Cardiff 020D,W01001892,,,,,W11000029,Cardiff and Vale University Health Board,Bwrdd Iechyd Prifysgol Caerdydd a’r Fro


In [14]:
# Save the LSOA-level lookup. The LSOA name and code live in the index of
# df_combo, so the index must be written out: with index=False every row
# would lose its LSOA identifier and the file could not be used as a lookup.
df_combo.to_csv('../data_tabular/regions_lsoa_ew.csv')

## Drop the LSOA columns to leave one row per region

In [18]:
# Discard the (LSOA name, LSOA code) index and keep one row per distinct
# combination of region columns. The surviving row labels are the positions
# of each combination's first occurrence in df_combo.
df_combo_small = (
    df_combo
    .reset_index(drop=True)
    .drop_duplicates()
)

In [19]:
# Display the unique region rows that remain once the LSOA columns are gone.
df_combo_small

Unnamed: 0,LOC22CD,LOC22NM,ICB22CD,ICB22NM,LHB20CD,LHB20NM,LHB20NMW
0,E38000068,NHS Cheshire and Merseyside ICB - 01F,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
79,E38000194,NHS Cheshire and Merseyside ICB - 02E,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
206,E38000233,NHS Cheshire and Merseyside ICB - 27D,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
652,E38000091,NHS Cheshire and Merseyside ICB - 01J,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
750,E38000101,NHS Cheshire and Merseyside ICB - 99A,E54000008,NHS Cheshire and Merseyside Integrated Care Board,,,
...,...,...,...,...,...,...,...
33293,,,,,W11000025,Hywel Dda University Health Board,Bwrdd Iechyd Prifysgol Hywel Dda
33565,,,,,W11000031,Swansea Bay University Health Board,Bwrdd Iechyd Prifysgol Bae Abertawe
33799,,,,,W11000030,Cwm Taf Morgannwg University Health Board,Bwrdd Iechyd Prifysgol Cwm Taf Morgannwg
34046,,,,,W11000028,Aneurin Bevan University Health Board,Bwrdd Iechyd Prifysgol Aneurin Bevan


In [20]:
# Save the region-level table (one row per unique region), not the LSOA-level
# df_combo. The index here is only the leftover row position from the
# LSOA-level table, so it is left out of the file.
df_combo_small.to_csv('../data_tabular/regions_ew.csv', index=False)