# Matrimonial Matters County & UA Level Annual

## Contents
#### Setup
1. [import_packages](#import_packages) 
2. [define_key_variables](#define_key_variables) 



## 1. Import packages and set options 
<a name="import_packages"></a>

In [47]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts
import numpy as np
import re

# sets parameters to view dataframes for tables easier
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

## 2. Define key variables to be used throughout the notebook 
<a name="define_key_variables"></a>

In [48]:
#this is the database we will be extracting from
database = "familyman_dev_v3" 

#this is the athena database we will be storing our tables in
fcsq_database = "fcsq"

#this is the s3 bucket we will be saving data to
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

#setting current year
current_year = 2023

#creating a variable to automate the column names from imported csvs
prev_endyear = (current_year - 1) - 2000

# Stage 1 - Divorce County and UA lookup
<a name="Divorce County and UA lookup"></a>

## Import ONS Postcode Directory 

### Create the ons_postcode table

In [3]:
ons_postcode_table = pd.read_csv("s3://alpha-family-data/CSVs/Divorce/Petitioner LA/Lookup/ONSPD_NOV_2022_UK.csv", low_memory=False)

In [4]:
pydb.dataframe_to_temp_table(ons_postcode_table, "ons_postcode")

#### ons_postcode validation

In [5]:
ons_postcode_count = pydb.read_sql_query("SELECT * from __temp__.ons_postcode limit 10")
ons_postcode_count

Unnamed: 0,pcd,pcd2,pcds,dointr,doterm,oscty,ced,oslaua,osward,parish,usertype,oseast1m,osnrth1m,osgrdind,oshlthau,nhser,ctry,rgn,streg,pcon,eer,teclec,ttwa,pct,itl,statsward,oa01,casward,park,lsoa01,msoa01,ur01ind,oac01,oa11,lsoa11,msoa11,wz11,ccg,bua11,buasd11,ru11ind,oac11,lat,long,lep1,lep2,pfa,imd,calncv,stp,oa21,lsoa21,msoa21
0,AB1 0AE,AB1 0AE,AB1 0AE,199402,199606.0,S99999999,S99999999,S12000034,S13002864,S99999999,0,384600.0,799300.0,8,S08000020,S99999999,S92000003,S99999999,0,S14000058,S15000001,S09000001,S22000047,S03000013,S30000027,99ZZ00,S00002142,02C58,S99999999,S01000333,S02000061,6,3B1,S00091322,S01006853,S02001296,S34003292,S03000013,S99999999,S99999999,6,1A2,57.084444,-2.255708,S99999999,S99999999,S23000009,5069,S99999999,S99999999,,,
1,AB1 0AF,AB1 0AF,AB1 0AF,199012,199207.0,S99999999,S99999999,S12000033,S13002843,S99999999,1,384460.0,800660.0,8,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001266,01C30,S99999999,S01000007,S02000003,3,4D2,S00090299,S01006511,S02001236,S34003015,S03000012,S99999999,S99999999,3,6A4,57.096656,-2.258102,S99999999,S99999999,S23000009,6253,S99999999,S99999999,,,
2,AB1 0AG,AB1 0AG,AB1 0AG,199012,199207.0,S99999999,S99999999,S12000033,S13002843,S99999999,1,383890.0,800710.0,8,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001258,01C30,S99999999,S01000001,S02000003,3,5B4,S00090291,S01006506,S02001236,S34003124,S03000012,S99999999,S99999999,3,7C3,57.097085,-2.267513,S99999999,S99999999,S23000009,4691,S99999999,S99999999,,,
3,AB1 0AJ,AB1 0AJ,AB1 0AJ,198001,199606.0,S99999999,S99999999,S12000033,S13002843,S99999999,0,384779.0,800921.0,1,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001364,01C30,S99999999,S01000011,S02000007,6,3C2,S00090399,S01006514,S02001237,S34003015,S03000012,S99999999,S99999999,3,6A1,57.099011,-2.252854,S99999999,S99999999,S23000009,6715,S99999999,S99999999,,,
4,AB1 0AB,AB1 0AB,AB1 0AB,198001,199606.0,S99999999,S99999999,S12000033,S13002843,S99999999,0,385177.0,801314.0,1,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001270,01C31,S99999999,S01000011,S02000007,6,4B3,S00090303,S01006514,S02001237,S34002990,S03000012,S99999999,S99999999,3,1C3,57.102554,-2.246308,S99999999,S99999999,S23000009,6715,S99999999,S99999999,,,
5,AB1 0AD,AB1 0AD,AB1 0AD,198001,199606.0,S99999999,S99999999,S12000033,S13002843,S99999999,0,385053.0,801092.0,1,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001364,01C30,S99999999,S01000011,S02000007,6,3C2,S00090399,S01006514,S02001237,S34003015,S03000012,S99999999,S99999999,3,6A1,57.100556,-2.248342,S99999999,S99999999,S23000009,6715,S99999999,S99999999,,,
6,AB1 0AA,AB1 0AA,AB1 0AA,198001,199606.0,S99999999,S99999999,S12000033,S13002843,S99999999,0,385386.0,801193.0,1,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001364,01C30,S99999999,S01000011,S02000007,6,3C2,S00090303,S01006514,S02001237,S34002990,S03000012,S99999999,S99999999,3,1C3,57.101474,-2.242851,S99999999,S99999999,S23000009,6715,S99999999,S99999999,,,
7,AB1 0AL,AB1 0AL,AB1 0AL,198001,199606.0,S99999999,S99999999,S12000033,S13002843,S99999999,0,384669.0,801228.0,1,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001364,01C30,S99999999,S01000011,S02000007,6,3C2,S00090381,S01006511,S02001236,S34002990,S03000012,S99999999,S99999999,3,6B3,57.101765,-2.254688,S99999999,S99999999,S23000009,6253,S99999999,S99999999,,,
8,AB1 0AN,AB1 0AN,AB1 0AN,198001,199606.0,S99999999,S99999999,S12000033,S13002843,S99999999,1,385225.0,800757.0,1,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001364,01C30,S99999999,S01000011,S02000007,6,3C2,S00090399,S01006514,S02001237,S34003015,S03000012,S99999999,S99999999,3,6A1,57.097553,-2.245483,S99999999,S99999999,S23000009,6715,S99999999,S99999999,,,
9,AB1 0AP,AB1 0AP,AB1 0AP,198001,199606.0,S99999999,S99999999,S12000033,S13002843,S99999999,0,385208.0,800834.0,1,S08000020,S99999999,S92000003,S99999999,0,S14000002,S15000001,S09000001,S22000047,S03000012,S30000026,99ZZ00,S00001364,01C30,S99999999,S01000011,S02000007,6,3C2,S00090399,S01006514,S02001237,S34003015,S03000012,S99999999,S99999999,3,6A1,57.098244,-2.245768,S99999999,S99999999,S23000009,6715,S99999999,S99999999,,,


## Import Local Authority Districts

### Create the la_districts table

In [6]:
la_districts_table = pd.read_csv("s3://alpha-family-data/CSVs/Divorce/Petitioner LA/Lookup/Local_Authority_Districts_(December_2022)_Names_and_Codes_in_the_United_Kingdom.csv")

In [7]:
pydb.dataframe_to_temp_table(la_districts_table, "la_districts")

#### la_districts validation

In [8]:
la_districts_count = pydb.read_sql_query("SELECT * from __temp__.la_districts LIMIT 10")
la_districts_count

Unnamed: 0,lad22cd,lad22nm,lad22nmw,objectid
0,E07000148,Norwich,,8
1,E07000149,South Norfolk,,9
2,E07000163,Craven,,10
3,E07000144,Broadland,,4
4,E07000145,Great Yarmouth,,5
5,E07000146,King's Lynn and West Norfolk,,6
6,E07000147,North Norfolk,,7
7,E07000141,South Kesteven,,1
8,E07000142,West Lindsey,,2
9,E07000143,Breckland,,3


## Import Local Authority Districts to Counties

### Create the la_districts_counties table

In [9]:
la_districts_counties_table = pd.read_csv("s3://alpha-family-data/CSVs/Divorce/Petitioner LA/Lookup/Local_Authority_District_to_County_(December_2022)_Lookup_in_England.csv")

In [10]:
pydb.dataframe_to_temp_table(la_districts_counties_table, "la_districts_counties")

#### la_districts_counties validation

In [11]:
la_districts_counties_count = pydb.read_sql_query("SELECT * from __temp__.la_districts_counties LIMIT 10")
la_districts_counties_count

Unnamed: 0,lad22cd,lad22nm,cty22cd,cty22nm,objectid
0,E07000008,Cambridge,E10000003,Cambridgeshire,1
1,E07000011,Huntingdonshire,E10000003,Cambridgeshire,4
2,E07000012,South Cambridgeshire,E10000003,Cambridgeshire,5
3,E07000026,Allerdale,E10000006,Cumbria,6
4,E07000027,Barrow-in-Furness,E10000006,Cumbria,7
5,E07000028,Carlisle,E10000006,Cumbria,8
6,E07000029,Copeland,E10000006,Cumbria,9
7,E07000030,Eden,E10000006,Cumbria,10
8,E07000009,East Cambridgeshire,E10000003,Cambridgeshire,2
9,E07000010,Fenland,E10000003,Cambridgeshire,3


## Creating Lookup

### Create the lookup_working table

In [12]:
create_lookup_working =f"""
SELECT 
a.LAD{prev_endyear}CD AS code,
a.LAD{prev_endyear}NM AS la,
b.CTY{prev_endyear}NM AS county,
CASE WHEN a.LAD{prev_endyear}CD LIKE 'E%' THEN 'England'
WHEN a.LAD{prev_endyear}CD LIKE 'W%' THEN 'Wales'
END AS country
FROM __temp__.la_districts a
LEFT JOIN __temp__.la_districts_counties b
ON a.LAD22CD = b.LAD22CD 
WHERE a.LAD22CD LIKE 'E%' OR a.LAD22CD LIKE 'W%';
"""
pydb.create_temp_table(create_lookup_working,'lookup_working')

In [13]:
lookup_working = pydb.read_sql_query("SELECT * from __temp__.lookup_working LIMIT 10")
lookup_working

Unnamed: 0,code,la,county,country
0,E07000148,Norwich,Norfolk,England
1,E07000149,South Norfolk,Norfolk,England
2,E07000163,Craven,North Yorkshire,England
3,E07000144,Broadland,Norfolk,England
4,E07000145,Great Yarmouth,Norfolk,England
5,E07000146,King's Lynn and West Norfolk,Norfolk,England
6,E07000147,North Norfolk,Norfolk,England
7,E07000141,South Kesteven,Lincolnshire,England
8,E07000142,West Lindsey,Lincolnshire,England
9,E07000143,Breckland,Norfolk,England


### Create the divorce_county_ua_lookup table

In [14]:
create_divorce_county_ua_lookup =f"""
SELECT
code,
CASE WHEN county IN ('Greater Manchester', 'Merseyside', 'South Yorkshire', 'Tyne and Wear', 'West Midlands', 'West Yorkshire')
THEN CONCAT(' ',county,'(Met County)')
WHEN code LIKE 'W%'
THEN la
WHEN county IS NULL
THEN CONCAT(' ',la,'UA')
ELSE county
END AS county_ua,
country
FROM __temp__.lookup_working;
"""
pydb.create_temp_table(create_divorce_county_ua_lookup,'divorce_county_ua_lookup')

In [15]:
divorce_county_ua_lookup = pydb.read_sql_query("SELECT * from __temp__.divorce_county_ua_lookup LIMIT 10")
divorce_county_ua_lookup

Unnamed: 0,code,county_ua,country
0,E07000144,Norfolk,England
1,E07000145,Norfolk,England
2,E07000146,Norfolk,England
3,E07000147,Norfolk,England
4,E07000142,Lincolnshire,England
5,E07000143,Norfolk,England
6,E07000141,Lincolnshire,England
7,E07000148,Norfolk,England
8,E07000149,Norfolk,England
9,E07000163,North Yorkshire,England


# Stage 2 - Petitioner Postcode

## Import Petitioner Address Details 

### Create the petitioner_address_details table

In [16]:
petitioner_address_details_table = pd.read_csv("s3://alpha-family-data/CSVs/Divorce/Petitioner LA/Petitioner_Address_Details.csv", low_memory=False)

In [17]:
pydb.dataframe_to_temp_table(petitioner_address_details_table, "petitioner_address_details")

#### petitioner_address_details validation

In [18]:
petitioner_address_details_count = pydb.read_sql_query("SELECT * from __temp__.petitioner_address_details limit 10")
petitioner_address_details_count

Unnamed: 0,year,month,quarter,fm_case_cid,legal_case_id,pettnr_line_1_address,pettnr_line_2_address,pettnr_line_3_address,pettnr_postal_code,pettnr_line_4_address,pettnr_line_5_address,pettnr_line_6_address,pettnr_contact_details_confdntl_cind
0,2022,3,1,ZZ22D26732,4138379,,,,,,,,share
1,2021,4,2,ZZ21D07556,2915032,,,,,,,,
2,2020,12,4,ZZ20D73359,1876032,,,,,,,,
3,2021,4,2,ZZ21D18144,2658348,,,,,,,,
4,2021,7,3,ZZ21D59619,3323101,,,,,,,,
5,2021,10,4,ZZ21D68994,3740779,,,,CM11 1LU,,,,share
6,2021,9,3,ZZ21D78446,3616793,,,,,,,,keep
7,2020,5,2,ZZ20D21210,1426083,,,,,,,,
8,2020,6,2,ZZ20D28785,1504747,,,,,,,,
9,2020,1,1,ZZ19D74058,1205371,,,,,,,,


In [51]:
petitioner_address_details_count = pydb.read_sql_query("SELECT * from __temp__.petitioner_address_details LIMIT 10")
petitioner_address_details_count

Unnamed: 0,year,month,quarter,fm_case_cid,legal_case_id,pettnr_line_1_address,pettnr_line_2_address,pettnr_line_3_address,pettnr_postal_code,pettnr_line_4_address,pettnr_line_5_address,pettnr_line_6_address,pettnr_contact_details_confdntl_cind
0,2022,3,1,ZZ22D26732,4138379,,,,,,,,share
1,2021,4,2,ZZ21D07556,2915032,,,,,,,,
2,2020,12,4,ZZ20D73359,1876032,,,,,,,,
3,2020,1,1,ZZ19D74058,1205371,,,,,,,,
4,2021,10,4,ZZ21D68994,3740779,,,,CM11 1LU,,,,share
5,2021,9,3,ZZ21D78446,3616793,,,,,,,,keep
6,2020,5,2,ZZ20D21210,1426083,,,,,,,,
7,2020,6,2,ZZ20D28785,1504747,,,,,,,,
8,2021,4,2,ZZ21D18144,2658348,,,,,,,,
9,2021,7,3,ZZ21D59619,3323101,,,,,,,,


## Creating Final Output

### Create the petitioner_address table

In [19]:
create_petitioner_address =f"""
SELECT t1.Year, 
          t1.Month, 
          t1.Quarter,
          t1.PETTNR_LINE_1_ADDRESS,
          t1.PETTNR_LINE_2_ADDRESS,
          t1.PETTNR_LINE_3_ADDRESS,
          t1.PETTNR_LINE_4_ADDRESS,
          t1.PETTNR_LINE_5_ADDRESS, 
          t1.PETTNR_LINE_6_ADDRESS,
          t1.PETTNR_POSTAL_CODE,
          t1.PETTNR_CONTACT_DETAILS_CONFDNTL_CIND as CONFDNTL
FROM __temp__.petitioner_address_details t1;
"""
pydb.create_temp_table(create_petitioner_address,'petitioner_address')

In [20]:
petitioner_address = pydb.read_sql_query("SELECT * from __temp__.petitioner_address LIMIT 10")
petitioner_address

Unnamed: 0,year,month,quarter,pettnr_line_1_address,pettnr_line_2_address,pettnr_line_3_address,pettnr_line_4_address,pettnr_line_5_address,pettnr_line_6_address,pettnr_postal_code,confdntl
0,2020,10,4,,,,,,,,
1,2021,11,4,,,,,,,,
2,2020,11,4,,,,,,,,
3,2021,2,1,,,,,,,,
4,2021,11,4,,,,,,,,
5,2021,11,4,,,,,,,,share
6,2022,1,1,,,,,,,TF3 5EW,share
7,2020,5,2,,,,,,,,
8,2020,4,2,,,,,,,,
9,2021,11,4,,,,,,,,


### Create the new_divorce_postcode table

In [4]:
create_new_divorce_postcode =f"""
SELECT t1.Year, 
    t1.Month, 
    t1.Quarter,
    UPPER(t1.PETTNR_LINE_1_ADDRESS) as Line1,
    UPPER(t1.PETTNR_LINE_2_ADDRESS) as Line2,
    UPPER(t1.PETTNR_LINE_3_ADDRESS) as Line3,
    UPPER(t1.PETTNR_LINE_4_ADDRESS) as Line4,
    UPPER(t1.PETTNR_LINE_5_ADDRESS) as Line5, 
    UPPER(t1.PETTNR_LINE_6_ADDRESS) as Line6,
    UPPER(t1.PETTNR_POSTAL_CODE) as postcode,
    UPPER(t1.PETTNR_CONTACT_DETAILS_CONFDNTL_CIND) as CONFDNTL
    
      FROM __temp__.petitioner_address_details t1;
"""
pydb.create_temp_table(create_new_divorce_postcode,'new_divorce_postcode')

In [5]:
new_divorce_postcode = pydb.read_sql_query("SELECT * from __temp__.new_divorce_postcode LIMIT 10")
new_divorce_postcode

Unnamed: 0,year,month,quarter,line1,line2,line3,line4,line5,line6,postcode,confdntl
0,2021,11,4,,,,,,,,
1,2020,11,4,,,,,,,,
2,2021,6,2,,,,,,,,
3,2021,10,4,,,,,,,,
4,2020,12,4,,,,,,,,
5,2020,4,2,,,,,,,,
6,2022,1,1,,,,,,,SS14 1RY,SHARE
7,2021,12,4,,,,,,,EN3 4LT,KEEP
8,2022,1,1,,,,,,,,SHARE
9,2021,9,3,,,,,,,,


In [52]:
create_new_divorce_with_postcode_temp1 =f"""
SELECT *,
CASE 
WHEN regexp_like(line1, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line1, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line1, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line1, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line1, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]')

WHEN regexp_like(line1, '[A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line1, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line1, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]')
ELSE NULL
END newpostcode1,


CASE 
WHEN regexp_like(line2, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line2, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line2, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line2, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line2, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]')

WHEN regexp_like(line2, '[A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line2, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line2, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]')
ELSE NULL
END newpostcode2,


CASE 
WHEN regexp_like(line3, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line3, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line3, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line3, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line3, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]')

WHEN regexp_like(line3, '[A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line3, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line3, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]')
ELSE NULL
END newpostcode3,


CASE
WHEN regexp_like(line4, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line4, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line4, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line4, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line4, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]')

WHEN regexp_like(line4, '[A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line4, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line4, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]')
ELSE NULL
END newpostcode4,


CASE
WHEN regexp_like(line5, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line5, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line5, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line5, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line5, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]')

WHEN regexp_like(line5, '[A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line5, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line5, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]')
ELSE NULL
END newpostcode5,

CASE
WHEN regexp_like(line6, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line6, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line6, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line6, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(line6, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]')

WHEN regexp_like(line6, '[A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(line6, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(line6, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]')
ELSE NULL
END newpostcode6,

CASE 
WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(postcode, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][0-9][ \t][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][A-Z][ \t][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(postcode, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][0-9][ \t][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][A-Z][ \t][0-9][A-Z][A-Z]')

WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][A-Z][0-9][A-Z][0-9][A-Z][A-Z]')

WHEN regexp_like(postcode, '[A-Z][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][0-9][0-9][A-Z][A-Z]')
WHEN regexp_like(postcode, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]$') THEN regexp_extract_all(postcode, '[A-Z][0-9][A-Z][0-9][A-Z][A-Z]')
ELSE NULL
END newpostcode7


FROM __temp__.new_divorce_postcode;
"""
pydb.create_temp_table(create_new_divorce_with_postcode_temp1,'new_divorce_with_postcode_temp1')


In [53]:
code = pydb.read_sql_query("SELECT * from __temp__.new_divorce_with_postcode_temp1 LIMIT 10")
code

Unnamed: 0,year,month,quarter,line1,line2,line3,line4,line5,line6,postcode,confdntl,newpostcode1,newpostcode2,newpostcode3,newpostcode4,newpostcode5,newpostcode6,newpostcode7
0,1999,7,3,16 WELLINGTON RD,BROMLEY,BR2 9NQ,,,,,N,,,[BR2 9NQ],,,,
1,2007,6,2,MARTINS COTTAGE,HIGH STREET,CHIPPING CAMPDEN,GLOUCESTERSHIRE,,,,,,,,,,,
2,2007,6,2,39 CONWAY CLOSE,NUNEATON,WARWICKSHIRE,,,,,N,,,,,,,
3,2007,7,3,10 ALLESLEY CLOSE,SUTTON COLDFIELD,,,,,B74 2NF,N,,,,,,,[B74 2NF]
4,2007,7,3,18 GOODACRE ROAD,ULLESTHORPE,LEICESTER,LEICESTERSHIRE,,,LE17 5DL,N,,,,,,,[LE17 5DL]
5,2007,6,2,8 INVERNESS CLOSE,MOUNT NOD,COVENTRY,,,,,,,,,,,,
6,2007,6,2,2 CARROLL CLOSE,STRATFORD-UPON-AVON,WARWICKSHIRE,,,,CV37 7LQ,,,,,,,,[CV37 7LQ]
7,2007,8,3,15 NAPTON RISE,SOUTHAM,WARWICKSHIRE,,,,CV47 1GN,N,,,,,,,[CV47 1GN]
8,2007,10,4,1 EDWARD ROAD,KERESLEY,COVENTRY,,,,CV6 2GS,N,,,,,,,[CV6 2GS]
9,2007,12,4,94 WATHAN ROAD,WARWICK,,,,,CV34 5BB,N,,,,,,,[CV34 5BB]


In [54]:
create_new_divorce_with_postcode_temp2 =f"""
SELECT year, 
month, 
quarter,
line1,
line2,
line3,
line4,
line5, 
line6,
postcode,
confdntl,

array_join(newpostcode1,  '') as newpostcode1,
array_join(newpostcode2,  '') as newpostcode2,
array_join(newpostcode3,  '') as newpostcode3,
array_join(newpostcode4,  '') as newpostcode4,
array_join(newpostcode5,  '') as newpostcode5,
array_join(newpostcode6,  '') as newpostcode6,
array_join(newpostcode7,  '') as newpostcode7

FROM __temp__.new_divorce_with_postcode_temp1;
"""
pydb.create_temp_table(create_new_divorce_with_postcode_temp2,'new_divorce_with_postcode_temp2')

In [55]:
code = pydb.read_sql_query("SELECT * FROM __temp__.new_divorce_with_postcode_temp2 LIMIT 10")
code

Unnamed: 0,year,month,quarter,line1,line2,line3,line4,line5,line6,postcode,confdntl,newpostcode1,newpostcode2,newpostcode3,newpostcode4,newpostcode5,newpostcode6,newpostcode7
0,2017,8,3,BOW FARM,BADGWORTH,AXBRIDGE,SOMERSET,BIRSTOL,,BS26 2QA,N,,,,,,,BS26 2QA
1,2017,8,3,ACORNS,19A ROWANS CLOSE,FARNBOROUGH,HAMPSHIRE,,,GU14 9EJ,N,,,,,,,GU14 9EJ
2,2017,8,3,18 LOCKSWAY ROAD,PORTSMOUTH,HAMPSHIRE,,,,PO4 8JW,N,,,,,,,PO4 8JW
3,2017,9,3,139 SEAFIELD ROAD,SOUTHBOURNE,BOURNEMOUTH,DORSET,,,BH6 3JL,N,,,,,,,BH6 3JL
4,2017,7,3,7 BROOKBRIDGE LANE,DATCHWORTH,KNEBWORTH,HERTFORDSHIRE,,,SG3 6SU,N,,,,,,,SG3 6SU
5,2017,8,3,1 OAK DRIVE,BARTON UPON HUMBER,NORTH LINCOLNSHIRE,,,,DN18 6BY,N,,,,,,,DN18 6BY
6,2017,5,2,23 ST MARTINS AVENUE,LEEDS,WEST YORKSHIRE,,,,LS7 3LQ,N,,,,,,,LS7 3LQ
7,2017,9,3,21 THE PARK,YEOVIL,SOMERSET,,,,BA20 1DG,N,,,,,,,BA20 1DG
8,2017,9,3,166 GROSVENOR ROAD,ALDERSHOT,HAMPSHIRE,,,,GU11 3EJ,N,,,,,,,GU11 3EJ
9,2003,7,3,80 STRATFORD ROAD,WARWICK,,,,,,,,,,,,,


### Create the new_divorce_with_postcode table

In [56]:
create_new_divorce_with_postcode =f"""
SELECT year, 
month, 
quarter,
line1,
line2,
line3,
line4,
line5, 
line6,
postcode,
confdntl,

CASE WHEN newpostcode1 IS NOT NULL THEN newpostcode1
WHEN newpostcode2 IS NOT NULL THEN newpostcode2
WHEN newpostcode3 IS NOT NULL THEN newpostcode3
WHEN newpostcode4 IS NOT NULL THEN newpostcode4
WHEN newpostcode5 IS NOT NULL THEN newpostcode5
WHEN newpostcode6 IS NOT NULL THEN newpostcode6
WHEN newpostcode7 IS NOT NULL THEN newpostcode7

ELSE NULL
END newpostcode

FROM __temp__.new_divorce_with_postcode_temp2;
"""
pydb.create_temp_table(create_new_divorce_with_postcode,'new_divorce_with_postcode')

In [57]:
code = pydb.read_sql_query("SELECT * FROM __temp__.new_divorce_with_postcode LIMIT 10")
code

Unnamed: 0,year,month,quarter,line1,line2,line3,line4,line5,line6,postcode,confdntl,newpostcode
0,2021,4,2,,,,,,,,,
1,2012,10,4,15 COLLIERS AVENUE,LLANHARAN,PONTYCLUN,,,,CF72 9UT,N,CF72 9UT
2,2013,4,2,BROOKFIELD HOUSE,PRIORY ROAD,MILFORD HAVEN,PEMBROKESHIRE,,,SA73 2DZ,N,SA73 2DZ
3,2014,6,2,9 MAES FFYNNON,ROCH,HAVERFORDWEST,PEMBROKESHIRE,,,SA62 6BQ,N,SA62 6BQ
4,2012,11,4,14A MOUNTBATTEN AVENUE,HEBBURN,TYNE & WEAR,,,,NE31 2QP,N,NE31 2QP
5,2001,6,2,21 HERTBURN GARDENS,CONCORD,WASHINGTON,,,,,,
6,2012,12,4,2 HEATHFIELD GARDENS,ROBERTSBRIDGE,EAST SUSSEX,,,,TN32 5BE,N,TN32 5BE
7,2001,5,2,8 CHURCH CLOSE,NEW ROAD,BEGELLY,KILGETTY,PEMBS,,,,
8,2008,11,4,28 TRAFALGAR ROAD,MILFORD HAVEN,PEMBROKESHIRE,,,,SA73 2AS,,SA73 2AS
9,2012,6,2,ERWAN FAWR,BLAENNCELYN,LLANDYSSUL,,,,SA44 6DQ,N,SA44 6DQ


### Create the divorce_postcode_1 table

In [58]:
create_divorce_postcode_1 =f"""
SELECT t1.Year, 
    t1.Month, 
    t1.Quarter, 
    t1.Line1, 
    t1.Line2, 
    t1.Line3, 
    t1.Line4, 
    t1.Line5,
    t1.Line6,
    t1.CONFDNTL,
    t1.postcode,
    /* Addr Postcode */
    REPLACE(t1.newpostcode, ' ', '') AS newpostcode
FROM __temp__.new_divorce_with_postcode t1;
"""
pydb.create_temp_table(create_divorce_postcode_1,'divorce_postcode_1')

In [59]:
divorce_postcode_1 = pydb.read_sql_query("SELECT * from __temp__.divorce_postcode_1 LIMIT 10")
divorce_postcode_1

Unnamed: 0,year,month,quarter,line1,line2,line3,line4,line5,line6,confdntl,postcode,newpostcode
0,2008,4,2,123 HIGGINSHAW ROAD,OLDHAM,,,,,N,OL1 3JY,OL13JY
1,2017,12,4,FLAT 4,172 LADBROKE GROVE,LONDON,,,,N,W10 5LZ,W105LZ
2,2005,3,1,27 PYRORD CLOSE,WATERLOOVILLE,HAMPSHIRE,,,,,PO7 6BT,PO76BT
3,2007,6,2,141 HEVER AVENUE,WEST KINGSDOWN,SEVENOAKS,KENT,,,,TN15 6DU,TN156DU
4,2019,1,1,,,,,,,Y,,
5,2019,1,1,SHAMROCK COTTAGES,4 WALTON ROAD,KIRBY LE SOKEN,ESSEX,,,N,CO13 0DU,CO130DU
6,2018,2,1,87 RAYNTON DRIVE,HAYES,MIDDLESEX,,,,N,UB4 8BG,UB48BG
7,2018,8,3,5 GRANGE AVENUE,TWICKENHAM,MIDDLESEX,,,,N,TW2 5TW,TW25TW
8,2018,12,4,,,,,,,Y,,
9,2018,12,4,FLAT 66 RAMSEY HOUSE,VASSAL ROAD,LONDON,,,,N,SW9 6NB,SW96NB


### Create the ons_postcode_data table

In [60]:
create_ons_postcode_data =f"""
SELECT REPLACE(t1.pcd , ' ', '') AS PCD, 
t1.oslaua,
t1.ctry

FROM __temp__.ons_postcode t1

WHERE t1.ctry IN ('E92000001','W92000004');
"""
pydb.create_temp_table(create_ons_postcode_data,'ons_postcode_data')

In [61]:
ons_postcode_data = pydb.read_sql_query("SELECT * from __temp__.ons_postcode_data LIMIT 10")
ons_postcode_data

Unnamed: 0,pcd,oslaua,ctry
0,AL13WF,E07000240,E92000001
1,AL13WG,E07000240,E92000001
2,AL13WH,E07000240,E92000001
3,AL13WJ,E07000240,E92000001
4,AL13WL,E07000240,E92000001
5,AL13WP,E07000240,E92000001
6,AL13WQ,E07000240,E92000001
7,AL13WB,E07000240,E92000001
8,AL13WD,E07000240,E92000001
9,AL13WE,E07000240,E92000001


### Create the divorce_postcode_ons_match table

In [62]:
create_divorce_postcode_ons_match =f"""
SELECT t1.Year, 
          t1.Quarter, 
          t1.LINE1, 
          t1.LINE2, 
          t1.LINE3, 
          t1.LINE4, 
          t1.LINE5,
          t1.LINE6,
          t1.CONFDNTL,
          t1.postcode,
          t1.newpostcode, 
          t2.PCD, 
          t2.oslaua
FROM __temp__.divorce_postcode_1 t1
LEFT JOIN __temp__.ons_postcode_data t2 
    ON (t1.newpostcode = t2.PCD);
"""
pydb.create_temp_table(create_divorce_postcode_ons_match,'divorce_postcode_ons_match')

In [63]:
divorce_postcode_ons_match = pydb.read_sql_query("SELECT * from __temp__.divorce_postcode_ons_match LIMIT 10")
divorce_postcode_ons_match

Unnamed: 0,year,quarter,line1,line2,line3,line4,line5,line6,confdntl,postcode,newpostcode,pcd,oslaua
0,2022,1,,,,,,,SHARE,NG18 5NJ,NG185NJ,NG185NJ,E07000174
1,2019,2,35 MAERDY PARK,PENCOED,BRIDGEND,,,,N,CF355HX,CF355HX,CF355HX,W06000013
2,2018,4,GWARCWM,MAESYMEILLION,NR LLANDYSUL,CEREDIGION,,,N,SA44 4NH,SA444NH,SA444NH,W06000008
3,2019,2,7 COLWINSTONE STREET,LLANDAFF NORTH,CARDIFF,,,,N,CF14 2LA,CF142LA,CF142LA,W06000015
4,2002,4,GARDEN COTTAGE,ALDERBROOK,CRANLEIGH,SURREY,,,,GU6 8QU,GU68QU,GU68QU,E07000216
5,2019,3,174 PENRHIWCEIBER ROAD,PENRHIWCEIBER,MOUNTAIN ASH,,,,N,CF44 3SH,CF443SH,,
6,2019,4,106 CAER'R GWERLAS,PORTH,RHONDDA,,,,N,CF39 8HU,CF398HU,CF398HU,W06000016
7,2019,4,2 BROGERDDAN,PENRHYNCOCH,ABERYSTWYTH,,,,N,SY23 3ED,SY233ED,SY233ED,W06000008
8,2003,2,14A ELMERS DRIVE,TEDDINGTON,MIDDLESEX,,,,,TW11 8PB,TW118PB,TW118PB,E09000027
9,2011,4,1 MAPLETON CLOSE,BROMLEY,KENT,,,,N,BR1 9DU,BR19DU,BR19DU,E09000006


### Create the divorce_postcode_la table

In [64]:
create_divorce_postcode_la =f"""
SELECT t1.Year, 
          t1.Quarter, 
          t1.LINE1, 
          t1.LINE2, 
          t1.LINE3, 
          t1.LINE4, 
          t1.LINE5,
          t1.LINE6,
          t1.CONFDNTL,
          t1.postcode, 
          t1.newpostcode, 
          t1.PCD, 
          t1.oslaua, 
          LTRIM(t2.county_ua) as county_ua,
          t2.country
FROM __temp__.divorce_postcode_ons_match t1
LEFT JOIN __temp__.divorce_county_ua_lookup t2 
ON (t1.oslaua = t2.code);

"""
pydb.create_temp_table(create_divorce_postcode_la,'divorce_postcode_la')

In [65]:
divorce_postcode_la = pydb.read_sql_query("SELECT * from __temp__.divorce_postcode_la LIMIT 10")
divorce_postcode_la

Unnamed: 0,year,quarter,line1,line2,line3,line4,line5,line6,confdntl,postcode,newpostcode,pcd,oslaua,county_ua,country
0,2018,3,FLAT 22,WOODVILLE COURT,19 STAFFORD CLOSE,LONDON,,,N,N14 4BF,N144BF,N144BF,E09000010,Outer London,England
1,2005,4,C/O 59 BARROW GROVE,SITTINGBOURNE,KENT,,,,,ME10 1JY,ME101JY,ME101JY,E07000113,Kent,England
2,2006,2,1 LOURDES MANOR CLOSE,SWAN LANE,SELLINDGE,KENT,,,,TN25 6BU,TN256BU,TN256BU,E07000112,Kent,England
3,2006,2,62 ALEXANDRA ROAD,SHEERNESS,KENT,,,,,ME12 2AT,ME122AT,ME122AT,E07000113,Kent,England
4,2006,3,THE RECTORY,DORMAN AVENUE NORTH,AYLESHAM,CANTERBURY,KENT,,,CT3 3BL,CT33BL,CT33BL,E07000108,Kent,England
5,2006,4,2 OAKWOOD TERRACE,PEEL DRIVE,SITTINGBOURNE,KENT,,,,ME10 3EA,ME103EA,ME103EA,E07000113,Kent,England
6,2006,4,9 WOOTTEN CLOSE,GILLINGHAM,KENT,,,,,,,,,,
7,2007,1,12 MARLBOROUGH WAY,KENNINGTON,ASHFORD,KENT,,,,,,,,,
8,2002,4,3 WOODSIDE STREET,STALYBRIDGE,CHESHIRE,,,,,,,,,,
9,2004,4,90 SALTINGS ROAD,SNODLAND,KENT,,,,,ME6 5HA,ME65HA,ME65HA,E07000115,Kent,England


In [43]:
divorce_postcode_la = pydb.read_sql_query("SELECT * from __temp__.divorce_postcode_la WHERE YEAR = 2021 AND COUNTY_UA = 'DerbyUA' AND confdntl = 'SHARE' ")
divorce_postcode_la

Unnamed: 0,year,quarter,line1,line2,line3,line4,line5,line6,confdntl,postcode,newpostcode,pcd,oslaua,county_ua,country
0,2021,3,,,,,,,SHARE,DE23 6TD,DE236TD,DE236TD,E06000015,DerbyUA,England
1,2021,4,,,,,,,SHARE,DE24 1AE,DE241AE,DE241AE,E06000015,DerbyUA,England
2,2021,4,,,,,,,SHARE,DE23 1QF,DE231QF,DE231QF,E06000015,DerbyUA,England
3,2021,4,,,,,,,SHARE,DE23 1JL,DE231JL,DE231JL,E06000015,DerbyUA,England
4,2021,3,,,,,,,SHARE,DE21 2RH,DE212RH,DE212RH,E06000015,DerbyUA,England
5,2021,3,,,,,,,SHARE,DE23 6QW,DE236QW,DE236QW,E06000015,DerbyUA,England
6,2021,2,,,,,,,SHARE,DE24 8NN,DE248NN,DE248NN,E06000015,DerbyUA,England
7,2021,4,,,,,,,SHARE,DE22 2JA,DE222JA,DE222JA,E06000015,DerbyUA,England
8,2021,4,,,,,,,SHARE,DE22 2JY,DE222JY,DE222JY,E06000015,DerbyUA,England
9,2021,3,,,,,,,SHARE,DE24 1AD,DE241AD,DE241AD,E06000015,DerbyUA,England


In [35]:
divorce_postcode_la = pydb.read_sql_query("SELECT * from __temp__.divorce_postcode_la where postcode = 'DE24 8XH'")
divorce_postcode_la

Unnamed: 0,year,quarter,line1,line2,line3,line4,line5,line6,confdntl,postcode,newpostcode,pcd,oslaua,county_ua,country
0,2009,2,125 CREWTON WAY,ALVASTON,DERBY,,,,,DE24 8XH,DE248XH,DE248XH,E06000015,DerbyUA,England
1,2009,4,127 CREWTON WAY,ALVASTON,DERBY,,,,,DE24 8XH,DE248XH,DE248XH,E06000015,DerbyUA,England
2,2017,1,121 CREWTON WAY,ALVASTON,DERBY,,,,N,DE24 8XH,DE248XH,DE248XH,E06000015,DerbyUA,England
3,2000,2,410 UTTOXETER NEW ROAD,DERBY,,,,,,DE24 8XH,DE248XH,DE248XH,E06000015,DerbyUA,England
4,2011,3,3 CREWTON WAY,ALVERSTON,DERBY,,,,N,DE24 8XH,DE248XH,DE248XH,E06000015,DerbyUA,England
5,2019,2,113 CREWTON WAY,ALVASTON,DERBY,,,,N,DE24 8XH,DE248XH,DE248XH,E06000015,DerbyUA,England


In [40]:
# Export the final csv
divorce_postcode_la.to_csv("s3://alpha-family-data/CSVs/Divorce/test3.csv", index = False)

### Create the divorce_la_c8 table

In [66]:
create_divorce_la_c8 =f"""
SELECT *,
CASE 
WHEN CONFDNTL = 'KEEP' THEN 'Confidentiality requested' 
WHEN CONFDNTL = 'Y' THEN 'Confidentiality requested' 
WHEN Line1 IS NULL AND Line2 IS NULL AND Line3 IS NULL AND Line4 IS NULL AND Line5 IS NULL AND LINE6 IS NULL AND Postcode IS NULL THEN 'Confidentiality requested'
WHEN Line1 = '-' AND Line2 ='-' AND Postcode IS NULL THEN 'Confidentiality requested'
WHEN Line1 = '.' And Line2 = '.' AND Line3 IS NULL AND Postcode IS NULL THEN 'Confidentiality requested'
WHEN Line1 = 'XX' OR Line1 = 'XXX' OR Line1 = 'XXXX' OR Line1 = 'XXXXX' or Line1 = 'XXXXXX' OR Line1 = 'XXXXXXX' or Line1 = 'XXXXXXXX' then 'Confidentiality requested'
WHEN strpos(Line1,'PRIVATE') <> 0 then 'Confidentiality requested'
WHEN strpos(Line1,'WITHHELD') <> 0 then 'Confidentiality requested'
WHEN strpos(Line1,'CONFIDENT') <> 0 then 'Confidentiality requested'
WHEN strpos(Line2,'CONFIDENT') <> 0 then 'Confidentiality requested'
WHEN strpos(Line1,'C8') <> 0 then 'Confidentiality requested'
WHEN strpos(Line2,'C8') <> 0 then 'Confidentiality requested'

WHEN Newpostcode IS NOT NULL AND PCD IS NULL then 'Postcode invalid/not given or foreign'
WHEN Newpostcode IS NOT NULL AND PCD IS NOT NULL AND county_ua IS NULL then 'Postcode invalid/not given or foreign'
WHEN county_ua IS NULL THEN 'Postcode invalid/not given or foreign'

ELSE county_ua 
END county_ua2
FROM __temp__.divorce_postcode_la;

"""
pydb.create_temp_table(create_divorce_la_c8,'divorce_la_c8')

In [67]:
divorce_la_c8 = pydb.read_sql_query("SELECT * from __temp__.divorce_la_c8 LIMIT 10")
divorce_la_c8

Unnamed: 0,year,quarter,line1,line2,line3,line4,line5,line6,confdntl,postcode,newpostcode,pcd,oslaua,county_ua,country,county_ua2
0,2011,2,CHORLTON,MANCHESTER,,,,,N,M21 9JD,M219JD,M219JD,E08000003,Greater Manchester(Met County),England,Greater Manchester(Met County)
1,2000,4,24 DOVE ROAD,ORRELL PARK,LIVERPOOL,,,,,L9 8AT,L98AT,L98AT,E08000012,Merseyside(Met County),England,Merseyside(Met County)
2,2000,3,3 MUSARD PLACE,MIDDLECROFT,CHESTERFIELD,,,,,S43 3XQ,S433XQ,S433XQ,E07000034,Derbyshire,England,Derbyshire
3,2000,4,14 NORMAN ROAD,BROADSTAIRS,KENT,,,,,CT10 3BZ,CT103BZ,CT103BZ,E07000114,Kent,England,Kent
4,2001,2,"4, ITHON GROVE,","KINGS NORTON,",BIRMINGHAM,,,,,B38 9LG,B389LG,B389LG,E08000025,West Midlands(Met County),England,West Midlands(Met County)
5,2016,2,15 MAIN STREET,GRASSINGTON,NR. SKIPTON,,,,N,BD23 5AD,BD235AD,BD235AD,E07000163,North Yorkshire,England,North Yorkshire
6,1998,3,10 MOORE STREET,NORMANTON,DERBY,,,,N,,,,,,,Postcode invalid/not given or foreign
7,2004,4,50 CHURCH STREET SOUTH,BIRDHOLME,CHESTERFIELD,,,,,,,,,,,Postcode invalid/not given or foreign
8,2000,1,C/O 136 DERBY ROAD,CHESTERFIELD,DERBYSHIRE,,,,,,,,,,,Postcode invalid/not given or foreign
9,2002,4,19 DOCHDWY ROAD,LLANDOUGH,VALE OF GLAMORGAN,,,,,,,,,,,Postcode invalid/not given or foreign


In [68]:
# Export the final csv
#divorce_la_c8.to_csv("s3://alpha-family-data/CSVs/Divorce/test2.csv", index = False)

#### Check Confidentiality Filter

In [69]:
#check = pydb.read_sql_query("SELECT DISTINCT * FROM __temp__.DIVORCE_LA_C8 WHERE CONFDNTL = 'Y' or CONFDNTL = 'KEEP';")
#check

### Create the divorce_county table

In [70]:
create_divorce_county =f"""
SELECT year,
quarter,
line1,
line2,
line3,
line4,
line5,
line6,
confdntl,
postcode, 
newpostcode,
pcd,
oslaua,

CASE 
WHEN county_ua2 = 'Isles of ScillyUA' then 'Cornwall & Isles of Scilly'
WHEN county_ua2 = 'CornwallUA' then 'Cornwall & Isles of Scilly'

ELSE county_ua2 
END county_ua,

CASE 
WHEN county_ua2 = 'Confidentiality requested' then 'Confidentiality requested'
WHEN county_ua2 = 'Postcode invalid/not given or foreign' then 'Postcode invalid/not given or foreign'
ELSE country
END country

FROM __temp__.divorce_la_c8;

"""
pydb.create_temp_table(create_divorce_county,'divorce_county')

In [71]:
divorce_county = pydb.read_sql_query("SELECT * from __temp__.divorce_county LIMIT 10")
divorce_county

Unnamed: 0,year,quarter,line1,line2,line3,line4,line5,line6,confdntl,postcode,newpostcode,pcd,oslaua,county_ua,country
0,1999,3,5 WHITSTABLE ROAD,WYMERING,COSHAM,PORTSMOUTH,HANTS,,N,,,,,Postcode invalid/not given or foreign,Postcode invalid/not given or foreign
1,2003,3,319 LIVERPOOL ROAD,BIRKDALE,SOUTHPORT,,,,,,,,,Postcode invalid/not given or foreign,Postcode invalid/not given or foreign
2,2021,4,,,,,,,,,,,,Confidentiality requested,Confidentiality requested
3,2001,1,THE VILLA,GLEBE ROAD,RAMSDEN BELLHOUSE,BILLERICAY,ESSEX,,N,,,,,Postcode invalid/not given or foreign,Postcode invalid/not given or foreign
4,2001,1,89B CHALKWELL ESPLANADE,WESTCLIFF ON SEA,ESSEX,,,,,,,,,Postcode invalid/not given or foreign,Postcode invalid/not given or foreign
5,2001,1,602 RAYLEIGH ROAD,EASTWOOD,LEIGH ON SEA,ESSEX,,,,,,,,Postcode invalid/not given or foreign,Postcode invalid/not given or foreign
6,2000,2,181 FAIRFAX DRIVE,WESTCLIFF ON SEA,ESSEX,,,,,SS0 9BQ,SS09BQ,SS09BQ,E06000033,Southend-on-SeaUA,England
7,2000,3,1 CHESTNUT COURT,HIGH ROAD,VANGE,BASILDON,ESSEX,,,,,,,Postcode invalid/not given or foreign,Postcode invalid/not given or foreign
8,2000,4,147 CHESTNUT ROAD,VANGE,BASILDON,ESSEX,,,N,,,,,Postcode invalid/not given or foreign,Postcode invalid/not given or foreign
9,2000,4,131 BUTNEYS,BASILDON,ESSEX,,,,N,SS14 2DR,SS142DR,SS142DR,E07000066,Essex,England


### Create the petitioner_summary_la table

In [72]:
create_petitioner_summary_la =f"""
SELECT DISTINCT 'Petitioner' as Type,
t1.year,
t1.country,
t1.county_ua,
(COUNT(t1.county_ua)) AS COUNT_of_County

FROM __temp__.divorce_county t1

WHERE year > 2010
AND year < {current_year}

GROUP BY t1.year,
t1.county_ua,
t1.country;

"""
pydb.create_temp_table(create_petitioner_summary_la,'petitioner_summary_la')

In [73]:
petitioner_summary_la = pydb.read_sql_query("SELECT * from __temp__.petitioner_summary_la")
petitioner_summary_la

Unnamed: 0,type,year,country,county_ua,count_of_county
0,Petitioner,2015,England,DorsetUA,787
1,Petitioner,2015,Wales,Monmouthshire,154
2,Petitioner,2012,England,WokinghamUA,321
3,Petitioner,2021,England,SloughUA,116
4,Petitioner,2015,England,Bath and North East SomersetUA,300
...,...,...,...,...,...
1363,Petitioner,2013,Wales,Bridgend,282
1364,Petitioner,2015,England,Cambridgeshire,1243
1365,Petitioner,2015,England,MedwayUA,541
1366,Petitioner,2014,England,Lancashire,2102


In [74]:
# Check that counts of county 
petitioner_summary_la[['count_of_county']].sum()

count_of_county    1405374
dtype: int64

In [75]:
final_output = pydb.read_sql_query("""
SELECT *
from __temp__.petitioner_summary_la
ORDER BY year,
country,
county_ua
""")

In [76]:
# Export the final csv
final_output.to_csv("s3://alpha-family-data/CSVs/Divorce/CSV Matrimonial Matters County & UA Annual 2022.csv", index = False)