From ad2132ba11682c1ac8f0db1d024aba65a77c7443 Mon Sep 17 00:00:00 2001
From: Robin Linacre
Date: Fri, 5 Jun 2020 21:00:26 +0100
Subject: [PATCH] fix regression found on postcode

---
 splink_data_normalisation/postcode.py | 6 +++---
 tests/test_postcode.py                | 8 +++++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/splink_data_normalisation/postcode.py b/splink_data_normalisation/postcode.py
index 804de71..cdf7e44 100644
--- a/splink_data_normalisation/postcode.py
+++ b/splink_data_normalisation/postcode.py
@@ -17,10 +17,10 @@ def postcode_to_inward_outward(df: DataFrame, pc_field: str, drop_orig:bool = Tr
 
     # If the postcode is long enough, parse out inner outer
     # If it's too short, assume we only have the outer part
-    
+
     sql = """
     case
-    when length(pc_nospace_temp__) >= 6 then left(pc_nospace_temp__, length(pc_nospace_temp__) - 3)
+    when length(pc_nospace_temp__) >= 5 then left(pc_nospace_temp__, length(pc_nospace_temp__) - 3)
     else left(pc_nospace_temp__, 4)
     end
     """
@@ -32,7 +32,7 @@ def postcode_to_inward_outward(df: DataFrame, pc_field: str, drop_orig:bool = Tr
 
     sql = """
     case
-    when length(pc_nospace_temp__) >= 6 then right(pc_nospace_temp__, 3)
+    when length(pc_nospace_temp__) >= 5 then right(pc_nospace_temp__, 3)
     else null
     end
     """
diff --git a/tests/test_postcode.py b/tests/test_postcode.py
index a52c4f8..d14fe06 100644
--- a/tests/test_postcode.py
+++ b/tests/test_postcode.py
@@ -18,6 +18,10 @@ def test_pc_1(spark):
         {"id": 8, "postcode": "AB12C"},
         {"id": 9, "postcode": "AB12"},
         {"id": 10, "postcode": "AB1"},
+        {"id": 11, "postcode": "B8 3QF"},
+        {"id": 12, "postcode": "B83QF"},
+
+
     ]
 
     df = spark.createDataFrame(Row(**x) for x in postcode_list)
@@ -33,9 +37,11 @@ def test_pc_1(spark):
         {"id": 5, "outward_postcode_norm": "AB1C", "inward_postcode_norm": "2DE"},
         {"id": 6, "outward_postcode_norm": "AB1C", "inward_postcode_norm": "2DE"},
         {"id": 7, "outward_postcode_norm": "AB1", "inward_postcode_norm": "2CD"},
-        {"id": 8, "outward_postcode_norm": "AB12", "inward_postcode_norm": None},
+        {"id": 8, "outward_postcode_norm": "AB", "inward_postcode_norm": "12C"},
         {"id": 9, "outward_postcode_norm": "AB12", "inward_postcode_norm": None},
         {"id": 10, "outward_postcode_norm": "AB1", "inward_postcode_norm": None},
+        {"id": 11, "outward_postcode_norm": "B8", "inward_postcode_norm": "3QF"},
+        {"id": 12, "outward_postcode_norm": "B8", "inward_postcode_norm": "3QF"},
     ]