# Setup

In [1]:
from piper.defaults import *
from pathlib import Path

piper v0.1.0: Monday, 29 March 2021 19:35:26


# Helper functions

## clean_postcodes

In [2]:
def clean_postcodes(filename):
    """Read raw postcodes from a header-less CSV and normalise them.

    Only the first column of the file is read. Each value is split into an
    outward part (everything before the final digit + up to two trailing
    word characters) and an inward part (that final digit and the trailing
    characters). The two halves are re-joined with exactly one space,
    regardless of whether the raw value had no space, one space or padding
    spaces between them.

    Parameters
    ----------
    filename : str or path-like
        CSV file to read; passed straight to ``pd.read_csv``.

    Returns
    -------
    pd.DataFrame
        Two columns, in this order:

        * ``post_code`` - normalised full postcode, e.g. ``'AB10 1AA'``
        * ``postcode``  - first run of ASCII letters found in the outward
          part, e.g. ``'AB'``

        Values that do not match the expected pattern yield missing values
        in both columns; callers are expected to drop them.
    """
    raw = pd.read_csv(filename, header=None, usecols=[0], names=['post_code'])

    # Strip first so stray leading/trailing blanks cannot defeat the '$' anchor.
    parts = raw['post_code'].str.strip().str.extract(r'(.*)(\d\w{,2})$', expand=True)

    # The greedy first group keeps any blanks that separated the two halves in
    # the raw value; remove them so the re-join below yields a single space.
    outward = parts[0].str.strip()
    inward = parts[1]

    return pd.DataFrame({
        'post_code': outward + ' ' + inward,
        'postcode': outward.str.extract('([a-zA-Z]+)', expand=False),
    })

# Postcodes

## Consolidate multiple raw CSV files

In [3]:
# Folder containing the raw postcode CSV files
directory = Path('inputs/fake_data/supporting workfiles/CSV')
list_of_csv_files = list_files(directory, glob_pattern='*.csv', as_posix=True)

# Clean every file on its own, stack the results, then discard rows
# that contain any missing value.
dataframes = list(map(clean_postcodes, list_of_csv_files))
df = pd.concat(dataframes)
df = df.dropna()

In [4]:
# Preview the consolidated postcode frame
head(df)

1708495 rows, 2 columns


Unnamed: 0,post_code,postcode
0,WR1 1AA,WR
1,WR1 1AD,WR
2,WR1 1AE,WR
3,WR1 1AF,WR


## Read top level postcodes

In [5]:
# Re-bind `directory` (previously the raw CSV folder) to the parent data folder
directory = Path('inputs/fake_data/')

In [6]:
# Path of the postcode TSV and the columns to keep from it
filename = directory / 'UK postcodes.tsv'
subset_cols = ['postcode', 'area_covered', 'center_latitude', 'center_longitude']

# Read the tab-separated file, keep only subset_cols, and bind the result to `areas`
%piper areas <- read_csv(filename, sep='\t', info=False) >> select(subset_cols)

inputs/fake_data/UK postcodes.tsv


In [7]:
# Preview the area-level columns read from the TSV
head(areas)

2631621 rows, 4 columns


Unnamed: 0,postcode,area_covered,center_latitude,center_longitude
0,AB,Aberdeen,57.301,-2.3079
1,AB,Aberdeen,57.301,-2.3079
2,AB,Aberdeen,57.301,-2.3079
3,AB,Aberdeen,57.301,-2.3079


## Merge with detail and write TSV

In [8]:
%%piper 

df 
>> str_trim()
# The remaining steps are disabled, so this cell only displays the result of
# str_trim(); nothing is merged or written.
# NOTE(review): the disabled to_csv targets directory / 'UK postcodes.tsv', the same
# path `areas` is read from earlier in the notebook. Re-enabling it as-is would
# overwrite that input file - confirm the intended output path first.
# >> inner_join(areas, on='postcode')
# >> order_by(['post_code', 'center_latitude', 'center_longitude'])
# >> to_csv(directory / 'UK postcodes.tsv', sep='\t')

Unnamed: 0,post_code,postcode
0,WR1 1AA,WR
1,WR1 1AD,WR
2,WR1 1AE,WR
3,WR1 1AF,WR
4,WR1 1AG,WR
...,...,...
18558,W9 4HJ,W
18559,W9 4HQ,W
18560,W9 4HS,W
18561,W9 4HW,W


## Check: Count postcodes by area_covered

In [9]:
%%piper
read_csv(directory / 'UK postcodes.tsv', sep='\t')
>> count('area_covered')
>> adorn()
# NOTE(review): this reads the same 'UK postcodes.tsv' path that `areas` was loaded
# from earlier in the notebook, so the counts reflect whatever is currently on disk
# at that path - confirm this is the file the check is meant to validate.

inputs/fake_data/UK postcodes.tsv




Unnamed: 0,n,%,cum %
Northern Ireland,61876,2.35,2.35
Birmingham,61274,2.33,4.68
Manchester,58692,2.23,6.91
Sheffield,49158,1.87,8.78
Glasgow,46759,1.78,10.55
...,...,...,...
Llandrindod Wells,2650,0.10,99.85
Kirkwall,2104,0.08,99.93
Outer Hebrides,1093,0.04,99.97
Lerwick,749,0.03,100.00
