# Setup

In [1]:
from piper import piper 
from piper.verbs import *
from piper.pandas import *
from pathlib import Path

piper version 0.0.7, last run: Saturday, 13 February 2021 15:02:06


# Helper functions

## clean_postcodes

In [2]:
def clean_postcodes(filename):
    """Load one raw postcode CSV and normalise its postcodes.

    Only the first column of the header-less file is read. Each value is split
    into an outward part (everything before the final digit-led group) and an
    inward part (a digit followed by up to two word characters at the very end
    of the value), then rebuilt as ``'<outward> <inward>'``.

    Parameters
    ----------
    filename : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: ``post_code`` (the re-formatted postcode)
        and ``postcode`` (the leading run of letters of the outward part).
        Values that do not match the pattern are NaN in both columns, so the
        caller can remove them with ``dropna()``.
    """
    raw = pd.read_csv(filename, header=None, usecols=[0], names=['post_code'])

    parts = raw['post_code'].str.extract(r'(.*)(\d\w{,2})$', expand=True)

    # The greedy first group keeps any separator already present in the source
    # value (e.g. 'AB1 ' from 'AB1 0AA'); strip it so the re-joined postcode
    # always contains exactly one space between the two parts.
    outward = parts[0].str.strip()
    inward = parts[1]

    # Build the result directly instead of adding and then dropping scratch
    # columns in place.
    return pd.DataFrame({
        'post_code': outward + ' ' + inward,
        'postcode': outward.str.extract('([a-zA-Z]+)', expand=False),
    })

# Postcodes

## Consolidate multiple raw CSV files

In [3]:
# Locate every CSV file in the supporting work-files folder.
directory = Path('inputs/fake_data/supporting workfiles/')
list_of_csv_files = list_files(directory, filter='*.csv', as_posix=True)

# Clean the files one by one, stack the cleaned frames, and discard any row
# that still holds a missing value.
dataframes = list(map(clean_postcodes, list_of_csv_files))
df = (
    pd.concat(dataframes)
    .dropna()
)

In [4]:
# Preview the consolidated postcode frame as a sanity check after the concat/dropna step.
head(df)

2634076 rows, 2 columns


Unnamed: 0,post_code,postcode
1,AB1 0AA,AB
2,AB1 0AB,AB
3,AB1 0AD,AB
4,AB1 0AE,AB


## Read top level postcodes

In [5]:
# Re-point `directory` at the parent data folder; until now it referred to the
# 'supporting workfiles' sub-folder. The cells below build their paths from it.
directory = Path('inputs/fake_data/')

In [6]:
# Despite its .csv extension, the areas file is read as tab-separated (sep='\t').
filename = directory / 'UK_Toplevel_postcode_areas.csv'
# Columns kept from the file; everything else is discarded by select().
subset_cols = ['postcode', 'area_covered', 'center_latitude', 'center_longitude']

# The result of the pipeline is bound to `areas`, which the next cell previews.
%piper areas <- read_csv(filename, sep='\t', info=False) >> select(subset_cols)

inputs/fake_data/UK_Toplevel_postcode_areas.csv
read_csv -> 121 rows, 9 columns


In [7]:
# Preview the top-level areas after column selection.
head(areas)

121 rows, 4 columns


Unnamed: 0,postcode,area_covered,center_latitude,center_longitude
0,AB,Aberdeen,57.301,-2.3079
1,AL,St Albans,51.7755,-0.283982
2,B,Birmingham,52.4652,-1.88885
3,BA,Bath,51.2295,-2.41734


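## Merge with detail and write TSV

Inner-join the consolidated postcodes to the top-level areas on `postcode`, sort by `post_code`, `center_latitude` and `center_longitude`, and write the result to `UK postcodes.tsv` as tab-separated text.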

In [8]:
%%piper 

df 
>> inner_join(areas, on='postcode')
>> order_by(['post_code', 'center_latitude', 'center_longitude'])
>> to_csv(directory / 'UK postcodes.tsv', sep='\t')

## Check: count postcodes by area_covered

Re-read the TSV written in the previous step and count the rows per `area_covered`.

In [9]:
%%piper
read_csv(directory / 'UK postcodes.tsv', sep='\t')
>> count('area_covered')
>> adorn()

inputs/fake_data/UK postcodes.tsv
read_csv -> 2631621 rows, 5 columns


Unnamed: 0,area_covered,n
0,Northern Ireland,61876
1,Birmingham,61274
2,Manchester,58692
3,Sheffield,49158
4,Glasgow,46759
...,...,...
116,Llandrindod Wells,2650
117,Kirkwall,2104
118,Outer Hebrides,1093
119,Lerwick,749
