In [1]:
from pathlib import Path

import polars as pl

In [6]:
path_pp = Path.cwd() / "data" / "csv" / "price_paid" / "pp_202304.csv"
pp_schema = pl.Schema(
    {
        "tuid": pl.String(),
        "price": pl.Int64(),
        "transfer_date": pl.Datetime(),
        "postcode": pl.String(),
        # Presumably Terraced, Flat, O-something?, Semi-detached, Detached
        "type": pl.Enum(categories=["T", "F", "O", "S", "D"]),
        "is_new_build": pl.Enum(categories=["Y", "N"]),
        # Presumably Freehold, Leasehold, Unclassified
        "duration": pl.Enum(categories=["F", "L", "U"]),
        "street_number": pl.String(),
        "flat_number": pl.String(),
        "street": pl.String(),
        "locality": pl.String(),
        "town": pl.String(),
        "district": pl.String(),
        "county": pl.String(),
        "category_type": pl.String(),
        "record_status": pl.String()
    }
)
pp = pl.scan_csv(path_pp, schema=pp_schema)
pp.collect().write_parquet(Path.cwd() / "data" / "parquet" / "price_paid.parquet")

In [23]:
path_postcodes = Path.cwd() / "data" / "csv" / "ukpostcodes" / "open_postcode_geo.csv"
schema_postcodes = pl.Schema({
    "postcode": pl.String(),
    "live_or_terminated": pl.Categorical(),
    "size": pl.Categorical(),
    "easting": pl.Int64(),
    "northing": pl.Int64(),
    "positional_quality": pl.Int8(),
    "country": pl.Categorical(),
    "latitude": pl.Float64(),
    "longitude": pl.Float64(),
    "postcode_no_spaces": pl.String(),
    "postcode_unit": pl.String(),
    "postcode_2_spaces": pl.String(),
    "postcode_area": pl.String(),
    "postcode_district": pl.String(),
    "postcode_sector": pl.String(),
    "postcode_first_half": pl.String(),
    "postcode_second_half": pl.String(),
})
postcodes = pl.scan_csv(path_postcodes, schema=schema_postcodes, ignore_errors=True)
postcodes.tail(100).collect()


postcode,live_or_terminated,size,easting,northing,tbd,country,latitude,longitude,postcode_no_spaces,postcode_unit,postcode_2_spaces,postcode_area,postcode_district,postcode_sector,postcode_first_half,postcode_second_half
str,cat,cat,i64,i64,i8,cat,f64,f64,str,str,str,str,str,str,str,str
"""ZE2 9SR""","""live""","""small""",435967,1168443,1,"""Scotland""",60.398298,-1.349144,"""ZE29SR""","""ZE2 9SR""","""ZE2 9SR""","""ZE""","""ZE2""","""ZE2 9""","""ZE2""","""9SR"""
"""ZE2 9SS""","""live""","""small""",436001,1168415,1,"""Scotland""",60.398043,-1.348532,"""ZE29SS""","""ZE2 9SS""","""ZE2 9SS""","""ZE""","""ZE2""","""ZE2 9""","""ZE2""","""9SS"""
"""ZE2 9ST""","""live""","""small""",435909,1168431,1,"""Scotland""",60.398195,-1.350198,"""ZE29ST""","""ZE2 9ST""","""ZE2 9ST""","""ZE""","""ZE2""","""ZE2 9""","""ZE2""","""9ST"""
"""ZE2 9SU""","""live""","""small""",435891,1168377,1,"""Scotland""",60.397712,-1.350535,"""ZE29SU""","""ZE2 9SU""","""ZE2 9SU""","""ZE""","""ZE2""","""ZE2 9""","""ZE2""","""9SU"""
"""ZE2 9SW""","""live""","""small""",435913,1168617,1,"""Scotland""",60.399865,-1.350093,"""ZE29SW""","""ZE2 9SW""","""ZE2 9SW""","""ZE""","""ZE2""","""ZE2 9""","""ZE2""","""9SW"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""ZE3 9JW""","""live""","""small""",438975,1110038,1,"""Scotland""",59.873651,-1.305697,"""ZE39JW""","""ZE3 9JW""","""ZE3 9JW""","""ZE""","""ZE3""","""ZE3 9""","""ZE3""","""9JW"""
"""ZE3 9JX""","""live""","""small""",438872,1110219,1,"""Scotland""",59.875286,-1.307502,"""ZE39JX""","""ZE3 9JX""","""ZE3 9JX""","""ZE""","""ZE3""","""ZE3 9""","""ZE3""","""9JX"""
"""ZE3 9JY""","""live""","""small""",438498,1112029,1,"""Scotland""",59.891572,-1.313847,"""ZE39JY""","""ZE3 9JY""","""ZE3 9JY""","""ZE""","""ZE3""","""ZE3 9""","""ZE3""","""9JY"""
"""ZE3 9JZ""","""live""","""small""",438662,1112122,1,"""Scotland""",59.892392,-1.310899,"""ZE39JZ""","""ZE3 9JZ""","""ZE3 9JZ""","""ZE""","""ZE3""","""ZE3 9""","""ZE3""","""9JZ"""


In [24]:
postcodes.select("size").unique().collect()

size
cat
"""small"""
"""large"""


In [11]:
(
    pp.filter(
        (pl.col("street").str.contains("CONEY GREEN DRIVE"))
        & (pl.col("street_number") == "1")
    )
    .sort(by="street_number")
    .sort(by="transfer_date")
    .collect()
 )

tuid,price,transfer_date,postcode,type,age,duration,street_number,flat_number,street,locality,town,district,county,category_type,record_status
str,i64,datetime[μs],str,str,str,str,str,str,str,str,str,str,str,str,str


In [26]:
(
    pp
    .join(
        postcodes,
        on="postcode",
        how="left",
    )
    .filter(
        pl.col("postcode_sector") == "B31 4"
    )
    .collect()
)

tuid,price,transfer_date,postcode,type,age,duration,street_number,flat_number,street,locality,town,district,county,category_type,record_status,live_or_terminated,size,easting,northing,tbd,country,latitude,longitude,postcode_no_spaces,postcode_unit,postcode_2_spaces,postcode_area,postcode_district,postcode_sector,postcode_first_half,postcode_second_half
str,i64,datetime[μs],str,str,str,str,str,str,str,str,str,str,str,str,str,cat,cat,i64,i64,i8,cat,f64,f64,str,str,str,str,str,str,str,str
"""{C0D5C5BB-7771-4E1C-9170-83B9E…",45000,1995-05-26 00:00:00,"""B31 4TH""","""S""","""N""","""L""","""99""","""""","""NUTHURST ROAD""","""BIRMINGHAM""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",402084,276999,1,"""England""",52.391011,-1.970802,"""B314TH""","""B31 4TH""","""B31 4TH""","""B""","""B31""","""B31 4""","""B31""","""4TH"""
"""{C4CA46FD-C8D3-49AA-B607-8ACD2…",42950,1995-07-14 00:00:00,"""B31 4RY""","""S""","""N""","""F""","""1""","""""","""DITTON GROVE""","""BIRMINGHAM""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",401733,276852,1,"""England""",52.389691,-1.97596,"""B314RY""","""B31 4RY""","""B31 4RY""","""B""","""B31""","""B31 4""","""B31""","""4RY"""
"""{E2C98286-0601-4ECD-AE6B-921BE…",36000,1995-08-25 00:00:00,"""B31 4RY""","""S""","""N""","""L""","""2""","""""","""DITTON GROVE""","""BIRMINGHAM""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",401733,276852,1,"""England""",52.389691,-1.97596,"""B314RY""","""B31 4RY""","""B31 4RY""","""B""","""B31""","""B31 4""","""B31""","""4RY"""
"""{6D0CA1F2-6186-4465-8F10-FA58E…",47500,1995-06-06 00:00:00,"""B31 4RU""","""T""","""N""","""F""","""125""","""""","""KINGSWOOD ROAD""","""NORTHFIELD""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",401751,276981,1,"""England""",52.390851,-1.975695,"""B314RU""","""B31 4RU""","""B31 4RU""","""B""","""B31""","""B31 4""","""B31""","""4RU"""
"""{0979AEE7-7016-4570-92B3-F6E52…",40000,1995-07-31 00:00:00,"""B31 4PB""","""T""","""N""","""F""","""45""","""""","""CROWHURST ROAD""","""BIRMINGHAM""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",401409,277073,1,"""England""",52.391679,-1.98072,"""B314PB""","""B31 4PB""","""B31 4PB""","""B""","""B31""","""B31 4""","""B31""","""4PB"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""{F87E72F9-9BED-176C-E053-6B04A…",185000,2023-03-01 00:00:00,"""B31 4EL""","""T""","""N""","""F""","""145""","""""","""CONEY GREEN DRIVE""","""""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",401598,277745,1,"""England""",52.39772,-1.97794,"""B314EL""","""B31 4EL""","""B31 4EL""","""B""","""B31""","""B31 4""","""B31""","""4EL"""
"""{F87E72F9-9493-176C-E053-6B04A…",90000,2023-02-10 00:00:00,"""B31 4SX""","""F""","""N""","""L""","""94""","""""","""NUTHURST ROAD""","""WEST HEATH""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",402039,276915,1,"""England""",52.390256,-1.971464,"""B314SX""","""B31 4SX""","""B31 4SX""","""B""","""B31""","""B31 4""","""B31""","""4SX"""
"""{F87E72F9-9951-176C-E053-6B04A…",350000,2023-03-03 00:00:00,"""B31 4RE""","""S""","""N""","""F""","""235""","""""","""LONGBRIDGE LANE""","""WEST HEATH""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",401915,277309,1,"""England""",52.393799,-1.973284,"""B314RE""","""B31 4RE""","""B31 4RE""","""B""","""B31""","""B31 4""","""B31""","""4RE"""
"""{F87E72F9-A000-176C-E053-6B04A…",141000,2023-03-01 00:00:00,"""B31 4QF""","""T""","""N""","""L""","""76""","""""","""GROVELEY LANE""","""""","""BIRMINGHAM""","""BIRMINGHAM""","""WEST MIDLANDS""","""A""","""A""","""live""","""small""",402044,277030,1,"""England""",52.39129,-1.971389,"""B314QF""","""B31 4QF""","""B31 4QF""","""B""","""B31""","""B31 4""","""B31""","""4QF"""


In [29]:
pp.collect().write_parquet(Path.cwd() / "data" / "parquet" / "price_paid.parquet")

In [31]:
postcodes.collect().write_parquet(Path.cwd() / "data" / "parquet" / "postcodes.parquet")

In [2]:
pp = pl.scan_parquet(Path.cwd() / "data" / "parquet" / "price_paid.parquet")

In [3]:
pp.select("age").unique().collect()

age
str
"""N"""
"""Y"""


In [5]:
pp.select("type").unique().collect()

type
str
"""T"""
"""F"""
"""O"""
"""S"""
"""D"""
