In [15]:
from __future__ import annotations
from typing import get_type_hints
from pathlib import Path


def test(t: Path):
    ...

{'t': pathlib.Path}

In [18]:
from pathlib import Path
from shapely import Point
import pandas as pd
import geopandas as gpd
from tqdm import tqdm

# Locate the project root: the nearest directory, starting at the current
# working directory and walking upwards, that contains a pyproject.toml.
#
# The candidates are a finite sequence (cwd followed by each of its parents),
# so the search always terminates. Repeatedly taking `.parent` in an open-ended
# loop would spin forever when no marker exists, because the parent of the
# filesystem root is the root itself.
_search_start = Path.cwd()

for top_level in (_search_start, *_search_start.parents):
    if (top_level / "pyproject.toml").exists():
        break
else:
    # No candidate contained the marker: fail loudly instead of hanging.
    raise FileNotFoundError(
        f"No pyproject.toml found in {_search_start} or any of its parent directories"
    )

# Path of the DZ2021 parquet file, resolved relative to the project root.
p = top_level.joinpath("data", "raw", "external", "DZ2021.parquet")

# Load the file into a GeoDataFrame.
gdf = gpd.read_parquet(p)

In [24]:
# Reproject the geometries to EPSG:27700 (British National Grid) so that
# centroid coordinates come out as eastings / northings.
gdf = gdf.to_crs(epsg=27700)

# to_crs() hands back a new frame unless called with inplace=True, so this
# guard mainly narrows the type for static checkers.
if gdf is None:
    raise ValueError("Error with CRS")

# Compute the centroid of each polygon exactly once and reuse the result.
# Attribute access `gdf.centroid` resolves to GeoDataFrame's `centroid`
# property (recomputed from the active geometry), not to the "centroid"
# column, so reading `.x` / `.y` through it would redo the computation
# for every coordinate.
centroids = gdf.geometry.centroid
gdf["centroid"] = centroids

# Eastings / northings are the x / y coordinates of each centroid.
gdf["Eastings"] = centroids.x
gdf["Northings"] = centroids.y

# Columns of the output CSV, in the order they are written.
# NOTE(review): the names appear to follow the Code-Point Open CSV layout
# (the file is written into a "codepo" folder) -- confirm.
header = [
    "Postcode",
    "Positional_quality_indicator",
    "Eastings",
    "Northings",
    "Country_code",
    "NHS_regional_HA_code",
    "NHS_HA_code",
    "Admin_county_code",
    "Admin_district_code",
    "Admin_ward_code",
]

# Build the output table in one chain. Every step returns a new frame, so no
# column is ever assigned into a selection of another frame (which triggers
# SettingWithCopyWarning on pandas versions without copy-on-write).
df = (
    gdf[["DZ2021_cd", "Eastings", "Northings"]]  # type: ignore
    # the DZ2021 code is written out in the "Postcode" column
    .rename(columns={"DZ2021_cd": "Postcode"})
    # constant quality indicator for every row
    # (meaning of the value 10 is not shown here -- confirm against the format spec)
    .assign(Positional_quality_indicator=10)
    # reorder to `header`; header columns not populated above are created as
    # blank strings
    .reindex(columns=header, fill_value="")
    # whole-number coordinates: astype(int) drops the fractional part and
    # raises if a coordinate is NaN
    .astype({"Eastings": int, "Northings": int})
)

# Preview of the first rows; only rendered by Jupyter when it is the last
# expression in the cell.
df.head()

# Folder, relative to the project root, that receives the generated CSV.
codepo_folder = top_level.joinpath("data", "raw", "external", "codepo", "csv")

# Write the table as a bare CSV: no index column and no header row.
df.to_csv(codepo_folder.joinpath("fake_ni.csv"), header=False, index=False)