In [None]:
"""
File: read_ghcn.py
Author: ned haughton
Description: Code for reading GHCN dly file data
"""

import pandas as pd
import numpy as np


# Metadata specs #

# One row per field of the station metadata file, in file order:
# (column name, span start, span end, forced dtype or None).
# Spans are handed to ``pd.read_fwf`` as ``colspecs``; a dtype of None means
# the column is absent from ``metadata_dtype`` and pandas infers its type.
_METADATA_FIELDS = (
    ("ID",           0,  12, str),
    ("LATITUDE",     12, 21, None),
    ("LONGITUDE",    21, 31, None),
    ("ELEVATION",    31, 38, None),
    ("STATE",        38, 41, str),
    ("NAME",         41, 72, str),
    ("GSN FLAG",     72, 76, str),
    ("HCN/CRN FLAG", 76, 80, str),
    ("WMO ID",       80, 86, str),
)

# (start, end) character span of every field.
metadata_col_specs = [
    (start, end) for _name, start, end, _kind in _METADATA_FIELDS
]

# Column labels, in the same order as the spans above.
metadata_names = [name for name, _start, _end, _kind in _METADATA_FIELDS]

# Columns that must be read with an explicit dtype (all of them as text).
metadata_dtype = {
    name: kind
    for name, _start, _end, kind in _METADATA_FIELDS
    if kind is not None
}


def read_station_metadata(
        filename="../preprocessing_data/ghcnd-stations.txt") -> pd.DataFrame:
    """Reads in station metadata

    Parses the fixed-width file using the module-level ``metadata_col_specs``
    (field spans), ``metadata_names`` (column labels) and ``metadata_dtype``
    (columns forced to ``str``); columns without a forced dtype are left to
    pandas' type inference.

    :filename: ghcnd station metadata file.
    :returns: station metadata as a pandas Dataframe, indexed by the
        ``ID`` column

    """
    # ``colspecs`` must be given by keyword: pandas 2.0 made every
    # ``read_fwf`` argument after the path keyword-only, so passing the spec
    # list positionally raises TypeError there (and warns on late 1.x).
    df = pd.read_fwf(filename, colspecs=metadata_col_specs,
                     names=metadata_names, index_col='ID',
                     dtype=metadata_dtype)

    return df


In [None]:
# Load the station table (indexed by station ID) from the default path.
df: pd.DataFrame = read_station_metadata()

# Pair each station's latitude and longitude into one (lat, lon) tuple,
# labelled with the same index as the station table.
coord = pd.Series(
    list(zip(df["LATITUDE"], df["LONGITUDE"])),
    index=df.index,
)

# Attach the pairs as a new column and persist the enriched table.
df["coord"] = coord
df.to_parquet("../preprocessing_data/stations.parquet")