In [None]:
import sys
sys.dont_write_bytecode = True

import os
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas

import h3
import matplotlib.pyplot as plt


# Local imports.
from resale_flat_prices.resale_flat_data.resale_flat_data import ResaleFlatData
from resale_flat_prices.h3_utils.h3_statistics import monthly_median_price
from resale_flat_prices.h3_utils.h3_vis_utils import plot_df


# Data locations, relative to the notebook's working directory.
DATA_ROOT = Path("..") / "data"
csv_data_dir = DATA_ROOT / "ResaleFlatPrices"
processed_data_dir = DATA_ROOT / "processed_data"

In [None]:
# Load the processed and geocoded resale flat data.
csv_file = "resale-flat-prices.csv.zip"

# Columns for the optional preview at the bottom of this cell.
disp_columns = [
    "year_month", "town", "block", "street_name", "flat_type", "storey_range", "floor_area_sqm", "age",
    "resale_price", "price_per_sqm", "price_per_sqft"
]

resale_flat_data = ResaleFlatData(processed_data_dir / csv_file)
resale_flat_data.read_csv()
resale_flat_data.df = resale_flat_data.df.sort_values(["year_month", "town"])

# Derive the quarter from the month as ceil(month / 3) and build a "<year>-<quarter>" label.
# Both columns are computed column-wise instead of with per-row Python lambdas, which gives
# the same values but avoids the slow DataFrame.apply(axis=1) path on a large table.
# NOTE(review): assumes `month` is numeric with no missing values — the integer cast
# raises on NaN; confirm against the processed CSV.
resale_flat_data.df["quarter"] = np.ceil(resale_flat_data.df["month"] / 3).astype(int)
resale_flat_data.df["year_quarter"] = (
    resale_flat_data.df["year"].astype(str) + "-" + resale_flat_data.df["quarter"].astype(str)
)

# Optional preview:
#display(resale_flat_data.df[disp_columns].tail())
print(resale_flat_data.df.shape)

In [None]:
# Build H3 cells at the chosen resolution.
# NOTE(review): make_h3_geometries presumably adds the "h3" and "geometry" columns
# that the rest of this notebook relies on — confirm in ResaleFlatData.
RESOLUTION = 9
resale_flat_data.make_h3_geometries(resolution=RESOLUTION)

# Columns for the optional preview at the bottom of this cell.
disp_columns = [
    "year_month",
    "town",
    "block",
    "street_name",
    "flat_type",
    "storey_range",
    "floor_area_sqm",
    "age",
    "resale_price",
    "price_per_sqft",
    "h3",
]

# Work on a copy so that changes made to `df` do not write back into resale_flat_data.df.
df = resale_flat_data.df.copy()

# One row per distinct (h3, geometry) pair; used later to re-attach geometries to per-cell results.
cell_columns = ["h3", "geometry"]
unique_cells = df[cell_columns].drop_duplicates()

# Optional preview:
#display(df[disp_columns].tail())
print(df.shape)

In [None]:
# Per-cell price: the median price for each (period, H3 cell), averaged over the wanted periods.
datetime_column = "year_quarter"
wanted_datetimes = ["2024-4", "2024-3"]
price_column = "price_per_sqft"

# NOTE(review): despite its name, monthly_median_price is called here with the quarterly
# period column; presumably it groups by (datetime_column, "h3") — confirm in h3_statistics.
cell_monthly_median_prices_df = monthly_median_price(df, datetime_column, price_column, "h3")
cell_monthly_median_prices_df = geopandas.GeoDataFrame(cell_monthly_median_prices_df)

# Keep only the wanted periods and average the prices within each cell.
# The aggregation is keyed on "h3" alone: using geometry objects as group keys relies on
# them being hashable and is unnecessary, because the geometry is re-attached afterwards.
in_wanted_periods = cell_monthly_median_prices_df[datetime_column].isin(wanted_datetimes)
period_prices = pd.DataFrame(cell_monthly_median_prices_df.loc[in_wanted_periods, ["h3", price_column]])
median_prices_df = period_prices.groupby("h3", as_index=False)[price_column].mean()

# Attach each cell's geometry. Cells without a geometry cannot be drawn, so they are dropped.
median_prices_df = median_prices_df.merge(right=unique_cells, on="h3", how="left")
median_prices_df = median_prices_df.dropna(subset=["geometry"]).reset_index(drop=True)
median_prices_df = geopandas.GeoDataFrame(
    median_prices_df[["h3", "geometry", price_column]],
    geometry="geometry",
    crs=df.crs,
)

# Ascending sort, so tail() shows the most expensive cells.
median_prices_df = median_prices_df.sort_values([price_column])

display(median_prices_df.tail())

In [None]:
# Map of the per-cell prices in median_prices_df.
# The plotted column and the legend label follow `price_column`, so changing the price
# measure in the previous cell cannot leave this plot pointing at a stale column name.
# NOTE(review): xlim/ylim are in the data's coordinate units — presumably longitude and
# latitude in degrees; confirm against df.crs.
plot_df(
    median_prices_df,
    plot_kwds={
        "figsize": [12, 12],
        "xlim": [103.60152080468028, 104.0470051248534],
        "ylim": [1.2359029533199608, 1.4733321131970046],
        "alpha": 0.5,
        "categorical": False,
        "column": price_column,
        "legend": True,
        "legend_kwds": {"label": price_column},
        "cmap": "viridis",
        "edgecolor": None,
        # NOTE(review): presumably controls where the legend/colorbar axis is placed — confirm in plot_df.
        "divider_kwds": {"position": "right", "size": "5%", "pad": 0.1},
    },
)