In [None]:
import sys
# Stop Python from writing .pyc files for modules imported from here on;
# this is set before the remaining imports so that it applies to them too.
sys.dont_write_bytecode = True

import os
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas

import h3
import matplotlib.pyplot as plt
import contextily as cx

# Local imports.
from resale_flat_prices.resale_flat_data.resale_flat_data import ResaleFlatData
from resale_flat_prices.h3_utils.h3_statistics import grid_ring_monthly_median_price
from resale_flat_prices.h3_utils.h3_vis_utils import plot_df


# Data directories. Both are relative paths, so they resolve against the
# working directory the notebook kernel is started in.
# NOTE(review): csv_data_dir is not referenced by the cells visible here —
# presumably it holds the raw CSV downloads; confirm it is still needed.
csv_data_dir = Path("../data/ResaleFlatPrices/")
# Directory the zipped resale-flat-prices CSV is read from below.
processed_data_dir = Path("../data/processed_data/")

In [None]:
# Read the zipped resale flat CSV from the processed-data directory.
csv_file = "resale-flat-prices.csv.zip"
resale_flat_data = ResaleFlatData(processed_data_dir / csv_file)
resale_flat_data.read_csv()

# Preview a handful of descriptive columns, then report the table's shape.
loaded_preview_columns = [
    "year_month",
    "town",
    "block",
    "street_name",
    "flat_type",
    "storey_range",
    "price_per_sqft",
]
display(resale_flat_data.df[loaded_preview_columns].head())
print(resale_flat_data.df.shape)

In [None]:
# Build H3 geometries for the loaded data at a fixed H3 resolution.
RESOLUTION = 9
resale_flat_data.make_h3_geometries(resolution=RESOLUTION)

# Same preview as after loading, plus the "h3" column
# (presumably populated by make_h3_geometries — verify in ResaleFlatData).
h3_preview_columns = [
    "year_month",
    "town",
    "block",
    "street_name",
    "flat_type",
    "storey_range",
    "price_per_sqft",
    "h3",
]
display(resale_flat_data.df[h3_preview_columns].head())
print(resale_flat_data.df.shape)

In [None]:
# Work on a copy so the frame held by resale_flat_data is left untouched.
df = resale_flat_data.df.copy()

# Median price per square foot for every (month, H3 cell) pair,
# flattened back into ordinary columns.
median_price = (
    df[["year_month", "price_per_sqft", "h3"]]
    .groupby(["year_month", "h3"])
    .median()
    .reset_index()
)

median_price

In [None]:
# Fresh working copy of the loaded data.
df = resale_flat_data.df.copy()

# One row per distinct (H3 cell, geometry) pair; used later to re-attach
# geometries to per-cell statistics.
cell_columns = ["h3", "geometry"]
unique_cells = df[cell_columns].drop_duplicates()

# Preview the working copy and print its shape.
copy_preview_columns = [
    "year_month",
    "town",
    "block",
    "street_name",
    "flat_type",
    "storey_range",
    "price_per_sqft",
]
display(df[copy_preview_columns].head())
print(df.shape)

In [None]:
# Months to keep, and the price column the statistic is computed on.
year_months = ["2024-11", "2024-10", "2024-09", "2024-08", "2024-07", "2024-06", "2024-05"]
price_column = "price_per_sqft"

# Monthly median price per H3 cell with a grid ring distance of 1, wrapped in
# a GeoDataFrame. The exact pooling rule lives in
# h3_statistics.grid_ring_monthly_median_price — presumably neighbouring cells
# within the ring are included; verify against that helper.
median_prices_df = geopandas.GeoDataFrame(
    grid_ring_monthly_median_price(
        df,
        date_column="year_month",
        price_column=price_column,
        grid_ring_distance=1,
        h3_column_name="h3",
    )
)

# Keep only the requested months, re-attach each cell's geometry, and average
# the remaining columns within every (month, cell, geometry) group.
in_wanted_months = median_prices_df["year_month"].isin(year_months)
median_prices_df_want = geopandas.GeoDataFrame(
    median_prices_df[in_wanted_months]
    .merge(right=unique_cells, on="h3", how="left")
    .groupby(["year_month", "h3", "geometry"])
    .aggregate("mean")
    .reset_index(),
    crs=df.crs,  # reuse the CRS of the source frame
)

median_prices_df_want.head()

In [None]:
# Keyword options handed to plot_df for the price_per_sqft map.
# NOTE(review): this plots the working frame `df`, not the
# `median_prices_df_want` frame built in the preceding cell — confirm intended.
price_map_kwds = dict(
    figsize=[12, 12],
    # Fixed x/y limits for the plot window (longitude/latitude, presumably).
    xlim=[103.60152080468028, 104.0470051248534],
    ylim=[1.2359029533199608, 1.4723321131970046],
    alpha=0.5,
    categorical=False,
    column="price_per_sqft",
    legend=True,
    legend_kwds={"label": "price_per_sqft"},
    edgecolor=None,
    divider_kwds={"position": "right", "size": "5%", "pad": 0.1},
)

plot_df(df, plot_kwds=price_map_kwds)