In [45]:
import sys
sys.dont_write_bytecode = True

from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas

import matplotlib.pyplot as plt


# Local imports.
from property_prices.private_residential_data.private_residential_data import PrivateResidentialData
from property_prices.vis_utils.vis_utils import plot_df


# Data directories.
# Relative path; it is joined with input_file_name when the data is loaded.
processed_data_dir = Path("../data/processed_data/")

# File name (inside processed_data_dir) of the processed, indexed condominium
# transaction data. The geometry used for mapping is built later from RESOLUTION.
input_file_name = Path("condo_transactions-indexed.parquet")

# H3 resolution passed to make_h3_geometries() when the cell geometries are created.
RESOLUTION = 9

In [None]:
# Private residential (condominium) transaction data, indexed.

# Price column to analyse; the commented-out line is the alternative column.
#price_col = "transacted_price"
price_col = "unit_price_psf"
price_index = "price_index"
price_col_indexed = price_col + "_indexed"

data = PrivateResidentialData(processed_data_dir / input_file_name)
data.read_parquet()

# For condominiums, keep only single-unit transactions whose type_of_area is STRATA.
is_single_unit = data.df["number_of_units"] == 1
is_strata = data.df["type_of_area"] == "STRATA"

# Take an explicit copy of the filtered rows: a new column is assigned below,
# and assigning into a filtered slice is what pandas flags with
# SettingWithCopyWarning. The copy makes the lineage unambiguous.
data.df = data.df.loc[is_single_unit & is_strata].copy()

#data.df = data.df[data.df["street_name"] == "BUKIT TIMAH ROAD"]

# Divide each price by the price index of its row.
# NOTE(review): a zero or missing price_index yields inf/NaN here — confirm the
# index column is always populated and positive.
data.df[price_col_indexed] = data.df[price_col] / data.df[price_index]

data.make_h3_geometries(resolution = RESOLUTION)

display(data.df.head())
print(f"Loaded df.shape: {data.df.shape}.")

In [52]:
def quantile(X, q = 0.5, axis = None):
    """Return the q-th quantile of X, computed with numpy.quantile.

    Parameters
    ----------
    X : array-like
        Input values.
    q : float or array-like of float, default 0.5
        Quantile(s) to compute; the default of 0.5 gives the median.
    axis : int, tuple of int or None, default None
        Axis along which to compute; None reduces over the flattened input.

    Returns
    -------
    numpy scalar or ndarray
        Whatever numpy.quantile returns for the given arguments.
    """
    result = np.quantile(X, q, axis=axis)
    return result


# Median indexed price per distinct geometry (presumably the H3 cells created
# by make_h3_geometries — verify against that method).
median_df = (
    data.df[["geometry", price_col_indexed]]
    .groupby(["geometry"])
    .median()
    .reset_index()
)

# Wrap the aggregated frame as a GeoDataFrame again and give it the CRS of
# the source data.
median_df = geopandas.GeoDataFrame(median_df).set_crs(data.df.crs)

# Round each median to the nearest integer.
median_df[price_col_indexed] = median_df[price_col_indexed].map(
    lambda value: int(round(value))
)

In [None]:
# Build the title and legend text from the selected price column, so they
# stay correct when price_col is switched. The previous hard-coded text said
# "transacted price" even though the plotted column is derived from price_col.
# NOTE(review): units are inferred from the column names and the previous
# label ("psf" = per square foot, currency SGD) — confirm.
price_labels = {
    "transacted_price": "transacted price [SGD]",
    "unit_price_psf": "unit price [SGD psf]",
}
# The plotted values were divided by the price index, hence "index-adjusted".
# Unknown columns fall back to the raw column name.
price_label = "index-adjusted " + price_labels.get(price_col, price_col)
legend_label = price_label[:1].upper() + price_label[1:] + "."

plot_df(
    median_df[["geometry", price_col_indexed]],
    plot_kwds = {
        "figsize": [12, 12],
        # Fixed map window; presumably longitude/latitude bounds of the study
        # area — TODO confirm against the CRS of median_df.
        "xlim": [103.60152080468028, 104.0470051248534],
        "ylim": [1.2359029533199608, 1.4733321131970046],
        "title": "Median {}.".format(price_label),
        "alpha": 0.75,
        "categorical": False,
        "column": price_col_indexed,
        "legend": True,
        "legend_kwds": {"label": legend_label},
        "cmap": 'viridis',
        "edgecolor": None,
        "divider_kwds": {"position": "right", "size": "5%", "pad": 0.1},
    },
)

In [59]:
#data.df[data.df["geometry"] == median_df.sort_values(price_col_indexed).iloc[-1]["geometry"]]