In [1]:
import sys
sys.dont_write_bytecode = True

from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas

import matplotlib.pyplot as plt


# Local imports.
from resale_flat_prices.resale_flat_data.resale_flat_data import ResaleFlatData
from resale_flat_prices.resale_flat_data.rent_prices_data import RentPricesData
from resale_flat_prices.h3_utils.h3_statistics import monthly_median_price
from resale_flat_prices.vis_utils.vis_utils import plot_df


# Data directories (relative paths, so they resolve against the notebook's working directory).
csv_data_dir = Path("../data/ResaleFlatPrices/")
processed_data_dir = Path("../data/processed_data/")

# File names of the processed and geocoded resale flat and rent price data.
# Nothing is loaded here; the names are joined onto processed_data_dir when the data is read.
resale_flat_data_csv_file = "resale-flat-prices.csv.zip"
rent_prices_data_csv_file = "rent-prices.csv.zip"

In [2]:
def add_1_quarter_to_year_quarter(year_quarter):
    """Return the quarter that follows `year_quarter`.

    `year_quarter` is a string formatted as "<year>-Q<quarter>", e.g. "2020-Q4".
    The result uses the same format; stepping past Q4 yields Q1 of the next year.
    """
    parts = year_quarter.split("-")
    year = int(parts[0])
    next_quarter = int(parts[1].strip("Q")) + 1

    # Only the step from Q4 rolls over into the following year.
    if next_quarter == 5:
        return "{}-Q{}".format(year + 1, 1)
    return "{}-Q{}".format(year, next_quarter)

def year_quarter_range(year_quarter_1, year_quarter_2):
    """Build every quarter from `year_quarter_1` to `year_quarter_2`, inclusive.

    Parameters
    ----------
    year_quarter_1 : str
        First quarter of the range, formatted as "<year>-Q<quarter>", e.g. "2019-Q3".
    year_quarter_2 : str
        Last quarter of the range, in the same format.

    Returns
    -------
    pandas.DataFrame
        A single "year_quarter" column with one row per quarter, in chronological order.

    Raises
    ------
    ValueError
        If a quarter number is outside 1-4, or if `year_quarter_2` precedes
        `year_quarter_1`. Stepping one quarter at a time until the end label is
        reached would never terminate for such inputs.
    """
    def _to_ordinal(year_quarter):
        # Count quarters on a single integer axis so consecutive quarters differ by 1.
        parts = year_quarter.split("-")
        year, quarter = int(parts[0]), int(parts[1].strip("Q"))
        if not 1 <= quarter <= 4:
            raise ValueError("Invalid year-quarter: {!r}.".format(year_quarter))
        return 4 * year + (quarter - 1)

    first, last = _to_ordinal(year_quarter_1), _to_ordinal(year_quarter_2)
    if last < first:
        raise ValueError(
            "year_quarter_2 ({!r}) is earlier than year_quarter_1 ({!r}).".format(year_quarter_2, year_quarter_1)
        )

    labels = ["{}-Q{}".format(i // 4, i % 4 + 1) for i in range(first, last + 1)]
    return pd.DataFrame(labels, columns = ["year_quarter"])

In [None]:
# H3 cell creation using a specified resolution.
RESOLUTION = 9

# Column names for the median price calculation.
datetime_column = "year_quarter"
price_column = "price_per_sqft"
rent_column = "monthly_rent"


def _add_quarter_columns(df):
    """Return a copy of `df` with "quarter" and "year_quarter" columns added.

    "quarter" is ceil(month / 3) and "year_quarter" is the string "<year>-Q<quarter>".
    Both are computed column-wise instead of with a row-wise `apply`.
    Assumes "year" and "month" are integer columns without missing values -- TODO confirm.
    """
    quarter = np.ceil(df["month"] / 3).astype(int)
    year_quarter = df["year"].astype(str) + "-Q" + quarter.astype(str)
    return df.assign(quarter = quarter, year_quarter = year_quarter)


# Resale flat data, sorted chronologically and then by town, with the
# quarter each resale occurred in added as extra datetime columns.
resale_flat_data = ResaleFlatData(processed_data_dir / resale_flat_data_csv_file)
resale_flat_data.read_csv()
resale_flat_data.df = _add_quarter_columns(
    resale_flat_data.df.sort_values(["year_month", "town"])
)

# Rent data, prepared the same way (the quarter here is the one the rental was recorded in).
rent_prices_data = RentPricesData(processed_data_dir / rent_prices_data_csv_file)
rent_prices_data.read_csv()
rent_prices_data.df = _add_quarter_columns(
    rent_prices_data.df.sort_values(["year_month", "town"])
)

# Replace hyphens with spaces in the rent flat types.
# NOTE(review): presumably so they match the resale flat types when merging -- confirm.
# The .str accessor passes missing values through instead of failing on them.
rent_prices_data.df["flat_type"] = rent_prices_data.df["flat_type"].str.replace("-", " ", regex = False)

# Drop resale records from years before the rent data starts.
first_rent_year = rent_prices_data.df["year"].min()
resale_flat_data.df = resale_flat_data.df[resale_flat_data.df["year"] >= first_rent_year]

# Keep only the columns needed downstream.
resale_flat_data.df = resale_flat_data.df[[
    'year', 'quarter', 'town', 'block', 'street_name_cleaned', 'flat_type', 'floor_area_sqm', 'lease_commence_date',
    "latitude", "longitude", 'resale_price'
]]

rent_prices_data.df = rent_prices_data.df[[
    'year', 'quarter', "town", "block", "street_name_cleaned", "flat_type",
    "latitude", "longitude", "monthly_rent"
]]

display(resale_flat_data.df.head())
print("Resale flat data DataFrame: {}.".format(resale_flat_data.df.shape))

display(rent_prices_data.df.head())
print("Rent prices data DataFrame: {}.".format(rent_prices_data.df.shape))

In [None]:
# Work on copies of the prepared frames.
resale_df = resale_flat_data.df.copy()
rent_df = rent_prices_data.df.copy()

# Columns that both frames are grouped by and then joined on.
merge_keys = ["year", "quarter", "block", "street_name_cleaned", "flat_type", "latitude", "longitude"]

# Median resale price for every combination of the key columns.
resale_df_median = resale_df[merge_keys + ["resale_price"]].groupby(merge_keys).median().reset_index()

# Median monthly rent for every combination of the key columns.
rent_df_median = rent_df[merge_keys + ["monthly_rent"]].groupby(merge_keys).median().reset_index()

# The outer join keeps key combinations that appear in only one of the two frames.
df = resale_df_median.merge(rent_df_median, on = merge_keys, how = "outer")

display(df[df["street_name_cleaned"] == "ANG MO KIO AVENUE 4"].head())
print(df.shape)

In [5]:
"""
disp_cols = ['year', 'month', "year_quarter", 'address', 'storey_range', 'flat_type', 'age', 'price_per_sqft']

xdf = df[disp_cols].sort_values(["year", "month", "price_per_sqft"], ascending = False)

xdf = xdf[["year_quarter", "price_per_sqft"]].groupby("year_quarter").median().reset_index()

all_time_steps = year_quarter_range(xdf["year_quarter"].iloc[0], xdf["year_quarter"].iloc[-1])
xdf = pd.merge(all_time_steps, xdf, left_on = "year_quarter", right_on = "year_quarter", how = "left")
""";

In [6]:
"""
plt.figure(figsize = [20, 5])
plt.plot(xdf["year_quarter"], xdf["price_per_sqft"], "o")
plt.xticks(rotation = 90)
plt.grid(True)
plt.show()
""";