In [None]:
import sys
# Set before the remaining imports run, so that importing the modules below
# does not write .pyc files to disk.
sys.dont_write_bytecode = True

import os
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import h3

# Local imports.
from resale_flat_prices.resale_flat_data.resale_flat_data import ResaleFlatData
from resale_flat_prices.geocode.geocoded_addresses import GeocodedAddresses
from resale_flat_prices.h3_utils.h3_utils import latlon_to_h3
from resale_flat_prices.h3_utils.h3_vis_utils import visualize_hexagons


# Input locations, given relative to the notebook's working directory.
# processed_data_dir holds geocoded_addresses.json; raw_data_dir is handed to ResaleFlatData.
processed_data_dir = Path("../data/processed_data/")
raw_data_dir = Path("../data/ResaleFlatPrices/")

In [None]:
# Build the resale-flat dataset from the raw data directory; later cells read
# the result through resale_flat_data.df.
# NOTE(review): the three calls below are presumably load -> combine -> clean
# stages that must run in this order (inferred from the method names only) —
# confirm against ResaleFlatData.
resale_flat_data = ResaleFlatData(raw_data_dir, wanted_columns="default")
resale_flat_data.load_csv_files()
resale_flat_data.compile_data()
resale_flat_data.process_data()

In [None]:
# Read the stored geocoding results from the processed-data directory.
geocoded_json_path = processed_data_dir / "geocoded_addresses.json"
geocoded_addresses = GeocodedAddresses()
geocoded_addresses.read_json(geocoded_json_path)

In [None]:
# Compare the addresses present in the resale data with those already geocoded.
all_unique_geocoded_addresses = geocoded_addresses.get_all_geocoded_addresses()
all_unique_addresses = set(resale_flat_data.df["address"].unique())

# Addresses that occur in the resale data but have no geocoding entry; print how many.
missing_addresses = all_unique_addresses - set(all_unique_geocoded_addresses)
print(len(missing_addresses))

In [None]:
# Run the coordinate verification for Singapore and print whatever it returns.
# NOTE(review): the shape of the return value is not visible from this notebook — confirm.
verification_result = geocoded_addresses.verify_geocoded_latitudes_and_longitudes(
    country="SINGAPORE"
)
print(verification_result)

In [None]:
# Attach the geocoding columns to the resale records, matching on "address".
# A left join keeps every resale row, including rows whose address has no
# geocoded match; the printed shape lets the row count be checked by eye.
geocoded_df = geocoded_addresses.to_df()
df = resale_flat_data.df.merge(geocoded_df, on="address", how="left")
print(df.shape)

In [None]:
# H3 resolution used to bin flats into hexagonal cells. Resolution 8 cells
# average roughly 0.74 km^2 each (see the H3 resolution table); change the
# value here to re-bin at a coarser or finer grid.
H3_RESOLUTION = 8

# NOTE(review): latlon_to_h3 presumably adds the "h3" column that later cells
# read from df — confirm against h3_utils.
df = latlon_to_h3(df, H3_RESOLUTION)

In [None]:
# One entry per row of df, taken from the "h3" column, as a plain NumPy array.
# Series.to_numpy() is what pandas recommends over .values: it always returns
# an ndarray, whereas .values may return an extension array depending on the
# column's dtype.
hexes = df["h3"].to_numpy()

In [None]:
# Draw the H3 cells and keep the returned object in `m`.
# NOTE(review): `hexes` holds one entry per row of df, so repeated cells are
# passed as-is — confirm visualize_hexagons de-duplicates or that this is intended.
# NOTE(review): `m` is presumably a map object; the assignment alone produces no
# cell output — end the cell with `m` if it should be rendered inline.
m = visualize_hexagons(hexes)

In [None]:
# Spot check: convert only the first cell in `hexes` to its geographic shape.
# The bare last expression makes the result the cell's output.
first_cell = hexes[:1]
h3.cells_to_geo(first_cell)