In [1]:
import ipywidgets as widgets
import polars as pl
import quak
import string

# Step 1: Load the housing data and keep one row per (latitude, longitude) pair.
df = pl.read_csv("https://gist.githubusercontent.com/kolibril13/4ac77cfe1ee68cae7de8d477b9c251aa/raw/66517ae387dc7251f8157e57a99e18d51095d8eb/housing.csv")
# maintain_order=True keeps the surviving rows in their original CSV order.
# Without it, unique() makes no ordering guarantee, so the position-based
# short_id values assigned further down could land on different rows from one
# run to the next.
df = df.unique(subset=["latitude", "longitude"], keep="first", maintain_order=True)

# Step 2: Generate a 3-letter sequential ID
def generate_sequential_id(index):
    """Return a '#'-prefixed three-letter ID for an integer position.

    The index is written in base 26 using the lowercase letters a-z, most
    significant letter first: 0 -> "#aaa", 1 -> "#aab", 26 -> "#aba".
    Only three letters are emitted, so the result wraps around modulo
    26**3 (index 17576 yields "#aaa" again).
    """
    alphabet = string.ascii_lowercase
    base = len(alphabet)
    # Peel off the least significant base-26 digit first, then the middle one.
    remaining, low = divmod(index, base)
    high, mid = divmod(remaining, base)
    # Reduce the leading digit so any overflow wraps instead of raising IndexError.
    return "#" + alphabet[high % base] + alphabet[mid] + alphabet[low]

# Give every row a short ID derived from its position in df.
ids = [generate_sequential_id(i) for i in range(len(df))]
# Three lowercase letters can encode only 26**3 = 17,576 distinct IDs; past
# that, generate_sequential_id wraps around and repeats. Fail loudly here
# rather than write colliding keys into df.
assert len(set(ids)) == len(ids), "short_id collision: more rows than 3-letter IDs can encode"
# insert_column modifies df in place, placing short_id as the first column.
df.insert_column(0, pl.Series("short_id", ids))

# Step 3: Build the reference DataFrame: short_id plus both coordinates
# min-max rescaled onto the range 0..10.
reference_frame = df.select(
    "short_id",
    *(
        (
            (pl.col(source) - pl.col(source).min())
            / (pl.col(source).max() - pl.col(source).min())
            * 10
        ).alias(target)
        for source, target in (
            ("longitude", "longitude_normalized"),
            ("latitude", "latitude_normalized"),
        )
    ),
)

# Show df, which now carries the short_id column, in a quak widget.
# The bare `widget` expression on the last line is what renders it as the
# cell's output.
widget = quak.Widget(df)
widget

Widget(sql='SELECT * FROM "df"', temp_indexes=True)

In [4]:
# Persist both frames as CSV. The paths are relative, so the files are written
# to the kernel's current working directory.
df.write_csv("df.csv")
# reference_frame holds only short_id and the two normalized coordinate columns.
reference_frame.write_csv("reference_frame.csv")
