## Imports

In [None]:
import datamate
import pandas as pd
import pictorial

## Load Necessary Configuration For Derived Data

In [None]:
# Resolve the data path of the derived price-paid dataset in two steps:
# dataset name -> datamate config key -> data path.
# NOTE(review): the returned path is presumably a pathlib.Path-like object,
# since the load cell calls `.joinpath` on it — confirm against datamate.
name_derived = "price-paid-data/derived"
key_derived = datamate.config.get_key_for_name(name=name_derived)
path_data_derived = datamate.config.get_path_data_for_key(key=key_derived)

## Load Data

In [None]:
# Columns requested from the derived parquet file.
columns_load = [
    # Transaction facts.
    "price",
    "date",
    # Property attributes.
    "propertyType",
    "buildType",
    "ownershipType",
    # Location.
    "townCity",
    "county",
]

# Row filters handed to the parquet reader as (column, operator, value) tuples.
filters = [("transactionType", "==", "standard")]

In [None]:
%%time
# Read only the requested columns from the derived parquet file, keeping the
# rows that match `filters`.
df = pd.read_parquet(
    path=path_data_derived.joinpath("data.parquet"),
    columns=columns_load,
    filters=filters,
)

# Add calendar helper columns derived from "date", plus a constant "count"
# column (1 per row) that the bar charts use as their y value.
df = df.assign(
    year=lambda frame: frame["date"].dt.to_period("Y").dt.to_timestamp(),
    month=lambda frame: frame["date"].dt.month,
    dayOfMonth=lambda frame: frame["date"].dt.day,
    count=1,
)

In [None]:
# Fix a deterministic, sorted value order for every categorical column; the
# mapping is passed to each chart as `category_orders`.
# Missing values are dropped before sorting: a NaN mixed in with string
# categories would make `sorted` raise a TypeError.
category_orders = {
    column: sorted(df[column].dropna().unique())
    for column in df.select_dtypes(include="category").columns
}

# Summary statistics of the categorical columns (shown as the cell output).
df.describe(include="category")

In [None]:
# Box plot of "price" per "year", with the y axis type set to "log".
# NOTE(review): `sd=False` presumably disables a standard-deviation display in
# pictorial.box — confirm against the pictorial documentation.
(
    pictorial.box(
        df,
        x="year",
        y="price",
        sd=False,
        category_orders=category_orders,
    )
    .update_yaxes(type="log")
    .update_layout(height=600)
)

In [None]:
# Bar chart of "count" per calendar month. "count" is 1 for every row, so the
# bars presumably show the number of transactions per month
# (assumes pictorial.bar sums y per x value — TODO confirm).
(
    pictorial.bar(
        df,
        x="month",
        y="count",
        category_orders=category_orders,
    )
    .update_layout(height=600)
)

In [None]:
# Bar chart of "count" per day of the month. "count" is 1 for every row, so
# the bars presumably show the number of transactions per day of month
# (assumes pictorial.bar sums y per x value — TODO confirm).
(
    pictorial.bar(
        df,
        x="dayOfMonth",
        y="count",
        category_orders=category_orders,
    )
    .update_layout(height=600)
)

In [None]:
def plot_color(
    df: pd.DataFrame,
    color: str,
):
    """Show two per-year charts of ``df`` split by the ``color`` column.

    The first chart is a bar chart of ``count``; the second is a box plot of
    ``price`` whose y axis type is set to ``"log"``. Both charts use the
    module-level ``category_orders`` and are displayed via ``.show()``.

    Args:
        df: Data to plot; must provide the "year", "count" and "price"
            columns as well as the column named by ``color``.
        color: Name of the column passed as the charts' ``color`` argument;
            it is also interpolated into the chart titles.
    """
    # Keyword arguments common to both charts.
    shared = dict(x="year", color=color, category_orders=category_orders)

    counts = pictorial.bar(df, y="count", **shared)
    counts = counts.update_layout(height=600, title=f"Count by {color}")
    counts.show()

    prices = pictorial.box(df, y="price", sd=False, **shared)
    prices = prices.update_yaxes(type="log")
    prices = prices.update_layout(
        height=600,
        title=f"Price distribution by {color}",
    )
    prices.show()

In [None]:
# Per-year count and price-distribution charts, coloured by "propertyType".
plot_color(df=df, color="propertyType")

In [None]:
# Per-year count and price-distribution charts, coloured by "buildType".
plot_color(df=df, color="buildType")

In [None]:
# Per-year count and price-distribution charts, coloured by "ownershipType".
plot_color(df=df, color="ownershipType")

In [None]:
# Rows whose buildType is "new" and whose price is exactly 1.
df[(df["buildType"] == "new") & (df["price"] == 1)]