## Imports

In [None]:
import datamate
import geopandas as gpd
import pandas as pd
import pictorial
import plotly.express as px

## Load Necessary Configuration For Derived Data

In [None]:
# Name used to look up the derived police dataset in datamate's config.
name_derived = "police-data/derived"
# Resolve the name to a key, then the key to the path holding the data files.
# NOTE(review): the path is later used via `.joinpath(...)`, so it is presumably
# a pathlib-style object — confirm against datamate.
key_derived = datamate.config.get_key_for_name(name=name_derived)
path_data_derived = datamate.config.get_path_data_for_key(key=key_derived)

## Load Data

In [None]:
# Columns to read from the derived parquet file; passed as `columns=` to
# `pd.read_parquet` so that only these are loaded.
columns_load = [
    "date",
    "crimeType",
    "longitude",
    "latitude",
    "location",
    "lsoaCode",
    "lsoaName",
    "policeForce",
]

# Row filter passed as `filters=` to `pd.read_parquet`: keep rows with year > 2018.
# NOTE(review): assumes the parquet data exposes a `year` field (column or
# partition key) even though it is not listed in `columns_load` — confirm.
filters = [
    ("year", ">", 2018),
]

In [None]:
%%time
# Load the filtered subset of the derived parquet data.
df = pd.read_parquet(
    path=path_data_derived.joinpath("data.parquet"),
    columns=columns_load,
    filters=filters,
)

# Derive calendar helper columns from `date`:
#   year       - timestamp at the start of the record's year
#   month      - month number
#   dayOfMonth - day number within the month
#   count      - constant 1, so summing `count` yields the number of records
df = df.assign(
    year=df["date"].dt.to_period("Y").dt.to_timestamp(),
    month=df["date"].dt.month,
    dayOfMonth=df["date"].dt.day,
    count=1,
)

In [None]:
# Summarise the categorical columns. `display` is called explicitly because a
# notebook cell only auto-renders its final expression; a bare `df.describe(...)`
# followed by further statements is computed and then silently discarded.
display(df.describe(include="category"))

# For every categorical column, list its categories from most to least frequent
# (by summed `count`). The resulting mapping is passed as `category_orders` to
# the bar charts in this notebook.
# `observed=False` pins the current grouping behaviour (unobserved categories are
# kept, with a sum of 0) and avoids the pandas FutureWarning about the changing
# default of `observed`.
category_orders = {
    column: (
        df.groupby(column, observed=False)["count"]
        .sum()
        .sort_values(ascending=False)
        .index.tolist()
    )
    for column in df.select_dtypes(include="category")
}
print(category_orders)

In [None]:
# Bar chart of `count` per `year`, coloured by police force.
# `category_orders` supplies the most-frequent-first category ordering and the
# layout height is set to 600.
# NOTE(review): `pictorial.bar` presumably mirrors the plotly express bar API
# (the result exposes `update_layout`) — confirm.
pictorial.bar(
    df,
    x="year",
    y="count",
    color="policeForce",
    category_orders=category_orders,
).update_layout(height=600)

In [None]:
# Bar chart of `count` per `year`, coloured by crime type.
# `category_orders` supplies the most-frequent-first category ordering and the
# layout height is set to 600.
pictorial.bar(
    df,
    x="year",
    y="count",
    color="crimeType",
    category_orders=category_orders,
).update_layout(height=600)

In [None]:
# Bar chart of `count` per calendar `month`, coloured by crime type.
# `category_orders` supplies the most-frequent-first category ordering and the
# layout height is set to 600.
pictorial.bar(
    df,
    x="month",
    y="count",
    color="crimeType",
    category_orders=category_orders,
).update_layout(height=600)

In [None]:
# response = requests.get("https://github.com/gausie/LSOA-2011-GeoJSON/raw/master/lsoa.geojson")

In [None]:
# geometry = response.text

In [None]:
# Read the LSOA boundary polygons straight from the hosted GeoJSON file.
# The frame is later joined to the crime counts on its `LSOA11CD` column.
df_geo = gpd.read_file(
    "https://github.com/gausie/LSOA-2011-GeoJSON/raw/master/lsoa.geojson"
)

In [None]:
# df_plot = (
#     df
#     [lambda x: x["policeForce"] == "metropolitan"]
#     .value_counts(["year", "lsoaCode"])
#     .rename("count")
#     .reset_index()
#     .assign(year=lambda x: x["year"].dt.year)
#     .head(1000)
# )

In [None]:
import numpy as np

In [None]:
# Number of "violence-and-sexual-offences" records per LSOA. The code column is
# renamed to `LSOA11CD` so it lines up with the key used in the boundary data.
df_violence_counts = (
    df.loc[df["crimeType"] == "violence-and-sexual-offences"]
    .rename(columns={"lsoaCode": "LSOA11CD"})
    .value_counts("LSOA11CD")
    .rename("count")
    .reset_index()
)
# Log-scaled count, used to colour the map.
# NOTE(review): if `lsoaCode` is categorical, `value_counts` may also emit
# zero-count codes, for which the log is -inf — confirm whether that can occur.
df_violence_counts["logCount"] = np.log(df_violence_counts["count"])

# Attach the counts to the LSOA polygons (default inner join on `LSOA11CD`).
df_plot = df_geo.merge(df_violence_counts, on="LSOA11CD")

df_plot

In [None]:
# Interactive map of `df_plot`, coloured by the log-scaled count column.
df_plot.explore(column="logCount")

In [None]:
# df_plot.explore(column="count")

In [None]:
# Choropleth of the per-LSOA counts held in `df_plot`.
#
# `df_plot` has an `LSOA11CD` column but no `lsoaCode` or `year` column, and no
# standalone `geometry` GeoJSON object is defined in this notebook. The polygons
# are therefore taken from the frame itself: passing its geometry column as
# `geojson` lets plotly match features by the frame's index, which is why
# `locations` is the index rather than a code column. There is no per-year
# animation because `df_plot` is aggregated over all years.
fig = px.choropleth(
    df_plot,
    geojson=df_plot.geometry,
    locations=df_plot.index,
    color="count",
).update_geos(
    # Zoom to the plotted polygons and hide the default world base map.
    fitbounds="locations",
    visible=False,
)

In [None]:
# Render the choropleth figure.
fig.show()

In [None]:
# from urllib.request import urlopen
# import json
# with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
#     counties = gpd.read_file(response)

# import pandas as pd
# df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
#                    dtype={"fips": str})

# import plotly.express as px

# fig_plotly = px.choropleth(df, geojson=counties.geometry, locations='fips', color='unemp',
#                            color_continuous_scale="Viridis",
#                            range_color=(0, 12),
#                            scope="usa",
#                            labels={'unemp':'unemployment rate'}
#                           )
# fig_plotly.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
# # fig_plotly.show()

In [None]:
# Inspect a single geometry (row position 5) from the merged frame.
df_plot["geometry"].iloc[5]

In [None]:
# Check the type of the merged frame.
type(df_plot)