# Analysis of digitized base circles

In [1]:
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from itertools import count
from pathlib import Path
import subsampling_config



In [2]:
import fractopo_subsampling.utils as utils
import fractopo_subsampling.plotting_utils as plotting_utils

In [3]:
# Apply one seaborn theme ("paper" context, "ticks" style, fonts scaled by
# 1.3) for the plots created in this notebook.
sns.set_theme(context="paper", style="ticks", font_scale=1.3)

## Read data

In [7]:
# NOTE(review): looks like a leftover check; this only displays the relative
# path object and the result is not assigned to anything.
Path(".")

PosixPath('.')

In [4]:
# Load the analysis points from the path configured in subsampling_config.
# NOTE(review): the saved output of this cell is a DriverError (file not
# found); every later cell needs this read to succeed.
analysis_points = gpd.read_file(subsampling_config.analysis_points_path)

DriverError: ../results/Ahvenanmaa_analysis_points.gpkg: No such file or directory

Data is filtered to only the selected target areas, which are spatially concentrated. Neither the radius nor the coverage filter affects this dataset.

In [None]:
# Keep only the rows that belong to the target areas named in the config.
target_area_names = list(subsampling_config.circle_names_with_diameter)

# NOTE(review): the radius bounds are presumably in meters and the coverage
# threshold a fraction of circle area -- confirm against utils.filter_dataframe.
filtered = utils.filter_dataframe(
    analysis_points,
    target_area_names,
    filter_radius=(5, 51),
    relative_coverage_threshold=0.11,
)

The maximum relative coverage is:

In [None]:
# Largest value of the relative coverage column among the filtered rows.
filtered[utils.Utils.relative_coverage].max()

In [None]:
# Row counts before and after filtering.
analysis_points.shape[0], filtered.shape[0]

Shoreline dataset contains a rough sketch of the Getaberget shoreline for visualization purposes only.

In [None]:
# Read the shoreline sketch from the location configured in
# subsampling_config instead of an absolute path that exists only on one
# machine, so the notebook can be re-run elsewhere.
# NOTE(review): confirm that subsampling_config.shoreline_geojson_url is
# reachable from the environment where this notebook is executed.
shoreline = gpd.read_file(subsampling_config.shoreline_geojson_url)

## Determine reference values for simulation analyses

The parameter values determined from the full dataset should represent the most accurate assessment.

Because the full circle radii vary, the achieved parameter results must be weighted by the area of the circle.

In [None]:
# Area-weighted mean of every selected parameter over the filtered base
# circles, rounded to four decimals and keyed by the name returned from
# utils.param_renamer.
reference_values = {
    utils.param_renamer(param): round(
        np.average(filtered[param], weights=filtered["area"]), 4
    )
    for param in utils.Utils.params_with_func
    if param in utils.Utils.selected_params
}
# Save to local file.
# The table is built from a list of (name, value) pairs rather than from the
# dict's key/value views: key views are set-like, which the DataFrame
# constructor may refuse as unordered input depending on the pandas version.
reference_value_df = pd.DataFrame(
    list(reference_values.items()), columns=["Parameter", "Value"]
)
reference_value_df.to_csv(
    subsampling_config.base_circle_reference_value_csv_path, index=False
)

In [None]:
# Display the table of weighted reference values.
reference_value_df

Total area in base circles.

In [None]:
# Sum of the "area" column over all filtered rows.
filtered["area"].sum()

## Spatial variance analysis of parameters

Do parameters have a direct correlation with their spatial location in E-W or N-S axes?

In [None]:
# Store the point coordinates as plain columns so they can be used as plot
# variables. The vectorised GeoSeries accessors replace a Python-level loop
# over the geometries; to_numpy() keeps the assignment positional, exactly
# like assigning a list.
filtered["x"] = filtered.geometry.x.to_numpy()
filtered["y"] = filtered.geometry.y.to_numpy()

In [None]:
# Show the "name" column of the filtered rows.
filtered["name"]

In [None]:
# Standalone preview of the base circle map; the same helper is called again
# further down to draw the top panel of the combined figure.
plotting_utils.plot_base_circle_map(filtered=filtered, shoreline=shoreline)

In [None]:
# Derive the base circle identifiers. id_dict is keyed by circle name and
# holds the ID as its value (that is how the following cells index it).
coords = plotting_utils.base_circle_id_coords(filtered)
id_dict = plotting_utils.base_circle_id_dict(coords)
id_dict

In [None]:
# One record per base circle: its ID, its radius and its name. The radius is
# read from the first row of `filtered` whose "name" matches the circle.
base_circle_dicts = [
    {
        "ID": circle_id,
        "Radius (m)": filtered["radius"].values[filtered["name"] == circle_name][0],
        "Name": circle_name,
    }
    for circle_name, circle_id in id_dict.items()
]
df = pd.DataFrame(base_circle_dicts)

# Update a local file with always correct ids for all target areas
df.to_csv(subsampling_config.base_circle_ids_csv_path, index=False)

In [None]:
# Update filtered as alternative to Ahvenanmaa_analysis_points
# Each row gets the ID registered for its name in id_dict; a name missing
# from id_dict raises KeyError.
filtered["CIRCLE_ID"] = list(map(id_dict.__getitem__, filtered["name"].values))

# Have to drop x and y
filtered_without_xy = filtered.drop(columns=["x", "y"])
filtered_without_xy.to_file(
    subsampling_config.filtered_analysis_points, driver="GPKG"
)

In [None]:
# Sum of the "Number of Traces" column over all filtered rows.
filtered["Number of Traces"].sum()

In [None]:
def format_func_m_to_km(value, tick_number):
    """
    Convert a tick value from meters to whole kilometers.

    Takes the two positional arguments that ``FuncFormatter`` passes to its
    callback; ``tick_number`` is accepted for that reason only and is not
    used. The result is truncated toward zero by ``int``, not rounded.
    """
    kilometers = value / 1000
    return int(kilometers)

In [None]:
fig = plt.figure(constrained_layout=True, figsize=utils.paper_figsize(0.8))

# 3 x 2 grid: the map spans the whole top row and the four regression panels
# fill the two rows below it.
gs = fig.add_gridspec(3, 2)
fig_top_ax = fig.add_subplot(gs[0, :])
fig_left_middle = fig.add_subplot(gs[1, 0])
fig_right_middle = fig.add_subplot(gs[1, 1])
fig_left_bot = fig.add_subplot(gs[2, 0])
fig_right_bot = fig.add_subplot(gs[2, 1])

# Panel A: base circle map with both axes labelled in kilometers.
fig_top_ax = plotting_utils.plot_base_circle_map(
    filtered=filtered, shoreline=shoreline, ax=fig_top_ax
)
for map_axis in (fig_top_ax.xaxis, fig_top_ax.yaxis):
    # A fresh formatter per axis, as each formatter is attached to one axis.
    map_axis.set_major_formatter(plt.FuncFormatter(format_func_m_to_km))
fig_top_ax.set_ylabel("Y (km)")
fig_top_ax.set_xlabel("X (km)")
fig_top_ax.legend(edgecolor="black", loc="lower right", framealpha=1)
fig_top_ax.text(0.025, 0.9, "A", transform=fig_top_ax.transAxes, fontweight="bold")
fig_top_ax.text(
    0.020,
    0.04,
    "ETRS-TM35FIN",
    transform=fig_top_ax.transAxes,
    fontsize=9,
    style="italic",
)

# Panels B-E: the first four parameters regressed against the x coordinate.
regression_params = list(utils.Utils.params_with_func)[0:4]
regression_axes = [fig_left_middle, fig_right_middle, fig_left_bot, fig_right_bot]
for text, param, ax in zip("BCDE", regression_params, regression_axes):
    sns.regplot(data=filtered, x="x", y=param, ax=ax, ci=95, color="black")
    # Only override the default y label when the renamer changes the name.
    renamed_param = utils.param_renamer(param)
    if renamed_param != param:
        ax.set_ylabel(renamed_param)
    ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func_m_to_km))
    ax.set_xlabel("X (km)")
    # Pad the x range by 100 coordinate units on both sides of the data.
    ax.set_xlim(min(filtered["x"]) - 100, max(filtered["x"]) + 100)
    ax.text(0.05, 0.9, text, transform=ax.transAxes, fontweight="bold")

fig.savefig(
    subsampling_config.results_path / "base_circle_map.svg", bbox_inches="tight"
)

## Miscellaneous analyses

Simplify the significance of powerlaw vs. lognormal distribution comparison for traces.

In [None]:
# Simplify significance of powerlaw vs. lognormal distribution comparison
filtered["POWERLAW_VS_LOGNORMAL RP"] = [
    val if r > 0 else -val
    for val, r in zip(
        filtered[utils.Utils.trace_power_law_vs_lognormal_p],
        filtered[utils.Utils.trace_power_law_vs_lognormal_r],
    )
]

for param in utils.Utils.params_with_func:
    sns.lmplot(
        data=filtered, x="POWERLAW_VS_LOGNORMAL RP", y="trace power_law exponent"
    )
    break

Plot RP and cut-off.

In [None]:
# Linear-model plot of the "POWERLAW_VS_LOGNORMAL RP" column against the
# trace power-law cut-off.
sns.lmplot(data=filtered, x="POWERLAW_VS_LOGNORMAL RP", y="trace power_law cut-off")

Simplify the significance of powerlaw vs. lognormal distribution comparison for branches.

In [None]:
# Simplify significance of powerlaw vs. lognormal distribution comparison
filtered["BRANCH POWERLAW_VS_LOGNORMAL RP"] = [
    val if r > 0 else -val
    for val, r in zip(
        filtered[utils.Utils.trace_power_law_vs_lognormal_p.replace("trace", "branch")],
        filtered[utils.Utils.trace_power_law_vs_lognormal_r.replace("trace", "branch")],
    )
]

for param in utils.Utils.params_with_func:
    sns.lmplot(
        data=filtered,
        x="BRANCH POWERLAW_VS_LOGNORMAL RP",
        y="branch power_law exponent",
    )
    break

In [None]:
# Linear-model plot of the "BRANCH POWERLAW_VS_LOGNORMAL RP" column against
# the branch power-law cut-off.
sns.lmplot(
    data=filtered, x="BRANCH POWERLAW_VS_LOGNORMAL RP", y="branch power_law cut-off"
)

In [None]:
# Display the R and p columns of the branch lognormal vs. exponential
# comparison; the cell output is a tuple of the two columns.
filtered["branch lognormal vs. exponential R"], filtered[
    "branch lognormal vs. exponential p"
]

In [None]:
# Show the "radius" and "name" columns of every filtered row.
filtered[["radius", "name"]]