In [None]:
import pandas as pd
import geopandas as gpd
from shapely import  wkb
import matplotlib.pyplot as plt
import sqlalchemy
from scipy import stats

## settings

In [None]:
# CRS assigned to every GeoDataFrame built in this notebook (passed as `crs=`);
# presumably EPSG:3006 (SWEREF99 TM) - confirm
local_crs = 3006

# when True, the scatter plot is drawn and the share of joined sample points is printed
plot = True

# relative-flow column used as the estimate when comparing against ground truth
main_relflow_col = 'relflow_minspeed0_minpoints2'

## read flows

In [None]:
# database connection: the URL is assembled from its parts (no password is passed here)
# and the engine built from it is shared by every query below
url = sqlalchemy.URL.create(
    drivername="postgresql+psycopg",
    username="username",
    host="host",
    port=5432,
    database="database",
)
engine = sqlalchemy.create_engine(url)

In [None]:
n = 10 # n of samples to evaluate
i = 1 # sample to load (in case n=1)

# n == 1 reads the single-sample table 'random<i>', any other n reads the combined
# table 'random1to<n>'; only the table name differs, so the load itself is shared
if n == 1:
    flows_table = "sthlm_flows_2024_random{}".format(i)
else:
    flows_table = "sthlm_flows_2024_random1to{}".format(n)

sql_query_flows_sthlm = "SELECT * FROM flows.{};".format(flows_table)

# read_sql executes the query itself; a separate conn.execute() beforehand would only
# run the same SELECT a second time and discard the result
with engine.connect() as conn_flowsense:
    df_flows_sthlm = pd.read_sql(sql_query_flows_sthlm, con=conn_flowsense)

# the geometry column is decoded from WKB into shapely objects
flows_sthlm = gpd.GeoDataFrame(
    df_flows_sthlm,
    geometry=df_flows_sthlm['geometry'].apply(wkb.loads),
    crs=local_crs)

#### sum flows in both directions per road

In [None]:
def sum_and_renormalize(flows):
    """Sum flows over both directions of each road and rescale relative flows.

    Rows sharing an ``ISA_index`` (an edge and its corresponding reverse) are
    collapsed into a single row: every numeric column is summed, while
    ``geometry`` and ``RLID`` are taken from the first row of each group.
    Afterwards each ``relflow_*`` column is divided by its own maximum.
    If the frame has no ``relflow_*`` column, no aggregation is performed.

    Parameters
    ----------
    flows : geopandas.GeoDataFrame
        Must contain the columns ``ISA_index``, ``geometry`` and ``RLID``.
        The passed-in frame is not modified.

    Returns
    -------
    geopandas.GeoDataFrame
        Indexed by ``ISA_index``, with the notebook-level ``local_crs`` as CRS.
    """
    has_relflow = any(col.startswith('relflow_') for col in flows.columns)

    if has_relflow:
        # A single aggregation is enough: summing the numeric columns covers every
        # relflow_*/trajcount_* pair at once, so there is no need to repeat the
        # groupby/merge once per relflow column.
        summed = flows.groupby('ISA_index', as_index=False).sum(numeric_only=True)

        # non-numeric attributes are taken from the first row of each ISA_index
        first_attrs = flows[['ISA_index', 'geometry', 'RLID']].drop_duplicates('ISA_index', keep='first')

        flows = pd.merge(summed, first_attrs, on='ISA_index', how='left').reset_index(drop=True)

    flows = flows.set_index('ISA_index')
    flows = gpd.GeoDataFrame(flows, geometry=flows.geometry, crs=local_crs)

    # renormalize flows now that values are summed
    for col in flows.columns:
        if col.startswith('relflow_'):
            col_max = flows[col].max()
            # a zero maximum would turn the whole column into NaN (0/0); leave it untouched
            if col_max > 0:
                flows[col] = flows[col] / col_max

    return flows

In [None]:
# rebinds flows_sthlm to the aggregated frame, where ISA_index is the index and no longer a column;
# the function selects ISA_index as a column, so reload flows_sthlm before re-running this cell
flows_sthlm = sum_and_renormalize(flows_sthlm)

## read ground truth data

In [None]:
sql_query_sthlm_2024 = "SELECT * FROM ground_truth.flows_cars_sthlm_2024;"

# read_sql executes the query itself; a separate conn.execute() beforehand would only
# run the same SELECT a second time and discard the result
with engine.connect() as conn_flowsense:
    df_carflows_sthlm_2024 = pd.read_sql(sql_query_sthlm_2024, con=conn_flowsense)

# the geometry column is decoded from WKB into shapely objects
carflows_sthlm_2024 = gpd.GeoDataFrame(
    df_carflows_sthlm_2024,
    geometry=df_carflows_sthlm_2024['geometry'].apply(wkb.loads),
    crs=local_crs)

## combine flows and ground truth

#### sample one point per ground-truth line and find the corresponding flow value

In [None]:
def sample_and_combine(flows, groundtruth, max_distance=10):
    """Attach to each ground-truth feature the attributes of its nearest flow line.

    Each ground-truth feature is reduced to one sample point (the midpoint, when
    the first geometry is a ``LineString``; otherwise the geometry is used as is).
    The sample point is then joined to the nearest flow line within
    ``max_distance``. Features without a flow line in range are dropped, and
    when several lines are equally near only one is kept.

    Side effect: a column ``original_geom_flows`` holding the line geometry is
    added to the passed-in ``flows`` frame.

    Parameters
    ----------
    flows : geopandas.GeoDataFrame
        Flow lines to take values from.
    groundtruth : geopandas.GeoDataFrame
        Ground-truth features; not modified.
    max_distance : float, default 10
        Search radius handed to ``gpd.sjoin_nearest`` (in units of the CRS).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per matched ground-truth feature with the sample point as
        geometry; the original geometries are kept in ``original_geom_gt``
        and ``original_geom_flows``.
    """
    # store original line geometry
    flows['original_geom_flows'] = flows.geometry

    # sample a point per groundtruth line
    sample_points = groundtruth.copy()
    sample_points['original_geom_gt'] = sample_points.geometry
    # positional lookup: the index need not contain the label 0, and the frame may be empty
    if len(groundtruth) > 0 and groundtruth.geom_type.iloc[0] == 'LineString':
        sample_points.geometry = sample_points.geometry.interpolate(0.5, normalized=True)

    # find flow-line within maximum distance away, identify nearest if multiple nearby
    sample_points['_id'] = sample_points.index
    joined = gpd.sjoin_nearest(
        sample_points, flows, how='inner', max_distance=max_distance, distance_col='_dist')
    idx = joined.groupby('_id')['_dist'].idxmin()
    joined = (
        joined.loc[idx]
        .reset_index(drop=True)
        # ties share an index label, so .loc can return several rows per sample point
        .drop_duplicates(subset=['_id'], keep='first')
        .drop(columns=['_id', '_dist'])
    )

    # skip the report for empty input, where the percentage is undefined
    if plot and len(sample_points) > 0:
        print('{}% of sample points successfully joined'.format(round(100*len(joined)/len(sample_points), 2)))

    return joined

In [None]:
# note: the call also adds an 'original_geom_flows' column to flows_sthlm as a side effect
carflows_sthlm_2024_joined = sample_and_combine(flows_sthlm, carflows_sthlm_2024)

## Comparison

In [None]:
# p-values at or below this threshold are reported as a significant correlation
sign_threshold = 0.01

In [None]:
# x: estimated relative flow, y: ground-truth value per joined sample point
# (the column name is Swedish for "traffic flow (vehicles/day), all vehicles")
x1 = carflows_sthlm_2024_joined[main_relflow_col]
y1 = carflows_sthlm_2024_joined['trafikflöde__fordon_dygn__alla_fordon']

In [None]:
if plot:
    fig, axs = plt.subplots(figsize=(20,6))

    # one dot per joined sample point: estimate on x, ground truth on y
    axs.scatter(x1, y1, alpha=0.2)
    axs.set(
        xlabel='Estimated relative flow (normalized 0-1)',
        ylabel='Ground truth (absolute values)',
    )

    # optional log axes, disabled by default
    # axs.set_xscale('log')
    # axs.set_yscale('log')

    plt.show()

In [None]:
# Spearman rank correlation between estimated and ground-truth flows.
# spearmanr propagates NaN by default: a single missing value would give r = p = nan and
# be reported as "NO significant correlation", so only complete pairs are tested.
valid_pairs = x1.notna() & y1.notna()
r, p = stats.spearmanr(x1[valid_pairs], y1[valid_pairs])
print(r, p)
if pd.isna(p):
    # too few complete pairs or constant input: the test has no defined result
    print('--> correlation could not be computed (p-value is NaN)')
elif p <= sign_threshold:
    print('--> significant correlation between estimated and ground truth flows, Spearman R: {}'.format(round(r,3)))
else:
    print('--> NO significant correlation')