# Calculate Distance Matrix

In [13]:
import sys

# Put ./work at the front of the module search path before any project import runs.
# NOTE(review): presumably this is where the `lib` package imported below lives — confirm.
# The path is relative, so it resolves against the kernel's current working directory.
sys.path.insert(0, "./work")

## Imports

In [14]:
import os
from pathlib import Path

from pyspark.sql import SparkSession

from lib.utils import create_distance_matrix, get_coordinates

## Spark Session

In [15]:
spark = (
    SparkSession.builder
    .appName("SmartLitter")
    .getOrCreate()
)

# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks are the values
# this notebook previously hard-coded, kept so existing runs keep working.
# TODO: drop the fallback password once LITTER_DB_PASSWORD is set everywhere.
jdbc_url = os.environ.get("LITTER_DB_URL", "jdbc:postgresql://db:5432/litter_db")
connection_properties = {
    "user": os.environ.get("LITTER_DB_USER", "root"),
    "password": os.environ.get("LITTER_DB_PASSWORD", "pwd123"),
    "driver": "org.postgresql.Driver",
}

## Load Data

In [16]:
df = spark.read.jdbc(
    url=jdbc_url,
    table="public.litter_bin_geoposition",  # schema-qualified source table
    properties=connection_properties,
)

## Schema

In [17]:
df.printSchema()  # show column names, types and nullability of the loaded table

root
 |-- litter_bin_uuid: string (nullable = true)
 |-- geom_point: string (nullable = true)
 |-- fk_type_uuid: string (nullable = true)
 |-- archive: boolean (nullable = true)
 |-- creation_date: date (nullable = true)
 |-- end_date: date (nullable = true)
 |-- active: boolean (nullable = true)
 |-- fastnacht: short (nullable = true)
 |-- braderie: short (nullable = true)
 |-- christmas_market: short (nullable = true)
 |-- barbecue_season: short (nullable = true)
 |-- bin_full: boolean (nullable = true)
 |-- robidog_empty: boolean (nullable = true)
 |-- ashtray_full: boolean (nullable = true)
 |-- defect: boolean (nullable = true)
 |-- dirty: boolean (nullable = true)
 |-- id_nr: integer (nullable = true)


## Compute and Save the Distance Matrix

In [18]:
SAMPLE_SIZE = 5  # number of rows taken from the sorted table for this run
OUTPUT_PATH = Path("data/distance_matrix.txt")

# NOTE(review): sorting before limit() presumably makes the selected rows
# repeatable between runs — confirm that geom_point values are unique.
df = df.sort(df.geom_point)

coordinates = get_coordinates(df=df.limit(SAMPLE_SIZE))

distance_matrix: list[list[int]] = create_distance_matrix(coordinates)

print("Distance Matrix (meters):")
print(distance_matrix)

# Guard the column count so an empty matrix reports (0, 0) instead of
# raising IndexError on distance_matrix[0].
n_rows = len(distance_matrix)
n_cols = len(distance_matrix[0]) if n_rows else 0
print(f"Shape: ({n_rows}, {n_cols})")

# Create the output directory on demand so a fresh checkout does not fail
# with FileNotFoundError; the encoding is pinned to keep the file portable.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
with OUTPUT_PATH.open("w", encoding="utf-8") as file:
    file.write(str(distance_matrix))

Distance Matrix (meters):
[[0, 1287, 2694, 316, 2455, 1817], [1287, 0, 1547, 1314, 1241, 1470], [2694, 1547, 0, 2806, 1498, 1543], [316, 1314, 2806, 0, 2389, 2073], [2455, 1241, 1498, 2389, 0, 2411], [1817, 1470, 1543, 2073, 2411, 0]]
Shape: (6, 6)
