In [9]:
import duckdb
import polars as pl
import torch
from torch_geometric.data import Data
import numpy as np
from torch_geometric.utils import to_networkx
from utils import StoreDataset, LoadedDataset, create_pyg_graph_from_polars


In [12]:
# Open a DuckDB connection on the on-disk database file; this handle is what
# later cells use to execute the SQL query.
con = duckdb.connect("../../data/02_primary/distanze.db")

In [13]:
# SQL text executed through the DuckDB connection.
# It reads every CSV file matching the glob in the FROM clause, splits the
# OR_DEST column on ' - ' into two trimmed string columns (OR and DEST), and
# passes TEP_TOT, KM_TOT and TTP_TOT through unchanged.
# NOTE(review): OR / DEST look like origin / destination codes and KM_TOT like a
# distance in km, but the meaning of the columns is not stated here — confirm
# against the raw data documentation.
# NOTE(review): the relative glob presumably resolves against the notebook's
# working directory — confirm before running from elsewhere.
QUERY = """
    SELECT 
        trim(split_part(OR_DEST,' - ',1)) as OR, trim(split_part(OR_DEST,' - ',2)) as DEST, TEP_TOT, KM_TOT, TTP_TOT 
    FROM '../../data/01_raw/Italia/*/*.csv'
    """

In [14]:
# Run QUERY on the DuckDB connection, pull the result out as an Arrow table and
# wrap it in a polars DataFrame.
df = pl.from_arrow(con.execute(QUERY).fetch_arrow_table())

# Quick sanity check: show the first rows together with the column dtypes.
print(df.head())

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

shape: (5, 5)
┌───────┬───────┬─────────┬────────┬─────────┐
│ OR    ┆ DEST  ┆ TEP_TOT ┆ KM_TOT ┆ TTP_TOT │
│ ---   ┆ ---   ┆ ---     ┆ ---    ┆ ---     │
│ str   ┆ str   ┆ i64     ┆ f64    ┆ i64     │
╞═══════╪═══════╪═════════╪════════╪═════════╡
│ 66001 ┆ 66001 ┆ 0       ┆ 0.0    ┆ 0       │
│ 66001 ┆ 66055 ┆ 6       ┆ 4.9    ┆ 5       │
│ 66001 ┆ 66086 ┆ 9       ┆ 7.1    ┆ 7       │
│ 66001 ┆ 66031 ┆ 10      ┆ 8.1    ┆ 9       │
│ 66001 ┆ 66027 ┆ 13      ┆ 10.8   ┆ 11      │
└───────┴───────┴─────────┴────────┴─────────┘


In [15]:
# Build the graph object from the query result using the project helper.
# "OR" and "DEST" are passed as the two column names, followed by the list of
# the three *_TOT columns.
# NOTE(review): presumably OR / DEST become the source / target nodes and the
# *_TOT columns become edge attributes — confirm in utils.create_pyg_graph_from_polars.
data = create_pyg_graph_from_polars(
    df,
    "OR",
    "DEST",
    ["TEP_TOT", "KM_TOT", "TTP_TOT"],
)

In [16]:
# Summarise the graph: size, edges-per-node ratio, and a few structural flags.
# A single print call with a newline separator emits one statistic per line.
print(
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    # Ratio of edge count to node count, shown with two decimals.
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

Number of nodes: 7903
Number of edges: 3245271
Average node degree: 410.64
Has isolated nodes: True
Has self-loops: True
Is undirected: False


In [18]:
# Hand the single graph (wrapped in a one-element list) to the project's
# StoreDataset helper together with the target folder and file name.
# NOTE(review): presumably this writes the dataset to disk under `folder` —
# confirm the exact file layout in utils.StoreDataset.
StoreDataset(data_list=[data], folder="../../data/02_primary", filename="italy_network")