In [1]:
import torch
from torch.utils.data import Dataset
import pandas as pd
from utils import expand_geometry

class InReteDataset(torch.utils.data.Dataset):
    """Accumulates geometry, dispersion and risk tables read from parquet files.

    The tables are plain pandas DataFrames stored as attributes; each one is
    ``None`` until a file of the matching kind has been loaded:

    * ``geom_df`` -- result of ``expand_geometry`` on the last geometry file.
    * ``disp``    -- last dispersion file, with ``DATA`` replaced by
      ``year`` / ``month`` / ``day`` columns.
    * ``risk_df`` -- concatenation of every risk file loaded so far, with
      ``data`` replaced by ``year`` / ``month`` columns.
    """

    # Column names whose presence (any one of them) identifies a file kind.
    # They are checked in this order, so a frame with a 'geometry' column is
    # always treated as geometry even if it also carries e.g. 'IDSAP'.
    _GEOMETRY_MARKERS = ('geometry',)
    _DISPERSION_MARKERS = ('ODL', 'DATA', 'IDSAP')
    _RISK_MARKERS = ('data', 'idsap', 'risk_level')

    def __init__(self) -> None:
        super().__init__()
        # All tables start as None so that reading one before its file was
        # loaded yields None instead of raising AttributeError.
        self.risk_df: pd.DataFrame | None = None
        self.disp: pd.DataFrame | None = None
        self.geom_df: pd.DataFrame | None = None

    def load_parquet(self, file_path: str):
        """Read ``file_path`` as parquet and store it according to its columns.

        Args:
            file_path: path handed to ``pandas.read_parquet``.

        A file whose columns match none of the known kinds is not loaded; a
        ``UserWarning`` naming the file is emitted instead of failing silently.
        """
        self._dispatch(pd.read_parquet(file_path), source=file_path)

    def _dispatch(self, df: pd.DataFrame, source: str = "<in-memory frame>"):
        """Route ``df`` to the loader selected by its column names."""

        def has_any(markers) -> bool:
            return any(name in df.columns for name in markers)

        if has_any(self._GEOMETRY_MARKERS):
            print("processing a geometry file")
            self._load_geometry(df)
        elif has_any(self._DISPERSION_MARKERS):
            print("processing a dispersion file")
            self._load_dispersion(df)
        elif has_any(self._RISK_MARKERS):
            print("processing a risk file")
            self._load_risk(df)
        else:
            # Local import: keeps this block self-contained within the file.
            import warnings
            warnings.warn(
                f"{source}: unrecognized schema, nothing loaded "
                f"(columns: {list(df.columns)})"
            )

    def _load_dispersion(self, df: pd.DataFrame):
        """Store ``df`` as the dispersion table (replacing any previous one).

        ``IDSAP`` is cast to ``str`` and ``DATA`` is parsed with
        ``pd.to_datetime`` and split into ``year`` / ``month`` / ``day``.
        The input frame is left untouched.
        """
        dates = pd.to_datetime(df['DATA'])
        # assign() returns a new frame, so the caller's frame is not mutated;
        # the .dt accessor replaces the row-by-row lambda.
        self.disp = df.assign(
            IDSAP=df['IDSAP'].astype(str),
            year=dates.dt.year,
            month=dates.dt.month,
            day=dates.dt.day,
        ).drop(columns=["DATA"])

    def _load_risk(self, df: pd.DataFrame):
        """Append ``df`` to the accumulated risk table.

        ``idsap`` is cast to ``str`` and ``data`` is parsed with
        ``pd.to_datetime`` (so date objects, timestamps and date strings are
        all accepted) and split into ``year`` / ``month``.
        The input frame is left untouched.
        """
        dates = pd.to_datetime(df['data'])
        risk = df.assign(
            idsap=df['idsap'].astype(str),
            year=dates.dt.year,
            month=dates.dt.month,
        ).drop(columns=["data"])

        if self.risk_df is None:
            self.risk_df = risk
        else:
            # Original index labels are kept, so labels repeat across files.
            self.risk_df = pd.concat([self.risk_df, risk])

    def _load_geometry(self, df: pd.DataFrame):
        """Store ``expand_geometry(df)`` as the geometry table.

        NOTE(review): ``expand_geometry`` comes from the project's ``utils``
        module and is not visible here -- confirm which columns it adds.
        """
        self.geom_df = expand_geometry(df)







In [2]:
import os

# Load the 2019 geometry and dispersion files, then every file found in the
# risk folder, into a single InReteDataset.
dataset = InReteDataset()
dataset.load_parquet('dati/2019/tratte_gas_2019.parquet')
dataset.load_parquet('dati/2019/tratte_disp_2019.parquet')

folder_path = "dati/2019/idsap_rischio_2019/"
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the concatenated risk table reproducible across machines and re-runs.
files = sorted(
    f for f in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, f))
)
for f in files:
    # Reuse folder_path instead of repeating the literal path.
    dataset.load_parquet(os.path.join(folder_path, f))



processing a geometry file


processing rows: 100%|██████████| 193982/193982 [00:14<00:00, 13039.74it/s]


processing a dispersion file
processing a risk file
processing a risk file
processing a risk file
processing a risk file
processing a risk file
processing a risk file
processing a risk file
processing a risk file
processing a risk file
processing a risk file
processing a risk file
processing a risk file


In [None]:
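# Debug leftover: print every raw date of the risk table.
# NOTE: `_load_risk` drops the 'data' column, so running this against the
# loaded dataset raises KeyError; kept only for reference.
# for i in list(dataset.risk_df['data']):
#     print(i)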

In [233]:
# Keep only the risk rows whose 'idsap' value also occurs in geom_df['IDSAP'].
dataset.risk_df = dataset.risk_df.loc[
    dataset.risk_df['idsap'].isin(dataset.geom_df['IDSAP'])
]
           

In [4]:
# Display the accumulated risk table (pandas truncates the rendered rows).
dataset.risk_df

Unnamed: 0,idsap,risk_level,year,month
0,1450570,33.0,2019,1
1,1450573,63.0,2019,1
2,1450582,39.0,2019,1
3,1450588,3.0,2019,1
4,1450616,19.0,2019,1
...,...,...,...,...
186644,11107087,76.0,2019,12
186645,11107094,70.0,2019,12
186646,11107112,10.0,2019,12
186647,11108639,6.0,2019,12


In [246]:
# Left join: every risk row is kept and gains the geometry columns of the
# row in geom_df with the same id.
risk_w_geom = dataset.risk_df.merge(
    dataset.geom_df,
    how='left',
    left_on=['idsap'],
    right_on=['IDSAP'],
)
# Left join on id + year + month: rows without a matching dispersion record
# get NaN in the dispersion columns.
merged_2_df = risk_w_geom.merge(
    dataset.disp,
    how='left',
    left_on=['year', 'month', 'idsap'],
    right_on=['year', 'month', 'IDSAP'],
)

In [249]:
import numpy as np
import datetime

# Flag rows where 'ODL' is non-null, i.e. presumably the rows for which the
# left join above found a dispersion record.
merged_2_df['has_dispersion'] = ~merged_2_df['ODL'].isna()

In [272]:
# Pairwise correlation (pandas default method) between the risk level, the
# dispersion flag and the pipe / geometry attributes.
merged_2_df.loc[:, [
    'risk_level', 'has_dispersion',
    'DIAMETRO', 'ANNO_POSA', 'MATERIALE',
    'is_simple', 'is_ring', 'minimum_clearance', 'lenght',
    'bound_0', 'bound_1',
]].corr()

Unnamed: 0,risk_level,has_dispersion,DIAMETRO,ANNO_POSA,MATERIALE,is_simple,is_ring,minimum_clearance,lenght,bound_0,bound_1
risk_level,1.0,0.005258,0.139178,-0.490541,0.513826,0.001205,-0.003077,-0.005412,-0.245586,-0.174279,-0.087796
has_dispersion,0.005258,1.0,0.002854,-0.003974,0.005816,0.000259,-6.2e-05,0.000389,0.00299,-0.000668,0.000101
DIAMETRO,0.139178,0.002854,1.0,-0.024059,0.218688,0.000764,-0.004323,-0.058224,-0.045594,-0.040477,-0.099282
ANNO_POSA,-0.490541,-0.003974,-0.024059,1.0,-0.255611,-0.006328,0.001702,-0.062384,0.05475,0.07477,0.024765
MATERIALE,0.513826,0.005816,0.218688,-0.255611,1.0,-0.000665,0.000223,-0.06581,-0.130502,-0.170728,-0.244339
is_simple,0.001205,0.000259,0.000764,-0.006328,-0.000665,1.0,0.000251,0.014547,-0.003919,0.002053,0.004405
is_ring,-0.003077,-6.2e-05,-0.004323,0.001702,0.000223,0.000251,1.0,-0.000596,0.007633,-0.000801,0.001359
minimum_clearance,-0.005412,0.000389,-0.058224,-0.062384,-0.06581,0.014547,-0.000596,1.0,0.062329,0.030801,0.106151
lenght,-0.245586,0.00299,-0.045594,0.05475,-0.130502,-0.003919,0.007633,0.062329,1.0,0.10899,0.121438
bound_0,-0.174279,-0.000668,-0.040477,0.07477,-0.170728,0.002053,-0.000801,0.030801,0.10899,1.0,0.614061


In [273]:
# Display the merged risk / geometry / dispersion table, including the
# 'has_dispersion' flag (pandas truncates the rendered rows and columns).
merged_2_df

Unnamed: 0,idsap,risk_level,year,month,IDSAP_x,TIPO,MATERIALE,DIAMETRO,ANNO_POSA,CODSISTEMA,...,bound_0,bound_1,bound_2,bound_3,x_coordinates,y_coordinates,ODL,IDSAP_y,day,has_dispersion
0,1450570,33.0,2019,1,1450570,25,1,150.00000000000,1996,11269400,...,-3.353695e+06,2.271897e+06,-3.353694e+06,2.271898e+06,"[-3353693.7669278653, -3353695.053340057]","[2271898.17709321, 2271897.2999495715]",,,,False
1,1450573,63.0,2019,1,1450573,25,1,100.00000000000,1996,11269400,...,-3.353639e+06,2.272263e+06,-3.353637e+06,2.272266e+06,"[-3353638.6642755214, -3353637.5917794593, -33...","[2272262.7235426204, 2272264.5970330117, 22722...",,,,False
2,1450582,39.0,2019,1,1450582,25,2,90.00000000000,1996,11269400,...,-3.353555e+06,2.271701e+06,-3.353519e+06,2.271730e+06,"[-3353554.8349592565, -3353547.296920848, -335...","[2271729.64086165, 2271722.0790262823, 2271716...",,,,False
3,1450588,3.0,2019,1,1450588,25,2,160.00000000000,1996,11269400,...,-3.353614e+06,2.271936e+06,-3.353613e+06,2.271938e+06,"[-3353612.7411560034, -3353614.038804982]","[2271935.9246486067, 2271937.9347880394]",,,,False
4,1450616,19.0,2019,1,1450616,25,1,80.00000000000,1992,36342000,...,-3.351709e+06,2.286029e+06,-3.351662e+06,2.286056e+06,"[-3351662.4467304554, -3351668.2009070404, -33...","[2286054.965993015, 2286055.990898392, 2286052...",,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2194292,10773183,56.0,2019,12,10773183,25,1,100.00000000000,1976,36204100,...,-3.399579e+06,2.273425e+06,-3.399579e+06,2.273425e+06,"[-3399578.7783837104, -3399579.198526837]","[2273424.7469944255, 2273425.0898196204]",,,,False
2194293,10782360,43.0,2019,12,10782360,24,1,200.00000000000,2003,36224800,...,-3.402421e+06,2.276597e+06,-3.402420e+06,2.276599e+06,"[-3402419.5564612504, -3402420.449152806, -340...","[2276598.9312498965, 2276597.881015067, 227659...",,,,False
2194294,10782401,23.0,2019,12,10782401,24,1,200.00000000000,2010,36224800,...,-3.402520e+06,2.276365e+06,-3.402485e+06,2.276477e+06,"[-3402516.0774128707, -3402484.54761748, -3402...","[2276364.894695608, 2276403.0633189753, 227640...",,,,False
2194295,10782431,54.0,2019,12,10782431,24,1,150.00000000000,2003,36224800,...,-3.402484e+06,2.276645e+06,-3.402480e+06,2.276650e+06,"[-3402479.5236512665, -3402480.63029976, -3402...","[2276650.4656171296, 2276649.1342936335, 22766...",,,,False
