In [1]:
import fsspec
import os
import numpy as np
import rasterio 
import rasterio.plot as rasterioplot
import pandas as pd
from ml4floods.data.utils import read_json_from_gcp as load_json
from ml4floods.models.utils import metrics
from ml4floods.visualization.plot_utils import plot_s2_rbg_image,get_image_transform
import seaborn as sns
import matplotlib.pyplot as plt

In [43]:
db = load_json(r'C:\Users\1\Documents\Projectes\Floods\Exploratori\database\database_ok.json')

database = pd.DataFrame(db)

satellites = database.satellite.unique()
sat_type = ['SAR','SAR','SAR', 'Optic', 'Optic', 'Optic', 'SAR', 'Optic', 'Optic', 'SAR'
            , 'Optic', 'Optic', 'Optic','Optic', 'Not Applicable', 'Optic' , 'Optic', 'Optic', 'Not Applicable','Optic']
sat_types = dict(zip(satellites,sat_type))
database['source_sat_type'] = [sat_types[sat] for sat in database['satellite']]

database['cems'] = database.apply(lambda x: x['layer name'].split("_")[0], axis = 1 )
database['aoi'] = database.apply(lambda x: x['layer name'].split("_")[1], axis = 1 )

keys = ['layer name','cems','aoi', 'subset', 'continent', 'country','satellite','source_sat_type', 'inv', 'land','delay',
       'cloud', 'water', 'flood', 'permanentwater']

database = database[keys]

In [44]:
database.head()

Unnamed: 0,layer name,cems,aoi,subset,continent,country,satellite,source_sat_type,inv,land,delay,cloud,water,flood,permanentwater
0,EMSR264_01AMBILOPE_DEL_v2_observed_event_a,EMSR264,01AMBILOPE,banned,Africa,Madagascar,RADARSAT-2,SAR,0.968531,56.41895,149,36.278894,6.333625,4.739957,1.593668
1,EMSR264_02AMBANJA_DEL_v2_observed_event_a,EMSR264,02AMBANJA,banned,Africa,Madagascar,RADARSAT-2,SAR,1.902971,60.738435,149,23.28806,14.070533,2.181247,11.889286
2,EMSR264_05MAROVOAY_DEL_v2_observed_event_a,EMSR264,05MAROVOAY,banned,Africa,Madagascar,Sentinel-1,SAR,1.206552,64.312854,77,28.3061,6.174494,2.324843,3.849651
3,EMSR264_06MAHAMBO_DEL_v2_observed_event_a,EMSR264,06MAHAMBO,banned,Africa,Madagascar,Sentinel-1,SAR,1.183829,51.734343,221,33.08665,13.995177,0.095344,13.899834
4,EMSR264_07AMBATOBE_DEL_v2_observed_event_a,EMSR264,07AMBATOBE,banned,Africa,Madagascar,Sentinel-1,SAR,0.955627,41.763294,221,44.329468,12.951611,0.113639,12.837972


In [45]:
database.describe()

Unnamed: 0,inv,land,delay,cloud,water,flood,permanentwater
count,871.0,871.0,871.0,871.0,871.0,871.0,871.0
mean,17.842812,54.313453,84.762342,19.30968,8.534055,3.159053,5.375002
std,24.032014,23.734022,94.107311,18.277091,9.862316,6.513681,7.724632
min,0.0,0.0,-8.0,0.0,0.026007,0.000582,0.0
25%,1.455008,36.928981,16.5,0.877533,1.875528,0.319853,0.811039
50%,3.701975,50.728538,53.0,15.08705,4.759977,1.095499,2.196842
75%,32.820361,74.237058,113.0,36.273311,11.833639,2.971368,6.511503
max,98.268524,99.80043,449.0,84.544853,66.642411,66.466994,54.286448


In [48]:
print(database.groupby('satellite').cems.count())
print()
print(database.groupby('source_sat_type').cems.count())


satellite
997                    1
ALOS                   1
COSMO-SkyMed         166
Deimos-2               3
GeoEye-1              26
Landsat-8             11
Not Applicable         1
PlanetScope            1
Pleiades-1A-1B       134
RADARSAT-2           101
SPOT-6-7              26
Sentinel-1           302
Sentinel-2            57
Skysat                 1
TERRASAR-X            19
WorldView-1            1
WorldView-2           14
WorldView-3            5
earth observing 1      1
Name: cems, dtype: int64

source_sat_type
Not Applicable      4
Optic             253
SAR               614
Name: cems, dtype: int64


In [58]:
database_train = database[(database.subset == 'train') | (database.subset == 'unused' )]

print('unused')
print(database_train[database_train.subset == 'unused'].groupby('satellite').cems.count())
print('\n train')
print(database_train[database_train.subset == 'train'].groupby('satellite').cems.count())

print('full')
print(database_train.groupby('satellite').cems.count())
print()
print(database_train.groupby('source_sat_type').cems.count())



unused
satellite
997                 1
ALOS                1
COSMO-SkyMed       70
Deimos-2            3
GeoEye-1           21
Landsat-8           2
Not Applicable      1
Pleiades-1A-1B     97
RADARSAT-2         45
SPOT-6-7            9
Sentinel-1        150
Sentinel-2         24
Skysat              1
TERRASAR-X         12
WorldView-1         1
WorldView-2        11
Name: cems, dtype: int64

 train
satellite
COSMO-SkyMed          79
GeoEye-1               3
Landsat-8              8
PlanetScope            1
Pleiades-1A-1B        27
RADARSAT-2            40
SPOT-6-7               7
Sentinel-1           103
TERRASAR-X             6
WorldView-2            3
WorldView-3            5
earth observing 1      1
Name: cems, dtype: int64
full
satellite
997                    1
ALOS                   1
COSMO-SkyMed         149
Deimos-2               3
GeoEye-1              24
Landsat-8             10
Not Applicable         1
PlanetScope            1
Pleiades-1A-1B       124
RADARSAT-2            8

In [56]:
# floodmaps por debajo de 10h de delay 

print(database_train[database_train.delay < 10 ].groupby('satellite').cems.count())
print()
print(database_train[database_train.delay < 10 ].groupby('source_sat_type').cems.count())

satellite
COSMO-SkyMed      37
Deimos-2           1
GeoEye-1           5
Not Applicable     1
Pleiades-1A-1B    28
RADARSAT-2        16
SPOT-6-7           3
Sentinel-1        26
Sentinel-2        23
TERRASAR-X         3
WorldView-2        6
WorldView-3        1
Name: cems, dtype: int64

source_sat_type
Not Applicable     1
Optic             62
SAR               87
Name: cems, dtype: int64
