In [1]:
import pandas as pd
import geopandas as gpd
from rasterstats import zonal_stats
import shapefile
from osgeo import gdal
from shapely import geometry
from affine import Affine
import numpy as np

In [None]:
# Example command-line invocation of the `compute-risk` command through grid_to_vector_stats.py:
#D:\Software\Python37\python.exe grid_to_vector_stats.py compute-risk --file-shape='//172.17.20.5/geofiles/data/continuinzicht_kanaaldijken/Overstromingsscenarios/area/geo_areas_28992.shp' --file-grid='//172.17.20.5/geofiles/data/continuinzicht_kanaaldijken/Overstromingsscenarios/geo_nattevoeten_dijkdeel396_28992.tif' --p-segment=1 --column-name='risk'

In [13]:
# Inputs used by the cells below to exercise compute_risk interactively.
# NOTE(review): these are absolute paths on a local drive, so the notebook
# only runs on a machine that has them. The commented example command in
# the cell above reads 'geo_areas_28992.shp', while this path says
# 'gea_areas_28992.shp' -- confirm which spelling is correct.
file_shape = r"D:\Projects\Pr\3730.15\Werkfolder\ci\kanaaldijken\gea_areas_28992.shp"
file_grid = r"D:\Projects\Pr\3730.15\Werkfolder\ci\kanaaldijken\geo_nattevoeten_dijkdeel396_28992.tif"
# Multiplier applied to every valid grid cell (failure probability on the segment).
p_segment = 1
# Name given to the zonal 'sum' column in the result.
column_name = 'risk'

In [14]:
def compute_risk(file_shape, file_grid, p_segment, column_name):
    """
    Compute the risk per area as the zonal sum of a probability-weighted grid.

    Every valid (non-nodata) cell of the first raster band is multiplied by
    ``p_segment``; the scaled cells are then summed per polygon of the
    shapefile.

    Parameters
    ----------
    file_shape : str
        path to vector file (shapefile); it must provide the attribute
        columns 'id', 'name' and 'code'
    file_grid : str
        path to a raster file that GDAL can open
    p_segment : float
        failure probability on segment
    column_name : str
        name of the output column (eg. 'cond_risk_damage')

    Returns
    -------
    json_out : str
        JSON string (``orient='records'``) holding, for every area, the keys
        'id', 'name', 'code' and ``column_name``. The value under
        ``column_name`` is whatever rasterstats reports as 'sum' for that
        polygon (null when it has no value to report).

    Raises
    ------
    RuntimeError
        if GDAL cannot open ``file_grid``
    KeyError
        if the shapefile lacks one of the columns 'id', 'name' or 'code'
    """
    # --- vector input: attributes and geometries read through pyshp ---------
    sf = shapefile.Reader(file_shape)
    # The first entry of sf.fields is pyshp's 'DeletionFlag' pseudo-field,
    # which has no counterpart in the records, so it is skipped.
    field_names = [field[0] for field in sf.fields[1:]]

    atr_list = []
    geom_list = []
    for r in sf.shapeRecords():
        atr_list.append(dict(zip(field_names, r.record)))
        geom_list.append(geometry.shape(r.shape.__geo_interface__))

    gdf = gpd.GeoDataFrame(pd.DataFrame(atr_list), geometry=geom_list)
    df_sel = gdf[['id', 'name', 'code']]

    # --- raster input --------------------------------------------------------
    ds = gdal.Open(file_grid)
    if ds is None:
        # Without gdal.UseExceptions() a failed open returns None; fail with
        # a clear message instead of an AttributeError further down.
        raise RuntimeError("GDAL could not open raster file: {}".format(file_grid))

    band = ds.GetRasterBand(1)
    nodata = band.GetNoDataValue()
    array = band.ReadAsArray()
    affine = Affine.from_gdal(*ds.GetGeoTransform())

    # Dropping the references is the GDAL Python idiom for closing the file.
    band = None
    ds = None

    # --- scale the valid cells ----------------------------------------------
    if nodata is None:
        # Band declares no nodata value: every cell is valid.
        is_nodata = np.zeros(array.shape, dtype=bool)
    elif np.isnan(nodata):
        # NaN never compares equal to itself, so `array == nodata` would
        # silently miss every nodata cell.
        is_nodata = np.isnan(array)
    else:
        is_nodata = array == nodata

    # Out-of-place multiplication lets NumPy promote an integer raster to
    # float when p_segment is fractional (an in-place `*=` raises a casting
    # error in that case). Nodata cells keep their original value so that
    # rasterstats can still recognise and exclude them.
    array_scaled = np.where(is_nodata, array, array * p_segment)

    # --- zonal statistics ----------------------------------------------------
    zs = zonal_stats(
        vectors=gdf['geometry'],
        raster=array_scaled,
        affine=affine,
        stats=['sum'],
        all_touched=False,
        nodata=nodata,
    )

    # Both frames carry the default 0..n-1 index, so a column-wise concat
    # lines every area up with its own statistic.
    df_concat = pd.concat((df_sel, pd.DataFrame(zs)), axis=1)
    df_concat = df_concat.rename(columns={'sum': column_name})

    return df_concat.to_json(orient='records')

In [15]:
# Run the zonal risk computation with the settings from the configuration cell.
json_out = compute_risk(
    file_shape=file_shape,
    file_grid=file_grid,
    p_segment=p_segment,
    column_name=column_name,
)

In [16]:
import json

In [17]:
# Decode the JSON string returned by compute_risk back into Python objects.
j = json.loads(json_out)

In [19]:
# Turn the decoded records into a DataFrame for inspection.
df = pd.DataFrame(j)

In [20]:
# Preview the first rows of the result.
# NOTE(review): the captured output below shows an empty 'risk' value for
# these rows -- confirm that the areas actually overlap valid grid cells.
df.head()

Unnamed: 0,code,id,name,risk
0,BU18830200,1,Sittard-Geleen: Overhoven,
1,BU18830201,2,Sittard-Geleen: Baandert,
2,BU18830202,3,Sittard-Geleen: Stadbroek,
3,BU18830203,4,Sittard-Geleen: Vrangendael,
4,BU18830204,5,Sittard-Geleen: Broeksittard,


In [23]:
# Sanity check: is the 'code' attribute missing for any area?
df['code'].isna().values.any()

False

In [None]:
if __name__ == '__main__':
    # Imported here because the command-line entry point is the only place
    # that needs it; without this import fire.Fire() raises a NameError.
    import fire

    # With no argument, Fire exposes the module's contents as commands,
    # which is how `compute-risk` in the example invocation reaches
    # compute_risk.
    fire.Fire()