In [194]:
import glob, os
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
import rasterio.mask
import rasterio.plot
import matplotlib.pyplot as plt
from rasterstats import zonal_stats
from pathlib import Path


# Reached only if every import above resolved; serves as a quick environment check.
print('Successfully import of all libraries !')

Successfully import of all libraries !


# Input / Output

## Input

In [195]:
# Root directory that holds every input dataset used by this notebook.
ipath = '/export/projects/Sen4Stat/WorkingData/Delince_DATA_ESP/'

# --- User choices ------------------------------------------------------------
# adm_name  : administrative level to aggregate over (a key of `adm_options`)
# crop_name : crop legend / raster variant to count   (a key of `crop_map_suffixes`)
adm_name  = 'all'
crop_name = 'pr_8'

# adm_name -> (shapefile name, attribute used to label each polygon)
adm_options = {
    'municipality': ('ESP_SITE_2_MUNICIPALITY_32630.shp', 'NAMEUNIT'),
    'province':     ('ESP_SITE_2_PROVINCE_32630.shp',     'NAMEUNIT'),
    'all':          ('ESP_SITE_2_ALL_32630.shp',          'COUNTRY'),
}

# Every crop map shares the same file stem; only the trailing suffix changes
# with the legend, so the stem is written once instead of once per branch.
crop_map_stem = 'ESP_2018_SITE_2_buf_10_LC_all_EXTENT_wall_to_wall_SEG_RATIO_100_LEVEL_grp_1_SD_25_FEAT_2_CLASSIF_RF_OpenCV_v1'
crop_map_suffixes = {
    'grp_1': '',
    'pr_7':  '_reclassify_pr_7_nb',
    'pr_8':  '_reclassify_pr_8_nb',
}

# Fail here, with a readable message, rather than several cells later with a
# NameError (or, worse, with stale values left in the kernel by a previous run).
if adm_name not in adm_options:
    raise ValueError(
        f"Unknown adm_name {adm_name!r}; expected one of {sorted(adm_options)}"
    )
if crop_name not in crop_map_suffixes:
    raise ValueError(
        f"Unknown crop_name {crop_name!r}; expected one of {sorted(crop_map_suffixes)}"
    )

# Administrative boundaries: shapefile path and the column naming each polygon.
adm_file, adm_name_field = adm_options[adm_name]
adm_limit = f'{ipath}Admin_limits_BM/{adm_file}'

# Crop-type raster and the lookup-table column holding its class numbers.
crop_map = f'{ipath}Crop_type_maps/{crop_map_stem}{crop_map_suffixes[crop_name]}.tif'
crop_nb  = f'{crop_name}_nb'

# Spreadsheet mapping class numbers to class names.
lut = f'{ipath}Crop_dictionnary/crop_dictionary.xlsx'

## Output

In [196]:
# Folder that receives the pixel-count workbook.
opath = '/export/projects/Sen4Stat/WorkingData/Delince_DATA_ESP/Count_frequency/'

# Create the folder together with any missing parent; an existing folder is accepted.
out_dir = Path(opath)
out_dir.mkdir(parents=True, exist_ok=True)

# The workbook name encodes the administrative level and the crop legend in use.
crop_count_excel = '{}count_frequency_{}_{}.xlsx'.format(opath, adm_name, crop_name)

### Open vector file with `geopandas`

In [197]:
# Read the administrative-boundary layer chosen in the Input cell (`adm_limit`).
gdf = gpd.read_file(adm_limit)
# Bare expression: shows the attribute table as the cell output.
gdf

Unnamed: 0,COUNTRY,OID_,Name,Shape_Area,INSPIREID,NATLEV,NATLEVNAME,NATCODE,PROVINCE,CODNUT1,CODNUT2,CODNUT3,geometry
0,ES,1,CYL,,ES.IGN.BDDAE.34074000000,https://inspire.ec.europa.eu/codelist/Administ...,Provincia,34074000000,Segovia,ES4,ES41,,MULTIPOLYGON Z (((409800.000 4636045.868 0.000...


In [198]:
# Load the crop dictionary and keep one row per distinct
# (class number, class name) pair of the selected legend.
lut_columns = [crop_nb, crop_name]
lut_df = pd.read_excel(lut)[lut_columns].drop_duplicates()
lut_df

Unnamed: 0,pr_8_nb,pr_8
0,0,Remove
1,111,Wheat
6,112,Maize
7,113,Rice
8,114,Sorghum
9,115,Barley
11,116,Rye
12,117,Oats
13,118,Millets
16,119,Quinoa


In [199]:
# Map each class number to a "<number> - <name>" label, in lookup-table row order.
# If a number appears more than once, the last row wins.
dict_rename = {
    nb: f'{nb} - {name}'
    for nb, name in zip(lut_df[crop_nb], lut_df[crop_name])
}

dict_rename

{0: '0 - Remove',
 111: '111 - Wheat',
 112: '112 - Maize',
 113: '113 - Rice',
 114: '114 - Sorghum',
 115: '115 - Barley',
 116: '116 - Rye',
 117: '117 - Oats',
 118: '118 - Millets',
 119: '119 - Quinoa',
 121: '121 - Leafy or stem vegetables',
 122: '122 - Fruit-bearing vegetables',
 123: '123 - Root, bulb or tuberous vegetables',
 124: '124 - Mushrooms and truffles',
 141: '141 - Soya beans',
 142: '142 - Groundnuts',
 143: '143 - Other oilseed crops',
 151: '151 - Potatoes',
 152: '152 - Sweet potatoes',
 153: '153 - Cassava',
 154: '154 - Yams',
 161: '161 - Spice crops',
 162: '162 - Hops',
 17: '17 - Leguminous crops',
 181: '181 - Sugar beet',
 182: '182 - Sugar cane',
 3: '3 - Non cropland',
 192: '192 - Fibre crops',
 1931: '1931 - Medicinal, aromatic, pesticidal or similar crops',
 1941: '1941 - Flowers crops',
 1991: '1991 - Tobacco',
 2: '2 - Perennial crops'}

## Display crop map with `rasterio` and polygons with `geopandas`

In [200]:
#src = rasterio.open(crop_map, "r")

#fig, ax = plt.subplots(1, figsize=(20, 20))

#rasterio.plot.show(src, cmap='tab20', ax=ax)

#gdf.plot(facecolor='none', edgecolor='black',ax=ax)


# Count of unique pixel values inside polygon

When working with rasters of categorical values (i.e. raster values represent discrete classes), we are often interested in the counts of unique pixel values.

In [201]:
# Options shared by every zonal_stats call: count pixels per class and relabel
# the classes through dict_rename.
# NOTE(review): nodata=-999 is hard-coded; per the rasterstats docs an explicit
# `nodata` takes precedence over the value stored in the raster metadata —
# confirm that -999 is the right choice for this crop map.
zonal_kwargs = dict(categorical=True, category_map=dict_rename, nodata=-999)

# One dictionary of {class label: pixel count} per polygon, tagged with the
# polygon's name under the extra key 'name'.
dict_list = []

for _, feature in gdf.iterrows():
    counts = zonal_stats(feature.geometry, crop_map, **zonal_kwargs)[0]
    counts['name'] = feature[adm_name_field]
    dict_list.append(counts)

# Preview the first two results.
dict_list[0:2]

[{'2 - Perennial crops': 6192047,
  '3 - Non cropland': 95411317,
  '17 - Leguminous crops': 18851732,
  '111 - Wheat': 33671731,
  '112 - Maize': 6062002,
  '115 - Barley': 31418127,
  '116 - Rye': 410072,
  '117 - Oats': 5018141,
  '121 - Leafy or stem vegetables': 24409,
  '143 - Other oilseed crops': 11711670,
  '151 - Potatoes': 329021,
  '181 - Sugar beet': 1526077,
  '1941 - Flowers crops': 9838157,
  'name': 'ES'}]

## Convert list of dictionaries into pandas DataFrame

In [202]:
# One row per administrative unit (indexed by its name), one column per crop class.
# When several units are processed, a class missing from one unit's dictionary
# becomes NaN in that row and forces the whole column to float. For pixel counts
# a missing class means zero pixels, so fill with 0 and restore integer counts.
df = (
    pd.DataFrame(dict_list)
    .set_index('name')
    .fillna(0)
    .astype('int64')
)
df

Unnamed: 0_level_0,2 - Perennial crops,3 - Non cropland,17 - Leguminous crops,111 - Wheat,112 - Maize,115 - Barley,116 - Rye,117 - Oats,121 - Leafy or stem vegetables,143 - Other oilseed crops,151 - Potatoes,181 - Sugar beet,1941 - Flowers crops
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ES,6192047,95411317,18851732,33671731,6062002,31418127,410072,5018141,24409,11711670,329021,1526077,9838157


In [203]:
# Save the count table to the workbook path built in the Output cell (`crop_count_excel`).
df.to_excel(crop_count_excel)