In [1]:
import cdsapi

dataset = "satellite-land-cover"

request = {
    "variable": "all",
    "year": ["2018"],
    "version": ["v2_1_1"],
    "area": [90, -180, 0, 180]
}


client = cdsapi.Client()
client.retrieve(dataset, request).download("/explore/nobackup/people/spotter5/anna_v/v2/esa_2018.zip")



2025-09-12 13:17:56,105 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2025-09-12 13:17:56,428 INFO [2025-07-04T00:00:00] Due to a transition between project phases, there are changes to the timeline of this dataset updates, which are usually on an annual basis with a one year delay: 2023 and 2024 data updates are now expected during 2026. Please watch the [forum](https://forum.ecmwf.int/c/announcements/5) for future announcements.
2025-09-12 13:17:56,429 INFO Request ID is de554d37-b88e-4eca-abb2-5ce46fb0010e
2025-09-12 13:17:56,578 INFO status has been updated to accepted
2025-09-12 13:18:05,306 INFO status has been updated to running
2025-09-12 13:38:20,388 INFO status has been updated to successful
                                                                                          

'/explore/nobackup/people/spotter5/anna_v/v2/esa_2018.zip'

In [3]:
't'

't'

Extract

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import geopandas as gpd
import xarray as xr
import pandas as pd
import numpy as np
from pathlib import Path

# ------------------ Paths ------------------
shapefile_path  = "/explore/nobackup/people/spotter5/anna_v/v2/v2_model_sites.shp"
nc_path         = "/explore/nobackup/people/spotter5/anna_v/v2/C3S-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1.area-subset.90.180.0.-180.nc"
output_csv_path = "/explore/nobackup/people/spotter5/anna_v/v2/esa_landcover.csv"

# ------------------ ESA 2018 class code → label ------------------
CODE_TO_NAME = {
    0:   "No Data",
    10:  "Cropland, rainfed",
    20:  "Cropland, irrigated or post-flooding",
    30:  "Mosaic cropland (>50%) / natural vegetation (<50%)",
    40:  "Mosaic natural vegetation (>50%) / cropland (<50%)",
    50:  "Tree cover, broadleaved, evergreen, closed to open (>15%)",
    60:  "Tree cover, broadleaved, deciduous, closed to open (>15%)",
    70:  "Tree cover, needleleaved, evergreen, closed to open (>15%)",
    80:  "Tree cover, needleleaved, deciduous, closed to open (>15%)",
    90:  "Tree cover, mixed leaf type (broadleaved and needleleaved)",
    100: "Mosaic tree and shrub (>50%) / herbaceous (<50%)",
    110: "Mosaic herbaceous (>50%) / tree and shrub (<50%)",
    120: "Shrubland",
    130: "Grassland",
    140: "Lichens and mosses",
    150: "Sparse vegetation (<15%)",
    160: "Tree cover, flooded, fresh or brackish water",
    170: "Tree cover, flooded, saline water",
    180: "Shrub/herbaceous cover, flooded, fresh/saline/brackish",
    190: "Urban areas",
    200: "Bare areas",
    210: "Water bodies",
    220: "Permanent snow and ice",
}

def normalize_longitudes(lon_series: pd.Series) -> pd.Series:
    """Wrap any longitudes into [-180, 180]."""
    lon = lon_series.astype(float).values
    lon = ((lon + 180.0) % 360.0) - 180.0
    return pd.Series(lon, index=lon_series.index)

def main():
    print("Loading shapefile...")
    sites_gdf = gpd.read_file(shapefile_path)

    print("Opening NetCDF and selecting 2018 land-cover slice...")
    ds = xr.open_dataset(nc_path, engine="netcdf4")

    # Pick first (only) time; ensure lat/lon ascending for nearest selection
    da = ds["lccs_class"].sel(time=ds["time"][0]).sortby("lat").sortby("lon")

    # Reproject sites to EPSG:4326 to match lat/lon degrees
    print("Reprojecting sites to EPSG:4326...")
    sites_ll = sites_gdf.to_crs(4326).copy()

    # Normalize longitudes into [-180, 180] to match dataset range
    print("Normalizing site longitudes to [-180, 180]...")
    sites_ll["longitude"] = normalize_longitudes(pd.Series(sites_ll.geometry.x, index=sites_ll.index))
    sites_ll["latitude"]  = pd.Series(sites_ll.geometry.y, index=sites_ll.index)

    # Vectorized nearest-neighbor sampling
    print("Sampling nearest land-cover codes...")
    x_lon = xr.DataArray(sites_ll["longitude"].values, dims="point", name="lon")
    y_lat = xr.DataArray(sites_ll["latitude"].values,  dims="point", name="lat")

    sampled = da.sel(lon=x_lon, lat=y_lat, method="nearest")

    # Respect nodata if the dataset uses one (rare for uint8 categorical here)
    vals = sampled.values
    nodata = da.encoding.get("_FillValue", None)
    if nodata is not None and not np.isnan(nodata):
        vals = np.where(vals == nodata, np.nan, vals)

    # Cast to Int while preserving NaN
    codes = pd.Series(vals).astype("float")
    codes = codes.where(codes.isna(), codes.round().astype("Int64"))

    sites_ll["land_cover_code"] = codes.values
    sites_ll["land_cover_name"] = sites_ll["land_cover_code"].map(CODE_TO_NAME)

    # Keep only rows where site_refer contains "tower"
    print("Filtering to 'tower' rows...")
    final_df = sites_ll.drop(columns="geometry").copy()
    mask_tower = final_df.get("site_refer", pd.Series("", index=final_df.index)).astype(str).str.contains("tower", case=False, na=False)
    kept = int(mask_tower.sum())
    final_df = final_df[mask_tower].copy()
    print(f"Kept {kept} rows where site_refer contained 'tower'.")

    # Do not drop code==0 ("No Data"); only drop rows where sampling failed (NaN)
    final_df = final_df.dropna(subset=["land_cover_code"])

    # Ensure preferred column names present
    # (latitude/longitude already added above)
    out_path = Path(output_csv_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    print(f"Saving results to {output_csv_path}...")
    final_df.to_csv(output_csv_path, index=False)

    print("\nDone. Preview:")
    with pd.option_context("display.max_columns", None, "display.width", 200):
        print(final_df.head())

if __name__ == "__main__":
    main()


Loading shapefile...
Opening NetCDF and selecting 2018 land-cover slice...
Reprojecting sites to EPSG:4326...
Normalizing site longitudes to [-180, 180]...
Sampling nearest land-cover codes...


In [4]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import geopandas as gpd
import xarray as xr
import pandas as pd
import numpy as np
from pathlib import Path

shapefile_path  = "/explore/nobackup/people/spotter5/anna_v/v2/v2_model_sites.shp"
nc_path         = "/explore/nobackup/people/spotter5/anna_v/v2/C3S-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1.area-subset.90.180.0.-180.nc"
output_csv_path = "/explore/nobackup/people/spotter5/anna_v/v2/esa_landcover.csv"

CODE_TO_NAME = {
    0:"No Data",10:"Cropland, rainfed",20:"Cropland, irrigated or post-flooding",
    30:"Mosaic cropland (>50%) / natural vegetation (<50%)",
    40:"Mosaic natural vegetation (>50%) / cropland (<50%)",
    50:"Tree cover, broadleaved, evergreen, closed to open (>15%)",
    60:"Tree cover, broadleaved, deciduous, closed to open (>15%)",
    70:"Tree cover, needleleaved, evergreen, closed to open (>15%)",
    80:"Tree cover, needleleaved, deciduous, closed to open (>15%)",
    90:"Tree cover, mixed leaf type (broadleaved and needleleaved)",
    100:"Mosaic tree and shrub (>50%) / herbaceous (<50%)",
    110:"Mosaic herbaceous (>50%) / tree and shrub (<50%)",
    120:"Shrubland",130:"Grassland",140:"Lichens and mosses",
    150:"Sparse vegetation (<15%)",160:"Tree cover, flooded, fresh or brackish water",
    170:"Tree cover, flooded, saline water",
    180:"Shrub/herbaceous cover, flooded, fresh/saline/brackish",
    190:"Urban areas",200:"Bare areas",210:"Water bodies",220:"Permanent snow and ice",
}

def wrap180(lon):
    return ((lon + 180.0) % 360.0) - 180.0

def nearest_index(sorted_coords_1d, values, ascending=True):
    """
    Map each value to the nearest index of sorted_coords_1d.
    Works for ascending or descending coordinate vectors without copying the data.
    Returns an integer array of indices.
    """
    coords = sorted_coords_1d
    v = values

    if ascending:
        idx = np.searchsorted(coords, v, side="left")
        idx = np.clip(idx, 1, len(coords)-1)
        # pick closer of idx-1 and idx
        left = coords[idx-1]; right = coords[idx]
        choose_left = (v - left) <= (right - v)
        return np.where(choose_left, idx-1, idx)
    else:
        # for descending coords, flip the comparison by negating
        coords_neg = -coords
        v_neg = -v
        idx = np.searchsorted(coords_neg, v_neg, side="left")
        idx = np.clip(idx, 1, len(coords_neg)-1)
        left = coords_neg[idx-1]; right = coords_neg[idx]
        choose_left = (v_neg - left) <= (right - v_neg)
        return np.where(choose_left, idx-1, idx)

def main():
    print("Loading shapefile...")
    sites = gpd.read_file(shapefile_path).to_crs(4326)

    # Filter early to reduce work
    site_refer = sites.get("site_refer")
    if site_refer is not None:
        mask_tower = site_refer.astype(str).str.contains("tower", case=False, na=False)
        sites = sites.loc[mask_tower].copy()
        print(f"Rows with 'tower' in site_refer: {mask_tower.sum()}")

    # Extract lon/lat arrays
    lons = wrap180(sites.geometry.x.values.astype(float))
    lats = sites.geometry.y.values.astype(float)

    print("Opening NetCDF (lazy) ...")
    # Open without sorting; don’t decode to cftime unless needed
    ds = xr.open_dataset(nc_path, engine="netcdf4", decode_coords="all", use_cftime=False)

    # Take the first time slice (2018 in this file)
    da = ds["lccs_class"].sel(time=ds["time"][0])

    # Pull coordinate vectors (NumPy) — no sort/rechunk
    lat_coord = da.coords["lat"].values
    lon_coord = da.coords["lon"].values

    lat_ascending = np.all(np.diff(lat_coord) > 0)
    lon_ascending = np.all(np.diff(lon_coord) > 0)
    print(f"Lat ascending? {lat_ascending}; Lon ascending? {lon_ascending}")

    # Map each point to nearest row/col index (pure NumPy, very fast)
    print("Indexing nearest pixels...")
    i_lat = nearest_index(lat_coord, lats, ascending=lat_ascending)
    j_lon = nearest_index(lon_coord, lons, ascending=lon_ascending)

    # Load the 2D band once into memory, then fancy-index
    print("Loading land-cover band to memory once ...")
    band = np.asarray(da.values)  # shape (nlat, nlon); usually uint8

    # Fancy indexing to sample all points at once
    vals = band[i_lat, j_lon]

    # Handle nodata if present
    nodata = da.encoding.get("_FillValue", None)
    if nodata is not None:
        vals = np.where(vals == nodata, np.nan, vals)

    # Prepare output DataFrame
    out = sites.drop(columns="geometry").copy()
    out["longitude"] = lons
    out["latitude"]  = lats

    codes = pd.Series(vals, index=out.index, dtype="float")
    out["land_cover_code"] = codes.where(codes.isna(), codes.round()).astype("Int64")
    out["land_cover_name"] = out["land_cover_code"].map(CODE_TO_NAME)

    # Drop only rows where sampling failed (NaN); keep code==0 ("No Data")
    out = out.dropna(subset=["land_cover_code"])

    # Save
    Path(output_csv_path).parent.mkdir(parents=True, exist_ok=True)
    out.to_csv(output_csv_path, index=False)
    print(f"Saved: {output_csv_path}")
    with pd.option_context("display.max_columns", None, "display.width", 200):
        print(out.head())

if __name__ == "__main__":
    main()


Loading shapefile...
Rows with 'tower' in site_refer: 208
Opening NetCDF (lazy) ...
Lat ascending? False; Lon ascending? True
Indexing nearest pixels...
Loading land-cover band to memory once ...
Saved: /explore/nobackup/people/spotter5/anna_v/v2/esa_landcover.csv
                                          site_refer   latitude   longitude  land_cover_code                                    land_cover_name
1                         Fyodorovskoye_RU-Fyo_tower  56.461528   32.922083               90  Tree cover, mixed leaf type (broadleaved and n...
2  Saskatchewan - Western Boreal, Mature Aspen_CA...  53.628900 -106.197800               61                                                NaN
3  Saskatchewan - Western Boreal, Mature Jack Pin...  53.916300 -104.692000               71                                                NaN
4                            Flakaliden_SE-Fla_tower  64.112778   19.456944               70  Tree cover, needleleaved, evergreen, closed to...
5              

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Sample ESA-CCI land-cover codes at site points.
If the sampled pixel is 0 ("No Data"), replace it with the nearest non-zero
neighbor within a 5-pixel radius; otherwise keep 0.
Saves a CSV with code and human-readable name.
"""

import geopandas as gpd
import xarray as xr
import pandas as pd
import numpy as np
from pathlib import Path

# ------------------ Paths ------------------
shapefile_path  = "/explore/nobackup/people/spotter5/anna_v/v2/v2_model_sites.shp"
nc_path         = "/explore/nobackup/people/spotter5/anna_v/v2/C3S-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1.area-subset.90.180.0.-180.nc"
output_csv_path = "/explore/nobackup/people/spotter5/anna_v/v2/esa_landcover.csv"

# ------------------ Legend: CODE -> NAME ------------------
CODE_TO_NAME = {
    0:  "No Data",

    # Agriculture
    10: "Rainfed cropland", 11: "Rainfed cropland", 12: "Rainfed cropland",
    20: "Irrigated cropland",
    30: "Mosaic cropland (>50%) / natural vegetation (<50%)",
    40: "Mosaic natural vegetation (>50%) / cropland (<50%)",

    # Forest
    50: "Tree cover, broadleaved, evergreen, closed to open (>15%)",
    60: "Tree cover, broadleaved, deciduous, closed to open (>15%)",
    61: "Tree cover, broadleaved, deciduous, closed to open (>15%)",
    62: "Tree cover, broadleaved, deciduous, closed to open (>15%)",
    70: "Tree cover, needleleaved, evergreen, closed to open (>15%)",
    71: "Tree cover, needleleaved, evergreen, closed to open (>15%)",
    72: "Tree cover, needleleaved, evergreen, closed to open (>15%)",
    80: "Tree cover, needleleaved, deciduous, closed to open (>15%)",
    81: "Tree cover, needleleaved, deciduous, closed to open (>15%)",
    82: "Tree cover, needleleaved, deciduous, closed to open (>15%)",
    90: "Tree cover, mixed leaf type (broadleaved and needleleaved)",
    100:"Mosaic tree and shrub (>50%) / herbaceous cover (<50%)",
    160:"Tree cover, flooded, fresh or brackish water",
    170:"Tree cover, flooded, saline water",

    # Grassland
    110:"Mosaic herbaceous cover (>50%) / tree and shrub (<50%)",
    130:"Grassland",

    # Wetland
    180:"Shrub or herbaceous cover, flooded, fresh–saline or brackish water",

    # Settlement
    190:"Urban areas",

    # Other
    120:"Shrubland", 121:"Shrubland", 122:"Shrubland",
    140:"Lichens and mosses",
    150:"Sparse vegetation", 151:"Sparse vegetation",
    152:"Sparse vegetation", 153:"Sparse vegetation",
    200:"Bare areas", 201:"Bare areas", 202:"Bare areas",
    210:"Water bodies",
    220:"Permanent snow and ice",
}

# Max search radius (in pixels) used when sampled pixel is 0 ("No Data")
MAX_PIXELS = 5


# ------------------ Helpers ------------------
def wrap180(lon):
    """Wrap longitudes into [-180, 180]."""
    return ((lon + 180.0) % 360.0) - 180.0

def nearest_index(sorted_coords_1d, values, ascending=True):
    """
    Map each value to the nearest index of sorted_coords_1d (ascending or descending).
    Returns integer array of indices for 'values'.
    """
    coords = sorted_coords_1d
    v = values
    if ascending:
        idx = np.searchsorted(coords, v, side="left")
        idx = np.clip(idx, 1, len(coords)-1)
        left = coords[idx-1]; right = coords[idx]
        return np.where((v - left) <= (right - v), idx-1, idx)
    else:
        coords_neg, v_neg = -coords, -v
        idx = np.searchsorted(coords_neg, v_neg, side="left")
        idx = np.clip(idx, 1, len(coords_neg)-1)
        left = coords_neg[idx-1]; right = coords_neg[idx]
        return np.where((v_neg - left) <= (right - v_neg), idx-1, idx)

def fill_zero_with_nearest(band, i_lat, j_lon, max_pixels=5):
    """
    For each sampled (i,j) where value == 0, search within a square window with
    Chebyshev radius 'max_pixels' for the nearest non-zero pixel (Euclidean distance).
    If found, replace; otherwise keep 0. Returns filled values array.
    """
    nlat, nlon = band.shape
    vals = band[i_lat, j_lon].copy()
    zero_idx = np.where(vals == 0)[0]
    if zero_idx.size == 0:
        return vals

    for k in zero_idx:
        ci = int(i_lat[k]); cj = int(j_lon[k])

        i0 = max(0, ci - max_pixels)
        i1 = min(nlat, ci + max_pixels + 1)
        j0 = max(0, cj - max_pixels)
        j1 = min(nlon, cj + max_pixels + 1)

        window = band[i0:i1, j0:j1]
        if window.size == 0:
            continue

        # indices of non-zero candidates
        nz = np.nonzero(window)
        if nz[0].size == 0:
            continue

        cand_i = nz[0] + i0
        cand_j = nz[1] + j0

        # choose the candidate with minimum Euclidean distance
        di = cand_i - ci
        dj = cand_j - cj
        best = np.argmin(di*di + dj*dj)
        vals[k] = band[cand_i[best], cand_j[best]]

    return vals


# ------------------ Main ------------------
def main():
    print("Loading shapefile...")
    sites = gpd.read_file(shapefile_path).to_crs(4326)

    # Optional: keep only rows with 'tower' in site_refer (comment out if not desired)
    if "site_refer" in sites.columns:
        mask = sites["site_refer"].astype(str).str.contains("tower", case=False, na=False)
        print(f"Rows with 'tower' in site_refer: {mask.sum()}")
        sites = sites.loc[mask].copy()

    # Extract lon/lat
    lons = wrap180(sites.geometry.x.values.astype(float))
    lats = sites.geometry.y.values.astype(float)

    print("Opening NetCDF (lazy)…")
    ds = xr.open_dataset(nc_path, engine="netcdf4", decode_coords="all", use_cftime=False)

    # First/only time slice (2018)
    da = ds["lccs_class"].sel(time=ds["time"][0])

    # Coordinate vectors
    lat_coord = da.coords["lat"].values
    lon_coord = da.coords["lon"].values
    lat_asc = np.all(np.diff(lat_coord) > 0)
    lon_asc = np.all(np.diff(lon_coord) > 0)
    print(f"Lat ascending? {lat_asc}; Lon ascending? {lon_asc}")

    # Map points to nearest pixel indices
    print("Indexing nearest pixels…")
    i_lat = nearest_index(lat_coord, lats, ascending=lat_asc)
    j_lon = nearest_index(lon_coord, lons, ascending=lon_asc)

    # Load the 2D band once
    print("Loading land-cover band…")
    band = np.asarray(da.values)  # uint8 typically, shape (nlat, nlon)

    # Initial sampling
    vals = band[i_lat, j_lon]

    # Treat encoded nodata (if present) as zero-equivalent for the fill step
    nodata = da.encoding.get("_FillValue", None)
    if nodata is not None:
        vals = np.where(vals == nodata, 0, vals)

    # Replace 0 with nearest non-zero within MAX_PIXELS
    print(f"Filling 'No Data' (0) with nearest non-zero within {MAX_PIXELS} pixels…")
    vals_filled = fill_zero_with_nearest(band, i_lat, j_lon, max_pixels=MAX_PIXELS)

    # Build output table
    out = sites.drop(columns="geometry").copy()
    out["longitude"] = lons
    out["latitude"]  = lats

    codes = pd.Series(vals_filled, index=out.index, dtype="float")
    out["land_cover_code"] = codes.where(codes.isna(), codes.round()).astype("Int64")
    out["land_cover_name"] = out["land_cover_code"].map(CODE_TO_NAME)

    # Keep rows even if still 0; only drop true NaN sampling failures
    out = out.dropna(subset=["land_cover_code"])

    # Save CSV
    Path(output_csv_path).parent.mkdir(parents=True, exist_ok=True)
    out.to_csv(output_csv_path, index=False)
    print(f"Saved: {output_csv_path}")

    with pd.option_context("display.max_columns", None, "display.width", 200):
        print(out.head())

if __name__ == "__main__":
    main()


Loading shapefile...
Rows with 'tower' in site_refer: 208
Opening NetCDF (lazy)…
Lat ascending? False; Lon ascending? True
Indexing nearest pixels…
Loading land-cover band…


In [7]:
import xarray as xr
import rioxarray as rxr

nc_path      = "/explore/nobackup/people/spotter5/anna_v/v2/C3S-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1.area-subset.90.180.0.-180.nc"
output_tif   = "/explore/nobackup/people/spotter5/anna_v/v2/esa_landcover_2018.tif"

# Open the NetCDF and select the 2018 land-cover band
ds = xr.open_dataset(nc_path, engine="netcdf4")
da = ds["lccs_class"].sel(time=ds["time"][0])

# Attach CRS and write to GeoTIFF (keep integer codes)
da.rio.write_crs("EPSG:4326", inplace=True)
da.astype("int16").rio.to_raster(
    output_tif,
    compress="LZW",
    dtype="int16",
    tiled=True,
    blockxsize=256,
    blockysize=256
)

print(f"Saved full 2018 land-cover grid to {output_tif}")


Saved full 2018 land-cover grid to /explore/nobackup/people/spotter5/anna_v/v2/esa_landcover_2018.tif


In [None]:
't'

In [5]:
import pandas as pd

df = pd.read_csv("/explore/nobackup/people/spotter5/anna_v/v2/all_landcover_classes.csv")
esa = pd.read_csv("/explore/nobackup/people/spotter5/anna_v/v2/esa_landcover.csv")
esa = esa.rename(columns = {'site_refer': 'site_reference',
                            'land_cover_name': 'esa_cci'})

esa = esa[['site_reference', 'esa_cci']]

df = pd.merge(df, esa, on = 'site_reference', how = 'left')

df = df[['site_reference', 'latitude', 'longitude', 'land_cover_eco', 'bawld_class', 'esa_cci', 'land_cover']]

df = df.rename(columns = {'land_cover': 'TEM'})

df.to_csv("/explore/nobackup/people/spotter5/anna_v/v2/all_landcover_classes_v2.csv", index = False)



In [6]:
 df['esa_cci'].isna().sum()

30

In [11]:
df

Unnamed: 0,site_reference,latitude,longitude,land_cover_eco,bawld_class,esa_cci,TEM
0,Skyttorp 2_SE-Sk2_tower,60.129667,17.840056,"Tree cover, needleleaved, evergreen",Boreal Forest,"Tree cover, needleleaved, evergreen, closed to...",Herbaceous
1,Wolf_creek_forest_CA-WCF_tower,60.596886,-134.952833,"Tree cover, needleleaved, evergreen",Boreal Forest,,Shrub
2,"Alberta - Western Peatland - LaBiche River,Bla...",54.953840,-112.466980,"Tree cover, flooded, fresh or brackish water",Fen,"Tree cover, needleleaved, evergreen, closed to...",Spruce
3,Elgeeii forest station_RU-Ege_tower,60.015516,133.824012,"Tree cover, mixed leaf type",Boreal Forest,"Tree cover, needleleaved, deciduous, closed to...",Larch
4,Faejemyr_SE-Faj_tower,56.265500,13.553500,"Shrub/herbaceous cover, flooded (fresh/saline/...",Bog,"Shrub/herbaceous cover, flooded, fresh/saline/...",Unclassified
...,...,...,...,...,...,...,...
204,Barrow-CMDL_US-Brw_tower,71.322525,-156.609200,"Shrub/herbaceous cover, flooded (fresh/saline/...",Wet Tundra,Lichens and mosses,Gram-Tundra
205,"Bayelva, Spitsbergen_SJ-Blv_tower",78.921600,11.831100,Grassland,Dry Tundra,Shrubland,Barren/Other
206,"Bayelva, Spitsbergen_SJ-Blv_tower",78.921600,11.831100,Grassland,Dry Tundra,Shrubland,Barren/Other
207,Central Marsh_US-Cms_tower,71.320190,-156.622270,"Shrub/herbaceous cover, flooded (fresh/saline/...",Wet Tundra,Lichens and mosses,Gram-Tundra


In [12]:
df['esa_cci'].value_counts()

Tree cover, needleleaved, evergreen, closed to open (>15%)    41
Sparse vegetation (<15%)                                      28
Lichens and mosses                                            22
Shrub/herbaceous cover, flooded, fresh/saline/brackish        21
Tree cover, needleleaved, deciduous, closed to open (>15%)    14
Grassland                                                     12
Shrubland                                                     12
Tree cover, mixed leaf type (broadleaved and needleleaved)     8
Water bodies                                                   5
Bare areas                                                     5
Mosaic tree and shrub (>50%) / herbaceous (<50%)               5
Tree cover, flooded, fresh or brackish water                   3
Mosaic natural vegetation (>50%) / cropland (<50%)             1
Cropland, rainfed                                              1
Tree cover, broadleaved, deciduous, closed to open (>15%)      1
Name: esa_cci, dtype: int

In [14]:
df['esa_cci'].isna().sum()

30