# Example usage of py-madaclim

## Raster layers metadata with MadaclimLayers class from py_madaclim.info module

### Layers dataframe containing all metadata

In [1]:
from py_madaclim.info import MadaclimLayers

Instantiate a `MadaclimLayers` object and explore the information about the rasters in the Madaclim db 

In [2]:
# Instantiate the MadaclimLayers without the rasters
mada_info = MadaclimLayers()

print(mada_info)

MadaclimLayers(
	all_layers = DataFrame(79 rows x 6 columns)
	categorical_layers = DataFrame(Layers 75, 76, 77, 78 with a total of 79 categories
	public methods -> download_data, fetch_specific_layers, get_categorical_combinations
			 get_layers_labels, select_geoclim_type_layers
)


The `all_layers` property is a Dataframe containing all layers information

In [3]:
print(mada_info.all_layers.shape)
mada_info.all_layers

(79, 6)


Unnamed: 0,geoclim_type,layer_number,layer_name,layer_description,is_categorical,units
0,clim,1,tmin1,Monthly minimum temperature - January,False,°C x 10
1,clim,2,tmin2,Monthly minimum temperature - February,False,°C x 10
2,clim,3,tmin3,Monthly minimum temperature - March,False,°C x 10
3,clim,4,tmin4,Monthly minimum temperature - April,False,°C x 10
4,clim,5,tmin5,Monthly minimum temperature - May,False,°C x 10
...,...,...,...,...,...,...
74,env,75,geo,Rock types,True,"[1=Alluvial_&_Lake_deposits, 2=Unconsolidated_..."
75,env,76,soi,Soil types,True,"[1=Bare_Rocks, 2=Raw_Lithic_Mineral_Soils, 3=P..."
76,env,77,veg,Vegetation types,True,"[1=VegCat_01, 2=VegCat_02, 3=VegCat_03, 4=VegC..."
77,env,78,wat,Watersheds,True,"[1=CoE_N-Bemarivo, 2=CoE_S-Bemarivo_N-Mangoro,..."


An in-depth look at the categorical layers in the Madaclim db with the `categorical_layers` property

In [4]:
mada_info.categorical_layers.head()

Unnamed: 0,geoclim_type,layer_number,layer_name,layer_description,raster_value,category
0,env,75,geo,Rock types,1,Alluvial_&_Lake_deposits
1,env,75,geo,Rock types,2,Unconsolidated_Sands
2,env,75,geo,Rock types,4,Mangrove_Swamp
3,env,75,geo,Rock types,5,Tertiary_Limestones_+_Marls_&_Chalks
4,env,75,geo,Rock types,6,Sandstones


The Madaclim db contains 'climate' (`clim`) and 'environmental' (`env`) type raster layers. Filter the dataframe by type with the `select_geoclim_type_layers` method.

In [5]:
mada_info.select_geoclim_type_layers(geoclim_type="env")

Unnamed: 0,geoclim_type,layer_number,layer_name,layer_description,is_categorical,units
70,env,71,alt,Altitude,False,meters
71,env,72,slo,Slope,False,degrees
72,env,73,asp,Aspect; clockwise from North,False,degrees
73,env,74,solrad,"Solar radiation; computed from altitude, slope...",False,Wh.m-2.day-1
74,env,75,geo,Rock types,True,"[1=Alluvial_&_Lake_deposits, 2=Unconsolidated_..."
75,env,76,soi,Soil types,True,"[1=Bare_Rocks, 2=Raw_Lithic_Mineral_Soils, 3=P..."
76,env,77,veg,Vegetation types,True,"[1=VegCat_01, 2=VegCat_02, 3=VegCat_03, 4=VegC..."
77,env,78,wat,Watersheds,True,"[1=CoE_N-Bemarivo, 2=CoE_S-Bemarivo_N-Mangoro,..."
78,env,79,forcov,Percentage of forest cover in 1 km by 1 km gri...,False,%


### Layers labels for downstream applications

Extract the layers labels in a simple format with the `get_layers_labels` method. All values are unique

In [6]:
all_labels = mada_info.get_layers_labels()    # Defaults to "all" for all layers
all_labels[:5]

['layer_1', 'layer_2', 'layer_3', 'layer_4', 'layer_5']

In [7]:
# Extract environmental layers only
env_layers = mada_info.get_layers_labels(layers_subset="env")
env_layers

['layer_71',
 'layer_72',
 'layer_73',
 'layer_74',
 'layer_75',
 'layer_76',
 'layer_77',
 'layer_78',
 'layer_79']

Option for a more detailed label format

In [8]:
# Get a single layer with a more descriptive label
mada_info.get_layers_labels(37, as_descriptive_labels=True)

['clim_37_bio1_Annual mean temperature (degrees)']

In [9]:
# For categorical layers, the descriptive labels also list the possible raster values (categ_vals) of each layer
categ_layers_nums = list(mada_info.categorical_layers["layer_number"].unique())
mada_info.get_layers_labels(categ_layers_nums, as_descriptive_labels=True)

['env_75_geo_Rock types (categ_vals: 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13)',
 'env_76_soi_Soil types (categ_vals: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)',
 'env_77_veg_Vegetation types (categ_vals: 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 22, 23, 25)',
 'env_78_wat_Watersheds (categ_vals: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)']

Extract a subset of layers based on layer number, or take advantage of the descriptive naming structure of the `as_descriptive_labels` output:

 \<geoclim_type>\_\<layer_number>\_\<unique_layername>\_\<details>

In [10]:
# Layers subset with a more descriptive label
labels_37_55 = mada_info.get_layers_labels(list(range(37, 56)), as_descriptive_labels=True)

# Or using the unique_layername
bioclim_labels = [label for label in mada_info.get_layers_labels(as_descriptive_labels=True) if "bio" in label]
assert labels_37_55 == bioclim_labels

### Fetch specific layers

Using the previously generated labels, or the layer numbers, we can fetch specific data from the `all_layers` df

In [11]:
# From a list of layer_numbers
mada_info.fetch_specific_layers([1, 15, 55, 71])

Unnamed: 0,geoclim_type,layer_number,layer_name,layer_description,is_categorical,units
0,clim,1,tmin1,Monthly minimum temperature - January,False,°C x 10
14,clim,15,tmax3,Monthly maximum temperature - March,False,°C x 10
54,clim,55,bio19,Precipitation of coldest quarter,False,mm.3months-1
70,env,71,alt,Altitude,False,meters


In [12]:
# Using the output from `get_layers_labels` method
mada_info.fetch_specific_layers(layers_labels=bioclim_labels)

Unnamed: 0,geoclim_type,layer_number,layer_name,layer_description,is_categorical,units
36,clim,37,bio1,Annual mean temperature,False,degrees
37,clim,38,bio2,Mean diurnal range,False,mean of monthly max temp - monthy min temp
38,clim,39,bio3,Isothermality = BIO2/BIO7 x 100,False,No units
39,clim,40,bio4,Temperature seasonality,False,standard deviation x 100
40,clim,41,bio5,Max temperature of warmest month,False,degrees
41,clim,42,bio6,Min temperature of coldest month,False,degrees
42,clim,43,bio7,Temperature annual range = BIO5-BIO6,False,degrees
43,clim,44,bio8,Mean temperature of wettest quarter,False,degrees
44,clim,45,bio9,Mean temperature of driest quarter,False,degrees
45,clim,46,bio10,Mean temperature of warmest quarter,False,degrees


In [13]:
# or from simple labels
print(env_layers)
mada_info.fetch_specific_layers(env_layers,)

['layer_71', 'layer_72', 'layer_73', 'layer_74', 'layer_75', 'layer_76', 'layer_77', 'layer_78', 'layer_79']


Unnamed: 0,geoclim_type,layer_number,layer_name,layer_description,is_categorical,units
70,env,71,alt,Altitude,False,meters
71,env,72,slo,Slope,False,degrees
72,env,73,asp,Aspect; clockwise from North,False,degrees
73,env,74,solrad,"Solar radiation; computed from altitude, slope...",False,Wh.m-2.day-1
74,env,75,geo,Rock types,True,"[1=Alluvial_&_Lake_deposits, 2=Unconsolidated_..."
75,env,76,soi,Soil types,True,"[1=Bare_Rocks, 2=Raw_Lithic_Mineral_Soils, 3=P..."
76,env,77,veg,Vegetation types,True,"[1=VegCat_01, 2=VegCat_02, 3=VegCat_03, 4=VegC..."
77,env,78,wat,Watersheds,True,"[1=CoE_N-Bemarivo, 2=CoE_S-Bemarivo_N-Mangoro,..."
78,env,79,forcov,Percentage of forest cover in 1 km by 1 km gri...,False,%


Pass in any valid positional args (column names of the `all_layers` df) to extract the data as a nested-dict structure

In [14]:
try:
    mada_info.fetch_specific_layers(10, "layer_info")
except Exception as e:
    print(e)

"Invalid args: ['layer_info']. Args must be one of a key of ['geoclim_type', 'layer_number', 'layer_name', 'layer_description', 'is_categorical', 'units'] or 'all'"


In [15]:
# Fetch as a dict keyed by layer_<num>, keeping only the columns passed as positional args
mada_info.fetch_specific_layers([15, 55, 75], "geoclim_type", "layer_name", "is_categorical")

{'layer_15': {'geoclim_type': 'clim',
  'layer_name': 'tmax3',
  'is_categorical': False},
 'layer_55': {'geoclim_type': 'clim',
  'layer_name': 'bio19',
  'is_categorical': False},
 'layer_75': {'geoclim_type': 'env',
  'layer_name': 'geo',
  'is_categorical': True}}

In [16]:
# Get all keys with the `all` argument
mada_info.fetch_specific_layers(bioclim_labels[0], "all")

{'layer_37': {'geoclim_type': 'clim',
  'layer_number': 37,
  'layer_name': 'bio1',
  'layer_description': 'Annual mean temperature',
  'is_categorical': False,
  'units': 'degrees'}}

### Categorical layers

The `categorical_layers` property contains a detailed Dataframe of all the raster layers with categorical data

In [17]:
mada_info.categorical_layers.head()

Unnamed: 0,geoclim_type,layer_number,layer_name,layer_description,raster_value,category
0,env,75,geo,Rock types,1,Alluvial_&_Lake_deposits
1,env,75,geo,Rock types,2,Unconsolidated_Sands
2,env,75,geo,Rock types,4,Mangrove_Swamp
3,env,75,geo,Rock types,5,Tertiary_Limestones_+_Marls_&_Chalks
4,env,75,geo,Rock types,6,Sandstones


The `get_categorical_combinations` method formats the data as a dict structure of layers/possible categories combinations

In [18]:
# as dict format for all layers
print(mada_info.get_categorical_combinations().keys())
mada_info.get_categorical_combinations()["layer_75"]

dict_keys(['layer_75', 'layer_76', 'layer_77', 'layer_78'])


{1: 'Alluvial_&_Lake_deposits',
 2: 'Unconsolidated_Sands',
 4: 'Mangrove_Swamp',
 5: 'Tertiary_Limestones_+_Marls_&_Chalks',
 6: 'Sandstones',
 7: 'Mesozoic_Limestones_+_Marls_(inc._"Tsingy")',
 9: 'Lavas_(including_Basalts_&_Gabbros)',
 10: 'Basement_Rocks_(Ign_&_Met)',
 11: 'Ultrabasics',
 12: 'Quartzites',
 13: 'Marble_(Cipolin)'}

In [19]:
# If a single layer is specified
mada_info.get_categorical_combinations("layer_76")

{'layer_76': {1: 'Bare_Rocks',
  2: 'Raw_Lithic_Mineral_Soils',
  3: 'Poorly_Evolved_Erosion_Soils,_Lithosols',
  4: 'Wind-deposited_Soils',
  5: 'Alluvio-colluvial_Deposited_Soils',
  6: 'Salty_Deposited_Soils',
  7: 'Fluvio-marine_Deposited_Soils_-_Mangroves',
  8: 'Vertisols',
  9: 'Andosols',
  10: 'Podzolic_Soils_and_Podzols',
  11: 'Hydromorphic_Soils',
  12: 'Red_Fersiallitic_Soils',
  13: 'Yellow_Fersiallitic_Soils',
  14: 'Yellow-red_Ferruginous_Soils_(Red_Sands)',
  15: 'Red_Ferruginous_Soils',
  16: 'Skeletal_Shallow_Eroded_Ferruginous_Soils',
  17: 'Typical_Reddish-brown_Ferralitic_Soils_with_Little_Degrading_Structure',
  18: 'Humic_Ferralitic_Soils',
  19: 'Indurated-Concretion_Ferralitic_Soils',
  20: 'Humic_Rejuvenated_Ferralitic_Soils',
  21: 'Rejuvenated_Ferralitic_Soils_with_Little_Degrading_Structure',
  22: 'Rejuvenated_Ferralitic_Soils_with_Degrading_Structure',
  23: 'Highly_Rejuvenated,_Penevoluted_Ferralitic_Soils'}}

In [20]:
# To match the descriptive labels from the previous output, one can also specify
mada_info.get_categorical_combinations("layer_76", as_descriptive_keys=True)

{'env_76_soi_Soil types (categ_vals: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)': {1: 'Bare_Rocks',
  2: 'Raw_Lithic_Mineral_Soils',
  3: 'Poorly_Evolved_Erosion_Soils,_Lithosols',
  4: 'Wind-deposited_Soils',
  5: 'Alluvio-colluvial_Deposited_Soils',
  6: 'Salty_Deposited_Soils',
  7: 'Fluvio-marine_Deposited_Soils_-_Mangroves',
  8: 'Vertisols',
  9: 'Andosols',
  10: 'Podzolic_Soils_and_Podzols',
  11: 'Hydromorphic_Soils',
  12: 'Red_Fersiallitic_Soils',
  13: 'Yellow_Fersiallitic_Soils',
  14: 'Yellow-red_Ferruginous_Soils_(Red_Sands)',
  15: 'Red_Ferruginous_Soils',
  16: 'Skeletal_Shallow_Eroded_Ferruginous_Soils',
  17: 'Typical_Reddish-brown_Ferralitic_Soils_with_Little_Degrading_Structure',
  18: 'Humic_Ferralitic_Soils',
  19: 'Indurated-Concretion_Ferralitic_Soils',
  20: 'Humic_Rejuvenated_Ferralitic_Soils',
  21: 'Rejuvenated_Ferralitic_Soils_with_Little_Degrading_Structure',
  22: 'Rejuvenated_Ferralitic_Soils_with_Degrading_Struct

### Raster files specific methods

Even without the raster files locally, we saw that we have access to a lot of methods.

The only method we don't have access to is `get_bandnums_from_layers`, which returns the band number(s) of the given layer(s)

In [21]:
try:
    mada_info.get_bandnums_from_layers(bioclim_labels)
except AttributeError as e:
    print(e)


Undefined attribute: 'clim_raster'. You need to assign a valid pathlib.Path to the related raster attribute first.


Use the `download_data` method to fetch the rasters and save them to disk

In [22]:
from pathlib import Path
cwd = Path.cwd()

# Download only when at least one of the expected rasters is missing from cwd.
# Each expected file is checked directly: iterating over the directory content
# instead would re-trigger the download whenever cwd holds any unrelated file
# (e.g. this notebook) and would skip it for an empty directory.
raster_files = ["madaclim_current.tif", "madaclim_enviro.tif"]
if not all((cwd / raster_file).is_file() for raster_file in raster_files):
    mada_info.download_data(save_dir=cwd)


####   Trying get request to Madaclim website...   ####
madaclim_current.tif is 21.8 MB
Server response OK from madaclim.cirad.fr, starting to download madaclim_current.tif
Progress for madaclim_current.tif : 100.00 % completed of 21.8 MB downloaded [ average speed of  3.2 MB/s ]
Done downloading madaclim_current.tif in 6.71 seconds !

####   Trying get request to Madaclim website...   ####
madaclim_enviro.tif is 5.5 MB
Server response OK from madaclim.cirad.fr, starting to download madaclim_enviro.tif
Progress for madaclim_enviro.tif : 100.00 % completed of 5.5 MB downloaded [ average speed of  2.1 MB/s ]
Done downloading madaclim_enviro.tif in 2.64 seconds !


In [23]:
print([file.name for file in cwd.iterdir() if file.suffix == ".tif"])

['madaclim_current.tif', 'madaclim_enviro.tif']


Setting the raster-related attributes of the `mada_info` instance

In [24]:
mada_info.clim_raster = "./madaclim_current.tif"
mada_info.env_raster = "./madaclim_enviro.tif"
print(mada_info)    # Now the 'get_bandnums_from_layers' is visible

MadaclimLayers(
	all_layers = DataFrame(79 rows x 6 columns)
	categorical_layers = DataFrame(Layers 75, 76, 77, 78 with a total of 79 categories
	clim_raster = madaclim_current.tif
	clim_crs = EPSG:32738
	env_raster = madaclim_enviro.tif
	env_crs = EPSG:32738
	public methods -> download_data, fetch_specific_layers, get_bandnums_from_layers
			 get_categorical_combinations, get_layers_labels, select_geoclim_type_layers
)


In [25]:
# Check the CRS
print(mada_info.clim_crs)
assert mada_info.clim_crs == mada_info.env_crs

EPSG:32738


In [26]:
# Example: look up the band number of layer_75 and report it with the name of the env raster file
print(f'layer_75 is band #{mada_info.get_bandnums_from_layers(["layer_75"])[0]} on {mada_info.env_raster.name}')

layer_75 is band #5 on madaclim_enviro.tif


One could also pass in the rasters to the `MadaclimLayers` constructor

In [28]:
MadaclimLayers(clim_raster="madaclim_current.tif", env_raster="madaclim_enviro.tif")

MadaclimLayers(
	all_layers = DataFrame(79 rows x 6 columns)
	categorical_layers = DataFrame(Layers 75, 76, 77, 78 with a total of 79 categories
	clim_raster = madaclim_current.tif
	clim_crs = EPSG:32738
	env_raster = madaclim_enviro.tif
	env_crs = EPSG:32738
	public methods -> download_data, fetch_specific_layers, get_bandnums_from_layers
			 get_categorical_combinations, get_layers_labels, select_geoclim_type_layers
)

## Exploring the raster data from the Madaclim db using the MadaclimRasters class

### Using layers label to visualize the raster maps

Instantiate a `MadaclimRasters` object

In [27]:
from py_madaclim.raster_manipulation import MadaclimRasters

mada_rasters = MadaclimRasters(clim_raster=cwd / "madaclim_current.tif", env_raster=cwd / "madaclim_enviro.tif")
print(mada_rasters)

MadaclimRasters(
	clim_raster = madaclim_current.tif,
	clim_crs = EPSG:32738,
	clim_nodata_val = -32768.0
	env_raster = madaclim_enviro.tif,
	env_crs = EPSG:32738,
	env_nodata_val = -32768.0
)
