In [4]:
import os
from pathlib import Path
from dotenv import load_dotenv

cwd = Path.cwd()
# Load dotenv into sys PATH
dotenv_file = cwd / "../../../example/.env"
assert dotenv_file.is_file()
load_dotenv(dotenv_file)

True

In [5]:
import pandas as pd
csv_file = r"../../../example/gbif_example\0026013-240906103802322\0026013-240906103802322.csv"
coff_dl_df = pd.read_csv(csv_file, sep='\t')

In [6]:
df = coff_dl_df.loc[coff_dl_df["taxonRank"] == "SPECIES"]
df = df.loc[:, ["verbatimScientificName", "decimalLongitude", "decimalLatitude", "year"]]
df = df.reset_index().drop(columns="index")
df["specimen_id"] = df.apply(lambda row: f"{row['verbatimScientificName']}_{row.name}", axis=1)
df["specimen_id"] = df["specimen_id"].str.strip("Coffea ")
df.columns = ["genus_species", "longitude", "latitude", "year", "specimen_id"]
df.head()

Unnamed: 0,genus_species,longitude,latitude,year,specimen_id
0,Psilanthus brassii,0.0,0.0,1967.0,Psilanthus brassii_0
1,Psilanthus brassii,147.333333,-9.333333,1967.0,Psilanthus brassii_1
2,Psilanthus brassii,146.5,-8.783333,1933.0,Psilanthus brassii_2
3,Coffea arabica,0.0,0.0,1928.0,rabica_3
4,Coffea arabica Linnaeus,167.425,-22.658333,1981.0,rabica Linnaeus_4


In [11]:
# Last 6 years
df_2018_p = df.loc[df["year"] > 2018]
print(len(df_2018_p))
print(df_2018_p.head())

unique_values = df_2018_p['genus_species'].unique()

# Print or work with the unique values
print("Unique species : ", len(unique_values))

3927
       genus_species   longitude   latitude    year  specimen_id
5935  Coffea arabica  -84.636676  10.479850  2019.0  rabica_5935
5936  Coffea arabica -156.243057  20.911530  2024.0  rabica_5936
5937  Coffea arabica  -75.850023   5.613698  2024.0  rabica_5937
5938  Coffea arabica  -66.419957  18.478251  2024.0  rabica_5938
5939  Coffea arabica  -75.254248   2.936914  2022.0  rabica_5939
Unique species :  67


In [33]:
import sys
import os
import re
import pandas as pd

# Assuming df_2018_p is already defined
df_gbif = df_2018_p[['specimen_id', 'longitude', 'latitude']].copy()  # Use .copy() to avoid warnings
df_node = pd.read_csv(r"..\input\node_names.csv")

# Function to extract part of the specimen_id for matching
def extract_name(specimen_id):
    # Use regex to extract pattern (e.g., everything before the underscore)
    return re.split(r'[ _]', specimen_id)[0]

# Apply extraction function to both DataFrames
df_gbif.loc[:, 'key'] = df_gbif['specimen_id'].apply(extract_name)  
df_node.loc[:, 'key'] = df_node['node_name'].apply(lambda x: re.sub(r'^C_|_[\dA-Za-z]+$', '', x))


In [30]:
df_gbif.to_csv("../tmp/gbif_extract_w_key.csv")
df_node.to_csv("../tmp/node_extract_w_key.csv")


In [25]:
len(df_gbif)

3927

In [34]:
# Create a dictionary for mapping key to Node Name
mapping = df_node.set_index('key')['node_name'].to_dict()

# Map the Node Name into a new column in df_gbif
df_gbif.loc[:, 'node_name'] = df_gbif['key'].map(mapping)

# Drop the key column
df_gbif.drop(columns='key', inplace=True)

# Drop rows where 'Node Name' is NaN
df_gbif.dropna(subset=['node_name'], inplace=True)

# Select the final columns
df_new = df_gbif[['node_name', 'longitude', 'latitude']].copy()

# Renaming columns
df_new.rename(columns={'node_name': 'specimen_id'}, inplace=True)

# Save the DataFrame to a new CSV
base_name, extension = os.path.splitext(csv_file)
formatted_csv_file = base_name + '_formatted' + extension
#df_new.to_csv(formatted_csv_file, index=False)

#print(f"Data saved to {formatted_csv_file}")
df_new.to_csv(r"../tmp/file_formatted.csv", index=False)

print(f"Data saved to {formatted_csv_file}")

Data saved to ../../../example/gbif_example\0026013-240906103802322\0026013-240906103802322_formatted.csv


In [38]:
import pandas as pd

#input_file = r'..\input\0026013-240906103802322_formatted.csv'
input_file = r'..\tmp\file_formatted.csv'
caffeine_file = r'..\input\no_caffeine_nodes_w_specimen.csv'

def add_caffeine(input_file, caffeine_file):
    
    gbif_df = pd.read_csv(input_file)
    node_names_df = pd.read_csv(caffeine_file)

    # Merge the two DataFrames based on 'specimen_id' in gbif_df and 'Species_name' in node_names_df
    merged_df = pd.merge(gbif_df, node_names_df[['Species_name', 'caffeine_percent']], 
                         left_on='specimen_id', right_on='Species_name', how='left')

    # Drop the 'Species_name' column as it's no longer needed
    merged_df = merged_df.drop(columns=['Species_name'])
    
    # Retain only the required columns
    merged_df = merged_df[['specimen_id', 'longitude', 'latitude', 'caffeine_percent']]
    
    # Drop rows where 'caffeine_percent' is NaN
    merged_df = merged_df.dropna(subset=['caffeine_percent'])

    # Create output filename by replacing 'formatted' with 'w_caffeine'
    output_file = input_file.replace('formatted', 'w_caffeine')

    # Save the merged DataFrame to a new CSV file
    merged_df.to_csv(output_file, index=False)

    # Print the output file name
    print(f"Data saved to: {output_file}")
    
    

add_caffeine(input_file, caffeine_file)



Data saved to: ..\tmp\file_w_caffeine.csv


In [36]:
df_w_caff = pd.read_csv(r"..\tmp\file_w_caffeine.csv")


In [39]:

unique_values = df_w_caff['specimen_id'].unique()

# Print or work with the unique values
print(unique_values)


['C_perrieri_A12' 'C_humbertii_RNF785' 'C_bissetiae' 'C_pervilleana_A957'
 'C_boiviniana_A980' 'C_leroyi_A315' 'C_resinosa_A8' 'C_richardii_A575'
 'C_liaudii_A1013' 'C_mauritiana_Makes4' 'C_boinensis' 'C_tsirananae_A515'
 'C_heimii_A516' 'C_kihansiensis_APD2922' 'C_mcphersonii_A977']


#MadaclimCollection points must have unique specimen_id

Creation of a Collection per species for visualisation

In [24]:

# df_2018_p.head()
#Extract all rows where any column contains the word 'heimii'

heimii_df = df_2018_p[df_2018_p.apply(lambda row: row.astype(str).str.contains('heimii', case=False).any(), axis=1)]

# Print or further manipulate the filtered DataFrame
print(heimii_df)


       genus_species  longitude   latitude    year   specimen_id
14672  Coffea heimii  49.263119 -12.279828  2020.0  heimii_14672
16490  Coffea heimii  49.347115 -12.210881  2020.0  heimii_16490
16893  Coffea heimii  49.303872 -12.318289  2020.0  heimii_16893
16895  Coffea heimii  49.226443 -12.281807  2020.0  heimii_16895
17832  Coffea heimii  49.342500 -12.396327  2019.0  heimii_17832
17834  Coffea heimii  49.275700 -12.398991  2019.0  heimii_17834
19479  Coffea heimii  49.569592 -12.454103  2019.0  heimii_19479
19555  Coffea heimii  49.549809 -12.580463  2019.0  heimii_19555


In [41]:
def extract_rows_containing_word(df, word):
    filtered_df = df[df.apply(lambda row: row.astype(str).str.contains(word, case=False).any(), axis=1)]
    return filtered_df

Species with no caffeine

In [47]:
andrambovatensis_df = extract_rows_containing_word(df_2018_p, "andrambovatensis")
print(andrambovatensis_df.shape)

abbayesii_df = extract_rows_containing_word(df_2018_p, "bbayesii")
print(abbayesii_df.shape)

arenesiana_df = extract_rows_containing_word(df_2018_p, "arenesiana")
print(arenesiana_df.shape)

bertrandii_df = extract_rows_containing_word(df_2018_p, "bertrandii")
print(bertrandii_df.shape)

dubardii_df = extract_rows_containing_word(df_2018_p, "dubardii")
print(dubardii_df.shape)

millotii_df = extract_rows_containing_word(df_2018_p, "millotii")
print(millotii_df.shape)

resinosa_df = extract_rows_containing_word(df_2018_p, "resinosa")
print(resinosa_df.shape)

toshii_df = extract_rows_containing_word(df_2018_p, "toshii")
print(toshii_df.shape)

vohemarensis_df = extract_rows_containing_word(df_2018_p, "vohemarensis")
print(vohemarensis_df.shape)


(0, 5)
(0, 5)
(0, 5)
(0, 5)
(0, 5)
(0, 5)
(3, 5)
(0, 5)
(0, 5)


extract points linked to non caffeinated coffea species (MG)

In [48]:
heimii_df = extract_rows_containing_word(df_2018_p, "heimii")
resinosa_df = extract_rows_containing_word(df_2018_p, "resinosa")
humbertii_df = extract_rows_containing_word(df_2018_p, "humbertii")
perrieri_df = extract_rows_containing_word(df_2018_p, "perrieri")
pervilleana_df = extract_rows_containing_word(df_2018_p, "pervilleana")
tsirananae_df = extract_rows_containing_word(df_2018_p, "tsirananae")

print(humbertii_df.shape)
print(perrieri_df.shape)
print(pervilleana_df.shape)
print(tsirananae_df.shape)
print(resinosa_df.shape)
print(heimii_df.shape)


(3, 5)
(22, 5)
(2, 5)
(10, 5)
(3, 5)
(8, 5)


In [49]:
from py_madaclim.raster_manipulation import MadaclimCollection

In [50]:
coll_heimii = MadaclimCollection.populate_from_df(heimii_df)

Creating MadaclimPoint(specimen_id=heimii_14672...)
Creating MadaclimPoint(specimen_id=heimii_16490...)
Creating MadaclimPoint(specimen_id=heimii_16893...)
Creating MadaclimPoint(specimen_id=heimii_16895...)
Creating MadaclimPoint(specimen_id=heimii_17832...)
Creating MadaclimPoint(specimen_id=heimii_17834...)
Creating MadaclimPoint(specimen_id=heimii_19479...)
Creating MadaclimPoint(specimen_id=heimii_19555...)
Created new MadaclimCollection with 8 samples.


In [60]:
coll_humbertii = MadaclimCollection.populate_from_df(humbertii_df)
coll_perrieri = MadaclimCollection.populate_from_df(perrieri_df)
coll_pervilleana = MadaclimCollection.populate_from_df(pervilleana_df)
coll_tsirananae = MadaclimCollection.populate_from_df(tsirananae_df)
coll_resinosa = MadaclimCollection.populate_from_df(resinosa_df)


Creating MadaclimPoint(specimen_id=humbertii_7021...)
Creating MadaclimPoint(specimen_id=humbertii J.-F.Leroy_8949...)
Creating MadaclimPoint(specimen_id=humbertii J.-F.Leroy_8951...)
Created new MadaclimCollection with 3 samples.
Creating MadaclimPoint(specimen_id=perrieri Drake ex Jum. & H. Perrier_6802...)
Creating MadaclimPoint(specimen_id=perrieri Drake ex Jum. & H. Perrier_6858...)
Creating MadaclimPoint(specimen_id=perrieri Drake ex Jum. & H. Perrier_6916...)
Creating MadaclimPoint(specimen_id=perrieri_7584...)
Creating MadaclimPoint(specimen_id=perrieri Drake ex Jum. & H. Perrier_8151...)
Creating MadaclimPoint(specimen_id=perrieri_8546...)
Creating MadaclimPoint(specimen_id=perrieri_8666...)
Creating MadaclimPoint(specimen_id=perrieri Drake ex Jum. & H. Perrier_9136...)
Creating MadaclimPoint(specimen_id=perrieri Drake ex Jum. & H. Perrier_9214...)
Creating MadaclimPoint(specimen_id=perrieri Drake ex Jum. & H. Perrier_9215...)
Creating MadaclimPoint(specimen_id=perrieri Drake 

In [52]:
coll_heimii.all_points[0]

MadaclimPoint(
	specimen_id = heimii_14672,
	source_crs = 4326,
	longitude = 49.263119,
	latitude = -12.279828,
	mada_geom_point = POINT (963987.2160799548 8638826.905155072),
	sampled_layers = None (Not sampled yet),
	nodata_layers = None (Not sampled yet),
	is_categorical_encoded = False,
	genus_species = Coffea heimii,
	year = 2020.0,
	gdf.shape = (1, 10)
)

Access the Geopandas dataframe attribute `gdf` based on the `mada_geom_point` geometry of each object

In [53]:
coll_heimii.gdf

Unnamed: 0,specimen_id,source_crs,longitude,latitude,mada_geom_point,sampled_layers,nodata_layers,is_categorical_encoded,genus_species,year
0,heimii_14672,4326,49.263119,-12.279828,POINT (963987.216 8638826.905),,,False,Coffea heimii,2020.0
1,heimii_16490,4326,49.347115,-12.210881,POINT (973268.317 8646324.577),,,False,Coffea heimii,2020.0
2,heimii_16893,4326,49.303872,-12.318289,POINT (968361.925 8634492.084),,,False,Coffea heimii,2020.0
3,heimii_16895,4326,49.226443,-12.281807,POINT (959985.375 8638670.641),,,False,Coffea heimii,2020.0
4,heimii_17832,4326,49.3425,-12.396327,POINT (972432.287 8625771.942),,,False,Coffea heimii,2019.0
5,heimii_17834,4326,49.2757,-12.398991,POINT (965147.750 8625594.362),,,False,Coffea heimii,2019.0
6,heimii_19479,4326,49.569592,-12.454103,POINT (997074.808 8618950.269),,,False,Coffea heimii,2019.0
7,heimii_19555,4326,49.549809,-12.580463,POINT (994676.992 8604973.911),,,False,Coffea heimii,2019.0


### Sample from rasters

Just like the `MadaclimPoint` class, use the `sample_from_rasters` method to get data based on the collection's specimens.

In [54]:
from py_madaclim.info import MadaclimLayers
mada_info = MadaclimLayers()

In [55]:
print(mada_info.all_layers.shape)
mada_info.all_layers

(79, 6)


Unnamed: 0,geoclim_type,layer_number,layer_name,layer_description,is_categorical,units
0,clim,1,tmin1,Monthly minimum temperature - January,False,°C x 10
1,clim,2,tmin2,Monthly minimum temperature - February,False,°C x 10
2,clim,3,tmin3,Monthly minimum temperature - March,False,°C x 10
3,clim,4,tmin4,Monthly minimum temperature - April,False,°C x 10
4,clim,5,tmin5,Monthly minimum temperature - May,False,°C x 10
...,...,...,...,...,...,...
74,env,75,geo,Rock types,True,"[1=Alluvial_&_Lake_deposits, 2=Unconsolidated_..."
75,env,76,soi,Soil types,True,"[1=Bare_Rocks, 2=Raw_Lithic_Mineral_Soils, 3=P..."
76,env,77,veg,Vegetation types,True,"[1=VegCat_01, 2=VegCat_02, 3=VegCat_03, 4=VegC..."
77,env,78,wat,Watersheds,True,"[1=CoE_N-Bemarivo, 2=CoE_S-Bemarivo_N-Mangoro,..."


In [56]:
raster_files = ["madaclim_current.tif", "madaclim_enviro.tif"]
if not all([(cwd / raster_file).exists() for raster_file in raster_files]):
    mada_info.download_data(save_dir=cwd)


In [57]:

mada_info.clim_raster = "./madaclim_current.tif"
mada_info.env_raster = "./madaclim_enviro.tif"
print(mada_info)    # Now the 'get_bandnums_from_layers' is visible

MadaclimLayers(
	all_layers = DataFrame(79 rows x 6 columns)
	categorical_layers = DataFrame(Layers 75, 76, 77, 78 with a total of 79 categories
	clim_raster = madaclim_current.tif
	clim_crs = EPSG:32738
	env_raster = madaclim_enviro.tif
	env_crs = EPSG:32738
	public methods -> download_data, fetch_specific_layers, get_bandnums_from_layers
			 get_categorical_combinations, get_layers_labels, select_geoclim_type_layers
)


In [58]:
from py_madaclim.raster_manipulation import MadaclimRasters
from pathlib import Path
cwd = Path.cwd()
mada_rasters = MadaclimRasters(clim_raster=cwd / "madaclim_current.tif", env_raster=cwd / "madaclim_enviro.tif")
print(mada_rasters)

MadaclimRasters(
	clim_raster = madaclim_current.tif,
	clim_crs = EPSG:32738,
	clim_nodata_val = -32768.0
	env_raster = madaclim_enviro.tif,
	env_crs = EPSG:32738,
	env_nodata_val = -32768.0
)


In [59]:
coll_heimii.sample_from_rasters(
    clim_raster=mada_rasters.clim_raster,
    env_raster=mada_rasters.env_raster
)


######################################## [1mExtracting data for: heimii_14672[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   


BEWARE! 8 layer(s) contain a nodata value at the specimen location

Finished raster sampling operation in 0.51 seconds.


######################################## [1mExtracting data for: heimii_16490[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   


BEWARE! 78 layer(s) contain a nodata value at the specimen location

Finished raster sampling operation in 0.52 seconds.


######################################## [1mExtracting data for: heimii_16893[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.48 seconds.


######################################## [1mExtracting data for: heimii_16895[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   


BEWARE! 78 layer(s) contain a nodata value at the specimen location

Finished raster sampling operation in 0.56 seconds.


######################################## [1mExtracting data for: heimii_17832[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.55 seconds.


######################################## [1mExtracting data for: heimii_17834[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.64 seconds.


######################################## [1mExtracting data for: heimii_19479[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   


BEWARE! 7 layer(s) contain a nodata value at the specimen location

Finished raster sampling operation in 0.54 seconds.


######################################## [1mExtracting data for: heimii_19555[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.60 seconds.



In [61]:
coll_humbertii.sample_from_rasters(
    clim_raster=mada_rasters.clim_raster,
    env_raster=mada_rasters.env_raster
)

coll_perrieri.sample_from_rasters(
    clim_raster=mada_rasters.clim_raster,
    env_raster=mada_rasters.env_raster
)

coll_pervilleana.sample_from_rasters(
    clim_raster=mada_rasters.clim_raster,
    env_raster=mada_rasters.env_raster
)

coll_tsirananae.sample_from_rasters(
    clim_raster=mada_rasters.clim_raster,
    env_raster=mada_rasters.env_raster
)
coll_resinosa.sample_from_rasters(
    clim_raster=mada_rasters.clim_raster,
    env_raster=mada_rasters.env_raster
)


######################################## [1mExtracting data for: humbertii_7021[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   


BEWARE! 1 layer(s) contain a nodata value at the specimen location

Finished raster sampling operation in 0.53 seconds.


######################################## [1mExtracting data for: humbertii J.-F.Leroy_8949[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.51 seconds.


######################################## [1mExtracting data for: humbertii J.-F.Leroy_8951[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.58 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_6802[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.49 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_6858[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.46 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_6916[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.47 seconds.


######################################## [1mExtracting data for: perrieri_7584[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.46 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_8151[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.52 seconds.


######################################## [1mExtracting data for: perrieri_8546[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.47 seconds.


######################################## [1mExtracting data for: perrieri_8666[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.50 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_9136[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.47 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_9214[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.57 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_9215[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.49 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_9731[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.67 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_9737[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.48 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_9772[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.61 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_10379[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.49 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_10396[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.50 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_10406[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.48 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_12493[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.62 seconds.


######################################## [1mExtracting data for: perrieri Drake ex Jum. & H. Perrier_12501[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.49 seconds.


######################################## [1mExtracting data for: perrieri_12538[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.51 seconds.


######################################## [1mExtracting data for: perrieri_12549[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.53 seconds.


######################################## [1mExtracting data for: perrieri_15407[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.60 seconds.


######################################## [1mExtracting data for: perrieri_16995[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.54 seconds.


######################################## [1mExtracting data for: pervilleana_8667[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.58 seconds.


######################################## [1mExtracting data for: pervilleana_8672[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.50 seconds.


######################################## [1mExtracting data for: tsirananae_14671[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   


BEWARE! 6 layer(s) contain a nodata value at the specimen location

Finished raster sampling operation in 0.62 seconds.


######################################## [1mExtracting data for: tsirananae_16502[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.51 seconds.


######################################## [1mExtracting data for: tsirananae_16891[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.51 seconds.


######################################## [1mExtracting data for: tsirananae_16892[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.50 seconds.


######################################## [1mExtracting data for: tsirananae_16894[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.51 seconds.


######################################## [1mExtracting data for: tsirananae_16896[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.61 seconds.


######################################## [1mExtracting data for: tsirananae_16909[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.50 seconds.


######################################## [1mExtracting data for: tsirananae_17828[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   


BEWARE! 6 layer(s) contain a nodata value at the specimen location

Finished raster sampling operation in 0.69 seconds.


######################################## [1mExtracting data for: tsirananae_17831[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.52 seconds.


######################################## [1mExtracting data for: tsirananae_19273[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   


BEWARE! 78 layer(s) contain a nodata value at the specimen location

Finished raster sampling operation in 0.52 seconds.


######################################## [1mExtracting data for: resinosa (Hook. f.) Radlk._9233[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.51 seconds.


######################################## [1mExtracting data for: resinosa (Hook. f.) Radlk._10714[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.65 seconds.


######################################## [1mExtracting data for: resinosa (Hook. f.) Radlk._16989[0m ########################################

Sampling 70 layer(s) from madaclim_current.tif (geoclim_type=clim)...


Extracting layer 70: Number of dry months in the year:  100%|██████████| layer 70/70 [Time remaining: 00:00]                                              



Sampling 9 layer(s) from madaclim_enviro.tif (geoclim_type=env)...


Extracting layer 79: Percentage of forest cover in 1 km by 1 km grid cells:  100%|██████████| layer 9/9 [Time remaining: 00:00]   



Finished raster sampling operation in 0.57 seconds.



In [62]:
#mada_rasters.plot_layer(71);    # Also accepts layer="layer_71"
#coll_heimii.sampled_layers
coll_heimii.gdf


Unnamed: 0,specimen_id,source_crs,longitude,latitude,mada_geom_point,sampled_layers,nodata_layers,is_categorical_encoded,genus_species,year,...,layer_70,layer_71,layer_72,layer_73,layer_74,layer_75,layer_76,layer_77,layer_78,layer_79
0,heimii_14672,4326,49.263119,-12.279828,POINT (963987.216 8638826.905),79,8,False,Coffea heimii,2020.0,...,9.0,,,,,,,25,,
1,heimii_16490,4326,49.347115,-12.210881,POINT (973268.317 8646324.577),79,78,False,Coffea heimii,2020.0,...,,,,,,,,25,,
2,heimii_16893,4326,49.303872,-12.318289,POINT (968361.925 8634492.084),79,0,False,Coffea heimii,2020.0,...,9.0,71.0,6.0,85.0,7863.0,7.0,4.0,6,1.0,0.0
3,heimii_16895,4326,49.226443,-12.281807,POINT (959985.375 8638670.641),79,78,False,Coffea heimii,2020.0,...,,,,,,,,25,,
4,heimii_17832,4326,49.3425,-12.396327,POINT (972432.287 8625771.942),79,0,False,Coffea heimii,2019.0,...,8.0,141.0,7.0,246.0,7788.0,7.0,20.0,7,1.0,4.0
5,heimii_17834,4326,49.2757,-12.398991,POINT (965147.750 8625594.362),79,0,False,Coffea heimii,2019.0,...,8.0,184.0,3.0,313.0,7969.0,9.0,8.0,7,1.0,3.0
6,heimii_19479,4326,49.569592,-12.454103,POINT (997074.808 8618950.269),79,7,False,Coffea heimii,2019.0,...,8.0,,,,,,4.0,25,,
7,heimii_19555,4326,49.549809,-12.580463,POINT (994676.992 8604973.911),79,0,False,Coffea heimii,2019.0,...,8.0,4.0,1.0,250.0,7880.0,2.0,4.0,3,1.0,64.0


In [63]:
bioclim_labels = [label for label in mada_info.get_layers_labels(as_descriptive_labels=True) if "bio" in label]
env_layers = [label for label in mada_info.get_layers_labels(as_descriptive_labels=True) if "env" in label]

In [66]:
# From the collection's objects
print(list(coll_heimii.sampled_layers.keys()))
for specimen_id in coll_heimii.sampled_layers:
    print(f"\nSampled layer values for {specimen_id}")
    for layer, value in coll_heimii.sampled_layers[specimen_id].items():
        if layer in bioclim_labels[0]:    #print only first bioclim layer
            print(f"{layer} => {value}")    
    

['heimii_14672', 'heimii_16490', 'heimii_16893', 'heimii_16895', 'heimii_17832', 'heimii_17834', 'heimii_19479', 'heimii_19555']

Sampled layer values for heimii_14672

Sampled layer values for heimii_16490

Sampled layer values for heimii_16893

Sampled layer values for heimii_16895

Sampled layer values for heimii_17832

Sampled layer values for heimii_17834

Sampled layer values for heimii_19479

Sampled layer values for heimii_19555


In [68]:
# Or from the geodataframe attribute
#coll_heimii.gdf[["specimen_id"] + bioclim_labels[:1]]

exploring categorical data

In [69]:
categ_layers = mada_info.get_categorical_combinations(as_descriptive_keys=True)
print(list(categ_layers.keys())[0])

env_75_geo_Rock types (categ_vals: 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13)


In [71]:
# No encoding
#coll_heimii.gdf[["specimen_id"] + list(categ_layers.keys())]

In [72]:
# Encoding categorical layers

coll_heimii.binary_encode_categorical()
print(coll_heimii.is_categorical_encoded)
print(f"Shape of geodataframe post one-hot-encoding: {coll_heimii.gdf.shape}")

True
Shape of geodataframe post one-hot-encoding: (8, 168)


In [75]:
coll_humbertii.binary_encode_categorical()
coll_perrieri.binary_encode_categorical()
coll_pervilleana.binary_encode_categorical()
coll_resinosa.binary_encode_categorical()
coll_tsirananae.binary_encode_categorical()

In [73]:
coll_heimii.gdf

Unnamed: 0,specimen_id,source_crs,longitude,latitude,mada_geom_point,sampled_layers,nodata_layers,is_categorical_encoded,genus_species,year,...,layer_78_RetDisp_Mahavavy,layer_78_RetDisp_Manampatrana,layer_78_RetDisp_Mananara_du_Sud,layer_78_RetDisp_Mandrare,layer_78_RetDisp_Mangoky,layer_78_RetDisp_Mangoro,layer_78_RetDisp_Onilahy,layer_78_RetDisp_Sambirano,layer_78_RetDisp_Tsiribihina,layer_78__nodata
0,heimii_14672,4326,49.263119,-12.279828,POINT (963987.216 8638826.905),79,8,True,Coffea heimii,2020.0,...,0,0,0,0,0,0,0,0,0,1
1,heimii_16490,4326,49.347115,-12.210881,POINT (973268.317 8646324.577),79,78,True,Coffea heimii,2020.0,...,0,0,0,0,0,0,0,0,0,1
2,heimii_16893,4326,49.303872,-12.318289,POINT (968361.925 8634492.084),79,0,True,Coffea heimii,2020.0,...,0,0,0,0,0,0,0,0,0,0
3,heimii_16895,4326,49.226443,-12.281807,POINT (959985.375 8638670.641),79,78,True,Coffea heimii,2020.0,...,0,0,0,0,0,0,0,0,0,1
4,heimii_17832,4326,49.3425,-12.396327,POINT (972432.287 8625771.942),79,0,True,Coffea heimii,2019.0,...,0,0,0,0,0,0,0,0,0,0
5,heimii_17834,4326,49.2757,-12.398991,POINT (965147.750 8625594.362),79,0,True,Coffea heimii,2019.0,...,0,0,0,0,0,0,0,0,0,0
6,heimii_19479,4326,49.569592,-12.454103,POINT (997074.808 8618950.269),79,7,True,Coffea heimii,2019.0,...,0,0,0,0,0,0,0,0,0,1
7,heimii_19555,4326,49.549809,-12.580463,POINT (994676.992 8604973.911),79,0,True,Coffea heimii,2019.0,...,0,0,0,0,0,0,0,0,0,0


Append all rows of raster env and clim data related to non caffeinated coffee species

In [76]:
no_caff_dfs = pd.concat([coll_heimii.gdf, coll_humbertii.gdf, coll_pervilleana.gdf, coll_perrieri.gdf, coll_resinosa.gdf, coll_tsirananae.gdf], axis=0, ignore_index=True)
no_caff_dfs

Unnamed: 0,specimen_id,source_crs,longitude,latitude,mada_geom_point,sampled_layers,nodata_layers,is_categorical_encoded,genus_species,year,...,layer_78_RetDisp_Mahavavy,layer_78_RetDisp_Manampatrana,layer_78_RetDisp_Mananara_du_Sud,layer_78_RetDisp_Mandrare,layer_78_RetDisp_Mangoky,layer_78_RetDisp_Mangoro,layer_78_RetDisp_Onilahy,layer_78_RetDisp_Sambirano,layer_78_RetDisp_Tsiribihina,layer_78__nodata
0,heimii_14672,4326,49.263119,-12.279828,POINT (963987.216 8638826.905),79,8,True,Coffea heimii,2020.0,...,0,0,0,0,0,0,0,0,0,1
1,heimii_16490,4326,49.347115,-12.210881,POINT (973268.317 8646324.577),79,78,True,Coffea heimii,2020.0,...,0,0,0,0,0,0,0,0,0,1
2,heimii_16893,4326,49.303872,-12.318289,POINT (968361.925 8634492.084),79,0,True,Coffea heimii,2020.0,...,0,0,0,0,0,0,0,0,0,0
3,heimii_16895,4326,49.226443,-12.281807,POINT (959985.375 8638670.641),79,78,True,Coffea heimii,2020.0,...,0,0,0,0,0,0,0,0,0,1
4,heimii_17832,4326,49.3425,-12.396327,POINT (972432.287 8625771.942),79,0,True,Coffea heimii,2019.0,...,0,0,0,0,0,0,0,0,0,0
5,heimii_17834,4326,49.2757,-12.398991,POINT (965147.750 8625594.362),79,0,True,Coffea heimii,2019.0,...,0,0,0,0,0,0,0,0,0,0
6,heimii_19479,4326,49.569592,-12.454103,POINT (997074.808 8618950.269),79,7,True,Coffea heimii,2019.0,...,0,0,0,0,0,0,0,0,0,1
7,heimii_19555,4326,49.549809,-12.580463,POINT (994676.992 8604973.911),79,0,True,Coffea heimii,2019.0,...,0,0,0,0,0,0,0,0,0,0
8,humbertii_7021,4326,43.610638,-23.078725,POINT (357689.039 7447088.901),79,1,True,Coffea humbertii,2024.0,...,0,0,0,0,0,0,0,0,0,0
9,humbertii J.-F.Leroy_8949,4326,44.1366,-23.5318,POINT (411865.585 7397342.999),79,0,True,Coffea humbertii J.-F.Leroy,2020.0,...,0,0,0,0,0,0,0,0,0,0


In [78]:
no_caff_dfs.to_csv(r"../input/no_caffeine_data_for_clustering.csv", index=False)