In [1]:
import geopandas as gpd
import pickle

# Prepare Allen Coral Atlas File
Using the benthic map file downloaded from allen coral atlas, this notebook prepares a pickle file to be used for pulling satellite data. The benthic map for a given region can be downloaded here: https://allencoralatlas.org/atlas

In [2]:
allen_coral_df = gpd.read_file("./files/benthic.gpkg")
allen_coral_df

Unnamed: 0,class,geometry
0,Rock,"POLYGON ((-77.33956 26.82419, -77.33947 26.824..."
1,Rubble,"POLYGON ((-77.34117 26.82414, -77.34113 26.824..."
2,Seagrass,"POLYGON ((-77.33947 26.82325, -77.33938 26.823..."
3,Rubble,"POLYGON ((-77.33830 26.82298, -77.33821 26.822..."
4,Seagrass,"POLYGON ((-77.33727 26.82378, -77.33718 26.823..."
...,...,...
10205273,Seagrass,"POLYGON ((-77.25184 22.72302, -77.25184 22.722..."
10205274,Seagrass,"POLYGON ((-77.25830 22.72868, -77.25830 22.728..."
10205275,Seagrass,"POLYGON ((-77.25467 22.73155, -77.25467 22.731..."
10205276,Sand,"POLYGON ((-77.25718 22.73164, -77.25709 22.731..."


Create a centroid column from the bounding box provided in the geometry column.

In [3]:
allen_coral_df['centroid_column'] = allen_coral_df.centroid


  allen_coral_df['centroid_column'] = allen_coral_df.centroid


Extract longitude and latitude from each centroid.

In [4]:
allen_coral_df['long'] = allen_coral_df['centroid_column'].x
allen_coral_df['lat'] = allen_coral_df['centroid_column'].y
allen_coral_df

Unnamed: 0,class,geometry,centroid_column,long,lat
0,Rock,"POLYGON ((-77.33956 26.82419, -77.33947 26.824...",POINT (-77.33951 26.82417),-77.339511,26.824166
1,Rubble,"POLYGON ((-77.34117 26.82414, -77.34113 26.824...",POINT (-77.34114 26.82404),-77.341136,26.824036
2,Seagrass,"POLYGON ((-77.33947 26.82325, -77.33938 26.823...",POINT (-77.33942 26.82320),-77.339421,26.823200
3,Rubble,"POLYGON ((-77.33830 26.82298, -77.33821 26.822...",POINT (-77.33825 26.82294),-77.338246,26.822938
4,Seagrass,"POLYGON ((-77.33727 26.82378, -77.33718 26.823...",POINT (-77.33733 26.82317),-77.337328,26.823174
...,...,...,...,...,...
10205273,Seagrass,"POLYGON ((-77.25184 22.72302, -77.25184 22.722...",POINT (-77.25179 22.72290),-77.251787,22.722904
10205274,Seagrass,"POLYGON ((-77.25830 22.72868, -77.25830 22.728...",POINT (-77.25622 22.72925),-77.256224,22.729252
10205275,Seagrass,"POLYGON ((-77.25467 22.73155, -77.25467 22.731...",POINT (-77.25286 22.72945),-77.252856,22.729454
10205276,Sand,"POLYGON ((-77.25718 22.73164, -77.25709 22.731...",POINT (-77.25863 22.73055),-77.258626,22.730546


Function to create a subsample of data points from the allen coral atlas. The coral/algae and non-coral classes will be equally represented.

In [5]:
def getAllenCoralSample(allen_df, numSamples):
    # Rename all Non-Coral classes
    allen_df.loc[allen_df["class"] == "Rubble", "class"] = 'Non-Coral'
    allen_df.loc[allen_df["class"] == "Rock", "class"] = 'Non-Coral'
    allen_df.loc[allen_df["class"] == "Sand", "class"] = 'Non-Coral'
    allen_df.loc[allen_df["class"] == "Microalgal Mats", "class"] = 'Non-Coral'
    allen_df.loc[allen_df["class"] == "Seagrass", "class"] = 'Non-Coral'
    
    # Sample Coral/Algae and Non-Coral classes equally
    allen_sample = allen_df.groupby('class').apply(lambda x: x.sample(numSamples))
    
    return allen_sample

In [6]:
allen_sample = getAllenCoralSample(allen_coral_df, 25000)

In [7]:
allen_sample['class'].value_counts()

Coral/Algae    25000
Non-Coral      25000
Name: class, dtype: int64

In [None]:
allen_sample.to_pickle('./files/northern_carribean_subset.pkl')