In [1]:
import folium
import geemap.eefolium as gmap
import ee
import pandas as pd
import json

# ee.Authenticate()
ee.Initialize()

In [2]:
Map = gmap.Map(control=True)
Map.add_child(folium.LayerControl())

In [3]:
def import_aois(csv_loc, Full_Congo_Pull=False, start_date=None,
                end_date=None, days_duration=90, poly_start=0, poly_limit=None):
    
    if Full_Congo_Pull:
        with open(csv_loc,"r",encoding='utf-8') as jsonfile:
            data = json.load(jsonfile)
            for geometry in data["features"]:
                polygon = geometry["geometry"]["coordinates"][0][0]
                poly_obj = ee.Geometry.Polygon(polygon)
                feature = ee.Feature(poly_obj)
                
        return feature



zhenya_cb_no_grid = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Geometry/congo_basin_boundary/Congo_Basin_Boundary_no_islands_v5.geojson"

In [4]:
feature = import_aois(zhenya_cb_no_grid, Full_Congo_Pull=True)

In [5]:
Full_Basin_coords = feature.getInfo()["geometry"]["coordinates"]

In [64]:
AOI_full_basin = ee.Geometry.Polygon(Full_Basin_coords)
START_DATE = '2019-01-01'
END_DATE = '2020-12-31'
CLOUD_FILTER = 100


vis = {'bands': 'array'}



def make_s2_sr_col(aoi=AOI_full_basin,start_date = START_DATE, end_date=END_DATE, CLOUD_FILTER=CLOUD_FILTER):
    s2_sr_col = (ee.ImageCollection('COPERNICUS/S2_SR')
        .filterBounds(aoi)
        .filterDate(start_date, end_date)
        .filter(ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', CLOUD_FILTER)))
    return s2_sr_col

def visualize_geo(coords,Map = Map):
    Map = gmap.Map()
    geo_obj = ee.Geometry.Polygon(coords)
    Map.centerObject(geo_obj,3)
    Map.add_layer(geo_obj, {}, 'default display')
    Map.add_child(folium.LayerControl())
    return Map

def visualize_raster(img):
    Map = gmap.Map()
    Map.centerObject(img,10)
    Map.addLayer(img, vis)
    Map.add_child(folium.LayerControl())
    return Map
    

In [65]:
col = make_s2_sr_col()

In [66]:
col.size().getInfo()

70097

In [9]:
col.first().getInfo()

{'type': 'Image',
 'bands': [{'id': 'B1',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [1830, 1830],
   'crs': 'EPSG:32735',
   'crs_transform': [60, 0, 199980, 0, -60, 9500020]},
  {'id': 'B2',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [10980, 10980],
   'crs': 'EPSG:32735',
   'crs_transform': [10, 0, 199980, 0, -10, 9500020]},
  {'id': 'B3',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [10980, 10980],
   'crs': 'EPSG:32735',
   'crs_transform': [10, 0, 199980, 0, -10, 9500020]},
  {'id': 'B4',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 65535},
   'dimensions': [10980, 10980],
   'crs': 'EPSG:32735',
   'crs_transform': [10, 0, 199980, 0, -10, 9500020]},
  {'id': 'B5',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min':

In [44]:
col2 = make_s2_sr_col(CLOUD_FILTER=.05)

In [45]:
col2.size().getInfo()

1178

# Feature or Image Area Calculation

In [31]:
def get_area(feature):
    stateArea = feature.geometry().area()
    stateAreaSqKm = ee.Number(stateArea).divide(1e6).round()
    print(stateAreaSqKm.getInfo(),"square kilometers")
    

get_area(feature)

2508463 square kilometers


# Image Area Calculation

In [37]:
get_feature_area(col.first())

3600 square kilometers


In [46]:
# ee.Image(col.get(2)).getInfo()

listOfImages = col2.toList(col.size())

In [49]:
for i in range(0,20):
    test_image = ee.Image(listOfImages.get(i))
    get_area(test_image)

114 square kilometers
561 square kilometers
221 square kilometers
4761 square kilometers
2075 square kilometers
12081 square kilometers
12100 square kilometers
11463 square kilometers
1 square kilometers
7 square kilometers
8331 square kilometers
544 square kilometers
7593 square kilometers
12112 square kilometers
12110 square kilometers
12118 square kilometers
196 square kilometers
2140 square kilometers
5145 square kilometers
2799 square kilometers


# Composite Cloud Metrics

In [67]:
cloudiness_list = col.aggregate_array('CLOUDY_PIXEL_PERCENTAGE').getInfo()

In [68]:
sum(cloudiness_list) / len(cloudiness_list)

63.70878218672867

In [69]:
df = pd.DataFrame(cloudiness_list)

In [70]:
df.value_counts()

100.000000    3309
99.999999      697
99.999997      592
99.999998      478
0.000000       264
              ... 
87.006431        1
87.006129        1
87.005637        1
87.004940        1
67.685999        1
Length: 63151, dtype: int64

In [71]:
df

Unnamed: 0,0
0,97.747635
1,90.111278
2,99.429986
3,96.774037
4,77.805116
...,...
70092,51.346035
70093,25.916541
70094,11.926514
70095,45.947831


## 8% of Basin is Moderately CloudFree Over a Two Year Period

In [73]:
df[df[0] <= 5].shape[0] / df.shape[0]

0.08008901950154786

## 4% of Basin is Relatively CloudFree Over a Two Year Period

In [77]:
df[df[0] <= 1].shape[0] / df.shape[0]

0.03811860707305591

## .4% of Basin is Completely CloudFree Over a Two Year Period

In [76]:
df[df[0] == 0].shape[0] / df.shape[0]

0.0037662096808707933

## 35% of CB less than %50 cloudy Over Two Year Period

In [57]:
df[df[0] <= 50].shape[0] / df.shape[0]

0.3511595982949209

# Acquisition Cadence Stats 

In [55]:
id_list = col.aggregate_array('system:index').getInfo()

In [56]:
id_list[0] + id_list[1]

'20190101T082331_20190101T083702_T35MKQ20190101T082331_20190101T083702_T35MKR'

In [57]:
id_list[0].split("_")[0][:8] + id_list[0].split("_")[2]

'20190101T35MKQ'

In [58]:
date_stamps = [i.split("_")[0][:8] for i in id_list]

In [59]:
tile_ids = [i.split("_")[2] for i in id_list]

In [78]:
df = pd.DataFrame({"Date_Stamps":date_stamps,"Tile_IDs":tile_ids, "Cloudy_Percentage":cloudiness_list})

In [79]:
df

Unnamed: 0,Date_Stamps,Tile_IDs,Cloudy_Percentage
0,20190101,T35MKQ,97.747635
1,20190101,T35MKR,90.111278
2,20190101,T35MLQ,99.429986
3,20190101,T35MLR,96.774037
4,20190101,T35MLS,77.805116
...,...,...,...
70092,20201230,T32NNL,51.346035
70093,20201230,T32NNM,25.916541
70094,20201230,T32NNN,11.926514
70095,20201230,T32NPM,45.947831


In [107]:
df["Tile_IDs"].value_counts()

T34MCV    309
T35MLV    307
T35MNR    307
T35MMV    306
T34MDV    306
         ... 
T33MUP    144
T33MUS    143
T33MVU    143
T32NMN    143
T33MVT    142
Name: Tile_IDs, Length: 319, dtype: int64

In [102]:
df_cloud_free = df[df["Cloudy_Percentage"] <= 5]

In [106]:
df_cloud_free["Tile_IDs"].value_counts()

T33NTH    62
T32NRN    57
T34NFM    55
T35NME    53
T34MFT    52
          ..
T32NPJ     1
T32NQK     1
T32NQF     1
T32MRB     1
T33MUS     1
Name: Tile_IDs, Length: 303, dtype: int64

In [94]:
df_means_dates = df.groupby('Date_Stamps').mean()

In [98]:
df_means_dates[df_means_dates["Cloudy_Percentage"] <= 20].shape[0]

12

In [92]:
df_means_tiles = df.groupby('Tile_IDs').mean()

In [99]:
df_means_tiles[df_means_tiles["Cloudy_Percentage"] <= 20]

Unnamed: 0_level_0,Cloudy_Percentage
Tile_IDs,Unnamed: 1_level_1


In [62]:
df["Date_Stamps"].value_counts()

20200622    138
20200831    137
20190718    136
20200508    136
20200423    136
           ... 
20190923     45
20190521     45
20190522     43
20201207     43
20191127     25
Name: Date_Stamps, Length: 729, dtype: int64

In [109]:
df["Tile_IDs"].value_counts()

T34MCV    309
T35MLV    307
T35MNR    307
T35MMV    306
T34MDV    306
         ... 
T33MUP    144
T33MUS    143
T33MVU    143
T32NMN    143
T33MVT    142
Name: Tile_IDs, Length: 319, dtype: int64

In [108]:
df["Tile_IDs"].value_counts().mean()

219.73981191222572

In [110]:
pd.DataFrame(timestamps).value_counts()

20190606T34NFK    2
20190525T32NLM    2
20200828T35NPC    2
20200309T35MLS    2
20200309T35MLR    2
                 ..
20200421T34MDT    1
20200421T34MDE    1
20200421T34MDD    1
20200421T34MDC    1
20190101T32MRA    1
Length: 67246, dtype: int64

In [40]:
time_stamp_counts = pd.DataFrame(timestamps).value_counts().tolist()

In [42]:
time_stamp_counts

[2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,


In [41]:
sum(time_stamp_counts) / len(time_stamp_counts)

1.0291318442732653

In [28]:
time_stamp_counts

[138,
 137,
 136,
 136,
 136,
 136,
 136,
 134,
 134,
 134,
 134,
 133,
 133,
 133,
 133,
 132,
 131,
 131,
 131,
 131,
 131,
 131,
 131,
 131,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 130,
 129,
 129,
 129,
 129,
 129,
 129,
 129,
 129,
 129,
 129,
 129,
 129,
 129,
 129,
 128,
 128,
 128,
 128,
 128,
 128,
 128,
 128,
 128,
 128,
 128,
 128,
 128,
 127,
 127,
 127,
 127,
 127,
 127,
 127,
 127,
 127,
 126,
 126,
 125,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 124,
 123,
 123,
 123,
 123,
 123,
 123,
 123,
 123,
 122,
 120,
 120,
 119,
 118,
 118,
 118,
 118,
 118,
 118,
 117,
 117,
 117,
 117,
 117,
 117,
 117,
 116,
 116,
 116,
 115,
 115,
 114,
 114,
 114

In [29]:
timestamps

['20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',
 '20190101',