In [None]:
import json
import math
import os

import ee
import geemap
import pandas as pd

from utils.utils import *
from utils.ee_utils import *
from utils.utils import TextColors as c
# Kept after the wildcard imports so they cannot rebind the name `tqdm`.
from tqdm import tqdm

In [None]:
# Interactive geemap map; later cells add layers to it and display it.
Map = geemap.Map()
# NOTE(review): the explicit Earth Engine initialization below is disabled;
# presumably geemap.Map() takes care of it — confirm before relying on this.
# ee.Initialize()

In [None]:
# for LUCAS dataset
# Forward slashes work on every platform and avoid the invalid "\E" escape
# sequence that the backslash-separated literal contained.
with open("shapefiles/Europe.geojson", encoding="utf-8") as f:
    geojson = json.load(f)
# NOTE(review): "eruope_roi" is a misspelling of "europe_roi"; the name is kept
# because later cells reference it.
eruope_roi = geemap.geojson_to_ee(geojson)

# # for RacA dataset
# with open("shapefiles/contiguous-usa.geojson", encoding="utf-8") as f:
#     geojson = json.load(f)
# eruope_roi = geemap.geojson_to_ee(geojson)

In [None]:
# Folder that receives the generated random-point CSV files.
SIMCLR_CSV_PATH = "simclr/"
# exist_ok=True creates the folder only when it is missing, without a separate
# existence check that could race with another process.
os.makedirs(SIMCLR_CSV_PATH, exist_ok=True)

In [None]:
# Sampling configuration for the random-point CSV generation.
# Reference setup (Nafiseh): NUM_POINTS = 1000 and NUM_CSV_FILES = 100.
#
# In case you face issues downloading samples due to numerical problems,
# consider decreasing the numbers by 40,000.
# NOTE(review): it is unclear which quantity "40,000" refers to — confirm.
NUM_POINTS = 1_000  # passed as num_points for every generated CSV file
# NOTE(review): the reference setup asks for 100 files but 70 is configured — confirm.
NUM_CSV_FILES = 70  # number of CSV files to generate

In [None]:
# Generate NUM_CSV_FILES CSV files of random points inside the ROI.
# The loop index doubles as the seed, and start_id advances by NUM_POINTS per file.
for file_idx in range(NUM_CSV_FILES):
    csv_stem = f"{SIMCLR_CSV_PATH}random_points_LUCAS_{file_idx}"
    first_id = 10000000 + file_idx * NUM_POINTS
    df = random_point_csv_generator(
        eruope_roi,
        num_points=NUM_POINTS,
        file_name=csv_stem,
        start_id=first_id,
        seed=file_idx,
    )
    print(f"{c.OKGREEN}Successfully generated {csv_stem}.csv file.{c.ENDC}")

In [None]:
# Draw one random row from `df` for a visual sanity check. When the notebook is
# run top to bottom, `df` is the frame returned by the last generator call.
random_point = df.sample(n=1)

In [None]:
# Display the sampled row.
random_point

In [None]:
# Build a square ROI (roi_size=10000) around the sampled point and preview the
# first ESA WorldCover v100 image, clipped to that ROI, on the map.
lat = random_point['lat'].iloc[0]
lon = random_point['long'].iloc[0]
roi = get_square_roi(lat, lon, roi_size=10000, return_gee_object=True)

# Context layers first, then zoom to the sampled square.
Map.addLayer(eruope_roi, {}, 'Europe')
Map.addLayer(roi, {}, 'ROI')
Map.centerObject(roi, 10)

# Land-cover layer: the 'Map' band of the clipped WorldCover image.
land_cover = ee.ImageCollection('ESA/WorldCover/v100').first().clip(roi)
vis = dict(bands=['Map'])
Map.addLayer(land_cover, vis, 'Land Cover')
Map


| Value | Description |
|---|---|
| 10  | Tree cover |
| 20  | Shrubland |
| 30  | Grassland |
| 40 | Cropland |
| 50 |  Built-up |
| 60 |  Bare / sparse vegetation |
| 70 |  Snow and ice |
| 80 | Permanent water bodies |
| 90 |  Herbaceous wetland |
| 95 |  Mangroves |
| 100 |  Moss and lichen |

In [None]:
# Annotate every CSV in SIMCLR_CSV_PATH with the land-cover percentage returned by
# calculate_land_cover_percentage for the unwanted classes around each point.
# Unwanted ESA WorldCover classes (see the class table above): 80 permanent water
# bodies, 90 herbaceous wetland, 50 built-up, 100 moss and lichen, 95 mangroves.
UNWANTED_CLASSES = [80, 90, 50, 100, 95]

csv_files = [SIMCLR_CSV_PATH + file for file in os.listdir(SIMCLR_CSV_PATH) if file.endswith('.csv')]

# Loop-invariant: build the WorldCover image handle once instead of once per point.
world_cover = ee.ImageCollection('ESA/WorldCover/v100').first()

for file in csv_files:
    df = pd.read_csv(file)
    print(f"{c.OKGREEN}Successfully read {file}.{c.ENDC}")
    percentages = []
    loop = tqdm(df.iterrows(), total=df.shape[0], position=0, leave=True)
    for _, row in loop:
        lat = row['lat']
        lon = row['long']
        roi = get_square_roi(lat, lon, roi_size=1920, return_gee_object=True)
        land_cover = world_cover.clip(roi)
        # getInfo() blocks until Earth Engine returns the computed value.
        percentage = calculate_land_cover_percentage(land_cover, UNWANTED_CLASSES, roi=roi, scale=30).getInfo()
        percentages.append(percentage)
        loop.set_postfix({'unwanted_cover_percentage': percentage})

    # The original cell initialised 'unwanted_cover_percentage' to 0 but stored the
    # results under 'unwanted_cover', leaving the first column permanently zero.
    # Both columns now hold the computed values so existing readers of either keep working.
    df['unwanted_cover_percentage'] = percentages
    df['unwanted_cover'] = percentages

    # Save back to the same CSV file. The row index is exported as 'OID_' only when the
    # file does not already carry that column, so re-running this cell does not
    # produce duplicate 'OID_' columns.
    if 'OID_' in df.columns:
        df.to_csv(file, index=False)
    else:
        df.to_csv(file, index=True, index_label='OID_')

In [None]:
# # read csv files in SIMCLR_CSV_PATH as a pandas dataframe
# csv_files = [SIMCLR_CSV_PATH + file for file in os.listdir(SIMCLR_CSV_PATH) if file.endswith('.csv')]

# for file in csv_files:
#     df = pd.read_csv(file)
#     print(f"Successfully read {file}.")
#     df['unwanted_cover_percentage'] = 0
#     loop = tqdm(df.iterrows(), total=df.shape[0], position=0, leave=True)
#     percentage_tasks = []
#     for point in loop:
#         lat = point[1]['lat']
#         lon = point[1]['long']
#         roi = get_square_roi(lat,lon, roi_size=1920, return_gee_object=True)
#         land_cover = ee.ImageCollection('ESA/WorldCover/v100').first().clip(roi)
#         percentage_task = calculate_land_cover_percentage(land_cover, [80,90,50,100,95], scale=30, roi=roi)
#         percentage_tasks.append(percentage_task)
#         loop.set_postfix({'unwanted_cover_percentage': 'Calculating...'})

#     # After the loop, call getInfo() on all tasks
#     for i, task in enumerate(tqdm(percentage_tasks, desc='Getting info', position=0, leave=True)):
#         df.loc[i, 'unwanted_cover'] = task.getInfo()

#     # save the dataframe to the same csv file
#     df.to_csv(file, index=True, index_label='OID_')