### Get Environment Variables

In [None]:
import os
import ast

In [None]:
# AREA carries a Python-literal string; ast.literal_eval parses literals only
# and never executes arbitrary code.
# NOTE(review): presumably [min_lon, min_lat, max_lon, max_lat] -- confirm
# against whoever sets AREA.
str_location = os.environ.get('AREA')
location = ast.literal_eval(str_location)

# The two date settings are kept as the raw strings read from the environment
# (None when a variable is unset).
start_date, end_date = os.environ.get('START_DATE'), os.environ.get('END_DATE')

### Connect to Google Earth Engine

In [None]:
import ee
import json

In [None]:
# if you need to re-authenticate:
# !rm -rf ~/.config/earthengine

In [None]:
def initialize_earth_engine():
    """Authenticate this process with Google Earth Engine via a service account.

    Reads the service-account key from the ``SERVICE_ACCOUNT_KEY`` environment
    variable, writes it to a fixed file path, builds
    ``ee.ServiceAccountCredentials`` from that file and passes the credentials
    to ``ee.Initialize``.

    Raises:
        RuntimeError: If ``SERVICE_ACCOUNT_KEY`` is unset or empty. The check
            happens before the key file is opened, so an existing key file is
            not truncated when the variable is missing.
    """
    service_account = 'deforestation-project@alert-sol-419717.iam.gserviceaccount.com'
    service_account_key = os.getenv('SERVICE_ACCOUNT_KEY')
    if not service_account_key:
        # Fail early with a clear message instead of the TypeError that
        # file.write(None) would raise after the file was already emptied.
        raise RuntimeError(
            "SERVICE_ACCOUNT_KEY environment variable is not set; "
            "cannot authenticate with Google Earth Engine"
        )

    # The credentials object is constructed from a key file on disk, so the
    # key text is materialised at this path first.
    key_path = '/app/service_account_key_temp.json'
    with open(key_path, 'w') as key_file:
        key_file.write(service_account_key)

    credentials = ee.ServiceAccountCredentials(service_account, key_path)
    ee.Initialize(credentials)

In [None]:
# Authenticate the notebook (driver) process with Earth Engine.
# process_and_store_image() calls this function again on its own.
initialize_earth_engine()

### Start Spark Session

In [None]:
import subprocess

# Set bash as default shell for anything that consults the SHELL variable.
os.environ['SHELL'] = '/bin/bash'
# Smoke test: prints the bash version. The return value is not inspected, so a
# non-zero exit status is ignored here.
subprocess.run(["bash", "--version"])  # Run bash to test if it's available

In [None]:
import pyspark

In [None]:
# NOTE(review): these two assignments write each variable's current value back
# to itself. They only have a visible effect when the variable is unset, in
# which case assigning None to os.environ raises -- confirm whether they are
# meant as an early "must be set" check.
os.environ['JAVA_HOME'] =  os.getenv('JAVA_HOME')
os.environ['PATH'] = os.getenv('PATH')

# Put the JDK's bin directory at the front of PATH.
os.environ['PATH'] = os.environ['JAVA_HOME'] + '/bin:' + os.environ['PATH']

# Use the same interpreter (taken from PYTHON_PATH) for both the Spark driver
# and the Python workers.
os.environ['PYSPARK_PYTHON'] = os.getenv('PYTHON_PATH')
os.environ['PYSPARK_DRIVER_PYTHON'] = os.getenv('PYTHON_PATH')

# Driver memory is configured before the context is created.
conf = pyspark.SparkConf()
conf.set('spark.driver.memory','4g')

# NOTE(review): SQLContext is reportedly deprecated in recent PySpark releases
# in favour of SparkSession -- confirm against the installed version.
sc = pyspark.SparkContext(conf=conf)
spark = pyspark.SQLContext.getOrCreate(sc)

In [None]:
# Progress marker printed once the preceding setup cells have run.
print("Done")

### Connect to MongoDB

In [None]:
# from pymongo import MongoClient
# from gridfs import GridFS

In [None]:
def connect_to_mongo():
    """Open the ``deforestation_db`` database and return a GridFS handle on it.

    The connection string is read from the ``MONGO_URI`` environment variable.

    NOTE(review): the ``MongoClient`` / ``GridFS`` imports are commented out in
    the cell just above this function, so those names must be brought into
    scope before this is called.
    """
    mongo_uri = os.getenv('MONGO_URI')
    database = MongoClient(mongo_uri)['deforestation_db']
    return GridFS(database)

### Gather Satellite Images

In [None]:
def add_3_months(date):
    """Return the last day of the month that lies three months after ``date``.

    Args:
        date: Month string in ``YYYY-MM`` form, e.g. ``"2020-01"``.

    Returns:
        A ``YYYY-MM-DD`` string for the final calendar day of the shifted
        month, e.g. ``"2020-01"`` -> ``"2020-04-30"``. February yields the
        29th in leap years and the 28th otherwise.
    """
    import calendar  # local import keeps this block self-contained

    year, month = map(int, date.split('-'))
    month += 3
    if month > 12:
        # Wrapped past December: roll over into the following year.
        month -= 12
        year += 1
    # monthrange returns (weekday_of_first_day, days_in_month).
    last_day = calendar.monthrange(year, month)[1]
    return f"{year:04d}-{month:02d}-{last_day:02d}"

In [None]:
def apply_scale_factors(image):
    """Rescale the optical and thermal bands of ``image`` and write them back.

    Bands matching ``'SR_B.'`` are multiplied by 0.0000275 and offset by -0.2;
    bands matching ``'ST_B.*'`` are multiplied by 0.00341802 and offset by
    149.0. Both rescaled sets are added back with ``addBands(..., None, True)``.

    NOTE(review): the third ``addBands`` argument is presumably the overwrite
    flag, so the rescaled bands replace the originals -- confirm against the
    Earth Engine API reference.
    """
    scaled_optical = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    scaled_thermal = image.select('ST_B.*').multiply(0.00341802).add(149.0)
    with_optical = image.addBands(scaled_optical, None, True)
    return with_optical.addBands(scaled_thermal, None, True)

In [None]:
def ndvi(image):
    """Return ``image`` with an extra band named ``'NDVI'``.

    The band is the normalized difference of ``SR_B5`` and ``SR_B4``.
    """
    index_band = image.normalizedDifference(['SR_B5', 'SR_B4'])
    return image.addBands(index_band.rename('NDVI'))

In [None]:
def create_composite_image(date):
    """Build a median composite for the window that starts in month ``date``.

    Args:
        date: Month string in ``YYYY-MM`` form. The window runs from the first
            day of that month to the date returned by ``add_3_months(date)``.

    Returns:
        The per-pixel median of the filtered, rescaled, NDVI-augmented scenes,
        clipped to the module-level ``area_of_interest``.

    Relies on the module-level name ``area_of_interest`` being defined before
    the call.
    """
    # Several months of scenes are gathered so the median has enough
    # low-cloud observations to work with.
    # NOTE(review): the earlier comment here cited a 6-day revisit interval;
    # Landsat 8's nominal repeat cycle is reportedly 16 days -- confirm.
    window_start = date + '-01'
    window_end = add_3_months(date)

    scenes = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
    scenes = scenes.filterDate(window_start, window_end)
    scenes = scenes.filterBounds(area_of_interest)
    # Keep only scenes whose CLOUD_COVER property is below 10.
    scenes = scenes.filter(ee.Filter.lt('CLOUD_COVER', 10))
    scenes = scenes.map(apply_scale_factors).map(ndvi)

    return scenes.median().clip(area_of_interest)

In [None]:
# area_of_interest = ee.Geometry.Rectangle(location)
# bands = ['B4', 'B3', 'B2']

# start_compositeimg = create_composite_image(start_date)
# end_compositeimg = create_composite_image(end_date)

In [None]:
# print("Done")

### Visualize Satellite Images

In [None]:
# import geemap

In [None]:
def calculate_center(min_lon, min_lat, max_lon, max_lat):
    """Return the midpoint of a bounding box as ``(center_lat, center_lon)``.

    Note the output order is latitude first, while the arguments are given
    longitude first.
    """
    return (min_lat + max_lat) / 2, (min_lon + max_lon) / 2

In [None]:
# center_lat, center_lon = calculate_center(location[0], location[1], location[2], location[3])

In [None]:
# composite_viz_params = {
#     'bands': ['SR_B4', 'SR_B3', 'SR_B2'],
#     'min': 0.03,
#     'max': 0.4,
#     'gamma': 2.5
# }

In [None]:
# Map = geemap.Map(center=(center_lat, center_lon), zoom=8)
# Map.addLayer(start_compositeimg, composite_viz_params, 'Start Composite')
# Map.addLayerControl()
# Map

In [None]:
# Map = geemap.Map(center=(center_lat, center_lon), zoom=8)
# Map.addLayer(end_compositeimg, composite_viz_params, 'End Composite')
# Map.addLayerControl()
# Map

### Calculate Deforestation

In [None]:
# # sample data from start and end composite images for training

# training_start = start_compositeimg.sample(**{
#     'region': area_of_interest,
#     'scale': 30,
#     'numPixels': 500,
#     'tileScale': 10
# })

# training_end = end_compositeimg.sample(**{
#     'region': area_of_interest,
#     'scale': 30,
#     'numPixels': 500,
#     'tileScale': 10
# })

In [None]:
# # apply k-means clustering
# numberOfClusters = 3

# clusterer_start = ee.Clusterer.wekaKMeans(numberOfClusters).train(training_start)
# clusterer_end = ee.Clusterer.wekaKMeans(numberOfClusters).train(training_end)

# classified_start = start_compositeimg.cluster(clusterer_start)
# classified_end = end_compositeimg.cluster(clusterer_end)

In [None]:
# # calculate difference between clustered images to detect deforestation
# deforestation = classified_end.subtract(classified_start).abs()

In [None]:
# print("Done")

### Visualize Results

In [None]:
# cluster_viz_params = {
#     'min': 0,
#     'max': numberOfClusters - 1,
#     'palette': ['red', 'green', 'blue']
# }

In [None]:
# Map = geemap.Map(center=(center_lat, center_lon), zoom=8)
# Map.addLayer(deforestation, cluster_viz_params, 'Deforestation')
# Map.addLayerControl()
# Map

### Save Maps to MongoDB

In [None]:
# import requests
# from PIL import Image
# import io

In [None]:
# Download an image as GeoTIFF and hand it back re-encoded as PNG bytes.
def export_image_to_bytes(image, region, scale=30, crs='EPSG:4326'):
    """Fetch ``image`` through its download URL and return it as PNG bytes.

    Args:
        image: Object exposing ``getDownloadURL`` (an Earth Engine image in
            this notebook).
        region: Passed through as the ``'region'`` download parameter.
        scale: Passed through as the ``'scale'`` download parameter.
        crs: Passed through as the ``'crs'`` download parameter.

    Returns:
        The PNG-encoded bytes, or ``None`` when the HTTP status is not 200.

    NOTE(review): the ``requests`` / ``PIL.Image`` / ``io`` imports are
    commented out in the cell above; they must be in scope before calling.
    """
    download_params = {
        'scale': scale,
        'region': region,
        'crs': crs,
        'format': 'GEO_TIFF',
    }
    download_url = image.getDownloadURL(download_params)
    response = requests.get(download_url)

    if response.status_code != 200:
        return None

    # Decode the TIFF payload, then re-encode it into an in-memory PNG.
    tiff_image = Image.open(io.BytesIO(response.content))
    png_buffer = io.BytesIO()
    tiff_image.save(png_buffer, format='PNG')
    # getvalue() returns the whole buffer regardless of the stream position.
    return png_buffer.getvalue()

In [None]:
# Export one image and persist the resulting bytes in MongoDB (GridFS).
def process_and_store_image(task):
    """Export a single image and store it through GridFS.

    Args:
        task: A 3-item sequence ``(image, region, scale)``.

    Returns:
        The id returned by ``fs.put``, or ``None`` when the export produced
        no bytes.
    """
    image, region, scale = task

    # Earth Engine and MongoDB are both set up inside the function itself, so
    # each invocation brings its own session and connection.
    initialize_earth_engine()
    fs = connect_to_mongo()

    image_bytes = export_image_to_bytes(image, region, scale)
    if not image_bytes:
        return None

    # The image's string form doubles as the file name and description prefix.
    label = f'{image}'
    image_id = fs.put(
        image_bytes,
        filename=label,
        metadata={'description': f'{label} Visualization'},
    )
    if not image_id:
        print("Failed to store image in MongoDB")
    return image_id

In [None]:
# # preprocess maps for optimal display

# start_composite_viz = start_compositeimg.visualize(**{
#     'bands': ['SR_B4', 'SR_B3', 'SR_B2'],
#     'min': 0.03,
#     'max': 0.4,
#     'gamma': 2.5
# })

# end_composite_viz = end_compositeimg.visualize(**{
#     'bands': ['SR_B4', 'SR_B3', 'SR_B2'],
#     'min': 0.03,
#     'max': 0.4,
#     'gamma': 2.5
# })

# clustered_composite_viz = deforestation.visualize(**{
#     'min': 0,
#     'max': numberOfClusters - 1,
#     'palette': ['red', 'green', 'blue']
# })

In [None]:
# print("Done")

In [None]:
# tasks = [
#     (start_composite_viz, area_of_interest, 100),
#     (end_composite_viz, area_of_interest, 100),
#     (clustered_composite_viz, area_of_interest, 500)
# ]

# # parallelize tasks using Spark
# rdd = sc.parallelize(tasks)
# image_ids = rdd.map(process_and_store_image).collect()

In [None]:
# print("Done")

In [None]:
# image_ids_dict = {
#     "start_image_id": str(image_ids[0]),
#     "end_image_id": str(image_ids[1]),
#     "clustered_image_id": str(image_ids[2])
# }

In [None]:
# print(image_ids_dict)

### Store Image IDs in JSON File

In [None]:
# import json

In [None]:
# with open('image_ids.json', 'w') as f:
#     json.dump(image_ids_dict, f)

In [None]:
# print("Done")