In [1]:
import calendar
import io
import json
import os
import time

import numpy as np
import pandas as pd
import tifffile
from oauthlib.oauth2 import BackendApplicationClient, TokenExpiredError
from requests_oauthlib import OAuth2Session

In [2]:
# OAuth client credentials are kept out of the notebook in a local JSON file
# that must provide the keys "client_id" and "client_secret".
with open("log.json") as credentials_file:
    creds = json.loads(credentials_file.read())

CLIENT_ID = creds["client_id"]
CLIENT_SECRET = creds["client_secret"]

# Token endpoint used for the client-credentials grant.
TOKEN_URL = (
    'https://identity.dataspace.copernicus.eu'
    '/auth/realms/CDSE/protocol/openid-connect/token'
)

def get_authenticated_session():
    """Create an OAuth2 session and fetch a token for it.

    Uses the client-credentials (backend application) flow with the
    module-level CLIENT_ID, CLIENT_SECRET and TOKEN_URL.

    Returns:
        The OAuth2Session on which fetch_token() has been called. The token
        itself is not returned separately.
    """
    session = OAuth2Session(client=BackendApplicationClient(client_id=CLIENT_ID))
    # The return value of fetch_token() is not needed by callers, so it is
    # deliberately not bound to a name.
    session.fetch_token(
        token_url=TOKEN_URL,
        client_secret=CLIENT_SECRET,
        include_client_id=True,
    )
    return session

# Authenticate once up front. The resulting session is kept in the
# module-level name `oauth` so it can be handed to the request helper.
oauth = get_authenticated_session()
print("Authentication successful.")

Authentication successful.


In [3]:
# Input dataset and the checkpoint file that holds partially filled results.
source_file = 'populated_bird_data.csv'
working_file = 'populated_bird_data_WIP.csv'

if not os.path.exists(working_file):
    # First run: start from the source data and add empty index columns.
    print(f"No progress file found. Starting fresh from '{source_file}'...")
    new_cols = ['ndvi', 'ndwi', 'ndmi', 'ndbi', 'mndwi']
    df = pd.read_csv(source_file).assign(**{col: np.nan for col in new_cols})
else:
    # A checkpoint exists: continue from whatever was saved last time.
    print(f"Found progress file '{working_file}'. Resuming from there...")
    df = pd.read_csv(working_file)

# A row counts as processed once its 'ndvi' value has been filled in.
ndvi_filled = df['ndvi'].notna()
total_rows = len(df)
processed_rows = ndvi_filled.sum()
remaining_rows = (~ndvi_filled).sum()

print(f"Total locations: {total_rows}")
print(f"Already processed: {processed_rows}")
print(f"Remaining: {remaining_rows}")

Found progress file 'populated_bird_data_WIP.csv'. Resuming from there...
Total locations: 26918
Already processed: 971
Remaining: 25947


In [4]:
# JavaScript evalscript sent verbatim with every Process API request.
# It declares four input bands (B03, B04, B08, B11) and a 5-band FLOAT32
# output, and returns, in this order: ndvi, ndwi, ndmi, ndbi, mndwi.
# The caller relies on that order when it writes the values into the dataframe.
# `index(a, b)` is not defined in this script; presumably it is the service's
# built-in normalized-difference helper, (a - b) / (a + b) -- TODO confirm.
# NOTE(review): the script has no `//VERSION=3` header line; confirm whether
# the service requires one for setup()/evaluatePixel() style scripts.
evalscript = """
function setup() {
  return {
    input: ["B03", "B04", "B08", "B11"],
    output: { bands: 5, sampleType: "FLOAT32" }
  };
}

function evaluatePixel(sample) {
  let ndvi = index(sample.B08, sample.B04); 
  let ndwi = index(sample.B03, sample.B08); 
  let ndmi = index(sample.B08, sample.B11); 
  let ndbi = index(sample.B11, sample.B08);
  let mndwi = index(sample.B03, sample.B11);
  return [ndvi, ndwi, ndmi, ndbi, mndwi];
}
"""

def fetch_indices_for_point(lat, lon, year, month, session, timeout=60):
    """Request the evalscript outputs for one point and one calendar month.

    Posts a 1x1-pixel request for a small bounding box centred on the point,
    covering the first to the last day of the given month, and decodes the
    TIFF response.

    Args:
        lat: Latitude of the point (used as the y-coordinates of the bbox).
        lon: Longitude of the point (used as the x-coordinates of the bbox).
        year: Year of the month to query; converted with int().
        month: Month number to query; converted with int().
        session: Authenticated session object exposing
            ``post(url, json=..., timeout=...)``.
        timeout: Seconds passed to ``session.post`` as its timeout so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        A ``(vals, session)`` tuple. ``vals`` is the flattened pixel array
        when the response is HTTP 200 and decodes to a 3-dimensional image,
        otherwise ``None``. ``session`` is the session the caller should use
        next: the one passed in, or a fresh one if the token was refreshed.

    Raises:
        Errors from converting ``year``/``month`` or computing the month
        length propagate, because that happens before the guarded section.
        Failures during the request or decoding are reported as ``None``.
    """
    offset = 0.0001
    bbox = [lon - offset, lat - offset, lon + offset, lat + offset]

    year, month = int(year), int(month)
    last_day = calendar.monthrange(year, month)[1]
    start_date = f"{year}-{month:02d}-01"
    end_date = f"{year}-{month:02d}-{last_day}"
    json_request = {
        'input': {
            'bounds': {
                'bbox': bbox,
                'properties': { 'crs': 'http://www.opengis.net/def/crs/OGC/1.3/CRS84' }
            },
            'data': [{
                'type': 'S2L2A',
                'dataFilter': {
                    'timeRange': {
                        'from': f'{start_date}T00:00:00Z',
                        'to': f'{end_date}T23:59:59Z'
                    },
                    'mosaickingOrder': 'leastCC',
                },
            }]
        },
        'output': {
            'width': 1,
            'height': 1,
            'responses': [{ 'identifier': 'default', 'format': { 'type': 'image/tiff' } }]
        },
        'evalscript': evalscript
    }

    url = "https://sh.dataspace.copernicus.eu/api/v1/process"

    try:
        try:
            response = session.post(url, json=json_request, timeout=timeout)
            token_expired = response.status_code == 401
        except TokenExpiredError:
            # The OAuth client can reject an expired token locally, before any
            # HTTP request is made. Treat that exactly like a server-side 401;
            # otherwise the session would never be refreshed and every later
            # call would fail silently.
            token_expired = True

        if token_expired:
            print("\nToken expired. Refreshing...", end='')
            session = get_authenticated_session()
            response = session.post(url, json=json_request, timeout=timeout)

        if response.status_code != 200:
            return None, session

        image_arr = tifffile.imread(io.BytesIO(response.content))

        if image_arr.ndim == 3:
            return image_arr.flatten(), session

    except Exception:
        # Best effort: any request/decoding failure leaves this row unfilled
        # so that it can be retried on a later run.
        return None, session

    return None, session

In [5]:
print("\nStarting processing...")
start_time = time.time()
save_target = working_file

# Only rows whose 'ndvi' is still missing are queued for a request.
indices_to_process = df.loc[df['ndvi'].isna()].index

print(f"Queue size: {len(indices_to_process)} items to fetch.")

# Column order matches the order of the values returned by the evalscript.
index_columns = ('ndvi', 'ndwi', 'ndmi', 'ndbi', 'mndwi')

try:
    for i, index in enumerate(indices_to_process):
        row = df.loc[index]

        print(f"Processing row {index} (Session: {i+1}/{len(indices_to_process)})...", end='\r')

        # The helper hands back the session to use next (it may have been
        # replaced after a token refresh), so rebind `oauth` on every call.
        vals, oauth = fetch_indices_for_point(
            row['latitude'], row['longitude'], row['year'], row['month'], oauth
        )

        # Store results only when exactly five values came back.
        if vals is not None and len(vals) == 5:
            for column, value in zip(index_columns, vals):
                df.at[index, column] = value

        # Checkpoint to disk after every 100th queued item.
        if (i + 1) % 100 == 0:
            df.to_csv(save_target, index=False)

except KeyboardInterrupt:
    print("\n\n!! Interrupted by user !!")
    print("Saving current progress before exiting...")

except Exception as e:
    print(f"\n\n!! Unexpected Error: {e} !!")
    print("Saving current progress before crashing...")
    raise e

finally:
    # Runs on normal completion, interruption and error alike.
    df.to_csv(save_target, index=False)
    print(f"\nSaved progress to: {save_target}")

    processed_now = df['ndvi'].notna().sum()
    print(f"Total processed so far: {processed_now} / {len(df)}")
    print(f"Time elapsed: {time.time() - start_time:.0f} seconds.")

# The source file is replaced only once no row is left without an 'ndvi' value.
if df['ndvi'].isna().any():
    print("Processing interrupted. Progress saved to WIP file only.")
else:
    print("All rows processed! Overwriting the main database...")
    df.to_csv(source_file, index=False)
    print(f"Success. {source_file} has been updated.")


Starting processing...
Queue size: 25947 items to fetch.
Processing row 4964 (Session: 3994/25947)...
Processing row 8127 (Session: 7157/25947)...965 (Session: 3995/25947)...
Processing row 11536 (Session: 10566/25947)...8 (Session: 7158/25947)...

!! Interrupted by user !!
Saving current progress before exiting...

Saved progress to: populated_bird_data_WIP.csv
Total processed so far: 11534 / 26918
Time elapsed: 5154 seconds.
Processing interrupted. Progress saved to WIP file only.
