### Import packages

In [1]:
import numpy as np
import os
import pandas as pd
from PIL import Image
from scipy.ndimage import generic_filter
import time
from tqdm import tqdm


### Import csv file with points

In [2]:
# Read the CSV file into a DataFrame
df = pd.read_csv('../Data/site_coord_geoenrich2_LM.csv')

# Convert the 'date' column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Create a dictionary to store the DataFrames for each year
dfs_by_year = {}

# Iterate over the years from 2008 to 2020
for year in range(2008, 2021):
    # Filter the rows for the current year
    df_year = df[df['date'].dt.year == year]
    # Store the DataFrame in the dictionary with the year as the key
    dfs_by_year[year] = df_year

# Access the DataFrames for each year
df_2008 = dfs_by_year[2008]
df_2009 = dfs_by_year[2009]
df_2010 = dfs_by_year[2010]
df_2011 = dfs_by_year[2011]
df_2012 = dfs_by_year[2012]
df_2013 = dfs_by_year[2013]
df_2014 = dfs_by_year[2014]
df_2015 = dfs_by_year[2015]
df_2016 = dfs_by_year[2016]
df_2017 = dfs_by_year[2017]
df_2018 = dfs_by_year[2018]
df_2019 = dfs_by_year[2019]
df_2020 = dfs_by_year[2020]


# With buffer

### Define functions

In [7]:
# Define the buffer size in pixels (each pixel = 5km)
buffer_size = 2

# Function to calculate the mean within the buffer region
def calculate_mean(image, point):
    # Convert the latitude and longitude to pixel coordinates
    lat, lon = point['latitude'], point['longitude']
    x = int((lon + 180) * (image.shape[1] / 360))
    y = int((90 - lat) * (image.shape[0] / 180))

    # Create a circular mask centered at the point with the buffer size
    mask = np.zeros_like(image, dtype=bool)
    yy, xx = np.ogrid[:image.shape[0], :image.shape[1]]
    mask[(xx - x) ** 2 + (yy - y) ** 2 <= buffer_size ** 2] = True

    # Apply the mask to the image and calculate the mean within the buffer region
    buffer_values = image[mask]
    mean_value = np.mean(buffer_values)

    return mean_value


In [8]:
# Function to extract the mean pixel value within the buffer for a given point and image
def get_mean_pixel_value(image_path, point):
    image = np.array(Image.open(image_path))
    mean_value = calculate_mean(image, point)
    date_str = os.path.basename(image_path).split('_')[-1].split('.')[0]
    date = pd.to_datetime(date_str, format='%Y%m%d').date()
    return date, mean_value


### Define paths

In [15]:
# Path to the folder containing the images
images_folder = '../DHW/2008'

# Points DataFrame
points_df = df_2008

print(points_df)


         id  latitude  longitude       date
0        c1       -20        149 2008-01-01
14      c15       -20        150 2008-01-01
28      c29       -21        151 2008-01-01
42      c43       -21        152 2008-01-01
56      c57       -21        153 2008-01-01
...     ...       ...        ...        ...
1778  c1779       -44        147 2008-01-01
1792  c1793       -42        145 2008-01-01
1806  c1807       -41        148 2008-01-01
1820  c1821       -41        145 2008-01-01
1834  c1835       -40        148 2008-01-01

[132 rows x 4 columns]


### Extract DHW values from images

In [14]:
# Create an empty list to store the results
results_list = []

# Start the timer
start_time = time.time()

# Iterate over the points and extract mean pixel values
for _, point in points_df.iterrows():
    image_date_mean = []
    for filename in tqdm(os.listdir(images_folder), desc=f"Point {point['id']}"):
        image_path = os.path.join(images_folder, filename)
        result = get_mean_pixel_value(image_path, point)
        if result is not None:
            date, mean_value = result
            image_date_mean.append((date, mean_value))

    # Append the results to the list
    point_data = {
        'id': point['id'],
        'latitude': point['latitude'],
        'longitude': point['longitude'],
        'date': [date for date, _ in image_date_mean],
        'mean_dhw': [mean for _, mean in image_date_mean]
    }
    point_df = pd.DataFrame(point_data)
    results_list.append(point_df)

# Concatenate all the results into a single DataFrame
results_df = pd.concat(results_list, ignore_index=True)

# Calculate the execution time
execution_time = time.time() - start_time

# Print the results
print(results_df)
print(f"Execution time: {execution_time} seconds")


Point c1: 100%|███████████████████████████████| 366/366 [02:30<00:00,  2.44it/s]

     id  latitude  longitude        date  mean_dhw
0    c1       -20        149  2008-06-19    238.75
1    c1       -20        149  2008-09-25    238.75
2    c1       -20        149  2008-01-01    238.75
3    c1       -20        149  2008-01-02    238.75
4    c1       -20        149  2008-01-03    238.75
..   ..       ...        ...         ...       ...
361  c1       -20        149  2008-01-12    238.75
362  c1       -20        149  2008-01-13    238.75
363  c1       -20        149  2008-01-14    238.75
364  c1       -20        149  2008-01-15    238.75
365  c1       -20        149  2008-01-16    238.75

[366 rows x 5 columns]
Execution time: 150.25933170318604 seconds





### Save results to csv

In [1]:
# Save the results to a CSV file
results_df.to_csv('../Results/DHW_2008_test1.csv', index=False)

NameError: name 'results_df' is not defined

# Without buffer

### Define functions

In [7]:
# Function to extract pixel value at a given location
def get_pixel_value(image_path, point):
    try:
        image = Image.open(image_path)
    except (IOError, SyntaxError) as e:
        print(f"Error opening image {image_path}: {e}")
        return None
    
    date_str = os.path.basename(image_path).split('_')[-1].split('.')[0]
    date = pd.to_datetime(date_str, format='%Y%m%d').date()
    pixel_coords = (int(point['longitude']), int(point['latitude']))
    dhw_value = image.getpixel(pixel_coords)
    return date, dhw_value


### Define paths

In [8]:
# Path to the folder containing the images
images_folder = '../DHW/2008'

# Load points data from the CSV file into a DataFrame
points_df = df_2008[:1]

### Extract DHW value without buffer

In [9]:
# Iterate over the points and extract pixel values
results_list = []

for _, point in points_df.iterrows():
    image_date_dhw = []
    for filename in tqdm(os.listdir(images_folder), desc=f"Point {point['id']}"):
        image_path = os.path.join(images_folder, filename)
        result = get_pixel_value(image_path, point)
        if result is not None:
            date, dhw_value = result
            image_date_dhw.append((date, dhw_value))
    
    # Append the results to the list
    point_data = {
        'id': point['id'],
        'latitude': point['latitude'],
        'longitude': point['longitude'],
        'date': [date for date, _ in image_date_dhw],
        'dhw': [dhw for _, dhw in image_date_dhw]
    }
    point_df = pd.DataFrame(point_data)
    results_list.append(point_df)

# Concatenate all the results into a single DataFrame
results_df = pd.concat(results_list, ignore_index=True)

# Print the results
print(results_df)
print(f"Execution time: {execution_time} seconds")


Point c1: 100%|███████████████████████████████| 366/366 [01:17<00:00,  4.72it/s]

     id  latitude  longitude        date                   dhw
0    c1       -20        149  2008-06-19  (150, 150, 150, 255)
1    c1       -20        149  2008-09-25  (150, 150, 150, 255)
2    c1       -20        149  2008-01-01  (150, 150, 150, 255)
3    c1       -20        149  2008-01-02  (150, 150, 150, 255)
4    c1       -20        149  2008-01-03  (150, 150, 150, 255)
..   ..       ...        ...         ...                   ...
361  c1       -20        149  2008-01-12  (150, 150, 150, 255)
362  c1       -20        149  2008-01-13  (150, 150, 150, 255)
363  c1       -20        149  2008-01-14  (150, 150, 150, 255)
364  c1       -20        149  2008-01-15  (150, 150, 150, 255)
365  c1       -20        149  2008-01-16  (150, 150, 150, 255)

[366 rows x 5 columns]





NameError: name 'execution_time' is not defined

### Save results

In [10]:
# Save the results to a CSV file
results_df.to_csv('../Results/DHW_2008_test1_noBuffer.csv', index=False)