In [1]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, spearmanr, kendalltau
import matplotlib.pyplot as plt

# Vondelpark

In [2]:
# Read the Vondelpark CSV exports: NDVI from Sentinel-2 first, AQI from Sentinel-5P second
Vondel_NDVI_csv, Vondel_AQI_csv = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Datasets/Sentinel-2/Vondel_NDVI/Vondel_NDVI_csv.csv",
        "Datasets/Sentinel-5P/Vondelpark/Vondel_AirQualityIndex/Vondel_AirQualityIndex_csv.csv",
    )
)

In [3]:
# Check data structure of the NDVI table (the bare expression is rendered as the cell output)
Vondel_NDVI_csv

Unnamed: 0,Date,Coordinates,Value
0,2024-03-01,"626205.0, 5802955.0",0.074513
1,2024-03-01,"626215.0, 5802955.0",0.072915
2,2024-03-01,"626225.0, 5802955.0",0.071516
3,2024-03-01,"626235.0, 5802955.0",0.068068
4,2024-03-01,"626245.0, 5802955.0",0.061473
...,...,...,...
271195,2024-06-01,"628155.0, 5801835.0",0.184967
271196,2024-06-01,"628165.0, 5801835.0",0.153550
271197,2024-06-01,"628175.0, 5801835.0",0.182766
271198,2024-06-01,"628185.0, 5801835.0",0.143232


In [4]:
# Check data structure of the AQI table (the bare expression is rendered as the cell output)
Vondel_AQI_csv

Unnamed: 0,Date,Coordinates,Value
0,2024-03-01,"626205.0, 5802955.0",28.314556
1,2024-03-01,"626215.0, 5802955.0",28.314556
2,2024-03-01,"626225.0, 5802955.0",28.314556
3,2024-03-01,"626235.0, 5802955.0",28.314556
4,2024-03-01,"626245.0, 5802955.0",28.314556
...,...,...,...
271195,2024-06-01,"628155.0, 5801835.0",22.487692
271196,2024-06-01,"628165.0, 5801835.0",22.487692
271197,2024-06-01,"628175.0, 5801835.0",22.487692
271198,2024-06-01,"628185.0, 5801835.0",22.487692


In [5]:
# Make 'Date' the index of both Vondelpark tables, modifying them in place.
# The guard keeps the cell re-runnable: once 'Date' is the index it is no longer
# a column, so calling set_index('Date') a second time would fail.
for park_table in (Vondel_NDVI_csv, Vondel_AQI_csv):
    if park_table.index.name != "Date":
        park_table.set_index('Date', inplace=True)

In [None]:
# Merge the two indices together on Date to ensure that Values are aligned
Vondel_merged_csv = pd.merge(Vondel_NDVI_csv, Vondel_AQI_csv, left_index=True, right_index=True, how="inner")
Vondel_merged_csv

In [None]:
# Cleaning merged .csv
# Drop missing values
Vondel_merged_csv_cleaned = Vondel_merged_csv.copy().dropna()

# Combine Date columns
Vondel_merged_csv_cleaned.loc[:, 'Date'] = Vondel_merged_csv_cleaned['Date_x'].combine_first(
    Vondel_merged_csv_cleaned['Date_y']
)

# Combine Coordinates columns
Vondel_merged_csv_cleaned.loc[:, 'Coordinates'] = Vondel_merged_csv_cleaned['Coordinates_x'].combine_first(
    Vondel_merged_csv_cleaned['Coordinates_y']
)

# Drop redundant columns
Vondel_merged_csv_cleaned.drop(columns=['Date_x', 'Date_y', 'Coordinates_x', 'Coordinates_y'], inplace=True)

# Rename Value columns to NDVI and AQI
Vondel_merged_csv_cleaned.rename(columns={'Value_x': 'NDVI', 'Value_y': 'AQI'}, inplace=True)

Vondel_merged_csv_cleaned

In [None]:
# Trim 2% of the data (by cumulative rank) from the bottom and the top of the
# NDVI distribution, so that outliers are removed and the central range remains.

# Cumulative cut-off range (2% to 98%)
lower_cut, upper_cut = 0.02, 0.98

# NDVI values in ascending order, renumbered from 0
ndvi_data = Vondel_merged_csv_cleaned['NDVI'].copy()
sorted_ndvi_data = ndvi_data.sort_values().reset_index(drop=True)

# Fractional rank of every value (pandas' default: tied values share their average rank)
cumulative_percentage = sorted_ndvi_data.rank(pct=True)

# Keep the NDVI values whose rank lies inside the range, both ends included
filtered_NDVI_data = sorted_ndvi_data[cumulative_percentage.between(lower_cut, upper_cut)]

In [None]:
# NDVI bounds: the smallest and largest value that survived the rank filter
lower_cut_value, upper_cut_value = filtered_NDVI_data.min(), filtered_NDVI_data.max()

# Select the merged rows whose NDVI lies within those bounds (both ends included)
ndvi_in_range = Vondel_merged_csv_cleaned['NDVI'].between(lower_cut_value, upper_cut_value)
Vondel_merged_csv_final = Vondel_merged_csv_cleaned[ndvi_in_range]

Vondel_merged_csv_final

In [None]:
# Correlation tests between NDVI and AQI on the filtered Vondelpark rows
Vondel_NDVI_Values = Vondel_merged_csv_final['NDVI']
Vondel_AQI_Values = Vondel_merged_csv_final['AQI']

# Each SciPy test is unpacked into a (coefficient, p-value) pair
pearson_corr, p_pearson = pearsonr(Vondel_NDVI_Values, Vondel_AQI_Values)
spearman_corr, p_spearman = spearmanr(Vondel_NDVI_Values, Vondel_AQI_Values)
kendall_corr, p_kendall = kendalltau(Vondel_NDVI_Values, Vondel_AQI_Values)

# One heading line plus one result line per test
print(
    "Pearson's Correlation:",
    f"Coefficient: {pearson_corr:.4f}, p-value: {p_pearson:.4e}",
    sep="\n",
)
print(
    "\nSpearman's Correlation:",
    f"Coefficient: {spearman_corr:.4f}, p-value: {p_spearman:.4e}",
    sep="\n",
)
print(
    "\nKendall's Tau Correlation:",
    f"Coefficient: {kendall_corr:.4f}, p-value: {p_kendall:.4e}",
    sep="\n",
)

In [None]:
# Scatter of AQI (y axis) against NDVI (x axis) for the filtered Vondelpark rows
fig, ax = plt.subplots()
ax.scatter(Vondel_NDVI_Values, Vondel_AQI_Values)
ax.set_xlabel('NDVI')
ax.set_ylabel('AQI')
ax.set_title('Scatter Plot of AQI vs NDVI for Vondelpark')
plt.show()

# Westerpark

In [None]:
# Read the Westerpark CSV exports: NDVI from Sentinel-2 first, AQI from Sentinel-5P second
Wester_NDVI_csv, Wester_AQI_csv = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Datasets/Sentinel-2/Wester_NDVI/Wester_NDVI_csv.csv",
        "Datasets/Sentinel-5P/Westerpark/Wester_AirQualityIndex/Wester_AirQualityIndex_csv.csv",
    )
)

In [None]:
# Check data structure of the NDVI table (the bare expression is rendered as the cell output)
Wester_NDVI_csv

In [None]:
# Check data structure of the AQI table (the bare expression is rendered as the cell output)
Wester_AQI_csv

In [None]:
# Make 'Date' the index of both Westerpark tables, modifying them in place.
# The guard keeps the cell re-runnable: once 'Date' is the index it is no longer
# a column, so calling set_index('Date') a second time would fail.
for park_table in (Wester_NDVI_csv, Wester_AQI_csv):
    if park_table.index.name != "Date":
        park_table.set_index('Date', inplace=True)

In [None]:
# Merge the two indices together on Date to ensure that Values are aligned
Wester_merged_csv = pd.merge(Wester_NDVI_csv, Wester_AQI_csv, left_index=True, right_index=True, how="inner")
Wester_merged_csv

In [None]:
# Cleaning merged .csv
# Drop missing values
Wester_merged_csv_cleaned = Wester_merged_csv.copy().dropna()

# Combine Date columns
Wester_merged_csv_cleaned.loc[:, 'Date'] = Wester_merged_csv_cleaned['Date_x'].combine_first(
    Wester_merged_csv_cleaned['Date_y']
)

# Combine Coordinates columns
Wester_merged_csv_cleaned.loc[:, 'Coordinates'] = Wester_merged_csv_cleaned['Coordinates_x'].combine_first(
    Wester_merged_csv_cleaned['Coordinates_y']
)

# Drop redundant columns
Wester_merged_csv_cleaned.drop(columns=['Date_x', 'Date_y', 'Coordinates_x', 'Coordinates_y'], inplace=True)

# Rename Value columns to NDVI and AQI
Wester_merged_csv_cleaned.rename(columns={'Value_x': 'NDVI', 'Value_y': 'AQI'}, inplace=True)

Wester_merged_csv_cleaned

In [None]:
# Trim 2% of the data (by cumulative rank) from the bottom and the top of the
# NDVI distribution, so that outliers are removed and the central range remains.

# Cumulative cut-off range (2% to 98%)
lower_cut, upper_cut = 0.02, 0.98

# NDVI values in ascending order, renumbered from 0
ndvi_data = Wester_merged_csv_cleaned['NDVI'].copy()
sorted_ndvi_data = ndvi_data.sort_values().reset_index(drop=True)

# Fractional rank of every value (pandas' default: tied values share their average rank)
cumulative_percentage = sorted_ndvi_data.rank(pct=True)

# Keep the NDVI values whose rank lies inside the range, both ends included
filtered_NDVI_data = sorted_ndvi_data[cumulative_percentage.between(lower_cut, upper_cut)]

In [None]:
# NDVI bounds: the smallest and largest value that survived the rank filter
lower_cut_value, upper_cut_value = filtered_NDVI_data.min(), filtered_NDVI_data.max()

# Select the merged rows whose NDVI lies within those bounds (both ends included)
ndvi_in_range = Wester_merged_csv_cleaned['NDVI'].between(lower_cut_value, upper_cut_value)
Wester_merged_csv_final = Wester_merged_csv_cleaned[ndvi_in_range]

Wester_merged_csv_final

In [None]:
# Correlation tests between NDVI and AQI on the filtered Westerpark rows
Wester_NDVI_Values = Wester_merged_csv_final['NDVI']
Wester_AQI_Values = Wester_merged_csv_final['AQI']

# Each SciPy test is unpacked into a (coefficient, p-value) pair
pearson_corr, p_pearson = pearsonr(Wester_NDVI_Values, Wester_AQI_Values)
spearman_corr, p_spearman = spearmanr(Wester_NDVI_Values, Wester_AQI_Values)
kendall_corr, p_kendall = kendalltau(Wester_NDVI_Values, Wester_AQI_Values)

# One heading line plus one result line per test
print(
    "Pearson's Correlation:",
    f"Coefficient: {pearson_corr:.4f}, p-value: {p_pearson:.4e}",
    sep="\n",
)
print(
    "\nSpearman's Correlation:",
    f"Coefficient: {spearman_corr:.4f}, p-value: {p_spearman:.4e}",
    sep="\n",
)
print(
    "\nKendall's Tau Correlation:",
    f"Coefficient: {kendall_corr:.4f}, p-value: {p_kendall:.4e}",
    sep="\n",
)

In [None]:
# Scatter of AQI (y axis) against NDVI (x axis) for the filtered Westerpark rows
fig, ax = plt.subplots()
ax.scatter(Wester_NDVI_Values, Wester_AQI_Values)
ax.set_xlabel('NDVI')
ax.set_ylabel('AQI')
ax.set_title('Scatter Plot of AQI vs NDVI for Westerpark')
plt.show()

# Amstelpark

In [None]:
# Read the Amstelpark CSV exports: NDVI from Sentinel-2 first, AQI from Sentinel-5P second
Amstel_NDVI_csv, Amstel_AQI_csv = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Datasets/Sentinel-2/Amstel_NDVI/Amstel_NDVI_csv.csv",
        "Datasets/Sentinel-5P/Amstelpark/Amstel_AirQualityIndex/Amstel_AirQualityIndex_csv.csv",
    )
)

In [None]:
# Check data structure of the NDVI table (the bare expression is rendered as the cell output)
Amstel_NDVI_csv

In [None]:
# Check data structure of the AQI table (the bare expression is rendered as the cell output)
Amstel_AQI_csv

In [None]:
# Make 'Date' the index of both Amstelpark tables, modifying them in place.
# The guard keeps the cell re-runnable: once 'Date' is the index it is no longer
# a column, so calling set_index('Date') a second time would fail.
for park_table in (Amstel_NDVI_csv, Amstel_AQI_csv):
    if park_table.index.name != "Date":
        park_table.set_index('Date', inplace=True)

In [None]:
# Merge the two indices together on Date to ensure that Values are aligned
Amstel_merged_csv = pd.merge(Amstel_NDVI_csv, Amstel_AQI_csv, left_index=True, right_index=True, how="inner")
Amstel_merged_csv

In [None]:
# Cleaning merged .csv
# Drop missing values
Amstel_merged_csv_cleaned = Amstel_merged_csv.copy().dropna()

# Combine Date columns
Amstel_merged_csv_cleaned.loc[:, 'Date'] = Amstel_merged_csv_cleaned['Date_x'].combine_first(
    Amstel_merged_csv_cleaned['Date_y']
)

# Combine Coordinates columns
Amstel_merged_csv_cleaned.loc[:, 'Coordinates'] = Amstel_merged_csv_cleaned['Coordinates_x'].combine_first(
    Amstel_merged_csv_cleaned['Coordinates_y']
)

# Drop redundant columns
Amstel_merged_csv_cleaned.drop(columns=['Date_x', 'Date_y', 'Coordinates_x', 'Coordinates_y'], inplace=True)

# Rename Value columns to NDVI and AQI
Amstel_merged_csv_cleaned.rename(columns={'Value_x': 'NDVI', 'Value_y': 'AQI'}, inplace=True)

Amstel_merged_csv_cleaned

In [None]:
# Trim 2% of the data (by cumulative rank) from the bottom and the top of the
# NDVI distribution, so that outliers are removed and the central range remains.

# Cumulative cut-off range (2% to 98%)
lower_cut, upper_cut = 0.02, 0.98

# NDVI values in ascending order, renumbered from 0
ndvi_data = Amstel_merged_csv_cleaned['NDVI'].copy()
sorted_ndvi_data = ndvi_data.sort_values().reset_index(drop=True)

# Fractional rank of every value (pandas' default: tied values share their average rank)
cumulative_percentage = sorted_ndvi_data.rank(pct=True)

# Keep the NDVI values whose rank lies inside the range, both ends included
filtered_NDVI_data = sorted_ndvi_data[cumulative_percentage.between(lower_cut, upper_cut)]

In [None]:
# NDVI bounds: the smallest and largest value that survived the rank filter
lower_cut_value, upper_cut_value = filtered_NDVI_data.min(), filtered_NDVI_data.max()

# Select the merged rows whose NDVI lies within those bounds (both ends included)
ndvi_in_range = Amstel_merged_csv_cleaned['NDVI'].between(lower_cut_value, upper_cut_value)
Amstel_merged_csv_final = Amstel_merged_csv_cleaned[ndvi_in_range]

Amstel_merged_csv_final

In [None]:
# Correlation tests between NDVI and AQI on the filtered Amstelpark rows
Amstel_NDVI_Values = Amstel_merged_csv_final['NDVI']
Amstel_AQI_Values = Amstel_merged_csv_final['AQI']

# Each SciPy test is unpacked into a (coefficient, p-value) pair
pearson_corr, p_pearson = pearsonr(Amstel_NDVI_Values, Amstel_AQI_Values)
spearman_corr, p_spearman = spearmanr(Amstel_NDVI_Values, Amstel_AQI_Values)
kendall_corr, p_kendall = kendalltau(Amstel_NDVI_Values, Amstel_AQI_Values)

# One heading line plus one result line per test
print(
    "Pearson's Correlation:",
    f"Coefficient: {pearson_corr:.4f}, p-value: {p_pearson:.4e}",
    sep="\n",
)
print(
    "\nSpearman's Correlation:",
    f"Coefficient: {spearman_corr:.4f}, p-value: {p_spearman:.4e}",
    sep="\n",
)
print(
    "\nKendall's Tau Correlation:",
    f"Coefficient: {kendall_corr:.4f}, p-value: {p_kendall:.4e}",
    sep="\n",
)

In [None]:
# Scatter of AQI (y axis) against NDVI (x axis) for the filtered Amstelpark rows
fig, ax = plt.subplots()
ax.scatter(Amstel_NDVI_Values, Amstel_AQI_Values)
ax.set_xlabel('NDVI')
ax.set_ylabel('AQI')
ax.set_title('Scatter Plot of AQI vs NDVI for Amstelpark')
plt.show()

# Rembrandtpark

In [None]:
# Read the Rembrandtpark CSV exports: NDVI from Sentinel-2 first, AQI from Sentinel-5P second
Rembrandt_NDVI_csv, Rembrandt_AQI_csv = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Datasets/Sentinel-2/Rembrandt_NDVI/Rembrandt_NDVI_csv.csv",
        "Datasets/Sentinel-5P/Rembrandtpark/Rembrandt_AirQualityIndex/Rembrandt_AirQualityIndex_csv.csv",
    )
)

In [None]:
# Check data structure of the NDVI table (the bare expression is rendered as the cell output)
Rembrandt_NDVI_csv

In [None]:
# Check data structure of the AQI table (the bare expression is rendered as the cell output)
Rembrandt_AQI_csv

In [None]:
# Make 'Date' the index of both Rembrandtpark tables, modifying them in place.
# The guard keeps the cell re-runnable: once 'Date' is the index it is no longer
# a column, so calling set_index('Date') a second time would fail.
for park_table in (Rembrandt_NDVI_csv, Rembrandt_AQI_csv):
    if park_table.index.name != "Date":
        park_table.set_index('Date', inplace=True)

In [None]:
# Merge the two indices together on Date to ensure that Values are aligned
Rembrandt_merged_csv = pd.merge(Rembrandt_NDVI_csv, Rembrandt_AQI_csv, left_index=True, right_index=True, how="inner")
Rembrandt_merged_csv

In [None]:
# Cleaning merged .csv
# Drop missing values
Rembrandt_merged_csv_cleaned = Rembrandt_merged_csv.copy().dropna()

# Combine Date columns
Rembrandt_merged_csv_cleaned.loc[:, 'Date'] = Rembrandt_merged_csv_cleaned['Date_x'].combine_first(
    Rembrandt_merged_csv_cleaned['Date_y']
)

# Combine Coordinates columns
Rembrandt_merged_csv_cleaned.loc[:, 'Coordinates'] = Rembrandt_merged_csv_cleaned['Coordinates_x'].combine_first(
    Rembrandt_merged_csv_cleaned['Coordinates_y']
)

# Drop redundant columns
Rembrandt_merged_csv_cleaned.drop(columns=['Date_x', 'Date_y', 'Coordinates_x', 'Coordinates_y'], inplace=True)

# Rename Value columns to NDVI and AQI
Rembrandt_merged_csv_cleaned.rename(columns={'Value_x': 'NDVI', 'Value_y': 'AQI'}, inplace=True)

Rembrandt_merged_csv_cleaned

In [None]:
# Trim 2% of the data (by cumulative rank) from the bottom and the top of the
# NDVI distribution, so that outliers are removed and the central range remains.

# Cumulative cut-off range (2% to 98%)
lower_cut, upper_cut = 0.02, 0.98

# NDVI values in ascending order, renumbered from 0
ndvi_data = Rembrandt_merged_csv_cleaned['NDVI'].copy()
sorted_ndvi_data = ndvi_data.sort_values().reset_index(drop=True)

# Fractional rank of every value (pandas' default: tied values share their average rank)
cumulative_percentage = sorted_ndvi_data.rank(pct=True)

# Keep the NDVI values whose rank lies inside the range, both ends included
filtered_NDVI_data = sorted_ndvi_data[cumulative_percentage.between(lower_cut, upper_cut)]

In [None]:
# NDVI bounds: the smallest and largest value that survived the rank filter
lower_cut_value, upper_cut_value = filtered_NDVI_data.min(), filtered_NDVI_data.max()

# Select the merged rows whose NDVI lies within those bounds (both ends included)
ndvi_in_range = Rembrandt_merged_csv_cleaned['NDVI'].between(lower_cut_value, upper_cut_value)
Rembrandt_merged_csv_final = Rembrandt_merged_csv_cleaned[ndvi_in_range]

Rembrandt_merged_csv_final

In [None]:
# Correlation tests between NDVI and AQI on the filtered Rembrandtpark rows
Rembrandt_NDVI_Values = Rembrandt_merged_csv_final['NDVI']
Rembrandt_AQI_Values = Rembrandt_merged_csv_final['AQI']

# Each SciPy test is unpacked into a (coefficient, p-value) pair
pearson_corr, p_pearson = pearsonr(Rembrandt_NDVI_Values, Rembrandt_AQI_Values)
spearman_corr, p_spearman = spearmanr(Rembrandt_NDVI_Values, Rembrandt_AQI_Values)
kendall_corr, p_kendall = kendalltau(Rembrandt_NDVI_Values, Rembrandt_AQI_Values)

# One heading line plus one result line per test
print(
    "Pearson's Correlation:",
    f"Coefficient: {pearson_corr:.4f}, p-value: {p_pearson:.4e}",
    sep="\n",
)
print(
    "\nSpearman's Correlation:",
    f"Coefficient: {spearman_corr:.4f}, p-value: {p_spearman:.4e}",
    sep="\n",
)
print(
    "\nKendall's Tau Correlation:",
    f"Coefficient: {kendall_corr:.4f}, p-value: {p_kendall:.4e}",
    sep="\n",
)

In [None]:
# Scatter of AQI (y axis) against NDVI (x axis) for the filtered Rembrandtpark rows
fig, ax = plt.subplots()
ax.scatter(Rembrandt_NDVI_Values, Rembrandt_AQI_Values)
ax.set_xlabel('NDVI')
ax.set_ylabel('AQI')
ax.set_title('Scatter Plot of AQI vs NDVI for Rembrandt Park')
plt.show()