# In [None]:
import numpy as np
import pandas as pd
import rasterio
from scipy.stats import mannwhitneyu
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib

# =============================
#  CESDR Significance and Distribution Analysis
# Function:
#   1. Read CESDR rasters derived from two different population datasets (SRFPop and WorldPop);
#   2. Perform a Mann–Whitney U test to evaluate whether their differences are statistically significant;
#   3. Visualize results using boxplots and difference histograms.
# =============================


# === Global font settings ===
# Apply both plotting defaults in one call so every figure below shares them.
matplotlib.rcParams.update({
    'font.family': 'Times New Roman',
    'axes.unicode_minus': False,  # Ensure minus signs display correctly
})


# === Function: Read raster as numpy array ===
def read_raster(path):
    """
    Read the first band of a raster file and return it as a NumPy array.

    Parameters:
        path (str): Path to the raster (.tif file)

    Returns:
        arr (np.ndarray): Band-1 values as float32, with NoData replaced by NaN
    """
    with rasterio.open(path) as src:
        raw = src.read(1)
        nodata = src.nodata

    arr = raw.astype(np.float32)

    # Match NoData against the values as stored in the file: after the cast
    # to float32 a NoData value may be rounded (or overflow to inf) and would
    # then no longer compare equal to ``src.nodata``.
    # Nothing to do when the dataset declares no NoData value (None) or when
    # NoData is NaN, since those pixels are already NaN in ``arr``.
    if nodata is not None and not np.isnan(nodata):
        arr[raw == nodata] = np.nan
    return arr


# === Step 1: File paths ===
path_pred = r"D:\Documents\Desktop\ESDR-P\ESDR\0.01\0.01_CESDR.tif"     # CESDR based on SRFPop
path_wp   = r"D:\Documents\Desktop\ess\draw\worldpopCESDR1.tif"         # CESDR based on WorldPop


# === Step 2: Read and clean data ===
esdr_pred = read_raster(path_pred)
esdr_wp   = read_raster(path_wp)

# The rasters are combined pixel by pixel, so both must share one grid shape.
# Without this check NumPy would either fail with an opaque broadcasting
# error or, for broadcast-compatible shapes, silently pair unrelated pixels.
if esdr_pred.shape != esdr_wp.shape:
    raise ValueError(
        f"Raster shapes differ: {esdr_pred.shape} vs {esdr_wp.shape}; "
        "resample both CESDR rasters to a common grid first."
    )

# Keep only pixels that are valid (non-NaN) in both rasters
mask = (~np.isnan(esdr_pred)) & (~np.isnan(esdr_wp))
pred_clean = esdr_pred[mask]
wp_clean   = esdr_wp[mask]
print(f" Number of valid pixels: {len(pred_clean)}")

# Stop early with a clear message instead of failing later inside the
# statistical test or producing empty plots.
if pred_clean.size == 0:
    raise ValueError("No pixels are valid in both rasters; nothing to compare.")


# === Step 3: Mann–Whitney U test ===
# The Mann–Whitney U test is a non-parametric test used to determine whether
# two independent samples come from the same distribution. It is robust to
# non-normal data.
# NOTE(review): both samples are taken from the same pixel locations, so they
# look paired rather than independent — confirm whether a paired test
# (e.g. Wilcoxon signed-rank) would be more appropriate for this comparison.
stat, p = mannwhitneyu(pred_clean, wp_clean, alternative='two-sided')
print("\n Mann-Whitney U Test Results:")
print(f"Statistic U = {stat:.2f}")
# General ("g") format keeps very small p-values visible; a fixed ".4f"
# would print every p below 0.00005 as 0.0000.
print(f"p-value = {p:.4g}")
print(" Significant difference (p < 0.05)" if p < 0.05 else " Not significant (p ≥ 0.05)")


# === Step 4: Boxplot visualization ===
# Side-by-side boxplots of the CESDR values for the two population datasets.
# The median, interquartile range (IQR) and outliers make differences in
# central tendency and variability visible.

# Long-format table: one row per valid pixel and dataset
df = pd.DataFrame({
    "CESDR": np.concatenate([pred_clean, wp_clean]),
    "Source": np.repeat(["SRFPop", "WorldPop"], [len(pred_clean), len(wp_clean)]),
})

# Outliers (fliers) are drawn as small, hollow, black circles
flier_props = {
    'marker': 'o',
    'markersize': 4,
    'markerfacecolor': 'none',
    'markeredgecolor': 'black',
    'alpha': 0.8,
}

plt.figure(figsize=(8, 6), dpi=600)
sns.boxplot(
    data=df,
    x="Source",
    y="CESDR",
    palette="Set2",
    showfliers=True,
    flierprops=flier_props,
)

# Title and axis labels
plt.title("CESDR (Mann-Whitney Test)", fontsize=16, pad=15)
plt.xlabel("0.01° Population Source", fontsize=14, labelpad=10)
plt.ylabel("CESDR Value", fontsize=14, labelpad=5)

# Ticks, visible value range and background grid
plt.xticks(fontsize=12)
plt.yticks(np.arange(-1.5, 1.6, 0.25), fontsize=12)
plt.ylim(-1.25, 1.25)
plt.grid(True, linestyle='--', alpha=0.6)

# Fix the margins, write the figure to disk, then display it
plt.subplots_adjust(left=0.12, right=0.95, top=0.9, bottom=0.12)
plt.savefig("boxplot_CESDR.png", dpi=300, bbox_inches='tight')
plt.show()


# === Step 5: Difference histogram ===
# Histogram of the pixel-wise difference between SRFPop- and WorldPop-based
# CESDR (SRFPop minus WorldPop):
#   > 0 → CESDR (SRFPop) > CESDR (WorldPop)
#   < 0 → CESDR (SRFPop) < CESDR (WorldPop)
diff = pred_clean - wp_clean

plt.figure(figsize=(8, 5), dpi=300)
plt.hist(diff, bins=100, color='skyblue', edgecolor='black')
plt.axvline(0, color='red', linestyle='--', linewidth=1)  # zero-difference reference line
# The dataset is called "SRFPop" everywhere else in this script
plt.title("CESDR Difference Histogram (SRFPop - WorldPop)", fontsize=16, pad=15)
plt.xlabel("Difference", fontsize=14, labelpad=10)
plt.ylabel("Pixel Count", fontsize=14, labelpad=10)
# tick_params sets the label size on the axes themselves. Passing fontsize to
# plt.xticks()/plt.yticks() only styles the tick labels that exist at call
# time, which can be lost once the axis limits are changed below.
plt.tick_params(axis='both', labelsize=12)
plt.xlim(-0.75, 0.75)
# NOTE(review): fixed upper bound; bars taller than 160000 would be cut off —
# confirm this still suits the data when the inputs change.
plt.ylim(0, 160000)
plt.grid(True, linestyle='--', alpha=0.6)
plt.subplots_adjust(left=0.12, right=0.95, top=0.9, bottom=0.12)
plt.savefig("hist_diff_CESDR.png", dpi=300, bbox_inches='tight')
plt.show()
