In [None]:
import geopandas as gpd
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import seaborn as sns
import os
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Base folder for every path in this notebook. It is the current working directory,
# so the notebook must be started from the folder that contains `data/` and `figs/`.
root = Path.cwd()

In [None]:

def jm_per_band(x_class1, x_class2, n_points=200):
    """Estimate the Jeffries-Matusita (JM) distance between two 1-D samples.

    Each class density is estimated with a Gaussian KDE. Both densities are
    evaluated on one shared grid spanning the pooled data range, renormalised
    on that grid, and the Bhattacharyya coefficient is integrated with the
    trapezoid rule. JM is defined here as ``2 * (1 - exp(-B))`` (no square
    root), so it lies in ``[0, 2]``.

    Parameters
    ----------
    x_class1, x_class2 : array-like
        Values of a single band for each class. Non-finite entries
        (NaN, +/-inf) are dropped before the densities are estimated.
    n_points : int, default 200
        Number of grid points used for the numerical integration.

    Returns
    -------
    float
        JM distance clipped to ``[0, 2]``, or ``np.nan`` when the distance
        cannot be estimated: fewer than 5 usable values in a class, a class
        with no spread (the KDE covariance would be singular), or a density
        that evaluates to zero everywhere on the grid.
    """
    x1 = np.asarray(x_class1, dtype=float)
    x2 = np.asarray(x_class2, dtype=float)
    # isfinite (rather than ~isnan) also removes +/-inf, which would poison the KDE.
    x1 = x1[np.isfinite(x1)]
    x2 = x2[np.isfinite(x2)]

    if len(x1) < 5 or len(x2) < 5:
        return np.nan

    # A constant sample has zero variance; gaussian_kde cannot be fitted to it.
    if x1.min() == x1.max() or x2.min() == x2.max():
        return np.nan

    # np.trapezoid only exists in NumPy >= 2.0; older releases call it np.trapz.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    # Estimate the pdf of each class.
    kde1 = gaussian_kde(x1)
    kde2 = gaussian_kde(x2)

    # Shared evaluation grid covering the pooled min/max of both classes.
    lo = min(x1.min(), x2.min())
    hi = max(x1.max(), x2.max())
    x_linspace = np.linspace(lo, hi, n_points)

    u = kde1(x_linspace)
    v = kde2(x_linspace)

    # Renormalise so each truncated density integrates to 1 on the grid.
    area_u = integrate(u, x_linspace)
    area_v = integrate(v, x_linspace)
    if not (area_u > 0 and area_v > 0):
        # The grid missed all of one density's mass; avoid a 0/0 division.
        return np.nan
    u = u / area_u
    v = v / area_v

    # Bhattacharyya distance; the epsilon keeps log() finite when the classes do not overlap.
    bc = integrate(np.sqrt(u * v), x_linspace)
    B = -np.log(bc + 1e-15)

    # JM distance. The epsilon and rounding can push the raw value a hair outside
    # [0, 2] (e.g. about -2e-15 for identical samples, printed as "-0.00"), so clip it.
    J = 2 * (1 - np.exp(-B))

    return np.clip(J, 0.0, 2.0)

def get_fad_classes(df,year,fad):
    """Split ``df`` into rows whose ``FAD`` entry contains ``fad`` and rows whose entry does not.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a string column ``FAD``. A ``year`` column is only read
        when ``year`` is not ``'allyears'``.
    year : 'allyears' or list-like
        ``'allyears'`` keeps every row; otherwise only rows whose ``year``
        value is in the given collection are kept.
    fad : str
        Pattern passed to ``Series.str.contains`` (pandas treats it as a
        regular expression by default).

    Returns
    -------
    (class1_df, class2_df) : tuple of pandas.DataFrame
        Rows that contain ``fad`` and rows that do not. Rows with a missing
        ``FAD`` value have an unknown class and are left out of both frames.
    """
    # Without na=False a missing FAD yields NaN in the mask, and both `~mask`
    # and `.loc[mask]` then raise.
    has_label = df['FAD'].notna()
    matches = df['FAD'].str.contains(fad, na=False).astype(bool)

    class1_mask = has_label & matches
    class2_mask = has_label & ~matches

    # isinstance guard: comparing an array-like `year` to a string would not give a plain bool.
    if not (isinstance(year, str) and year == 'allyears'):
        in_years = df['year'].isin(year)
        class1_mask = class1_mask & in_years
        class2_mask = class2_mask & in_years

    return df.loc[class1_mask,:], df.loc[class2_mask,:]


def get_classes(df,year,class1,class2):
    """Return the rows of ``df`` belonging to two ``status`` classes.

    ``class1`` is a collection of status values (matched with ``isin``) while
    ``class2`` is a single status value (matched with ``==``). Unless ``year``
    is the string ``'allyears'``, both results are additionally restricted to
    rows whose ``year`` is in the ``year`` collection.

    Returns ``(class1_df, class2_df)``.
    """
    in_class1 = df['status'].isin(class1)
    in_class2 = df['status'] == class2

    if year == 'allyears':
        return df.loc[in_class1, :], df.loc[in_class2, :]

    in_years = df['year'].isin(year)
    return df.loc[in_class1 & in_years, :], df.loc[in_class2 & in_years, :]
   

In [None]:
# Create the output folder for the JM-distance heatmaps (and its parents) if it is missing.
(root / 'figs' / 'harvard' / 'jm_distance_plots').mkdir(parents=True, exist_ok=True)

In [None]:
# Load the per-tree table. Later cells read its 'year', 'StemTag', 'BLD' and 'FAD'
# columns and treat every column whose name contains '_B' as a spectral band value.
pertree_df = pd.read_csv(root / 'data' / 'harvard' / 'per_tree_average_monthly_spectra_and_health_all.csv')

In [None]:
# Turn missing BLD values into 0.0 so the column can be compared against 1.0 / 0.0 below.
# NOTE(review): this labels every row without a BLD record as "no BLD"; confirm that a
# missing value really means the tree was assessed and found unaffected.
pertree_df['BLD'] = pertree_df['BLD'].replace(np.nan,0.0)

In [None]:
# Stem tags with at least one 2025 record define the subset used as "beech only".
# NOTE(review): presumes only beech trees have rows in 2025 — confirm against the survey design.
beech_tags = pertree_df.loc[pertree_df['year'].eq(2025), 'StemTag'].unique()
# Keep all years of data for those stems, not just their 2025 rows.
beechonly_df = pertree_df[pertree_df['StemTag'].isin(beech_tags)]

In [None]:
# Independent copy of the rows that have a FAD value (needed before any string matching on FAD).
fad_pertree_df = pertree_df[pertree_df['FAD'].notna()].copy()

In [None]:
# NOTE(review): `perpixel_df` is not created by any cell shown in this notebook, so on a
# fresh kernel this cell raises NameError unless the per-pixel table is loaded first.
beech_pixels = perpixel_df[perpixel_df['StemTag'].isin(beech_tags)]
# Keep one record per (row, col, StemTag) combination so repeated entries are not counted twice.
plot_df = beech_pixels.drop_duplicates(subset=['row','col','StemTag'])

# Series.hist returns the Axes it drew on; label that Axes directly.
ax = plot_df['frac'].hist()
ax.set_xlabel('% of Pixel Covered by Beech Crown Polygon')
ax.set_ylabel('Num Pixels')
ax.set_title('Beech Canopy Coverage per Pixel')

ax.figure.savefig(root / 'figs'/ 'harvard' / 'histogram_beechonly_canopy_coverage_per_pixel.png')
plt.show()

In [None]:
# Spectral columns: every column whose name contains '_B'. Further down the names are
# split on '_' into a month part and a band part.
j_cols = [x for x in pertree_df.columns if '_B' in x]


# --- Disabled alternative: compare `status` classes via get_classes ---
# year = [2023,2025]
# class1 = ['AU']
# class2 = 'A'

# class1_df, class2_df = get_classes(pertree_df,year=year,class1=class1,class2=class2)

# Labels used in the figure title and in the output file name.
title = 'Beech Only'
filename = 'beechonly'

# --- Disabled alternative: compare FAD classes via get_fad_classes ---
# NOTE(review): the labels below say 'LF' but the call passes fad='DF' — reconcile before re-enabling.
# year = [2025]
# class1 = 'LF'
# class2 = 'No LF'
# fad = 'LF'
# 
# class1_df, class2_df = get_fad_classes(fad_pertree_df,year=year,fad='DF')

########################################
# Active comparison: BLD vs. no BLD, restricted to the years listed here.
# class1 / class2 are only display labels (figure caption and file name).
year = [2023,2025]
class1 = 'BLD'
class2 = 'No BLD'

def get_bld_classes(df,year):
    """Return ``(rows with BLD == 1.0, rows with BLD == 0.0)`` from ``df``.

    When ``year`` is the string ``'allyears'`` no year filter is applied;
    otherwise both results keep only rows whose ``year`` is in ``year``.
    """
    with_bld = df['BLD']==1.0
    without_bld = df['BLD']==0.0

    if year == 'allyears':
        return df.loc[with_bld,:], df.loc[without_bld,:]

    in_years = df['year'].isin(year)
    return df.loc[with_bld & in_years,:], df.loc[without_bld & in_years,:]

# Split the beech-only table into the two classes being compared.
class1_df, class2_df = get_bld_classes(beechonly_df,year=year)

# Row counts shown in the figure caption. These are table rows; jm_per_band
# additionally drops NaNs band by band, so the usable count per band can be smaller.
class1_n, class2_n = len(class1_df), len(class2_df)

# One JM distance per '<month>_<band>' column.
jm_values = {
    col: jm_per_band(x_class1=class1_df[col],x_class2=class2_df[col])
    for col in j_cols
}

# Reshape the single-row table into long form: one row per '<month>_<band>' column.
wide_df = pd.DataFrame(jm_values,index=[0],columns=j_cols)
long_df = (
    wide_df.T
    .reset_index()
    .rename(columns={'index':'month_band',0:'jm'})
)
long_df[['month','band']] = long_df['month_band'].str.split('_', expand=True)

# Calendar order for the heatmap rows; months without data become all-NaN rows.
order = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# Month x band table with readable band names, in calendar order, as floats.
pivot_df = (
    long_df
    .pivot(index='month',columns='band',values='jm')
    .rename(columns={
        'B02':'Blue',
        'B03':'Green',
        'B04':'Red',
        'B05':'Red Edge 1',
        'B06':'Red Edge 2',
        'B07':'Red Edge 3',
        'B08':'NIR',
        'B11':'SW1',
        'B12':'SW2',
        'B8A': 'NIR narrow',
    })
    .reindex(order)
    .astype('float')
)

fig, ax = plt.subplots(figsize=(8,4))
# The colour scale is capped at 1.5, so larger JM values all share the darkest colour.
sns.heatmap(pivot_df, annot=True, cmap='Reds', fmt='.2f',vmin=0,vmax=1.5, ax=ax)

ax.set_xlabel('Band',fontsize=12)
ax.set_ylabel('Month',fontsize=12)
# Keep the month labels horizontal.
plt.setp(ax.get_yticklabels(), rotation=0)

ax.set_title(f"JM Distance, {title}: {year}",pad=10,fontsize=14,loc='right')

# Class legend placed just above the top-left corner of the axes.
ax.text(-0.12,1.02,f"Class 1: {class1}  (n={class1_n})\nClass 2: {class2}  (n={class2_n})",ha='left', va='bottom', fontsize=10, transform=ax.transAxes)

# `year` is formatted with its list repr, so the file name contains brackets and a comma.
fig.savefig(root / 'figs' / 'harvard' / 'jm_distance_plots' / f'jmdistance_heatmap_weightedfracs_{filename}_{year}_{class1}_{class2}.png')

plt.show()




In [None]:


def lda_spectral(by_tree_df, band_cols, label_col='BLD'):
    """Fit a one-component LDA on the band columns and plot the score distribution per class.

    Parameters
    ----------
    by_tree_df : pandas.DataFrame
        Table holding the feature columns and the label column. It must not
        contain missing values in those columns (the estimator rejects NaN).
    band_cols : list of str
        Names of the feature columns.
    label_col : str or one-element list of str, default 'BLD'
        Name of the label column. The histogram assumes labels coded as
        1 (drawn as 'BLD') and 0 (drawn as 'No BLD').

    Returns
    -------
    lda : fitted LinearDiscriminantAnalysis
    Z : ndarray
        Discriminant scores as returned by ``fit_transform``.
    loadings : pandas.Series
        ``lda.coef_[0]`` indexed by ``band_cols`` and sorted by absolute
        value, largest first.
    """
    X = by_tree_df[band_cols].values
    # ravel() so a one-element list such as ['BLD'] works as well as a plain column name.
    y = np.ravel(by_tree_df[label_col].values)

    lda = LDA(n_components=1)
    Z = lda.fit_transform(X, y)

    # Flat view of the single discriminant axis for plotting.
    scores = Z[:, 0]
    # Same bin edges for both classes; separately computed bins have different
    # widths, which makes the overlaid bar heights incomparable.
    bin_edges = np.histogram_bin_edges(scores, bins=20)

    plt.figure(figsize=(7,4))
    plt.hist(scores[y==1], bins=bin_edges, alpha=0.6, label='BLD', color='red')
    plt.hist(scores[y==0], bins=bin_edges, alpha=0.6, label='No BLD', color='blue')
    plt.title("LDA Discriminant Projection")
    plt.xlabel("LDA Score")
    plt.ylabel("Frequency")
    plt.legend()
    plt.grid(alpha=0.2)
    plt.show()

    # Band importance: LDA coefficients ranked by magnitude.
    loadings = pd.Series(lda.coef_[0], index=band_cols)
    loadings = loadings.sort_values(key=np.abs, ascending=False)

    return lda, Z, loadings

In [None]:
# Feature columns: band values for June through September.
band_cols = [
    col for col in pertree_df.columns
    if 'Jun_B' in col or 'Jul_B' in col or 'Aug_B' in col or 'Sep_B' in col
]
label_col = ['BLD']

# 2023 and 2025 rows only, restricted to features + label, with incomplete rows removed.
# NOTE(review): this starts from `pertree_df`, not `beechonly_df` — confirm that all
# stems (not only the beech subset) are meant to enter the LDA.
pertree_2523 = (
    pertree_df
    .loc[pertree_df['year'].isin([2025,2023]), band_cols + label_col]
    .copy()
    .dropna(subset=band_cols + label_col)
)

In [None]:
# Fit the LDA on the June-September bands and show the BLD / No BLD score histogram.
lda, z, loadings = lda_spectral(by_tree_df=pertree_2523, band_cols=band_cols, label_col='BLD')