In [None]:
"""
Convert building footprints to centroids and sample the image stack
"""

# Packages
import os, time, sys
from os.path import join
import geopandas as gpd
import pandas as pd
import rasterio as rio
import seaborn as sns

from concurrent.futures import ProcessPoolExecutor

# Custom functions
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

# Show the working directory (the custom-function path above is built from it)
print(os.getcwd())

# Coordinate Ref. System
proj = 32618  # UTM Zone 18N

# Project root directory. Set the OPP_ROOFTOP_MAINDIR environment variable to
# run the notebook on another machine; when it is unset the original local
# path is used, so existing behaviour is unchanged.
maindir = os.environ.get(
    'OPP_ROOFTOP_MAINDIR',
    '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/earth-lab/opp-rooftop-mapping/',
)

begin = time.time()  # start time

In [None]:
# Read the reference centroid layer (.gpkg) from the training data folder
fp = join(
    maindir,
    'data/spatial/mod/dc_data/training/dc_data_reference_centroids.gpkg',
)
centroids = gpd.read_file(fp)
# Last expression of the cell: preview the first rows
centroids.head()

In [None]:
# Open the final image stack. The dataset handle is intentionally left open:
# a later cell passes `da_stack` to the sampling helper.
stack_path = join(
    maindir,
    'data/spatial/mod/dc_data/planet-data/dc_0623_psscene8b_final_norm.tif',
)
da_stack = rio.open(stack_path)

# Collect the nodata value, the metadata mapping and the per-band descriptions
nodata = da_stack.nodata
metadata = da_stack.meta
desc = [*da_stack.descriptions]
print(f'Raster description: {desc}; \n Metadata: {metadata}\nNo Data: {nodata}')

# Sanity check on the type of the opened object
if isinstance(da_stack, rio.io.DatasetReader):
    print("The object is a rasterio dataset.")

In [None]:
# Use the raster band descriptions as the band names passed to the sampler.
# Note: this binds the same list object as `desc` (no copy is made).
band_names = desc
# Last expression of the cell: display the list of band names
band_names

In [None]:
# Sample all the centroids by roof material type.
# `img_vals_at_pts` is not defined in this notebook; presumably it comes from
# the `from __functions import *` import above -- confirm. It receives the open
# raster dataset, the centroid points and the band names.
all_vals = img_vals_at_pts(da_stack,centroids,band_names)

# Check on the results
print(all_vals.head())
# `describe` must be *called*; without the parentheses this printed the bound
# method object instead of the summary statistics.
print(all_vals['nir'].describe())  # check one column
print(all_vals.columns.values.tolist())

In [None]:
# Write to a gpkg and csv
# Reproject the sampled points to the project CRS before export.
all_vals = all_vals.to_crs(proj)
# Name the driver explicitly: older geopandas releases do not infer it from the
# file extension and would otherwise fall back to the ESRI Shapefile driver.
all_vals.to_file(
    join(maindir,'data/spatial/mod/dc_data/training/dc_data_reference_centroid_sampled.gpkg'),
    driver='GPKG')
# Tabular copy without the geometry column (the index is written as the first
# CSV column, as before).
all_vals.drop('geometry',axis=1).to_csv(join(maindir,'data/tabular/mod/dc_data/training/dc_data_reference_centroid_sampled.csv'))

In [None]:
import scipy.stats as stats


def _anova_by_class(table, value_col, labels):
    """Run a one-way ANOVA on `value_col`, one sample per entry of `labels`.

    Each sample is the `value_col` values of the rows of `table` whose
    'class_code' equals the label. Returns the result of `stats.f_oneway`.
    """
    samples = (table.loc[table['class_code'] == label, value_col] for label in labels)
    return stats.f_oneway(*samples)


# Distinct class codes found in the sampled centroids
classes = all_vals['class_code'].unique()

# Perform one-way ANOVA for the two NISI window sizes
f_value, p_value = _anova_by_class(all_vals, 'NISI9x9', classes)
f_value_, p_value_ = _anova_by_class(all_vals, 'NISI27x27', classes)

print(f'ANOVA F-value (NISI9x9): {f_value}, p-value: {p_value}\nANOVA F-value (NISI27x27): {f_value_}, p-value: {p_value_}')

### Spectral signature plot

In [None]:
# Plot the distribution of the sampled band values by class
# (first list the available columns and the distinct descriptions)
print(all_vals.columns.values)
print(all_vals['description'].unique())

# Bands / indices to include in the figure
new_cols = ['nir', 'NDBIbg', 'NDBIrg', 'NISI', 'MNF1', 'NISI9x9', 'NISI27x27']

# Reshape wide -> long: one row per (class_code, Band, Value)
all_vals_m = pd.melt(
    all_vals,
    id_vars=['class_code'],
    value_vars=new_cols,
    var_name='Band',
    value_name='Value',
)

# One violin panel per band, two panels per row; the y axis (class code) is
# shared while each band keeps its own x range.
g = sns.catplot(
    data=all_vals_m,
    x='Value',
    y='class_code',
    col='Band',
    kind='violin',
    sharey=True,
    sharex=False,
    inner='quartile',
    col_wrap=2,
    height=2,
    aspect=1.5,
)
# Panel titles show only the band name
g.set_titles("{col_name}")
g.set_axis_labels("Value", "Class Code")
# NOTE(review): `plt` is not imported in the visible cells; presumably it is
# provided by `from __functions import *` -- confirm, otherwise this raises
# NameError on a fresh kernel.
plt.show()