In [2]:
"""
Classifying roof materials for building footprints from trained ResNet-18 models
Author: maxwell.cook@colorado.edu
"""

import sys, os, gc, time

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader
from torchsat.models.classification import resnet18
from torch.nn.functional import softmax

# Custom functions (__functions.py)
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

# Project roots: the local OneDrive checkout and the CyVerse home directory.
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/earth-lab/opp-rooftop-mapping'
homedir = '/home/jovyan'

# Results go under `homedir`; for a local run, join 'results/resnet18/'
# onto `maindir` instead.
results_dir = os.path.join(homedir, 'results/resnet18-cv/')

# Echo the results directory, then confirm that the setup cell finished.
print(results_dir, "Imports successful !", sep="\n")

/home/jovyan/results/resnet18-cv/
Imports successful !


In [3]:
# Hyperparameters of the best configuration found during tuning.
params = {
    'window_size': 78,
    'batch_size': 64,
    'learning_rate': 0.01,
    'weight_decay': 0.01,
    'momentum': 0.85,
    'patience': 5,
}
print(f'Model params: {params}')

Model params: {'window_size': 78, 'batch_size': 64, 'learning_rate': 0.01, 'weight_decay': 0.01, 'momentum': 0.85, 'patience': 5}


In [None]:
# Load MS building footprint data

In [4]:
# Read the MS building footprints from the local project tree.
# Alternative location: os.path.join(homedir, 'OPP/training/dc/dc-ms_footprints.gpkg')
fp = os.path.join(maindir, 'data/spatial/raw/dc_data/footprints/dc-ms_footprints.gpkg')
footprints = (
    gpd.read_file(fp)
    .assign(uid=lambda gdf: gdf.index + 1)  # 1-based id derived from the row index
    [['uid', 'geometry']]  # keep only the id and the geometry
)
footprints.head()

Unnamed: 0,uid,geometry
0,1,"POLYGON ((334794.168 4306846.311, 334799.662 4..."
1,2,"POLYGON ((334703.572 4306870.743, 334701.114 4..."
2,3,"POLYGON ((334666.123 4306432.288, 334671.751 4..."
3,4,"POLYGON ((334616.324 4306162.858, 334621.692 4..."
4,5,"POLYGON ((334622.665 4306603.184, 334630.936 4..."


In [None]:
# Load the sampled data as well (holdout)

In [25]:
# Read the holdout data from the local results tree.
# Alternative location: os.path.join(results_dir, 'dc-resnet18_cv_holdout_ref.gpkg')
holdout_df = gpd.read_file(
    os.path.join(maindir, 'results/resnet18/cv-results/dc-resnet18_cv_holdout_ref.gpkg')
)

# Count the samples for every (class_code, code) pair.
class_counts = holdout_df[['class_code', 'code']].value_counts()
print("Holdout set class distribution:\n", class_counts)

Holdout set class distribution:
 class_code  code
CS          0       7427
ME          1       7373
SL          2       3054
UR          3        256
WS          5        231
TL          4        185
SH          6        157
Name: count, dtype: int64


In [26]:
# Lookup tables keyed by the integer class code found in the holdout data.
code_mapping = {
    code: label  # code -> original 'class_code'
    for code, label in zip(holdout_df['code'], holdout_df['class_code'])
}
desc_mapping = {
    code: text  # code -> 'description'
    for code, text in zip(holdout_df['code'], holdout_df['description'])
}
print(f'Code map: \n{code_mapping}\nDescription map: \n{desc_mapping}')

Code map: 
{0: 'CS', 1: 'ME', 2: 'SL', 3: 'UR', 4: 'TL', 5: 'WS', 6: 'SH'}
Description map: 
{0: 'Composition Shingle', 1: 'Metal', 2: 'Slate', 3: 'Urethane', 4: 'Tile', 5: 'Wood shake/shingle', 6: 'Shingle'}


In [None]:
# Load the Planet imagery

In [6]:
# Open the image stack to check its format, print a summary, then release it.
stack_da_fp = os.path.join(maindir,'data/spatial/mod/dc_data/planet-data/dc_0623_psscene8b_final_norm.tif')
# stack_da_fp = os.path.join(homedir,'opp-data/denver_0815_psscene8b_final_norm.tif')
# NOTE(review): rioxarray documents the keyword as `masked`, not `mask` —
# confirm that `mask=True` has the intended effect.
stack_da = rxr.open_rasterio(stack_da_fp, mask=True, cache=False).squeeze()
# Number of bands = size of the leading dimension. Read it from the array's
# shape metadata instead of `.values`, which would load the whole raster into
# memory just to count the bands.
# Assumes a multi-band raster: after `.squeeze()` a single-band file would have
# no band dimension left — TODO confirm if single-band inputs are expected.
n_bands = stack_da.shape[0]
print(
    f"shape: {stack_da.rio.shape}\n"
    f"bands: {n_bands}\n"
    f"resolution: {stack_da.rio.resolution()}\n"
    f"bounds: {stack_da.rio.bounds()}\n"
    f"sum: {stack_da.sum().item()}\n"
    f"CRS: {stack_da.rio.crs}\n"
    f"NoData: {stack_da.rio.nodata}\n"
    f"Array: {stack_da}"
)
# Only the path and the band count are needed downstream; drop the array handle.
del stack_da

shape: (8223, 6714)
bands: 6
resolution: (3.0, -3.0)
bounds: (315267.0, 4294629.0, 335409.0, 4319298.0)
sum: 8.181640625
CRS: EPSG:32618
NoData: None
Array: <xarray.DataArray (band: 6, y: 8223, x: 6714)> Size: 1GB
[331255332 values with dtype=float32]
Coordinates:
  * band         (band) int64 48B 1 2 3 4 5 6
  * x            (x) float64 54kB 3.153e+05 3.153e+05 ... 3.354e+05 3.354e+05
  * y            (y) float64 66kB 4.319e+06 4.319e+06 ... 4.295e+06 4.295e+06
    spatial_ref  int64 8B 0
Attributes:
    AREA_OR_POINT:  Area
    scale_factor:   1.0
    add_offset:     0.0
    long_name:      ('nir', 'NDBIbg', 'NDBIrg', 'NISI', 'MNF1', 'NISI5x5')


In [None]:
# Create the roof image dataset for inference (all MS building footprints)

In [7]:
# Unlabeled dataset over all footprints, read from the image stack with the
# tuned window size.
footprints_ds = UnlabeledRoofImageDataset(
    footprints,
    img_path=stack_da_fp,
    n_bands=n_bands,
    img_dim=params['window_size'],
)

# Batches are served in dataset order (no shuffling).
loader_options = dict(
    batch_size=params['batch_size'],
    num_workers=2,
    shuffle=False,
    pin_memory=True,
)
dloader = DataLoader(footprints_ds, **loader_options)
print("Data loaded for all footprints !")

Data loaded !


In [None]:
# Initialize the ResNet-18 model

In [8]:
# Prefer a CUDA GPU when one is available; otherwise fall back to the CPU
# (the local machine only has a CPU).
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f'Using {device} for model eval ...')

Using cpu for model eval ...


In [15]:
# Checkpoint of the fold selected from holdout accuracy.
best_fold = 4
# Alternative location: os.path.join(results_dir, f'dc-resnet18_fold{best_fold}.pth')
model_fp = os.path.join(maindir,f'results/resnet18/cv-models/dc-resnet18_fold{best_fold}.pth')

print(f"Loading model from path: {model_fp}")
checkpoint = torch.load(model_fp, map_location=device)

# Build the architecture with one output per known class; of the four objects
# returned by the helper only the first (the model) is used here.
n_classes = len(code_mapping)
model, _, _, _ = initialize_resnet18(
    n_classes=n_classes, n_channels=n_bands, device=device, params=params
)

# Restore the trained weights and switch the network to evaluation mode.
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
print("\tModel loaded !")

Loading model from path: /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/earth-lab/opp-rooftop-mapping/results/resnet18/cv-models/dc-resnet18_fold4.pth
	Made CPU parallel.
	Model loaded !


In [30]:
# Empty results table: one row per dataset item, with metadata columns followed
# by one column per class label.
n = len(footprints_ds)
class_labels = [code_mapping[code] for code in range(n_classes)]
print(f"Length of footprint dataset: {n}")
columns = ['chunk_idx', 'bbox', 'prediction', 'confidence', *class_labels]
res_df = pd.DataFrame(index=range(n), columns=columns)
res_df.head(3)

Length of footprint dataset: 77851


Unnamed: 0,chunk_idx,bbox,prediction,confidence,CS,ME,SL,UR,TL,WS,SH
0,,,,,,,,,,,
1,,,,,,,,,,,
2,,,,,,,,,,,


In [31]:
t0 = time.time()

# Column layout of the results table: metadata first, then one probability
# column per class (named by class code).
class_columns = [code_mapping[j] for j in range(n_classes)]
result_columns = ['chunk_idx', 'bbox', 'prediction', 'confidence'] + class_columns

# Collect one record per image chunk and build the DataFrame once at the end.
# Row-wise `DataFrame.append` was removed in pandas 2.0, copied the whole frame
# on every call, and stacked the results *below* the pre-allocated all-NaN rows.
records = []

# Run inference
with torch.no_grad():
    for batch_idx, sample in enumerate(dloader):
        image = sample['image'].to(device)  # retrieve the image chunks (unlabeled)
        # NOTE(review): assumes iterating `bboxes` yields one bounding box per
        # sample in the batch — confirm against the dataset / collate function.
        bboxes = sample['bbox']

        # Make predictions
        output = model(image.float())
        probabilities = softmax(output, dim=1).cpu().numpy()  # probabilities for all classes
        predictions = output.argmax(dim=1).cpu().numpy()  # the predicted class code
        confidence = probabilities.max(axis=1)  # probability of the predicted class

        # Index of the first sample in this batch within the (unshuffled) dataset
        batch_start = batch_idx * params['batch_size']

        for i, bbox in enumerate(bboxes):
            record = {
                'chunk_idx': batch_start + i,
                'bbox': bbox,
                'prediction': code_mapping[int(predictions[i])],  # map code to class name
                'confidence': confidence[i],
            }
            # One probability column per class, keyed by class name
            record.update({label: probabilities[i, j] for j, label in enumerate(class_columns)})
            records.append(record)

        if batch_idx % 10 == 0:
            print(f"\tProcessed {batch_idx * params['batch_size']} samples.")

        # Clear GPU memory after each batch
        torch.cuda.empty_cache()
        gc.collect()

# Single construction keeps the column order fixed and avoids quadratic copying.
res_df = pd.DataFrame(records, columns=result_columns)

print("\n~~~~~~~~~~\n")
t2 = (time.time() - t0) / 60
print(f"Total elapsed time for inference: {t2:.2f} minutes.")
print("\n~~~~~~~~~~\n")

NameError: name 'time' is not defined

In [None]:
# Write the inference results to CSV under the results directory.
out_fp = os.path.join(results_dir,'classification/dc-resnet18-inference_ms-footprints.csv')
# `to_csv` does not create missing folders, so make sure the target directory
# exists before writing; otherwise the finished inference run cannot be saved.
os.makedirs(os.path.dirname(out_fp), exist_ok=True)
res_df.to_csv(out_fp, index=False)

In [None]:
# Force a garbage-collection pass; as the last expression of the cell, the
# value returned by `gc.collect()` is shown as the cell output.
gc.collect()