In [1]:
import json
import os
import pickle
from pathlib import Path
from joblib import Parallel, delayed
from statistics import mode

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import rioxarray

from xrspatial import focal, slope
import seaborn as sns
from tqdm import tqdm
from joblib_progress import joblib_progress
from xrspatial.multispectral import ndvi, savi
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (confusion_matrix, ConfusionMatrixDisplay)
from sklearn.model_selection import train_test_split, cross_val_score

from sklearn.model_selection import RandomizedSearchCV as RSCV
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss


In [23]:
# Input locations, all resolved relative to the parent of the current
# working directory.
helena_path = Path.cwd().parent.joinpath('data', 'helena')
feature_dir = helena_path.joinpath('features')
crowns_path = helena_path.joinpath('spectral_crowns', 'crowns_100.parquet')

In [3]:
# Names (not full paths) of the feature files built with 100 m geomorphons,
# in ascending lexical order.
parquets = sorted(
    name for name in os.listdir(feature_dir) if name.endswith('_100.parquet')
)

parquets

['features_2018_crowns_100.parquet',
 'features_2020_crowns_100.parquet',
 'features_2022_crowns_100.parquet']

We will use the model that was tuned and trained in `src/mortality_classification.ipynb` and then pickled.

In [4]:
# load model created in src/mortality_classification.ipynb
# NOTE: unpickling can execute arbitrary code, so only load a file that you
# produced yourself or otherwise trust.
pickle_path = Path.cwd() / 'RF_model.sav'

# the context manager closes the file handle as soon as the model is read,
# instead of leaving it open for the lifetime of the kernel
with open(pickle_path, 'rb') as model_file:
    model = pickle.load(model_file)

Now we will make model predictions for the samples and build a time series of survival probabilities for each sample, covering the years for which we have NAIP data.

In [18]:
# Predict a probability for every row of each yearly feature file and collect
# one single-column frame per year, indexed by UniqueID.

# The feature names the model was fitted on do not change between files, so
# look them up once rather than on every iteration.
cols = list(model.feature_names_in_)

pred_list = []
for f in parquets:
    print(f'-------{f}----------')

    # file names look like features_<year>_crowns_<radius>.parquet; only the
    # year is needed to label the prediction column
    y = f.split('_')[1]

    # read parquet, make input feature df (X) restricted to the model's
    # fitted feature columns, in the fitted order
    df = pd.read_parquet(feature_dir / f)
    X = df[cols]

    # column 1 of predict_proba is the probability of model.classes_[1]
    pred = pd.DataFrame()
    pred['UniqueID'] = df['UniqueID']
    pred[f'pred_{y}'] = model.predict_proba(X)[:, 1]
    pred_list.append(pred.set_index('UniqueID'))
    


-------features_2018_crowns_100.parquet----------
-------features_2020_crowns_100.parquet----------
-------features_2022_crowns_100.parquet----------


In [32]:
# place the per-year prediction columns side by side, aligned on UniqueID
pred_df = pd.concat(pred_list, axis=1)

# crown attributes carried through to the combined table
crown_cols = ['UniqueID', 'area', 'zq95', 'treatment', 'geomorph_100', 'geometry']
crowns = gpd.read_parquet(crowns_path)[crown_cols].set_index('UniqueID')

# attach the predictions to the crowns, then turn UniqueID back into a
# regular column
crowns = pd.concat([crowns, pred_df], axis=1).reset_index()

crowns

In [34]:
# write the combined crowns + predictions table next to the other helena data
predictions_path = helena_path / 'predictions_g100.parquet'
crowns.to_parquet(predictions_path)

Unnamed: 0_level_0,area,zq95,treatment,geomorph_100,geometry,pred_2018,pred_2020,pred_2022
UniqueID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10N_494412_4510841,84.9355,35.8625,0,10,"POLYGON ((494416.290 4510841.000, 494416.420 4...",0.559633,0.916958,0.238532
10N_496561_4522691,89.7756,13.1400,0,10,"POLYGON ((496565.930 4522690.360, 496565.940 4...",0.980924,0.980866,0.990641
10N_495328_4511010,23.7956,5.1120,0,10,"POLYGON ((495329.680 4511009.800, 495329.780 4...",0.944182,0.980741,0.999280
10N_489208_4521757,40.4712,13.5745,0,10,"POLYGON ((489210.310 4521759.010, 489210.220 4...",0.954440,0.990552,0.531654
10N_494747_4519082,70.5364,13.1645,0,10,"POLYGON ((494750.980 4519080.730, 494750.970 4...",0.980924,0.990735,0.999971
...,...,...,...,...,...,...,...,...
10N_493216_4513565,30.1150,14.3875,12,10,"POLYGON ((493217.370 4513565.570, 493217.470 4...",0.082569,0.220183,0.999365
10N_493690_4512747,36.7080,22.6880,12,10,"POLYGON ((493692.830 4512749.100, 493690.770 4...",0.851333,0.971892,0.872535
10N_491422_4513175,44.2510,18.3300,12,10,"POLYGON ((491425.340 4513174.760, 491425.200 4...",0.073394,0.696811,0.486239
10N_491005_4512808,43.8851,18.7000,12,10,"POLYGON ((491007.710 4512807.670, 491007.660 4...",0.900542,0.915273,0.987982
