In [1]:
import json
import os
import pickle
from pathlib import Path
from joblib import Parallel, delayed
from statistics import mode

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import rioxarray

from xrspatial import focal, slope
import seaborn as sns
from tqdm import tqdm
from joblib_progress import joblib_progress
from xrspatial.multispectral import ndvi, savi
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (confusion_matrix, ConfusionMatrixDisplay)
from sklearn.model_selection import train_test_split, cross_val_score

from sklearn.model_selection import RandomizedSearchCV as RSCV
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss


In [None]:
# Data locations, resolved relative to the parent of the current working directory.
helena_path = Path.cwd().parent.joinpath('data', 'helena')
feature_dir = helena_path.joinpath('features')

In [None]:
# get paths for features using 100 m geomorphons

We will use the model that was tuned, trained, and pickled in `src/mortality_classification.ipynb`.

In [None]:
# Load the model created in src/mortality_classification.ipynb.
pickle_path = Path.cwd() / 'RF_model.sav'
# NOTE: unpickling can execute arbitrary code, so only load a model file that
# was produced by a trusted run of the training notebook.
# The context manager guarantees the file handle is closed after loading.
with open(pickle_path, 'rb') as model_file:
    model = pickle.load(model_file)

Now we will make model predictions for the samples and create a time series of survival probabilities for each sample over the years for which we have NAIP data.

In [None]:
# Treatment identifiers are the keys of dict_of_samples.
# NOTE(review): dict_of_samples is not assigned in any cell visible above —
# confirm it is defined before this cell runs on a fresh kernel.
treatment_keys = dict_of_samples.keys()
# range(1,11) yields the integers 1 through 10.
geomorphon_keys = range(1,11)
# Years to predict for (the preceding markdown refers to years with NAIP data).
years = [2018, 2020, 2022]

# Visit every (treatment, geomorphon) combination.
# TODO: the loop body is only a `...` placeholder, so nothing is computed here yet.
for tk in treatment_keys:
    for gk in geomorphon_keys:
        ...
def sample_mortality_timeseries(sample_dict, years):
    '''
    Build a per-sample time series of predicted probabilities, one column
    per year.

    For each requested year, every column of ``sample_dict[year]`` except
    'y', 'label' and 'UniqueID' is passed to the module-level ``model`` and
    column 1 of ``predict_proba`` is kept.  That column is treated as the
    probability of being alive; this assumes the model's second class is
    the "alive" class -- TODO confirm against ``model.classes_``.

    Parameters
    ----------
    sample_dict : dict
        Maps each year to a DataFrame containing 'UniqueID', 'y', 'label'
        and the feature columns the model expects.
    years : iterable
        Keys of ``sample_dict`` to include, in output column order.

    Returns
    -------
    pandas.DataFrame
        A 'UniqueID' column followed by one 'pred_<year>' column per
        requested year.  A UniqueID missing from a given year gets NaN for
        that year.  If ``years`` is empty, an empty frame with only the
        'UniqueID' column is returned.

    Raises
    ------
    KeyError
        If a year is not in ``sample_dict`` or a yearly frame lacks one of
        the 'y', 'label' or 'UniqueID' columns.
    '''
    non_feature_cols = ['y', 'label', 'UniqueID']

    t_series = None
    for y in years:
        year_df = sample_dict[y]
        X = year_df.drop(non_feature_cols, axis=1)
        # Year-specific column names keep the per-year frames from colliding
        # when they are combined below.
        preds = pd.DataFrame({
            'UniqueID': year_df['UniqueID'].to_numpy(),
            f'pred_{y}': model.predict_proba(X)[:, 1],
        })
        if t_series is None:
            t_series = preds
        else:
            # Merge on the UniqueID *column* of both frames (DataFrame.join
            # would match against the right-hand index instead).  An outer
            # merge keeps samples that are absent from some years.
            t_series = t_series.merge(preds, on='UniqueID', how='outer')

    if t_series is None:
        return pd.DataFrame(columns=['UniqueID'])
    return t_series


