# Prototype Aus-wide saltmarsh

In [1]:
# Load imports / set up.
# %run executes the companion notebook inside this kernel's namespace. Names used
# later without a visible import (np, RandomForestClassifier, joblib) presumably
# come from it -- TODO confirm against saltmarsh_prototype_imports.ipynb.
%run 'saltmarsh_prototype_imports.ipynb'

# Number of parallel workers; passed as n_jobs when the random forests are built.
ncpus = 15


## Model fitting

- for the moment, just fit the model to all the data
- eventually this will be a more comprehensive evaluation + cross-validation

### Load the training data

In [2]:
# training data file path
training_data = "training_data/prelim_covariate_data.txt"

# load the numeric data; ndmin=2 keeps a (rows, columns) array even when the
# file holds a single row, so the column slicing done later keeps working
model_input = np.loadtxt(training_data, ndmin=2)

# load the column names from the first line of the same file
with open(training_data, 'r') as file:
    header = file.readline()

# the first whitespace-separated token is skipped (assumed to be the '#'
# comment marker in front of the names -- TODO confirm for new exports)
column_names = header.split()[1:]

# Every later lookup is column_names.index(...) into model_input, so a header
# whose width differs from the data would silently select the wrong columns.
# Fail loudly here instead.
if len(column_names) != model_input.shape[1]:
    raise ValueError(
        f"{training_data}: header lists {len(column_names)} column names "
        f"but the data has {model_input.shape[1]} columns"
    )

print(column_names)

['site', 'nbart_coastal_aerosol', 'nbart_blue', 'nbart_green', 'nbart_red', 'nbart_nir', 'nbart_swir_1', 'nbart_coastal_aerosol_std', 'nbart_blue_std', 'nbart_green_std', 'nbart_red_std', 'nbart_nir_std', 'nbart_swir_1_std', 'NDVI', 'NDWI', 'DateEnd', 'Class', 'DateSta', 'wetland']


### Create the model data, and the wetland/ecosystem type labels

In [3]:
# Select the model covariates by name rather than by position, so the
# training/covariate export may carry additional columns without breaking
# this cell. The list is maintained by hand on purpose.
covars_to_use = [
    'nbart_coastal_aerosol', 'nbart_blue', 'nbart_green',
    'nbart_red', 'nbart_nir', 'nbart_swir_1',
    'nbart_coastal_aerosol_std', 'nbart_blue_std', 'nbart_green_std',
    'nbart_red_std', 'nbart_nir_std', 'nbart_swir_1_std',
    'NDVI', 'NDWI',
]
print('Using covariates:', covars_to_use, sep='\n')

# position of every selected covariate within the loaded array
model_col_indices = list(map(column_names.index, covars_to_use))

# scikit-learn naming: X is the feature matrix, y_* are the label vectors
X = model_input[:, model_col_indices]
y_wetland = model_input[:, column_names.index('wetland')]
y_ecosystem = model_input[:, column_names.index('Class')]

# quick look at the first two feature rows and the label values present
print('Model covariates peak:', X[:2], sep='\n')
print('Unique y values:', np.unique(y_wetland), sep='\n')
print('Unique y values:', np.unique(y_ecosystem), sep='\n')

Using covariates:
['nbart_coastal_aerosol', 'nbart_blue', 'nbart_green', 'nbart_red', 'nbart_nir', 'nbart_swir_1', 'nbart_coastal_aerosol_std', 'nbart_blue_std', 'nbart_green_std', 'nbart_red_std', 'nbart_nir_std', 'nbart_swir_1_std', 'NDVI', 'NDWI']
Model covariates peak:
[[ 5.72000000e+02  5.78000000e+02  7.10000000e+02  6.91000000e+02
   2.40500000e+03  2.04900000e+03  1.92928852e+03  1.96933013e+03
   1.92294453e+03  1.98098431e+03  1.54302014e+03  9.04068940e+02
   5.53618000e-01 -5.44141000e-01]
 [ 7.34000000e+02  8.01000000e+02  9.63000000e+02  9.84000000e+02
   2.82800000e+03  2.15000000e+03  1.59642596e+03  1.59729887e+03
   1.54076170e+03  1.55026363e+03  1.06719962e+03  7.28462540e+02
   4.83736000e-01 -4.91955000e-01]]
Unique y values:
[0. 1.]
Unique y values:
[ 0.  1.  5.  6.  9. 11.]


### Fit the models

In [4]:
# Fixed seed so that re-running the notebook rebuilds the same forests and
# reproduces the same scores/probabilities.
RANDOM_SEED = 42

# Hyperparameters shared by the wetland and the ecosystem classifier.
RF_PARAMS = dict(
    n_estimators=500,
    criterion='gini',
    min_samples_split=2,
    min_samples_leaf=2,
    max_features="sqrt",
    n_jobs=ncpus,
    random_state=RANDOM_SEED,
)


def fit_and_report(features, labels, n_preview):
    """Fit a random forest on (features, labels) and print a short summary.

    Prints, in order: the score on the data the model was fitted to, the
    predicted labels for the first ``n_preview`` rows, and the predicted
    class probabilities for those same rows.

    Parameters
    ----------
    features : array indexed as [row, covariate]
        Covariate matrix used for fitting.
    labels : array with one entry per row of ``features``
        Target labels.
    n_preview : int
        Number of leading rows to show predictions for.

    Returns
    -------
    The fitted RandomForestClassifier.
    """
    model = RandomForestClassifier(**RF_PARAMS)
    model.fit(features, labels)
    # scored on the rows the model was trained on, so this is an optimistic
    # figure rather than a held-out estimate
    print(model.score(features, labels))
    print(model.predict(features[:n_preview]))
    print(model.predict_proba(features[:n_preview]))
    return model


# wetland model
wetland_rf = fit_and_report(X, y_wetland, n_preview=20)

# ecosystem model
ecosystem_rf = fit_and_report(X, y_ecosystem, n_preview=50)


0.9954389965792474
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0.84299524 0.15700476]
 [0.64609048 0.35390952]
 [0.8508     0.1492    ]
 [0.62092619 0.37907381]
 [0.7904     0.2096    ]
 [0.92433333 0.07566667]
 [0.98223333 0.01776667]
 [0.84042381 0.15957619]
 [0.9612619  0.0387381 ]
 [0.91420952 0.08579048]
 [0.94076667 0.05923333]
 [0.70252857 0.29747143]
 [0.98616667 0.01383333]
 [0.93085238 0.06914762]
 [0.68838571 0.31161429]
 [0.90185238 0.09814762]
 [0.94903333 0.05096667]
 [0.91212857 0.08787143]
 [0.64089048 0.35910952]
 [0.9116     0.0884    ]]
0.9897377423033067
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 11.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[[8.35754762e-01 0.00000000e+00 1.64245238e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [6.18109524e-01 1.33333333e-03 3.58790476e-01 0.00000000e+00
  3.00000000e-03 1.87666667e-02]
 [7.9

In [5]:
# save the models
import os

# joblib.dump writes to the given path but does not create missing parent
# folders, so make sure the output directory exists first (a no-op when it
# is already there)
os.makedirs('models', exist_ok=True)

joblib.dump(wetland_rf, 'models/prelim_wetland_model.joblib')
# left as the last bare expression so the cell still displays the written path
joblib.dump(ecosystem_rf, 'models/prelim_ecosystem_model.joblib')

['models/prelim_ecosystem_model.joblib']