In [1]:
from sklearn.ensemble import RandomForestClassifier
import numpy as NUM
import SSDataObject as SSDO
import seaborn as SEA
import matplotlib.pyplot as PLOT
import pandas as PD
import arcgisscripting as ARC
import arcpy.management as DM
import arcpy as ARCPY
import SSUtilities as UTILS

In [2]:
##USE USA DATA TO PREDICT GLOBE
#Explanatory fields used as predictors
predictVars = ['SALINITY', 'TEMP', 'PHOSPHATE', 'NITRATE',
               'SILICATE', 'DISSO2']
#Field holding the class label
classVar = ['PRESENT']
#Every field read from the training table: predictors first, label last
allVars = predictVars + classVar
#Feature class containing the USA seagrass training points
inputFC = r'E:\Projects\ML\Seagrass\Seagrass.gdb\USAPointData_SpatialJoin'
#Wrap the feature class in a SS Data Object and load the requested fields
ssdo = SSDO.SSDataObject(inputFC, useChordal = False)
ssdo.obtainData(fields = allVars)
#Object-typed table with ssdo.numObs rows and one column per field
inputVars = NUM.empty((ssdo.numObs, len(allVars)), dtype = object)
#Fill the table column by column from the loaded field data
for ind in range(len(allVars)):
    var = allVars[ind]
    inputVars[:, ind] = ssdo.fields[var].data
#Expose the table as a Pandas DataFrame with the field names as columns
USA_Train = PD.DataFrame(data = inputVars, columns = allVars)

In [3]:
#Feature class holding the global points to be classified
inputFC = r'E:\Projects\ML\Seagrass\Seagrass.gdb\GlobalPredict'
#Wrap it in a SS Data Object and load only the predictor fields
ssdoGl = SSDO.SSDataObject(inputFC, useChordal = False)
ssdoGl.obtainData(fields = predictVars)
#Matrix with ssdoGl.numObs rows and one column per predictor
globalPredict = NUM.empty((ssdoGl.numObs, len(predictVars)))
#Copy each predictor's values into its column, in predictVars order
for ind in range(len(predictVars)):
    var = predictVars[ind]
    globalPredict[:, ind] = ssdoGl.fields[var].data

In [4]:
##Train Random Forest Using USA Data
#Encode Seagrass Presence as Classes. sort = True numbers the classes in
#sorted label order instead of order of first appearance, so the codes do not
#depend on how the training rows happen to be ordered (the export step selects
#predictions equal to 1). presentClasses[code] gives back the original label.
indicatorUSA, presentClasses = PD.factorize(USA_Train['PRESENT'], sort = True)
#Training predictors as a plain float matrix: the same kind of input, with
#columns in predictVars order, that is passed to predict below
trainPredictors = NUM.asarray(USA_Train[predictVars], dtype = float)
#Create Random Forest Classification Object; the fixed random_state makes
#the fitted forest, and therefore the predictions, repeatable between runs
rfco = RandomForestClassifier(n_estimators = 500, random_state = 0)
#Perform Classification Using Training Set
rfco.fit(trainPredictors, indicatorUSA)
#Predict Global Seagrass Occurrence (one class code per global point)
seagrassPredGlobal = rfco.predict(globalPredict)

In [5]:
##BRING OUTPUT BACK INTO ARCGIS
#Name of the output field and path of the output feature class
nameFC = 'Seagrass_Pred'
outputFC = r'E:\Projects\ML\Seagrass\Seagrass.gdb\GlobalPredResults'
#Positions of the global points whose predicted class code is 1
index = NUM.nonzero(NUM.asarray(seagrassPredGlobal) == 1)
#Candidate output field carrying the predictions at those positions
outField = SSDO.CandidateField(nameFC, type = "DOUBLE",
                               data = seagrassPredGlobal[index])
#Restrict the SS Data Object to the selected points. This overwrites ssdoGl
#in place, so rerun the global-data cell before running this cell again.
ssdoGl.xyCoords = ssdoGl.xyCoords[index]
ssdoGl.numObs = index[0].size

#Create the output feature class from the updated data object and the field
ARC._ss.output_featureclass_from_dataobject(ssdoGl, outputFC, [outField])