# ArcGIS Online Item Audit
Description: This will check for new or deleted items in your AGO Organization.
 
Created on: 10/27/2019
 
Purpose: Various fields have technical or data hygiene requirements. This notebook scours the data, filling in incomplete data where possible and reporting instances where populating is not possible.
 
Authored by: Rick Frantz

# Connect to ArcGIS Online

In [None]:
# import
from arcgis.gis import GIS
from arcgis.gis.server import Server
import getpass
from IPython.display import display
from arcgis.mapping import WebMap
import json
import requests
from arcgis.features import FeatureSet, Feature
#import time

# Connection Variables
# The organization URL is hard-coded; the commented-out input() line below
# is the alternative that prompts for it instead.
Organization = "https://audubon.maps.arcgis.com"
#Organization = input("What's your oranization? ")
# Credentials are collected interactively rather than stored in the notebook;
# getpass prompts for the password without echoing it.
User = input("What's your  username? ")
Password = getpass.getpass('Password: ')

# Connection
# Sign in to the organization. The resulting `gis` object is what the audit
# cells use to fetch content.
gis = GIS(Organization, User, Password)
# Bare expression: as the last statement of a notebook cell it displays the
# connection object.
gis

# Audit Section

In [None]:
# Load the tree layer
#
# Fetches the hosted item by id, prints the name of every layer it exposes and
# binds the LAST layer to `tree_lyr`, which the audit cells below operate on.

itemId = '1b248cba12994420a1fbc7957f8e4cf9'
fs = gis.content.get(itemId)
if fs is None:
    # content.get() returns None when the item does not exist or the signed-in
    # user cannot see it; fail here instead of on the attribute access below.
    raise ValueError("Item {} was not found or is not accessible".format(itemId))
display(fs)

lyrs = fs.layers
if not lyrs:
    # Without this check `tree_lyr` would stay unbound (or keep a stale value
    # from an earlier run of the notebook) and later cells would audit nothing
    # or the wrong layer.
    raise ValueError("Item {} does not expose any layers".format(itemId))

for lyr in lyrs:
    print(lyr.properties.name)

# Same layer the original loop ended up with: the last one listed.
tree_lyr = lyrs[-1]

In [None]:
# This takes care of features where the Labeled field is blank:
# every feature whose Labeled value is NULL is updated to "No".

beforeLabel = tree_lyr.query(where='Labeled is NULL', return_count_only=True)
if beforeLabel > 0:
    print("Number of Trees where Labeled field is blank:", beforeLabel)
    # Only the field being changed is requested (same pattern as the KeyID
    # cell), so the update does not write every other attribute back to the
    # service unchanged.
    noLabels = tree_lyr.query(where='Labeled is NULL', out_fields=["Labeled"], returnGeometry=False)

    for noL in noLabels:
        noL.set_value('Labeled', "No")  # set the blanks to No

    # update the features in AGOL
    results = tree_lyr.edit_features(updates=noLabels)
    # The service reports success per feature; collect the rejected ones so a
    # failed edit is not reported as done.
    failed = [r for r in results.get('updateResults', []) if not r.get('success')]
    results = None
    if failed:
        print("{} of {} updates were rejected by the service:".format(len(failed), len(noLabels)))
        print(failed)
    else:
        print ("Took care of them.")

else:
    print("There were no trees with a blank Labeled field")

In [None]:
# This takes care of features where the KeyID field is blank or duplicated.
#
# Steps:
#   1. NULL KeyIDs are set to 0.
#   2. For every duplicated non-zero KeyID, all occurrences except the first
#      are set to 0.
#   3. Every feature whose KeyID is 0 gets a new value, counting up from the
#      current maximum KeyID.

import pandas as pd

# Let's see if there are records that need to be fixed
totalRecords = tree_lyr.query(return_count_only=True)
blankKeys = tree_lyr.query(where='KeyID is NULL', return_count_only=True)
zeroKeys = tree_lyr.query(where='KeyID=0', return_count_only=True)

# Checking for records where the KeyID is 0
if zeroKeys > 0:
    print("There are {} 0s in there".format(str(zeroKeys)))

if blankKeys > 0:
    print ("There are {} NULLs in there".format(str(blankKeys)))

    noKey = tree_lyr.query(where='KeyID is NULL', out_fields=["KeyID"], returnGeometry=False)

    for nK in noKey:
        nK.set_value('KeyID', 0)  # set the nulls to 0
    results = tree_lyr.edit_features(updates=noKey)
    results = None
    print ("Made the blank KeyIDs now 0")

# Let's check for duplicate KeyIDs using pandas
distKeys = tree_lyr.query(where='KeyID <> 0', out_fields=['KeyID'], returnGeometry=False)  # query all the KeyIDs
df = distKeys.sdf  # turn that query into a pandas dataframe

if df.empty:
    # No feature has a non-zero KeyID yet: nothing can be duplicated and
    # numbering starts from 0 (so the first assigned KeyID is 1).
    dups = df
    dupOID = []
    maxKey = 0
else:
    dups = df[df.duplicated(subset=['KeyID'], keep='first')]  # get only the duplicated KeyIDs RETAINING the first
    # Make a list of those OBJECTIDs
    dupOID = dups['OBJECTID'].tolist()
    maxKey = df['KeyID'].max()
    # max() yields a numpy scalar (or NaN); convert to a plain int so the
    # counter below produces plain integers.
    maxKey = 0 if pd.isna(maxKey) else int(maxKey)

# Change the duplicated KeyIDs to 0
if dupOID:
    print ("There are {} duplicates in there".format(str(len(dupOID))))
    # The duplicated features are already in `distKeys`, so pick them out by
    # OBJECTID and send one update instead of one query + one edit per feature.
    dupSet = set(dupOID)
    dupFeatures = [f for f in distKeys if f.get_value('OBJECTID') in dupSet]
    for f in dupFeatures:
        f.set_value('KeyID', 0)  # set the duplicate KeyID to 0
    results = tree_lyr.edit_features(updates=dupFeatures)

# Now let's make those 0s into unique IDs
print ("The maximum KeyID is", str(maxKey))
needKeys = tree_lyr.query(where='KeyID=0', out_fields=['KeyID'], returnGeometry=False)  # query all the 0 KeyIDs
print ("About to populate {} KeyIDs".format(str(len(needKeys))))
for k in needKeys:
    maxKey += 1
    k.set_value('KeyID', maxKey)  # set the 0 KeyID to a unique number
# Skip the round trip when nothing needs a key; `results` stays defined either
# way because the commit cell further down refers to it.
results = tree_lyr.edit_features(updates=needKeys) if len(needKeys) > 0 else None
print ("Made all the 0 KeyIDs a unique integer")

print ("\nThe KeyID field should be good")

In [None]:
# Some code that brings the tree layer into a pandas df for analysis.
# Nothing here is written back to the service; `af` is a local preview only.

import pandas as pd
testset = tree_lyr.query(out_fields="*", returnGeometry=False)
df = testset.sdf

# Filter the dataframe to features where the Labeled field is blank.
# .copy() makes `af` an independent frame, so the assignment below cannot
# raise SettingWithCopyWarning or be mistaken for a write into `df`.
af = df.loc[df['Labeled'].isna(), ['OBJECTID', 'KeyID', 'Labeled']].copy()
af['Labeled'] = "No"
# Bare expression: displays the frame when this is the last line of the cell.
af

In [None]:
# Commit the duplicate edits
#
# Re-queries the features whose OBJECTIDs are listed in `dupOID` and sends
# them to the service as updates.
# NOTE(review): no KeyID is changed between the query and the edit in this
# cell, so the features are written back as queried even though the messages
# below say they were set to 0 - confirm whether this cell is still needed.
if dupOID:
    txt_dupOID = ['%.0f' % elem for elem in dupOID]  # need to format the query
    qrystring = ','.join(txt_dupOID)
    dupKeys = tree_lyr.query(where='OBJECTID IN (' + qrystring + ')', out_fields=['OBJECTID', 'KeyID'], returnGeometry=False)
    # Rebinding is enough; the former `del results` raised NameError whenever
    # this cell ran before any cell that assigns `results`.
    results = tree_lyr.edit_features(updates=dupKeys)
    print (results)
    print("Committed {} duplicates to 0".format(str(len(dupKeys))))
    print ("Duplicate KeyIDs have been made 0")
else:
    # An empty list would build the invalid where clause "OBJECTID IN ()".
    print("No duplicate KeyIDs were found, so there is nothing to commit")