# Portal Dependencies Visualization
Created: June 3, 2024
By: Tanner Hammond

## Initialization

In [16]:
#Edit these variables
## Portal URL, username, and password
## These three values are passed straight to GIS(portal, GISuser, GISpass) when connecting.
## portal is also used as the prefix when building item page links (portal + '/home/item.html?id=...').
## NOTE(review): avoid saving real credentials in a shared copy of this notebook.
portal =''
GISuser = '' 
GISpass = ''

## Filters
## filterType controls how itemFilter is interpreted when the portal is searched.
filterType = '' #Params: 'Include' - keep only the types listed; 'Exclude' - don't keep any of the types listed; '' - apply no filter
## itemFilter is compared against portalItemTypes, so entries should be spelled exactly as they appear in that list.
itemFilter = [] #Ex: ['Web Map', 'Map Service', 'Feature Service', 'Site Application','Dashboard','Web Mapping Application','Web Experience','Site Page','Form','CSV','Image']

##Export
## generateNodes: when True, a networkx spring layout is computed and X/Y columns are added to the item and dependency tables.
generateNodes = False 
## exportType: the export cells check for 'CSV', 'Features', 'Geo Table', 'All', or ''.
exportType = '' 
## pointName: output name used for the item table (itemInfo).
pointName = '' 
## lineName: output name used for the dependency table (dependencies).
lineName = ''
## outPath: folder for the CSV files; when empty, the CSV cell derives a folder from workspace instead.
outPath = r''
## workspace: geodatabase path for arcpy outputs, or 'Memory' / 'memory' for the in-memory workspace.
workspace = r'' 

##List of all items possible to query in Portal (change only if new types are added to ArcGIS Portal)
##Each entry must be a complete single-line string: 'Compact Tile Package' was previously broken across
##two lines, which left an unterminated string literal.
portalItemTypes = [
    '360 VR Experience','CityEngine Web Scene','Map Area','Pro Map','Web Map','Web Scene',
    'Feature Collection','Feature Collection Template','Feature Service','Geodata Service','Group Layer',
    'Image Service','KML','KML Collection','Map Service','OGCFeatureServer','Oriented Imagery Catalog',
    'Relational Database Connection','3DTilesService','Scene Service','Vector Tile Service','WFS','WMS','WMTS',
    'Geometry Service','Geocoding Service','Geoprocessing Service','Network Analysis Service',
    'Workflow Manager Service','AppBuilder Extension','AppBuilder Widget Package','Code Attachment','Dashboard',
    'Data Pipeline','Deep Learning Studio Project','Esri Classification Schema','Excalibur Imagery Project',
    'Experience Builder Widget','Experience Builder Widget Package','Form','GeoBIM Application','GeoBIM Project',
    'Hub Event','Hub Initiative','Hub Initiative Template','Hub Page','Hub Project','Hub Site Application',
    'Insights Workbook','Insights Workbook Package','Insights Model','Insights Page','Insights Theme',
    'Insights Data Engineering Workbook','Insights Data Engineering Model','Investigation',
    'Knowledge Studio Project','Mission','Mobile Application','Notebook','Notebook Code Snippet Library',
    'Native Application','Native Application Installer','Ortho Mapping Project','Ortho Mapping Template',
    'Solution','StoryMap','Web AppBuilder Widget','Web Experience','Web Experience Template',
    'Web Mapping Application','Workforce Project','Administrative Report','Apache Parquet','CAD Drawing',
    'Color Set','Content Category Set','CSV','Document Link','Earth configuration','Esri Classifier Definition',
    'Export Package','File Geodatabase','GeoJson','GeoPackage','GML','Image','iWork Keynote','iWork Numbers',
    'iWork Pages','Microsoft Excel','Microsoft Powerpoint','Microsoft Word','PDF','Report Template',
    'Service Definition','Shapefile','SQLite Geodatabase','Statistical Data Collection','StoryMap Theme','Style',
    'Symbol Set','Visio Document','ArcPad Package','Compact Tile Package',
    'Explorer Map','Globe Document','Layout','Map Document','Map Package','Map Template',
    'Mobile Basemap Package','Mobile Map Package','Mobile Scene Package','Project Package','Project Template',
    'Published Map','Scene Document','Task File','Tile Package','Vector Tile Package','Explorer Layer',
    'Image Collection','Layer','Layer Package','Pro Report','Scene Package','3DTilesPackage','Desktop Style',
    'ArcGIS Pro Configuration','Deep Learning Package','Geoprocessing Package',
    'Geoprocessing Package (Pro version)','Geoprocessing Sample','Locator Package','Raster function template',
    'Rule Package','Pro Report Template','ArcGIS Pro Add In','Code Sample','Desktop Add In',
    'Desktop Application','Desktop Application Template','Explorer Add In','Survey123 Add In',
    'Workflow Manager Package',
]

In [2]:
#Import Modules
from arcgis import GIS
import pandas as pd
import json
import requests
import warnings
import time
import networkx as nx
import matplotlib.pyplot as plt #might not need
import arcpy
from arcpy import env
import numpy as np

#Connect to ArcGIS Portal
gis = GIS(portal, GISuser, GISpass)

#Set Workspace
## 'Memory' / 'memory' selects the in-memory workspace, so arcpy's workspace is left untouched.
## The previous test (workspace == 'Memory' or 'memory') was always true because the non-empty
## string 'memory' is truthy, which meant the configured workspace was never applied.
if workspace not in ('Memory', 'memory'):
    try:
        arcpy.env.workspace = workspace
    except Exception:
        ## Could not apply the configured value: fall back to the workspace arcpy already has
        workspace = arcpy.env.workspace


In [3]:
#Define functions
## Reclassify an 'id' dependency as 'url' when its value is really a REST link
def restType(dependencyType, dependency):
    """Return the dependency type, switching 'id' to 'url' when the value contains a '/'.

    dependencyType: type label reported for the dependency.
    dependency: the dependency value itself (item id or link text).
    Any type other than 'id' is returned unchanged.
    """
    looksLikeLink = dependencyType == 'id' and '/' in dependency
    return 'url' if looksLikeLink else dependencyType

## Build the link that points at a dependency
def urlGen(dependencyType, dependency, portal):
    """Return a link for a dependency, or '' when its type has no link form.

    'url' dependencies are already links and are returned as-is.
    'id' dependencies become the portal item page: <portal>/home/item.html?id=<dependency>.
    """
    if dependencyType == 'url':
        return dependency
    if dependencyType == 'id':
        return portal + '/home/item.html?id=' + dependency
    return ''

## Test if links to dependencies are broken. Get title and type
def testLink(dependency, dependencyType, url):
    """Check one dependency and return pd.Series([broken, itemType, itemTitle]).

    dependency: item id or link text of the dependency.
    dependencyType: 'id' (looked up through the portal connection `gis`), 'url' (requested over
        HTTP with '?f=pjson' appended) or anything else (not tested; all three values are '').
    url: link generated for the dependency; also the key used in the `linkDict` cache.

    broken is True/False for tested links and '' for untested types. itemType and itemTitle are ''
    whenever they could not be determined.

    Relies on the notebook-level names `linkDict` (dict cache, created before this is applied)
    and `gis` (portal connection).
    """
    # Reuse an earlier result for the same link instead of testing it again
    if url in linkDict:
        broken, itemType, itemTitle = linkDict[url]
        return pd.Series([broken, itemType, itemTitle])

    broken, itemType, itemTitle = '', '', ''

    if dependencyType == 'id':
        try:
            # Search once and read both attributes from the same result
            match = gis.content.search(dependency, outside_org = True)[0]
            itemType = match.type
            itemTitle = match.title
            broken = False
        except Exception:
            # No search result (IndexError) or a failed lookup both count as a broken link
            broken, itemType, itemTitle = True, '', ''

    elif dependencyType == 'url':
        # Keep the request address separate from `url` so the cache key stays the plain link
        requestUrl = url + '?f=pjson'
        try:
            # The timeout stops one unresponsive server from stalling the whole run
            response = requests.get(requestUrl, timeout=30)
        except requests.RequestException:
            # Covers connection errors, timeouts and malformed addresses
            broken = True
        else:
            if response.status_code == 200:
                broken = False
                try:
                    item_json = response.json()
                    itemType = item_json['type']
                    itemTitle = item_json['title']
                except (ValueError, KeyError, TypeError):
                    # Reachable, but the response is not JSON or lacks 'type'/'title'
                    itemType, itemTitle = '', ''
            else:
                broken = True

    linkDict[url] = [broken, itemType, itemTitle]
    return pd.Series([broken, itemType, itemTitle])

## Choose between two column values depending on whether a check value is blank
def combineColumns(check, column1, column2):
    """Return column1 when check is not the empty string, otherwise column2."""
    return column1 if check != '' else column2

## Get All Portal Items & Groups

In [4]:
#Check filter against item type list
## 'Exclude' keeps every known type except the listed ones; 'Include' keeps only listed types that are known
if filterType == 'Exclude':
    itemFilter[:] = [i for i in portalItemTypes if i not in itemFilter]
elif filterType == 'Include':
    itemFilter[:] = [i for i in portalItemTypes if i in itemFilter]
elif filterType == '':
    ## '' means "apply no filter", so any types still listed in itemFilter are ignored
    itemFilter[:] = []
elif itemFilter == []:
    pass
else:
    raise Exception(f'Correct filter type not specified. Exclude, Include, or an empty string are the only accepted inputs. Your input: {filterType}.')

#Try regular search
##If the search hits the item cap, search by user
##If a user hits the item cap, search by item type and user
maxItems = 500
## The type clause is kept on its own so the per-user query below can reuse it;
## reusing the full `query` string there would nest one query inside another.
typeClause = '" OR "'.join(itemFilter)
if itemFilter != []:
    query = f'type: ("{typeClause}"), NOT owner: esri*'
    itemInfo = gis.content.search(query=query,max_items=maxItems)
else:
    itemInfo = gis.content.search(query='NOT owner:esri*',max_items=maxItems)

if len(itemInfo) == maxItems:
    ## A full page means results were probably truncated: start over, one user at a time
    itemInfo.clear()
    users = gis.users.search(max_users = 10000)
    users = [user for user in users if user.storageUsage != 0]
    for user in users:
        if itemFilter != []:
            items = gis.content.search(query=f'type: ("{typeClause}"), owner: {user.username}', outside_org=False, max_items=maxItems)
        else:
            items = gis.content.search(query=f'owner: {user.username}', outside_org=False, max_items=maxItems)

        if len(items) < maxItems:
            itemInfo.extend(items)
            continue

        ## This user alone hit the cap: query again one item type at a time.
        ## With no filter set, every known portal type is tried.
        ## NOTE(review): a type query may also return items of similarly named types - TODO confirm;
        ## ids already collected for this user are skipped so an item is never added twice.
        seenIds = set()
        for itemType in (itemFilter if itemFilter != [] else portalItemTypes):
            itemsByType = gis.content.search(query=f'type: "{itemType}" AND owner: {user.username}',  outside_org=False, max_items=maxItems)
            if len(itemsByType) == maxItems:
                warnings.warn(f'{user.username} has over 500 portal items of a single type. Unable to query all {itemType} items. Items currently queried added to content list.')
            for foundItem in itemsByType:
                if foundItem['id'] not in seenIds:
                    seenIds.add(foundItem['id'])
                    itemInfo.append(foundItem)
elif len(itemInfo) == 0:
    raise Exception('No items returned with the function gis.content.search.')
    
#Get group data
## Build one (item id, group title) record for every item shared with every group
groups = gis.groups.search(max_groups=-1)
groupRecords = []
for group in groups:
    for item in group.content():
        groupRecords.append({'id': item['id'], 'group': group.title})

## Naming the columns explicitly keeps the frame valid when no group shares any item
groupItems = pd.DataFrame(groupRecords, columns=['id','group'])
## One row per item, with the titles of all of its groups joined into a single string
groupItems = groupItems.groupby(['id'], as_index=False).agg({'group':', '.join})

## Get Dependencies

In [5]:
#Iterate through items with the dependent_upon function
## dependent_to is currently disabled (kept below for reference), so every record is 'Item Depends On'
dependencies = []
dependType = []
for item in itemInfo:
    dependencies.append(item.dependent_upon())
    dependType.append('Item Depends On')
    #---dependencyTo = item.dependent_to()
    #---dependency.append(dependencyTo)
    #---dependType.append('Item Dependent To')

#Convert to dataframes, add dependency direction, copy total to item info
## Both frames are built from the same item order, so their default indexes line up row for row
itemInfo = pd.DataFrame(itemInfo)
dependencies = pd.DataFrame(dependencies)
dependencies['dependencyRelate'] = dependType
itemInfo['totalDependencies'] = dependencies['total'].loc[dependencies['dependencyRelate'] == 'Item Depends On']
itemInfo['totalDepends'] = dependencies['total'].loc[dependencies['dependencyRelate'] == 'Item Dependent To']

#Combine dataframes, rename id column
dependencies = pd.merge(dependencies, itemInfo['id'], left_index=True, right_index=True)
dependencies.rename(columns = {'id':'itemID'}, inplace = True)

#Data formatting
## Split each dependency into its own row; the repeated index still identifies the owning item
dependencies = dependencies.explode('list')

## Normalize to separate into different columns, concatenate the value columns into one 'dependency' column
dependNorm = pd.json_normalize(dependencies['list']).set_index(dependencies.index)
dependNorm = dependNorm.fillna('')
dependNorm['dependency'] = dependNorm[dependNorm.columns.difference(['dependencyType'])].astype(str).sum(axis=1)
## Merge dataframes to get original ID values
dependencies = pd.merge(dependencies, dependNorm, left_index=True, right_index=True)
## Remove all empty rows w/ no dependencies
## Only a missing 'dependencyType' column means "nothing found"; any other error is left to surface as itself
try:
    dependencies = dependencies[dependencies['dependencyType'] != '']
except KeyError as missingColumn:
    raise Exception('No items with dependencies found using the provided item type filter.') from missingColumn

## Group dataframe to remove duplicate dependencies under each itemID
dependencies = dependencies.groupby(['itemID','dependency']).first().reset_index()

#Apply functions
## Fix dependency types with REST urls
dependencies['dependencyType'] = dependencies.apply(lambda x: restType(x['dependencyType'], x['dependency']), axis=1)
## Generate URLs for dependencies
dependencies['url'] = dependencies.apply(lambda x: urlGen(x['dependencyType'], x['dependency'], portal), axis=1)
## Assign the result back: an inplace fillna on a selected column is not guaranteed to update the frame
dependencies['url'] = dependencies['url'].fillna('')
## Check for broken dependency links and item type, apply testLink function
## linkDict caches results by url so each distinct link is only tested once
linkDict = {}
dependencies['linkTest'] = dependencies.apply(lambda x: list(testLink(x['dependency'], x['dependencyType'], x['url'])), axis=1)
## Spread the [broken, itemType, itemTitle] lists over three columns
dependencies[['broken','itemType','itemTitle']] = pd.DataFrame(dependencies['linkTest'].tolist(), index = dependencies.index)
# Drop all columns but necessary ones
dependencies = dependencies.loc[:, dependencies.columns.intersection(['itemID','dependencyType','dependency','dependencyRelate','broken','itemType','itemTitle','url'])]

#View dataframe
#dependencies

## Format/Clean Up Item Info

In [6]:
#Add url
itemInfo['url'] = portal + '/home/item.html?id=' + itemInfo['id']
#Add Groups
itemInfo = itemInfo.merge(groupItems, on='id', how='left')

#Fix dates, tags, access, & status, replace NA/None values w/ blanks
## created/modified arrive as epoch milliseconds; keep only the calendar date
itemInfo['created'] = pd.to_datetime(itemInfo['created'],unit='ms').dt.date
itemInfo['modified'] = pd.to_datetime(itemInfo['modified'],unit='ms').dt.date
## Tags become their string form; an empty tag list ('[]') is then blanked out
itemInfo['tags'] = itemInfo['tags'].apply(str)
itemInfo = itemInfo.replace('[]','')
## Results are assigned back: an inplace replace on a selected column is not guaranteed to update the frame
itemInfo['access'] = itemInfo['access'].replace({'public':'Public','private':'Private','shared':'Shared with Other Users','org':'Shared with Organization'})
itemInfo['contentStatus'] = itemInfo['contentStatus'].replace({'org_authoritative':'Authoritative','deprecated':'Deprecated'})
itemInfo = itemInfo.fillna('')

#Remove extra columns (any not listed)
itemInfo = itemInfo.loc[:, itemInfo.columns.intersection(['id','owner','created','modified','title','type','snippet','tags','access','numViews','contentStatus','totalDependencies','totalDepends','group','url'])]

#Rename columns
itemInfo = itemInfo.rename(columns = {'id':'itemID'})

# ***TEMP Line while dependent_to doesn't work to add missing column***
itemInfo['totalDepends'] = 0

#View dataframe
#itemInfo

## Generate Geometry

In [7]:
if generateNodes == True:
    #Apply filters
    ##Set aside serverId items and ArcGIS utility-service dependencies: they are left out of the graph
    ##and appended back (without coordinates) at the end of this cell.
    ##Dots are escaped so they only match a literal '.'.
    utilityPattern = r'services2\.arcgis|utility\.arcgisonline|services\.arcgisonline|services6\.arcgis'
    isUtility = dependencies['dependency'].str.contains(utilityPattern)
    isServerId = dependencies['dependencyType'] == 'serverId'
    ##Utility rows first, then the remaining serverId rows, so a row matching both is kept only once
    dependencies_filter = pd.concat([dependencies[isUtility], dependencies[isServerId & ~isUtility]], ignore_index = True)
    dependencies = dependencies[~isUtility & ~isServerId]

    #Construct graph
    ## create graph object and add one edge per (item, dependency) row
    G = nx.Graph()
    G.add_edges_from(zip(dependencies['itemID'], dependencies['dependency']))
    ## get positions (fixed seed keeps the layout repeatable) and convert to dataframe
    nodeXY = nx.spring_layout(G,seed=1234)
    nodeXY = pd.DataFrame(nodeXY).T.reset_index()
    nodeXY.columns = ['Node', 'X', 'Y']
    ## add 1 to all positions to prevent issues w/ crossing dateline
    nodeXY[['X','Y']] += 1

    #Add X,Y to point data
    ## add positions; the outer join also brings in nodes that are not portal items in itemInfo
    itemInfo = itemInfo.merge(nodeXY, left_on='itemID', right_on='Node', how='outer')
    ## blanks (not NaN) are what combineColumns tests for below
    itemInfo = itemInfo.fillna('')
    ## pull descriptive data about non-organization items to table
    itemInfo = itemInfo.merge(dependencies[['dependency','itemType','url','itemTitle']], left_on='Node', right_on='dependency', how='left', suffixes=('','_x'))
    ## merge columns based on values: rows with an itemID keep their own values, the rest take the dependency's
    itemInfo['type'] = itemInfo.apply(lambda x: combineColumns(x['itemID'], x['type'], x['itemType']), axis=1)
    itemInfo['url'] = itemInfo.apply(lambda x: combineColumns(x['itemID'], x['url'], x['url_x']), axis=1)
    itemInfo['title'] = itemInfo.apply(lambda x: combineColumns(x['itemID'], x['title'], x['itemTitle']), axis=1)
    itemInfo = itemInfo.drop(columns=['dependency','itemType','url_x','itemTitle'])
    itemInfo['Node'] = itemInfo.apply(lambda x: combineColumns(x['itemID'], x['itemID'], x['Node']), axis=1)
    ## the left merge above repeats a node once per referencing row; collapse back to one row per node
    itemInfo = itemInfo.groupby(['Node']).first().reset_index()

    #Add X,Y to line data
    ## original item column (itemID)
    dependencies = dependencies.merge(nodeXY, left_on='itemID', right_on='Node', how='left')
    dependencies = dependencies.rename(columns = {'X':'X1','Y':'Y1'})
    ## dependency column (dependency)
    dependencies = dependencies.merge(nodeXY, left_on='dependency', right_on='Node', how='left')
    dependencies = dependencies.rename(columns = {'X':'X2','Y':'Y2'})
    ## drop extras created from merge
    dependencies = dependencies.drop(columns=['Node_x','Node_y'])
    ##Merge filter dataframe back in
    dependencies = pd.concat([dependencies, dependencies_filter], ignore_index = True)

## Export to CSV

In [8]:
#Export to CSV
## Membership tests are used because `x == 'CSV' or 'All'` is always true (a non-empty string is truthy)
if exportType in ('CSV', 'All'):
    if outPath != '':
        parentFolder = outPath
    elif workspace in ('Memory', 'memory'):
        ## There is no folder to derive from an in-memory workspace: warn and skip the export
        parentFolder = None
        warnings.warn('Cannot export CSVs to memory.')
    elif '\\' in workspace or '/' in workspace:
        ## Use the folder that contains the workspace: everything before the last path separator
        separator = '\\' if '\\' in workspace else '/'
        parentFolder = workspace.rpartition(separator)[0]
    else:
        raise Exception('Geodatabase path is not valid.')

    if parentFolder is not None:
        dependencies.to_csv(f'{parentFolder}/{lineName}.csv',index=False)
        itemInfo.to_csv(f'{parentFolder}/{pointName}.csv',index=False)
elif exportType in ('Features', 'Geo Table', ''):
    ## Valid export types that do not produce CSV files
    pass
else:
    warnings.warn('No acceptable export type specified. Data has not been exported as CSV files.')

## Clean Data for ArcPy

In [None]:
## Membership test: `exportType == ('Features' or 'Geo Table' or 'All')` only ever compared against 'Features'
if exportType in ('Features', 'Geo Table', 'All'):
    ## Blanks and NaN both become None
    itemInfo = itemInfo.replace({np.nan: None,'': None})
    ## Count columns must hold a number in every row before the integer cast below
    for countColumn in ('numViews','totalDependencies','totalDepends'):
        if countColumn in itemInfo.columns:
            itemInfo[countColumn] = itemInfo[countColumn].fillna(0)
    itemDtypes = {'itemID':'string','owner':'string','created':'string','modified':'string','title':'string',
                  'type':'string','snippet':'string','tags':'string','url':'string',
                  'access':'string','numViews':'int','contentStatus':'string','group':'string',
                  'totalDependencies':'int','totalDepends':'int','Node':'string'}
    ## Cast only the columns that exist ('Node' is only present when node positions were generated)
    itemInfo = itemInfo.astype({column: dtype for column, dtype in itemDtypes.items() if column in itemInfo.columns})

    dependencies = dependencies.replace({np.nan: None,'':None})
    dependencyDtypes = {'itemID':'string','dependency':'string','dependencyRelate':'string','dependencyType':'string',
                        'url':'string','broken':'string','itemType':'string','itemTitle':'string'}
    dependencies = dependencies.astype({column: dtype for column, dtype in dependencyDtypes.items() if column in dependencies.columns})

#Change Dtypes for X,Y
## Rows without a position hold '' or None; to_numeric turns those into NaN instead of failing the cast
if generateNodes == True:
    for coordColumn in ('X','Y'):
        itemInfo[coordColumn] = pd.to_numeric(itemInfo[coordColumn], errors='coerce')
    for coordColumn in ('X1','Y1','X2','Y2'):
        dependencies[coordColumn] = pd.to_numeric(dependencies[coordColumn], errors='coerce')

## Export to Features

In [10]:
#Export to feature layers
## Membership tests: `x == ('Features' or 'All')` only compared against 'Features',
## and `x == ('Memory' or 'memory')` only against 'Memory'
if exportType in ('Features', 'All') and generateNodes == True:
    ## use arcpy to create feature layers, either in the in-memory workspace or in the configured one
    outRoot = 'memory' if workspace in ('Memory', 'memory') else workspace
    pointLayer = arcpy.management.XYTableToPoint(in_table = itemInfo, out_feature_class = f'{outRoot}/{pointName}', x_field = 'X', y_field = 'Y', coordinate_system = arcpy.SpatialReference(4326))
    lineLayer = arcpy.management.XYToLine(dependencies, f'{outRoot}/{lineName}','X1','Y1','X2','Y2', arcpy.SpatialReference(4326), attributes = 'ATTRIBUTES')
elif exportType in ('Features', 'All'):
    ## Features need the X/Y positions that are only computed when generateNodes is True
    warnings.warn('Feature export skipped because generateNodes is not True.')

## Export to GDB Tables (Non-Spatial)

In [19]:
## Membership tests: `x == ('Geo Table' or 'All')` only compared against 'Geo Table',
## and `x == ('Memory' or 'memory')` only against 'Memory'
if exportType in ('Geo Table', 'All'):
    ## Write both tables either to the in-memory workspace or to the configured one
    outRoot = 'memory' if workspace in ('Memory', 'memory') else workspace
    pointTable = arcpy.conversion.ExportTable(in_table = itemInfo, out_table= f'{outRoot}/{pointName}', use_field_alias_as_name='NOT_USE_ALIAS') #field_mapping = {}
    lineTable = arcpy.conversion.ExportTable(in_table = dependencies, out_table= f'{outRoot}/{lineName}', use_field_alias_as_name='NOT_USE_ALIAS') #field_mapping = {}