### calcSpatialImageStats.py ###
Calculate Spatial Clustering Statistics for PSPNet category predictions <br>
**Author** Andrew Larkin <br>
**Affiliation** Oregon State University, Spatial Health Lab <br>
**Date Last Modified** September 2nd, 2019 <br>

### Import packages ###

In [1]:
import os # for collecting file lists
import numpy as np # PSPNet predictions are stored as a numpy array
from PIL import Image # for image visualization/debugging
from copy import deepcopy
import pandas as ps # for saving as csv
from scipy import ndimage, misc # for focal stats

### define folders containing input image classifications and output statistic csvs ###

In [2]:
# Folder that is scanned for PSPNet prediction files (*.npy); the output csv
# is written into this same folder.
parentFolder = "C:/users/larkinan/Desktop/categorystats"
# Name of a single prediction file; not referenced by the code visible in this
# notebook -- presumably kept for manual testing/debugging (TODO confirm).
testFile = "pZMx7YKWt0uPiKD99hgDgNA_180.npy"

### Define groups or "categories" of image labels ###
**Outputs**:  <br>
&nbsp;&nbsp;&nbsp;&nbsp;**categoryDict** (Dict): dictionary mapping each category name to the list of string labels that belong to that category

In [3]:
def defineCategoryDicts():
    """Build the mapping from category name to the labels it groups together.

    Returns:
        dict: category name (str) -> list of PSPNet label strings that belong
        to that category. A new dictionary with new lists is created on every
        call.
    """
    return {
        'greenspace': ['tree', 'grass', 'plant', 'field', 'flower'],
        'accessibility': [
            'sidewalk', 'escalator', 'path', 'stairs', 'stairway', 'bench', 'step',
        ],
        # allNature lists the greenspace, bluespace and otherNature labels
        'allNature': [
            'tree', 'grass', 'plant', 'field', 'land', 'flower',
            'water', 'sea', 'waterfall', 'lake',
            'earth', 'mountain', 'rock', 'sky', 'sand', 'hill', 'dirt track',
        ],
        'bluespace': ['water', 'sea', 'waterfall', 'lake'],
        'otherNature': [
            'earth', 'mountain', 'rock', 'sky', 'sand', 'hill', 'dirt track', 'land',
        ],
        'animate': [
            'person', 'boat', 'car', 'bus', 'truck', 'airplane', 'van', 'ship',
            'minibike', 'animal', 'bicycle',
        ],
        'builtEnv': [
            'wall', 'building', 'road', 'windowpane', 'sidewalk', 'hovel',
            'house', 'fence', 'railing', 'signboard', 'skyscraper', 'path',
            'stairs', 'runway', 'screen', 'door', 'screen door', 'stairway',
            'bridge', 'bench', 'booth', 'awning', 'streetlight', 'pole',
            'bannister', 'escalator', 'fountain', 'swimming pool', 'step',
            'sculpture', 'traffic light', 'pier', 'bulletin board',
        ],
    }

### Get ordered set of PSPNet prediction labels ###
**Outputs**:  <br>
&nbsp;&nbsp;&nbsp;&nbsp;set of prediction labels, in the same order as PSPNet output numpy matrices

In [None]:
def getAllCategories():
    """Return the PSPNet prediction labels in prediction-index order.

    The position of a label in the returned list is the integer used for that
    label when looking up category indices (see addCateogryNumsToDict).

    Returns:
        list of str: the 150 label names; a new list on every call.
    """
    orderedLabels = [
        # 0 - 9
        "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed", "windowpane", "grass",
        # 10 - 19
        "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair",
        # 20 - 29
        "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field",
        # 30 - 39
        "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion",
        # 40 - 49
        "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace",
        # 50 - 59
        "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway",
        # 60 - 69
        "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench",
        # 70 - 79
        "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel",
        # 80 - 89
        "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver",
        # 90 - 99
        "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet",
        # 100 - 109
        "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool",
        # 110 - 119
        "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball",
        # 120 - 129
        "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher",
        # 130 - 139
        "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan",
        # 140 - 149
        "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag",
    ]
    return orderedLabels

### create a dictionary of category set indices in PSPNet output matrix ###

**Inputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**inDict** (Dictionary) - contains category set string labels <br>
&nbsp;&nbsp;&nbsp;&nbsp;**categories** (String Set) - set of PSPNet model predictions, in same order as in the prediction matrix <br>
**Outputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**newDict** (Dictionary) - set of indices for each category <br>

In [4]:
def addCateogryNumsToDict(inDict,categories):
    """Translate each category's string labels into PSPNet label indices.

    Args:
        inDict (dict): category name -> list of label strings.
        categories (list of str): PSPNet labels, ordered so that a label's
            position in the list is its index in the prediction matrix.

    Returns:
        dict: "<category name>_num" -> list of integer indices, one per label
        and in the same order as the labels in inDict. Keys are inserted in
        sorted category-name order.

    Raises:
        ValueError: if a label in inDict is not present in categories.
    """
    # Read from the inDict argument (not a module-level dictionary) so the
    # function works for whatever mapping the caller passes in.
    keys = sorted(inDict)
    print(keys)
    return {
        key + "_num": [categories.index(val) for val in inDict[key]]
        for key in keys
    }

### create binary numpy matrix from PSPNet matrix, indicating whether predictions belong to a single label or category of interest ###
**Inputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**categoryNums** (Integer List) - values that should return true in the output binary matrix <br>
&nbsp;&nbsp;&nbsp;&nbsp;**imgArray** (Numpy matrix) - input predictions that should be screened for categories of interest.  <br>
**Outputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp; numpy matrix of true/false values, with same dimensions as input imgArray

In [6]:
def createBinaryCategorical(categoryNums,imgArray):
    """Flag every element of imgArray whose value is one of categoryNums.

    Args:
        categoryNums (list of int): values that should map to True.
        imgArray (numpy array): predictions to screen.

    Returns:
        numpy bool array with the same shape as imgArray.
    """
    membershipMask = np.isin(imgArray, categoryNums)
    return membershipMask

### create header for output csv file containing spatial clustering statistics for each image ###
**Inputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**catDict** (Dictionary) - list of all groups spatial statistics were derived for <br>
&nbsp;&nbsp;&nbsp;&nbsp;**statCategories** (String Set) - list of all derived spatial statistics <br>
**Outputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**header** (String) - the header (top line) for the output csv file(s)

In [7]:
def createHeader(catDict,statCategories):
    """Build the list of column names for the output csv.

    Args:
        catDict (dict): category name -> list of label strings.
        statCategories (list of str): names of the statistics derived for
            every category and label.

    Returns:
        list of str: "filename" followed, for each category in sorted name
        order, by "<category>_<stat>" columns and then "<label>_<stat>"
        columns for each label of that category. Labels shared by several
        categories therefore appear more than once.
    """
    joiner = "_"
    columns = ["filename"]
    for groupName in sorted(catDict.keys()):
        # the category itself comes first, followed by its member labels
        for columnPrefix in [groupName] + list(catDict[groupName]):
            columns.extend(columnPrefix + joiner + stat for stat in statCategories)
    return columns

### calculate spatial statistics for one label and one image ###
**Inputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**imgArr** (numpy boolean Array) - binary mask marking the pixels that belong to the label or category of interest, in numpy matrix format <br>
**Outputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**results** (float array) - overall spatial statistics for the entire image <br>

In [8]:
def calcStatsOneImageOneLabel(imgArr):
    """Derive the spatial clustering statistic for one binary label mask.

    A 2x2 moving average of the mask gives, for every pixel, the fraction of
    the window covered by the label. That fraction is kept only at pixels that
    carry the label, scaled to a percentage, and averaged over the labelled
    pixels.

    Args:
        imgArr (numpy array): binary (True/False or 1/0) mask of the pixels
            that belong to the label of interest.

    Returns:
        list of float: a single-element list holding the statistic. The
        element is NaN when the mask contains no labelled pixels.
    """
    mask = imgArr*1.0
    labelledPixels = np.sum(mask)
    # An empty mask would otherwise divide 0 by 0 and emit a RuntimeWarning;
    # report the undefined statistic as NaN explicitly instead.
    if labelledPixels == 0:
        return [np.nan]
    neighbImg = np.multiply(ndimage.uniform_filter(mask,2),mask)*100
    return [np.sum(neighbImg)/labelledPixels]

### perform image preprocessing and derive summary statistics for one category, including individual labels within the category ###
**Inputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**numDict** (Dictionary) - set of integers that belong to each category <br>
&nbsp;&nbsp;&nbsp;&nbsp;**catName** (String) - name of the numDict key that corresponds to the integer values for the category of interest <br>
&nbsp;&nbsp;&nbsp;&nbsp;**results** (numpy float Matrix) - previously derived statistics for images within the same batch.  Newly derived statistics are appended to this <br>
&nbsp;&nbsp;&nbsp;&nbsp;**img** (numpy integer array) - PSPNet model classifications for each image pixel <br>
**Outputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**results** (numpy float matrix) - the input results matrix with newly derived statistics appended to the end <br>

In [9]:
def processSingleCategory(numDict,catName,results,img):
    """Append the statistics of one category and of each of its labels.

    The category-level statistic (all labels of the category combined) is
    appended first, followed by one statistic per individual label, in the
    order the labels are listed in numDict[catName].

    Args:
        numDict (dict): category key -> list of integer label indices.
        catName (str): key of numDict identifying the category to process.
        results (numpy array): column of values derived so far for this
            image; may be empty (shape[0] == 0).
        img (numpy array): PSPNet label index for each image pixel.

    Returns:
        numpy array: results with one row appended for the category and one
        row for each of its labels. A row is NaN when no pixel of the image
        carries the corresponding label(s).
    """
    numOutcomes = 1  # number of statistics produced per mask
    # The combined category mask goes through the same statistic routine as
    # the single-label masks (calcStatsOneImageOneLabel).
    labelSets = [numDict[catName]] + [[labelNum] for labelNum in numDict[catName]]
    for labelSet in labelSets:
        binaryMask = createBinaryCategorical(labelSet,img)
        if(np.sum(binaryMask*1) > 0):
            stats = np.asarray(calcStatsOneImageOneLabel(binaryMask))
        else:
            # no pixel carries these labels: the statistic is undefined
            stats = np.full((numOutcomes,),np.nan)
        column = stats.reshape((stats.shape[0],1))
        if(results.shape[0] == 0):
            results = column
        else:
            results = np.concatenate((results,column))
    return(results)

### derive spatial statistics for one image ###
**Inputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**imgFilepath** (String) - absolute filepath of image to process <br>
&nbsp;&nbsp;&nbsp;&nbsp;**numDict** (Dictionary) - contains the set of integers for each category to process <br>
&nbsp;&nbsp;&nbsp;&nbsp;**imgName** (String) - unique id for each image <br>
**Outputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**results** (float array) - summary statistics for all categories of interest <br>

In [10]:
def processSingleImage(imgFilepath,numDict,imgName):
    """Load one prediction file and derive the statistics of every category.

    Args:
        imgFilepath (str): path of the .npy file holding the predictions.
        numDict (dict): category key -> list of integer label indices.
        imgName (str): identifier stored as the first entry of the result.

    Returns:
        numpy array: a single column whose first row is imgName, followed by
        the rows appended by processSingleCategory for each category, visited
        in sorted key order.
    """
    predictions = np.load(imgFilepath)
    # the first row holds the image id; statistic rows are appended below it
    imageStats = np.asarray([imgName]).reshape((1,1))
    for groupKey in sorted(numDict.keys()):
        imageStats = processSingleCategory(numDict,groupKey,imageStats,predictions)
    return imageStats

### process all images within a given folder ###
**Inputs** <br>
&nbsp;&nbsp;&nbsp;&nbsp;**imageFolder** (String) - absolute filepath to folder containing PSPNet image predictions <br>
&nbsp;&nbsp;&nbsp;&nbsp;**numDict** (Dictionary) - set of integers that belong to each category <br>
**Outputs**<br>
&nbsp;&nbsp;&nbsp;&nbsp;**results** (float numpy array) - summary statistics for each image within the folder <br>

In [11]:
def processAllImages(imageFolder,numDict,prevFiles=None):
    """Derive statistics for every prediction file found in a folder.

    Args:
        imageFolder (str): folder containing the PSPNet prediction files.
        numDict (dict): category key -> list of integer label indices.
        prevFiles: currently not used by this function. Optional, so callers
            may omit it.

    Returns:
        numpy array: one column per processed file (the column produced by
        processSingleImage); an empty array when the folder holds no file
        whose name ends in 'npy'.
    """
    filesToProcess = [
        candidate for candidate in os.listdir(imageFolder)
        if candidate.endswith('npy')
    ]
    results = np.asarray([])
    for filename in filesToProcess:
        print(filename)
        filepath = imageFolder + "/" + filename
        imageColumn = processSingleImage(filepath,numDict,filename)
        imageColumn = imageColumn.reshape((imageColumn.shape[0],1))
        if(results.shape[0] == 0):
            results = imageColumn
        else:
            # stack images side by side: rows are statistics, columns are images
            results = np.concatenate((results,imageColumn),axis=1)
    return(results)

### main function ###

In [12]:
#unique_elements, counts_elements = np.unique(testData, return_counts=True)
# Build the label groups and translate their labels into PSPNet label indices.
catDict = defineCategoryDicts()
numDict = addCateogryNumsToDict(catDict,getAllCategories())
# Names of the statistics derived for every category and label; they become
# the column-name suffixes of the output csv.
statCategories = ['ratio']
# processAllImages takes a third argument (prevFiles); pass an empty list so
# the call matches its signature.
results = processAllImages(parentFolder,numDict,[])

['accessibility', 'allNature', 'animate', 'bluespace', 'builtEnv', 'greenspace', 'otherNature']
pZMx7YKWt0uPiKD99hgDgNA_180.npy


  after removing the cwd from sys.path.


testFile.npy


In [14]:
# Column names: "filename" followed by one column per category/label statistic.
header = createHeader(catDict,statCategories)
print(header)
# results holds one column per image, so transpose to get one row per image.
testDataframe = ps.DataFrame(results.transpose())
print(testDataframe)
testDataframe.head()
testDataframe.columns = header
# Write the table into the same folder the predictions were read from.
testDataframe.to_csv(parentFolder + "/spatial_clustering_results.csv")

['filename', 'accessibility_ratio', 'sidewalk_ratio', 'escalator_ratio', 'path_ratio', 'stairs_ratio', 'stairway_ratio', 'bench_ratio', 'step_ratio', 'allNature_ratio', 'tree_ratio', 'grass_ratio', 'plant_ratio', 'field_ratio', 'land_ratio', 'flower_ratio', 'water_ratio', 'sea_ratio', 'waterfall_ratio', 'lake_ratio', 'earth_ratio', 'mountain_ratio', 'rock_ratio', 'sky_ratio', 'sand_ratio', 'hill_ratio', 'dirt track_ratio', 'animate_ratio', 'person_ratio', 'boat_ratio', 'car_ratio', 'bus_ratio', 'truck_ratio', 'airplane_ratio', 'van_ratio', 'ship_ratio', 'minibike_ratio', 'animal_ratio', 'bicycle_ratio', 'bluespace_ratio', 'water_ratio', 'sea_ratio', 'waterfall_ratio', 'lake_ratio', 'builtEnv_ratio', 'wall_ratio', 'building_ratio', 'road_ratio', 'windowpane_ratio', 'sidewalk_ratio', 'hovel_ratio', 'house_ratio', 'fence_ratio', 'railing_ratio', 'signboard_ratio', 'skyscraper_ratio', 'path_ratio', 'stairs_ratio', 'runway_ratio', 'screen_ratio', 'door_ratio', 'screen door_ratio', 'stairway