In [112]:
#!/usr/bin/env python
# coding: utf-8

import numpy as np
import pandas as pd

from PIL import Image # used for loading images
import os # used for navigating to image path
import imageio # used for writing images
import re # for matching image file name classes
import matplotlib.pyplot as plt
import random
import ntpath
import csv
from timeit import default_timer as timer

In [113]:
# Download and extract new images from sharepoint
SUMMER_21_IMAGES = '../../data/raw/Summer 2021 AI Photos'

In [114]:
def getListOfFiles(dirName):
    """Returns single list of the filepath of each of the training image files taken in the Summer of 2021"""
    # source: https://thispointer.com/python-how-to-get-list-of-files-in-directory-and-sub-directories/
    # modified
    level_one = [f.path for f in os.scandir(dirName) if f.is_dir()] # get all of the immediate subdirectories in the parent folder downloaded from SharePoint
    allFiles = list()
    for subdir in level_one:
        level_two = [f.path for f in os.scandir(subdir) if f.is_dir()] # get all of the immediate subdirectories for each arborist
        for subdir2 in level_two:
            if ('Away' in subdir2): # do not include images taken away powerlines
                print('Ignoring images of trees away from power lines.\n')
            else:
                listOfFile = os.listdir(subdir2) # retain only images taken near power lines
                # Iterate over all the entries
                for entry in listOfFile:
                    # Create full path
                    fullPath = os.path.join(subdir2, entry)
                    # If entry is a directory then get the list of files in this directory 
                    if os.path.isdir(fullPath):
                        allFiles = allFiles + getListOfFiles(fullPath)
                    else:
                        allFiles.append(fullPath)
                
    return allFiles

In [115]:
summer_photos = getListOfFiles(SUMMER_21_IMAGES)

Ignoring images of trees away from power lines.

Ignoring images of trees away from power lines.

Ignoring images of trees away from power lines.



In [116]:
len(summer_photos)

271

In [118]:
summer_photos[140]

'../../data/raw/Summer 2021 AI Photos\\Ryan\\Near power lines\\15-improbable.jpg'

In [119]:
summer_photos[140].split('_')[0].split('\\')[0].split('/')[-1]

'Summer 2021 AI Photos'

In [120]:
def splitIndexDescrArb():
    index_list = []
    description_list = []
    arborist_list = []
    for fn in summer_photos:
        if ('_' in fn):
            index = fn.split('_')[0].split('\\')[-1]
            description = fn.split('_')[1]
            arborist = fn.split('_')[0].split('\\')[1].split('/')[-1]
        else:
            index = fn.split('-')[0].split('\\')[-1].rstrip(' ')
            description = fn.split('-')[1]
            arborist = fn.split('_')[0].split('\\')[1].split('/')[-1]        
        index = int(index)
        index_list.append(index)
        description_list.append(description)
        arborist_list.append(arborist)
    return index_list, description_list, arborist_list

In [121]:
index_list, description_list, arborist_list = splitIndexDescrArb()

In [122]:
df = pd.DataFrame([summer_photos, arborist_list, index_list, description_list])# #], columns=['Index_Label', 'Filename'])
df = df.transpose()
df.columns=['Filename','Arborist', 'Index_Label', 'Description']

In [123]:
df.Index_Label.unique()

array([15, 16, 17, 18, 19, 1, 20, 21, 29, 2, 30, 31, 32, 33, 36, 37, 38,
       39, 40, 41, 10, 11, 12, 13, 14, 22, 23, 24, 25, 26, 27, 28, 3, 34,
       35, 4, 42, 43, 44, 45, 46, 47, 48, 49, 5, 50, 51, 52, 53, 54, 55,
       56, 57, 58, 59, 6, 60, 61, 62, 63, 64, 65, 66, 7, 8, 9, 100, 101,
       102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
       115, 116, 117, 118, 119, 120, 121, 67, 68, 69, 70, 71, 72, 73, 74,
       75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
       92, 93, 94, 95, 96, 97, 98, 99], dtype=object)

In [126]:
df.Filename[0]

'../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\15_probable.jpg'

In [145]:
df.loc[df.Index_Label == 44]

Unnamed: 0,Filename,Arborist,Index_Label,Description
72,../../data/raw/Summer 2021 AI Photos\Ari\Near ...,Ari,44,Improbable.jpg
188,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,44,possible basal wound closeup.jpg
189,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,44,possible crown dieback and basal wound.jpg
190,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,44,possible crown dieback closeup.jpg


In [149]:
# dfex = df.loc[df.Index_Label == 44] 
dfex = df.loc[df.Arborist == "Ryan"]

In [150]:
dfex

Unnamed: 0,Filename,Arborist,Index_Label,Description
105,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,1,improbable
106,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,1,improbable.jpg
107,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,10,improbable.jpg
108,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,100,improbable.jpg
109,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,101,probable significant deadwood and decay.jpg
...,...,...,...,...
266,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,95,improbable.jpg
267,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,96,improbable.jpg
268,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,97,improbable.jpg
269,../../data/raw/Summer 2021 AI Photos\Ryan\Near...,Ryan,98,improbable.jpg


In [151]:
good_index_list = []
for r in dfex.index:
    if 'closeup' not in dfex.loc[r, 'Description']:
        good_index_list.append(r)

In [152]:
good_index_list

[105,
 106,
 107,
 108,
 109,
 111,
 112,
 113,
 114,
 115,
 116,
 118,
 119,
 120,
 121,
 123,
 124,
 125,
 126,
 127,
 128,
 130,
 131,
 132,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 142,
 143,
 144,
 145,
 147,
 149,
 151,
 153,
 155,
 156,
 158,
 159,
 160,
 162,
 163,
 165,
 166,
 167,
 170,
 171,
 174,
 175,
 176,
 177,
 178,
 179,
 181,
 183,
 184,
 185,
 186,
 187,
 189,
 192,
 193,
 194,
 196,
 197,
 199,
 200,
 201,
 202,
 203,
 205,
 206,
 207,
 208,
 209,
 210,
 212,
 213,
 215,
 216,
 217,
 218,
 219,
 220,
 221,
 222,
 223,
 225,
 226,
 227,
 228,
 229,
 230,
 231,
 232,
 234,
 235,
 236,
 237,
 238,
 240,
 242,
 243,
 246,
 247,
 248,
 251,
 252,
 253,
 255,
 257,
 260,
 261,
 263,
 265,
 266,
 267,
 268,
 269,
 270]

In [138]:
good_index_list = [188, 189]

In [135]:
dfex['DescLength'] = 1

In [153]:
dfex['DescLength'] = dfex.Description.str.len()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfex['DescLength'] = dfex.Description.str.len()


In [154]:
dfex.DescLength.idxmin()

132

In [157]:
unique_image_list = []
for arborist in df.Arborist.unique():
    print(arborist)
    dfsub = df.loc[df.Arborist == arborist]
    print(dfsub.Index_Label.unique())
    for index in dfsub.Index_Label.unique():
        dfsub2 = dfsub.loc[dfsub.Index_Label == index]
        if len(dfsub2) == 1:
            selected_filename = dfsub2.Filename.tolist()[0]
        else:
            good_index_list = []
            for r in dfsub2.index:
                #print(dfsub2.loc[r, 'Description'])
                if 'closeup' not in dfsub2.loc[r, 'Description']:
                    good_index_list.append(r)
            print("Non closeup names; good index list:")
            print(good_index_list)
            if len(good_index_list) == 1:
                selected_filename = dfsub2.loc[good_index_list, 'Filename'].tolist()[0]
            else:
                testdf = dfsub2.loc[good_index_list]
                testdf['DescriptionLength'] = testdf.Description.str.len()
                for i in good_index_list:
                    selected_filename = testdf.loc[testdf.DescriptionLength.idxmin(), 'Filename']
                #print("Multiple cases without closeup found. Expand conditions.")
        print(selected_filename)                
        unique_image_list.append(selected_filename)

Amanda
[15 16 17 18 19 1 20 21 29 2 30 31 32 33 36 37 38 39 40 41]
Non closeup names; good index list:
[0, 1]
../../data/raw/Summer 2021 AI Photos\Amanda\Near power lines\15_probable_crack.jpg
../../data/raw/Summer 2021 AI Photos\Amanda\Near power lines\16_possible.jpg
../../data/raw/Summer 2021 AI Photos\Amanda\Near power lines\17_improbable.jpg
Non closeup names; good index list:
[4, 5]
../../data/raw/Summer 2021 AI Photos\Amanda\Near power lines\18_possible_codominant_stems.jpg
../../data/raw/Summer 2021 AI Photos\Amanda\Near power lines\19_improbable.jpg
Non closeup names; good index list:
[7, 8]
../../data/raw/Summer 2021 AI Photos\Amanda\Near power lines\1_possible_dead_branch.jpg
Non closeup names; good index list:
[9, 10]
../../data/raw/Summer 2021 AI Photos\Amanda\Near power lines\20_possible_decay.jpg
../../data/raw/Summer 2021 AI Photos\Amanda\Near power lines\21_probable.jpg
Non closeup names; good index list:
[12, 13]
../../data/raw/Summer 2021 AI Photos\Amanda\Near power 

In [240]:
unique_image_list

['../../data/raw/summer21/amanda\\15_probable_crack.jpg',
 '../../data/raw/summer21/amanda\\16_possible.jpg',
 '../../data/raw/summer21/amanda\\17_improbable.jpg',
 '../../data/raw/summer21/amanda\\18_possible_codominant_stems.jpg',
 '../../data/raw/summer21/amanda\\19_improbable.jpg',
 '../../data/raw/summer21/amanda\\1_possible_dead_branch.jpg',
 '../../data/raw/summer21/amanda\\20_possible_decay.jpg',
 '../../data/raw/summer21/amanda\\21_probable.jpg',
 '../../data/raw/summer21/amanda\\29_possible_codominant_stems.jpg',
 '../../data/raw/summer21/amanda\\2_improbable.jpg',
 '../../data/raw/summer21/amanda\\30_improbable.jpg',
 '../../data/raw/summer21/amanda\\31_improbable.jpg',
 '../../data/raw/summer21/amanda\\32_possible_codominant_stems.jpg',
 '../../data/raw/summer21/amanda\\33_hidden_defect.jpg',
 '../../data/raw/summer21/amanda\\36_improbable.jpg',
 '../../data/raw/summer21/amanda\\37_probable_dead_branch.jpg',
 '../../data/raw/summer21/amanda\\38_improbable.jpg',
 '../../data

In [159]:
len(unique_image_list)

207