In [1]:
#!/usr/bin/env python
# coding: utf-8

import numpy as np
import pandas as pd

from PIL import Image # used for loading images
import os # used for navigating to image path
import imageio # used for writing images
import re # for matching image file name classes
import matplotlib.pyplot as plt
import random
import ntpath
import csv
from timeit import default_timer as timer

In [3]:
# Download and extract new images from sharepoint
SUMMER_21_IMAGES = '../../data/raw/Summer 2021 AI Photos'

In [37]:
def getListOfFiles(dirName):
    """Returns single list of the filepath of each of the training image files taken in the Summer of 2021"""
    # source: https://thispointer.com/python-how-to-get-list-of-files-in-directory-and-sub-directories/
    # modified
    level_one = [f.path for f in os.scandir(dirName) if f.is_dir()] # get all of the immediate subdirectories in the parent folder downloaded from SharePoint
    allFiles = list()
    for subdir in level_one:
        level_two = [f.path for f in os.scandir(subdir) if f.is_dir()] # get all of the immediate subdirectories for each arborist
        for subdir2 in level_two:
            if ('Away' in subdir2): # do not include images taken away powerlines
                print('Ignoring images of trees away from power lines.\n')
            else:
                listOfFile = os.listdir(subdir2) # retain only images taken near power lines
                # Iterate over all the entries
                for entry in listOfFile:
                    # Create full path
                    fullPath = os.path.join(subdir2, entry)
                    # If entry is a directory then get the list of files in this directory 
                    if os.path.isdir(fullPath):
                        allFiles = allFiles + getListOfFiles(fullPath)
                    else:
                        allFiles.append(fullPath)
                
    return allFiles

In [38]:
summer_photos = getListOfFiles(SUMMER_21_IMAGES)

Ignoring images of trees away from power lines.

Ignoring images of trees away from power lines.

Ignoring images of trees away from power lines.



In [41]:
summer_photos

['../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\15_probable.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\15_probable_crack.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\16_possible.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\17_improbable.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\18_possible.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\18_possible_codominant_stems.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\19_improbable.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\1_possible.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\1_possible_dead_branch.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\20_possible_decay.jpg',
 '../../data/raw/Summer 2021 AI Photos\\Amanda\\Near power lines\\20_possible_decay2.jpg',
 '../../data/raw/Summer 20

In [56]:
summer_photos[140].split('_')[0].split('\\')[1].split('/')[-1]

'Ryan'

In [78]:
def splitIndexDescrArb():
    index_list = []
    description_list = []
    arborist_list = []
    for fn in summer_photos:
        if ('_' in fn):
            index = fn.split('_')[0].split('\\')[-1]
            description = fn.split('_')[1]
            arborist = fn.split('_')[0].split('\\')[1].split('/')[-1]
        else:
            index = fn.split('-')[0].split('\\')[-1].rstrip(' ')
            description = fn.split('-')[1]
            arborist = fn.split('_')[0].split('\\')[1].split('/')[-1]        
        index = int(index)
        index_list.append(index)
        description_list.append(description)
        arborist_list.append(arborist)
    return index_list, description_list, arborist_list

In [79]:
index_list, description_list, arborist_list = splitIndexDescrArb()

In [80]:
df = pd.DataFrame([summer_photos, arborist_list, index_list, description_list])# #], columns=['Index_Label', 'Filename'])
df = df.transpose()
df.columns=['Filename','Arborist', 'Index_Label', 'Description']


In [111]:
df.Index_Label.unique()

array([15, 16, 17, 18, 19, 1, 20, 21, 29, 2, 30, 31, 32, 33, 36, 37, 38,
       39, 40, 41, 10, 11, 12, 13, 14, 22, 23, 24, 25, 26, 27, 28, 3, 34,
       35, 4, 42, 43, 44, 45, 46, 47, 48, 49, 5, 50, 51, 52, 53, 54, 55,
       56, 57, 58, 59, 6, 60, 61, 62, 63, 64, 65, 66, 7, 8, 9, 100, 101,
       102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
       115, 116, 117, 118, 119, 120, 121, 67, 68, 69, 70, 71, 72, 73, 74,
       75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
       92, 93, 94, 95, 96, 97, 98, 99], dtype=object)

In [108]:
len(df)

271

In [69]:
dfex = df.loc[df.Index_Label == 44] 
dfex = dfex.loc[df.Arborist == "ryan"]

In [70]:
good_index_list = []
for r in dfex.index:
    if 'closeup' not in dfex.loc[r, 'Description']:
        good_index_list.append(r)

In [75]:
good_index_list = [188, 189]

In [None]:
dfex[['DescLength']] = 1

In [None]:
dfex[['DescLength']]   = dfex.Description.str.len()

In [215]:
dfex.DescLength.idxmin()

188

In [None]:
unique_image_list = []
for arborist in df.Arborist.unique():
    print(arborist)
    dfsub = df.loc[df.Arborist == arborist]
    print(dfsub.Index_Label.unique())
    for index in dfsub.Index_Label.unique():
        dfsub2 = dfsub.loc[dfsub.Index_Label == index]
        if len(dfsub2) == 1:
            selected_filename = dfsub2.Filename.tolist()[0]
        else:
            good_index_list = []
            for r in dfsub2.index:
                #print(dfsub2.loc[r, 'Description'])
                if 'closeup' not in dfsub2.loc[r, 'Description']:
                    good_index_list.append(r)
            print("Non closeup names; good index list:")
            print(good_index_list)
            if len(good_index_list) == 1:
                selected_filename = dfsub2.loc[good_index_list, 'Filename'].tolist()[0]
            else:
                for i in good_index_list:
                    testdf = dfsub2.loc[good_index_list]
                    testdf[['DescriptionLength']] = testdf.Description.str.len()
                    selected_filename = testdf.loc[testdf.DescriptionLength.idxmin(), 'Filename']
                #print("Multiple cases without closeup found. Expand conditions.")
        print(selected_filename)                
        unique_image_list.append(selected_filename)

In [240]:
unique_image_list

['../../data/raw/summer21/amanda\\15_probable_crack.jpg',
 '../../data/raw/summer21/amanda\\16_possible.jpg',
 '../../data/raw/summer21/amanda\\17_improbable.jpg',
 '../../data/raw/summer21/amanda\\18_possible_codominant_stems.jpg',
 '../../data/raw/summer21/amanda\\19_improbable.jpg',
 '../../data/raw/summer21/amanda\\1_possible_dead_branch.jpg',
 '../../data/raw/summer21/amanda\\20_possible_decay.jpg',
 '../../data/raw/summer21/amanda\\21_probable.jpg',
 '../../data/raw/summer21/amanda\\29_possible_codominant_stems.jpg',
 '../../data/raw/summer21/amanda\\2_improbable.jpg',
 '../../data/raw/summer21/amanda\\30_improbable.jpg',
 '../../data/raw/summer21/amanda\\31_improbable.jpg',
 '../../data/raw/summer21/amanda\\32_possible_codominant_stems.jpg',
 '../../data/raw/summer21/amanda\\33_hidden_defect.jpg',
 '../../data/raw/summer21/amanda\\36_improbable.jpg',
 '../../data/raw/summer21/amanda\\37_probable_dead_branch.jpg',
 '../../data/raw/summer21/amanda\\38_improbable.jpg',
 '../../data

In [157]:
unique_image_list

[30    ../../data/raw/summer21/ari\1 - Probable.jpg
 Name: Filename, dtype: object]