preparing labels and imagery for use with tensorflow

In [1]:
# takes a set of images and labels and prepares a structure that can be fed into
# inception v3 (which can handle imagenet 2012 inputs)
# this involves
# finding the coordinates of labelled points and the corresponding image
#  cropping at most 299 x 299 pixels around each labeled point.
# storing each cropped image in directories named after the type of label
import pandas as pd
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import string
import glob

%matplotlib inline

In [2]:
#training_path = '/Users/opizarro/training_data/max-woodside'
training_path = '/Volumes/LZD1601/training_data/benthoz_99patch_flips'

def maybe_makedir(dirname, force=False):
  if os.path.isdir(dirname) and not force:
    # You may override by setting force=True.
    print('%s already present - Skipping making dir' % (dirname))
  else:
    print('Making dir %s.' % dirname)
    os.makedirs(dirname)
  return 

maybe_makedir(training_path)

Making dir /Volumes/LZD1601/training_data/benthoz_99patch_flips.


In [6]:
def save_patch(crop_image,imagename,x,y,halfsize,imlabel,training_path):
    # save cropped image in corresponding directory
    crop_name = imagename + '_' + str(x) + '_' + str(y) + '_' + str(halfsize) + '.jpg'
    fullcrop_name = os.path.join(training_path,imlabel,crop_name)
    cv2.imwrite(fullcrop_name,crop_image)

def save_patch_4rot(crop_image,imagename,x,y,halfsize,imlabel,training_path):
    
    (h, w) = crop_image.shape[:2]
    center = (w / 2, h / 2)
    
    angles = [0,90,180,270]
    for angle in angles:
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated = cv2.warpAffine(crop_image, M, (w, h))
        save_patch(rotated,imagename+'_'+str(angle),x,y,halfsize,imlabel,training_path)
    

In [11]:
#survey_sheet = '/Users/opizarro/max-woodside/QN01/MSA157-40_QN01.xls'
#images_location = '/Users/opizarro/max-woodside/QN01/Stations'

def process_sheet(survey_sheet, images_location):
    #df = pd.read_excel(survey_sheet)
    df = pd.read_csv(survey_sheet)
    df.head()
    
#df.columns

#for each row, 
#1)make a directory using Species label (if it doesn't exist already),
#2)cut out the part of the image that corresponds to a 299x299 (at most) window around the X Y coords
#3)save the cropped inmage into the corresponding directory
    class_label_set = set(df['code'])
    print class_label_set
    class_label_list = list(class_label_set)
    print('Number of classes %i') % len(class_label_list)
    print('Testing...')
    for imclass in class_label_list:
        #imclasses = string.split(imclass,':')
        #imclass_short = imclasses[0]
        #print('*** imclass %s shortened to %s') % (imclass, imclass_short)
        print('*** imclass %s') % imclass
        maybe_makedir(os.path.join(training_path, imclass))
    
    image_set = set(df['web_location'])
    xdim = 1360
    ydim = 1024
    halfsize = (99-1)/2
    image_list = list(image_set)
    print("Number of images %i, number of entries %i") % (len(image_list),len(df['web_location']))

    prior_imagename = ''
    
    for row in df.iterrows():
    #print(row[1])
    # read image
        imagename = os.path.basename(row[1].web_location)
        fullimagename = os.path.join(images_location,imagename)
        if os.path.isfile(fullimagename):
            #print('reading %s') % fullimagename
            if fullimagename != prior_imagename:
                # only read image if its a different one from the one we've been using
                image = cv2.imread(os.path.join(images_location,imagename))
                #print('image shape %i %i %i') % image.shape
                prior_imagename = fullimagename
        
    
            #cv2.imshow("original",image)
            #plt.figure(1)
            #plt.imshow(image)
            # read label
            #print('row label %s') % row[1].code
            #imlabels = string.split(row[1].name,':')
            #imlabel = imlabels[0]
            imlabel = row[1].code
            #print ('entry %s has label %s') % (imagename, imlabel)
   
   
            # find centre points
            x = round(row[1].x*xdim)
            y = round(row[1].y*ydim)
        
         
            #print('x %i, y %i, xdim %i, ydim %i') % (x,y,xdim,ydim)
            # check dimensions correpond
            if xdim != image.shape[1] or ydim !=image.shape[0]:
                print('WARNING: actual image size and size in database not consistent')
    
    
            # draw circle
            #cv2.circle(image,(x,y),11,(0,255,0),-1)
    
            # crop around centre point
            dx = min(min(x,halfsize),min(halfsize,xdim-x));
            dy = min(min(y,halfsize),min(halfsize,ydim-y));
            hs = min(dx,dy)
            # at least 81 pixels across to have some context
            if hs > 40 : 
                crop_image = image[y-hs:y+hs, x-hs:x+hs]
        
                # generate rotated versions
                # save with unique name
                
                # save cropped image in corresponding directory
                #crop_name = imagename + '_' + str(x) + '_' + str(y) + '_' + str(halfsize) + '.jpg'
                #fullcrop_name = os.path.join(training_path,imlabel,crop_name)
                #cv2.imwrite(fullcrop_name,crop_image)
                save_patch_4rot(crop_image,imagename,x,y,halfsize,imlabel,training_path)
                crop_im_fliplr = cv2.flip(crop_image,1)
                save_patch_4rot(crop_im_fliplr,imagename+'_lr',x,y,halfsize,imlabel,training_path)
                #crop_im_flipud = cv2.flip(crop_image,0)
                #save_patch_4rot(crop_im_flipud,imagename+'_ud',x,y,halfsize,imlabel,training_path)
                
    
                if 0 and imlabel != 'Sand' and imlabel != 'Turf':
                    cutstr =  ('this dot %s has label %s') % (imagename, imlabel)
                    titstr =  ('x %i, y %i, xdim %i, ydim %i, hs %i') % (x,y,xdim,ydim,hs)
                    plt.figure(1)
                    plt.imshow(image)
                    plt.title(titstr)
                    plt.figure(2)
                    plt.imshow(crop_image)
                    plt.title(cutstr)
        
                if row[0]%100 == 0:
                    print("processing entry " + str(row[0]) + "\r")
        else:
            if row[0]%100 == 0:
                    print("processing entry " + str(row[0]) + "\r")
                    print('**** WARNING: could not find image %s') % fullimagename

In [15]:
# each campaign folder in benthoz2015 has a cvs file and the related images

base_location = '/Volumes/LZD1601/benthoz2015'

campaign_list=os.listdir(base_location)
for campaign in campaign_list:
    if ("W") in campaign:
        print("processing campaign {}".format(campaign))
        full_campaign=os.path.join(base_location,campaign)
        if os.path.isdir(full_campaign):
            image_sheet=glob.glob(os.path.join(full_campaign,'*.csv'))
            print os.path.join(full_campaign,'*.csv')
            if len(image_sheet)>0:
                process_sheet(image_sheet[0], full_campaign)
            else:
                print('no csv found in %s') % campaign
        else:
            print('%s is not a folder') % campaign
    else:
        print("not processing campaign {}".format(campaign))

processing campaign BENTHOZ-2015-imagelist.csv
BENTHOZ-2015-imagelist.csv is not a folder
processing campaign ._BENTHOZ-2015-imagelist.csv
._BENTHOZ-2015-imagelist.csv is not a folder
not processing campaign PS201012
not processing campaign Batemans201011
not processing campaign SolitaryIs201208
not processing campaign PS201211
not processing campaign Batemans201211
not processing campaign SEQueensland201010
processing campaign Tasmania200810
/Volumes/LZD1601/benthoz2015/Tasmania200810/*.csv
set(['CNCAZ', 'SPHT', 'CSSOA', 'SPMST', 'SUS', 'SPHC', 'CBFFC', 'MASG', 'MALAR', 'MALCB', 'MOG', 'CBBFA', 'BRYS', 'MAEFR', 'MAECB', 'WPOT', 'AUC', 'UNS', 'ESS', 'F', 'MAF', 'EFU', 'BIOTA', 'ESUR', 'MAENR', 'SPMB', 'SPMSI', 'MA', 'SPMC', 'SPCE', 'MO', 'CNHYD', 'AUS', 'SPEP', 'SPCC', 'MAECG', 'CBBNA', 'SPEL', 'SPMR', 'CBW', 'BRYHF', 'EOBSS', 'SC', 'SUPB', 'SPEB'])
Number of classes 45
Testing...
*** imclass CNCAZ
/Volumes/LZD1601/training_data/benthoz_99patch_flips/CNCAZ already present - Skipping ma



processing entry 0
processing entry 100
processing entry 200
processing entry 300
processing entry 400
processing entry 500
processing entry 600
processing entry 700
processing entry 800
processing entry 900
processing entry 1100
processing entry 1300
processing entry 1400
processing entry 1500
processing entry 1600
processing entry 1800
processing entry 1900
processing entry 2000
processing entry 2100
processing entry 2200
processing entry 2300
processing entry 2400
processing entry 2600
processing entry 2700
processing entry 2800
processing entry 2900
processing entry 3000
processing entry 3100
processing entry 3200
processing entry 3300
processing entry 3400
processing entry 3500
processing entry 3600
processing entry 3800
processing entry 3900
processing entry 4000
processing entry 4200
processing entry 4300
processing entry 4400
processing entry 4500
processing entry 4600
processing entry 4700
processing entry 4900
processing entry 5000
processing entry 5100
processing entry 5200


KeyboardInterrupt: 