Preparing labels and imagery for use with TensorFlow

In [22]:
# Takes a set of images and labels and prepares a directory structure that can be
# fed into Inception v3 (which can handle ImageNet 2012 inputs).
# This involves:
#  - finding the coordinates of the labelled points and the corresponding image,
#  - cropping a patch of at most 299 x 299 pixels around each labelled point,
#  - storing each cropped image in a directory named after its label type.
import pandas as pd
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import string
import glob
import random

%matplotlib inline

In [23]:
# Root directory that receives the cropped patches; each label gets its own
# sub-directory underneath it. Earlier locations are kept for reference.
#training_path = '/Users/opizarro/training_data/max-woodside'
#training_path = '/Volumes/LZD1601/training_data/ziggy_99patch_flips'
training_path = '/home/opizarro/data/training_data/ziggy_299patch_4rot'

def maybe_makedir(dirname, force=False):
  """Create directory ``dirname`` (and any missing parents) if needed.

  Args:
    dirname: path of the directory to create.
    force: when True, go through the creation step even if the directory
      is already present. An existing directory is left untouched.

  Returns:
    None.
  """
  if os.path.isdir(dirname) and not force:
    # You may override by setting force=True.
    print('%s already present - Skipping making dir' % (dirname))
  else:
    print('Making dir %s.' % dirname)
    # os.makedirs raises OSError when the leaf directory already exists,
    # which made force=True crash on an existing directory.
    if not os.path.isdir(dirname):
      os.makedirs(dirname)
  return

# Make sure the output root exists before anything is written below it.
maybe_makedir(training_path)

Making dir /home/opizarro/data/training_data/ziggy_299patch_4rot.


In [24]:
def save_patch(crop_image,imagename,x,y,halfsize,imlabel,training_path):
    """Write one cropped patch as a JPEG inside the directory of its label.

    The file is stored as
    ``<training_path>/<imlabel>/<imagename>_<x>_<y>_<halfsize>.jpg``.

    Args:
        crop_image: image array to write.
        imagename: base name of the source image (no extension).
        x, y: coordinates of the labelled point, used only in the file name.
        halfsize: half-size of the patch, used only in the file name.
        imlabel: label of the point; selects the sub-directory.
        training_path: root directory of the training set.

    Returns:
        None. A warning is printed when the file could not be written.
    """
    crop_name = imagename + '_' + str(x) + '_' + str(y) + '_' + str(halfsize) + '.jpg'
    fullcrop_name = os.path.join(training_path,imlabel,crop_name)
    # cv2.imwrite reports failure (e.g. a missing label directory) through
    # its boolean return value, so check it instead of losing patches silently.
    if not cv2.imwrite(fullcrop_name,crop_image):
        print('WARNING: could not write patch %s' % fullcrop_name)

def save_patch_4rot(crop_image,imagename,x,y,halfsize,imlabel,training_path):
    """Save ``crop_image`` rotated by 0, 90, 180 and 270 degrees.

    Each rotated copy is written with ``save_patch`` under the name
    ``imagename + '_' + str(angle)``; the output keeps the size of the input.

    Args:
        crop_image: image array to rotate and save.
        imagename: base name of the source image (no extension).
        x, y: coordinates of the labelled point, forwarded to ``save_patch``.
        halfsize: half-size of the patch, forwarded to ``save_patch``.
        imlabel: label of the point, forwarded to ``save_patch``.
        training_path: root directory of the training set.

    Returns:
        None.
    """
    (h, w) = crop_image.shape[:2]
    # Rotate about the centre of the pixel grid, ((w-1)/2, (h-1)/2), so that
    # quarter turns map pixels onto pixels. The former (w / 2, h / 2) was only
    # correct for odd sizes under Python 2 integer division.
    center = ((w - 1) / 2.0, (h - 1) / 2.0)

    for angle in (0, 90, 180, 270):
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated = cv2.warpAffine(crop_image, M, (w, h))
        save_patch(rotated,imagename+'_'+str(angle),x,y,halfsize,imlabel,training_path)
    

In [25]:
# using Bewley's code from the squidle repo
import sys 
import os
sys.path.append('/home/opizarro/git/visnavml/prep_survey_images')
import cpc

# Expected width and height, in pixels, of the source images.
xdim, ydim = 1360, 1024

# Side length of the square patches and the number of pixels on either side
# of the centre pixel.
patchsize = 299
halfsize = (patchsize - 1) // 2
# Width of the border added around each image before cropping.
padsize = halfsize

# Root folder handed to the CPC parser (presumably holds the CPC annotation
# files next to their images - confirm against cpc.CPCFolderParser).
# Earlier locations are kept for reference.
#ziggy_root = '/Volumes/LZD1601/AUV-2010_cpc_img'
#ziggy_root = '/Users/opizarro/AUV-2010_cpc_img'
ziggy_root = '/media/opizarro/LZD1601/AUV-2010_cpc_img'
# Alternative: parse a single sub-folder instead of the whole tree.
#cp = cpc.CPCFolderParser(os.path.join(ziggy_root,'BB_site1sz_15-1'))
# cp.bigdf is the table of labelled points consumed by the cells below.
cp = cpc.CPCFolderParser(ziggy_root)
# Inspection helpers:
#cp.bigdf.head()
#print cp.image_list
#print cp.cpc_list
#print cp.bigdf['label_number']


Pre-make the directories for the different labels

In [26]:

# Collect the distinct label codes and pre-create one output directory per label.
class_label_set = set(cp.bigdf['cpc_code'])
print(class_label_set)
class_label_list = list(class_label_set)
# Format inside the call: print('...') % value only works when print is the
# Python 2 statement, and fails with a TypeError when print is a function.
print('Number of classes %i' % len(class_label_list))
print('Testing...')
for imclass in class_label_list:
    #imclasses = string.split(imclass,':')
    #imclass_short = imclasses[0]
    #print('*** imclass %s shortened to %s' % (imclass, imclass_short))
    print('*** imclass %s' % imclass)
    maybe_makedir(os.path.join(training_path, imclass))

set(['P', 'S', 'MF', 'PAD', 'SARG'])
Number of classes 5
Testing...
*** imclass P
Making dir /home/opizarro/data/training_data/ziggy_299patch_4rot/P.
*** imclass S
Making dir /home/opizarro/data/training_data/ziggy_299patch_4rot/S.
*** imclass MF
Making dir /home/opizarro/data/training_data/ziggy_299patch_4rot/MF.
*** imclass PAD
Making dir /home/opizarro/data/training_data/ziggy_299patch_4rot/PAD.
*** imclass SARG
Making dir /home/opizarro/data/training_data/ziggy_299patch_4rot/SARG.


In [27]:
def process_cpc(row, show_debug=False):
    """Crop the patch around one labelled point and save its four rotations.

    The source image is padded by ``padsize`` pixels on every side with a
    reflected border, so a full ``2*halfsize+1`` square can be cut out even
    for points close to the image edge.

    Args:
        row: an ``(index, record)`` pair as produced by
            ``DataFrame.iterrows()``. The record must expose ``image_name``,
            ``folder``, ``cpc_code``, ``fraction_from_image_left`` and
            ``fraction_from_image_top``.
        show_debug: when True, display the source image and the cropped
            patch with matplotlib. Off by default.

    Returns:
        None. Patches are written through ``save_patch_4rot``; a row whose
        image is missing or unreadable, or whose patch is incomplete, is
        skipped after printing a warning.
    """
    record = row[1]
    imagename = record.image_name
    fullimagename = os.path.join(record.folder, imagename+'.jpg')

    if not os.path.isfile(fullimagename):
        print('**** WARNING: could not find image %s' % fullimagename)
        return

    image = cv2.imread(fullimagename)
    if image is None:
        # cv2.imread returns None instead of raising when it cannot decode a file.
        print('**** WARNING: could not read image %s' % fullimagename)
        return

    # check dimensions correspond
    imheight, imwidth = image.shape[:2]
    if xdim != imwidth or ydim != imheight:
        print('WARNING: actual image size and size in database not consistent')

    reflect101 = cv2.copyMakeBorder(image,padsize,padsize,padsize,padsize,cv2.BORDER_REFLECT_101)

    imlabel = record.cpc_code

    # Centre of the patch in padded-image coordinates. The fractions are
    # scaled by the size of the image actually loaded, so the point stays on
    # the right spot even when the image is not xdim x ydim.
    x = int(round(record.fraction_from_image_left*(imwidth-1))+padsize)
    y = int(round(record.fraction_from_image_top*(imheight-1))+padsize)

    hs = halfsize
    crop_image = reflect101[y-hs:y+hs+1, x-hs:x+hs+1]
    if crop_image.shape[0] != 2*hs+1 or crop_image.shape[1] != 2*hs+1:
        # Only reachable when the point lies outside the image (fraction
        # outside [0, 1]); the slice would then be truncated or empty.
        print('WARNING: incomplete patch at x %i, y %i in %s - skipping' % (x, y, fullimagename))
        return

    # save the patch and its rotated versions under unique names
    save_patch_4rot(crop_image,imagename,x,y,halfsize,imlabel,training_path)
    #crop_im_fliplr = cv2.flip(crop_image,1)
    #save_patch_4rot(crop_im_fliplr,imagename+'_lr',x,y,halfsize,imlabel,training_path)

    if show_debug:
        cutstr = ('this dot %s has label %s') % (imagename, imlabel)
        titstr = ('x %i, y %i, xdim %i, ydim %i, hs %i') % (x,y,imwidth,imheight,hs)
        plt.figure(1)
        plt.imshow(image)
        plt.title(titstr)
        plt.show()
        plt.figure(2)
        plt.imshow(crop_image)
        plt.title(cutstr)
        plt.show()

    # Report roughly one row in a thousand as a cheap progress indicator.
    if (random.random() < 0.001):
        print("processing " + fullimagename)
    

In [28]:

from joblib import Parallel, delayed
import multiprocessing

# One worker per CPU core reported by the OS.
num_cores = multiprocessing.cpu_count()

# Serial equivalent of the parallel call below:
#for row in cp.bigdf.iterrows():
#    process_cpc(row)
# Process every row of cp.bigdf in parallel; `results` collects the return
# value of each process_cpc call.
results = Parallel(n_jobs=num_cores)(delayed(process_cpc)(row) for row in cp.bigdf.iterrows())            
    
# add weblocation (basename)
# 

processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site1sz_15-1/PR_20101121_042204_688_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site1sz_15-1/PR_20101121_042216_716_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site1sz_15-1/PR_20101121_042507_673_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site1sz_15-1/PR_20101121_043944_699_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site1sz_15-1/PR_20101121_044136_728_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site1sz_15-1/PR_20101121_044551_701_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site1sz_15-1/PR_20101121_044830_699_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site2guz_14-2/PR_20101120_232744_622_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site2guz_14-2/PR_20101120_232846_642_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/BB_site2guz_14-2/PR_20101120_232859_632_LC16.j

processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_005319_728_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_005419_772_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_005819_764_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_010119_777_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_010519_781_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_011119_807_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_011719_847_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_011819_879_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-2/PR_20101219_011819_879_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_b11-3/PR_20101219_015220_001_LC16.jpg
processing /media/opizarro/LZD1601/AUV-2010_cpc_img/PS_f3-1/PR_2010121