In [None]:
#Print Figures inline
%matplotlib inline

#Import required python Libraries
import os
import glob
import csv

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import cv2

In [None]:
# Folders to process, given relative to the location of this notebook.
folderNames = ["../Original Paper Scans/Black background/Main results/Size1/", "../Original Paper Scans/Black background/Main results/Size2/"]

# Column names of the CSV file, in output order.
fieldnames = ['order', "#", "test", "bird", "block", "area", "perimeter", "height", "width", "areaMM", "perimeterMM", "heightMM", "widthMM"] # "areaComparedToLarge", "areaComparedToSmall", "matchLarge", "matchSmall"


#
# The rest of the code in this section was used for testing and development.
# It was not used in the final analysis, but was kept in case it proves useful later.
#

# Folders used during code development, but not in the final analysis.
# They make it possible to choose which set of scans to process.
folderNameBlock = ["../Original Paper Scans/Black background/Main results/Baseline/"]
folderNamesTest = ["../Original Paper Scans/Black background/Main results/test/"]
folderNameDiscards = ["../Original Paper Scans/Black background/Discards/"]
folderNameExamples = ["../Original Paper Scans/Black background/Examples/"]
folderNameKnownSizes = ["../Original Paper Scans/Black background/Known Sizes/"]

# Scan in the details for the Large template
imgLarge = cv2.imread('../Original Paper Scans/Black background/Examples/Examples-Large-B0-12.png')
if imgLarge is None:
    # cv2.imread reports a missing/unreadable file by returning None instead of raising
    raise IOError("Could not read the Large template image")
img_gray_large = cv2.cvtColor(imgLarge, cv2.COLOR_BGR2GRAY)
ret_large, thresh_large = cv2.threshold(img_gray_large, 0, 255, cv2.THRESH_BINARY)
# findContours returns (image, contours, hierarchy) in OpenCV 3 but (contours, hierarchy)
# in OpenCV 4; taking the last two entries works with both.
contours_large, hierarchy_large = cv2.findContours(thresh_large, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# NOTE(review): index 1 presumably skips an outer contour at index 0 - confirm against the template scan
cntLarge = contours_large[1]
areaLarge = cv2.contourArea(cntLarge)
perimeterLarge = cv2.arcLength(cntLarge, True)

# Scan in the details for the Small template
imgSmall = cv2.imread('../Original Paper Scans/Black background/Examples/Examples-Small-B0-12.png')
if imgSmall is None:
    # cv2.imread reports a missing/unreadable file by returning None instead of raising
    raise IOError("Could not read the Small template image")
img_gray_small = cv2.cvtColor(imgSmall, cv2.COLOR_BGR2GRAY)
ret_small, thresh_small = cv2.threshold(img_gray_small, 0, 255, cv2.THRESH_BINARY)
contours_small, hierarchy_small = cv2.findContours(thresh_small, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# NOTE(review): index 1 presumably skips an outer contour at index 0 - confirm against the template scan
cntSmall = contours_small[1]
areaSmall = cv2.contourArea(cntSmall)
perimeterSmall = cv2.arcLength(cntSmall, True)

In [None]:
#Define the function for processing a collection of pieces
def parseFile (order, writer, filename):
    
    #Extract Properties from the Filename
    splitFilename = filename.split("-")
    birdName = splitFilename[-3]
    blockName = splitFilename[-2]
    testName = splitFilename[-4].split('/')[-1]
    numOfPieces = splitFilename[-1].split(".")[-2]
    print(filename, numOfPieces)
    
    
    #Process the file
    img = cv2.imread(filename)
    img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(img_gray, 0, 255,cv2.THRESH_BINARY)
    
    th2 = cv2.adaptiveThreshold(img_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    
    image, contours,hierarchy = cv2.findContours(th2,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
    
    number = 1
    for cnt in contours:
        if cv2.contourArea(cnt) > 1000:
            cv2.drawContours(img, [cnt], 0, (0,255,0), 3)
            x,y,w,h = cv2.boundingRect(cnt)
            img = cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)

            rect = cv2.minAreaRect(cnt)
            box = cv2.boxPoints(rect)
            box = np.int0(box)
            img = cv2.drawContours(img,[box],0,(0,0,255),2)

            #Bottom Right
            x1 = box[0][0]
            y1 = box[0][1]
            #Bottom Left
            x2 = box[1][0]
            y2 = box[1][1]
            #Top Left
            x3 = box[2][0]
            y3 = box[2][1]
            #Top Right
            x4 = box[3][0]
            y4 = box[3][1]

            height = abs(((x3-x4)^2 + (y3-y4)^2)^(1/2))
            width = abs(((x2-x3)^2 + (y2-y3)^2)^(1/2))
            
            area = cv2.contourArea(cnt)
            perimeter = cv2.arcLength(cnt,True)
            
            # Depreciated, but left incase the idea is interesting to someone
            areaComparedToLarge = (area/ cv2.contourArea(cntLarge))
            areaComparedToSmall = (area / cv2.contourArea(cntSmall))
            
            # Depreciated, but left incase the idea is interesting to someone
            matchLarge = cv2.matchShapes(cnt,cntLarge,1,0.0)
            matchSmall = cv2.matchShapes(cnt,cntSmall,1,0.0)


            #Draw key dimenions on the image
            #cv2.circle(img,(x1,y1), 10, (0,0,255), -1)
            #cv2.circle(img,(x2,y2), 10, (0,0,255), -1)
            #cv2.line(img,(x3,y3),(x4,y4),(255,0,0),1)

            #cv2.circle(img,(x3,y3), 10, (255,0,255), -1)
            #cv2.circle(img,(x4,y4), 10, (0,0,255), -1)
            #cv2.line(img,(x3,y3),(x2,y2),(0,255,0),1)
            
            
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(img,str(number),((x3+x4)/2, (y3+y2)/2), font, 4,(255,255,255),2,cv2.LINE_AA)

            #Write output to CSV file
            # Includes conversions from pixels to mm. This number was determined empiracally through scanning in a range of known sized objects on the same scanner used for the paper fragments
            row = [order, number, testName, birdName, blockName, area, perimeter, height, width, (area / 139.24), (perimeter/11.8), (height/11.8), (width/11.8)] # areaComparedToLarge, areaComparedToSmall, matchLarge, matchSmall
            writer.writerow(dict(zip(fieldnames, row)))
            number = number + 1
            order = order + 1
            
    #plt.imshow(img)
    cv2.imwrite('../Processed Images/'+str(testName)+'/'+str(testName)+'-'+str(birdName)+'-'+str(blockName)+'-'+str(numOfPieces)+'.png',img)
    #cv2.imshow('image',img)
    #cv2.waitKey(0)
    #cv2.destroyAllWindows()
            
    #plt.show()
    return order

In [None]:
# Open the CSV file that will contain the output data
outputPath = "../Generated Data/output.csv" #(output.csv)(examples.csv)(discards.csv)

# Make sure the output folder exists so that opening the file does not fail
outputDir = os.path.dirname(outputPath)
if not os.path.isdir(outputDir):
    os.makedirs(outputDir)

# The csv module needs a binary file on Python 2, but a text file opened with
# newline='' on Python 3. `str is bytes` is only true on Python 2.
if str is bytes:
    out_file = open(outputPath, "wb")
else:
    out_file = open(outputPath, "w", newline="")

with out_file:
    writer = csv.DictWriter(out_file, delimiter=',', fieldnames=fieldnames)
    writer.writeheader()

    order = 1
    for foldername in folderNames: #folderNameKnownSizes (folderNames)(folderNameDiscards)(folderNameExamples)(folderNameKnownSizes)
        # Generate the list of all the files to process. glob returns files in an
        # unspecified order, so sort them to keep the 'order' column reproducible.
        files = sorted(glob.glob(os.path.join(foldername, "*.png")))

        # Go through each source file, one at a time
        for filename in files:
            order = parseFile(order, writer, filename)

            