### Plan:

A1 - A6 are the previous sub-algorithms that we worked with:
- A1: Pots with no or one handle only, e.g. Alabastron.
- A2: Pots with two handles that stick out, e.g. Bell-Kraters.
- A3: Pots with two handles that loop back on to the pot, e.g. Amphorae
- A4: Cups, e.g. Little Master Band Cups.
- A5: Pots with one handle right at the top of the pot, e.g. Askos.
- A6: Pots (including cups) that have two handles sticking out, right at the top, e.g. Kotyles.

We have sorted/changed these into the following four algorithms.

##### Algorithm 1:
- Uses the previous (A1,"left").
- Usually works well for pots with only one handle on the right, or pots with no handle.

##### Algorithm 2:
- Uses the previous (A1,"right").
- Usually works well for pots with only one handle on the left, or pots with no handle.

##### Algorithm 3:
- Uses the previous A2, A4 and A6.
- Usually works well for cups and all vases with a handle sticking out on each side.

##### Algorithm 4:
- Uses the previous A3 and A5.
- Usually works well for pots with handles on each side that loop round to the pot (amphora-like handles) or pots with one handle on the top of the pot (e.g. askos vases).

### Functions

In [None]:
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
from math import sqrt
from scipy import ndimage
from skimage.filters import threshold_otsu
from skimage import measure
import os
from glob import glob
import re
import itertools as it
import csv
from tqdm import tqdm_notebook as tqdm
from PIL import Image
from skimage import data, img_as_float
from copy import deepcopy
from matplotlib.transforms import Bbox


#### Image Binarization

In [None]:
def binary_colour(img):
    # Pushes the pixels of a pot image towards pure black / pure white.
    # Input: image, either colour (each pixel is a sequence of channels) or
    #        single-channel with np.uint8 pixels.
    # Output: a copy of the image in which dark pixels are black, light pixels are white and
    #         pixels lying between black neighbours have been filled in black. Pixels whose
    #         value lies between the two thresholds and that are not filled keep their value.

    # 1) Image size and working copy
    ################################
    rows = len(img)
    cols = len(img[0])
    result = deepcopy(img)

    # 2) Thresholds
    ###############
    # Anything darker than dark_cut becomes black, anything lighter than light_cut becomes white.
    # Images that are dark overall get lower thresholds.
    if np.mean(img) < 110:
        dark_cut, light_cut = 110, 120
    else:
        dark_cut, light_cut = 185, 215

    # A pixel that is a bare np.uint8 means the image has a single channel.
    greyscale = isinstance(img[1][2], np.uint8)

    def paint(r, c, value):
        # Write `value` into pixel (r, c), using the layout of the image.
        result[r][c] = value if greyscale else [value, value, value]

    def is_black(r, c):
        # A pixel counts as black when its channel sum is zero.
        return np.sum(result[r][c]) == 0

    # 3) First pass: threshold every pixel of the ORIGINAL image
    ############################################################
    for r in range(rows):
        for c in range(cols):
            pixel = img[r][c]
            channels = [pixel] if greyscale else [pixel[0], pixel[1], pixel[2]]
            if all(ch < dark_cut for ch in channels):
                paint(r, c, 0)
            if all(ch > light_cut for ch in channels):
                paint(r, c, 255)

    # 4) Second pass: fill pixels squeezed between two black direct neighbours
    ##########################################################################
    # The pass edits `result` in place, so pixels filled earlier influence later ones.
    close_pairs = ((1, 1), (1, 0), (0, 1), (1, -1))
    for r in range(1, rows - 1):
        for c in range(1, cols - 1):
            if any(is_black(r + dr, c + dc) and is_black(r - dr, c - dc)
                   for dr, dc in close_pairs):
                paint(r, c, 0)

    # 5) Larger neighbourhood: 1% of the image size (rounded up) in each direction
    ###############################################################################
    by = int(np.ceil(len(result) * 0.01))
    bx = int(np.ceil(len(result[1]) * 0.01))

    # 6) Third pass: same idea as 4), using the larger vertical / horizontal offsets
    ################################################################################
    far_pairs = ((by, 0), (0, bx))
    for r in range(by, rows - by):
        for c in range(bx, cols - bx):
            if any(is_black(r + dr, c + dc) and is_black(r - dr, c - dc)
                   for dr, dc in far_pairs):
                paint(r, c, 0)

    return result

In [None]:
def binary_colour_matamados(img):
    # Binarizes Matamados pot images, which are primarily brown on a white background.
    # Input: colour image of shape (rows, columns, channels).
    # Output: B+W image as a numpy array with the same shape and dtype as the input;
    #         the input itself is not modified.
    # Raises: ValueError if the image does not have three dimensions.

    pixels = np.asarray(img)
    if pixels.ndim != 3:
        raise ValueError("binary_colour_matamados expects a colour image of shape (rows, columns, channels)")

    # 1) Get image size
    ###################
    y_len, x_len = pixels.shape[:2]
    img2 = pixels.copy()

    # 2) Binarize image
    ###################
    # Pixels that are very light (channel sum >= 700) are turned white, all other pixels black.
    # ndarray.sum widens small integer dtypes before adding, whereas Python's sum() over uint8
    # channels can wrap around on NumPy versions whose scalar arithmetic stays in uint8.
    dark = pixels.sum(axis=2) < 700
    img2[dark] = 0
    img2[~dark] = 255

    # 3) Iterate through pixels once more to binarize.
    ##################################################
    # A pixel lying between two black pixels (diagonally, vertically or horizontally) is turned
    # black. The pass works in place, so a pixel filled earlier counts as black for later pixels.
    # The original code branched on an undefined variable `k` here (NameError); the image is
    # always a colour image in this function, so the pixel is simply set to black.
    black = dark.copy()
    for i in range(1, y_len - 1):
        for j in range(1, x_len - 1):
            if ((black[i+1, j+1] and black[i-1, j-1])
                    or (black[i+1, j] and black[i-1, j])
                    or (black[i, j+1] and black[i, j-1])
                    or (black[i+1, j-1] and black[i-1, j+1])):
                img2[i, j] = 0
                black[i, j] = True

    return img2

#### Outline Contour

In [None]:
def get_outline_contour(img):
    # Returns the outline contour for pots that do not have amphora-like or askos-like handles.
    # Input: image
    # Output: the single contour with the most points.

    # 1) Binarize with Otsu's threshold
    ###################################
    binary = img > threshold_otsu(img)

    # 2) Trace every contour of the binary image (Marching Squares)
    ###############################################################
    contours = measure.find_contours(binary, 0.8)

    # 3) Keep the contour with the largest number of points
    #######################################################
    lengths = [len(contour) for contour in contours]
    return contours[np.argmax(lengths)]

In [None]:
def get_longest_contours_a3(img,filename):
    # This function finds the outline contour and handle contours of amphora-like pots.
    # Input: image, and the filename (only used to detect "shoulder" amphorae, which get
    #        a larger candidate group and extra positional filters).
    # Output: x1,y1 (outline contour), x2,y2 and x3,y3 (the two handle contours).
    #         x is taken from column 1 and y from column 0 of each contour.
    
    # 1) Threshold image
    #####################
    thresh = threshold_otsu(img)
    binary = img > thresh
    
    # 2) Find all contours, using Marching Squares.
    ###############################################
    cont = measure.find_contours(binary, 0.8)

    # Number of points in every contour.
    cont_ln = []
    for n, contour in enumerate(cont):
        cont_ln.append(len(contour))
    
    # 3) Define group of top longest contours.
    ##################################
    # We will be looking through the top group to find the outline contour and handle contours.
    # The group holds the 20 largest lengths for shoulder amphorae and the 10 largest otherwise.
    if "shoulder" in filename:
        longest_c = sorted(cont_ln,reverse=True)[:20]
    else:
        longest_c = sorted(cont_ln,reverse=True)[:10] 
        
    # Parallel lists describing each candidate contour:
    long_ind = []        # index of the contour in `cont`
    long_ind_rngsx = []  # x range (width)
    long_ind_rngsy = []  # y range (height)
    long_ind_mx = []     # max y
    long_ind_mn = []     # min y
    long_ind_mn_x = []   # min x
    long_ind_mx_x = []   # max x
    
    ln_x = len(img[0])
    # 4) Set bounds for location of handle contours.
    ###############################################
    # This will only be used for amphora-shoulder pots.
    # The band covers the middle fifth of the image width.
    lbound = (ln_x/2)-(ln_x/10)
    ubound = (ln_x/2)+(ln_x/10)

    # 5) Find longest contours.
    for i in range(0,len(cont_ln)):
        if cont_ln[i] in longest_c: 
            #check if contour is in the top group. If so, save the contour index and further information.
            # Membership is tested by length, so contours tying with a top length are included too.
            c = cont[i]
            x = c[:,1]
            y = c[:,0]
            mid = min(x) + ((max(x)-min(x))/2)            
            # For shoulder amphorae, contours whose horizontal midpoint lies in the central band are skipped.
            if ("shoulder" not in filename) or (("shoulder" in filename) and ((mid<lbound) or (mid>ubound))):
                long_ind.append(i)
                rngy = abs(max(y) - min(y))
                rngx = abs(max(x) - min(x))
                long_ind_rngsy.append(rngy)
                long_ind_rngsx.append(rngx)
                long_ind_mx.append(max(y))
                long_ind_mn.append(min(y))
                long_ind_mx_x.append(max(x))
                long_ind_mn_x.append(min(x))
    
    # Due to excluding contours within a certain bound (for amphora-shoulders),
    # we need to include the longest contour manually.
    if "shoulder" in filename:
        long = np.argmax(cont_ln)
        c = cont[long]
        x = c[:,1]
        y = c[:,0]
        long_ind.append(long)
        rngy = abs(max(y) - min(y))
        rngx = abs(max(x) - min(x))
        long_ind_rngsy.append(rngy)
        long_ind_rngsx.append(rngx)
        long_ind_mx.append(max(y))
        long_ind_mn.append(min(y))
        long_ind_mx_x.append(max(x))
        long_ind_mn_x.append(min(x))
               
    n = len(long_ind)
    
    # Smallest y over all candidates.
    mn = min(long_ind_mn)
    
    # Largest y range over all candidates.
    mx = max(long_ind_rngsy)
        
    # Position (within the candidate lists) of the contour with the largest y range.
    k1 = np.argmax(long_ind_rngsy)
    
    # Handle candidates must start within the first quarter of the tallest contour's y range.
    ub = long_ind_mn[k1] + (mx/4)
    
    # Default handle choice for non-shoulder pots: the first two candidates other than k1.
    inds = list(range(0,n))
    inds.remove(k1)    
    # Best (smallest) dissimilarity score found so far; pairs scoring 1000 or more are never selected.
    tot = 1000
    
    # Count the candidates that pass the handle filter with the looser mx/8 height threshold.
    rng_inds = []
    for i in range(0,n):
        if all([long_ind_rngsy[i] > mx/8,long_ind_mn[i] < ub,long_ind_mx[i] > mn, i!=k1]):
            rng_inds.append(i)
            
    # Divisor used for the minimum handle height in step 7 (non-shoulder pots).
    if len(rng_inds) >=2:
        t = 8
    else:
        t = 9
    
    # 6) Find outline contour
    #########################
    # This is the candidate with the largest y range (k1).
    outline_point = k1
    k1 = long_ind[k1]
    c1 = cont[k1]
    x1 = c1[:,1]
    y1 = c1[:,0]
    
    # Outer eighths of the outline in x, and the one-third / two-third levels in y.
    egth = min(x1) + ((max(x1) - min(x1))/8)
    egth2 = max(x1) - ((max(x1) - min(x1))/8)
    thrd = min(y1) + ((max(y1) - min(y1))/3)
    thrd2 = min(y1) + (2*(max(y1) - min(y1))/3)
    
    # Fallback pair used if step 7 finds no better pair.
    # NOTE(review): raises IndexError when fewer than two other candidates are available.
    if "shoulder" not in filename:
        k = (inds[0],inds[1])
    else:
        koptions = []
        for i in range(0,n):
            if (i != outline_point) and ((long_ind_mn_x[i] < egth) or (long_ind_mx_x[i] > egth2)):
                koptions.append(i)
        k = (koptions[0],koptions[1])
        
    # 7) Find the two most similar contours that fit certain bounds.
    ################################################################
    # These contours will be the two handle contours.
    # Similarity is the sum of the absolute differences in y range, x range and max y.

    if "shoulder" in filename:
        for i in range(0,n):
            if (long_ind_mn_x[i] < egth) or (long_ind_mx_x[i] > egth2):
                if all([long_ind_mx[i] < thrd2,long_ind_mn[i] > thrd,i!=outline_point,long_ind_rngsx[i]>2,long_ind_rngsy[i]>2]):
                    for j in range(i,n):
                        # NOTE(review): the position tests in this inner loop use index i, not j, so the
                        # second contour is only checked for size - confirm whether j was intended.
                        if (long_ind_mn_x[i] < egth) or (long_ind_mx_x[i] > egth2):
                            if all([long_ind_mx[i] < thrd2,long_ind_mn[i] > thrd, i!=j, j!=outline_point,
                                    long_ind_rngsx[j]>2,long_ind_rngsy[j]>2]):
                                d1 = abs(long_ind_rngsy[i] - long_ind_rngsy[j])
                                d2 = abs(long_ind_rngsx[i] - long_ind_rngsx[j])
                                d3 = abs(long_ind_mx[i] - long_ind_mx[j])
                                sm = d1+d2+d3
                                if sm < tot:
                                    tot = sm
                                    k = (i,j)
    else:
        for i in range(0,n):
            if all([long_ind_rngsy[i] > mx/t,long_ind_mn[i] < ub,long_ind_mx[i] > mn, i!=outline_point]):
                for j in range(i,n):
                    # NOTE(review): the filter for i tests long_ind_mx[i] > mn while the filter for j
                    # tests long_ind_mn[j] > mn - confirm whether the asymmetry is intended.
                    if all([long_ind_rngsy[j] > mx/t, long_ind_mn[j] < ub, long_ind_mn[j] > mn, i!=j, j!=outline_point]):
                        d1 = abs(long_ind_rngsy[i] - long_ind_rngsy[j])
                        d2 = abs(long_ind_rngsx[i] - long_ind_rngsx[j])
                        d3 = abs(long_ind_mx[i] - long_ind_mx[j])
                        sm = d1+d2+d3
                        if sm < tot:
                            tot = sm
                            k = (i,j)

    
    # 8) Define handle contours.
    ############################
    # Map the chosen candidate positions back to indices in `cont`.
    k2 = long_ind[k[0]]
    k3 = long_ind[k[1]]

    c2 = cont[k2]
    x2 = c2[:,1]
    y2 = c2[:,0]

    c3 = cont[k3]
    x3 = c3[:,1]
    y3 = c3[:,0]
    
    return x1,y1,x2,y2,x3,y3

In [None]:
def get_longest_contours_a5(img):
    # Finds the outline contour and the handle contour of askos-like pots.
    # Input: image
    # Output: x1,y1 (outline contour) and x2,y2 (handle contour); x is column 1 and
    #         y is column 0 of each contour.

    # 1) Binarize with Otsu's threshold and trace all contours (Marching Squares)
    #############################################################################
    binary = img > threshold_otsu(img)
    contours = measure.find_contours(binary, 0.8)

    # 2) Candidate group: every contour whose length is among the 10 largest lengths
    ################################################################################
    lengths = [len(contour) for contour in contours]
    top_lengths = sorted(lengths, reverse=True)[:10]
    candidates = [idx for idx, length in enumerate(lengths) if length in top_lengths]

    # 3) Collect the extent of every candidate
    ##########################################
    heights = []
    widths = []
    y_starts = []
    for idx in candidates:
        cx = contours[idx][:, 1]
        cy = contours[idx][:, 0]
        heights.append(abs(max(cy) - min(cy)))
        widths.append(abs(max(cx) - min(cx)))
        y_starts.append(min(cy))

    # 4) Outline contour: the candidate with the greatest y range (height of pot)
    #############################################################################
    outline_pos = np.argmax(heights)

    # 5) Handle contour
    ###################
    # The handle has to start within the first third of the outline's y range. Among the
    # candidates that do, take the one with the largest height + width.
    limit = y_starts[outline_pos] + (heights[outline_pos] / 3)
    eligible = [pos for pos in range(len(candidates))
                if (y_starts[pos] < limit) and (pos != outline_pos)]
    extent = np.array([h + w for h, w in zip(heights, widths)])
    handle_pos = eligible[np.argmax(extent[eligible])]

    # 6) Map positions back to contours and split into coordinates
    ##############################################################
    outline = contours[candidates[outline_pos]]
    handle = contours[candidates[handle_pos]]

    return outline[:, 1], outline[:, 0], handle[:, 1], handle[:, 0]

In [None]:
def get_contour_side(X,Y,D):
    # This function finds the left or right side of the outline contour. If D is not equal "L" or "R", then it finds 
    # the shortest side of the outline contour.
    # Input: outline contour (X and Y are indexed with index arrays and compared element-wise,
    #        so they are presumably numpy arrays - confirm against caller) and D, i.e. desired
    #        side or shortest side.
    # Output: desired side of outline contour (xs, ys as lists), co-ordinates for the centre of pot (xc, yc),
    #         and the presented side, D = "L" or "R".
    
    # 1) Find centre of pot.
    #########################
    xc,yc,coords = find_centre(X,Y)    
    
    # 2) Define centre range.
    #########################
    # Find all points that are close to the centre. We hope these will be the points on the top and the base of the pot.
    # NOTE(review): tnth is min(X) plus a tenth of the width, so the half-width of the band around xc
    # grows with min(X) - confirm whether only the tenth of the width was intended.
    tnth = min(X) + (max(X)-min(X))/10
    lb = xc - tnth
    ub = xc + tnth
    rng = []

    rng = np.where((X >= lb) & (X <=ub))[0]
    
    # 3) Define boundary of body of pot
    ###################################
    # The middle third of the contour's y range.
    lb_y = min(Y)+((max(Y)-min(Y))/3)
    ub_y = min(Y)+(2*(max(Y)-min(Y))/3)

    # 4) Find top and bottom and bot.
    #################################
    # Within the centre band: the point with the largest Y and the point with the smallest Y.
    ktop = np.argmax(Y[rng])
    top_pnt = rng[ktop]

    kbot = np.argmin(Y[rng])
    bot_pnt = rng[kbot]

    # Order the two contour indices so the contour can be sliced between them.
    mx_pnt = max(top_pnt,bot_pnt)
    mn_pnt = min(top_pnt,bot_pnt)

    # We check if the correct top and bottom of the pot have been found.
    if np.abs(Y[mx_pnt] - Y[mn_pnt]) < (np.max(Y)-np.min(Y))/5:
    # If the difference between the top and bottom of the pot is too small, then define new top and bottom points.
    # NOTE(review): after this fallback mn_pnt is not guaranteed to be smaller than mx_pnt,
    # in which case the slice below is empty.
        mn_pnt = np.argmin(Y)
        mx_pnt = np.argmax(Y)

    # 5) Define the left and right side of the outline contour.
    ###########################################################
    # Side 1 is the stretch between the two points; side 2 is the remainder, wrapping around the end.
    xs1 = list(X[mn_pnt:mx_pnt])
    ys1 = list(Y[mn_pnt:mx_pnt])

    xs2 = list(X)[mx_pnt:] + list(X)[:mn_pnt] 
    ys2 = list(Y)[mx_pnt:] + list(Y)[:mn_pnt] 
    
    
    # 6) Find desired side of outline
    #################################
    # If D has already been set to be "L" or "R", we find the left/right side respectively.
    # However, if the desired side is too short (only a few points on that side), then the opposite side is
    # found instead and D is updated.
    # Here side 2 is treated as the right side and side 1 as the left side.
    if D == "R":
        if len(xs2) > 2:
            xs = xs2
            ys = ys2
        else:
            xs = xs1
            ys = ys1
            D  = "L"
    else:
        if D == "L":
            if len(xs1) > 2:
                xs = xs1
                ys = ys1
            else:
                xs = xs2
                ys = ys2
                D = "R"
        # If there is no preference for which side, then we find the shortest side.
        else:
            # Points of each side that lie in the middle third of the pot.
            ys1_rng = np.where((np.array(ys1)>=lb_y)&(np.array(ys1)<=ub_y))[0]
            ys2_rng = np.where((np.array(ys2)>=lb_y)&(np.array(ys2)<=ub_y))[0]

            # If a side has no point in the middle third, use its points instead (all but the last).
            if len(ys1_rng) == 0:
                ys1_rng = list(range(0,len(ys1)-1))

            if len(ys2_rng) == 0:
                ys2_rng = list(range(0,len(ys2)-1))
                
            # Decide which side is left and which is right.
            # The side whose selected points reach the smaller x is labelled "L".
            if np.min(np.array(xs1)[ys1_rng]) > np.min(np.array(xs2)[ys2_rng]):
                D1 = "R"
                D2 = "L"
            else:
                D1 = "L"
                D2 = "R"
            
            # Find shortest side.
            # Side 2 is chosen when it is not longer than side 1 and has more than 2 points,
            # or when side 1 has at most 2 points.
            if ((len(ys1) >= len(ys2)) and len(ys2)>2) or ((len(ys2) >= len(ys1)) and len(ys1)<=2):
                ys = ys2
                xs = xs2
                D = D2
            else:
                ys = ys1
                xs = xs1
                D = D1

    return xs,ys,xc,yc,D

#### Centers

In [None]:
def find_centre(x,y):
    # Locates the centre of the pot from a band of contour points in the middle of the pot.
    # Input: outline contour
    # Output: xc, yc (co-ordinates of the centre) and coords (indices of the points in the band).

    # 1) Vertical centre and band limits
    ####################################
    y_low = min(y)
    y_high = max(y)
    height = y_high - y_low
    yc = y_low + height/2
    tnth = y_high - height/10

    # The band holds the points strictly between yc and a tenth of the height short of max(y).
    y_values = np.array(y)
    coords = np.where((y_values < tnth) & (y_values > yc))[0]

    # 2) Horizontal centre: midpoint of the band's x extent
    #######################################################
    band_x = np.array(x)[coords]
    xc = min(band_x) + (max(band_x) - min(band_x))/2

    return xc,yc,coords

In [None]:
def get_top_pot_a3(x,y,xc):
    # Finds the y co-ordinate of the top (centre) of the pot. Used for amphora-like pots.
    # Input: outline contour and the x co-ordinate of the centre.
    # Output: the smallest y among the outline points whose x lies within 2 of xc
    #         (the pots are upside down, so the minimum y is the top). If no point is
    #         that close to the centre, the smallest y of the whole contour is returned.

    # 1) Points of the outline close to the centre line
    ###################################################
    near_centre = np.where((x <= xc+2) & (x >= xc-2))[0]

    # 2) Top of pot
    ###############
    if len(near_centre) < 1:
        return np.min(y)

    highest = np.argmin(np.array(y)[near_centre])
    return y[near_centre[highest]]

#### Handles

In [None]:
def remove_handles_a2(xs,ys,D,filename):
    # Removes handles of bell-krater like pots.
    # Input: side outline contour (xs, ys as lists - they are reversed IN PLACE when ys starts
    #        larger than it ends) and the position of the side (either left or right).
    #        filename is not used in this function.
    # Output: new side contour (xsr, ysr as lists) with handles removed.

    # Make sure the contour runs from small y to large y.
    if ys[0] > ys[-1]:
        ys.reverse()
        xs.reverse()
        
    # 1) Define range for location of handle
    #########################################
    # The handle is searched between 10% and 80% of the y range.
    # NOTE(review): comparing the list ys with ub/lb relies on ub/lb being numpy scalars
    # (i.e. on the elements of ys being numpy values) - confirm against caller.
    lb = min(ys) + ((max(ys) - min(ys))*0.1)   
    ub = max(ys) - ((max(ys) - min(ys))*0.2)   
    rng = np.where((ys<ub) & (ys > lb))[0]
    
    # 2) Find tip of handle
    #######################
    # If we have the left-side contour, then we assume that the point with the minimum x coordinate is tip of
    # the handle. If we have the right-side contour, then we assume that the point with the maximum x coordinate
    # is the tip of the handle.
    # k is a position within rng, not an index into xs.
    if D == "L":
        k = np.argmin(np.array(xs)[rng])
    else:
        k = np.argmax(np.array(xs)[rng])
        
    # 3) Find the beginning and end of handle
    #########################################
    # k1: position (within rng) of the largest y before the tip, leaving out the two points just before it.
    # NOTE(review): for k < 2 the slice [:k-2] has a non-positive stop (k == 2 gives an empty
    # slice, on which argmax raises ValueError) - confirm that the tip is never that early.
    k1 = np.argmax(np.array(ys)[rng][:k-2])
    # k2: first point from the tip onwards whose x is within 1 of the x at the start of the handle.
    k2 = np.where((np.array(xs)[rng[k:]] <= xs[rng[k1]] + 1) & (np.array(xs)[rng[k:]] >= xs[rng[k1]] - 1))[0]
    
    # If no such point exists, the handle is taken to run to the end of the range.
    if len(k2) < 1:
        k2 = len(rng) -1
    else:
        k2 = k2[0] + k
        
    # We find the last point that's less than the lower bound and the last point that's less than the upper bound.
    t1 = np.where(ys < lb)[0] 
    t1 = t1[-1]
    t2 = np.where(ys < ub)[0]
    t2 = t2[-1]
    
    # 4) Define new side-contour whilst cutting the handle.
    #######################################################
    # Pieces, in order: points up to t1, range points before the handle starts,
    # range points from the end of the handle, and the points from t2 onwards.
    xsr = []
    ysr = []
    xsr.extend(xs[:t1])
    ysr.extend(ys[:t1])
    xsr.extend(np.array(xs)[rng[:k1]])
    ysr.extend(np.array(ys)[rng[:k1]])
    xsr.extend(np.array(xs)[rng[k2:]])
    ysr.extend(np.array(ys)[rng[k2:]])
    xsr.extend(xs[t2:])
    ysr.extend(ys[t2:])
    
    return xsr,ysr
    

In [None]:
def get_top_bot_handle_a3(x_,y_):
    # Finds the two end regions of a handle contour of an amphora-like pot.
    # Input: handle contour
    # Output: top, bot - lists of contour indices. `top` holds the points whose y lies in the
    #         uppermost tenth of the y range (largest y values), `bot` the points whose y lies
    #         in the lowest tenth (smallest y values).

    # 1) Limits of the two regions
    ##############################
    y_high = np.max(y_)
    y_low = np.min(y_)
    tenth = (y_high - y_low)/10
    top_floor = y_high - tenth
    bot_ceiling = y_low + tenth

    # 2) Collect the indices that fall inside each region
    #####################################################
    indices = range(0, len(x_))
    bot = [i for i in indices if y_low <= y_[i] <= bot_ceiling]
    top = [i for i in indices if top_floor <= y_[i] <= y_high]

    return top,bot

In [None]:
def get_handle_side_a3(x_,y_,top,bot,D,filename):
    # Gets the desired side of the handle contour for amphora-like pots. If the left side of the pot outline contour was
    # chosen, then the right side of the handle contour is chosen, and if the right side of the outline contour was chosen
    # then the left side of the handle is chosen.
    # Input: handle contour (x_ is indexed with the index lists top/bot, so it is presumably a numpy
    #        array - confirm against caller), the index lists of its top and bottom regions,
    #        the chosen side D and the filename (only used to detect "shoulder" amphorae).
    # Output: xh, yh - lists with the co-ordinates of the selected side of the handle contour.
    
    # 1) Find start and end points of handle side contour.
    ######################################################
    # If we have the left side of the outline contour, we want the side handle contour to start from the most-left point at 
    # the top of the handle and end at the right-most point on the bottom of the handle.
    # If we have the right side of the outline contour, we want the side handle contour to start from the right-most point at 
    # the top of the handle and end at the left-most point on the bottom of the handle.
    if D == "L":
        p1 = np.argmin(x_[top])
        p2 = np.argmax(x_[bot])
    else:
        p1 = np.argmax(x_[top])
        p2 = np.argmin(x_[bot])

    # Convert positions within top/bot into indices of the handle contour.
    p1 = top[p1]
    p2 = bot[p2]

    # 2) Determine direction of contour.
    ###################################
    # The contour is taken to run to the right at p2 if x at p2 is larger than x at either of
    # the two preceding points (negative indices wrap around to the end of the contour).
    d_ = "L"
    if (x_[p2] > x_[p2-1]) or (x_[p2] > x_[p2-2]):
        d_ = "R"

    xh = []
    yh = []

    x_2 = list(x_)
    y_2 = list(y_)

    # 3) Find side of handle
    ########################
    # Case 1: direction and chosen side differ - take the stretch running from p2 to p1,
    # wrapping around the end of the contour if p1 comes before p2.
    if (d_ == "R" and D == "L") or (d_ =="L" and D == "R"):
        if p1 > p2:
            xh = x_2[p2:p1]
            yh = y_2[p2:p1]
        else:
            xh = x_2[p2:]
            xh.extend(x_2[0:p1])
            yh = y_2[p2:]
            yh.extend(y_2[0:p1])
    # Case 2: direction and chosen side agree - take the stretch running from p1 to p2,
    # wrapping around the end of the contour if p2 comes before p1.
    # If D is neither "L" nor "R" in both cases, xh and yh stay empty.
    elif (d_ == "L" and D == "L") or (d_ =="R" and D == "R"):
        if p1 > p2:
            xh = x_2[p1:]
            xh.extend(x_2[0:p2])
            yh = y_2[p1:]
            yh.extend(y_2[0:p2])
        else:
            xh = x_2[p1:p2]
            yh = y_2[p1:p2]
            
    # For shoulder amphorae only the points with y above the handle's vertical midpoint value are kept.
    if "shoulder" in filename:
        mid = min(y_)+((max(y_) - min(y_))/2)
        yh2 = []
        xh2 = []
        for i in range(0,len(yh)):
            if yh[i] > mid:
                yh2.append(yh[i])
                xh2.append(xh[i])
        yh = yh2
        xh = xh2
        
    return xh,yh

In [None]:
def find_top_of_handle_a3(xs,ys,yh,yhs,D,filename):
    # This function finds the start of the handle on the outline contour of amphora-like pots.
    # Input: side outline contour (xs, ys), y co-ordinates of the handle contour (yh), y co-ordinates
    #        of the side handle contour (yhs), chosen side D ("L" or "R") and the filename
    #        (only used to detect "shoulder" amphorae).
    # Output: index into the side outline contour of the point taken as the start of the handle.
    #
    # Changes compared with the previous version:
    # - bare `except:` clauses are now `except Exception:` so that KeyboardInterrupt/SystemExit
    #   are no longer swallowed;
    # - xs/ys are converted to arrays once, so plain Python lists work in the comparisons;
    # - the fallback window around `a` for shoulder amphorae is now actually used when the range
    #   a..b is empty (it used to sit behind a try/except that could never fire).

    xs_arr = np.asarray(xs)
    ys_arr = np.asarray(ys)

    if "shoulder" not in filename:
        # 1.1 Define range on outline contour before handle starts
        ##########################################################
        # Everything whose y is below the handle's minimum y, with a margin of a fifth of the
        # side handle's y range.
        lb = min(yh) - (max(yhs) - min(yhs))/5
        try:
            rng = np.where(ys_arr < lb)[0]
            k = 0
            # 1.2 Find point representing the start of the handle
            #####################################################
            # We assume that the handle starts after an indent in the outline contour, so we keep
            # the LAST indent in the range: a local maximum of x for the left side, a local
            # minimum of x for the right side.
            for i in range(1, len(rng) - 1):
                prev_x = xs_arr[rng[i-1]]
                cur_x = xs_arr[rng[i]]
                next_x = xs_arr[rng[i+1]]
                if D == "L":
                    indent = (cur_x > prev_x) and (cur_x >= next_x)
                else:
                    indent = (cur_x < prev_x) and (cur_x <= next_x)
                if indent:
                    k = i
            # Raises IndexError when the range is empty; handled below.
            k = rng[k]
        except Exception:
            # Best-effort fallback: use the point with the smallest y.
            k = np.argmin(ys)
    else:
        # 2.1 Define range on outline contour before handle starts
        ##########################################################
        # The range of shoulder-amphorae differs as their handles are always in middle of the body whilst
        # many amphorae have their handles on the neck of the pot or somewhere else on the body.
        try:
            a = np.where(ys_arr > min(yh) - 5)[0][0]
            b = np.where(ys_arr < min(yh) - 5)[0][-1]

            # 2.2 Find start of handle
            ##########################
            # The point with the largest y between a and b.
            inds = list(range(a, b))
            k = inds[np.argmax(ys_arr[inds])]
        except Exception:
            # Retry with a wider margin (10 instead of 5). Errors raised here propagate.
            a = np.where(ys_arr > min(yh) - 10)[0][0]
            b = np.where(ys_arr < min(yh) - 10)[0][-1]
            inds = list(range(a, b))
            if not inds:
                # Empty range: fall back to a window of 5 points either side of a,
                # clipped to the valid index range of the contour.
                inds = [i for i in range(a - 5, a + 5) if 0 <= i < len(ys_arr)]
            k = inds[np.argmax(ys_arr[inds])]

    return k

In [None]:
def get_handle_start_a4(xs,ys,rng,D):
    # Locates the point before the start of the handle on cup-like pots.
    # Input: side outline contour, assumed range of the handle (indices), and chosen side (D).
    # Output: index of the FIRST indent found strictly inside the range, or the (rounded)
    #         midpoint of the range when there is no indent. ys is not used.

    def is_indent(i):
        # Left side: a local maximum of x. Right side: a local minimum of x.
        if D == "L":
            return (xs[i] > xs[i+1]) and (xs[i] >= xs[i-1])
        return (xs[i] < xs[i+1]) and (xs[i] <= xs[i-1])

    # 1) Collect every indent strictly between the first and the last index of the range
    ####################################################################################
    first, last = rng[0], rng[-1]
    indents = [i for i in range(first + 1, last) if is_indent(i)]

    # 2) Fall back to the middle of the range when nothing was found
    ################################################################
    if not indents:
        indents.append(int(np.round(first + (last - first)/2)))

    return indents[0]

In [None]:
def get_handle_end_a4(x,y,xs,ys,k,D):
    # Locates the point after the end of the handle on cup-like pots.
    # Input: outline contour (x, y - not used here), side outline contour, index of the start of
    #        the handle, and chosen side (D).
    # Output: index of the first point of the side contour that satisfies both conditions below.
    #         Raises IndexError when no point does.

    side_x = np.array(xs)
    side_y = np.array(ys)

    # 1) Limits for where the handle might end
    ##########################################
    # The handle is assumed to be about 16% of the pot's y range thick where it meets the pot,
    # and its end is assumed to be shifted by more than 4% of the pot width from its start.
    min_y = ys[k] + 0.16*(max(ys) - min(ys))
    x_shift = 0.04*(max(xs) - min(xs))

    # 2) Find end of handle
    #######################
    low_enough = side_y >= min_y
    if D == "L":
        shifted = side_x > (xs[k] + x_shift)
    else:
        shifted = side_x < (xs[k] - x_shift)

    return np.where(low_enough & shifted)[0][0]

In [None]:
def get_bot_area_handle_a5(x_,y_):
    # Finds the bottom region of the handle contour for askos-like pots.
    # Input: handle contour.
    # Output: list of contour indices whose y lies within a tenth of the y range of the
    #         largest y value.

    # 1) Limits of the bottom region
    ################################
    y_high = np.max(y_)
    y_low = np.min(y_)
    region_start = y_high - ((y_high - y_low)/10)

    # 2) Collect the indices inside the region
    ##########################################
    return [i for i in range(0, len(x_)) if region_start <= y_[i] <= y_high]

In [None]:
def get_handle_bot_a5(x_,y_,bot):
    # This function outputs the bottom contour of the handle of askos-like pots.
    # Input: handle contour and the indices of its bottom region.
    # Output: xh, yh - lists with the co-ordinates of the bottom handle contour.
    #
    # Fix: one branch sliced with the undefined name `end` (x_[p1:end]), which would raise a
    # NameError if executed. It now drops the last point and wraps around, mirroring the other
    # wrap-around branch.

    # 1) Define start and end of bottom contour
    ###########################################
    # p1 is the left-most and p2 the right-most point of the bottom region (as contour indices).
    bottom_x = np.array(x_)[bot]
    p1 = bot[np.argmin(bottom_x)]
    p2 = bot[np.argmax(bottom_x)]

    # 2) Determine direction of contour (clockwise or anticlockwise).
    ################################################################
    # The contour is treated as anticlockwise when p1 comes before p2 and the index halfway
    # between them also belongs to the bottom region.
    direc = "cw"
    mid = int(min(p1,p2) + (abs(p1-p2)/2))
    if (mid in bot) and (p1 < p2):
        direc = "acw"

    # 3) Find bottom contour of handle
    ##################################
    xh = []
    yh = []

    if p1 < p2:
        if direc == "cw":
            # Wrap around the end of the contour, leaving out its last point.
            xh.extend(x_[p2:-1])
            yh.extend(y_[p2:-1])
            xh.extend(x_[0:p1])
            yh.extend(y_[0:p1])
        else:
            xh.extend(x_[p1:p2])
            yh.extend(y_[p1:p2])
    else:
        if direc == "cw":
            xh.extend(x_[p2:p1])
            yh.extend(y_[p2:p1])
        else:
            # NOTE(review): with the direction test above, "acw" is only ever set when p1 < p2,
            # so this branch is currently unreachable; it is kept valid for when the direction
            # test is revisited.
            xh.extend(x_[p1:-1])
            yh.extend(y_[p1:-1])
            xh.extend(x_[0:p2])
            yh.extend(y_[0:p2])

    return xh,yh

In [None]:
def remove_handles_a6(xs,ys,D,filename):
    # This function removes handles off of kotyle-like pots (pots with thin handles sticking out close to the top).
    # Input: side outline contour (xs, ys; lists or arrays), chosen side D ("L" for the left side, anything
    #        else is treated as the right side) and the file/class name (only checked for the word "bird").
    # Output: side outline contour with handle removed, as two lists ordered by increasing start/end y.
    # Raises: IndexError for an empty contour, ValueError if no contour point lies in the search range.

    # Work on list copies: the caller's sequences are not reversed in place and numpy arrays
    # are accepted as well as lists.
    xs = list(xs)
    ys = list(ys)
    if ys[0] > ys[-1]:
        ys.reverse()
        xs.reverse()

    # Array views for the element-wise comparisons below (comparing a plain list with a
    # Python number raises TypeError).
    xs_arr = np.asarray(xs)
    ys_arr = np.asarray(ys)

    # 1) Define range of where handle might be located
    ##################################################
    # We assume for most pots that the handle will be in the top 30th percentile of the pot.
    y_min = min(ys)
    y_span = max(ys) - y_min
    if "bird" in filename: # bird cup handles are very thin and their positioning differs slightly in comparison to others.
        ub = np.round(y_min + (0.15*y_span))
    else:
        ub = np.round(y_min + (3*y_span/10))

    rng = np.where(ys_arr<ub)[0]

    # 2) Find tip of handle.
    #######################
    # If we have the left-side of the outline contour, we find the left-most point in the range.
    # If we have the right-side of the outline contour, we find the right-most point in the range.
    if D == "L":
        k = np.argmin(xs_arr[rng])
    else:
        k = np.argmax(xs_arr[rng])

    # 3) Define range for end of handle
    ###################################
    rng2 = rng[k:]

    # 4) Find end of handle
    #######################
    # The end is the point after the tip that lies closest to the pot again.
    if D == "L":
        k2 = np.argmax(xs_arr[rng2])
    else:
        k2 = np.argmin(xs_arr[rng2])
    handle_end = rng2[k2]

    # 5) Find start of handle
    #########################
    # First contour point whose x is within 2 units of the x at the end of the handle.
    x_end = xs_arr[handle_end]
    near = np.where((xs_arr <= x_end+2) & (xs_arr >= x_end-2))[0]

    # handle_end itself satisfies the condition, so `near` is normally non-empty; the
    # fallback index 1 of the original code is kept for degenerate (e.g. NaN) input.
    l = near[0] if len(near) > 0 else 1

    # 6) Remove handle from sideoutline contour
    ####################################
    xsr = xs[:l] + xs[handle_end+2:]
    ysr = ys[:l] + ys[handle_end+2:]

    return xsr,ysr

#### Pot Ends

In [None]:
def _cut_side_at_centre(xs,ys,xc,side):
    # Keeps the points of the side contour lying strictly on the chosen side of the vertical
    # line x = xc ("L": x < xc, otherwise x > xc) and caps both ends with a point on that line.
    # Raises IndexError when no point lies on the chosen side.
    if side == "L":
        coords2 = np.where(np.array(xs)<xc)[0]
    else:
        coords2 = np.where(np.array(xs)>xc)[0]

    kept_x = np.array(xs)[coords2]
    kept_y = np.array(ys)[coords2]

    xnew = [xc]
    ynew = [kept_y[0]-1]
    xnew.extend(kept_x)
    ynew.extend(kept_y)
    xnew.append(xc)
    ynew.append(ys[-1])

    return xnew,ynew


def edit_pot_ends(x,y,xs,ys,side,filename,img):
    # This function edits the top and bottom of the side outline contour so that the contour starts and ends at the
    # centre of the pot (on the top and bottom respectively).
    # Input: outline contour (x, y), side outline contour with handles removed (xs, ys), chosen side
    #        ("L", "R" or "unknown"), file/class name and the original image.
    # Output: side outline contour that has had its top and bottom cut or extended (xnew, ynew), plus
    #         xnewr: xnew mirrored about the centre line for side "L", xnew itself otherwise.
    #         If the editing step fails, the unedited xs, ys are returned as xnew, ynew.

    # 1) Find centre of pot
    #######################
    xc,yc,coords = find_centre(x,y)

    # 2) Define boundary for top of pot
    ###################################
    # Indices of the side contour lying in the top third of its vertical extent.
    top = np.where(np.array(ys)<min(ys)+((max(ys)-min(ys))/3))[0]

    # We double check whether centre found is appropriate and if not, we define the centre by computing
    # the centre of mass of the image.
    # NOTE(review): the side == "R" test uses min(...) just like the "L" test; by symmetry max(...)
    # may have been intended - confirm before changing. A side of "unknown" is only resolved
    # further down, so it skips this check.
    if ((side == "L") and (min(np.array(xs)[top])+5>xc)) or ((side == "R") and (min(np.array(xs)[top])-5<xc)):
        # ndimage.center_of_mass is the public name of the former ndimage.measurements.center_of_mass.
        xc = ndimage.center_of_mass(img)[1]

    # 3) Cut / Extend the top and bottom of pot contour
    ###################################################
    try:
        ymx = np.max(ys)

        if side == "unknown":
            if min(np.array(x)[coords]) < xc:
                side = "L"
            else:
                side = "R"

        if any(tag in filename for tag in ("stamnos","lebes","pyxis","tankard")):
        # These pots usually have a handle sticking upwards the top hence why we treat them separately
            try:
                # 3.1.1. Find top region of pot
                ###############################
                # We assume that this would be region with x coordinates close to the centre of the pot.
                tnth = (np.max(x) - np.min(x))/10
                mid_coords = np.where((np.array(x) <= xc+tnth) & (np.array(x) >= xc - tnth))[0]
                top_y = min(y[mid_coords])

                # Drop everything at or above that level, then cut at the last point on the chosen side.
                new_coords = np.where(ys > top_y)[0]

                xs = np.array(xs)[new_coords]
                ys = np.array(ys)[new_coords]

                if side == "L":
                    t = np.where(np.array(xs) < xc)[0][-1]
                else:
                    t = np.where(np.array(xs) > xc)[0][-1]

                ys = ys[:t]
                xs = xs[:t]

                xnew = [xc]
                ynew = [top_y]
                xnew.extend(xs)
                ynew.extend(ys)
                xnew.append(xc)
                ynew.append(ymx)
            except Exception:
                # Best effort: fall back to the generic cut (xs, ys may already have been trimmed above).
                xnew,ynew = _cut_side_at_centre(xs,ys,xc,side)
        else:
            xnew,ynew = _cut_side_at_centre(xs,ys,xc,side)

    except Exception:
        # Deliberate best effort: hand back the contour unedited rather than failing the pot.
        xnew = xs
        ynew = ys

    if side == "L":
        xnewr = [(-1*i)+2*xc for i in xnew]
    else:
        xnewr = xnew

    return xnew,ynew,xnewr

#### Smoothing

In [None]:
def smooth_side_contour(x,y,direction):
    # Reduces a side contour to one point per (rounded) y level.
    # Input: contour coordinates (x, y) and the side being traced (direction).
    # Output: xs - for every distinct rounded y, the largest rounded x when direction is 'R',
    #              otherwise the smallest rounded x (list);
    #         ys - the distinct rounded y values in increasing order (numpy array).

    rounded_x = np.array(np.round(x))
    rounded_y = np.array(np.round(y))

    levels = np.unique(rounded_y)
    outermost = max if direction == 'R' else min

    xs = [outermost(rounded_x[np.where(rounded_y == level)]) for level in levels]

    return xs,levels

In [None]:
def _ends_plus_filtered_interior(n,keep):
    # Returns the indices 0, 1, n-2 and n-1 (always retained) together with every interior
    # index i in 2 .. n-3 for which keep(i) is true, in increasing order.
    kept = [0,1]
    kept.extend(i for i in range(2,n-2) if keep(i))
    kept.extend([n-2,n-1])
    return kept


def final_smoothing(xcont,ycont):
    # Removes repeated points and one-point spikes in x from a contour.
    # Input: contour coordinates (xcont, ycont).
    # Output: cleaned contour as two numpy arrays of rounded coordinates.
    # The first two and last two points of the contour are never removed.

    xcont = np.asarray(xcont)
    ycont = np.asarray(ycont)

    # The filters below need two fixed points at each end. Shorter contours are returned
    # rounded but otherwise untouched (previously they came back with duplicated points,
    # or raised IndexError when fewer than two points were given).
    if len(xcont) < 4:
        return np.round(xcont),np.round(ycont)

    # Round before comparing points, unless the contour is so small that rounding could merge them.
    if ((np.max(xcont)-np.min(xcont)) > 10) and ((np.max(ycont)-np.min(ycont)) > 10):
        xcont = np.round(xcont)
        ycont = np.round(ycont)

    # Remove Repeated Points:
    inds = _ends_plus_filtered_interior(
        len(xcont),
        lambda i: (xcont[i] != xcont[i-1]) or (ycont[i] != ycont[i-1]))

    xcont2 = np.round(xcont[inds])
    ycont2 = np.round(ycont[inds])

    # Remove dips #1: points whose x is strictly below (or strictly above) both second neighbours.
    def not_wide_dip(i):
        below = (xcont2[i] < xcont2[i+2]) and (xcont2[i] < xcont2[i-2])
        above = (xcont2[i] > xcont2[i+2]) and (xcont2[i] > xcont2[i-2])
        return not (below or above)

    non_dips = _ends_plus_filtered_interior(len(xcont2),not_wide_dip)

    xcont3 = xcont2[non_dips]
    ycont3 = ycont2[non_dips]

    # Remove dips #2: same test against the direct neighbours.
    def not_narrow_dip(i):
        below = (xcont3[i] < xcont3[i+1]) and (xcont3[i] < xcont3[i-1])
        above = (xcont3[i] > xcont3[i+1]) and (xcont3[i] > xcont3[i-1])
        return not (below or above)

    non_dips2 = _ends_plus_filtered_interior(len(xcont3),not_narrow_dip)

    xcont4 = xcont3[non_dips2]
    ycont4 = ycont3[non_dips2]

    return xcont4,ycont4

#### Contour Extraction

In [None]:
def cont_extraction_1(image_grey,hnd_side,filename):
    # Contour extraction without a handle-removal step.
    # Input: grey-scale image, side of the pot to trace (hnd_side, passed to get_contour_side:
    #        "L" = left side, "R" = right side, anything else = shortest side) and the file/class name.
    # Output: smoothed side contour (x, y) followed by the smoothed reflected contour (x, y).

    # 1) Outline of the pot; column 1 holds x, column 0 holds y.
    outline = get_outline_contour(image_grey)
    outline_x,outline_y = outline[:,1],outline[:,0]

    # 2) Requested side of the outline (the centre returned alongside is not needed here).
    side_x,side_y,_,_,side = get_contour_side(outline_x,outline_y,hnd_side)

    # 3) One point per y level.
    smooth_x,smooth_y = smooth_side_contour(side_x,side_y,side)

    # 4) Make the contour start and end at the centre of the pot.
    end_x,end_y,end_x_reflected = edit_pot_ends(outline_x,outline_y,smooth_x,smooth_y,side,filename,image_grey)

    # 5) Remove repeated points and spikes, for the contour and for its reflection.
    final_x,final_y = final_smoothing(end_x,end_y)
    final_xr,final_yr = final_smoothing(end_x_reflected,end_y)

    return final_x,final_y,final_xr,final_yr

In [None]:
def cont_extraction_2(image_grey,filename):
    # Contour extraction for pots whose handles are removed from the side outline.
    # Input: grey-scale image and file/class name (decides which handle-removal routine is used).
    # Output: smoothed side contour (x, y) followed by the smoothed reflected contour (x, y).

    # 1) Outline of the pot; column 1 holds x, column 0 holds y.
    outline = get_outline_contour(image_grey)
    outline_x,outline_y = outline[:,1],outline[:,0]

    # 2) Side of the outline. "Z" is neither "L" nor "R", so the shortest side is returned.
    side_x,side_y,_,_,side = get_contour_side(outline_x,outline_y,"Z")

    # 3) Remove the handle: kotyle-like classes use the a6 routine, everything else the a2 routine.
    a6_classes = ("bird","skyphos","kotyle","pithos","lekanis","plemochoe")
    if any(tag in filename for tag in a6_classes):
        remove_handles = remove_handles_a6
    else:
        remove_handles = remove_handles_a2
    bare_x,bare_y = remove_handles(side_x,side_y,side,filename)

    # 4) One point per y level.
    smooth_x,smooth_y = smooth_side_contour(bare_x,bare_y,side)

    # 5) Make the contour start and end at the centre of the pot.
    end_x,end_y,end_x_reflected = edit_pot_ends(outline_x,outline_y,smooth_x,smooth_y,side,filename,image_grey)

    # 6) Remove repeated points and spikes, for the contour and for its reflection.
    final_x,final_y = final_smoothing(end_x,end_y)
    final_xr,final_yr = final_smoothing(end_x_reflected,end_y)

    return final_x,final_y,final_xr,final_yr

In [None]:
def cont_extraction_3(image_grey,potclass):
    # Contour extraction that splices part of a handle contour into the side outline.
    # Input: grey-scale image and pot class name (selects the class-specific splicing rule below).
    # Output: smoothed side contour (x, y) followed by the smoothed reflected contour (x, y).

    # 1) Three contours: (x1,y1) is used as the pot outline, (x2,y2) and (x3,y3) as the handle candidates.
    x1,y1,x2,y2,x3,y3 = get_longest_contours_a3(image_grey,potclass)

    # 2) Side of the outline ("Z" is neither "L" nor "R").
    xs,ys,xc,yc,D = get_contour_side(x1,y1,"Z")
    
    # 3) Smooth the side, except for "shoulder" classes, which are only oriented so that
    #    the first point has the smaller y.
    if "shoulder" not in potclass:
        xsmooth,ysmooth = smooth_side_contour(xs,ys,D)
    else:
        xsmooth = xs
        ysmooth = ys
        if ysmooth[0] > ysmooth[-1]:
            xsmooth.reverse()
            ysmooth.reverse()

    # 4) Pick the handle on the chosen side: the candidate reaching the smaller x is used for D == "L".
    if min(x2) < min(x3):
        if D == "L":
            xh = x2
            yh = y2
        else:
            xh = x3
            yh = y3
    else:
        if D == "L":
            xh = x3
            yh = y3
        else:
            xh = x2
            yh = y2
        
    # 5) Side of the handle contour that is spliced into the outline.
    top,bot = get_top_bot_handle_a3(xh,yh)
    xhs,yhs = get_handle_side_a3(xh,yh,top,bot,D,potclass)
    
    # 6) Indices on the smoothed side between which the outline is replaced:
    #    k1 comes from find_top_of_handle_a3; k2 is the first point lying more than a quarter
    #    of the handle-side height below the largest handle y.
    k1 = find_top_of_handle_a3(xsmooth,ysmooth,yh,yhs,D,potclass)
    
    ub = max(yh) + (max(yhs) - min(yhs))/4
    rng2 = np.where(np.array(ysmooth)>ub)[0]
    k2 = rng2[0]
    
    p1 = min(k1,k2)
    p2 = max(k1,k2)
    
    # Orient the handle side so that the first point has the smaller y.
    if yhs[0] > yhs[-1]:
        yhs.reverse()
        xhs.reverse()
        
    # The handle side is used up to (not including) the point just before its largest y.
    k = np.argmax(yhs)-1
    
    # 7) Assemble the new side contour: outline above the handle, handle side, outline from p2 on.
    xr = []
    yr = []
    if ("volute" not in potclass) & ("amphora" not in potclass) & ("nestoris" not in potclass) & ("column" not in potclass) & ("kantharos" not in potclass):
        # Default rule.
        if p1 == 0:
            xr.append(xsmooth[p1])
            yr.append(ysmooth[p1])
            xr.extend(xhs[1:k])
            yr.extend(yhs[1:k])
        else:
            xr.extend(xsmooth[:p1])
            yr.extend(ysmooth[:p1])
            xr.extend(xhs[2:k])
            yr.extend(yhs[2:k])

        xr.extend(xsmooth[p2:])
        yr.extend(ysmooth[p2:])
    else:
        if "volute" in potclass:
            # Start at the handle point with the smallest y instead of the outline.
            v = np.argmin(yh)
            xr.append(xh[v])
            yr.append(yh[v])
            xr.extend(xhs[2:k])
            yr.extend(yhs[2:k])
            xr.extend(xsmooth[p2:])
            yr.extend(ysmooth[p2:])
        if "column" in potclass:
            # As for "volute", preceded by a point at the extreme handle x and the smallest outline y.
            if D == "L":
                m = min(xh)
            else:
                m = max(xh)
    
            v = np.argmin(yh)
            xr.append(m)
            yr.append(min(ysmooth))
            xr.append(xh[v])
            yr.append(yh[v])
            xr.extend(xhs[2:k])
            yr.extend(yhs[2:k])
            xr.extend(xsmooth[p2:])
            yr.extend(ysmooth[p2:])

        if ("kantharos" in potclass) or ("amphora" in potclass) or ("nestoris" in potclass):
            # Handle-side points with y below the value returned by get_top_pot_a3 are skipped;
            # if there are none, the outline above the handle is kept as in the default rule.
            # NOTE(review): these three `if`s are not exclusive - a class name matching more than
            # one of them appends several segments to xr, yr.
            p = get_top_pot_a3(x1,y1,xc)
            t = np.where(yhs<p)[0]
            if len(t) < 1:
                t = 0
                if p1 == 0:
                    xr.append(xsmooth[p1])
                    yr.append(ysmooth[p1])
                else:
                    xr.extend(xsmooth[:p1])
                    yr.extend(ysmooth[:p1])
            else:
                t = t[-1] + 1
            
            xr.extend(xhs[t:k])
            yr.extend(yhs[t:k])
            xr.extend(xsmooth[p2:])
            yr.extend(ysmooth[p2:])    

    # 8) Smooth, fix the contour ends at the pot centre, and clean up contour and reflection.
    xsr,ysr = smooth_side_contour(xr,yr,D)            
    x,y,xr = edit_pot_ends(x1,y1,xsr,ysr,D,potclass,image_grey)
    x_,y_ = final_smoothing(x,y)
    x_r,y_r = final_smoothing(xr,y)
    return x_,y_,x_r,y_r
    

In [None]:
def cont_extraction_4(image_grey,filename):
    # Contour extraction that cuts a handle out of the side outline using the a4 start/end finders.
    # Input: grey-scale image and file/class name.
    # Output: smoothed side contour (x, y) followed by the smoothed reflected contour (x, y).

    # 1) Outline of the pot; column 1 holds x, column 0 holds y.
    outline = get_outline_contour(image_grey)
    outline_x = outline[:,1]
    outline_y = outline[:,0]

    # 2) Side of the outline ("Z" is neither "L" nor "R"), oriented so that the first point has the smaller y.
    side_x,side_y,_,_,side = get_contour_side(outline_x,outline_y,"Z")

    # 3) The handle start is searched between 15% and 40% of the outline height, measured from the smallest y.
    band_far = np.min(outline_y) + (np.max(outline_y) - np.min(outline_y))*0.4
    band_near = np.min(outline_y) + (np.max(outline_y) - np.min(outline_y))*0.15

    if side_y[0] > side_y[-1]:
        side_y.reverse()
        side_x.reverse()

    side_y_arr = np.array(side_y)
    search = np.where((side_y_arr>band_near)&(side_y_arr<band_far))[0]

    # 4) Indices where the handle starts and ends on the side contour.
    start = get_handle_start_a4(side_x,side_y,search,side)
    stop = get_handle_end_a4(outline_x,outline_y,side_x,side_y,start,side)

    # 5) Drop the points from start up to (not including) stop.
    cut_x = list(side_x[:start]) + list(side_x[stop:])
    cut_y = list(side_y[:start]) + list(side_y[stop:])

    # 6) Smooth, fix the contour ends at the pot centre, and clean up contour and reflection.
    smooth_x,smooth_y = smooth_side_contour(cut_x,cut_y,side)
    end_x,end_y,end_x_reflected = edit_pot_ends(outline_x,outline_y,smooth_x,smooth_y,side,filename,image_grey)

    final_x,final_y = final_smoothing(end_x,end_y)
    final_xr,final_yr = final_smoothing(end_x_reflected,end_y)

    return final_x,final_y,final_xr,final_yr

In [None]:
def cont_extraction_5(image_grey,filename):
    # Contour extraction that joins the bottom of a handle contour to the lower part of the side outline.
    # Input: grey-scale image and file/class name.
    # Output: smoothed side contour (x, y) followed by the smoothed reflected contour (x, y).

    # 1) Two contours: (x1,y1) is used as the pot outline, (x2,y2) as the handle.
    x1,y1,x2,y2 = get_longest_contours_a5(image_grey)
    
    # 2) Side of the outline ("Z" is neither "L" nor "R"), one point per y level.
    xs,ys,xc,yc,D = get_contour_side(x1,y1,"Z")
    
    xsmooth,ysmooth = smooth_side_contour(xs,ys,D)
    
    # 3) Bottom part of the handle contour; fall back to the whole handle if it comes back empty.
    bot = get_bot_area_handle_a5(x2,y2)
    
    xh,yh = get_handle_bot_a5(x2,y2,bot)
    
    if len(xh) < 1:
        xh = x2
        yh = y2
    
    # 4) The outline is used from the first point lying more than a seventh of the
    #    handle height below the largest y of the handle bottom.
    mx = max(yh)
    
    ub = mx + (mx-min(y2))/7
    
    k2 = np.where(ysmooth > ub)[0][0]
    
    # 5) Orient the handle bottom: for the left side it should run towards smaller x,
    #    for the right side towards larger x.
    # NOTE(review): .reverse() needs lists; in the fallback above xh, yh are x2, y2 - confirm their type.
    if D == "L":
        if xh[0] < xh[-1]:
            xh.reverse()
            yh.reverse()
    
    if D == "R":
        if xh[0] > xh[-1]:
            xh.reverse()
            yh.reverse()
            
    # NOTE(review): smooth_side_contour returns ysmooth from np.unique (increasing), so this
    # branch is not expected to run; k2 above was computed before it in any case.
    if ysmooth[0] > ysmooth[-1]:
        ysmooth.reverse()
        xsmooth.reverse()
        
    # 6) Indices where the handle bottom moves down (increasing y) and outwards at the same time.
    t = []
    if D == "L":
        for i in range(0,len(xh)-1):
            if (yh[i] < yh[i+1]) & (xh[i] > xh[i+1]):
                t.append(i)
    else:
        for i in range(0,len(xh)-1):
            if (yh[i] < yh[i+1]) & (xh[i] < xh[i+1]):
                t.append(i)
    
    # The handle bottom is used up to the last such index (or up to its last point if there is none).
    k1 = len(xh)-1
    if len(t) != 0:            
        k1 = t[-1]
    
    # 7) New side contour: handle bottom followed by the outline from k2 on.
    xr = []
    yr = []
    
    xr.extend(xh[:k1])
    yr.extend(yh[:k1])        
    xr.extend(xsmooth[k2:])
    yr.extend(ysmooth[k2:])
    
    # 8) Smooth, fix the contour ends at the pot centre, and clean up contour and reflection.
    xsr,ysr = smooth_side_contour(xr,yr,D)            
    x,y,xr = edit_pot_ends(x1,y1,xsr,ysr,D,filename,image_grey)
    x_,y_ = final_smoothing(x,y)
    x_r,y_r = final_smoothing(xr,y)
    
    return x_,y_,x_r,y_r

#### Choosing Algorithm by Class

In [None]:
# Class lookup table read by choose_alg: its columns 'A1', 'A4' and 'A3' list the class names
# handled by each algorithm. The path is relative to the current working directory.
table = pd.read_csv('pot_classes.csv')

In [None]:
def _first_row_matching(column,name_tokens):
    # Returns the index of the first row of table[column] whose words all occur in name_tokens,
    # or None when no row matches. Words are runs of lower-case letters, optionally starting
    # with one capital. An entry without any such word matches every name.
    for j in range(0,len(table)):
        entry_tokens = re.findall("[a-z]+|[A-Z][a-z]+",str(table[column][j]))
        if all(tok in name_tokens for tok in entry_tokens):
            return j
    return None


def choose_alg(filename):
    # Chooses the contour extraction algorithm for a pot class.
    # Input: class / folder name (e.g. "bell_krater").
    # Output: (t, alg) - row index of the matching entry in the class table and the name of the
    #         column it was found in ('A1', 'A4' or 'A3'). The columns are searched in that order
    #         and the first match wins. Without a match, t is 0 and alg is guessed from keywords:
    #         "krater"/"cup" -> 'A3', "amphora" -> 'A4', otherwise 'A1'.

    name_tokens = re.findall("[a-z]+|[A-Z][a-z]+",filename)

    for alg in ('A1','A4','A3'):
        t = _first_row_matching(alg,name_tokens)
        if t is not None:
            return t, alg

    # Nothing in the table matched: fall back to keywords in the name.
    if ("krater" in name_tokens) or ("cup" in name_tokens):
        return 0, 'A3'
    if "amphora" in name_tokens:
        return 0, 'A4'
    return 0, 'A1'


#### Detecting Bad Contours

In [None]:
def group_extremas(x,width):
    # Splits a sequence of values into consecutive groups.
    # Input: values (x, non-empty) and a width.
    # Output: list of groups (lists). A value joins the current group when it is at most
    #         2*width above the FIRST value of that group; otherwise it opens a new group.

    groups = [[x[0]]]
    for value in x[1:]:
        current = groups[-1]
        if value <= (current[0]+(2*width)):
            current.append(value)
        else:
            groups.append([value])

    return groups

### Extracting Contours

In [None]:
def full_extent(ax, pad=0.0):
    """Return the bounding box of an axes together with its tick labels and title.

    The box is the union of the window extents of the x and y tick labels, the
    axes itself and the axes title, scaled by ``1.0 + pad`` in both directions.
    """
    # Text extents are undefined until the figure has been drawn once.
    ax.figure.canvas.draw()

    artists = [*ax.get_xticklabels(), *ax.get_yticklabels(), ax, ax.title]
    extents = [artist.get_window_extent() for artist in artists]

    scale = 1.0 + pad
    return Bbox.union(extents).expanded(scale, scale)

#### Create Folders

In [None]:
# direc_nme: root folder of the input pot images (one sub-folder per class folder).
# cont_fold: root folder that receives the extracted contours (plots and CSVs).
# NOTE(review): both are machine-specific absolute Windows paths - adjust before running elsewhere.
direc_nme = 'C:\\Users\\arian\\Documents\\Conts_temp'
cont_fold = 'C:\\Users\\arian\\Documents\\GitHub\\pots\\Code\\Contours_temp'


In [None]:
# Mirror the input folder tree under cont_fold: for every class folder create Good, Bad and CSVs,
# each containing one sub-folder per sub-folder of the class folder.
# os.makedirs(..., exist_ok=True) creates missing parents and lets this cell be re-run
# without failing on folders that already exist.
for classfold in os.listdir(direc_nme):
    class_out = cont_fold+"\\"+classfold
    subfolders = os.listdir(direc_nme+"\\"+classfold)
    for kind in ("Good","Bad","CSVs"):
        os.makedirs(class_out+"\\"+kind,exist_ok=True)
        for fold in subfolders:
            os.makedirs(class_out+"\\"+kind+"\\"+fold,exist_ok=True)

In [None]:
# Collects the paths of all files matching `pattern` below one class folder.
# NOTE(review): this cell relies on notebook state - `classfold` is whatever the previous loop
# left behind (its last class folder) and `pattern` is only assigned in a later cell. The
# extraction loop further down rebuilds `files` for every class folder itself.
files = []
start_dir = direc_nme+"\\"+classfold
# Length of the path prefix up to and including the separator after the class folder.
class_ln = len(direc_nme+"\\")+len(classfold)+1
for dir,_,_ in os.walk(start_dir):
    files.extend(glob(os.path.join(dir,pattern)))

#### Extract Contours

In [None]:
# Batch driver: for every pot image below direc_nme, choose an extraction algorithm from the
# class folder name, binarize the image, extract the side contour, save a diagnostic plot into
# the class's Good or Bad folder and write the contour to a CSV file.
# NOTE(review): `all_cont_files` (images that already have a CSV) is built by a separate cell
# and must exist before this cell is run.
direc_nme = 'C:\\Users\\arian\\Documents\\Conts_temp'
cont_fold = 'C:\\Users\\arian\\Documents\\GitHub\\pots\\Code\\Contours_temp'
current_dir = 'C:\\Users\\arian\\Documents\\GitHub\\pots\\Code' 

# Class-name patterns of the former sub-algorithms A6 and A5.
a6 = "(bird|skyphos|kotyle|lekanis|plemochoe|pithos)"
a5 = "(askos|bail|ring|kernos)"

# Pots for which every extraction attempt failed (a placeholder contour is written for them).
pot_errors = []

# Number of pots that made it through the whole pipeline (placeholder contours included).
pot_tol = 0

pattern   = "*.jpg"

# Pots skipped because of an unexpected error anywhere in the pipeline.
glob_errors = []

for classfold in tqdm(os.listdir(direc_nme)):
    files = []
    start_dir = direc_nme+"\\"+classfold
    class_ln = len(direc_nme+"\\")+len(classfold)+1
    for dir,_,_ in os.walk(start_dir):
        files.extend(glob(os.path.join(dir,pattern)))
    for pot_path in tqdm(files):
        # "<folder>\pot..." part of the path, the key used in all_cont_files.
        k = re.search("([^\\\\])+\\\\pot.+",pot_path).span()
        if pot_path[k[0]:k[1]] not in all_cont_files:
            try:
                loc = re.search("\\\\pot",pot_path).span()
                filename = pot_path[loc[0]+1:]
                loc = re.search(classfold+'\\\\([^\\\\])+\\\\',pot_path).span() 
                fold = pot_path[class_ln:loc[1]-1]

                # 1) Choose Algorithm: 
                #####################
                c = choose_alg(str(fold))
                
                if "matamados" in pot_path:
                    
                    # These images are first shrunk to 40% of their size.
                    # NOTE(review): current_dir+filename has no path separator, so the resized copy
                    # is written next to (not inside) current_dir; saving and loading agree, so it works.
                    foo = Image.open(pot_path)
                    p = foo.size
                    # Image.LANCZOS is the same filter as the removed alias Image.ANTIALIAS.
                    foo = foo.resize((int(p[0]*0.4),int(p[1]*0.4)),Image.LANCZOS)
                    foo.save(current_dir+filename,quality=95)
                    image_col = data.load(current_dir+filename,as_gray=False)
                    try:
                        image_bw = binary_colour_matamados(image_col)
                        fig, ax = plt.subplots(figsize=(5, 7))
                        ax.imshow(image_bw)
                        ax.set_axis_off()
                        plt.savefig("pot_new.jpg",bbox_inches = 'tight',pad_inches=0.0)
                        plt.close()        
                        # 2.3) Get longest contour from new pot image.
                        img = data.load(current_dir+'\\pot_new.jpg',as_gray=True)
                    except Exception:
                        # Best effort: work on the grey-scale version of the resized image.
                        img = data.load(current_dir+filename,as_gray=True)
                        image_col = img
                        
                else:
                    # 2) Binarize Image:
                    ###################
                    try:
                        # 2.1) Get B/W version of original image.
                        image_col = data.load(pot_path,as_gray=False)
                        image_bw = binary_colour(image_col)
                        if np.mean(image_bw) > 15:
                            # 2.2) Create a plot and temporarily save. This will be the new B/W of the pot.
                            fig, ax = plt.subplots(figsize=(5, 7))
                            ax.imshow(image_bw)
                            ax.set_axis_off()
                            plt.savefig("pot_new.jpg",bbox_inches = 'tight',pad_inches=0.0)
                            plt.close()        
                            # 2.3) Get longest contour from new pot image.
                            img = data.load(current_dir+'\\pot_new.jpg',as_gray=True)
                        else:
                            img = data.load(pot_path,as_gray=True)
                    except Exception:
                        # Best effort: work on the grey-scale version of the original image.
                        img = data.load(pot_path,as_gray=True)
                        image_col = img

                # 3) Find Contour:
                #################
                # Every branch tries its preferred routine first, then one alternative; if both fail a
                # placeholder contour [0,0,0] is used and the pot is recorded in pot_errors.
                if c[1] == 'A1':
                    # Decide on which side to trace: compare the distance of the pot centre to the
                    # left-most and right-most outline point in the top third of the outline.
                    contour = get_outline_contour(img)
                    xcont = contour[:,1]
                    ycont = contour[:,0]
                    ub = min(ycont)+(max(ycont)-min(ycont))/3
                    lb = min(ycont)
                    coords2 = np.where((np.array(ycont)>lb)&(np.array(ycont)<ub))[0]
                    if len(coords2) == 0:
                        coords2 = list(range(0,len(xcont)))
                    mn = min(np.array(xcont)[coords2])
                    mx = max(np.array(xcont)[coords2])
                    try:
                        xc,yc,coords = find_centre(xcont,ycont)
                    except Exception:
                        xc = mn + ((mx-mn)/2)
                        yc = lb + ((ub-lb)/2)

                    if abs(xc-mn) < abs(mx-xc):
                        alg = 'A1'
                        try:
                            xnr,ynr,x,y = cont_extraction_1(img,"L",fold)
                        except Exception:
                            try:
                                xnr,ynr,x,y = cont_extraction_1(img,"R",fold)
                                alg = 'A1-A2'
                            except Exception:
                                xnr = [0,0,0]
                                ynr = [0,0,0]
                                x = xnr
                                y = ynr
                                alg = 'A1-A2'
                                pot_errors.append(str(fold)+"-"+str(filename)+"-"+str(alg))
                    else:
                        alg = 'A2'
                        try:
                            xnr,ynr,x,y = cont_extraction_1(img,"R",fold)
                        except Exception:
                            try:
                                xnr,ynr,x,y = cont_extraction_1(img,"L",fold)
                                alg = 'A2-A1'
                            except Exception:
                                xnr = [0,0,0]
                                ynr = [0,0,0]
                                x = xnr
                                y = ynr
                                alg = 'A2-A1'
                                pot_errors.append(str(fold)+"-"+str(filename)+"-"+str(alg))

                if c[1] == 'A3':
                    alg = 'A3'
                    # Cups and bowls try the cup routine (cont_extraction_4) first, unless they are of a
                    # kotyle-like (a6) class; everything else tries cont_extraction_2 first.
                    # The two "not in" tests were previously joined by `or`, which is true for
                    # practically every name and made the cup-first branch unreachable.
                    if (("cup" not in fold) and ("bowl" not in fold)) or (re.search(a6,fold)):
                        try:
                            xnr,ynr,x,y = cont_extraction_2(img,fold)
                        except Exception:
                            try:
                                xnr,ynr,x,y = cont_extraction_4(img,fold)
                                alg = 'A2-A2cup'
                            except Exception:
                                xnr = [0,0,0]
                                ynr = [0,0,0]
                                x = xnr
                                y = ynr
                                alg = 'A2-A2cup'
                                pot_errors.append(str(fold)+"-"+str(filename)+"-"+str(alg))

                    else:
                        try:
                            xnr,ynr,x,y = cont_extraction_4(img,fold)
                        except Exception:
                            try:
                                xnr,ynr,x,y = cont_extraction_2(img,fold)
                                alg = 'A2cup-A2'
                            except Exception: 
                                xnr = [0,0,0]
                                ynr = [0,0,0]
                                x = xnr
                                y = ynr
                                alg = 'A2cup-A2'
                                pot_errors.append(str(fold)+"-"+str(filename)+"-"+str(alg))

                if c[1] == 'A4':
                    alg = 'A4'
                    if re.search(a5,fold):
                        try:
                            xnr,ynr,x,y = cont_extraction_5(img,fold)
                        except Exception:
                            xnr = [0,0,0]
                            ynr = [0,0,0]
                            x = xnr
                            y = ynr
                            pot_errors.append(str(fold)+"-"+str(filename)+"-"+str(alg))
                    else:
                        try:
                            xnr,ynr,x,y = cont_extraction_3(img,fold)
                        except Exception:
                            try:
                                xnr,ynr,x,y = cont_extraction_2(img,fold)
                                alg = 'A4-A3'
                            except Exception:
                                xnr = [0,0,0]
                                ynr = [0,0,0]
                                x = xnr
                                y = ynr
                                alg = 'A4-A3'
                                pot_errors.append(str(fold)+"-"+str(filename)+"-"+str(alg))      


                # 4) Create Plot:  
                ################

                # The contour was found on img; scale it by the height ratio so that it can be
                # drawn on top of image_col.
                rng1 = len(image_col)
                rng2 = len(img)
                t = rng1/rng2
                x_ = deepcopy(xnr)
                y_ = deepcopy(ynr)
                if sum(x) > 0:
                    x_ = x_*t
                    y_ = y_*t
                fig, ax = plt.subplots(figsize=(5, 7))
                plt.gray()
                ax.imshow(image_col,cmap='gray')
                ax.plot(x_,y_,'-r')
                ax.set_axis_off()

                # 5) Detect Bad Contours:
                ########################

                # NOTE(review): if binarization failed above, image_bw is undefined (first pot) or
                # still holds the mask of a previous pot - decide how such pots should be scored.
                pix_mn = np.mean(image_bw)

                if pix_mn < 55:    
                    plt.title(alg+' - '+str(pix_mn))
                    plt.savefig(cont_fold+"\\"+classfold+"\\Bad\\"+fold+"\\"+filename[:-4]+".png")
                    plt.close()
                else:
                    # Quality indicators of the contour:
                    #   a - number of contour points
                    #   b - number of groups of large jumps in x (1000 if there are none)
                    #   c - largest jump in x between neighbouring points
                    #   d - sum of all jumps in x
                    xprime = np.diff(x)
                    T = [];
                    for j in range(0,len(y)-1):
                        T.append((y[j+1]+y[j])/2)
                    xprime_ = abs(xprime)
                    extrm = []
                    for i in range(0,len(xprime)):
                        if xprime_[i] >= 10:
                            extrm.append(T[i])
                    if len(extrm) > 0:
                        grouped = group_extremas(extrm,5)
                        b = len(grouped)
                    else:
                        b = 1000

                    a = len(x)
                    c = np.max(xprime_)
                    d = sum(xprime_)

                    plt.title(alg+' - '+str(pix_mn)+ ' - '+str([a,b,c,d]))

                    # The thresholds depend on the class; a contour exceeding any of them is saved as Bad.
                    if ("cup" not in fold) and ("lekanis" not in fold) and ("lydion" not in fold) and ("plate" not in fold):
                        if (a<65) or (b>4) or (c>140) or (d>400):
                            plt.savefig(cont_fold+"\\"+classfold+"\\Bad\\"+fold+"\\"+filename[:-4]+".png")
                            plt.close()
                        else:
                            plt.savefig(cont_fold+"\\"+classfold+"\\Good\\"+fold+"\\"+filename[:-4]+".png")
                            plt.close()
                    if "cup" in fold:
                        if (a<55) or (b>4) or (c>140) or (d>400):
                            plt.savefig(cont_fold+"\\"+classfold+"\\Bad\\"+fold+"\\"+filename[:-4]+".png")
                            plt.close()
                        else:
                            plt.savefig(cont_fold+"\\"+classfold+"\\Good\\"+fold+"\\"+filename[:-4]+".png")
                            plt.close()
                    if ("lekanis" in fold) or ("plate" in fold):
                        if (a<40) or (b>4) or (c>140) or (d>400):
                            plt.savefig(cont_fold+"\\"+classfold+"\\Bad\\"+fold+"\\"+filename[:-4]+".png")
                            plt.close()
                        else:
                            plt.savefig(cont_fold+"\\"+classfold+"\\Good\\"+fold+"\\"+filename[:-4]+".png")
                            plt.close()
                    if "lydion" in fold:
                        if (a<65) or (b>4) or (c>140) or (d>500):
                            plt.savefig(cont_fold+"\\"+classfold+"\\Bad\\"+fold+"\\"+filename[:-4]+".png")
                            plt.close()
                        else:
                            plt.savefig(cont_fold+"\\"+classfold+"\\Good\\"+fold+"\\"+filename[:-4]+".png")
                            plt.close()


                # Save CSV:
                ###########
                cont_coord = {}
                cont_coord['x'] = x
                cont_coord['y'] = y
                with open(cont_fold+"\\"+classfold+"\\CSVs\\"+fold+"\\"+filename[:-4]+".csv", "w", newline='') as outfile:
                    writer = csv.writer(outfile)
                    writer.writerow(cont_coord.keys())
                    writer.writerows(it.zip_longest(*cont_coord.values()))

                pot_tol = pot_tol +1
            except Exception:
                # Skip this pot but keep the batch running. The figure is actually closed now
                # (the call parentheses were missing), so failed pots no longer leak figures.
                plt.close()
                glob_errors.append(pot_path) 
            


with open("alg_errors_v4.txt", "w") as output:
    output.write(str(pot_errors))
           
print('\nSuccessful Contour Extractions: '+str(pot_tol))
print('\nUnsuccessful Contour Extractions: '+str(len(pot_errors)))
# Pots that raised an unexpected error are in neither of the two counts above.
print('\nPots skipped after unexpected errors: '+str(len(glob_errors)))

In [None]:
# Builds all_cont_files: for every contour CSV already present below cont_fold, the
# "<folder>\pot....jpg" name of the image it was extracted from. The extraction loop skips
# images whose name is in this list.
# NOTE(review): this cell reassigns `pattern` to "*.csv"; the extraction cell sets it back to "*.jpg".
pattern   = "*.csv"
cont_files = []
for classfold in tqdm(os.listdir(cont_fold)):
    start_dir = cont_fold+"\\"+classfold
    for dir,_,_ in os.walk(start_dir):
        cont_files.extend(glob(os.path.join(dir,pattern)))
        
all_cont_files = []
for i in range(0,len(cont_files)):
    # Strip the (hard-coded) code folder prefix from the path.
    nme = cont_files[i][len('C:\\Users\\arian\\Documents\\GitHub\\pots\\Code'):]
    # Turn the CSV name back into the image name (replaces every lower-case "csv" in the path).
    nme = nme.replace("csv","jpg")
    # Keep the last folder name and the file name; re.search returns None (and .span() fails)
    # for files whose name does not start with "pot".
    k = re.search("([^\\\\])+\\\\pot.+",nme).span()
    all_cont_files.append(nme[k[0]:k[1]])
    