### Plan:

A1–A6 are the sub-algorithms that we worked with previously:
- A1: Pots with no or one handle only, e.g. Alabastron.
- A2: Pots with two handles that stick out, e.g. Bell-Kraters.
- A3: Pots with two handles that loop back on to the pot, e.g. Amphorae
- A4: Cups, e.g. Little Master Band Cups.
- A5: Pots with one handle right at the top of the pot, e.g. Askos.
- A6: Pots (including cups) that have two handles sticking out, right at the top, e.g. Kotyles.

We have sorted/changed these into the following four algorithms.

##### Algorithm 1:
- Uses the previous (A1,"left").
- Usually works well for pots with only one handle on the right, or pots with no handle.

##### Algorithm 2:
- Uses the previous (A1,"right").
- Usually works well for pots with only one handle on the left, or pots with no handle.

##### Algorithm 3:
- Uses the previous A2, A4 and A6.
- Usually works well for cups and all vases with a handle sticking out on each side.

##### Algorithm 4:
- Uses the previous A3 and A5.
- Usually works well for pots with handles on each side that loop round to the pot (amphora-like handles) or pots with one handle on the top of the pot (e.g. askos vases).

### Functions

In [2]:
import matplotlib.pyplot as plt 
import numpy as np
import pylab as pl
import pandas as pd
from math import sqrt
from skimage.filters import threshold_otsu
from skimage import measure
import os
import re
import itertools as it
import csv
from tqdm import tqdm_notebook as tqdm
from PIL import Image
from skimage.color import rgb2gray
from skimage.filters import gaussian
from skimage.segmentation import active_contour
from skimage import data, img_as_float
from copy import deepcopy
from matplotlib.transforms import Bbox
from skimage.segmentation import (morphological_chan_vese,
                                  morphological_geodesic_active_contour,
                                  inverse_gaussian_gradient,
                                  checkerboard_level_set)
pl.ion()

#### Image Binarization

In [3]:
def binary_colour(img):
    """Push near-black pixels to black and near-white pixels to white.

    Works on a copy of ``img``. A pixel is treated as a single grey value when
    ``img[1][2]`` is an ``np.uint8`` scalar; otherwise each pixel is indexed as
    three channels. After thresholding, two fill passes blacken any pixel that
    lies between two black pixels: first between directly opposite neighbours,
    then between pixels about 1% of the image size away vertically or
    horizontally. The fill passes update the copy in place, so pixels changed
    earlier in a scan influence later ones.

    Returns the modified copy.
    """
    n_cols = len(img[0])
    n_rows = len(img)
    out = deepcopy(img)

    # Default thresholds (dark limit used to be 190); darker images get a
    # lower, tighter pair (dark limit used to be 121 in c4).
    dark_below, light_above = 185, 215
    if np.mean(img) < 110:
        dark_below, light_above = 110, 120

    colour = not isinstance(img[1][2], np.uint8)
    black = [0, 0, 0] if colour else 0
    white = [255, 255, 255] if colour else 255

    # Threshold pass: decisions are taken on the untouched input.
    for r in range(n_rows):
        for c in range(n_cols):
            px = img[r][c]
            if colour:
                is_dark = (px[0] < dark_below) and (px[1] < dark_below) and (px[2] < dark_below)
            else:
                is_dark = px < dark_below
            if is_dark:
                out[r][c] = black

            if colour:
                is_light = (px[0] > light_above) and (px[1] > light_above) and (px[2] > light_above)
            else:
                is_light = px > light_above
            if is_light:
                out[r][c] = white

    def between_black(r, c, offsets):
        # True when, for some offset, the pixels at +offset and -offset are both black.
        for dr, dc in offsets:
            if (np.sum(out[r + dr][c + dc]) == 0) and (np.sum(out[r - dr][c - dc]) == 0):
                return True
        return False

    # Fill pass 1: immediate neighbours (both diagonals, vertical, horizontal).
    adjacent = ((1, 1), (1, 0), (0, 1), (1, -1))
    for r in range(1, n_rows - 1):
        for c in range(1, n_cols - 1):
            if between_black(r, c, adjacent):
                out[r][c] = black

    # Fill pass 2: neighbours roughly 1% of the image size away.
    step_r = int(np.ceil(len(out) * 0.01))
    step_c = int(np.ceil(len(out[1]) * 0.01))
    distant = ((step_r, 0), (0, step_c))
    for r in range(step_r, n_rows - step_r):
        for c in range(step_c, n_cols - step_c):
            if between_black(r, c, distant):
                out[r][c] = black

    return out

#### Outline Contour

In [4]:
def get_outline_contour(img):
    """Return the longest contour found in ``img``.

    The image is binarised at its Otsu threshold and contours are traced at
    level 0.8 with ``measure.find_contours``. The contour with the most points
    is returned (the first one on ties), as the array produced by
    ``find_contours``; callers read column 1 as x and column 0 as y.

    Raises:
        ValueError: if no contour is found.
    """
    thresh = threshold_otsu(img)
    binary = img > thresh
    contours = measure.find_contours(binary, 0.8)

    if len(contours) == 0:
        # Same exception type as the argmax-on-empty failure it replaces,
        # but with a message that says what went wrong.
        raise ValueError("no contours found in image")

    # max() keeps the first of equally long contours, like argmax did.
    return max(contours, key=len)

In [5]:
def get_longest_contours_a3(img,filename):
    """Pick the pot-body contour and two matching handle contours.

    Contours are traced on the Otsu-binarised image. Candidates are the
    contours whose length is among the 10 longest (20 when "shoulder" occurs
    in ``filename``). The candidate with the largest y-range is taken as the
    body. Two further candidates are then chosen as the pair that minimises
    the summed difference in y-range, x-range and maximum y, subject to
    position filters that differ for "shoulder" files.

    Positions in the candidate lists and positions in ``cont`` are kept in
    separate variables, so the body is excluded from the handle search by its
    candidate position.

    Returns:
        ``x1, y1, x2, y2, x3, y3`` - coordinate arrays of the body contour and
        of the two selected contours.

    Raises:
        ValueError: if no contour is found (``min`` / ``max`` of empty lists).
        IndexError: if fewer than three candidates exist.
    """
    thresh = threshold_otsu(img)
    binary = img > thresh
    cont = measure.find_contours(binary, 0.8)

    # Contour lengths, used to shortlist the longest ones.
    cont_ln = [len(contour) for contour in cont]

    shoulder = "shoulder" in filename
    longest_c = sorted(cont_ln,reverse=True)[:20 if shoulder else 10]

    long_ind = []        # position of each candidate in ``cont``
    long_ind_rngsx = []  # x-range per candidate
    long_ind_rngsy = []  # y-range per candidate
    long_ind_mx = []     # max y per candidate
    long_ind_mn = []     # min y per candidate
    long_ind_mn_x = []   # min x per candidate
    long_ind_mx_x = []   # max x per candidate

    for i in range(0,len(cont_ln)):
        if cont_ln[i] in longest_c:
            long_ind.append(i)
            c = cont[i]
            x = c[:,1]
            y = c[:,0]
            long_ind_rngsy.append(abs(max(y) - min(y)))
            long_ind_rngsx.append(abs(max(x) - min(x)))
            long_ind_mx.append(max(y))
            long_ind_mn.append(min(y))
            long_ind_mx_x.append(max(x))
            long_ind_mn_x.append(min(x))

    n = len(long_ind)
    mn = min(long_ind_mn)
    mx = max(long_ind_rngsy)

    # Candidate position of the body (largest y-range).
    body = int(np.argmax(long_ind_rngsy))

    ub = long_ind_mn[body] + (mx/4)

    # Default pair: the first two candidates that are not the body.
    inds = list(range(0,n))
    inds.remove(body)
    k = (inds[0],inds[1])
    tot = 1000

    # Relax the size filter (mx/9 instead of mx/8) when fewer than two
    # candidates pass the stricter one.
    rng_inds = [i for i in range(0,n)
                if all([long_ind_rngsy[i] > mx/8,long_ind_mn[i] < ub,long_ind_mx[i] > mn, i!=body])]
    t = 8 if len(rng_inds) >= 2 else 9

    c1 = cont[long_ind[body]]
    x1 = c1[:,1]
    y1 = c1[:,0]

    # Outer eighths of the body in x, and the middle third of the body in y.
    egth = min(x1) + ((max(x1) - min(x1))/8)
    egth2 = max(x1) - ((max(x1) - min(x1))/8)
    thrd = min(y1) + ((max(y1) - min(y1))/3)
    thrd2 = min(y1) + (2*(max(y1) - min(y1))/3)

    def pair_cost(i,j):
        # Dissimilarity of two candidates: y-range, x-range and max y.
        d1 = abs(long_ind_rngsy[i] - long_ind_rngsy[j])
        d2 = abs(long_ind_rngsx[i] - long_ind_rngsx[j])
        d3 = abs(long_ind_mx[i] - long_ind_mx[j])
        return d1+d2+d3

    if shoulder:
        def shoulder_candidate(i):
            # Reaches an outer eighth in x and lies inside the middle third in y.
            at_edge = (long_ind_mn_x[i] < egth) or (long_ind_mx_x[i] > egth2)
            return at_edge and long_ind_mx[i] < thrd2 and long_ind_mn[i] > thrd

        for i in range(0,n):
            if not shoulder_candidate(i):
                continue
            for j in range(i+1,n):
                # Both members of the pair must pass the filter.
                if j != body and shoulder_candidate(j):
                    sm = pair_cost(i,j)
                    if sm < tot:
                        tot = sm
                        k = (i,j)
    else:
        for i in range(0,n):
            if all([long_ind_rngsy[i] > mx/t,long_ind_mn[i] < ub,long_ind_mx[i] > mn, i!=body]):
                for j in range(i+1,n):
                    # NOTE(review): the third test uses the *min* y of j while the
                    # outer loop uses the *max* y of i; kept as found - confirm intent.
                    if all([long_ind_rngsy[j] > mx/t, long_ind_mn[j] < ub, long_ind_mn[j] > mn, j!=body]):
                        sm = pair_cost(i,j)
                        if sm < tot:
                            tot = sm
                            k = (i,j)

    c2 = cont[long_ind[k[0]]]
    x2 = c2[:,1]
    y2 = c2[:,0]

    c3 = cont[long_ind[k[1]]]
    x3 = c3[:,1]
    y3 = c3[:,0]

    return x1,y1,x2,y2,x3,y3

In [6]:
def get_longest_contours_a5(img):
    """Pick the pot-body contour and one further contour near its top.

    Candidates are the contours (traced on the Otsu-binarised image) whose
    length is among the 10 longest. The body is the candidate with the largest
    y-range. The second contour is the candidate, other than the body, whose
    minimum y lies above the body's minimum y plus a third of the body's
    y-range and whose (y-range + x-range) is largest.

    Returns:
        ``x1, y1, x2, y2`` - coordinates of the body and of the second contour.
    """
    binary = img > threshold_otsu(img)
    contours = measure.find_contours(binary, 0.8)

    lengths = [len(contour) for contour in contours]
    top_lengths = sorted(lengths, reverse=True)[:10]

    candidates = []   # positions in ``contours``
    heights = []      # y-range per candidate
    widths = []       # x-range per candidate
    tops = []         # min y per candidate
    for idx, length in enumerate(lengths):
        if length not in top_lengths:
            continue
        cols = contours[idx][:,1]
        rows = contours[idx][:,0]
        candidates.append(idx)
        heights.append(abs(max(rows) - min(rows)))
        widths.append(abs(max(cols) - min(cols)))
        tops.append(min(rows))

    body = np.argmax(heights)
    limit = tops[body] + (heights[body]/3)

    # Candidates starting above ``limit``, excluding the body itself.
    near_top = [i for i in range(len(candidates)) if (tops[i] < limit) and (i != body)]

    sizes = np.array([heights[i] + widths[i] for i in range(len(heights))])
    second = near_top[np.argmax(sizes[near_top])]

    body_contour = contours[candidates[body]]
    second_contour = contours[candidates[second]]

    return body_contour[:,1], body_contour[:,0], second_contour[:,1], second_contour[:,0]

In [7]:
def get_contour_side(X,Y,D):
    """Split a closed contour in two near its centre line and return one part.

    Points whose x lies within a band around the centre x (from
    ``find_centre``) are searched for the largest and the smallest y. The
    contour is cut at those two indices into an "inner" slice (between the
    indices) and an "outer" slice (the wrap-around remainder).

    ``D == "R"`` returns the outer slice and ``D == "L"`` the inner slice. Any
    other value returns the shorter slice and labels it "L" or "R" by
    comparing the minimum x of the two slices.

    Returns:
        ``xs, ys, xc, yc, D`` - the chosen slice as lists, the centre
        coordinates, and the side label.
    """
    xc,yc,coords = find_centre(X,Y)

    # NOTE(review): the band half-width includes min(X), so it depends on where
    # the pot sits in the image; a plain tenth of the width may have been
    # intended - confirm before changing.
    half_band = min(X) + (max(X)-min(X))/10
    central = np.where((X >= xc - half_band) & (X <= xc + half_band))[0]

    central_y = Y[central]
    pnt_max_y = central[np.argmax(central_y)]
    pnt_min_y = central[np.argmin(central_y)]

    first = min(pnt_max_y,pnt_min_y)
    last = max(pnt_max_y,pnt_min_y)

    all_x = list(X)
    all_y = list(Y)

    inner_x = all_x[first:last]
    inner_y = all_y[first:last]
    outer_x = all_x[last:] + all_x[:first]
    outer_y = all_y[last:] + all_y[:first]

    if D == "R":
        return outer_x,outer_y,xc,yc,D
    if D == "L":
        return inner_x,inner_y,xc,yc,D

    # Side not given: label the slices by which one reaches further left ...
    if min(inner_x) > min(outer_x):
        inner_label, outer_label = "R", "L"
    else:
        inner_label, outer_label = "L", "R"

    # ... and hand back the shorter one (the outer slice on ties).
    if len(inner_y) >= len(outer_y):
        return outer_x,outer_y,xc,yc,outer_label
    return inner_x,inner_y,xc,yc,inner_label

#### Centers

In [8]:
def find_centre(x,y):
    """Estimate the centre of a contour.

    ``yc`` is the midpoint of the y-extent. ``xc`` is the midpoint of the
    x-extent of the points whose y lies strictly between ``yc`` and ``max(y)``
    minus a tenth of the y-extent.

    Returns:
        ``xc, yc, coords`` where ``coords`` are the indices of the points used
        to compute ``xc``.
    """
    y_low = min(y)
    y_high = max(y)
    yc = y_low + (y_high - y_low)/2
    upper_limit = y_high - (y_high - y_low)/10

    y_arr = np.array(y)
    coords = np.where((y_arr < upper_limit) & (y_arr > yc))[0]

    band_x = np.array(x)[coords]
    xc = min(band_x) + (max(band_x) - min(band_x))/2
    return xc,yc,coords

In [9]:
def get_top_pot_a3(x,y,xc):
    """Return the smallest y among contour points within 2 units of ``xc`` in x.

    If no point falls inside that band, the overall minimum of ``y`` is
    returned instead.
    """
    near_centre = np.where((x <= xc+2) & (x >= xc-2))[0]

    if len(near_centre) < 1:
        return np.min(y)

    band_y = np.array(y)[near_centre]
    return y[near_centre[np.argmin(band_y)]]

#### Handles

In [10]:
def remove_handles_a2(xs,ys,D,filename):
    """Cut a protruding handle out of one side of a pot contour.

    ``xs`` / ``ys`` are reversed IN PLACE when the first y is larger than the
    last, so the contour runs from small y to large y. The handle is searched
    for only between 10% and 80% of the y-extent. ``filename`` is not used.

    Returns:
        ``xsr, ysr`` - lists holding the contour with the handle section left out.
    """

    # Orient the contour so that y increases along it (mutates the inputs).
    if ys[0] > ys[-1]:
        ys.reverse()
        xs.reverse()
        
    # Search band: skip the top 10% and the bottom 20% of the y-extent.
    lb = min(ys) + ((max(ys) - min(ys))*0.1)
        
    ub = max(ys) - ((max(ys) - min(ys))*0.2)
        
    rng = np.where((ys<ub) & (ys > lb))[0]
    
    # k: position (within rng) of the most extreme x on the requested side.
    if D == "L":
        k = np.argmin(np.array(xs)[rng])
    else:
        k = np.argmax(np.array(xs)[rng])
        
    # k1: position (within rng) of the largest y among the points up to two
    # places before k. NOTE(review): for k < 2 the slice end is negative and
    # for k == 2 it is empty - confirm this cannot occur in practice.
    k1 = np.argmax(np.array(ys)[rng][:k-2])
        
    # k2: first point at or after k whose x is within +/-1 of the x at k1.
    k2 = np.where((np.array(xs)[rng[k:]] <= xs[rng[k1]] + 1) & (np.array(xs)[rng[k:]] >= xs[rng[k1]] - 1))[0]
    
    if len(k2) < 1:
        # No such point: cut through to the end of the search band.
        k2 = len(rng) -1
    else:
        # Convert from an offset within rng[k:] to a position within rng.
        k2 = k2[0] + k
        
    xsr = []
    ysr = []
    
    # Last index whose y is below the lower band limit.
    t1 = np.where(ys < lb)[0]
    t1 = t1[-1]
    
    # Last index whose y is below the upper band limit.
    t2 = np.where(ys < ub)[0]
    t2 = t2[-1]
    
    # Reassemble: part before the band, band up to k1, band from k2 on, rest.
    xsr.extend(xs[:t1])
    ysr.extend(ys[:t1])
    xsr.extend(np.array(xs)[rng[:k1]])
    ysr.extend(np.array(ys)[rng[:k1]])
    xsr.extend(np.array(xs)[rng[k2:]])
    ysr.extend(np.array(ys)[rng[k2:]])
    xsr.extend(xs[t2:])
    ysr.extend(ys[t2:])
    
    return xsr,ysr
    
    

In [11]:
def get_top_bot_handle_a3(x_,y_):
    """Index the points lying in the extreme tenths of a contour's y-extent.

    Returns:
        ``top, bot`` - ``top`` lists the indices whose y is within the highest
        tenth of the y-range (largest y values), ``bot`` those within the
        lowest tenth (smallest y values).
    """
    y_high = np.max(y_)
    y_low = np.min(y_)
    tenth = (y_high - y_low)/10

    high_band_start = y_high - tenth
    low_band_end = y_low + tenth

    positions = range(0,len(x_))
    top = [i for i in positions if high_band_start <= y_[i] <= y_high]
    bot = [i for i in positions if y_low <= y_[i] <= low_band_end]
    return top,bot

In [12]:
def get_handle_side_a3(x_,y_,top,bot,D):
    """Extract one stretch of a handle contour between two anchor points.

    ``p1`` is chosen among the ``top`` indices and ``p2`` among the ``bot``
    indices (as produced by ``get_top_bot_handle_a3``): for ``D == "L"`` the
    smallest-x ``top`` point and the largest-x ``bot`` point, otherwise the
    reverse. The travel direction of the contour at ``p2`` is estimated from
    the two preceding points, and that direction together with ``D`` decides
    which of the two arcs between ``p1`` and ``p2`` is returned.

    Returns:
        ``xh, yh`` - lists of coordinates; both are empty when ``D`` is
        neither "L" nor "R".
    """
    if D == "L":
        pick_top, pick_bot = np.argmin, np.argmax
    else:
        pick_top, pick_bot = np.argmax, np.argmin

    p1 = top[pick_top(x_[top])]
    p2 = bot[pick_bot(x_[bot])]

    # Direction of travel at p2: "R" if x grew over either of the last two steps.
    moving_right = (x_[p2] > x_[p2-1]) or (x_[p2] > x_[p2-2])
    heading = "R" if moving_right else "L"

    all_x = list(x_)
    all_y = list(y_)

    if D not in ("L", "R"):
        return [], []

    if heading != D:
        # Case 1: walk from p2 to p1, wrapping past the end if needed.
        if p1 > p2:
            return all_x[p2:p1], all_y[p2:p1]
        return all_x[p2:] + all_x[0:p1], all_y[p2:] + all_y[0:p1]

    # Case 2: walk from p1 to p2, wrapping past the end if needed.
    if p1 > p2:
        return all_x[p1:] + all_x[0:p2], all_y[p1:] + all_y[0:p2]
    return all_x[p1:p2], all_y[p1:p2]

In [13]:
def find_top_of_handle_a3(xs,ys,yh,yhs,D):
    """Locate the point on the side contour where the handle is assumed to start.

    Only points with ``ys`` below ``min(yh)`` minus a fifth of the y-range of
    ``yhs`` are considered. Among them the LAST local extremum of x is taken:
    a local maximum for ``D == "L"``, a local minimum otherwise. If there is
    no such extremum the first considered point is used.

    Returns:
        Index into ``xs`` / ``ys``. Falls back to ``np.argmin(ys)`` when no
        point lies below the limit or the inputs cannot be indexed consistently.
    """
    lb = min(yh) - (max(yhs) - min(yhs))/5
    try:
        rng = np.where(np.asarray(ys) < lb)[0]
        # Convert once instead of on every comparison.
        x_above = np.asarray(xs)[rng]

        # Compare every interior point with its two neighbours.
        mid = x_above[1:-1]
        prev = x_above[:-2]
        nxt = x_above[2:]
        if D == "L":
            hits = (mid > prev) & (mid >= nxt)
        else:
            hits = (mid < prev) & (mid <= nxt)

        found = np.flatnonzero(hits)
        # +1 because ``mid`` starts at position 1; 0 when nothing matched.
        k = found[-1] + 1 if found.size else 0
        k = rng[k]   # IndexError here when rng is empty -> fallback below
    except (IndexError, TypeError, ValueError):
        k = np.argmin(ys)

    return k

In [14]:
def get_handle_start_a4(xs,ys,rng,D):
    """Find the first turning point of x strictly inside an index range.

    Indices from ``rng[0]+1`` up to (not including) ``rng[-1]`` are scanned.
    For ``D == "L"`` a turning point is an x that is greater than the next x
    and not smaller than the previous one; otherwise it is an x smaller than
    the next and not greater than the previous. ``ys`` is not used.

    Returns:
        The first turning-point index, or the rounded midpoint of the range
        when there is none.
    """
    start = rng[0]
    stop = rng[-1]

    if D == "L":
        def is_turn(i):
            return (xs[i] > xs[i+1]) and (xs[i] >= xs[i-1])
    else:
        def is_turn(i):
            return (xs[i] < xs[i+1]) and (xs[i] <= xs[i-1])

    turns = [i for i in range(start+1,stop) if is_turn(i)]
    if turns:
        return turns[0]

    return int(np.round(start + (stop - start)/2))

In [15]:
def get_handle_end_a4(x,y,xs,ys,k,D):
    """Find where the side contour continues after the handle starting at ``k``.

    The handle is assumed to span 16% of the y-range of ``ys`` at the pot, and
    its lower attachment is assumed to sit more than 4% of the x-range of
    ``xs`` inward from ``xs[k]`` (to the right for ``D == "L"``, to the left
    otherwise). ``x`` and ``y`` are not used.

    Returns:
        The first index meeting both conditions. Raises ``IndexError`` when no
        point qualifies.
    """
    xs_arr = np.array(xs)
    ys_arr = np.array(ys)

    y_limit = ys[k] + 0.16*(max(ys) - min(ys))
    x_shift = 0.04*(max(xs) - min(xs))

    below_handle = ys_arr >= y_limit
    if D == "L":
        inward = xs_arr > (xs[k] + x_shift)
    else:
        inward = xs_arr < (xs[k] - x_shift)

    return np.where(below_handle & inward)[0][0]

In [16]:
def get_bot_area_handle_a5(x_,y_):
    """Return the indices of points within the highest tenth of the y-range.

    "Highest" refers to the largest y values, i.e. from ``max(y_)`` down to
    ``max(y_)`` minus a tenth of the y-extent, both ends included.
    """
    y_high = np.max(y_)
    y_low = np.min(y_)
    band_start = y_high - ((y_high - y_low)/10)

    return [i for i in range(0,len(x_)) if band_start <= y_[i] <= y_high]

In [17]:
def get_handle_bot_a5(x_,y_,bot):
    """Extract the arc of a handle contour between its two bottom-area extremes.

    ``p1`` / ``p2`` are the contour indices (taken from ``bot``) with the
    smallest / largest x. The contour is treated as running "acw" when
    ``p1 < p2`` and the index halfway between them is also in ``bot``;
    otherwise as "cw". Depending on that, either the direct slice between the
    two indices or the wrap-around remainder is returned.

    Returns:
        ``xh, yh`` - lists of coordinates (possibly empty).
    """
    bot_x = np.array(x_)[bot]
    p1 = bot[np.argmin(bot_x)]
    p2 = bot[np.argmax(bot_x)]

    # Determine direction of contour (clockwise or anticlockwise).
    direc = "cw"
    mid = int(min(p1,p2) + (abs(p1-p2)/2))
    if (mid in bot) and (p1 < p2):
        direc = "acw"

    xh = []
    yh = []

    if p1 < p2:
        if direc == "cw":
            # Wrap around the end of the contour; the final point is skipped.
            xh.extend(x_[p2:-1])
            yh.extend(y_[p2:-1])
            xh.extend(x_[0:p1])
            yh.extend(y_[0:p1])
        else:
            xh.extend(x_[p1:p2])
            yh.extend(y_[p1:p2])
    else:
        if direc == "cw":
            xh.extend(x_[p2:p1])
            yh.extend(y_[p2:p1])
        else:
            # Not reached with the direction rule above ("acw" needs p1 < p2).
            # Kept as a valid wrap-around slice instead of the undefined
            # name ``end`` it used before.
            xh.extend(x_[p1:])
            yh.extend(y_[p1:])
            xh.extend(x_[0:p2])
            yh.extend(y_[0:p2])

    return xh,yh

In [18]:
def remove_handles_a6(xs,ys,D,filename):
    """Cut a handle that sticks out near the top of one side of a pot contour.

    ``xs`` / ``ys`` are reversed IN PLACE when the first y is larger than the
    last, so the contour runs from small y to large y. The handle is searched
    for among points whose y is below 30% of the y-extent (15% when "bird"
    occurs in ``filename``), rounded.

    Steps: find the handle tip (most extreme x on side ``D`` in that band),
    then the point after the tip that is furthest back towards the pot, then
    the first point anywhere on the contour whose x is within +/-2 of that
    point. Everything from that first point up to one place past the
    furthest-back point is dropped.

    Returns:
        ``xsr, ysr`` - lists holding the contour without the handle section.
    """
    if ys[0] > ys[-1]:
        ys.reverse()
        xs.reverse()

    # Array views for the element-wise comparisons; plain lists of Python
    # floats cannot be compared against a scalar directly.
    xa = np.asarray(xs)
    ya = np.asarray(ys)

    span = max(ys) - min(ys)
    if "bird" in filename: # bird cup handles are very thin and their positioning differs slightly in comparison to others.
        ub = np.round(min(ys) + (0.15*span))
    else:
        ub = np.round(min(ys) + (3*span/10))

    rng = np.where(ya < ub)[0]

    # Handle tip: most extreme x on the requested side within the band.
    if D == "L":
        k = np.argmin(xa[rng])
    else:
        k = np.argmax(xa[rng])

    rng2 = rng[k:]

    # Point after the tip that lies furthest back towards the pot.
    if D == "L":
        k2 = np.argmax(xa[rng2])
    else:
        k2 = np.argmin(xa[rng2])
    rejoin = rng2[k2]

    # First contour point whose x is within +/-2 of the rejoin point.
    near = np.where((xa <= xa[rejoin]+2) & (xa >= xa[rejoin]-2))[0]
    l = near[0] if len(near) > 0 else 1

    xsr = []
    ysr = []

    xsr.extend(xs[:l])
    ysr.extend(ys[:l])
    xsr.extend(xs[rejoin+2:])
    ysr.extend(ys[rejoin+2:])

    return xsr,ysr

#### Pot Ends

In [19]:
def edit_pot_ends(x,y,xs,ys,side,filename):
    """Make a side contour start and end on the pot's centre line.

    ``x`` / ``y`` are the full outline (used for the centre via
    ``find_centre``); ``xs`` / ``ys`` are the side contour. Only points on the
    given side of the centre x are kept, and a point at the centre x is added
    at each end. For file names containing "stamnos", "lebes", "pyxis" or
    "tankard" the contour is first restricted to points below the top of the
    outline's central band and truncated at the last point on the given side.

    ``side == "unknown"`` is resolved to "L" or "R" from the outline. For the
    left side the x values are finally mirrored about the centre x.

    Returns:
        ``xnew, ynew`` - lists of coordinates.
    """
    xc,yc,coords = find_centre(x,y)

    y_bottom = np.max(ys)

    if side == "unknown":
        side = "L" if min(np.array(x)[coords]) < xc else "R"
    left = side == "L"

    special_classes = ("stamnos", "lebes", "pyxis", "tankard")
    if any(name in filename for name in special_classes):
        # Top of the pot: smallest outline y within a tenth of the width
        # either side of the centre.
        tenth = (np.max(x) - np.min(x))/10
        outline_x = np.array(x)
        central = np.where((outline_x <= xc+tenth) & (outline_x >= xc-tenth))[0]
        top = min(y[central])

        # Keep only side-contour points below that top.
        below_top = np.where(ys > top)[0]
        xs = np.array(xs)[below_top]
        ys = np.array(ys)[below_top]

        # Truncate at the last point that is still on the requested side.
        if left:
            cut = np.where(np.array(xs) < xc)[0][-1]
        else:
            cut = np.where(np.array(xs) > xc)[0][-1]

        xnew = [xc, *xs[:cut], xc]
        ynew = [top, *ys[:cut], y_bottom]
    else:
        side_x = np.array(xs)
        side_y = np.array(ys)
        if left:
            on_side = np.where(side_x < xc)[0]
        else:
            on_side = np.where(side_x > xc)[0]

        xnew = [xc, *side_x[on_side], xc]
        ynew = [ys[0], *side_y[on_side], ys[-1]]

    if left:
        # Mirror about the centre line.
        xnew = [(-1*value)+2*xc for value in xnew]

    return xnew,ynew

#### Smoothing

In [20]:
def smooth_side_contour(x,y,direction):
    """Reduce a side contour to one x value per rounded y level.

    Coordinates are rounded; for every distinct rounded y (ascending) the
    largest rounded x is kept when ``direction == 'R'`` and the smallest
    otherwise.

    Returns:
        ``xs, ys`` - list of chosen x values and the array of distinct y levels.
    """
    rounded_y = np.round(y)
    rounded_x = np.round(x)

    levels = np.unique(rounded_y)
    choose = max if direction == 'R' else min

    xs = [choose(rounded_x[np.where(rounded_y == level)]) for level in levels]

    return xs,levels

In [21]:
def final_smoothing(xcont,ycont):
    """Drop repeated points and small x "dips" from a contour.

    Coordinates are rounded up front when both extents exceed 10, and in any
    case after the first pass. Three passes follow, each always keeping the
    first two and the last two points:

    1. drop points identical to their predecessor;
    2. drop points whose x is a strict extremum relative to the points two
       places before and after;
    3. the same relative to the immediate neighbours.

    Returns:
        ``x, y`` - arrays of the remaining points.
    """
    if ((max(xcont)-min(xcont)) > 10) & ((max(ycont)-min(ycont)) > 10):
        xcont = np.round(xcont)
        ycont = np.round(ycont)

    def keep_with_ends(count, interior):
        # First two and last two positions are kept unconditionally.
        return [0, 1] + interior + [count-2, count-1]

    def is_dip(values, i, step):
        # Strictly below, or strictly above, both points ``step`` places away.
        below = (values[i] < values[i+step]) & (values[i] < values[i-step])
        above = (values[i] > values[i+step]) & (values[i] > values[i-step])
        return below or above

    # Pass 1: remove repeated points.
    total = len(xcont)
    distinct = [i for i in range(2,total-2)
                if (xcont[i] != xcont[i-1]) or (ycont[i] != ycont[i-1])]
    kept = keep_with_ends(total, distinct)
    x_stage1 = np.round(np.array(xcont)[kept])
    y_stage1 = np.round(np.array(ycont)[kept])

    # Pass 2: remove dips measured two points apart.
    total = len(x_stage1)
    kept = keep_with_ends(total, [i for i in range(2,total-2) if not is_dip(x_stage1, i, 2)])
    x_stage2 = x_stage1[kept]
    y_stage2 = y_stage1[kept]

    # Pass 3: remove dips measured against the direct neighbours.
    total = len(x_stage2)
    kept = keep_with_ends(total, [i for i in range(2,total-2) if not is_dip(x_stage2, i, 1)])

    return x_stage2[kept],y_stage2[kept]

#### Contour Extraction

In [22]:
def cont_extraction_1(image_grey,hnd_side,filename):
    """Extract a pot profile without any handle removal.

    Pipeline: longest outline contour -> one side of it (``hnd_side`` is
    passed to ``get_contour_side``: "L", "R", or anything else for the shorter
    side) -> per-row smoothing -> ends moved to the centre line -> final
    smoothing.

    Returns:
        ``x_, y_`` as produced by ``final_smoothing``.
    """
    # 1) Outline of the pot.
    outline = get_outline_contour(image_grey)
    outline_x = outline[:,1]
    outline_y = outline[:,0]

    # 2) One side of the outline, plus the side label actually used.
    side_x,side_y,xc,yc,side = get_contour_side(outline_x,outline_y,hnd_side)

    # 3) One x per y level.
    smooth_x,smooth_y = smooth_side_contour(side_x,side_y,side)

    # 4) Start and end on the centre line of the pot.
    closed_x,closed_y = edit_pot_ends(outline_x,outline_y,smooth_x,smooth_y,side,filename)

    # 5) Remove repeated points and dips.
    return final_smoothing(closed_x,closed_y)

In [23]:
def cont_extraction_2(image_grey,filename):
    """Extract a pot profile after cutting off a protruding handle.

    The shorter side of the outline is used. File names containing "bird",
    "skyphos", "kotyle", "lekanis" or "plemochoe" go through
    ``remove_handles_a6``; all others through ``remove_handles_a2``.

    Returns:
        ``x_, y_`` as produced by ``final_smoothing``.
    """
    # 1) Outline of the pot.
    outline = get_outline_contour(image_grey)
    outline_x = outline[:,1]
    outline_y = outline[:,0]

    # 2) Shorter side of the outline ("Z" is neither "L" nor "R").
    side_x,side_y,xc,yc,side = get_contour_side(outline_x,outline_y,"Z")

    # 3) Remove the handle with the routine matching the pot class.
    top_handle_classes = ("bird", "skyphos", "kotyle", "lekanis", "plemochoe")
    if any(name in filename for name in top_handle_classes):
        remover = remove_handles_a6
    else:
        remover = remove_handles_a2
    bare_x,bare_y = remover(side_x,side_y,side,filename)

    # 4) One x per y level.
    smooth_x,smooth_y = smooth_side_contour(bare_x,bare_y,side)

    # 5) Start and end on the centre line of the pot.
    closed_x,closed_y = edit_pot_ends(outline_x,outline_y,smooth_x,smooth_y,side,filename)

    # 6) Remove repeated points and dips.
    return final_smoothing(closed_x,closed_y)

In [24]:
def cont_extraction_3(image_grey,potclass):
    """Extract a pot profile for pots whose handles form separate inner contours.

    Uses the body contour and two further contours from
    ``get_longest_contours_a3``. One of the two is picked according to the
    side ``D`` returned by ``get_contour_side``, and part of it is spliced
    into the smoothed body side in place of the handle region. How the top of
    the profile is built depends on substrings of ``potclass`` ("volute",
    "column", "kantharos", "amphora", "nestoris", or none of them).

    Returns:
        ``x_, y_`` as produced by ``final_smoothing``.
    """

    x1,y1,x2,y2,x3,y3 = get_longest_contours_a3(image_grey,potclass)

    # Shorter side of the body contour and its per-row smoothed version.
    xs,ys,xc,yc,D = get_contour_side(x1,y1,"Z")
    
    xsmooth,ysmooth = smooth_side_contour(xs,ys,D)

    # Choose between the two extra contours: for D == "L" the one with the
    # smaller minimum x, otherwise the other one.
    if min(x2) < min(x3):
        if D == "L":
            xh = x2
            yh = y2
        else:
            xh = x3
            yh = y3
    else:
        if D == "L":
            xh = x3
            yh = y3
        else:
            xh = x2
            yh = y2
        
    # Arc of the chosen contour between its top-area and bottom-area anchors.
    top,bot = get_top_bot_handle_a3(xh,yh)
    xhs,yhs = get_handle_side_a3(xh,yh,top,bot,D)
    
    # k1: index on the smoothed side where the handle region is taken to begin.
    k1 = find_top_of_handle_a3(xsmooth,ysmooth,yh,yhs,D)
    
    # k2: first smoothed point lying a quarter of the arc's y-range past max(yh).
    ub = max(yh) + (max(yhs) - min(yhs))/4
    rng2 = np.where(np.array(ysmooth)>ub)[0]
    k2 = rng2[0]
    
    p1 = min(k1,k2)
    p2 = max(k1,k2)
    
    # Orient the arc so that y increases along it (in-place list reversal).
    if yhs[0] > yhs[-1]:
        yhs.reverse()
        xhs.reverse()
        
    # Position of the largest y on the arc; the arc is used up to here.
    k = np.argmax(yhs)
    
    xr = []
    yr = []
    if ("volute" not in potclass) & ("amphora" not in potclass) & ("nestoris" not in potclass) & ("column" not in potclass) & ("kantharos" not in potclass):
        # Default: body side up to p1, then the arc, then body side from p2.
        if p1 == 0:
            xr.append(xsmooth[p1])
            yr.append(ysmooth[p1])
            xr.extend(xhs[1:k])
            yr.extend(yhs[1:k])
        else:
            xr.extend(xsmooth[:p1])
            yr.extend(ysmooth[:p1])
            xr.extend(xhs[2:k])
            yr.extend(yhs[2:k])

        xr.extend(xsmooth[p2:])
        yr.extend(ysmooth[p2:])
    else:
        if "volute" in potclass:
            # Start at the smallest-y point of the chosen contour, then arc and body.
            v = np.argmin(yh)
            xr.append(xh[v])
            yr.append(yh[v])
            xr.extend(xhs[2:k])
            yr.extend(yhs[2:k])
            xr.extend(xsmooth[p2:])
            yr.extend(ysmooth[p2:])
        if "column" in potclass:
            # As for "volute", but preceded by a point at the contour's extreme x
            # (on side D) and the smallest smoothed y.
            if D == "L":
                m = min(xh)
            else:
                m = max(xh)
    
            v = np.argmin(yh)
            xr.append(m)
            yr.append(min(ysmooth))
            xr.append(xh[v])
            yr.append(yh[v])
            xr.extend(xhs[2:k])
            yr.extend(yhs[2:k])
            xr.extend(xsmooth[p2:])
            yr.extend(ysmooth[p2:])

        if ("kantharos" in potclass) or ("amphora" in potclass) or ("nestoris" in potclass):
            # p: y of the body contour's top near the centre line.
            p = get_top_pot_a3(x1,y1,xc)
            # Arc points with y smaller than p are skipped.
            t = np.where(yhs<p)[0]
            if len(t) < 1:
                # No arc point has y below p: keep the body side up to p1 first.
                t = 0
                if p1 == 0:
                    xr.append(xsmooth[p1])
                    yr.append(ysmooth[p1])
                else:
                    xr.extend(xsmooth[:p1])
                    yr.extend(ysmooth[:p1])
            else:
                t = t[-1] + 1
            
            xr.extend(xhs[t:k])
            yr.extend(yhs[t:k])
            xr.extend(xsmooth[p2:])
            yr.extend(ysmooth[p2:])    

    # Re-smooth the spliced profile, close it on the centre line, final clean-up.
    xsr,ysr = smooth_side_contour(xr,yr,D)            
    x,y = edit_pot_ends(x1,y1,xsr,ysr,D,potclass)
    x_,y_ = final_smoothing(x,y)
    
    return x_,y_
    

In [25]:
def cont_extraction_4(image_grey,filename):
    """Extract a pot profile after cutting out a handle found in the upper part.

    The shorter side of the outline is oriented so that y increases. A handle
    start is searched for between 15% and 40% of the outline's y-extent
    (``get_handle_start_a4``), the matching end comes from
    ``get_handle_end_a4``, and the points in between are dropped.

    Returns:
        ``x_, y_`` as produced by ``final_smoothing``.
    """
    outline = get_outline_contour(image_grey)
    outline_x = outline[:,1]
    outline_y = outline[:,0]

    side_x,side_y,xc,yc,side = get_contour_side(outline_x,outline_y,"Z")

    # Search band for the handle start, in outline y coordinates.
    band_far = np.min(outline_y) + (np.max(outline_y) - np.min(outline_y))*0.4
    band_near = np.min(outline_y) + (np.max(outline_y) - np.min(outline_y))*0.15

    # Orient the side so that y increases along it.
    if side_y[0] > side_y[-1]:
        side_y.reverse()
        side_x.reverse()

    in_band = np.where((np.array(side_y)>band_near)&(np.array(side_y)<band_far))[0]

    handle_start = get_handle_start_a4(side_x,side_y,in_band,side)
    handle_end = get_handle_end_a4(outline_x,outline_y,side_x,side_y,handle_start,side)

    # Everything before the handle plus everything from its end onwards.
    bare_x = list(side_x[:handle_start]) + list(side_x[handle_end:])
    bare_y = list(side_y[:handle_start]) + list(side_y[handle_end:])

    smooth_x,smooth_y = smooth_side_contour(bare_x,bare_y,side)
    closed_x,closed_y = edit_pot_ends(outline_x,outline_y,smooth_x,smooth_y,side,filename)

    return final_smoothing(closed_x,closed_y)

In [26]:
def cont_extraction_5(image_grey,filename):
    """Extract a pot profile for pots with a handle contour near the top.

    Uses the body contour and a second contour from
    ``get_longest_contours_a5``. The arc of the second contour returned by
    ``get_handle_bot_a5`` (or the whole second contour when that arc is empty)
    replaces the upper part of the smoothed body side.

    Returns:
        ``x_, y_`` as produced by ``final_smoothing``.
    """
    x1,y1,x2,y2 = get_longest_contours_a5(image_grey)

    xs,ys,xc,yc,D = get_contour_side(x1,y1,"Z")

    xsmooth,ysmooth = smooth_side_contour(xs,ys,D)

    bot = get_bot_area_handle_a5(x2,y2)

    xh,yh = get_handle_bot_a5(x2,y2,bot)

    if len(xh) < 1:
        # Fall back to the whole contour. Converted to lists because the
        # orientation step below uses list.reverse(), which arrays lack.
        xh = list(x2)
        yh = list(y2)

    mx = max(yh)

    ub = mx + (mx-min(y2))/7

    # First smoothed point beyond the arc's largest y plus a margin.
    k2 = np.where(np.asarray(ysmooth) > ub)[0][0]

    # Orient the arc: x decreasing for the left side, increasing for the right.
    if D == "L":
        if xh[0] < xh[-1]:
            xh.reverse()
            yh.reverse()

    if D == "R":
        if xh[0] > xh[-1]:
            xh.reverse()
            yh.reverse()

    # No re-orientation of xsmooth/ysmooth is needed: smooth_side_contour
    # returns its y levels from np.unique, i.e. already ascending.

    # Positions on the arc where both y and the outward-to-inward x step
    # still progress; the last of them ends the part of the arc that is kept.
    t = []
    if D == "L":
        for i in range(0,len(xh)-1):
            if (yh[i] < yh[i+1]) and (xh[i] > xh[i+1]):
                t.append(i)
    else:
        for i in range(0,len(xh)-1):
            if (yh[i] < yh[i+1]) and (xh[i] < xh[i+1]):
                t.append(i)

    k1 = len(xh)-1
    if len(t) != 0:
        k1 = t[-1]

    xr = []
    yr = []

    # Arc first, then the smoothed body side from k2 onwards.
    xr.extend(xh[:k1])
    yr.extend(yh[:k1])
    xr.extend(xsmooth[k2:])
    yr.extend(ysmooth[k2:])

    xsr,ysr = smooth_side_contour(xr,yr,D)
    x,y = edit_pot_ends(x1,y1,xsr,ysr,D,filename)
    x_,y_ = final_smoothing(x,y)

    return x_,y_

#### Choosing Algorithm by Class

In [28]:
# Lookup table of pot classes; choose_alg reads its 'A1', 'A3' and 'A4' columns.
table = pd.read_csv('pot_classes.csv')

In [27]:
def choose_alg(filename):
    """Choose an algorithm label for a file from the words in its name.

    The name is split into lower-case or capitalised words. The 'A1', 'A3'
    and 'A4' columns of the module-level ``table`` are scanned in that order;
    within a column the first row whose words all occur in the file name is a
    match. A match in a later column replaces one from an earlier column.

    Without a table match the label falls back to 'A3' for names containing
    "krater" or "cup", 'A4' for "amphora", and 'A1' otherwise.

    Returns:
        ``t, alg`` - the matching row index (0 when nothing matched) and the label.
    """
    word_pattern = "[a-z]+|[A-Z][a-z]+"
    name_words = re.findall(word_pattern,filename)

    t = 0
    alg = 'tbc'

    for column in ('A1', 'A3', 'A4'):
        for row in range(0,len(table)):
            required = re.findall(word_pattern,str(table[column][row]))
            # An entry without any words counts as a match.
            if all(word in name_words for word in required):
                t = row
                alg = column
                break

    if (t == 0) and (alg == 'tbc'):
        if ("krater" in name_words) or ("cup" in name_words):
            alg = 'A3'
        elif "amphora" in name_words:
            alg = 'A4'
        else:
            alg = 'A1'

    return t, alg


#### Detecting Bad Contours

In [6]:
def group_extremas(x,width):
    """Split a sequence into consecutive groups anchored on their first value.

    A value joins the current group while it does not exceed the group's
    first value plus ``2*width``; otherwise it starts a new group.

    Returns:
        List of groups (lists), in input order. ``x`` must not be empty.
    """
    groups = [[x[0]]]
    for position in range(1,len(x)):
        value = x[position]
        current = groups[-1]
        if value <= (current[0]+(2*width)):
            current.append(value)
        else:
            groups.append([value])

    return groups

### Extracting Contours

In [26]:
def full_extent(ax, pad=0.0):
    """Return the bounding box of an axes together with its tick labels and title.

    The box is the union of the window extents of the x and y tick labels, the
    axes itself and its title, expanded by the fraction ``pad`` in both
    directions.  The axis labels are not part of the union.
    """
    # Text extents are undefined until the figure has been drawn.
    ax.figure.canvas.draw()

    artists = [*ax.get_xticklabels(), *ax.get_yticklabels(), ax, ax.title]
    extents = [artist.get_window_extent() for artist in artists]

    scale = 1.0 + pad
    return Bbox.union(extents).expanded(scale, scale)

In [None]:
direc_nme = 'C:\\Users\\arian\\Downloads\\pots'  # Folder with the pot images to process.
current_dir = 'C:\\Users\\arian\\Documents\\GitHub\\pots\\Code'  # Holds the temporary B/W image.
fold_dir_1 = 'C:\\Users\\arian\\Documents\\Good'  # Plots of contours that pass the checks.
fold_dir_2 = 'C:\\Users\\arian\\Documents\\Bad'  # Plots of contours that fail the checks.
fold_dir_3 = 'C:\\Users\\arian\\Documents\\all_csv'  # Contour coordinates, one CSV per image.


a6 = "(bird|skyphos|kotyle|lekanis|plemochoe)"  # Family A3: use cont_extraction_2 even if the name contains "cup".
a5 = "(askos|bail|ring)"  # Family A4: use cont_extraction_5 instead of cont_extraction_3.

# Limits used to class a contour as bad, as
# (min points, max jump groups, max single jump, max total jump).
# The first keyword found in the file name wins; otherwise the default applies.
_DEFAULT_LIMITS = (65, 4, 140, 400)
_SHAPE_LIMITS = (
    ("cup", (55, 4, 140, 400)),
    ("lekanis", (40, 4, 140, 400)),
    ("lydion", (65, 4, 140, 500)),
)


def _load_binarised(img_path, tmp_path):
    """Return the grayscale image whose contour will be traced.

    The original image is binarised with binary_colour(), rendered to
    *tmp_path* and read back as gray.  If any of that fails, the original
    image at *img_path* is loaded as gray instead.
    """
    # NOTE(review): skimage.data.load appears to have been removed from recent
    # scikit-image releases (skimage.io.imread is the replacement) -- confirm
    # against the installed version before upgrading.
    try:
        # 1) Get B/W version of original image.
        image_col = data.load(img_path, as_gray=False)
        image_bw = binary_colour(image_col)
        # 2) Render it and save temporarily. This will be the new B/W of the pot.
        fig, ax = plt.subplots(figsize=(5, 7))
        try:
            ax.imshow(image_bw)
            ax.set_axis_off()
            fig.savefig(tmp_path)
        finally:
            plt.close(fig)
        # 3) Read the rendered image back as gray.
        return data.load(tmp_path, as_gray=True)
    except Exception:
        return data.load(img_path, as_gray=True)


def _choose_side(img):
    """Return "L" or "R" for cont_extraction_1 from the top third of the outline.

    "L" is returned when, within the top third of the outline, the leftmost
    point is closer to the centre x than the rightmost point; otherwise "R".
    """
    contour = get_outline_contour(img)
    xcont = np.asarray(contour[:, 1])
    ycont = np.asarray(contour[:, 0])
    xc, _, _ = find_centre(xcont, ycont)
    top = ycont.min()
    third = top + (ycont.max() - top) / 3
    band = xcont[(ycont > top) & (ycont < third)]
    if abs(xc - band.min()) < abs(band.max() - xc):
        return "L"
    return "R"


def _extract_contour(img, filename, family):
    """Run the sub-algorithm for *family* and return ``(label, x, y)``.

    *label* is 'A1'/'A2' (left/right variant of family 'A1'), 'A3' or 'A4'.
    If extraction fails, the file name and label are printed and a placeholder
    contour of three zeros is returned so the batch can continue.
    """
    label = family
    try:
        if family == 'A1':
            side = _choose_side(img)
            label = 'A1' if side == "L" else 'A2'
            x, y = cont_extraction_1(img, side, filename)
        elif family == 'A3':
            if ("cup" not in filename) or re.search(a6, filename):
                x, y = cont_extraction_2(img, filename)
            else:
                x, y = cont_extraction_4(img, filename)
        elif family == 'A4':
            if re.search(a5, filename):
                x, y = cont_extraction_5(img, filename)
            else:
                x, y = cont_extraction_3(img, filename)
        else:
            raise ValueError("unknown algorithm family: " + str(family))
    except Exception:
        print(filename, label)
        x = [0, 0, 0]
        y = [0, 0, 0]
    return label, x, y


def _contour_stats(x, y):
    """Return ``(points, jump groups, largest jump, total jump)`` for a contour.

    A jump is the absolute change in x between consecutive points.  Jumps of
    10 or more are located by the mean y of their two end points and clustered
    with group_extremas(..., 5); the group count is 0 when there are none.
    """
    jumps = np.abs(np.diff(x))
    y_arr = np.asarray(y, dtype=float)
    mid_y = (y_arr[1:] + y_arr[:-1]) / 2
    big_jump_y = [m for m, jump in zip(mid_y, jumps) if jump >= 10]
    n_groups = len(group_extremas(big_jump_y, 5)) if big_jump_y else 0
    max_jump = np.max(jumps) if len(jumps) else 0
    return len(x), n_groups, max_jump, jumps.sum()


def _contour_limits(filename):
    """Return the bad-contour limits that apply to *filename*."""
    for keyword, limits in _SHAPE_LIMITS:
        if keyword in filename:
            return limits
    return _DEFAULT_LIMITS


tmp_path = os.path.join(current_dir, 'pot_new.jpg')

for filename in tqdm(os.listdir(direc_nme)):
    filename = str(filename)
    stem = os.path.splitext(filename)[0]
    _, family = choose_alg(filename)

    # Binarize Image:
    ################
    img = _load_binarised(os.path.join(direc_nme, filename), tmp_path)

    # Extract Contour:
    ##################
    alg, x, y = _extract_contour(img, filename, family)

    # Create Plot:
    ##############
    fig, ax = plt.subplots(figsize=(5, 7))
    ax.imshow(img, cmap='gray')
    ax.plot(x, y, '-r')
    ax.set_axis_off()

    pix_mn = np.mean(img)

    # NOTE(review): whether 55 is on a 0-255 or a 0-1 scale depends on what
    # data.load(..., as_gray=True) returns -- confirm.
    if pix_mn < 55:
        ax.set_title(alg + ' - ' + str(pix_mn))
        out_dir = fold_dir_2
    else:
        n_pts, n_groups, max_jump, total_jump = _contour_stats(x, y)
        # The title keeps showing 'na' when there are no large jumps; the
        # numeric count (0) is what gets compared against the limit.
        shown_groups = n_groups if n_groups else 'na'
        ax.set_title(alg + ' - ' + str(pix_mn) + ' - '
                     + str([n_pts, shown_groups, max_jump, total_jump]))

        min_pts, max_groups, jump_cap, total_cap = _contour_limits(filename)
        is_bad = ((n_pts < min_pts) or (n_groups > max_groups)
                  or (max_jump > jump_cap) or (total_jump > total_cap))
        out_dir = fold_dir_2 if is_bad else fold_dir_1

    fig.savefig(os.path.join(out_dir, stem + ".png"))
    plt.close(fig)

    # Save CSV:
    ###########
    with open(os.path.join(fold_dir_3, stem + ".csv"), "w", newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['x', 'y'])
        writer.writerows(it.zip_longest(x, y))


In [102]:
# direc_nme = 'C:\\Users\\arian\\Downloads\\pots' # Folder with files to be renamed.
# current_dir = 'C:\\Users\\arian\\Documents\\GitHub\\pots\\Code'
# fold_dir_j = 'C:\\Users\\arian\\Documents\\joint_plot'
# fold_dir_m = 'C:\\Users\\arian\\Documents\\multi_plot'
# fold_dir_1 = 'C:\\Users\\arian\\Documents\\a1_plot'
# fold_dir_2 = 'C:\\Users\\arian\\Documents\\a2_plot'
# fold_dir_3 = 'C:\\Users\\arian\\Documents\\a3_plot'
# fold_dir_4 = 'C:\\Users\\arian\\Documents\\a4_plot'
# fold_dir_csv_1 = 'C:\\Users\\arian\\Documents\\a1_csv'
# fold_dir_csv_2 = 'C:\\Users\\arian\\Documents\\a2_csv'
# fold_dir_csv_3 = 'C:\\Users\\arian\\Documents\\a3_csv'
# fold_dir_csv_4 = 'C:\\Users\\arian\\Documents\\a4_csv'



# a6 = "(bird|skyphos|kotyle|lekanis|plemochoe)"
# a5 = "(askos|bail|ring)"

# for filename in tqdm(os.listdir(direc_nme)):      
#     # Binarize Image:
#     ################
#     try:
#         # 1) Get B/W version of original image.
#         image_col = data.load(direc_nme+"\\"+filename,as_gray=False)
#         image_bw = binary_colour(image_col)
#         # 2) Create a plot and temporarily save. This will be the new B/W of the pot.
#         fig, ax = plt.subplots(figsize=(5, 7))
#         ax.imshow(image_bw)
#         ax.set_axis_off()
#         plt.savefig("pot_new.jpg")
#         plt.close()        
#         # 3) Get longest contour from new pot image.
#         img = data.load(current_dir+'\\pot_new.jpg',as_gray=True)        
#     except:
#         img = data.load(img_path,as_gray=True)

#     # Create Plot:  
#     ##############
#     fig, ax = plt.subplots(figsize=(5, 7))
#     ax.imshow(img,cmap='gray')
#     ax.set_axis_off()

#     # Create Contours:
#     #################
#     # Contours 1:
#     try:
#         x1,y1 = cont_extraction_1(img,"L",filename)
#         ax.plot(x1,y1,'-r')
#     except:
#         x1 = []
#         y1 = []
#     # Contours 2:
#     try:
#         x2,y2 = cont_extraction_1(img,"R",filename)
#         ax.plot(x2,y2,'-y')
#     except:
#         x2 = []
#         y2 = []
#     # Contours 3:
#     try:
#         if ("cup" not in filename) or (re.search(a6,filename)):
#             x3,y3 = cont_extraction_2(img,filename)
#         else:
#             x3,y3 = cont_extraction_4(img,filename)
#         ax.plot(x3,y3,'-g')
#     except:
#         x3 = []
#         y3 = []
#     # Contours 4:
#     try:
#         if re.search(a5,filename):
#             x4,y4 = cont_extraction_5(img,filename)
#         else:
#             x4,y4 = cont_extraction_3(img,filename)
#         ax.plot(x4,y4,'-b')
#     except:
#         x4 = []
#         y4 = []

#     plt.savefig(fold_dir_j+"\\"+filename[:-4]+".png")
#     plt.close()

#     # Create Multi-Plot
#     ###################
#     fig, (ax1, ax2, ax3, ax4) = plt.subplots(1,4,figsize=(16, 17))
#     ax1.imshow(img,cmap='gray')
#     ax1.set_axis_off()
#     ax1.plot(x1,y1,'-r')
#     ax2.imshow(img,cmap='gray')
#     ax2.set_axis_off()
#     ax2.plot(x2,y2,'-y')
#     ax3.imshow(img,cmap='gray')
#     ax3.set_axis_off()
#     ax3.plot(x3,y3,'-g')
#     ax4.imshow(img,cmap='gray')
#     ax4.set_axis_off()
#     ax4.plot(x4,y4,'-b')

#     plt.savefig(fold_dir_m+"\\"+filename[:-4]+".png")
#     plt.close()

#     # Create Separate Plots
#     #######################
#     extent = full_extent(ax1).transformed(fig.dpi_scale_trans.inverted())
#     fig.savefig(fold_dir_1+"\\"+filename[:-4]+".png", bbox_inches=extent)
#     extent = full_extent(ax2).transformed(fig.dpi_scale_trans.inverted())
#     fig.savefig(fold_dir_2+"\\"+filename[:-4]+".png", bbox_inches=extent)
#     extent = full_extent(ax3).transformed(fig.dpi_scale_trans.inverted())
#     fig.savefig(fold_dir_3+"\\"+filename[:-4]+".png", bbox_inches=extent)
#     extent = full_extent(ax4).transformed(fig.dpi_scale_trans.inverted())
#     fig.savefig(fold_dir_4+"\\"+filename[:-4]+".png", bbox_inches=extent)
#     plt.close()

#     # Save CSVs:
#     ###########
#     cont_coord = {}
#     cont_coord['x'] = x1
#     cont_coord['y'] = y1
#     with open(fold_dir_csv_1+"\\"+filename[:-4]+".csv", "w", newline='') as outfile:
#         writer = csv.writer(outfile)
#         writer.writerow(cont_coord.keys())
#         writer.writerows(it.zip_longest(*cont_coord.values()))

#     cont_coord = {}
#     cont_coord['x'] = x2
#     cont_coord['y'] = y2
#     with open(fold_dir_csv_2+"\\"+filename[:-4]+".csv", "w", newline='') as outfile:
#         writer = csv.writer(outfile)
#         writer.writerow(cont_coord.keys())
#         writer.writerows(it.zip_longest(*cont_coord.values()))

#     cont_coord = {}
#     cont_coord['x'] = x3
#     cont_coord['y'] = y3
#     with open(fold_dir_csv_3+"\\"+filename[:-4]+".csv", "w", newline='') as outfile:
#         writer = csv.writer(outfile)
#         writer.writerow(cont_coord.keys())
#         writer.writerows(it.zip_longest(*cont_coord.values()))
#     cont_coord = {}
#     cont_coord['x'] = x4
#     cont_coord['y'] = y4
#     with open(fold_dir_csv_4+"\\"+filename[:-4]+".csv", "w", newline='') as outfile:
#         writer = csv.writer(outfile)
#         writer.writerow(cont_coord.keys())
#         writer.writerows(it.zip_longest(*cont_coord.values()))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(IntProgress(value=0, max=820), HTML(value='')))


