In [1]:
import os
import skimage
from skimage.io import imread, imshow
from skimage.draw import line
from matplotlib import pyplot as plt
#import matplotlib.pyplot as plt # the same as above
import numpy as np
from PIL import Image, ImageDraw
import random

In [2]:
colors = [
'#0000ff',
'#ff7f0e',
'#2ca02c',
'#d62728',
'#9467bd',
'#8c564b',
'#e377c2',
'#7f7f7f',
'#bcbd22',
'#17becf']

In [3]:
def get_end_indexs_to_cut(arr, minwidth):
    # find slicing index
    indexlist=[]
    for j in range(len(arr)-1):
        if arr[j] + minwidth < arr[j+1]:
            indexlist.append(j) 
    return indexlist

def get_slice_arrays(arr, minwidth):
    
    # find slicing index
    indexlist=get_end_indexs_to_cut(arr, minwidth)
  
    # no need to slice        
    if len(indexlist) == 0:
        return [arr]
    
    arraylist=[]
    s=0
    for cutindex in indexlist:
        arraylist.append(arr[s:cutindex+1])
        s = cutindex+1
    arraylist.append(arr[s:])
    return arraylist

def get_slice_dataarrays(indexarr, minwidth, dataarray):
    
    # find slicing index
    indexlist=get_end_indexs_to_cut(indexarr, minwidth)
            
    # no need to slice        
    if len(indexlist) == 0:
        return [dataarray]
    
    arraylist=[]
    s=0
    for cutindex in indexlist:
        arraylist.append(dataarray[s:cutindex+1])
        s = cutindex+1
    arraylist.append(dataarray[s:])
    return arraylist

def cal_sum_pixel_v3_lineslice(img, axis, threshold):    
    img_binarise = np.where(img > 0.3, 1.0, 0.0)
    list_pixel =[]
    if axis == 1: #horizontal
        axis_max = img.shape[0] 
        for i in range(axis_max):
            # 1.Get indexs of the current row that contain pixel
            haspixel = np.where(img_binarise[i,:]==1)[0]
            # 2.keep the row if the number of pixel > threshold
            if len(haspixel) > threshold:
                #3. slice the current row to lines, if they are separate apart
                slicearrays = get_slice_arrays(haspixel,len(haspixel)) 
                # append to list
                for sa in slicearrays:
                    if len(sa)>1:
                        list_pixel.append((i, len(sa), sa[0], sa[-1]))                 

    elif axis == 0: #vertical
        axis_max = img.shape[1]         
        for i in range(axis_max):
            haspixel = np.where(img_binarise[:,i]==1)[0]
            # if there are enough pixels
            if len(haspixel) > threshold:
                slicearrays = get_slice_arrays(haspixel,len(haspixel)) 
                #4. keep the sliced lines into a list
                for sa in slicearrays:
                    if len(sa)>1:
                        list_pixel.append((i, len(sa), sa[0], sa[-1]))
                        
    img_a1_data = np.array(list_pixel,dtype={'names':('index', 'sum_pixel', 'p1', 'p2'),
                                 'formats':('i4','i4','i4','i4')})                        
    return img_a1_data

def cal_sum_pixel_v4_allmergeline(img, axis, threshold_row_pixels,
                               threshold_same_group,
                               max_white_space):    
    img_binarise = np.where(img > 0.3, 1.0, 0.0)
    
    list_pixel =[]
    if axis == 1: #horizontal
        axis_max = img.shape[0] 
        for i in range(axis_max):
            # 1.Get indexs of the current row that contain pixel
            haspixel = np.where(img_binarise[i,:]==1)[0]
            # 2.keep the row if the number of pixel > threshold
            if len(haspixel) > threshold_row_pixels:
                #3. slice the current row to lines, if they are separate apart
                slicearrays = get_slice_arrays(haspixel,len(haspixel)) 
                # append to list
                for sa in slicearrays:
                    if len(sa)>1:
                        list_pixel.append((i, len(sa), sa[0], sa[-1]))   
                        

    elif axis == 0: #vertical
        axis_max = img.shape[1]         
        for i in range(axis_max):
            haspixel = np.where(img_binarise[:,i]==1)[0]
            # if there are enough pixels
            if len(haspixel) > threshold_row_pixels:
                slicearrays = get_slice_arrays(haspixel,len(haspixel)) 
                #4. keep the sliced lines into a list
                for sa in slicearrays:
                    if len(sa)>1:
                        list_pixel.append((i, len(sa), sa[0], sa[-1]))
                        
    img_sum_axis = np.array(list_pixel,dtype={'names':('index', 'sum_pixel', 'p1', 'p2'),
                                 'formats':('i4','i4','i4','i4')})  
    final_lines=[]
    # 1. group the "nearby lines" top/down or left/right together
    group_slicearrays = get_slice_dataarrays(img_sum_axis['index'],
                                            threshold_same_group,
                                            img_sum_axis)
    
    # 2. in each group of the nearby line, 
    #    separate subgroup along the line width direction, 
    #    (separate the line that is far away each other)
    for group_id,group in enumerate(group_slicearrays):

        #print("------------------------\ngroup #",group_id)
        #print(group)
        line_index_l0=group['index'][0]
        line_index_l1=group['index'][-1]
        index_range = line_index_l1 - line_index_l0 +1  
        index_array = np.arange(line_index_l0, line_index_l1+1)    
        # 3. create group_pixel, 2d array, size(index_range,total_pixel_along_line) 
        #    to store pixels from the group
        group_pixel=np.zeros((index_range, axis_max))    
        #print("group_pixel",group_pixel.shape)
        #print("index l0,l1:",line_index_l0,line_index_l1)  

        # 4. fill 1 on the group_pixel cell that has pixel. (or get it from the raw image?)
        for index,sump,p1,p2 in group:
            group_pixel[index-line_index_l0,p1:p2+1]=1

 

        #print(np.sum(group_pixel,axis=1))

        # 5. merge the nearby lines, to then find a gap
        merge_index = np.sum(group_pixel,axis=0)
        pixel_list = np.where(merge_index>0)[0]  # return a tuple (len=1) of numpy.ndarray, 
        #                                                so select the first index [0]
        #print("index_list_pixel",type(index_list_pixel),len(index_list_pixel),index_list_pixel)
        pixel_p0 = pixel_list[0]
        pixel_p1 = pixel_list[-1]
        print("pixel p0,p1:",pixel_p0, pixel_p1)
        #print(pixel_list)

        # 6. get index of the linesgroup gap
        pixel_to_cut_line = get_end_indexs_to_cut(pixel_list,max_white_space)
        #slice_daraarrays = get_slice_dataarrays(index_list_pixel[0],max_white_space)


        print("\n pixel_to_cut_line",pixel_to_cut_line)

        # 7. if there is no gap, find the average index
        if len(pixel_to_cut_line) == 0:

            count_row_pixels = np.sum(group_pixel,axis=1) # size = 1d
            count_all_pixels = np.sum(count_row_pixels) # size = 1d
            #print("count_row_pixels",count_row_pixels.size)
            #print("count_all_pixels",count_all_pixels.size)
            #init_index
    #         total_index=0
    #         # 8. sum up all pixel index value.
    #         for row_index,row_px in zip(index_array,count_row_pixels):
    #             total_index=total_index + row_index*row_px  
            total_index=index_array.dot(count_row_pixels) #get single value
            avg_index = int(round(total_index/count_all_pixels))

            #print("total_index",total_index)
            print("avg_index,p1,p2:",avg_index,pixel_p0, pixel_p1)
            final_lines.append((avg_index, pixel_p1-pixel_p0+1, pixel_p0, pixel_p1))

            # done!

        else:
            # 8. if there are gaps, combine the lines pixel as each block, then find the average index
            #fine lines that can merge, and cannot

            start_pixel=0
            #print(np.sum(group_pixel))
            #index_list_pixel
            # add the last cut index
            pixel_to_cut_line.append(len(pixel_list)-1)
            print("pixel_to_cut_line",pixel_to_cut_line)

            for cut_pixel in pixel_to_cut_line:

                #sub_group_pixel=np.zeros_like(group_pixel)
                #print("\nsub_group_pixel",sub_group_pixel.shape)  

                end_pixel = cut_pixel

                print("cut lineindex:",line_index_l0," to ",line_index_l1)
                print("cut pixelindex:",start_pixel," to ",end_pixel)            
                print("pixel value:",pixel_list[start_pixel]," to ",pixel_list[end_pixel])

                #print("pixel value;",group_pixel[start_pixel]," to ",group_pixel[end_pixel])


                # 9. crop the cells of pixel-block 

                print("sub_group_pixel")#,group_pixel[:, pixel_list[start_pixel]:pixel_list[end_pixel]+1])
                sub_group_pixel=group_pixel[:, pixel_list[start_pixel]:pixel_list[end_pixel]+1]
                #sub_group_pixel[line_index_l0:line_index_l1+1, start_pixel:end_pixel+1] = \
                #                    group_pixel[line_index_l0:line_index_l1+1, start_pixel:end_pixel+1]            



                #print(sub_group_pixel)
                count_row_pixels = np.sum(sub_group_pixel,axis=1) # size = 1d
                count_all_pixels = np.sum(count_row_pixels) # #get single value
                #print("count_row_pixels",count_row_pixels)
                #print("count_all_pixels",count_all_pixels) 


                #print("index_values",index_values) 
                total_index=index_array.dot(count_row_pixels) #get single value
                #print("sum_index_pixel",total_index) 
                avg_index=int(round(total_index/count_all_pixels))

                print("avg_index,p1,p2:",avg_index,pixel_list[start_pixel], pixel_list[end_pixel])

                final_lines.append((avg_index, 
                                    pixel_list[end_pixel]-pixel_list[start_pixel]+1, 
                                    pixel_list[start_pixel], pixel_list[end_pixel]))

                start_pixel = end_pixel+1             
                print("---")

    img_a1_data = np.array(final_lines,dtype={'names':('index', 'sum_pixel', 'p1', 'p2'),
                                 'formats':('i4','i4','i4','i4')})                        
    return img_a1_data   

# img_group_axis_0 = cal_sum_pixel_v4_mergeline(img, 0, threshold_row_pixels=10,
#                                               threshold_same_group=10,
#                                               max_white_space=30)

# img_group_axis_1 = cal_sum_pixel_v4_mergeline(img, 1, threshold_row_pixels=10,
#                                               threshold_same_group=10,
#                                               max_white_space=30)

In [7]:
print(img_sum_axis_0)

[(  7,  49,  42,  91) (  8, 143, 111, 274) ( 10,   9,  27,  35)
 ( 10,  14, 100, 113) (194,  50,  34,  83) (195,  13, 117, 129)
 (263,  63, 203, 274)]


In [4]:

def cal_sum_pixel_v4_mergeline(img_sum_axis, total_pixel_along_line, threshold_same_group, max_white_space): 
    
    final_lines=[]
    # 1. group the "nearby lines" top/down or left/right together
    group_slicearrays = get_slice_dataarrays(img_sum_axis['index'],
                                            threshold_same_group,
                                            img_sum_axis)
    

    # merge each rows in group to 1 row, then calculate the most occupied index

    # 2. in each group of the nearby line, 
    #    separate subgroup along the line width direction, 
    #    (separate the line that is far away each other)
    for group_id,group in enumerate(group_slicearrays):

        print("------------------------\ngroup #",group_id)
        print(group)
        line_index_l0=group['index'][0]
        line_index_l1=group['index'][-1]
        index_range = line_index_l1 - line_index_l0 +1  
        index_array = np.arange(line_index_l0, line_index_l1+1)    
        # 3. create group_pixel, 2d array, size(index_range,total_pixel_along_line) 
        #    to store pixels from the group
        group_pixel=np.zeros((index_range, total_pixel_along_line))    
        print("group_pixel",group_pixel.shape)
        print("index l0,l1:",line_index_l0,line_index_l1)  

        #most_p1=total_pixel_along_line
        #most_p2=0      
        # 4. fill 1 on the group_pixel cell that has pixel. (or get it from the raw image?)
        for index,sump,p1,p2 in group:
            group_pixel[index-line_index_l0,p1:p2+1]=1
            #most_p1 = p1 if p1 < most_p1 else most_p1
            #most_p2 = p2 if p2 > most_p2 else most_p2

        #new_p1=total_pixel_along_line
        #new_p2=0    

        #print(np.sum(group_pixel,axis=1))

        # 5. merge the nearby lines, to then find a gap
        merge_index = np.sum(group_pixel,axis=0)
        pixel_list = np.where(merge_index>0)[0]  # return a tuple (len=1) of numpy.ndarray, 
        #                                                so select the first index [0]
        #print("index_list_pixel",type(index_list_pixel),len(index_list_pixel),index_list_pixel)
        pixel_p0 = pixel_list[0]
        pixel_p1 = pixel_list[-1]
        print("pixel p0,p1:",pixel_p0, pixel_p1)
        #print(pixel_list)

        # 6. get index of the linesgroup gap
        pixel_to_cut_line = get_end_indexs_to_cut(pixel_list,max_white_space)
        #slice_daraarrays = get_slice_dataarrays(index_list_pixel[0],max_white_space)


        print("\n pixel_to_cut_line",pixel_to_cut_line)

        # 7. if there is no gap, find the average index
        if len(pixel_to_cut_line) == 0:

            count_row_pixels = np.sum(group_pixel,axis=1) # size = 1d
            count_all_pixels = np.sum(count_row_pixels) # size = 1d
            #print("count_row_pixels",count_row_pixels.size)
            #print("count_all_pixels",count_all_pixels.size)
            #init_index
    #         total_index=0
    #         # 8. sum up all pixel index value.
    #         for row_index,row_px in zip(index_array,count_row_pixels):
    #             total_index=total_index + row_index*row_px  
            total_index=index_array.dot(count_row_pixels) #get single value
            avg_index = int(round(total_index/count_all_pixels))

            #print("total_index",total_index)
            print("avg_index,p1,p2:",avg_index,pixel_p0, pixel_p1)
            final_lines.append((avg_index, pixel_p1-pixel_p0+1, pixel_p0, pixel_p1))

            # done!

        else:
            # 8. if there are gaps, combine the lines pixel as each block, then find the average index
            #fine lines that can merge, and cannot

            start_pixel=0
            #print(np.sum(group_pixel))
            #index_list_pixel
            # add the last cut index
            pixel_to_cut_line.append(len(pixel_list)-1)
            print("pixel_to_cut_line",pixel_to_cut_line)

            for cut_pixel in pixel_to_cut_line:

                #sub_group_pixel=np.zeros_like(group_pixel)
                #print("\nsub_group_pixel",sub_group_pixel.shape)  

                end_pixel = cut_pixel

                print("cut lineindex:",line_index_l0," to ",line_index_l1)
                print("cut pixelindex:",start_pixel," to ",end_pixel)            
                print("pixel value:",pixel_list[start_pixel]," to ",pixel_list[end_pixel])

                #print("pixel value;",group_pixel[start_pixel]," to ",group_pixel[end_pixel])


                # 9. crop the cells of pixel-block 

                print("sub_group_pixel")#,group_pixel[:, pixel_list[start_pixel]:pixel_list[end_pixel]+1])
                sub_group_pixel=group_pixel[:, pixel_list[start_pixel]:pixel_list[end_pixel]+1]
                #sub_group_pixel[line_index_l0:line_index_l1+1, start_pixel:end_pixel+1] = \
                #                    group_pixel[line_index_l0:line_index_l1+1, start_pixel:end_pixel+1]            



                #print(sub_group_pixel)
                count_row_pixels = np.sum(sub_group_pixel,axis=1) # size = 1d
                count_all_pixels = np.sum(count_row_pixels) # #get single value
                #print("count_row_pixels",count_row_pixels)
                #print("count_all_pixels",count_all_pixels) 


                #print("index_values",index_values) 
                total_index=index_array.dot(count_row_pixels) #get single value
                #print("sum_index_pixel",total_index) 
                avg_index=int(round(total_index/count_all_pixels))

                print("avg_index,p1,p2:",avg_index,pixel_list[start_pixel], pixel_list[end_pixel])

                final_lines.append((avg_index, 
                                    pixel_list[end_pixel]-pixel_list[start_pixel]+1, 
                                    pixel_list[start_pixel], pixel_list[end_pixel]))

                start_pixel = end_pixel+1             
                print("---")

    img_a1_data = np.array(final_lines,dtype={'names':('index', 'sum_pixel', 'p1', 'p2'),
                                 'formats':('i4','i4','i4','i4')})                        
    return img_a1_data 


In [6]:
img_path = "predictresult/243_color_a_x01.png"
img = imread(img_path,as_gray=True)

img_sum_axis_0 =  cal_sum_pixel_v3_lineslice(img,0,10)
img_group_axis_0 = cal_sum_pixel_v4_mergeline(img_sum_axis_0,
                                              total_pixel_along_line = img.shape[0],
                                              threshold_same_group=10,
                                              max_white_space=40)

img_sum_axis_1 =  cal_sum_pixel_v3_lineslice(img,1,10)
img_group_axis_1 = cal_sum_pixel_v4_mergeline(img_sum_axis_1,
                                              total_pixel_along_line = img.shape[1],
                                              threshold_same_group=10,
                                              max_white_space=40)

im = Image.open(img_path)
draw = ImageDraw.Draw(im)

#print("horizontal line") 
c=0
for index,sump,p1,p2 in img_group_axis_1: # horizontal line
    draw.line((p1, index, p2, index), fill=(255, 0, 0), width=1) #fill=colors[c]
    c=c+1 if c<len(colors)-1 else 0
    #print(p1, index, p2, index)   
    
#print("vertical line") 
c=0
for index,sump,p1,p2  in img_group_axis_0: #vertical line
    draw.line((index, p1, index, p2), fill=(0, 0, 255), width=1)  
    c=c+1 if c<len(colors)-1 else 0
    
im.show() # pop up a window

------------------------
group # 0
[( 7,  49,  42,  91) ( 8, 143, 111, 274) (10,   9,  27,  35)
 (10,  14, 100, 113)]
group_pixel (4, 288)
index l0,l1: 7 10
pixel p0,p1: 27 274

 pixel_to_cut_line []
avg_index,p1,p2: 8 27 274
------------------------
group # 1
[(194, 50,  34,  83) (195, 13, 117, 129)]
group_pixel (2, 288)
index l0,l1: 194 195
pixel p0,p1: 34 129

 pixel_to_cut_line []
avg_index,p1,p2: 194 34 129
------------------------
group # 2
[(263, 63, 203, 274)]
group_pixel (1, 288)
index l0,l1: 263 263
pixel p0,p1: 203 274

 pixel_to_cut_line []
avg_index,p1,p2: 263 203 274
------------------------
group # 0
[(25, 14,  26,  39) (29, 29,  48,  80) (31, 22,  81, 107)
 (34, 11, 184, 194)]
group_pixel (10, 288)
index l0,l1: 25 34
pixel p0,p1: 26 194

 pixel_to_cut_line [73]
pixel_to_cut_line [73, 84]
cut lineindex: 25  to  34
cut pixelindex: 0  to  73
pixel value: 26  to  107
sub_group_pixel
avg_index,p1,p2: 29 26 107
---
cut lineindex: 25  to  34
cut pixelindex: 74  to  84
pixel va

In [4]:
#img_path = "../weights500/014_color_a_x01.png"
#img_path = "../beike097_a_00.png"
img_path = "../005_color_a_x01.png"
img = imread(img_path,as_gray=True)
print(img.shape)
# Get a line (p0,p1)
threshold_max_pixel_sparse=10
img_sum_axis_0 =  cal_sum_pixel_v3_lineslice(img,0,10)
img_sum_axis_1 =  cal_sum_pixel_v3_lineslice(img,1,10)

im = Image.open(img_path)
draw = ImageDraw.Draw(im)

#print("horizontal line") 
for index,sump,p1,p2 in img_sum_axis_1: # horizontal line
    draw.line((p1, index, p2, index), fill=(255, 0, 0), width=1)
    #print(p1, index, p2, index)   
    
#print("vertical line")     
for index,sump,p1,p2  in img_sum_axis_0:
    draw.line((index, p1, index, p2), fill=(0, 0, 255), width=1)   
    #print(index, p1, index, p2)
im.show() # pop up a window

(384, 512)


In [10]:
img_sum_axis_0 =  cal_sum_pixel_v3_lineslice(img,0,10)
img_sum_axis_1 =  cal_sum_pixel_v3_lineslice(img,1,10)

In [16]:
img_sum_axis_0['index']

array([ 13,  15,  81,  86,  86,  86,  87,  87,  87,  88,  88,  89, 421,
       423, 423, 469, 471, 473, 473, 475, 475, 477, 479], dtype=int32)

In [14]:
img_sum_axis_0['p1']

array([137, 217,  33,  17, 115, 241, 131, 241, 310, 241, 313, 329,  19,
        19,  41, 333, 289,  49, 277,  65, 263, 161, 199], dtype=int32)

In [18]:
lines =[]
for index,sump,p1,p2  in img_sum_axis_0:
    lines.append([index,p1,index,p2])

In [19]:
lines

[[13, 137, 13, 214],
 [15, 217, 15, 243],
 [81, 33, 81, 134],
 [86, 17, 86, 30],
 [86, 115, 86, 134],
 [86, 241, 86, 308],
 [87, 131, 87, 134],
 [87, 241, 87, 272],
 [87, 310, 87, 312],
 [88, 241, 88, 255],
 [88, 313, 88, 328],
 [89, 329, 89, 354],
 [421, 19, 421, 37],
 [423, 19, 423, 24],
 [423, 41, 423, 53],
 [469, 333, 469, 360],
 [471, 289, 471, 343],
 [473, 49, 473, 137],
 [473, 277, 473, 283],
 [475, 65, 475, 68],
 [475, 263, 475, 276],
 [477, 161, 477, 196],
 [479, 199, 479, 246]]

In [None]:
img_sum_axis_0 =  cal_sum_pixel_v3_lineslice(img,0,10)
img_group_axis_0 = cal_sum_pixel_v4_mergeline(img_sum_axis_0,
                                              total_pixel_along_line = img.shape[0],
                                              threshold_same_group=10,
                                              max_white_space=30)

img_sum_axis_1 =  cal_sum_pixel_v3_lineslice(img,1,10)
img_group_axis_1 = cal_sum_pixel_v4_mergeline(img_sum_axis_1,
                                              total_pixel_along_line = img.shape[1],
                                              threshold_same_group=10,
                                              max_white_space=30)


In [45]:
final_lines=[]
# 1. group the "nearby lines" top/down or left/right together
threshold_same_group=10
group_slicearrays = get_slice_dataarrays(img_sum_axis_1['index'],
                                        threshold_same_group,
                                        img_sum_axis_1)
max_white_space=10
total_pixel_along_line = img.shape[1]
# merge each rows in group to 1 row, then calculate the most occupied index

# 2. in each group of the nearby line, 
#    separate subgroup along the line width direction, 
#    (separate the line that is far away each other)
for group_id,group in enumerate(group_slicearrays):
    
    print("------------------------\ngroup #",group_id)
    print(group)
    line_index_l0=group['index'][0]
    line_index_l1=group['index'][-1]
    index_range = line_index_l1 - line_index_l0 +1  
    index_array = np.arange(line_index_l0, line_index_l1+1)    
    # 3. create group_pixel, 2d array, size(index_range,total_pixel_along_line) 
    #    to store pixels from the group
    group_pixel=np.zeros((index_range, total_pixel_along_line))    
    print("group_pixel",group_pixel.shape)
    print("index l0,l1:",line_index_l0,line_index_l1)  

    #most_p1=total_pixel_along_line
    #most_p2=0      
    # 4. fill 1 on the group_pixel cell that has pixel. (or get it from the raw image?)
    for index,sump,p1,p2 in group:
        group_pixel[index-line_index_l0,p1:p2+1]=1
        #most_p1 = p1 if p1 < most_p1 else most_p1
        #most_p2 = p2 if p2 > most_p2 else most_p2

    #new_p1=total_pixel_along_line
    #new_p2=0    
    
    #print(np.sum(group_pixel,axis=1))
    
    # 5. merge the nearby lines, to then find a gap
    merge_index = np.sum(group_pixel,axis=0)
    pixel_list = np.where(merge_index>0)[0]  # return a tuple (len=1) of numpy.ndarray, 
    #                                                so select the first index [0]
    #print("index_list_pixel",type(index_list_pixel),len(index_list_pixel),index_list_pixel)
    pixel_p0 = pixel_list[0]
    pixel_p1 = pixel_list[-1]
    print("pixel p0,p1:",pixel_p0, pixel_p1)
    #print(pixel_list)

    # 6. get index of the linesgroup gap
    pixel_to_cut_line = get_end_indexs_to_cut(pixel_list,max_white_space)
    #slice_daraarrays = get_slice_dataarrays(index_list_pixel[0],max_white_space)
    
    
    print("\n pixel_to_cut_line",pixel_to_cut_line)
    
    # 7. if there is no gap, find the average index
    if len(pixel_to_cut_line) == 0:

        count_row_pixels = np.sum(group_pixel,axis=1) # size = 1d
        count_all_pixels = np.sum(count_row_pixels) # size = 1d
        #print("count_row_pixels",count_row_pixels.size)
        #print("count_all_pixels",count_all_pixels.size)
        #init_index
#         total_index=0
#         # 8. sum up all pixel index value.
#         for row_index,row_px in zip(index_array,count_row_pixels):
#             total_index=total_index + row_index*row_px  
        total_index=index_array.dot(count_row_pixels) #get single value
        avg_index = int(round(total_index/count_all_pixels))
        
        #print("total_index",total_index)
        print("avg_index,p1,p2:",avg_index,pixel_p0, pixel_p1)
        final_lines.append((avg_index, pixel_p1-pixel_p0+1, pixel_p0, pixel_p1))
   
        # done!
        
    else:
        # 8. if there are gaps, combine the lines pixel as each block, then find the average index
        #fine lines that can merge, and cannot

        start_pixel=0
        #print(np.sum(group_pixel))
        #index_list_pixel
        # add the last cut index
        pixel_to_cut_line.append(len(pixel_list)-1)
        print("pixel_to_cut_line",pixel_to_cut_line)
        
        for cut_pixel in pixel_to_cut_line:
            
            #sub_group_pixel=np.zeros_like(group_pixel)
            #print("\nsub_group_pixel",sub_group_pixel.shape)  
            
            end_pixel = cut_pixel

            print("cut lineindex:",line_index_l0," to ",line_index_l1)
            print("cut pixelindex:",start_pixel," to ",end_pixel)            
            print("pixel value:",pixel_list[start_pixel]," to ",pixel_list[end_pixel])

            #print("pixel value;",group_pixel[start_pixel]," to ",group_pixel[end_pixel])
            

            # 9. crop the cells of pixel-block 
            
            print("sub_group_pixel")#,group_pixel[:, pixel_list[start_pixel]:pixel_list[end_pixel]+1])
            sub_group_pixel=group_pixel[:, pixel_list[start_pixel]:pixel_list[end_pixel]+1]
            #sub_group_pixel[line_index_l0:line_index_l1+1, start_pixel:end_pixel+1] = \
            #                    group_pixel[line_index_l0:line_index_l1+1, start_pixel:end_pixel+1]            

            
            
            #print(sub_group_pixel)
            count_row_pixels = np.sum(sub_group_pixel,axis=1) # size = 1d
            count_all_pixels = np.sum(count_row_pixels) # #get single value
            #print("count_row_pixels",count_row_pixels)
            #print("count_all_pixels",count_all_pixels) 
            
            
            #print("index_values",index_values) 
            total_index=index_array.dot(count_row_pixels) #get single value
            #print("sum_index_pixel",total_index) 
            avg_index=int(round(total_index/count_all_pixels))
            
            print("avg_index,p1,p2:",avg_index,pixel_list[start_pixel], pixel_list[end_pixel])
            
            final_lines.append((avg_index, 
                                pixel_list[end_pixel]-pixel_list[start_pixel]+1, 
                                pixel_list[start_pixel], pixel_list[end_pixel]))
            
            start_pixel = end_pixel+1             
            print("---")
            

    #break
im = Image.open(img_path)
draw = ImageDraw.Draw(im)

#print("horizontal line") 
c=0
for index,sump,p1,p2 in final_lines: # horizontal line
    draw.line((p1, index, p2, index), fill=colors[c], width=1)
    c=c+1 if c<len(colors)-1 else 0
    #print(p1, index, p2, index)   
im.show() # pop up a window

------------------------
group # 0
[(17,  18,  86, 103) (17,  18, 256, 274) (18, 246,  84, 369)
 (19,   8,  65,  72) (19,  31, 381, 423)]
group_pixel (3, 512)
index l0,l1: 17 19
pixel p0,p1: 65 423

 pixel_to_cut_line [7, 293]
pixel_to_cut_line [7, 293, 336]
cut lineindex: 17  to  19
cut pixelindex: 0  to  7
pixel value: 65  to  72
sub_group_pixel
avg_index,p1,p2: 19 65 72
---
cut lineindex: 17  to  19
cut pixelindex: 8  to  293
pixel value: 84  to  369
sub_group_pixel
avg_index,p1,p2: 18 84 369
---
cut lineindex: 17  to  19
cut pixelindex: 294  to  336
pixel value: 381  to  423
sub_group_pixel
avg_index,p1,p2: 19 381 423
---
------------------------
group # 1
[(53, 36, 423, 458)]
group_pixel (1, 512)
index l0,l1: 53 53
pixel p0,p1: 423 458

 pixel_to_cut_line []
avg_index,p1,p2: 53 423 458
------------------------
group # 2
[(134, 37, 49, 87) (137, 19, 13, 37) (138, 16, 41, 56)]
group_pixel (5, 512)
index l0,l1: 134 138
pixel p0,p1: 13 87

 pixel_to_cut_line []
avg_index,p1,p2: 136 13

In [145]:

threshold_same_group=10
groupslicearrays = get_slice_dataarrays(img_sum_axis_0['index'],
                                        threshold_same_group,
                                        img_sum_axis_0)
max_white_space=10
final_lines=[]
# merge each rows in group to 1 row, then calculate the most occupied index
for group in groupslicearrays:
    
    print("\ngroup:")
    print(group)
    init_index=group['index'][0]
    last_index=group['index'][-1]
    index_range = last_index - init_index +1  
    
    # 1. create numpy 2d array to store pixels from the group
    group_pixel=np.zeros((index_range,img.shape[0]))    
    #print(group_pixel.shape, init_index)     
    new_p1=img.shape[0]
    new_p2=0    
    most_p1=img.shape[0]
    most_p2=0      
    # 2. mark 1 on the cell that has pixel, or get it from the raw image?
    for index,sump,p1,p2 in group:
        group_pixel[index-init_index,p1:p2+1]=1
        most_p1 = p1 if p1 < most_p1 else most_p1
        most_p2 = p2 if p2 > most_p2 else most_p2
        
    #print(np.sum(group_pixel,axis=1))
    
    # 3. merge along nearby index, then find a gap
    merge_index = np.sum(group_pixel,axis=0)
    index_list_pixel = np.where(merge_index>0)
    #print("index_list_pixel",index_list_pixel)
    index_to_cut_line = get_end_indexs_to_cut(index_list_pixel[0],max_white_space)
    #slice_daraarrays = get_slice_dataarrays(index_list_pixel[0],max_white_space)
    
    
    print("\nindex_to_cut_line",index_to_cut_line)
    print("most_p1",most_p1)
    if len(index_to_cut_line) == 0:
        #ok it just one line then find average index 
        count_row_pixels = np.sum(group_pixel,axis=1)
        count_all_pixels = np.sum(count_row_pixels)
        #print("count_row_pixels",count_row_pixels)
        #print("count_all_pixels",count_all_pixels)
        #init_index
        total_index=0

        for row_index,row_px in zip(np.arange(init_index,last_index+1),count_row_pixels):
            total_index=total_index + row_index*row_px
  
        avg_index = int(round(total_index/count_all_pixels))
        #print("total_index",total_index)
        print("avg_index,p1,p2:",avg_index,most_p1, most_p2)
        final_lines.append((avg_index, most_p2-most_p1+1, most_p1, most_p2))
        pass
        # done!
        
    else:
        #fine lines that can merge, and cannot
        split_group_pixel=[]
        split_p1=[]
        split_p2=[]
        s=most_p1
        print("\n")
        #print(np.sum(group_pixel))
        print("group_pixel",group_pixel.shape)
        
        for cutindex in index_to_cut_line:
            endcut = cutindex+most_p1
            split_group_pixel.append(group_pixel[:,s:endcut+1])
            split_p1.append(s)
            split_p2.append(endcut)
            s = endcut+1
            
        split_group_pixel.append(group_pixel[:,s:])  
        split_p1.append(s)
        split_p2.append(most_p2) 
            
        for sgp,p1,p2 in zip(split_group_pixel,split_p1,split_p2):
            print("\n",sgp.shape)
            #print(sgp)
            count_row_pixels = np.sum(sgp,axis=1,keepdims=False)
            count_all_pixels = np.sum(count_row_pixels)
            print("count_row_pixels",count_row_pixels)
            
            count_index_pixels = np.sum(sgp,axis=0)
            print(count_index_pixels.shape)
            index_first_pixel = np.where(count_index_pixels>0)[0]
            this_p1= p1+index_first_pixel
            print("index_first_pixel",index_first_pixel)     
            total_index=0
            for row_index,row_px in zip(np.arange(init_index,last_index+1),count_row_pixels):
                total_index=total_index + row_index*row_px            
            avg_index = int(round(total_index/count_all_pixels))
            #print("total_index",total_index)
            print("avg_index,p1,p2:",avg_index,this_p1, p2)
            final_lines.append((avg_index, p2-this_p1+1, this_p1, p2))
            
        
print(final_lines)        

    #break
im = Image.open(img_path)
draw = ImageDraw.Draw(im)
#print("vertical line")   
c=0
for index,sump,p1,p2 in final_lines: # horizontal line
    draw.line((index, p1, index, p2), fill=colors[c], width=1)
    c=c+1 if c<len(colors)-1 else 0
    #print(p1, index, p2, index)   
im.show() # pop up a window


group:
[(13, 78, 137, 214) (15, 27, 217, 243)]

index_to_cut_line []
most_p1 137
avg_index,p1,p2: 14 137 243

group:
[(81, 73,  33, 134) (86,  6,  17,  30) (86, 17, 115, 134)
 (86, 32, 241, 308) (87,  4, 131, 134) (87, 17, 241, 272)
 (87,  3, 310, 312) (88, 15, 241, 255) (88, 16, 313, 328)
 (89, 26, 329, 354)]

index_to_cut_line [115]
most_p1 17


group_pixel (9, 384)

 (9, 116)
count_row_pixels [100.   0.   0.   0.   0.  32.   2.   0.   0.]
(116,)
index_first_pixel [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  16  17  18  19
  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37
  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55
  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73
  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91
  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109
 110 111 112 113 114 115]
avg_index,p1,p2: 82 [ 17  18  19  20  21  22  23  24  25  26  27

SystemError: <built-in method draw_lines of ImagingDraw object at 0x7f8be985d3d0> returned a result with an error set

In [143]:
for line in img_sum_axis_0:
    print(line)

(13, 78, 137, 214)
(15, 27, 217, 243)
(81, 73, 33, 134)
(86, 6, 17, 30)
(86, 17, 115, 134)
(86, 32, 241, 308)
(87, 4, 131, 134)
(87, 17, 241, 272)
(87, 3, 310, 312)
(88, 15, 241, 255)
(88, 16, 313, 328)
(89, 26, 329, 354)
(421, 18, 19, 37)
(423, 6, 19, 24)
(423, 6, 41, 53)
(469, 25, 333, 360)
(471, 37, 289, 343)
(473, 72, 49, 137)
(473, 6, 277, 283)
(475, 4, 65, 68)
(475, 13, 263, 276)
(477, 35, 161, 196)
(479, 36, 199, 246)


In [115]:
print(np.arange(init_index,last_index+1))

[350 351 352 353 354 355 356 357 358 359 360 361 362 363]


In [68]:
slicearrays = get_slice_dataarrays(img_sum_axis_1['index'],10,img_sum_axis_1)
print(type(slicearrays))
for group in slicearrays:
    print(group['index'][0],"->",group['index'][-1])
    for line in group:
        print(line)
    print("\n")

<class 'list'>
17 -> 363
(17, 18, 86, 103)
(17, 18, 256, 274)
(18, 246, 84, 369)
(19, 8, 65, 72)
(19, 31, 381, 423)
(53, 36, 423, 458)
(134, 37, 49, 87)
(137, 19, 13, 37)
(138, 16, 41, 56)
(241, 52, 15, 88)
(350, 6, 132, 152)
(350, 19, 181, 202)
(353, 31, 89, 166)
(354, 9, 89, 100)
(354, 7, 167, 173)
(355, 56, 174, 253)
(357, 4, 330, 334)
(357, 33, 389, 423)
(358, 23, 366, 388)
(359, 25, 263, 289)
(363, 23, 434, 456)




IndexError: index 0 is out of bounds for axis 0 with size 0

In [9]:
print(slicearrays)

[array([17, 17, 18, 19, 19], dtype=int32), array([53], dtype=int32), array([134, 137, 138], dtype=int32), array([241], dtype=int32), array([350, 350, 353, 354, 354, 355, 357, 357, 358, 359, 363], dtype=int32)]


In [262]:
img_sum_axis_1 =  cal_sum_pixel_v2_linecrop(img,1,10)
print(type(img_sum_axis_1))
img_sum_axis_1 =  cal_sum_pixel_v3_lineslice(img,1,10)
print(type(img_sum_axis_1))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [263]:
img_sum_axis_1['index']

array([ 17,  17,  18,  19,  19,  53, 134, 137, 138, 241, 350, 350, 353,
       354, 354, 355, 357, 357, 358, 359, 363], dtype=int32)