In [1]:
from transformers import pipeline
from PIL import Image
import requests
import numpy as np
import cv2
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
import torch
import matplotlib.pyplot as plt
import sys
from skimage import filters, color, morphology
from skimage.segmentation import flood, flood_fill
import skimage
import math
import os
import random
from sklearn.cluster import KMeans
import scipy

  from .autonotebook import tqdm as notebook_tqdm


# Load Depth Model

In [2]:
pipe = pipeline(task="depth-estimation", model="LiheYoung/depth-anything-small-hf")

In [3]:
model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)

midas = torch.hub.load("intel-isl/MiDaS", model_type)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

Using cache found in /user/georg.eckardt/.cache/torch/hub/intel-isl_MiDaS_master
Using cache found in /user/georg.eckardt/.cache/torch/hub/intel-isl_MiDaS_master


# Load Sam

In [4]:
model_type = "vit_b"
sam_checkpoint = "model/"+model_type+".pth"

torch.cuda.set_device(7)

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to("cuda")

predictor = SamPredictor(sam)

# Functions

In [20]:
%matplotlib inline
def calc_center( mask ):
    coords = np.where(mask==True)
    x = coords[0].sum() / coords[0].shape
    y = coords[1].sum() / coords[1].shape

    return int(y), int(x)

def calc_iou(im_1, im_2):
    intersection = np.logical_and(im_1, im_2)
    union = np.logical_or(im_1,im_2)

    return np.sum(intersection)/np.sum(union)

def calc_best_subset(masks, num_masks, min_masks):
    # PERFORMANCE BOOST: Sort out masks with very high overlap and try the out later
    # Sort out masks with clear cut boundaries
    min_value = float("inf")
    min_arr = []

    # add empty masks
    for i in range(num_masks-min_masks):
        masks.append(np.zeros((128,128)))
    
    min_value, min_arr = iteration_layer(min_value, min_arr, [], num_masks, masks)

    return min_arr

def mkdir(s):
    try:
        os.mkdir(s)
    except:
        pass

def three_dim(x):
    return np.stack( ( x.copy()*234, x.copy()*345, x.copy()*567 ), 2).astype(np.uint8)  

def three_dim_bw(x):
    return np.stack( ( x.copy()*255, x.copy()*255, x.copy()*255 ), 2).astype(np.uint8)  

def create_video(s):
    return cv2.VideoWriter(s,cv2.VideoWriter_fourcc(*'mp4v'), 12, (128,128))

def iteration_layer(min_value, min_arr, current_arr, level, masks, starting_index=0):
    if level == 0:
        uni_map = np.zeros((128,128))
        for x in range(len(current_arr)):
            uni_map += masks[current_arr[x]] 
        full_score = np.absolute(uni_map-1).sum()   
        # check for new best
        if full_score < min_value:
            min_arr = current_arr.copy()
            del current_arr[-1]
            return full_score, min_arr
        else:
            del current_arr[-1]
            return min_value, min_arr
    
    for i in range(starting_index,len(masks)):
        current_arr.append(i)
        min_value, min_arr = iteration_layer(min_value, min_arr, current_arr, level-1, masks, i+1)
        
    try:
        del current_arr[-1]
    except:
        pass
    return min_value, min_arr

def create_sobel(frame):
    return np.array(filters.sobel(frame))

def calc_centers_colormean(regions,frame):
    center = []
    color_mean = []
        
    for k in range(np.unique(regions).shape[0]):
        area = regions == k
        size = area.sum()
        center.append( calc_center(area) )
        color_mean.append( (area  * ((frame[..., 0] + frame[..., 1] + frame[..., 2] )/3).astype(np.uint8)).sum()/ size )
    
    return center, color_mean

def display_pointers(center, image):
    for x in range(len(center)):
            image = cv2.circle( image, (int(center[x][0]), int(center[x][1])), 3, (255, 255, 255) )
    return image

def display_bb(bbs, image):
    for bb in bbs:
            p1 = (bb[0][1], bb[0][0])
            p2 = (bb[1][1], bb[1][0])
            image = cv2.rectangle(image, p1,p2, (0,0,255), 1)
    return image


def bw_frame(frame):
    return (frame[...,0] + frame[...,1] + frame[...,2])/3

def normalise(x):
    x = np.array(x)
    std = x.std()
    if std == 0:
        return (x-x.mean())
    else:
        return (x-x.mean())/std

def point_dist(p1,p2):
    return math.sqrt( (p1[0]-p2[0])**2 + (p1[1]-p2[1])**2  )

# Average distance between points is 1
def calc_normalize(center):
    centerx = np.array([ x  for x,y in center])
    centery = np.array([ y  for x,y in center])
    norm_center = np.stack((centerx.copy(), centery.copy()), axis=-1)

    distance = []
    for i,p1 in enumerate(center):
        for j,p2 in enumerate(center):
            if i != j:
                distance.append( point_dist(p1,p2) )
    
    distance = np.array(distance)

    return norm_center/distance.mean()

def calc_centerdepth(center, dist):
    centerdist = []
    for x in center:
        centerdist.append(dist[x[0]][x[1]])
    return centerdist

def calc_div_matrix(color_mean):
    size = len(color_mean)
    color_mean = np.array(color_mean)
    matrix = np.zeros((size,size))
                      
    for x in range(size):
        matrix[x] = color_mean - color_mean[x]
    
    return np.absolute( matrix )

def calc_dist_matrix(points):
    size = points.shape[0]
    centerx = points[:,0]
    centery = points[:,1]

    matrix = np.zeros((size,size))
                      
    for x in range(size):
        matrix[x] = np.sqrt((centerx - centerx[x])**2 + (centery - centery[x])**2)
    
    return np.absolute( matrix )

def get_bb(images):
    bb = []
    for im in images:
        x, y = np.where(im != 0)
        bb.append( ( (x.min(), y.min()), (x.max(), y.max() ) ) )
        
    return bb

def calc_depth_mean(depth, mask_arr):
    mean_depths = []
    for mask in mask_arr:
        area = depth * mask
        mean = area.sum() / (area != 0).sum()
        max_diff = area.max()-area.min()
        mean_depths.append( (mean, max_diff) )

    return mean_depths

# unify masks
def unify_masks_by_iou(mask_all, iou_thresh):
    len_mask_all = len(mask_all)
    iou_err = np.zeros((len_mask_all,len_mask_all))

    for n in range(len_mask_all):
        for m in range(n):
            if n != m:
                iou = calc_iou(mask_all[n], mask_all[m])
                iou_err[n][m] = iou
                iou_err[m][n] = iou

    remove_list = []
    for n in range(len_mask_all):
        for m in range(len_mask_all):
            if n != m and n not in remove_list and iou_err[n][m] > iou_thresh:
                mask_all[n] = np.logical_or(mask_all[n],mask_all[m])
                remove_list.append(m)
    
    # remove remove_list
    new_masks = []
    for x in range(len_mask_all):
        if x not in remove_list:
            new_masks.append(mask_all[x])
    
    return new_masks

def background_removal(masks, mask_depth_vis, threshhold_depth):
    mean = mask_depth_vis[:,1].mean()
    foreground_masks = []
    for i,x in enumerate(mask_depth_vis):
        if scipy.stats.norm(mean, 0.2).pdf(x[0]) > threshhold_depth:
            foreground_masks.append(masks[i])
            
    return foreground_masks
    

In [32]:
iou_error = 0.9
lam_dis = 1
lam_col = 1
num_masks_removal = 3
number_of_masks = 30
bb_max_size = (128*128)/3
threshhold_depth = 0.1

def get_results(rand_images, rand_frames):

    error = 0

    for i in rand_images:
        mkdir("images/"+str(i)+"/")
        data = np.load('data/MOVIE/videos/'+str(i)+'.npy')
        annotation = np.load('data/MOVIE/segmentations/'+str(i)+'.npy')

        for xsx in rand_frames:
            frame = data[xsx]
            an = annotation[xsx]
            bw_image = bw_frame(frame.copy())
            norm_bw = normalise(bw_image.copy())

            # predict the depth values
            depth_vals = np.array(pipe(Image.fromarray(bw_image.copy()))["depth"])

            sobel = create_sobel(norm_bw.copy())
            sobel = np.digitize(sobel, bins=[sobel.mean()]).astype(np.uint8)


            sobel = np.digitize(sobel, bins=[sobel.mean()]).astype(np.uint8)
            regions = skimage.morphology.label(sobel,1)

            center, color_mean = calc_centers_colormean(regions.copy(), frame.copy())

            save_regions = 0

            while len(center) > number_of_masks:
                centerdepth = normalise(calc_centerdepth(center, depth_vals))
                norm_center = normalise(calc_normalize(center.copy()))

                dis_mat = calc_div_matrix(centerdepth)
                col_mat = calc_div_matrix(color_mean)
                cen_mat = calc_dist_matrix(norm_center)

                
                lam_cen = 1/cen_mat.std()**2

                dis = lam_dis * math.e**(-lam_dis * dis_mat)
                col = lam_col * math.e**(-lam_col * col_mat)
                cen = lam_cen * math.e**(-lam_cen * cen_mat)

                uni = ( dis + col + cen )/3
                uni *= np.identity(uni.shape[0]) == False           # remove center values 
                

                for j in range(num_masks_removal):
                    if len(center)-j > number_of_masks:
                        maxi = np.unravel_index(uni.argmax(), uni.shape)
                        uni[maxi[0]][maxi[1]] = 0
                        regions += (regions == maxi[0])*(maxi[1]-maxi[0])
                        regions = np.digitize(regions, bins=np.unique(regions))-1
                        save_regions = regions.copy()

                center, color_mean = calc_centers_colormean(regions, frame.copy())
            
            save_center = center.copy()
            
            # Predict the Masks
            mask_all = []
            score_all = []
            predictor.set_image(frame.copy())
            for point in center:
                point = np.expand_dims(np.array(point), axis=0)
                mask, score, _  = predictor.predict( point_coords=point, point_labels=[1])
                mask_all.append(mask[0])
                score_all.append(score[0])

            all_centers = display_pointers(center.copy(), frame.copy())
            
            # check for the largest cluster | remove all smaller cluster
            for m, mask in enumerate(mask_all):
                mask_regions = (skimage.morphology.label(mask,background=-1,connectivity=1)+1) * mask
                largest_mask = 0
                size = 0

                for ind in range(1, mask_regions.max()+2):
                    area = (mask_regions == ind).sum()
                    if area > size:
                        largest_mask = ind
                        size = area
                
                mask_all[m] = (mask_regions == largest_mask)
            
            # get upper/lower and left/right bounds
            bb = get_bb(mask_all)

            for x, mask in enumerate(mask_all):
                save = display_bb([bb[x]],three_dim(mask))
            
            save = display_bb(bb,frame.copy())

            new_bb = []
            old_bb = []
            for c, ((x1, y1), (x2, y2)) in enumerate(bb):
                if (x2-x1)*(y2-y1) < bb_max_size:
                    new_bb.append( [y1, x1, y2, x2] )
                    old_bb.append( bb[c] )
            
            bb_num = np.array( new_bb )
            bb_num = torch.from_numpy(bb_num).to("cuda")

            # WHAT TO DO IF THERE ARE NO BOXES
            transformed_boxes = predictor.transform.apply_boxes_torch(bb_num, frame.shape[:2])
            masks, _, _ = predictor.predict_torch(
                point_coords=None,
                point_labels=None,
                boxes=transformed_boxes,
                multimask_output=False,
            )
            
            masks = masks.cpu().numpy().squeeze()

            # merge masks with high iou
            masks = unify_masks_by_iou(list(masks), iou_error)
            # sort masks by size (smallest first)
            masks.sort( key = lambda masks: masks.sum() )
            
            # Foreground Background Distingtion (take mean of mean depths of all masks and then model normal distirbution with std as learnable parameter )
            np_mask_depth = np.array(calc_depth_mean(normalise(depth_vals.copy()), masks))
            np_mask_depth +=  np_mask_depth.min()*-1
            mask_depth_vis = np_mask_depth / np_mask_depth.max()

            masks = background_removal(masks, mask_depth_vis, threshhold_depth)

            # add masks together
            final = np.zeros((128,128))
            for x, mask in enumerate(masks):
                non_overlap = (final == 0) * mask
                deep = mask_depth_vis[x][0]
                final = final + non_overlap * (x+1)
            
            cv2.imwrite("images/"+str(i)+"/final.jpg", three_dim(final))
            cv2.imwrite("images/"+str(i)+"/ann.jpg", three_dim(an.squeeze()) )

            error = error + calc_iou(final,an.squeeze())

    return error / (len(rand_images) * len(rand_frames))


 # iou_error   lam_dis   lam_col   num_masks_removal   number_of_masks   bb_max_size   threshhold_depth
def change_params( direction, change ):
    global iou_error   
    global lam_dis   
    global lam_col   
    global number_of_masks   
    global bb_max_size   
    global threshhold_depth
    
    iou_error = iou_error + (change[0] * iou_error * direction[0] )
    lam_dis = lam_dis + (change[1] * lam_dis * direction[1] )
    lam_col = lam_col + (change[2] * lam_col * direction[2] )
    number_of_masks = number_of_masks + (change[3] * number_of_masks * direction[3] )
    bb_max_size = bb_max_size + (change[4] * bb_max_size * direction[4] )
    threshhold_depth = threshhold_depth + (change[5] * threshhold_depth * direction[5] )
    return

def print_params():
    global iou_error   
    global lam_dis   
    global lam_col   
    global number_of_masks   
    global bb_max_size   
    global threshhold_depth
    print("new params:")
    print("iou_error:           ",str(iou_error))
    print("lam_dis:             ",str(lam_dis))
    print("lam_col:             ",str(lam_col))
    print("number_of_masks:     ",str(number_of_masks))
    print("bb_max_size:         ",str(bb_max_size))
    print("threshhold_depth:    ",str(threshhold_depth))
    print("-------------------------------------------")



def parameter_fitting_all(epochs, iterations, im_per_iter):
    change = [0.01, 0.05, 0.05, 0.05, 0.05, 0.05 ]

    for x in range(epochs):
        print(x)
        direction_of_change = np.array([random.randint(-1,1) for x in range(6)])

        rand_images = [ random.randint(0,50) for x in range(iterations) ]
        rand_frames = [ random.randint(0,23) for x in range(im_per_iter) ]

        p1 = get_results(rand_images,rand_frames)

        change_params(direction_of_change, change)
        
        p2 = get_results(rand_images,rand_frames)
        
        if p2 < p1:
            change_params(direction_of_change*-1, change)
        
        print(p1)
        print(p2)
        print()
    
    print_params()

def check_parameters():
    rand_images = [ x for x in range(50) ]
    rand_frames = [ 0 ]
    print(get_results(rand_images,rand_frames))

# 0.42953019158842976
parameter_fitting_all(5, 3, 3)
check_parameters()


0
0.5462109963268498
0.5195673681035236

1
0.2902630277230174
0.2691966709820804

2
0.6322431615690484
0.6322431615690484

3
0.31584664168490223
0.39336137626861545

4
0.4599681367768105
0.41517969073745437

new params:
iou_error:            0.8998200089999999
lam_dis:              0.992518734375
lam_col:              0.9428927976562501
number_of_masks:      28.357678125
bb_max_size:          5447.679999999999
threshhold_depth:     0.09975
-------------------------------------------


RuntimeError: cannot reshape tensor of 0 elements into shape [0, -1, 256, 256] because the unspecified dimension size -1 can be any value and is ambiguous

# Iteration

In [85]:
iou_error = 0.9
lam_dis = 1
lam_col = 1
num_masks_removal = 3
number_of_masks = 30
bb_max_size = (128*128)/3
threshhold_depth = 0.1

for i in range(1):
    print("Video: "+str(i))
    mkdir("images/"+str(i)+"/")

    data = np.load('data/MOVIE/videos/'+str(i)+'.npy')
    annotation = np.load('data/MOVIE/segmentations/'+str(i)+'.npy')

    
    for frame in data:
        bw_image = bw_frame(frame.copy())
        norm_bw = normalise(bw_image.copy())

        # predict the depth values
        depth_vals = np.array(pipe(Image.fromarray(bw_image.copy()))["depth"])

        sobel = create_sobel(norm_bw.copy())
        sobel = np.digitize(sobel, bins=[sobel.mean()]).astype(np.uint8)


        sobel = np.digitize(sobel, bins=[sobel.mean()]).astype(np.uint8)
        regions = skimage.morphology.label(sobel,1)

        center, color_mean = calc_centers_colormean(regions.copy(), frame.copy())

        save_regions = 0

        while len(center) > number_of_masks:
            centerdepth = normalise(calc_centerdepth(center, depth_vals))
            norm_center = normalise(calc_normalize(center.copy()))

            dis_mat = calc_div_matrix(centerdepth)
            col_mat = calc_div_matrix(color_mean)
            cen_mat = calc_dist_matrix(norm_center)

            
            lam_cen = 1/cen_mat.std()**2

            dis = lam_dis * math.e**(-lam_dis * dis_mat)
            col = lam_col * math.e**(-lam_col * col_mat)
            cen = lam_cen * math.e**(-lam_cen * cen_mat)

            uni = ( dis + col + cen )/3
            uni *= np.identity(uni.shape[0]) == False           # remove center values 
            

            for j in range(num_masks_removal):
                if len(center)-j > number_of_masks:
                    maxi = np.unravel_index(uni.argmax(), uni.shape)
                    uni[maxi[0]][maxi[1]] = 0
                    regions += (regions == maxi[0])*(maxi[1]-maxi[0])
                    regions = np.digitize(regions, bins=np.unique(regions))-1
                    save_regions = regions.copy()

            center, color_mean = calc_centers_colormean(regions, frame.copy())
        
        save_center = center.copy()
        
        # Predict the Masks
        mask_all = []
        score_all = []
        predictor.set_image(frame.copy())
        for point in center:
            point = np.expand_dims(np.array(point), axis=0)
            mask, score, _  = predictor.predict( point_coords=point, point_labels=[1])
            mask_all.append(mask[0])
            score_all.append(score[0])

        all_centers = display_pointers(center.copy(), frame.copy())
        
        # check for the largest cluster | remove all smaller cluster
        for m, mask in enumerate(mask_all):
            mask_regions = (skimage.morphology.label(mask,background=-1,connectivity=1)+1) * mask
            largest_mask = 0
            size = 0

            for ind in range(1, mask_regions.max()+2):
                area = (mask_regions == ind).sum()
                if area > size:
                    largest_mask = ind
                    size = area
            
            mask_all[m] = (mask_regions == largest_mask)
        
        # get upper/lower and left/right bounds
        bb = get_bb(mask_all)

        for x, mask in enumerate(mask_all):
            save = display_bb([bb[x]],three_dim(mask))
        
        save = display_bb(bb,frame.copy())

        new_bb = []
        old_bb = []
        for c, ((x1, y1), (x2, y2)) in enumerate(bb):
            if (x2-x1)*(y2-y1) < bb_max_size:
                new_bb.append( [y1, x1, y2, x2] )
                old_bb.append( bb[c] )
        
        bb_num = np.array( new_bb )
        bb_num = torch.from_numpy(bb_num).to("cuda")

        transformed_boxes = predictor.transform.apply_boxes_torch(bb_num, frame.shape[:2])
        masks, _, _ = predictor.predict_torch(
            point_coords=None,
            point_labels=None,
            boxes=transformed_boxes,
            multimask_output=False,
        )

        masks = masks.cpu().numpy().squeeze()

        # merge masks with high iou
        masks = unify_masks_by_iou(list(masks), iou_error)
        # sort masks by size (smallest first)
        masks.sort( key = lambda masks: masks.sum() )

        print(len(masks))

        # Foreground Background Distingtion (take mean of mean depths of all masks and then model normal distirbution with std as learnable parameter )
        mask_depth = np.array(calc_depth_mean(normalise(depth_vals.copy()), masks))
        np_mask_depth +=  np_mask_depth.min()*-1
        mask_depth_vis = np_mask_depth / np_mask_depth.max()

        masks = background_removal(masks, mask_depth_vis, threshhold_depth)

        # add masks together
        final = np.zeros((128,128))
        for x, mask in enumerate(masks):
            cv2.imwrite("images/"+str(i)+"/"+str(x)+"simple_mask.jpg", three_dim_bw(mask))
            non_overlap = (final == 0) * mask
            deep = mask_depth_vis[x][0]
            cv2.imwrite("images/"+str(i)+"/"+str(x)+"mask_depth.jpg", three_dim_bw(deep * non_overlap))
            
            final = final + non_overlap * (x+1)
            #print(final.max())



        cv2.imwrite("images/"+str(i)+"/depth.jpg", three_dim_bw(depth_vals/depth_vals.max()))
        cv2.imwrite("images/"+str(i)+"/regions.jpg", three_dim(save_regions))
        save = display_pointers(save_center,three_dim(annotation[i].squeeze()))
        cv2.imwrite("images/"+str(i)+"/centers.jpg", save)
        cv2.imwrite("images/"+str(i)+"/final.jpg", three_dim(final))
        cv2.imwrite("images/"+str(i)+"/clean.jpg", frame.copy())
        save = display_bb(old_bb,three_dim(annotation[i].squeeze()))
        cv2.imwrite("images/"+str(i)+"/ann.jpg", save )
        
        break

        

Video: 0
14
14
0.20943979750752567
0.25178137610722273
0.16541378112030702
0.6360696773201121
0.7824957290395488
0.6864540706631811
1.345673938354527
1.891257060865595
8
0 0.019022858802571856
1 0.07262203028921688
2 0.3616317782235253
3 0.37934601846829685
4 0.3399549333515471
5 0.2132874808373096
6 0.21718919740786283
7 0.4838722842549318


In [37]:
       '''

        

        # save images
        for c in range(len(mask_all)):
            save = np.stack( ( mask_all[c].copy()*255, mask_all[c].copy()*128, mask_all[c].copy()*56 ), 2).astype(np.uint8)    
            save = cv2.circle( save, (int(center[c][0]), int(center[c][1])), 3, (255, 255, 255) )
            cv2.imwrite("images/"+str(i)+"/"+str(c)+".jpg", save)
        
        # save annotation
        for x in range(len(mask_all)):
            annotation = cv2.circle( annotation, (int(center[x][0]), int(center[x][1])), 3, (255, 255, 255) )
        
        # Calc best Subset | ALTERNATIVE (GET BOUNDING BOXES FOR ALL THE IMAGES AND THEN RUN SAM AGAIN)
        min_arr = calc_best_subset(mask_all, number_of_mask_final_assamble, 2)
        final = np.zeros((128,128))
        for f, ids in enumerate(min_arr):
            final += mask_all[ids]*(f+1)
        final = np.stack( ( final.copy()*234, final.copy()*345, final.copy()*567 ), 2).astype(np.uint8)  

        print("done"+str(i))
        cv2.imwrite("images/"+str(i)+"/all_centers.jpg", all_centers)
        
        cv2.imwrite("images/"+str(i)+"/final.jpg", final)
        cv2.imwrite("images/"+str(i)+"/without.jpg", frame)
        '''

'\n\n \n\n # save images\n for c in range(len(mask_all)):\n     save = np.stack( ( mask_all[c].copy()*255, mask_all[c].copy()*128, mask_all[c].copy()*56 ), 2).astype(np.uint8)    \n     save = cv2.circle( save, (int(center[c][0]), int(center[c][1])), 3, (255, 255, 255) )\n     cv2.imwrite("images/"+str(i)+"/"+str(c)+".jpg", save)\n \n # save annotation\n for x in range(len(mask_all)):\n     annotation = cv2.circle( annotation, (int(center[x][0]), int(center[x][1])), 3, (255, 255, 255) )\n \n # Calc best Subset | ALTERNATIVE (GET BOUNDING BOXES FOR ALL THE IMAGES AND THEN RUN SAM AGAIN)\n min_arr = calc_best_subset(mask_all, number_of_mask_final_assamble, 2)\n final = np.zeros((128,128))\n for f, ids in enumerate(min_arr):\n     final += mask_all[ids]*(f+1)\n final = np.stack( ( final.copy()*234, final.copy()*345, final.copy()*567 ), 2).astype(np.uint8)  \n\n print("done"+str(i))\n cv2.imwrite("images/"+str(i)+"/all_centers.jpg", all_centers)\n \n cv2.imwrite("images/"+str(i)+"/final.jp

In [7]:

        break



        save = three_dim(regions.copy())  
        cv2.imwrite("images/"+str(i)+"/sobel_segmentation.jpg", save)

        center, color_mean = calc_centers_colormean(regions.copy(), frame.copy())


        all_centers = display_pointers(center.copy(), frame.copy())
        cv2.imwrite("images/"+str(i)+"/all_centers.jpg", all_centers)
        
        break
        
        # fuse and remove pointers
        for n in range(len(center)):
            for m in range(len(center)):
                if n != m and math.sqrt(abs(center[n][0] - center[m][0])**2 + abs(center[n][1] - center[m][1])**2) < distance_for_fusing and abs(color_mean[n]-color_mean[m]) < color_grad_for_fusing:
                    regions += (regions == m)*(n-m)

        # restore counting from 0        
        regions = np.digitize(regions, bins=np.unique(regions))-1
        #print(regions.max())
        save = np.stack( ( regions.copy()*255, regions.copy()*128, regions.copy()*56 ), 2).astype(np.uint8)    
        cv2.imwrite("images/"+str(i)+"/regions.jpg", save)
        #plt.imshow(regions, cmap="prism")


        # recalculated center
        center = []
        color_mean = []
        for k in range(np.unique(regions).shape[0]):
            area = regions == k
            center.append( calc_center(area) )
        

        # Predict Masks for all points (check if better with negative points)
        mask_all = []
        score_all = []
        predictor.set_image(frame.copy())
        for point in center:
            point = np.expand_dims(np.array(point), axis=0)
            mask, score, _  = predictor.predict( point_coords=point, point_labels=[1])
            mask_all.append(mask[0])
            score_all.append(score[0])


        
       

SyntaxError: 'break' outside loop (3361205629.py, line 1)