In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from rtseg.utils.param_io import load_params
from pathlib import Path
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage.io import imread
from rtseg.utils.disk_ops import write_files
from rtseg.forkplot import compute_forkplot_stats
import matplotlib.patches as patches
%matplotlib qt5

In [3]:
from rtseg.segmentation import LiveNet, get_live_model, live_segment

  check_for_updates()


In [4]:
params_path = Path("F:/Oscar/EXP-25-CD0120/expt_params.yaml")
params = load_params(params_path, ref_type='expt')

In [5]:
phase_path = Path('F:/Oscar/EXP-25-CD0120/Pos28/phase/phase_0000.tiff')
fluor_path = Path('F:/Oscar/EXP-25-CD0120/Pos28/fluor/fluor_0000.tiff')
phase_img = imread(phase_path).astype('float32')
fluor_img = imread(fluor_path)
datapoint = {
    'phase': phase_img,
    'fluor': fluor_img,
    'position': 28,
    'timepoint': 0,
}

In [6]:
live_net = get_live_model(params)

In [7]:
seg_result = live_segment(datapoint, live_net, params)

In [8]:
plt.figure()
plt.imshow(seg_result['seg_mask'])
plt.show()

In [9]:
plt.figure()
plt.imshow(phase_img)
plt.show()

In [10]:
fig, ax = plt.subplots()
ax.imshow(phase_img, cmap='gray')
for row in seg_result['barcode_locations']:
    rect = patches.Rectangle((row[0], row[1]), row[2] - row[0], row[3] - row[1], linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
plt.show()

### Dot coorindates

In [11]:
from rtseg.dotdetect import compute_dot_coordinates

In [12]:
params.Hardware.device

'cuda:0'

In [13]:
dots= compute_dot_coordinates(seg_result['fluor'], seg_result['seg_mask'], params)

In [14]:
dots

{'raw_coords': array([[ 560.19048373, 1471.04870319],
        [ 805.31760947, 2865.97199774],
        [ 652.55547054, 2865.49196634],
        ...,
        [ 860.72103062,   56.63658438],
        [  15.09939641,   48.28243317],
        [   8.04573452,   46.08915922]]),
 'rotated_coords': array([[1495.95129681,  560.19048373],
        [ 101.02800226,  805.31760947],
        [ 101.50803366,  652.55547054],
        ...,
        [2910.36341562,  860.72103062],
        [2918.71756683,   15.09939641],
        [2920.91084078,    8.04573452]])}

In [15]:
def plot_dots(fluor_image, dots):
    plt.figure()
    plt.imshow(fluor_image)
    plt.plot(dots[:, 1], dots[:, 0], 'ro')
    plt.show()

In [16]:
plot_dots(seg_result['seg_mask'], dots['raw_coords'])

### Computing Internal coordinates to plot

In [17]:
from rtseg.cells.utils import regionprops_custom

In [18]:
trap_locations = seg_result['trap_locations_list']

In [19]:
trap_locations

[65,
 167,
 269,
 371,
 472,
 575,
 677,
 779,
 881,
 983,
 1085,
 1187,
 1290,
 1391,
 1531,
 1633,
 1736,
 1838,
 1940,
 2042,
 2145,
 2247,
 2349,
 2451,
 2554,
 2656,
 2758,
 2861]

In [20]:
import bisect

from rtseg.cells.utils import regionprops_custom, compute_arc_length, compute_projected_points

In [21]:
forkplot_data = compute_forkplot_stats(seg_result['seg_mask'], 
                        dots['rotated_coords'],
                        1,
                        0,
                        seg_result['trap_locations_list'])

In [22]:
len(forkplot_data)

688

In [23]:
forkplot_data

[{'position': 1,
  'timepoint': 0,
  'trap': 27,
  'cell_label': 18,
  'area': 433.0,
  'length': 43.93329508735538,
  'normalization_counts': 1,
  'internal_coord': (18.861033021349154, -1.9246797591669722),
  'normalized_internal_x': 0.42931068529793986,
  'bbox': (2873, 20, 2898, 62),
  'global_coords': (2884.333152615885, 37.727568446733535),
  'local_coords': (11.333152615884956, 17.727568446733535)},
 {'position': 1,
  'timepoint': 0,
  'trap': 24,
  'cell_label': 21,
  'area': 377.0,
  'length': 42.831998113529515,
  'normalization_counts': 1,
  'internal_coord': (29.615902722989183, 1.841558927461486),
  'normalized_internal_x': 0.6914434074378213,
  'bbox': (2587, 41, 2614, 81),
  'global_coords': (2594.0902350411507, 66.74464298954548),
  'local_coords': (7.090235041150663, 25.744642989545483)},
 {'position': 1,
  'timepoint': 0,
  'trap': 10,
  'cell_label': 23,
  'area': 431.0,
  'length': 41.6005986644253,
  'normalization_counts': 1,
  'internal_coord': (32.20835336987599

In [24]:
for thing in forkplot_data:
    if thing['trap'] is None:
        print(thing)

In [43]:
data_for_forks = []

seg_mask = seg_result['seg_mask']
rotated_coords = dots['rotated_coords']
trap_locations = seg_result['trap_locations_list']
trap_width = 40
position = 1
timepoint = 0
traps_per_img = 28

cell_mask_rot = np.rot90(seg_mask).copy()

# calcuates poles, fitcoeff, arc_length for each cell using 
# standard regionprops with some extra calucations
props = regionprops_custom(cell_mask_rot)

# assign dots to cells by label and calculate internal coordinates

# grab the pixel in which dot lies
dot_coords_int = rotated_coords.astype('int') 
x, y = dot_coords_int[:, 0], dot_coords_int[:, 1]


# for each dot we pick the cell label
dot_labels = cell_mask_rot[x, y]

unique_cell_labels, dot_counts = np.unique(dot_labels, return_counts=True)


# figureing out trap number
trap_locations_rot = sorted([cell_mask_rot.shape[0]-loc for loc in trap_locations])

#trap_bins = [(loc-trap_width, loc+trap_width) for loc in trap_locations_rot]
trap_locations_left = [loc-trap_width for loc in trap_locations_rot]
trap_locations_right = [loc+trap_width for loc in trap_locations_rot]
# iterate over unique cell labels, grab and compute appropriate things
for i, single_cell_label in enumerate(unique_cell_labels, 0):
    if single_cell_label != 0: # 0 is for background label
        trap_no = None
        dot_idxs = np.where(dot_labels == single_cell_label)[0]
        # used as normalization 
        dots_per_cell = dot_counts[i]
        # grab the props of cell by indexing into the cell props array
        cell_prop = props[single_cell_label-1]
        left_index = bisect.bisect_left(trap_locations_left, cell_prop.centroid[0])
        right_index = bisect.bisect_right(trap_locations_right, cell_prop.centroid[0])
        #print(cell_prop.centroid[0], left_index, right_index)
        if left_index - 1 == right_index:
            trap_no = traps_per_img-right_index-1

        fit_coeff = cell_prop.fit_coeff
        poles = cell_prop.poles
        #img = cell_prop.image
        #img_size = img.shape
        bbox = cell_prop.bbox
        arc_length = cell_prop.arc_length[0]
        #x_data = np.arange(-0.5, img_size[1]+0.5)
        #y_data = fit_coeff[0] * x_data**2 + fit_coeff[1] * x_data + fit_coeff[2]

        # for all dots inside the cell, compute internal coordinates

        #plt.figure()
        #plt.imshow(img)
        #plt.plot(x_data, y_data, 'r--')
        #plt.plot(poles[:, 0], poles[:, 1], '*')
        for dot_idx in dot_idxs:
            dot_x, dot_y = rotated_coords[dot_idx]
            local_x, local_y = dot_x - bbox[0], dot_y - bbox[1]
            projected_point, internal_y = compute_projected_points(fit_coeff, np.array([[local_y, local_x]]))
            distance_to_pole_along_arc = compute_arc_length(fit_coeff, poles[0, 0], projected_point[0, 0])

            #plt.plot([local_y, projected_point[0, 0]], [local_x, projected_point[0, 1]], 'b--')
            #plt.plot(local_y, local_x, 'go')
            #plt.plot(projected_point[0, 0], projected_point[0, 1], 'b*')
            #print(distance_to_pole_along_arc[0], arc_length)
            if trap_no is not None:
                dot_datapoint = {'position': position,
                                'timepoint': timepoint,
                                'trap': trap_no,
                                'cell_label': single_cell_label,
                                'area': cell_prop.area,
                                'length': arc_length,
                                'normalization_counts': dots_per_cell,
                                'internal_coord': (distance_to_pole_along_arc[0], internal_y[0]),
                                'normalized_internal_x': distance_to_pole_along_arc[0]/arc_length,
                                'bbox': bbox,
                                'global_coords': (dot_x, dot_y),
                                'local_coords': (local_x, local_y),
                                }
                data_for_forks.append(dot_datapoint)



2884.3718244803695 28 27
2599.5172413793102 25 24
1130.830626450116 11 10
615.8110151187905 6 5
1880.2778864970646 18 17
1333.1221804511279 13 12
1436.1809145129225 14 13
2084.316427783903 20 19
2899.5723684210525 28 27
410.537037037037 4 3
309.3060686015831 3 2
2187.006396588486 21 20
2493.04786545925 24 23
2391.210588235294 23 22
1128.6234096692112 11 10
1677.108061749571 16 15
207.72422360248447 2 1
1881.5488505747126 18 17
1435.9546391752576 14 13
1332.173076923077 13 12
2901.333976833977 28 27
1779.2339449541284 17 16
616.1188222923239 6 5
2595.2082878953106 25 24
1230.5650793650793 12 11
2289.3205849268843 22 21
1677.0083160083161 16 15
2085.1396648044692 20 19
411.95394736842104 4 3
2899.967359050445 28 27
1434.0812720848057 14 13
2493.347174163783 24 23
1026.2666666666667 10 9
1128.330177514793 11 10
2697.2914438502676 26 25
2187.277591973244 21 20
1880.9913916786227 18 17
1982.8534107402031 19 18
719.7744807121662 7 6
1779.2957317073171 17 16
514.6374172185431 5 4
310.31662591

In [41]:
print(trap_locations_left)
print(trap_locations_right)

[2863, 2761, 2659, 2557, 2456, 2353, 2251, 2149, 2047, 1945, 1843, 1741, 1638, 1537, 1397, 1295, 1192, 1090, 988, 886, 783, 681, 579, 477, 374, 272, 170, 67]
[2943, 2841, 2739, 2637, 2536, 2433, 2331, 2229, 2127, 2025, 1923, 1821, 1718, 1617, 1477, 1375, 1272, 1170, 1068, 966, 863, 761, 659, 557, 454, 352, 250, 147]


In [37]:
trap_locations_rot

[2903,
 2801,
 2699,
 2597,
 2496,
 2393,
 2291,
 2189,
 2087,
 1985,
 1883,
 1781,
 1678,
 1577,
 1437,
 1335,
 1232,
 1130,
 1028,
 926,
 823,
 721,
 619,
 517,
 414,
 312,
 210,
 107]

In [36]:
cell_mask_rot.shape

(2968, 1152)

In [35]:
2968-2861

107

In [29]:
len(data_for_forks)

0

In [27]:
data_for_forks

[{'position': 1,
  'timepoint': 0,
  'trap': 0,
  'cell_label': 18,
  'area': 433.0,
  'length': 43.93329508735538,
  'normalization_counts': 1,
  'internal_coord': (18.861033021349154, -1.9246797591669722),
  'normalized_internal_x': 0.42931068529793986,
  'bbox': (2873, 20, 2898, 62),
  'global_coords': (2884.333152615885, 37.727568446733535),
  'local_coords': (11.333152615884956, 17.727568446733535)},
 {'position': 1,
  'timepoint': 0,
  'trap': 0,
  'cell_label': 30,
  'area': 456.0,
  'length': 35.01260612406755,
  'normalization_counts': 1,
  'internal_coord': (23.156108480112003, 1.0715287482258944),
  'normalized_internal_x': 0.6613648923492893,
  'bbox': (2893, 75, 2907, 113),
  'global_coords': (2898.770818448837, 100.67230292926888),
  'local_coords': (5.7708184488369625, 25.67230292926888)},
 {'position': 1,
  'timepoint': 0,
  'trap': 24,
  'cell_label': 32,
  'area': 810.0,
  'length': 67.43819004481509,
  'normalization_counts': 2,
  'internal_coord': (15.62498947913933

In [29]:
seg_mask.shape

(1152, 2968)

In [None]:
ce

In [30]:
trap_locations_rot = [seg_mask.shape[1]-loc for loc in trap_locations]

In [34]:
plt.figure()
plt.imshow(cell_mask_rot)
plt.plot(rotated_coords[:, 1], rotated_coords[:, 0], 'ro')
for i in range(len(trap_locations_rot)):
    plt.axhline(y=trap_locations_rot[i], color='b', linestyle='--')
plt.show()

In [59]:
trap_locations_left = [loc-trap_width for loc in trap_locations]
trap_locations_right = [loc+trap_width for loc in trap_locations]

trap_bins = [(loc-trap_width, loc+trap_width) for loc in trap_locations]

In [24]:
trap_locations

[61,
 164,
 266,
 367,
 469,
 571,
 673,
 775,
 877,
 980,
 1082,
 1184,
 1286,
 1388,
 1528,
 1630,
 1732,
 1834,
 1937,
 2039,
 2141,
 2243,
 2346,
 2448,
 2550,
 2653,
 2755,
 2858]

In [69]:
for bin_no, bin_boundaries in enumerate(trap_bins):
    print(bin_no, bin_boundaries)
    if cell_prop.centroid[0] >= bin_boundaries[0] and cell_prop.centroid[0] <=bin_boundaries[1]:
        trap_no = bin_no
        print('---->', bin_no, bin_boundaries)

0 (21, 101)
1 (124, 204)
2 (226, 306)
3 (327, 407)
4 (429, 509)
5 (531, 611)
6 (633, 713)
7 (735, 815)
8 (837, 917)
9 (940, 1020)
10 (1042, 1122)
11 (1144, 1224)
12 (1246, 1326)
13 (1348, 1428)
14 (1488, 1568)
15 (1590, 1670)
16 (1692, 1772)
17 (1794, 1874)
18 (1897, 1977)
19 (1999, 2079)
20 (2101, 2181)
21 (2203, 2283)
22 (2306, 2386)
23 (2408, 2488)
24 (2510, 2590)
25 (2613, 2693)
26 (2715, 2795)
27 (2818, 2898)


In [74]:
plt.figure()
plt.imshow(cell_prop.image)
plt.show()

In [73]:
cell_prop.label

28

In [68]:
cell_prop.centroid[0]

2085.1662971175165

In [66]:
bin_boundaries

(2818, 2898)

In [65]:
trap_no

In [64]:
cell_prop.centroid

(2085.1662971175165, 67.59423503325942)

In [63]:
trap_bins

[(21, 101),
 (124, 204),
 (226, 306),
 (327, 407),
 (429, 509),
 (531, 611),
 (633, 713),
 (735, 815),
 (837, 917),
 (940, 1020),
 (1042, 1122),
 (1144, 1224),
 (1246, 1326),
 (1348, 1428),
 (1488, 1568),
 (1590, 1670),
 (1692, 1772),
 (1794, 1874),
 (1897, 1977),
 (1999, 2079),
 (2101, 2181),
 (2203, 2283),
 (2306, 2386),
 (2408, 2488),
 (2510, 2590),
 (2613, 2693),
 (2715, 2795),
 (2818, 2898)]

In [62]:
trap_no

In [45]:
left_index = bisect.bisect_left(trap_locations_left, cell_prop.centroid[0])
right_index = bisect.bisect_right(trap_locations_right, cell_prop.centroid[0])

In [57]:
left_index

20

In [49]:
print(trap_locations_left)
print(trap_locations_right)

[21, 124, 226, 327, 429, 531, 633, 735, 837, 940, 1042, 1144, 1246, 1348, 1488, 1590, 1692, 1794, 1897, 1999, 2101, 2203, 2306, 2408, 2510, 2613, 2715, 2818]
[101, 204, 306, 407, 509, 611, 713, 815, 917, 1020, 1122, 1224, 1326, 1428, 1568, 1670, 1772, 1874, 1977, 2079, 2181, 2283, 2386, 2488, 2590, 2693, 2795, 2898]


In [58]:
trap_locations_right[right_index]

2181

In [56]:
print(trap_locations)

[61, 164, 266, 367, 469, 571, 673, 775, 877, 980, 1082, 1184, 1286, 1388, 1528, 1630, 1732, 1834, 1937, 2039, 2141, 2243, 2346, 2448, 2550, 2653, 2755, 2858]


In [53]:
len(trap_locations_left)

28

In [54]:
len(trap_locations_right)

28

In [50]:
left_index

20

In [51]:
right_index

20

In [None]:
trap_locations_left[

In [52]:
trap_locations_right[right_index]

2181

In [37]:
traps_

{'position': 28,
 'timepoint': 0,
 'trap': None,
 'cell_label': 28,
 'area': 451.0,
 'length': 41.59378295396199,
 'normalization_counts': 1,
 'internal_coord': (20.411424052422667, -0.4373333536165703),
 'normalized_internal_x': 0.4907325711396585,
 'bbox': (2074, 49, 2097, 88),
 'global_coords': (2085.4053466843, 66.40175703759682),
 'local_coords': (11.405346684300184, 17.401757037596823)}

In [26]:
forkplot_data

[{'position': 28,
  'timepoint': 0,
  'trap': None,
  'cell_label': 28,
  'area': 451.0,
  'length': 41.59378295396199,
  'normalization_counts': 1,
  'internal_coord': (20.411424052422667, -0.4373333536165703),
  'normalized_internal_x': 0.4907325711396585,
  'bbox': (2074, 49, 2097, 88),
  'global_coords': (2085.4053466843, 66.40175703759682),
  'local_coords': (11.405346684300184, 17.401757037596823)},
 {'position': 28,
  'timepoint': 0,
  'trap': None,
  'cell_label': 32,
  'area': 607.0,
  'length': 56.591092611776446,
  'normalization_counts': 2,
  'internal_coord': (37.74137357754037, 3.658890985499942),
  'normalized_internal_x': 0.6669136755576001,
  'bbox': (2892, 55, 2911, 113),
  'global_coords': (2899.656625141563, 93.73839184597962),
  'local_coords': (7.656625141562927, 38.73839184597962)},
 {'position': 28,
  'timepoint': 0,
  'trap': None,
  'cell_label': 32,
  'area': 607.0,
  'length': 56.591092611776446,
  'normalization_counts': 2,
  'internal_coord': (31.757999312

In [25]:
write_files({
    'position': 28,
    'timepoint': 0,
    'fork_data': forkplot_data
}, 'forkplot_data', params)


Writing failed due to unsupported data type when reading CSV: null when reading CSV for data {'position': 28, 'timepoint': 0, 'fork_data': [{'position': 28, 'timepoint': 0, 'trap': None, 'cell_label': 28, 'area': 451.0, 'length': 41.59378295396199, 'normalization_counts': 1, 'internal_coord': (20.411424052422667, -0.4373333536165703), 'normalized_internal_x': 0.4907325711396585, 'bbox': (2074, 49, 2097, 88), 'global_coords': (2085.4053466843, 66.40175703759682), 'local_coords': (11.405346684300184, 17.401757037596823)}, {'position': 28, 'timepoint': 0, 'trap': None, 'cell_label': 32, 'area': 607.0, 'length': 56.591092611776446, 'normalization_counts': 2, 'internal_coord': (37.74137357754037, 3.658890985499942), 'normalized_internal_x': 0.6669136755576001, 'bbox': (2892, 55, 2911, 113), 'global_coords': (2899.656625141563, 93.73839184597962), 'local_coords': (7.656625141562927, 38.73839184597962)}, {'position': 28, 'timepoint': 0, 'trap': None, 'cell_label': 32, 'area': 607.0, 'length':

In [23]:
len(forkplot_data)

667

In [19]:
fork_stats = compute_forkplot_stats(seg_result['seg_mask'], dots['rotated_coords'], trap_locations=trap_locations)

In [20]:
def transform_to_polars(stats):
    keys = stats.keys()

In [21]:
stats = fork_stats

In [22]:
stats[0].keys()

dict_keys(['position', 'timepoint', 'trap', 'cell_label', 'area', 'length', 'normalization_counts', 'internal_coord', 'normalized_internal_x', 'bbox', 'global_coords', 'local_coords'])

In [23]:
d = {key: [] for key in stats[0].keys()}

In [24]:
for i in range(len(stats)):
    for key, value in stats[i].items():
        d[key].append(value)

In [25]:
import polars as pl

In [26]:
df = pl.DataFrame(d)

In [27]:
df.schema

Schema([('position', Int64),
        ('timepoint', Int64),
        ('trap', Int64),
        ('cell_label', Int32),
        ('area', Float64),
        ('length', Float64),
        ('normalization_counts', Int64),
        ('internal_coord', List(Float64)),
        ('normalized_internal_x', Float64),
        ('bbox', List(Int64)),
        ('global_coords', List(Float64)),
        ('local_coords', List(Float64))])

In [28]:
df.shape

(901, 12)

In [29]:
import glob

In [30]:
glob.glob("/mnt/sda1/REALTIME/test_runs/Pos[0-9]*/forks.parquet")

['/mnt/sda1/REALTIME/test_runs/Pos15/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos5/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos29/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos2/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos3/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos10/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos21/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos17/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos26/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos8/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos11/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos24/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos23/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos20/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos22/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos6/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos13/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos12/forks.parquet',
 '/mnt/sda1/REALTIME/test_runs/Pos16/forks.parquet'

In [31]:
path = Path("/mnt/sda1/REALTIME/test_runs/Pos[0-999]/forks.parquet")


In [67]:
# Define the columns you want to extract
columns_to_extract = ['area', 'length', 'normalized_internal_x', 'normalization_counts']

# Read the Parquet files and extract specified columns
#df = pl.read_parquet(glob.glob("/mnt/sda1/REALTIME/test_runs/Pos[0-9]*/forks.parquet"), columns=columns_to_extract)
df = pl.read_parquet(glob.glob("/mnt/sda1/REALTIME/test_runs/Pos[0-9]*/forks.parquet"))
# Display the resulting DataFrame
print(df)

shape: (567_630, 12)
┌──────────┬───────────┬──────┬────────────┬───┬────────────┬────────────┬────────────┬────────────┐
│ position ┆ timepoint ┆ trap ┆ cell_label ┆ … ┆ normalized ┆ bbox       ┆ global_coo ┆ local_coor │
│ ---      ┆ ---       ┆ ---  ┆ ---        ┆   ┆ _internal_ ┆ ---        ┆ rds        ┆ ds         │
│ i64      ┆ i64       ┆ i64  ┆ i32        ┆   ┆ x          ┆ list[i64]  ┆ ---        ┆ ---        │
│          ┆           ┆      ┆            ┆   ┆ ---        ┆            ┆ list[f64]  ┆ list[f64]  │
│          ┆           ┆      ┆            ┆   ┆ f64        ┆            ┆            ┆            │
╞══════════╪═══════════╪══════╪════════════╪═══╪════════════╪════════════╪════════════╪════════════╡
│ 15       ┆ 0         ┆ 0    ┆ 20         ┆ … ┆ 0.183908   ┆ [2898,     ┆ [2903.0426 ┆ [5.042699, │
│          ┆           ┆      ┆            ┆   ┆            ┆ 231, …     ┆ 99, 245.74 ┆ 14.743775] │
│          ┆           ┆      ┆            ┆   ┆            ┆ 309]    

In [68]:
df.schema

Schema([('position', Int64),
        ('timepoint', Int64),
        ('trap', Int64),
        ('cell_label', Int32),
        ('area', Float64),
        ('length', Float64),
        ('normalization_counts', Int64),
        ('internal_coord', List(Float64)),
        ('normalized_internal_x', Float64),
        ('bbox', List(Int64)),
        ('global_coords', List(Float64)),
        ('local_coords', List(Float64))])

### Read pandas and convert to parquet

In [3]:
import pandas as pd
from pathlib import Path
import polars as pl
import ast
import glob

# Define a custom converter function
def safe_literal_eval(value):
    try:
        return ast.literal_eval(value)
    except (SyntaxError, ValueError):
        return None  # or any other default value

In [4]:
save_dir = Path("F:Oscar/EXP-25-CD0117/live_run/")

In [10]:
fork_filenames = glob.glob(str(save_dir / Path('Pos[0-9]*/forks.parquet')))
columns_to_extract = ['area', 'length', 'normalized_internal_x', 'normalization_counts']
data = pl.read_parquet(fork_filenames, use_pyarrow=True, columns=columns_to_extract)


In [11]:
data.shape

(9591797, 4)

In [7]:

forks_filenames = [position_dir / Path('forks.csv') for position_dir in list(save_dir.glob('Pos*'))]
schema = {
        'position':pl.Int64,
        'timepoint':pl.Int64,
        'trap' : pl.Int64,
        'cell_label': pl.Int32,
        'area' : pl.Float64,
        'length' : pl.Float64,
        'normalization_counts' :pl.Int64,
        'internal_coord': pl.List(pl.Float64),
        'normalized_internal_x': pl.Float64,
        'bbox' : pl.List(pl.Int64),
        'global_coords' : pl.List(pl.Float64),
        'local_coords' : pl.List(pl.Float64)
}
for filename in forks_filenames:
    dataframes = pd.read_csv(filename, converters= {'internal_coord': safe_literal_eval,
                                                             'bbox': safe_literal_eval,
                                                             'global_coords': safe_literal_eval,
                                                             'local_coords': safe_literal_eval})
    df2 = pl.from_pandas(dataframes, schema_overrides=schema)
    write_path = filename.parent / Path('forks.parquet')
    df2.write_parquet(write_path, use_pyarrow=True, partition_by='trap')
    print(write_path, df2.shape)

F:Oscar\EXP-25-CD0117\live_run\Pos1\forks.parquet (347576, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos10\forks.parquet (314135, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos11\forks.parquet (301068, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos12\forks.parquet (333604, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos13\forks.parquet (332025, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos14\forks.parquet (299641, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos15\forks.parquet (321389, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos16\forks.parquet (326609, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos17\forks.parquet (318428, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos18\forks.parquet (314377, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos19\forks.parquet (342689, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos2\forks.parquet (289667, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos20\forks.parquet (336885, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos21\forks.parquet (308606, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos22\forks.parquet (322492, 12)
F:Oscar\EXP-25-CD0117\live_run\Pos23\forks

In [None]:
s

In [12]:
write_path

WindowsPath('F:Oscar/EXP-25-CD0117/live_run/Pos1/forks.parquet')

In [6]:
filename

WindowsPath('F:Oscar/EXP-25-CD0117/live_run/Pos1/forks.csv')

In [5]:
filename.parent

WindowsPath('F:Oscar/EXP-25-CD0117/live_run/Pos1')

In [6]:
df2.write_parquet(write_path, use_pyarrow=True, partition_by="trap")

PanicException: called `Result::unwrap()` on an `Err` value: ()

In [64]:
schema = {
        'position':pl.Int64,
        'timepoint':pl.Int64,
        'trap' : pl.Int64,
        'cell_label': pl.Int32,
        'area' : pl.Float64,
        'length' : pl.Float64,
        'normalization_counts' :pl.Int64,
        'internal_coord': pl.List(pl.Float64),
        'normalized_internal_x': pl.Float64,
        'bbox' : pl.List(pl.Int64),
        'global_coords' : pl.List(pl.Float64),
        'local_coords' : pl.List(pl.Float64)
}

In [69]:
df2 = pl.from_pandas(data, schema_overrides=schema)

In [72]:
df2.schema

Schema([('position', Int64),
        ('timepoint', Int64),
        ('trap', Int64),
        ('cell_label', Int32),
        ('area', Float64),
        ('length', Float64),
        ('normalization_counts', Int64),
        ('internal_coord', List(Float64)),
        ('normalized_internal_x', Float64),
        ('bbox', List(Int64)),
        ('global_coords', List(Float64)),
        ('local_coords', List(Float64))])

In [73]:
df.schema

Schema([('position', Int64),
        ('timepoint', Int64),
        ('trap', Int64),
        ('cell_label', Int32),
        ('area', Float64),
        ('length', Float64),
        ('normalization_counts', Int64),
        ('internal_coord', List(Float64)),
        ('normalized_internal_x', Float64),
        ('bbox', List(Int64)),
        ('global_coords', List(Float64)),
        ('local_coords', List(Float64))])

#### Testing reading parquets

In [67]:
import polars as pl

In [74]:
df = pl.read_parquet('/mnt/sda1/REALTIME/test_runs/Pos15/forks.parquet/')

In [75]:
df

position,timepoint,trap,cell_label,area,length,normalization_counts,internal_coord,normalized_internal_x,bbox,global_coords,local_coords
i64,i64,i64,i32,f64,f64,i64,list[f64],f64,list[i64],list[f64],list[f64]
15,0,0,20,969.0,77.473008,4,"[14.247897, 2.203242]",0.183908,"[2898, 231, … 309]","[2903.042699, 245.743775]","[5.042699, 14.743775]"
15,0,0,20,969.0,77.473008,4,"[5.598788, 3.350511]",0.072268,"[2898, 231, … 309]","[2902.018189, 236.998875]","[4.018189, 5.998875]"
15,0,0,20,969.0,77.473008,4,"[57.219483, -1.177507]",0.738573,"[2898, 231, … 309]","[2909.522023, 288.395867]","[11.522023, 57.395867]"
15,0,0,20,969.0,77.473008,4,"[52.055878, -2.792716]",0.671923,"[2898, 231, … 309]","[2910.430389, 283.094935]","[12.430389, 52.094935]"
15,0,0,55,808.0,72.518836,2,"[16.03578, -3.551992]",0.221126,"[2901, 312, … 385]","[2910.29358, 328.283664]","[9.29358, 16.283664]"
…,…,…,…,…,…,…,…,…,…,…,…
15,13,9,285,502.0,45.683252,2,"[34.396858, -3.359979]",0.752942,"[1999, 936, … 984]","[2012.054529, 970.844598]","[13.054529, 34.844598]"
15,13,9,298,1063.0,91.301096,2,"[29.361403, -1.199828]",0.321589,"[2000, 983, … 1077]","[2009.051945, 1012.519718]","[9.051945, 29.519718]"
15,13,9,298,1063.0,91.301096,2,"[47.641851, -6.248417]",0.52181,"[2000, 983, … 1077]","[2015.490588, 1030.496327]","[15.490588, 47.496327]"
15,13,9,328,1081.0,91.109822,1,"[70.097024, -0.988265]",0.769368,"[2003, 1073, … 1172]","[2012.958244, 1148.462952]","[9.958244, 75.462952]"


In [35]:
import pandas as pd

In [36]:
fork_stats_table = pd.DataFrame(fork_stats)

In [37]:
fork_stats_table

Unnamed: 0,position,timepoint,trap,cell_label,area,length,normalization_counts,internal_coord,normalized_internal_x,bbox,global_coords,local_coords
0,0,0,4,15,368.0,30.025607,1,"(10.331315843264065, -2.273029129056189)",0.344083,"(2490, 227, 2511, 258)","(2502.3362740292982, 237.55769754397429)","(12.336274029298238, 10.557697543974285)"
1,0,0,8,16,596.0,55.741157,4,"(48.513692552829426, 4.7893035480254476)",0.870339,"(2082, 228, 2104, 286)","(2083.5672611944983, 277.55680462224456)","(1.5672611944983146, 49.556804622244556)"
2,0,0,8,16,596.0,55.741157,4,"(38.77562401109707, 3.410713575993412)",0.695637,"(2082, 228, 2104, 286)","(2085.3359853121174, 267.1814565483476)","(3.335985312117373, 39.181456548347626)"
3,0,0,8,16,596.0,55.741157,4,"(25.49408365838375, 1.4085934599614878)",0.457366,"(2082, 228, 2104, 286)","(2090.4882352941177, 254.24359861591697)","(8.488235294117658, 26.243598615916966)"
4,0,0,8,16,596.0,55.741157,4,"(17.678084451145736, -2.1620871565639836)",0.317146,"(2082, 228, 2104, 286)","(2096.785154092566, 248.4542140726831)","(14.78515409256579, 20.4542140726831)"
...,...,...,...,...,...,...,...,...,...,...,...,...
896,0,0,23,355,582.0,47.668006,1,"(24.969438280041942, 0.11646185491855153)",0.523820,"(535, 1198, 552, 1247)","(541.9235198311655, 1222.8753698988369)","(6.9235198311655495, 24.87536989883688)"
897,0,0,19,356,371.0,37.364992,2,"(18.515859724714527, 0.49341423287447705)",0.495540,"(944, 1204, 963, 1241)","(952.9429480044522, 1221.7819049133407)","(8.942948004452205, 17.781904913340668)"
898,0,0,19,356,371.0,37.364992,2,"(28.673333550869668, 2.5895484369136805)",0.767385,"(944, 1204, 963, 1241)","(954.3035876889455, 1232.0016390457113)","(10.303587688945527, 28.001639045711272)"
899,0,0,25,357,455.0,44.946226,2,"(38.345571980881836, 3.637052744213372)",0.853143,"(331, 1215, 350, 1259)","(340.2880712321592, 1252.842084588189)","(9.288071232159211, 37.84208458818898)"


In [38]:
len(fork_stats_table)

901

In [39]:
fork_stats_table.to_csv('forks.csv', mode='a', index=False, header=False)

In [40]:
df = pd.read_csv('forks.csv')

In [41]:
len(df)

3077

In [58]:
df.iloc[1025]

position                              position
timepoint                            timepoint
trap                                      trap
cell_label                          cell_label
area                                      area
length                                  length
normalization_counts      normalization_counts
internal_coord                  internal_coord
normalized_internal_x    normalized_internal_x
bbox                                      bbox
global_coords                    global_coords
local_coords                      local_coords
Name: 1025, dtype: object

In [54]:
trap_locations_left = [loc-40 for loc in trap_locations]
trap_locations_right = [loc+40 for loc in trap_locations]

In [58]:
print(trap_locations_left)
print(trap_locations_right)

[76, 178, 280, 382, 484, 588, 688, 790, 893, 995, 1097, 1199, 1300, 1404, 1543, 1646, 1747, 1851, 1956, 2055, 2156, 2259, 2364, 2466, 2566, 2668, 2771, 2873]
[156, 258, 360, 462, 564, 668, 768, 870, 973, 1075, 1177, 1279, 1380, 1484, 1623, 1726, 1827, 1931, 2036, 2135, 2236, 2339, 2444, 2546, 2646, 2748, 2851, 2953]


In [51]:
props[0].centroid

(2933.2462686567164, 167.36567164179104)

In [52]:
import bisect

In [61]:
for prop in props:
    left_index = bisect.bisect_left(trap_locations_left, prop.centroid[0])
    right_index = bisect.bisect_right(trap_locations_right, prop.centroid[0])
    print(prop.centroid[0], trap_locations_left[left_index-1], trap_locations_right[right_index], left_index-1, right_index)
    if left_index - 1 == right_index:
        prop.trap = right_index
    else:
        prop.trap = None
    prop.left_index = left_index
    prop.right_index = right_index

2933.2462686567164 2873 2953 27 27
2063.690476190476 2055 2135 19 19
2722.548672566372 2668 2748 25 25
1464.5013698630137 1404 1484 13 13
2207.8502673796793 2156 2236 20 20
1414.361963190184 1404 1484 13 13
2705.904593639576 2668 2748 25 25
1902.078313253012 1851 1931 17 17
1991.3779193205944 1956 2036 18 18
1600.5 1543 1623 14 14
2398.1688311688313 2364 2444 22 22
1347.2350119904077 1300 1380 12 12
2599.437984496124 2566 2646 24 24
800.2452830188679 790 870 7 7
2498.6603260869565 2466 2546 23 23
2091.6593959731545 2055 2135 19 19
1235.9347826086957 1199 1279 11 11
2804.16765578635 2771 2851 26 26
1149.392857142857 1097 1177 10 10
2906.8090815273476 2873 2953 27 27
1036.440281030445 995 1075 9 9
2190.878306878307 2156 2236 20 20
430.7031630170316 382 462 3 3
1680.3676470588234 1646 1726 15 15
1589.2236842105262 1543 1623 14 14
523.6628477905074 484 564 4 4
2295.34179357022 2259 2339 21 21
626.2301458670988 588 668 5 5
2599.326530612245 2566 2646 24 24
1887.576 1851 1931 17 17
1782.9785

In [56]:
props[0].left_index

28

In [57]:
props[0].right_index

27