In [1]:
import glob
import os

import numpy as np
from skimage import io, measure
from skimage.measure import regionprops
from ultralytics import YOLO

import torch
import napari
import skimage as ski
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import scipy.stats as stats

In [2]:
# Interactive napari window; the bar-chart click handler later in the notebook passes it to process_file.
viewer = napari.Viewer()

In [3]:
# Base directory of this analysis: the input CSVs are read from it and figures / CSVs are written to it.
current_dir = 'S:/micro/nro/fx2482/lem/smc/'

In [4]:
# Training-run suffix: selects the YOLO weights directory and is embedded in input / output file names.
variant = '36'

In [5]:
# Module-level detector used by process_file(); the weights path is relative to the kernel's working directory.
model = YOLO('runs/detect/train'+variant+'/weights/best.pt')  # load a custom model

In [6]:
def pad_to_window(img, window=10000):
    """Pad an image on its bottom and right edges to ``window`` x ``window`` pixels.

    Parameters
    ----------
    img : numpy.ndarray
        Image whose first two axes are (height, width). Trailing axes
        (e.g. a channel axis) are left unpadded, so both 2-D and
        multi-channel images are accepted.
    window : int, optional
        Target height and width in pixels.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(window, window, *img.shape[2:])`` with the original
        pixels in the top-left corner and every added pixel set to 255.

    Raises
    ------
    ValueError
        If ``img`` is taller or wider than ``window``.
    """
    height, width = img.shape[0:2]
    if height > window or width > window:
        raise ValueError(
            'image of size %dx%d does not fit in a %d-pixel window' % (height, width, window)
        )
    # Pad only the two spatial axes; one (0, 0) entry per remaining axis keeps channels untouched.
    pad_width = [(0, window - height), (0, window - width)] + [(0, 0)] * (img.ndim - 2)
    return np.pad(img, pad_width, mode='constant', constant_values=255)
def window_images(img, window=10000):
    """Cut an image into a grid of ``window`` x ``window`` tiles.

    Parameters
    ----------
    img : numpy.ndarray
        Image whose first two axes are (height, width).
    window : int, optional
        Tile edge length in pixels.

    Returns
    -------
    numpy.ndarray
        Array indexed as ``[tile_row, tile_col, ...tile axes...]``. Tiles on the
        bottom / right edge are padded by ``pad_to_window`` so that every tile
        has the same size.
    """
    height, width = img.shape[0:2]
    tile_rows = []
    for top in range(0, height, window):
        tile_rows.append([
            # Forward `window` so edge tiles are padded to the requested size
            # instead of pad_to_window's own default.
            pad_to_window(img[top:top + window, left:left + window], window=window)
            for left in range(0, width, window)
        ])
    return np.array(tile_rows)

def de_window(img):
    """Stitch a grid of tiles (indexed ``[tile_row][tile_col]``) back into one array.

    Tiles within a row are joined along axis 1, then the rows are joined
    along axis 0.
    """
    stitched_rows = []
    for tile_row in img:
        stitched_rows.append(np.concatenate(list(tile_row), axis=1))
    return np.concatenate(stitched_rows, axis=0)
def process_file(fname, display=False, viewer=None, confidence_cutoff=0.5):
    """Run the module-level YOLO ``model`` over an image tile by tile and collect detections.

    The image is read from ``fname``, split with ``window_images`` and every
    tile is passed to ``model``. Box corners are shifted from tile coordinates
    back into coordinates of the stitched (padded) image.

    Parameters
    ----------
    fname : str
        Path handed to ``ski.io.imread``.
    display : bool, optional
        When true, clear ``viewer`` and show the stitched image plus one
        shapes layer per class id 0-3 (layers ``T0``..``T3``).
    viewer : napari viewer, optional
        Required when ``display`` is true.
    confidence_cutoff : float, optional
        Only detections with confidence strictly greater than this are kept.

    Returns
    -------
    rtn_img : numpy.ndarray
        The padded tiles stitched back together with ``de_window``.
    rtn_boxes : numpy.ndarray
        Shape ``(n, 2, 2)``: two corner points per kept detection.
    classes : numpy.ndarray
        Class id per kept detection.
    confs : numpy.ndarray
        Confidence per kept detection.

    Raises
    ------
    ValueError
        If ``display`` is true but no ``viewer`` was given.
    """
    # Fail before the (slow) inference rather than after it.
    if display and viewer is None:
        raise ValueError('display=True requires a viewer')

    img = ski.io.imread(fname)
    windowed = window_images(img)
    confs = []
    rtn_boxes = []
    classes = []
    for y, yimg in enumerate(windowed):
        for x, ximg in enumerate(yimg):
            # All tiles are padded to one size, so the tile's own shape is its stride in the full image.
            height, width = ximg.shape[0:2]
            offset = np.array([y * height, x * width])[np.newaxis, :]
            results = model(ximg)
            for box in results[0].boxes:
                # Reshape the four xyxy values into two corner points and reverse each pair
                # (presumably to the (row, col) order napari expects -- confirm).
                xyxy = np.reshape(box.xyxy[0].cpu().numpy(), [-1, 2])[:, ::-1]
                rtn_boxes.append(xyxy + offset)
                classes.append(box.cls.cpu().numpy()[0])
                confs.append(box.conf[0].cpu().numpy())
    classes = np.array(classes)
    # reshape keeps the (n, 2, 2) layout even when nothing was detected.
    rtn_boxes = np.array(rtn_boxes).reshape(-1, 2, 2)
    confs = np.array(confs)

    # Build the mask once, before any of the arrays is filtered.
    keep = confs > confidence_cutoff
    classes = classes[keep]
    rtn_boxes = rtn_boxes[keep]
    confs = confs[keep]
    rtn_img = de_window(windowed)

    if display:
        viewer.layers.clear()
        viewer.add_image(rtn_img, channel_axis=-1, colormap=['red', 'green', 'blue'])
        viewer.add_shapes(rtn_boxes[classes==0], shape_type='rectangle', edge_color='red', face_color='red', opacity=0.5, name='T0')
        viewer.add_shapes(rtn_boxes[classes==1], shape_type='rectangle', edge_color='blue', face_color='blue', opacity=0.5, name='T1')
        viewer.add_shapes(rtn_boxes[classes==2], shape_type='rectangle', edge_color='green', face_color='green', opacity=0.5, name='T2')
        viewer.add_shapes(rtn_boxes[classes==3], shape_type='rectangle', edge_color='yellow', face_color='yellow', opacity=0.5, name='T3')

    return rtn_img, rtn_boxes, classes, confs



In [7]:
# Load the per-detection table for this model variant; class ids are stored as strings
# so they match the string keys used in color_map and category_orders.
df = pd.read_csv(current_dir + '20240923_'+variant+'.csv').astype({'class': str})

In [9]:
# Paths look like '<...>/<Batch>\<Tray>_<Slide>_<Object>.tif'; split the file name once and reuse it.
_name_fields = df['file'].str.split('\\').str[-1].str.split('_')
df['Batch'] = df['file'].str.split('/').str[-1].str.split('\\').str[0]
df['Tray'] = _name_fields.str[0].astype(int)
df['Slide'] = _name_fields.str[1].astype(int)
# The third field still carries the extension (e.g. '02.tif'); keep its first two characters.
df['Object'] = _name_fields.str[2].str[0:2].astype(int)

In [13]:
# Display the base directory currently configured.
current_dir

'S:/micro/nro/fx2482/lem/smc/'

In [14]:
# Lookup table from slide to Population / Replicate / Note.
# NOTE(review): read as latin1, presumably because the file is not valid UTF-8 -- confirm;
# non-ASCII population names may come out mis-decoded.
mapper = pd.read_csv(current_dir+'20240923_map.csv', encoding='latin1')
# Earlier slide parsing, superseded by the path-based parsing in the next cell:
#mapper['Slide'] = mapper['slide'].str.split('Slide').str[-1].astype(int)

In [16]:
# Parse Batch / Tray / Slide out of each mapped path so the table can be joined onto df on those keys.
_map_name_fields = mapper['File'].str.split('\\').str[-1].str.split('_')
mapper['Batch'] = mapper['File'].str.split('/').str[-1].str.split('\\').str[0]
mapper['Tray'] = _map_name_fields.str[0].astype(int)
mapper['Slide'] = _map_name_fields.str[1].astype(int)


In [17]:
# Inspect the mapping table with its parsed key columns.
mapper

Unnamed: 0,File,Population,Replicate,Note,Batch,Tray,Slide
0,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,20240923_OSS_IMARE-131379,1,1
1,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,20240923_OSS_IMARE-131379,1,2
2,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,20240923_OSS_IMARE-131379,1,3
3,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,20240923_OSS_IMARE-131379,1,4
4,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,20240923_OSS_IMARE-131379,1,5
...,...,...,...,...,...,...,...
130,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,Molino,3,Molino_ 5.66.0_614dpf,20240924_OSS_IMARE-131458,1,31
131,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,Molino,3,Molino_ 5.66.0_614dpf,20240924_OSS_IMARE-131458,1,32
132,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,Molino,3,Molino_ 5.66.0_614dpf,20240924_OSS_IMARE-131458,1,33
133,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,Molino,3,Molino_ 5.66.0_614dpf,20240924_OSS_IMARE-131458,1,34


In [18]:
# Attach Population / Replicate / Note to every detection by matching on the parsed slide keys.
# Left join: detections whose slide is missing from the mapping are kept, with NaN in the mapped columns.
df = df.merge(mapper, how='left', on=['Batch', 'Tray', 'Slide']).assign(
    Genotype=lambda merged: merged['Population']
)


In [19]:
# Save the annotated detection table under the analysis directory (built from current_dir
# like every other output of this notebook). The index is written as an extra unnamed column.
df.to_csv(current_dir + 'csv_files/20240923.csv')

In [20]:
# Inspect the merged detection table.
df

Unnamed: 0.1,Unnamed: 0,file,class,conf,area,Batch,Tray,Slide,Object,File,Population,Replicate,Note,Genotype
0,0,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,1.0,0.987643,219154.126740,20240923_OSS_IMARE-131379,1,1,2,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,Surface fish
1,1,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,1.0,0.973777,291006.631946,20240923_OSS_IMARE-131379,1,1,2,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,Surface fish
2,2,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,1.0,0.968041,193132.932323,20240923_OSS_IMARE-131379,1,1,2,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,Surface fish
3,3,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,1.0,0.966571,294020.109546,20240923_OSS_IMARE-131379,1,1,2,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,Surface fish
4,4,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,1.0,0.960379,371603.020057,20240923_OSS_IMARE-131379,1,1,2,S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131...,Surface fish,1,Surface_3.240.3_523dpf,Surface fish
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103215,150,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,0.0,0.529548,61996.399114,20240924_OSS_IMARE-131458,1,35,5,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,Molino,3,Molino_ 5.66.0_614dpf,Molino
103216,151,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,0.0,0.528184,5376.689469,20240924_OSS_IMARE-131458,1,35,5,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,Molino,3,Molino_ 5.66.0_614dpf,Molino
103217,152,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,0.0,0.527561,9996.139427,20240924_OSS_IMARE-131458,1,35,5,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,Molino,3,Molino_ 5.66.0_614dpf,Molino
103218,153,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,1.0,0.524360,95862.348933,20240924_OSS_IMARE-131458,1,35,5,S:/micro/nro/fx2482/lem/20240924_OSS_IMARE-131...,Molino,3,Molino_ 5.66.0_614dpf,Molino


In [48]:
# Per (Replicate, Genotype, class): number of detections and median area.
agged = (
    df.groupby(['Replicate', 'Genotype', 'class'])
    .agg(Count=('conf', 'count'), area=('area', 'median'))
    .reset_index()
)
# Total detections per (Replicate, Genotype), the denominator of the class fraction.
agged_sum = (
    agged.groupby(['Replicate', 'Genotype'])
    .agg(Total=('Count', 'sum'))
    .reset_index()
)
agged = pd.merge(agged, agged_sum, on=['Replicate', 'Genotype'])
agged['Fraction'] = agged['Count'] / agged['Total']

In [49]:
# Drop (Replicate, Genotype) groups with 20 or fewer detections in total.
# NOTE(review): `agged` is aggregated per replicate/genotype at this point, not per image,
# so this does not remove individual garbage images -- confirm the threshold is intended here.

agged = agged[agged['Total'] > 20]

In [52]:
# Fixed colour per class id (class ids are strings, see the df loading cell).
color_map = {
    '0.0': 'red',
    '1.0': 'blue',
    '3.0': 'yellow',
    # Add more classes and colors as needed
}

# Display order for plot categories. A Genotype ordering used to be assigned here and was
# immediately overwritten by this Replicate ordering; that dead assignment has been removed.
category_orders = {'class':['0.0', '1.0', '3.0'], 'Replicate':[1,2,3]}


In [53]:
# Check which replicate ids are present (they should match category_orders['Replicate']).
df['Replicate'].unique()

array([1, 2, 3], dtype=int64)

In [54]:
# Class fractions per replicate, one facet row per genotype; exported as HTML and PNG.
f = px.bar(
    agged,
    x='Replicate',
    y='Fraction',
    color='class',
    facet_row='Genotype',
    color_discrete_map=color_map,
    category_orders=category_orders,
    height=800,
    width=400,
)
f.write_html(current_dir + '20240923_Fractions_'+variant+'.html')
f.write_image(current_dir + '20240923_Fractions_'+variant+'.png')
f

In [55]:
# Distribution of detection areas per replicate, faceted by genotype (rows) and class (columns).
f = px.box(
    df,
    x='Replicate',
    y='area',
    color='class',
    facet_row='Genotype',
    facet_col='class',
    color_discrete_map=color_map,
    category_orders=category_orders,
    width=800,
)
f.write_html(current_dir + '20240923_Areas_'+variant+'.html')
f.write_image(current_dir + '20240923_Areas_'+variant+'.png')
f

In [56]:
# Row / column count of the merged detection table.
df.shape

(103220, 14)

# Group by file

In [57]:
# Detections per image file and class ...
agged = (
    df.groupby(['Batch', 'Tray', 'Slide', 'file', 'class', 'Genotype', 'Replicate'])['conf']
    .count()
    .reset_index()
)
# ... and the total per image file across classes.
sums = (
    agged.groupby(['Batch', 'Tray', 'Slide', 'file', 'Genotype', 'Replicate'])
    .agg(Total=('conf', 'sum'))
    .reset_index()
)
agged = pd.merge(agged, sums, on=['Batch', 'Tray', 'Slide', 'file', 'Genotype', 'Replicate'])


In [58]:
# Drop image files with 20 or fewer detections in total; such images are usually garbage.

agged = agged[agged['Total'] > 20]

In [59]:
# Save the filtered per-file class counts (the index is written as an extra unnamed column).
agged.to_csv(current_dir + '20240923_Aggregated.csv')

In [60]:
# Short file name (directory stripped) used as the x-axis label.
agged['f'] = agged['file'].str.split('\\').str[-1]


# Wrap the bar chart in a FigureWidget so its traces can receive click callbacks.
# `file` is listed in hover_data, which exposes the full path as customdata[0] of every bar.
f=go.FigureWidget(
    px.bar(
        agged,
        x='f',
        y='conf',
        color='class',
        height=800,
        hover_data=['file'],
        color_discrete_map=color_map,
        category_orders=category_orders,
        width=1600,
        facet_col='Replicate',
        facet_row='Genotype',
    )
    )

def click_fn(trace, points, state):
    """Click callback for the count chart: open the clicked bar's image in napari.

    Does nothing when the click hit no point. Otherwise the file path is read
    from the customdata of the last clicked point, printed, and passed to
    ``process_file`` with ``display=True`` and a confidence cutoff of 0.05.
    """
    if len(points.point_inds) == 0:
        return
    clicked = points.point_inds[-1]
    idx = f.data[points.trace_index]['customdata'][clicked][0]
    print(idx)
    process_file(idx, display=True, viewer=viewer, confidence_cutoff=0.05)

# Register the click handler on every trace of the widget.
for a in f.data:
    a.on_click(click_fn)
# Export the figure as HTML and PNG, then display the widget.
f.write_html(current_dir + '20240923_Counts_'+variant+'.html')
f.write_image(current_dir + '20240923_Counts_'+variant+'.png')
f

FigureWidget({
    'data': [{'alignmentgroup': 'True',
              'customdata': array([['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_01_02.tif'],
                                   ['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_01_03.tif'],
                                   ['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_01_04.tif'],
                                   ['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_01_05.tif'],
                                   ['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_01_06.tif'],
                                   ['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_02_02.tif'],
                                   ['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_02_03.tif'],
                                   ['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_02_04.tif'],
                                   ['S:/micro/nro/fx2482/lem/20240923_OSS_IMARE-131379\\01_02_05.tif'],
         

In [42]:
# Slide-level aggregation: per (Group, Genotype, class, Batch, Tray, Slide), count detections and
# take the median area, then express each class count as a fraction of the slide total.
# NOTE(review): the df displayed earlier in this notebook has no 'Group' column -- confirm where
# it is added before this cell is run.
f_agged = (
    df.groupby(['Group', 'Genotype', 'class', 'Batch', 'Tray', 'Slide'])
    .agg(Count=('conf', 'count'), area=('area', 'median'))
    .reset_index()
)
f_agged_sum = (
    f_agged.groupby(['Group', 'Genotype', 'Batch', 'Tray', 'Slide'])
    .agg(Total=('Count', 'sum'))
    .reset_index()
)
f_agged = pd.merge(f_agged, f_agged_sum, on=['Group', 'Genotype', 'Batch', 'Tray', 'Slide'])
f_agged['Fraction'] = f_agged['Count'] / f_agged['Total']

In [43]:
# Slide-level class fractions by genotype, one facet row per group (control / starvation only).
px.box(
    f_agged[f_agged['Group'].isin(['control', 'starvation'])],
    x='Genotype',
    y='Fraction',
    color='class',
    facet_row='Group',
    category_orders=category_orders,
    color_discrete_map=color_map,
    width=800,
    height=800,
    points='all',
)

In [44]:
# Slide-level class fractions by group, coloured by batch, faceted by class (columns) and genotype (rows).
# NOTE(review): color_map is keyed by class id while colour is mapped to Batch here, so it
# presumably has no effect in this plot -- confirm.
px.box(
    f_agged[f_agged['Group'].isin(['control', 'starvation'])],
    x='Group',
    y='Fraction',
    color='Batch',
    facet_col='class',
    facet_row='Genotype',
    category_orders=category_orders,
    color_discrete_map=color_map,
    width=1400,
    height=800,
    points='all',
)

In [45]:
# Compare the number of distinct image files with the number of distinct Batch/Tray/Slide combinations.
tdf = df.assign(
    BatchTraySlide=df['Batch'].astype(str) + '_' + df['Tray'].astype(str) + '_' + df['Slide'].astype(str)
)
print(len(df['file'].unique()), len(tdf['BatchTraySlide'].unique()))

1570 690


In [46]:
# Mann-Whitney U test: compare class 0 fractions between Surface fish and Pachón, separately for the starvation and control groups.

In [47]:
# Batches that contribute class '0.0' rows for Surface fish in the control group.
f_agged[(f_agged['Genotype']=='Surface fish') & (f_agged['Group']=='control') & (f_agged['class']=='0.0')]['Batch'].unique()

array(['20240712_OSS_IMARE-129759', '20240715_OSS_IMARE-129791', '20240716_OSS_IMARE-129813_2', '20240717_OSS_IMARE-129848'], dtype=object)

In [48]:
# Batches that contribute class '0.0' rows for Surface fish in the starvation group.
f_agged[(f_agged['Genotype']=='Surface fish') & (f_agged['Group']=='starvation') & (f_agged['class']=='0.0')]['Batch'].unique()

array(['20240722_OSS_IMARE-129920', '20240726_OSS_IMARE-130092'], dtype=object)

In [49]:
# Batches that contribute class '0.0' rows for the second genotype in the control group.
# NOTE(review): the genotype literal looks like a mis-decoded 'Pachón'; it presumably matches the
# value as stored in the data -- confirm before changing it.
f_agged[(f_agged['Genotype']=='Pach—n') & (f_agged['Group']=='control') & (f_agged['class']=='0.0')]['Batch'].unique()

array(['20240712_OSS_IMARE-129759', '20240715_OSS_IMARE-129791', '20240716_OSS_IMARE-129813_2', '20240717_OSS_IMARE-129848'], dtype=object)

In [50]:
# Batches that contribute class '0.0' rows for the second genotype in the starvation group.
# NOTE(review): the genotype literal looks like a mis-decoded 'Pachón'; it presumably matches the
# value as stored in the data -- confirm before changing it.
f_agged[(f_agged['Genotype']=='Pach—n') & (f_agged['Group']=='starvation') & (f_agged['class']=='0.0')]['Batch'].unique()

array(['20240722_OSS_IMARE-129920', '20240726_OSS_IMARE-130092', '20240729_OSS_IMARE-130129', '20240729_OSS_IMARE-130129-2'], dtype=object)

In [51]:
# Mann-Whitney U test per (Group, class) on the slide-level fractions in f_agged,
# comparing the two genotypes against each other.
# The unpacking requires exactly two distinct genotypes; any other number raises ValueError.
genotypeA, genotypeB = f_agged['Genotype'].unique()
groups = ['control', 'starvation']
classes = ['0.0', '1.0', '3.0']
results = []
for group in groups:
    for c in classes:
        a = f_agged[(f_agged['Group'] == group) & (f_agged['Genotype'] == genotypeA) & (f_agged['class'] == c)]['Fraction']
        b = f_agged[(f_agged['Group'] == group) & (f_agged['Genotype'] == genotypeB) & (f_agged['class'] == c)]['Fraction']
        stat, p = stats.mannwhitneyu(a, b)
        results.append({'Group': group, 'Class': c, 'Stat': stat, 'P': p})

# Batch agged

In [52]:
# Batch-level aggregation: per (Group, Genotype, class, Batch), count detections and take the
# median area, then express each class count as a fraction of the batch total.
# NOTE(review): the df displayed earlier in this notebook has no 'Group' column -- confirm where
# it is added before this cell is run.
b_agged = (
    df.groupby(['Group', 'Genotype', 'class', 'Batch'])
    .agg(Count=('conf', 'count'), area=('area', 'median'))
    .reset_index()
)
b_agged_sum = (
    b_agged.groupby(['Group', 'Genotype', 'Batch'])
    .agg(Total=('Count', 'sum'))
    .reset_index()
)
b_agged = pd.merge(b_agged, b_agged_sum, on=['Group', 'Genotype', 'Batch'])
b_agged['Fraction'] = b_agged['Count'] / b_agged['Total']

In [53]:
# Batch-level class fractions by genotype, one facet row per group (control / starvation only).
px.box(
    b_agged[b_agged['Group'].isin(['control', 'starvation'])],
    x='Genotype',
    y='Fraction',
    color='class',
    facet_row='Group',
    category_orders=category_orders,
    color_discrete_map=color_map,
    width=800,
    height=800,
    points='all',
)

In [54]:
# Batch-level class fractions by class, coloured by genotype, one facet row per group.
# NOTE(review): color_map is keyed by class id while colour is mapped to Genotype here, so it
# presumably has no effect in this plot -- confirm.
px.box(
    b_agged[b_agged['Group'].isin(['control', 'starvation'])],
    x='class',
    y='Fraction',
    color='Genotype',
    facet_row='Group',
    category_orders=category_orders,
    color_discrete_map=color_map,
    width=800,
    height=800,
    points='all',
)

In [55]:
# Mann-Whitney U test per (Group, class) on the batch-level fractions in b_agged,
# comparing the two genotypes against each other.
# Genotypes are taken from b_agged itself so the cell does not depend on the slide-level table.
# The unpacking requires exactly two distinct genotypes; any other number raises ValueError.
genotypeA, genotypeB = b_agged['Genotype'].unique()
groups = ['control', 'starvation']
classes = ['0.0', '1.0', '3.0']
results = []
for group in groups:
    for c in classes:
        a = b_agged[(b_agged['Group'] == group) & (b_agged['Genotype'] == genotypeA) & (b_agged['class'] == c)]['Fraction']
        b = b_agged[(b_agged['Group'] == group) & (b_agged['Genotype'] == genotypeB) & (b_agged['class'] == c)]['Fraction']
        stat, p = stats.mannwhitneyu(a, b)
        results.append({'Group': group, 'Class': c, 'Stat': stat, 'P': p})

In [56]:
# Tabulate the test results collected in `results` (one row per group / class).
pd.DataFrame(results)

Unnamed: 0,Group,Class,Stat,P
0,control,0.0,1.0,0.057143
1,control,1.0,11.0,0.485714
2,control,3.0,14.0,0.114286
3,starvation,0.0,0.0,0.133333
4,starvation,1.0,8.0,0.133333
5,starvation,3.0,8.0,0.133333
