# Plotting the output from Cumulative event counter

Notebook for compiling the raw listed cell/event counter data into heatmaps, using only focal apoptoses whose tracks stretch back in time for a given duration.

1. Load apoptoses information
2. Filter apoptoses based on duration and initial appearance
3. Generate new cumulative plot based on those apoptoses
4. Save out/load cumulative plots
5. Plot all variations of plot
6. Find cell ID of divisions in ROI

In [24]:
import csv, glob, re, os, json, shutil
import numpy as np
from natsort import natsorted
import matplotlib.pyplot as plt
from tqdm import tqdm
import render, dataio
import calculate_radial_analysis as calculate

# Filtering individual apoptoses based on length

Doing this because there is a notable increase in probability 26 hours prior, but not every apoptotic mutant will exist 26 hours before

In [25]:
### directory of raw list files; the next cell scans it for the 'N_cells' files
source_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/800.800'

In [26]:
### apoptosis ID = everything before '_N_cells' in the name of each 'N_cells' raw list
apop_IDs = [
    raw_fn.partition('_N_cells')[0]
    for raw_fn in os.listdir(source_dir)
    if 'N_cells' in raw_fn
]

In [27]:
### read the movie length information (expt_movie_length.json) into movie_time_range
with open('/home/nathan/data/kraken/h2b/giulia/apoptosis_information/expt_movie_length.json', 'r') as movie_length_fh:
    movie_time_range = json.load(movie_length_fh)

In [28]:
### read the Scr apoptosis dict: keyed by focal apoptosis ID, each value is later
### compared against the frame threshold (presumably the apoptosis frame -- confirm)
with open('/home/nathan/data/kraken/h2b/giulia/apoptosis_information/scr_apop_dict.json', 'r') as apop_dict_fh:
    apop_dict = json.load(apop_dict_fh)

# Filter list of focal apoptoses based on length

Only want focal apoptoses that extend back 21 hours, i.e. 1260 minutes = 315 frames (at 4 minutes per frame)

*** a more accurate measure is excluding the last bin of 80 frames (800/10) so going back 320 frames ***

So I only want cells that have existed for 315 or more frames. For this I need to load the tracks of each of the individual apop IDs and check that len(track) > 315 and that t(apop) > 315 (mutually verifiable)

Could refine these criteria later, as there will be many that don't extend back this far yet are still >18 or >16 hours (still areas of interest)

#### Loading apoptotic cell track information

In [None]:
### Keep only the focal apoptoses with more than `min_frames` frames of history:
### the apoptosis value in apop_dict must exceed `min_frames` AND the cell's own
### track must be longer than `min_frames`.
min_frames = 320
previous_h5 = ''
valid_apops = []
hdf5_root_dir = '/home/nathan/data/kraken/h2b/giulia/'
for ID in tqdm(apop_dict):
    ### only the first three '_'-separated fields of the ID are used: expt, pos, cell ID
    expt, pos, cell_ID = ID.split('_')[:3]
    ### load HDF5, but only when this ID lives in a different file from the previous ID
    h5_path = os.path.join(hdf5_root_dir, expt, pos, pos+'_aligned', 'HDF/segmented.hdf5')
    if h5_path != previous_h5:
        wt_cells, scr_cells, all_cells = dataio.load_tracking_data(h5_path)
        ### remember which file is loaded; without this every ID re-read its HDF5 file
        previous_h5 = h5_path
    ### the matching Scr track carries the negated cell ID; IndexError if it is missing
    cell = [track for track in scr_cells if track.ID == -int(cell_ID)][0]
    ### if there is enough time prior to apoptosis and if the track exists for that time period
    if apop_dict[ID] > min_frames and len(cell) > min_frames:
        valid_apops.append(ID)
        print(ID, len(cell), apop_dict[ID])

In [None]:
### number of focal apoptoses that passed the history-length filter
len(valid_apops)

In [None]:
### inspect the IDs that passed the filter
valid_apops

In [None]:
### restrict apop_dict to the IDs that passed the filter (same values, fewer keys)
valid_apop_dict = {apop_ID: apop_dict[apop_ID] for apop_ID in valid_apops}

In [None]:
### inspect the filtered dict built from valid_apops
valid_apop_dict

In [None]:
### write the filtered apoptosis dict (apoptoses that stretch back 21 hours) to JSON
with open('/home/nathan/data/kraken/h2b/giulia/apoptosis_information/21hr_accurate_apop_dict.json', 'w') as json_out:
    json.dump(valid_apop_dict, json_out)

In [None]:
### read a previously saved valid apop_dict back in
### NOTE(review): this opens '21hr_apop_dict.json', whereas the saving cell in this
### notebook writes '21hr_accurate_apop_dict.json' -- confirm which file is intended
with open('/home/nathan/data/kraken/h2b/giulia/apoptosis_information/21hr_apop_dict.json') as json_in:
    valid_apop_dict = json.load(json_in)

In [None]:
### inspect the dict that was just loaded from JSON
valid_apop_dict

# Making new cumulative plot out of filtered apop IDs

Create new source directory containing only apop IDs of relevance

In [None]:
### Copy the raw lists of the apop IDs of interest into a new directory, adding the
### focal time to each filename ('..._focal_t_<value>.<ext>').
raw_lists_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/800.800'
filtered_lists_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/21_hours_accurate'
### shutil.copyfile does not create missing directories, so make sure the target exists
os.makedirs(filtered_lists_dir, exist_ok=True)
for fn in natsorted(os.listdir(raw_lists_dir)):
    ### renaming filename apop ID as dict compatible: drop the 'Scr-' prefix, add '_RFP'
    ID = fn.split('_N_')[0].replace('Scr-', '') + '_RFP'
    if ID not in valid_apop_dict:
        continue
    ### insert the focal time just before the extension only; a blanket
    ### str.replace('.', ...) would also rewrite any other dot in the name
    stem, ext = os.path.splitext(fn)
    new_fn = '{}_focal_t_{}{}'.format(stem, valid_apop_dict[ID], ext)
    shutil.copyfile(os.path.join(raw_lists_dir, fn), os.path.join(filtered_lists_dir, new_fn))

Compile into numpy arrays of N_cells, N_events, P_events

In [None]:
### Compile the filtered raw lists into N_cells / N_events / P_events arrays, once
### per 'radius.t_range.num_bins' configuration, and save each set as an .npz file.
###
### The arrays are written to <cumulative_plots>/arrays/21_hours_accurate/, which is
### the location the npz-loading cell of this notebook reads from. Previously the
### parent directory itself ended in '21_hours_accurate', so files were saved under
### .../cumulative_plots/21_hours_accurate/arrays/21_hours_accurate/ and the loading
### cell could not find them.
save_parent_dir = '/home/nathan/data/results/radial_analysis_output/follow/cumulative_plots'
### canon raw lists (same directory for every configuration, so set once)
raw_files_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/21_hours_accurate'
for config in ['800.800.10', '800.800.20']:#['800.800.6', '600.600.8', '600.600.6', '500.500.6', '1000.1000.10', '1000.1000.6']:
    radius, t_range, num_bins = (int(value) for value in config.split('.'))
    ### canon
    N_cells, N_events, P_events = render.cumulative_kymo_compiler(raw_files_dir, radius, t_range, num_bins)
    limit = np.amax(P_events)
    ### half the file count (presumably one N_cells and one N_events list per apoptosis)
    N = int(len(os.listdir(raw_files_dir))/2)

    ### control
    # raw_files_dir = os.path.join('/home/nathan/data/kraken/h2b/giulia/radial_analysis_output/follow/raw_lists/1600.1600/control_ninety_ten')
    # N_cells_c, N_events_c, P_events_c = render.cumulative_kymo_compiler(raw_files_dir, radius, t_range, num_bins)
    # limit_c = np.amax(P_events_c)
    # N_c = int(len(os.listdir(raw_files_dir))/2)

    ### save out raw arrays for coeff var
    fn = os.path.join(save_parent_dir,'arrays/21_hours_accurate/canon_21_hours_accurate_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius,t_range, num_bins))
    os.makedirs(os.path.dirname(fn), exist_ok=True)
    # fn_c = os.path.join(save_parent_dir, 'arrays/control_ninety_ten_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius,t_range, num_bins))
    # os.makedirs(os.path.dirname(fn_c), exist_ok=True)
    ### positional arrays are stored as arr_0, arr_1, arr_2 (the keys used when loading)
    np.savez(fn, N_cells, N_events, P_events)
    #np.savez(fn_c, N_cells_c, N_events_c, P_events_c)

### Or load from previously compiled .npz stacks of N_cells, N_events, P_events

In [None]:
### configuration of the pre-compiled stacks to load
radius, t_range, num_bins = 800, 800, 10
### keys under which the three positionally-saved arrays are stored in each .npz
array_keys = ('arr_0', 'arr_1', 'arr_2')
### load npz: canon (21 hours accurate) stack first, then the control stack
with np.load('/home/nathan/data/results/radial_analysis_output/follow/cumulative_plots/arrays/21_hours_accurate/canon_21_hours_accurate_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius, t_range, num_bins)) as canon_npz:
    N_cells, N_events, P_events = [canon_npz[key] for key in array_keys]
with np.load('/home/nathan/data/results/radial_analysis_output/follow/cumulative_plots/arrays/control_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius, t_range, num_bins)) as control_npz:
    N_cells_c, N_events_c, P_events_c = [control_npz[key] for key in array_keys]
### event counts: control is hard-coded, canon comes from the filtered ID list
### NOTE(review): valid_apops only exists if the filtering cell was run this session
N_c = 10491
N = len(valid_apops)# 1839
### per-stack maxima of P_events, plus one colourbar range that covers both
limit_c = np.amax(P_events_c)
limit = np.amax(P_events)
cbar_lim = (0, max(limit_c, limit))

In [None]:
### output folder is named after the loaded configuration; everything is then handed
### to render.MEGAPLOT (presumably draws all the plot variations -- see render module)
save_parent_dir = '/home/nathan/data/results/radial_analysis_output/follow/cumulative_plots/plots/21_hours_accurate_{}.{}.{}/'.format(radius, t_range, num_bins)
render.MEGAPLOT(
    N_cells, N_events, P_events,
    N_cells_c, N_events_c, P_events_c,
    N, N_c,
    limit, limit_c, cbar_lim,
    radius, t_range, num_bins,
    save_parent_dir,
)

# Finding cell IDs of divisions of interest

In [52]:
### criteria for the region of interest
### time: 400 frames either side in 10 bins gives 80-frame bins; isolate only the bin
### spanning 320 to 240 frames before the focal time (offsets are added to focal_time below)
### NB stored as (later offset, earlier offset), i.e. time_crit[1] is the lower bound
time_crit = (-240, -320) 
### space: the initial spatial bin, 800 pixels / 10 bins, i.e. distances between 0 and 80 pixels
distance_crit = (0, 80)

In [133]:
import ast
from collections import defaultdict
### Iterate over the N_events list of each focal apoptosis and keep every event
### (wild-type division) that falls strictly inside the time/distance criteria.
### Result: division_dict['<expt>_<pos>']['<Scr ID>'] = [(wt_ID, distance, time), ...]
### NOTE(review): this reads '.../canon/21_hours', not the '21_hours_accurate'
### directory populated earlier in this notebook -- confirm that is intended.
root_event_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/21_hours'
list_of_focal_apops = [fn for fn in os.listdir(root_event_dir) if 'N_events' in fn]
### the focal time is encoded in each filename as 'focal_t_<digits>'
focal_time_pattern = re.compile(r"focal_t_(\d+)")
N_div = 0
division_dict = defaultdict(dict)
for N_events_fn in natsorted(list_of_focal_apops):
    expt_ID, pos_ID, scr_ID = N_events_fn.split('_')[:3]
    focal_time = int(focal_time_pattern.findall(N_events_fn)[0])
    ### absolute time window for this focal apoptosis (time_crit[1] is the earlier offset)
    t_min = focal_time + time_crit[1]
    t_max = focal_time + time_crit[0]
    ### NB despite the name, tuples are stored as (wt_ID, distance, time)
    wt_ID_time_distance = []

    with open(os.path.join(root_event_dir, N_events_fn), 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                continue  ### tolerate blank / trailing empty lines
            ### each line is a tuple literal; literal_eval parses it without
            ### executing arbitrary code (the previous eval() would)
            wt_ID, distance, time = ast.literal_eval(line)[:3]
            ## filter criteria (strict inequalities: the bin edges are excluded)
            if t_min < time < t_max and distance_crit[0] < distance < distance_crit[1]:
                print(N_events_fn)
                print(wt_ID, focal_time, distance, time)
                N_div += 1
                wt_ID_time_distance.append((wt_ID, distance, time))
    ### only record focal apoptoses that have at least one matching division
    if wt_ID_time_distance:
        division_dict[expt_ID+'_'+pos_ID][scr_ID] = wt_ID_time_distance

GV0795_Pos5_Scr-16_N_events_wt_rad_800_t_range_800_focal_t_753.csv
86 753 42.46 443
GV0795_Pos7_Scr-416_N_events_wt_rad_800_t_range_800_focal_t_1010.csv
455 1010 59.81 744
GV0796_Pos3_Scr-1_N_events_wt_rad_800_t_range_800_focal_t_764.csv
849 764 49.6 464
GV0796_Pos3_Scr-1_N_events_wt_rad_800_t_range_800_focal_t_764.csv
877 764 73.42 470
GV0796_Pos3_Scr-45_N_events_wt_rad_800_t_range_800_focal_t_803.csv
1130 803 62.92 502
GV0796_Pos3_Scr-52_N_events_wt_rad_800_t_range_800_focal_t_828.csv
755 828 72.47 518
GV0796_Pos3_Scr-82_N_events_wt_rad_800_t_range_800_focal_t_794.csv
832 794 45.95 498
GV0796_Pos4_Scr-321_N_events_wt_rad_800_t_range_800_focal_t_646.csv
326 646 66.42 399
GV0796_Pos4_Scr-329_N_events_wt_rad_800_t_range_800_focal_t_695.csv
483 695 61.62 453
GV0796_Pos4_Scr-391_N_events_wt_rad_800_t_range_800_focal_t_800.csv
617 800 56.46 523
GV0796_Pos4_Scr-450_N_events_wt_rad_800_t_range_800_focal_t_822.csv
616 822 76.03 543
GV0796_Pos6_Scr-565_N_events_wt_rad_800_t_range_800_focal_t_8

GV0817_Pos23_Scr-425_N_events_wt_rad_800_t_range_800_focal_t_933.csv
689 933 76.11 672
GV0817_Pos23_Scr-590_N_events_wt_rad_800_t_range_800_focal_t_942.csv
980 942 78.03 667
GV0817_Pos23_Scr-590_N_events_wt_rad_800_t_range_800_focal_t_942.csv
1373 942 75.24 696
GV0817_Pos23_Scr-740_N_events_wt_rad_800_t_range_800_focal_t_1150.csv
1111 1150 68.5 854
GV0817_Pos23_Scr-740_N_events_wt_rad_800_t_range_800_focal_t_1150.csv
1234 1150 53.65 853
GV0818_Pos12_Scr-113_N_events_wt_rad_800_t_range_800_focal_t_705.csv
189 705 65.0 418
GV0818_Pos13_Scr-218_N_events_wt_rad_800_t_range_800_focal_t_795.csv
403 795 79.35 486
GV0819_Pos0_Scr-21_N_events_wt_rad_800_t_range_800_focal_t_1053.csv
737 1053 73.37 805
GV0819_Pos0_Scr-38_N_events_wt_rad_800_t_range_800_focal_t_533.csv
108 533 77.79 262
GV0819_Pos0_Scr-53_N_events_wt_rad_800_t_range_800_focal_t_1055.csv
363 1055 71.91 752
GV0819_Pos0_Scr-456_N_events_wt_rad_800_t_range_800_focal_t_1072.csv
819 1072 77.76 788
GV0819_Pos1_Scr-360_N_events_wt_rad_800

In [126]:
### total number of events that met the time and distance criteria
N_div

133

In [135]:
### inspect the nested dict: '<expt>_<pos>' -> Scr ID -> list of (wt_ID, distance, time)
division_dict

defaultdict(dict,
            {'GV0795_Pos5': {'Scr-16': [(86, 42.46, 443)]},
             'GV0795_Pos7': {'Scr-416': [(455, 59.81, 744)]},
             'GV0796_Pos3': {'Scr-1': [(849, 49.6, 464), (877, 73.42, 470)],
              'Scr-45': [(1130, 62.92, 502)],
              'Scr-52': [(755, 72.47, 518)],
              'Scr-82': [(832, 45.95, 498)]},
             'GV0796_Pos4': {'Scr-321': [(326, 66.42, 399)],
              'Scr-329': [(483, 61.62, 453)],
              'Scr-391': [(617, 56.46, 523)],
              'Scr-450': [(616, 76.03, 543)]},
             'GV0796_Pos6': {'Scr-565': [(276, 79.57, 565)]},
             'GV0796_Pos7': {'Scr-3': [(564, 70.16, 443)],
              'Scr-41': [(287, 51.13, 309)],
              'Scr-394': [(776, 68.74, 551)],
              'Scr-399': [(493, 65.5, 466), (552, 66.77, 451)]},
             'GV0796_Pos20': {'Scr-72': [(641, 44.93, 612)],
              'Scr-95': [(1232, 72.77, 621)]},
             'GV0796_Pos21': {'Scr-252': [(120, 43.23, 298)],

In [146]:
### write the filtered divisions to JSON (tuples are serialised as JSON arrays)
with open('/home/nathan/data/results/radial_analysis_output/follow/raw_lists/ROIs/filtered_list_wt_divisions_d0-80_t-240--320.json', 'w') as roi_json:
    json.dump(division_dict, roi_json)

In [140]:
### sanity check: total number of divisions stored in division_dict
count2 = sum(
    len(divisions)
    for per_position in division_dict.values()
    for divisions in per_position.values()
)

In [141]:
### number of divisions counted from division_dict, for comparison with N_div
count2

133