# Plotting the output from Cumulative event counter

Notebook for compiling the raw listed cell/event counter data into heatmaps, using only focal apoptoses that stretch back in time for a certain duration.

In [4]:
import csv, glob, re, os, json, shutil
import numpy as np
from natsort import natsorted
import matplotlib.pyplot as plt
from tqdm import tqdm
import render, dataio
import calculate_radial_analysis as calculate

# Filtering individual apoptoses based on length

Doing this because there is a notable increase in probability 26 hours prior, but not every apoptotic mutant will exist 26 hours before.

In [15]:
# Directory holding the raw list files that are scanned for apoptosis IDs
source_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/800.800'

In [16]:
# Take the part of each file name that precedes '_N_cells', for every file
# in source_dir whose name contains 'N_cells'
apop_IDs = [
    filename.split('_N_cells')[0]
    for filename in os.listdir(source_dir)
    if 'N_cells' in filename
]

In [17]:
# Parse expt_movie_length.json into movie_time_range
movie_length_path = '/home/nathan/data/kraken/h2b/giulia/apoptosis_information/expt_movie_length.json'
with open(movie_length_path) as handle:
    movie_time_range = json.load(handle)

In [18]:
# Parse scr_apop_dict.json into apop_dict; it is keyed by apoptosis ID
# (presumably the value is the apoptosis frame -- TODO confirm)
scr_apop_path = '/home/nathan/data/kraken/h2b/giulia/apoptosis_information/scr_apop_dict.json'
with open(scr_apop_path) as handle:
    apop_dict = json.load(handle)

# Filter list of focal apoptoses based on length

Only want focal apoptoses that extend back 21 hours, i.e. 1260 minutes = 315 frames

*** a more accurate measure is excluding the last bin of 80 frames (800/10) so going back 320 frames ***

So I only want cells that have existed for 315 or more frames. For this I need to load the tracks of each of the individual apop IDs and check that len(track) > 315 and that t(apop) > 315 (mutually verifiable)

Could refine these criteria later, as there will be many that don't extend back this far yet are still >18 or >16 hours (still areas of interest)

#### Loading apoptotic cell track information

In [None]:
### Keep only the focal apoptoses that have more than MIN_FRAMES frames of history
MIN_FRAMES = 320  # threshold applied to both the apop_dict value and the track length
hdf5_root_dir = '/home/nathan/data/kraken/h2b/giulia/'
previous_h5 = ''
valid_apops = []
for ID in tqdm(apop_dict):
    # the ID is underscore-delimited; the first three fields are experiment, position, cell ID
    expt, pos, cell_ID = ID.split('_')[:3]
    ### load HDF5, but only when this ID lives in a different file from the previous one
    h5_path = os.path.join(hdf5_root_dir, expt, pos, pos+'_aligned', 'HDF/segmented.hdf5')
    if h5_path != previous_h5:
        wt_cells, scr_cells, all_cells = dataio.load_tracking_data(h5_path)
        # remember the loaded file; previously this was never updated, so the
        # guard above was always true and every iteration reloaded the HDF5
        previous_h5 = h5_path
    # the cell ID is negated before matching -- presumably Scr tracks carry
    # negative IDs (TODO confirm); an IndexError here means no track matched
    target_ID = -int(cell_ID)
    cell = [track for track in scr_cells if track.ID == target_ID][0]
    ### if there is enough time prior to apoptosis and if the track exists for that time period
    if apop_dict[ID] > MIN_FRAMES and len(cell) > MIN_FRAMES:
        valid_apops.append(ID)
        print(ID, len(cell), apop_dict[ID])

In [None]:
# Number of apoptosis IDs that passed the length filter
len(valid_apops)

In [None]:
# Display the apoptosis IDs that passed the length filter
valid_apops

In [None]:
### restrict apop_dict to the IDs collected in valid_apops
valid_apop_dict = {apop_ID: apop_dict[apop_ID] for apop_ID in valid_apops}

In [None]:
# Display the filtered {apop ID: apop_dict value} mapping
valid_apop_dict

In [None]:
### write the valid apoptoses (those that stretch back 21 hours) to disk as JSON
valid_dict_path = '/home/nathan/data/kraken/h2b/giulia/apoptosis_information/21hr_accurate_apop_dict.json'
with open(valid_dict_path, 'w') as out_file:
    json.dump(valid_apop_dict, out_file)

In [None]:
### loading valid apop_dict
# Read back the file this notebook's save cell writes (21hr_accurate_apop_dict.json).
# The previous name, 21hr_apop_dict.json, did not match that file, even though every
# downstream output is labelled '21_hours_accurate'.
# NOTE(review): point this back at 21hr_apop_dict.json if the older dict was intended.
with open('/home/nathan/data/kraken/h2b/giulia/apoptosis_information/21hr_accurate_apop_dict.json', 'r') as file:
    valid_apop_dict = json.load(file)

In [None]:
# Display the dict that was just loaded from disk
valid_apop_dict

# Making new cumulative plot out of filtered apop IDs

Create new source directory containing only apop IDs of relevance

In [None]:
### copy only apop IDs of interest into new directory and rename with focal time
canon_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/800.800'
filtered_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/21_hours_accurate'
# create the destination up front so copyfile cannot fail on a missing directory
os.makedirs(filtered_dir, exist_ok=True)
for fn in natsorted(os.listdir(canon_dir)):
    ### renaming filename apop ID as dict compatible
    ID = fn.split('_N_')[0].replace('Scr-', '') + '_RFP'
    if ID not in valid_apop_dict:
        continue
    # insert the focal time immediately before the extension only;
    # str.replace('.') would rewrite every dot in the file name
    stem, ext = os.path.splitext(fn)
    new_fn = '{}_focal_t_{}{}'.format(stem, valid_apop_dict[ID], ext)
    shutil.copyfile(os.path.join(canon_dir, fn), os.path.join(filtered_dir, new_fn))

Compile into numpy arrays of N_cells, N_events, P_events

In [None]:
### compile the filtered raw lists into N_cells / N_events / P_events and save one .npz per config
# Arrays are written beneath .../cumulative_plots/arrays/21_hours_accurate/, which is
# where the np.load cell of this notebook reads them from. The previous parent dir
# (.../cumulative_plots/21_hours_accurate) put them one level deeper than that.
save_parent_dir = '/home/nathan/data/results/radial_analysis_output/follow/cumulative_plots'
### canon
raw_files_dir = '/home/nathan/data/results/radial_analysis_output/follow/raw_lists/800.800/canon/21_hours_accurate'
for config in ['800.800.10', '800.800.20']:#['800.800.6', '600.600.8', '600.600.6', '500.500.6', '1000.1000.10', '1000.1000.6']:
    # config is 'radius.t_range.num_bins'
    radius, t_range, num_bins = (int(value) for value in config.split('.'))
    N_cells, N_events, P_events = render.cumulative_kymo_compiler(raw_files_dir, radius, t_range, num_bins)
    limit = np.amax(P_events)
    # half the number of directory entries -- presumably two raw files per apoptosis (TODO confirm)
    N = len(os.listdir(raw_files_dir)) // 2

    ### control
    # raw_files_dir = os.path.join('/home/nathan/data/kraken/h2b/giulia/radial_analysis_output/follow/raw_lists/1600.1600/control_ninety_ten')
    # N_cells_c, N_events_c, P_events_c = render.cumulative_kymo_compiler(raw_files_dir, radius, t_range, num_bins)
    # limit_c = np.amax(P_events_c)
    # N_c = int(len(os.listdir(raw_files_dir))/2)

    ### save out raw arrays for coeff var
    fn = os.path.join(save_parent_dir,'arrays/21_hours_accurate/canon_21_hours_accurate_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius,t_range, num_bins))
    os.makedirs(os.path.dirname(fn), exist_ok=True)
    # fn_c = os.path.join(save_parent_dir, 'arrays/control_ninety_ten_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius,t_range, num_bins))
    # os.makedirs(os.path.dirname(fn_c), exist_ok=True)
    # positional arrays are stored under the keys arr_0, arr_1, arr_2
    np.savez(fn, N_cells, N_events, P_events)
    #np.savez(fn_c, N_cells_c, N_events_c, P_events_c)

### Or load from previously compiled .npz stacks of N_cells, N_events, P_events

In [None]:
### dimensions of the stacks to load
radius, t_range, num_bins = 800, 800, 10
# keys under which the three arrays are read from each .npz file
array_keys = ('arr_0', 'arr_1', 'arr_2')
### load npz
canon_npz = '/home/nathan/data/results/radial_analysis_output/follow/cumulative_plots/arrays/21_hours_accurate/canon_21_hours_accurate_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius, t_range, num_bins)
control_npz = '/home/nathan/data/results/radial_analysis_output/follow/cumulative_plots/arrays/control_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius, t_range, num_bins)
with np.load(canon_npz) as data:
    N_cells, N_events, P_events = [data[key] for key in array_keys]
with np.load(control_npz) as data:
    N_cells_c, N_events_c, P_events_c = [data[key] for key in array_keys]
### event counts and colour-bar limits
N = len(valid_apops)# 1839
N_c = 10491  # hard-coded control count -- presumably the number of control events (TODO confirm)
limit = np.amax(P_events)
limit_c = np.amax(P_events_c)
# shared range running from 0 to the larger of the two maxima
cbar_lim = (0, max(limit_c, limit))

In [None]:
### output directory for the plots of this configuration
save_parent_dir = '/home/nathan/data/results/radial_analysis_output/follow/cumulative_plots/plots/21_hours_accurate_{}.{}.{}/'.format(radius, t_range, num_bins)
### canon arrays, control arrays, counts, limits, dimensions, then the output directory
render.MEGAPLOT(
    N_cells, N_events, P_events,
    N_cells_c, N_events_c, P_events_c,
    N, N_c,
    limit, limit_c, cbar_lim,
    radius, t_range, num_bins,
    save_parent_dir,
)

# Iterating over several different scales with bulk output

In [None]:
### render the plots for every (radius, t_range, num_bins) configuration from saved .npz stacks
# hard-coded event counts -- presumably control and canon totals (TODO confirm)
N_c = 10491
N = 1839
for config in ['200.200.10', '400.400.10', '800.800.10', '600.600.20', '800.800.20']:
    # config is 'radius.t_range.num_bins'
    radius, t_range, num_bins = (int(value) for value in config.split('.'))
    print('starting dimensions radius, t_range, num_bins:',radius, t_range, num_bins)
    save_parent_dir = '/home/nathan/data/kraken/h2b/giulia/radial_analysis_output/follow/cumulative_plots/plots/210804_{}.{}.{}'.format(radius, t_range, num_bins)
    ### load npz (each template has three placeholders; the surplus format arguments were dropped)
    with np.load('/home/nathan/data/kraken/h2b/giulia/radial_analysis_output/follow/cumulative_plots/arrays/canon_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius, t_range, num_bins)) as data:
        N_cells = data['arr_0']
        N_events = data['arr_1']
        P_events = data['arr_2']
    with np.load('/home/nathan/data/kraken/h2b/giulia/radial_analysis_output/follow/cumulative_plots/arrays/control_N_cell_N_event_P_event_{}.{}.{}.npz'.format(radius, t_range, num_bins)) as data:
        N_cells_c = data['arr_0']
        N_events_c = data['arr_1']
        P_events_c = data['arr_2']
    limit_c = np.amax(P_events_c)
    limit = np.amax(P_events)
    # shared range running from 0 to the larger of the two maxima
    cbar_lim = (0, max(limit_c, limit))
    render.MEGAPLOT(N_cells, N_events, P_events, N_cells_c, N_events_c, P_events_c, N, N_c, limit, limit_c, cbar_lim, radius, t_range, num_bins, save_parent_dir)