<a href="https://colab.research.google.com/github/samuramirez/cellmigration/blob/master/LoopGetMasksOfTrackedCellsOnly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Mount Google Drive (Colab can see Drive files) and authenticate so that we can interact with GCP via SDK

In [1]:
#Mount Google Drive at /content/drive so that files stored in Drive can be read from this Colab runtime
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from google.colab import auth
#Authenticate the Colab user. The later cells talk to GCP (google.cloud.storage client and gsutil),
#which presumably rely on the credentials obtained here - NOTE(review): confirm.
auth.authenticate_user()

# Save masks that contain only the tracked cells

### Load required libraries 

In [3]:
import numpy as np
from ipywidgets import interact, widgets, Layout
import matplotlib.pyplot as plt
from matplotlib import cm
from skimage.io import imread, imsave, imshow
from skimage import data, filters, measure, morphology
from skimage.exposure import rescale_intensity
from skimage.transform import rescale, resize, downscale_local_mean, rotate
from skimage.measure import  regionprops, regionprops_table
import pandas as pd
import ntpath
import os
import pickle
import copy
import re
import glob
from scipy.stats import skew
from google.cloud import storage


#Copy the helper module centers.py from the Drive folder MyDrive/libraries into /content
#so that it can be imported below (assumes /content is the working directory / on the import path)
!cp -r /content/drive/MyDrive/libraries/centers.py /content
from centers import get_centers

### Specify the names of the bucket and of the analysis folder to read data from

In [4]:
#@markdown Input required to copy data from bucket to colab (tracks, images and masks)

#@markdown bucketName is the GCP bucket where data is stored
bucketName = 'cellmbucket' #@param {type:"string"}
#@markdown analysis_folder is the name of the base folder in the bucket where the trial folders are found (same name as output_path in segmentationQC_trackingQC notebook excluding '/trial#' part)
analysis_folder='analysis_eliz_05042021'  #@param {type:"string"}

#bucketName = os.environ.get('cellmbucket')
#NOTE(review): the project environment variable is set to the *bucket* name here. A GCP project id
#is normally not the same string as a bucket name - confirm that this is intended.
os.environ['GCLOUD_PROJECT'] = bucketName

#Create the Cloud Storage client and get a handle on the bucket named above.
#`bucket` is the object whose blobs are listed when looking for "_labeledmasks.zip" files.
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucketName)

In [5]:
#Go through all files in bucketName/analysis_folder and find ones that contain the name "_labeledmasks.zip"
#and use these files to get the output_path and experiment names
#Note that output_path same folder where the files with "_labeledmasks.zip" are located and experiment is the portion of the file name before "_labeledmasks.zip"
for blob in bucket.list_blobs(prefix=analysis_folder):
  if '_labeledmasks.zip' in blob.name:
    output_path = bucketName + '/' + os.path.dirname(blob.name)
    experiment = os.path.basename(blob.name).replace('_labeledmasks.zip', '')

    #Get labeled masks and tracks from GCP bucket

    !gsutil cp gs://{output_path}/{experiment}_labeledmasks.zip /content
    !gsutil cp gs://{output_path}/{experiment}_tracks_shape.pkl /content

    !unzip {experiment}_labeledmasks.zip

    masks_folder = experiment+'_labeledmasks'
    trackedmasks_folder = experiment+'_trackedmasks'

    !mkdir {trackedmasks_folder}

    with open(experiment+'_tracks_shape.pkl', 'rb') as handle:
                tracks = pickle.load(handle, encoding='latin1')

    #Recombine dataframes of saved tracks. Original format is a list of dataframes where each dataframe
    #is a separate track and it contains the track movie, frame number, and label number used
    #in labeledmasks.zip and restructures it so that we are left with a list of dataframes where each
    #dataframe contains all the tracks present in each frame for each movie

    #This is accomplished by appending the dataframe of one track to the list singletr_movie and comparing the movie number (track_init) of each dataframe in "tracks" with the movie number of the next dataframe (track_next)
    #If track_init == track_next we append the dataframe corresponding to track_next to the list multitr_movie (this means there are multiple tracks for this movie)
    #Otherwise we appned the first dataframe in singletr_movie to multitr_movie and append multitr_movie to tracks_by_movie, the final list of dataframes we want
    #We append the first dataframe of singletr_movie to avoid repeats since if track_init==track_next for a movie there will be the same dataframe in both lists

    #where final list of dataframes will be stored (where each dataframe contains all tracks present in each frame for each movie)
    tracks_by_movie = []
    #list where will temporarily store dataframes while going through tracks
    singletr_movie = []
    #list where we will temporarily store dataframes if there are multiple tracks for each movie 
    multitr_movie = []
    for i in range(len(tracks)-1):
      track_init = int(tracks[i]['movie'].unique())
      singletr_movie.append(tracks[i])
      track_next = int(tracks[i+1]['movie'].unique())
      if track_init == track_next:
        multitr_movie.append(tracks[i+1])
      else:
        multitr_movie.append(singletr_movie[0])
        tracks_by_movie.append(multitr_movie)
        singletr_movie = []
        multitr_movie = []

    #The structure of tracks_by_movie  is now a list of dataframes where each element of this list reperesents each movie, however if there is more than one track for a movie there are 2 or more separate dataframes for that movie that
    #correspond to each track. We want to concatenate these dataframes so that we have one dataframe for each movie that contains all the tracks for that movie
    for i in range(len(tracks_by_movie)):
      tracks_by_movie[i] = pd.concat(tracks_by_movie[i], ignore_index=True)

    #function that returns the mask of a cell. It takes the cell label in the mask, and the movie
    #as implicit parameters determined by the track information (as used later).
    def get_cell(iframe):
      mask=get_mask(iframe)
      #get all labels for all tracked cells for each frame
      labels_for_frame = tracks_by_movie[itrack].loc[tracks_by_movie[itrack]['frame'] == iframe, 'label'].array  
      #erase all objects with different label as tracked cells
      mask[np.isin(mask, labels_for_frame, invert=True) ] =0
      #set cell positions as 1
      mask[mask > 0 ] = 1

      return mask

    #function to read a mask corresponding to a given movie (implicit parameter), and frame (explicit parameter iframe)
    get_mask = lambda iframe: imread( masks_folder +'/'+ basename+ '_s' + str(int(movie)) +'_t' + str(int(iframe)) + '.TIF')

    #Here we make a copy of tracks_by_movie and name it tracks 
    #We loop through each element (where each element is a dataframe that has labels for all the tracks for each movie) and go through each frame movie and save images of masks for all cells that are tracked in each frame
    tracks = copy.copy(tracks_by_movie)

    for itrack in range(len(tracks)):
      movie = tracks[itrack]['movie'].iloc[0]
      frame = tracks[itrack]['frame'].unique()
      masks=os.listdir(masks_folder)
      #implicit argument to read cells
      basename=re.findall(r"(.+)_s",masks[0])[0] 
      for iframe in frame:
        #get scikit defined cell metrics
        cell = get_cell(iframe)
        imsave(trackedmasks_folder + '/' + basename + "_s" + str(int(movie))+'_t'+str(int(iframe))+'.TIF' , cell)

    #zip masks of tracked cells
    !zip -r {trackedmasks_folder}.zip {trackedmasks_folder}

    #transfer data to GCP bucket
    !gsutil cp {trackedmasks_folder}.zip gs://{output_path}

    #remove labeled masks folder in local folder
    !rm -r {masks_folder}    
    #remove labeled tracked masks folder in local folder 
    !rm -r {trackedmasks_folder}

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s20_t150.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s16_t11.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s4_t54.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s1_t66.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s25_t155.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s22_t49.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s31_t60.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s3_t155.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s17_t162.TIF  
  inflating: 05042021_ARPC2_KO_sample6_labeledmasks/05042021_Sample6_w1DIC_s24_t99.TIF  
  inflating: 05042021_ARPC2_KO_sample6_label



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s7_t124.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s29_t73.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s7_t56.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s15_t111.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s20_t46.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s27_t152.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s23_t14.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s24_t107.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s11_t60.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labeledmasks/05042021_Sample7_w1DIC_s10_t77.TIF  
  inflating: 05042021_ARPC2_KO_sample7_labe



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s7_t9.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s4_t89.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s24_t35.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s28_t96.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s25_t15.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s27_t123.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s11_t34.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s4_t5.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_trackedmasks/05042021_Sample7_w1DIC_s21_t61.TIF (deflated 100%)
  adding: 05042021_ARPC2_KO_sample7_tra



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s18_t9.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s46_t33.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s16_t20.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s24_t28.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s7_t6.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s27_t131.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s39_t148.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s26_t97.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s8_t98.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeledmasks/05112021_Sample3_w1DIC_s12_t173.TIF  
  inflating: 05112021_ARPC2_KO_sample3_labeled



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s18_t63.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s4_t92.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s30_t17.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s27_t177.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s6_t131.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s11_t86.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s16_t117.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s11_t146.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s1_t88.TIF  
  inflating: 05112021_ARPC2_KO_sample4_labeledmasks/05112021_Sample4_w1DIC_s15_t85.TIF  
  inflating: 05112021_ARPC2_KO_sample4_label



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s22_t27.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s11_t126.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s5_t17.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s26_t154.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s15_t21.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s3_t47.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s20_t44.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s1_t16.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_trackedmasks/05112021_Sample4_w1DIC_s19_t4.TIF (deflated 100%)
  adding: 05112021_ARPC2_KO_sample4_t



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s3_t12.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s20_t129.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s17_t47.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s23_t23.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s5_t118.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s13_t4.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s25_t178.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s8_t55.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s18_t95.TIF  
  inflating: 060622021_ARPC2_KO_sample3_labeledmasks/060622021_Sample3_w1DIC_s5_t39.TIF  
  inflating: 060622021_ARPC



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s17_t65.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s5_t102.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s21_t53.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s18_t63.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s29_t122.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s22_t163.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s32_t114.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s14_t152.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s24_t62.TIF  
  inflating: 060622021_ARPC2_KO_sample4_labeledmasks/060622021_Sample4_w1DIC_s28_t172.TIF  
  inflating: 0606220



  adding: 060622021_ARPC2_KO_sample4_trackedmasks/ (stored 0%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s12_t148.TIF (deflated 100%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s1_t142.TIF (deflated 100%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s13_t61.TIF (deflated 100%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s27_t157.TIF (deflated 100%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s22_t172.TIF (deflated 100%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s27_t146.TIF (deflated 100%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s21_t42.TIF (deflated 100%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s29_t88.TIF (deflated 100%)
  adding: 060622021_ARPC2_KO_sample4_trackedmasks/060622021_Sample4_w1DIC_s7_t57.TIF (deflated 100%)
  adding: 060622