<a href="https://colab.research.google.com/github/samuramirez/cellmigration/blob/master/treatment_dataframe_cell.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Track averaged data

### Mount Google Drive (Colab can see Drive files) and authenticate so that we can interact with GCP via SDK

In [None]:
# Mount Google Drive inside the Colab VM; a later cell copies the
# `libraries` folder from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from google.colab import auth
# Authenticate the Colab user. Later cells run `gsutil` against gs:// paths,
# which presumably relies on these credentials -- TODO confirm.
# (Original note: this allows the SDK to see and edit Google Drive files;
# the SDK is required to interact with GCP.)
auth.authenticate_user()

### Import libraries

In [None]:
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io
import os
import os.path
from numpy import linalg as LA
import pickle
from os import listdir
from os.path import isfile, join
import re
import csv
from copy import deepcopy
import glob
from scipy import stats
import ntpath


#Get from Drive the folder libraries
!cp -r /content/drive/MyDrive/libraries /content
from libraries.track_functions import *


In [None]:
# Install the heteromotility package; its command-line tool is invoked later
# in the per-file loop.
# NOTE(review): the version is not pinned, so results may change between runs.
!pip install heteromotility

### Input (read treatment data) and set output destination

In [None]:
# Destination for the final CSV (full gs:// URL, passed to `gsutil cp`).
output_path = 'gs://cellmbucket/analysis'

In [None]:
# Bucket-relative path (no gs:// prefix; the prefix is added where the file is
# copied) of the text file whose first line is the treatment name and whose
# remaining lines are data locations.
treatment_path = 'cellmbucket/analysis/controls_paths.txt' #@param {type:"string"}


In [None]:
#read treatment locations file
!gsutil cp gs://{treatment_path} ./
treatment_file = ntpath.basename(treatment_path)
treatment_obj = open(treatment_file, "r")
treatment_locs = treatment_obj.readlines()

Copying gs://cellmbucket/analysis/controls_paths.txt...
/ [1 files][  345.0 B/  345.0 B]                                                
Operation completed over 1 objects/345.0 B.                                      


In [None]:
# Inspect the raw lines read from the treatment locations file.
treatment_locs

['controls\n',
 'cellmbucket/analysis_042221/06032020-JR20-control\n',
 'cellmbucket/analysis_pri_050521/03122021-JR20P14-control-sample5\n',
 'cellmbucket/analysis_pri_050521/03122021-JR20P14-control\n',
 'cellmbucket/analysis_sam_042821/031521_control_sample3\n',
 'cellmbucket/analysis_sam_042821/031521_control_sample4\n',
 'cellmbucket/analysis_sam_042821/031521_control_sample5']

### Read data and compute matrix

In [None]:
# Parameters used when reading tracks and computing motion metrics.
center = 'approximate-medoid'   # which cell-centre estimate to track
pixel_size = 2 * 0.645          # um; factor of 2 because image was rescaled
sampling_t = 1                  # sampling time in frames (one frame = 10 min per original note)

In [None]:
# The first line of the locations file is the treatment name. strip() (rather
# than strip('\n')) also removes a trailing '\r' or stray spaces, which would
# otherwise end up in the directory and output file names.
treatment = treatment_locs[0].strip()
# Local working directory for the downloaded track files. exist_ok makes the
# cell safe to re-run (a plain `mkdir` errors when the directory exists).
os.makedirs(treatment, exist_ok=True)

In [None]:
# Confirm the parsed treatment name.
treatment

'controls'

In [None]:
#get all files full paths. Some data has the same name but is stored in different folder
# Every line after the first is a data location; the tracks pickle for each is
# expected at "<location>_tracks_shape.pkl". strip() removes '\n', '\r' and
# surrounding spaces, and blank / whitespace-only lines are skipped.
file_paths = [
  line.strip() + '_tracks_shape.pkl'
  for line in treatment_locs[1:]
  if line.strip()
]

In [None]:
# Inspect the bucket-relative paths of the track files to be processed.
file_paths

['cellmbucket/analysis_042221/06032020-JR20-control_tracks_shape.pkl',
 'cellmbucket/analysis_pri_050521/03122021-JR20P14-control-sample5_tracks_shape.pkl',
 'cellmbucket/analysis_pri_050521/03122021-JR20P14-control_tracks_shape.pkl',
 'cellmbucket/analysis_sam_042821/031521_control_sample3_tracks_shape.pkl',
 'cellmbucket/analysis_sam_042821/031521_control_sample4_tracks_shape.pkl',
 'cellmbucket/analysis_sam_042821/031521_control_sample5_tracks_shape.pkl']

In [None]:
#Check features
# Download the first track file into the working directory, read it with the
# project helper, and list the columns of the first geometry frame so the
# available shape features can be inspected.
!gsutil cp gs://{file_paths[0]} ./
tracks,tracksgeo = read_tracks_aut(['./'+ ntpath.basename(file_paths[0])] ,pixel_size,center)
tracksgeo[0].columns

Copying gs://cellmbucket/analysis_042221/06032020-JR20-control_tracks_shape.pkl...
- [1 files][468.8 KiB/468.8 KiB]                                                
Operation completed over 1 objects/468.8 KiB.                                    


Index(['movie', 'frame', 'label', 'area', 'approximate-medoidx',
       'approximate-medoidy', 'gel-region', 'area', 'eccentricity',
       'orientation', 'perimeter', 'solidity', 'major_axis_length',
       'minor_axis_length', 'centroid-0', 'centroid-1', 'median_centroidx',
       'median_centroidy', 'protr_angle', 'mean_protr_angle',
       'protr_norm_area', 'retr_angle', 'mean_retr_angle', 'retr_norm_area',
       'protr_norm_radii', 'mean_protr_norm_radii', 'retr_norm_radii',
       'mean_retr_norm_radii', 'polarity_angle', 'abs-skew'],
      dtype='object')

In [None]:
# Columns that are not averaged per cell.
exclude_cols = pd.Series(['movie', 'frame', 'label', 'gel-region'])
# Angular features: in addition to their plain mean, the mean sine and cosine
# are computed for these in the per-file loop.
angles = [
  'polarity_angle',
  'protr_angle',
  'retr_angle',
  'mean_protr_angle',
  'mean_retr_angle',
]
#features to average
# Boolean mask over the columns of the first geometry frame.
colindices = ~tracksgeo[0].columns.isin(exclude_cols)

In [None]:
cells_df = pd.DataFrame()
for file in file_paths:
  #get file
  !gsutil cp gs://{file} ./{treatment}
  tracks,tracksgeo = read_tracks_aut([treatment+'/'+ ntpath.basename(file)] ,pixel_size,center)
  #!rm ./{treatment+'/'+ntpath.basename(file)}
  if len(tracks)==0: print("number of tracks read is 0")
  #tracks=smooth_tracks(tracks,1)
  
  #remove tracks with less than 11 points
  tracks = [track for track in tracks if len(track) > 10 ]
  tracksgeo = [track for track in tracksgeo if len(track) > 10 ]

  #GET TRACKS MOTION METRICS
  stepsizes,turns,meancoskturn,stderrcoskturn, tseries_stats, endpointcells = basic_stats(tracks,pixel_size,sampling_t)
  
  #GET HETEROMOTILITY FEATURES
  #prepare cell coordinates for heteromotility
  object_paths={}
  for i in range(len(tracks)):
    object_paths["cell"+str(i)]= tracks[i].tolist()
  pickle.dump( object_paths, open( "pickled_paths.pkl", "wb" ) )
  #run heteromotility
  !heteromotility --exttrack pickled_paths.pkl ./
  heteromotility_df = pd.read_csv("motility_statistics.csv")
  !rm motility_statistics.csv
  heteromotility_df = heteromotility_df.drop(columns = ["Well/XY", "cell_id"])

  #Each row of the data frame contains average data for a particular cell (track)
  cells = endpointcells

  #ADD TREATMENT 
  cells['treatment'] = treatment
  
  #Concatenate directional motion metrics with heteromotility metrics
  cells = pd.concat([cells.reset_index(drop=True), heteromotility_df.reset_index(drop=True)],1)

  #CONCATENATE MEAN GEOMETRY (for now pixel units)
  
  #make data frames with cell mean and std dev of not excluded shape metrics
  cellsshape = pd.DataFrame()
  cellsshape_std = pd.DataFrame()
  for i in range(len(tracksgeo)):
    #remove columns with duplicate names
    tracksgeo[i] = tracksgeo[i].loc[:,~tracksgeo[i].columns.duplicated()]
    #compute mean over cell
    colindices= ~tracksgeo[i].columns.isin(exclude_cols)
    means = tracksgeo[i].loc[:,colindices].mean().T
    #add gel-region
    means['gel-region'] =tracksgeo[i]['gel-region'].iloc[0]
    #add mean cosine and sine
    mean_cos_angles = np.cos(tracksgeo[i][angles]).mean()
    mean_cos_angles.index = ['cos_' + feature for feature in mean_cos_angles.index  ]
    mean_sin_angles = np.sin(tracksgeo[i][angles]).mean()
    mean_sin_angles.index = ['sin_' + feature for feature in mean_sin_angles.index  ]
    means = pd.concat([means,mean_sin_angles,mean_cos_angles],axis=0)
    cellsshape = cellsshape.append(means, ignore_index=True)
    
    #get variation in shape features (STD)
    cellsshape_std = cellsshape_std.append(tracksgeo[i].loc[:,colindices].std().T, ignore_index=True)
  #set column names of std dev data frame
  cellsshape_std.columns = [feature+'_std' for feature in cellsshape_std.columns  ]
  
  #concatenate track metrics and cell mean shape metrics    
  cells = pd.concat([cells.reset_index(drop=True), cellsshape.reset_index(drop=True), cellsshape_std.reset_index(drop=True)],1)
  
  #concatenate track metrics  std shape    
  #cells = pd.concat([cells.reset_index(drop=True),],1)
  
  #Append to all-cells data frame
  cells_df = cells_df.append(cells , ignore_index=True)  
  
  print('ncells',len(endpointcells['FMI']), endpointcells['FMI'].mean(),endpointcells['FMI'].sem())


Copying gs://cellmbucket/analysis_042221/06032020-JR20-control_tracks_shape.pkl...
/ [1 files][468.8 KiB/468.8 KiB]                                                
Operation completed over 1 objects/468.8 KiB.                                    


  meanpathlength=sum_pathlengths/nlengths
  stddevpathlength = np.sqrt(sum_squared_pathlengths/nlengths - meanpathlength**2 )
  meanfmi=sum_fmis/nlengths
  stddevfmi = np.sqrt(sum_squared_fmis/nlengths - meanfmi**2 )
  meanpmi=sum_pmis/nlengths
  stddevpmi = np.sqrt(sum_squared_pmis/nlengths - meanpmi**2 )
  meanDT=sum_DTs/nlengths
  stddevDT = np.sqrt(sum_squared_DTs/nlengths - meanDT**2 )


  import pandas.util.testing as tm
Wrote  ./ motility_statistics.csv
ncells 50 0.07845932362419085 0.058101729717115115
Copying gs://cellmbucket/analysis_pri_050521/03122021-JR20P14-control-sample5_tracks_shape.pkl...
- [1 files][422.0 KiB/422.0 KiB]                                                
Operation completed over 1 objects/422.0 KiB.                                    
  import pandas.util.testing as tm
Wrote  ./ motility_statistics.csv
ncells 31 0.027580836640117876 0.06797525394493942
Copying gs://cellmbucket/analysis_pri_050521/03122021-JR20P14-control_tracks_shape.pkl...
/ [1 files][690.1 KiB/690.1 KiB]                                                
Operation completed over 1 objects/690.1 KiB.                                    
  import pandas.util.testing as tm
Wrote  ./ motility_statistics.csv
ncells 46 0.006080205330161141 0.051037836775304914
Copying gs://cellmbucket/analysis_sam_042821/031521_control_sample3_tracks_shape.pkl...
- [1 files][212.6 KiB/212.6 KiB]     

In [None]:
#leneachtrack = [len(tracks[i]) for i in range(len(tracks))]
#leneachtrack

In [None]:
# Display the combined per-cell table (one row per track, all files).
cells_df

Unnamed: 0,length,T,speed,D,DoverT,FMI,PMI,angle,treatment,total_distance,net_distance,linearity,spearmanrsq,progressivity,max_speed,min_speed,avg_speed,MSD_slope,hurst_RS,nongauss,disp_var,disp_skew,rw_linearity,rw_netdist,rw_kurtosis01,rw_kurtosis02,rw_kurtosis03,rw_kurtosis04,rw_kurtosis05,rw_kurtosis06,rw_kurtosis07,rw_kurtosis08,rw_kurtosis09,rw_kurtosis10,avg_moving_speed01,avg_moving_speed02,avg_moving_speed03,avg_moving_speed04,avg_moving_speed05,avg_moving_speed06,...,mean_protr_norm_radii_std,mean_retr_angle_std,mean_retr_norm_radii_std,median_centroidx_std,median_centroidy_std,minor_axis_length_std,orientation_std,perimeter_std,polarity_angle_std,protr_angle_std,protr_norm_area_std,protr_norm_radii_std,retr_angle_std,retr_norm_area_std,retr_norm_radii_std,solidity_std,p_rturn_9_5,p_rturn_9_6,p_rturn_10_5,p_rturn_10_6,p_rturn_11_5,p_rturn_11_6,mean_theta_9_5,min_theta_9_5,max_theta_9_5,mean_theta_9_6,min_theta_9_6,max_theta_9_6,mean_theta_10_5,min_theta_10_5,max_theta_10_5,mean_theta_10_6,min_theta_10_6,max_theta_10_6,mean_theta_11_5,min_theta_11_5,max_theta_11_5,mean_theta_11_6,min_theta_11_6,max_theta_11_6
0,14.0,79.315557,6.101197,39.318557,0.495723,-0.374075,-0.325283,-2.425849,controls,79.315557,39.318557,0.661761,0.267502,0.495723,7.535190,1.094601,6.101197,1.276487,0.855765,-0.078281,19.687270,1.201642,0.452238,7.034058,0.254801,-0.402738,-1.630570,-1.500000,-2.000000,-2.000000,-3.000000,-3.000000,-3.000000,-3.000000,3.313885,3.313885,4.014579,4.288159,0.000000,0.000000,...,0.080260,1.757886,0.111328,7.511164,10.330593,20.252348,0.597602,77.469462,1.336664,1.762560,0.211239,0.133956,1.490870,0.149889,0.156845,0.048763,,,,,,,,,,,,,,,,,,,,,,,,
1,11.0,60.919323,6.091932,36.326730,0.596309,0.063527,0.592915,1.464061,controls,60.919323,36.326730,0.008562,0.021510,0.596309,4.902000,1.315547,6.091932,1.594285,0.747602,-0.443827,8.170733,-0.038164,-0.184705,7.753035,-1.450119,-0.882551,-1.500000,-2.000000,-3.000000,-3.000000,-3.000000,-3.000000,-3.000000,,3.632673,3.632673,3.632673,0.000000,0.000000,0.000000,...,0.174829,2.200002,0.105622,11.021467,1.167748,4.504313,0.931308,31.353900,0.029153,1.726570,0.125829,0.281843,1.405883,0.056907,0.210082,0.061244,,,,,,,,,,,,,,,,,,,,,,,,
2,15.0,115.374903,8.241065,105.701312,0.916155,0.838571,-0.368971,-0.414507,controls,115.374903,105.701312,0.953554,0.960997,0.916155,12.169836,2.189203,8.241065,1.696208,0.860559,-0.030765,46.297095,0.959455,0.724705,60.563143,-0.599386,-1.433019,-0.820566,-1.500000,-2.000000,-2.000000,-3.000000,-3.000000,-3.000000,-3.000000,6.919993,6.919993,6.919993,6.919993,7.524933,7.524933,...,0.127621,1.200844,0.164883,11.675473,22.495926,10.808469,0.756467,35.473968,1.927043,2.037653,0.125824,0.185122,1.686477,0.111061,0.310183,0.061014,,,,,,,,,,,,,,,,,,,,,,,,
3,22.0,252.207166,12.009865,80.818268,0.320444,0.132986,-0.291546,-1.142848,controls,252.207166,80.818268,0.086318,0.065948,0.320444,11.859584,1.153811,12.009865,0.557203,0.720637,-0.370731,41.762731,0.323751,-0.128091,1.153836,-1.020057,-1.595857,-1.263875,-1.732684,-1.414633,-1.500000,-2.000000,-2.000000,-2.000000,-2.000000,4.547553,4.547553,4.947923,5.323725,5.936548,6.442724,...,0.126316,1.823935,0.120493,11.134071,17.554615,5.702751,0.664230,66.232763,1.804285,1.891284,0.125933,0.192930,2.024330,0.092558,0.276751,0.110853,,,,,,,,,,,,,,,,,,,,,,,,
4,18.0,101.362675,5.962510,84.354559,0.832205,0.330891,-0.763595,-1.161888,controls,101.362675,84.354559,0.697246,0.939715,0.832205,7.893282,2.080062,5.962510,1.808568,0.915649,-0.285926,11.389721,0.824768,0.453551,48.579497,0.158468,1.186477,-0.425867,-0.952600,-1.500000,-2.000000,-2.000000,-2.000000,-3.000000,-3.000000,6.140330,6.140330,6.140330,6.140330,6.687582,6.687582,...,0.255920,0.986390,0.266770,23.051874,9.763210,11.031572,1.483117,21.075424,2.624500,1.253233,0.037560,0.475937,1.025868,0.048496,0.458975,0.051797,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176,30.0,249.003531,8.586329,73.019014,0.293245,0.155419,0.248671,1.012197,controls,249.003531,73.019014,0.370038,0.407045,0.293245,6.946863,1.315547,8.586329,1.248924,0.847818,0.715365,82.842409,1.834603,0.133161,6.509598,2.529002,0.002056,-1.177953,-0.668721,-0.619641,-0.782316,-0.733315,-1.500000,-1.500000,-2.000000,2.817561,3.030874,3.440587,0.000000,0.000000,0.000000,...,0.201735,2.409037,0.190023,17.512795,7.517030,6.762510,0.892772,68.614083,1.307881,2.042481,0.401535,0.259070,2.139452,0.242749,0.301643,0.135022,0.214286,0.230769,0.357143,0.384615,0.285714,0.307692,0.938734,0.202520,1.992825,1.064256,0.037020,2.200271,1.138246,0.007722,2.097852,1.247935,0.160421,2.100500,1.217509,0.322953,1.997583,1.197729,0.089171,2.250510
177,62.0,735.234444,12.053024,135.591212,0.184419,-0.171945,-0.066673,-2.771687,controls,735.234444,135.591212,0.001307,0.022358,0.184419,16.237611,0.516000,12.053024,0.685532,0.852154,-0.190636,55.472831,0.851871,-0.237779,1.374421,0.287124,-1.155889,-1.432428,-1.168153,0.204640,-1.305263,-0.519318,0.424072,-0.346808,0.616920,4.278555,4.993213,5.673139,6.306726,8.349252,9.803724,...,0.218782,1.829764,0.216208,21.424353,26.006255,3.525576,0.911688,32.060028,1.742230,1.881202,0.227120,0.292785,1.851853,0.116507,0.343266,0.087592,0.600000,0.600000,0.500000,0.500000,0.428571,0.428571,0.664545,0.081513,1.426561,0.638806,0.013065,1.487301,0.805314,0.009537,2.088774,0.813812,0.062039,2.012520,0.905981,0.073596,2.229412,0.925610,0.025122,2.096511
178,15.0,132.669191,9.476371,41.999357,0.316572,0.058341,-0.311150,-1.385448,controls,132.669191,41.999357,0.034517,0.053350,0.316572,6.693099,0.729734,9.476371,1.022779,0.431830,-0.234156,36.603878,0.709671,-0.202682,-9.904863,-0.932667,-0.989008,-0.746109,-1.500000,-2.000000,-2.000000,-3.000000,-3.000000,-3.000000,-3.000000,3.288861,3.288861,3.389944,0.000000,0.000000,0.000000,...,0.124232,1.941904,0.238446,10.996536,8.526150,7.203533,0.494627,32.527696,1.257691,1.922160,0.088527,0.216459,1.837983,0.063975,0.300855,0.054784,0.000000,0.500000,0.500000,0.500000,0.500000,0.500000,1.272955,1.272955,1.272955,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
179,49.0,323.496355,6.739507,83.998714,0.259659,0.191409,0.175458,0.741947,controls,323.496355,83.998714,0.370569,0.155243,0.259659,7.513073,0.000000,6.739507,1.316408,0.878094,0.069378,29.161875,1.097605,0.112269,17.280995,1.040489,5.306496,0.300346,0.076857,-0.796350,-0.603085,-0.990992,-0.480545,-0.869998,-1.133246,2.629846,3.729851,4.074609,4.742546,5.493726,0.000000,...,0.177296,1.837121,0.194378,15.634103,18.509765,10.202180,0.878173,164.725855,1.718780,1.726180,0.159994,0.238770,1.855297,0.075562,0.278168,0.100927,0.416667,0.416667,0.363636,0.363636,0.272727,0.272727,0.492767,0.010501,1.148912,0.524893,0.013567,1.079263,0.449224,0.032579,0.970456,0.489080,0.061607,0.914661,0.471120,0.031064,1.146430,0.500026,0.034130,1.076782


### Make csv file and save to GCP

In [None]:
# Output file name: treatment name followed by a fixed suffix.
df_file_name = f"{treatment}_cells_df.csv"

In [None]:
# Write the combined per-cell table to a CSV file in the current working
# directory (the row index is written as the first column).
cells_df.to_csv(df_file_name)
# Leftover snippet for reloading a saved table; `path` is not defined in this notebook.
#cells_df = pd.read_csv(path+'cell_df.csv',index_col=0)

In [None]:
!gsutil cp {df_file_name} {output_path}

Copying file://controls_cells_df.csv [Content-Type=text/csv]...
-
Operation completed over 1 objects/413.8 KiB.                                    
