In [19]:
############################################################################# 
# Calculate percentage of spots detected by separate smFISH probes           
# Josh Titlow- November 30, 2018                                            
#                                                                            
# Requires 2 folders with output files from FISHquant analysis              
# ******NOTE****** need to delete last row of file SPOTS_END                
#      -files should have the same name                                     
#      -passed as command line arguments                                    
#      -ch1_indir                                                               
#      -ch2_indir                                                           
#                                                                           
# Requires threshold distance (in nm) for assigning co-detection            
#      -passed as command line argument                                         
#      -threshold                                                           
#      -300nm is reasonable                                                 
#    
# Cell below has code for co-detection with output from 3D object counter   
#        
# TODO                                                                     
# add output from                                                               
#############################################################################

import pandas as pd
import numpy as np
from scipy import spatial
import argparse
import sys
import os



# specify the FQ centroid files and co-detection distance threshold in nm
indir = '/Users/joshtitlow/tmp/smFISH_data/codetect/'
infiles = os.listdir(indir)



#threshold_range = [50,100,150,200,250,300,350,400,450,500] 
threshold_range = [300,500]

# create list to store data
filename = []
co_detect = []
ch1_spots = []
ch2_spots = []
threshold = []

for t in threshold_range:
    print "calculating co-detection percentage for threshold =", t, "nm"
    # loop through files
    for i in infiles:
        if i.startswith('C2'):
            if i.endswith('spots.txt'):
                ref_file = os.path.join(indir,i)
                #print "processing", i, "for threshold =", t
                ref_file = pd.read_csv(ref_file, sep='\t', header=18)
                #ref_file = ref_file[~ref_file.Pos_Y.str.contains("SPOTS_END")]
                targ_file = os.path.join(indir, 'C4'+i[2:])
                print targ_file
                targ_file = pd.read_csv(targ_file, sep='\t', header=18)
    
                # get centroid coordinates
                xpos_ref = ref_file['Pos_X']
                ypos_ref = ref_file['Pos_Y']
                zpos_ref = ref_file['Pos_Z']

                xpos_targ = targ_file['Pos_X']
                ypos_targ = targ_file['Pos_Y']
                zpos_targ = targ_file['Pos_Z']

                # convert data into a numpy array
                target_df = np.column_stack((xpos_targ,ypos_targ,zpos_targ))
                #target_df = [target_df.Pos_Y.str.contains("SPOTS_END") == False]

                # create list to store amplitude of co-detected spot, ref/targ ratio, and target distance
                targ_amp = []
                targ_dist = []

                for index, row in ref_file.iterrows():

                    # get 3D position from X,Y,Z position columns
                    pt  = float(row['Pos_X']), float(row['Pos_Y']), float(row['Pos_Z'])
                    # find nearest neighbor and calculate distance
                    distance,index = spatial.KDTree(target_df).query(pt)

                    # add nearest neighbor amp and dist to a list
                    targ_amp.append(targ_file['AMP'].iloc[index])
                    targ_dist.append(distance)

                # add lists to ref_file
                ref_file['target_amp'] = targ_amp
                ref_file['r_t_ratio'] = ref_file['AMP'].div(targ_amp)
                ref_file['targ_dist'] = targ_dist

                # calculate co-detection percentage and add it to list
                codetect = 100 * (float(len(ref_file[ref_file.targ_dist < float(t)])))/(float(len(ref_file.index)))
                print 'codetection %: ', codetect
                co_detect.append(codetect)

                # calculate number of spots and add to list, with filename
                ch1_spots.append(len(ref_file))
                ch2_spots.append(len(targ_file))
                filename.append(i)
                threshold.append(t)

                # write ref_file to csv
                #ref_file.to_csv('test_list.csv', index=False)

# add data to dataframe and save
df = pd.DataFrame({'filename':filename, 'threshold':threshold, 'co_detect':co_detect, 'ch1_spots':ch1_spots, 'ch2_spots':ch2_spots})
df = df[['filename', 'threshold', 'co_detect', 'ch1_spots', 'ch2_spots']]
df.to_csv('codetection_stats.csv', index=False)



calculating co-detection percentage for threshold = 300 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
codetection %:  32.5057885259
calculating co-detection percentage for threshold = 500 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
codetection %:  35.4000514536


In [20]:
ref_file.head()

Unnamed: 0,Pos_Y,Pos_X,Pos_Z,AMP,BGD,RES,SigmaX,SigmaY,SigmaZ,Cent_Y,...,Z_max,INT_raw,INT_filt,SC_det,SC_det_norm,TH_det,TH_fit,target_amp,r_t_ratio,targ_dist
0,28679.0,553.786,795.715,705.514,235.696,7290000.0,128.641,128.641,267.305,411.703,...,8,707,356,62.8662,0.204366,1,1,838.251,0.84165,1944.226627
1,8069.33,220.951,732.682,639.272,329.783,12800000.0,104.248,104.248,231.448,413.026,...,7,419,116,55.3208,0.179837,1,1,442.287,1.445378,772.972198
2,17031.8,787.283,577.949,621.738,292.825,10200000.0,108.3,108.3,280.09,426.837,...,7,633,281,48.6026,0.157997,1,1,669.885,0.928126,1616.109299
3,6710.65,989.219,549.029,769.896,308.886,11200000.0,137.896,137.896,256.777,416.086,...,7,550,193,75.0649,0.244021,1,1,442.287,1.740716,841.396493
4,24057.8,1064.88,595.793,570.515,224.158,5990000.0,113.622,113.622,255.768,422.065,...,7,599,266,43.5273,0.141499,1,1,682.649,0.835737,1456.840216


In [6]:
############################################################################# 
# Calculate nearest neighbor distance between mRNA and granules          
# Josh Titlow- July 11, 2019                                            
#                                                                            
# Requires results from 3D object counter              
# Requires centroid file ('_FISH-QUANT_all_spots_yymmdd.csv') from a FQ bash run                                                      
#                                                                           
# Calculates % of spots within a specified range of distances 
#        
# TODO                                                                     
# -fix plotting                                                             
#############################################################################

import pandas as pd
import numpy as np
from scipy import spatial
import argparse
import sys
import os



# specify directory of files from 3D object counter
indir = '/Users/joshtitlow/tmp/eif4e_experiment/raw_data/'
infiles = os.listdir(indir)

# specify smFISH centroid file ('_FISH-QUANT_all_spots_yymmdd.csv') from a FQ bash run
smFISH_file = '/Users/joshtitlow/tmp/eif4e_experiment/_FISH-QUANT__all_spots_190710.txt'
ref_file = pd.read_csv(smFISH_file, sep='\t', header=13)

# read the smFISH centroid file from the metadata header to get pixel size
px_size = pd.read_csv(smFISH_file, sep='\t', header=6, nrows=2)
px_xy = float(px_size['Pix-XY'].iloc[0])
px_z = float(px_size['Pix-Z'].iloc[0])

#threshold_range = [50,100,150,200,250,300,350,400,450,500] 
threshold_range = [500]

# create list to store data
filename = []
co_detect = []
ref_spots = []
targ_spots = []
threshold = []
NNdistance = []
NNfile = []
NNcentroid = []
    
def codetection():
    for t in threshold_range:
        print "calculating co-detection percentage for threshold =", t, "nm"

        # loop through files
        for i in infiles:

            # create list to store amplitude of co-detected spot, ref/targ ratio, and target distance
            targ_amp = []
            targ_dist = []

            if i.endswith('.csv'):
                targ_file = os.path.join(indir,i)
                targ_file = pd.read_csv(targ_file, header=0)
                print i

                # get centroid coordinates
                xpos_targ = targ_file['XM'] * px_xy
                ypos_targ = targ_file['YM'] * px_xy
                zpos_targ = targ_file['ZM'] * px_z

                # convert data into a numpy array
                target_df = np.column_stack((xpos_targ,ypos_targ,zpos_targ))
                #target_df = [target_df.Pos_Y.str.contains("SPOTS_END") == False]

                for index, row in ref_file.iterrows():

                    if row['File'][:-7]+'objs.csv' == i:

                        # get 3D position from X,Y,Z position columns
                        pt  = float(row['Pos_X']), float(row['Pos_Y']), float(row['Pos_Z'])

                        # find nearest neighbor and calculate distance
                        distance,index = spatial.KDTree(target_df).query(pt)

                        # add nearest neighbor amp and dist to a list
                        #targ_amp.append(targ_file['AMP'].iloc[index])
                        targ_dist.append(distance)
                        NNdistance.append(distance)
                        NNfile.append(i)
                        NNcentroid.append(pt)

                        # add lists to ref_file
                        #ref_file['target_amp'] = targ_amp
                        #ref_file['r_t_ratio'] = ref_file['AMP'].div(targ_amp)
                        #ref_file['targ_dist'] = targ_dist
                        #row['targ_dist'] = distance
                        #ref_file.at[row['File'], 'targ_dist'] = str(distance)

                # calculate co-detection percentage and add it to list
                try:
                    codetect = 100 * (sum(i < t for i in targ_dist) / (float(len(targ_dist))))
                except:
                    codetect = 0
                print 'codetection %: ', codetect
                co_detect.append(codetect)

                # calculate number of spots and add to list, with filename
                ref_spots.append(len(targ_dist))
                targ_spots.append(len(targ_file))
                filename.append(i)
                threshold.append(t)

                # write ref_file to csv
                #ref_file.to_csv('test_list.csv', index=False)

def random_codetection():
    for t in threshold_range:
        print "calculating co-detection percentage for threshold =", t, "nm"

        # loop through files
        for i in infiles:

            # create list to store amplitude of co-detected spot, ref/targ ratio, and target distance
            targ_amp = []
            targ_dist = []

            if i.endswith('.csv'):
                targ_file = os.path.join(indir,i)
                targ_file = pd.read_csv(targ_file, header=0)
                print i

                # get centroid coordinates
                xpos_targ = targ_file['XM'] * px_xy
                np.random.shuffle(xpos_targ)
                ypos_targ = targ_file['YM'] * px_xy
                np.random.shuffle(ypos_targ)
                zpos_targ = targ_file['ZM'] * px_z
                np.random.shuffle(zpos_targ)
                
                # convert data into a numpy array
                target_df = np.column_stack((xpos_targ,ypos_targ,zpos_targ))
                #target_df = [target_df.Pos_Y.str.contains("SPOTS_END") == False]

                for index, row in ref_file.iterrows():

                    if row['File'][:-7]+'objs.csv' == i:

                        # get 3D position from X,Y,Z position columns
                        pt  = float(row['Pos_X']), float(row['Pos_Y']), float(row['Pos_Z'])

                        # find nearest neighbor and calculate distance
                        distance,index = spatial.KDTree(target_df).query(pt)

                        # add nearest neighbor amp and dist to a list
                        #targ_amp.append(targ_file['AMP'].iloc[index])
                        targ_dist.append(distance)
                        NNdistance.append(distance)
                        NNfile.append(i)
                        NNcentroid.append(pt)

                        # add lists to ref_file
                        #ref_file['target_amp'] = targ_amp
                        #ref_file['r_t_ratio'] = ref_file['AMP'].div(targ_amp)
                        #ref_file['targ_dist'] = targ_dist
                        #row['targ_dist'] = distance
                        #ref_file.at[row['File'], 'targ_dist'] = str(distance)

                # calculate co-detection percentage and add it to list
                try:
                    codetect = 100 * (sum(i < t for i in targ_dist) / (float(len(targ_dist))))
                except:
                    codetect = 0
                print 'codetection %: ', codetect
                co_detect.append(codetect)

                # calculate number of spots and add to list, with filename
                ref_spots.append(len(targ_dist))
                targ_spots.append(len(targ_file))
                filename.append(i)
                threshold.append(t)

                # write ref_file to csv
                #ref_file.to_csv('test_list.csv', index=False)    

codetection()
    
# add data to dataframe and save
df = pd.DataFrame({'filename':filename, 'threshold':threshold, 'co_detect':co_detect, 'ref_spots':ref_spots, 'targ_spots':targ_spots})
df = df[['filename', 'threshold', 'co_detect', 'ref_spots', 'targ_spots']]
df.to_csv('codetection_stats.csv', index=False)

# add NNdistance data to separate dataframe and save
df = pd.DataFrame({'filename':NNfile, 'distance':NNdistance, 'centroid':NNcentroid})
df = df[['filename', 'distance', 'centroid']]
df.to_csv('NN_stats.csv', index=False)

calculating co-detection percentage for threshold = 500 nm
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p4s4r_AL_objs.csv
codetection %:  0.0
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p4s3r_AL_objs.csv
codetection %:  1.6203703703703702
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p3s3r_AL_objs.csv
codetection %:  0.7357859531772575
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p3s4r_AL_objs.csv
codetection %:  2.6474127557160045
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p2s4r_AL_objs.csv
codetection %:  0.5154639175257731
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p4s5r_AL_objs.csv
codetection %:  1.6096579476861168
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p1s4r_AL_objs.csv
codetection %:  2.7501462843768287
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p2s3l_0001_AL_objs.csv
codetection %:  0.0
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p5s4l_AL_objs.csv
codetection %:  1.5122873345935728
20190621_eIF4eGFP_msp670_syp568_HRP_viol_stim_p5s3l_AL_objs.csv
codetection

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

threshold_range = [200,2000,20000,200000]
x = threshold_range
y = ([0.4, 1, 10, 50])
plt.plot(x,y)
plt.ylabel('some numbers')
plt.show()


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections

test_file = pd.read_csv('codetection_stats.csv', header=0)
thresholds = collections.OrderedDict.fromkeys(sorted(set(test_file['threshold'])), 0) 

for threshold in thresholds.keys():
    codetect = []
    for index, row in test_file.iterrows(): 
        if row['threshold'] == threshold:
            codetect.append(row['co_detect'])
    mean_codetect = np.mean(codetect)
    thresholds.update({threshold: mean_codetect})

print thresholds
#plt.plot(list(thresholds),list(thresholds.values()))
#plt.ylabel('some numbers')
#plt.show()

OrderedDict([(200, 0.010495166321788237), (2000, 8.391300996998295), (20000, 79.58979494157295), (200000, 100.0)])


In [None]:
random = np.random.shuffle(ch1)

In [6]:
y = list(thresholds.values())
x = list(thresholds)

In [None]:
import pandas as pd
import numpy as np
from scipy import spatial
import argparse
import sys
import os



# specify the FQ centroid files and co-detection distance threshold in nm
data = read_table('Results.txt', header=True, delim_whitespace=True)

# create list to store data
masked_signal = []
co_detect = []
ch1_spots = []
ch2_spots = []
threshold = []

for index, row in ref_file.iterrows():
    if row.endswith("")

In [6]:
import pandas as pd
import numpy as np
from scipy import spatial
import argparse
import sys
import os



# specify the FQ centroid files and co-detection distance threshold in nm
indir = '/Users/joshtitlow/tmp/smFISH_data/codetect/'
infiles = os.listdir(indir)

threshold_range = [50,100,150,200,250,300,350,400,450,500] 


# create list to store data
filename = []
co_detect = []
ch1_spots = []
ch2_spots = []
threshold = []

for t in threshold_range:
    print "calculating co-detection percentage for threshold =", t, "nm"
    # loop through files
    for i in infiles:
        if i.startswith('C2'):
            if i.endswith('spots.txt'):
                ref_file = os.path.join(indir,i)
                #print "processing", i, "for threshold =", t
                ref_file = pd.read_csv(ref_file, sep='\t', header=18)
                #ref_file = ref_file[~ref_file.Pos_Y.str.contains("SPOTS_END")]
                targ_file = os.path.join(indir, 'C4'+i[2:])
                print targ_file

calculating co-detection percentage for threshold = 50 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
calculating co-detection percentage for threshold = 100 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
calculating co-detection percentage for threshold = 150 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
calculating co-detection percentage for threshold = 200 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
calculating co-detection percentage for threshold = 250 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
calculating co-detection percentage for threshold = 300 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
calculating co-detection percentage for threshold = 350 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.ome__spots.txt
calculating co-detection percentage for threshold = 400 nm
/Users/joshtitlow/tmp/smFISH_data/codetect/C4-758201.