# Training data
## 3D-STORM experiments with two cameras are performed on control samples in which the labels are of only one colour. The procedure is repeated to obtain positive controls for the AF647 and CF660C dyes. The data are filtered and cleaned in the same way as all other 3D-STORM data. The data from the two single-colour experiments are then appended, together with their dye labels, to create the training data.

In [1]:
# Import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits import mplot3d

In [2]:
# Load the AF647 control localisations from CSV and preview them.
# NOTE(review): the file name 'df_sample.csv' does not identify the dye --
# confirm that it is the AF647 positive-control export.
af647_csv_path = 'df_sample.csv'
df_AF647 = pd.read_csv(af647_csv_path)
n_rows, n_cols = df_AF647.shape
print((n_rows, n_cols))
df_AF647.iloc[:2]

(13795, 8)


Unnamed: 0,id,frame,x,y,z,intensity_cam1,intensity_cam2,cell_cluster
0,49322,4112.0,10693.924395,5805.306917,140.761505,7038.15,1093.590454,0
1,62185,16188.0,11682.145613,3405.061996,168.11404,3026.4148,406.817261,2


In [3]:
# Tag every AF647 row with class label 0 in a new "idx" column
# (the CF660C rows receive label 1 when they are loaded).
df_AF647 = df_AF647.assign(idx=0)
print(df_AF647.shape)
df_AF647.head(2)

(13795, 9)


Unnamed: 0,id,frame,x,y,z,intensity_cam1,intensity_cam2,cell_cluster,idx
0,49322,4112.0,10693.924395,5805.306917,140.761505,7038.15,1093.590454,0,0
1,62185,16188.0,11682.145613,3405.061996,168.11404,3026.4148,406.817261,2,0


In [4]:
# Add the natural log of each camera's intensity as a new column.
df_AF647 = df_AF647.assign(
    log_intensity_cam1=np.log(df_AF647['intensity_cam1']),
    log_intensity_cam2=np.log(df_AF647['intensity_cam2']),
)

In [5]:
# Sanity check: print the (rows, columns) shape and show the first two rows
# so the newly added log-intensity columns can be inspected.
n_rows, n_cols = df_AF647.shape
print((n_rows, n_cols))
df_AF647.iloc[:2]

(13795, 11)


Unnamed: 0,id,frame,x,y,z,intensity_cam1,intensity_cam2,cell_cluster,idx,log_intensity_cam1,log_intensity_cam2
0,49322,4112.0,10693.924395,5805.306917,140.761505,7038.15,1093.590454,0,0,8.859101,6.997222
1,62185,16188.0,11682.145613,3405.061996,168.11404,3026.4148,406.817261,2,0,8.015134,6.008364


In [6]:
# Classification works best when the feature distributions are close to
# Gaussian, so the log of the intensity is used as the feature (note that
# the log of intensity is also far from Gaussian).
# Keep only the two log-intensity features and the class label.
training_columns = ['log_intensity_cam1', 'log_intensity_cam2', 'idx']
df_AF647 = df_AF647.loc[:, training_columns]
print(df_AF647.shape)
df_AF647.head(2)

(13795, 3)


Unnamed: 0,log_intensity_cam1,log_intensity_cam2,idx
0,8.859101,6.997222,0
1,8.015134,6.008364,0


In [7]:
# Load the CF660C control data and prepare it the same way as the AF647 frame:
# class label 1, natural log of both camera intensities, then keep only the
# two log-intensity features and the label.
# NOTE(review): the file name 'df_all.csv' does not identify the dye --
# confirm that it is the CF660C positive-control export.
df_CF660C = (
    pd.read_csv('df_all.csv')
    .assign(
        idx=1,
        log_intensity_cam1=lambda raw: np.log(raw['intensity_cam1']),
        log_intensity_cam2=lambda raw: np.log(raw['intensity_cam2']),
    )
    .loc[:, ['log_intensity_cam1', 'log_intensity_cam2', 'idx']]
)
print(df_CF660C.shape)
df_CF660C.head(2)

(13795, 3)


Unnamed: 0,log_intensity_cam1,log_intensity_cam2,idx
0,7.969882,6.153852,1
1,7.804986,6.118971,1


In [8]:
# Stack the AF647 (idx=0) and CF660C (idx=1) rows into one training frame.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0. As with append, each frame keeps its own row labels,
# so index values repeat across the two halves of df_train.
df_train = pd.concat([df_AF647, df_CF660C])
print(df_train.shape)
df_train.head(2)

(27590, 3)


Unnamed: 0,log_intensity_cam1,log_intensity_cam2,idx
0,8.859101,6.997222,0
1,8.015134,6.008364,0


In [9]:
# Save the training data, which holds an equal number of AF647 and CF660C
# rows, as a CSV file; the row index is not written.
training_csv_path = 'AF647_CF660C_training_data.csv'
df_train.to_csv(training_csv_path, index=False)