In [1]:
import numpy as np
import pandas
import scipy.linalg
import tqdm

In [2]:
def whitening_transform(X, lambda_):
    """Compute a regularised, symmetric whitening matrix for the rows of ``X``.

    The second-moment matrix ``C = X.T @ X / n_samples`` is eigendecomposed
    as ``V diag(s) V.T`` and the result is
    ``W = V diag(1 / sqrt(s + lambda_)) V.T``.  ``X`` is *not* mean-centred
    here, so ``C`` is a covariance matrix only if the caller centred ``X``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Samples in rows, features in columns.
    lambda_ : float
        Value added to every eigenvalue before taking the inverse square
        root; keeps the result finite when ``C`` is (near-)singular.

    Returns
    -------
    numpy.ndarray, shape (n_features, n_features)
        The symmetric matrix ``W``.

    Raises
    ------
    ValueError
        If ``X`` contains no rows.
    """
    X = np.asarray(X)
    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError("whitening_transform needs at least one sample (X has 0 rows)")

    # Use a float literal so the reciprocal is never truncated by integer
    # division (Python 2 semantics).
    C = (1.0 / n_samples) * np.dot(X.T, X)
    s, V = scipy.linalg.eigh(C)

    # C is positive semi-definite by construction, but round-off can produce
    # tiny negative eigenvalues; clamp them so the square root stays real even
    # for very small lambda_.
    s = np.maximum(s, 0.0)
    inv_sqrt = 1. / np.sqrt(s + lambda_)

    # Broadcasting scales column j of V by inv_sqrt[j], which equals
    # V @ diag(inv_sqrt) without materialising a dense diagonal matrix.
    W = np.dot(V * inv_sqrt, V.T)
    return W

def whiten(X, mu, W):
    """Subtract ``mu`` from ``X`` and right-multiply the result by ``W``."""
    shifted = X - mu
    projected = np.dot(shifted, W)
    return projected

In [3]:
# Location of the per-well feature table that this notebook re-whitens.
path = "/data1/santiago/BBBC021/deep_profiler/collapsed/cp-segmentation/Whitened.csv"

# Read the table into memory; later cells expect 'Plate' and 'Well' columns.
whitened = pandas.read_csv(filepath_or_buffer=path)

In [4]:
# The week label is the part of the plate identifier before the first '_'
# (e.g. 'Week1_22381' -> 'Week1').
whitened['Week'] = whitened['Plate'].map(lambda plate_id: plate_id.partition('_')[0])

In [5]:
# Distinct week labels, in order of first appearance; the whitening loop
# iterates over these.
week_labels = whitened['Week']
weeks = week_labels.unique()

In [6]:
# Image-level metadata; later cells use its Metadata_Plate, Metadata_Well and
# Image_Metadata_Compound columns.
metadata_path = "/data1/santiago/BBBC021/metadata/metadata.csv"
metadata = pandas.read_csv(metadata_path)

In [7]:
# Keep only the first metadata row for each (plate, well) pair.
well_key = ['Metadata_Plate', 'Metadata_Well']
metadata = metadata.drop_duplicates(subset=well_key)

In [8]:
# Attach metadata to every feature row.  A left join keeps every row of
# `whitened` (unmatched rows just get NaN metadata), whereas the default inner
# join drops unmatched rows and renumbers the result.  Later cells use
# `merged.index` as row positions into an array sized from `whitened`, so the
# two frames must stay row-for-row aligned.
merged = pandas.merge(
    whitened,
    metadata,
    how='left',
    left_on=['Plate', 'Well'],
    right_on=['Metadata_Plate', 'Metadata_Well'],
)

In [9]:
# Preview the Week1 rows whose compound is DMSO.  Both conditions are combined
# into a single mask: chaining two boolean selections makes pandas re-align
# the second mask to the already-filtered frame and emit a UserWarning.
merged[(merged['Week'] == 'Week1') & (merged['Image_Metadata_Compound'] == 'DMSO')]

  """Entry point for launching an IPython kernel.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,ImageNumber,Replicate,Image_Metadata_Compound,Image_Metadata_Concentration,Metadata_Plate,Metadata_Well,DAPI,Tubulin,Actin,Metadata_Site
64,0.084055,0.063972,-0.115303,-0.016474,-0.154242,0.479887,-0.121175,0.015247,-0.011805,0.059429,...,3201,2,DMSO,0.0,Week1_22381,D02,Week1_22381/Week1_150607_D02_s1_w194EE559B-B55...,Week1_22381/Week1_150607_D02_s1_w25D370D23-DC8...,Week1_22381/Week1_150607_D02_s1_w42AA0CE16-02C...,s1
157,-0.030774,0.189381,-0.427901,0.014803,0.107679,-0.332538,-0.091502,0.15283,-0.023164,-0.0781,...,3401,3,DMSO,0.0,Week1_22401,C02,Week1_22401/Week1_150607_C02_s1_w17B99FAC9-5BE...,Week1_22401/Week1_150607_C02_s1_w299B74587-3AF...,Week1_22401/Week1_150607_C02_s1_w4E4A8B70C-CAA...,s1
164,-0.22845,-0.062562,-0.083316,-0.058027,0.045116,0.547911,-0.083293,0.124406,-0.079787,0.055306,...,3441,3,DMSO,0.0,Week1_22401,D02,Week1_22401/Week1_150607_D02_s1_w101A5701E-5D3...,Week1_22401/Week1_150607_D02_s1_w21E99755F-136...,Week1_22401/Week1_150607_D02_s1_w46CB78ECC-BEA...,s1
252,-0.010786,0.036437,-0.041283,-0.172924,0.25793,0.03956,0.152114,0.016852,-0.006552,0.018264,...,2961,1,DMSO,0.0,Week1_22361,D02,Week1_22361/Week1_150607_D02_s1_w105AB7990-5B1...,Week1_22361/Week1_150607_D02_s1_w297C73E52-531...,Week1_22361/Week1_150607_D02_s1_w4CA0A136A-664...,s1
443,0.08606,0.421338,0.032175,-0.127289,-0.163522,0.053478,0.09728,-0.011472,0.007743,-0.046567,...,3317,2,DMSO,0.0,Week1_22381,F11,Week1_22381/Week1_150607_F11_s1_w1E479FE88-06D...,Week1_22381/Week1_150607_F11_s1_w26CA45789-0D1...,Week1_22381/Week1_150607_F11_s1_w40DEA96F7-BA2...,s1
609,-0.008585,0.027789,-0.166642,-0.029891,-0.133293,-0.430339,-0.029325,-0.01588,-0.098586,0.101799,...,637,3,DMSO,0.0,Week1_22161,E11,Week1_22161/Week1_150607_E11_s1_w1F5053E7F-88C...,Week1_22161/Week1_150607_E11_s1_w28E54C770-D29...,Week1_22161/Week1_150607_E11_s1_w43B189253-C5D...,s1
652,0.061407,-0.156867,0.008264,0.054368,-0.122605,-0.34306,-0.217836,-0.196519,0.12903,0.186781,...,481,3,DMSO,0.0,Week1_22161,B02,Week1_22161/Week1_150607_B02_s1_w1B3A2C1A3-E70...,Week1_22161/Week1_150607_B02_s1_w2E94BC967-6BA...,Week1_22161/Week1_150607_B02_s1_w418C52CF8-E96...,s1
679,0.10802,0.181791,-0.093382,0.120559,-0.86162,0.356109,-0.262251,-0.006041,-0.066512,-0.139305,...,437,2,DMSO,0.0,Week1_22141,F11,Week1_22141/Week1_150607_F11_s1_w19012864B-160...,Week1_22141/Week1_150607_F11_s1_w2A59CF179-7D3...,Week1_22141/Week1_150607_F11_s1_w44CE9A16E-C2A...,s1
734,-0.024692,-0.1078,0.063747,0.074335,0.316623,-0.595144,0.26197,-0.245425,-0.043123,0.012694,...,3117,1,DMSO,0.0,Week1_22361,G11,Week1_22361/Week1_150607_G11_s1_w1177CE3C6-E95...,Week1_22361/Week1_150607_G11_s1_w295E4DA49-E19...,Week1_22361/Week1_150607_G11_s1_w41FCBABBC-ECF...,s1
803,-0.031709,0.030951,-0.014497,-0.121402,0.443321,0.137407,-0.189181,-0.233452,0.182138,-0.086177,...,237,1,DMSO,0.0,Week1_22123,G11,Week1_22123/Week1_150607_G11_s1_w1B2989688-6B6...,Week1_22123/Week1_150607_G11_s1_w2665E7CDD-FD7...,Week1_22123/Week1_150607_G11_s1_w4CDDF1C00-D50...,s1


In [10]:
# Zero-filled output buffer with the same shape and dtype as the feature
# matrix (every column of `whitened` except the identifier columns).
identifier_columns = ['Plate', 'Well', 'Week']
final = np.zeros_like(whitened.drop(identifier_columns, axis=1).values)

In [11]:
# Regularisation term passed to whitening_transform (added to each eigenvalue).
REGULARIZATION = 1

# Feature columns = every column of `whitened` except the identifiers.
# Computed once instead of on every loop iteration.
feature_columns = [c for c in whitened.columns if c not in ('Plate', 'Well', 'Week')]

# Rows of `final` are addressed through `merged.index`, and a later cell pairs
# row i of `final` with row i of `whitened`.  That is only valid if the merge
# kept every row of `whitened` in its original order, so fail loudly instead
# of writing features to the wrong wells.
if not np.array_equal(merged[['Plate', 'Well']].values, whitened[['Plate', 'Well']].values):
    raise ValueError(
        "`merged` is not row-aligned with `whitened` (the merge dropped or "
        "duplicated rows); per-week results would be written to the wrong rows"
    )

for week in tqdm.tqdm(weeks):
    in_week = merged['Week'] == week
    # A single combined mask avoids the chained-boolean-indexing UserWarning.
    is_control = in_week & (merged['Image_Metadata_Compound'] == 'DMSO')

    control = np.asarray(merged.loc[is_control, feature_columns])
    if control.shape[0] == 0:
        raise ValueError("no DMSO control rows found for week %r" % (week,))

    # The whitening matrix is estimated from this week's DMSO rows only.
    transform = whitening_transform(control, REGULARIZATION)

    # No centring is applied (mu is all zeros); its length follows the data
    # rather than a hard-coded feature count.
    mu = np.zeros(control.shape[1])

    wells = merged.loc[in_week, feature_columns]
    final[wells.index, :] = whiten(np.asarray(wells), mu, transform)

  
100%|██████████| 10/10 [03:08<00:00, 18.77s/it]


In [12]:
# Wrap the whitened features in a frame and append the well identifiers
# column-wise (rows are matched on the index).
well_ids = whitened[['Plate', 'Well']]
df = pandas.concat([pandas.DataFrame(final), well_ids], axis=1)

In [13]:
# Show only the first rows; rendering the full frame bloats the notebook.
df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4600,4601,4602,4603,4604,4605,4606,4607,Plate,Well
0,1.045856,-8.217713,7.319107,1.079954,11.961251,23.270967,-12.576463,11.175324,-1.405099,-1.714181,...,0.547248,-8.076076,11.786559,23.345585,-14.915505,-2.099111,6.650023,-0.621872,Week3_25421,E04
1,-5.103327,-4.208507,-6.932859,-5.630883,17.445592,25.855781,8.863179,-1.633716,-4.043558,-3.825398,...,5.377088,-4.983345,-7.125988,24.221530,1.726485,5.165781,-10.872771,31.315394,Week9_39301,F06
2,3.647798,-0.584429,0.532424,-3.332369,0.920930,-8.233509,0.917298,-0.899513,-1.913708,3.325481,...,1.161286,-1.186139,-0.412475,9.883107,3.273391,0.062595,-0.887032,-5.933053,Week3_25721,F09
3,-5.169633,-2.833864,1.597421,4.112393,4.495391,2.703490,-0.406763,5.340621,-1.442215,3.026906,...,3.099317,-3.003977,-0.315795,-1.778169,-3.003719,7.013524,-4.648930,-8.954273,Week6_32121,G08
4,-7.664674,46.131484,-18.958777,59.066612,-5.079041,59.282506,49.973337,0.055009,2.925467,-5.112500,...,22.038719,7.089911,-18.015365,23.362867,-30.026921,29.991117,25.308759,33.133083,Week10_40111,B11
5,-3.966246,16.762148,-2.540219,12.383924,9.260156,-0.014553,-3.859361,7.438973,6.561801,0.580342,...,-6.865646,7.736207,11.928265,-5.948514,-8.511113,13.123533,9.769752,15.385819,Week1_22123,D07
6,-3.178015,2.887197,-0.277280,-8.302788,0.616343,2.022360,-0.646094,-0.169136,-1.698583,-0.959962,...,-1.237343,0.223154,-2.721256,-0.842146,2.050672,-1.491063,3.806082,-3.903774,Week5_28901,D10
7,-8.135165,-5.721061,-2.758328,7.843908,10.418542,29.718288,10.063911,5.061364,-0.387905,4.171386,...,16.466353,4.790195,-2.008376,20.406419,1.792089,6.325463,-5.496009,20.125400,Week7_34641,D04
8,-0.714267,-1.362004,2.596336,3.970473,0.147063,4.759903,-0.375622,2.731916,-0.560701,1.085300,...,1.359957,-6.922055,-4.578092,-8.513787,2.589737,2.993648,1.196600,3.616101,Week4_27542,D07
9,6.251768,-1.378362,0.976653,0.904125,1.476171,-0.260127,-0.582971,-3.963130,0.789305,0.808370,...,-1.235435,1.168142,0.072745,-1.631541,-0.792052,-0.655169,-2.205930,7.929156,Week4_27821,E04


In [14]:
# Persist the re-whitened features next to the input table, without the
# DataFrame index.
output_path = "/data1/santiago/BBBC021/deep_profiler/collapsed/cp-segmentation/Whitened2.csv"
df.to_csv(output_path, index=False)