In [3]:

from pycytominer.normalize import normalize
import pandas as pd
import numpy as np

# Create dummy data: two plates ("a" and "b"), each with two "drug" rows and
# two "control" rows, measured on five integer-valued feature columns.
X = pd.DataFrame(
    {
        "Metadata_plate": ["a", "a", "a", "a", "b", "b", "b", "b"],
        "Metadata_treatment": [
            "drug",
            "drug",
            "control",
            "control",
            "drug",
            "drug",
            "control",
            "control",
        ],
        "x": [1, 2, 8, 2, 5, 5, 5, 1],
        "y": [3, 1, 7, 4, 5, 9, 6, 1],
        "z": [1, 8, 2, 5, 6, 22, 2, 2],
        "zz": [1, 4, 1, 6, 3, 10, 2, 2],
        "yy": [3, 14, 1, 26, 33, 1, 21, 32],
    }
).reset_index(drop=True)

features = ["x", "y", "z", "zz", "yy"]

# Fixed seed so the random batch below (and everything derived from it) is
# identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)

X = X.assign(Metadata_Batch="X")

# Batch "Y" reuses X's metadata but replaces the features with Gaussian noise
# scaled by 10. The columns are assigned by name rather than through
# `.loc[:, features]`: writing floats into the existing integer columns via
# `.loc` makes recent pandas versions emit an incompatible-dtype FutureWarning,
# whereas column assignment simply swaps in new float columns.
Y = X.copy()
Y[features] = rng.standard_normal((X.shape[0], len(features))) * 10
Y = Y.assign(Metadata_Batch="Y")

# Stack both batches: the rows of X first, then the rows of Y, with a fresh index.
Z = pd.concat([X, Y], axis=0, ignore_index=True)


In [4]:
def run_normalize(df):
    """Run pycytominer's ``normalize`` on ``df`` with this notebook's fixed settings.

    The feature columns are taken from the notebook-level ``features`` list,
    ``meta_features`` is set to ``"infer"``, and the query string
    ``Metadata_treatment == 'control'`` is passed as ``samples``.

    Parameters
    ----------
    df : the profiles to normalize; forwarded as ``profiles``.

    Returns
    -------
    Whatever ``normalize`` returns for these arguments.
    """
    # Spherize-specific options, grouped so the transform choice is easy to spot.
    spherize_settings = dict(
        method="Spherize",
        spherize_method="PCA",
        spherize_epsilon=0,
    )
    return normalize(
        profiles=df,
        features=features,
        meta_features="infer",
        samples="Metadata_treatment == 'control'",
        **spherize_settings,
    )

# Normalize the combined frame (both batches at once). Z is rebound to the
# result, so re-running this cell would normalize the already-normalized frame.
Z = run_normalize(Z)

In [5]:
# Split the jointly normalized frame back into two pieces by position: the
# first X.shape[0] rows are rebound to X, the remaining rows to Y.
# NOTE(review): this assumes `normalize` kept the original row order - confirm.
n_rows_x = X.shape[0]
X = Z.iloc[:n_rows_x].copy()
Y = Z.iloc[n_rows_x:].copy()


In [6]:
# Normalize each batch on its own and rebind X and Y to the results.
X, Y = [run_normalize(batch) for batch in (X, Y)]


In [7]:
# Keep only the control rows and the feature columns of each normalized frame.
control_query = "Metadata_treatment == 'control'"
Xc, Yc, Zc = (
    frame.query(control_query).loc[:, features] for frame in (X, Y, Z)
)

# Number of feature columns, i.e. the size of the expected identity matrix.
d = Xc.shape[1]
identity = np.eye(d)

# For each subset, report whether its covariance matrix is numerically the identity.
for question, controls in (
    ("Is the covariance matrix of Xc the identity matrix?", Xc),
    ("Is the covariance matrix of Yc the identity matrix?", Yc),
    ("Is the covariance matrix of Zc the identity matrix?", Zc),
):
    print(question, np.allclose(controls.cov(), identity))

Is the covariance matrix of Xc the identity matrix? False
Is the covariance matrix of Yc the identity matrix? False
Is the covariance matrix of Zc the identity matrix? True


In [8]:
# Covariance matrix of Xc (control rows of X, feature columns only), rounded to 3 decimals.
Xc.cov().round(3)

Unnamed: 0,x,y,z,zz,yy
x,1.0,0.0,0.0,-0.0,0.0
y,0.0,1.0,0.0,-0.0,0.0
z,0.0,0.0,1.0,0.0,-0.0
zz,-0.0,-0.0,0.0,0.0,-0.0
yy,0.0,0.0,-0.0,-0.0,0.0


In [9]:
# Covariance matrix of Yc (control rows of Y, feature columns only), rounded to 3 decimals.
Yc.cov().round(3)

Unnamed: 0,x,y,z,zz,yy
x,1.0,-0.0,0.0,0.0,-0.0
y,-0.0,1.0,0.0,-0.0,0.0
z,0.0,0.0,1.0,-0.0,0.0
zz,0.0,-0.0,-0.0,0.0,-0.0
yy,-0.0,0.0,0.0,-0.0,0.0


In [10]:
# Covariance matrix of Zc (control rows of Z, feature columns only), rounded to 3 decimals.
Zc.cov().round(3)

Unnamed: 0,x,y,z,zz,yy
x,1.0,-0.0,-0.0,0.0,0.0
y,-0.0,1.0,0.0,0.0,0.0
z,-0.0,0.0,1.0,-0.0,-0.0
zz,0.0,0.0,-0.0,1.0,0.0
yy,0.0,0.0,-0.0,0.0,1.0
