In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from toxifate import *
import pycytominer as pcm
import deepod as dp

# start 01/08/2023, work in progress
# following the trend of top features (high STD) along the concentration gradient in Cell Painting data


pd.options.mode.chained_assignment = None  # default='warn'

# read in Cell Painting data
path = r"C:\Users\Roman\OneDrive - National University of Ireland, Galway\CellPainting project data\CellProfiler\PerWell\tubesPolyByWellProfiler.csv"
cp_df = pd.read_csv(path).drop(['Unnamed: 0','Count','Row','Column'], axis=1, errors='ignore')
# drop inf and nan values
cp_df = cp_df.replace([np.inf, -np.inf], np.nan).dropna(axis=1, how="any")

if 'Concentration' in cp_df.columns:
    file_type = 'harmony'
    conc_col = 'Concentration'
    print('Harmony data')
elif 'Metadata_Concentration (Image)' in cp_df.columns:
    file_type = 'cellprofiler'
    conc_col = 'Metadata_Concentration (Image)'
    print('Cell Profiler data')

feature_categories = cp_feature_classifier(cp_df, file_type)
cp_df = drop_low_variance(cp_df, file_type='cellprofiler', threshold=cp_df.var(numeric_only=True).quantile(0.1))
cp_df.set_index(['PlateID', 'Metadata_Well', 'Metadata_Concentration (Image)', 'Compound'], inplace=True)
cp_df

Cell Profiler data
Low variance filter : 98 features dropped (9.899%)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,AreaShape_Area (Nuclei),AreaShape_Compactness (Nuclei),AreaShape_ConvexArea (Nuclei),AreaShape_Eccentricity (Nuclei),AreaShape_EquivalentDiameter (Nuclei),AreaShape_Extent (Nuclei),AreaShape_FormFactor (Nuclei),AreaShape_MajorAxisLength (Nuclei),AreaShape_MaxFeretDiameter (Nuclei),AreaShape_MaximumRadius (Nuclei),...,Texture_Variance_DNA_3_02_256 (Cells),Texture_Variance_DNA_3_03_256 (Cells),Texture_Variance_ER_3_00_256 (Cells),Texture_Variance_ER_3_01_256 (Cells),Texture_Variance_ER_3_02_256 (Cells),Texture_Variance_ER_3_03_256 (Cells),Texture_Variance_MITO_3_00_256 (Cells),Texture_Variance_MITO_3_01_256 (Cells),Texture_Variance_MITO_3_02_256 (Cells),Texture_Variance_MITO_3_03_256 (Cells)
PlateID,Metadata_Well,Metadata_Concentration (Image),Compound,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
tubes1BR1,C10,30,NCAP,-0.788746,-0.998152,-0.878957,-1.044486,-0.791642,-0.390663,1.001350,-0.943540,-0.857982,-0.179352,...,-0.993548,-1.041772,-1.760015,-1.733778,-1.722468,-1.736813,1.230243,1.148998,1.238844,1.277265
tubes1BR1,C11,100,NCAP,0.753405,-0.207761,0.581756,-1.055860,0.762707,0.024016,0.201875,0.121923,0.098507,-0.025568,...,0.189814,0.347607,0.307025,0.295249,0.297445,0.297336,2.470872,2.446673,2.564397,2.521777
tubes1BR1,C12,100,NCAP,0.001606,0.003045,-0.044264,1.074816,0.010047,-1.731690,-0.009893,-0.104237,-0.028062,-0.179352,...,1.407650,1.079826,0.424141,0.442630,0.430602,0.621815,1.284695,1.282487,1.234614,1.291052
tubes1BR1,C13,300,NCAP,-1.559822,0.424794,-1.542918,-0.214010,-1.584512,-1.102169,-0.431741,-1.191270,-1.288562,-1.584413,...,1.138840,1.110859,-0.142961,-0.070914,-0.107896,-0.113182,1.725082,1.735227,1.680849,1.688162
tubes1BR1,C14,300,NCAP,-0.364655,1.102571,-0.366759,1.005939,-0.360110,-1.816227,-1.104618,0.603414,0.497403,-1.584413,...,0.438686,0.381526,0.035167,0.179457,-0.025491,-0.023023,2.394873,2.441512,2.360824,2.363905
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tubes5BR3,N5,0,DMSO,0.349177,-0.582309,0.208182,-1.066559,0.356350,0.721622,0.578391,-0.475488,-0.129460,-0.024885,...,-0.624785,-0.624666,-0.458185,-0.453091,-0.489829,-0.461294,-0.169152,-0.181798,-0.222178,-0.182848
tubes5BR3,N6,0,DMSO,1.334467,-0.395801,1.457276,-1.144339,1.318568,-0.338209,0.388045,1.048157,1.064561,1.513521,...,-1.089981,-1.074388,-1.056916,-1.075768,-1.007587,-1.047679,-0.295803,-0.262718,-0.262741,-0.270489
tubes5BR3,N7,10,ZIDO,1.827112,-0.192392,1.863232,-0.181283,1.792566,0.354720,0.181495,1.231895,1.354003,1.513521,...,-0.276479,-0.257091,-0.304415,-0.298599,-0.279547,-0.281422,-0.023268,-0.036111,-0.058593,-0.033094
tubes5BR3,N8,10,ZIDO,1.588735,0.746067,1.550958,1.200254,1.563790,-0.747229,-0.757582,1.934242,1.794735,1.387307,...,0.910493,0.892926,-0.547287,-0.490433,-0.480210,-0.585607,-0.735446,-0.708822,-0.705545,-0.711992


In [3]:
from deepod.models.dsvdd import DeepSVDD
cp_df = cp_df.reset_index()
clf = DeepSVDD()
clf.fit(cp_df, y=None)
scores = clf.decision_function(cp_df)

Start Training...
ensemble size: 1


KeyError: 1038