In [None]:
import os
import pandas as pd 
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_validate, cross_val_score
import matplotlib  as mpl
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
%matplotlib inline
import os, shutil, glob
from PIL import Image
from itertools import cycle
from random import randint
import re, math
import seaborn as sns; sns.set_style("white")
from sklearn.manifold import TSNE
import datetime
import gc
from pathlib import Path
from cytominer_eval import evaluate
import plotly.graph_objects as go

# Run everything from the project's analysis directory so that the relative
# paths used in later cells resolve against it.
analysis_root = '/share/data/analyses/specs1K-v2/'
os.chdir(analysis_root)
print(os.getcwd())

In [None]:
# Plot/export settings shared by the cells below.
figformat = 'png'  # file extension used when saving the clustermaps
dpi = 150  # resolution passed to savefig for the clustermaps saved as `figformat`
project = 'specs1K-v2'  # project label shown in the figure titles

In [None]:
# Print a timestamp so the rendered notebook records when it was executed.
now = datetime.datetime.now()
timestamp = now.strftime('%Y-%m-%d %H:%M:%S')
print('Current date and time : ')
print(timestamp)

### Read data

In [None]:
# Directory holding the input feature tables (a relative path, so it is resolved
# against the current working directory).
BaseDir = 'ImageMeanFeatures'
# Parquet file inside BaseDir that is read into `df` in a later cell.
datafile = 'ImageMeanPlateAllPlates.parquet'

# Show the sorted contents of BaseDir as the cell output.
sorted(os.listdir(BaseDir))

### Output directory

In [None]:
# Directory that receives every figure written by this notebook.
OutputDir = 'Clustermaps_Radarplots'
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create race.
os.makedirs(OutputDir, exist_ok=True)

In [None]:
# Load the feature table and replace whatever index was stored in the file
# with a fresh 0..n-1 index. (The original line ended in a stray `S`, which
# made this cell fail to parse.)
df = pd.read_parquet(f'{BaseDir}/{datafile}').reset_index(drop=True)
display(df.shape)
display(df)

In [None]:
# Derive shorter working columns from the Metadata_* columns.
for new_name, source_name in (
    ('plate', 'Metadata_Barcode'),
    ('plate-name', 'Metadata_Barcode'),
    ('well_id', 'Metadata_Well'),
):
    df[new_name] = df[source_name]

# Layout code: the last two characters of the plate name.
df['L'] = df['plate'].str.slice(start=-2)
# Layout code and well joined with '_'.
df['plateWell'] = df['L'] + '_' + df['well_id']
df['site'] = df['Metadata_Site']
# Barcode: the part of Metadata_Barcode before the first '-'.
df['barcode'] = df['Metadata_Barcode'].str.split(pat="-").str.get(0)

display(df)


In [None]:
#
# Get layout metadata from database
#

# Connection info for the database.
# SECURITY: credentials should not live in the notebook. Set IMAGEDB_URI in the
# environment to supply the connection string; the previous hardcoded value is
# kept only as a backward-compatible default.
db_uri = os.environ.get(
    'IMAGEDB_URI',
    'postgresql://pharmbio_readonly:readonly@imagedb-pg-postgresql.services.svc.cluster.local/imagedb',
)

query = """
        SELECT *
        FROM plate_v1
        """

# Query database and store result in pandas dataframe
print("Select table with database...please wait")
df_plates = pd.read_sql_query(query, db_uri)

# Left join: keep every row of df and add the database columns wherever
# both barcode and well_id match.
print("Select join table with database...please wait")
df_merged = pd.merge(df, df_plates, how='left', on=['barcode', 'well_id'])

# A left join returns more rows than its left input only when a key matches
# several rows on the right; surface that instead of silently duplicating images.
if len(df_merged) != len(df):
    print(f"WARNING: join changed the row count from {len(df)} to {len(df_merged)}; "
          "plate_v1 has several rows for some barcode/well_id pairs")

display(df_merged)

In [None]:
# Continue with an independent copy of the joined table under the name `df`.
df = df_merged.copy()
df.shape

In [None]:
# Keep only rows whose ObjectNumber_nuclei is greater than 5.
# NOTE(review): the previous comment said this removes images with "less than
# 10 nuclei", which does not match the threshold of 5 used here — confirm the
# intended cutoff.
df = df[df["ObjectNumber_nuclei"] > 5 ]
df.shape

In [None]:
# Use the `cbkid` column as the compound identifier and drop rows without one.
# Building a new frame with assign() (instead of writing a column into the
# filtered slice produced by the previous cell) avoids pandas'
# SettingWithCopyWarning.
df = (
    df.assign(compound=df['cbkid'])
      .dropna(subset=['compound'])
      # drop=False, as before: the old row labels are kept as a regular column
      .reset_index(drop=False)
)
display(df.filter(like='compound'))

In [None]:
# Distinct values of the 'compound' column, sorted.
CompoundList = sorted(set(df['compound']))
# Preview only the first ten entries.
display(CompoundList[:10])

In [None]:
# Distinct values of the 'plate' column, sorted.
PlateList = sorted(set(df['plate']))
display(PlateList)

### Clustermap

In [None]:
# Compounds selected for the single-clustermap example below; the second entry
# is the one used in that figure's title and output file names.
SomeCompounds = ['[dmso]', '[sorb]']

In [None]:
# Rows of dfZscores whose compound is one of SomeCompounds.
# NOTE(review): dfZscores is not created in any cell visible in this part of the
# notebook — confirm it is defined earlier, otherwise Restart & Run All stops here.
is_selected = dfZscores['compound'].isin(SomeCompounds)
dfSomeCompounds = dfZscores[is_selected]
dfSomeCompounds.shape

In [None]:
# Clustermap of the rows of `groupedby`, expressed relative to '[dmso]_0.1'.
# NOTE(review): no cell above this one (in the visible notebook) assigns
# `groupedby`; it is only assigned further down — confirm the execution order.
nr_rows = groupedby.shape[0]

# Rebuild the grouped values as a labelled frame, then subtract the
# '[dmso]_0.1' row from every row.
x = pd.DataFrame(groupedby.values, index=groupedby.index, columns=groupedby.columns)
x = x - x.loc['[dmso]_0.1']

sns.set(font_scale=0.9)
clustermap_options = dict(
    method='ward', metric='euclidean',
    z_score=None, standard_scale=None, cbar_kws=None,
    figsize=(12, nr_rows/4 +1),   # figure height grows with the number of rows
    row_cluster=False, col_cluster=True,   # cluster columns only; row order is kept
    cbar_pos=(0.02, 0.8, 0.05, 0.18),
    row_linkage=None, col_linkage=None, row_colors=None, col_colors=None,
    vmin=-3, vmax=3,
    cmap="RdBu_r", xticklabels=False,
)
sns_plot = sns.clustermap(x, **clustermap_options)

shown_compound = SomeCompounds[1]
plt.suptitle(f'Project {project}, clustermap for {shown_compound}', fontsize=12, y=1.05)
# Saved twice: once in `figformat` at `dpi`, once as a PDF at 300 dpi.
sns_plot.savefig(f'{OutputDir}/clustermapDarker_{shown_compound}.{figformat}', dpi=dpi)
sns_plot.savefig(f'{OutputDir}/clustermapDarker_{shown_compound}.pdf', dpi=300)
plt.show()

In [None]:
# List the feature columns in the order given by the column dendrogram.
column_order = sns_plot.dendrogram_col.reordered_ind
# Map each original column position to its position in the reordered list.
reorder = {original_pos: display_pos for display_pos, original_pos in enumerate(column_order)}

columnDf = pd.DataFrame(x.columns, columns = ['feature'])
columnDf['order'] = range(len(column_order))

# Replace each original position by its reordered position and sort by it.
columnDf_reordered = (
    columnDf
    .assign(order=columnDf['order'].map(reorder))
    .sort_values(by=['order'])
)
columnDf_reordered.head(10)

### Clustermaps for all compounds

In [None]:
# One clustermap per compound, each plotted together with '[dmso]'.
# NOTE(review): the slices below skip CompoundList[3] and CompoundList[6]; the
# reason is not visible in this notebook — confirm it is still intended.
sns.set(font_scale=0.9)  # loop-invariant, so applied once before the loop

for cmp in (CompoundList[:3] + CompoundList[4:6] + CompoundList[7:]):
    SomeCompounds = ['[dmso]', cmp]

    dfSomeCompounds = dfZscores[dfZscores['compound'].isin(SomeCompounds)]
    # 'mean' is the NaN-skipping groupby mean; passing the np.nanmean alias
    # instead is deprecated in newer pandas releases.
    dictOfnumeric = {i: 'mean' for i in dfSomeCompounds[varying_in_DMSO]}
    groupedby = dfSomeCompounds.groupby('comp_conc').agg(dictOfnumeric)

    # Temporary helper columns parsed from the '<compound>_<conc>' index, used
    # only to order the rows by concentration and then compound.
    groupedby['compound'] = groupedby.index.str.split('_').str[0]
    groupedby['conc'] = groupedby.index.str.split('_').str[1].astype('float')
    groupedby = (
        groupedby
        .sort_values(['conc', 'compound'])
        .drop(columns=['compound', 'conc'])
    )

    nr_rows = groupedby.shape[0]

    # Keep the feature names as column labels (consistent with the
    # single-compound cell) and subtract the '[dmso]_0.1' row from every row.
    x = pd.DataFrame(groupedby.values, index=groupedby.index, columns=groupedby.columns)
    x = x - x.loc['[dmso]_0.1']

    sns_plot = sns.clustermap(x, method='ward', metric='euclidean', z_score=None,
                              standard_scale=None, cbar_kws=None, figsize=(12, nr_rows/4 +1),
                              row_cluster=False, col_cluster=True,
                              cbar_pos=(0.02, 0.8, 0.05, 0.18),
                              row_linkage=None, col_linkage=None, row_colors=None,
                              vmin=-5, vmax=5,
                              col_colors=None, cmap="RdBu_r", xticklabels=False)
    plt.suptitle('Project {}, clustermap for {}'.format(project, SomeCompounds[1]), fontsize=12, y=1.05)
    sns_plot.savefig('{}/clustermap_{}.{}'.format(OutputDir, SomeCompounds[1], figformat), dpi=dpi)
    plt.show()
    # Release the figure explicitly so open figures cannot pile up over the loop,
    # whichever matplotlib backend is active.
    plt.close(sns_plot.fig)


### Radarplots

In [None]:
# Per-'comp_conc' mean of every column selected by varying_in_DMSO.
# 'mean' is the NaN-skipping groupby mean; passing the np.nanmean alias instead
# is deprecated in newer pandas releases.
dictOfnumeric = {i: 'mean' for i in dfZscores[varying_in_DMSO]}
groupedby = dfZscores.groupby('comp_conc').agg(dictOfnumeric)

In [None]:
# Transpose the grouped table (columns become rows), take absolute values and
# clip to the [-10, 10] range to cap extreme values.
RadarplotDf = groupedby.transpose().abs().clip(lower=-10, upper=10)
RadarplotDf.tail(5)

In [None]:
# Feature grouping table, indexed by the CSV's second column; only the
# 38-category column is kept, as a one-column DataFrame.
grouping_table = pd.read_csv('Features_grouping_v7.csv', sep=',', index_col=1)
grouping = grouping_table[['CHAN-MOD_biorelevant_38cat']]
grouping

In [None]:
# Attach the category column to each row of RadarplotDf by matching index
# labels; rows without a match are kept (left join).
# Note: this rebinds df_merged, which earlier held the database join result.
df_merged = RadarplotDf.merge(grouping, how='left', left_index=True, right_index=True)
df_merged.tail(5)

In [None]:
# Average all rows that share a category, rounded to 5 decimals.
category_means = df_merged.groupby('CHAN-MOD_biorelevant_38cat').mean()
df_38cat = category_means.round(5)
df_38cat.head(2)

In [None]:
# Transpose so categories become columns, turn +/-inf into NaN, then drop
# every column that contains a NaN.
df_38cat = (
    df_38cat
    .transpose()
    .replace([np.inf, -np.inf], np.nan)
    .dropna(axis=1)
)
df_38cat.head(4)

In [None]:
# Column labels of df_38cat at this point (before 'compound'/'dose' are added).
feature_groups = [*df_38cat.columns]
# Same labels with the first one repeated at the end.
categories = feature_groups + [feature_groups[0]]

In [None]:
# Split each index label on '_': first piece -> compound, second piece -> dose.
index_parts = df_38cat.index.str.split('_')
df_38cat['compound'] = index_parts.str[0]
df_38cat['dose'] = index_parts.str[1].astype(float)
df_38cat.head(4)

#### Radarplots for 24OHC and TGFb

In [None]:
# Radar plot per compound, one trace per selected dose.
#
# Each entry is (position in the ascending dose list, colour, opacity). Traces
# are added in this order: highest dose, second highest, second lowest, lowest —
# the same order, colours and opacities as before.
radar_traces = [
    (-1, 'red', 0.5),
    (-2, 'orange', 0.7),
    (1, 'yellowgreen', 0.5),
    (0, 'yellow', 0.3),
]

for compound in CompoundList[:2]:

    df_38cat_part = df_38cat[df_38cat['compound'] == compound]
    doses = sorted(set(df_38cat_part['dose']))
    print(doses)

    traces = []
    plotted_doses = set()
    for position, colour, opacity in radar_traces:
        # With fewer than four doses some positions do not exist or point at a
        # dose that already has a trace; skip those instead of raising IndexError.
        if not -len(doses) <= position < len(doses):
            continue
        dose = doses[position]
        if dose in plotted_doses:
            continue
        plotted_doses.add(dose)

        # Select the row through the numeric 'dose' column rather than by
        # re-formatting the float into an index label, which only works when the
        # float prints exactly like the original label.
        profile = df_38cat_part[df_38cat_part['dose'] == dose].iloc[0]

        traces.append(
            go.Scatterpolar(
                # Index by `categories`: this leaves out the non-feature
                # 'compound'/'dose' columns and repeats the first group at the
                # end, so r lines up with theta and the polygon closes.
                r=profile[categories].astype(float).tolist(),
                theta=categories, fill='toself', opacity=opacity,
                line_color=colour, fillcolor=colour, connectgaps=True,
                # Label with the dose actually plotted in this trace.
                name="{}, {}".format(compound, dose),
            )
        )

    if not traces:
        print('No rows for {}; skipping radar plot'.format(compound))
        continue

    fig = go.Figure(
        data=traces,
        layout=go.Layout(
            title=go.layout.Title(text='{}'.format(compound)),
            polar={'radialaxis': {'visible': True}},
            showlegend=True
        )
    )

    # Fixed linear radial axis from 0 to 4 for every compound.
    fig.update_layout(polar=dict(radialaxis=dict(type = "linear", range=[0, 4], tickangle = 0)),showlegend=True,
                      font=dict(family="Courier New, monospace", size=6, color="Black"))

    fig.show(renderer='png')

    fig.write_image("{}/radarplot_lin_{}.pdf".format(OutputDir, compound))