### Plot grit scores for Spheroid Aggregated data

In [1]:
import pandas as pd
import numpy as np
import os

# Grit scores
from cytominer_eval import evaluate

# Plotting
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns; sns.set_style("white")


# Move to the project root so the relative paths used below ('1_Data/...') resolve.
# The directory is printed before and after the change as a visual check.
project_root = '/share/data/analyses/christa/colopaint3D_fork/spher_colo52_v1'
print(os.getcwd())
os.chdir(project_root)
print(os.getcwd())

/share/data/analyses/christa/colopaint3D_fork/spher_colo52_v1/3_Figure3/GritScores
/share/data/analyses/christa/colopaint3D_fork/spher_colo52_v1


In [2]:
# Figure export settings.
# Font type 42 is applied to both the PDF and the PostScript backends.
plt.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Output format and resolution for saved figures
figformat = 'pdf'
dpi = 300

In [3]:

# # Load the data
# file = '/home/jovyan/share/data/analyses/christa/colopaint3D_fork/spher_colo52_v1/1_Data/spher_colo52_HCT116_normalized_perslice_median_aggr_selected (1).parquet'
# data = pd.read_parquet(file)

# # Load the data
# file = '/home/jovyan/share/data/analyses/christa/colopaint3D_fork/spher_colo52_v1/1_Data/FeaturesImages_150125_none/selected_data_7_HCT116.csv'
# data = pd.read_csv(file)


In [4]:
# ### TMP ONLY

# # Some function definitions

# def list_features(df):
#     # List features
#     list_of_selected_features = list(df.columns.values)
#     list_of_metadata = list(df.columns[df.columns.str.contains("Metadata_")])
#     list_of_selected_features = list(set(list_of_selected_features) - set(list_of_metadata))
#     return list_of_selected_features, list_of_metadata


# # A function to agree on Metadata columns
# rename_metadata_dict = {
#     'Metadata_Barcode': 'Metadata_barcode', 
#     'Metadata_Plate': 'Metadata_Plate',
#     'Metadata_well_id': 'Metadata_Well',
#     'Metadata_cmpd_plate_well':'Metadata_PlateWell',
#     'Metadata_cmpd_pert_type':'Metadata_pert_type',
#     'Metadata_cmpd_cmpdname':'Metadata_cmpdname',
#     'Metadata_cmpd_target':'Metadata_target',
#     'Metadata_cmpd_pathway':'Metadata_pathway',
#     'Metadata_cmpd_cmpd_conc':'Metadata_cmpd_conc',
#     'Metadata_cmpd_cell_line':'Metadata_cell_line',
# }

# miminal_metadata = ['Metadata_Well', 'Metadata_barcode', 'Metadata_PlateWell', 
#                     'Metadata_cell_line', 'Metadata_cmpd_conc', 'Metadata_cmpdname', 
#                     'Metadata_name', 'Metadata_pert_type', 'Metadata_pathway', 'Metadata_target']

# def rename_metadata_columns(df, rename_metadata_dict):
#     df.rename(columns = rename_metadata_dict, inplace = True)
#     df = df.loc[:,~df.columns.duplicated()].copy()
#     return df


# # A function to check if metadata columns are present
# def check_metadata_columns(df, minimal_metadata):
#     metadata_columns = list(df.columns[df.columns.str.contains("Metadata_")])
#     print(metadata_columns)
#     diff = set(minimal_metadata) - set(metadata_columns)
#     return diff

# dataset = data

# # Load metadata (I am missing the concentrations)
# metadata = pd.read_csv('1_Data/spher_colo52-metadata.csv')

# # # Add a short name for the compound
# dataset['Metadata_name'] = dataset['Metadata_cmpd_cmpdname'].str[:5] 

# # Merge data with metadata to get the concentrations
# # dataset = dataset.merge(metadata[['plate_well', 'cmpd_conc']], left_on='Metadata_PlateWell', right_on = 'plate_well')
# # dataset = dataset.drop(columns=['plate_well'])
# # dataset['Metadata_cmpd_conc'] = dataset['cmpd_conc'].rename('Metadata_cmpd_conc')



# # Organize metadata columns
# dataset = rename_metadata_columns(dataset, rename_metadata_dict)
# print(check_metadata_columns(dataset, miminal_metadata))

# # Remove all columns with 'FileName' or 'PathName' in the name
# dataset = dataset.loc[:,~dataset.columns.str.contains('FileName|PathName|ObjectNumber|ImageNumber|AcqID')]

In [5]:
# Selectors for the input file name 'selected_data_<data_type>_<cell_line>.parquet'
data_type = "aggregates"
cell_line = "HCT116"

In [6]:
# Load the selected-feature profiles for the chosen data type and cell line.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept here so the notebook-level variable stays available.
dir = '1_Data/results/'
input_path = f"{dir}selected_data_{data_type}_{cell_line}.parquet"
data = pd.read_parquet(input_path)

In [7]:
# Some function definitions

def list_features(df):
    """Split the columns of ``df`` into feature columns and metadata columns.

    A column is classified as metadata when its name contains the substring
    "Metadata_"; every other column is classified as a feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labels are strings.

    Returns
    -------
    tuple of (list, list)
        ``(features, metadata)``. Both lists follow the column order of
        ``df``, so the result is identical from run to run. Repeated feature
        names are reported once.
    """
    is_metadata = df.columns.str.contains("Metadata_")
    list_of_metadata = list(df.columns[is_metadata])
    # dict.fromkeys drops duplicate labels while keeping first-seen order;
    # a plain set difference would return the features in arbitrary order.
    list_of_selected_features = list(dict.fromkeys(df.columns[~is_metadata]))

    return list_of_selected_features, list_of_metadata

### Grit

In [8]:
## Build the table used for the grit calculation.
## `data` itself is left untouched: the new columns are added to a copy.
dataset_grit = data.copy().assign(
    # Short compound label: the first five characters of the compound name
    Metadata_name=lambda d: d['Metadata_cmpdname'].str[:5],
    # Dense rank of the concentration within each compound (easier for plotting)
    Metadata_conc_step=lambda d: (
        d.groupby('Metadata_cmpdname')['Metadata_cmpd_conc'].rank(method='dense', ascending=True)
    ),
    # Perturbation label: short compound name + concentration
    Metadata_pert_name=lambda d: d["Metadata_name"] + "_" + d["Metadata_cmpd_conc"].astype(str),
    # Per-row identifier: short compound name + row index label
    Metadata_replicate_id=lambda d: d["Metadata_name"] + "_" + d.index.astype(str),
)

In [9]:
# Inputs for the grit evaluation

# Replicate ids of the control profiles: rows named 'dmso' at concentration 0.1
dmso_controls = dataset_grit.query("Metadata_name == 'dmso' & Metadata_cmpd_conc == 0.1")
control_perts = dmso_controls["Metadata_replicate_id"].unique().tolist()

# Which column identifies a single profile and which one groups the replicates
grit_replicate_groups = dict(
    profile_col="Metadata_replicate_id",
    replicate_group_col="Metadata_pert_name",
)

In [10]:
# Calculate the grit scores

# Split the columns once and reuse both halves in the call below
feature_cols, metadata_cols = list_features(dataset_grit)

grit_results = evaluate(
    profiles=dataset_grit,
    features=feature_cols,
    meta_features=metadata_cols,
    replicate_groups=grit_replicate_groups,
    operation="grit",
    similarity_metric="pearson",
    grit_replicate_summary_method="median",
    grit_control_perts=control_perts,
)

# Kept as a list of result tables; the next cell concatenates it
grit_scores = [grit_results]

In [11]:
# Organize the grit scores into one table:
#   1. concatenate the collected result tables and renumber the rows,
#   2. recover the short compound name (the text before the first "_" of 'perturbation'),
#   3. add back the well information by matching 'perturbation' to 'Metadata_replicate_id'.
grit_scores = (
    pd.concat(grit_scores)
    .reset_index(drop=True)
    .assign(Metadata_name=lambda d: d["perturbation"].str.split("_").str[0])
    .merge(
        dataset_grit[['Metadata_PlateWell', 'Metadata_replicate_id', 'Metadata_cmpd_conc', 'Metadata_conc_step', 'Metadata_pert_type']],
        left_on='perturbation',
        right_on='Metadata_replicate_id',
    )
)

# Distinct short compound names, in order of first appearance
CompoundsUsed = grit_scores["Metadata_name"].unique()

In [12]:
# ## Plot the grit dose response

# # Plot the grit scores
# fig = plt.figure(figsize=(48, 32))
# sns.set(font_scale=1.5)
# fig.suptitle("grit barplot ", fontsize=24, x=0.3)
# sp = 1
# nrrow = 6
# nrcol = 11
# for some in CompoundsUsed[:]:
#     grit_scores_part = grit_scores[grit_scores["Metadata_name"] == some].copy()
#     grit_scores_part.sort_values(by="Metadata_conc_step", inplace=True)
#     ax = fig.add_subplot(nrrow, nrcol, sp)
#     ax = sns.barplot(
#         x="Metadata_cmpd_conc",
#         y="grit",
#         data=grit_scores_part,
#         hue="Metadata_conc_step",
#         legend=False,
#         palette="Blues_d",
#         alpha=1,
#         err_kws={'linewidth': 3,'color': 'black'},
#     )
#     ax.set_facecolor("w")
#     ax.spines["bottom"].set_color("grey")
#     ax.spines["left"].set_color("grey")
#     ax.set_ylim([-0.1, 7.5])
#     ax.set_title("{}".format(some), fontsize=24, x=0.2)
#     plt.subplots_adjust(top=0.9, wspace=0.2, hspace=0.5, left=0.0)
#     plt.xticks(rotation = 45)
#     sp += 1
    
# # fig.savefig(
# #         "3_Figure3/GritScores/result-images/GritScores_{}_{}.{}".format(cell_line, data_type,'png'), dpi=dpi, bbox_inches="tight"
# #         )

# # plt.show()


In [13]:
# # Plot only a handful of compounds for the main figure

# CompoundsUsed = ['SN-38', 'Binim', 'abema', 'etop','fenb','stau','dmso']


# # Plot the grit scores
# fig = plt.figure(figsize=(16, 4))
# # sns.set(font_scale=1.5)
# fig.suptitle("grit barplot ", fontsize=24, x=0.3)
# sp = 1
# nrrow = 1
# nrcol = 7
# for some in CompoundsUsed[:]:
#     grit_scores_part = grit_scores[grit_scores["Metadata_name"] == some].copy()
#     grit_scores_part.sort_values(by="Metadata_conc_step", inplace=True)
#     ax = fig.add_subplot(nrrow, nrcol, sp)
#     ax = sns.barplot(
#         x="Metadata_cmpd_conc",
#         y="grit",
#         data=grit_scores_part,
#         hue="Metadata_conc_step",
#         legend=False,
#         palette="Blues_d",
#         alpha=1,
#         err_kws={'linewidth': 2,'color': 'black'},
#     )
#     ax.set_facecolor("w")
#     ax.spines["bottom"].set_color("grey")
#     ax.spines["left"].set_color("grey")
#     ax.set_yticks([0, 2, 4, 6])
#     ax.set_ylim([-0.1, 7.5])
#     ax.set_xlabel('µM')
#     ax.set_title("{}".format(some), fontsize=20, x=0.2)
#     plt.subplots_adjust(top=0.9, wspace=0.2, hspace=0.5, left=0.0)
#     plt.xticks(rotation = 45)
#     sp += 1

#     fig.tight_layout()
    
# fig.savefig(
#         "3_Figure3/GritScores/result-images/GritScores_{}_{}.{}".format(cell_line, data_type, figformat), dpi=dpi, bbox_inches="tight"
#         )

# plt.show()

#### Add grit scores to the dataset for further processing

In [16]:
# Attach the grit score of each profile to the full dataset (joined on the
# plate/well label) and store it under the name 'Metadata_grit'.
# NOTE(review): 'Metadata_replicate_id' is present in both frames but is not a
# join key, so the merged table carries it twice with pandas' default
# suffixes -- confirm that downstream steps expect this.
grit_per_well = grit_scores[['Metadata_PlateWell', 'Metadata_replicate_id', 'grit']]
dataset_grit2 = (
    dataset_grit
    .merge(grit_per_well, left_on='Metadata_PlateWell', right_on='Metadata_PlateWell')
    .rename(columns={'grit': 'Metadata_grit'})
)

# Write the result as parquet to the results folder
OutputDir = '1_Data/results/'
dataset_grit2.to_parquet(f"{OutputDir}grit_data_{data_type}_{cell_line}.parquet")

#### Return a few statistics

In [15]:
##
## Summary statistics for the treated ('trt') profiles
##

treated = grit_scores.query("Metadata_pert_type == 'trt'")

# Median grit per (compound, concentration step), highest first
average = (
    treated
    .groupby(['Metadata_name', 'Metadata_conc_step'])
    .grit
    .median()
    .sort_values(ascending=False)
)

# Compound names (first index level) overall, and for the groups whose median exceeds 1.96
compounds_all = average.index.get_level_values(0)
compounds_hit = average[average > 1.96].index.get_level_values(0)

print(data_type + " " + cell_line)
print("Grit scores more than 1.96: " + str(compounds_hit.nunique()))
print(compounds_hit.unique())
print("Out of: " + str(compounds_all.nunique()))

# Distribution of the per-profile grit scores of the treated profiles
print(treated.grit.describe())

aggregates HCT116
Grit scores more than 1.96: 38
Index(['AMG23', 'MK-22', 'Binim', 'Gemci', 'BMS-7', 'Trifl', 'Encor', 'AZD45',
       'Cobim', 'Crizo', 'SN-38', 'Tanes', 'Palbo', 'abema', 'Oxali', '5Z-7-',
       'Vinor', 'Regor', 'Afati', 'PD032', 'Tasel', 'Trame', 'Vorin', 'Borte',
       'Nutli', 'Pacli', 'Adavo', 'Alpel', 'Dabra', 'PI-10', 'Fluor', 'AZD77',
       'AZD80', 'Soraf', 'Velip', 'Gefit', 'Olapa', 'SB505'],
      dtype='object', name='Metadata_name')
Out of: 52
count    745.000000
mean       1.994626
std        1.766534
min       -2.201409
25%        0.625614
50%        1.682447
75%        3.360605
max        7.721709
Name: grit, dtype: float64
