In [55]:
import pandas as pd
from umap import UMAP
import seaborn as sns
import matplotlib.pyplot as plt
import utils
import os
import re
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Set the default Plotly template to "simple_white"; figures created after this
# line use it unless a template is passed explicitly.
pio.templates.default = "simple_white"

## Load Phenotypic Profiles & UMAPs

In [2]:
# Pipeline name per modality; these strings are interpolated into the profile,
# phenotypic-activity and UMAP file names read further down.
operations = dict(
    orf="wellpos_cc_var_mad_outlier_featselect_sphering_harmony",
    crispr="wellpos_cc_var_mad_outlier_featselect_sphering_harmony_PCA_corrected",
)

In [37]:
# Read the ORF and CRISPR annotation tables, keeping only the two columns
# needed to link Metadata_JCP2022 to Metadata_NCBI_Gene_ID.
annotation_output_dir = '../00.download-and-process-annotations/output'
gene_link_columns = ["Metadata_JCP2022", "Metadata_NCBI_Gene_ID"]

orf_metadata_df = pd.read_table(f'{annotation_output_dir}/orf_metadata.tsv.gz')[gene_link_columns]
# NOTE(review): the ORF gene IDs are intentionally left as read (the '{:g}'
# formatting below is disabled for ORF in the original cell). A later cell joins
# this column against a string GeneID column - confirm the ORF IDs are already
# strings.
# orf_metadata_df['Metadata_NCBI_Gene_ID'] = orf_metadata_df['Metadata_NCBI_Gene_ID'].apply('{:g}'.format)

crispr_metadata_df = pd.read_table(f'{annotation_output_dir}/crispr_metadata.tsv.gz')[gene_link_columns]
# Render each CRISPR gene ID with the general ('g') number format, which turns
# the values into strings.
crispr_gene_ids = crispr_metadata_df['Metadata_NCBI_Gene_ID']
crispr_metadata_df['Metadata_NCBI_Gene_ID'] = crispr_gene_ids.apply('{:g}'.format)

In [3]:
# Read the profile table of each modality; the file name embeds the pipeline
# name stored in `operations`.
profiles_path_template = '../profiles/profiles_{}.parquet'

orf_profiles_df = pd.read_parquet(profiles_path_template.format(operations["orf"]))
crispr_profiles_df = pd.read_parquet(profiles_path_template.format(operations["crispr"]))

# Only the last expression of a cell is rendered, so the CRISPR shape is what
# gets displayed.
orf_profiles_df.shape
crispr_profiles_df.shape

(81660, 726)

In [5]:
def _read_phenotypic_activity(operation):
    """Read the phenotypic-activity table written for one pipeline name.

    Only the ``Metadata_JCP2022`` and ``below_corrected_p`` columns are loaded;
    ``below_corrected_p`` is renamed to ``Metadata_below_corrected_p``.
    """
    activity_df = pd.read_csv(
        f'../03.retrieve-annotations/output/phenotypic-activity-{operation}.csv.gz',
        usecols=["Metadata_JCP2022", "below_corrected_p"],
    )
    return activity_df.rename(columns={"below_corrected_p": "Metadata_below_corrected_p"})


orf_phenotypic_activity_df = _read_phenotypic_activity(operations["orf"])
# The "cripsr" spelling is kept on purpose: a later cell refers to this exact name.
cripsr_phenotypic_activity_df = _read_phenotypic_activity(operations["crispr"])

In [6]:
# Attach the Metadata_below_corrected_p flag to every profile row (inner join on
# Metadata_JCP2022) and keep only the rows where the flag is True.
orf_profiles_df = pd.merge(
    orf_profiles_df,
    orf_phenotypic_activity_df,
    how="inner",
    on="Metadata_JCP2022",
).query("Metadata_below_corrected_p==True")

crispr_profiles_df = pd.merge(
    crispr_profiles_df,
    cripsr_phenotypic_activity_df,
    how="inner",
    on="Metadata_JCP2022",
).query("Metadata_below_corrected_p==True")

# Only the last expression is rendered as the cell output.
orf_profiles_df.shape
crispr_profiles_df.shape

(39350, 727)

In [7]:
def _load_cached_umap(modality, display_name):
    """Load the cached 2-D UMAP embedding for one modality.

    Parameters
    ----------
    modality : str
        Key into ``operations`` ("orf" or "crispr"); also the file-name prefix.
    display_name : str
        Name used in the error message ("ORF" or "CRISPR").

    Returns
    -------
    numpy.ndarray
        The comma-separated values stored in the cached embedding file.

    Raises
    ------
    FileNotFoundError
        If the cached embedding file does not exist. (The original cell raised
        ``NameError``, which is meant for unresolved identifiers, not files.)
    """
    embedding_path = f"output/{modality}-umap-{operations[modality]}-phenotypic-activity.csv.gz"
    if not os.path.isfile(embedding_path):
        raise FileNotFoundError(
            f'There is no UMAP embedding for {display_name}. Please generate it firstly.'
        )
    return np.loadtxt(embedding_path, delimiter=",")


def _attach_umap_columns(profiles_df, projection_2d, display_name):
    """Store the two embedding columns on ``profiles_df`` as "umap 1" / "umap 2".

    The assignment is positional, so the embedding must have one row per profile
    row. Only the row count can be checked here.

    Raises
    ------
    ValueError
        If the embedding is not 2-D, has fewer than two columns, or its row
        count differs from ``len(profiles_df)``.
    """
    if (
        projection_2d.ndim != 2
        or projection_2d.shape[1] < 2
        or projection_2d.shape[0] != len(profiles_df)
    ):
        raise ValueError(
            f"Cached UMAP embedding for {display_name} has shape {projection_2d.shape}, "
            f"expected ({len(profiles_df)}, 2)."
        )
    profiles_df["umap 1"] = projection_2d[:, 0]
    profiles_df["umap 2"] = projection_2d[:, 1]


orf_projection_2d = _load_cached_umap("orf", "ORF")
crispr_projection_2d = _load_cached_umap("crispr", "CRISPR")

# Commented-out recipe retained from the original cell; presumably how the
# cached embeddings were produced - confirm before relying on it.
#X = utils.get_featuredata(orf_profiles_df)
#orf_projection_2d = UMAP(n_components=2, random_state=12527).fit_transform(X)
#X = utils.get_featuredata(crispr_profiles_df)
#crispr_projection_2d = UMAP(n_components=2, random_state=12527).fit_transform(X)

# NOTE(review): assumes the cached rows are in the same order as the filtered
# profile frames - verify against the code that wrote the embedding files.
_attach_umap_columns(orf_profiles_df, orf_projection_2d, "ORF")
_attach_umap_columns(crispr_profiles_df, crispr_projection_2d, "CRISPR")

(29909, 264)

In [49]:
# Reduce the ORF frame to the identifier plus the two UMAP coordinates, then
# join the ORF metadata on Metadata_JCP2022.
orf_umap_columns = ['Metadata_JCP2022', 'umap 1', 'umap 2']
orf_profiles_df = pd.merge(
    orf_profiles_df[orf_umap_columns],
    orf_metadata_df,
    on="Metadata_JCP2022",
)

In [48]:
# Reduce the CRISPR frame to the identifier plus the two UMAP coordinates, then
# join the CRISPR metadata on Metadata_JCP2022.
crispr_umap_columns = ['Metadata_JCP2022', 'umap 1', 'umap 2']
crispr_profiles_df = pd.merge(
    crispr_profiles_df[crispr_umap_columns],
    crispr_metadata_df,
    on="Metadata_JCP2022",
)

## Load U2OS data

In [52]:
# Source: https://depmap.org/portal/download/all/
depmap_output_dir = "../00.download-and-process-annotations/output"

df_dm_effects = pd.read_csv(f"{depmap_output_dir}/CRISPRGeneEffect_U2OS.csv.gz")
# The gene-dependency table is not loaded (kept disabled, as in the original cell):
#df_dm_dependency = pd.read_csv("../00.download-and-process-annotations/output/CRISPRGeneDependency_U2OS.csv.gz")
df_dm_xpr = pd.read_csv(f"{depmap_output_dir}/OmicsExpressionProteinCodingGenesTPMLogp1_U2OS.csv.gz")

# Inner-join gene effect with the TPM column on geneID, relabel the columns and
# cast GeneID to str.
# The relabelling is positional and therefore requires the merged frame to have
# exactly four columns. NOTE(review): it presumably expects the order
# (gene id, symbol, effect, TPM) - confirm against the input file.
df_dm = (
    pd.merge(df_dm_effects, df_dm_xpr[["geneID", "TPM"]], on="geneID", how="inner")
    .set_axis(["GeneID", "GeneSymbol", "essentiality", "expr"], axis=1)
    .astype({"GeneID": str})
)

In [54]:
# Join each UMAP frame with the DepMap table by matching Metadata_NCBI_Gene_ID
# against GeneID (pandas' default join type, i.e. inner).
gene_id_join_keys = dict(left_on='Metadata_NCBI_Gene_ID', right_on='GeneID')

orf_u2os_omics_df = pd.merge(orf_profiles_df, df_dm, **gene_id_join_keys)
crispr_u2os_omics_df = pd.merge(crispr_profiles_df, df_dm, **gene_id_join_keys)

## Plots

In [87]:
# CRISPR UMAP coloured by the `essentiality` column, written to a PNG file.
crispr_effect_labels = {
    'essentiality': 'DepMap CRISPR Gene Effect',
    'umap 1': 'UMAP 1',
    'umap 2': 'UMAP 2',
}

fig = px.scatter(
    crispr_u2os_omics_df,
    x='umap 1',
    y='umap 2',
    color="essentiality",
    color_continuous_scale='Hot',
    range_color=[-3.77, 0.6],
    size_max=5,
    width=900,
    height=600,
    title='Phenotypic CRISPRs Map vs. Pooled CRISPR Effect',
    labels=crispr_effect_labels,
)

# Small, mostly transparent markers on every trace whose mode is 'markers'.
fig.update_traces(
    selector=dict(mode='markers'),
    marker=dict(size=5),
    opacity=0.2,
)

# Fixed axis windows and a larger font, applied in a single layout update.
fig.update_layout(
    xaxis_range=[-5, 10],
    yaxis_range=[-5, 8],
    font=dict(size=24),
)

fig.write_image("figures/crispr_umap_phenotypic_vs_depmap_crispr_gene_effect.png")


In [88]:
# ORF UMAP coloured by the `essentiality` column, written to a PNG file.
orf_effect_labels = {
    'essentiality': 'DepMap KO Effect',
    'umap 1': 'UMAP 1',
    'umap 2': 'UMAP 2',
}

fig = px.scatter(
    orf_u2os_omics_df,
    x='umap 1',
    y='umap 2',
    color="essentiality",
    color_continuous_scale='Hot',
    range_color=[-3.77, 0.6],
    size_max=5,
    width=900,
    height=600,
    title='Phenotypic ORFs Map vs. Pooled CRISPR Effect',
    labels=orf_effect_labels,
)

# Small, mostly transparent markers on every trace whose mode is 'markers'.
fig.update_traces(
    selector=dict(mode='markers'),
    marker=dict(size=5),
    opacity=0.2,
)

# Fixed axis windows and a larger font, applied in a single layout update.
fig.update_layout(
    xaxis_range=[-5, 10],
    yaxis_range=[-5, 8],
    font=dict(size=24),
)

fig.write_image("figures/orf_umap_phenotypic_vs_depmap_crispr_gene_effect.png")

In [89]:
# CRISPR UMAP coloured by the `expr` column, written to a PNG file.
#
# Bug fix: the row filter used to be built from orf_u2os_omics_df.expr. pandas
# aligns a boolean Series indexer on the index, so CRISPR rows were selected
# according to the expression values of unrelated ORF rows (or the lookup failed
# when the indexes could not be aligned). The mask now comes from the CRISPR
# frame itself.
# Rows with expr <= -1 or missing expr are dropped (NaN > -1 evaluates to False).
crispr_expr_mask = crispr_u2os_omics_df.expr > (-1)
crispr_with_expr_df = crispr_u2os_omics_df.loc[crispr_expr_mask]

fig = px.scatter(crispr_with_expr_df, x='umap 1', y='umap 2', color="expr",
                 title='Phenotypic CRISPRs Map vs. U2OS Gene Expression',
                 width=900, height=600, size_max=5, range_color=[0,13],
                 color_continuous_scale = 'Purples',
                 labels={'expr':'log(TPM)',
                         'umap 1':'UMAP 1',
                         'umap 2':'UMAP 2'})
# Small, mostly transparent markers on every trace whose mode is 'markers'.
fig.update_traces(marker=dict(size=5), opacity=0.2,
                  selector=dict(mode='markers'))
# Fixed axis windows and a larger font for the exported figure.
fig.update_layout(
    yaxis_range=[-5,8],
    xaxis_range=[-5,10],
    font=dict(size=24),
)

fig.write_image("figures/crispr_umap_phenotypic_vs_u2os_gene_expression.png")

In [90]:
# ORF UMAP coloured by the `expr` column, written to a PNG file.
# Rows with expr <= -1 or missing expr are dropped (NaN > -1 evaluates to False).
orf_expr_mask = orf_u2os_omics_df.expr > (-1)
orf_expression_labels = {
    'expr': 'log(TPM)',
    'umap 1': 'UMAP 1',
    'umap 2': 'UMAP 2',
}

fig = px.scatter(
    orf_u2os_omics_df.loc[orf_expr_mask],
    x='umap 1',
    y='umap 2',
    color="expr",
    color_continuous_scale='Purples',
    range_color=[0, 13],
    size_max=5,
    width=900,
    height=600,
    title='Phenotypic ORFs Map vs. U2OS Gene Expression',
    labels=orf_expression_labels,
)

# Small, mostly transparent markers on every trace whose mode is 'markers'.
fig.update_traces(
    selector=dict(mode='markers'),
    marker=dict(size=5),
    opacity=0.2,
)

# Fixed axis windows and a larger font, applied in a single layout update.
fig.update_layout(
    xaxis_range=[-5, 10],
    yaxis_range=[-5, 8],
    font=dict(size=24),
)

fig.write_image("figures/orf_umap_phenotypic_vs_u2os_gene_expression.png")