In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import json

from copy import deepcopy

from tqdm.notebook import tqdm

In [3]:
data = pd.read_excel("../supplement/SupplementalTable7_top1000_pSNVs.xlsx", skiprows=1)
data.shape

(1000, 29)

In [4]:
# Check the number of TERT, RALY and CDC20 mutations.
data[data["GENE"] == "TERT"].value_counts(["POS"])

POS    
1295228    28
1295250     7
1294883     1
1295149     1
Name: count, dtype: int64

In [5]:
data[data["GENE"] == "RALY"].value_counts(["POS"])

POS     
32580927    4
32580916    1
Name: count, dtype: int64

In [6]:
data[data["GENE"] == "CDC20"].value_counts(["POS"])

POS     
43824528    1
43824529    1
Name: count, dtype: int64

In [65]:
# Get the rank sum of the mutations.
def get_rank_sum(gene_data, gene, position):
    """
    Get the rank sum of the mutations for a given gene.
    """
    gene_data["ranking"] = data.index + 1
    gene_data = gene_data[
        (gene_data["GENE"] == gene) & 
        (gene_data["POS"] == position)
    ]

    return np.sum(gene_data["ranking"])

In [8]:
def _add_remind_cancer_score(
    dataframe: pd.DataFrame,
    remind_cancer_scoring_weights: dict,
    ge_column: str
):
    for idx, row in dataframe.iterrows():
        # (1) Genomic.
        # Transcription Factors
        created_tf_weight = np.min([
            float(row["num_created_tfs_passing_tf_expression_threshold"]) *
            remind_cancer_scoring_weights["genomic"]["tfbs"]["creation_weight_per_tfbs"],
            remind_cancer_scoring_weights["genomic"]["tfbs"]["creation_weight_maximum"]
        ])

        destroyed_tf_weight = np.min([
            float(row["num_destroyed_tfs_passing_tf_expression_threshold"]) *
            remind_cancer_scoring_weights["genomic"]["tfbs"]["destruction_weight_per_tfbs"],
            remind_cancer_scoring_weights["genomic"]["tfbs"]["destruction_weight_maximum"]
        ])

        # Recurrence
        recurrence_weight = np.min([
            float(row["num_recurrent_mutations"]) *
            remind_cancer_scoring_weights["genomic"]["recurrence"]["weight_per_recurrent_mutation"],
            remind_cancer_scoring_weights["genomic"]["recurrence"]["weight_maximum"]
        ])

        # Purity
        try:
            purity_weight = remind_cancer_scoring_weights["genomic"]["purity"]["purity_weight"] if float(
                row["ICGC_Estimated_Purity"]) >= remind_cancer_scoring_weights["genomic"]["purity"]["purity_threshold_for_weight"] else 0
        except:
            # print(
            #     f"Purity of {row['ICGC_Estimated_Purity']} does not work. Setting score to 0.")
            purity_weight = 0

        # Allele Frequency
        try:
            af_weight = remind_cancer_scoring_weights["genomic"]["allele_frequency"]["af_weight"] if float(
                row["allele_frequency"]) >= remind_cancer_scoring_weights["genomic"]["allele_frequency"]["af_threshold_for_weight"] else 0
        except:
            # print(
            #     f"Allele frequency of {row['allele_frequency']} does not work. Setting score to 0.")
            af_weight = 0

        # (2) Transcriptomic
        # Gene Expression
        gene_expression_weight = np.min([
            float(row[ge_column]) *
            remind_cancer_scoring_weights["transcriptomic"]["gene_expression"]["weight_per_unit_of_expression"],
            remind_cancer_scoring_weights["transcriptomic"]["gene_expression"]["weight_maximum"]
        ])

        # (3) Annotations
        # CGC
        cgc_weight = bool(row["within_cgc_list"]) * \
            remind_cancer_scoring_weights["annotations"]["cgc"]["weight"]

        # Open Chromatin
        open_chromatin_weight = bool(
            row["open_chromatin"]) * remind_cancer_scoring_weights["annotations"]["open_chromatin"]["weight"]

        dataframe.loc[idx, "score_v2"] = np.round(
            np.sum([
                created_tf_weight,
                destroyed_tf_weight,
                recurrence_weight,
                purity_weight,
                af_weight,
                gene_expression_weight,
                cgc_weight,
                open_chromatin_weight
            ]), 2)

    dataframe = dataframe.sort_values("score_v2", ascending=False)

    return dataframe

with open("../../examples/results/configuration_file.json", "r") as f:
    scoring_weights = json.load(f)
    scoring_weights = scoring_weights["REMIND-Cancer_scoring_weights"]

data = _add_remind_cancer_score(
    dataframe=data,
    remind_cancer_scoring_weights=scoring_weights,
    ge_column="FPKM_Z_score"
)

In [143]:
scoring_weights

{'genomic': {'tfbs': {'creation_weight_per_tfbs': 2,
   'creation_weight_maximum': 6,
   'destruction_weight_per_tfbs': 2,
   'destruction_weight_maximum': 6},
  'recurrence': {'weight_per_recurrent_mutation': 5, 'weight_maximum': 25},
  'purity': {'purity_threshold_for_weight': 0.25, 'purity_weight': 10},
  'allele_frequency': {'af_threshold_for_weight': 0.3, 'af_weight': 10}},
 'transcriptomic': {'gene_expression': {'weight_per_unit_of_expression': 5,
   'weight_maximum': 25}},
 'annotations': {'open_chromatin': {'weight': 20}, 'cgc': {'weight': 15}}}

In [155]:
# Iterate through the recurrence scoring.
with open("../../examples/results/configuration_file.json", "r") as f:
    scoring_weights = json.load(f)
    scoring_weights = scoring_weights["REMIND-Cancer_scoring_weights"]

# scoring_weights["genomic"]["recurrence"]["weight_maximum"] = 10000

weight_mapping = {
    "Recurrence Weight": {
        "attribute": ["genomic", "recurrence", "weight_per_recurrent_mutation"],
        "range": np.linspace(1, 100, 100),
        "original_weight": scoring_weights["genomic"]["recurrence"]["weight_per_recurrent_mutation"],
    },
    "Gene Expression Weight": {
        "attribute": ["transcriptomic", "gene_expression", "weight_per_unit_of_expression"],
        "range": np.linspace(1, 100, 100),
        "original_weight": scoring_weights["transcriptomic"]["gene_expression"]["weight_per_unit_of_expression"],
    },
    "Purity Weight": {
        "attribute": ["genomic", "purity", "purity_weight"],
        "range": np.linspace(1, 100, 100),
        "original_weight": scoring_weights["genomic"]["purity"]["purity_weight"],
    },
    "Transcription Factor Creation Weight": {
        "attribute": ["genomic", "tfbs", "creation_weight_per_tfbs"],
        "range": np.linspace(1, 20, 100),
        "original_weight": scoring_weights["genomic"]["tfbs"]["creation_weight_per_tfbs"],
    },
    "Transcription Factor Destruction Weight": {
        "attribute": ["genomic", "tfbs", "destruction_weight_per_tfbs"],
        "range": np.linspace(1, 20, 100),
        "original_weight": scoring_weights["genomic"]["tfbs"]["destruction_weight_per_tfbs"],
    },
    "Allele Frequency Weight": {
        "attribute": ["genomic", "allele_frequency", "af_weight"],
        "range": np.linspace(1, 50, 100),
        "original_weight": scoring_weights["genomic"]["allele_frequency"]["af_weight"],
    },
    "CGC Weight": {
        "attribute": ["annotations", "cgc", "weight"],
        "range": np.linspace(1, 50, 100),
        "original_weight": scoring_weights["annotations"]["cgc"]["weight"],
    },
    "Open Chromatin Weight": {
        "attribute": ["annotations", "open_chromatin", "weight"],
        "range": np.linspace(1, 50, 100),
        "original_weight": scoring_weights["annotations"]["open_chromatin"]["weight"],
    }
 }


for name in weight_mapping:
    fig = go.Figure()

    for gene, position, color in [
        ("TERT", 1295228, "red"), 
        ("TERT", 1295250, "blue"), 
        ("RALY", 32580927, "green"), 
        ("CDC20", 43824529, "yellow")
    ]:
        x_axis_scoring_range = weight_mapping[name]["range"]
        y_axis_score = []

        for x in tqdm(x_axis_scoring_range, desc=f"Calculating scores for {gene} at position {position}"):
            scoring_weights_temp = deepcopy(scoring_weights)
            scoring_weights_temp[
                weight_mapping[name]["attribute"][0]
            ][
                weight_mapping[name]["attribute"][1]
            ][
                weight_mapping[name]["attribute"][2]
            ] = x
            data_temp = _add_remind_cancer_score(
                dataframe=data.copy(),
                remind_cancer_scoring_weights=scoring_weights_temp,
                ge_column="FPKM_Z_score"
            )

            y = get_rank_sum(data_temp, gene, position)
            y_axis_score.append(y)

        fig.add_trace(
            go.Scatter(
                x=x_axis_scoring_range,
                y=y_axis_score,
                mode="lines",
                name=f"{gene} {position}",
                marker=dict(
                    size=10,
                    color=color,
                )
            )
        )

        # Add the best theoretical score.
        best_score = np.sum(
            data[
                (data["GENE"] == gene) & (data["POS"] == position)
            ].reset_index(drop=True).index + 1
        )

        fig.add_hline(
            y=best_score,
            line=dict(color=color, width=1, dash="dash"),
            # annotation_text=f"Best Score: {best_score}",
            # name=f"Best Score {gene} {position}"
        )

    fig.update_layout(
        title=f"Iterrating through <b>{name}</b> Scoring",
        xaxis_title=name,
        yaxis_title="Sum of Ranking of Mutations",
        showlegend=True
    )

    fig.add_vline(
        x = weight_mapping[name]["original_weight"],
        line=dict(color="black", width=2, dash="dash"),
        annotation_text=f"Original Weight: {weight_mapping[name]['original_weight']}",
    )

    fig.show()

Calculating scores for TERT at position 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for RALY at position 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for CDC20 at position 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for RALY at position 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for CDC20 at position 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for RALY at position 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for CDC20 at position 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for RALY at position 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for CDC20 at position 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for RALY at position 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for CDC20 at position 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for RALY at position 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for CDC20 at position 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for RALY at position 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for CDC20 at position 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for TERT at position 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for RALY at position 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Calculating scores for CDC20 at position 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

In [132]:
x

np.float64(5.0)

In [133]:
y

np.int64(62)

In [134]:
data_temp

Unnamed: 0,score,pid,cohort,#CHROM,POS,GENE,SEQUENCE_CONTEXT,REF,ALT,strand,...,remaining_tfs,"JASPAR2020_CORE_vertebrates_non_redundant(tf_name,binding_affinity,seq1,seq2,raw,zscore,log,tf_sequence_logo)","paths_with_recurrence(format=path,pid,cohort,bp,ref,alt,gene,chr,raw_score,zscore,log_score,confidence,purity,af,(tfs_seperated_by_//))",num_original_mutations,num_final_mutations,num_promoter_mutations,num_recurrent_mutations,cn_score,ranking,score_v2
3,86.45,d692ecd0-2433-426b-9af2-a30c44a80f7c,THCA-US,5,1295228,TERT,"AGGGCCCGGA,GGGGCTGGGC",G,A,-,...,"TFAP2A,ELF1,ELF4,ETV6,KLF16,ZBTB7A,TFAP2B,TFAP...","TFAP2A,0.09557122923715045,GCCCCCTCCGGGCC,.,0....",87e0b332-60f3-4d59-8c4e-691a820c9ee1_tumor/snv...,1099,5,20,95,3,1,542.45
6,83.73,b8151614-b08f-49a3-ab6f-2e780f765a17,GBM-US,5,1295228,TERT,"AGGGCCCGGA,GGGGCTGGGC",G,A,-,...,"TFAP2A,ELF1,ELF4,ETV6,KLF16,ZBTB7A,TFAP2B,TFAP...","TFAP2A,0.09557122923715045,GCCCCCTCCGGGCC,.,0....",87e0b332-60f3-4d59-8c4e-691a820c9ee1_tumor/snv...,2788,11,55,95,not_available,2,539.73
5,85.00,1924d200-6929-4eef-8a7f-317f8fbab915,THCA-US,5,1295228,TERT,"AGGGCCCGGA,GGGGCTGGGC",G,A,-,...,"TFAP2A,ELF1,ELF4,ETV6,KLF16,SP8,ZBTB7A,TFAP2B,...","TFAP2A,0.09557122923715045,GCCCCCTCCGGGCC,.,0....",87e0b332-60f3-4d59-8c4e-691a820c9ee1_tumor/snv...,1715,8,63,95,not_available,3,539.38
8,82.39,5c02d399-07af-4573-a568-bc1b256bc8f8,THCA-US,5,1295228,TERT,"AGGGCCCGGA,GGGGCTGGGC",G,A,-,...,"TFAP2A,ELF1,ELF4,ETV6,KLF16,ZBTB7A,TFAP2B,TFAP...","TFAP2A,0.09557122923715045,GCCCCCTCCGGGCC,.,0....",87e0b332-60f3-4d59-8c4e-691a820c9ee1_tumor/snv...,965,3,22,95,not_available,4,538.39
10,81.89,96e3db14-2bb1-4f68-aed6-5e794750c96e,GBM-US,5,1295228,TERT,"AGGGCCCGGA,GGGGCTGGGC",G,A,-,...,"TFAP2A,ELF1,ELF4,ETV6,KLF16,ZBTB7A,TFAP2B,TFAP...","TFAP2A,0.09557122923715045,GCCCCCTCCGGGCC,.,0....",87e0b332-60f3-4d59-8c4e-691a820c9ee1_tumor/snv...,5102,32,141,95,not_available,5,537.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
993,52.13,14c5b81d-da49-4db1-9834-77711c2b1d38,READ-US,4,109090241,LEF1,"TGATTGAGGG,TTTTTTTTCT",C,T,-,...,"STAT1,STAT2","STAT1,9.92424,.,AGGGTTTTTTTTTCT,69.2928490212,...",.,548085,1685,7490,0,not_available,996,52.13
994,52.12,46693a2b-5105-4770-a9e1-031dfedeb694,LUAD-US,11,76493378,TSKU,"TTTCTGCACC,CGAGGCCCTC",C,A,+,...,"PLAG1,TFAP2B","TFAP2A,0.0837871805613741,AGGGCCTCGGGGTG,.,0.0...",.,20143,91,425,0,3,997,52.12
996,52.11,22edd867-dcd6-42b5-92bb-025ceb84b106,SKCM-US,2,119980904,STEAP3,"ATTGGGGGGG,ACCACAAAGG",G,A,+,...,"ZNF740,E2F8","RUNX1,11.6897,.,CTTTGTGGTTC,6.60147418021,0.68...",.,27192,125,502,0,3,998,52.11
997,52.10,d053f88f-09e6-472e-a691-bb293c461399,SKCM-US,4,37892372,TBC1D1,"CCCCCCTATT,CCCCCACAGA",C,T,+,...,"PLAG1,E2F7","PLAG1,0.15360511197812662,GGGGGGAATAGGGG,.,0.4...",.,19832,87,420,0,not_available,999,52.10


In [135]:
get_rank_sum(data_temp, gene, position)

np.int64(62)

# Try the mean, median, min and max of the scores.



In [102]:
# Iterate through the recurrence scoring.
with open("../../examples/results/configuration_file.json", "r") as f:
    scoring_weights = json.load(f)
    scoring_weights = scoring_weights["REMIND-Cancer_scoring_weights"]

# scoring_weights["genomic"]["recurrence"]["weight_maximum"] = 10000

weight_mapping = {
    "Recurrence Weight": {
        "attribute": ["genomic", "recurrence", "weight_per_recurrent_mutation"],
        "range": np.linspace(1, 100, 100),
        "original_weight": scoring_weights["genomic"]["recurrence"]["weight_per_recurrent_mutation"],
    },
    "Gene Expression Weight": {
        "attribute": ["transcriptomic", "gene_expression", "weight_per_unit_of_expression"],
        "range": np.linspace(1, 100, 100),
        "original_weight": scoring_weights["transcriptomic"]["gene_expression"]["weight_per_unit_of_expression"],
    },
    "Purity Weight": {
        "attribute": ["genomic", "purity", "purity_weight"],
        "range": np.linspace(1, 100, 100),
        "original_weight": scoring_weights["genomic"]["purity"]["purity_weight"],
    },
    "Transcription Factor Creation Weight": {
        "attribute": ["genomic", "tfbs", "creation_weight_per_tfbs"],
        "range": np.linspace(1, 20, 100),
        "original_weight": scoring_weights["genomic"]["tfbs"]["creation_weight_per_tfbs"],
    },
    "Transcription Factor Destruction Weight": {
        "attribute": ["genomic", "tfbs", "destruction_weight_per_tfbs"],
        "range": np.linspace(1, 20, 100),
        "original_weight": scoring_weights["genomic"]["tfbs"]["destruction_weight_per_tfbs"],
    },
    "Allele Frequency Weight": {
        "attribute": ["genomic", "allele_frequency", "af_weight"],
        "range": np.linspace(1, 50, 100),
        "original_weight": scoring_weights["genomic"]["allele_frequency"]["af_weight"],
    },
    "CGC Weight": {
        "attribute": ["annotations", "cgc", "weight"],
        "range": np.linspace(1, 50, 100),
        "original_weight": scoring_weights["annotations"]["cgc"]["weight"],
    },
    "Open Chromatin Weight": {
        "attribute": ["annotations", "open_chromatin", "weight"],
        "range": np.linspace(1, 50, 100),
        "original_weight": scoring_weights["annotations"]["open_chromatin"]["weight"],
    }
 }


for name in weight_mapping:
    fig = go.Figure()

    means = {}
    medians = {}
    mins = {}
    maxes = {}
    for gene, position, color in [
        ("TERT", 1295228, "red"), 
        ("TERT", 1295250, "blue"), 
        ("RALY", 32580927, "green"), 
        ("CDC20", 43824529, "yellow")
    ]:
        if gene not in means:
            means[gene] = {}
            medians[gene] = {}
            mins[gene] = {}
            maxes[gene] = {}
        
        if position not in means[gene]:
            means[gene][position] = []
            medians[gene][position] = []
            mins[gene][position] = []
            maxes[gene][position] = []

        x_axis_scoring_range = weight_mapping[name]["range"]
        y_axis_score = []

        for x in tqdm(x_axis_scoring_range, desc=f"{name}: {gene} {position}"):
            scoring_weights_temp = deepcopy(scoring_weights)
            scoring_weights_temp[
                weight_mapping[name]["attribute"][0]
            ][
                weight_mapping[name]["attribute"][1]
            ][
                weight_mapping[name]["attribute"][2]
            ] = x
            data_temp = _add_remind_cancer_score(
                dataframe=data.copy(),
                remind_cancer_scoring_weights=scoring_weights_temp,
                ge_column="FPKM_Z_score"
            )

            data_temp = data_temp.sort_values("score_v2", ascending=False, inplace=False)
            data_temp = data_temp.reset_index(drop=True)

            # Get the median rank.
            median = np.median(list(data_temp[(data_temp["GENE"] == gene) & (data_temp["POS"] == position)].index))
            mean = np.mean(list(data_temp[(data_temp["GENE"] == gene) & (data_temp["POS"] == position)].index))
            min = np.min(list(data_temp[(data_temp["GENE"] == gene) & (data_temp["POS"] == position)].index))
            max = np.max(list(data_temp[(data_temp["GENE"] == gene) & (data_temp["POS"] == position)].index))

            means[gene][position].append(mean)
            medians[gene][position].append(median)
            mins[gene][position].append(min)
            maxes[gene][position].append(max)

        create_subplots(medians, means, mins, maxes, name)


Recurrence Weight: TERT 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Recurrence Weight: TERT 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Recurrence Weight: RALY 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Recurrence Weight: CDC20 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Gene Expression Weight: TERT 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Gene Expression Weight: TERT 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Gene Expression Weight: RALY 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Gene Expression Weight: CDC20 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Purity Weight: TERT 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Purity Weight: TERT 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Purity Weight: RALY 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Purity Weight: CDC20 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Transcription Factor Creation Weight: TERT 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Transcription Factor Creation Weight: TERT 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Transcription Factor Creation Weight: RALY 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Transcription Factor Creation Weight: CDC20 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Transcription Factor Destruction Weight: TERT 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Transcription Factor Destruction Weight: TERT 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Transcription Factor Destruction Weight: RALY 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Transcription Factor Destruction Weight: CDC20 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Allele Frequency Weight: TERT 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Allele Frequency Weight: TERT 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Allele Frequency Weight: RALY 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Allele Frequency Weight: CDC20 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

CGC Weight: TERT 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

CGC Weight: TERT 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

CGC Weight: RALY 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

CGC Weight: CDC20 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

Open Chromatin Weight: TERT 1295228:   0%|          | 0/100 [00:00<?, ?it/s]

Open Chromatin Weight: TERT 1295250:   0%|          | 0/100 [00:00<?, ?it/s]

Open Chromatin Weight: RALY 32580927:   0%|          | 0/100 [00:00<?, ?it/s]

Open Chromatin Weight: CDC20 43824529:   0%|          | 0/100 [00:00<?, ?it/s]

In [101]:
# For each gene and position, plot the four graphs in a plotly subplot.
import plotly.subplots as sp

def create_subplots(medians, means, mins, maxes, name):
    """
    Create subplots for the given means, medians, mins, and maxes.
    """
    fig = sp.make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            "Mean Rank", "Median Rank",
            "Min Rank", "Max Rank"
        ],
    )

    # Plot the mean rank.
    fig.add_trace(
        go.Scatter(
            x=weight_mapping[name]["range"],
            y=means[gene][position],
            mode="lines",
            name=f"{gene} {position} Mean Rank",
            marker=dict(size=10, color="blue")
        ),
        row=1, col=1
    )
    # Also, add a vertical line for the original weight.
    fig.add_vline(
        x=weight_mapping[name]["original_weight"],
        line=dict(color="black", width=2, dash="dash"),
    )

    # Plot the median rank.
    fig.add_trace(
        go.Scatter(
            x=weight_mapping[name]["range"],
            y=medians[gene][position],
            mode="lines",
            name=f"{gene} {position} Median Rank",
            marker=dict(size=10, color="orange")
        ),
        row=1, col=2
    )
    # Also, add a vertical line for the original weight.
    fig.add_vline(
        x=weight_mapping[name]["original_weight"],
        line=dict(color="black", width=2, dash="dash"),
    )

    # Plot the min rank.
    fig.add_trace(
        go.Scatter(
            x=weight_mapping[name]["range"],
            y=mins[gene][position],
            mode="lines",
            name=f"{gene} {position} Min Rank",
            marker=dict(size=10, color="green")
        ),
        row=2, col=1
    )
    # Also, add a vertical line for the original weight.
    fig.add_vline(
        x=weight_mapping[name]["original_weight"],
        line=dict(color="black", width=2, dash="dash"),
    )

    # Plot the max rank.
    fig.add_trace(
        go.Scatter(
            x=weight_mapping[name]["range"],
            y=maxes[gene][position],
            mode="lines",
            name=f"{gene} {position} Max Rank",
            marker=dict(size=10, color="red")
        ),
        row=2, col=2
    )
    # Also, add a vertical line for the original weight.
    fig.add_vline(
        x=weight_mapping[name]["original_weight"],
        line=dict(color="black", width=2, dash="dash"),
    )
    fig.update_layout(
        title=f"Iterrating through <b>{name}</b> Scoring for {gene} at position {position} (n={data[(data['GENE'] == gene) & (data['POS'] == position)].shape[0]})",
        showlegend=True
    )
    # Add a common x-axis label.
    fig.update_xaxes(title_text=name, row=2, col=1)
    fig.update_xaxes(title_text=name, row=2, col=2)
    # Add a common y-axis label.
    fig.update_yaxes(title_text="Rank", row=1, col=1)
    fig.update_yaxes(title_text="Rank", row=2, col=1)

    fig.write_html(f"./plots/{name.replace(' ', '_')}_{gene}_{position}_score_iteration.html")
    # fig.show()
