In [1]:
import shutil
from pathlib import Path

import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, FactorRange, Whisker, Legend
from bokeh.palettes import Bright6, Category10, Category20
from bokeh.transform import factor_cmap, dodge
from bokeh.io import output_notebook
from bokeh.io import export_svgs, export_png

from dctap.libs.pandas import set_defaultoptions, display, displaydf_full
from dctap.qpcr.constants import QPCRPATHS
from dctap.qpcr.utils.core import *

In [2]:
# Set pandas settings
# Project helper that configures the pandas module passed in; `supresscopywarning`
# is spelled as in the call (presumably the helper's own keyword name - confirm).
set_defaultoptions(pd, supresscopywarning=None)
# Make subsequent Bokeh `show()` calls render inline in the notebook.
output_notebook()

In [3]:
# Load every plate of the experiment and stack them into one frame
experiment_id = "JR92-250106"
plate_ids = [
    "JR92-250106-plate1",
    "JR92-250106-plate2",
    "JR92-250106-plate3",
]

# One annotated frame per plate, in the order listed above
dfs = [get_plate_data(experiment_id, plate_id) for plate_id in plate_ids]

# Stack the plates and replace the per-plate row labels with a fresh 0..n-1 index
df = pd.concat(dfs).reset_index(drop=True)

display(df)

Unnamed: 0,experiment_id,Well,Sample,Primer,Cq,plate_id
0,JR92-250106,A01,K562_MOI1_ALAS_BR1_R1,bActin,20.363152,JR92-250106-plate1
1,JR92-250106,A02,K562_MOI1_ALAS_BR1_R1,bActin,20.534508,JR92-250106-plate1
2,JR92-250106,A03,K562_MOI1_ALAS_BR2_R1,bActin,20.610607,JR92-250106-plate1
3,JR92-250106,A04,K562_MOI1_ALAS_BR2_R1,bActin,20.773966,JR92-250106-plate1
4,JR92-250106,A05,K562_MOI1_ALAS_BR3_R1,bActin,20.574499,JR92-250106-plate1
...,...,...,...,...,...,...
1051,JR92-250106,P20,K562_CTRL_SLC25A39_BR1_R2,SLC25A39,21.968748,JR92-250106-plate3
1052,JR92-250106,P21,K562_CTRL_SLC25A39_BR2_R2,SLC25A39,21.564505,JR92-250106-plate3
1053,JR92-250106,P22,K562_CTRL_SLC25A39_BR2_R2,SLC25A39,22.004490,JR92-250106-plate3
1054,JR92-250106,P23,K562_CTRL_SLC25A39_BR3_R2,SLC25A39,22.718958,JR92-250106-plate3


In [4]:
# Break each sample name into its "_"-separated parts (shown below as columns 0..4)
sample_names = cast(pd.Series, df["Sample"])
df_samples = get_sample_metadata(sample_names, sep="_")

with displaydf_full():
    display(df_samples)

Unnamed: 0,Sample,0,1,2,3,4
0,K562_MOI1_ALAS_BR1_R1,K562,MOI1,ALAS,BR1,R1
1,K562_MOI1_ALAS_BR2_R1,K562,MOI1,ALAS,BR2,R1
2,K562_MOI1_ALAS_BR3_R1,K562,MOI1,ALAS,BR3,R1
3,K562_CTRL_ALAS_BR1_R1,K562,CTRL,ALAS,BR1,R1
4,K562_CTRL_ALAS_BR2_R1,K562,CTRL,ALAS,BR2,R1
5,K562_CTRL_ALAS_BR3_R1,K562,CTRL,ALAS,BR3,R1
6,K562_MOI1_TFR2_BR1_R1,K562,MOI1,TFR2,BR1,R1
7,K562_MOI1_TFR2_BR2_R1,K562,MOI1,TFR2,BR2,R1
8,K562_MOI1_TFR2_BR3_R1,K562,MOI1,TFR2,BR3,R1
9,K562_CTRL_TFR2_BR1_R1,K562,CTRL,TFR2,BR1,R1


In [5]:
# Add three grouping labels to every well. Each entry of `merge_cols` pairs with the
# condition at the same position in `conditions`; judging by the displayed output, its
# digits name the positional df_samples columns (0-4) that are joined with "_":
#   bio_reps        "0124" -> e.g. K562_MOI1_ALAS_R1   (the BR# field, column 3, is dropped)
#   ctrl_calibrator "014"  -> e.g. K562_MOI1_R1
#   cond_targetgene "024"  -> e.g. K562_ALAS_R1
# NOTE(review): `df` is rebound to the annotated frame, so re-running this cell feeds
# the already-annotated frame back into set_conditions - confirm that is harmless.
conditions = ["bio_reps", "ctrl_calibrator", "cond_targetgene"]
df = set_conditions(
    df,
    df_samples,
    conditions=conditions,
    merge_cols=["0124", "014", "024"],
)

display(df)

Unnamed: 0,experiment_id,Well,Sample,Primer,Cq,plate_id,bio_reps,ctrl_calibrator,cond_targetgene
0,JR92-250106,A01,K562_MOI1_ALAS_BR1_R1,bActin,20.363152,JR92-250106-plate1,K562_MOI1_ALAS_R1,K562_MOI1_R1,K562_ALAS_R1
1,JR92-250106,A02,K562_MOI1_ALAS_BR1_R1,bActin,20.534508,JR92-250106-plate1,K562_MOI1_ALAS_R1,K562_MOI1_R1,K562_ALAS_R1
2,JR92-250106,A03,K562_MOI1_ALAS_BR2_R1,bActin,20.610607,JR92-250106-plate1,K562_MOI1_ALAS_R1,K562_MOI1_R1,K562_ALAS_R1
3,JR92-250106,A04,K562_MOI1_ALAS_BR2_R1,bActin,20.773966,JR92-250106-plate1,K562_MOI1_ALAS_R1,K562_MOI1_R1,K562_ALAS_R1
4,JR92-250106,A05,K562_MOI1_ALAS_BR3_R1,bActin,20.574499,JR92-250106-plate1,K562_MOI1_ALAS_R1,K562_MOI1_R1,K562_ALAS_R1
...,...,...,...,...,...,...,...,...,...
1051,JR92-250106,P20,K562_CTRL_SLC25A39_BR1_R2,SLC25A39,21.968748,JR92-250106-plate3,K562_CTRL_SLC25A39_R2,K562_CTRL_R2,K562_SLC25A39_R2
1052,JR92-250106,P21,K562_CTRL_SLC25A39_BR2_R2,SLC25A39,21.564505,JR92-250106-plate3,K562_CTRL_SLC25A39_R2,K562_CTRL_R2,K562_SLC25A39_R2
1053,JR92-250106,P22,K562_CTRL_SLC25A39_BR2_R2,SLC25A39,22.004490,JR92-250106-plate3,K562_CTRL_SLC25A39_R2,K562_CTRL_R2,K562_SLC25A39_R2
1054,JR92-250106,P23,K562_CTRL_SLC25A39_BR3_R2,SLC25A39,22.718958,JR92-250106-plate3,K562_CTRL_SLC25A39_R2,K562_CTRL_R2,K562_SLC25A39_R2


In [None]:
# deltaCq of every target primer against the bActin reference.
reference_primer = "bActin"
all_primers = get_primers(df)
print(all_primers)

# Skip the reference by name rather than by position, so the result does not
# depend on bActin happening to be the first primer returned.
deltacq_frames = [
    get_deltaCq_expression_data(
        df, ref_primer=reference_primer, test_primer=primer
    )
    for primer in all_primers
    if primer != reference_primer
]

# Stack the per-primer frames and give the combined frame a fresh 0..n-1 index
# (the original reset the index of `df`, which had already been reset, and left
# `df1` with the repeated per-primer row labels).
df1 = pd.concat(deltacq_frames).reset_index(drop=True)

with displaydf_full():
    display(df1)

SyntaxError: invalid syntax (1171369458.py, line 8)

In [None]:
# Summary statistics of deltaCq, grouped by the `bio_reps` label
df2 = get_deltaCq_stats(df1, biorep_col="bio_reps")

# Target genes and the replicate suffixes (R1/R2) present for each of them.
# Both calibrator lists below are derived from this single table so that entry i
# of the control list always pairs with entry i of the condition list.
genes_r1_r2 = ["ALAS", "TFR2", "PIN1", "PIM1", "FADS1", "RTN4", "GATA1", "SLC25A39"]
genes_r1_only = ["RNF181", "EIF3K", "YIF1B", "POLR2J", "RPL27", "RPS16"]
gene_replicates = [(gene, rep) for gene in genes_r1_r2 for rep in ("R1", "R2")]
gene_replicates += [(gene, "R1") for gene in genes_r1_only]

df_calibrators = get_calibrators(
    df2,
    ctrl_col="bio_reps",
    condition_col="cond_targetgene",
    # Control sample names, e.g. "K562_CTRL_ALAS_R1"
    assign_ctrl_samples=[f"K562_CTRL_{gene}_{rep}" for gene, rep in gene_replicates],
    # Matching condition groups, e.g. "K562_ALAS_R1"
    assign_cond_group=[f"K562_{gene}_{rep}" for gene, rep in gene_replicates],
)
with displaydf_full():
    display(df_calibrators)

Unnamed: 0,deltaCq_ALASvbActin,deltaCq_TFR2vbActin,deltaCq_PIN1vbActin,deltaCq_PIM1vbActin,deltaCq_FADS1vbActin,deltaCq_RTN4vbActin,deltaCq_GATA1vbActin,deltaCq_SLC25A39vbActin,deltaCq_RNF181vbActin,deltaCq_EIF3KvbActin,deltaCq_YIF1BvbActin,deltaCq_POLR2JvbActin,deltaCq_RPL27vbActin,deltaCq_RPS16vbActin
K562_ALAS_R1,3.48289,,,,,,,,,,,,,
K562_ALAS_R2,5.135822,,,,,,,,,,,,,
K562_TFR2_R1,,4.240532,,,,,,,,,,,,
K562_TFR2_R2,,4.766577,,,,,,,,,,,,
K562_PIN1_R1,,,5.189736,,,,,,,,,,,
K562_PIN1_R2,,,5.975743,,,,,,,,,,,
K562_PIM1_R1,,,,5.351144,,,,,,,,,,
K562_PIM1_R2,,,,3.823137,,,,,,,,,,
K562_FADS1_R1,,,,,3.516164,,,,,,,,,
K562_FADS1_R2,,,,,4.072538,,,,,,,,,


: 

In [None]:
# Fold change per bio_reps group, computed by the project's delta-delta-Cq helper
# from the deltaCq table and the calibrators selected above
df3 = get_deltadeltaCqMethod_foldchange(
    df1,
    df_calibrators,
    biorep_col="bio_reps",
    condition_col="cond_targetgene",
)

# Keep a CSV copy of the fold-change table in the user's Downloads folder
df3.to_csv("~/Downloads/jr92.csv")

with displaydf_full():
    display(df3)

Unnamed: 0,bio_reps,ctrl_calibrator,cond_targetgene,2^(deltadeltaCq_ALASvbActin)_mean,2^(deltadeltaCq_ALASvbActin)_std,2^(deltadeltaCq_ALASvbActin)_ste,2^(deltadeltaCq_ALASvbActin)_ci95_upper,2^(deltadeltaCq_ALASvbActin)_ci95_lower,2^(deltadeltaCq_TFR2vbActin)_mean,2^(deltadeltaCq_TFR2vbActin)_std,2^(deltadeltaCq_TFR2vbActin)_ste,2^(deltadeltaCq_TFR2vbActin)_ci95_upper,2^(deltadeltaCq_TFR2vbActin)_ci95_lower,2^(deltadeltaCq_PIN1vbActin)_mean,2^(deltadeltaCq_PIN1vbActin)_std,2^(deltadeltaCq_PIN1vbActin)_ste,2^(deltadeltaCq_PIN1vbActin)_ci95_upper,2^(deltadeltaCq_PIN1vbActin)_ci95_lower,2^(deltadeltaCq_PIM1vbActin)_mean,2^(deltadeltaCq_PIM1vbActin)_std,2^(deltadeltaCq_PIM1vbActin)_ste,2^(deltadeltaCq_PIM1vbActin)_ci95_upper,2^(deltadeltaCq_PIM1vbActin)_ci95_lower,2^(deltadeltaCq_FADS1vbActin)_mean,2^(deltadeltaCq_FADS1vbActin)_std,2^(deltadeltaCq_FADS1vbActin)_ste,2^(deltadeltaCq_FADS1vbActin)_ci95_upper,2^(deltadeltaCq_FADS1vbActin)_ci95_lower,2^(deltadeltaCq_RTN4vbActin)_mean,2^(deltadeltaCq_RTN4vbActin)_std,2^(deltadeltaCq_RTN4vbActin)_ste,2^(deltadeltaCq_RTN4vbActin)_ci95_upper,2^(deltadeltaCq_RTN4vbActin)_ci95_lower,2^(deltadeltaCq_GATA1vbActin)_mean,2^(deltadeltaCq_GATA1vbActin)_std,2^(deltadeltaCq_GATA1vbActin)_ste,2^(deltadeltaCq_GATA1vbActin)_ci95_upper,2^(deltadeltaCq_GATA1vbActin)_ci95_lower,2^(deltadeltaCq_SLC25A39vbActin)_mean,2^(deltadeltaCq_SLC25A39vbActin)_std,2^(deltadeltaCq_SLC25A39vbActin)_ste,2^(deltadeltaCq_SLC25A39vbActin)_ci95_upper,2^(deltadeltaCq_SLC25A39vbActin)_ci95_lower,2^(deltadeltaCq_RNF181vbActin)_mean,2^(deltadeltaCq_RNF181vbActin)_std,2^(deltadeltaCq_RNF181vbActin)_ste,2^(deltadeltaCq_RNF181vbActin)_ci95_upper,2^(deltadeltaCq_RNF181vbActin)_ci95_lower,2^(deltadeltaCq_EIF3KvbActin)_mean,2^(deltadeltaCq_EIF3KvbActin)_std,2^(deltadeltaCq_EIF3KvbActin)_ste,2^(deltadeltaCq_EIF3KvbActin)_ci95_upper,2^(deltadeltaCq_EIF3KvbActin)_ci95_lower,2^(deltadeltaCq_YIF1BvbActin)_mean,2^(deltadeltaCq_YIF1BvbActin)_std,2^(deltadeltaCq_YIF1BvbActin)_ste,
2^(deltadeltaCq_YIF1BvbActin)_ci95_upper,2^(deltadeltaCq_YIF1BvbActin)_ci95_lower,2^(deltadeltaCq_POLR2JvbActin)_mean,2^(deltadeltaCq_POLR2JvbActin)_std,2^(deltadeltaCq_POLR2JvbActin)_ste,2^(deltadeltaCq_POLR2JvbActin)_ci95_upper,2^(deltadeltaCq_POLR2JvbActin)_ci95_lower,2^(deltadeltaCq_RPL27vbActin)_mean,2^(deltadeltaCq_RPL27vbActin)_std,2^(deltadeltaCq_RPL27vbActin)_ste,2^(deltadeltaCq_RPL27vbActin)_ci95_upper,2^(deltadeltaCq_RPL27vbActin)_ci95_lower,2^(deltadeltaCq_RPS16vbActin)_mean,2^(deltadeltaCq_RPS16vbActin)_std,2^(deltadeltaCq_RPS16vbActin)_ste,2^(deltadeltaCq_RPS16vbActin)_ci95_upper,2^(deltadeltaCq_RPS16vbActin)_ci95_lower
0,K562_CTRL_ALAS_R1,K562_CTRL_R1,K562_ALAS_R1,1.110301,0.539467,0.311461,1.720765,0.499838,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,K562_CTRL_ALAS_R2,K562_CTRL_R2,K562_ALAS_R2,1.002966,0.09436,0.054479,1.109745,0.896188,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,K562_MOI1_ALAS_R1,K562_MOI1_R1,K562_ALAS_R1,0.62473,0.137959,0.079651,0.780846,0.468614,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,K562_MOI1_ALAS_R2,K562_MOI1_R2,K562_ALAS_R2,3.300738,0.728511,0.420606,4.125126,2.476349,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,K562_CTRL_TFR2_R1,K562_CTRL_R1,K562_TFR2_R1,,,,,,1.023057,0.27598,0.159337,1.335358,0.710756,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,K562_CTRL_TFR2_R2,K562_CTRL_R2,K562_TFR2_R2,,,,,,1.022205,0.263236,0.151979,1.320084,0.724326,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,K562_MOI1_TFR2_R1,K562_MOI1_R1,K562_TFR2_R1,,,,,,0.388696,0.115777,0.066844,0.51971,0.257681,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,K562_MOI1_TFR2_R2,K562_MOI1_R2,K562_TFR2_R2,,,,,,0.558386,0.137733,0.07952,0.714246,0.402526,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,K562_CTRL_PIN1_R1,K562_CTRL_R1,K562_PIN1_R1,,,,,,,,,,,1.036739,0.325398,0.187869,1.404962,0.668516,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,K562_CTRL_PIN1_R2,K562_CTRL_R2,K562_PIN1_R2,,,,,,,,,,,1.018186,0.241346,0.139341,1.291295,0.745077,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


: 

In [None]:
def rename_col(col_name: str) -> str:
    """Shorten a fold-change column name to ``<target>_<stat>``.

    A name shaped like ``2^(deltadeltaCq_ALASvbActin)_mean`` becomes ``ALAS_mean``:
    the text between the last usable ``_`` and the ``v`` inside the parentheses is
    kept as the target, and everything after ``)_`` is kept as the statistic.
    Names that do not fit this shape (e.g. ``bio_reps``) are returned unchanged.
    """
    found = re.match(r"^2\^\(.*_(.*?)v.*\)_(.*)$", col_name)
    if found is None:
        return col_name
    target, stat = found.groups()
    return f"{target}_{stat}"

def create_float_range(start: float, end: float, interval: float) -> list:
    """
    Creates a list of floats starting at `start`, increasing by `interval`,
    and ending exactly at `end` (even if the step doesn't perfectly divide the range).

    Args:
        start: First value of the list (omitted when it is not below `end`).
        end: Last value of the list; always appended as-is.
        interval: Positive spacing between consecutive values.

    Returns:
        ``[start, start + interval, ..., end]``; just ``[end]`` when `start` is
        not below `end`.

    Raises:
        ValueError: If `start` is below `end` but `interval` is not positive,
            because the sequence would never reach `end`.
    """
    # Small tolerance so a value that lands on `end` up to rounding error is not
    # emitted twice (once from the loop, once as the final `end`).
    tolerance = 1e-9
    if start < end - tolerance and interval <= 0:
        raise ValueError(
            f"interval must be positive to step from {start} to {end}, got {interval}"
        )

    floats = []
    step = 0
    x = start
    while x < end - tolerance:
        floats.append(x)
        step += 1
        # Recompute from `start` instead of accumulating `x += interval`, so the
        # rounding error does not grow with the number of steps.
        x = start + step * interval
    # Ensure the final value is exactly `end`
    floats.append(end)
    return floats

# Bar chart of relative fold change with two x categories ("Ctrl", "Sample") and one
# dodged bar (plus a ci95 whisker) per <GENE>_<R#> condition.
# NOTE: `df` is rebound here to the renamed fold-change table; earlier cells use the
# same name for the Cq table.
df = df3.copy()
df = df.rename(columns=rename_col)
display(df)
# dict.fromkeys de-duplicates while keeping first-seen order.
# primers: gene part of each condition name, e.g. "K562_ALAS_R1" -> "ALAS"
primers = list(dict.fromkeys([re.sub(r"^K562_(.*?)_R.*",r"\1",primer) for primer in df.cond_targetgene]))
# cond_targetgene: condition names without the "K562_" prefix, e.g. "ALAS_R1"
cond_targetgene = list(dict.fromkeys([re.sub(r"^K562_(.*)$", r"\1", cond_group) for cond_group in df.cond_targetgene]))

# Extracting foldchange and cleaning up column names
# Every column added to `data` below is a [control value, MOI1 value] pair that lines
# up with these two x categories.
data = {
    "Samples": ["Ctrl", "Sample"]
}

for primer in primers:
    # Placeholders only; each of these is reassigned further down before it is read.
    searchrow1ctrl = ""
    searchrow2ctrl = ""
    searchrow1moi1 = ""
    searchrow2moi1 = ""
    searchcolmean = ""
    searchcolup = ""
    searchcollo = ""
    # The six primers listed here are looked up for R1 only; every other primer is
    # looked up for both R1 and R2.
    if primer not in ["RNF181", "EIF3K", "YIF1B", "POLR2J", "RPL27", "RPS16"]:
        searchrow1ctrl = "K562_CTRL_" + primer + "_R1"
        searchrow2ctrl = "K562_CTRL_" + primer + "_R2"
        searchrow1moi1 = "K562_MOI1_" + primer + "_R1"
        searchrow2moi1 = "K562_MOI1_" + primer + "_R2"
    else:
        searchrow1ctrl = "K562_CTRL_" + primer + "_R1"
        searchrow1moi1 = "K562_MOI1_" + primer + "_R1"

        # None marks "no R2 rows for this primer" for the checks below.
        searchrow2ctrl = None
        searchrow2moi1 = None

    # Column names produced by rename_col, e.g. "ALAS_mean"
    searchcolmean = primer + "_mean"
    searchcolup = primer + "_ci95_upper"
    searchcollo = primer + "_ci95_lower"

    # First row whose bio_reps label matches; raises IndexError if there is none.
    ctrl1_mean = df.loc[df["bio_reps"] == searchrow1ctrl, searchcolmean].iloc[0]
    ctrl1_up = df.loc[df["bio_reps"] == searchrow1ctrl, searchcolup].iloc[0]
    ctrl1_lo = df.loc[df["bio_reps"] == searchrow1ctrl, searchcollo].iloc[0]

    moi11_mean = df.loc[df["bio_reps"] == searchrow1moi1, searchcolmean].iloc[0]
    moi11_up = df.loc[df["bio_reps"] == searchrow1moi1, searchcolup].iloc[0]
    moi11_lo = df.loc[df["bio_reps"] == searchrow1moi1, searchcollo].iloc[0]

    if searchrow2ctrl is not None:
        ctrl2_mean = df.loc[df["bio_reps"] == searchrow2ctrl, searchcolmean].iloc[0]
        ctrl2_up = df.loc[df["bio_reps"] == searchrow2ctrl, searchcolup].iloc[0]
        ctrl2_lo = df.loc[df["bio_reps"] == searchrow2ctrl, searchcollo].iloc[0]

        moi12_mean = df.loc[df["bio_reps"] == searchrow2moi1, searchcolmean].iloc[0]
        moi12_up = df.loc[df["bio_reps"] == searchrow2moi1, searchcolup].iloc[0]
        moi12_lo = df.loc[df["bio_reps"] == searchrow2moi1, searchcollo].iloc[0]

    # Keys follow the "<GENE>_<R#>" names in cond_targetgene, with "_up"/"_lo"
    # holding the whisker ends.
    new_data1 = {
        primer + "_R1": [ctrl1_mean, moi11_mean],
        primer + "_R1_up": [ctrl1_up, moi11_up],
        primer + "_R1_lo": [ctrl1_lo, moi11_lo],
    }

    new_data2 = {}
    if searchrow2ctrl is not None:
        new_data2 = {
        primer + "_R2": [ctrl2_mean, moi12_mean],
        primer + "_R2_up": [ctrl2_up, moi12_up],
        primer + "_R2_lo": [ctrl2_lo, moi12_lo],
        }

    data.update(new_data1)
    data.update(new_data2)

source = ColumnDataSource(data=data)

# Plotting the figure
p = figure(
    x_range=["Ctrl", "Sample"], y_range=(0.03, 12),
    height=600, width=1200,
    title="Relative fold change from JR92 qPCR Validation",
    toolbar_location=None, tools="", y_axis_type="log"
)
# Empty legend placed to the right of the plot area; the vbar calls below fill it.
p.add_layout(Legend(), "right")

bar_width = 0.04
# Offsets that centre len(cond_targetgene) bars of width bar_width on each category:
# they run from -end_dodge to +end_dodge in steps of bar_width.
end_dodge = ((len(cond_targetgene) / 2) * bar_width ) - (bar_width / 2)
str_dodge =  -1 * end_dodge
dodge_intervals = create_float_range(str_dodge, end_dodge, bar_width)

for i, condition in enumerate(cond_targetgene):
    # Wrap around the 20-colour palette when there are more than 20 conditions.
    colors = i % 20
    # bottom=0.1 rather than 0: presumably because the y axis is logarithmic.
    p.vbar(
        x=dodge("Samples", dodge_intervals[i], range=p.x_range),
        top=condition, bottom=0.1, width=bar_width, source=source,
        color=Category20[20][colors], legend_label=condition
    )
    # Whisker spanning the ci95 lower/upper values, at the same dodge offset as the bar
    err = Whisker(
        base=dodge("Samples",dodge_intervals[i], range=p.x_range),
        upper=condition + "_up", lower=condition + "_lo", source=source, level="overlay"
    )
    err.upper_head.size = 10
    err.lower_head.size = 10
    p.add_layout(err)


p.legend.orientation = "vertical"

# Export directory
# NOTE: `source` is rebound from the ColumnDataSource to a filesystem path here; the
# glyphs and whiskers created above were already given the data source.
exportfilename = "JR92-250106-test.svg"
source = QPCRPATHS.ROOT/ "analysis" / exportfilename
destination = QPCRPATHS.DATADIR / experiment_id / "250129_qpcr_analysis" / "plots" / exportfilename

# SVG export is currently disabled; the paths above are only used by these lines.
# p.output_backend = "svg"
# export_svgs(p, filename=exportfilename)
# shutil.move(str(source), str(destination))

show(p)

Unnamed: 0,bio_reps,ctrl_calibrator,cond_targetgene,ALAS_mean,ALAS_std,ALAS_ste,ALAS_ci95_upper,ALAS_ci95_lower,TFR2_mean,TFR2_std,TFR2_ste,TFR2_ci95_upper,TFR2_ci95_lower,PIN1_mean,PIN1_std,PIN1_ste,PIN1_ci95_upper,PIN1_ci95_lower,PIM1_mean,PIM1_std,PIM1_ste,PIM1_ci95_upper,PIM1_ci95_lower,FADS1_mean,FADS1_std,FADS1_ste,FADS1_ci95_upper,FADS1_ci95_lower,RTN4_mean,RTN4_std,RTN4_ste,RTN4_ci95_upper,RTN4_ci95_lower,GATA1_mean,GATA1_std,GATA1_ste,GATA1_ci95_upper,GATA1_ci95_lower,SLC25A39_mean,SLC25A39_std,SLC25A39_ste,SLC25A39_ci95_upper,SLC25A39_ci95_lower,RNF181_mean,RNF181_std,RNF181_ste,RNF181_ci95_upper,RNF181_ci95_lower,EIF3K_mean,EIF3K_std,EIF3K_ste,EIF3K_ci95_upper,EIF3K_ci95_lower,YIF1B_mean,YIF1B_std,YIF1B_ste,YIF1B_ci95_upper,YIF1B_ci95_lower,POLR2J_mean,POLR2J_std,POLR2J_ste,POLR2J_ci95_upper,POLR2J_ci95_lower,RPL27_mean,RPL27_std,RPL27_ste,RPL27_ci95_upper,RPL27_ci95_lower,RPS16_mean,RPS16_std,RPS16_ste,RPS16_ci95_upper,RPS16_ci95_lower
0,K562_CTRL_ALAS_R1,K562_CTRL_R1,K562_ALAS_R1,1.110301,0.539467,0.311461,1.720765,0.499838,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,K562_CTRL_ALAS_R2,K562_CTRL_R2,K562_ALAS_R2,1.002966,0.094360,0.054479,1.109745,0.896188,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,K562_MOI1_ALAS_R1,K562_MOI1_R1,K562_ALAS_R1,0.624730,0.137959,0.079651,0.780846,0.468614,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,K562_MOI1_ALAS_R2,K562_MOI1_R2,K562_ALAS_R2,3.300738,0.728511,0.420606,4.125126,2.476349,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,K562_CTRL_TFR2_R1,K562_CTRL_R1,K562_TFR2_R1,,,,,,1.023057,0.27598,0.159337,1.335358,0.710756,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,K562_MOI1_POLR2J_R1,K562_MOI1_R1,K562_POLR2J_R1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.256544,0.114352,0.066021,0.385946,0.127142,,,,,,,,,,
40,K562_CTRL_RPL27_R1,K562_CTRL_R1,K562_RPL27_R1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.127259,0.708585,0.409102,1.929099,0.325419,,,,,
41,K562_MOI1_RPL27_R1,K562_MOI1_R1,K562_RPL27_R1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.514902,0.227639,0.131427,0.772499,0.257304,,,,,
42,K562_CTRL_RPS16_R1,K562_CTRL_R1,K562_RPS16_R1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.123754,0.668148,0.385756,1.879835,0.367673


: 

In [None]:
# Bar chart of relative fold change with one x category per <GENE>_<R#> condition and
# two dodged bars (Ctrl, MOI1) per category, each with a ci95 whisker.
# NOTE: `df` is rebound here to the renamed fold-change table; earlier cells use the
# same name for the Cq table.
df = df3.copy()
df = df.rename(columns=rename_col)
display(df)
# dict.fromkeys de-duplicates while keeping first-seen order.
# primers: gene part of each condition name, e.g. "K562_ALAS_R1" -> "ALAS"
primers = list(dict.fromkeys([re.sub(r"^K562_(.*?)_R.*",r"\1",primer) for primer in df.cond_targetgene]))
cond_targetgene = list(dict.fromkeys([cond for cond in df.cond_targetgene]))
# Condition names without the "K562_" prefix, e.g. "ALAS_R1"; used as x-axis factors
cond_targetgene_shorten = list(dict.fromkeys([re.sub(r"^K562_(.*)$", r"\1", cond_group) for cond_group in df.cond_targetgene]))

# Extracting foldchange and cleaning up column names
# NOTE: this dict is never read; `data` is rebuilt from scratch after the loop.
data = {
    "Genes": cond_targetgene_shorten,
}

# One value is appended per condition, primer by primer (R1, then R2 when present).
# The lists are plotted against cond_targetgene_shorten, so this append order has to
# match the first-seen order of the conditions in df.cond_targetgene.
ctrl_list = []
moi1_list = []
ctrl_list_up = []
moi1_list_up = []
ctrl_list_lo = []
moi1_list_lo = []

for primer in primers:
    # Placeholders only; each of these is reassigned further down before it is read.
    searchrow1ctrl = ""
    searchrow2ctrl = ""
    searchrow1moi1 = ""
    searchrow2moi1 = ""
    searchcolmean = ""
    searchcolup = ""
    searchcollo = ""
    # The six primers listed here are looked up for R1 only; every other primer is
    # looked up for both R1 and R2.
    if primer not in ["RNF181", "EIF3K", "YIF1B", "POLR2J", "RPL27", "RPS16"]:
        searchrow1ctrl = "K562_CTRL_" + primer + "_R1"
        searchrow2ctrl = "K562_CTRL_" + primer + "_R2"
        searchrow1moi1 = "K562_MOI1_" + primer + "_R1"
        searchrow2moi1 = "K562_MOI1_" + primer + "_R2"
    else:
        searchrow1ctrl = "K562_CTRL_" + primer + "_R1"
        searchrow1moi1 = "K562_MOI1_" + primer + "_R1"

        # None marks "no R2 rows for this primer" for the checks below.
        searchrow2ctrl = None
        searchrow2moi1 = None

    # Column names produced by rename_col, e.g. "ALAS_mean"
    searchcolmean = primer + "_mean"
    searchcolup = primer + "_ci95_upper"
    searchcollo = primer + "_ci95_lower"

    # First row whose bio_reps label matches; raises IndexError if there is none.
    ctrl1_mean = df.loc[df["bio_reps"] == searchrow1ctrl, searchcolmean].iloc[0]
    ctrl1_up = df.loc[df["bio_reps"] == searchrow1ctrl, searchcolup].iloc[0]
    ctrl1_lo = df.loc[df["bio_reps"] == searchrow1ctrl, searchcollo].iloc[0]

    moi11_mean = df.loc[df["bio_reps"] == searchrow1moi1, searchcolmean].iloc[0]
    moi11_up = df.loc[df["bio_reps"] == searchrow1moi1, searchcolup].iloc[0]
    moi11_lo = df.loc[df["bio_reps"] == searchrow1moi1, searchcollo].iloc[0]

    if searchrow2ctrl is not None:
        ctrl2_mean = df.loc[df["bio_reps"] == searchrow2ctrl, searchcolmean].iloc[0]
        ctrl2_up = df.loc[df["bio_reps"] == searchrow2ctrl, searchcolup].iloc[0]
        ctrl2_lo = df.loc[df["bio_reps"] == searchrow2ctrl, searchcollo].iloc[0]

        moi12_mean = df.loc[df["bio_reps"] == searchrow2moi1, searchcolmean].iloc[0]
        moi12_up = df.loc[df["bio_reps"] == searchrow2moi1, searchcolup].iloc[0]
        moi12_lo = df.loc[df["bio_reps"] == searchrow2moi1, searchcollo].iloc[0]

    ctrl_list.append(ctrl1_mean)
    moi1_list.append(moi11_mean)
    ctrl_list_up.append(ctrl1_up)
    moi1_list_up.append(moi11_up)
    ctrl_list_lo.append(ctrl1_lo)
    moi1_list_lo.append(moi11_lo)

    if searchrow2ctrl is not None:
        ctrl_list.append(ctrl2_mean)
        moi1_list.append(moi12_mean)
        ctrl_list_up.append(ctrl2_up)
        moi1_list_up.append(moi12_up)
        ctrl_list_lo.append(ctrl2_lo)
        moi1_list_lo.append(moi12_lo)


# Columns of the plot data source: x factors plus mean / upper / lower per group
data = {
    "genes": cond_targetgene_shorten,
    "ctrl": ctrl_list,
    "moi1": moi1_list,

    "ctrl_up": ctrl_list_up,
    "moi1_up": moi1_list_up,

    "ctrl_lo": ctrl_list_lo,
    "moi1_lo": moi1_list_lo,
}

source = ColumnDataSource(data=data)

p = figure(
    x_range=cond_targetgene_shorten, y_range=(0.03, 12),
    height=600, width=1200,
    title="Relative fold change from JR92 qPCR Validation",
    toolbar_location=None, tools="", y_axis_type="log"
)
# Two bars of width 0.4 per category, shifted by -0.2 (Ctrl) and +0.2 (MOI1).
# bottom=0.1 rather than 0: presumably because the y axis is logarithmic.
bar_width = 0.4
p.vbar(
    x=dodge("genes", -0.2, range=p.x_range),
    top="ctrl", bottom=0.1, width=bar_width, source=source,
    color=Category20[8][0], legend_label="Ctrl", line_color=Category20[8][1], line_width=1
)

p.vbar(
    x=dodge("genes", 0.2, range=p.x_range),
    top="moi1", bottom=0.1, width=bar_width, source=source,
    color=Category20[8][6], legend_label="MOI1", line_color=Category20[8][7], line_width=1
)


# Whiskers spanning the ci95 lower/upper values, at the same offsets as the bars
ctrl_err = Whisker(
    base=dodge("genes", -0.2, range=p.x_range),
    upper='ctrl_up', lower='ctrl_lo', source=source, level="overlay"
)
ctrl_err.upper_head.size = 10
ctrl_err.lower_head.size = 10
p.add_layout(ctrl_err)

moi1_err = Whisker(
    base=dodge("genes", 0.2, range=p.x_range),
    upper='moi1_up', lower='moi1_lo', source=source, level="overlay"
)
moi1_err.upper_head.size = 10
moi1_err.lower_head.size = 10
p.add_layout(moi1_err)


# Same lower bound as the y_range passed to figure() above
p.y_range.start = 0.03
p.x_range.range_padding = 0.02
# Rotate the category labels (angle given in radians)
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)

Unnamed: 0,bio_reps,ctrl_calibrator,cond_targetgene,ALAS_mean,ALAS_std,ALAS_ste,ALAS_ci95_upper,ALAS_ci95_lower,TFR2_mean,TFR2_std,TFR2_ste,TFR2_ci95_upper,TFR2_ci95_lower,PIN1_mean,PIN1_std,PIN1_ste,PIN1_ci95_upper,PIN1_ci95_lower,PIM1_mean,PIM1_std,PIM1_ste,PIM1_ci95_upper,PIM1_ci95_lower,FADS1_mean,FADS1_std,FADS1_ste,FADS1_ci95_upper,FADS1_ci95_lower,RTN4_mean,RTN4_std,RTN4_ste,RTN4_ci95_upper,RTN4_ci95_lower,GATA1_mean,GATA1_std,GATA1_ste,GATA1_ci95_upper,GATA1_ci95_lower,SLC25A39_mean,SLC25A39_std,SLC25A39_ste,SLC25A39_ci95_upper,SLC25A39_ci95_lower,RNF181_mean,RNF181_std,RNF181_ste,RNF181_ci95_upper,RNF181_ci95_lower,EIF3K_mean,EIF3K_std,EIF3K_ste,EIF3K_ci95_upper,EIF3K_ci95_lower,YIF1B_mean,YIF1B_std,YIF1B_ste,YIF1B_ci95_upper,YIF1B_ci95_lower,POLR2J_mean,POLR2J_std,POLR2J_ste,POLR2J_ci95_upper,POLR2J_ci95_lower,RPL27_mean,RPL27_std,RPL27_ste,RPL27_ci95_upper,RPL27_ci95_lower,RPS16_mean,RPS16_std,RPS16_ste,RPS16_ci95_upper,RPS16_ci95_lower
0,K562_CTRL_ALAS_R1,K562_CTRL_R1,K562_ALAS_R1,1.110301,0.539467,0.311461,1.720765,0.499838,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,K562_CTRL_ALAS_R2,K562_CTRL_R2,K562_ALAS_R2,1.002966,0.094360,0.054479,1.109745,0.896188,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,K562_MOI1_ALAS_R1,K562_MOI1_R1,K562_ALAS_R1,0.624730,0.137959,0.079651,0.780846,0.468614,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,K562_MOI1_ALAS_R2,K562_MOI1_R2,K562_ALAS_R2,3.300738,0.728511,0.420606,4.125126,2.476349,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,K562_CTRL_TFR2_R1,K562_CTRL_R1,K562_TFR2_R1,,,,,,1.023057,0.27598,0.159337,1.335358,0.710756,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,K562_MOI1_POLR2J_R1,K562_MOI1_R1,K562_POLR2J_R1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.256544,0.114352,0.066021,0.385946,0.127142,,,,,,,,,,
40,K562_CTRL_RPL27_R1,K562_CTRL_R1,K562_RPL27_R1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.127259,0.708585,0.409102,1.929099,0.325419,,,,,
41,K562_MOI1_RPL27_R1,K562_MOI1_R1,K562_RPL27_R1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.514902,0.227639,0.131427,0.772499,0.257304,,,,,
42,K562_CTRL_RPS16_R1,K562_CTRL_R1,K562_RPS16_R1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.123754,0.668148,0.385756,1.879835,0.367673


: 

In [None]:
def cal_perknockdown(new, original):
    """Fractional decrease of `new` relative to `original`.

    Returns the negated fractional change ``-(new - original) / original``, so a
    drop in value is positive and a rise is negative. Multiply by 100 for percent.

    Special case: when the two inputs compare equal the function returns
    ``new / original`` (1.0 for non-zero inputs) rather than 0.
    NOTE(review): this makes a sample compared with itself read as 1.0 while a
    sample infinitesimally different reads as ~0 - confirm this is intended.
    """
    if new != original:
        # Negated because the bracketed term is the fractional *increase*; that
        # form is also what the error propagation of the change is based on.
        return -1 * ((new - original) / original)
    return new / original

def cal_perchange_se(new, new_std, original, original_std):
    """Propagated spread of the ratio ``new / original``.

    Computes ``sqrt((new_std**2 * original**2 + original_std**2 * new**2) / original**4)``,
    i.e. first-order error propagation for a ratio of two quantities that are
    assumed to be independent.

    Args:
        new: Value of the treated sample.
        new_std: Spread of `new` (the result is in the same sense as what is
            passed here: standard deviations in, standard deviation out).
        original: Value of the reference sample; must be non-zero.
        original_std: Spread of `original`.

    Returns:
        The propagated spread of ``new / original`` (also that of the fractional
        change, which differs from the ratio only by a constant).
    """
    # np.power instead of np.pow: the `pow` alias only exists in NumPy >= 2.0.
    new_var = np.power(new_std, 2)
    ori_var = np.power(original_std, 2)
    numerator = new_var * np.power(original, 2) + ori_var * np.power(new, 2)
    se = np.sqrt(numerator / np.power(original, 4))
    return se

def cal_perchange_ci95_up(new, new_std, original, original_std):
    """Upper bound: the knockdown estimate plus 1.96 times its propagated spread."""
    centre = cal_perknockdown(new, original)
    half_width = 1.96 * cal_perchange_se(new, new_std, original, original_std)
    return centre + half_width

def cal_perchange_ci95_lo(new, new_std, original, original_std):
    """Lower bound: the knockdown estimate minus 1.96 times its propagated spread."""
    centre = cal_perknockdown(new, original)
    half_width = 1.96 * cal_perchange_se(new, new_std, original, original_std)
    return centre - half_width

# Sanity check: compare one mean/std pair against itself and print the estimate
# followed by its upper and lower bounds
check_mean, check_std = 1.0181857349450552, 0.24134618240180455
print(cal_perknockdown(check_mean, check_mean))
print(cal_perchange_ci95_up(check_mean, check_std, check_mean, check_std))
print(cal_perchange_ci95_lo(check_mean, check_std, check_mean, check_std))

1.0
1.6570289329580146
0.34297106704198543


: 

In [None]:
# Percent-knockdown bar chart: for every <GENE>_<R#> condition, the MOI1 fold change
# is expressed relative to the matching control and drawn next to the control bar,
# with whiskers at +/- 1.96 times the propagated spread.
df = df3.copy()
df = df.rename(columns=rename_col)

# dict.fromkeys de-duplicates while keeping first-seen order.
primers = list(dict.fromkeys([re.sub(r"^K562_(.*?)_R.*", r"\1", cond) for cond in df.cond_targetgene]))
cond_targetgene = list(dict.fromkeys(df.cond_targetgene))
# Same conditions without the "K562_" prefix, e.g. "ALAS_R1"; used as x-axis factors
cond_targetgene_shorten = [re.sub(r"^K562_(.*)$", r"\1", cond) for cond in cond_targetgene]


def _fold_change_stat(bio_rep: str, column: str):
    """Return `column` from the first fold-change row whose bio_reps equals `bio_rep`."""
    return df.loc[df["bio_reps"] == bio_rep, column].iloc[0]


ctrl_list = []
moi1_list = []
ctrl_list_up = []
moi1_list_up = []
ctrl_list_lo = []
moi1_list_lo = []

# Iterate over the conditions themselves (rather than over primers with a hard-coded
# list of single-replicate genes) so that entry i of every list is, by construction,
# the value for x factor i.
for cond, cond_short in zip(cond_targetgene, cond_targetgene_shorten):
    primer = re.sub(r"^K562_(.*?)_R.*", r"\1", cond)  # "K562_ALAS_R1" -> "ALAS"
    ctrl_row = "K562_CTRL_" + cond_short              # e.g. "K562_CTRL_ALAS_R1"
    moi1_row = "K562_MOI1_" + cond_short              # e.g. "K562_MOI1_ALAS_R1"

    # NOTE(review): the `_std` columns (not `_ste`) are passed as the spread, so the
    # whiskers are 1.96 x a propagated standard deviation, whereas the ci95 columns
    # of the fold-change table are mean +/- 1.96 x `_ste` - confirm which is intended.
    ctrl_mean = _fold_change_stat(ctrl_row, primer + "_mean")
    ctrl_std = _fold_change_stat(ctrl_row, primer + "_std")
    moi1_mean = _fold_change_stat(moi1_row, primer + "_mean")
    moi1_std = _fold_change_stat(moi1_row, primer + "_std")

    # Fractions are scaled by 100 to plot percentages
    ctrl_list.append(100 * cal_perknockdown(ctrl_mean, ctrl_mean))
    moi1_list.append(100 * cal_perknockdown(moi1_mean, ctrl_mean))
    ctrl_list_up.append(100 * cal_perchange_ci95_up(ctrl_mean, ctrl_std, ctrl_mean, ctrl_std))
    moi1_list_up.append(100 * cal_perchange_ci95_up(moi1_mean, moi1_std, ctrl_mean, ctrl_std))
    ctrl_list_lo.append(100 * cal_perchange_ci95_lo(ctrl_mean, ctrl_std, ctrl_mean, ctrl_std))
    moi1_list_lo.append(100 * cal_perchange_ci95_lo(moi1_mean, moi1_std, ctrl_mean, ctrl_std))


# Columns of the plot data source: x factors plus value / upper / lower per group
data = {
    "genes": cond_targetgene_shorten,
    "ctrl": ctrl_list,
    "moi1": moi1_list,

    "ctrl_up": ctrl_list_up,
    "moi1_up": moi1_list_up,

    "ctrl_lo": ctrl_list_lo,
    "moi1_lo": moi1_list_lo,
}

source = ColumnDataSource(data=data)

p = figure(
    x_range=cond_targetgene_shorten, y_range=(-550, 400),
    height=600, width=1200,
    title="Percent Decrease in Gene Expression Compared to Controls from JR92 qPCR Validation",
    toolbar_location=None, tools="",
)
# Empty legend placed to the right of the plot area; the vbar calls below fill it.
p.add_layout(Legend(), "right")

# Two bars of width 0.4 per category, shifted by -0.2 (Ctrl) and +0.2 (MOI1).
# The axis is linear here, so bars are anchored at 0 (the 0.1 used on the log-scale
# plots is only needed because a log axis cannot start at 0).
bar_width = 0.4
p.vbar(
    x=dodge("genes", -0.2, range=p.x_range),
    top="ctrl", bottom=0, width=bar_width, source=source,
    color=Category20[8][0], legend_label="Ctrl", line_color=Category20[8][1], line_width=1
)

p.vbar(
    x=dodge("genes", 0.2, range=p.x_range),
    top="moi1", bottom=0, width=bar_width, source=source,
    color=Category20[8][6], legend_label="MOI1", line_color=Category20[8][7], line_width=1
)

# Whiskers at the same offsets as the bars
ctrl_err = Whisker(
    base=dodge("genes", -0.2, range=p.x_range),
    upper="ctrl_up", lower="ctrl_lo", source=source, level="overlay"
)
ctrl_err.upper_head.size = 10
ctrl_err.lower_head.size = 10
p.add_layout(ctrl_err)

moi1_err = Whisker(
    base=dodge("genes", 0.2, range=p.x_range),
    upper="moi1_up", lower="moi1_lo", source=source, level="overlay"
)
moi1_err.upper_head.size = 10
moi1_err.lower_head.size = 10
p.add_layout(moi1_err)

p.x_range.range_padding = 0.02
# Rotate the category labels (angle given in radians)
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

# Export the PNG straight to its final location. Writing to the working directory
# and moving the file afterwards only worked when the notebook was started from
# QPCRPATHS.ROOT / "analysis" and the plots folder already existed; it also rebound
# `source` from the data source to a path.
exportfilename = "JR92-250106_perchange.png"
destination = QPCRPATHS.DATADIR / experiment_id / "250307_qpcr_analysis" / "plots" / exportfilename
Path(destination).parent.mkdir(parents=True, exist_ok=True)

export_png(p, filename=str(destination))

show(p)

: 