In [1]:
from copy import deepcopy

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import logomaker

import bean as be
from bean import Edit
import bean.plotting.editing_patterns

In [2]:
# Select the base style FIRST. plt.style.use('default') resets the
# style-controlled rcParams to their default values, so calling it after the
# overrides below (as a separate, later cell) would silently discard them.
#plt.style.use('jr')
plt.style.use('default')

# PDF export settings layered on top of the default style.
plt.rcParams["pdf.use14corefonts"] = True
plt.rcParams["pdf.fonttype"] = 42  # Type 42 (TrueType) output instead of the default Type 3
plt.rcParams["axes.unicode_minus"] = False  # use an ASCII hyphen for minus signs

# Load and prepare ReporterScreen


## Tiling screen data

In [4]:
# Load the tiling-screen count object from the masked LDLRCDS h5ad file.
cdata = be.read_h5ad(
    "../../results/filtered_annotated/LDLRCDS/bean_count_LDLRCDS_masked.h5ad"
)

In [5]:
# Keep only the samples whose `bin` annotation is "bulk".
cdata_bulk = cdata[:, cdata.samples["bin"] == "bulk"]

In [6]:
# Exclude the samples whose `rep` label is rep1, rep2, rep3 or rep4.
cdata_bulk_fs = cdata_bulk[
    :, ~cdata_bulk.samples["rep"].isin(["rep1", "rep2", "rep3", "rep4"])
]

In [7]:
# Number of guides whose name contains the substring "CONTROL".
sum("CONTROL" in guide_name for guide_name in cdata_bulk_fs.guides.index)

150

In [None]:
# Per-guide edit rates for the tiling screen. Accessed through the `be.pl`
# alias, matching every other editing_patterns call in this notebook.
cedit_rates_df = be.pl.editing_patterns.get_edit_rates(cdata_bulk_fs)

Calibrating edits in editable positions...:   0%|          | 0/7320 [00:00<?, ?it/s]

## Variant targeting screen

Guides in this dataset have a designated target position at one of positions P3-P8. We will use the editing efficiency at the target position for the PAM preference analysis, but not for the position-wise editing efficiency analyses.

In [None]:
# Load the variant-targeting screen count object from the masked LDLvar h5ad file.
vdata = be.read_h5ad(
    "../../results/filtered_annotated/LDLvar/bean_count_LDLvar_masked.h5ad"
)

In [None]:
# Keep only the samples whose `bin` annotation is "bulk".
vdata_bulk = vdata[:, vdata.samples["bin"] == "bulk"]

In [None]:
# Exclude the samples whose `rep` label is rep1, rep2, rep3 or rep4.
vdata_bulk_fs = vdata_bulk[
    :, ~vdata_bulk.samples["rep"].isin(["rep1", "rep2", "rep3", "rep4"])
]

In [None]:
# For every row of the `edit_counts` table, look up the `target_pos` of that
# row's guide. The index is reset so the result is positionally indexed
# (0..n-1) instead of being labelled by guide.
target_pos = vdata_bulk_fs.guides.loc[vdata_bulk_fs.uns['edit_counts'].guide, "target_pos"].reset_index(drop=True)

In [None]:
# Pull the `rel_pos` attribute out of every entry in the `edit` column.
rel_pos = vdata_bulk_fs.uns['edit_counts'].edit.map(lambda e: e.rel_pos)

In [None]:
# Keep only the edit-count rows whose edit position equals the guide's
# designated target position, and store them under a new `uns` key.
# NOTE(review): the element-wise `==` needs both Series to carry identical
# index labels; `target_pos` has a fresh 0..n-1 index, so this presumably
# relies on `edit_counts` also having a default RangeIndex - confirm.
vdata_bulk_fs.uns['target_edit_counts'] = vdata_bulk_fs.uns['edit_counts'].loc[rel_pos == target_pos, :].reset_index(drop=True)

In [None]:
# Edit rates for the variant screen, computed from the target-position-only
# table stored under `target_edit_counts` rather than the full edit table.
vedit_rates_df = be.pl.editing_patterns.get_edit_rates(vdata_bulk_fs, edit_count_key='target_edit_counts', add_absent=False)

In [None]:
# Plot the tiling-screen edit rates with bean's position/context plotting helper.
be.pl.editing_patterns.plot_by_pos_context(cedit_rates_df)

### BE-Hive like plotting


In [None]:
# Cast the spacer position column to integers.
cedit_rates_df["spacer_pos"] = cedit_rates_df["spacer_pos"].astype(int)

In [None]:
# BE-Hive-style position-wise editing efficiency for the tiling screen
# (`normalize` left at its default), written to a PDF.
be.pl.editing_patterns.plot_by_pos_behive(
    cedit_rates_df,
    cdata_bulk_fs,
    target_basechange=cdata_bulk_fs.uns["target_base_change"],
    nonref_base_changes=["C>T", "C>G"],
)
plt.savefig("1b_pos_eff_behive_LDLRCDS.pdf", bbox_inches="tight")

In [None]:
# Same BE-Hive-style plot with `normalize=True`, written to a separate PDF.
be.pl.editing_patterns.plot_by_pos_behive(
    cedit_rates_df,
    cdata_bulk_fs,
    target_basechange=cdata_bulk_fs.uns["target_base_change"],
    nonref_base_changes=["C>T", "C>G"],
    normalize=True,
)
plt.savefig("1b_pos_eff_behive_LDLRCDS_normed.pdf", bbox_inches="tight")

## Combination with PAM


In [None]:
# Restrict the tiling-screen edit rates to A>G base changes, re-indexed from 0.
cedit_rates_df_ag = cedit_rates_df[
    cedit_rates_df["base_change"] == "A>G"
].reset_index(drop=True)

In [None]:
# Inspect the full tiling-screen edit-rate table (all base changes).
cedit_rates_df

In [None]:
# Number of distinct guides in the edit-rate table (missing values counted once).
cedit_rates_df["guide"].nunique(dropna=False)

In [None]:
# plt.style.use('default') resets the style-controlled rcParams to their
# defaults, which includes the PDF export options. Re-apply those options
# right after the reset so figures saved from here on use the intended
# font handling.
plt.style.use('default')
plt.rcParams["pdf.use14corefonts"] = True
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["axes.unicode_minus"] = False

In [None]:
# A>G edit rates broken down by position and PAM, written to a PDF.
be.pl.editing_patterns.plot_by_pos_pam(
    cdata_bulk_fs,
    cedit_rates_df_ag,
)
plt.savefig("1b_pos_by_pam_LDLRCDS.pdf", bbox_inches="tight")

In [None]:
# Sanity check: smallest spacer position present among the A>G rows.
cedit_rates_df_ag.spacer_pos.min()

In [None]:
# A>G rows whose spacer position is one of 3, 4, 5, 6, 7, 8.
cedit_rates_df_ag_P38 = cedit_rates_df_ag.loc[
    cedit_rates_df_ag["spacer_pos"].isin(list(range(3, 9)))
]

In [None]:
# NOTE(review): this cell only displays the repr of a GroupBy object; nothing
# is aggregated or stored here.
cedit_rates_df_ag_P38.groupby(["guide", "pam23"])

In [None]:
# Step 1: for each (guide, PAM) pair take the highest replicate-mean edit rate
# among the selected positions.
max_P38_by_pam = (
    cedit_rates_df_ag_P38
    .groupby(["guide", "pam23"])["rep_mean"]
    .max()
)
# Step 2: average those per-guide maxima within each PAM.
edit_P38_by_pam = max_P38_by_pam.groupby(level="pam23").mean()

In [None]:
# Lowest and highest per-PAM value, one per line.
print(edit_P38_by_pam.min(), edit_P38_by_pam.max(), sep="\n")

In [None]:
# Alternative summary: plain mean of `rep_mean` over all selected rows per PAM.
# NOTE(review): this rebinds `edit_P38_by_pam`, which the earlier cell defined
# as the per-PAM mean of per-guide maxima; after this cell the name holds a
# different statistic, so results depend on which cell ran last.
edit_P38_by_pam = cedit_rates_df_ag_P38.groupby("pam23")["rep_mean"].mean()
print(edit_P38_by_pam.min())
print(edit_P38_by_pam.max())

In [None]:
# Same per-PAM mean, restricted to spacer positions 5 and 6.
cedit_rates_df_ag_P56 = cedit_rates_df_ag.loc[
    cedit_rates_df_ag["spacer_pos"].isin([5, 6])
]
edit_P56_by_pam = cedit_rates_df_ag_P56.groupby("pam23")["rep_mean"].mean()
print(edit_P56_by_pam.min(), edit_P56_by_pam.max(), sep="\n")

In [None]:
# A>G rows of the variant screen, re-indexed from 0.
vedit_rates_df_ag = vedit_rates_df.loc[vedit_rates_df.base_change == "A>G", :].reset_index(drop=True)
# A>G rows of the tiling screen at spacer positions 3..8 (inclusive); copied so
# that columns added to it later do not write into a slice of the parent frame.
cedit_rates_df_ag_window = cedit_rates_df_ag.loc[(cedit_rates_df_ag.spacer_pos >= 3) & (cedit_rates_df_ag.spacer_pos <= 8)].copy()
# Stack both screens. pd.concat already builds a new DataFrame, so no extra
# deepcopy is needed to decouple the result from its inputs.
all_rates = pd.concat([cedit_rates_df_ag_window, vedit_rates_df_ag])

In [None]:
# PAM preference plot over the combined tiling + variant screen A>G rates.
be.pl.editing_patterns.plot_pam_preference(all_rates)
# Saving of this figure is currently disabled.
#plt.savefig("1b_pam_varcds_combined.pdf", bbox_inches='tight')

## Context preference logo


In [None]:
def info_content(v, background = 0.25):
    """Scale a 1-D weight vector by its information content.

    ``v`` is normalised to sum to 1 (``p = v / v.sum()``). The information
    content relative to a uniform ``background`` probability,
    ``sum(p * log2(p / background))``, is computed, and ``p`` multiplied by
    that scalar is returned.

    Parameters
    ----------
    v : np.ndarray or pd.Series
        1-D vector of non-negative weights.
    background : float, default 0.25
        Background probability each normalised entry is compared against.

    Returns
    -------
    Same container type as ``v``
        The normalised vector multiplied by its total information content.
    """
    p = v / v.sum()
    p_arr = np.asarray(p, dtype=float)
    # 0 * log2(0) evaluates to NaN in floating point although its limit is 0,
    # so only positive entries contribute to the sum; a single zero entry
    # would otherwise turn the whole result into NaN.
    terms = np.zeros_like(p_arr)
    positive = p_arr > 0
    terms[positive] = p_arr[positive] * np.log2(p_arr[positive] / background)
    return terms.sum() * p

In [None]:
# For every row, take the 3-character window of the guide's `Reporter`
# sequence spanning rel_pos-1 .. rel_pos+1 (the edited base and one neighbour
# on each side) and store it in a new `context` column.
# NOTE(review): if `rel_pos` were 0 the slice start would be -1 and the window
# would come back empty; presumably the position filter applied to this frame
# rules that out - confirm.
cedit_rates_df_ag_window["context"] = cedit_rates_df_ag_window.apply(
    lambda row: cdata_bulk_fs.guides.loc[row.guide, "Reporter"][
        row.rel_pos - 1 : row.rel_pos + 2
    ],
    axis=1,
)

In [None]:
# Mean replicate-averaged edit rate per base, separately for each of the three
# context positions (0 = left neighbour, 1 = centre, 2 = right neighbour).
con_mean_er = {}
for i in range(3):
    # Split the i-th character of the context string into its own column ...
    cedit_rates_df_ag_window[f"context_{i}"] = cedit_rates_df_ag_window.context.map(lambda s: s[i])
    # ... and average `rep_mean` over the rows sharing that character.
    con_mean_er[i] = cedit_rates_df_ag_window.groupby(f"context_{i}")["rep_mean"].mean()

In [None]:
# Inspect the per-position mean edit rates.
con_mean_er

In [None]:
# Mean edit rate of rows whose centre base is "A".
# NOTE(review): `bg` is not used by any cell visible here - confirm whether it
# is still needed.
bg = con_mean_er[1]["A"]
# Information-content-scaled base preferences for the left (0) and right (2)
# neighbours, against a uniform 0.25 background.
context_0 = info_content(con_mean_er[0], background = 0.25)
context_2 = info_content(con_mean_er[2], background = 0.25)

In [None]:
# Inspect the information-content-scaled values for the left neighbour.
context_0

In [None]:
# Build the 3-row logo matrix (rows = positions, columns = bases):
#   row -1: left-neighbour mean edit rates normalised to sum to 1
#   row  0: the centre position, with all weight on "A"
#   row +1: right-neighbour mean edit rates normalised to sum to 1
# Note that this uses the plain normalised rates, not the info_content() output.
ic_tbl = pd.concat([con_mean_er[0]/con_mean_er[0].sum(), pd.DataFrame([1,0,0,0], index=["A", "C", "G", "T"]), con_mean_er[2]/con_mean_er[2].sum()], axis=1).T
ic_tbl.index = [-1, 0, 1]

In [None]:
# Draw the context-preference sequence logo on its own figure and save it.
fig, ax = plt.subplots(figsize=(3,5))

logomaker.Logo(ic_tbl, ax = ax)
ax.set_ylabel("Relative frequency")
# NOTE(review): unlike the other savefig calls in this notebook, this one does
# not pass bbox_inches='tight' - confirm whether that is intentional.
fig.savefig("context_preference_3_8.pdf")