# ReporterScreen sample / guide quality report

Examines the quality of guides and samples, and masks low-quality guides and samples.

In [None]:
import perturb_tools as pt
import beret as be
import matplotlib.pyplot as plt
# Use matplotlib's built-in default style for every figure in this report.
plt.style.use('default')

In [None]:
# IPython shell escape: runs `jt -r` in a subshell.
# NOTE(review): presumably the jupyterthemes CLI resetting the notebook theme — confirm `jt` is installed where this runs.
! jt -r

In [None]:
# Re-apply matplotlib's default style after the shell command in the previous cell.
plt.style.use('default')

In [None]:
# Run parameters; the remaining cells read these names directly.
exp_id = "LDLRCDS"  # prefix of the saved figure file name
save_format = "png"  # file extension used when saving figures

# Input screen to load and output path for the quality-annotated screen.
bdata_path = "../results/mapped/LDLRCDS/beret_count_LDLRCDS_combined.h5ad"
out_bdata_path = "../results/mapped/LDLRCDS/beret_count_LDLRCDS_masked.h5ad"

# Start/end positions handed to the edit-quantification calls.
edit_quantification_start_pos = 2
edit_quantification_end_pos = 7

# Sample-quality cut-offs.
corr_X_thres = 0.7  # compared against samples.median_corr_X
edit_rate_thres = 0.1  # compared against samples.median_editing_rate
lfc_thres = -0.1  # compared against samples["median_lfc_corr.top_bot"]

In [None]:
# Load the screen from the input .h5ad file given by `bdata_path`.
bdata = be.read_h5ad(bdata_path)

Annotate unannotated samples & log-normalize guides

In [None]:
# Split each sample name on "_" and store the two pieces as the `replicate`
# and `bin` columns.
# NOTE(review): the two-column assignment needs exactly one "_" per sample
# name — verify against the naming scheme of the input.
bdata.samples[["replicate", "bin"]] = (
    bdata.samples.index.to_series().str.split("_", expand=True)
)

In [None]:
# Log-normalize the screen; the return value is not used here, so the result
# is presumably stored on `bdata` itself — TODO confirm.
bdata.log_norm()

## Sample quality

### Visualize quality metrics

#### 1. Guide coverage

In [None]:
# Plot guide coverage for the loaded screen on a 6x4 figure.
pt.qc.plot_guide_coverage(bdata, figsize=(6,4))


In [None]:
# Plot the Gini metric and save the figure.
plt.style.use('default')
pt.qc.plot_X_gini(bdata)
# Saved relative to the current working directory as "<exp_id>_gini.<save_format>".
plt.savefig(f"{exp_id}_gini.{save_format}")

#### 2. Guide abundance correlation

In [None]:
# Plot the Spearman correlation of guide abundance.
# NOTE(review): the `median_corr_X` sample column used for masking later is
# presumably written by this call — confirm.
pt.qc.plot_correlation(bdata, "Spearman")

#### 3. LFC correlation of positive controls

In [None]:
# Per-replicate top-vs-bot log fold changes, restricted to guides whose Group
# is "PosCtrl".
# NOTE(review): `lfcs` is not read again in the visible part of the notebook.
lfcs = bdata[bdata.guides.Group == "PosCtrl", :].log_fold_change_reps(
    cond1="top",
    cond2="bot",
    rep_condit="replicate",
    compare_condit="bin",
)

In [None]:
# Spearman correlation of the top-vs-bot LFCs, computed on the guides whose
# Group is "PosCtrl".
ax = pt.qc.plot_lfc_correlation(
    bdata,
    bdata.guides.Group == "PosCtrl",
    method="Spearman",
    cond1="top",
    cond2="bot",
    rep_condit="replicate",
    compare_condit="bin",
    figsize=(10, 10),
)
ax.set_title("top/bot LFC correlation, Spearman")

# Vertical x tick labels, horizontal y tick labels.
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

#### 4. Guide editing rates

In [None]:
# Drop allele-count rows whose allele renders as the empty string.
bdata.uns["allele_counts"] = bdata.uns["allele_counts"].loc[
    bdata.uns["allele_counts"].allele.map(str) != ""
]

In [None]:
# Quantify edits between the configured start and end positions, then plot
# the per-guide edit rates.
bdata.get_edit_from_allele()
bdata.get_edit_mat_from_uns(
    rel_pos_start=edit_quantification_start_pos,
    rel_pos_end=edit_quantification_end_pos,
    rel_pos_is_reporter=False,
)
bdata.get_guide_edit_rate(
    editable_base_start=edit_quantification_start_pos,
    editable_base_end=edit_quantification_end_pos,
)
be.qc.plot_guide_edit_rates(bdata)

In [None]:
# Compute edit rates over the same start/end positions and plot them per
# sample.
# NOTE(review): the `median_editing_rate` sample column used for masking later
# is presumably written by `get_edit_rate` — confirm.
bdata.get_edit_rate(
    editable_base_start=edit_quantification_start_pos,
    editable_base_end=edit_quantification_end_pos,
)
be.qc.plot_sample_edit_rates(bdata)

### Mask low-quality samples

In [None]:
# Display the sample table with a reversed "coolwarm" colour gradient applied
# to the cell backgrounds.
bdata.samples.style.background_gradient(cmap="coolwarm_r")

Assign a `mask` column to the sample table to flag low-quality samples (1 = keep, 0 = masked).

In [None]:
# Flag low-quality samples in samples["mask"] (1 = keep, 0 = masked).
# All three criteria are recorded through the mask instead of removing
# samples from `bdata`, so the written file still contains every sample and
# each QC decision stays visible in the sample table.
bdata.samples["mask"] = 1
bdata.samples.loc[bdata.samples.median_corr_X < corr_X_thres, "mask"] = 0
bdata.samples.loc[bdata.samples.median_editing_rate < edit_rate_thres, "mask"] = 0
# `~(x > thres)` flags exactly the samples that subsetting on `x > thres`
# would have removed, including samples whose LFC correlation is NaN.
bdata.samples.loc[
    ~(bdata.samples["median_lfc_corr.top_bot"] > lfc_thres), "mask"
] = 0

In [None]:
# Display the sample table again, now including the `mask` column.
bdata.samples.style.background_gradient(cmap="coolwarm_r")

## Identify outlier guides

In [None]:
# Detect outlier guides, using `bin` as the condition column and `replicate`
# as the replicate column; returns the outlier table and a mask.
outlier_guides, mask = be.qc.get_outlier_guides_and_mask(
    bdata,
    condit_col="bin",
    replicate_col="replicate",
)

In [None]:
# Display the outlier table returned above.
outlier_guides


In [None]:
# Count how many rows of the outlier table each guide name has (presumably
# one row per sample in which it was an outlier) and select the guides that
# appear more than twice.
outlier_guides_n_samples = outlier_guides["name"].value_counts()
guides_to_exclude = outlier_guides_n_samples[outlier_guides_n_samples > 2].index
guides_to_exclude

In [None]:
# Keep the mask returned by the outlier detection on the screen under
# uns["repguide_mask"].
bdata.uns['repguide_mask'] = mask

In [None]:
# Keep only the guides whose index is not in `guides_to_exclude`.
bdata = bdata[~bdata.guides.index.isin(guides_to_exclude),:]

In [None]:
# Display the filtered screen.
bdata

In [None]:
# Show the shape of the stored mask after the guide filtering above.
# NOTE(review): presumably a check that the mask was subset along with the
# guides — confirm the expected shape.
bdata.uns['repguide_mask'].shape

In [None]:
# Write the quality-annotated screen to `out_bdata_path`.
bdata.write(out_bdata_path)