**Author: Max Trauernicht**\
**Date: 24-10-2020**

# Introduction
Barcode counts that were produced from p53 and Gr reporter plasmids were amplified from RNA and sequenced. In a previous script, these counts were normalized and filtered. In this analysis, the features in the reporter design that influence the reporter activity will be determined. 

In [60]:
# Library import
import pandas as pd
import re

In [61]:
# Data import
cDNA_df = pd.read_csv("/DATA/usr/m.trauernicht/projects/SuRE_deep_scan_trp53_gr/data/gcf6210/results/mt20201123_reporter_activity_filt.csv")
cDNA_df.head()

Unnamed: 0,barcode,pDNA_counts_rpm,condition,starcode_counts,tf,oligo.barcode,spacing,promoter,position,distance,...,rpm,rep,activity,reporter_activity,reporter_activity_sd,neg_ctrls,rand_promoter,mean_activity,log_activity,log_reporter_activity
0,AACAACACGCCG,235.196278,A549_DMSO,451,Gr,2,10,mCMV,0,10,...,451.102859,1,2.182094,1.819375,0.535153,No,No,2.44394,1.125713,0.863443
1,AACAACACGCCG,235.196278,A549_Dex-100,328,Gr,2,10,mCMV,0,10,...,328.160224,1,1.58739,2.184985,0.608081,No,No,1.56282,0.666657,1.127623
2,AACAACACGCCG,235.196278,A549_DMSO,333,Gr,2,10,mCMV,0,10,...,333.156205,3,1.611557,1.819375,0.535153,No,No,1.430512,0.688455,0.863443
3,AACAACACGCCG,235.196278,mES_N2B27-HQ,295,Gr,2,10,mCMV,0,10,...,295.141194,3,1.427669,1.345745,0.335543,No,No,1.440457,0.513662,0.428406
4,AACAACACGCCG,235.196278,mES_N2B27-HQ,302,Gr,2,10,mCMV,0,10,...,302.209644,2,1.461861,1.345745,0.335543,No,No,1.251034,0.547806,0.428406


In [62]:
# Subset to core df
p53_df = cDNA_df[cDNA_df["rand_promoter"] == "No"]
p53_df = p53_df[p53_df["neg_ctrls"] == "No"]
p53_df = p53_df[p53_df["tf"] == "Trp53"]
p53_df = p53_df[p53_df["spacing"] == 7]
p53_df = p53_df[p53_df["position"] == 0]
p53_df = p53_df[p53_df["condition"] == "MCF7-WT_Nutlin"]
p53_df["tf"] = "p53"
p53_df = p53_df[["condition", "reporter_id", "background", "reporter_activity", "affinity_pos1", "affinity_pos2", "affinity_pos3", "affinity_pos4"]]
p53_df.head()

Unnamed: 0,condition,reporter_id,background,reporter_activity,affinity_pos1,affinity_pos2,affinity_pos3,affinity_pos4
20,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_1_a1_3_a2_4_a3_4_a4_3,1,0.642677,3,4,4,3
33,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_1_a1_3_a2_4_a3_4_a4_3,1,0.642677,3,4,4,3
223,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_4_a4_4,2,0.49512,3,3,4,4
233,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_4_a4_4,2,0.49512,3,3,4,4
277,MCF7-WT_Nutlin,Trp53_mCMV_p_0_s_7_d_10_bg_1_a1_0_a2_0_a3_4_a4_4,1,2.272875,0,0,4,4


In [66]:
# Calculate mean per background
p53_df['reporter_id_2'] = p53_df['reporter_id'].replace('[_bg_1]', '',regex=True)

#p53_df["mean_activity"] = p53_df.groupby("reporter_id")["reporter_activity"].mean()
#p53_df = p53_df.drop(columns = ["background"])
p53_df.head()

Unnamed: 0,condition,reporter_id,background,reporter_activity,affinity_pos1,affinity_pos2,affinity_pos3,affinity_pos4,reporter_id_2
20,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_1_a1_3_a2_4_a3_4_a4_3,1,0.642677,3,4,4,3,Trp53minPp0s7d0a3a24a34a43
33,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_1_a1_3_a2_4_a3_4_a4_3,1,0.642677,3,4,4,3,Trp53minPp0s7d0a3a24a34a43
223,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_4_a4_4,2,0.49512,3,3,4,4,Trp53minPp0s7d02a3a23a34a44
233,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_4_a4_4,2,0.49512,3,3,4,4,Trp53minPp0s7d02a3a23a34a44
277,MCF7-WT_Nutlin,Trp53_mCMV_p_0_s_7_d_10_bg_1_a1_0_a2_0_a3_4_a4_4,1,2.272875,0,0,4,4,Trp53mCMVp0s7d0a0a20a34a44


In [54]:
p53_df

Unnamed: 0,condition,reporter_id,background,reporter_activity,affinity_pos1,affinity_pos2,affinity_pos3,affinity_pos4,mean_activity,reporter_id_2
20,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_1_a1_3_a2_4_a3_4_a4_3,1,0.642677,3,4,4,3,,Trp53_minP_p_0_s_7_d_10_bg_1_a1_3_a2_4_a3_4_a4_3
33,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_1_a1_3_a2_4_a3_4_a4_3,1,0.642677,3,4,4,3,,Trp53_minP_p_0_s_7_d_10_bg_1_a1_3_a2_4_a3_4_a4_3
223,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_4_a4_4,2,0.495120,3,3,4,4,,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_4_a4_4
233,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_4_a4_4,2,0.495120,3,3,4,4,,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_4_a4_4
277,MCF7-WT_Nutlin,Trp53_mCMV_p_0_s_7_d_10_bg_1_a1_0_a2_0_a3_4_a4_4,1,2.272875,0,0,4,4,,Trp53_mCMV_p_0_s_7_d_10_bg_1_a1_0_a2_0_a3_4_a4_4
...,...,...,...,...,...,...,...,...,...,...
213587,MCF7-WT_Nutlin,Trp53_mCMV_p_0_s_7_d_10_bg_2_a1_4_a2_3_a3_3_a4_4,2,3.448800,4,3,3,4,,Trp53_mCMV_p_0_s_7_d_10_bg_2_a1_4_a2_3_a3_3_a4_4
213650,MCF7-WT_Nutlin,Trp53_mCMV_p_0_s_7_d_10_bg_2_a1_1_a2_2_a3_2_a4_2,2,3.368994,1,2,2,2,,Trp53_mCMV_p_0_s_7_d_10_bg_2_a1_1_a2_2_a3_2_a4_2
213659,MCF7-WT_Nutlin,Trp53_mCMV_p_0_s_7_d_10_bg_2_a1_1_a2_2_a3_2_a4_2,2,3.368994,1,2,2,2,,Trp53_mCMV_p_0_s_7_d_10_bg_2_a1_1_a2_2_a3_2_a4_2
213795,MCF7-WT_Nutlin,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_2_a4_2,2,0.414268,3,3,2,2,,Trp53_minP_p_0_s_7_d_10_bg_2_a1_3_a2_3_a3_2_a4_2


In [None]:
# 