## Analysis of promoter and 3'UTR effect on expression

### Load libraries and functions

In [None]:
import math
import os
import warnings

import fcsparser as fcs
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Global plot styling: "whitegrid" seaborn style, then the "poster" context.
# The context is set after sns.set() so it is not overridden by it.
sns.set(style ="whitegrid")
sns.set_context("poster")
# "once" action: report each distinct warning a single time.
warnings.filterwarnings("once")

### Load files and metadata

In [None]:
# Folder with fcs files in subfolders. Each sub-folder name is split on "_":
# field 2 is used as the plate identifier and field 3 as the run identifier.
folder = "C:\\FCS_experiment\\"

ffolderlist = [f for f in os.listdir(folder) if os.path.isdir(os.path.join(folder, f))]

# Per-file frames are collected in a list and concatenated once at the end.
# DataFrame.append inside the loop re-copied all accumulated rows on every
# file and no longer exists in pandas >= 2.0.
frames = []
aa = 1  # running counter over loaded files, stored as "WellNumber"

# load all fcs files
for fdn in ffolderlist:
    subfolder = os.path.join(folder, fdn)
    ffilelist = os.listdir(subfolder)
    print(fdn)
    fields = fdn.split("_")
    plate = fields[2]
    run = fields[3]
    for fn in ffilelist:
        filename, file_ext = os.path.splitext(fn)
        if file_ext != ".fcs":
            continue
        path = os.path.join(subfolder, fn)
        meta, df1 = fcs.parse(path, meta_data_only=False, reformat_meta=True)
        # Well name: 4th "_"-separated field of the file name, up to the first ".".
        df1["WellName"] = fn.split("_")[3].split(".")[0]
        df1["WellNumber"] = aa
        df1["Plate"] = plate
        df1["Run"] = run
        frames.append(df1)
        aa += 1

if not frames:
    # Fail here with a clear message rather than with a KeyError in the merge below.
    raise FileNotFoundError("No .fcs files found in the sub-folders of " + folder)
df = pd.concat(frames)

# metadata file; column names are supplied here and every column is read as
# object so that "Plate" compares equal to the string taken from the folder name
meta = "C:\\metadata.csv"
df1 = pd.read_csv(meta, names=["WellName", "Name", "ExperimentName", "Info", "Plate"], dtype=object)

# Inner join: only events whose (WellName, Plate) appears in the metadata are kept.
dfnew = df1.merge(df, on=["WellName", "Plate"])


### Remove zeros, gate live cells and log-transform data

In [None]:
# Remove zeros: replace non-positive measurements with NaN so they can be
# log-transformed and dropped later. Only numeric columns are masked; comparing
# the string metadata columns with 0 raises TypeError on Python 3.
numeric_cols = dfnew.select_dtypes(include="number").columns
dfnew[numeric_cols] = dfnew[numeric_cols].where(dfnew[numeric_cols] > 0)

# Doublet indicator: ratio of forward-scatter area to height.
# NOTE(review): the original computed a 1.25 < doublet < 2 gate and then
# immediately overwrote the result with the ungated frame, so no events are
# removed on this criterion. That effective behaviour is kept; filter on
# "doublet" here if doublet removal is actually intended.
dfnew["doublet"] = dfnew["FSC-A"] / dfnew["FSC-H"]
df2 = dfnew

# Remove small events. The FSC-A threshold 0E5 equals 0, so this line only
# drops rows whose FSC-A is NaN; the SSC-A gate keeps events below 2E5.
df2 = df2[df2["FSC-A"] > 0E5]
# Explicit copy so the log columns added below go to an independent frame
# instead of a slice of dfnew (avoids SettingWithCopyWarning).
df2 = df2[(df2["SSC-A"] < 2E5)].copy()

# show data for first well
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
g = sns.jointplot(x="FSC-A", y="SSC-A", data=df2[df2["WellNumber"] == 1], s=1)
plt.show()

# log data: add a "log<col>" column for every float32 column.
# The column list is fixed before the loop so newly added columns are not revisited.
float32_cols = [col for col in df2.columns if df2[col].dtype == "float32"]
for col in float32_cols:
    # Cast to float64 first so the result dtype matches the previous
    # per-element math.log10 implementation.
    df2["log" + col] = np.log10(df2[col].astype("float64"))


### Choose experiment and gate cells expressing circuit

In [None]:
# Keep only events of the "TRE" experiment that pass both log-intensity gates
# (logPE-CF594-A > 3 and logAPC-Cy7-A > 2.5).
is_tre = df2["ExperimentName"] == "TRE"
passes_gate = (df2["logPE-CF594-A"] > 3) & (df2["logAPC-Cy7-A"] > 2.5)
# Copy so the column added below is written to an independent frame rather
# than to a slice of df2 (chained assignment / SettingWithCopyWarning).
df_Term = df2[is_tre & passes_gate].copy()

# FITC-A signal per event relative to the APC-Cy7-A signal of the same event.
df_Term["normFITC-A"] = df_Term["FITC-A"] / df_Term["APC-Cy7-A"]

# Drop every row that has a NaN in any column (including metadata columns).
df_Term = df_Term.dropna()

In [None]:
### Plot results

In [None]:
# Wells whose gated event count is below this threshold are excluded.
# NOTE(review): with the value 0 (as in the original "< 0" comparison) no well
# can ever be excluded, because counts are never negative — confirm the
# intended minimum.
min_events = 0

# Number of gated events per (Name, WellName).
df_Term_ind2 = df_Term.groupby(['Name', 'WellName']).count()
df_Term_ind2 = df_Term_ind2.reset_index()
excluded_wells = df_Term_ind2.loc[df_Term_ind2["FITC-A"] < min_events, "WellName"]

# Per-well means. numeric_only=True skips the string metadata columns, which
# pandas >= 2.0 refuses to average.
df_Term_ind = df_Term.groupby(['Name', 'WellName']).mean(numeric_only=True)
df_Term_ind = df_Term_ind.reset_index()
# Copy so the columns added below are not written to a slice.
df_Term_ind = df_Term_ind[~df_Term_ind['WellName'].isin(excluded_wells)].copy()

# Normalise each well's mean FITC-A to the mean over the reference wells.
# The original referenced an undefined frame (df_Term_ind_1) and np without
# importing numpy; both are replaced by a lookup on df_Term_ind itself.
reference = df_Term_ind.loc[df_Term_ind['Name'] == 'SadCas9::mRuby2', 'FITC-A']
if reference.empty:
    raise ValueError("No wells named 'SadCas9::mRuby2' found to normalise against")
df_Term_ind['normnormFITC-A'] = df_Term_ind['FITC-A'] / reference.mean()
df_Term_ind['lognormnormFITC-A'] = df_Term_ind['normnormFITC-A'].apply(math.log10)
df_Term_ind = df_Term_ind.sort_values('normnormFITC-A')

# Row order of the plot: constructs sorted by their mean log-normalised signal.
df_Term_ind_g = df_Term_ind.groupby("Name")['lognormnormFITC-A'].mean().sort_values()

# Bars show the per-construct mean; the swarm overlays the individual wells.
g2 = sns.barplot(x="lognormnormFITC-A", y="Name", data=df_Term_ind, color='mediumaquamarine', ci=None, order=df_Term_ind_g.index)
g2 = sns.swarmplot(x="lognormnormFITC-A", y="Name", data=df_Term_ind, color='darkgreen', order=df_Term_ind_g.index, size=4)
sns.despine(left=True, bottom=True)
plt.xlim(-.5, 2)
plt.xticks([-1, -.5, 0, .5, 1, 1.5, 2, 2.5])
# savefig selects the output type with `format`; `type` is not a valid keyword.
plt.savefig(os.path.join(folder, 'TRE.eps'), format='eps')
plt.savefig(os.path.join(folder, 'TRE.png'), format='png', dpi=300)
plt.show()