# TS_Plasmids - Flying Evaluation
## Scan Folders for files 
- Agar plate evaluation .json in .../pickolo/json
- OD reader .xml files in .../OD/xml

In [1]:
# Switch read by the import cells further down: when True they load the cached
# CSV snapshots instead of re-parsing the raw agar / OD files.
use_backup_data = True

import pandas as pd
import EvalFunctions as ef
import seaborn as sns
from matplotlib import pyplot as plt

# Root folder of the experiment output (machine-specific absolute path).
path = "/Users/malte/polybox/Shared/Robot-Shared/TSPlasmids/experiments/20210417_output"

## Load all barcodes, indexed by their "barcode" column
barcodes = pd.read_csv(path + "/barcodes.csv").set_index("barcode")

## Initialize the (still empty) result DataFrame, indexed by "ID"
data_columns = [
    "ID", "Time", "Replicate",
    "OD_B", "OD_A", "Growth",
    "Row", "Column", "Strategy",
    "Agar_N", "Agar_A", "Agar_B", "Agar_AB",
    "Phenotype",
]
Data = pd.DataFrame(columns=data_columns).set_index("ID")

## Collect the input file names per sub-folder of the experiment folder.
## (Per the original note, names of lock files that appear while a file is
## open are dropped; that happens inside ef.FileNames and is not visible here.)
agar_file_names = ef.FileNames(".json", path, "pickolo/json")        # agar plate evaluations
od_file_names = ef.FileNames(".xml", path, "OD/xml")                 # OD reader exports
instruction_file_names = ef.FileNames(".csv", path, "platefiles")    # plate instruction files

## Build Dictionaries

In [2]:
## Color per phenotype label (passed as `palette` to the summary plots)
hue_infos = {
    "U": "gray",
    "S": "green",
    "A": "yellow",
    "B": "orange",
    "AB": "red",
    "A&B": "pink",
    "Fishy": "black",
    "Fishy_OD": "Brown",
}

## Strategy name per "P<n>" key (presumably the plate part of a row ID - confirm)
strategies = {
    "P1": "No treatment",
    "P2": "Mono A",
    "P3": "Mono B",
    "P4": "Combo",
    "P5": "Cycling",
    "P6": "Mixing",
}

## Well dictionary: running well number (1..384, row-major) -> well name "A1".."P24"
plate_rows = list("ABCDEFGHIJKLMNOP")
well_dict = {
    row_position * 24 + column: f"{row_letter}{column}"
    for row_position, row_letter in enumerate(plate_rows)
    for column in range(1, 25)
}
# The counter ends one past the last well, exactly as the explicit loop left it.
well = len(well_dict) + 1

## Transfer dictionary: transfer number 1..12 -> label "t1".."t12"
transfer_dict = {number: f"t{number}" for number in range(1, 13)}

## Import Agar Plate Data and evaluate Phenotype

In [None]:
if use_backup_data:
    # Fast path: reuse the table cached in Data_backup.csv by an earlier run.
    Data = pd.read_csv("Data_backup.csv", index_col=0)
    Data_backup = Data
else:
    # Full path: import the agar plate files, evaluate the phenotype, label the
    # replicates, then cache the result on disk for later runs.
    Data = ef.ImportAgarPlates(agar_file_names, Data, strategies)
    Data = ef.EvaluatePhenotype(Data)
    Data = ef.Label_Replicates(Data, plate_rows)
    Data_backup = Data
    Data_backup.to_csv("Data_backup.csv")
# NOTE(review): Data_backup is a second name for the same object as Data, not a
# copy - in-place changes to Data are visible through Data_backup as well.

## Count incidence of each phenotype per strategy and replicate
summary = ef.Summerize_Results_hist(Data)
    

## Plot Result

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.ticker as ticker
from matplotlib import pyplot as plt

# Theme setup; the second call is the one that finally applies (large fonts).
sns.set(style="ticks", color_codes=True)
sns.set(font_scale=2, style='ticks')  # crazy big

# One facet per strategy (3 per row), one color per phenotype.
lmplot_options = dict(
    data=summary,
    x="Time",
    y="Fraction",
    col="Strategy",
    col_wrap=3,
    hue="Phenotype",  # markers=marker,
    fit_reg=False,            # no regression line ...
    x_estimator=np.mean,      # ... plot the mean per x value with error bars instead
    palette=hue_infos,        # phenotype -> color
    height=10,                # facet size
    aspect=1.2,
    legend_out=False,         # keep the legend inside the figure
    scatter_kws={"s": 400, 'alpha': 0.6},  # marker size / transparency
)
g = sns.lmplot(**lmplot_options)
#g.set(yscale="log")
g.set_axis_labels("Transfers", "Fraction [%]")  # human-readable axis labels
g.tight_layout()  # tidy up spacing between facets

# Put the legend in the upper-right corner of the third facet.
third_facet = g.fig.get_axes()[2]
third_facet.legend(loc='upper right')

plt.savefig('Summary.png')

In [None]:
# Same summary as line plots: one facet per strategy, one line per phenotype.
relplot_options = dict(
    kind="line",
    data=summary,
    x="Time",
    y="Fraction",
    hue="Phenotype",
    palette=hue_infos,
    col="Strategy",
    col_wrap=3,
)
g = sns.relplot(**relplot_options)
g.set_axis_labels("Transfers", "Fraction [%]")
plt.savefig('Summary2.png')

## Add the OD measurements to Dataframe 
- <span style="color:red"> split CalculateGrowthAndCleanDataframe into two and don't clean the dataframe. The instructions for the later transfers are sometimes useful to track contaminated wells</span>

In [None]:
if use_backup_data:
    # Fast path: reuse the table cached in Data_backup_OD.csv by an earlier run.
    Data = pd.read_csv("Data_backup_OD.csv", index_col=0)
    Data_backup_od = Data
else:
    # Full path: add the OD reader measurements, then cache the result on disk.
    Data = ef.LoadODs(od_file_names, Data)
    Data_backup_od = Data
    Data_backup_od.to_csv("Data_backup_OD.csv")
# NOTE(review): Data_backup_od refers to the same object as Data (no copy is made).

# Clearing happens to all rows where necessary Data are missing. So don't leave more than one NaN per row behind...
# Data = ef.CalculateGrowthAndCleanDataframe(Data)

## Add the Instructions to Dataframe

In [None]:
# Run the instruction files through the project helper. It returns the updated
# table plus r_well_dict, which AddWellNames indexes by well name.
# Presumably this is where columns such as "Turnover", "infected_by_" and
# "treatment_with" come from - confirm in EvalFunctions.
Data, r_well_dict  = ef.Include_Instructions(Data, instruction_file_names, plate_rows)

import re 
def AddWellNames(data, r_well_dict):
    """Fill the well and time columns of *data* from its row IDs.

    Every index label is split on "_". The first field carries the transfer
    number (e.g. "t11") and the third field the well name (e.g. "M19").
    The columns "cr_well", "r_well", "Row", "Column" and "Time" are written
    in place, one row at a time.

    Args:
        data: DataFrame whose index labels look like "t11_P1_M19".
        r_well_dict: Mapping from well name (third ID field) to the value
            stored in the "r_well" column.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        IndexError: If an ID has fewer than three "_"-separated fields.
        KeyError: If a well name is missing from *r_well_dict*.
        ValueError: If the well name or the time field contains no digits.
    """
    # Only the index labels are needed, so iterate them directly instead of
    # materialising a Series per row with iterrows().
    for row_id in data.index:
        fields = row_id.split("_")
        time_field, well_name = fields[0], fields[2]

        data.loc[row_id, "cr_well"] = well_name
        data.loc[row_id, "r_well"] = r_well_dict[well_name]
        # Raw strings: "\d" / "\D" in a normal literal are invalid escapes.
        data.loc[row_id, "Row"] = re.sub(r"\d", "", well_name)          # letters of the well name
        data.loc[row_id, "Column"] = int(re.sub(r"\D", "", well_name))  # digits of the well name
        data.loc[row_id, "Time"] = int(re.sub(r"\D", "", time_field))   # "t11" -> 11
    return data
# Derive cr_well / r_well / Row / Column / Time from the row IDs, then let the
# project helper (re)label the replicates now that those columns are filled.
Data = AddWellNames(Data, r_well_dict)
Data = ef.Label_Replicates(Data, plate_rows)





In [None]:
# NOTE(review): this repeats the Include_Instructions call from the previous
# cell on the already-processed table - confirm the second pass is intentional.
Data, r_well_dict  = ef.Include_Instructions(Data, instruction_file_names, plate_rows)



## Exclude wells affected by contamination
- <span style="color:red"> Figure out what I meant by OD_A and OD_B</span>

In [None]:
# Parameters used to track the contaminated wells.
## In which transfers did the contamination happen?
transfers = [10, 11]
## For each transfer (same order as `transfers`): which reservoirs were contaminated?
contaminated_reservoirs = [["A_r", "B_r"], ["A_r", "B_r"]]
## Transfer at which the contamination was fixed (used as the exclusive upper
## bound of the propagation loop).
## NOTE(review): the markdown notes further down mention 12 instead of 13 - confirm.
t_contamination_fixed = 13

In [None]:
def Contaminated_by_turnover(Data, transfers, contaminated_reservoirs):
    """Select the rows that received turnover from a contaminated reservoir.

    Side effect: the columns "Exclude" and "Contaminated" of *Data* are
    (re)set to False in place before the selection is made.

    Args:
        Data: DataFrame with at least the columns "Time" and "Turnover".
        transfers: Transfer numbers in which a contamination happened.
        contaminated_reservoirs: For each entry of *transfers* (same order),
            the list of "Turnover" values that were contaminated.

    Returns:
        A new DataFrame with the matching rows, ordered by transfer and then
        by reservoir. It has the same columns as *Data* and is empty when
        nothing matches.

    Raises:
        IndexError: If *contaminated_reservoirs* has fewer entries than
            *transfers*.
    """
    Data["Exclude"] = False
    Data["Contaminated"] = False

    # zip() would silently drop transfers without a reservoir list; keep the
    # failure explicit instead (same exception type as the old index lookup).
    if len(contaminated_reservoirs) < len(transfers):
        raise IndexError(
            "contaminated_reservoirs needs one list of reservoirs per transfer"
        )

    selected = []
    for t, reservoirs in zip(transfers, contaminated_reservoirs):
        data_t = Data[Data["Time"] == t]
        for reservoir in reservoirs:
            selected.append(data_t[data_t["Turnover"] == reservoir])

    if not selected:
        return pd.DataFrame(columns=Data.columns)
    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the growing result on every iteration.
    return pd.concat(selected)
# Rows that received turnover from a contaminated reservoir. The call also
# resets the "Exclude" / "Contaminated" columns of Data to False.
data_contaminated_by_Turnover =  Contaminated_by_turnover(Data, transfers, contaminated_reservoirs)

In [None]:
# Flag every directly contaminated row in the main table ...
contaminated_ids = data_contaminated_by_Turnover.index
Data.loc[contaminated_ids, "Contaminated"] = True
Data.loc[contaminated_ids, "Exclude"] = True
# ... and show which r_wells they belong to.
data_contaminated_by_Turnover.r_well.unique()

## Figure out which wells are affected by contamination
- <span style="color:red"> find method to "cure" contaminated cells</span>
- t_contamination_fixed = 12

In [None]:
## Affected by contaminated transfer
t_start = transfers[1]
t_end = t_contamination_fixed-1
print(t_start, t_end)

In [None]:

# Carry the contamination forward, one transfer at a time, up to (but not
# including) the transfer at which it was fixed.
for t in range(t_start, t_contamination_fixed):

    # r_wells that were directly contaminated by turnover in the previous transfer.
    # Bug fix: filter into a local name. Overwriting data_contaminated_by_Turnover
    # with its own "Time == t-1" subset left nothing to match from the second
    # iteration on (and made the cell return nothing when re-run).
    turnover_previous = data_contaminated_by_Turnover[data_contaminated_by_Turnover["Time"] == t - 1]
    contaminated_rwells = turnover_previous.r_well.unique()

    ## Check if this well was transferred from a previously infected r_well:
    ## copy the flags of that r_well from transfer t-1 to transfer t.
    ## (The list assignment requires the same number of rows for the r_well at
    ## both transfers; pandas raises otherwise.)
    for rw in contaminated_rwells:
        Data.loc[(Data["r_well"] == rw) & (Data["Time"] == t), "Exclude"] = list(Data.loc[(Data["r_well"] == rw) & (Data["Time"] == t-1), "Exclude"])
        Data.loc[(Data["r_well"] == rw) & (Data["Time"] == t), "Contaminated"] = list(Data.loc[(Data["r_well"] == rw) & (Data["Time"] == t-1), "Contaminated"])

    ## Check if the well was infected from a contaminated well
    Data_t = Data.loc[(Data["Time"] == t)]
    for i, row in Data_t.iterrows():
        infecting_id = row["infected_by_"]
        if infecting_id == "None":
            # "None" marks rows without an infecting well.
            continue
        print("Was infected by%s"%Data.loc[infecting_id, "r_well"])
        if Data.loc[infecting_id, "Contaminated"]:
            Data.loc[i, "Contaminated"] = True
            Data.loc[i, "Exclude"] = True
            print(i, "Well was contaminated due to infection from well", infecting_id)
            

In [None]:
# All rows currently flagged as contaminated, and the r_wells they belong to.
is_contaminated = Data["Contaminated"] == True
data_contaminated = Data[is_contaminated]
data_contaminated.r_well.unique()

In [None]:
# Distinct values of the "infected_by_" column (row IDs, or "None").
Data["infected_by_"].unique()

## Antibiotic Treatment

In [None]:
# Row counts per transfer, split by treatment, one facet per strategy.
sns.catplot(
    kind="count",
    data=Data,
    x="Time",
    hue="treatment_with",
    col="Strategy",
    col_wrap=3,
)
plt.savefig('Treatment.png')

## Turnover Strains

In [None]:
# Keep only rows whose "Turnover" entry is not the "None" placeholder, then
# count them per transfer, split by turnover value, one facet per strategy.
has_turnover = Data["Turnover"] != "None"
Data_noNone = Data.loc[has_turnover]
sns.catplot(
    kind="count",
    data=Data_noNone,
    x="Time",
    hue="Turnover",
    col="Strategy",
    col_wrap=3,
)
plt.savefig('Turnover.png')

In [None]:
# Display the OD "backup" table.
# NOTE(review): it was bound with `Data_backup_od = Data` (no copy), so it may
# also show in-place changes made to that object afterwards - confirm.
Data_backup_od

## Interesting histories
- Reconstruct histories over time and plot trees
    - A + B + Treatment = B+A + Treatment
    - A + Treatment
    - B + Treatment
    - A&B + Treatment
    - S+Treatment
    - Exclude fishy Datapoints

- `infected_by_` is wrong, because the infecting well has to be on the plate before (one transfer earlier)
- check infection with bitwise addition


In [None]:
# All rows recorded at transfer 11.
tmp = Data[(Data["Time"] == 11)]

In [None]:
# Distinct "infected_by_" entries among the transfer-11 rows.
infecting = tmp["infected_by_"].unique()

In [None]:
# r_wells of the infecting rows. The first entry of `infecting` is skipped -
# presumably the "None" placeholder; confirm it really is the first entry.
Data.loc[infecting[1:], "r_well"].unique()

In [None]:
# Show the infecting IDs collected above.
infecting

In [None]:
# Look at one specific row by its ID.
Data.loc["t11_P1_M19"]

In [None]:
# All rows whose "infected_by_" entry points at that ID.
Data[Data["infected_by_"] == "t11_P1_M19"]