# Screening for side effects of Covid-19 drug candidates on cardiovascular development 

# Analysis notebook


In [6]:

import os
from glob import glob
import pandas as pd
import numpy as np

from glob import glob
from functions import analysis as ana
from functions.literature_search import searchPubmed
from functions.analysis import joined_find
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
from importlib import reload
from functions.literature_search import find_intersection

import math
import datetime

ModuleNotFoundError: No module named 'functions'

In [5]:
# Run this cell if the `functions` package cannot be imported.
# Python resolves `from functions import ...` by searching every directory on
# sys.path for a `functions` package, so the entry added here must be the
# directory that CONTAINS `functions/` -- adding the path of a single .py file
# (e.g. "./functions/analysis.py") has no effect on the import.
# NOTE(review): assumes the notebook is started from the project root, i.e. the
# folder holding `functions/` -- adjust `project_root` otherwise.
import os
import sys

project_root = os.path.abspath(".")
if project_root not in sys.path:  # keep the cell idempotent on re-run
    sys.path.append(project_root)

# Pubmed search 

In [None]:
# Uncomment to execute literature search
# SECURITY: do not hard-code the API key in the notebook -- it is a credential.
# Export it as the environment variable NCBI_API_KEY before starting Jupyter
# (presumably an NCBI E-utilities key, given the PubMed search -- confirm).
# Any key that was ever committed in plain text should be revoked and re-issued.
#drug_list = list(pd.read_excel("./tables/LiteratureSearch_Druglist.xlsx")["MODIFIED_NAME"])
#instance = searchPubmed("alexander.ernst@unibe.ch", drug_list, api_key=os.environ["NCBI_API_KEY"])
#df_literature = instance.multiple_context_search()[0]
#df_intersect = find_intersection(df_literature)

In [None]:
# Get all the results from the Morphology assay
<b>Execute only if you have new data</b>

Explanation:
    
One folder per experiment, the folders are identified by keywords which are referred to in the code to find the target content. 

Each folder has an "ExpID" at the end and contains 3 image folders, "Measurements"-folder and a "_PlateDescription.xlsx".

The images of one experiment are stored as TIFF files in 3 folders: #1 2x, #2 4x and #3 10x.

Inside the "Measurements" folder are 4 tables, "_HeartUnetAnalysis", "_ISVUnetAnalysis", "Summary_measurements" and "Summary_heart", containing the analysis of the experiment.

The next cell loads all of them and accesses the "_10x" heartbeat movies to perform a frame-rate correction on the heartbeat counts. In this way, variations in the acquisition speed are corrected.

Afterwards the measurements are calibrated to "physical" units.

In [None]:
# Collect the morphology-assay measurements of every experiment folder into
# one table, `df_collection` (one row per well and experiment).
#
# For each folder whose name contains "_ExpID" the cell
#   1. locates the measurement tables, the 10x heartbeat image folder and the
#      plate description via the project helper `joined_find`,
#   2. reads the per-well frame timing with `ana.get_well_framerate`,
#   3. merges size, heart and segmentation tables on the "Well" column,
#   4. tags the rows with the experiment ID and the plate-map entry.
# Entries without "_ExpID" in their name are skipped.

#enter the path to the acquifer data folder
acquifer_folder = "E:/Data_Drug_Screening_CovidBox/Acquifer"

# Image folder used for wells whose name starts with "2" (see the frame-time
# lookup below). Written with forward slashes: the former "E:\Data..." spelling
# relied on the invalid escape sequence "\D".
# NOTE(review): presumably the second plate of experiment 20 -- confirm.
exception = "E:/Data_Drug_Screening_CovidBox/Acquifer/20201203_drug_screen_FliV_myl7mR_1C02_1C10_ExpID20/20201203_drug_screen_FliV_myl7mR_1C11_ExpID20.1/20201203_FV_M7R_1C11_1Co3_BF_550_10x"

directory = os.listdir(acquifer_folder)

# One merged table per successfully processed experiment; concatenated once
# after the loop instead of growing a frame inside it.
experiment_tables = []

for folders in directory:
    print(folders)
    if "_ExpID" not in folders:
        # Not an experiment folder: nothing to load and nothing to append.
        continue

    # Everything after "_ExpID" in the folder name identifies the experiment.
    expid = folders.split("_ExpID")[1]

    # joined_find is a project helper; only element [0] of each result is used
    # below (presumably a list of matching paths -- confirm).
    measurements_heart_path = joined_find(acquifer_folder, folders, "heart")
    measurements_size_path = joined_find(acquifer_folder, folders, "Summary_measurements")
    measurements_HeartSeg_path = joined_find(acquifer_folder, folders, "Table_HeartUnetAnalysis", file_type = ".csv")
    measurements_ISVSef_path = joined_find(acquifer_folder, folders, "Table_ISVUnetAnalysis", file_type = ".csv")
    frame_heart_path = joined_find(acquifer_folder, folders, folder_measurements = "",keyword = "BF_550_10x", file_type = "")
    plate_map_path = joined_find(acquifer_folder, folders, folder_measurements = "",keyword = "_PlateDescription", file_type = ".xlsx")

    try:
        df_platemap = pd.read_excel(plate_map_path[0])
        dict_platemap = df_platemap.to_dict()

        # The beat-count column is spelled inconsistently across exports;
        # normalise every variant to "Heartbeats".
        df_heart = pd.read_excel(measurements_heart_path[0]).rename(columns= {"Hearbeat":"Heartbeats","Heartbeat":"Heartbeats","Hearbeats":"Heartbeats"})
        # Well name = part of "Label" before "--", with the dashes removed.
        df_heart["Well"] = df_heart["Label"].str.split("--",expand=True)[0].str.replace("-","")

        # Wells starting with "2" are looked up in the `exception` image
        # folder after stripping that leading "2"; all other wells use this
        # experiment's own 10x folder.
        frametime = [
            ana.get_well_framerate(path = exception, well = well.split("2", 1)[1])
            if well.startswith("2")
            else ana.get_well_framerate(path = frame_heart_path[0], well = well)
            for well in df_heart["Well"]
        ]

        # get_well_framerate yields (average, sum) per well, in that order.
        df_heart["Avg_Frametime (ms)"] = [timing[0] for timing in frametime]
        df_heart["Sum_Frametime (ms)"] = [timing[1] for timing in frametime]

        df_size = pd.read_excel(measurements_size_path[0])
        if "Well" not in df_size.columns:
            # Derive the well from the image title: text between ":-" and "--PO".
            df_size["Well"] = df_size["Image_title"].str.split(":-",expand=True)[1].str.split("--PO",expand=True)[0]

        df_heart_seg = pd.read_csv(measurements_HeartSeg_path[0])
        df_ISV_seg = pd.read_csv(measurements_ISVSef_path[0])

        # Inner joins on "Well": only wells present in all four tables remain.
        df_heart_size = pd.merge(left = df_size,right = df_heart, on = "Well").drop(["Image_title_y","Image_title_x"],axis=1)
        df_heart_size = pd.merge(left = df_heart_size,right = df_heart_seg, on = "Well")
        df_heart_size = pd.merge(left = df_heart_size,right = df_ISV_seg, on = "Well")
        df_heart_size["Experiment ID"] = expid
        df_heart_size["Concentration"] = df_heart_size["Well"].apply(ana.assign_compounds, dict_compound=dict_platemap)

    except  Exception as e:
        # Stop at the first failing experiment; the failed folder contributes
        # no rows and later folders are not processed.
        print("An {} error occurred in: {}".format(e,folders))
        break

    else:
        experiment_tables.append(df_heart_size)

if not experiment_tables:
    raise RuntimeError("No experiment folder could be loaded from {}".format(acquifer_folder))

df_collection = pd.concat(experiment_tables, axis=0)
        
        
# Clean up the collected morphology table and convert the raw measurements
# into the reported quantities (µm, BPM, %).

# Scale factors applied to the raw length measurements to obtain µm.
# NOTE(review): presumably µm-per-pixel calibrations of the respective
# objectives -- confirm against the acquisition settings.
BODY_LENGTH_SCALE = 3.25
ISV_SCALE = 1.625
# Ejection fractions above this percentage are blanked out (set to NaN).
EJECTION_FRACTION_MAX = 60

# Remove merge leftovers ("Unnamed*" index columns, "*_y" duplicates) and any
# surviving misspelled beat-count columns. errors="ignore" because the
# misspelled names are normally already renamed to "Heartbeats" on load, in
# which case a strict drop would raise KeyError.
list_todrop = [col for col in df_collection.columns if col.startswith("Unnamed") or col.endswith("_y")] + ["Hearbeat","Heartbeat"]
df_collection = df_collection.drop(columns=list_todrop, errors="ignore")

df_collection = df_collection.rename(columns={"Delta_DIA-SYS":"Ejection fraction (%)","Length": "Body length (µm)","Heartbeats":"Heart Beats (Count)","Slices":"Frames (Count)",'Median_minor_axis_length':'ISV width (µm)','Median_major_axis_length':'ISV length (µm)','N_ISV':'Number ISV (Count)'})

# Plate-map entries look like "<drug>__c<concentration>"; the drug name is the
# part before "__c" and its trivial name the part before the first "_".
df_collection["Drug"] = df_collection.Concentration.str.split("__c", expand=True)[0]
df_collection["TRIVIAL_NAME"] = df_collection["Drug"].str.split("_",expand = True)[0]
df_collection = df_collection.set_index("Drug")

# Entries without a concentration suffix get 1.0. fillna replaces the former
# chained assignment (`df[col].loc[mask] = ...`), which is not guaranteed to
# write back into the frame and is a no-op under pandas Copy-on-Write.
concentration = df_collection.Concentration.str.split("_c", expand=True)[1].astype(float)
df_collection["Concentration (µM)"] = concentration.fillna(1.0)

df_collection["Body length (µm)"] = df_collection["Body length (µm)"] * BODY_LENGTH_SCALE
# beats / (average frame time [ms] * number of frames) = beats per ms of
# recording; * 60000 converts to beats per minute.
df_collection["Heart Rate (BPM)"] = (df_collection["Heart Beats (Count)"] * 60000) / (df_collection["Avg_Frametime (ms)"]* df_collection["Frames (Count)"])

df_collection["ISV length (µm)"] = df_collection["ISV length (µm)"] * ISV_SCALE
df_collection["ISV width (µm)"] = df_collection["ISV width (µm)"] * ISV_SCALE
df_collection["ISV area (µm^2)"] = df_collection["ISV length (µm)"] * df_collection["ISV width (µm)"]
df_collection["Repeat"] = df_collection["Experiment ID"]

# Fraction -> percent, then blank values above the cutoff while keeping the
# rows (the other measurements of those wells stay usable).
ejection_fraction = df_collection["Ejection fraction (%)"] * 100
df_collection["Ejection fraction (%)"] = ejection_fraction.mask(ejection_fraction > EJECTION_FRACTION_MAX)

df_collection = df_collection.drop(["Concentration", "Drug_x"],axis=1)

In [None]:
# Uncomment to  save the table
#df_collection.to_excel("./tables/{}_Morphology_Assay_Collected_Measurements.xlsx".format(datetime.date.today()))

# Get all the results from the Behavior assay

In [None]:
# Collect the behaviour-assay (DanioVision) tracks of every experiment folder.
#
# Produces two tables:
#   df_collect_lineplot     mean of the numeric columns per experiment, drug,
#                           concentration and time bin
#   df_collect_phasemedian  median of the numeric columns per experiment, drug,
#                           concentration, well and phase
dv_folder = "E:/Data_Drug_Screening_CovidBox/DanioVision"
directory = os.listdir(dv_folder)

# Light/dark programme: maps each time bin to its phase label. The same scheme
# workbook is applied to every experiment.
df_program = pd.read_excel("E:/Data_Drug_Screening_CovidBox/DanioVision/20210115_A03_A11_ExpID0/150121_CoVasc_1min_bin_Bright_Dark_scheme.xlsx")
dict_program_melt = df_program.set_index("Bin").melt(var_name="Bin",value_name="Phase").set_index("Bin").to_dict()["Phase"]

# Per-experiment results, concatenated once after the loop. This also removes
# the dependence on the FIRST directory entry being an experiment folder.
lineplot_tables = []
phasemedian_tables = []

for folders in directory:
    if "_ExpID" not in folders:
        continue

    expid = folders.split("_ExpID")[1]

    # The keyword also matches the "..._scheme.xlsx" programme workbook, so the
    # candidates are filtered BEFORE taking the first hit. (Filtering the
    # already-selected path string would iterate over its characters.)
    bin1_candidates = joined_find(dv_folder, folders, folder_measurements = "",keyword = "CoVasc_*1min*", file_type = ".xlsx",exclude=True)
    filtered_path = [path for path in bin1_candidates if "scheme" not in str(path)]
    if not filtered_path:
        raise FileNotFoundError("No 1-min-bin track export found in: {}".format(folders))
    bin1_path = filtered_path[0]

    map_path = joined_find(dv_folder, folders, folder_measurements = "",keyword = "_PlateMap", file_type = ".xlsx",exclude=True)[0]

    df_platemap = pd.read_excel(map_path)
    dict_platemap = df_platemap.dropna(axis=1).to_dict()

    # The export has four header rows; they are replaced by flat column names.
    df_bin1 = pd.read_excel(bin1_path, header = [0,1,2,3])
    df_bin1.columns = ['Trial', 'Well', 'Bin [1 sec]', 'Distance_moved [mm]', 'Velocity [mm/s]', 'Moving [s]','Not_Moving [s]']

    df_bin1["Phase"] = df_bin1["Bin [1 sec]"].apply(ana.assign_phase, dict_program_clean=dict_program_melt)
    df_bin1["Drug"] = df_bin1["Well"].apply(ana.assign_compounds, dict_compound=dict_platemap)

    # Plate-map entries look like "<drug>__c<concentration>". Plates without
    # any concentration suffix get 1.0 throughout; on mixed plates only the
    # entries lacking a suffix do.
    if any("_c" in drug for drug in df_bin1["Drug"]):
        # fillna instead of chained `.loc` assignment, which does not write
        # back into the frame under pandas Copy-on-Write.
        concentration = df_bin1["Drug"].str.split("_c", expand=True)[1].astype(float)
        df_bin1["Concentration (µM)"] = concentration.fillna(1.0)
    else:
        df_bin1["Concentration (µM)"] = 1.

    df_bin1["Drug"] = df_bin1["Drug"].str.split("__c",expand=True)[0]
    df_bin1["Experiment ID"] = expid
    # Discard bins flagged "Remove" by the programme and wells marked "Blank".
    df_bin1 = df_bin1.loc[(df_bin1["Phase"] != "Remove") & (df_bin1["Drug"] != "Blank")]

    # numeric_only=True: the frame still holds string columns (e.g. "Trial",
    # "Well", "Phase"); pandas >= 2.0 raises on them instead of silently
    # dropping them as older versions did.
    df_bin1_plot = df_bin1.groupby(["Experiment ID","Drug","Concentration (µM)","Bin [1 sec]"]).mean(numeric_only=True)
    df_bin1_phasemedian = df_bin1.groupby(["Experiment ID","Drug","Concentration (µM)","Well","Phase"]).median(numeric_only=True)

    lineplot_tables.append(df_bin1_plot)
    phasemedian_tables.append(df_bin1_phasemedian)
    print(map_path)

if not lineplot_tables:
    raise RuntimeError("No experiment folder found in {}".format(dv_folder))

df_collect_lineplot = pd.concat(lineplot_tables, axis=0)
df_collect_phasemedian = pd.concat(phasemedian_tables, axis=0)
        