In [None]:
import anndata as ad
import pandas as pd
import numpy as np
import scanpy as sc
import decoupler as dc
import os
import re
import statistics
import seaborn as sns
import matplotlib.pyplot as plt
#import pyreadr

In [None]:
def validate_input_arguments (arguments_list):
    if arguments_list["out_path"] is None:
        print("Please provide an output path")
    elif arguments_list["out_path"][-1] != "/":
        arguments_list["out_path"] = arguments_list["out_path"] + "/"

    if arguments_list["celltype"] is None:
        print("Please provide the name of the metadata field containing cell type annotations")

    if arguments_list["condition"] is None:
        print("Please provide the name of the metadata field containing condition annotations")

    if arguments_list["organism"] is None:
        arguments_list["organism"] = "human"

    if arguments_list["comparison_list"] is None:
        arguments_list["comparison_list"] = np.nan

    if arguments_list["logfc"] is None:
        arguments_list["logfc"] = 0.0

    if arguments_list ["pval"] is None:
        arguments_list["pval"] = 0.05

    if arguments_list["reg"] is None:
        arguments_list["reg"] = load_dorothea_regulon(arguments_list["organism"])

    elif isinstance(arguments_list["reg"], str):
        arguments_list["reg"] = pd.read_csv(arguments_list["reg"], index_col=0)
        arguments_list["reg"] = pd.DataFrame.rename(arguments_list["reg"], columns={"source" : "tf"})

    if not "tf" in arguments_list["reg"] and "target" in arguments_list["reg"] and "weight"in arguments_list["reg"]:
        raise Exception("Not all necessary columns found in regulon table! Please make sure that the regulon has the columns source, target and weight!")
    
 
   
    return(arguments_list)



In [None]:
def AverageExpression(sub_object, celltype = None, name_iterable = None, outpath = None):
    gene_ids = sub_object.var.index.values
    #cluster line even necessary if not returned?
    #clusters = anndataobject.obs[celltype].cat.categories
    obs = sub_object[:,gene_ids].X.toarray()
    obs = np.expm1(obs)
    avg_df = pd.DataFrame(obs,columns=gene_ids,index= sub_object.obs[celltype])
    avg_df = avg_df.groupby(level=0, observed=False).mean()
    avg_df.T.to_csv(outpath + name_iterable + "_average_gene_expression_by_cluster_exp.csv")

    return avg_df.T


In [None]:
def eval_pval(p_val):
    p_val = float(p_val)
    if p_val < 0.001: 
      txt = "***"
    elif p_val < 0.01: 
      txt = "**"
    elif p_val < 0.05: 
      txt = "*"
    else:
      txt = "ns"
    return(txt)


def eval_log_fc_tag(log_fc):
    if log_fc >= 1.0: 
      txt = "***"
    elif log_fc > 0.5: 
      txt = "**"
    elif log_fc > 0.0: 
      txt = "*"
    else:
      txt = "ns"
    return(txt)

In [101]:
def create_unfiltered_tf_scores(tf_scores_df, condition, celltype, out_path):   
    summarized_tf_scores_df = tf_scores_df.groupby(celltype, observed = True).mean().T
    tf_scores_df.groupby(celltype, observed = True).apply(display)
    #agg(["mean", "var"])
    summarized_tf_scores_df.to_csv(out_path + "/unfiltered_tf_scores_" + condition + ".csv")
    return summarized_tf_scores_df

#np.var() returns a value that is different from R's var(), whereas statistics.variance() is the same as R's var()
def save_variable_tf_score(filtered_summarized_tf_scores_df, condition, out_path, plot):
    filtered_summarized_tf_scores_df["var"] = filtered_summarized_tf_scores_df.apply(statistics.variance, axis=1).unique()
    filtered_summarized_tf_scores_df.to_csv(out_path + "/variable_tf_scores_" + condition + ".csv")

    if plot:
        top_variable_tfs = filtered_summarized_tf_scores_df.sort_values("var", ascending=False).head(n=20).drop(columns="var")
        fig, ax = plt.subplots(figsize=(8,7))   
        ax = sns.heatmap(top_variable_tfs, cmap="vlag", center=0, vmin=top_variable_tfs.min(axis=None), cbar_kws={"label": "z-score"})
        ax.set(xlabel="Cell Type", ylabel="Transcription Factor")
        ax.get_figure()
        plt.savefig(out_path + "/tf_activity_top20_variable_" + condition + ".pdf")
        plt.close()

    filtered_summarized_tf_scores_df_var = filtered_summarized_tf_scores_df
    return filtered_summarized_tf_scores_df_var
    

In [107]:
def plot_tf_activity(filtered_summarized_tf_scores_df, tag_mapping, condition, out_path):
    filtered_summarized_tf_scores_df = filtered_summarized_tf_scores_df.drop(columns="var")
    sns.clustermap(filtered_summarized_tf_scores_df, cbar_kws={"label": "z-score"}, figsize=(8,7), cmap="vlag", center=0)
    plt.savefig(out_path + "/tf_activity_compressed_" + condition + ".pdf")
    plt.close()

    sns.clustermap(filtered_summarized_tf_scores_df, cbar_kws={"label": "z-score"}, figsize=(8,50), cmap="vlag", center=0, annot= tag_mapping, fmt="")
    plt.savefig(out_path + "/tf_activity_" + condition + ".pdf")
    plt.close()


    

In [None]:
def map_z_value_filtered(filtered_summarized_tf_scores_df, anndataobject_markers):
    z_score = []
    for gene in anndataobject_markers["gene"]:
        if gene in filtered_summarized_tf_scores_df.index:
            z_score.append(filtered_summarized_tf_scores_df.loc[gene, anndataobject_markers["cluster"]])
    return z_score


In [81]:
#not per cluster but cluster and pval etc need to be added to csv (check against specific marker csv from lr2tf test run in R)

def get_significant_tfs_single(tf_activities_sub, celltype, condition, out_path, pval, logfc, name, plot):
    
    #does not work if condition is None
    renamed_condition = condition.replace(",", "_")
    name = name.replace(",", "_")

    single_result_path = out_path + renamed_condition 
    if not os.path.isdir(single_result_path):
        os.mkdir(single_result_path)

    #tf_activities_scaled = sc.pp.scale(tf_activities_sub, copy=True, max_value=10)
    #or sc.pp.normalize_total(anndataobject)
    #sc.pp.log1p(tf_activities) 
    # "warning: seems to be already log transformed"

    number_of_clusters = len(tf_activities_sub.obs["new_annotation"].cat.categories) 


    sc.tl.rank_genes_groups(tf_activities_sub, groupby= "new_annotation", reference="rest", method="wilcoxon", key_added="wilcoxon_markers", corr_method= "bonferroni")
    #sc.tl.filter_rank_genes_groups(anndataobject, min_in_group_fraction=0, key="wilcoxon_markers", key_added= "wilcoxon_markers_filtered")
    

    sc.tl.rank_genes_groups(tf_activities_sub, groupby= "new_annotation", reference="rest", method="t-test_overestim_var", key_added="t_test_overestim_var_markers", corr_method= "bonferroni" )
    #sc.tl.filter_rank_genes_groups(anndataobject, min_in_group_fraction=0, key="t_test_overestim_var_markers", key_added="t_test_overestim_filtered")

    #FindAllMarkers(seuratobject, only.pos = TRUE, min.pct = 0, logfc.threshold = 0, verbose = FALSE)

    result1 = tf_activities_sub.uns['wilcoxon_markers']
    groups = result1['names'].dtype.names
    anndataobject_markers = pd.DataFrame(
    {group + '_' + key[:1]: result1[key][group]
    for group in groups for key in ['names','logfoldchanges','pvals','pvals_adj']})

    result2 = tf_activities_sub.uns['t_test_overestim_var_markers']
    groups = result2['names'].dtype.names
    anndataobject_markers = pd.DataFrame(
    {group + '_' + key[:1]: result2[key][group]
    for group in groups for key in ['names','logfoldchanges','pvals','pvals_adj']})

    anndataobject_markers_wilcoxon = sc.get.rank_genes_groups_df(tf_activities_sub, group = None, log2fc_min=0, key="wilcoxon_markers")
    
    anndataobject_markers_wilcoxon.rename(columns={"names":"gene", "group": "cluster"}, inplace=True)
    
    #documentation says this won't update the main dataframe but will only overwrite a copy in future pandas versions (after v3)
    anndataobject_markers_wilcoxon["tag"] = None
    anndataobject_markers_wilcoxon["log_fc_tag"] = None
    for i in range(len(anndataobject_markers_wilcoxon["pvals_adj"])):
         anndataobject_markers_wilcoxon["tag"].iloc[i,] = eval_pval(anndataobject_markers_wilcoxon["pvals_adj"].iloc[i,])
         anndataobject_markers_wilcoxon["log_fc_tag"].iloc[i,] = eval_log_fc_tag(anndataobject_markers_wilcoxon["logfoldchanges"].iloc[i,])
     

    anndataobject_markers_wilcoxon.to_csv(single_result_path + "/" + name + "_specific_markers_wilcoxon_test.csv",index=0)

    anndataobject_markers_t_over = sc.get.rank_genes_groups_df(tf_activities_sub, group = None, log2fc_min=0, key="t_test_overestim_var_markers")
 
    anndataobject_markers_t_over.rename(columns={"names":"gene", "group": "cluster"}, inplace=True)

    anndataobject_markers_t_over["tag"] = None
    anndataobject_markers_t_over["log_fc_tag"] = None
    for i in range(len(anndataobject_markers_t_over["pvals_adj"])):
         anndataobject_markers_t_over["tag"].iloc[i,] = eval_pval(anndataobject_markers_t_over["pvals_adj"].iloc[i,])
         anndataobject_markers_t_over["log_fc_tag"].iloc[i,] = eval_pval(anndataobject_markers_t_over["logfoldchanges"].iloc[i,])

    anndataobject_markers_t_over.to_csv(single_result_path + "/" + name + "_specific_markers_t_test_overestim_test.csv",index=0)

   #tag mapping wilcoxon
    tag_mapping_wilcox = anndataobject_markers_wilcoxon[["gene", "tag", "log_fc_tag", "cluster", "pvals_adj", "logfoldchanges"]]
    tag_mapping_wilcox = tag_mapping_wilcox[(tag_mapping_wilcox["pvals_adj"] < float(pval)) & 
                              ((tag_mapping_wilcox["logfoldchanges"] > float(logfc)) | 
                              (tag_mapping_wilcox["logfoldchanges"] < -float(logfc)))]

    tag_mapping_wilcox = tag_mapping_wilcox.pivot(index="gene", columns="cluster", values="tag")
    clusters = anndataobject_markers_wilcoxon["cluster"].unique()

    for cluster in clusters:
        if cluster not in tag_mapping_wilcox.columns:
            tag_mapping_wilcox[cluster] = np.nan

    tag_mapping_wilcox.fillna("ns", inplace=True)

    tf_activities_sub.obs_names = tf_activities_sub.obs[celltype]
    tf_scores_df = tf_activities_sub.to_df()
    unfiltered_tf_scores = create_unfiltered_tf_scores(tf_scores_df, condition, celltype, single_result_path)
   
    #Filter to only include tfs that match the tag_mapping/are markers
    col_num = tf_scores_df.columns.isin(tag_mapping_wilcox.index)  
    filtered_tf_scores_df = tf_scores_df.loc[:, col_num] 
    
    filtered_summarized_tf_scores_df = filtered_tf_scores_df.groupby(celltype, observed = False).mean().T
    filtered_summarized_tf_scores_df.index.name = "gene"
    filtered_summarized_tf_scores_df.to_csv(f'{single_result_path}/tf_scores_{condition}.csv')
    tf_scores_variable_table = save_variable_tf_score(filtered_summarized_tf_scores_df, condition, single_result_path, plot)

    if plot:
        plot_tf_activity(filtered_summarized_tf_scores_df, tag_mapping_wilcox, condition, single_result_path)
    
    #filtered_summarized_tf_scores_df.index = re.sub((".,"), "_", filtered_summarized_tf_scores_df.index)

    
    #anndataobject_markers_wilcoxon["z_score"] = map_z_value_filtered(filtered_summarized_tf_scores_df, anndataobject_markers_wilcoxon)
    #anndataobject_markers_t_over["z_score"] = map_z_value_filtered(filtered_summarized_tf_scores_df, anndataobject_markers_t_over)
    
    #res needs to contain gene, pval tag, cluster and z score
    #drop doubles??
    res_wilcoxon = anndataobject_markers_wilcoxon[["gene","tag", "cluster"]]
    #res_t_test = anndataobject_markers_t_over[["gene","tag", "cluster", "z_score"]]

    

     #//TODO: 
    #delete one variant and put test type as a variable

    return res_wilcoxon #, res_t_test

In [None]:
#check if it's okay to not pre set the column type as string or float (maybe no problem in python but was problem in r)

def create_empty_CTR_dataframe():
  
  empty_df = pd.DataFrame(
    columns=[
    "source",
    "target",
    "gene_A",
    "gene_B",
    "type_gene_A",
    "type_gene_B",
    "MeanLR"
    ])
  
  return(empty_df)


def add_entry_to_CTR_dataframe(source, target, gene_A, gene_B, type_gene_A, type_gene_B, MeanLR):
  df = pd.DataFrame(
      source,
      target,
      gene_A,
      gene_B,
      type_gene_A,
      type_gene_B,
      MeanLR)

  return df

In [None]:
#def generate_CrossTalkeR_input(tf_activities, gene_expression, regulon = None):

tf_activities =  pd.read_csv("tf_activities_test_wilcox.csv")  
#tf_activities["cluster"] = "Neural"
gene_expression = pd.read_csv("script_test/control_average_gene_expression_by_cluster_exp.csv", index_col = 0)
print(gene_expression)
regulon = pd.read_csv("filterd_regulon.csv", index_col = 0)  
regulon = regulon.rename(columns={"source" : "tf"})



ligands_readin = pyreadr.read_r("ligands_human.rda")
ligands = ligands_readin["ligands_human"]

rtf_readin = pyreadr.read_r("RTF_DB_2.rda") 
rtf_db = rtf_readin["RTF_DB_2"]

#put tf as index column/rownames and receptors as column 1

R2TF = rtf_db.set_index("tf")

sorted_regulon = regulon[["tf", "target"]]
sorted_regulon.rename(columns={"target" : "targets"})
sorted_regulon = sorted_regulon.set_index("tf")

output_df = create_empty_CTR_dataframe()

for row in range(len((tf_activities))):

  #r_tf = create_empty_CTR_dataframe()
  #tf_l = create_empty_CTR_dataframe()

  #if (tf_activities["z_score"].iloc[row] == 0):
  tf_var = str(tf_activities["gene"].iloc[row])
  if tf_var in sorted_regulon.index:
    targets = sorted_regulon.loc[tf_var]
  if tf_var in R2TF.index:
    receptors = R2TF.loc[tf_var]
  tf_ligands = np.intersect1d(targets, ligands)
  

      expressed = False
    if ligand in gene_expression.index:
      ex_value = gene_expression.loc[ligand, "Neural"]
      if (ex_value != 0):
        expressed = True
        
    #print(tf_activities.iloc[row[]])
    if (expressed == True):
      df_list_l = list[tf_activities.iloc[row, 2],
                                             tf_activities.iloc[row, 2],
                                             tf_activities.iloc[row, 0],
                                             ligand,
                                             "Transcription Factor",
                                             "Ligand",
                                             tf_activities.iloc[row, 3]]
      
    if (len(receptors) > 0):
       for receptor in receptors:
         df_list_r = list[tf_activities.iloc[row, 2],
                                         tf_activities.iloc[row, 2],
                                         receptor,
                                         tf_activities.iloc[row, 0],
                                         'Receptor',
                                         'Transcription Factor',
                                         tf_activities.iloc[row, 3]]
   
tf_l = pd.DataFrame(df_list_l, columns=["source", "target", "gene_A", "gene_B", "type_gene_A", "type_gene_B", "MeanLR"])
print(tf_l)
r_tf = pd.DataFrame(df_list_r)
      
  #    r_tf["gene_A"] <- gsub("_", "+", r_tf$gene_A, fixed = TRUE)
   #   r_tf["gene_B"] <- gsub("_", "+", r_tf$gene_B, fixed = TRUE)
   #   tf_l["gene_A"] <- gsub("_", "+", tf_l$gene_A, fixed = TRUE)
   #   tf_l["gene_B"] <- gsub("_", "+", tf_l$gene_B, fixed = TRUE)

   #   output_df = pd.concat(output_df, r_tf)
   #   output_df = pd.concat(output_df, tf_l)

  #return output_df

In [99]:
#ignore extra tfs from decoupler while writing script 

def tf_activity_analysis (anndataobject, arguments_list = None):
    
    if (isinstance(anndataobject, str)):
        anndataobject = ad.read_h5ad(anndataobject)

    arguments_list = validate_input_arguments(arguments_list)

    if not os.path.isdir(arguments_list["out_path"]):
        os.mkdir(arguments_list["out_path"])
        tf_path = arguments_list["out_path"] + "TF_results/"
        os.mkdir(tf_path)
    else:
        tf_path = arguments_list["out_path"] + "TF_results/"

    #rename the arguments inserted into argument list (eg protocol to condition)
    condition = anndataobject.obs[arguments_list["condition"]]

    #checks for tf activity csv, if nothing there, runs decoupler
    if isinstance(arguments_list["tf_activities"], str):
         tf_activities = ad.read_csv(arguments_list["tf_activities"])
         tf_activities.obsm = anndataobject.obsm
         tf_activities.uns = anndataobject.uns
         tf_activities.obs = anndataobject.obs
    elif arguments_list["tf_activities"] is None:
         dc.run_ulm(mat = anndataobject, net = "reg", source ='source', target ='target', weight ='weight', verbose = True, use_raw = False)
         tf_activities = anndataobject.obsm['ulm_estimate']
         tf_activities.to_csv("decoupler_results.csv")
    print(tf_activities.obs.name)

    #sets the stage for decision if single condition or comparison analysis is done

    if not np.isnan(arguments_list["comparison_list"]):
        if len(arguments_list["comparison_list"]) > 0 & len(anndataobject.obs["comparison_list"]) < 2:
            arguments_list["comparison_list"] <- np.nan
            print("Only one condition was found in the data, although a list of comparisons was provided. The analyses are performed only for the present condition!")

    #code for single condition  analysis

    if np.isnan(arguments_list["comparison_list"]):
        result_list = []
        gene_expression_list = []
        CTR_cluster_list = []
        intranet_cluster_list = []

        #creates loop until after tf activity score
        
        for name_iterable in anndataobject.obs.protocol.unique():
            sub_object = anndataobject[anndataobject.obs.protocol == name_iterable]
            tf_activities_sub = tf_activities[tf_activities.obs.protocol == name_iterable]

            sub_object_avg = AverageExpression(sub_object, name_iterable= name_iterable, celltype = arguments_list["celltype"], outpath= arguments_list["out_path"])
            #check if its fine to only have the average expression as a dataframe and not as part of an anndata object

            #check how sub object is returned by average expression (subobject.T) or rename return?
            tf_activity_scores = get_significant_tfs_single(tf_activities_sub, celltype = arguments_list["celltype"],condition = name_iterable, out_path= tf_path, pval = arguments_list["pval"], logfc = arguments_list["logfc"], name = name_iterable, plot = arguments_list["plot"])
           
            #tf_activity_scores[0].to_csv("tf_activities_test_wilcox.csv", index = 0)
            #tf_activity_scores[1].to_csv("tf_activities_test_t_test.csv", index = 0)

            #result_list[[name_iterable]] = tf_activity_scores
            #gene_expression_list[[name_iterable + "_average_expression"]] = sub_object_avg
            
            #if (arguments_list["organism"] == "human"):
            #    CTR_cluster_list[name_iterable] = generate_CrossTalkeR_input(tf_activity_scores[["cluster"]],
            #                                                                     gene_expression_list[[(name_iterable + "_average_expression")]],
            #                                                                     arguments_list["reg"])
            #else:
            #  CTR_cluster_list[[name_iterable]] = generate_CrossTalkeR_input_mouse(tf_activity_scores[["cluster"]],
            #                                                                     gene_expression_list[[(name_iterable + "_average_expression")]],
            #                                                                     arguments_list["reg"])
              
            #intranet_cluster_list[[name_iterable]] = generate_intracellular_network(tf_activity_scores[["cluster"]],
            #                                                          gene_expression_list[[paste0(name, "_average_expression")]],
            #                                                          arguments_list$reg,
            #                                                          arguments_list$organism)
        #return(sub_object) #return tf when finished

In [108]:
result = tf_activity_analysis(anndataobject= "LR2TF_test_run/anndata_object.h5ad", arguments_list= {"out_path" : "script_test", "celltype" : "new_annotation", "condition" : "protocol", "organism" : None, "comparison_list" : None, "logfc" : "0.5", "pval" : None, "reg" : "filterd_regulon.csv", "tf_activities" : "decoupler_results.csv", "plot" : True})


H18_19086_TB_AAATGCCAGTGTACGG-1    H18_19086_TB
H18_19086_TB_AACTCTTCATCGTCGG-1    H18_19086_TB
H18_19086_TB_AATCGGTAGAGAGCTC-1    H18_19086_TB
H18_19086_TB_ACACCAAGTAAGTGTA-1    H18_19086_TB
H18_19086_TB_ACAGCCGTCATCTGTT-1    H18_19086_TB
                                       ...     
H18_19892_TB_CAGCAGCGTAACGCGA-1    H18_19892_TB
H18_19892_TB_GGTGCGTGTTTGTTTC-1    H18_19892_TB
H18_19892_TB_TACCTATCACAGGCCT-1    H18_19892_TB
H18_19892_TB_TAGAGCTGTTAGATGA-1    H18_19892_TB
H18_19892_TB_TCAACGACAGGTCCAC-1    H18_19892_TB
Name: name, Length: 498, dtype: category
Categories (3, object): ['H18_19086_TB', 'H18_19892_TB', 'H18_20186_TB']


  adata.uns[key_added] = {}
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer]

Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Neural,-1.068466,-0.897962,-0.686542,0.745729,-0.603957,-0.945632,-1.011911,1.192392,2.888415,-1.113644,...,0.098607,0.658266,1.762705,-1.091156,-0.049534,-1.157072,-1.050426,0.489335,-1.035406,-0.670443
Neural,-0.959735,0.166781,0.192266,-0.517124,-0.479099,0.326174,0.119614,0.148205,0.848945,-0.284765,...,-0.918034,-0.630782,-0.432844,-0.980115,0.034674,-0.067207,-0.943530,-0.758811,-0.930041,-0.602221
Neural,-1.004267,-0.844010,-0.898726,-0.613263,-1.327886,-0.888815,-0.951111,0.109775,0.024770,-1.046730,...,-0.960630,-0.660049,0.235610,-1.025594,-0.056149,-1.087548,-0.987310,-0.794019,-0.973194,-0.630162
Neural,-1.015729,-0.853643,1.025356,0.100356,-0.592738,-0.898961,-0.961967,1.353047,3.083334,-0.152460,...,1.107587,-0.667583,1.993323,-0.072404,-1.007182,-1.099962,-0.998580,-0.803082,-0.984301,0.929534
Neural,-0.982070,2.981083,1.970954,0.187780,-0.425638,-2.706097,-0.930091,0.845391,4.724492,-1.023596,...,0.130249,-0.645460,-0.479741,-0.010118,2.883209,-1.063511,0.069514,-0.776470,-0.951683,-0.616235
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Neural,-1.005217,-0.844808,1.729793,0.879658,-0.567200,-0.889656,-0.952011,1.197757,0.647093,-1.047722,...,-0.961539,-0.660673,1.026745,-1.026563,0.906058,-0.150877,-0.988245,-0.794770,-0.974113,-0.630758
Neural,-1.047994,0.018887,3.127711,-0.693770,0.804381,1.245624,0.274998,4.007155,3.904044,1.112169,...,0.009111,0.703715,0.874722,0.807512,1.087975,0.662040,-1.030298,-0.828590,0.982276,-0.657598
Neural,-1.194201,-1.003628,0.727110,-0.367487,-0.312988,0.591762,-1.130988,1.771604,1.873642,-0.671226,...,-0.265188,-0.784872,-1.536145,-0.405442,2.768999,-0.514145,-1.174034,0.099989,-1.157250,0.572626
Neural,-1.267747,-1.065434,0.370182,-0.322097,0.141417,0.456086,1.429373,0.171550,0.126962,0.142322,...,-0.373078,1.478291,-0.400798,0.263793,1.769506,-0.627134,-0.433952,-0.002853,-0.399435,-0.795476


Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MSC,-0.886975,1.019133,-0.358027,1.154298,0.083175,0.774495,-2.084374,-0.483882,3.825168,-0.88774,...,0.54369,1.090877,0.544359,1.833333,2.940516,-0.745931,-0.733416,0.864781,0.142596,0.70263
MSC,-0.317679,4.014532,1.580925,-0.668647,0.175103,-0.111638,0.224762,-0.06805,4.861105,-0.142745,...,-0.129695,0.039361,0.279035,-0.849148,1.413218,-0.107218,-1.085214,0.019644,0.854728,-0.619165
MSC,-1.390283,1.211192,2.225591,1.045575,1.32183,-2.399527,-0.684033,-0.049533,6.853034,0.467005,...,0.413737,-1.546048,0.256709,0.824855,0.687195,-0.260946,-0.236893,0.075645,-0.734579,0.726058
MSC,-0.933601,0.415138,1.737014,0.043227,-0.069956,-2.613871,-1.004594,0.796486,4.550402,-0.612637,...,-0.354821,1.780467,0.999571,-0.679112,-0.659396,-0.737892,-0.845138,0.672167,-1.692939,1.121193
MSC,-1.818361,2.214356,1.074872,1.464348,2.407113,0.896942,-0.423229,0.378901,3.854934,1.713017,...,1.252426,-0.586886,-0.164461,-1.140451,1.490054,0.789917,-1.760616,-0.015142,-0.111054,1.423875
MSC,-0.983256,1.042877,2.637252,2.479361,0.438994,-1.500126,-3.337292,-0.613815,7.951768,-0.340963,...,-0.659436,-0.065903,1.097365,-0.726627,-0.322115,-1.210158,-1.503628,-0.545086,-1.654594,0.194253
MSC,-2.697753,1.273276,3.783352,2.768971,-0.253661,-0.825763,-2.834384,0.43883,7.727686,0.726918,...,-0.211084,-0.166951,-0.089374,-0.10732,0.337145,-1.048863,-0.529539,-0.010203,-1.700011,-0.181748
MSC,0.062742,1.195314,0.109602,0.184578,0.096878,0.982756,-0.85825,1.153243,3.236984,0.261052,...,-1.054717,1.12268,-0.573755,0.90454,0.455428,-0.706365,-1.415642,0.063495,0.881207,-0.428912
MSC,-0.896528,-0.352671,-0.247726,0.545627,-0.893672,1.057905,-1.268817,0.138378,4.099694,-0.218918,...,1.259458,1.714875,-0.493476,-0.822535,1.741125,-0.363594,0.804197,-0.212547,0.661147,2.188895
MSC,-1.419277,0.758285,2.326444,0.592517,1.375874,-2.846393,-1.317361,1.527936,6.685905,0.952788,...,0.46232,-0.145105,-0.513302,-0.17418,0.505256,-0.217419,-0.462997,0.425178,-1.521503,0.828271


Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Fibroblast,-1.504947,-1.086288,-0.076325,-0.435353,-1.521925,1.277241,-0.436811,1.911485,4.563710,-0.334244,...,0.152247,0.107060,-1.307879,-0.602643,-0.121086,-1.244807,-1.462315,-0.350595,-0.915450,-0.475175
Fibroblast,-1.891181,-1.102314,-0.434258,-0.610240,-1.710156,0.972705,-1.276389,-1.622781,0.877594,-0.539153,...,1.725516,-0.489156,-0.828271,-0.683432,-1.381690,-2.048055,-0.799501,0.756975,-1.832647,2.114552
Fibroblast,-1.352192,-1.085841,0.281653,1.137759,-0.879712,1.625572,-1.258424,0.280771,3.503838,1.073630,...,-0.128663,1.830724,0.097928,-1.921622,0.309623,-2.037739,-0.679054,-0.161318,-0.723115,-0.341038
Fibroblast,-1.157564,-0.313637,0.382674,-0.723645,-2.418796,1.602301,-0.125262,0.101169,5.416654,0.079921,...,0.652373,1.117058,-0.401406,0.785435,-1.188493,-1.393182,-0.145740,-0.262543,-0.566220,0.313575
Fibroblast,-1.493503,-1.208843,-0.519960,-0.433314,-1.074839,-1.339858,-0.914591,-0.603012,2.757330,-0.270863,...,-0.588210,-0.468096,0.140231,0.137157,-1.501867,0.123445,-0.957487,-0.731219,0.191263,0.296584
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Fibroblast,-1.440472,-0.608273,-0.761421,2.477217,-1.042629,-1.719589,-1.343153,-0.413032,2.567454,-0.872792,...,1.220938,0.178612,-0.175634,-0.246813,-0.497077,-1.634411,-0.283805,2.063844,-0.316477,0.517103
Fibroblast,-1.421399,-0.581939,0.780281,0.441307,-1.000108,-1.132240,-1.324673,-0.765742,2.188591,0.452397,...,1.925412,0.473853,-0.380753,0.027291,0.026497,-1.136157,-1.379366,-0.244915,-0.276332,1.225038
Fibroblast,-0.847687,-0.024966,-0.362663,-0.617631,-1.196682,-0.095402,-1.485060,-1.156234,2.636507,-0.332188,...,-0.473580,-0.142450,-1.322806,-1.169598,-0.048775,-0.698387,-1.242358,-0.362113,0.669572,-0.664664
Fibroblast,-0.264694,-1.548294,-1.366863,1.811167,0.030446,-1.630501,-1.224641,2.224856,2.487135,0.278183,...,0.451269,2.732588,0.246487,0.173111,0.754311,-0.925598,-1.275637,1.178583,-1.238737,1.492685


Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Megakaryocyte,-2.057124,-1.728771,-0.650971,-1.302446,-0.51256,0.966899,-1.483519,-0.525911,2.529577,-1.17434,...,0.381661,-0.671417,2.128782,-0.464678,-1.510827,-1.78852,-0.109013,-0.006466,-1.505159,0.944313
Megakaryocyte,-0.962614,-0.713145,0.451321,0.935562,-0.204955,-1.314856,-0.70414,-0.120847,3.200122,-0.092897,...,0.722059,0.161639,2.251135,0.92238,0.247673,-1.660396,-1.933346,0.286679,-0.862415,1.380019
Megakaryocyte,-1.368409,-1.100814,-0.640365,-0.42415,-0.142692,-0.065452,-1.274369,0.152797,1.940801,-0.072653,...,1.255486,0.2548,1.821999,-0.181783,-0.890426,-1.558278,-1.327175,0.959999,-1.825512,-1.181966
Megakaryocyte,-1.289071,-0.030169,-1.031073,-1.148496,-0.785915,0.049423,1.272329,-0.552076,1.799716,-0.673773,...,0.736097,-0.289086,-0.169855,0.439834,-1.809196,-0.474491,-1.793743,0.170093,0.457315,1.549881
Megakaryocyte,-1.795413,-1.868142,-0.424272,-1.823254,-1.576569,-1.4841,-2.105283,-1.152893,2.816698,-0.748416,...,-0.436815,-0.100525,0.518262,-1.619221,-0.097457,-1.608088,-1.750054,0.253658,-0.821485,0.772523
Megakaryocyte,-1.571552,-0.014826,-1.590778,0.365404,-0.624493,0.251727,-1.065124,1.569977,3.139096,-0.774947,...,1.027905,1.706388,1.083568,-0.59735,0.402772,-0.343936,-1.919054,0.953484,-0.647678,0.395791
Megakaryocyte,-1.642452,-1.606517,-1.901596,-0.536195,-0.260409,-0.311628,-1.5117,0.409329,4.008789,-1.437853,...,-0.408584,0.179505,2.010641,-0.890538,0.015938,-1.933568,0.628517,0.885171,-1.156379,1.32153
Megakaryocyte,-1.305264,-1.541102,-1.86016,-1.676669,-1.220578,-1.622932,-1.21402,-1.671065,0.77823,0.12974,...,0.111507,1.226491,-0.250206,-0.141101,0.186223,-1.491838,-0.726568,0.109112,-1.777032,1.363679
Megakaryocyte,-1.870067,-1.571589,-1.648593,0.626378,-1.292815,-1.655035,-1.77106,-1.4113,1.731471,-1.236552,...,0.642641,-0.210992,0.991211,0.347029,0.109843,-2.025189,-1.311148,0.118826,-1.274023,-0.352147
Megakaryocyte,-1.342455,-1.859233,-2.362087,-1.122759,-0.878689,-0.483282,-0.804898,0.338037,2.373354,-1.40814,...,0.075927,-0.194102,-0.031959,-1.582049,-0.818849,-2.395932,-1.289194,0.753572,-2.143906,-0.69835


Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Myeloid,-1.517829,-1.713544,-1.387136,-0.888324,-1.467159,-1.461583,-1.530954,-1.585441,2.324761,-1.754083,...,-0.269842,0.820037,-1.375568,-0.522416,-1.254512,-2.208151,-1.5926,-0.118181,-1.975891,1.92814
Myeloid,-0.638851,-1.681895,-0.618728,-0.733642,0.694569,-1.143983,-0.997012,-0.359582,3.634471,-0.523632,...,-0.330358,0.146203,0.562754,-1.173853,-1.401445,-1.318178,-1.504986,0.303282,-0.995401,1.625352
Myeloid,-1.313648,-0.89842,0.594094,0.745591,-0.027368,0.957018,-0.557689,-1.05954,1.49398,-0.506584,...,-0.569588,-0.464099,-1.560962,0.214269,0.111004,-1.025749,-0.759964,0.586812,-1.762008,-0.34064
Myeloid,0.269535,-0.394323,-0.096845,2.341669,2.056955,-0.238308,1.04733,0.966888,3.308522,0.360493,...,-1.107788,-0.099884,0.030574,1.354169,1.256399,0.641578,1.123839,0.575184,-1.620487,-1.355934
Myeloid,-1.338242,-1.438487,0.773509,0.732557,-0.413774,0.569572,-2.065482,1.011625,3.228294,-0.823835,...,1.28816,0.528553,-0.067944,0.285668,0.036625,-1.574103,-0.764804,0.181897,-1.237723,-0.032157
Myeloid,-3.092874,-2.869725,0.523186,0.345554,-1.184365,-0.973738,-2.051475,0.104604,2.91167,-1.774359,...,-0.591128,-0.235825,0.353577,-1.001349,0.448796,-2.206661,-0.950122,0.117893,-2.095819,-0.542644
Myeloid,-1.077312,-1.912254,-1.176444,1.643822,1.468066,-1.711776,-0.643858,0.538905,4.220756,-0.059883,...,0.160451,0.559908,3.047107,-0.002287,1.726484,-0.772509,-1.354643,0.566315,-1.645144,-0.348843


  adata.uns[key_added] = {}
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
  self.stats[group_name, "logfoldchanges"] = np.log2(
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer]

Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Neural,0.167237,0.622543,1.864244,1.795306,0.368752,-1.480762,-0.743671,2.409681,4.101205,-1.935915,...,0.308638,0.994671,1.287765,1.330893,0.405642,-1.852950,-0.343504,1.721915,-0.814809,-1.826344
Neural,-2.195596,-1.178162,1.022098,1.744554,0.285590,-1.718818,-2.397963,1.944567,2.512977,-0.532641,...,1.909220,1.127669,3.602030,1.438605,0.154587,-2.516162,-1.728888,1.472503,-2.735405,-0.482834
Neural,1.037637,0.509502,2.131648,-0.576166,-0.538549,-2.126927,-0.989477,1.393968,3.404340,-0.849000,...,0.639046,0.098342,-0.147700,1.365947,1.169333,-2.241637,-1.037988,-0.932552,-1.526246,-0.057286
Neural,-0.218478,-1.755481,0.242907,2.292270,-0.258599,-1.848701,0.796204,1.505390,2.938564,1.005107,...,2.182324,-0.018135,1.775900,0.985428,3.210070,-1.824952,-1.101169,2.851978,-2.024255,-1.310620
Neural,-1.550683,-1.774503,-1.175687,0.091655,-0.292653,-0.857617,-0.593620,0.896945,1.419348,-1.671017,...,0.249065,-0.575428,0.638279,0.292753,-0.146129,-1.843573,-0.539691,0.329105,-0.970874,0.178348
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Neural,0.012969,-0.834746,-0.507896,0.902459,-1.313310,-0.026018,-0.940675,0.170314,-0.553845,-1.035243,...,-0.950088,0.804873,0.273571,-0.031503,0.923702,-1.075613,-0.976475,-0.785306,0.083151,-0.623247
Neural,-1.089628,-0.915744,0.533775,0.689531,-0.032230,-0.964362,-1.031955,0.523351,1.797573,0.352904,...,0.063777,-0.716148,0.838032,-0.086161,0.678396,-0.313218,-1.071230,0.455237,-1.055912,-0.683721
Neural,-1.109648,-1.412404,-1.080394,0.281767,0.090252,-1.003353,0.290320,1.014275,1.325158,0.418683,...,0.435867,-1.104527,0.554196,-0.043416,-0.435508,-0.752650,0.250659,0.816869,0.151192,0.006819
Neural,-1.004265,-0.844009,-1.222896,-0.613264,-0.575351,-0.888817,-0.951110,-1.271534,1.827722,-1.046729,...,0.082036,2.211090,-0.528051,-1.025594,-0.056152,-1.087548,0.021588,0.447262,-0.973192,-0.630162


Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MSC,-0.059454,1.42976,-0.368215,4.951663,2.314223,-1.401104,0.025559,2.163652,8.880983,0.029983,...,0.48643,0.238634,0.54768,0.402053,1.033357,-0.810285,-0.984241,1.891847,0.079408,-0.488406
MSC,0.261076,2.275735,1.662428,2.443953,0.778078,-0.902127,-0.551638,0.926727,6.046927,-0.303347,...,0.62502,0.63753,1.890182,-1.16877,1.201476,0.507098,0.384204,0.857652,0.24523,1.292124
MSC,-1.215385,1.458211,2.661959,0.145733,1.271976,-0.837283,-1.890813,0.760341,6.42809,1.576655,...,1.021965,-0.237376,-0.80941,-0.27405,3.182328,-1.410169,-1.763197,0.134037,-1.086167,1.806362
MSC,-1.220469,2.841705,1.327324,0.615499,1.581052,-1.560243,-1.420173,0.130613,8.154379,3.075087,...,2.618629,0.532044,-1.578467,1.714826,1.848143,0.933996,-0.403657,-0.530664,1.288788,-1.103649
MSC,-1.27515,4.636679,0.623698,1.011182,1.062188,1.643232,-0.098902,-1.295622,2.845184,0.112996,...,0.565799,-0.403646,1.17959,0.46053,0.273645,-1.462271,-0.116398,-0.066779,0.51409,0.596717
MSC,-0.666793,2.2869,1.535034,2.394618,1.100273,-2.592988,-2.174268,1.053469,6.460404,1.222715,...,-0.063744,-1.287388,-0.022262,1.206625,-0.364717,-0.517468,-1.404515,0.068309,-1.709125,1.16443
MSC,-0.102155,2.918625,3.059382,0.036423,-0.282045,-1.137695,-1.270237,0.292523,6.309955,0.427316,...,-0.965655,1.803365,0.502193,-0.632943,1.227714,-0.844707,-2.206859,0.714331,1.066528,-1.312309
MSC,-2.090085,0.491674,0.545537,0.579563,-0.143964,-1.466849,-1.747551,-0.091727,6.618186,0.016541,...,-0.436477,0.423264,0.412821,-0.297158,-0.684696,-1.618306,-1.164867,-0.245677,-1.321568,0.682512
MSC,0.097254,4.668667,1.815727,0.381059,1.641793,0.745546,0.954142,2.997339,6.115344,1.693819,...,-0.152233,1.113802,2.050098,1.732244,2.291265,1.007388,0.177202,0.035864,0.779446,0.719503
MSC,-1.080342,2.021039,1.287605,2.55513,1.400882,-0.545428,0.111306,0.901016,6.782904,1.525609,...,1.989806,1.006529,-1.46364,-0.124204,-0.004982,1.000237,-0.276678,0.932096,-0.37809,1.49126


Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Fibroblast,-2.306736,-1.938509,0.46466,0.900443,-1.977719,-2.041453,-2.18459,-0.135228,1.149152,-0.988374,...,0.39851,-0.220427,0.103792,-0.164276,-1.332873,-2.118862,-1.306641,0.452708,-1.39227,-0.671955
Fibroblast,-1.052675,-0.46747,-1.087907,0.15315,-1.647741,-1.426621,-2.156106,-0.248726,1.90468,-0.248946,...,1.395869,1.465467,0.835736,-0.00508,-0.71272,-2.583872,-1.629137,0.312149,-1.203333,-1.345288
Fibroblast,-0.401211,-0.667111,0.233505,1.083929,-1.774177,-1.27334,-1.153518,0.386829,2.295817,-1.558571,...,-0.621364,0.206526,2.133005,-0.337348,-1.427368,-2.085311,-0.766197,-0.069512,-1.337848,1.209384
Fibroblast,-0.081215,-2.028037,-1.986212,-1.09073,-0.773253,-0.706159,-1.855515,-0.096074,2.275005,-0.969989,...,-0.441247,0.892334,0.463549,-1.715695,-0.45972,-2.39417,-1.388207,-1.370834,-0.657963,-1.220876
Fibroblast,-1.534016,-0.443417,0.263172,0.972891,-0.558336,-1.388201,-1.824018,-0.6905,1.198278,-0.703835,...,-0.43842,0.516257,0.324315,-0.401606,-0.311194,-2.549712,-0.226783,1.734488,-1.855193,1.255824
Fibroblast,-1.845363,-1.046317,-0.52072,-1.2775,-0.802429,-1.176163,-1.214517,-0.89532,2.069158,-0.439959,...,0.090217,-0.298736,-1.127148,-0.688802,-1.318532,-1.998433,-1.265237,-0.600645,-1.228017,-0.302969
Fibroblast,-1.838772,-1.54529,0.515589,0.38189,-0.80149,-1.172499,-0.680289,-0.366457,4.163024,0.361258,...,1.257532,-0.431346,-0.226703,0.307995,-0.805662,-1.489732,-0.71513,0.677704,-0.666836,-0.302893
Fibroblast,0.090998,0.989433,0.430045,0.222203,0.674494,0.077298,-1.230858,2.168983,5.480515,-0.470821,...,0.515654,0.37535,1.722375,0.786355,1.472974,-1.518851,-0.353632,1.371658,-0.43864,-0.913528
Fibroblast,-0.290047,-0.378288,-1.12882,-1.108722,-0.714645,-0.503249,-0.413439,1.803827,4.233761,0.969522,...,0.634705,-1.009736,2.859558,1.496857,0.389651,-1.801184,0.90884,1.243137,-0.615384,-1.21614
Fibroblast,-0.717879,1.032488,-0.040582,0.825191,-1.647517,-0.286635,-0.94198,0.875745,6.145082,-1.443087,...,0.092511,0.849372,0.229816,-1.158803,0.933932,-1.415031,-0.846985,-0.643079,-1.521921,1.648263


Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Megakaryocyte,-1.496811,-2.662679,-1.198333,-0.110838,-1.78173,-2.542561,-2.745786,-1.128111,2.995493,-1.765167,...,0.083835,0.173125,1.665858,-0.914953,-0.613672,-4.061589,-1.526998,-0.42919,-3.487074,-0.110597
Megakaryocyte,1.000516,-1.22947,1.308271,0.33741,1.371783,-1.431728,0.275207,2.457267,3.6222,1.560861,...,2.962087,0.551208,1.972033,1.471966,0.000494,-0.845381,-0.134738,3.6844,-2.146014,-0.426495
Megakaryocyte,-1.916782,-1.519296,-1.720798,0.034247,-0.516029,-0.287649,-1.669769,0.08517,3.659109,-1.132166,...,1.292572,0.52515,1.031004,0.485754,0.400155,-1.023189,-1.199967,0.688006,-1.330189,0.973853
Megakaryocyte,-0.010025,-1.78132,-0.410161,-0.141453,-0.225483,-1.097146,-1.702748,2.075026,2.028177,0.376929,...,1.031018,0.144457,1.322322,1.13162,-0.719996,-0.963184,-0.20667,2.293733,-2.112367,-1.701319
Megakaryocyte,-0.127419,-2.120586,1.051093,0.160886,0.128967,-1.492649,-1.773123,-0.360044,2.824672,-0.920148,...,2.136316,0.182244,1.798842,-0.023704,-0.037054,-1.994549,-1.69727,1.195045,-1.673399,-0.144969
Megakaryocyte,-2.322224,-2.963132,-0.667056,-0.570139,-1.286687,-1.773662,-2.25569,0.459891,2.578975,-1.183122,...,0.999498,0.914625,1.036606,-0.201214,0.127769,-2.441476,-1.469933,0.058917,-2.884602,-0.145039
Megakaryocyte,-1.782982,-1.895503,-0.050723,-1.12956,-1.639386,-0.926025,-1.756172,-1.454626,1.951687,0.516014,...,1.751909,-0.667323,0.075898,0.258079,-1.20429,-2.000908,-0.172086,1.08759,-1.694652,-0.806002
Megakaryocyte,-1.707615,-2.51623,-1.491596,-1.152981,-1.849577,-2.906157,-2.819848,-1.337226,2.00795,-1.90567,...,0.161166,-0.173178,2.164273,-1.094753,-1.473614,-3.011807,-1.049929,-0.626221,-3.649768,-0.95402
Megakaryocyte,0.158724,0.059671,-0.964452,1.236422,0.069215,-0.304607,-1.307347,0.966818,2.034318,-1.289359,...,0.564969,1.594692,1.608118,1.430514,1.100505,-1.362592,-0.602362,1.485246,-1.398361,-0.542873
Megakaryocyte,-1.24322,-1.303826,-1.496742,0.316233,-0.640454,-1.483005,-1.916231,0.754222,2.906575,-0.530695,...,0.33917,0.248225,0.408651,0.170184,-0.703553,-2.269473,-1.180143,2.252657,-1.535301,1.129061


Unnamed: 0_level_0,ADNP,AHR,AR,ARID2,ARID3A,ARNT,ASCL1,ATF1,ATF2,ATF3,...,ZNF589,ZNF592,ZNF639,ZNF644,ZNF740,ZNF750,ZNF766,ZNF83,ZNF92,ZZZ3
new_annotation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Myeloid,-1.340812,-1.543372,-0.96038,-1.362048,-0.922619,-1.065078,-1.249062,-0.566927,1.478165,-1.573098,...,0.961849,-0.489045,-1.088828,-0.804182,-0.410964,-1.525469,-1.300737,0.542733,-1.779651,0.587028
Myeloid,-0.783158,-1.531452,-0.451411,-1.28984,-1.225664,-1.025452,-1.211941,0.032738,1.239601,-1.184358,...,-0.102929,0.307477,-0.7421,-0.845986,-1.314144,-1.487717,-1.262404,0.511759,-1.225918,1.752644
Myeloid,-0.769488,-1.749353,-1.02063,1.0269,-1.275905,-1.842245,-0.52962,-1.019904,2.465897,-1.166483,...,1.023,1.165897,-0.917629,-0.278123,-0.22056,-1.4365,-0.265107,1.029399,-1.108084,-0.612411
Myeloid,-2.089752,-2.050814,0.398472,-0.207756,-2.250698,-1.763444,-1.824327,-1.884248,2.622831,-1.016144,...,-0.135345,-0.38324,-0.800218,-0.845999,-1.200913,-2.232268,-0.113643,0.249208,-2.000473,-0.19483
Myeloid,-1.883857,-1.177228,-0.741202,-1.888663,-2.712801,-0.458597,-1.378348,-1.268098,0.814367,-1.832144,...,-1.08522,-1.496741,-1.304027,-0.926682,-1.884995,-0.99475,-0.946704,-0.006276,-2.20707,-0.180617
Myeloid,-2.085588,-1.382324,-1.080253,-1.683458,-1.253637,-1.554702,-1.514027,-1.007898,0.677446,-1.661213,...,-1.304728,0.086731,-0.164536,-0.13454,-0.040552,-2.647664,-0.575288,0.197071,-1.249506,-0.249199
Myeloid,-0.922903,0.672677,1.788896,0.762444,-0.174378,-2.081532,-1.152029,0.086374,3.38752,-1.304169,...,1.021785,0.310343,-0.940482,0.121847,-1.414975,-1.594154,-0.873344,1.137071,-0.324671,1.279958
Myeloid,-0.643289,-1.287684,-0.619151,-0.055543,-1.373958,-0.942681,-1.478472,-1.18153,3.251783,-0.838717,...,1.75942,-0.105267,1.12273,0.045473,-0.399311,-1.382943,-0.266018,1.790468,-1.501448,0.892974
Myeloid,-1.30594,-1.518133,-1.708514,-0.124568,-0.868193,-1.598743,-1.710819,-0.992041,2.139253,-0.276092,...,-0.674631,-0.462292,-0.270083,-0.378391,-1.316553,-1.956295,-0.756722,1.288381,-1.750545,-0.339751
Myeloid,-0.269688,-1.044323,-0.576357,-1.291175,-1.619104,-1.170867,-0.700626,-0.077776,2.137662,-0.710936,...,-0.536532,1.047305,-1.144497,0.374731,-1.801862,-1.485616,-1.261128,-0.652386,-0.688765,-0.322049


In [None]:
def csv_comparison (result_1, result_2, outpath, suffixes_df=("_py", "_r")):
    result1 = pd.read_csv(result_1)
    result2 = pd.read_csv(result_2)


    result1 = result1[result1["pvals_adj"] < 0.05]
    if "p_val_adj" in result2.columns:
        result2 = result2[result2["p_val_adj"] < 0.05]
    else:
        result2 = result2[result2["pvals_adj"] < 0.05]

    result1 = result1.rename(columns={"names": "gene", "group": "cluster"})
    result2 = result2.rename(columns={"names": "gene", "group": "cluster"})


    df_output_1 = pd.DataFrame()

    for i in range(len(result1)):
        a = result1["gene"].iloc[i]
        for j in range(len(result2)):
            b = result2["gene"].iloc[j]
            if a == b:
                df_output_1[i] = result1.iloc[i]
            

    df_output_1 = df_output_1.T

    df_output_2 = pd.DataFrame()
    for i in range(len(result2)):
        a = result2["gene"].iloc[i]
        for j in range(len(result1)):
            b = result1["gene"].iloc[j]
            if a == b:
                df_output_2[i] = result2.iloc[i]


    df_output_2 = df_output_2.T


    df_output = pd.merge(df_output_1, df_output_2, on="gene", suffixes=suffixes_df)

    df_output_3 = pd.DataFrame()
    for i in range(len(result1)):
        a = result1["gene"].iloc[i]
        b = list(result2["gene"])
        if a not in b:
            df_output_3[i] = result1.iloc[i]
            
    df_output_3 = df_output_3.T
    df_output.rename(columns={"Unnamed: 0": "gene_r"}, inplace=True)
    df_output_3.rename(columns={"cluster": "cluster" + suffixes_df[0]}, inplace=True)
    if suffixes_df[0] == "_py_wilcox":
        df_output_3.rename(columns={"scores": "scores" + suffixes_df[0], "logfoldchanges": "logfoldchanges" + suffixes_df[0], "pvals": "pvals" + suffixes_df[0], "pvals_adj": "pvals_adj" + suffixes_df[0]}, inplace=True)
        df_output_4 = pd.DataFrame()
        for i in range(len(result2)):
            a = result2["gene"].iloc[i]
            b = list(result1["gene"])
            if a not in b:
                df_output_4[i] = result2.iloc[i]
        df_output_4 = df_output_4.T
        df_output_4.rename(columns={"cluster": "cluster" + suffixes_df[1], "scores": "scores" + suffixes_df[1], "logfoldchanges": "logfoldchanges" + suffixes_df[1], "pvals": "pvals" + suffixes_df[1], "pvals_adj": "pvals_adj" + suffixes_df[1]}, inplace=True)
        df_output_3 = pd.concat([df_output_3, df_output_4])
        

    df_output = pd.concat([df_output, df_output_3])

    df_output.to_csv(outpath, index=0)
    print(df_output_3.columns)



In [None]:
#WINDOWS VERSION

#CONTROL
#comparison python wilcoxon and r wilcoxon control
csv_comparison("script_test\\TF_results\\control\\control_specific_markers_wilcoxon_test.csv", "LR2TF_test_run\\results\\TF_results\\control\\all_specificmarker__control.csv",
                "tf_comparison_r_py\\py_wilcoxon_r_same_genes_control_filtered.csv", suffixes_df=("_py", "_r"))

#comparison python t test and r wilcoxon control
csv_comparison("script_test\\TF_results\\control\\control_specific_markers_t_test_overestim_test.csv", "LR2TF_test_run\\results\\TF_results\\control\\all_specificmarker__control.csv",
                "tf_comparison_r_py\\py_t_test_r_same_genes_control_filtered.csv", suffixes_df=("_py", "_r"))

# py wilcox vs py t test control

csv_comparison("script_test\\TF_results\\control\\control_specific_markers_wilcoxon_test.csv", "script_test\\TF_results\\control\\control_specific_markers_t_test_overestim_test.csv",
                "tf_comparison_r_py\\py_wilcox_py_t_test_same_genes_control_filtered.csv", suffixes_df=("_py_wilcox", "_py_t_test"))
################################################################################################################################################################################################
#PMF MF2
#python wilcox vs r wilcox PMF MF2
csv_comparison("script_test\\TF_results\\PMF_MF2\\PMF_MF2_specific_markers_wilcoxon_test.csv", "LR2TF_test_run\\results\\TF_results\\PMF_MF2\\all_specificmarker__PMF_MF2.csv",
                "tf_comparison_r_py\\py_wilcoxon_r_same_genes_PMF_MF2_filtered.csv", suffixes_df=("_py", "_r"))

#comparison python t test and r wilcoxon PMF MF2
csv_comparison("script_test\\TF_results\\PMF_MF2\\PMF_MF2_specific_markers_t_test_overestim_test.csv", "LR2TF_test_run\\results\\TF_results\\PMF_MF2\\all_specificmarker__PMF_MF2.csv",
                "tf_comparison_r_py\\py_t_test_r_same_genes_PMF_MF2_filtered.csv", suffixes_df=("_py", "_r"))

csv_comparison("script_test\\TF_results\\PMF_MF2\\PMF_MF2_specific_markers_wilcoxon_test.csv", "script_test\\TF_results\\PMF_MF2\\PMF_MF2_specific_markers_t_test_overestim_test.csv",
                "tf_comparison_r_py\\py_wilcox_py_t_test_same_genes_PMF_MF2_filtered.csv", suffixes_df=("_py_wilcox", "_py_t_test"))

In [None]:
#LINUX VERSION

#CONTROL
#comparison python wilcoxon and r wilcoxon control
csv_comparison("script_test/TF_results/control/control_specific_markers_wilcoxon_test.csv", "LR2TF_test_run/results/TF_results/control/all_specificmarker__control.csv",
                "tf_comparison_r_py//py_wilcoxon_r_same_genes_control_filtered.csv", suffixes_df=("_py", "_r"))

#comparison python t test and r wilcoxon control
csv_comparison("script_test/TF_results/control/control_specific_markers_t_test_overestim_test.csv", "LR2TF_test_run/results/TF_results/control/all_specificmarker__control.csv",
                "tf_comparison_r_py/py_t_test_r_same_genes_control_filtered.csv", suffixes_df=("_py", "_r"))

# py wilcox vs py t test control

csv_comparison("script_test/TF_results/control/control_specific_markers_wilcoxon_test.csv", "script_test/TF_results/control/control_specific_markers_t_test_overestim_test.csv",
                "tf_comparison_r_py/py_wilcox_py_t_test_same_genes_control_filtered.csv", suffixes_df=("_py_wilcox", "_py_t_test"))
################################################################################################################################################################################################
#PMF MF2
#python wilcox vs r wilcox PMF MF2
csv_comparison("script_test/TF_results/PMF_MF2/PMF_MF2_specific_markers_wilcoxon_test.csv", "LR2TF_test_run/results/TF_results/PMF_MF2/all_specificmarker__PMF_MF2.csv",
                "tf_comparison_r_py/py_wilcoxon_r_same_genes_PMF_MF2_filtered.csv", suffixes_df=("_py", "_r"))

#comparison python t test and r wilcoxon PMF MF2
csv_comparison("script_test/TF_results/PMF_MF2/PMF_MF2_specific_markers_t_test_overestim_test.csv", "LR2TF_test_run/results/TF_results/PMF_MF2/all_specificmarker__PMF_MF2.csv",
                "tf_comparison_r_py/py_t_test_r_same_genes_PMF_MF2_filtered.csv", suffixes_df=("_py", "_r"))

csv_comparison("script_test/TF_results/PMF_MF2/PMF_MF2_specific_markers_wilcoxon_test.csv", "script_test/TF_results/PMF_MF2/PMF_MF2_specific_markers_t_test_overestim_test.csv",
                "tf_comparison_r_py/py_wilcox_py_t_test_same_genes_PMF_MF2_filtered.csv", suffixes_df=("_py_wilcox", "_py_t_test"))