# Setup pipeline and initialize the three models

In [55]:
%load_ext autoreload

import os, sys
sys.path.insert(0, "../")

%autoreload 2
from flowset import *

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300


In [None]:
def download_and_unzip(download_url_link, dir_path, zipped_filename, destination_dir_name):
    """Download a zip archive and extract it below ``dir_path``.

    Adapted from
    https://www.tutorialsbuddy.com/download-and-unzip-a-zipped-file-in-python

    Args:
        download_url_link: URL of the zip archive to fetch.
        dir_path: Directory in which the archive is stored. It is created
            when it does not exist yet.
        zipped_filename: File name given to the downloaded archive inside
            ``dir_path``.
        destination_dir_name: Directory, relative to ``dir_path``, into which
            the archive members are extracted.

    Raises:
        urllib.error.URLError: If the download fails.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # Imported here so the helper does not depend on these modules leaking
    # into the namespace through a star import.
    import urllib.request
    import zipfile

    # An empty dir_path means "current directory"; os.makedirs("") would raise.
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)

    archive_path = os.path.join(dir_path, zipped_filename)
    target_dir = os.path.join(dir_path, destination_dir_name)

    print("Download starting")
    urllib.request.urlretrieve(download_url_link, archive_path)
    print("Download complete")

    print("unzipping file starting")
    with zipfile.ZipFile(archive_path, "r") as zip_file:
        zip_file.extractall(target_dir)
    print("unzipping complete")


# Fetch and unpack the Reactome gene-set archive into the working directory,
# unless "ReactomePathways.gmt" already exists there.
if not os.path.exists("ReactomePathways.gmt"):
    download_and_unzip(
        download_url_link="https://reactome.org/download/current/ReactomePathways.gmt.zip",
        dir_path=".",
        zipped_filename="ReactomePathways.gmt.zip",
        destination_dir_name=".",
    )

In [None]:
# Read the tab-separated expression summary; "NA" entries are parsed as nulls.
exprData = pl.read_csv(
    "./summarised_simulated_scdata_random.tsv",
    has_header=True,
    sep="\t",
    null_values=["NA"],
)
print(exprData)

In [None]:
def pl_hist(df, column, n_bins=100,):
    """Show a density-normalised histogram of a single column of ``df``.

    Args:
        df: Data frame whose ``select`` accepts a polars column expression.
        column: Name of the column to plot.
        n_bins: Number of histogram bins.
    """
    values = df.select(pl.col(column))

    figure, axis = plt.subplots(nrows=1, ncols=1, tight_layout=True)
    axis.hist(values, bins=n_bins, density=True)

    # Display the figure, then close it.
    plt.show()
    plt.close()
# Histograms of the "mean.cluster" and "expr.cluster" columns, in that order.
for hist_column in ("mean.cluster", "expr.cluster"):
    pl_hist(exprData, hist_column)

In [None]:
# Fuzzify the expression values for the model stored later as fa_triangular.
# No ``shape`` argument is passed here, so the library default is used.
# NOTE(review): this cell calls ``fuzzify_exprvalues`` while the gaussian and
# crisp cells call ``exprDF2LongDF`` — confirm both names exist in flowset.
explDFRaw, mfFuzzy = FlowAnalysis.fuzzify_exprvalues(
    exprData,
    stepsize=0.1,
    seriesOrder=[
        "cluster.wildtype",
        "cluster.knockout01",
        "cluster.knockout02",
        "cluster.knockout03",
    ],
    mfLevels=["NO", "NOLOW", "LOW", "LOWMED", "MED", "MEDHIGH", "HIGH"],
    centers=[0.1, 0.5, 0.9, 1.3, 1.7, 2.1, 2.5],
)
explDFWide = FlowAnalysis.to_vwide(explDFRaw, mfFuzzy)
print(explDFWide)

In [None]:
# Build the flow analysis over the four states, keyed by the "gene" column.
# NOTE(review): the label "K01" uses a zero where "KO2"/"KO3" use the letter
# O — confirm whether "KO1" was intended.
fa_triangular = FlowAnalysis(
    explDFWide,
    "gene",
    (
        ("wildtype", "WT"),
        ("knockout01", "K01"),
        ("knockout02", "KO2"),
        ("knockout03", "KO3"),
    ),
    mfFuzzy,
)
fa_triangular.plot_flows()

In [None]:

# Gaussian model: same series order, levels and centers, with shape="gauss".
explDFRaw, mfFuzzy = FlowAnalysis.exprDF2LongDF(
    exprData,
    stepsize=0.1,
    seriesOrder=[
        "cluster.wildtype",
        "cluster.knockout01",
        "cluster.knockout02",
        "cluster.knockout03",
    ],
    mfLevels=["NO", "NOLOW", "LOW", "LOWMED", "MED", "MEDHIGH", "HIGH"],
    centers=[0.1, 0.5, 0.9, 1.3, 1.7, 2.1, 2.5],
    shape="gauss",
)
explDFWide = FlowAnalysis.to_vwide(explDFRaw, mfFuzzy)
print(explDFWide)

# NOTE(review): the label "K01" uses a zero where "KO2"/"KO3" use the letter
# O — confirm whether "KO1" was intended.
fa_gaussian = FlowAnalysis(
    explDFWide,
    "gene",
    (
        ("wildtype", "WT"),
        ("knockout01", "K01"),
        ("knockout02", "KO2"),
        ("knockout03", "KO3"),
    ),
    mfFuzzy,
)
fa_gaussian.plot_flows()


In [None]:
# Crisp model: same series order, levels and centers, but built with
# shape="crisp" and sdcolName=None.
explDFRaw, mfFuzzy = FlowAnalysis.exprDF2LongDF(
    exprData,
    stepsize=0.1,
    seriesOrder=[
        "cluster.wildtype",
        "cluster.knockout01",
        "cluster.knockout02",
        "cluster.knockout03",
    ],
    mfLevels=["NO", "NOLOW", "LOW", "LOWMED", "MED", "MEDHIGH", "HIGH"],
    centers=[0.1, 0.5, 0.9, 1.3, 1.7, 2.1, 2.5],
    sdcolName=None,
    shape="crisp",
)
explDFWide = FlowAnalysis.to_vwide(explDFRaw, mfFuzzy)
print(explDFWide)

# NOTE(review): the label "K01" uses a zero where "KO2"/"KO3" use the letter
# O — confirm whether "KO1" was intended.
fa_crisp = FlowAnalysis(
    explDFWide,
    "gene",
    (
        ("wildtype", "WT"),
        ("knockout01", "K01"),
        ("knockout02", "KO2"),
        ("knockout03", "KO3"),
    ),
    mfFuzzy,
)
fa_crisp.plot_flows()


In [None]:

# Call hist_level_membershipsum on each of the three models (crisp, gaussian,
# triangular) so their outputs can be compared side by side.
# NOTE(review): presumably draws one histogram per model — confirm in flowset.
fa_crisp.hist_level_membershipsum()
fa_gaussian.hist_level_membershipsum()
fa_triangular.hist_level_membershipsum()


In [None]:
# Gene-set (GMT) file passed to read_gmt_file here and to
# analyse_pathways_grouped in the enrichment cells further down.
pw_file="go_human_filtered.bp.gmt"
# Parsed gene sets; later cells read rp[id][0] as the pathway name and
# rp[id][1] as the pathway's gene list.
rp = fa_crisp.read_gmt_file(pw_file)


In [None]:
# Table of simulated pathway alterations (tab-separated). Its index labels
# are looked up as pathway names in later cells.
alteredPathways = pd.read_csv("simulated_changingPathways_random.tsv", sep="\t")

# Map every distinct row pattern (its values joined by "-") to the index
# labels of all rows that carry exactly that pattern.
pattern_groups = {}
first_occurrences = alteredPathways.loc[~alteredPathways.duplicated()]
for row_label, pattern_row in first_occurrences.iterrows():
    pattern_key = "-".join(pattern_row.astype(str))
    same_pattern = [
        candidate.equals(pattern_row)
        for candidate_label, candidate in alteredPathways.iterrows()
    ]
    pattern_groups[pattern_key] = alteredPathways[same_pattern].index

pattern_groups

# Basic overview of the memberships of the altered genes in the different states and models

In [None]:

# Pathway names in the iteration order of rp (first element of every entry).
pwnames = [rp[pw_id][0] for pw_id in rp]

# Keys of rp in the same order, so a position in pwnames maps back to its key.
pathway_ids = list(rp.keys())
labelled_models = (
    ("Crisp", fa_crisp),
    ("Gaussian", fa_gaussian),
    ("Triangular", fa_triangular),
)

for key, value in pattern_groups.items():
    # All genes of the pathways in this pattern group (duplicates are kept).
    union_geneset = [
        gene
        for pw_name in value
        for gene in rp[pathway_ids[pwnames.index(pw_name)]][1]
    ]
    print(union_geneset)

    # Same three plots for every model, in the order crisp, gaussian, triangular.
    for model_label, model in labelled_models:
        model.plot_state_memberships(union_geneset, model_label + ": Pattern -> " + key)
        model.plot_genes_membership(union_geneset)
        model.plot_genes(union_geneset)




In [None]:

# Call plot_genes for the single gene "TLR4" on each of the three models
# (crisp, gaussian, triangular).
fa_crisp.plot_genes(["TLR4"])
fa_gaussian.plot_genes(["TLR4"])
fa_triangular.plot_genes(["TLR4"])


# Analyse "UP" pattern
The first pattern group is analysed in two steps:
first using only the genes changed through the pathways, and then with a pathway enrichment analysis.

In [None]:
# Analyse simulated UP pattern

# The first pattern group is the one analysed as the UP pattern.
key = list(pattern_groups)[0]
value = pattern_groups[key]

# Translate the group's pathway names into keys of rp, then collect the
# per-pathway gene lists and their flattened union (duplicates are kept).
pathway_ids = list(rp.keys())
up_pathways = [pathway_ids[pwnames.index(pw_name)] for pw_name in value]
union_geneset = [rp[pw_id][1] for pw_id in up_pathways]
up_geneset = [gene for pathway_genes in union_geneset for gene in pathway_genes]

# Flows of the crisp model matching the pattern "=", "<", "=".
relFlows = set(fa_crisp.flow_finder(["=", "<", "="], verbose=True))

# Flows matching the "<<" variant, taken from the triangular model. The
# following cells apply this set to the gaussian model as well.
relFlows_strong = set(fa_triangular.flow_finder(["=", "<<", "="], verbose=True))

print(len(up_geneset))

In [None]:

# Call plot_flow_memberships for the selected UP flows on each model, passing
# up_geneset as color_genes. The crisp model uses relFlows; the gaussian and
# triangular models both use relFlows_strong, which the previous cell derived
# from fa_triangular.
fa_crisp.plot_flow_memberships(use_flows=relFlows,color_genes=up_geneset)
fa_gaussian.plot_flow_memberships(use_flows=relFlows_strong,color_genes=up_geneset)
fa_triangular.plot_flow_memberships(use_flows=relFlows_strong,color_genes=up_geneset)

In [None]:

# Score pathways on the selected flows for every model, keep those with
# adj_pval < 0.05 and report how many of the simulated UP pathways are
# among them.
pwScores_crisp = fa_crisp.analyse_pathways_grouped(use_flows=relFlows, pathways_file=pw_file)
pwScores_signif_crisp = pwScores_crisp[pwScores_crisp["adj_pval"] < 0.05]
crisp_recovered = pwScores_signif_crisp[pwScores_signif_crisp["pwid"].isin(up_pathways)]
print(f"Crisp) Found: {crisp_recovered.shape[0]} / {len(up_pathways)} with  {pwScores_signif_crisp.shape[0]} significant")

pwScores_gaussian = fa_gaussian.analyse_pathways_grouped(use_flows=relFlows_strong, pathways_file=pw_file)
pwScores_signif_gaussian = pwScores_gaussian[pwScores_gaussian["adj_pval"] < 0.05]
gaussian_recovered = pwScores_signif_gaussian[pwScores_signif_gaussian["pwid"].isin(up_pathways)]
print(f"Gaussian) Found: {gaussian_recovered.shape[0]} / {len(up_pathways)} with  {pwScores_signif_gaussian.shape[0]} significant")

pwScores_triangular = fa_triangular.analyse_pathways_grouped(use_flows=relFlows_strong, pathways_file=pw_file)
pwScores_signif_triangular = pwScores_triangular[pwScores_triangular["adj_pval"] < 0.05]
triangular_recovered = pwScores_signif_triangular[pwScores_signif_triangular["pwid"].isin(up_pathways)]
print(f"Triangular) Found: {triangular_recovered.shape[0]} / {len(up_pathways)} with  {pwScores_signif_triangular.shape[0]} significant")

from upsetplot import plot, from_contents,UpSet

# Significant pathway ids per model, compared in an upset plot.
results = {
    'triangular': pwScores_signif_triangular["pwid"],
    'gaussian': pwScores_signif_gaussian["pwid"],
    'crisp': pwScores_signif_crisp["pwid"],
}

upset = from_contents(results)
upsetpl = UpSet(upset, orientation='vertical')
upsetpl.plot()

plt.suptitle("Overlaps of genesets found with the different fuzzy concepts")
plt.show()

# Temporary UP (tUP) pattern


In [None]:
# Analyse tUP pattern

# The second pattern group is the one analysed as the tUP pattern.
key = list(pattern_groups)[1]
value = pattern_groups[key]

# Translate the group's pathway names into keys of rp, then collect the
# per-pathway gene lists and their flattened union (duplicates are kept).
pathway_ids = list(rp.keys())
tup_pathways = [pathway_ids[pwnames.index(pw_name)] for pw_name in value]
union_geneset = [rp[pw_id][1] for pw_id in tup_pathways]
tup_geneset = [gene for pathway_genes in union_geneset for gene in pathway_genes]

# Flows of the crisp model matching the pattern "<", "=", ">".
relFlows = set(fa_crisp.flow_finder(["<", "=", ">"], verbose=True))

# Flows matching the "<<" / ">>" variant, taken from the triangular model. The
# following cells apply this set to the gaussian model as well.
relFlows_strong = set(fa_triangular.flow_finder(["<<", "=", ">>"], verbose=True))

print(len(tup_geneset))

In [None]:

# Call plot_flow_memberships for the selected tUP flows on each model, passing
# tup_geneset as color_genes. The crisp model uses relFlows; the gaussian and
# triangular models both use relFlows_strong, which the previous cell derived
# from fa_triangular.
fa_crisp.plot_flow_memberships(use_flows=relFlows,color_genes=tup_geneset)
fa_gaussian.plot_flow_memberships(use_flows=relFlows_strong,color_genes=tup_geneset)
fa_triangular.plot_flow_memberships(use_flows=relFlows_strong,color_genes=tup_geneset)

In [None]:

# Score pathways on the selected flows for every model, keep those with
# adj_pval < 0.05 and report how many of the simulated tUP pathways are
# among them.
pwScores_crisp = fa_crisp.analyse_pathways_grouped(use_flows=relFlows, pathways_file=pw_file)
pwScores_signif_crisp = pwScores_crisp[pwScores_crisp["adj_pval"] < 0.05]
crisp_recovered = pwScores_signif_crisp[pwScores_signif_crisp["pwid"].isin(tup_pathways)]
print(f"Crisp) Found: {crisp_recovered.shape[0]} / {len(tup_pathways)} with  {pwScores_signif_crisp.shape[0]} significant")

pwScores_gaussian = fa_gaussian.analyse_pathways_grouped(use_flows=relFlows_strong, pathways_file=pw_file)
pwScores_signif_gaussian = pwScores_gaussian[pwScores_gaussian["adj_pval"] < 0.05]
gaussian_recovered = pwScores_signif_gaussian[pwScores_signif_gaussian["pwid"].isin(tup_pathways)]
print(f"Gaussian) Found: {gaussian_recovered.shape[0]} / {len(tup_pathways)} with  {pwScores_signif_gaussian.shape[0]} significant")

pwScores_triangular = fa_triangular.analyse_pathways_grouped(use_flows=relFlows_strong, pathways_file=pw_file)
pwScores_signif_triangular = pwScores_triangular[pwScores_triangular["adj_pval"] < 0.05]
triangular_recovered = pwScores_signif_triangular[pwScores_signif_triangular["pwid"].isin(tup_pathways)]
print(f"Triangular) Found: {triangular_recovered.shape[0]} / {len(tup_pathways)} with  {pwScores_signif_triangular.shape[0]} significant")

from upsetplot import plot, from_contents,UpSet

# Significant pathway ids per model, compared in an upset plot.
results = {
    'triangular': pwScores_signif_triangular["pwid"],
    'gaussian': pwScores_signif_gaussian["pwid"],
    'crisp': pwScores_signif_crisp["pwid"],
}

upset = from_contents(results)
upsetpl = UpSet(upset, orientation='vertical')
upsetpl.plot()

plt.suptitle("Overlaps of genesets found with the different fuzzy concepts")
plt.show()

# DOWN pattern

In [None]:
# Analyse simulated DOWN pattern

# The third pattern group is the one analysed as the DOWN pattern.
key = list(pattern_groups)[2]
value = pattern_groups[key]

# Translate the group's pathway names into keys of rp, then collect the
# per-pathway gene lists and their flattened union (duplicates are kept).
pathway_ids = list(rp.keys())
down_pathways = [pathway_ids[pwnames.index(pw_name)] for pw_name in value]
union_geneset = [rp[pw_id][1] for pw_id in down_pathways]
down_geneset = [gene for pathway_genes in union_geneset for gene in pathway_genes]

# Flows of the crisp model matching the pattern "=", ">", "=".
relFlows = set(fa_crisp.flow_finder(["=", ">", "="], verbose=True))

# Flows matching the ">>" variant, taken from the triangular model. The
# following cells apply this set to the gaussian model as well.
relFlows_strong = set(fa_triangular.flow_finder(["=", ">>", "="], verbose=True))

print(len(down_geneset))

In [None]:

# Call plot_flow_memberships for the selected DOWN flows on each model,
# passing down_geneset as color_genes. The crisp model uses relFlows; the
# gaussian and triangular models both use relFlows_strong, which the previous
# cell derived from fa_triangular.
fa_crisp.plot_flow_memberships(use_flows=relFlows,color_genes=down_geneset)
fa_gaussian.plot_flow_memberships(use_flows=relFlows_strong,color_genes=down_geneset)
fa_triangular.plot_flow_memberships(use_flows=relFlows_strong,color_genes=down_geneset)

In [None]:

# Score pathways on the selected flows for every model, keep those with
# adj_pval < 0.05 and report how many of the simulated DOWN pathways are
# among them.
pwScores_crisp = fa_crisp.analyse_pathways_grouped(use_flows=relFlows, pathways_file=pw_file)
pwScores_signif_crisp = pwScores_crisp[pwScores_crisp["adj_pval"] < 0.05]
crisp_recovered = pwScores_signif_crisp[pwScores_signif_crisp["pwid"].isin(down_pathways)]
print(f"Crisp) Found: {crisp_recovered.shape[0]} / {len(down_pathways)} with  {pwScores_signif_crisp.shape[0]} significant")

pwScores_gaussian = fa_gaussian.analyse_pathways_grouped(use_flows=relFlows_strong, pathways_file=pw_file)
pwScores_signif_gaussian = pwScores_gaussian[pwScores_gaussian["adj_pval"] < 0.05]
gaussian_recovered = pwScores_signif_gaussian[pwScores_signif_gaussian["pwid"].isin(down_pathways)]
print(f"Gaussian) Found: {gaussian_recovered.shape[0]} / {len(down_pathways)} with  {pwScores_signif_gaussian.shape[0]} significant")

pwScores_triangular = fa_triangular.analyse_pathways_grouped(use_flows=relFlows_strong, pathways_file=pw_file)
pwScores_signif_triangular = pwScores_triangular[pwScores_triangular["adj_pval"] < 0.05]
triangular_recovered = pwScores_signif_triangular[pwScores_signif_triangular["pwid"].isin(down_pathways)]
print(f"Triangular) Found: {triangular_recovered.shape[0]} / {len(down_pathways)} with  {pwScores_signif_triangular.shape[0]} significant")

from upsetplot import plot, from_contents,UpSet

# Significant pathway ids per model, compared in an upset plot.
results = {
    'triangular': pwScores_signif_triangular["pwid"],
    'gaussian': pwScores_signif_gaussian["pwid"],
    'crisp': pwScores_signif_crisp["pwid"],
}

upset = from_contents(results)
upsetpl = UpSet(upset, orientation='vertical')
upsetpl.plot()

plt.suptitle("Overlaps of genesets found with the different fuzzy concepts")
plt.show()