# Disease Modules

### Disease Gene Associations

In [None]:
import pandas as pd

# Load the tab-separated disease-gene association table and keep only the rows
# whose diseaseType is "disease", restricted to the two columns used below.
dga = pd.read_csv("data/disease_gene.tsv", sep="\t")
dga = dga.loc[dga["diseaseType"] == "disease", ["geneSymbol", "diseaseName"]].copy()

# Lower-case the disease names BEFORE de-duplicating: case variants of the same
# (gene, disease) pair must collapse into a single row, otherwise they would be
# counted more than once and inflate the per-disease gene counts below.
dga["diseaseName"] = dga["diseaseName"].str.lower()
dga = dga.drop_duplicates()

# Number of associated genes per disease, sorted ascending by that count.
num_genes = (
    dga.groupby("diseaseName")
    .agg("count")
    .sort_values(by="geneSymbol")
    .reset_index()
    .rename(columns={"geneSymbol": "count_genes"})
)

# Attach each disease's gene count to its association rows.
filtered_dga = dga.merge(num_genes, on="diseaseName", how="inner")

# Keep only diseases with strictly more than 10 associated genes and drop the
# helper count column again.
cleaned_dga = (
    filtered_dga.loc[filtered_dga["count_genes"] > 10, ["geneSymbol", "diseaseName"]]
    .drop_duplicates()
    .reset_index(drop=True)
)


In [None]:
# Association rows for rheumatoid arthritis (disease names are lower-cased
# when the table is cleaned, hence the lower-case literal).
ra = cleaned_dga.loc[cleaned_dga["diseaseName"] == "rheumatoid arthritis"]

# Distinct gene symbols associated with the disease, in order of first appearance.
ra_genes = ra["geneSymbol"].drop_duplicates().tolist()


### Protein-Protein Interactions

In [None]:
import networkx as nx

# Load the interaction table and reduce it to unique, fully specified
# (Symbol_A, Symbol_B) pairs.
ppi = pd.read_csv("data/ppi.csv")
ppi = ppi.loc[:, ["Symbol_A", "Symbol_B"]].dropna().drop_duplicates()

# One edge per remaining row; the default graph type of from_pandas_edgelist
# (nx.Graph) is used.
gppi = nx.from_pandas_edgelist(ppi, source="Symbol_A", target="Symbol_B")

# Remove self loops
sl = nx.selfloop_edges(gppi)
gppi.remove_edges_from(sl)


In [None]:
from utils.plot import get_disease_module_info

# Collect the module information for rheumatoid arthritis from the cleaned
# associations and the interactome, with print_info=True. This rebinds `ra`,
# which until now held the filtered association rows.
# NOTE(review): the helper's return layout is not visible here; later cells
# read index 2 of the result as the disease's gene collection — confirm
# against utils.plot.get_disease_module_info.
ra = get_disease_module_info("rheumatoid arthritis", cleaned_dga, gppi, print_info=True)


In [None]:
# RA genes that are present in the interactome, in the graph's node order.
# Membership is tested against a set built once instead of scanning the
# `ra_genes` list for every node of the graph.
ra_gene_set = set(ra_genes)
ra_ppi_genes = [node for node in gppi.nodes if node in ra_gene_set]


In [None]:
from utils import localization

# Ask the project's localization helper for the "lcc" of the RA genes within
# the interactome; the result is used below as the node set of the module.
# NOTE(review): presumably the largest connected component of the subgraph
# induced by the given genes — confirm against utils.localization.get_lcc.
lcc = localization.get_lcc(gppi, ra_ppi_genes)


In [None]:
# Subgraph of the interactome induced by the nodes in `lcc`.
module = nx.subgraph(gppi, lcc)

# Persist the module as a GEXF file so it can be opened in external tools.
nx.write_gexf(module, "data/ra_module.gexf")


In [None]:
from utils.plot import plot_disease_module_info

# Plot the rheumatoid-arthritis module information (`ra`, as returned by
# get_disease_module_info) in the context of the full interactome.
# NOTE(review): what exactly is drawn is defined in utils.plot — confirm there.
plot_disease_module_info(gppi, ra)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="whitegrid")

# Draw the disease module on an explicit 8x8 inch figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 8))

# Visual settings for nodes and edges; labels are suppressed.
draw_options = {
    "with_labels": False,
    "alpha": 0.95,
    "node_color": "#5799C6",
    "edge_color": "#202020",
    "node_size": 300,
    "width": 2,
    "linewidths": 2,
}
nx.draw_networkx(module, ax=ax, **draw_options)

# A node-link diagram has no meaningful axes, so hide them.
ax.axis("off")
ax.set_title("Network Graph", fontsize=16, fontweight="bold")
fig.tight_layout()
plt.show()


In [None]:
# Each gene and each disease appears once per association row in the table;
# reduce both to their distinct values (first-appearance order) so that node
# insertion and, more importantly, the projection below do not repeat the same
# work for every duplicate row.
genes = list(cleaned_dga["geneSymbol"].unique())
diseases = list(cleaned_dga["diseaseName"].unique())

# Bipartite gene-disease graph: genes are tagged bipartite=0, diseases
# bipartite=1, with one edge per association row.
G = nx.Graph()
G.add_nodes_from(genes, bipartite=0)
G.add_nodes_from(diseases, bipartite=1)
# Select the two columns explicitly so every edge is a plain (gene, disease)
# pair regardless of any other columns the frame may carry.
G.add_edges_from(
    cleaned_dga[["geneSymbol", "diseaseName"]].itertuples(index=False, name=None)
)

# Project onto the disease side of the bipartite graph.
g_projected = nx.algorithms.bipartite.projected_graph(G, nodes=diseases)


In [None]:
import seaborn as sns
from matplotlib import pyplot as plt

sns.set_theme(style="whitegrid")
plt.figure(figsize=(12, 6))

# Pass the degrees as a flat list. The previous dict-values view is not a
# plain sequence and appears to be treated by seaborn as wide-form data, which
# is what produced the single-entry legend that then had to be removed.
degrees = [degree for _, degree in g_projected.degree()]
ax = sns.histplot(degrees, bins=range(0, 900, 20), kde=True)

plt.xlabel("Degree Bins")
plt.ylabel("Frequency")
plt.title("Binned Degree Distribution", fontsize=16, fontweight="bold")

# Only remove a legend if one actually exists: get_legend() returns None when
# nothing was added, and calling .remove() on None raises AttributeError.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.tight_layout()
plt.show()


# Disease Separation

In [None]:
import utils.separation as separation
import os

ra = get_disease_module_info("rheumatoid arthritis", cleaned_dga, gppi)

# Location of the cached result table; computing it requires one separation
# call per disease, so it is only done when the file is missing.
separations_path = "./data/separations.csv"


def _separation_from_ra(disease):
    """Return the separation between the RA module genes and `disease`'s genes.

    Index 2 of a get_disease_module_info result is used as the gene collection
    of the respective disease.
    """
    disease_genes = set(get_disease_module_info(disease, cleaned_dga, gppi)[2])
    return separation.get_separation(gppi, set(ra[2]), disease_genes)


if not os.path.exists(separations_path):
    # One (disease, separation) pair for every disease in the cleaned table.
    separations = [
        (disease, _separation_from_ra(disease))
        for disease in cleaned_dga.diseaseName.unique()
    ]

    df_separations = pd.DataFrame(separations, columns=["Disease", "Separation"])
    # Ascending by separation value.
    sorted_df = df_separations.sort_values(by="Separation")
    sorted_df.to_csv(separations_path, index=False)
else:
    # Reuse the table written by an earlier run.
    sorted_df = pd.read_csv(separations_path)


In [None]:
# The last three and first three rows of the separation table; with the table
# sorted ascending by "Separation" these are the largest and smallest values.
top_3_highest = sorted_df.iloc[-3:]
top_3_lowest = sorted_df.iloc[:3]

print("Top 3 Highest Separations:")
print(top_3_highest)

print("\nTop 3 Lowest Separations:")
print(top_3_lowest)


In [None]:
from utils.plot import plot_separation_value_histogram

# Plot the separation values held in `sorted_df` using the project's plotting
# helper (the exact rendering is defined in utils.plot).
plot_separation_value_histogram(sorted_df)


# Drug Proximity

In [None]:
# Load the drug-target table and keep one row per (gene target, drug name)
# pair among the rows whose organism is "Humans".
dt = pd.read_csv("data/drug_target.csv")
cleaned_dt = dt.loc[dt["organism"] == "Humans", ["Gene_Target", "Name"]].drop_duplicates()


In [None]:
# Distinct names of drugs with at least one gene target contained in ra[2]
# (the entry of `ra` used throughout as the RA gene collection). Left as the
# final bare expression so the notebook displays the resulting table.
cleaned_dt.loc[cleaned_dt["Gene_Target"].isin(ra[2]), ["Name"]].drop_duplicates()


In [None]:
from utils.plot import plot_proximity

# Distinct gene targets recorded for Alvocidib.
target = cleaned_dt.loc[cleaned_dt["Name"] == "Alvocidib", "Gene_Target"].unique()
print("Targets: ", target)

# Plot the proximity between the RA genes (ra[2]) and the drug's targets.
# NOTE(review): the trailing 1000 is presumably the number of random samples
# used by the helper — confirm against utils.plot.plot_proximity.
ra_module_genes = set(ra[2])
drug_targets = set(target)
plot_proximity(gppi, ra_module_genes, drug_targets, 1000)


In [None]:
# Distinct gene targets recorded for Diclofenac.
target = cleaned_dt.loc[cleaned_dt["Name"] == "Diclofenac", "Gene_Target"].unique()
print("Targets: ", target)

# Plot the proximity between the RA genes (ra[2]) and the drug's targets.
# NOTE(review): the trailing 1000 is presumably the number of random samples
# used by the helper — confirm against utils.plot.plot_proximity.
ra_module_genes = set(ra[2])
drug_targets = set(target)
plot_proximity(gppi, ra_module_genes, drug_targets, 1000)


In [None]:
# Distinct gene targets recorded for Ibuprofen.
target = cleaned_dt.loc[cleaned_dt["Name"] == "Ibuprofen", "Gene_Target"].unique()
print("Targets: ", target)

# Plot the proximity between the RA genes (ra[2]) and the drug's targets.
# NOTE(review): the trailing 1000 is presumably the number of random samples
# used by the helper — confirm against utils.plot.plot_proximity.
ra_module_genes = set(ra[2])
drug_targets = set(target)
plot_proximity(gppi, ra_module_genes, drug_targets, 1000)
