In [None]:
import sqlite3
import networkx as nx
from pyvis.network import Network

from utils import notebook_util, sql_query_util

# Apply the project's notebook display helper
# (presumably stretches cells to the full browser width — confirm in utils.notebook_util).
notebook_util.disp_notebook_full_width()

# Hex RGB color constants; several of them are used below as node colors
# when building the cluster graph.
BLACK = "#000000"
WHITE = "#FFFFFF"
RED = "#FF0000"
GREEN = "#00FF00"
BLUE = "#0000FF"
YELLOW = "#FFFF00"
CYAN = "#00FFFF"
MAGENTA = "#FF00FF"
PURPLE = "#A020F0"

In [None]:
# Shared database handle; later cells query it through sql_db.conn and its get_* helpers.
sql_db = sql_query_util.SimplifiedSqlDb()

In [None]:
# Print one example row from each table to see which columns are available.
# The table names come from this fixed list, so interpolating them into the SQL
# text is safe here (identifiers cannot be bound as query parameters).
for table in ["clusters", "prot2protwindow"]:
    print(f"TABLE: {table}\n")
    cur = sql_db.conn.cursor()
    try:
        cur.execute(f"SELECT * FROM {table} LIMIT 1;")
        for row in cur.fetchall():
            # row.keys() needs a mapping-style row (e.g. sqlite3.Row) — presumably
            # configured by SimplifiedSqlDb; confirm there.
            for key in row.keys():
                print(f"{key}: {row[key]}")
            print("\n")
    finally:
        # Release the cursor even if the query or the row access raises.
        cur.close()

    print("******\n\n")

In [None]:
# Pick a bait p100 and list its windowed neighbors as candidate targets.
# Interesting combo (high scoring):
#   bait_hash = "693388b375dbd274e1"
#   tgt_hash = "1b58be4f65b975319f"

bait_hash = "693388b375dbd274e1"
print("BAIT", bait_hash)
tgt_hashes = sql_db.get_p100_windowed_neighbors(bait_hash)
print("TARGETS")
display(tgt_hashes)

# Fail with a clear message instead of a bare IndexError when the bait has no neighbors.
if len(tgt_hashes) == 0:
    raise ValueError(f"No windowed neighbors found for bait {bait_hash}")
tgt_hash = tgt_hashes[0]

In [None]:
# recommended combo from Jacob
# NOTE: these assignments replace any bait_hash / tgt_hash values set by earlier
# cells, so on a top-to-bottom run this is the pair the cells below operate on.
bait_hash = "74fd0ac8957faee53e"  # tnpB
tgt_hash = "ba436543bec6c30920"  # rando neighbor with high icity/abundance

In [None]:
# Graph connections between target cluster (cyan) and bait cluster (magenta).
#
# Node kinds:
#   p100 : size 10 — target seed CYAN, bait seed MAGENTA,
#          other target-cluster members BLUE, other bait-cluster members PURPLE
#   p90  : size 25, YELLOW, node id is the p90 value suffixed with "_p90"
#   p30  : size 50, RED,    node id is the p30 value suffixed with "_p30"
# Edges:
#   p90 -- p100 (type "p90_cluster"), p30 -- p90 (type "p30_cluster"), plus an
#   untyped edge for every target-cluster p100 that has a bait-cluster p100
#   among its windowed neighbors.
G = nx.Graph()
# Seed nodes go in first; their highlight colors are re-applied at the end
# because the member pass below recolors any seed that is returned as a member.
G.add_node(tgt_hash, type='p100', size=10, color=CYAN)
G.add_node(bait_hash, type='p100', size=10, color=MAGENTA)

# p100 hashes that belong to each seed's p30 cluster
cluster_to_p100s = {
    'target': set(),
    'bait': set()
}

# add target cluster, bait cluster
for seed_hash, member_color, label in ((tgt_hash, BLUE, 'target'), (bait_hash, PURPLE, 'bait')):
    seed_row = sql_db.get_p30_cluster_for_p100(seed_hash)
    seed_p90 = seed_row['p90'] + "_p90"
    seed_p30 = seed_row['p30'] + "_p30"
    G.add_node(seed_p90, size=25, color=YELLOW)
    G.add_edge(seed_p90, seed_hash, type="p90_cluster")
    G.add_node(seed_p30, size=50, color=RED)
    G.add_edge(seed_p30, seed_p90, type="p30_cluster")
    # The seed always counts as a member of its own cluster, so it takes part in
    # the neighbor search below even if the member query does not return it.
    cluster_to_p100s[label].add(seed_hash)

    # add the other p100s / p90s from the same p30 cluster
    # (distinct names so the seed's row and ids are not overwritten mid-loop)
    for member_row in sql_db.get_p30_cluster_members(seed_row['p30']):
        print(f"found p100 for {seed_hash}")
        member_p100 = member_row['p100']
        member_p90 = member_row['p90'] + "_p90"
        member_p30 = member_row['p30'] + "_p30"
        # type='p100' keeps member nodes consistent with the seed nodes
        G.add_node(member_p100, type='p100', size=10, color=member_color)
        G.add_node(member_p90, size=25, color=YELLOW)
        G.add_edge(member_p90, member_p100, type="p90_cluster")
        G.add_node(member_p30, size=50, color=RED)
        G.add_edge(member_p30, member_p90, type="p30_cluster")
        cluster_to_p100s[label].add(member_p100)

# add edges between target-cluster members and any bait-cluster neighbor
bait_members = cluster_to_p100s['bait']
for tgt_member in cluster_to_p100s['target']:
    for neighbor in sql_db.get_p100_windowed_neighbors(tgt_member):
        if neighbor in bait_members:
            print("found connection")
            G.add_edge(tgt_member, neighbor)

# add back bait and target with their special coloring
# (add_node on an existing node only updates its attributes)
G.add_node(tgt_hash, type='p100', size=10, color=CYAN)
G.add_node(bait_hash, type='p100', size=10, color=MAGENTA)



In [None]:
# Load the networkx graph into a pyvis Network (1000px x 1000px canvas, notebook mode)
# and show it through ./pyvis_test.html.
net = Network(notebook=True, height="1000px", width="1000px")
net.from_nx(G)
net.show('./pyvis_test.html')

In [None]:
def compute_icity(tgt_hash, G):
    """Print icity statistics for the p30 cluster that contains ``tgt_hash``.

    A p100 node counts as "positive" when at least one of its neighbors in ``G``
    has an id that does not end in ``_p90`` (i.e. it has an edge other than the
    one to its p90 cluster). For the cluster the function prints:

    * the fraction of p100s that are positive,
    * the fraction of p90 clusters with at least one positive p100,
    * the fraction of p90 clusters where more than half of the p100s are positive.

    Ratios with an empty denominator are printed as ``nan`` rather than raising
    ``ZeroDivisionError``.

    Args:
        tgt_hash: p100 hash whose p30 cluster is looked up through the
            module-level ``sql_db``.
        G: graph containing the node ``<p30>_p30``; its neighbors are treated as
            the cluster's p90 nodes and the nodes at distance 2 as its p100s.

    Returns:
        None. All results are printed.
    """
    def _ratio(numerator, denominator):
        # nan marks "undefined" for an empty cluster instead of crashing
        return numerator / denominator if denominator else float("nan")

    tgt_p30_cluster = sql_db.get_p30_cluster_for_p100(tgt_hash)['p30'] + '_p30'
    print("Target cluster:", tgt_p30_cluster)

    # p30 -> p90 -> p100, so the cluster's p100s sit exactly two hops away
    tgt_p100s = list(nx.descendants_at_distance(G, tgt_p30_cluster, 2))
    print("Total p100s:", len(tgt_p100s))

    p100_to_icity = {
        p100: any(not n.endswith('_p90') for n in G.neighbors(p100))
        for p100 in tgt_p100s
    }

    num_p100_positive = sum(p100_to_icity.values())
    print("p100s positive icity:", num_p100_positive)

    tgt_p90s = list(G.neighbors(tgt_p30_cluster))
    print("Total p90s:", len(tgt_p90s))

    p90s_any_icity = 0
    p90s_majority_icity = 0

    for p90_cluster in tgt_p90s:
        p100_neighbors = [n for n in G.neighbors(p90_cluster) if n != tgt_p30_cluster]
        p100_positive = [n for n in p100_neighbors if p100_to_icity[n]]

        if p100_positive:
            p90s_any_icity += 1

        # Same as positive / total > 0.5, but well-defined when the p90 has no p100s
        if 2 * len(p100_positive) > len(p100_neighbors):
            p90s_majority_icity += 1

    print("p90s with any p100:", p90s_any_icity)

    print("p90s with majority icity:", p90s_majority_icity)

    print("icity p100:", _ratio(num_p100_positive, len(tgt_p100s)))

    print("icity p90 any:", _ratio(p90s_any_icity, len(tgt_p90s)))
    print("icity p90 majority:", _ratio(p90s_majority_icity, len(tgt_p90s)))


# Report icity for both seeds: first for the target's p30 cluster, then for the bait's.
print("#### From Target #####")
compute_icity(tgt_hash, G)
print("\n\n\n\n")
print("#### From Bait #####")
compute_icity(bait_hash, G)




In [None]:
# Show what sql_db.get_tables() reports, in sorted order.
# sorted() already returns a new list, so no extra list() wrapper is needed.
tables = sql_db.get_tables()
sorted(tables)

In [None]:
%%time
# Materialize the shortest paths between every pair of nodes in G as a dict.
# NOTE(review): `sp` is not read by any visible cell — confirm it is still needed,
# since all-pairs output grows quickly with the number of nodes.
sp = dict(nx.all_pairs_shortest_path(G))