In [None]:
import pandas as pd
import numpy as np
import json
import plotly.express as px
from pyvis.network import Network
import networkx as nx

import sqlite3


from utils import notebook_util
notebook_util.disp_notebook_full_width()

In [None]:
# from jacob; grabs AA sequence

# Module-level SQLite connection to the protein stats database;
# get_prot_sequence runs its queries through this handle.
con = sqlite3.connect("/GeneGraphDB/data/20220322_80kprotein_stats.db")

def get_prot_sequence(pid, connection=None):
    """Return the stored sequence for protein ``pid`` as a string.

    Args:
        pid: Identifier matched (as text) against the ``pid`` column of the
            ``proteins`` table.
        connection: Optional ``sqlite3.Connection`` to query. When omitted,
            the module-level ``con`` is used, so existing one-argument calls
            behave as before.

    Returns:
        The ``sequence`` value of the first matching row, passed through
        ``str``.

    Raises:
        LookupError: If no row in ``proteins`` has this ``pid``.
    """
    db = con if connection is None else connection
    cur = db.cursor()
    try:
        # Bind the id as a query parameter rather than formatting it into the
        # SQL text, so a quote inside the id can neither break nor inject into
        # the statement. str() keeps the text comparison the old '%s' had.
        cur.execute("SELECT sequence FROM proteins WHERE pid = ?", (str(pid),))
        row = cur.fetchone()
    finally:
        cur.close()

    if row is None:
        raise LookupError("no protein with pid %r in the proteins table" % (pid,))

    # The connection is deliberately left open: it is shared across calls.
    return str(row[0])

In [None]:
dataframes = []

# Colocalization result files to load, one DataFrame per file.
json_files = (
    "/GeneGraphDB/data/20220308_neo4j_colocalization/cas1.json",
    "/GeneGraphDB/data/20220308_neo4j_colocalization/cas2.json",
    "/GeneGraphDB/data/20220308_neo4j_colocalization/tnpBs_in_testdb.p100.1e4.json",
)

for json_file in json_files:
    with open(json_file) as f:
        data_dict = json.load(f)
    # Only the values of the top-level JSON object are kept; its keys are dropped.
    df = pd.DataFrame(data_dict.values())
    dataframes.append(df)

In [None]:
# Stack the per-file frames into one table. ignore_index=True gives the result
# a fresh 0..n-1 index; without it every source frame's own 0-based index is
# kept and the combined frame carries duplicate labels. pd.concat already
# returns a new object, so no extra .copy() is needed.
joint_df = pd.concat(dataframes, ignore_index=True)

In [None]:
# Ranking score: target colocalization scaled by the cube root of the number
# of target p100s.
tgt_p100s_cube_root = np.power(joint_df['num_tgt_p100s'], 1/3)
joint_df['weighted_score'] = joint_df['tgt_colocalization'] * tgt_p100s_cube_root

In [None]:
# Narrow joint_df step by step, printing how many rows survive each filter,
# then rank the survivors by weighted_score (highest first).
filtered_df = joint_df.copy()
print(len(filtered_df), "rows total")

# The threshold is inclusive, so the message says ">=5" to match the filter.
filtered_df = filtered_df[filtered_df['num_tgt_p100s'] >= 5]
print(len(filtered_df), "filtered for >=5 tgt p100")

filtered_df = filtered_df[filtered_df['num_connections'] > 1]
print(len(filtered_df), "+ filtered for >1 shared edges")

filtered_df = filtered_df[filtered_df['tgt_colocalization'] > 0.1]
print(len(filtered_df), "+ filtered for >0.1 tgt colocalization")

filtered_df = filtered_df[filtered_df['bait_colocalization'] > 0.01]
print(len(filtered_df), "+ filtered for >0.01 bait colocalization")

# Reassign rather than sort in place: filtered_df comes from boolean indexing,
# where in-place mutation can trigger pandas' SettingWithCopyWarning.
filtered_df = filtered_df.sort_values('weighted_score', ascending=False)
filtered_df


In [None]:
# For the 20 top-ranked targets: print the id and its sequence, then show
# every joint_df row in which that p30 appears as target or as bait.
for tgt_p30 in filtered_df['tgt_p30'].head(20):
    print(tgt_p30, end="\n\n")
    print(get_prot_sequence(tgt_p30))
    appears_as_tgt = joint_df['tgt_p30'] == tgt_p30
    appears_as_bait = joint_df['bait_p30'] == tgt_p30
    display(joint_df[appears_as_tgt | appears_as_bait])
    print("\n\n*********\n")

In [None]:
# Distinct bait types present in the combined table.
joint_df.loc[:, 'bait_type'].unique()

In [None]:
# Hex colour constants available for graph styling.
BLACK, WHITE = "#000000", "#FFFFFF"
RED, GREEN, BLUE = "#FF0000", "#00FF00", "#0000FF"
YELLOW, CYAN, MAGENTA = "#FFFF00", "#00FFFF", "#FF00FF"
PURPLE = "#A020F0"

# Node colour per protein type; 'unknown' is the type given to targets that
# never appear as a bait.
TYPE_TO_COLOR = dict(
    cas1=GREEN,
    cas2=BLUE,
    tnpB=RED,
    unknown=MAGENTA,
)

# Undirected graph that the loop further down in this cell fills with p30
# nodes and colocalization edges.
G = nx.Graph()

def stylized_edge(G, src, dest, weight):
    """Add the edge ``src``-``dest`` to ``G`` with styling chosen from ``weight``.

    A ``weight`` below 0.5 gives a red edge stored with ``weight=1``; any
    other value gives a green edge stored with ``weight=5``.
    """
    is_weak = weight < 0.5
    edge_color, edge_width = (RED, 1) if is_weak else (GREEN, 5)
    G.add_edge(src, dest, color=edge_color, weight=edge_width)

def num_p100s_to_size(num_p100s: int, scale: int = 3) -> int:
    """Map a p100 count to a node display size on a logarithmic scale.

    The size is ``scale * max(1, round(ln(num_p100s)))``.

    Args:
        num_p100s: Number of p100s for the node.
        scale: Multiplier applied to the rounded logarithm. Defaults to 3.

    Returns:
        The display size as a Python ``int``; never smaller than ``scale``.
    """
    if num_p100s < 1:
        # ln() is -inf at 0 and NaN below it, and neither converts to int.
        # Counts in (0, 1) already clamp to the minimum size, so 0 and
        # negative counts are given the same minimum instead of crashing.
        return scale

    log_size = int(np.round(np.log(num_p100s)))
    return max(1, log_size) * scale

#     G.add_node(p30, size=50, color=RED)
#     G.add_node(p90, size=25, color=YELLOW)
#     G.add_edge(p30, p90, type="p30_clustering")

# Build the graph around the 200 top-ranked targets: every joint_df row that
# mentions such a target (as target or as bait) contributes its two p30 nodes
# and one edge styled by the row's tgt_colocalization.
# The per-target DataFrames are not displayed here; doing so put up to 200
# tables into the cell output.

# First bait_type recorded for each bait_p30, computed once instead of
# re-scanning joint_df for every new target node.
bait_type_by_p30 = (
    joint_df.drop_duplicates(subset='bait_p30')
    .set_index('bait_p30')['bait_type']
    .to_dict()
)

# Colour for any type missing from TYPE_TO_COLOR; a bare .get() would pass
# color=None to the graph for such nodes.
fallback_color = TYPE_TO_COLOR['unknown']

# The outer variable has its own name so the inner loop's tgt_p30 (taken from
# each row) does not overwrite the target currently being expanded.
for top_tgt_p30 in filtered_df['tgt_p30'].iloc[:200]:
    tgt_related_df = joint_df[
        (joint_df['tgt_p30'] == top_tgt_p30) | (joint_df['bait_p30'] == top_tgt_p30)
    ]

    for row in tgt_related_df.to_dict('records'):
        tgt_p30 = row['tgt_p30']
        bait_p30 = row['bait_p30']

        if tgt_p30 not in G:
            # A target is typed by its own appearance as a bait, if it has one.
            tgt_type = bait_type_by_p30.get(tgt_p30, 'unknown')
            G.add_node(
                tgt_p30,
                size=num_p100s_to_size(row['num_tgt_p100s']),
                color=TYPE_TO_COLOR.get(tgt_type, fallback_color),
            )

        if bait_p30 not in G:
            G.add_node(
                bait_p30,
                size=num_p100s_to_size(row['num_bait_p100s']),
                color=TYPE_TO_COLOR.get(row['bait_type'], fallback_color),
            )

        stylized_edge(G, tgt_p30, bait_p30, row['tgt_colocalization'])


In [None]:
%%time
# Convert the networkx graph into a pyvis network and write it out as an
# HTML page.
output_html = './pyvis_test.html'
net = Network(height="1000px", width="1000px", notebook=True)
net.from_nx(G)
net.show(output_html)