In [1]:
import sys
base = "./../../../"
sys.path.append(base)

import pandas
from pathlib import Path
import json
import pairs_flat_v2 as pairs
import helper
from sklearn.preprocessing import QuantileTransformer
from sklearn.decomposition import PCA as sklearnPCA
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import numpy as np
from tqdm import tqdm_notebook as tqdm
from pandas import Series
from plotly import tools

init_notebook_mode(connected=True)

import networkx as nx

tqdm.monitor_interval = 0
init_notebook_mode(connected=True)


In [2]:
# Load matrix
oscope_gencounts = pandas.read_csv(Path(base + "data/GSE64016_H1andFUCCI_normalized_EC_human.csv"))

# Set index right
oscope_gencounts.set_index("Unnamed: 0", inplace=True)

# Subset sorted
oscope_gencounts_sorted = oscope_gencounts.iloc[:, [oscope_gencounts.columns.get_loc(c) for c in oscope_gencounts.columns if "G1_" in c or "G2_" in c or "S_" in c]]

oscope_gencounts_sorted.head(10)

Unnamed: 0_level_0,G2_Exp1.059,G2_Exp1.069,G2_Exp1.075,G2_Exp1.063,G2_Exp1.029,G2_Exp1.076,G2_Exp1.013,G2_Exp1.037,G2_Exp1.057,G2_Exp1.018,...,G1_Exp1.008,G1_Exp1.055,G1_Exp1.050,G1_Exp1.076,G1_Exp1.011,G1_Exp1.063,G1_Exp1.083,G1_Exp1.030,G1_Exp1.018,G1_Exp1.046
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MKL2,26.258941,7.349801,0.590667,9.526098,2.653238,132.28829,26.87289,0.0,21.962493,577.5893,...,3.887628,84.337868,69.192927,1.126491,1.13733,0.0,36.741767,11.218839,152.79286,123.041274
CD109,2.234584,6.094681,76.627285,1.011232,2.012671,10.856459,54.251989,164.865603,1.601655,6.700829,...,4.956726,7.208501,4.299846,7.626347,5.936864,2.797575,149.063512,2.80471,15.996667,7.077119
ABTB1,0.514881,0.0,0.0,1.831942,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MAST2,0.514881,0.565369,67.336087,0.0,23.879144,0.0,63.744994,0.0,0.0,118.84839,...,0.971907,15.069427,0.0,60.830538,22.746604,0.0,0.720427,0.0,5.589983,33.230125
KAT5,3.980032,0.576677,0.0,338.473236,0.0,0.0,61.407678,2.792845,285.432055,0.0,...,0.0,96.041736,0.0,0.0,0.0,0.0,1.03021,55.09151,4.826018,0.0
WWC2,90.104208,642.694883,484.164183,74.743227,362.341379,341.330246,692.058151,253.949365,327.808953,686.49512,...,169.11182,233.122597,656.509085,412.712674,1075.186455,661.889223,104.641994,322.527589,47.514853,67.124853
CD163,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MYL2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
UBE2Z,372.264249,140.211592,12.994683,12.090816,58.750276,94.365647,309.350706,130.06676,140.345685,20.347584,...,138.010796,58.87738,538.955243,5.632457,71.651801,0.656708,2.881707,610.984967,0.0,3.215239
RGPD4,0.0,2.481971,0.0,0.0,23.105915,11.103397,0.0,0.0,1.237399,0.0,...,0.0,0.0,1.91928,3.559713,0.0,0.0,2.845686,0.0,17.505962,0.0


In [3]:
# Get annotation
is_G1 = [oscope_gencounts_sorted.columns.get_loc(c) for c in oscope_gencounts_sorted.columns if "G1_" in c]
is_S = [oscope_gencounts_sorted.columns.get_loc(c) for c in oscope_gencounts_sorted.columns if "S_" in c]
is_G2M = [oscope_gencounts_sorted.columns.get_loc(c) for c in oscope_gencounts_sorted.columns if "G2_" in c]

annotation = {
    "G1": list(is_G1),
    "S": list(is_S),
    "G2M": list(is_G2M)
}

go_0007049 = [line.replace("\n","").replace("\r","") for line in open(base + "data/go_0007049_homoSapiens.csv", "r")]
cycle_base = [line.split("\t")[0] for i, line in enumerate(open(base + "data/cyclebase_top1000_genes.tsv", "r")) if 0 < i]
cycle_genes = np.unique(np.concatenate((go_0007049, cycle_base),0))

oscope_marker_pairs = pairs.sandbag(x=oscope_gencounts_sorted, phases=annotation, subset_genes=list(cycle_genes), fraction=0.6, processes=0, verbose=True)

[__set_matrix] Original Matrix 'x' has shape 19084 x 247
[__set_matrix] Removed 16689 genes that were not in 'subset_genes'. 2395 genes remaining.
[__set_matrix] Removed 61 genes that were not expressed in any samples. 2334 genes remaining.
[__set_matrix] Removed 0 samples that were not annotated in 'phases'. 247 samples remaining.
[__set_matrix] Matrix truncation done. Working with 2334 genes for 247 samples.
[sandbag] Identifying marker pairs...Processing in parallel with 15 processes...
 Done!
[sandbag] Identified 8146 marker pairs (phase: count): {'G1': 2575, 'S': 4101, 'G2M': 1470}


In [4]:
import networkx as nx
from itertools import chain

genes = oscope_gencounts_sorted.index.tolist()

marker_edges = []
for marker in oscope_marker_pairs["G2M"]:
    marker_edges.append((genes.index(marker[0]), genes.index(marker[1])))


marker_edges

[(12772, 11412),
 (18544, 15948),
 (12094, 1509),
 (9334, 6274),
 (9334, 5846),
 (9334, 7262),
 (10848, 17807),
 (9849, 15364),
 (11680, 16129),
 (16334, 16129),
 (9849, 16129),
 (13025, 17307),
 (9006, 15913),
 (13025, 15913),
 (9006, 10400),
 (6886, 1817),
 (10848, 4169),
 (99, 4169),
 (8934, 5078),
 (12094, 5078),
 (6690, 5078),
 (6886, 8995),
 (12772, 15762),
 (9334, 15762),
 (9849, 15762),
 (18265, 14526),
 (9006, 5279),
 (9006, 9836),
 (9445, 9836),
 (6886, 14792),
 (11680, 9203),
 (5656, 9203),
 (9334, 16510),
 (9334, 10343),
 (4581, 1066),
 (2125, 12563),
 (9334, 2125),
 (2125, 146),
 (14416, 2799),
 (12784, 2799),
 (15385, 2799),
 (9849, 4296),
 (9334, 11884),
 (18442, 1393),
 (9323, 1393),
 (724, 1393),
 (3499, 1393),
 (8934, 1393),
 (43, 1393),
 (15335, 1393),
 (19077, 1393),
 (12094, 1393),
 (11680, 9875),
 (9849, 9875),
 (9334, 11770),
 (12951, 16621),
 (9849, 12951),
 (10848, 17157),
 (8496, 17157),
 (12784, 17157),
 (9334, 1850),
 (744, 2017),
 (5656, 744),
 (10806, 1441

In [5]:
net_G2M=nx.DiGraph()

net_G2M.add_edges_from(marker_edges)

In [6]:
pos=nx.spring_layout(net_G2M)

dmin=1
ncenter=0
for n in pos:
    x,y=pos[n]
    d=(x-0.5)**2+(y-0.5)**2
    if d<dmin:
        ncenter=n
        dmin=d

pos

{21: array([0.57157141, 0.66829002]),
 27: array([ 0.55748653, -0.33517507]),
 37: array([ 0.95298445, -0.0877588 ]),
 43: array([-0.21889921, -0.14050914]),
 79: array([0.34845108, 0.87522382]),
 80: array([-0.67305094, -0.53942961]),
 99: array([-0.01957745, -0.38529822]),
 114: array([0.17263716, 0.96216512]),
 146: array([ 0.41819739, -0.91429883]),
 225: array([-0.21189807, -0.1521308 ]),
 233: array([-0.9910568 ,  0.14714527]),
 286: array([0.61852103, 0.32874429]),
 296: array([-0.99280131, -0.1215369 ]),
 350: array([ 0.56316519, -0.83506203]),
 354: array([-0.74039328,  0.39923668]),
 377: array([0.88708955, 0.47405553]),
 390: array([ 0.74521345, -0.74729037]),
 405: array([-0.57283956,  0.22661772]),
 454: array([-0.0465013 , -0.85012573]),
 462: array([0.04487444, 0.86197358]),
 481: array([ 0.01214739, -0.98207194]),
 517: array([ 0.31482255, -0.12860844]),
 536: array([ 0.55011952, -0.86008584]),
 543: array([-0.37048647,  0.09333692]),
 544: array([-0.1172141, -0.9920302

In [7]:
p=nx.single_source_shortest_path_length(net_G2M,ncenter)

edge_trace = go.Scatter(
    x=[],
    y=[],
    line=go.Line(width=0.5,color='#888'),
    hoverinfo='none',
    mode='lines')

for edge in net_G2M.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_trace['x'] += [x0, x1, None]
    edge_trace['y'] += [y0, y1, None]

node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
    marker=go.Marker(
        showscale=True,
        # colorscale options
        # 'Greys' | 'Greens' | 'Bluered' | 'Hot' | 'Picnic' | 'Portland' |
        # Jet' | 'RdBu' | 'Blackbody' | 'Earth' | 'Electric' | 'YIOrRd' | 'YIGnBu'
        colorscale='YIGnBu',
        reversescale=True,
        color=[],
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=2)))

for node in net_G2M.nodes():
    x, y = pos[node]
    node_trace['x'].append(x)
    node_trace['y'].append(y)
    
for node in net_G2M.nodes:
    node_trace['marker']['color'].append(net_G2M.degree(node))
    node_info = '# of connections: '+str(net_G2M.degree(node))
    node_trace['text'].append(node_info)

fig = go.Figure(data=go.Data([edge_trace, node_trace]),
             layout=go.Layout(
                title='Network of Marker pairs',
                titlefont=dict(size=16),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=go.XAxis(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=go.YAxis(showgrid=False, zeroline=False, showticklabels=False)))

iplot(fig, filename='networkx', image="svg")