In [1]:
import plotly
import plotly.graph_objs as go

plotly.offline.init_notebook_mode(connected=True)

plotly.offline.iplot({
    "data": [go.Scatter(x=[1, 2, 3, 4], y=[4, 3, 2, 1])],
    "layout": go.Layout(title="hello world")
})

In [3]:
import igraph as ig
import numpy as np

In [15]:
src = np.random.randint(10, size=10)
dst = np.random.randint(10, size=10)
edges = list(zip(src, dst))
edges

[(8, 2),
 (4, 7),
 (8, 8),
 (5, 9),
 (3, 3),
 (7, 7),
 (4, 1),
 (3, 4),
 (4, 8),
 (8, 9)]

In [17]:
G=ig.Graph(edges, directed=False)
layt=G.layout('kk', dim=3) 

In [27]:
N = 10
Xn=layt[:,0]
Yn=[layt[k][1] for k in range(N)]# y-coordinates
Zn=[layt[k][2] for k in range(N)]# z-coordinates
Xe=[]
Ye=[]
Ze=[]
for e in edges:
    Xe+=[layt[e[0]][0],layt[e[1]][0], None]# x-coordinates of edge ends
    Ye+=[layt[e[0]][1],layt[e[1]][1], None]  
    Ze+=[layt[e[0]][2],layt[e[1]][2], None] 

TypeError: list indices must be integers or slices, not tuple

In [28]:
import plotly.plotly as py
import plotly.graph_objs as go

trace1=go.Scatter3d(x=Xe,
               y=Ye,
               z=Ze,
               mode='lines',
               line=dict(color='rgb(125,125,125)', width=1),
               hoverinfo='none'
               )

trace2=go.Scatter3d(x=Xn,
               y=Yn,
               z=Zn,
               mode='markers',
               name='actors',
               marker=dict(symbol='circle',
                             size=6,
                             #color=group,
                             colorscale='Viridis',
                             line=dict(color='rgb(50,50,50)', width=0.5)
                             ),
               #text=labels,
               hoverinfo='text'
               )

axis=dict(showbackground=False,
          showline=False,
          zeroline=False,
          showgrid=False,
          showticklabels=False,
          title=''
          )

layout = go.Layout(
         title="Network of coappearances of characters in Victor Hugo's novel<br> Les Miserables (3D visualization)",
         width=1000,
         height=1000,
         showlegend=False,
         scene=dict(
             xaxis=dict(axis),
             yaxis=dict(axis),
             zaxis=dict(axis),
        ),
     margin=dict(
        t=100
    ),
    hovermode='closest',
        )

data=[trace1, trace2]
fig=go.Figure(data=data, layout=layout)

plotly.offline.iplot(fig, filename='Les-Miserables')

In [630]:
from collections import UserDict
from itertools import islice
import plotly.plotly as py
import plotly.graph_objs as go
import networkx as nx

class UniqueDict(UserDict):
        
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        
    def add(self, x):
        try:
            return self.data[x]
        except KeyError:
            n = len(self.data)
            self.data[x] = n
            return n
        
    def to_list(self):
        return sorted(self, key=self.get)
    

class RecNet():
    
    def __init__(self, seq, y_proj=None):
        edges = []
        nodes = []
        tokens = []
        token_edges = []
        first_occ = {}
        unique_tokens = UniqueDict()
        seq = map(frozenset, filter(bool, seq))
        ylast = 0
        ypos = []
        for t, tok in enumerate(seq):
            y = None
            tok_id = unique_tokens.get(tok, None)
            if not tok_id:
                tok_id = unique_tokens.add(tok)
                first_occ[tok_id] = t
                
            matched = set()
            ysum = 0
            ywsum = 0
            for d, tok_prev in enumerate(reversed(tokens)):
                recent_common = (tok - matched) & tok_prev
                common = tok & tok_prev
                if not recent_common:
                    continue
                
                w = len(common)
                ywsum += w
                ysum += w * ypos[t-d-1]
                
                edges.append((t-d-1, t, recent_common))
                matched.update(recent_common)
                
                if len(tok) == len(matched):
                    pass#break
            
            if ysum:
                y = ysum / ywsum + len(tok - matched) + np.random.random() 
            else:
                y = ylast + len(tok - matched) * (1+np.random.random())
                ylast = y
            
            tokens.append(tok)
            nodes.append(tok_id)
            ypos.append(y)
        
        self.nodes = np.array(nodes)
        self.tokens = tokens
        self.unique_tokens = np.array(unique_tokens.to_list())
        self.first_occ = np.array([first_occ[t] for t in range(len(self.unique_tokens))])
        self.weights = [w for _,_,w in edges]
        self.edges = np.array([(t0,t1) for t0,t1,_ in edges])
        self.edges_token_ids = self.nodes[self.edges]

        self.g = nx.DiGraph()
        self.g.add_nodes_from(np.arange(len(self.nodes)))
        self.g.add_edges_from(self.edges)
                
        self.ig = ig.Graph([(t0,t1) for t0,t1,_ in edges], directed=True)
            
        self.ypos = np.array(ypos)
        
    def get_y_proj(self, y_proj=None):
        if y_proj == 'random':
            return np.random.random([len(self.unique_tokens)])
        elif y_proj == 'node_id':
            return self.nodes / len(self.unique_tokens)
        elif y_proj == 'kk':
            #pos_dict = nx.kamada_kawai_layout(self.g, dim=1)
            #return np.array([pos_dict[n][0] for n in self.nodes])
            return np.array(self.ig.layout('kk', dim=2))
        elif y_proj == 'spectral':
            pos_dict = nx.spectral_layout(self.g, dim=1)
            return np.array([pos_dict[n][0] for n in self.nodes])
        elif y_proj == 'prob':
            # TODO place nodes according to their frequency
        else:
            return self.ypos
        
    def summary(self, verbose_level=1):
        fs_fmt = lambda fs: "{" + ','.join(fs) + "}"
        list_fmt = lambda xs: "[" + ', '.join(xs) + "]"
        if verbose_level > 1:
            print("cascade identifiers:", list_fmt(fs_fmt(t) for t in self.unique_tokens))
        print(len(self.unique_tokens), "cascade identifiers")
        print(len(self.nodes), "nodes")
        if verbose_level > 1:
            print("edges:", list_fmt(f"{s}-{d} {fs_fmt(w)}" for (s, d), w in zip(self.edges, self.weights)))
        print(len(self.edges), "edges")
        
    def simplify(self):
        def new_seq():
            nodes_ids = np.argsort(self.first_occ)
            yield from (self.tokens[self.nodes[i]] for i in nodes_ids)
        
        return RecNet(t for t in new_seq())
        
    def plot2(self):
        layt = np.array(self.ig.layout('fr3d'))
        Xn = layt[:,0]
        Yn = layt[:,1]
        Zn = layt[:,2]
        E = layt[self.edges]
        #Xe = [[e0, e1, None] for e0, e1 in E[:,:,0]]
        Xe = E[:,:,0].flatten()
        Ye = E[:,:,1].flatten()
        Ze = E[:,:,2].flatten()
        #Ye = [[e0, e1, None] for e0, e1 in E[:,:,1]]
        #Ze = [[e0, e1, None] for e0, e1 in E[:,:,2]]
        print(layt[:5])
        print(Xe[:5])

        
        Fn = []
        for node, adjacencies in enumerate(self.g.adjacency()):
            t, adj = adjacencies
            #otdeg = len(adj) #sum(+1 if a < t else -1 for a in adj)
            f = np.array(list(adj))
            f[f < t] = +1
            f[f > t] = -1
            f = np.sum(f)
            Fn.append(f)
        Fn = np.array(Fn)
        
        labels = [";".join(sorted(t)) for t in self.tokens]
        
        edge_trace=go.Scatter3d(x=Xe,
               y=Ye,
               z=Ze,
               mode='lines',
               line=dict(color='rgb(125,125,125)', width=1),
               #hoverinfo='none'
               )

        node_trace=go.Scatter3d(x=Xn,
                       y=Yn,
                       z=Zn,
                       mode='markers',
                       #name='actors',
                       marker=dict(symbol='circle',
                                     size=4,
                                     color=Fn,
                                     #color='rgb(50,50,50,0.5)',
                                     colorscale='Viridis',
                                     #line=dict(color='rgb(50,50,50)', width=0.5)
                                     colorbar=dict(
                                            thickness=15,
                                            title='Deg(in) - Deg(out)',
                                            xanchor='left',
                                            titleside='right'
                                        ),
                                     ),
                       text=labels,
                       hoverinfo='text'
                       )
        
        axis=dict(showbackground=False,
                  showline=False,
                  zeroline=False,
                  showgrid=False,
                  showticklabels=False,
                  title=''
                  )

        layout = go.Layout(
            title="Recurrence Network",
            width=1000,
            height=1000,
            showlegend=False,
            scene=dict(
                xaxis=dict(axis),
                yaxis=dict(axis),
                zaxis=dict(axis),
            ),
            margin=dict(
                t=100
            ),
            hovermode='closest',
        )
        
        fig = go.Figure(data=[edge_trace,node_trace], layout=layout)
        
        plotly.offline.iplot(fig)
        
    def plot(self, layout=None, show_labels=False, timeslice=None):
        
        Yn = self.get_y_proj(layout)
        
        
        edge_trace = go.Scattergl(
            x=[],
            y=[],
            #text=[],
            line=dict(width=0.5,color='#888'),
            #hoverinfo='text',
            #textposition='bottom center',
            mode='lines' + ('+text' if show_labels else ''),
        )

        """
        nodes = zip(self.tokens, self.nodes)
        edges = zip(self.edges, self.edges_token_ids, self.weights)
        if timeslice:
            edges = islice(edges, *timeslice)
            markers = islice(markers, timeslice)
        """
        
        edge_trace['x'] = self.edges.flatten()
        edge_trace['y'] = Yn[self.edges_token_ids].flatten()
        #edge_trace['text'] = [";".join(sorted(w)) for w in self.weights]

        node_trace = go.Scattergl(
            x=[],
            y=[],
            text=[],
            mode='markers' + ('+text' if show_labels else ''),
            hoverinfo='text',
            textposition='top center',
        
            marker=dict(
                    showscale=True,
                    # colorscale options
                    #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
                    #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
                    #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
                    colorscale='YlGnBu',
                    reversescale=True,
                    color=[],
                    size=10,
                    colorbar=dict(
                        thickness=15,
                        title='Node Connections',
                        xanchor='left',
                        titleside='right'
                    ),
                    line=dict(width=2)),
        )

        node_trace['x'] = np.arange(len(self.nodes))
        node_trace['y'] = Yn[self.nodes]
        node_trace['text'] = [";".join(sorted(t)) for t in self.tokens]

        layout = dict(
            title='Recurrence network',
            xaxis=dict(
                rangeselector=dict(
                    buttons=[
                        dict(step='all')
                    ]
                ),
                rangeslider=dict(
                    visible = True,
                ),
                type='linear',
                range=[0,50],
            ),
        )
        
        for node, adjacencies in enumerate(self.g.adjacency()):
            t, adj = adjacencies
            outdeg = len(adj) #sum(+1 if a < t else -1 for a in adj)
            f = np.array(adj)
            f[f < t] = +1
            f[f > t] = -1
            f = np.sum(f)
            
            #print(t, adj, degdiff)
            node_trace['marker']['color']+=(f,)
            #node_info = '# of connections: '+str(len(adjacencies[1]))
            #node_trace['text']+=tuple([node_info])
        
        fig = go.Figure(data=[edge_trace,node_trace], layout=layout)
        
        plotly.offline.iplot(fig)#, filename='basic-line')

        
arr_eq = np.array_equal#lambda xs, ys: all(x == y for x, y in zip(xs, ys))
        
testnet = RecNet(["AB","BC","CD","ABC","BC","BD"])
assert arr_eq(testnet.nodes,     [0,1,2,3,1,4]), testnet.nodes
assert arr_eq(testnet.first_occ, [0,1,2,3,5]), testnet.first_occ
assert arr_eq(testnet.edges,                np.array([[0,1],[1,2],[2,3],[1,3],[0,3],[3,4],[4,5],[2,5]])), testnet.edges
assert arr_eq(testnet.weights, [frozenset(w) for w in [ "B",  "C",  "C",  "B",  "A", "BC",  "B", "D"]]), testnet.weights
assert arr_eq(testnet.edges_token_ids,      np.array([[0,1],[1,2],[2,3],[1,3],[0,3],[3,1],[1,4],[2,4]])), testnet.edges_token_ids

#testnet = testnet.simplify()
#assert arr_eq(testnet.nodes, [0,1,2,3,4]), testnet.nodes
#assert arr_eq(testnet.edges,                np.array([[0,1],[1,2],[2,3],[1,3],[0,3],[2,4]])), testnet.edges
#assert arr_eq(testnet.weights, [frozenset(w) for w in [ "B",  "C",  "C",  "B",  "A", "D"]]), testnet.weights
#assert arr_eq(testnet.edges_token_ids, testnet.edges), testnet.edges_token_ids


In [631]:
import random
def sample_token_seq(n):
    A = "ABCDE"
    r = np.random.random([n,2])
    m = len(A)
    for i in range(n):
        toks = []
        for j, a in enumerate(A):
            if r[i,0] < 1-j/m:
                toks.append(random.choice(A))
            if r[i,1] < j/m:
                continue
            
        yield toks

net = RecNet(sample_token_seq(100)).simplify()
net.summary()

net.plot2()#show_labels=True)

14 cascade identifiers
31 nodes
48 edges
[[ 85.85531458   9.52120286  17.84103494]
 [ 83.24559093  11.83041307  26.38620556]
 [ 79.17626158   1.4498615   18.85889316]
 [ 76.65472547   3.26541028  28.35243182]
 [ 66.64245362 -10.06638745  22.15711306]]
[85.85531458 83.24559093 83.24559093 79.17626158 85.85531458]


In [498]:
from collections import Counter

def load_tokens(filename, sep=';'):
    with open(filename, 'r') as fr:
        for line in fr.readlines():
            toks = [t.strip() for t in line.split(sep)]
            if all(toks):
                yield toks
                
toks = list(load_tokens('data\\sens_and_sens_rough_clean.csv'))#, 300)


In [633]:
net = RecNet(toks)
net.summary()
net.plot2()

513 cascade identifiers
3746 nodes
7698 edges
[[-23062.58625133 -11590.60506046  -1906.35069365]
 [-25966.94205681 -12548.98797585  -1964.50324268]
 [-27898.51527867 -13063.74759475  -1964.63824884]
 [-29249.39171845 -13428.75761987  -2116.24006248]
 [-29126.92885628 -13828.90080844  -1998.31319177]]
[-23062.58625133 -25966.94205681 -25966.94205681 -27898.51527867
 -27898.51527867]
