In [None]:
 %config Completer.use_jedi = False 
import networkx as nx
import community as c
import pandas as pd
import matplotlib.pyplot as plt    
import os as os
import numpy as np
import random
import seaborn as sns
import time
import operator
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly
from IPython import display

# Seed Python's built-in `random` module so values drawn through it repeat between runs.
random.seed(246)

In [4]:
# Load the filtered 2019 Q3 table; later cells read its `name`, `issuers` and `shares` columns.
df2019q3 = pd.read_csv("2019Q3_Filtered.csv")

In [6]:
# Load the investor list; its `investor` column is used in the next cell to filter df2019q3 by name.
investors=pd.read_csv("investors.csv")

In [7]:
# Display only the rows whose `name` appears in the investors list.
is_listed_investor = df2019q3["name"].isin(investors["investor"])
df2019q3[is_listed_investor]

Unnamed: 0.1,Unnamed: 0,name,cik,state,issuers,cussip,shares,shares_type
0,5263,Neumann Advisory Hong Kong Ltd,1727862,K3,Sinovac,P8696W104,315586.0,SH
1,25953,RIVERBRIDGE PARTNERS LLC,1112325,MN,Roche,767744105,4076884.0,SH
2,26154,"SCOUT INVESTMENTS, INC.",1161927,MO,Novartis,654802206,106731.0,SH
3,26190,"SCOUT INVESTMENTS, INC.",1161927,MO,Roche,770323103,194208.0,SH
4,149471,First American Bank,1291422,IL,Glaxosmithkline,375558103,69833.0,SH
...,...,...,...,...,...,...,...,...
980,1850436,PRICE T ROWE ASSOCIATES INC /MD/,80255,MD,Novartis,65158N102,65203.0,SH
981,1869015,CALIFORNIA PUBLIC EMPLOYEES RETIREMENT SYSTEM,919079,CA,Merck,58471A105,69183.0,SH
982,1869343,CALIFORNIA PUBLIC EMPLOYEES RETIREMENT SYSTEM,919079,CA,Roche,695263103,276274.0,SH
983,1869399,CALIFORNIA PUBLIC EMPLOYEES RETIREMENT SYSTEM,919079,CA,Pfizer,712706209,8372.0,SH


In [8]:
# Total shares per (issuer, investor name) pair, returned as a Series named 'count'.
# NOTE(review): this rebinds `c`, which the import cell uses as the alias for the
# `community` module; the module is no longer reachable under that name afterwards.
c=df2019q3.groupby(['issuers','name'])['shares'].sum().rename('count')

In [9]:
# Each holder's fraction of an issuer's recorded shares: every (issuer, name)
# total divided by the sum over that issuer (index level 0).
p = c / c.groupby(level=0).sum()

# Flatten to a regular DataFrame (columns: issuers, name, count). It lives in its
# own variable rather than as an ad-hoc attribute on the Series, which would be
# lost as soon as `p` is recomputed or rebound.
p_df = p.reset_index()

# Keep only holders whose fraction exceeds 1% of the issuer's recorded shares.
onepercent = p_df[p_df["count"] > .01]

In [10]:
# Put the columns in (name, issuers, count) order, the positional layout the
# graph-building cell reads.
onep_list = onepercent.loc[:, ["name", "issuers", "count"]]

In [11]:
# Empty undirected graph; the next cell fills it with investor and issuer nodes.
# NOTE(review): networkx documents keyword arguments to Graph() as graph
# attributes, so seed=15 presumably ends up as G.graph['seed'] rather than
# seeding any random generator — confirm and drop it if unintended.
G = nx.Graph(seed=15)

In [12]:
# Build the investor-issuer graph from onep_list (columns: name, issuers, count):
# one node per investor name (bipartite=0), one per issuer (bipartite=1), and an
# edge for every row.
# The row's fraction is stored on the edge as 'count' so that
# nx.get_edge_attributes(G, 'count') in the plotting cells actually finds it;
# previously the value was read from the frame but never attached to the graph.
for investor, issuer, share in onep_list.itertuples(index=False, name=None):
    G.add_node(investor, bipartite=0)
    G.add_node(issuer, bipartite=1)
    G.add_edge(investor, issuer, count=share)

In [13]:
# Create pos: dictionary keyed by node with node positions as values.
# spring_layout is called with k=0.1 and an explicit seed=2.

pos = nx.spring_layout(G, k=0.1, seed=2)

# Copy every position onto its node under the 'pos' key. Using the networkx
# helper instead of a `for n, p in ...` loop avoids overwriting the notebook-level
# `p` (the ownership-fraction Series) with a coordinate array.
nx.set_node_attributes(G, pos, 'pos')

In [14]:
# Numeric value per issuer node, grouped by tier; `values` is what the node
# trace receives as marker colour.
val_map = {
    **dict.fromkeys(['Pfizer', 'Astrazeneca', 'Moderna'], 2.0),
    'Sinovac': 3.0,
    **dict.fromkeys(
        ['Curevac', 'Merck', 'Themis', 'Glaxosmithkline',
         'Novartis', 'Roche', 'Sanofi', 'Bayer'],
        1.0,
    ),
}

# One value per node in graph order; any node missing from val_map gets 0.25.
values = [val_map[node] if node in val_map else 0.25 for node in G]

In [15]:
# Add edges as disconnected lines in a single trace and nodes as a scatter trace.

# Every edge contributes (start, end, None) to each coordinate list; the None
# entry separates one edge's segment from the next within the single trace.
edge_x, edge_y = [], []
for src, dst in G.edges():
    (x0, y0), (x1, y1) = G.nodes[src]['pos'], G.nodes[dst]['pos']
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='text',
    line=dict(width=0.4, color='#888'),
)

# Node coordinates, collected in G.nodes() order.
node_positions = [G.nodes[node]['pos'] for node in G.nodes()]
node_x = [xy[0] for xy in node_positions]
node_y = [xy[1] for xy in node_positions]

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        color=values,
        size=10,
        line_width=2,
        showscale=False,
        reversescale=False,
    ),
)

In [None]:
# Hover text per node: its label plus the number of neighbours it has.
# Node colour is taken from `values` (the per-issuer map), not from the
# connection count. G.adjacency() yields (node, neighbours) pairs directly, and
# str() keeps the label building from failing on non-string node labels.
node_text = [
    str(node) + ', # of connections: ' + str(len(neighbours))
    for node, neighbours in G.adjacency()
]

# Edge 'count' attributes scaled by 15. No trace in this cell consumes them;
# the names are kept so any cell that reads weights/weights2 still finds them.
weights = nx.get_edge_attributes(G,'count').values()
weights2 = [w * 15 for w in weights]


node_trace.marker.color = values
node_trace.text = node_text


# Node size by betweenness centrality, scaled by 80. The centrality dict is
# indexed by node so the sizes follow G.nodes() order, the same order used for
# the node coordinates.
betCent = nx.betweenness_centrality(G, normalized=True, endpoints=True)
node_size = [betCent[node] * 80 for node in G.nodes()]

node_trace.marker.size=node_size

In [None]:
# NOTE(review): this cell repeats the node-text, colour and sizing steps of the
# cell directly above it (including a second betweenness-centrality pass);
# consider keeping only one of the two.

# Hover text per node: its label plus the number of neighbours it has.
# Node colour is taken from `values` (the per-issuer map), not from the
# connection count. str() keeps the label building from failing on non-string
# node labels.
node_text = [
    str(node) + ', # of connections: ' + str(len(neighbours))
    for node, neighbours in G.adjacency()
]


node_trace.marker.color = values
node_trace.text = node_text


# Node size by betweenness centrality, scaled by 80 and looked up per node so
# the sizes follow G.nodes() order.

betCent = nx.betweenness_centrality(G, normalized=True, endpoints=True)
node_size = [betCent[node] * 80 for node in G.nodes()]

node_trace.marker.size=node_size

In [None]:
# Create Network Graph

fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # Title text and font size are given through title.text / title.font;
        # the older `titlefont` shorthand is deprecated and rejected by newer
        # plotly releases.
        title=dict(text='<br>2019 Q3', font=dict(size=12)),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20,l=5,r=5,t=40),
        # Blank annotation pinned to the lower-left of the paper area.
        annotations=[ dict(
            text=' ',
            showarrow=False,
            xref="paper", yref="paper",
            x=0.005, y=-0.002 ) ],
        # Hide grid, zero line and tick labels on both axes.
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)

fig.update_layout(width=1000, height=600, plot_bgcolor='rgb(255, 255, 255)')
fig.show()

In [None]:
import dash

# Dash 2.0 folded the component libraries into the main package. Fall back to
# the standalone (now deprecated) packages only when that import is unavailable.
try:
    from dash import dcc, html
except ImportError:
    import dash_core_components as dcc
    import dash_html_components as html

# Minimal Dash app whose layout is a single Graph component showing `fig`.
app = dash.Dash()
app.layout = html.Div([
    dcc.Graph(figure=fig)
])

# Prefer app.run where the installed Dash provides it; older releases only
# expose run_server. The keyword arguments are passed through unchanged.
start_server = app.run if hasattr(app, "run") else app.run_server
start_server(debug=True, use_reloader=False)