In [1]:
import glob, json
from pprint import pprint
from collections import Counter
import itertools

In [2]:
users = {}

for fname in glob.glob('jumpstart/*.txt'):
    with open(fname) as file:
        contents = json.load(file)
        results = contents.get('results')
        for result in results:
            uid = result.get('id')
            users[uid] = result

In [3]:
print(f"This dataset has {len(users.keys())} users.")

This dataset has 73154 users.


In [4]:
companies = Counter()

In [5]:
for user in users.values():
    experience = user.get('profile', {}).get('experience', [])
    for job in experience:
        org = job.get('organization', {}).get('name')
        companies[org] += 1

In [6]:
edges = []
nodes = set()

for user in users.values():
    experience = user.get('profile', {}).get('experience', [])
    orgs = []
    for job in experience:
        org = job.get('organization', {}).get('name')
        if companies[org] > 15:
            orgs.append(org)

    if len(orgs) > 1:
        for edge in list(itertools.combinations(orgs, 2)):
            edges.append(edge)
        nodes.update(orgs)

In [7]:
import plotly.graph_objects as go
import networkx as nx

In [8]:
G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)

In [9]:
graph_points = nx.spring_layout(G)

In [10]:
print(len(nodes), len(edges))

405 10952


In [11]:
edge_x = []
edge_y = []

for edge in G.edges():
    x0, y0 = graph_points.get(edge[0])
    x1, y1 = graph_points.get(edge[1])
    edge_x.append(x0)
    edge_x.append(x1)
    edge_x.append(None)
    edge_y.append(y0)
    edge_y.append(y1)
    edge_y.append(None)

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1.0, color='#888'),
    hoverinfo='none',
    mode='lines')

node_x = []
node_y = []
hover_text = []
for node in G.nodes():
    x, y = graph_points[node]
    node_x.append(x)
    node_y.append(y)
    text = node + '<br>'
    text += '<br>'.join(map(str, list(Counter([edge.remove(node) for edge in edges if node in edge]).most_common(5))))
    hover_text.append(text)

AttributeError: 'tuple' object has no attribute 'remove'

In [None]:
node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hovertext=hover_text,
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        reversescale=True,
        color=[],
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line_width=2))

In [None]:
node_adjacencies = []
node_text = []
for node, adjacencies in enumerate(G.adjacency()):
    node_adjacencies.append(len(adjacencies[1]))
    node_text.append('# of connections: '+str(len(adjacencies[1])))

node_trace.marker.color = node_adjacencies
node_trace.text = node_text

In [None]:
fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                title='Recruitment Relationships among Companies',
                titlefont_size=16,
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    text="Data from Jumpstart. ",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                )
fig.show(renderer='browser')