In [1]:
import numpy as np
import pandas as pd

## Build the Transition Matrix of Probabilities is given
### - on rows we have old states
### - on columns we have new state

In [2]:
edges = pd.read_csv(f'edges.20211117_v3.csv',
                    low_memory=False, 
                    encoding='utf-8',
                    float_precision='round_trip')

In [3]:
edges.shape

(10015, 9)

In [4]:
edges.head()

Unnamed: 0,HRTB Id,Event Effective Date,seq_id,position_from,position_to,upd,position_from_upd,position_to_upd,transition_days
0,423,2007-07-10,1.0,game designer(artistique),game designer,1,game designer,game designer,0
1,423,2008-06-02,2.0,game designer,camera specialist,0,game designer,camera specialist,0
2,423,2008-12-19,3.0,camera specialist,game designer,0,camera specialist,game designer,0
3,423,2011-02-08,4.0,game designer,audio designer,0,game designer,audio designer,0
4,464,2010-09-27,1.0,network administrator(services professionnels),senior applications admin,1,network administrator,senior applications admin,0


In [5]:
edges_agg = edges.groupby(['position_from_upd','position_to_upd'])['HRTB Id'].count().to_frame().reset_index()

In [6]:
edges_agg

Unnamed: 0,position_from_upd,position_to_upd,HRTB Id
0,2d compositor,2d compositor,5
1,2d compositor,2d-3d artist,1
2,2d compositor,graphic designer,1
3,2d compositor,production coordinator,1
4,2d compositor,project manager ( qc)(design),1
...,...,...,...
3651,webmestre,conseiller - com internes,1
3652,world director,creative director,1
3653,worldwide music director,"svp, music special advisor to the president",1
3654,wrangler,3d animator,1


In [7]:
allstates = set(edges_agg['position_from_upd']).union(set(edges_agg['position_to_upd']))
n = len(allstates)
print("n=",n)

n= 1256


In [8]:
# for i,source in enumerate(allstates):
#     print(i,source)

In [9]:
# tmp = [[0 for i in range(n)] for j in range(n)]

In [10]:
# tmp[0][0]

0

In [11]:
tmp = [[0 for i in range(n)] for j in range(n)]
for i,source in enumerate(allstates):
    for j,dest in enumerate(allstates):
        df = edges_agg[(edges_agg['position_from_upd'] == source) & (edges_agg['position_to_upd'] == dest)]['HRTB Id']
        if df.shape[0] != 0:
            tmp[i][j] = df.iloc[0]

In [13]:
p = [[0 for i in range(n)] for j in range(n)]
for i in range(n):
    i_sum = 0
    for j in range(n):
        i_sum = i_sum + tmp[i][j]
    if i_sum > 0:
        for j in range(n):
            p[i][j] = tmp[i][j]/i_sum
    else:
        p[i][i] = 1

In [14]:
# The statespace
# states = ["Developer","TeamLead","Assistant","Director"]
states = list(allstates)

In [15]:
if sum(p[0])+sum(p[1])+sum(p[2]) != 3:
    print("Somewhere, something went wrong. Transition matrix, perhaps?")
else: print("All is gonna be okay, you should move on!! ;)")

All is gonna be okay, you should move on!! ;)


## Build the graph with:
### 1 - vertex = states
### 2- edges weight = -log(p(i,j)

In [16]:
import networkx as nx  # For the magic
import matplotlib.pyplot as plt  # For plotting

In [17]:
G = nx.MultiDiGraph()
labels={}
edge_labels={}

for i, origin_state in enumerate(states):
    for j, destination_state in enumerate(states):
        rate = -np.log(0.01+p[i][j])
        if rate > 0:
            G.add_edge(origin_state,
                       destination_state,
                       weight=rate,
                       label="{:.02f}".format(rate))
            edge_labels[(origin_state, destination_state)] = label="{:.02f}".format(rate)

In [18]:
# G.nodes()

In [19]:
# G.edges()

In [20]:
pos=nx.spring_layout(G)
pos

{'character modeler': array([-0.03034969, -0.20620903]),
 'it coordinator(services professionnels)': array([-0.64583492, -0.4490031 ]),
 'spécialist utilisabilité (que)(services professionnels)': array([-0.08115973, -0.2004962 ]),
 'erg & partnership coordinator': array([ 0.27436176, -0.60543668]),
 'tgo services  director': array([0.56576514, 0.49382001]),
 'analyste controle de gestion': array([0.20310867, 0.74799132]),
 'assistant artistic director - graphic': array([0.5026505 , 0.51197582]),
 'payroll manager': array([-0.36739352,  0.62916696]),
 'executive assistant - ceo': array([0.18376943, 0.75314707]),
 'assistante administrative': array([-0.33305421,  0.73112267]),
 'level design team lead': array([-0.26527518,  0.02389636]),
 'virtualization administrator': array([ 0.59476882, -0.12989363]),
 'project lead - game design': array([-0.37358734, -0.67150635]),
 'project lead - game design(informatique)': array([ 0.06276168, -0.86013049]),
 'analyste antipiratage': array([0.56763

In [None]:
# plt.figure(figsize=(14,7))
# node_size = 200
# pos = {state:list(state) for state in states}
# pos = {state:list(state) for state in [(i[0],j[0]) for i in enumerate(states) for j in enumerate(states)]}
# pos = nx.spring_layout(G)
# # pos = nx.spectral_layout(G, weight='weight',scale=0.5)
# nx.draw_networkx_edges(G, pos,width=1.0,alpha=0.5)
# nx.draw_networkx_labels(G, pos, font_weight=2)
# nx.draw_networkx_edge_labels(G, pos, edge_labels)

# plt.axis('off');

In [22]:
# plt.figure(figsize=(14,7))
# pos = nx.spring_layout(G)
# nx.draw(G, pos, with_labels=True, connectionstyle='arc3, rad = 0.1')
# edge_labels=dict([((u,v,),d['weight'])
#              for u,v,d in G.edges(data=True)])

# nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, label_pos=0.3, font_size=7)
# plt.show()

## Calculate the shortest (ie. most probable path) between two states

In [25]:
state_from = '3d animator'
state_to = 'production coordinator'

In [26]:
nx.shortest_path(G, source=state_from, target=state_to, weight='weight', method='dijkstra')

['3d animator', 'production coordinator']