## Libs & config

In [1]:
import pandas as pd
import numpy as np
import csv
import datetime
from datetime import date, timedelta
import json
from ast import literal_eval

In [2]:
import plotly.express as px

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.style.use('seaborn-white')

In [4]:
%config Completer.use_jedi = False

In [5]:
# Run parameters for this notebook.
project = 'all'             # interpolated into the statistics CSV and graph pickle file names
degrees = 1                 # not referenced by the visible cells (calls below pass the degree literally) — TODO confirm it is still needed
thr_minscore = 0            # not referenced by the visible cells — TODO confirm it is still needed
edges_as_of = '20210412'    # interpolated into the statistics CSV and graph pickle file names; presumably a YYYYMMDD snapshot date
emp_as_of = '202103'        # not referenced by the visible cells; presumably a YYYYMM snapshot tag — TODO confirm

## Read statistics

In [6]:
# Per-employee statistics for the configured project/snapshot, keyed by office email.
# verify_integrity=True makes set_index raise if an email appears more than once.
statistics_std = (
    pd.read_csv(
        f'../../data/inter/statistics_perf_std_{project}_{edges_as_of}.csv',
        low_memory=False,
        float_precision='round_trip',
    )
    .set_index('Office Email', verify_integrity=True)
)

In [7]:
statistics_std.shape

(2864, 102)

In [8]:
statistics_std.groupby('Installment Group2')['Installment Group2'].count().sort_values(ascending=False).head(10)

Installment Group2
Prod           1889
OtherProd       875
InterProjet     100
Name: Installment Group2, dtype: int64

## Read scores

In [9]:
# Score table, keyed by office email (duplicates raise because of verify_integrity=True).
scores = (
    pd.read_csv(
        f'../../data/inter/scores.csv',
        low_memory=False,
        float_precision='round_trip',
    )
    .set_index('Office Email', verify_integrity=True)
)

In [10]:
scores.shape

(2864, 1)

In [11]:
scores.sample()

Unnamed: 0_level_0,score
Office Email,Unnamed: 1_level_1
andre.roy2@ubisoft.com,0.454067


## Join statistics with score

In [12]:
statistics_std.join(scores).shape

(2864, 103)

In [13]:
final = statistics_std.join(scores)

## RAINBOW 6 SIEGE

In [14]:
rainb_std = final[final['cust_group'] == 'RAINBOW SIX SIEGE']
rainb_std.shape

(320, 103)

In [15]:
def _rank_column(column, label, ascending=False):
    """Rank `rainb_std[column]` and return the ranks as a Series named `label`."""
    return rainb_std[column].rank(ascending=ascending).rename(label)


# Descending ranks: rank 1 goes to the largest value of the measure.
rank_pagerank_std = _rank_column('pagerank_centr_zscore', 'rank_pagerank_std')
rank_pagerank = _rank_column('pagerank_centr', 'rank_pagerank')

rank_indegree_std = _rank_column('indegree_w_zscore', 'rank_indegree_std')
rank_indegree = _rank_column('indegree_w', 'rank_indegree')

rank_closeness_std = _rank_column('closeness_centr_zscore', 'rank_closeness_std')
rank_closeness = _rank_column('closeness_centr', 'rank_closeness')

rank_between_std = _rank_column('between_centr_zscore', 'rank_between_std')
rank_between = _rank_column('between_centr', 'rank_between')

# Constraint is ranked ascending: rank 1 goes to the smallest constraint value.
rank_constraint_std = _rank_column('constraint_centr_zscore', 'rank_constraint_std', ascending=True)
rank_constraint = _rank_column('constraint_centr', 'rank_constraint', ascending=True)

In [16]:
# Columns shown next to the rank columns in the tables below.
# ('cust_group' and 'Job Group' are currently disabled.)
show_cols = [
    # employee / job attributes
    'HRTB ID',
    'Local Job Montreal',
    'cust_years_seniority',
    'Local Job Level Montreal',
    'Is Manager',
    'Direct Team Size',
    'KP',
    'Final KP Status',
    # 2021 evaluation columns
    'Achieve_2021',
    'LeadershipMindset_2021',
    'Challenge_2021',
    'Collaborate_2021',
    'JobExpertise_2021',
    # 2020 evaluation columns
    'Achieve_2020',
    'LeadershipMindset_2020',
    'Challenge_2020',
    'Collaborate_2020',
    'JobExpertise_2020',
    # raw centrality value
    'pagerank_centr',
]

In [17]:
# Display columns plus every rank Series (raw measures first, then the *_zscore
# based ones), attached one join at a time on the shared email index.
allcentr = rainb_std[show_cols]
for _rank_series in (
    rank_pagerank,
    rank_indegree,
    rank_closeness,
    rank_between,
    rank_constraint,
    rank_pagerank_std,
    rank_indegree_std,
    rank_closeness_std,
    rank_between_std,
    rank_constraint_std,
):
    allcentr = allcentr.join(_rank_series)

## Ranks correlation (std vs non std)

In [18]:
# allcentr[['rank_pagerank','rank_indegree']].corr(method='spearman')

In [19]:
allcentr[['rank_pagerank','rank_pagerank_std']].corr(method='spearman')

Unnamed: 0,rank_pagerank,rank_pagerank_std
rank_pagerank,1.0,0.605446
rank_pagerank_std,0.605446,1.0


In [20]:
allcentr[['rank_indegree','rank_pagerank_std']].corr(method='spearman')

Unnamed: 0,rank_indegree,rank_pagerank_std
rank_indegree,1.0,0.59273
rank_pagerank_std,0.59273,1.0


In [21]:
allcentr[['rank_closeness','rank_closeness_std']].corr(method='spearman')

Unnamed: 0,rank_closeness,rank_closeness_std
rank_closeness,1.0,0.731963
rank_closeness_std,0.731963,1.0


In [22]:
allcentr[['rank_constraint','rank_constraint_std']].corr(method='spearman')

Unnamed: 0,rank_constraint,rank_constraint_std
rank_constraint,1.0,0.763334
rank_constraint_std,0.763334,1.0


In [23]:
allcentr[['rank_between','rank_between_std']].corr(method='spearman')

Unnamed: 0,rank_between,rank_between_std
rank_between,1.0,0.866571
rank_between_std,0.866571,1.0


## Top Influence (pagerank)

In [24]:
pd.set_option("max_colwidth", 40)

In [25]:
(
    allcentr[show_cols + ['rank_pagerank_std']]
            .dropna()
            .sort_values(by=['rank_pagerank_std'], ascending = True)
            .head(30)
).index

Index(['jean-marie.seznec@ubisoft.com', 'benjamin.bounous@ubisoft.com',
       'manon.barriault@ubisoft.com', 'emmanuel.larive@ubisoft.com',
       'alex.lima@ubisoft.com', 'lawrence.hum@ubisoft.com',
       'emilien.lomet@ubisoft.com', 'alexander.karpazis@ubisoft.com',
       'nicolas.drolet@ubisoft.com', 'kevin.brodeur@ubisoft.com',
       'aurelie.bosc@ubisoft.com', 'thierry.huguenin@ubisoft.com',
       'julien.kauffmann@ubisoft.com', 'nicolas.fleury@ubisoft.com',
       'simon.ducharme@ubisoft.com', 'benjamin.azoulay@ubisoft.com',
       'daniel.bingham@ubisoft.com', 'frederic.ressaire@ubisoft.com',
       'mazen.elbawab@ubisoft.com', 'alysa-danielle.saliba@ubisoft.com',
       'mehdi.diallo@ubisoft.com', 'kenneth.bourgon@ubisoft.com',
       'etienne.dussault@ubisoft.com', 'leila.rimond@ubisoft.com',
       'nathalie.pinard@ubisoft.com', 'karim.osman@ubisoft.com',
       'vincent.loignon@ubisoft.com', 'claire.magarshack@ubisoft.com',
       'mathieu.parent2@ubisoft.com', 'alex.bu

In [26]:
# (
#     allcentr[show_cols + ['rank_pagerank','pagerank_centr']]
#             .dropna()
#             .sort_values(by=['rank_pagerank'], ascending = True)
#             .head(25)
# ).T

### Managers

In [27]:
(
    allcentr[allcentr['Is Manager'] == 1][show_cols + ['rank_pagerank','rank_constraint']]
            .dropna()
            .sort_values(by=['rank_pagerank'], ascending = True)
            .head(25)
).T

Office Email,manon.barriault@ubisoft.com,alexander.karpazis@ubisoft.com,benjamin.azoulay@ubisoft.com,paul.vlasie@ubisoft.com,salome.jugan@ubisoft.com,nathalie.pinard@ubisoft.com,deanna.stanley@ubisoft.com,mazen.elbawab@ubisoft.com,mohammed.benhenneda@ubisoft.com,thanh-liem.tran@ubisoft.com,...,laura.metzlaff@ubisoft.com,david.riley@ubisoft.com,claire.magarshack@ubisoft.com,mark.besner@ubisoft.com,fabien.jean-claude@ubisoft.com,yohann.marin@ubisoft.com,aurelie.bosc@ubisoft.com,raphael.boyon@ubisoft.com,marieve.cadoret@ubisoft.com,jason.egginton@ubisoft.com
HRTB ID,15417,19513,32436,2403,9134,22684,38591,12375,37510,4199,...,22655,53431,31091,2430,2440,27997,63607,17078,19641,23126
Local Job Montreal,Associate Producer,Artistic Director,Technical Director Online Operations,Technology Director,Associate Producer,Production Manager,Associate Producer,Project Lead - Programming,Business Strategy and Live Performan...,Project Lead - Programming,...,Production Manager,Project Lead - QC,Product Manager - Mkg,Project Lead - Programming,Graphic Design Team Lead,Live Operations Manager,UX Team Lead,Graphic Design Team Lead,Production Coordinator,Programming Team Lead
cust_years_seniority,11.126027,9.572603,6.315068,18.186301,12.969863,8.616438,5.068493,3.975342,5.375342,4.838356,...,8.386301,3.361644,4.013699,9.40274,17.991781,7.172603,1.789041,11.090411,9.572603,8.424658
Local Job Level Montreal,4,4,4,5,4,3,4,4,4,5,...,4,3,3,5,3,3,3,4,3,4
Is Manager,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
Direct Team Size,8.0,5.0,7.0,10.0,6.0,4.0,4.0,17.0,3.0,3.0,...,3.0,18.0,1.0,6.0,9.0,3.0,9.0,10.0,1.0,4.0
KP,Yes,Yes,Yes,Yes,Yes,No,No,Yes,Yes,Yes,...,Yes,No,No,Yes,No,Yes,No,Yes,No,No
Final KP Status,Key Performer,Key Performer,Key Performer,Key Performer,Key Potential,Non KP,Waiting List,Key Performer,Key Potential,Key Performer,...,Key Potential,Non KP,Non KP,Key Performer,Non KP,Key Potential,Non KP,Key Performer,Non KP,Non KP
Achieve_2021,4.0,3.0,3.0,3.0,4.0,3.0,4.0,4.0,3.0,4.0,...,3.0,4.0,3.0,3.0,3.0,4.0,4.0,3.0,3.0,4.0
LeadershipMindset_2021,3.0,3.0,3.0,2.0,3.0,3.0,2.0,4.0,2.0,4.0,...,3.0,3.0,3.0,4.0,3.0,2.0,4.0,3.0,3.0,4.0


### Non-managers

In [28]:
(
    allcentr[allcentr['Is Manager'] == 0][show_cols + ['rank_pagerank','rank_constraint']]
            .dropna()
            .sort_values(by=['rank_pagerank'], ascending = True)
            .head(25)
)

Unnamed: 0_level_0,HRTB ID,Local Job Montreal,cust_years_seniority,Local Job Level Montreal,Is Manager,Direct Team Size,KP,Final KP Status,Achieve_2021,LeadershipMindset_2021,...,Collaborate_2021,JobExpertise_2021,Achieve_2020,LeadershipMindset_2020,Challenge_2020,Collaborate_2020,JobExpertise_2020,pagerank_centr,rank_pagerank,rank_constraint
Office Email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
alex.lima@ubisoft.com,19549,Realization Director,9.556164,4,0,0.0,No,Non KP,4.0,3.0,...,3.0,3.0,4.0,4.0,3.0,3.0,3.0,0.001557,8.0,265.0
thierry.huguenin@ubisoft.com,21173,Game Release Specialist,3.994521,3,0,0.0,No,Non KP,3.0,2.0,...,4.0,3.0,3.0,2.0,3.0,3.0,4.0,0.001356,12.0,42.0
jean-marie.seznec@ubisoft.com,2598,Assistant Technical Director - Graphic,17.473973,3,0,0.0,No,Non KP,3.0,3.0,...,3.0,2.0,3.0,3.0,2.0,3.0,2.0,0.001233,16.0,307.0
leila.rimond@ubisoft.com,28434,Production Manager,7.005479,3,0,0.0,No,Non KP,3.0,3.0,...,3.0,3.0,3.0,2.0,3.0,3.0,3.0,0.001126,20.0,143.0
nicolas.drolet@ubisoft.com,16434,Assistant Artistic Director - Graphic,10.745205,4,0,0.0,No,Non KP,3.0,3.0,...,3.0,4.0,3.0,3.0,3.0,3.0,4.0,0.001077,22.0,311.0
emmanuel.larive@ubisoft.com,54110,UX Designer,3.208219,4,0,0.0,No,Non KP,3.0,3.0,...,3.0,3.0,4.0,3.0,4.0,3.0,4.0,0.001047,23.0,75.0
gaelle.robert@ubisoft.com,3008,Production Manager,16.589041,4,0,0.0,Yes,Key Performer,3.0,4.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,0.000814,28.0,10.0
myriam.martin@ubisoft.com,19330,Project Manager,9.652055,2,0,0.0,No,Non KP,4.0,4.0,...,4.0,3.0,4.0,2.0,3.0,3.0,2.0,0.000767,35.0,190.0
emilien.lomet@ubisoft.com,26935,Game Designer,6.986301,3,0,0.0,No,Non KP,4.0,4.0,...,3.0,3.0,4.0,4.0,4.0,3.0,4.0,0.000683,41.0,19.0
julien.kauffmann@ubisoft.com,31118,Technical Architect,6.526027,4,0,0.0,Yes,Key Potential,4.0,3.0,...,3.0,4.0,3.0,3.0,3.0,3.0,4.0,0.000617,48.0,111.0


## Strong ties (high constraint, high subgraph density)

In [29]:
# allcentr['low_density_low_indegree'] = allcentr.apply(lambda x: (x['rank_constraint'] + x['rank_indegree'])/2, axis = 1)
# (
#     allcentr[show_cols + ['rank_constraint','rank_indegree','low_density_low_indegree']]
#             .dropna()
#             .sort_values(by=['low_density_low_indegree'], ascending = False)
#             .head(10)
# ).T

In [30]:
(
    allcentr[show_cols + ['rank_constraint','rank_indegree']]
            .dropna()
            .sort_values(by=['rank_constraint'], ascending = False)
            .head(10)
).T

Office Email,nicolas.drolet@ubisoft.com,jimmy.senechal@ubisoft.com,axel.laurelut@ubisoft.com,iohann.rabeson@ubisoft.com,jean-marie.seznec@ubisoft.com,christopher.ball@ubisoft.com,simon.tessier@ubisoft.com,loic.leinot@ubisoft.com,arman.akopian@ubisoft.com,guilherme.malfatti@ubisoft.com
HRTB ID,16434,49700,65514,36658,2598,19210,7572,51778,2186,61064
Local Job Montreal,Assistant Artistic Director - Graphic,Generalist Programmer,Web Developer,Generalist Programmer,Assistant Technical Director - Graphic,Generalist Programmer,Director,Tools Programmer,Concept Artist,Tools Programmer
cust_years_seniority,10.745205,3.227397,1.290411,3.438356,17.473973,6.526027,13.772603,3.016438,7.983562,1.923288
Local Job Level Montreal,4,2,2,2,3,3,4,2,4,2
Is Manager,0,0,0,0,0,0,0,0,0,0
Direct Team Size,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
KP,No,No,No,No,No,No,No,No,No,No
Final KP Status,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP
Achieve_2021,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0
LeadershipMindset_2021,3.0,3.0,4.0,2.0,3.0,3.0,2.0,3.0,3.0,3.0


## Diverse ties (low constraint)

In [31]:
(
    allcentr[show_cols + ['rank_constraint','rank_indegree']]
            .dropna()
            .sort_values(by=['rank_constraint'], ascending = True)
            .head(10)
).T

Office Email,steve.gagne@ubisoft.com,salome.jugan@ubisoft.com,bruno.senecal@ubisoft.com,raphael.parent@ubisoft.com,sebastien.levesque@ubisoft.com,ken-ly.duong@ubisoft.com,olivier.couture@ubisoft.com,matthieu.aubry@ubisoft.com,jerome.nantais-desrochers@ubisoft.com,gaelle.robert@ubisoft.com
HRTB ID,2561,9134,2958,2158,7772,26141,6316,31400,55706,3008
Local Job Montreal,Programming Team Lead,Associate Producer,Project Lead - QC,Project Lead - Programming,Production Manager,Programming Team Lead,Technical Director - Graphic,Programming Team Lead,Technical Designer,Production Manager
cust_years_seniority,16.435616,12.969863,15.750685,21.865753,11.857534,7.578082,14.561644,6.468493,2.939726,16.589041
Local Job Level Montreal,3,4,3,5,4,3,4,3,3,4
Is Manager,1,1,1,1,1,1,0,1,0,0
Direct Team Size,2.0,6.0,11.0,7.0,3.0,4.0,0.0,8.0,0.0,0.0
KP,No,Yes,No,Yes,Yes,Yes,Yes,No,No,Yes
Final KP Status,Non KP,Key Potential,Non KP,Key Performer,Key Performer,Key Performer,Key Performer,Waiting List,Non KP,Key Performer
Achieve_2021,4.0,4.0,3.0,3.0,3.0,4.0,3.0,4.0,4.0,3.0
LeadershipMindset_2021,4.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,3.0,4.0


## Important Hubs (betweenness)

In [32]:
(
    allcentr[show_cols + ['rank_between']]
            .dropna()
            .sort_values(by=['rank_between'], ascending = True)
            .head(25)
)

Unnamed: 0_level_0,HRTB ID,Local Job Montreal,cust_years_seniority,Local Job Level Montreal,Is Manager,Direct Team Size,KP,Final KP Status,Achieve_2021,LeadershipMindset_2021,Challenge_2021,Collaborate_2021,JobExpertise_2021,Achieve_2020,LeadershipMindset_2020,Challenge_2020,Collaborate_2020,JobExpertise_2020,pagerank_centr,rank_between
Office Email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
marieve.cadoret@ubisoft.com,19641,Production Coordinator,9.572603,3,1,1.0,No,Non KP,3.0,3.0,3.0,2.0,3.0,4.0,4.0,4.0,3.0,4.0,0.000788,1.0
mark.besner@ubisoft.com,2430,Project Lead - Programming,9.40274,5,1,6.0,Yes,Key Performer,3.0,4.0,3.0,3.0,3.0,3.0,4.0,3.0,2.0,2.0,0.000879,2.0
raphael.parent@ubisoft.com,2158,Project Lead - Programming,21.865753,5,1,7.0,Yes,Key Performer,3.0,3.0,3.0,3.0,3.0,4.0,3.0,3.0,4.0,2.0,0.0005,3.0
frederic.denis@ubisoft.com,57618,Tools Programmer,2.517808,3,1,3.0,No,Waiting List,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,4.0,4.0,0.000394,4.0
myriam.martin@ubisoft.com,19330,Project Manager,9.652055,2,0,0.0,No,Non KP,4.0,4.0,4.0,4.0,3.0,4.0,2.0,3.0,3.0,2.0,0.000767,5.0
maxime.lenglet@ubisoft.com,39384,Development Tester,4.953425,2,0,0.0,No,Non KP,3.0,3.0,4.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,9.5e-05,6.0
mathieu.parent@ubisoft.com,24789,Engine Programmer,6.868493,3,0,0.0,No,Non KP,4.0,4.0,3.0,4.0,3.0,4.0,3.0,3.0,3.0,4.0,0.000227,8.0
julien.kauffmann@ubisoft.com,31118,Technical Architect,6.526027,4,0,0.0,Yes,Key Potential,4.0,3.0,4.0,3.0,4.0,3.0,3.0,3.0,3.0,4.0,0.000617,9.0
luis-carlos.morales-nava@ubisoft.com,49350,Development Tester,4.071233,2,0,0.0,No,Non KP,3.0,2.0,4.0,3.0,4.0,3.0,3.0,3.0,4.0,4.0,0.000143,10.0
amelie.bernard@ubisoft.com,40583,Development Tester,2.572603,2,0,0.0,No,Non KP,4.0,4.0,4.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,0.000136,11.0


## High closeness and betweenness and low degree

In [33]:
# These nodes achieved a central place in the network with less “effort” (i.e., fewer connections)
# (
#     rainb_std.sort_values(by=['combined_zscore'], ascending = False)
#                                         .head(20)[show_cols]
# ) 

## Read nx from pickle file

In [34]:
import networkx as nx

In [35]:
nx_collab_big = nx.read_gpickle(f"../../data/inter/nx_collab_{project}_{edges_as_of}.gpickle")

In [36]:
len(nx_collab_big.nodes())

3308

In [37]:
# pd.Series(nx.degree_histogram(nx_collab_big)).hist()

## Extract subgraph

In [38]:
nodeslistrainb = list(rainb_std.reset_index()["Office Email"])
# nodeslistlabru = list(labru.reset_index()["Office Email"])

In [39]:
# nx_collab_rainb = nx_collab_big.subgraph(nodeslistrainb)
# nx_collab_labru = nx_collab_big.subgraph(nodeslistlabru)

In [40]:
# len(nx_collab_rainb.nodes())

In [41]:
# len(nx_collab_labru.nodes())

## Edges within/outside same manager team

In [42]:
def extract(lst, i):
    """Return, in order, the element at position `i` of every item in `lst`."""
    picked = []
    for entry in lst:
        picked.append(entry[i])
    return picked

In [43]:
# One [node, neighbour] pair per distinct neighbour of every node, where a
# neighbour is the other endpoint of any outgoing or incoming edge.
alledges = []
for node in list(nx_collab_big.nodes()):
    targets = extract(list(nx_collab_big.out_edges([node])), 1)
    sources = extract(list(nx_collab_big.in_edges([node])), 0)
    nblist = list(set(targets + sources))
    for nb in nblist:
        alledges.append([node, nb])


In [44]:
len(alledges)

86312

In [45]:
pdegdes = pd.DataFrame(alledges, columns = ['user', 'connection'])

In [46]:
edges_rainb = pdegdes[pdegdes['user'].isin(nodeslistrainb)]

In [47]:
edges_rainb['user'].unique().shape

(319,)

In [48]:
# edges_rainb.head()

In [49]:
# Edge list of the selected group enriched with the manager id and HRTB id of
# both endpoints. Both joins match against the email index of statistics_std.
empl_managers =(
    edges_rainb
        # attributes of the 'user' endpoint
        .set_index('user')
        .join(statistics_std[['Direct Manager HRTB ID', 'HRTB ID']])
        .reset_index()
        # only has an effect if reset_index produced a column literally named 'index'
        .rename(columns={'index':'user'})
        # attributes of the 'connection' endpoint; the clashing right-hand columns get the ' connection' suffix
        .set_index('connection')
        .join(statistics_std[['Direct Manager HRTB ID', 'HRTB ID']], rsuffix=' connection')   
        .reset_index()
        # give the user-side columns an explicit ' user' suffix to mirror the connection-side names
        .rename(columns={'index':'connection','Direct Manager HRTB ID':'Direct Manager HRTB ID user', 'HRTB ID':'HRTB ID user'})
    )

In [50]:
# empl_managers.head(5)

In [51]:
empl_managers.shape

(10671, 6)

In [52]:
# Flag edges whose connection is the user's direct manager (1) or not (0).
# Column-wise comparison instead of a row-wise apply; a missing id on either
# side compares unequal and therefore yields 0.
empl_managers['connection_manager'] = (
    empl_managers['Direct Manager HRTB ID user']
    .eq(empl_managers['HRTB ID connection'])
    .astype(int)
)

In [53]:
# Flag edges inside a team (1) or across teams (0): both endpoints report to the
# same manager, or the connection is the user's own manager.
# Column-wise comparison instead of a row-wise apply; missing manager ids never match.
empl_managers['same_team'] = (
    empl_managers['Direct Manager HRTB ID user'].eq(empl_managers['Direct Manager HRTB ID connection'])
    | empl_managers['connection_manager'].eq(1)
).astype(int)

### Percent of empl having a connection with their manager

In [54]:
user_manager = empl_managers.groupby('user')['connection_manager'].sum().to_frame()

In [55]:
user_manager[user_manager['connection_manager'] == 1].shape[0] / user_manager.shape[0] * 100

97.17868338557993

### Percent of same-team connections in total connections

In [56]:
user_cnx = (
    empl_managers.groupby('user')
    .agg({'same_team':'sum', 'connection':'count'})
    .rename(columns={'same_team':'cnx_same_team', 'connection':'cnx_tot'})
)

In [57]:
user_cnx.shape

(319, 2)

In [58]:
# user_cnx.loc['pierre-olivier.clement@ubisoft.com']

In [59]:
user_cnx['%_sameteam'] = (user_cnx['cnx_same_team'] / user_cnx['cnx_tot']) * 100

In [60]:
# user_cnx.head()

In [61]:
user_cnx[user_cnx['cnx_tot'] == 0].shape

(0, 3)

In [62]:
user_cnx['%_sameteam'].mean(), user_cnx['%_sameteam'].median()

(26.982547428249596, 20.0)

## Centrality measures correlations

In [63]:
# fig, ax = plt.subplots(figsize=(10,10))  
# sns.heatmap(statistics_std[(statistics_std['Local Job Level Montreal'].isin([4,5])) & (statistics_std['Is Manager'] == 1)] \
#             [['indegree_centr','pagerank_centr','between_centr','constraint_centr','closeness_centr','respect_cnx_size','trust_cnx_size','diffjob_cnx_size',
#               'Achieve_2021','Challenge_2021','Collaborate_2021','JobExpertise_2021','LeadershipMindset_2021']] \
#            .corr(method='pearson'), cmap="Blues", annot = True, annot_kws={"size":9})
# plt.show()

In [64]:
# fig, ax = plt.subplots(figsize=(8,8))  
# sns.heatmap(statistics_std \
#             [['pagerank_centr_zscore','between_centr_zscore','constraint_centr_zscore','closeness_centr_zscore',
#               'Achieve','LeadershipMindset']] \
#            .corr(method='pearson'), cmap="Blues", annot = True, annot_kws={"size":9})
# plt.show()

## Visualization local network

In [65]:
# def collab_list(email, net):
#     ### nhood list
#     import operator
#     collab_list = []
#     for d in net.get_edges():
#         if d['from'] in [email]:
#             dct = {}
#             dct['key'] = d['to']
#             dct['weight'] = d['weight']
#             collab_list.append(dct)
#         if d['to'] in [email]:
#             dct = {}
#             dct['key'] = d['from']
#             dct['weight'] = d['weight']
#             collab_list.append(dct)            
#     l = sorted(collab_list, key=operator.itemgetter('weight'), reverse=True)
#     ord_l = [k['key'] + ': ' + str(k['weight']) + ' cnx' for k in l]
    
#     return ord_l

In [66]:
# centrality = 'between_centr'
# centrality_dict[centrality]['pierre-olivier.clement@ubisoft.com']

In [67]:
# Per-measure score Series (indexed like allcentr) that rescale() min-max scales for node sizing.
# PageRank and betweenness ranks are flipped (1 - rank) so that rank 1 carries the largest score.
# NOTE(review): rank_constraint is used as-is, so larger constraint ranks give larger
# scores — confirm this asymmetry with the other two measures is intended.
centrality_dict = {'pagerank_centr': 1-allcentr['rank_pagerank'],
                   'between_centr': 1-allcentr['rank_between'], 
                   'constraint_centr': allcentr['rank_constraint']
                  }

In [68]:
def rescale(node_id, centrality):
    """Min-max scale the score of `node_id` for `centrality` into [0, 1].

    The score is read from the module-level `centrality_dict[centrality]` Series.

    Returns 0 when the measure is unknown (e.g. `centrality=None`), when the
    node has no entry in the Series, when its score is missing (NaN), or when
    all scores are equal (zero range), so callers always get a usable number.
    """
    try:
        scores = centrality_dict[centrality]
        value = scores[node_id]
    except (KeyError, IndexError, TypeError):
        # Unknown measure, or a node outside the ranked group.
        return 0

    lowest = scores.min()
    span = scores.max() - lowest
    # Guard against NaN scores and a degenerate range instead of returning NaN/inf.
    if pd.isna(value) or pd.isna(span) or span == 0:
        return 0
    return (value - lowest) / span

In [69]:
def get_feature(node, feature):
    """Return the value of column `feature` for the employee `node`.

    `node` is matched against the index of the module-level `statistics_std`
    frame; the index column can also be requested by its own name.

    Returns the string 'Not found' when the node or the column does not exist.
    """
    try:
        known_node = node in statistics_std.index
        if feature is not None and feature == statistics_std.index.name:
            return node if known_node else 'Not found'
        if not known_node or feature not in statistics_std.columns:
            return 'Not found'
        # Direct label lookup: no per-call copy of the whole frame.
        value = statistics_std.loc[node, feature]
    except (KeyError, TypeError):
        return 'Not found'

    # A duplicated index label yields a Series; keep the first match.
    if isinstance(value, pd.Series):
        return value.iloc[0]
    return value

In [70]:
get_feature('madelina.praf@ubisoft.com', 'HRTB ID')

'Not found'

In [71]:
def vis(net, node_ids, centrality = None, colorby = 'Local Job Level Montreal', filename='test.html'):
    """Style the nodes and edges of `net` in place and save it to `filename`.

    Parameters
    ----------
    net : network object exposing `nodes` / `edges` (lists of dicts), `options`,
        `toggle_physics`, `show_buttons`, `inherit_edge_colors` and `save_graph`
        (the notebook passes the result of `build_net_degree`).
    node_ids : collection of node ids drawn in black, or None for no highlight.
    centrality : measure name used by `rescale` for node size and shown in the tooltip.
    colorby : feature used as node group; '' paints non-highlighted nodes grey
        (only when `node_ids` is given).
    filename : path handed to `net.save_graph`.
    """
    # NOTE(review): this sets an attribute spelled 'Color' (capital C) on the edge
    # options — confirm it has the intended effect on rendering.
    net.options.edges.Color = "#FFFFFF"
    net.toggle_physics(True)
    net.show_buttons(filter_=['physics'])
    net.inherit_edge_colors(status=False)

    # (tooltip label, statistics column) pairs, in display order.
    tooltip_fields = [
        ('HRTB ID', 'HRTB ID'),
        ('Manager HRTB ID', 'Direct Manager HRTB ID'),
        ('Job Group', 'Job Group'),
        ('Job', 'Local Job Montreal'),
        ('Custom group', 'cust_group'),
        ('Is Manager', 'Is Manager'),
        ('Job level', 'Local Job Level Montreal'),
        ('Assignment Project', 'Assignment Project'),
    ]

    for node in net.nodes:
        node_id = node['id']
        # Size ranges from 5 (rescaled score 0) to 15 (rescaled score 1).
        node['size'] = (0.5 + rescale(node_id, centrality)) * 10

        # str() keeps the tooltip working for non-string node ids.
        title_parts = ['</br> Id: ' + str(node_id)]
        for label, column in tooltip_fields:
            title_parts.append('</br> ' + label + ': ' + str(get_feature(node_id, column)))
        title_parts.append(f'</br> {centrality} ' + str(get_feature(node_id, centrality)))
        node['title'] = ''.join(title_parts)

        if node_ids is not None and node_id in node_ids:
            node['color'] = 'black'
        elif node_ids is not None and colorby == '':
            node['color'] = 'grey'
        else:
            node['group'] = str(get_feature(node_id, colorby))

    for edge in net.edges:
        edge['title'] = edge['weight']
        edge['value'] = 50 * edge['weight']
    net.save_graph(filename)

In [72]:
def add_nhood(nx_collab, node_ids, vis_collab):
    """Load `nx_collab`, or the neighbourhood of `node_ids` within it, into `vis_collab`.

    With `node_ids` None the whole graph is handed to `vis_collab.from_nx`.
    Otherwise the subgraph induced by the seed nodes plus every endpoint of
    their incoming and outgoing edges is built, its density is printed, and
    that subgraph is loaded instead.
    """
    if node_ids is None:
        vis_collab.from_nx(nx_collab)
        return

    nhood = set()
    for node_id in node_ids:
        nhood.add(node_id)
        for edge in nx_collab.in_edges(node_id):
            nhood.add(edge[0])
            nhood.add(edge[1])
        for edge in nx_collab.out_edges(node_id):
            nhood.add(edge[0])
            nhood.add(edge[1])

    # Induce the subgraph on the graph that was passed in, not on the
    # notebook-level nx_collab_big, so the function honours its argument.
    nx_collab_subgraph = nx.DiGraph(nx_collab.subgraph(nhood))
    print('Graph density: ', nx.density(nx_collab_subgraph))
    vis_collab.from_nx(nx_collab_subgraph)

In [73]:
import pyvis
from pyvis.network import Network
    
def build_net_degree(nx_collab, node_ids, degrees=1) -> pyvis.network.Network:
    """Build a pyvis network for the whole graph or for neighbourhoods of `node_ids`.

    Parameters
    ----------
    nx_collab : directed graph to draw from.
    node_ids : seed node ids, or None to load the whole graph (`degrees` is then ignored).
    degrees : 1 loads the immediate neighbourhood of the seeds;
        2 loads, for each seed, the neighbourhoods of that seed's neighbours.

    Raises
    ------
    ValueError
        If `node_ids` is given and `degrees` is neither 1 nor 2, instead of
        silently returning an empty network.
    """
    # Start from an empty, directed, dark-themed network.
    pyvis_collab = pyvis.network.Network(height='100%',
                                         width='50%',
                                         directed=True,
                                         bgcolor='#222222',
                                         font_color='#FFFFFF',
                                         notebook=False)

    if node_ids is None:
        add_nhood(nx_collab, None, pyvis_collab)
    elif degrees == 1:
        add_nhood(nx_collab, node_ids, pyvis_collab)
    elif degrees == 2:
        for node_id in node_ids:
            # Distinct neighbours of the seed, regardless of edge direction.
            nblist = list(set(
                extract(list(nx_collab.out_edges([node_id])), 1) +
                extract(list(nx_collab.in_edges([node_id])), 0)))
            add_nhood(nx_collab, nblist, pyvis_collab)
    else:
        raise ValueError(f'degrees must be 1 or 2 when node_ids is given, got {degrees!r}')

    return pyvis_collab

## Whole network

In [74]:
# nx_collab_rainb = nx.DiGraph(nx_collab_big.subgraph(nodeslistrainb))
# nx_collab_rainb.remove_nodes_from(list(nx.isolates(nx_collab_rainb)))

In [75]:
# pyvis_collab2 = build_net_degree(nx_collab_rainb, None, None)
# vis(pyvis_collab2, None, centrality_measure, 'Job Group', 'test_whole_r1.html')
# pyvis_collab2

In [76]:
# G =  nx_collab_rainb
# any([node for node in G.nodes(data=True) if node[0] == 'joseph.angel@ubisoft.com'])

## Individuals

#### Pagerank

In [102]:
centrality_measure = 'pagerank_centr'

In [103]:
# node_ids = ['jean-marie.seznec@ubisoft.com']
# pyvis_collab1 = build_net_degree(nx_collab_big, node_ids, 1)
# vis(pyvis_collab1, node_ids, centrality_measure, 'cust_group', 'test_indv_high_pagerank_1.html')
# pyvis_collab1

In [104]:
# node_ids = ['nicolas.drolet@ubisoft.com']
# pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 2)
# vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_high_pagerank_2.html')
# pyvis_collab2

In [105]:
node_ids = ['manon.barriault@ubisoft.com'] # high pagerank, low diversity ties
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_high_pagerank_1.html')
pyvis_collab2

Graph density:  0.199250936329588


<class 'pyvis.network.Network'> |N|=90 |E|=1,596

In [106]:
node_ids = ['alexander.karpazis@ubisoft.com'] # high pagerank, low diversity ties
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_high_pagerank_2.html')
pyvis_collab2

Graph density:  0.185995085995086


<class 'pyvis.network.Network'> |N|=111 |E|=2,271

In [107]:
node_ids = ['emilien.lomet@ubisoft.com'] # high pagerank, low diversity ties
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_high_pagerank_3.html')
pyvis_collab2

Graph density:  0.20938375350140057


<class 'pyvis.network.Network'> |N|=85 |E|=1,495

In [108]:
node_ids = ['thierry.huguenin@ubisoft.com'] # high pagerank, low diversity ties
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_high_pagerank_4.html')
pyvis_collab2

Graph density:  0.18616161616161617


<class 'pyvis.network.Network'> |N|=100 |E|=1,843

In [109]:
node_ids = ['antoine.boisvert@ubisoft.com'] # low influence
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_low_pagerank_1.html')
pyvis_collab2

Graph density:  0.5694444444444444


<class 'pyvis.network.Network'> |N|=9 |E|=41

In [110]:
node_ids = ['laurie.poirier@ubisoft.com'] # low influence
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_low_pagerank_2.html')
pyvis_collab2

Graph density:  0.2415329768270945


<class 'pyvis.network.Network'> |N|=34 |E|=271

In [86]:
node_ids = ['anthony.de-rochefort@ubisoft.com'] # low influence
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_low_pagerank_3.html')
pyvis_collab2

Graph density:  0.5694444444444444


<class 'pyvis.network.Network'> |N|=9 |E|=41

### Constraint (diverse ties)

In [87]:
centrality_measure = 'constraint_centr'

In [88]:
node_ids = ['steve.gagne@ubisoft.com'] # high diversity ties, low influence
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_high_diversity.html')
pyvis_collab2

Graph density:  0.23670290919776668


<class 'pyvis.network.Network'> |N|=83 |E|=1,611

In [89]:
node_ids = ['salome.jugan@ubisoft.com'] # high diversity ties (second entry of the low-constraint table above)
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
# Distinct file name: 'test_indv_high_diversity.html' is already written by the steve.gagne cell
# and would otherwise be overwritten here.
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_high_diversity_2.html')
pyvis_collab2

Graph density:  0.2015644753476612


<class 'pyvis.network.Network'> |N|=113 |E|=2,551

In [90]:
node_ids = ['nicolas.drolet@ubisoft.com'] # low diversity ties
pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_strong_ties.html')
pyvis_collab2   

Graph density:  0.48307692307692307


<class 'pyvis.network.Network'> |N|=26 |E|=314

### Ex Diverse ties (with low indegrees for simplicity)

In [91]:
# Score = mean of (1 / constraint rank) and indegree rank, computed column-wise
# instead of with a row-wise apply (same values).
# NOTE(review): 1 / rank_constraint lies in (0, 1] while rank_indegree starts at 1 and
# grows with the group size, so the ordering is driven almost entirely by
# rank_indegree — confirm the two terms are meant to be on such different scales.
allcentr['low_density_low_indegree'] = (1 / allcentr['rank_constraint'] + allcentr['rank_indegree']) / 2
(
    allcentr[show_cols + ['rank_constraint','rank_indegree','low_density_low_indegree']]
            .dropna()
            .sort_values(by=['low_density_low_indegree'], ascending = False)
            .head(10)
).T

Office Email,david.corticchiato@ubisoft.com,jesus.imery@ubisoft.com,simon.tessier@ubisoft.com,jeffrey.parent@ubisoft.com,janusz.nykiel@ubisoft.com,yusi.zhao5@ubisoft.com,jimmy.senechal@ubisoft.com,angela.elliott@ubisoft.com,jonathan.filiatrault@ubisoft.com,sam.imberman@ubisoft.com
HRTB ID,55730,63465,7572,49599,52303,63718,49700,38661,49170,36021
Local Job Montreal,Generalist Programmer,Physic Programmer,Director,Concept Artist,Generalist Programmer,Production Manager,Generalist Programmer,Development Tester,Development Tester,Tools Programmer
cust_years_seniority,2.786301,1.808219,13.772603,2.441096,3.457534,1.767123,3.227397,5.049315,1.827397,5.643836
Local Job Level Montreal,3,1,4,1,3,3,2,2,1,3
Is Manager,0,0,0,0,0,0,0,0,0,0
Direct Team Size,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
KP,No,No,No,No,No,No,No,No,No,No
Final KP Status,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP,Non KP
Achieve_2021,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0
LeadershipMindset_2021,3.0,3.0,2.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0


In [92]:
# node_ids = ['simon.tessier@ubisoft.com'] # low diversity ties
# pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
# vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_low_density_low_indegree_1.html')
# pyvis_collab2

In [93]:
# node_ids = ['jesus.imery@ubisoft.com'] # low diversity ties
# pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
# vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_low_density_low_indegree_2.html')
# pyvis_collab2

In [94]:
# node_ids = ['janusz.nykiel@ubisoft.com'] # low diversity ties
# pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
# vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_low_density_low_indegree_3.html')
# pyvis_collab2

### Ex Strong ties (with low indegrees for simplicity)

In [95]:
# Score = mean of the constraint rank and the indegree rank, computed column-wise
# instead of with a row-wise apply (same values). Large scores combine a high
# constraint rank number with a high indegree rank number.
allcentr['high_density_low_indegree'] = (allcentr['rank_constraint'] + allcentr['rank_indegree']) / 2
(
    allcentr[show_cols + ['rank_constraint','rank_indegree','high_density_low_indegree']]
            .dropna()
            .sort_values(by=['high_density_low_indegree'], ascending = False)
            .head(5)
).T

Office Email,simon.tessier@ubisoft.com,jimmy.senechal@ubisoft.com,david.corticchiato@ubisoft.com,janusz.nykiel@ubisoft.com,jonathan.filiatrault@ubisoft.com
HRTB ID,7572,49700,55730,52303,49170
Local Job Montreal,Director,Generalist Programmer,Generalist Programmer,Generalist Programmer,Development Tester
cust_years_seniority,13.772603,3.227397,2.786301,3.457534,1.827397
Local Job Level Montreal,4,2,3,3,1
Is Manager,0,0,0,0,0
Direct Team Size,0.0,0.0,0.0,0.0,0.0
KP,No,No,No,No,No
Final KP Status,Non KP,Non KP,Non KP,Non KP,Non KP
Achieve_2021,3.0,3.0,3.0,3.0,3.0
LeadershipMindset_2021,2.0,3.0,3.0,2.0,3.0


In [96]:
# node_ids = ['jeffrey.parent@ubisoft.com'] # low diversity ties
# pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
# vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_strong_ties_low_indegree_2.html')
# pyvis_collab2

In [97]:
# node_ids = ['simon.tessier@ubisoft.com'] # low diversity ties
# pyvis_collab2 = build_net_degree(nx_collab_big, node_ids, 1)
# vis(pyvis_collab2, node_ids, centrality_measure, 'cust_group', 'test_indv_strong_ties_low_indegree_4.html')
# pyvis_collab2

#### Betweenness

In [98]:
centrality_measure = 'between_centr'

In [99]:
node_ids = ['marieve.cadoret@ubisoft.com']
pyvis_collab3 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab3, node_ids, centrality_measure, 'cust_group', 'test_indv_high_betweeness_1.html')
pyvis_collab3

Graph density:  0.2067363530778165


<class 'pyvis.network.Network'> |N|=42 |E|=356

In [100]:
node_ids = ['mark.besner@ubisoft.com']
pyvis_collab3 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab3, node_ids, centrality_measure, 'cust_group', 'test_indv_high_betweeness_2.html')
pyvis_collab3

Graph density:  0.2516636418632789


<class 'pyvis.network.Network'> |N|=58 |E|=832

In [101]:
node_ids = ['frederic.denis@ubisoft.com']
pyvis_collab3 = build_net_degree(nx_collab_big, node_ids, 1)
vis(pyvis_collab3, node_ids, centrality_measure, 'cust_group', 'test_indv_high_betweeness_3.html')
pyvis_collab3

Graph density:  0.2574750830564784


<class 'pyvis.network.Network'> |N|=43 |E|=465