In [1]:
import numpy as np
import pandas as pd 
import networkx as nx
import numpy.random as rd
from scipy.optimize import minimize 

In [2]:
%run rome.py
romes = list(graph.keys())
#romes = rd.choice(romes,10)

DEFINE GRAPH TO CALCULATE OCCUPATIONAL DISTANCES 

In [3]:
#g = nx.DiGraph(graph)

In [4]:
g = nx.Graph()
g.add_nodes_from(romes)
for rome1 in romes:
    current_edges = list(g.edges)
    for rome2 in romes:
        if (rome2,rome1) not in current_edges and (rome1 in graph[rome2] or rome2 in graph[rome1]):
            g.add_edge(rome1,rome2)
                

In [176]:
distance = {}
for rome in graph.keys():
    distance[rome] = {}
    distance[rome][rome] = 0
    for connex in (x for x in romes if x != rome):
        try: 
            distance[rome][connex] = nx.shortest_path_length(g,rome,connex)
        except:
            distance[rome][connex] = 20

DEFINE TYPES

In [128]:
class JobSeeker: 
    def __init__(self,rome,rho,T,bni='.'):
        self.rome = rome 
        self.rho = rho
        self.T = T
        self.bni = bni
    def theta(self,theta):
        self.theta = theta
        
class Branch: 
    def __init__(self,rome,rho,m,hirings,siren='.',nb='.'):
        self.rho = rho
        self.m = m 
        self.hirings = hirings
        self.rome = rome
        self.siren = siren
        self.nb = nb
    def theta(self,theta):
        self.theta = theta     
        
class Firm: 
    def __init__(self,rho,m,branches,siren='.'):
        self.rho = rho
        self.m = m 
        self.branches = branches
        self.romes = [b.rome for b in branches]
        self.nb_branches = len(branches)
        self.tot_hirings = sum([b.hirings for b in branches]) 
        self.siren = siren 

def random_branch(romes,rho,m,mean_hirings,siren='.',nb='.'): 
    return Branch(rd.choice(romes),rd.choice(rho),rd.choice(m),rd.chisquare(mean_hirings),siren=siren,nb=nb)

def random_firm(romes,rho,m,max_branches,mean_hirings,siren='.'):
    rho_f = rd.choice(rho)
    m_f = rd.choice(m)
    NB = rd.randint(1,high=max_branches)
    branches = [random_branch(romes,[rho_f],[m_f],mean_hirings,siren=siren,nb=i) for i in range(NB)]
    return Firm(rho_f,m_f,branches,siren = siren) 

CALCULATE PREDICTED NUMBER OF MATCHES 

In [184]:
def get_D(distance,DE,BRANCHES):
    D = np.empty(shape=(len(DE),len(BRANCHES)))
    for n,de in enumerate(DE):
        for b,branch in enumerate(BRANCHES):
            D[n][b] = distance[de.rome][branch.rome]
    return D

def get_THETA(DE,BRANCHES):
    thetas = {rome:{'U':0.0,'V':0.0} for rome in romes}
    for de in DE:
        thetas[de.rome]['U'] += 1
    for branch in BRANCHES: 
        thetas[branch.rome]['V'] += branch.hirings
    return thetas

def get_RHO_DE(D,DE,BRANCHES):
    RHO_DE = np.transpose(np.repeat([[de.rho for de in DE]],len(BRANCHES),axis=0))**D
    #RHO_DE = np.exp(-np.transpose(np.repeat([[de.rho for de in DE]],len(BRANCHES),axis=0))*D)
    return RHO_DE

def get_RHO_BRANCHES(D,DE,BRANCHES):
    RHO_BRANCHES = np.repeat([[branch.rho for branch in BRANCHES]],len(DE),axis=0)**D
    #RHO_BRANCHES = np.exp(-np.repeat([[branch.rho for branch in BRANCHES]],len(DE),axis=0)*D)
    return RHO_BRANCHES

def get_X(D,DE,BRANCHES):
    X = {}
    THETA = get_THETA(DE,BRANCHES)
    for n,de in enumerate(DE):
        X[n] = np.empty(shape=(6,len(BRANCHES)))
        for b,branch in enumerate(BRANCHES):
            #X[n][0][b] = de.rho
            X[n][0][b] = de.rho*D[n][b]
            X[n][1][b] = branch.rho*D[n][b]
            X[n][2][b] = branch.m 
            X[n][3][b] = D[n][b]
            X[n][4][b] = THETA[branch.rome]['U']
            X[n][5][b] = THETA[branch.rome]['V']
            #X[n][8][b] = THETA[de.rome]['U']           
            #X[n][9][b] = THETA[de.rome]['V']

    return X

def interpret(beta):
    result = {}
    #result['rho (DE)'] = beta[0]
    result['rho*d (DE)'] = beta[0]
    result['rho*d (F)'] = beta[1]
    result['m (F)'] = beta[2]
    result['d'] = beta[3]
    result['U (F)'] = beta[4]
    result['V (F)'] = beta[5]
    #result['U (DE)'] = beta[8]
    #result['V (DE)'] = beta[9]
    return result
    
def get_ALPHA(beta,X,D,DE,BRANCHES,method='.'):
    ALPHA = np.zeros(shape=(len(DE),len(BRANCHES)))
    for n,de in enumerate(DE):
        ALPHA[n] = np.exp(beta.dot(X[n]))
        ALPHA[n] = ALPHA[n]/np.sum(ALPHA[n])
        if method == 'correct' and (np.isnan(ALPHA[n]).any() or np.isinf(ALPHA[n]).any()):
            ALPHA[n] = np.exp(-D[n])/sum(np.exp(-D[n]))
            print(f'corected alpha for DE nb {n}')
    return ALPHA

def get_P(alpha,RHO_DE,DE,BRANCHES):
    P = np.empty(shape=(len(DE),len(BRANCHES)))
    T = np.transpose(np.repeat([[de.T for de in DE]],len(BRANCHES),axis=0))
    O = np.ones(shape=(len(DE),len(BRANCHES)))
    P = (O - (O-alpha)**T)*RHO_DE
    return P 

def get_C(P,DE,BRANCHES):
    PI = np.sum(P,axis=0)
    H = np.array([branch.hirings for branch in BRANCHES])
    M = np.array([branch.m for branch in BRANCHES])
    return M*(PI/H)**2

def get_PI(P,DE,BRANCHES):
    PI = np.sum(P,axis=0)
    H = np.array([branch.hirings for branch in BRANCHES])
    M = np.array([branch.m for branch in BRANCHES])
    #return np.repeat([(H/PI)*(np.ones(len(BRANCHES))-np.exp(-M*PI/H))],len(DE),axis=0)
    #return np.repeat([M*(np.ones(len(BRANCHES))-np.exp(-H/PI))],len(DE),axis=0)
    return np.repeat([M*(H/(PI+H))],len(DE),axis=0)

def get_matches(beta,X,D,RHO_DE,RHO_BRANCHES,DE,BRANCHES,method='.'):
    ALPHA = get_ALPHA(beta,X,D,DE,BRANCHES,method=method)
    P = get_P(ALPHA,RHO_DE,DE,BRANCHES)
    PI = get_PI(P,DE,BRANCHES)
    #C = get_C(P,DE,BRANCHES)
    return np.sum(RHO_BRANCHES*PI*P) #- np.sum(C)

DEFINE RECOMMENDATIONS

In [191]:
def draws_without_recall(objects,prob,nb_draws):
    draws = []
    balls = [b for b in objects]
    pi = [p for p in prob]
    for d in range(nb_draws):
        draws += [rd.choice(balls,p=pi)]
        pi = [pi[p] for p,b in enumerate(balls) if b != draws[d]]
        balls = [b for b in balls if b != draws[d]]
        pi = [p/sum(pi) for p in pi]
    return draws

def recommendations(beta,X,D,DE,BRANCHES,method='norecall',method_alpha='correct'):
    R = {}
    ALPHA = get_ALPHA(beta,X,D,DE,BRANCHES,method=method_alpha)
    branches = list(range(len(BRANCHES)))
    if method == 'norecall':
        for n,de in enumerate(DE):
            R[n] = draws_without_recall(branches,ALPHA[n],de.T)
    if method == 'rank':
        for n,de in enumerate(DE):
            R[n] = ALPHA[n].argsort()[-de.T:][::-1]
    if method == 'recall':
        for n,de in enumerate(DE):
            R[n] = rd.choice(branches,p=ALPHA[n],size=de.T)
    return {n:[(BRANCHES[b].siren,BRANCHES[b].nb) for b in branches] for n,branches in R.items()}

def stat_des(R,DE,FIRMS,m,rho):
    AD_DE = {r:0 for r in rho}
    AD_F = {r:0 for r in rho}
    NB_R_B = {f:{b:0 for b in range(FIRMS[f].nb_branches)} for f in range(len(FIRMS))}
    AR_F = {e:0 for e in m}
    for r in R.values():
        for f,b in r:
            NB_R_B[f][b] += 1
    NB_R_F = {f:sum(list(nb.values())) for f, nb in NB_R_B.items()}
    for n,de in enumerate(DE):
        AD_DE[de.rho] += np.mean([distance[de.rome][FIRMS[f].branches[b].rome] for f,b in R[n]])/(len(DE)/2)
    for n,r in R.items():
        for f,b in r:
            AD_F[FIRMS[f].rho] += (distance[DE[n].rome][FIRMS[f].branches[b].rome]/NB_R_F[f])/(len(FIRMS)/2)
    NB_R_F = {f:r/FIRMS[f].tot_hirings for f, r in NB_R_F.items()}
    for f,h in NB_R_F.items():
        AR_F[FIRMS[f].m] += h/(len(FIRMS)/2)
    return AD_DE, AD_F, AR_F 

In [185]:
rd.seed(123)
N = 100
F = 30
rho = [0.9,0.5]
m = [1,0.5]
T = [6,3]
max_branches = 2
hirings_per_branches = 10

DE = [JobSeeker(rd.choice(romes),rd.choice(rho),rd.choice(T),n) for n in range(N)]
FIRMS = [random_firm(romes,rho,m,max_branches,hirings_per_branches,siren=f) for f in range(F)]
BRANCHES = []
for firm in FIRMS:
    BRANCHES = BRANCHES + firm.branches

In [186]:
D = get_D(distance,DE,BRANCHES)
RHO_DE = get_RHO_DE(D,DE,BRANCHES)
RHO_BRANCHES = get_RHO_BRANCHES(D,DE,BRANCHES)
X = get_X(D,DE,BRANCHES)

In [187]:
res = minimize(lambda beta: -get_matches(beta,X,D,RHO_DE,RHO_BRANCHES,DE,BRANCHES,method='correct'),\
               np.zeros(6),method='Nelder-Mead')

In [188]:
interpret(res.x)

{'rho*d (DE)': 13.911316160283093,
 'rho*d (F)': 2.82384825832799,
 'm (F)': 2.4328808605880132,
 'd': -15.631504302312706,
 'U (F)': -0.46317837348661983,
 'V (F)': 0.04897278381247512}

In [189]:
beta = res.x

ALPHA = get_ALPHA(beta,X,D,DE,BRANCHES,method='correct')
 
np.nan_to_num(ALPHA).max(axis=1)

array([0.98462142, 0.14143872, 0.98360576, 0.59934871, 0.99998109,
       0.99846205, 0.99847872, 0.3411907 , 0.12920176, 0.99341025,
       0.20648608, 0.14498091, 0.58325905, 0.89258091, 0.14262886,
       0.96044375, 0.18945149, 0.50871972, 0.15635242, 0.24835541,
       0.93875468, 0.14990894, 0.96960408, 0.21885187, 0.92559845,
       0.86029469, 0.61272666, 0.24386079, 0.17167756, 0.36919898,
       0.19810298, 0.65126142, 0.60273172, 0.27290267, 0.76947205,
       0.17086968, 0.31566066, 0.99996927, 0.19690188, 0.99882537,
       0.99844406, 0.26507961, 0.46647063, 0.9986264 , 0.66745884,
       0.1607888 , 0.82303125, 0.35075753, 0.22496986, 0.26002348,
       0.19579102, 0.45481351, 0.20863686, 0.21885187, 0.14087897,
       0.34197785, 0.27175707, 0.14759414, 0.57912183, 0.89258091,
       0.97482524, 0.15614649, 0.65609158, 0.26002348, 1.        ,
       0.17313921, 0.96812716, 0.6935592 , 0.21592307, 0.19579102,
       0.77140003, 0.99999798, 0.98858799, 0.9999957 , 0.24386

In [192]:
R = recommendations(res.x,X,D,DE,BRANCHES,method='recall')

In [193]:
AD_DE, AD_F, AR_F  = stat_des(R,DE,FIRMS,m,rho)

In [194]:
AD_DE

{0.9: 4.089999999999999, 0.5: 2.230000000000001}

In [195]:
AD_F

{0.9: 3.9626873881118305, 0.5: 0.9124542124542127}

In [196]:
AR_F

{1: 1.6980412883952598, 0.5: 1.5781815804223691}