In [1]:
import numpy as np
import pandas as pd 
import networkx as nx
import numpy.random as rd
from scipy.optimize import minimize 

In [319]:
%run rome.py
romes = list(graph.keys())
romes = rd.choice(romes,10)

In [320]:
g = nx.DiGraph(graph)
distance = {}
for rome in graph.keys():
    distance[rome] = {}
    distance[rome][rome] = 0
    for connex in (x for x in romes if x != rome):
        try: 
            distance[rome][connex] = nx.shortest_path_length(g,rome,connex)
        except:
            distance[rome][connex] = 20

DEFINE TYPES

In [30]:
class JobSeeker: 
    def __init__(self,rome,rho,T,bni='.'):
        self.rome = rome 
        self.rho = rho
        self.T = T
        self.bni = bni
    def theta(self,theta):
        self.theta = theta
        
class Branch: 
    def __init__(self,rome,rho,m,hirings,siren='.',nb='.'):
        self.rho = rho
        self.m = m 
        self.hirings = hirings
        self.rome = rome
        self.siren = siren
        self.nb = nb
    def theta(self,theta):
        self.theta = theta     
        
class Firm: 
    def __init__(self,rho,m,branches,siren='.'):
        self.rho = rho
        self.m = m 
        self.branches = branches
        self.romes = list(branches.keys())
        self.nb_branches = len(branches)
        self.tot_hirings = sum(branches.values())
        self.siren = siren 
    
    def split(self):
        return [Branch(rome,self.rho,self.m,h,siren=self.siren,nb=b) for b, (rome, h) in enumerate(self.branches.items())]
        
def random_firm(rho,m,max_branches,hirings_per_branch,siren='.'): 
    branches = rd.choice(romes,rd.randint(1,high=max_branches))
    hirings = {b:rd.chisquare(hirings_per_branches) for b in branches} 
    return Firm(rd.choice(rho),rd.choice(m),hirings,siren=siren)

def random_branch(rho,m,hirings_per_branch,siren='.'): 
    rome = rd.choice(romes,1)
    hirings = rd.chisquare(hirings_per_branches)
    return Branch(rome,rd.choice(rho),rd.choice(m),hirings,siren=siren)




CALCULATE PREDICTED NUMBER OF MATCHES 

In [344]:
def get_D(distance,DE,BRANCHES):
    D = np.empty(shape=(len(DE),len(BRANCHES)))
    for n,de in enumerate(DE):
        for b,branch in enumerate(BRANCHES):
            D[n][b] = distance[de.rome][branch.rome]
    return D

def get_THETA(DE,BRANCHES):
    thetas = {rome:{'U':0.0,'V':0.0} for rome in romes}
    for de in DE:
        thetas[de.rome]['U'] += 1
    for branch in BRANCHES: 
        thetas[branch.rome]['V'] += branch.hirings
    return thetas

def get_RHO_DE(D,DE,BRANCHES):
    RHO_DE = np.transpose(np.repeat([[de.rho for de in DE]],len(BRANCHES),axis=0))**D
    return RHO_DE

def get_RHO_BRANCHES(D,DE,BRANCHES):
    RHO_BRANCHES = np.repeat([[branch.rho for branch in BRANCHES]],len(DE),axis=0)**D
    return RHO_BRANCHES

def get_X(D,DE,BRANCHES):
    X = {}
    THETA = get_THETA(DE,BRANCHES)
    for n,de in enumerate(DE):
        X[n] = np.empty(shape=(10,len(BRANCHES)))
        for b,branch in enumerate(BRANCHES):
            X[n][0][b] = de.rho
            X[n][1][b] = de.rho*D[n][b]
            X[n][2][b] = branch.rho
            X[n][3][b] = branch.rho*D[n][b]
            X[n][4][b] = branch.m 
            X[n][5][b] = D[n][b]
            X[n][6][b] = THETA[de.rome]['U']           
            X[n][7][b] = THETA[de.rome]['V']
            X[n][8][b] = THETA[branch.rome]['U']
            X[n][9][b] = THETA[branch.rome]['V']
    return X

def interpret(beta):
    result = {}
    result['rho (DE)'] = beta[0]
    result['rho*d (DE)'] = beta[1]
    result['rho (F)'] = beta[2]
    result['rho*d (F)'] = beta[3]
    result['m (F)'] = beta[4]
    result['d'] = beta[5]
    result['U (DE)'] = beta[6]
    result['V (DE)'] = beta[7]
    result['U (F)'] = beta[8]
    result['V (F)'] = beta[9]
    return result
    
def get_ALPHA(beta,X,DE,BRANCHES):
    ALPHA = np.zeros(shape=(len(DE),len(BRANCHES)))
    for n,de in enumerate(DE):
        ALPHA[n] = np.exp(beta.dot(X[n]))
        ALPHA[n] = ALPHA[n]/np.sum(ALPHA[n])
    return ALPHA

def get_P(alpha,RHO_DE,DE,BRANCHES):
    P = np.empty(shape=(len(DE),len(BRANCHES)))
    T = np.transpose(np.repeat([[de.T for de in DE]],len(BRANCHES),axis=0))
    O = np.ones(shape=(len(DE),len(BRANCHES)))
    P = (O - (O-alpha)**T)*RHO_DE
    return P 

def get_PI(P,DE,BRANCHES):
    PI = np.sum(P,axis=0)
    H = np.array([branch.hirings for branch in BRANCHES])
    M = np.array([branch.m for branch in BRANCHES])
    return np.repeat([M*(H/(PI+H))**2],len(DE),axis=0)

def get_matches(beta,X,RHO_DE,RHO_BRANCHES,DE,BRANCHES):
    ALPHA = get_ALPHA(beta,X,DE,BRANCHES)
    P = get_P(ALPHA,RHO_DE,DE,BRANCHES)
    PI = get_PI(P,DE,BRANCHES)
    return np.sum(RHO_BRANCHES*PI*P)

In [345]:
def draws_without_recall(objects,prob,nb_draws):
    draws = []
    balls = [b for b in objects]
    pi = [p for p in prob]
    for d in range(nb_draws):
        draws += [rd.choice(balls,p=pi)]
        pi = [pi[p] for p,b in enumerate(balls) if b != draws[d]]
        balls = [b for b in balls if b != draws[d]]
        pi = [p/sum(pi) for p in pi]
    return draws

def recommendations(beta,X,DE,BRANCHES):
    R = {}
    ALPHA = get_ALPHA(beta,X,DE,BRANCHES)
    branches = list(range(len(BRANCHES)))
    for n,de in enumerate(DE):
        #R[n] = draws_without_recall(branches,ALPHA[n],de.T)
        R[n] = ALPHA[n].argsort()[-de.T:][::-1]
    return {n:[(BRANCHES[b].siren,BRANCHES[b].rome) for b in branches] for n,branches in R.items()}

def back_to_firms(R,DE,FIRMS):
    T = {(firm.siren,firm.m,firm.rho): 0 for firm in FIRMS}
    for n,r in R.items():
        for f in r:
            T[(FIRMS[f[0]].siren,FIRMS[f[0]].m,FIRMS[f[0]].rho)] += 1/FIRMS[f[0]].tot_hirings
    return T

def average_distance(R,DE):
    return {(n,DE[n].rho):np.mean([distance[DE[n].rome][r[1]] for r in rn]) for n,rn in R.items()}
   

In [346]:
rd.seed(123)
N = 50
F = 10
rho = [0.5,0.01]
m = [1,0.01]
T = [6,3]
max_branches = 2
hirings_per_branches = 1

DE = [JobSeeker(rd.choice(romes),rd.choice(rho),rd.choice(T),n) for n in range(N)]
FIRMS = [random_firm(rho,m,max_branches,hirings_per_branches,siren=f) for f in range(F)]
BRANCHES = []
for firm in FIRMS:
    BRANCHES = BRANCHES + firm.split()

In [347]:
D = get_D(distance,DE,BRANCHES)
RHO_DE = get_RHO_DE(D,DE,BRANCHES)
RHO_BRANCHES = get_RHO_BRANCHES(D,DE,BRANCHES)
X = get_X(D,DE,BRANCHES)

In [348]:
res = minimize(lambda beta: -get_matches(beta,X,RHO_DE,RHO_BRANCHES,DE,BRANCHES),np.zeros(10),method='Nelder-Mead')

In [349]:
interpret(res.x)

{'rho (DE)': -11.377319264111673,
 'rho*d (DE)': -0.20516546644874462,
 'rho (F)': -3.6267588187233804,
 'rho*d (F)': 9.239409540965688,
 'm (F)': 0.2558300045272456,
 'd': -7.219493221204403,
 'U (DE)': 39.62180987926279,
 'V (DE)': -17.062703641815496,
 'U (F)': -5.15716268612955,
 'V (F)': 14.09479625877075}

In [325]:
R = recommendations(res.x,X,DE,BRANCHES)
#average_distance(R,DE)
back_to_firms(R,DE,FIRMS)

{(0, 1.0, 0.01): 1890.5483554055756,
 (1, 0.01, 0.01): 172.37942579279942,
 (2, 1.0, 0.5): 71.43404040903626,
 (3, 0.01, 0.5): 3.1042049951559614,
 (4, 0.01, 0.01): 23.633528635355912,
 (5, 0.01, 0.01): 427.0731249863933,
 (6, 1.0, 0.01): 86.36923318216236,
 (7, 0.01, 0.01): 0.26493326159318187,
 (8, 0.01, 0.01): 17.356896538123063,
 (9, 1.0, 0.01): 36.850305830411855}

In [151]:
distance['J1305']['H1207']

5

In [211]:
graph[DE[13].rome]

[]

In [216]:
firm_romes = []
for firm in FIRMS.values():
    firm_romes += firm.romes
sum([DE[n].rome in firm_romes for n in DE.keys()])/len(DE)

0.05

NameError: name 'unique' is not defined