In [None]:
import pandas as pd
import hashlib
import datetime as dt
import numpy as np
import random
import json
import math
from tqdm import tqdm
import pickle

In [None]:
from ampligraph.utils import restore_model

In [None]:
import tensorflow as tf 

# Ask TensorFlow for the default GPU device name once and report on it;
# a falsy result is treated as "no GPU build / no GPU visible".
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("Please install GPU version of TF")
else:
    print('Default GPU Device: {}'.format(gpu_name))

## Candidates

In [None]:
# Load the user/work triples; later cells read its 'u', 'edge' and 'v' columns.
user_work = pd.read_csv('user_work_triples.csv')

In [None]:
# Project key = the part of the 'u' identifier before the first '-'.
# The vectorised .str accessor replaces a per-row Python lambda and yields NaN
# (instead of raising AttributeError) for rows whose 'u' value is missing.
user_work['project'] = user_work['u'].str.split('-').str[0]

In [None]:
def activeness_candidate(issue, period=90):
    """Return the distinct usernames with change-log entries shortly before an issue.

    Reads the module-level ``issue_information`` and ``change_log`` frames.

    Args:
        issue: value matched against ``issue_information['issuekey']``.
        period: length of the look-back window in days (default 90).

    Returns:
        list: usernames (first-seen order) whose ``timecreated`` lies in
        ``[createdate - period days, createdate)``.

    Raises:
        IndexError: if no row of ``issue_information`` matches ``issue``.
    """
    is_issue = issue_information['issuekey'] == issue
    created_at = issue_information.loc[is_issue, 'createdate'].values[0]
    window_start = created_at - pd.to_timedelta(period, unit='D')

    stamps = change_log['timecreated']
    in_window = (stamps >= window_start) & (stamps < created_at)
    return change_log.loc[in_window, 'username'].drop_duplicates().values.tolist()

In [None]:
# all candidates
# One array of unique users ('v') per role edge, irrespective of project.
candidate = {
    role: user_work.loc[user_work['edge'] == role, 'v'].unique()
    for role in ('dev', 'integrator', 'peer', 'tester')
}

In [None]:
# activeness candidates
# run all candidates first
# Both frames get their timestamp column parsed to datetime right after loading.
change_log = pd.read_csv('changelog.csv', engine='python').assign(
    timecreated=lambda frame: pd.to_datetime(frame['timecreated'])
)
issue_information = pd.read_csv('issueinformation.csv', sep=';').assign(
    createdate=lambda frame: pd.to_datetime(frame['createdate'])
)

In [None]:
# project candidates
# candidate[project][role] -> list of unique users ('v') seen with that role edge
# in that project; the 'assignee' edge is left out. Rebinds `candidate`.
non_assignee_roles = [edge for edge in user_work['edge'].unique() if edge != 'assignee']
candidate = {
    project: {
        role: list(
            user_work[(user_work['project'] == project) & (user_work['edge'] == role)]['v'].unique()
        )
        for role in non_assignee_roles
    }
    for project in user_work['project'].unique()
}

## Load model

In [None]:
# Restore the previously saved model from disk with ampligraph's restore_model.
# NOTE(review): the path suggests a ConvKB model — confirm.
model = restore_model('./model/convKB/team_rec_sub5.model')

## Precompute cost

In [None]:
# Load the test set. Later cells read each entry as a dict with an 'issue' key
# and an 'r' list whose first element carries the reference 'team'.
with open('team_test_data/TeamRec_Liu_hitnohit_moodle.json', 'rb') as jsonfile:
    data_test = json.load(jsonfile)

In [None]:
# Issue keys of the test entries, in file order.
issue_test = [i['issue'] for i in data_test]

In [None]:
# user_active[issue][role] -> project candidates for that role who were also
# active in the 90 days before the issue was created.
roles = ['dev', 'peer', 'integrator', 'tester']
user_active = {}
for issue in tqdm(issue_test):
    project = issue.split('-')[0]
    user_with_activeness = set(activeness_candidate(issue, period=90))
    user_active[issue] = {
        role: list(set(candidate[project][role]) & user_with_activeness)
        for role in roles
    }

In [None]:
# score_pre[issue][role][user] -> (rank fraction, model score), users ordered by
# descending score; rank fraction is (position + 1) / number of users.
score_pre = {}
for issue in tqdm(user_active):
    per_role = {}
    for role, username in user_active[issue].items():
        raw = {}
        if username:
            # One batched predict call per (issue, role); skipped when the pool is empty.
            score = model.predict(np.array([[issue, role, user] for user in username]))
            raw = {user: score[pos] for pos, user in enumerate(username)}

        ordered = sorted(raw.items(), key=lambda item: item[1], reverse=True)
        per_role[role] = {
            user: ((place + 1) / len(ordered), value)
            for place, (user, value) in enumerate(ordered)
        }
    score_pre[issue] = per_role


In [None]:
# compute score
# Per-triple variant: one model.predict call for every (issue, role, user).
# Rebinds `score_pre` with the same layout: user -> (rank fraction, score),
# ordered by descending score.
score_pre = dict()
for issue in tqdm(user_active):
    score_pre[issue] = dict()
    for role, members in user_active[issue].items():
        raw = {user: model.predict((issue, role, user)) for user in members}
        ranked = sorted(raw.items(), key=lambda pair: pair[1], reverse=True)
        score_pre[issue][role] = {
            user: (place / len(ranked), value)
            for place, (user, value) in enumerate(ranked, start=1)
        }

In [None]:
# Cache the precomputed scores on disk so the scoring cells need not be re-run.
with open('candidate_score_sub5.pkl', 'wb') as fp:
    pickle.dump(score_pre, fp)

In [None]:
# Reload the cached scores. NOTE(review): pickle.load executes arbitrary code
# from the file — only load a pickle this notebook wrote itself.
with open('candidate_score_sub5.pkl', 'rb') as fp:
    score_pre = pickle.load(fp)

## Cost function

In [None]:
def cost(model, team, issue, score):
    """Sum the first tuple element stored in ``score`` for every non-assignee member.

    Args:
        model: unused; kept for signature compatibility.
        team: dict mapping a role slot (e.g. ``'dev'``, ``'dev2'``) to a user.
        issue: unused; kept for signature compatibility.
        score: ``score[role][user]`` -> tuple whose element 0 is added to the cost.

    Returns:
        ``np.sum`` over the collected values (0.0 when only an assignee is present).
    """
    rank_terms = [
        # Trailing digits mark extra slots of one role: 'dev2' -> 'dev'.
        score[slot.rstrip('1234567890')][member][0]
        for slot, member in team.items()
        if slot != 'assignee'
    ]
    return np.sum(rank_terms)

In [None]:
# Build the collaboration table: one row per issue with its team members,
# left-joined on 'issuekey' with the assignee file.
user_collab = pd.read_csv('./team.csv')
assignee = pd.read_csv('./assignee.csv', sep=';')
user_collab = user_collab.set_index('issuekey').join(assignee.set_index('issuekey')).reset_index()
# Positional rename. NOTE(review): assumes the joined frame has exactly these
# six columns in this order — confirm against the CSV headers.
user_collab.columns = ['issuekey', 'dev', 'integrator', 'peer', 'tester', 'assignee']

def collaboration(candidate, assignee, collab_table=None):
    """Count, per role and user, the rows in which the user and the assignee co-occur.

    A row counts for ``user`` when both ``user`` and ``assignee`` appear in it
    (in any column). When ``user`` is the assignee, only rows where the user
    appears in a column other than ``'assignee'`` are counted.

    Args:
        candidate: mapping role -> iterable of users (a dict keyed by user works).
        assignee: the assignee to measure collaboration with.
        collab_table: optional DataFrame to scan; defaults to the module-level
            ``user_collab``.

    Returns:
        dict: ``{role: {user: count}}`` with plain ``int`` counts.
    """
    table = user_collab if collab_table is None else collab_table

    # The assignee filter does not depend on the user: compute it once instead
    # of rescanning the whole table for every candidate.
    with_assignee = table[table.isin([assignee]).any(axis=1)]

    counts = {}  # user -> count; a user may be a candidate for several roles

    def shared_rows(user):
        if user not in counts:
            frame = with_assignee
            if user == assignee:
                # Do not let the assignee "collaborate" with themselves via the
                # 'assignee' column.
                frame = with_assignee.drop(['assignee'], axis=1)
            counts[user] = int(frame.isin([user]).any(axis=1).sum())
        return counts[user]

    return {
        role: {user: shared_rows(user) for user in candidate[role]}
        for role in candidate
    }

def rerank(score, collab_freq, mean_collab):
    """Re-rank each role's users so frequent collaborators come first.

    Users whose ``collab_freq[role][user]`` is at least ``mean_collab[role]``
    are moved ahead of the others; within each group the incoming order of
    ``score[role]`` is kept.

    Returns:
        dict: ``{role: {user: (new 1-based position, original score[role][user][1])}}``.
    """
    result = {}
    for role, ranked_users in score.items():
        frequent = [u for u in ranked_users if collab_freq[role][u] >= mean_collab[role]]
        occasional = [u for u in ranked_users if not collab_freq[role][u] >= mean_collab[role]]
        result[role] = {
            user: (position, ranked_users[user][1])
            for position, user in enumerate(frequent + occasional, start=1)
        }
    return result

In [None]:
def format_output(team, rank):
    """Turn explored teams into a ranked, de-duplicated recommendation list.

    Args:
        team: iterable of ``(members, cost, features)`` tuples, where ``members``
            maps a role slot (``'dev'``, ``'dev2'``, ``'peer'``, ``'assignee'``, ...)
            to a user.
        rank: maximum number of teams to return.

    Returns:
        list of dicts ``{'rank', 'team', 'cost', 'features'}`` ordered by
        ascending cost, ``'rank'`` starting at 1. ``'team'`` groups users under
        ``developer`` / ``integrator`` / ``tester`` / ``reviewer`` / ``assignee``.
    """
    # Drop duplicate teams (same role -> user mapping), keeping the first occurrence.
    seen = set()
    unique = []
    for members, team_cost, feature in team:
        canonical = {k: sorted(v) if type(v) == list else v for k, v in members.items()}
        digest = hashlib.sha1(json.dumps(canonical, sort_keys=True).encode()).hexdigest()
        if digest not in seen:
            seen.add(digest)
            unique.append((members, team_cost, feature))

    # Sort by cost BEFORE truncating: cutting first would return the first
    # `rank` teams that happened to be explored rather than the cheapest ones.
    best = sorted(unique, key=lambda entry: entry[1])[:rank]

    # Role-slot prefix -> key in the output team dict ('peer' is reported as reviewer).
    prefix_to_slot = (
        ('dev', 'developer'),
        ('integrator', 'integrator'),
        ('peer', 'reviewer'),
        ('tester', 'tester'),
    )

    ranked = []
    for position, (members, team_cost, feature) in enumerate(best, start=1):
        slots = {'developer': [], 'integrator': [], 'tester': [], 'reviewer': [], 'assignee': []}
        for role, user in members.items():
            if role == 'assignee':
                slots['assignee'].append(user)
                continue
            for prefix, slot in prefix_to_slot:
                if role.startswith(prefix):
                    slots[slot].append(user)
                    break
        ranked.append({'rank': position, 'team': slots, 'cost': team_cost, 'features': feature})
    return ranked

## Maxlogit

In [None]:
def hash_team(team):
    """Return a SHA-1 hex digest identifying a team independently of key order.

    List values are sorted before hashing; the mapping is serialised as JSON
    with sorted keys.
    """
    canonical = {}
    for slot, value in team.items():
        canonical[slot] = sorted(value) if type(value) == list else value
    payload = json.dumps(canonical, sort_keys=True).encode()
    return hashlib.sha1(payload).hexdigest()

In [None]:
def probability(costT, costTp, alpha):  # 16
    """Acceptance probability for moving from a team of cost ``costT`` to ``costTp``.

    Mathematically ``exp(-costTp/alpha) / max(exp(-costT/alpha), exp(-costTp/alpha))``,
    i.e. ``min(1, exp((costT - costTp) / alpha))``. It is computed from the cost
    difference so that large costs do not underflow both weights to 0 (which
    made every move look certain) and very negative costs do not overflow both
    to infinity (which produced ``nan``).

    Args:
        costT: cost of the current team.
        costTp: cost of the proposed team.
        alpha: temperature; must be non-zero.

    Returns:
        float in ``[0, 1]``.
    """
    if costTp == costT:
        return 1.0
    exponent = (costT - costTp) / alpha  # log of the weight ratio vtp / vt
    if exponent >= 0:
        # The proposal weighs at least as much as the current team.
        return 1.0
    # exponent < 0: exp cannot overflow; extreme values underflow quietly to 0.0.
    return math.exp(exponent)  # 20

In [None]:
# Memo of team hash -> cost used by maxlogit(). Entries are only valid for one
# (issue, score table) pair, so maxlogit() empties it at the start of each call.
allteam_cache = {}


def maxlogit(N, roles_pp, alpha, cost, issuekey, score, context='', component=''):
    """Randomised local search over teams; returns every team it evaluated.

    Starts from a random team, then repeatedly re-draws the member of one random
    non-assignee role and accepts the change with ``probability(c, cp, alpha)``.

    Args:
        N: iteration budget; the loop runs ``N - 1`` proposals.
        roles_pp: dict role slot -> list of candidate users. The ``'assignee'``
            slot, if present, is drawn once and never mutated.
        alpha: temperature passed to ``probability``.
        cost: callable ``cost(model, team, issuekey, score)``; the module-level
            ``model`` is passed through.
        issuekey: issue identifier forwarded to ``cost``.
        score: score table forwarded to ``cost``.
        context, component: unused; kept for signature compatibility.

    Returns:
        list of ``(team, cost, None)`` tuples in exploration order: the initial
        team followed by every proposal (duplicates included).

    Raises:
        IndexError: if any candidate list in ``roles_pp`` is empty.
    """
    # A team's cost depends on the issue-specific `score` table while the cache
    # key is the team hash alone: drop entries left over from earlier calls so
    # costs never leak from one issue to another.
    allteam_cache.clear()

    def cached_cost(candidate_team):
        key = hash_team(candidate_team)
        if key not in allteam_cache:
            allteam_cache[key] = cost(model, candidate_team, issuekey, score)
        return allteam_cache[key]

    # Roles that may be re-drawn; the assignee is fixed.
    roles = [r for r in roles_pp if r != 'assignee']

    T = {role: random.choice(pool) for role, pool in roles_pp.items()}  # 1
    bestT = T  # 1
    cb = cached_cost(bestT)
    allteam = [(bestT, cb, None)]

    if not roles:
        # Nothing can be mutated: the initial team is the only possible one.
        return allteam

    for _ in range(1, N):  # 2
        c = cached_cost(T)  # 3

        Tp = T.copy()  # 4
        randomrole = random.choice(roles)  # 4 (never the assignee)
        Tp[randomrole] = random.choice(roles_pp[randomrole])  # 4

        cp = cached_cost(Tp)  # 5
        allteam.append((Tp, cp, None))

        prob = probability(c, cp, alpha)  # 6
        r = random.uniform(0, 1)  # 7
        if r <= prob:  # 8
            T = Tp  # 9
            c = cp  # 9
            if c < cb:  # 10
                bestT = T  # 11
                cb = c  # 11
    return allteam

In [None]:
# Re-load the test set (same file as in the "Precompute cost" section) so this
# part of the notebook can be run from the cached scores.
with open('team_test_data/TeamRec_Liu_hitnohit_moodle.json', 'rb') as jsonfile:
    data_test = json.load(jsonfile)

In [None]:
def recommend(name, user, score, n_iter=1000, alpha=0.05, top_k=100):
    """Search for teams for one issue and return the formatted ranking.

    Args:
        name: issue key, forwarded to ``maxlogit`` as ``issuekey``.
        user: dict role slot -> candidate list, forwarded to ``maxlogit``.
        score: score table forwarded to ``maxlogit`` / ``cost``.
        n_iter: iteration budget of the search (default 1000).
        alpha: temperature of the acceptance probability (default 0.05).
        top_k: maximum number of teams kept by ``format_output`` (default 100).

    Returns:
        dict ``{'issue': name, 'r': ranked teams}``.
    """
    explored = maxlogit(n_iter, user, alpha, cost, issuekey=name, score=score)
    return {'issue': name, 'r': format_output(explored, top_k)}

In [None]:
# Recommend teams for every test issue.
# Loop-local names are deliberately distinct from the notebook-level
# `assignee` (DataFrame loaded from assignee.csv) and `user_active`
# (per-issue dict built in the precompute section), so running this cell does
# not clobber them and earlier cells stay re-runnable.
team_rank = []
reranking = True
base_roles = ['dev', 'tester', 'peer', 'integrator']
for issue in tqdm(data_test):
    issue_key = issue['issue']
    reference_team = issue['r'][0]['team']
    issue_assignee = reference_team['assignee'][0]

    # Number of slots per role, taken from the first team listed for the issue.
    role_count = {
        'dev': len(reference_team['developer']),
        'integrator': len(reference_team['integrator']),
        'tester': len(reference_team['tester']),
        'peer': len(reference_team['reviewer']),
    }

    # Candidate pool per role: project candidates active in the last 90 days.
    project = issue_key.split('-')[0]
    active_users = set(activeness_candidate(issue_key, period=90))
    role_pools = {
        role: list(set(candidate[project][role]).intersection(active_users))
        for role in base_roles
    }

    score_new = score_pre[issue_key].copy()
    if reranking:
        collab_freq = collaboration(score_new, issue_assignee)
        # An empty role has no mean; use 0.0 rather than a nan plus RuntimeWarning
        # (rerank has no users to compare against it anyway).
        mean_collab = {
            role: (np.mean(list(freq.values())) if freq else 0.0)
            for role, freq in collab_freq.items()
        }
        score_new = rerank(score_new, collab_freq, mean_collab)

    # Extra slots of a role become 'role2', 'role3', ...; unused roles are dropped.
    for role, slots in role_count.items():
        if slots > 1:
            for slot in range(2, slots + 1):
                role_pools[role + str(slot)] = role_pools[role].copy()
        elif slots == 0:
            del role_pools[role]

    role_pools['assignee'] = [issue_assignee]

    team_rank.append(recommend(issue_key, role_pools, score_new))

In [None]:
import numpy
def convert(o):
    """``json.dump`` ``default`` hook: map NumPy values to built-in Python types.

    Handles every NumPy integer and floating scalar (not only ``int64``, which
    is not the default integer on every platform) and arrays.

    Raises:
        TypeError: for any other type, as the ``default`` protocol requires.
    """
    if isinstance(o, numpy.integer):
        return int(o)
    if isinstance(o, numpy.floating):
        return float(o)
    if isinstance(o, numpy.ndarray):
        return o.tolist()
    raise TypeError('Object of type {} is not JSON serializable'.format(type(o).__name__))
    
# Write all recommendations to disk; `convert` handles values json cannot encode natively.
with open('team_rec_sum_rerank_sub5.json', 'w') as outfile:
    json.dump(team_rank, outfile, default=convert)