In [None]:
import os
from tqdm import tqdm
# Restrict CUDA-based libraries in this process to GPU index 1.
# This is set here, ahead of the ampligraph import in the following cell
# (presumably so the backend sees it when it initializes -- confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [None]:
import json
from ampligraph.latent_features import ConvKB
from ampligraph.utils import save_model, restore_model
import pandas as pd
import numpy as np

In [None]:
# Ground-truth teams: parse the JSON file, then expose it as a DataFrame.
# Later cells read the 'issue' column of `data`.
with open('actual_team.json', 'rb') as fh:
    actual_team_records = json.load(fh)
data = pd.DataFrame(actual_team_records)

In [None]:
# Test set, kept in two forms:
#   data_test    -- the parsed JSON itself (iterated record by record further down)
#   data_test_df -- the same records as a DataFrame (used for 'issue' lookups)
with open('team_test_data/TeamRec_Liu_hitnohit_moodle.json', 'rb') as fh:
    data_test = json.load(fh)
data_test_df = pd.DataFrame(data_test)

In [None]:
# Parse a previously produced search-result file into `t`.
# NOTE(review): `t` is not read by any other cell visible in this notebook --
# confirm whether this cell is still needed.
with open('dev_rerank_search_result.json', 'rb') as jsonfile:
    t = json.load(jsonfile)

In [None]:
# Rows of the ground-truth frame whose issue also appears in the test set,
# converted to plain Python lists (one list per row, in column order).
is_test_issue = data['issue'].isin(data_test_df['issue'])
issue_test = data.loc[is_test_issue].values.tolist()

In [None]:
# Flatten every test issue into (issue, role, user) triples.
# row[0] is the issue key; row[1][0]['team'] maps each role to its users.
# The 'assignee' role is skipped.
triples_test = [
    (row[0], role, user)
    for row in issue_test
    for role, members in row[1][0]['team'].items()
    if role != 'assignee'
    for user in members
]

In [None]:
# Turn the held-out (issue, role, user) tuples into a frame with the column
# names 'u', 'edge', 'v' that the training-triple cells below also use.
triples_test = pd.DataFrame(triples_test, columns=['u', 'edge', 'v'])

# Map the role names used in the test data onto the short edge labels.
# The mapping is applied to the 'edge' column only: a frame-wide replace()
# would also rewrite any issue key or username that happens to be exactly
# 'reviewer' or 'developer'.
triples_test['edge'] = triples_test['edge'].replace({'reviewer': 'peer', 'developer': 'dev'})

In [None]:
# Training triples: read the CSV and discard every row with a missing value.
triples = pd.read_csv('triple/subtriples05-5.csv').dropna()

In [None]:
# Preview the first rows of the held-out test triples.
triples_test.head()

In [None]:
# Preview the first rows of the training triples loaded from CSV.
triples.head()

In [None]:
# (rows, columns) of the training triples after dropna().
triples.shape

In [None]:
# Remove from the training set every triple that also occurs in the test
# triples, comparing on the full (u, edge, v) key, then hand the remaining
# rows over as a NumPy array.
key_columns = ['u', 'edge', 'v']
train_keys = pd.MultiIndex.from_arrays([triples[name] for name in key_columns])
held_out_keys = pd.MultiIndex.from_arrays([triples_test[name] for name in key_columns])
is_held_out = train_keys.isin(held_out_keys)
X_train = triples.loc[~is_held_out].to_numpy()

In [None]:
# Size of the training array once the test triples have been filtered out.
X_train.shape

## Candidates

In [None]:
# Triples used to build the candidate pools; the cells below read its
# 'u', 'edge' and 'v' columns.
user_work = pd.read_csv('user_work_triples.csv')

In [None]:
# Project key = the part of 'u' before the first '-'.
user_work['project'] = user_work['u'].map(lambda issue_key: issue_key.partition('-')[0])

In [None]:
# all candidates
# Maps each role to the array of distinct users ('v') seen with that role.
# NOTE: the "project candidates" cell further down rebinds `candidate` to a
# per-project mapping, which is the shape the recommendation cell indexes
# (candidate[project][role]).
candidate = {
    role: user_work.loc[user_work['edge'] == role, 'v'].unique()
    for role in ('dev', 'integrator', 'peer', 'tester')
}

In [None]:
# activeness candidates
# (original note: run the "all candidates" cell first)
# Load the change log and the issue metadata, converting their timestamp
# columns to datetimes so they can be compared in activeness_candidate().
change_log = (
    pd.read_csv('changelog.csv', engine='python')
    .assign(timecreated=lambda frame: pd.to_datetime(frame['timecreated']))
)
issue_information = (
    pd.read_csv('issueinformation.csv', sep=';')
    .assign(createdate=lambda frame: pd.to_datetime(frame['createdate']))
)

In [None]:
def activeness_candidate(issue, period=90):
    """Return the users with change-log activity shortly before an issue was created.

    Reads the module-level frames `issue_information` and `change_log`.

    Parameters
    ----------
    issue : value matched against ``issue_information['issuekey']``.
    period : length of the look-back window, in days.

    Returns
    -------
    list
        Distinct ``username`` values (first-occurrence order) of change-log
        rows whose ``timecreated`` lies in
        ``[createdate - period days, createdate)``.

    Raises
    ------
    IndexError
        If no row of ``issue_information`` has the given issue key.
    """
    created_rows = issue_information.loc[issue_information['issuekey'] == issue, 'createdate']
    issue_create_date = created_rows.values[0]
    window_start = issue_create_date - pd.to_timedelta(period, unit='D')

    timestamps = change_log['timecreated']
    # Half-open window: the creation instant itself is excluded.
    in_window = (timestamps < issue_create_date) & (timestamps >= window_start)
    return change_log.loc[in_window, 'username'].drop_duplicates().tolist()

In [None]:
# project candidates
# candidate[project][role] -> list of distinct users ('v') who held `role`
# in `project`. Every non-assignee role gets an entry for every project,
# even when that list is empty.
roles = [role for role in user_work['edge'].unique() if role != 'assignee']
candidate = {
    project: {
        role: list(
            user_work.loc[
                (user_work['project'] == project) & (user_work['edge'] == role), 'v'
            ].unique()
        )
        for role in roles
    }
    for project in user_work['project'].unique()
}

## Model

In [None]:
# ConvKB model configuration; the seed is fixed so repeated runs start from
# the same initialization.
model = ConvKB(
    k=200,
    epochs=500,
    batches_count=100,
    seed=43,
    verbose=True,
)

In [None]:
# Train on the triples that remain after the test triples were filtered out.
model.fit(X_train)

In [None]:
# Persist the fitted model to disk under ./model/convKB/.
save_model(model, model_name_path = './model/convKB/team_rec_sub5.model')

In [None]:
# Reload the model from the same path the save cell writes to, so that a
# Restart & Run All evaluates the model that was just trained. The original
# path ('./model/convKB/team_rec.model') pointed at a different checkpoint;
# change this path deliberately if another checkpoint should be evaluated.
model = restore_model('./model/convKB/team_rec_sub5.model')

In [None]:
# for activeness candidates + project candidates
# For every test issue, rank the eligible users for one role with the model
# and write the top-ranked user into slot 0 of that role in each recommended
# team (search['r'][j]['team'][role][0]).
import copy

test_role = 'dev'             # edge label used in the triples / candidate pools
test_role_full = 'developer'  # key of the same role inside each team dict

# Deep copy: the loop below overwrites team members in place. A shallow list
# copy would share the nested dicts with `data_test` and silently modify it.
search_result = copy.deepcopy(data_test)

# finding issue's candidates
# A user is eligible when they have held the role in the issue's project AND
# were active in the 90 days before the issue was created.
user_active = dict()
for search in tqdm(search_result):
    issue = search['issue']
    project = issue.split('-')[0]
    user_with_activeness = set(activeness_candidate(issue, period=90))
    user_active[issue] = list(set(candidate[project][test_role]).intersection(user_with_activeness))

# recommend
for search in tqdm(search_result):
    issue = search['issue']
    users = user_active[issue]
    if users:
        result = model.predict([(issue, test_role, user) for user in users])
        # np.ravel turns a scalar / 0-d prediction (single candidate) into a
        # 1-element array, replacing the former bare `except:` fallback.
        score = sorted(zip(users, np.ravel(result)), reverse=True, key=lambda x: x[1])
    else:
        # Nothing to score: do not call the model with an empty batch.
        score = []

    for j, recommendation in enumerate(search['r']):
        # Assumes every team lists at least one member for this role.
        members = recommendation['team'][test_role_full]
        # The j-th team receives the j-th ranked candidate. Candidates that
        # already sit in the team's other slots are discarded first; the
        # bounds check keeps this safe once the ranking is exhausted, and the
        # loop (rather than a single check) also covers the case where the
        # next candidate is a teammate as well.
        while j < len(score) and score[j][0] in members[1:]:
            score.pop(j)
        # no more candidate
        members[0] = score[j][0] if j < len(score) else 'no_user'

In [None]:
# Write the updated recommendations out as JSON.
with open('dev_test.json', 'w') as result_file:
    json.dump(search_result, result_file)