In [1]:
## Test Team Formation framework with real datasets
## Balancing Task Coverage vs. Maximum Expert Load
## Karan Vombatkere, Spring 2022

#Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json, time
import TeamFormationProblem as TFP

In [2]:
import gurobipy as gp
from gurobipy import GRB

#### Freelancer Dataset

In [3]:
#Import Freelancer data
#Freelancer from DropBox link: https://www.dropbox.com/sh/8zpsi1etvvvvj5k/AAD-J9ZQmSsbnSmEILBMD9uxa/datasets/real?dl=0&subfolder_nav_tracking=1
#freelance_experts.csv and freelance_projects.csv

def extract_skills(row):
    skills = []
    for i,val in enumerate(row):
        if val == 1:
            skills.append(str(i))
    return skills            

    
def importFreelancerData(experts_filename='datasets/freelancer/freelancer_experts.csv', tasks_filename='datasets/freelancer/freelancer_projects.csv'):
    #Extract tasks skills as list
    freelance_tasks_df = pd.read_csv(tasks_filename, header=None)
    print("Freelancer tasks df shape: ", freelance_tasks_df.shape)
    freelance_tasks_df['Task_Skills'] = freelance_tasks_df.apply(lambda row: extract_skills(row), axis=1)
    task_skills_list = freelance_tasks_df.Task_Skills.to_list()
    
    #Extract experts skills as list
    freelance_experts_df = pd.read_csv(experts_filename, header=None)
    print("Freelancer experts df shape: ", freelance_experts_df.shape)
    freelance_experts_df['Expert_Skills'] = freelance_experts_df.apply(lambda row: extract_skills(row), axis=1)
    expert_skills_list = freelance_experts_df.Expert_Skills.to_list()

    print("Imported Freelancer dataset. Num Experts={}, Num Tasks={}".format(len(expert_skills_list),len(task_skills_list)))

    return task_skills_list, expert_skills_list
    

In [4]:
t,e = importFreelancerData()

Freelancer tasks df shape:  (992, 175)
Freelancer experts df shape:  (1212, 175)
Imported Freelancer dataset. Num Experts=1212, Num Tasks=992


In [5]:
def createExpertTaskSkillMatrices(expert_skills_list, task_skills_list):
    '''
    Create (n_experts, n_skills) and (m_tasks, n_skills) matrices from skill and expert lists
    ARGS:
        expert_skills_list : List of lists of expert skill indices as stored in dataset txt files
        task_skills_list   : List of lists of task skill indices as stored in dataset txt files
    RETURN:
        experts_mat : (n_experts, n_skills) binary matrix
        tasks_mat   : (m_tasks, n_skills) binary matrix
        tasks_not_coverable : list of tasks that are not fully coverable
    '''
    #First check if all tasks are coverable and get set of all skills
    all_experts_skillset = set()

    for expert_i in expert_skills_list:
        for skill in expert_i:
            all_experts_skillset = all_experts_skillset.union({skill})

    s_skills = len(all_experts_skillset) #Get total number of skills

    #Create (n_experts, n_skills) matrix
    experts_mat = np.zeros((len(expert_skills_list), s_skills), dtype=np.int8)
    for expert_index, expert_i in enumerate(expert_skills_list):
        for skill in expert_i:
            skill_index = int(skill)
            experts_mat[expert_index][skill_index] = 1

    print("Generated expert-skill matrix, shape = {}".format(experts_mat.shape))
    
    #Create (m_tasks, n_skills) matrix
    tasks_mat = np.zeros((len(task_skills_list), s_skills), dtype=np.int8)

    tasks_not_coverable = []
    allTasksCoverable = True

    for task_index, task_i in enumerate(task_skills_list):
        for skill in task_i:
            skill_index = int(skill)
            tasks_mat[task_index][skill_index] = 1

            if skill not in all_experts_skillset:
                allTasksCoverable = False
                tasks_not_coverable.append(task_index)
    
    print("Generated task-skill matrix, shape = {}".format(tasks_mat.shape))

    if not allTasksCoverable:
        print("{} Tasks not fully coverable: {}".format(len(tasks_not_coverable), tasks_not_coverable))
    
    return experts_mat, tasks_mat, tasks_not_coverable

In [24]:
def convertLPSolutionToMatrix(lp_model, n, m):
    '''
    Convert the lp_model output to a (n_experts x m_tasks) matrix
    Entries in n x m matrix represent probabilities of assigning expert i to task j
    ARGS:
        lp_model: Gurobi solved LP model
        n       : number of experts
        m       : number of tasks
    RETURN:
        LP_soln_matrix: (n_experts x m_tasks) matrix with LP solution X_ji values as per Power in Unity paper
    '''
    v = lp_model.getVars()
    count = 0

    LP_soln_matrix = np.zeros((n, m), dtype=np.float32)
    
    for i in range(n):
        for j in range(m):
            LP_soln_matrix[i][j] = v[count].x
            count += 1

    return LP_soln_matrix


def solve_LP(expertMatrix, taskMatrix):
    '''
    Given (n_experts, n_skills) and (m_tasks, n_skills) matrices, solve the relaxed ILP and return 
    a (n_experts x m_tasks) matrix with LP solution
    ARGS:
        expertMatrix : (n_experts, n_skills) binary matrix of expert skills
        taskMatrix   : (m_tasks, n_skills) binary matrix of task skills
    RETURN:
        LP_solution_matrix: (n_experts x m_tasks) matrix with LP solution X_ji values as per Power in Unity paper
    '''
    #Create empty assignment matrix of shape (n_experts x m_tasks)
    X =  np.zeros((len(expertMatrix), len(taskMatrix)), dtype=np.int8)

    #Create Gurobi LP Model
    m = gp.Model("TaskCoverageLP")

    #Add variables
    x = m.addVars(len(X), len(X[1]), vtype='S', ub=1.0, name="x")

    #Set objective function
    L = m.addVar(vtype='S', name = 'Load')
    obj = 1*L
    m.setObjective(obj, GRB.MINIMIZE)

    #Add constraints
    # c1 - Load of each expert is upper bounded by L
    c1 = m.addConstrs(x.sum(i,'*') <= L for i in range(len(X)))

    # c2 - Each task is (fully) covered
    experts_transpose = np.transpose(expertMatrix)
    c2 = m.addConstrs(gp.quicksum(experts_transpose[j][l]*x[l,i] for l in range(len(expertMatrix))) >= taskMatrix[i][j] 
                                        for i in range(len(taskMatrix)) for j in range(len(taskMatrix[0])) if taskMatrix[i][j] > 0)
        
    # Silence model output
    # m.setParam('OutputFlag', 0)

    #Solve LP model
    m.optimize()

    LP_solution_matrix = convertLPSolutionToMatrix(m, len(expertMatrix), len(taskMatrix))

    return LP_solution_matrix

In [25]:
expertMatrix, taskMatrix, tnc = createExpertTaskSkillMatrices(t,e)
LP_sol = solve_LP(expertMatrix, taskMatrix)

Generated expert-skill matrix, shape = (992, 175)
Generated task-skill matrix, shape = (1212, 175)
Gurobi Optimizer version 9.5.2 build v9.5.2rc0 (mac64[rosetta2])


2022-09-30 17:16:04,786 |INFO: Gurobi Optimizer version 9.5.2 build v9.5.2rc0 (mac64[rosetta2])


Thread count: 8 physical cores, 8 logical processors, using up to 8 threads


2022-09-30 17:16:04,787 |INFO: Thread count: 8 physical cores, 8 logical processors, using up to 8 threads


Optimize a model with 2758 rows, 1202305 columns and 1348690 nonzeros


2022-09-30 17:16:04,787 |INFO: Optimize a model with 2758 rows, 1202305 columns and 1348690 nonzeros


Model fingerprint: 0xc9620a3c


2022-09-30 17:16:04,798 |INFO: Model fingerprint: 0xc9620a3c


Variable types: 0 continuous, 0 integer (0 binary)


2022-09-30 17:16:04,801 |INFO: Variable types: 0 continuous, 0 integer (0 binary)


Semi-Variable types: 1202305 continuous, 0 integer


2022-09-30 17:16:04,802 |INFO: Semi-Variable types: 1202305 continuous, 0 integer


Coefficient statistics:


2022-09-30 17:16:04,808 |INFO: Coefficient statistics:


  Matrix range     [1e+00, 1e+00]


2022-09-30 17:16:04,809 |INFO:   Matrix range     [1e+00, 1e+00]


  Objective range  [1e+00, 1e+00]


2022-09-30 17:16:04,809 |INFO:   Objective range  [1e+00, 1e+00]


  Bounds range     [1e+00, 1e+00]


2022-09-30 17:16:04,810 |INFO:   Bounds range     [1e+00, 1e+00]


  RHS range        [1e+00, 1e+00]


2022-09-30 17:16:04,811 |INFO:   RHS range        [1e+00, 1e+00]


Presolve removed 2300 rows and 1194064 columns


2022-09-30 17:16:06,253 |INFO: Presolve removed 2300 rows and 1194064 columns


Presolve time: 1.39s


2022-09-30 17:16:06,253 |INFO: Presolve time: 1.39s


Presolved: 458 rows, 8241 columns, 18111 nonzeros


2022-09-30 17:16:06,257 |INFO: Presolved: 458 rows, 8241 columns, 18111 nonzeros


Variable types: 8241 continuous, 0 integer (0 binary)


2022-09-30 17:16:06,259 |INFO: Variable types: 8241 continuous, 0 integer (0 binary)





2022-09-30 17:16:06,263 |INFO: 


Root relaxation: objective 6.000000e+00, 175 iterations, 0.00 seconds (0.00 work units)


2022-09-30 17:16:06,263 |INFO: Root relaxation: objective 6.000000e+00, 175 iterations, 0.00 seconds (0.00 work units)





2022-09-30 17:16:06,518 |INFO: 


    Nodes    |    Current Node    |     Objective Bounds      |     Work


2022-09-30 17:16:06,519 |INFO:     Nodes    |    Current Node    |     Objective Bounds      |     Work


 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time


2022-09-30 17:16:06,519 |INFO:  Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time





2022-09-30 17:16:06,520 |INFO: 


*    0     0               0       6.0000000    6.00000  0.00%     -    1s


2022-09-30 17:16:06,521 |INFO: *    0     0               0       6.0000000    6.00000  0.00%     -    1s





2022-09-30 17:16:06,804 |INFO: 


Explored 1 nodes (175 simplex iterations) in 2.02 seconds (2.81 work units)


2022-09-30 17:16:06,805 |INFO: Explored 1 nodes (175 simplex iterations) in 2.02 seconds (2.81 work units)


Thread count was 8 (of 8 available processors)


2022-09-30 17:16:06,805 |INFO: Thread count was 8 (of 8 available processors)





2022-09-30 17:16:06,806 |INFO: 


Solution count 1: 6 


2022-09-30 17:16:06,806 |INFO: Solution count 1: 6 





2022-09-30 17:16:06,807 |INFO: 


Optimal solution found (tolerance 1.00e-04)


2022-09-30 17:16:06,808 |INFO: Optimal solution found (tolerance 1.00e-04)


Best objective 6.000000000000e+00, best bound 6.000000000000e+00, gap 0.0000%


2022-09-30 17:16:06,816 |INFO: Best objective 6.000000000000e+00, best bound 6.000000000000e+00, gap 0.0000%


In [None]:
def setCoverLP():
    '''
    Adapted LP algorithm for the non-online setting of the Load minimization problem by Anagnostopoulos et al.
    ARGS:
        LP_solution : (n_experts x m_tasks) matrix with LP solution X_ji values 
    RETURN:
        task_assignment
    '''
    

In [None]:
print('Cost is (objective value): %g' % obj.getValue())


In [None]:
numVars = 0
for v in m.getVars():
    if v.X > 0:
        numVars += 1
        print('%s %g' % (v.VarName, v.X))

In [None]:
def getLPSolnMatrix(lp_model, n, m):
    v = lp_model.getVars()
    count = 0

    LP_soln_matrix = np.zeros((n, m), dtype=np.int8)
    
    for i in range(n):
        for j in range(m):
            LP_soln_matrix[i][j] = v[count].x
            count += 1

    return LP_soln_matrix

In [None]:
t,e = importFreelancerData()
FreelancerTest = TFP.TeamFormationProblem(t, e)

In [None]:
runtimeDict, F_vals, workLoad_vals = FreelancerTest.computeTaskAssigment(algorithms=['lazy_greedy', 'random', 'no_update_greedy','task_greedy'], lambdaVal=0.1)

In [None]:
# freelancerCovList = FreelancerTest.getCoverageValues()

#### Guru Dataset

In [None]:
#Guru Dataset
def extract_skills_guru(row):
    skills = []
    for i,val in enumerate(row):
        if val == 1:
            skills.append(str(i))
    return skills 

def importGuruData(experts_filename='datasets/guru/guru_experts.csv', tasks_filename='datasets/guru/guru_tasks.csv'):
    #Extract tasks skills as list
    guru_tasks_df = pd.read_csv(tasks_filename, header=None)
    print("Guru tasks df shape: ", guru_tasks_df.shape)
    guru_tasks_df['Task_Skills'] = guru_tasks_df.apply(lambda row: extract_skills_guru(row), axis=1)
    task_skills_list = guru_tasks_df.Task_Skills.to_list()
    task_skills_list = task_skills_list[0:-1]
    
    #Extract experts skills as list
    guru_experts_df = pd.read_csv(experts_filename, header=None)
    print("Guru experts df shape: ", guru_experts_df.shape)
    guru_experts_df['Expert_Skills'] = guru_experts_df.apply(lambda row: extract_skills_guru(row), axis=1)
    expert_skills_list = guru_experts_df.Expert_Skills.to_list()
    expert_skills_list = expert_skills_list[0:-1]

    print("Imported Guru dataset. Num Experts={}, Num Tasks={}".format(len(expert_skills_list),len(task_skills_list)))

    return task_skills_list, expert_skills_list
    

In [None]:
t,e = importGuruData()
GuruTest = TFP.TeamFormationProblem(t, e)

In [None]:
runtimeDict, F_vals, workLoad_vals = GuruTest.computeTaskAssigment(algorithms=['no_update_greedy'], lambdaVal=0.1)
# runtimeDict, F_vals, workLoad_vals = GuruTest.computeTaskAssigment(algorithms=['lazy_greedy'], lambdaVal=0.1)

#### IMDB Datasets

In [None]:
#Import IMDB Data
def importIMDBData(experts_filename, tasks_filename):
    with open(experts_filename, 'r') as f:
        expert_skills_list = json.loads(f.read())
    
    with open(tasks_filename, 'r') as f:
        task_skills_list = json.loads(f.read())

    print("Imported IMDB dataset. Num Experts={}, Num Tasks={}".format(len(expert_skills_list),len(task_skills_list)))

    return task_skills_list, expert_skills_list, 

#Run algorithm on IMDB datasets
def testIMDBDatasets(write_flag, algoList):
    imdb_data_path = 'datasets/imdb/'
    movieYears = [2015, 2018, 2020]

    if write_flag:
        runTimeStamp = str(time.strftime("%m-%d-%H:%M:%S", time.localtime(time.time())))
        imdb_outfilename = "experiments/imdb_" + runTimeStamp + ".txt"
        outfile_imdb = open(imdb_outfilename, "a")
        outfile_imdb.write("IMDB dataset Team-Formation Algorithms: {}\n".format(runTimeStamp))

    for y in movieYears:
        experts_file = imdb_data_path + 'imdb_experts_' + str(y) + '.txt'
        tasks_file = imdb_data_path + 'imdb_tasks_' + str(y) + '.txt'
        print("IMDB Dataset: {}, {}".format('imdb_experts_' + str(y), 'imdb_tasks_' + str(y)))

        imdb_tasks, imdb_experts = importIMDBData(experts_file, tasks_file)
        IMDBTest = TFP.TeamFormationProblem(imdb_tasks[0:600], imdb_experts[0:100])

        rt_dict, f_dict, workload_dict, coverageList = IMDBTest.computeTaskAssigment(algorithms=algoList, plot_flag=False)
        coverageListString = ""
        for c_i in coverageList:
            c_i_str = ", "+str(np.round(c_i, 2))
            coverageListString += c_i_str
        print(coverageListString)
        #Write output to file
        if write_flag:
            runInfo = "\nIMDB movieYear = {}, Experts = {}, Tasks = {}".format(str(y), str(IMDBTest.n), str(IMDBTest.m))
            outfile_imdb.write(runInfo)

            f_info = "\nAlgorithm Objectives (F_max): Lazy Greedy = {}; No-Update-Greedy = {}; Task Greedy = {}; Random = {};\
                ".format(f_dict['lazyGreedy'], f_dict['noUpdateGreedy'], f_dict['taskGreedy'], f_dict['random'])
            outfile_imdb.write(f_info)   

            wload_info = "\nAlgorithm optimal workloads: Lazy Greedy = {}; No-Update-Greedy = {}; Task Greedy = {}; Random = {};\
                ".format(workload_dict['lazyGreedy'], workload_dict['noUpdateGreedy'], workload_dict['taskGreedy'], workload_dict['random'])
            outfile_imdb.write(wload_info)   

            runtimeInfo = "\nAlgorithm Runtimes: Total = {:.3f}s; Lazy Greedy = {:.3f}s; No-Update-Greedy = {:.3f}s; Task Greedy = {:.3f}s; Random = {:.3f}s;\
                \n".format(rt_dict['total'], rt_dict['lazyGreedy'], rt_dict['noUpdateGreedy'], rt_dict['taskGreedy'], rt_dict['random'])
            outfile_imdb.write(runtimeInfo)

            outfile_imdb.write("\nCoverage List: {}".format(coverageListString))

    
    if write_flag:
        outfile_imdb.close()

    return None
    

In [None]:
# testIMDBDatasets(write_flag=True, algoList=['lazy_greedy', 'random', 'no_update_greedy', 'task_greedy'])

In [None]:
#Get coverage lists
imdb_data_path = 'datasets/imdb/'
y = 2018
experts_file = imdb_data_path + 'imdb_experts_' + str(y) + '.txt'
tasks_file = imdb_data_path + 'imdb_tasks_' + str(y) + '.txt'
print("IMDB Dataset: {}, {}".format('imdb_experts_' + str(y), 'imdb_tasks_' + str(y)))

imdb_tasks, imdb_experts = importIMDBData(experts_file, tasks_file)
IMDBTest = TFP.TeamFormationProblem(imdb_tasks, imdb_experts, max_workload_threshold=100)


In [None]:
runtimeDict, F_vals, workLoad_vals = IMDBTest.computeTaskAssigment(algorithms=['random', 'no_update_greedy', 'task_greedy'], lambdaVal=0.05)
#runtimeDict, F_vals, workLoad_vals = IMDBTest.computeTaskAssigment(algorithms=['no_update_greedy'], lambdaVal=0.1)

In [None]:
#covDict = IMDBCoverages.getStepCoverageValues()

In [None]:
imdb_data_path = 'datasets/imdb/'
y = 2015
experts_file = imdb_data_path + 'imdb_experts_' + str(y) + '.txt'
tasks_file = imdb_data_path + 'imdb_tasks_' + str(y) + '.txt'
print("IMDB Dataset: {}, {}".format('imdb_experts_' + str(y), 'imdb_tasks_' + str(y)))

imdb_tasks, imdb_experts = importIMDBData(experts_file, tasks_file)
IMDBLambdaTest = TFP.TeamFormationProblem(imdb_tasks[:1000], imdb_experts[300:600], max_workload_threshold=100)

t_arr, f_dict, t_maxArr, f_maxArr = IMDBLambdaTest.testLambdaTaskAssignment(algorithms=['lazy_greedy'])


In [None]:
#Plot F_i for different Lambda for Lazy Greedy
plt.figure(figsize=(9,6))
#for l_val in f_dict.keys():
    #plt.plot(t_arr, f_dict[l_val], label='Lambda={:.3f}'.format(l_val))

# Plot the max values
plt.plot(t_maxArr, f_maxArr, '--*', label='Max F_i')

# title_text = 'Lazy Greedy Performance by varying Lambda (IMDB_2015)'
# plt.title(title_text, fontsize=12)
plt.xlabel('Workload Threshold, T_i', fontsize=12)
plt.ylabel('Coverage, C(A)', fontsize=12)


lambda_arr = [0.3, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100]
i=0
# zip joins x and y coordinates in pairs
for x,y in zip(t_maxArr,f_maxArr):
    label = "{:.1f}".format(lambda_arr[i])
    i += 1
    plt.annotate(label, # this is the text
                 (x,y), # these are the coordinates to position the label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 ha='center') # horizontal alignment can be left, right or center

plt.legend(loc='lower right')
plt.show()

#### Bibsonomy Datasets

In [None]:
#Import Bibsonomy datasets
def importBibsonomyData(experts_filename, tasks_filename):
    with open(experts_filename, 'r') as f:
        expert_skills_list = json.loads(f.read())
    
    with open(tasks_filename, 'r') as f:
        task_skills_list = json.loads(f.read())

    print("Imported Bibsonomy dataset. Num Experts={}, Num Tasks={}".format(len(expert_skills_list),len(task_skills_list)))

    return task_skills_list, expert_skills_list

#Run algorithm on Bibsonomy datasets
def testBibsonomyDatasets(write_flag, algoList):
    bibsonomy_data_path = 'datasets/bibsonomy/'
    movieYears = [2010, 2015, 2020]

    if write_flag:
        runTimeStamp = str(time.strftime("%m-%d-%H:%M:%S", time.localtime(time.time())))
        bibs_outfilename = "experiments/bibsonomy_" + runTimeStamp + ".txt"
        outfile_bibsonomy = open(bibs_outfilename, "a")
        outfile_bibsonomy.write("Bibsonomy dataset Team-Formation Algorithms: {}\n".format(runTimeStamp))

    for y in movieYears:
        experts_file = bibsonomy_data_path + 'bibsonomy_experts_' + str(y) + '.txt'
        tasks_file = bibsonomy_data_path + 'bibsonomy_tasks_' + str(y) + '.txt'
        print("\nBibsonomy Dataset: {}, {}".format('bibsonomy_experts_' + str(y), 'bibsonomy_tasks_' + str(y)))

        bib_tasks, bib_experts = importBibsonomyData(experts_file, tasks_file)
        BibsonomyTest = TFP.TeamFormationProblem(bib_tasks[0:500], bib_experts[0:200])

        rt_dict, f_dict, workload_dict = BibsonomyTest.computeTaskAssigment(algorithms=algoList, plot_flag=False)

        #Write output to file
        if write_flag:
            runInfo = "\nBibsonomy paperYear = {}, Experts = {}, Tasks = {}".format(str(y), str(BibsonomyTest.n), str(BibsonomyTest.m))
            outfile_bibsonomy.write(runInfo)

            f_info = "\nAlgorithm Objectives (F_max): Lazy Greedy = {}; No-Update-Greedy = {}; Task Greedy = {}; Random = {};\
                ".format(f_dict['lazyGreedy'], f_dict['noUpdateGreedy'], f_dict['taskGreedy'], f_dict['random'])
            outfile_bibsonomy.write(f_info)   

            wload_info = "\nAlgorithm optimal workloads: Lazy Greedy = {}; No-Update-Greedy = {}; Task Greedy = {}; Random = {};\
                ".format(workload_dict['lazyGreedy'], workload_dict['noUpdateGreedy'], workload_dict['taskGreedy'], workload_dict['random'])
            outfile_bibsonomy.write(wload_info)   

            runtimeInfo = "\nAlgorithm Runtimes: Total = {:.3f}s; Lazy Greedy = {:.3f}s; No-Update-Greedy = {:.3f}s; Task Greedy = {:.3f}s; Random = {:.3f}s;\
                \n".format(rt_dict['total'], rt_dict['lazyGreedy'], rt_dict['noUpdateGreedy'], rt_dict['taskGreedy'], rt_dict['random'])
            outfile_bibsonomy.write(runtimeInfo)
    
    if write_flag:
        outfile_bibsonomy.close()
    
    return None


In [None]:
#testBibsonomyDatasets(write_flag=True, algoList=['lazy_greedy', 'random', 'no_update_greedy', 'task_greedy'])

In [None]:
bibsonomy_data_path = 'datasets/bibsonomy/'
y=2015
experts_file = bibsonomy_data_path + 'bibsonomy_experts_' + str(y) + '.txt'
tasks_file = bibsonomy_data_path + 'bibsonomy_tasks_' + str(y) + '.txt'
print("\nBibsonomy Dataset: {}, {}".format('bibsonomy_experts_' + str(y), 'bibsonomy_tasks_' + str(y)))

bib_tasks, bib_experts = importBibsonomyData(experts_file, tasks_file)
BibsonomyTest = TFP.TeamFormationProblem(bib_tasks, bib_experts)

In [None]:
runtimeDict, F_vals, workLoad_vals = BibsonomyTest.computeTaskAssigment(algorithms=['random', 'no_update_greedy', 'task_greedy'], lambdaVal=0.05)

In [None]:
#Plot F_i for different Lambda for Lazy Greedy
# plt.figure(figsize=(9,6))
# for l_val in Fi_dict.keys():
#     plt.plot(T_arr, Fi_dict[l_val], label='Lambda={:.3f}'.format(l_val))

# # Plot the max values
# plt.plot(TMaxArr, FMaxArr, '--*', label='Max F_i')

# title_text = 'Lazy Greedy Performance by varying Lambda (Bibsonomy_2015)'
# plt.title(title_text, fontsize=12)
# plt.xlabel('Workload Threshold, T_i')
# plt.ylabel('F_i')
# plt.legend(loc='upper right')
# plt.show()

In [None]:
# max_threshold_arr = [5,10,40,80,100,150,200]
# rev_rt_arr, reg_rt_arr = [],[]
# for thresh in max_threshold_arr:
#     FreelancerTest = TFP.TeamFormationProblem(t[0:200], e[0:200], max_workload_threshold=thresh)
#     rev_rt, reg_rt = FreelancerTest.compare_Methods()
#     rev_rt_arr.append(rev_rt)
#     reg_rt_arr.append(reg_rt)

#Plot Runtimes
# plt.figure(figsize=(9,6))
# plt.plot(max_threshold_arr, rev_rt_arr, label='Reverse Threshold Runtime')
# plt.plot(max_threshold_arr, reg_rt_arr, label='Regular Lazy Runtime')

# title_text = 'Reverse Threshold vs. Regular Lazy runtimes'
# plt.title(title_text, fontsize=11)
# plt.xlabel('Max Threshold, T_i')
# plt.ylabel('Runtime, s')
# plt.legend(loc='lower right')
# plt.show()

#FreelancerTest.compute_reverseThreshold()
#FreelancerTest.compareTest_Lazy_Stochastic_Assignments()