In [1]:
import pandas as pd
import numpy as np

In [2]:
import random

# Synthetic workload: N_TASKS tasks, each with a duration, a type (1-5) and a count (1-500).
N_TASKS = 170
SEED = 1212  # same value the notebook later passes to random.seed()

# Seed both generators used below so that re-running the notebook
# reproduces the same task table.
random.seed(SEED)
np.random.seed(SEED)

task_name = [f'T{i}' for i in range(N_TASKS)]
task_time = [np.random.choice([0.25,0.5,0.66,0.91]) for _ in range(N_TASKS)]
task_type = [random.randint(1,5) for _ in range(N_TASKS)]
task_cnt = [random.randint(1,500) for _ in range(N_TASKS)]

In [3]:
# Stack the four parallel lists as rows: one row per attribute, one column
# per task.  The frame is flipped to one-row-per-task in the next step.
task_rows = [task_name, task_time, task_type, task_cnt]
task_df = pd.DataFrame(task_rows)

In [4]:
# Flip rows and columns so that each row describes one task.
# Note: this rebinds task_df, so running the cell twice flips it back.
task_df = task_df.transpose()

In [5]:
# After the transpose the columns are labelled 0..3 and, because the frame
# was built from mixed-type rows, every column has object dtype.  Name the
# columns and restore numeric dtypes so that sums and group-bys on them are
# computed numerically.
task_df = task_df.rename(
    columns=dict(enumerate(['task_name', 'task_time', 'task_type', 'task_cnt']))
).astype({'task_time': float, 'task_type': int, 'task_cnt': int})

In [6]:
# Total time required by all tasks.
task_df['task_time'].sum()

100.5099999999998

In [7]:
# Preview the first rows of the task table.
task_df.head()

Unnamed: 0,task_name,task_time,task_type,task_cnt
0,T0,0.66,4,411
1,T1,0.91,4,495
2,T2,0.25,3,333
3,T3,0.25,1,63
4,T4,0.5,2,297


In [8]:
# Count missing values in each column of the task table.
task_df.isna().sum()

task_name    0
task_time    0
task_type    0
task_cnt     0
dtype: int64

In [9]:
# Five workers, W0..W4, each starting with an equal share of the total task time.
total_task_time = task_df['task_time'].sum()
worker = ['W{}'.format(idx) for idx in range(5)]
worker_time = [total_task_time / 5 for _ in range(5)]

# Every worker is allowed to take every task type (1-5).
# A restricted alternative that was tried:
#worker_task_type = [np.random.choice([1,2,3],size=2).tolist() for _ in range(5)]
worker_task_type = [list(range(1, 6)) for _ in range(5)]

In [10]:
# Show the per-worker time budgets.
worker_time

[20.10199999999996,
 20.10199999999996,
 20.10199999999996,
 20.10199999999996,
 20.10199999999996]

In [11]:
# Seed the generator so the budget adjustments below are reproducible.
random.seed(1212)

# Walk the workers two at a time: one of each pair is moved below the mean
# budget and the other above it by the same random amount (1-5), so the sum
# of the budgets is unchanged.  With an odd number of workers the last one
# is not touched.
mean = task_df['task_time'].sum()/5
for first in range(0, len(worker_time) - 1, 2):
    sub_time = random.randint(1,5)
    worker_time[first] = mean - sub_time
    worker_time[first + 1] = mean + sub_time
    

In [12]:
# Show the per-worker time budgets again.
worker_time

[17.10199999999996,
 23.10199999999996,
 17.10199999999996,
 23.10199999999996,
 20.10199999999996]

In [13]:
# Total of the per-worker time budgets.
sum(worker_time)

100.5099999999998

In [14]:
# Assemble the worker table: one row per worker with its time budget and
# the list of task types it is allowed to take.
worker_columns = {
    'worker': worker,
    'worker_time': worker_time,
    'worker_task_type': worker_task_type,
}
worker_df = pd.DataFrame(worker_columns)

In [15]:
# Display the worker table.
worker_df

Unnamed: 0,worker,worker_time,worker_task_type
0,W0,17.102,"[1, 2, 3, 4, 5]"
1,W1,23.102,"[1, 2, 3, 4, 5]"
2,W2,17.102,"[1, 2, 3, 4, 5]"
3,W3,23.102,"[1, 2, 3, 4, 5]"
4,W4,20.102,"[1, 2, 3, 4, 5]"


In [16]:
# Exploratory pass: for every worker, gather the names of all tasks whose
# type is among the worker's allowed types.  The list is rebuilt on each
# iteration and not stored; get_worker_pos_task() is the reusable version.
for i in range(worker_df.shape[0]):
    cur_rows = worker_df.iloc[i]
    cur_name, cur_time, cur_task = cur_rows.values
    pos_task = []
    # The loop variable is deliberately not called `task_type`: at module
    # level that name is the list of task types used to build task_df, and
    # rebinding it here would silently replace that list with an int.
    for allowed_type in cur_task:
        pos_task.extend(task_df[task_df['task_type']==allowed_type]['task_name'].tolist())

#     print(list(set(pos_task)))

In [17]:
def get_worker_pos_task():
    """Map each worker to the task names that worker is allowed to take.

    Reads the module-level ``worker_df`` (columns ``worker`` and
    ``worker_task_type``) and ``task_df`` (columns ``task_name`` and
    ``task_type``).  A task is possible for a worker when its ``task_type``
    appears in that worker's ``worker_task_type`` list.

    Returns:
        dict: worker name -> list of distinct task names, in ``task_df``
        row order.  A worker with no allowed types maps to an empty list.
    """
    ret = {}
    # Columns are read by name rather than by unpacking each row positionally,
    # so extra or reordered columns in worker_df do not break the lookup.
    for cur_name, cur_task in zip(worker_df['worker'], worker_df['worker_task_type']):
        allowed = task_df['task_type'].isin(cur_task)
        ret[cur_name] = task_df.loc[allowed, 'task_name'].unique().tolist()
    return ret

In [18]:
# Overall task count per unit of worker time; the optimisation below uses
# this as the target rate for every worker.
total_task_cnt = task_df['task_cnt'].sum()
total_worker_time = worker_df['worker_time'].sum()
avg_task = total_task_cnt / total_worker_time
avg_task

420.45567605213495

In [19]:
# Total task count over all tasks.
task_df['task_cnt'].sum()

42260

In [26]:
from pulp import *
from random import choice, sample

### CREATING DATA

# Task identifiers, in task_df row order.
tasks = task_df['task_name'].tolist()

# NOTE: from here on `task_time` and `task_cnt` are dicts keyed by task name;
# they rebind the lists of the same names that were used to build task_df.

# Time each task takes (compared against the volunteers' time budgets below).
task_time = dict(zip(tasks, task_df['task_time'].tolist()))

# Count attached to each task (numerator of the per-volunteer rate below).
task_cnt = dict(zip(tasks, task_df['task_cnt'].tolist()))

volunteers = worker_df['worker'].tolist()

# Time budget of each volunteer.
volunteers_time = dict(zip(volunteers, worker_df['worker_time'].tolist()))

# Tasks each volunteer is allowed to take (derived from the allowed task types).
volunteer_choices = get_worker_pos_task()

# Distinct task types; not referenced by any constraint in this cell.
task_types = task_df['task_type'].unique().tolist()

# Despite its name this is the volunteer's *time* budget (identical to
# volunteers_time); the name is kept for backward compatibility.
volunteer_max_tasks = dict(volunteers_time)

# Tolerance added to every volunteer's time budget in the workload
# constraint.  Value carried over unchanged from the original model.
TIME_SLACK = 0.2


### DEFINING MODEL

model = LpProblem(name="resource-allocation", sense=LpMinimize)

# pair[v][t] == 1 when task t is assigned to volunteer v.
pair = LpVariable.dicts("Pair", (volunteers, tasks), cat=LpBinary)

# Defined for experimentation; not referenced by any constraint in this cell.
task_covered = LpVariable.dicts("Covered", tasks, cat=LpBinary)
pairs = [(v, t) for t in tasks for v in volunteers]

# Every task is assigned to exactly one volunteer.  (The original model
# stated this twice, once as == 1 and once as >= 1 together with <= 1.)
for t in tasks:
    model += lpSum(pair[v][t] for v in volunteers) == 1

# Every volunteer gets at least one task.  Superfluous in practice: the
# objective already pushes work towards every volunteer.
for v in volunteers:
    model += lpSum(pair[v][t] for t in tasks) >= 1

# A volunteer's assigned task time may not exceed the budget plus the slack.
for v in volunteers:
    model += lpSum(pair[v][t] * task_time[t] for t in tasks) <= volunteer_max_tasks[v] + TIME_SLACK

# A volunteer may not be given a task outside their allowed set.  Sets make
# the membership test O(1) inside the nested loop.
allowed_tasks = {v: set(volunteer_choices[v]) for v in volunteers}
for v in volunteers:
    for t in tasks:
        if t not in allowed_tasks[v]:
            model += pair[v][t] == 0

# diff[v] bounds |rate(v) - avg_task| from above, where rate(v) is the
# assigned task count divided by the volunteer's time budget.  The index
# list is passed positionally because its keyword name differs between
# PuLP releases (`indexs` vs `indices`).
diff = LpVariable.dicts('diff', volunteers, cat=LpContinuous)

# Plain Python float so the arithmetic below is dispatched to PuLP's
# operators rather than NumPy's.
target_rate = float(avg_task)
rate = {
    v: lpSum(pair[v][t] * task_cnt[t] for t in tasks) / volunteers_time[v]
    for v in volunteers
}

# "Positive" side of the absolute value.
for v in volunteers:
    model += diff[v] >= target_rate - rate[v]

# "Negative" side of the absolute value.
for v in volunteers:
    model += diff[v] >= rate[v] - target_rate

# Objective: minimise the total deviation (equivalent to the mean deviation).
model += lpSum(diff[v] for v in volunteers)

# `timeLimit` replaces the deprecated `maxSeconds` argument.
model.solve(PULP_CBC_CMD(msg=1, timeLimit=20))
print('Solver status:', LpStatus[model.status])


### WRITING THE ASSIGNMENT BACK

# Binary variables come back from the solver as floats (or None when no
# solution is available), so compare against 0.5 instead of relying on
# truthiness, which would treat a value such as 1e-9 as "assigned".
assigned_worker = {}
for v in sorted(volunteers):
    for t in tasks:
        if (pair[v][t].varValue or 0) > 0.5:
            assigned_worker[t] = v

# Tasks without an assignment are left as NaN.
task_df['worker'] = task_df['task_name'].map(assigned_worker)

print(task_df.groupby('worker')['task_time'].sum())
print(task_df.groupby(['worker'])['task_cnt'].sum())
            



worker
W0    17.08
W1    23.20
W2    16.90
W3    23.29
W4    20.04
Name: task_time, dtype: float64
worker
W0    7185
W1    9720
W2    7193
W3    9715
W4    8447
Name: task_cnt, dtype: int64


In [27]:
# Per-worker totals of assigned task time and task count.
summ_df = (
    task_df
    .groupby('worker')
    .agg(task_time=('task_time', 'sum'), task_cnt=('task_cnt', 'sum'))
    .reset_index()
)

In [28]:
# Achieved rate per worker: assigned task count divided by assigned task time.
summ_df['cnt_per_hr'] = summ_df['task_cnt'].div(summ_df['task_time'])

In [29]:
# Display the per-worker summary table.
summ_df

Unnamed: 0,worker,task_time,task_cnt,cnt_per_hr
0,W0,17.08,7185,420.667447
1,W1,23.2,9720,418.965517
2,W2,16.9,7193,425.621302
3,W3,23.29,9715,417.131816
4,W4,20.04,8447,421.506986
