In [94]:
import pandas as pd
import random

# Load the data from the provided Excel file
moderator_data = pd.read_excel("./EDA/Datasets/moderator-data-cleaned.xlsx")

# Filter out moderators with a handling time of 0
moderator_data = moderator_data[moderator_data['handling time'] >= 1000]

In [95]:
# Normalize accuracy and productivity
moderator_data['normalized_accuracy'] = (moderator_data['accuracy'] - moderator_data['accuracy'].min()) / \
                                        (moderator_data['accuracy'].max() - moderator_data['accuracy'].min())

moderator_data['normalized_productivity'] = (moderator_data['Productivity'] - moderator_data['Productivity'].min()) / \
                                           (moderator_data['Productivity'].max() - moderator_data['Productivity'].min())

# Compute moderator score as the average of normalized accuracy and normalized productivity
moderator_data['moderator_score'] = (moderator_data['normalized_accuracy'] + moderator_data['normalized_productivity']) / 2

# Display the first few rows with the new columns
moderator_data[['moderator', 'normalized_accuracy', 'normalized_productivity', 'moderator_score']].head()

Unnamed: 0,moderator,normalized_accuracy,normalized_productivity,moderator_score
0,1704427801912322,0.906667,0.20271,0.554688
1,1712377365906433,0.828,0.329741,0.57887
2,1705699742139394,0.826667,0.272345,0.549506
3,1759969798094866,0.708,0.29833,0.503165
5,1695096148334594,0.884,0.35298,0.61849


In [96]:
# Assumed paid hours per day for TikTok moderators
PAID_HOURS_PER_DAY = 8

# Calculate the maximum number of tasks each moderator can handle in a day based on a 10% increase in utilization
moderator_data['max_tasks_per_day'] = (0.1 * PAID_HOURS_PER_DAY * 60 * 60 * 1000) / moderator_data['handling time']

# Display the first few rows with the updated max_tasks_per_day
moderator_data[['moderator', 'handling time', 'Utilisation %', 'max_tasks_per_day']].head()

Unnamed: 0,moderator,handling time,Utilisation %,max_tasks_per_day
0,1704427801912322,119688,1.28725,24.062563
1,1712377365906433,102324,1.157927,28.14589
2,1705699742139394,76773,1.150042,37.513188
3,1759969798094866,100732,1.146969,28.590716
5,1695096148334594,76199,1.115514,37.795772


In [97]:
# Extract unique markets from the moderator data
all_markets = set()
for market_list in moderator_data['market']:
    markets = eval(market_list)
    all_markets.update(markets)

# Generate 5000 sample ads with random queue_market attributes and random ad scores
sample_ads_50 = []
for _ in range(5000):
    ad = {
        'ad_id': f"ad_{_ + 1}",
        'queue_market': random.choice(list(all_markets)),
        'ad_score': random.random()  # Random score between 0 and 1
    }
    sample_ads_50.append(ad)

# Reset the allocated_tasks dictionary for reallocation using the filtered moderators and new max tasks per day
allocated_tasks_filtered = {moderator: 0 for moderator in moderator_data['moderator']}

# Modified Allocation function to prioritize moderators with lower utilization %
def allocate_ad_v4(ad, filtered_moderator_data, allocated_tasks):
    # Filter moderators based on ad's queue_market
    valid_moderators = filtered_moderator_data[filtered_moderator_data['market'].apply(lambda x: ad['queue_market'] in eval(x))].copy()
    
    # Sort these moderators based on utilization % (ascending) and then by the difference between ad score and moderator score
    valid_moderators['score_diff'] = abs(valid_moderators['moderator_score'] - ad['ad_score'])
    valid_moderators = valid_moderators.sort_values(by=['Utilisation %', 'score_diff'])
    
    for _, moderator_row in valid_moderators.iterrows():
        # Check if the moderator can handle more tasks
        if allocated_tasks[moderator_row['moderator']] < moderator_row['max_tasks_per_day']:
            allocated_tasks[moderator_row['moderator']] += 1  # Allocate the task
            return moderator_row['moderator'], moderator_row['moderator_score']  # Return the allocated moderator's ID and score
    
    return None, None  # If no moderator is available

# Re-allocate the 5000 sample ads using the updated function and filtered moderators
allocations_50_prioritized = {}
for ad in sample_ads_50:
    allocated_moderator, moderator_score = allocate_ad_v4(ad, moderator_data, allocated_tasks_filtered)
    allocations_50_prioritized[ad['ad_id']] = {
        'moderator': allocated_moderator,
        'ad_score': ad['ad_score'],
        'moderator_score': moderator_score
    }

# First few allocations for inspection after prioritizing
list(allocations_50_prioritized.items())[:5]


[('ad_1',
  {'moderator': 1782229,
   'ad_score': 0.5931036603821502,
   'moderator_score': 0.5467781354882114}),
 ('ad_2',
  {'moderator': 1759086854982705,
   'ad_score': 0.45303492002601153,
   'moderator_score': 0.4214142147453562}),
 ('ad_3',
  {'moderator': 1295012,
   'ad_score': 0.45109964185506923,
   'moderator_score': 0.6033293524849894}),
 ('ad_4',
  {'moderator': 3932913,
   'ad_score': 0.865798771785148,
   'moderator_score': 0.36487784167575726}),
 ('ad_5',
  {'moderator': 1694644355452929,
   'ad_score': 0.09897103276582642,
   'moderator_score': 0.5782365931438693})]

In [98]:
# Convert allocated tasks from dictionary to Series
allocated_tasks_series = pd.Series(allocated_tasks_filtered)

# Calculate the increase in utilization % for each moderator
moderator_data['allocated_tasks'] = moderator_data['moderator'].map(allocated_tasks_series)
moderator_data['increase_in_utilisation'] = (moderator_data['allocated_tasks'] * moderator_data['handling time']) / (PAID_HOURS_PER_DAY * 60 * 60 * 1000)   # Convert handling time to percentage of a day
moderator_data['new_utilisation'] = moderator_data['Utilisation %'] + moderator_data['increase_in_utilisation']

# Extract relevant columns for display
utilisation_changes_corrected = moderator_data[['moderator', 'allocated_tasks', 'increase_in_utilisation', 'new_utilisation']]

# Only display moderators with non-zero allocations for brevity
utilisation_changes_corrected = utilisation_changes_corrected[utilisation_changes_corrected['allocated_tasks'] > 0]

utilisation_changes_corrected

Unnamed: 0,moderator,allocated_tasks,increase_in_utilisation,new_utilisation
267,3215286,40,0.006137,0.868429
298,1700711878316033,30,0.100942,0.956051
305,8716810,33,0.102325,0.954929
361,1672026730048518,1,0.003003,0.845092
371,8881701,34,0.101161,0.940364
...,...,...,...,...
1276,1694644355452929,39,0.101036,0.113067
1277,1684394145691650,61,0.065933,0.074475
1279,1691911660815362,77,0.101036,0.101036
1281,5827188,121,0.013037,0.013037
