In [2]:
import numpy as np
import pandas as pd
import random
from os import path as osp


In [None]:
'''Se precisa coordinar el doblaje de una película. Los actores del doblaje deben coincidir en las
tomas en las que sus personajes aparecen juntos. Los actores de
doblaje cobran todos la misma cantidad por cada día que deben desplazarse hasta el estudio de
grabación independientemente del número de tomas que se graben. No es posible grabar más
de 6 tomas por día. El objetivo es planificar las sesiones por día de manera que el gasto por los
servicios de los actores de doblaje sea el menor posible.
Número de actores: 10
Número de tomas : 30

Esto es un Crew Scheduling Problem o Minimum Cost Crew Assignment
'''

In [3]:
# Load the dataset from res/Doblaje.csv.
# header=1 makes the second line of the file the column header (the first line is skipped);
# index_col=0 uses the first column as the row index.
fpath = osp.join("res", "Doblaje.csv")
df = pd.read_csv(fpath, sep=',', header=1, index_col=0, engine='python')

# Drop the last 2 rows and the last 2 columns.
# NOTE(review): presumably these hold totals/padding rather than scene data — confirm against the CSV.
df = df.iloc[:-2, :-2]

# Unused alternative: convert the table to a 2D list of integers instead of a numpy array.
#sessions = df.astype(int).values.tolist() # doblaje[][]

## Greedy Heuristic (for approximate/fast solutions)
Assign scenes to days by grouping scenes whose casts overlap, so that each day requires as few distinct actors as possible.
This heuristic is not guaranteed to be optimal, but it is quick to implement. Exact solvers exist for this kind of problem, but I didn't try them.

In [85]:
def scenes_organization_gh(scenes, max_scenes_per_day=6):
    '''Greedily group scenes into recording days, starting each day from a random scene.

    Every day is seeded with a randomly chosen unassigned scene. Then, one at
    a time, the unassigned scene that adds the fewest new actors to the day is
    appended, until the day is full or no scenes remain. Ties are resolved in
    favour of the first such scene in the iteration order of the unassigned set.

    Parameters
    ----------
    scenes : 2D array-like
        Scene-by-actor matrix; ``scenes[i][j] == 1`` means actor ``j`` takes
        part in scene ``i``.
    max_scenes_per_day : int, optional
        Maximum number of scenes that may be recorded in one day (default 6).

    Returns
    -------
    list of list of int
        One list of scene indices per day, in the order the scenes were picked.

    Raises
    ------
    ValueError
        If ``max_scenes_per_day`` is smaller than 1.
    '''
    if max_scenes_per_day < 1:
        raise ValueError("max_scenes_per_day must be at least 1")

    # Work on the argument itself (not on a module-level variable) so the
    # function gives the right answer for whatever matrix the caller passes.
    scenes = np.asarray(scenes)

    # Cast of every scene, computed once instead of on every comparison.
    cast = [frozenset(np.flatnonzero(row == 1).tolist()) for row in scenes]

    unassigned = set(range(len(cast)))
    days = []

    while unassigned:
        # Seed the new day with a random unassigned scene.
        seed_scene = random.choice(list(unassigned))
        unassigned.remove(seed_scene)
        day_scenes = [seed_scene]
        actors_in_day = set(cast[seed_scene])

        # Fill the rest of the day with the scenes that enlarge the day's
        # cast the least. Scenes that add no new actor are deliberately NOT
        # skipped: they are free to record and skipping them gave worse results.
        while unassigned and len(day_scenes) < max_scenes_per_day:
            best_scene = min(unassigned, key=lambda s: len(cast[s] - actors_in_day))
            unassigned.remove(best_scene)
            day_scenes.append(best_scene)
            actors_in_day |= cast[best_scene]

        days.append(day_scenes)
    return days

def calc_cost(days, sessions):
    '''Return the total number of actor-days needed by a schedule.

    An actor has to be paid once for every day on which at least one of the
    scenes recorded that day has a 1 in the actor's column of ``sessions``.

    days     -- iterable of days, each one an iterable of scene indices.
    sessions -- numpy scene-by-actor matrix indexed as ``sessions[scene]``.
    '''

    def actors_present(scene_ids):
        # Union of the casts of every scene recorded on one day.
        present = set()
        for scene_id in scene_ids:
            present.update(np.where(sessions[scene_id] == 1)[0])
        return present

    return sum(len(actors_present(scene_ids)) for scene_ids in days)

sessions = np.array(df)  # Convert to numpy array for easier manipulation

# The heuristic picks random seed scenes, so fix the RNG state to make the
# reported schedule reproducible on Restart & Run All.
SEED = 42
N_TRIES = 100  # number of random restarts; the cheapest schedule found is kept
random.seed(SEED)

total_actor_days = float("inf")  # cost of the best schedule found so far
days = []                        # best schedule found so far

for attempt in range(N_TRIES):
    days_n = scenes_organization_gh(sessions)
    total_actor_days_n = calc_cost(days_n, sessions)
    if total_actor_days_n < total_actor_days:
        total_actor_days = total_actor_days_n
        days = days_n

# Report the best schedule, numbering days from 1.
for day, day_scenes in enumerate(days, start=1):
    print(f'Scenes of day {day}:{day_scenes}')

print("Total times I have to pay if these scenes are scheduled (greedy):", total_actor_days)

Scenes of day 1:[23, 17, 13, 16, 18, 22]
Scenes of day 2:[29, 27, 8, 4, 3, 14]
Scenes of day 3:[24, 15, 5, 6, 12, 26]
Scenes of day 4:[11, 7, 21, 0, 1, 19]
Scenes of day 5:[9, 20, 28, 2, 10, 25]
Total times I have to pay if these scenes are scheduled (greedy): 27
