### Problem Statement
Imagine you are a highly-indemand actor, who has been presented with offers to star in n different movie projects under development. Each offer comes specified with the first and last  day of filming. To take the job, you must commit to being available throughout  this entire period. Thus you cannot simultaneously accept two jobs whose  intervals overlap.

### Criteria
You want to make as much money as possible. Because each of these  films pays the same fee per film, this implies you seek the largest possible  set of jobs (intervals) such that no two of them conflict with each other.

In [1]:
import numpy as np
import random
import datetime
from collections import namedtuple
import calendar

# np.random.seed(314)
Range = namedtuple("Range", ["start", "end"])


### Creating a DateRange Class

In [20]:
class DateRange(object):
    def __init__(self, date_range):
        self.range = date_range
        self.difference = self.range.end-self.range.start
        
    def get_start_date(self):
        return self.range.start
    
    def get_end_date(self):
        return self.range.end
    
    def get_start_string(self):
        return self.range.start.strftime("%Y-%m-%d")
    
    def get_end_string(self):
        return self.range.end.strftime("%Y-%m-%d")
    
    def __str__(self):
        return f"{self.get_start_string()} ====> {self.get_end_string()}"
        
    def __lt__(self, r2):
        return self.range < r2
    
    def __le__(self, r2):
        return self.range <= r2
    
    def __eq__(self, r2):
        return self.range == r2
    
    def __gt__(self, r2):
        return self.range > r2
    
    def __ge__(self, r2):
        return self.range >= r2
    
    def get_overlap(self, r2):
        latest_start = max(self.range.start, r2.range.start)
        earliest_end = min(self.range.end, r2.range.end)
        delta = (earliest_end - latest_start).days + 1
        return max(0, delta)
    
    def is_overlap(self, r2):
        return self.get_overlap(r2) != 0


### Creating a Test Set

In [38]:
def make_datetime_object(day,month,year):
    return datetime.datetime.strptime(f"{year}-{month}-{day}", "%Y-%m-%d")
    
def get_random_date(start):
    """
    Generates a random datetime object between two other datetime objects.
    """
    MAX_DAYS = 30
    MIN_DAYS = 3
    number_of_days = np.random.randint(MIN_DAYS, MAX_DAYS)
    return start + datetime.timedelta(days=number_of_days)
    
def get_date(spread, year):
    month = np.random.randint(1,spread)
    day = np.random.randint(1,calendar.monthrange(year,month)[1])

    return make_datetime_object(day,month,year)

def generate_jobs(n,MAX_DAYS,MIN_DAYS,spread,year):
    date_ranges=[]
    for i in range(n):
        start = get_date(spread, year)
        end = get_random_date(start)
        date_range = DateRange(Range(start=start, end=end))
        date_ranges.append(date_range)
    return date_ranges

def check_overlap(date, date_ranges):
    _temp = []
    for d in date_ranges:
        print(f"Checking overlap with previous date: {d}")
        _temp.append(date.is_overlap(d))
    if not any(_temp):
        print("No overlap with any previous date.")
        return True
    return False

### Generate a sample test set

In [61]:
date_ranges = generate_jobs(100,3,30,10,2019)

### Earliest Job First Methodology
The simplest idea to solve this scheduling problem is to accept the work which starts the earliest and then go down the list of jobs and take the work which is next to start and doesnt overlap with any previously taken assignments.

In [62]:
def earliest_job_first(date_ranges):
    dates_to_take = []
    sorted_date_ranges = np.argsort(date_ranges)
    dates_to_take.append(date_ranges[sorted_date_ranges[0]])
    print(f"Job which starts the first: {date_ranges[sorted_date_ranges[0]]}")
    start = 0
    for other_date in sorted_date_ranges[1:]:
        print(f"Next earliest starting job: {date_ranges[other_date]}")
        take_date = check_overlap(date_ranges[other_date], dates_to_take)
        if take_date:
            dates_to_take.append(date_ranges[other_date])
            print(f"Appending to taken jobs list: {date_ranges[other_date]}")

        print("===================================================")
    return dates_to_take
            
jobs_to_take = earliest_job_first(date_ranges)

Job which starts the first: 2019-01-05 ====> 2019-02-03
Next earliest starting job: 2019-01-08 ====> 2019-01-15
Checking overlap with previous date: 2019-01-05 ====> 2019-02-03
Next earliest starting job: 2019-01-08 ====> 2019-01-28
Checking overlap with previous date: 2019-01-05 ====> 2019-02-03
Next earliest starting job: 2019-01-12 ====> 2019-01-18
Checking overlap with previous date: 2019-01-05 ====> 2019-02-03
Next earliest starting job: 2019-01-17 ====> 2019-01-31
Checking overlap with previous date: 2019-01-05 ====> 2019-02-03
Next earliest starting job: 2019-01-18 ====> 2019-02-02
Checking overlap with previous date: 2019-01-05 ====> 2019-02-03
Next earliest starting job: 2019-01-18 ====> 2019-02-05
Checking overlap with previous date: 2019-01-05 ====> 2019-02-03
Next earliest starting job: 2019-01-19 ====> 2019-02-04
Checking overlap with previous date: 2019-01-05 ====> 2019-02-03
Next earliest starting job: 2019-01-20 ====> 2019-02-11
Checking overlap with previous date: 2019

In [63]:
print("Checking jobs and pay:")
for job in jobs_to_take:
    print(job)
    print("Pay: $X")
print(f"=============================================")

print(f"Total Pay: ${len(jobs_to_take)}X")
print(f"Total Jobs Taken: {len(jobs_to_take)}")
print(f"Total Percent of Jobs Taken: {(len(jobs_to_take)/len(date_ranges))*100}%")

Checking jobs and pay:
2019-01-05 ====> 2019-02-03
Pay: $X
2019-02-06 ====> 2019-02-10
Pay: $X
2019-02-12 ====> 2019-03-04
Pay: $X
2019-03-08 ====> 2019-04-03
Pay: $X
2019-04-05 ====> 2019-05-01
Pay: $X
2019-05-02 ====> 2019-05-26
Pay: $X
2019-05-29 ====> 2019-06-23
Pay: $X
2019-07-01 ====> 2019-07-28
Pay: $X
2019-07-29 ====> 2019-08-27
Pay: $X
2019-09-01 ====> 2019-09-17
Pay: $X
2019-09-18 ====> 2019-09-29
Pay: $X
Total Pay: $11X
Total Jobs Taken: 11
Total Percent of Jobs Taken: 11.0%


This idea makes sense until we realize that accepting the earliest job might block us from taking many other jobs if that first job is quite long. For example if the first movie you accept takes 30 days to film and there are 3 overlapping movie which only take 10 days each to film, then you are accepting 1 movie at the cost of 3.

This brings us to another approach:

### Shortest Job First Methodology
The idea here is that we accept the movie which finishes in the shortest time and then go down the list accepting the movies which take the next shortest time and dont clash with previously accepted movies.

In [64]:
def shortest_movie_first(date_ranges):
    differences = np.argsort([i.difference.days for i in date_ranges])
    print(f"Movie with shortest duration: {date_ranges[differences[0]]}")
    dates_to_take = [date_ranges[differences[0]]]
    for other_date in differences[1:]:
        print(f"Next earliest starting job: {date_ranges[other_date]}")
        take_date = check_overlap(date_ranges[other_date], dates_to_take)
        
        if take_date:
            dates_to_take.append(date_ranges[other_date])
            print(f"Appending to taken jobs list: {date_ranges[other_date]}")

        print("===================================================")
    return dates_to_take

jobs_to_take = shortest_movie_first(date_ranges)

Movie with shortest duration: 2019-03-30 ====> 2019-04-02
Next earliest starting job: 2019-03-24 ====> 2019-03-27
Checking overlap with previous date: 2019-03-30 ====> 2019-04-02
No overlap with any previous date.
Appending to taken jobs list: 2019-03-24 ====> 2019-03-27
Next earliest starting job: 2019-06-16 ====> 2019-06-20
Checking overlap with previous date: 2019-03-30 ====> 2019-04-02
Checking overlap with previous date: 2019-03-24 ====> 2019-03-27
No overlap with any previous date.
Appending to taken jobs list: 2019-06-16 ====> 2019-06-20
Next earliest starting job: 2019-09-24 ====> 2019-09-28
Checking overlap with previous date: 2019-03-30 ====> 2019-04-02
Checking overlap with previous date: 2019-03-24 ====> 2019-03-27
Checking overlap with previous date: 2019-06-16 ====> 2019-06-20
No overlap with any previous date.
Appending to taken jobs list: 2019-09-24 ====> 2019-09-28
Next earliest starting job: 2019-02-06 ====> 2019-02-10
Checking overlap with previous date: 2019-03-30 =

In [65]:
print("Checking jobs and pay:")
for job in jobs_to_take:
    print(job)
    print("Pay: $X")
print(f"=============================================")

print(f"Total Pay: ${len(jobs_to_take)}X")
print(f"Total Jobs Taken: {len(jobs_to_take)}")
print(f"Total Percent of Jobs Taken: {(len(jobs_to_take)/len(date_ranges))*100}%")


Checking jobs and pay:
2019-03-30 ====> 2019-04-02
Pay: $X
2019-03-24 ====> 2019-03-27
Pay: $X
2019-06-16 ====> 2019-06-20
Pay: $X
2019-09-24 ====> 2019-09-28
Pay: $X
2019-02-06 ====> 2019-02-10
Pay: $X
2019-02-24 ====> 2019-03-01
Pay: $X
2019-09-10 ====> 2019-09-15
Pay: $X
2019-03-11 ====> 2019-03-16
Pay: $X
2019-03-04 ====> 2019-03-09
Pay: $X
2019-06-05 ====> 2019-06-10
Pay: $X
2019-08-13 ====> 2019-08-18
Pay: $X
2019-01-12 ====> 2019-01-18
Pay: $X
2019-06-21 ====> 2019-06-27
Pay: $X
2019-04-26 ====> 2019-05-03
Pay: $X
2019-07-18 ====> 2019-07-26
Pay: $X
2019-05-09 ====> 2019-05-17
Pay: $X
2019-01-25 ====> 2019-02-04
Pay: $X
2019-09-29 ====> 2019-10-15
Pay: $X
Total Pay: $18X
Total Jobs Taken: 18
Total Percent of Jobs Taken: 18.0%


This is a better heuristic than the previous one, but there might be cases where accepting the shortest job might block us from accepting two other jobs. The potential loss is less than the previous idea but it can readily limit us to half the optimal payoff.


### Earliest Ending Job First Methodology

In [66]:
def earliest_ending_job_first(date_ranges):
    dates_to_take = []
    sorted_date_ranges = np.argsort([i.range.end for i in date_ranges])
    dates_to_take.append(date_ranges[sorted_date_ranges[0]])
    print(f"Job which ends the first: {date_ranges[sorted_date_ranges[0]]}")
    start = 0
    for other_date in sorted_date_ranges[1:]:
        print(f"Next earliest ending job: {date_ranges[other_date]}")
        take_date = check_overlap(date_ranges[other_date], dates_to_take)
        if take_date:
            dates_to_take.append(date_ranges[other_date])
            print(f"Appending to taken jobs list: {date_ranges[other_date]}")

        print("===================================================")
    return dates_to_take
            
jobs_to_take = earliest_ending_job_first(date_ranges)

Job which ends the first: 2019-01-08 ====> 2019-01-15
Next earliest ending job: 2019-01-12 ====> 2019-01-18
Checking overlap with previous date: 2019-01-08 ====> 2019-01-15
Next earliest ending job: 2019-01-08 ====> 2019-01-28
Checking overlap with previous date: 2019-01-08 ====> 2019-01-15
Next earliest ending job: 2019-01-17 ====> 2019-01-31
Checking overlap with previous date: 2019-01-08 ====> 2019-01-15
No overlap with any previous date.
Appending to taken jobs list: 2019-01-17 ====> 2019-01-31
Next earliest ending job: 2019-01-18 ====> 2019-02-02
Checking overlap with previous date: 2019-01-08 ====> 2019-01-15
Checking overlap with previous date: 2019-01-17 ====> 2019-01-31
Next earliest ending job: 2019-01-05 ====> 2019-02-03
Checking overlap with previous date: 2019-01-08 ====> 2019-01-15
Checking overlap with previous date: 2019-01-17 ====> 2019-01-31
Next earliest ending job: 2019-01-25 ====> 2019-02-04
Checking overlap with previous date: 2019-01-08 ====> 2019-01-15
Checking 

In [68]:
print("Checking jobs and pay:")
for job in jobs_to_take:
    print(job)
    print("Pay: $X")
print(f"=============================================")

print(f"Total Pay: ${len(jobs_to_take)}X")
print(f"Total Jobs Taken: {len(jobs_to_take)}")
print(f"Total Percent of Jobs Taken: {(len(jobs_to_take)/len(date_ranges))*100}%")



Checking jobs and pay:
2019-01-08 ====> 2019-01-15
Pay: $X
2019-01-17 ====> 2019-01-31
Pay: $X
2019-02-06 ====> 2019-02-10
Pay: $X
2019-02-16 ====> 2019-02-26
Pay: $X
2019-03-01 ====> 2019-03-08
Pay: $X
2019-03-11 ====> 2019-03-16
Pay: $X
2019-03-24 ====> 2019-03-27
Pay: $X
2019-03-30 ====> 2019-04-02
Pay: $X
2019-04-13 ====> 2019-04-27
Pay: $X
2019-05-09 ====> 2019-05-17
Pay: $X
2019-05-25 ====> 2019-06-07
Pay: $X
2019-06-16 ====> 2019-06-20
Pay: $X
2019-06-21 ====> 2019-06-27
Pay: $X
2019-07-11 ====> 2019-07-21
Pay: $X
2019-07-30 ====> 2019-08-17
Pay: $X
2019-08-26 ====> 2019-09-15
Pay: $X
2019-09-24 ====> 2019-09-28
Pay: $X
2019-09-29 ====> 2019-10-15
Pay: $X
Total Pay: $18X
Total Jobs Taken: 18
Total Percent of Jobs Taken: 18.0%


### Evaluating Performance


In [None]:
movie_ranges = [10,50,100,150,200,250,300]
months_spread = [1,3,6,9,12]
date_ranges = generate_jobs(100,3,30,10,2019)