# PATTERN: MERGE INTERVALS

https://www.educative.io/courses/grokking-the-coding-interview/3YVYvogqXpA

# Merge intervals (medium)

### Problem statement

Given a list of intervals, merge all the overlapping intervals to produce a list that has only mutually exclusive intervals.

### Example 1:

- Intervals: [[1,4], [2,5], [7,9]]
- Output: [[1,5], [7,9]]
- Explanation: Since the first two intervals [1,4] and [2,5] overlap, we merged them into 
one [1,5].

### Example 2:

- Intervals: [[6,7], [2,4], [5,9]]
- Output: [[2,4], [5,9]]
- Explanation: Since the intervals [6,7] and [5,9] overlap, we merged them into one [5,9].

### Example 3:

- Intervals: [[1,4], [2,6], [3,5]]
- Output: [[1,6]]
- Explanation: Since all the given intervals overlap, we merged them into one.

In [12]:
from __future__ import print_function

class Interval:
  """A [start, end] interval with helpers used when merging start-sorted intervals."""

  def __init__(self, start, end):
    self.start, self.end = start, end

  def print_interval(self):
    """Print the interval as "[start, end]" without a trailing newline."""
    print("[%s, %s]" % (self.start, self.end), end='')

  def overlaps(self, interval):
    """Return True when `interval` starts at or before this interval's end.

    Only valid when `interval` does not start before `self` (inputs sorted on
    start), which is why a single comparison is enough.
    """
    return self.end >= interval.start

  def merge(self, interval):
    """Return a new Interval from this start to the later of the two ends.

    Assumes `interval` does not start before `self` (inputs sorted on start).
    """
    later_end = max(self.end, interval.end)
    return Interval(self.start, later_end)

In [13]:
# Time O(n*logn)  - sort O(n*logn) - for loop O(n)
# Space O(n)      - sort O(n) - output array O(n)

def merge(intervals):
  """Merge overlapping intervals into a list of mutually exclusive intervals.

  `intervals` is a list of objects exposing `start`, `overlaps(other)` and
  `merge(other)`. Lists with fewer than two items are returned unchanged;
  otherwise the list is sorted in place by start and a new list is returned.
  """
  if len(intervals) < 2:
    return intervals

  # Once sorted by start, overlapping intervals are always neighbours
  intervals.sort(key=lambda interval: interval.start)

  merged = []
  pending = intervals[0]
  for candidate in intervals[1:]:
    if not pending.overlaps(candidate):
      # Gap found: the pending interval is final, start a new one
      merged.append(pending)
      pending = candidate
      continue
    pending = pending.merge(candidate)

  # The last pending interval has not been flushed yet
  merged.append(pending)
  return merged

In [14]:
def main():
  """Print each sample interval list followed by the result of merge()."""
  samples = [
    [Interval(1, 4), Interval(2, 5), Interval(7, 9)],
    [Interval(6, 7), Interval(2, 4), Interval(5, 9)],
    [Interval(1, 4), Interval(2, 6), Interval(3, 5)],
  ]

  for intervals in samples:
    # Print the input first: merge() sorts the list in place
    print("Intervals: ", end='')
    for interval in intervals:
      interval.print_interval()

    merged = merge(intervals)
    print(" -> Merged intervals: ", end='')
    for interval in merged:
      interval.print_interval()
    print()

main()

Intervals: [1, 4][2, 5][7, 9] -> Merged intervals: [1, 5][7, 9]
Intervals: [6, 7][2, 4][5, 9] -> Merged intervals: [2, 4][5, 9]
Intervals: [1, 4][2, 6][3, 5] -> Merged intervals: [1, 6]


# Insert interval (medium)

### Problem Statement

Given a list of non-overlapping intervals sorted by their start time, insert a given interval at the correct position and merge all necessary intervals to produce a list that has only mutually exclusive intervals.

#### Example 1:

- Input: Intervals=[[1,3], [5,7], [8,12]], New Interval=[4,6]
- Output: [[1,3], [4,7], [8,12]]
- Explanation: After insertion, since [4,6] overlaps with [5,7], we merged them into one [4,7].

#### Example 2:

- Input: Intervals=[[1,3], [5,7], [8,12]], New Interval=[4,10]
- Output: [[1,3], [4,12]]
- Explanation: After insertion, since [4,10] overlaps with [5,7] & [8,12], we merged them into [4,12].

#### Example 3:

- Input: Intervals=[[2,3],[5,7]], New Interval=[1,4]
- Output: [[1,4], [5,7]]
- Explanation: After insertion, since [1,4] overlaps with [2,3], we merged them into one [1,4].

In [23]:
# Time O(n)  - 1 iteration over list of intervals
# Space O(n) - new list of intervals

def insert(intervals, new_interval):
  """Insert `new_interval` into `intervals` and merge whatever it overlaps.

  `intervals` is expected to be non-overlapping and sorted by start; each
  interval is indexable as [start, end]. Returns a new list.
  """
  # Keep the intervals that end before the new interval starts
  merged = []
  for interval in intervals:
    if new_interval[0] > interval[1]:
      merged.append(interval)
  position = len(merged)

  # Absorb every following interval that overlaps the growing interval
  combined = new_interval
  while position < len(intervals) and overlaps(intervals[position], combined):
    combined = merge(combined, intervals[position])
    position += 1
  merged.append(combined)

  # Everything left starts after the combined interval
  merged.extend(intervals[position:])

  return merged


def overlaps(interval1, interval2):
  """Return True when two [start, end] intervals share at least one point."""
  # interval2 starts inside interval1
  if interval1[0] <= interval2[0] and interval2[0] <= interval1[1]:
    return True
  # otherwise interval1 must start inside interval2
  return interval2[0] <= interval1[0] and interval1[0] <= interval2[1]

def merge(interval1, interval2):
  """Return the [start, end] list spanning both intervals."""
  earliest_start = min(interval1[0], interval2[0])
  latest_end = max(interval1[1], interval2[1])
  return [earliest_start, latest_end]

In [24]:
def main():
  """Print the result of insert() for four sample inputs."""
  samples = (
    ([[1, 3], [5, 7], [6, 7], [8, 12]], [4, 6]),
    ([[1, 3], [5, 7], [8, 12]], [4, 10]),
    ([[2, 3], [5, 7]], [1, 4]),
    ([[1, 3], [5, 6], [7, 9]], [2, 3]),
  )
  for intervals, new_interval in samples:
    print("Intervals after insertion: " + str(insert(intervals, new_interval)))


main()

Intervals after insertion: [[1, 3], [4, 7], [8, 12]]
Intervals after insertion: [[1, 3], [4, 12]]
Intervals after insertion: [[1, 4], [5, 7]]
Intervals after insertion: [[1, 3], [5, 6], [7, 9]]


# Intervals intersection (medium)

### Problem Statement

Given two lists of intervals, find the intersection of these two lists. Each list consists of disjoint intervals sorted on their start time.

#### Example 1:

- Input: arr1=[[1, 3], [5, 6], [7, 9]], arr2=[[2, 3], [5, 7]]
- Output: [2, 3], [5, 6], [7, 7]
- Explanation: The output list contains the common intervals between the two lists.

#### Example 2:

- Input: arr1=[[1, 3], [5, 7], [9, 12]], arr2=[[5, 10]]
- Output: [5, 7], [9, 10]
- Explanation: The output list contains the common intervals between the two lists.

In [27]:
# Time O(m+n)   - m and n number of elements in the 2 input lists
# Space O(m+n)  - max size of the result list

def merge(intervals_a, intervals_b):
  """Return the intersection of two interval lists.

  Each input must contain disjoint closed intervals [start, end] sorted by
  start. The result is a list of new [start, end] lists, in ascending order,
  covering every point that belongs to both inputs.

  The walk uses one cursor per list and advances only the cursor whose
  interval ends first. The longer interval is kept so it can also be
  intersected with the next interval of the other list, e.g.
  merge([[1, 10]], [[2, 3], [5, 6]]) == [[2, 3], [5, 6]].
  """
  result = []
  i, j = 0, 0

  while i < len(intervals_a) and j < len(intervals_b):
    a_start, a_end = intervals_a[i][0], intervals_a[i][1]
    b_start, b_end = intervals_b[j][0], intervals_b[j][1]

    # The common part, if any, runs from the later start to the earlier end
    start = max(a_start, b_start)
    end = min(a_end, b_end)
    if start <= end:
      result.append([start, end])

    # The interval that ends first cannot intersect anything further
    if a_end < b_end:
      i += 1
    else:
      j += 1

  return result


def overlaps(interval1, interval2):
  """Tell whether two closed [start, end] intervals have a point in common."""
  # Either interval's start must fall within the other interval
  return (interval1[0] <= interval2[0] <= interval1[1]
          or interval2[0] <= interval1[0] <= interval2[1])

def intersect(interval1, interval2):
  """Return [later start, earlier end] of the two intervals.

  The result is only a valid interval when the inputs overlap.
  """
  later_start = max(interval1[0], interval2[0])
  earlier_end = min(interval1[1], interval2[1])
  return [later_start, earlier_end]

In [28]:
def main():
  """Print the intersection computed by merge() for two sample inputs."""
  samples = (
    ([[1, 3], [5, 6], [7, 9]], [[2, 3], [5, 7]]),
    ([[1, 3], [5, 7], [9, 12]], [[5, 10]]),
  )
  for intervals_a, intervals_b in samples:
    print("Intervals Intersection: " + str(merge(intervals_a, intervals_b)))


main()

Intervals Intersection: [[2, 3], [5, 6], [7, 7]]
Intervals Intersection: [[5, 7], [9, 10]]


# Conflicting appointments (medium)

### Problem Statement

Given an array of intervals representing ‘N’ appointments, find out if a person can attend all the appointments.

#### Example 1:

- Appointments: [[1,4], [2,5], [7,9]]
- Output: false
- Explanation: Since [1,4] and [2,5] overlap, a person cannot attend both of these appointments.

#### Example 2:

- Appointments: [[6,7], [2,4], [8,12]]
- Output: true
- Explanation: None of the appointments overlap, therefore a person can attend all of them.

#### Example 3:

- Appointments: [[4,5], [2,3], [3,6]]
- Output: false
- Explanation: Since [4,5] and [3,6] overlap, a person cannot attend both of these appointments.

In [29]:
# Time O(n*logn)  - sort O(n*logn) - for loop O(n)
# Space O(n)      - sort

def can_attend_all_appointments(intervals):
  """Return True when no two appointments in `intervals` conflict.

  `intervals` is a list of [start, end] pairs; it is sorted in place by start.
  Two appointments conflict only when the later one starts strictly before
  the earlier one ends. Back-to-back appointments such as [1, 2] and [2, 3]
  can both be attended, in the same way that min_meeting_rooms lets a meeting
  reuse a room that frees up at its start time.
  """
  # Sort intervals based on their start time
  intervals.sort(key=lambda appointment: appointment[0])

  # After sorting, any conflict shows up between two neighbours
  for previous, current in zip(intervals, intervals[1:]):
    if current[0] < previous[1]:
      return False

  return True


def overlaps(interval1, interval2):
  """Check whether two closed [start, end] intervals overlap (touching counts)."""
  first_start, second_start = interval1[0], interval2[0]
  if first_start <= second_start:
    # interval2 begins at or after interval1: overlap if it begins before interval1 ends
    if second_start <= interval1[1]:
      return True
  # Otherwise interval1 must begin inside interval2
  return second_start <= first_start and first_start <= interval2[1]
        

In [30]:
def main():
  """Print the can_attend_all_appointments() verdict for three sample schedules."""
  schedules = (
    [[1, 4], [2, 5], [7, 9]],
    [[6, 7], [2, 4], [8, 12]],
    [[4, 5], [2, 3], [3, 6]],
  )
  for appointments in schedules:
    print("Can attend all appointments: " + str(can_attend_all_appointments(appointments)))

main()

Can attend all appointments: False
Can attend all appointments: True
Can attend all appointments: False


# Merge list of intervals

In [31]:
def merge(intervals_a, intervals_b):
  """Merge two interval lists into one list of mutually exclusive intervals.

  Each input is expected to be sorted by start and free of internal overlaps;
  intervals are indexable as [start, end]. Intervals that overlap or touch
  (start <= previous end) are combined. The result is a list of new
  [start, end] lists sorted by start; the inputs are not modified.

  The two lists are consumed in global start order and every interval is
  folded into the last output interval when it overlaps it. This also covers
  the case where one merged interval swallows several intervals of the other
  list, e.g. merge([[1, 10]], [[2, 3], [5, 6]]) == [[1, 10]].
  """
  result = []
  i = j = 0
  len_a, len_b = len(intervals_a), len(intervals_b)

  while i < len_a or j < len_b:
    # Take whichever pending interval starts first (list A wins ties)
    take_from_a = j == len_b or (i < len_a and intervals_a[i][0] <= intervals_b[j][0])
    if take_from_a:
      current = intervals_a[i]
      i += 1
    else:
      current = intervals_b[j]
      j += 1

    if result and current[0] <= result[-1][1]:
      # Overlaps the last output interval: extend it if needed
      result[-1][1] = max(result[-1][1], current[1])
    else:
      # Copy so that later extensions never touch the caller's lists
      result.append([current[0], current[1]])

  return result


def overlaps(interval1, interval2):
  """Return True when the closed intervals [start, end] intersect."""
  # Try both orders: one interval's start must lie within the other interval
  return any(
    outer[0] <= inner[0] <= outer[1]
    for outer, inner in ((interval1, interval2), (interval2, interval1))
  )

def merge_intervals(interval1, interval2):
  """Return the [start, end] list that covers both intervals."""
  starts = (interval1[0], interval2[0])
  ends = (interval1[1], interval2[1])
  return [min(starts), max(ends)]


def main():
  """Print the merged (union) interval list produced by merge() for two samples.

  The label reads "Merged intervals" because merge() in this section combines
  the two lists rather than intersecting them.
  """
  samples = (
    ([[1, 3], [5, 6], [7, 9]], [[2, 3], [5, 7]]),
    ([[1, 3], [5, 7], [9, 12]], [[5, 10]]),
  )
  for intervals_a, intervals_b in samples:
    print("Merged intervals: " + str(merge(intervals_a, intervals_b)))


main()


[[1, 3]]
[[1, 3], [5, 9]]
Intervals Intersection: [[1, 3], [5, 9]]
[[1, 3], [5, 12]]
Intervals Intersection: [[1, 3], [5, 12]]


# Minimum meeting rooms (hard)

### Problem statement

Given a list of intervals representing the start and end time of ‘N’ meetings, find the minimum number of rooms required to hold all the meetings.

#### Example 1:

- Meetings: [[1,4], [2,5], [7,9]]
- Output: 2
- Explanation: Since [1,4] and [2,5] overlap, we need two rooms to hold these two meetings. [7,9] can 
occur in any of the two rooms later.

#### Example 2:

- Meetings: [[6,7], [2,4], [8,12]]
- Output: 1
- Explanation: None of the meetings overlap, therefore we only need one room to hold all meetings.

#### Example 3:

- Meetings: [[1,4], [2,3], [3,6]]
- Output: 2
- Explanation: Since [1,4] overlaps with the other two meetings [2,3] and [3,6], we need two rooms to 
hold all the meetings.

#### Example 4:

- Meetings: [[4,5], [2,3], [2,4], [3,5]]
- Output: 2
- Explanation: We will need one room for [2,3] and [3,5], and another room for [2,4] and [4,5].


In [35]:
# Time O(n*logn)  - sort O(n*logn) - heapq pop/push O(logn) because based on binary tree
# Space O(n)      - sort

from heapq import *

class Meeting:
  """A meeting with a start and an end hour, ordered by end hour."""

  def __init__(self, start, end):
    self.start, self.end = start, end

  def __lt__(self, other):
    """Compare on end hour so heapq keeps the earliest-ending meeting on top."""
    return self.end < other.end

  def print_meeting(self):
    """Return (not print) the meeting formatted as '[start, end]'."""
    return f'[{self.start}, {self.end}]'


def min_meeting_rooms(meetings):
  """Return the minimum number of rooms needed to hold all `meetings`.

  `meetings` is sorted in place by start. Each item needs `start`, `end` and
  a `<` comparison, which is what orders the heap of ongoing meetings.
  """
  meetings.sort(key=lambda m: m.start)

  ongoing = []      # heap of meetings still running when the current one starts
  rooms_needed = 0

  for meeting in meetings:
    # Free the rooms of meetings that are over by the time this one starts
    while ongoing and ongoing[0].end <= meeting.start:
      heappop(ongoing)

    heappush(ongoing, meeting)

    # The heap size is the number of rooms occupied right now
    if len(ongoing) > rooms_needed:
      rooms_needed = len(ongoing)

  return rooms_needed

In [36]:
def main():
  """Print min_meeting_rooms() for the sample meeting lists (the last one is repeated)."""
  samples = [
    [Meeting(1, 4), Meeting(2, 5), Meeting(7, 9)],
    [Meeting(6, 7), Meeting(2, 4), Meeting(8, 12)],
    [Meeting(1, 4), Meeting(2, 3), Meeting(3, 6)],
    [Meeting(4, 5), Meeting(2, 3), Meeting(2, 4), Meeting(3, 5)],
    [Meeting(4, 5), Meeting(2, 3), Meeting(2, 4), Meeting(3, 5)],
  ]
  for meetings in samples:
    print("Minimum meeting rooms required: " + str(min_meeting_rooms(meetings)))

main()

Minimum meeting rooms required: 2
Minimum meeting rooms required: 1
Minimum meeting rooms required: 2
Minimum meeting rooms required: 2
Minimum meeting rooms required: 2


### Similar problems

#### Problem 1
Given a list of intervals, find the point where the maximum number of intervals overlap.

#### Problem 2
Given a list of intervals representing the arrival and departure times of trains to a train station, our goal is to find the minimum number of platforms required for the train station so that no train has to wait.

# Maximum CPU load (hard)

### Problem statement

We are given a list of Jobs. Each job has a Start time, an End time, and a CPU load when it is running. Our goal is to find the maximum CPU load at any time if all the jobs are running on the same machine.

#### Example 1:

- Jobs: [[1,4,3], [2,5,4], [7,9,6]]
- Output: 7
- Explanation: Since [1,4,3] and [2,5,4] overlap, their maximum CPU load (3+4=7) will be when both the 
jobs are running at the same time i.e., during the time interval (2,4).

#### Example 2:

- Jobs: [[6,7,10], [2,4,11], [8,12,15]]
- Output: 15
- Explanation: None of the jobs overlap, therefore we will take the maximum load of any job which is 15.

#### Example 3:

- Jobs: [[1,4,2], [2,4,1], [3,6,5]]
- Output: 8
- Explanation: Maximum CPU load will be 8 as all jobs overlap during the time interval [3,4]. 

In [42]:
# Time O(n*logn)  - sort O(n*logn) - heapq pop/push O(logn) because based on binary tree
# Space O(n)      - sort

from heapq import *

class job:
  """A job with a start time, an end time and the CPU load it adds while running."""

  def __init__(self, start, end, cpu_load):
    self.start = start
    self.end = end
    self.cpu_load = cpu_load

  def __lt__(self, other):
    """Order jobs by end time so a heap keeps the earliest-ending job on top.

    The comparison result must be returned: without `return` the method
    yields None, which heapq and sorted() treat as "never smaller".
    """
    return self.end < other.end


def find_max_cpu_load(jobs):
  """Return the highest total CPU load reached while the given jobs run.

  `jobs` is a list of objects with `start`, `end` and `cpu_load`; it is
  sorted in place by start time. A job that ends exactly when another starts
  is not counted as running alongside it. Returns 0 for an empty list.

  Jobs must be visited in start order: only then is every job removed from
  the heap guaranteed to be over for all jobs still to come. Heap entries are
  plain (end, cpu_load) tuples, so the ordering does not depend on how the
  job objects compare, and the running total is updated incrementally
  instead of re-summing the heap for every job.
  """
  jobs.sort(key=lambda j: j.start)

  max_load = 0
  current_load = 0
  running = []  # min-heap of (end, cpu_load) for the jobs currently running

  for current in jobs:
    # Drop the jobs that have finished by the time the current job starts
    while running and running[0][0] <= current.start:
      _, finished_load = heappop(running)
      current_load -= finished_load

    heappush(running, (current.end, current.cpu_load))
    current_load += current.cpu_load
    max_load = max(max_load, current_load)

  return max_load

In [43]:
def main():
    """Print find_max_cpu_load() for the three sample job lists."""
    samples = [
        [job(1, 4, 3), job(2, 5, 4), job(7, 9, 6)],
        [job(6, 7, 10), job(2, 4, 11), job(8, 12, 15)],
        [job(1, 4, 2), job(2, 4, 1), job(3, 6, 5)],
    ]
    for jobs in samples:
        print("Maximum CPU load: " + str(find_max_cpu_load(jobs)))

main()

Maximum CPU load: 7
Maximum CPU load: 15
Maximum CPU load: 8


# Employee free times (hard)

### Problem statement

For ‘K’ employees, we are given a list of intervals representing each employee’s working hours. Our goal is to find all the free intervals that are common to all employees. You can assume that each list of employee working hours is sorted on the start time.

#### Example 1:

- Input: Employee Working Hours=[[[1,3], [5,6]], [[2,3], [6,8]]]
- Output: [3,5]
- Explanation: All the employees are free between [3,5].

#### Example 2:

- Input: Employee Working Hours=[[[1,3], [9,12]], [[2,4]], [[6,8]]]
- Output: [4,6], [8,9]
- Explanation: All employees are free between [4,6] and [8,9].

#### Example 3:

- Input: Employee Working Hours=[[[1,3]], [[2,4]], [[3,5], [7,9]]]
- Output: [5,7]
- Explanation: All employees are free between [5,7].

## Method

- min_heap is sorted by start times
- Insert the first job of each employee into the min_heap;    
  each time a job is popped, insert the next job of the same employee into the min_heap,    
  so the heap never holds more than one job per employee
  - Example: 
    - input [ [[1,3], [5,6]] , [[2,3], [6,8]] ]
    - jobs leave the min_heap in the order [1,3], [2,3], [5,6], [6,8]
- Then compare the pairs of consecutive jobs in the min_heap to find gaps:
  - [1,3] vs [2,3] : no gap -> pop job [1,3]
  - [2,3] vs [5,6] : gap    -> insert gap [3,5] in result list + pop job [2,3]
  - [5,6] vs [6,8] : no gap -> pop job [5,6]

In [66]:
# Time O(n * logk)  - k employees, n intervals - iterate through intervals O(n) - push/pop in heapq O(logk)
# Space O(k)        - heap length is max k at any time (every time we push, we also pop)

from __future__ import print_function
from heapq import *

class Interval:
    """A working-hours interval with a start and an end."""

    def __init__(self, start, end):
        self.start, self.end = start, end

    def print_interval(self):
        """Return (not print) the interval formatted as "[start, end]"."""
        return "[%s, %s]" % (self.start, self.end)


class EmployeeInterval:
    """An interval tagged with its owner and its position in the owner's schedule.

    interval:        object exposing `start` (and `end`)
    interval_index:  index of `interval` within that employee's schedule
    employee_index:  index of the employee within the overall schedule
    """

    def __init__(self, interval: "Interval", interval_index: int, employee_index: int):
        self.interval = interval
        self.interval_index = interval_index
        self.employee_index = employee_index

    def __lt__(self, other):
        """Order by interval start so heapq pops the earliest-starting interval.

        The comparison result must be returned: without `return` the method
        yields None and the heap is never reordered.
        """
        return self.interval.start < other.interval.start


def find_employee_free_time(schedule):
    """Return the intervals during which every employee is free.

    `schedule` holds one list of working intervals per employee, each sorted
    by start. Returns a list of Interval objects (the gaps between busy
    periods), an empty list when nobody has any working interval, or None
    when `schedule` is None.
    """
    if schedule is None:
        return None

    result, min_heap = [], []

    # Insert the first interval of each employee; skip employees with no hours
    for employee_index, employee_schedule in enumerate(schedule):
        if employee_schedule:
            heappush(min_heap, EmployeeInterval(employee_schedule[0], 0, employee_index))

    # Nothing scheduled at all: no busy periods, hence no gaps between them
    if not min_heap:
        return result

    # Walk the intervals in start order, tracking the one that ends latest so far
    previous_interval = min_heap[0].interval
    while min_heap:
        next_employee = heappop(min_heap)
        next_interval = next_employee.interval

        if previous_interval.end < next_interval.start:
            # Nobody works between the two intervals: that is a common free slot
            result.append(Interval(previous_interval.end, next_interval.start))
            previous_interval = next_interval
        elif previous_interval.end < next_interval.end:
            # Overlap: keep whichever interval ends later as the reference
            previous_interval = next_interval

        # Refill the heap with the next interval of the employee just popped
        employee_index = next_employee.employee_index
        employee_schedule = schedule[employee_index]
        following_index = next_employee.interval_index + 1

        if following_index < len(employee_schedule):
            heappush(min_heap, EmployeeInterval(employee_schedule[following_index],
                                                following_index,
                                                employee_index))

    return result

In [65]:
def main():
    """Print the number of common free intervals, then the intervals, for each sample."""
    first_case = [[Interval(1, 3), Interval(5, 6)], [Interval(2, 3), Interval(6, 8)]]
    second_case = [[Interval(1, 3), Interval(9, 12)], [Interval(2, 4)], [Interval(6, 8)]]
    third_case = [[Interval(1, 3)], [Interval(2, 4)], [Interval(3, 5), Interval(7, 9)]]

    for schedule in (first_case, second_case, third_case):
        free_times = find_employee_free_time(schedule)
        print("Free intervals: " + str(len(free_times)) + " ", end='')
        for free_slot in free_times:
            print(free_slot.print_interval(), end=" ")
        print()


main()

Free intervals: 1 [3, 5] 
Free intervals: 2 [4, 6] [8, 9] 
Free intervals: 1 [5, 7] 
