> **Write a program that takes two arrays of strings, and return the indices of the starting and ending index of a shortest subarray of the first array (the "paragraph" array) that "sequentially covers", i.e., contains all the strings in the second array (the "keywords" array), in the order in which they appear in the keywords array. You can assume all keywords are distinct. For example, let the paragraph array be (apple,banana,cat,apple), and the keywords array be (banana,apple). The paragraph subarray starting at index 0 and ending at index 1 does not fulfill the specification, even though it contains all the keywords, since they do not appear in the specified order. On the other hand, the subarray starting at index 1 and ending at index 3 does fulfill the specification.**

_Hint: For each index in the paragraph array, compute the shortest subarray ending at that index which fulfills the specification._

In [10]:
from collections import Counter, namedtuple
from typing import List

Subarray = namedtuple('Subarray', ('start', 'end'))


def find_smallest_sequentially_covering_subset(paragraph: List[str],keywords: List[str]) -> Subarray:
    # Maps each keyword to its index in the keywords array.
    keyword_to_idx = {k: i for i, k in enumerate(keywords)}

    # Since keywords are uniquely identified by their indices in keywords
    # array, we can use those indices as keys to lookup in an array.
    latest_occurrence = [-1] * len(keywords)
    # For each keyword (identified by its index in keywords array), the length
    # of the shortest subarray ending at the most recent occurrence of that
    # keyword that sequentially cover all keywords up to that keyword.
    shortest_subarray_length = [float('inf')] * len(keywords)

    shortest_distance = float('inf')
    result = Subarray(-1, -1)
    for i, p in enumerate(paragraph):
        if p in keyword_to_idx:
            keyword_idx = keyword_to_idx[p]
            if keyword_idx == 0:  # First keyword.
                shortest_subarray_length[keyword_idx] = 1
            elif shortest_subarray_length[keyword_idx - 1] != float('inf'):
                distance_to_previous_keyword = (i - latest_occurrence[keyword_idx - 1])
                shortest_subarray_length[keyword_idx] = (distance_to_previous_keyword +
                    shortest_subarray_length[keyword_idx - 1])
            latest_occurrence[keyword_idx] = i

            # Last keyword, for improved subarray.
            if (keyword_idx == len(keywords) - 1
                    and shortest_subarray_length[-1] < shortest_distance):
                shortest_distance = shortest_subarray_length[-1]
                result = Subarray(i - shortest_distance + 1, i)
    return result
        

p = ["apple", "banana", "apple", "apple", "dog", "apple", "apple", "cat", "apple", 
     "dog", "banana", "apple", "cat", "dog"]
k = ["cat", "dog"]
print(find_smallest_sequentially_covering_subset(p, k))

Subarray(start=12, end=13)


In [9]:
from collections import Counter, namedtuple
from typing import List

Subarray = namedtuple('Subarray', ('start', 'end'))


def find_smallest_sequentially_covering_subset(paragraph: List[str],keywords: List[str]) -> Subarray:
    pos = Subarray(-1, -1)
    
    left, right = 0, 0
    pattern = Counter(keywords)
    seen = Counter()
    while right < len(paragraph):
        seen.update({paragraph[right]})
        if pattern - seen == Counter():
            if paragraph[left] == keywords[0]:
                if pos == (-1, -1) or (right - left) < pos[1] - pos[0]:
                    pos = (left, right)
                    seen.clear()
            while paragraph[left] != keywords[0] and left <= right:
#                 if pos == (-1, -1) or (right - left) < pos[1] - pos[0]:
#                     pos = (left, right)
                prev = left
                seen[paragraph[prev]] -= 1
                if seen[paragraph[prev]] == 0:
                    seen.pop(paragraph[prev])
                left += 1
        right += 1
    
    return pos
        

p = ["apple", "banana", "apple", "apple", "dog", "apple", "apple", "cat", "apple", "dog", "banana", "apple", "cat", "dog"]
k = ["cat", "dog"]
print(find_smallest_sequentially_covering_subset(p, k))

(7, 9)
