# Example: Character with the longest consecutive subsequence of a string

In [None]:
def check(implementation):
    """Run `implementation` against a fixed table of example strings.

    `implementation` is called with each input string and must return a
    `(character, count)` tuple equal to the expected one. Every case is
    printed before it is run, and the first mismatch stops the run with an
    AssertionError.
    """
    # (input, expected) pairs. Ties go to the earliest run ("aabb" -> 'a'),
    # and the empty string is expected to give ('', 0).
    cases = (
        ("aaaabb", ('a', 4)),
        ("bbbaaabaaaa", ('a', 4)),
        ("bbbaaaabaaa", ('a', 4)),
        ("cbdeuuu900", ('u', 3)),
        ("abbbbb", ('b', 5)),
        ("aabb", ('a', 2)),
        ("ba", ('b', 1)),
        ("", ('', 0)),
    )
    for s, true_answer in cases:
        print(f"Input: {repr(s)} => Expected output: {true_answer}")
        our_answer = implementation(s)
        assert our_answer == true_answer, f"{our_answer} does not match expected result."

## Method 0: "One-at-a-time" approach

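Scan the string from left to right, one character at a time. Keep track of the current run of identical characters and of the longest run encountered so far; update the longest run every time the character changes (that is, whenever a run ends), and once more when the end of the string is reached.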

In [None]:
# Sample input made of the runs "bbb", "aaaa", "b", "aaa".
s = "bbbaaaabaaa"

**Aside: Refactoring.**

In [None]:
def update_lcs(p, pc, l, lc):
    """Pick the longer of two runs, each given as a (character, count) pair.

    `p`/`pc` describe the candidate run and `l`/`lc` the longest run so far.
    The candidate wins only when it is strictly longer, so on a tie the
    existing longest run is kept.
    """
    if pc > lc:
        return (p, pc)
    return (l, lc)

def lcs0(s):
    """Return (character, length) of the longest run of a repeated character in `s`.

    Scans `s` once from left to right. When several runs share the maximum
    length, the earliest one is returned, because `update_lcs` only replaces
    the best run with a strictly longer one. An empty `s` gives ('', 0).
    """
    # Invariant: `run_char` repeated `run_len` times is the run that ends at
    # the most recently scanned character.
    run_char, run_len = '', 0
    best_char, best_len = '', 0
    for ch in s:  # single pass, O(n)
        if ch != run_char:
            # The previous run just ended: score it, then start a new run.
            best_char, best_len = update_lcs(run_char, run_len, best_char, best_len)
            run_char, run_len = ch, 1
        else:
            run_len += 1
    # The last run is never closed inside the loop, so score it here.
    return update_lcs(run_char, run_len, best_char, best_len)

# Try Method 0 on the sample string; for "bbbaaaabaaa" the result is ('a', 4).
lcs0(s)

In [None]:
# Run the shared test table against Method 0; an AssertionError is raised on the first mismatch.
check(lcs0)

## Method 1: A "data-parallel" approach

Compare every pair of adjacent characters and record the positions where they differ. Each such position is the index of the last letter of a run of identical characters; the final index of the string ends the last run. Taking the differences between successive end positions, starting from a sentinel of -1, yields the length of each run.

In [None]:
# Sample input made of the runs "bbb", "aaaa", "b", "aaa".
s = "bbbaaaabaaa"

```python
                    1
0 1 2 3 4 5 6 7 8 9 0
b b b a a a a b a a a
b b a a a a b a a a
```

```python
-1, 2, 6, 7, 10  # <-- look at difference between pairs
 3, 4, 1, 3
```

In [None]:
def neighbor_pairs(s):
    """Return an iterator over the (left, right) pairs of adjacent elements of `s`.

    `s` must support slicing (for example a string or a list). Sequences with
    fewer than two elements produce no pairs.
    """
    lefts = s[:-1]   # everything except the last element
    rights = s[1:]   # everything except the first element
    return zip(lefts, rights)

def find_changes(s):
    """Return the indices at which a run of identical elements ends in `s`.

    An index k is included when s[k] differs from s[k+1]. The last index,
    len(s) - 1, is always appended because the final run ends there; for an
    empty `s` this means the result is [-1].
    """
    pairs = neighbor_pairs(s)
    run_ends = [idx for idx, (a, b) in enumerate(pairs) if a != b]
    run_ends.append(len(s) - 1)
    return run_ends

def diffs(x):
    """Return the differences between successive elements of `x` (later minus earlier)."""
    return [later - earlier for earlier, later in neighbor_pairs(x)]

def lcs1(s):
    """Return (character, length) of the longest run of a repeated character in `s`.

    Works on whole lists instead of scanning character by character: find the
    index where each run ends, turn neighboring end indices into run lengths,
    and pick the longest. `max` returns the first maximal item, so ties go to
    the earliest run. An empty `s` gives ('', 0).
    """
    run_ends = find_changes(s)                       # O(n), n = |s|
    boundaries = [-1] + run_ends                     # -1 acts as the end of a run "before" s
    run_lengths = diffs(boundaries)                  # O(n)
    # Slice rather than index so that the -1 produced for an empty s yields ''.
    run_letters = [s[end:end+1] for end in run_ends] # O(n)
    candidates = zip(run_letters, run_lengths)
    return max(candidates, key=lambda pair: pair[1]) # O(n)

# Show the input next to Method 1's answer, as the tuple (s, (character, count)).
s, lcs1(s)

In [None]:
# Run the shared test table against Method 1; an AssertionError is raised on the first mismatch.
check(lcs1)

## Method 2a: String splitting by slicing

In [None]:
# Sample input made of the runs "bbb", "aaaa", "b", "aaa".
s = "bbbaaaabaaa"

In [None]:
# Slice s by hand into its four runs. A slice a:b is half-open: it covers
# indices a up to but not including b.
s[0:3], s[3:7], s[7:8], s[8:11]  # a:b => [a, b) vs. [a, b]

In [None]:
# Indices of the last character of each run, including the final index len(s) - 1.
find_changes(s)

In [None]:
def inc(x):
    """Return a new list holding every element of `x` increased by one."""
    return [value + 1 for value in x]

# Shift each run-end index by one so it can serve as an exclusive slice end.
ends = inc(find_changes(s))
ends

In [None]:
# Every run begins where the previous one ended; the first run begins at index 0.
starts = [0] + ends[:-1]
starts

In [None]:
# Cut s into its runs using the half-open [start, end) index pairs.
[s[a:b] for a, b in zip(starts, ends)]

## Method 2b: Transform + (literal) string-splitting

In [None]:
# str.split cuts the string at every occurrence of the separator and drops the separator itself.
'the quick brown fox jumps over the lazy dog'.split('fox')

In [None]:
# Sample input made of the runs "bbb", "aaaa", "b", "aaa".
s = "bbbaaaabaaa"

In [None]:
# Hand-written target: with a separator placed between the runs, split() returns the runs directly.
s_new = "bbb|aaaa|b|aaa".split('|')
s_new

In [None]:
# The separator must not occur in s; otherwise split() would also cut at
# those original occurrences and not only at the run boundaries.
SEPARATOR = '|'
assert SEPARATOR not in s

In [None]:
# Insert SEPARATOR after every character that differs from its right-hand neighbor.
# neighbor_pairs only contributes the left character of each pair, so the final
# character of s is appended separately. The slice s[-1:] (rather than s[-1]) gives
# the same result for a non-empty s and avoids an IndexError when s is empty.
s_new = ''.join([(left if left == right else left+SEPARATOR) for left, right in neighbor_pairs(s)]) + s[-1:]
s_new

In [None]:
# Splitting at the inserted separators recovers the list of runs.
s_new.split(SEPARATOR)