Continuing to review old questions from the stack.

Now cycling between 5 old and 5 new (but related problems).

1. Find largest palindrome in a string.
2. Number of unique decodings.
3. Implement MinStack, with all these ops running in constant time: push, pop, getMin, top
4. Dictionary word break.
**5. Smallest substring that contains all letters of the target string.**

6. Word search in matrix.
7. Max-product of a sub-array.
---

# 1. Find largest palindrome in a string.

Start from the 1st position. Keep a pointer ahead of the 1st pointer.
Iterate over all such positions, and keep checking for palilndromes.

A faster solution:
1. Given a string `s` which needs to be checked, check if `s[1:-1]` is a palindrome.
2. If it is, then `s` is a palindrome if `s[0]` and `s[-1]` are the same.
3. Build this structure iteratively, using uni-chars, then bi-chars, then tri-chars.
    * Solve for these problems first, then use these to check for bigger strings.

In [232]:
class LargestPalindrome:
    def slow(self, s):
        if len(s) == 1:
            return s
        
        ans = ''
        for i in range(len(s)):
            for j in range(i, len(s)):
                temp = s[i:j+1]
                if len(temp) > len(ans) and temp == temp[::-1]:
                    ans = temp
                    
        return ans
    
    def fast(self, s):
        structure = [[0 for i in range(len(s))] for j in range(len(s))]
        for i in range(len(s)):
            structure[i][i] = 1
            if (i+1) < len(s) and s[i] == s[i+1]:
                s[i][i+1] = 1
                
        ans = s[0:1]
        
        for gap in range(2, len(s)):
            for i in range(len(s)-gap):
                j = i + gap
                if structure[i+1][j-1] and s[i] == s[j]:
                    structure[i][j] = 1
                    temp = s[i:j+1]
                    if len(temp) > len(ans):
                        ans = temp
                        
        return ans

In [233]:
t = LargestPalindrome()
t.slow('abcba')

'abcba'

In [234]:
t.fast('abcban')

'abcba'

# 2. Find number of unique decodings for a string.

Given a string of numbers, convert it into all possible `decodings`.

Any number can be converted into an alphabet, except for 0, and substrings starting from 0.

```
s: 11106
1: 11, 10, 6
2: 1, 1, 10, 6
```

At each position, we can decode the current char, or the current+next chars.

Remember, that the memoization here, is that you can have different paths which will land on the same position!

So, you should `memo` the number of unique decodings at different positions -- this way, each position will be visited only once!

In [56]:
from functools import lru_cache

class UniqueDecodings:
    def solve(self, s):
        self.ans = 0
        valid = set([str(i) for i in range(1, 27)])
        
        @lru_cache(maxsize=None)
        def decode(position):
            if position == len(s):
                return 1
            else:
                if s[position] in valid:
                    single = decode(position+1)
                    if (position+1) < len(s) and s[position:position+2] in valid:
                        double = decode(position+2)
                        return single + double
                    else:
                        return single
                else:
                    return 0
                        
        return decode(0)

In [58]:
t = UniqueDecodings()
t.solve('11106')

2

# 3. Implement a MinStack

All these ops need to run in constant time:
* push
* pop
* getMin
* top

In [65]:
class MinStack:
    def __init__(self):
        self.stack = []
        self.mins = []
        
    def __str__(self):
        return str(self.stack) + str(self.mins)
        
    def push(self, x):
        self.stack.append(x)
        if self.mins:
            if x < self.mins[-1]:
                self.mins.append(x)
            else:
                self.mins.append(self.mins[-1])
        else:
            self.mins.append(x)
            
    def pop(self):
        self.mins.pop()
        return self.stack.pop()
    
    def getMin(self):
        return self.mins[-1]
    
    def top(self):
        return self.stack[-1]

In [66]:
t = MinStack()
t.push(2)
t.push(3)
t.push(1)
print(t)
t.pop()
t.push(0)
t.push(4)
print(t)

[2, 3, 1][2, 2, 1]
[2, 3, 0, 4][2, 2, 0, 0]


# 4. Dictionary word break

Confirm if a string can be split into words present in a dictionary.
```
applepenapple: [apple, pen, apple]
```

Start from the first position, keep moving up:
* if you found a word, AND the rest of the string can ALSO be split, then return `True`
    * This means, you launch a recursive call for `check(pos+1)`
* else you just move on to the next position


Say the substring that contains the last character exists -- you have now reached the end of the string.
* Return `True`

### Complexity?
The inner loop is $O(n^2)$ because:
* for each char in the string, you check for all substrings in the future.

But remeber, you also sub-select the substring: that is $O(n)$

**Ans:** $O(n^3)$

In [188]:
from functools import lru_cache

class WordBreak:
    def solve(self, s, words):
        """
        Note, `words` is a list.
        """
        words = set(words)
        
        @lru_cache(maxsize=None)
        def check(start):
            if start == len(s):
                return True
            
            for i in range(start, len(s)):
                subword = s[start:i+1]
                if subword in words and check(i+1):
                    return True
                
            return False
        
        return check(0)

In [189]:
t = WordBreak()
t.solve('applespen', ['apple', 'apple', 'pen'])

False

# 5. Smallest substring that contains all letters of the target.

```
s: ABDCAB
t: ABC

ans: CAB
```

Two-pointer should work?
* Keep track of chars, and a counter for the chars in the target.

In [230]:
from collections import Counter

class SmallestContains:
    def solve(self, source, target):
        letters = set(target)
        remaining = Counter(target)
        seen = {}
        
        size = len(source)+1
        ans = ''
        
        start = 0
        for end in range(len(source)):
            print("posx:", start, end)
            print("seen:", seen)
            print("rema:", remaining)

            if source[end] in seen:
                seen[source[end]] += 1
            else:
                seen[source[end]] = 1
                
            if source[end] in letters:
                # Must be deleted.
                if source[end] in remaining:
                    remaining[source[end]] -= 1
                    if remaining[source[end]] == 0:
                        del remaining[source[end]]
                else:
                    while start < end:
                        print("while:", seen)
                        seen[source[start]] -= 1
                        if seen[source[start]] == 0:
                            del seen[source[start]]
                            
                        if source[start] in letters:
                            if source[start] in remaining:
                                remaining[source[start]] += 1
                            else:
                                remaining[source[start]] = 1
                                
                        start += 1
                        
            print("POST")
            print("posx:", start, end)
            print("seen:", seen)
            print("rema:", remaining)
            print()
            
#             # while start < end and all([char in seen and count <= seen[char] for char, count in remaining.items()]):
#             while start < end and len(remaining) == 0:
#                 if end+1 - start < size:
#                     size = end+1 - start
#                     ans = source[start:end+1]
                    
#                 seen[source[start]] -= 1
#                 if seen[source[start]] == 0:
#                     del seen[source[start]]
                    
#                 if source[start] in remaining:
#                     remaining[source[start]] += 1
#                 else:
#                     remaining[source[start]] = 1
                    
#                 start += 1
                
#         return size, ans

In [231]:
t = SmallestContains()
t.solve('aaaabdbcb', 'abc')

posx: 0 0
seen: {}
rema: Counter({'a': 1, 'b': 1, 'c': 1})
POST
posx: 0 0
seen: {'a': 1}
rema: Counter({'b': 1, 'c': 1})

posx: 0 1
seen: {'a': 1}
rema: Counter({'b': 1, 'c': 1})
while: {'a': 2}
POST
posx: 1 1
seen: {'a': 1}
rema: Counter({'b': 1, 'c': 1, 'a': 1})

posx: 1 2
seen: {'a': 1}
rema: Counter({'b': 1, 'c': 1, 'a': 1})
POST
posx: 1 2
seen: {'a': 2}
rema: Counter({'b': 1, 'c': 1})

posx: 1 3
seen: {'a': 2}
rema: Counter({'b': 1, 'c': 1})
while: {'a': 3}
while: {'a': 2}
POST
posx: 3 3
seen: {'a': 1}
rema: Counter({'a': 2, 'b': 1, 'c': 1})

posx: 3 4
seen: {'a': 1}
rema: Counter({'a': 2, 'b': 1, 'c': 1})
POST
posx: 3 4
seen: {'a': 1, 'b': 1}
rema: Counter({'a': 2, 'c': 1})

posx: 3 5
seen: {'a': 1, 'b': 1}
rema: Counter({'a': 2, 'c': 1})
POST
posx: 3 5
seen: {'a': 1, 'b': 1, 'd': 1}
rema: Counter({'a': 2, 'c': 1})

posx: 3 6
seen: {'a': 1, 'b': 1, 'd': 1}
rema: Counter({'a': 2, 'c': 1})
while: {'a': 1, 'b': 2, 'd': 1}
while: {'b': 2, 'd': 1}
while: {'b': 1, 'd': 1}
POST
posx: 6 

# 6. Word Search in Matrix

```
h e h s
o l h x
l l o e
```

Return `True` if the word `hello` exists in the grid.

You are only permitted to move in adjacent directions.

In [253]:
class WordSearch:
    def solve(self, word, matrix):
        rows = len(matrix)
        cols = len(matrix[0])
        
        def search(i, x, y):
            if matrix[x][y] == word[i]:
                if i == len(word)-1:
                    return True
                else:
                    char = matrix[x][y]
                    matrix[x][y] = None
                    for a, b in [(x, y+1), (x+1, y), (x, y-1), (x-1, y)]:
                        if a >= 0 and a < rows and b >= 0 and b < cols and matrix[a][b] is not None:
                            if search(i+1, a, b):
                                matrix[x][y] = char
                                return True
                            
                    matrix[x][y] = char
                    return False
                            
            return False
                        
                        
        for x in range(rows):
            for y in range(cols):
                if matrix[x][y] == word[0]:
                    if search(0, x, y):
                        return True
                    
        return False

In [254]:
t = WordSearch()
m = [list('hehs'), list('olhx'), list('lloe')]
t.solve('hoe', m)

True

In [255]:
for row in m:
    print(row)

['h', 'e', 'h', 's']
['o', 'l', 'h', 'x']
['l', 'l', 'o', 'e']


# 7. Given an array of integers, find a subarray with the max product, and return the product.

`[2, -5, 3, 1, -4, 0, -10]`

In [260]:
class MaxSubProduct:
    def solve(self, array):
        big = array[0]
        small = array[0]
        
        ans = array[0]
        
        for current in array[1:]:
            ans = max(ans, big, small)
            
            big = max(current, current * big, current * small)
            small = min(current, current * big, current * small)
            
        return max(ans, big, small)

In [263]:
t = MaxSubProduct()
t.solve([2, -5, 3, 1, 4, 0, 55])

55

# 8. Max Subarray Sum

In [264]:
class MaxSubSum:
    def solve(self, array):
        ans = array[0]
        best = array[0]
        for current in array[1:]:
            ans = max(ans, best)
            
            temp = best + current
            if temp > current:
                best = temp
            else:
                best = current
                
        return max(ans, best)

In [267]:
t = MaxSubSum()
t.solve([2, -5, -33, -1, 0, -1, 55])

55

# 9. MaxPath Sum

In [277]:
class Node:
    def __init__(self, val, left=None, right=None):
        self.val = val
        self.left = left
        self.right = right
        
root = Node(-50)
seven = Node(100)
nine = Node(6)
two = Node(2)
three = Node(3)

root.left = nine
root.right = seven
seven.left = two
seven.right = three

class MaxPathSum:
    def solve(self, root):
        self.ans = root.val  # Dummy placeholder.
        
        def bestpath(node):
            if node is None:
                return 0  # For continuity at leaf nodes.
            else:
                left = bestpath(node.left)  # MaxPathSum from left.
                right = bestpath(node.right)  # MaxPathSum from right.
                
                # 1. Consider the sub-path from the left<--root->right ...
                sub = left + node.val + right
                # This is a candidate.
                self.ans = max(self.ans, sub)
                
                temp_l = node.val + left
                temp_r = node.val + right
                
                self.ans = max(self.ans, node.val, temp_l, temp_r)
                return max(node.val, temp_l, temp_r)
            
        bestpath(root)
        return self.ans

In [278]:
t = MaxPathSum()
t.solve(root)

105

# 10. Move zeros while maintaining order.

`[1, 2, 3, 0, 4, 0, 6] ==> [1, 2, 3, 4, 6, 0, 0]`

DO IT INPLACE!

In [288]:
class MoveZeros:
    def solve(self, array):
        z = 0
        for i in range(len(array)):
            if array[i] == 0:
                z = i
                break
                
        n = z+1
        print(z, n)
        while n < len(array):
            print(z, n, array)
            if array[n] == 0:
                n += 1
            else:
                array[n], array[z] = array[z], array[n]
                while z < len(array) and array[z] != 0:
                    z += 1
                    
                n = z+1

In [289]:
a = [1, 2, 3, 0, 4, 0, 6]
t = MoveZeros()
t.solve(a)

3 4
3 4 [1, 2, 3, 0, 4, 0, 6]
4 5 [1, 2, 3, 4, 0, 0, 6]
4 6 [1, 2, 3, 4, 0, 0, 6]
5 6 [1, 2, 3, 4, 6, 0, 0]


In [67]:
import math

In [72]:
vals = [2.1,1.8, 1.3, 0.9, 0.4, 0.2, 0.15, 0.02, 0.001]
total = sum(vals)
running = 0
for ix, v in enumerate(vals):
    running += v
    print(ix+1, running/total)

1 0.30563236792315523
2 0.5676029690001455
3 0.7568039586668606
4 0.8877892592053558
5 0.9460049483335758
6 0.9751127928976859
7 0.9969436763207684
8 0.9998544607771794
9 1.0


In [69]:
total

6.871000000000001

In [133]:
import numpy as np

In [134]:
X = np.array([
    [-2, -2, -2, 0, 0],
    [-2, -2, -2, 0, 0],
    [4, 4, 4, 0, 0],
    [0, 0, 0, -2, -2],
    [0, 0, 0, 2, 2],
    [0, 0, 0, 0, 0]
])

In [135]:
U, S, Vh = np.linalg.svd(X)

In [146]:
print("U")
print(np.round(U[:,:2], 4))

U
[[-0.4082  0.    ]
 [-0.4082  0.    ]
 [ 0.8165  0.    ]
 [ 0.     -0.7071]
 [ 0.      0.7071]
 [ 0.      0.    ]]


In [143]:
print("S")
print(np.round(np.expand_dims(S, -1), 4))

S
[[8.4853]
 [4.    ]
 [0.    ]
 [0.    ]
 [0.    ]]


In [155]:
print("S")
print(np.round(np.diag(S), 4)[:2,:2])

S
[[8.4853 0.    ]
 [0.     4.    ]]


In [158]:
print("V")
print(np.round(Vh[:2,:], 4))

V
[[0.5774 0.5774 0.5774 0.     0.    ]
 [0.     0.     0.     0.7071 0.7071]]


In [114]:
np.var(np.dot(X, Vh[0]))

11.999999999999998

In [149]:
recon = np.expand_dims(np.dot(X, Vh[0]), -1).dot(np.expand_dims(Vh[0], 0))

In [150]:
X

array([[-2, -2, -2,  0,  0],
       [-2, -2, -2,  0,  0],
       [ 4,  4,  4,  0,  0],
       [ 0,  0,  0, -2, -2],
       [ 0,  0,  0,  2,  2],
       [ 0,  0,  0,  0,  0]])

In [154]:
np.round(np.mean(np.linalg.norm(X - recon, axis=1)**2), 4)

2.6667

In [126]:
from scipy.stats import norm

1 - norm.cdf(2.869) + 1 - norm.cdf(1.051)

0.14868815391267343

In [96]:
np.linalg.norm(U, axis=1)

array([1., 1., 1., 1., 1., 1.])

In [82]:
r = np.random.rand(5, 2)

In [83]:
r

array([[0.14163243, 0.45658557],
       [0.97367086, 0.12363282],
       [0.25618947, 0.91079007],
       [0.93672353, 0.728582  ],
       [0.79285307, 0.84677053]])

In [88]:
np.linalg.norm(r, axis=0)

array([1.59367334, 1.51695289])

In [171]:
import csv, json

def swag_to_json(fpath, outname):
    dump = []
    
    with open(fpath) as fp:
        reader = csv.DictReader(fp)
        for row in reader:
            data = {}
            for key, val in row.items():
                if len(key):
                    if key == 'label':
                        data[key] = int(val)
                    else:
                        data[key] = val
                        
            dump.append(json.dumps(data))
            
    with open('/Users/priyamtejaswin/CMU/DirectedStudyF2021/lightning-transformers/samples/%s'%outname, 'w') as fp:
        fp.write('\n'.join(dump))
        
    print("Wrote", len(dump), "samples.")

In [173]:
swag_to_json('/Users/priyamtejaswin/CMU/DirectedStudyF2021/lightning-transformers/samples/swag_train.csv',
             'swag_train.json')

Wrote 99 samples.


In [174]:
swag_to_json('/Users/priyamtejaswin/CMU/DirectedStudyF2021/lightning-transformers/samples/swag_val.csv',
             'swag_val.json')

Wrote 99 samples.


In [177]:
def hella_to_swag(fpath, outname):
    dump = []
    with open(fpath) as fp:
        for row in fp.readlines():
            final = {}
            data = json.loads(row.strip())
            
            final["sent1"] = data["ctx_a"]
            final["sent2"] = data["ctx_b"].capitalize()
            final["label"] = data["label"]
            for i in range(4):
                final["ending%d"%i] = data["endings"][i]
                
            dump.append(json.dumps(final))
            
    with open('/Users/priyamtejaswin/CMU/DirectedStudyF2021/lightning-transformers/samples/%s'%outname, 'w') as fp:
        fp.write('\n'.join(dump))
        
    print("Wrote", len(dump), "samples.")

In [178]:
hella_to_swag('/Users/priyamtejaswin/CMU/DirectedStudyF2021/calibration/calibration_data/HellaSWAG/train.txt', 'hs_train.json')

Wrote 39905 samples.


In [179]:
hella_to_swag('/Users/priyamtejaswin/CMU/DirectedStudyF2021/calibration/calibration_data/HellaSWAG/dev.txt', 'hs_val.json')

Wrote 5021 samples.


In [None]:
def multifc_to_kgat(fpath, outname):
    dump = []
    with open(fpath) as fp:
        for row in fp.readlines():
            final = {}
            data = json.loads(row.strip())
            
            final['id'] = data['claimID']
            final['claim'] = data['claim']
            final['label'] = data['label']
            bylen = list(sorted(data['evidence'], key=lambda x: len(x.split()), reverse=True))[:5]
            evidence = []
            for ix, row in enumerate(bylen):
                e = ["Google", ix, row]
                e.extend(data["sentiment_counts"])
                e.append(data["freshness"])
                e.append(data["spell_correct"])
                
                evidence.append(e)
            final['evidence'] = evidence
            
            dump.append(json.dumps(final))
            
    with open('./multifc/'+outname, 'w') as fp:
        fp.write('\n'.join(dump))

In [290]:
import numpy as np

In [301]:
signals = [[[6.0, 3.0, 1367834400.0, 8.4444444444], [6.0, 3.0, 1367834400.0, 8.4444444444], [6.0, 3.0, 1367834400.0, 8.4444444444], [6.0, 3.0, 1367834400.0, 8.4444444444], [6.0, 3.0, 1367834400.0, 8.4444444444]], [[5.0, 4.0, 1263581940.0, 10.3333333333], [5.0, 4.0, 1263581940.0, 10.3333333333], [5.0, 4.0, 1263581940.0, 10.3333333333], [5.0, 4.0, 1263581940.0, 10.3333333333], [5.0, 4.0, 1263581940.0, 10.3333333333]], [[4.0, 6.0, 1426564800.0, 4.8], [4.0, 6.0, 1426564800.0, 4.8], [4.0, 6.0, 1426564800.0, 4.8], [4.0, 6.0, 1426564800.0, 4.8], [4.0, 6.0, 1426564800.0, 4.8]], [[4.0, 5.0, 1468123200.0, 5.5555555556], [4.0, 5.0, 1468123200.0, 5.5555555556], [4.0, 5.0, 1468123200.0, 5.5555555556], [4.0, 5.0, 1468123200.0, 5.5555555556], [4.0, 5.0, 1468123200.0, 5.5555555556]]]
batch = []
for claim in signals:
    a = np.array(claim)
    batch.append(a)
    
np.array(batch).shape

(4, 5, 4)

In [302]:
np.array(signals+[np.array(signals[-1])]).shape

(5, 5, 4)

In [None]:
np.ones