# Graph

## Pre-run 

In [None]:
from typing import List
from helpers.misc import *

## Union Find (UFS) 

* **TRICK 1**: DO NOT STORE ALL EDGES! UFS must be stored in O(n) space, not O(N\*N) space!
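* **TRICK 2**: UFS `find` can be made faster with path compression: whenever a lookup starts from a node that is not its own root, **update the root list** so that the node points directly at the root and the next lookup is a single step.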
* [1202](https://leetcode.com/problems/smallest-string-with-swaps), [924](https://leetcode.com/problems/minimize-malware-spread/)

In [None]:
class UFS:
    '''Union-find (disjoint set) over the integers 0 .. N-1.

    Only one parent pointer per element is stored, so the structure takes
    O(N) space no matter how many pairs are unioned.
    '''

    def __init__(self, N: int):
        # TRICK 1: do not store all edges! Every element starts as the root
        # of its own one-element set.
        self.root = list(range(N))

    def find(self, x: int) -> int:
        '''Find the root number of the union that contains x.

        Implemented with loops instead of recursion: union() does no
        balancing, so a parent chain can be as long as N and a recursive
        walk would exceed the interpreter's recursion limit.
        '''
        # pass 1: walk up to the root.
        top = x
        while top != self.root[top]:
            top = self.root[top]
        # pass 2 (TRICK 2): point every node on the walked path directly at
        # the root so that the next search is faster.
        while x != top:
            parent = self.root[x]
            self.root[x] = top
            x = parent
        return top

    def union(self, x: int, y: int):
        '''Union x and y: the root of x's set is attached to the root of y's set.'''
        x_root = self.find(x)
        y_root = self.find(y)
        self.root[x_root] = y_root

## 133 [Clone Graph](https://leetcode.com/problems/clone-graph) - M

### DFS - Queue + Hashtable 

* Runtime: 24 ms, faster than 99.82% of Python3 online submissions for Clone Graph.
* Memory Usage: 12.7 MB, less than 100.00% of Python3 online submissions for Clone Graph.

In [None]:
"""
# Definition for a Node.
class Node:
    def __init__(self, val: int, neighbors: List['Node']):
        self.val = val
        self.neighbors = neighbors
"""

from collections import deque
class Solution:
    def cloneGraph(self, node: 'Node') -> 'Node':
        '''Clone a connected graph using an iterative DFS.

        node: any node of the graph to copy, or None for an empty graph.
              Every node is expected to expose `val` and a `neighbors` list.

        Returns the copy of `node`; every node reachable from it is copied
        exactly once and each copy lists its neighbors in the same order as
        the original. Returns None for an empty graph.
        '''
        # empty graph
        if node is None:
            return None

        # original node -> its copy.
        # CAUTION: keyed by the node object, not by val, so two different
        # nodes that happen to carry the same val are never merged.
        clones = {node: Node(node.val, [])}
        # nodes whose copy exists but whose neighbor list is not filled yet.
        # An explicit stack is used instead of recursion so that long paths
        # cannot hit the recursion limit.
        stack = [node]

        while stack:
            original = stack.pop()
            copy = clones[original]
            for neighbor in original.neighbors:
                if neighbor not in clones:
                    clones[neighbor] = Node(neighbor.val, [])
                    stack.append(neighbor)
                copy.neighbors.append(clones[neighbor])

        return clones[node]

## 138 [Copy List with Random Pointer](https://leetcode.com/problems/copy-list-with-random-pointer/) - M

### Hashtable 

* Runtime: 36 ms, faster than 51.35% of Python3 online submissions for Copy List with Random Pointer.
* Memory Usage: 13.6 MB, less than 100.00% of Python3 online submissions for Copy List with Random Pointer.
* **CAUTION**: cannot use val as keys! vals are not an ID of nodes! vals can be the same!
* **CAUTION**: always use keys that cannot be the same!

In [None]:
"""
# Definition for a Node.
class Node:
    def __init__(self, x: int, next: 'Node' = None, random: 'Node' = None):
        self.val = int(x)
        self.next = next
        self.random = random
"""
class Solution:
    def copyRandomList(self, head: 'Node') -> 'Node':
        '''Deep copy a linked list whose nodes also carry a `random` pointer.

        head: first node of the list to copy, or None for an empty list.

        Returns the head of a new list with the same values, in which every
        `next` / `random` pointer refers to the copy of the node the original
        pointer referred to. Returns None for an empty list.
        '''
        if not head:
            return None

        # TRICK: pointers are hashable.
        # CAUTION: cannot use val as keys! vals are not an ID of nodes! vals
        # can be the same!
        # CAUTION: always use keys that cannot be the same!
        copies = {}

        def copy_of(original: 'Node') -> 'Node':
            '''Return the copy of `original`, creating it on first use.'''
            if original is None:
                return None
            duplicate = copies.get(original)
            if duplicate is None:
                duplicate = Node(original.val)
                copies[original] = duplicate
            return duplicate

        # Walk the list with a loop rather than recursion: one stack frame per
        # node would overflow the recursion limit on long lists.
        current = head
        while current is not None:
            duplicate = copy_of(current)
            duplicate.random = copy_of(current.random)
            duplicate.next = copy_of(current.next)
            current = current.next

        return copies[head]

## 1202 [Smallest String With Swaps](https://leetcode.com/problems/smallest-string-with-swaps) - M

### Union Find (UFS)

* UFS must be stored in O(n) space, not O(N\*N) space!
* UFS `find` can be made faster with path compression: whenever a lookup starts from a node that is not its own root, **update the root list** so that the node points directly at the root and the next lookup is a single step.
* Runtime: 804 ms, faster than 51.70% of Python3 online submissions for Smallest String With Swaps.
* Memory Usage: 49.4 MB, less than 100.00% of Python3 online submissions for Smallest String With Swaps.

In [None]:
class UFS:
    '''Union-find (disjoint set) over the integers 0 .. N-1, stored in O(N) space.'''

    def __init__(self, N: int):
        # every element starts as the root of its own one-element set.
        self.root = list(range(N))

    def find(self, x: int) -> int:
        '''Find the root number of the union that contains x.

        Written with loops instead of recursion: union() does no balancing,
        so pairs such as (0,1), (1,2), (2,3), ... build a parent chain as
        long as the input, which a recursive walk cannot follow without
        exceeding the recursion limit.
        '''
        # pass 1: walk up to the root.
        top = x
        while top != self.root[top]:
            top = self.root[top]
        # pass 2 (TRICK): re-point the walked path at the root so that the
        # next search is faster.
        while x != top:
            parent = self.root[x]
            self.root[x] = top
            x = parent
        return top

    def union(self, x: int, y: int):
        '''Union x and y: the root of x's set is attached to the root of y's set.'''
        x_root = self.find(x)
        y_root = self.find(y)
        self.root[x_root] = y_root

class Solution:
    def smallestStringWithSwaps(self, s: str, pairs: List[List[int]]) -> str:
        '''Get the lexicographically smallest string reachable with swaps,
        using union find (UFS).

        s:     the string to rearrange.
        pairs: index pairs; the two characters of a pair may be swapped any
               number of times.

        Indices linked through any chain of pairs form one group, and the
        characters inside a group can be arranged freely, so each group is
        simply sorted in place.
        '''
        length = len(s)
        dsu = UFS(length)
        for link in pairs:
            dsu.union(*link)

        # root -> indices of that group.
        # TRICK: indices are visited in increasing order, so every list is
        # already sorted.
        members = {}
        for index in range(length):
            members.setdefault(dsu.find(index), []).append(index)

        letters = list(s)
        for positions in members.values():
            ordered = sorted(letters[p] for p in positions)
            # smallest character goes to the smallest index of the group.
            for position, letter in zip(positions, ordered):
                letters[position] = letter

        return ''.join(letters)

In [None]:
# test
# Each call compares the result of smallestStringWithSwaps(s, pairs) with the
# expected string through `eq` (brought in by the star import from
# helpers.misc; presumably an equality check -- confirm against that module).
# two independent pairs: {0, 3} and {1, 2} are sorted separately.
eq(Solution().smallestStringWithSwaps("dcab", [[0,3],[1,2]]), "bacd")
# the extra pair [0, 2] links everything into one group.
eq(Solution().smallestStringWithSwaps("dcab", [[0,3],[1,2],[0,2]]), "abcd")
eq(Solution().smallestStringWithSwaps("cba", [[0,1],[1,2]]), "abc")
# no pairs: the string must come back unchanged.
eq(Solution().smallestStringWithSwaps("d", []), "d")
eq(Solution().smallestStringWithSwaps("dc", []), "dc")
eq(Solution().smallestStringWithSwaps("dc", [[0,1]]), "cd")
# larger case; includes the self-pair [10, 10].
eq(Solution().smallestStringWithSwaps("yhiihxbordwyjybyt", [[9,1],[5,11],[9,7],[2,7],[14,16],[6,16],[0,5],[12,9],[6,5],[9,10],[4,7],[3,2],[10,1],[3,15],[12,4],[10,10],[15,12]]), "bdhhibtirjoxwyyyy")

##  924 [Minimize Malware Spread](https://leetcode.com/problems/minimize-malware-spread/) - H

* [Solution](https://leetcode.com/articles/minimize-malware-spread/)

### Brute Hashmap

* Runtime: 4152 ms, faster than 5.76% of Python3 online submissions for Minimize Malware Spread.
* Memory Usage: 15.2 MB, less than 75.00% of Python3 online submissions for Minimize Malware Spread.

In [None]:
class Solution:
    def minMalwareSpread(self, graph: List[List[int]], initial: List[int]) -> int:
        '''Find the node to remove to make the whole graph less infected by
        malware.

        graph:   square adjacency matrix; graph[x][y] is truthy when x and y
                 are directly connected.
        initial: the initially infected nodes (expected to be non-empty).

        Returns the initially infected node whose removal from `initial`
        leaves the fewest infected nodes; ties go to the smallest index.

        Removing a node only helps when it is the ONLY initially infected
        node of its connected component -- otherwise the component is still
        infected by the others. If no node qualifies, every removal is
        equally useless and the smallest index in `initial` is returned.
        '''
        n = len(graph)
        # node -> set of all nodes of its connected component. All members
        # of one component share the very same set object.
        component = {i: {i} for i in range(n)}
        for x in range(n):
            for y in range(x + 1, n):
                # skip pairs that are already known to be connected.
                if graph[x][y] and component[x] is not component[y]:
                    merged = component[x] | component[y]
                    for node in merged:
                        component[node] = merged

        best_node = min(initial)
        best_size = -1
        for node in initial:
            members = component[node]
            # number of initially infected nodes inside this component.
            infected_here = sum(1 for other in initial if other in members)
            if infected_here != 1:
                continue
            size = len(members)
            if size > best_size or (size == best_size and node < best_node):
                best_node, best_size = node, size
        return best_node

In [None]:
# test
# The 10-node matrix below has the components {0, 4}, {1, 2, 5}, {3},
# {6, 9}, {7} and {8}. Each of the initial nodes 1, 3 and 0 sits in a
# different component, and node 1's component is the largest, so the
# expected answer is 1.
# `eq` comes from the star import of helpers.misc; presumably an equality
# check -- confirm against that module.
eq(Solution().minMalwareSpread([[1,0,0,0,1,0,0,0,0,0],[0,1,1,0,0,0,0,0,0,0],
                                [0,1,1,0,0,1,0,0,0,0],[0,0,0,1,0,0,0,0,0,0],
                                [1,0,0,0,1,0,0,0,0,0],[0,0,1,0,0,1,0,0,0,0],
                                [0,0,0,0,0,0,1,0,0,1],[0,0,0,0,0,0,0,1,0,0],
                                [0,0,0,0,0,0,0,0,1,0],[0,0,0,0,0,0,1,0,0,1]],
                               [1,3,0]), 1)

### DFS 

* Runtime: 1696 ms, faster than 78.40% of Python3 online submissions for Minimize Malware Spread.
* Memory Usage: 21.5 MB, less than 25.00% of Python3 online submissions for Minimize Malware Spread.

In [None]:
from collections import Counter
class Solution:
    def minMalwareSpread(self, graph: List[List[int]], initial: List[int]) -> int:
        '''Find the node to remove to make the whole graph less infected by
        malware by DFS.

        graph:   square adjacency matrix; graph[x][y] is truthy when x and y
                 are directly connected.
        initial: the initially infected nodes (expected to be non-empty).

        Returns the initially infected node whose removal saves the most
        nodes (smallest index on ties), or min(initial) when no removal can
        save anything.
        '''
        # hash table for colors: keys are nodes, values are colors. Nodes of
        # the same connected component get the same color.
        colors = {}
        next_color = 0
        for start in range(len(graph)):
            if start in colors:
                continue
            # color the whole component of `start`. An explicit stack is used
            # instead of recursion so that long, path-shaped components cannot
            # hit the recursion limit.
            colors[start] = next_color
            pending = [start]
            while pending:
                node = pending.pop()
                for neighbor, linked in enumerate(graph[node]):
                    if linked and neighbor not in colors:
                        colors[neighbor] = next_color
                        pending.append(neighbor)
            # the next uncolored node belongs to a different component.
            next_color += 1

        # size[color] = number of nodes with this color.
        size = Counter(colors.values())
        # infected[color] = number of initially infected nodes with this color.
        infected = Counter(colors[node] for node in initial)

        best = None
        for node in initial:
            color = colors[node]
            # if two infected nodes are in the same sub-graph, removing one
            # cannot prevent the malware spread by the other one.
            if infected[color] != 1:
                continue
            if best is None:
                best = node
                continue
            gain = size[color]
            best_gain = size[colors[best]]
            if gain > best_gain or (gain == best_gain and node < best):
                best = node
        # no candidate: removing any node of initial changes nothing.
        return min(initial) if best is None else best

In [None]:
# test
# The 10-node matrix below has the components {0, 4}, {1, 2, 5}, {3},
# {6, 9}, {7} and {8}. Each of the initial nodes 1, 3 and 0 sits in a
# different component, and node 1's component is the largest, so the
# expected answer is 1.
# `eq` comes from the star import of helpers.misc; presumably an equality
# check -- confirm against that module.
eq(Solution().minMalwareSpread([[1,0,0,0,1,0,0,0,0,0],[0,1,1,0,0,0,0,0,0,0],
                                [0,1,1,0,0,1,0,0,0,0],[0,0,0,1,0,0,0,0,0,0],
                                [1,0,0,0,1,0,0,0,0,0],[0,0,1,0,0,1,0,0,0,0],
                                [0,0,0,0,0,0,1,0,0,1],[0,0,0,0,0,0,0,1,0,0],
                                [0,0,0,0,0,0,0,0,1,0],[0,0,0,0,0,0,1,0,0,1]],
                               [1,3,0]), 1)

### Union Find (UFS) 

* Runtime: 1832 ms, faster than 39.53% of Python3 online submissions for Minimize Malware Spread.
* Memory Usage: 15.2 MB, less than 75.00% of Python3 online submissions for Minimize Malware Spread.

In [None]:
from collections import Counter
class UFS:
    '''Union-find (disjoint set) over 0 .. N-1 that also tracks set sizes.

    The representative of every set is its greatest node.
    '''

    def __init__(self, N: int):
        # p stores the relationship of connectivity: i and p[i] are connected.
        # when i == p[i], i is the greatest node in the sub-graph.
        self.p = list(range(N))
        # sz stores the size of sub-graphs efficiently.
        # sz[i] is valid only when i is the greatest node in the
        # sub-graph (i == p[i]).
        self.sz = [1] * N

    def find(self, x: int) -> int:
        '''Return the greatest node in the sub-graph that contains x.

        Written with loops instead of recursion: a parent chain can be as
        long as N (e.g. after union(0,1), union(1,2), ...), which a recursive
        walk cannot follow without exceeding the recursion limit.
        '''
        # pass 1: walk up to the representative.
        top = x
        while self.p[top] != top:
            top = self.p[top]
        # pass 2 (TRICK): re-point the walked path at the representative,
        # this makes the next find quicker.
        while x != top:
            parent = self.p[x]
            self.p[x] = top
            x = parent
        return top

    def union(self, x: int, y: int):
        '''Union node x and node y together in UFS.'''
        # find the greatest node connected to node x or node y.
        xr = self.find(x)
        yr = self.find(y)
        # already in the same sub-graph: merging again would add the size of
        # the sub-graph to itself and corrupt sz.
        if xr == yr:
            return
        # xr should be less than yr so that the greatest node stays on top.
        if xr > yr:
            xr, yr = yr, xr
        # update p and sz.
        self.p[xr] = yr
        self.sz[yr] += self.sz[xr]

    def size(self, x: int) -> int:
        '''Get the size of the sub-graph to which x is connected.

        sz is valid only for the greatest node in the sub-graph
        (i == p[i]), so x is resolved through find first.
        '''
        return self.sz[self.find(x)]

class Solution:
    def minMalwareSpread(self, graph: List[List[int]], initial: List[int]) -> int:
        '''Find the node to remove to make the whole graph less infected by
        malware, using a disjoint set (UFS).

        graph:   square adjacency matrix; graph[j][i] is truthy when i and j
                 are directly connected.
        initial: the initially infected nodes.

        Returns the initially infected node that is alone in the largest
        sub-graph (smallest node on ties), or min(initial) when every
        infected sub-graph holds more than one infected node.
        '''
        dsu = UFS(len(graph))
        # transfer graph into the disjoint set; only the part of each row
        # below the diagonal is read.
        for j, row in enumerate(graph):
            for i in range(j):
                if row[i]:
                    dsu.union(i, j)

        # number of initially infected nodes per sub-graph.
        infected_per_root = Counter(dsu.find(u) for u in initial)

        # best candidate so far; the fallback is the smallest infected node.
        best_size, best_node = -1, min(initial)
        for node in initial:
            root = dsu.find(node)
            # if two nodes are in the same sub-graph, removing one node cannot
            # prevent the malware spread by the other node.
            if infected_per_root[root] != 1:
                continue
            component_size = dsu.size(root)
            bigger = component_size > best_size
            tie_with_smaller_node = component_size == best_size and node < best_node
            if bigger or tie_with_smaller_node:
                best_size, best_node = component_size, node
        return best_node

In [None]:
# test
# The 10-node matrix below has the components {0, 4}, {1, 2, 5}, {3},
# {6, 9}, {7} and {8}. Each of the initial nodes 1, 3 and 0 sits in a
# different component, and node 1's component is the largest, so the
# expected answer is 1.
# `eq` comes from the star import of helpers.misc; presumably an equality
# check -- confirm against that module.
eq(Solution().minMalwareSpread([[1,0,0,0,1,0,0,0,0,0],[0,1,1,0,0,0,0,0,0,0],
                                [0,1,1,0,0,1,0,0,0,0],[0,0,0,1,0,0,0,0,0,0],
                                [1,0,0,0,1,0,0,0,0,0],[0,0,1,0,0,1,0,0,0,0],
                                [0,0,0,0,0,0,1,0,0,1],[0,0,0,0,0,0,0,1,0,0],
                                [0,0,0,0,0,0,0,0,1,0],[0,0,0,0,0,0,1,0,0,1]],
                               [1,3,0]), 1)

## 417 [Pacific Atlantic Water Flow](https://leetcode.com/problems/pacific-atlantic-water-flow/) - M 

### DFS - Queue + Hashtable

* Runtime: 280 ms, faster than 97.37% of Python3 online submissions for Pacific Atlantic Water Flow.
* Memory Usage: 14.1 MB, less than 55.00% of Python3 online submissions for Pacific Atlantic Water Flow.
* Caution: the table is a rectangle, not a square!

In [None]:
from collections import deque
class Solution:
    def pacificAtlantic(self, matrix: List[List[int]]) -> List[List[int]]:
        '''Find the list of grid coordinates where water can flow to both
        Pacific and Atlantic Ocean.

        matrix: heights of the cells. The Pacific touches the first row and
                the first column, the Atlantic the last row and the last
                column.
                Caution: the table is a rectangle, not a square!

        The search runs backwards: it starts from the cells on each shore and
        climbs to neighbours that are at least as high, which are exactly the
        cells whose water can run down to that ocean.
        '''
        if not matrix:
            return []

        rows, cols = len(matrix), len(matrix[0])
        # possible waterflow directions
        steps = ((0, 1), (0, -1), (1, 0), (-1, 0))

        def flood(reached):
            '''Grow `reached` (in place) with every cell that can drain into it.'''
            frontier = deque(reached)
            while frontier:
                r, c = frontier.popleft()
                for dr, dc in steps:
                    nr, nc = r + dr, c + dc
                    # outside of the table
                    if not (0 <= nr < rows and 0 <= nc < cols):
                        continue
                    # already known to reach this ocean
                    if (nr, nc) in reached:
                        continue
                    # water only flows from higher or equal cells
                    if matrix[nr][nc] >= matrix[r][c]:
                        frontier.append((nr, nc))
                        reached.add((nr, nc))
            return reached

        # positions on the shore of each ocean
        pacific = {(x, 0) for x in range(rows)} | {(0, y) for y in range(cols)}
        atlantic = {(x, cols - 1) for x in range(rows)} | {(rows - 1, y) for y in range(cols)}

        # positions that can reach the pacific / the atlantic ocean
        flood(pacific)
        flood(atlantic)

        return [[x, y] for x, y in pacific & atlantic]