# 1 Overview
- pg 247

## 1.1 Representation

### Adjacency List
- Jagged 2-D structure (dict or list of lists), where m[0] is an array of the vertices connected to vertex 0
- Preferred for sparse graphs because it is more memory efficient


In [9]:
# Plain dict: each vertex maps to the list of vertices it points to.
graph_unweighted = {0: [1, 3], 1: [2], 2: [3], 3: [0]}

# Weighted variant: every neighbor is stored as a (vertex, weight) pair,
# e.g. vertex 0 reaches vertex 1 with weight 5 and vertex 3 with weight 2.
graph_weighted = {
    0: [(1, 5), (3, 2)],
    1: [(2, 3)],
    2: [(3, 8)],
    3: [(0, 1)],
}

# defaultdict(list) creates an empty neighbor list on first access, so edges
# can be added without initialising each vertex beforehand.
from collections import defaultdict
graph = defaultdict(list)
graph[0] += [1, 3]  # edges 0 -> 1 and 0 -> 3
graph[1] += [2]     # edge 1 -> 2


# List-of-lists form: the index is the node id and the entry at that index
# holds the connected nodes (same shape as the input of the
# "Bipartite Graph Validation" problem below).
graph_via_list = [[1, 4], [0, 2], [1], [4], [0, 3]]


# Show the adjacency list, one vertex per line.
for vertex, edges in graph_unweighted.items():
    print(f"{vertex}: {edges}")


0: [1, 3]
1: [2]
2: [3]
3: [0]


### Adjacency Matrix: 
- matrix[i][j] indicates whether there is an edge from vertex i to vertex j.
- The value of matrix[i][j] can be:
    * 1 for unweighted graph
    * the edge weight for weighted graph
- Preferred for dense graph

In [1]:
# Square adjacency matrix with every entry 0 (no edges yet).
# Each row is built separately so that rows never alias one another.
num_vertices = 4
matrix = [[0 for _ in range(num_vertices)] for _ in range(num_vertices)]

### Via Class

In [2]:
class GraphNode:
    """A graph vertex: a value plus the list of its neighboring nodes."""

    def __init__(self, val):
        # Every node starts with its own, empty neighbor list.
        self.val, self.neighbors = val, []

## 1.2 Traversal
- BFS and DFS support similar use cases
- we may need to maintain a set to see which node we have visited

### DFS

In [None]:
# Nodes reached so far; shared by every dfs() call in this cell.
visited = set()


# Assuming graph is an adjacency list
def dfs(graph, source):
    """Recursively visit every node reachable from `source`.

    Each node that is reached is recorded in the module-level `visited` set,
    so a node is never expanded twice.
    """
    visited.add(source)

    # business logic goes here, e.g. process(source)

    # The membership test is evaluated lazily, right before each recursive
    # call, so nodes discovered by an earlier sibling are skipped.
    unseen = (nxt for nxt in graph[source] if nxt not in visited)
    for nxt in unseen:
        dfs(graph, nxt)




### BFS

In [6]:
from collections import deque, defaultdict

# Nodes already discovered; shared by every bfs() call in this cell.
visited = set()


def bfs(graph, source):
    """Visit every node reachable from `source` in breadth-first order.

    Args:
        graph: adjacency list; `graph[node]` yields the neighbors of `node`.
        source: node to start the traversal from.

    Discovered nodes are recorded in the module-level `visited` set. A node is
    marked when it is enqueued (not when it is dequeued) so that it can never
    sit in the queue more than once.
    """
    if source in visited:
        return

    visited.add(source)
    queue = deque([source])

    while queue:
        current = queue.popleft()

        # Add business logic here, e.g. process(current)

        for destination in graph[current]:
            if destination not in visited:
                visited.add(destination)
                queue.append(destination)

# 2 DFS

### Graph Deep Copy
- Given a reference to a node within an undirected graph, create a deep copy of the graph.

In [4]:
def graph_deep_copy(node: GraphNode) -> GraphNode:
    """Return a deep copy of the graph reachable from `node`.

    Returns None when `node` is falsy (e.g. None). The original graph is not
    modified.
    """
    if not node:
        return None
    # Pass a fresh map explicitly so every top-level copy starts clean.
    return dfs(node, {})


def dfs(node: GraphNode, clone_map=None) -> GraphNode:
    """Clone `node` and, recursively, everything reachable from it.

    Args:
        node: the original node to clone.
        clone_map: maps each original node already cloned to its clone. It is
            what lets cycles and shared neighbors resolve to one single copy.

    Returns:
        The clone corresponding to `node`.
    """
    if clone_map is None:
        clone_map = {}

    # Already cloned: reuse the existing copy instead of creating a second one.
    if node in clone_map:
        return clone_map[node]

    cloned_node = GraphNode(node.val)
    # Register the clone BEFORE recursing so a cycle back to `node` finds it.
    clone_map[node] = cloned_node

    for neighbor in node.neighbors:
        cloned_node.neighbors.append(dfs(neighbor, clone_map))

    return cloned_node

### Count Islands
- Given a binary matrix representing 1s as land and 0s as water, return the number of islands. An island is formed by connecting adjacent land cells up, down, left, and right.



In [8]:
# Insight: The benefit of the top-down version is that it is more intuitive what the goal of the dfs method is.
def count_islands_top_down(matrix):
    """Count the islands (4-directionally connected groups of 1s) in `matrix`.

    Args:
        matrix: rectangular grid of 0s (water) and 1s (land).

    Returns:
        The number of islands; 0 for an empty grid.

    Note:
        `matrix` is modified in place: every land cell is overwritten with 0
        as its island is counted.
    """
    if not matrix or not matrix[0]:
        return 0

    num_islands = 0

    for r in range(len(matrix)):
        for c in range(len(matrix[0])):
            if matrix[r][c] == 1:
                num_islands += 1
                dfs(r, c, matrix)  # sink the whole island so it is counted once

    return num_islands


def dfs(r, c, matrix):
    """Set (r, c) and every land cell connected to it to 0."""
    dirs = [(-1, 0), (1, 0), (0, -1), (0, 1)]

    # Sink the current cell first; this is what stops the recursion from
    # bouncing back and forth between two neighboring land cells.
    matrix[r][c] = 0

    for x, y in dirs:
        cur_x, cur_y = r + x, c + y
        if (0 <= cur_x < len(matrix)) and (0 <= cur_y < len(matrix[0])) and (matrix[cur_x][cur_y] == 1):
            dfs(cur_x, cur_y, matrix)
    
    

In [None]:
def count_islands_nested(matrix):
    """Count the islands (4-directionally connected groups of 1s) in `matrix`.

    Same algorithm as the top-down version, but the DFS helper is nested so it
    can read `matrix`, `num_rows` and `num_cols` from the enclosing scope.

    Args:
        matrix: rectangular grid of 0s (water) and 1s (land).

    Returns:
        The number of islands; 0 for an empty grid.

    Note:
        `matrix` is modified in place: counted land cells are set to 0.
    """

    def dfs(r, c):
        """Sink every land cell connected to the (already sunk) cell (r, c)."""
        dirs = [(0, 1), (0, -1), (1, 0), (-1, 0)]

        for x, y in dirs:
            next_row, next_col = r + x, c + y
            if (0 <= next_row < num_rows) and (0 <= next_col < num_cols) and matrix[next_row][next_col] == 1:
                # Mark before recursing so the neighbor is never revisited.
                matrix[next_row][next_col] = 0
                dfs(next_row, next_col)

    if not matrix or not matrix[0]:
        return 0

    num_rows, num_cols = len(matrix), len(matrix[0])
    num_islands = 0
    for r in range(num_rows):
        for c in range(num_cols):
            if matrix[r][c] == 1:
                num_islands += 1
                matrix[r][c] = 0
                dfs(r, c)

    return num_islands
    

### Bipartite Graph Validation
- Given an undirected graph, determine if it is bipartite. A graph is bipartite IFF the nodes can be colored in one of 2 colors AND no two adjacent nodes have the same color

- Input: graph = [[1,4], [0,2], [1], [4], [0,3] ]
    * index of graph array is the node id, and value is the connected nodes
- output: True

In [10]:
def is_bipartite(graph):
    """Return True if the undirected graph can be 2-colored.

    Args:
        graph: list of neighbor lists; `graph[i]` holds the nodes adjacent to
            node `i`.

    Returns:
        True when no two adjacent nodes are forced to share a color.
    """
    # colors can have values: 0 (not colored yet), 1, -1
    colors = [0] * len(graph)

    # Iterate over node ids (not over the neighbor lists) and start a DFS from
    # every node that is still uncolored, so disconnected components are
    # checked as well.
    for i in range(len(graph)):
        if colors[i] == 0 and not dfs(i, 1, graph, colors):
            return False

    return True


# dfs: the recurrence is, for each node, try to color its neighbors the
# opposite color. If a neighbor is unvisited, submit that node into a dfs.
def dfs(node, color, graph, colors):
    """Color `node` with `color`, then its neighbors with the opposite color.

    Returns False as soon as a neighbor already carries the same color as
    `node`, True otherwise.
    """
    colors[node] = color

    for v in graph[node]:
        if colors[v] == color:
            return False
        if (colors[v] == 0) and not (dfs(v, -color, graph, colors)):
            return False

    return True


### Longest Increasing Path
- Find the "LENGTH" of longest strictly increasing path in a matrix of POSITIVE integers.  A path is a sequence of cells where each one is 4 directionally adjacent (up, down, left, or right)
- 

In [19]:
from typing import List

def longest_increasing_path(matrix: List[List[int]]) -> int:
    """Return the length of the longest strictly increasing path in `matrix`.

    A path moves between 4-directionally adjacent cells and every step must go
    to a strictly larger value.

    Args:
        matrix: rectangular grid of integers.

    Returns:
        The number of cells on the longest such path; 0 for an empty grid.
    """
    if not matrix or not matrix[0]:
        return 0

    m, n = len(matrix), len(matrix[0])
    max_num_path = 0

    # memo[r][c] holds the longest increasing path that STARTS at (r, c);
    # 0 means "not computed yet" (every real answer is at least 1).
    memo = [[0] * n for _ in range(m)]

    for r in range(m):
        for c in range(n):
            max_num_path = max(max_num_path, dfs(r, c, matrix, memo))

    return max_num_path


# DFS:
#   Returns the length of the longest increasing path starting at (r, c)
#   Populates memo[r][c]
def dfs(r, c, matrix, memo):
    """Memoized longest increasing path starting at cell (r, c)."""
    if memo[r][c] != 0:
        return memo[r][c]

    max_path = 1  # the cell on its own
    dirs = [(0, 1), (0, -1), (1, 0), (-1, 0)]

    for dx, dy in dirs:
        next_x, next_y = r + dx, c + dy
        # Only strictly larger neighbors are followed, so no cycle is possible
        # and no visited set is needed.
        if within_bounds(next_x, next_y, matrix) and matrix[r][c] < matrix[next_x][next_y]:
            max_path = max(max_path, 1 + dfs(next_x, next_y, matrix, memo))

    memo[r][c] = max_path
    return max_path


def within_bounds(r, c, matrix):
    """Return True when (r, c) is a valid cell of `matrix`."""
    m, n = len(matrix), len(matrix[0])
    return (0 <= r < m) and (0 <= c < n)

# 3 BFS

In [47]:
# Quick check: a defaultdict holding at least one key is truthy.
queues = defaultdict(list)
queues[1] += [2]
if len(queues) > 0:
    print(True)

True


### DataBricks Find Most Efficient Transportation:
- Problem:
    * Given a grid where there are m modes of transportation (1,2,3,4), find the most efficient mode of transportation between the start position S and the destination position D.
    * If there is a tie based on time, use cost as the tie breaker
    * X means a road block

- Insights:
    * This is a shortest-path problem in disguise, so use BFS and not DFS.
    * What makes BFS optimal traversal?
      - since each mode has the same cost, we only need to track the incremental distance from the destination
      - this makes it a shortest path problem --> BFS
    * How do I prevent revisiting the same position twice? This would also solve having multiple optimal solution for a particular mode
      - Use a visited set.
      - Add the new position when we add it to the queue 

In [95]:
from collections import deque, defaultdict

# --------------------------------------------
# Input variables
# --------------------------------------------
grid = [
    ["3", "3", "S", "2", "X", "X"],
    ["3", "1", "1", "2", "X", "2"],
    ["3", "1", "1", "2", "2", "2"],
    ["3", "1", "1", "1", "D", "3"],
    ["3", "3", "3", "3", "3", "4"],
    ["4", "4", "4", "4", "4", "4"]
]
# Per-mode tables, indexed directly by the mode number found in the grid
# (mode_to_dist below is indexed the same way). Index 0 is unused because the
# grid contains no mode "0"; mode "4" appears in the grid but has no entry.
cost_matrix = [0, 1, 3, 2]
time_matrix = [3, 2, 1, 1]

num_modes = len(cost_matrix)
m, n = len(grid), len(grid[0])
directions = [(0,1), (1,0), (0,-1), (-1,0)]

# -----------------------------------------
# Find End and Start Position
# -----------------------------------------
start_pos = None
dest_pos = None
for r in range(m):
    for c in range(n):
        if grid[r][c]=="D":
            dest_pos = (r,c)
        if grid[r][c]=="S":
            start_pos = (r,c)
print(f'dest_pos={dest_pos}')

# -----------------------------------------
# INITIALIZE QUEUE:
#   Entries are (mode, position). The search runs backwards from D; the mode
#   is None for D itself because no mode has been chosen yet.
# -----------------------------------------
queue = deque([ (None, dest_pos) ])
print(f'\nqueue={queue}')

# -----------------------------------------
# BFS
#   One BFS explores all modes at once: a cell is only entered from a cell of
#   the same mode (or from D), so each mode grows its own region outwards
#   from D, one level per iteration of the while loop.
# -----------------------------------------
# mode_to_dist[k] = BFS level at which a mode-k cell adjacent to S was first
# dequeued (0 = mode k never reached S). The final step onto S is not counted.
mode_to_dist = [0] * num_modes
visited = set()
dist = 0
while queue:
    print(f'\nNew Level: dist={dist}')

    # Flush the current queue: everything queued right now belongs to the
    # same level, so `dist` is only incremented once the level is drained.
    for _ in range(len(queue)):
        curr_mode, curr_pos = queue.popleft()
        print(f'\tcurr_mode={curr_mode} cur_pos={curr_pos}')

        for dx, dy in directions:
            x = curr_pos[0] + dx
            y = curr_pos[1] + dy

            if start_pos == (x, y):
                print(f'\tEUREKA: position ({x}, {y}) curr_mode={curr_mode} reached destination')
                # curr_mode is None only when S is directly adjacent to D;
                # no transportation mode is involved then, nothing to record.
                if curr_mode is not None:
                    mode_index = int(curr_mode)
                    # Ignore modes that have no cost/time entry, and keep only
                    # the first (i.e. shortest) arrival of each mode.
                    if mode_index < num_modes and mode_to_dist[mode_index] == 0:
                        mode_to_dist[mode_index] = dist
                # S itself is never expanded.
                continue

            if (0 <= x < m) and (0 <= y < n) and grid[x][y] != "X":
                if (x, y) in visited:
                    continue
                if (grid[x][y] == curr_mode) or (curr_mode is None):
                    next_mode = grid[x][y]
                    queue.append( (next_mode, (x, y)) )
                    # Mark on enqueue so a cell is never queued twice.
                    visited.add( (x,y) )

    dist += 1

print(f'dist={dist}')




dest_pos=(3, 4)

queue=deque([(None, (3, 4))])

New Level: dist=0
	curr_mode=None cur_pos=(3, 4)

New Level: dist=1
	curr_mode=3 cur_pos=(3, 5)
	curr_mode=3 cur_pos=(4, 4)
	curr_mode=1 cur_pos=(3, 3)
	curr_mode=2 cur_pos=(2, 4)

New Level: dist=2
	curr_mode=3 cur_pos=(4, 3)
	curr_mode=1 cur_pos=(3, 2)
	curr_mode=2 cur_pos=(2, 5)
	curr_mode=2 cur_pos=(2, 3)

New Level: dist=3
	curr_mode=3 cur_pos=(4, 2)
	curr_mode=1 cur_pos=(3, 1)
	curr_mode=1 cur_pos=(2, 2)
	curr_mode=2 cur_pos=(1, 5)
	curr_mode=2 cur_pos=(1, 3)

New Level: dist=4
	curr_mode=3 cur_pos=(4, 1)
	curr_mode=1 cur_pos=(2, 1)
	curr_mode=1 cur_pos=(1, 2)
	EUREKA: position (0, 2) curr_mode=1 reached destination
	curr_mode=2 cur_pos=(0, 3)
	EUREKA: position (0, 2) curr_mode=2 reached destination

New Level: dist=5
	curr_mode=3 cur_pos=(4, 0)
	curr_mode=1 cur_pos=(1, 1)

New Level: dist=6
	curr_mode=3 cur_pos=(3, 0)

New Level: dist=7
	curr_mode=3 cur_pos=(2, 0)

New Level: dist=8
	curr_mode=3 cur_pos=(1, 0)

New Level: dist=9
	c

In [96]:
mode_to_dist

[0, 4, 4, 10]

In [97]:
# One (total_time, cost, mode) tuple per mode that reached the start:
#   total_time = time_matrix[mode] * mode_to_dist[mode]
# Modes whose recorded distance is 0 are left out.
cost = [(time*distance, cost_matrix[i], i) for i, (time, distance) in enumerate(zip(time_matrix, mode_to_dist)) if distance != 0]
cost

[(8, 1, 1), (4, 3, 2), (10, 2, 3)]

In [98]:
# Rank by total time first; when two modes tie on time, the cheaper one wins
# (t[0] is the total time, t[1] is the cost), as the problem statement asks.
cost.sort(key=lambda t: (t[0], t[1]))
cost

[(4, 3, 2), (8, 1, 1), (10, 2, 3)]

In [99]:
# Most efficient transportation is mode 2
cost[0][2]

2

### Matrix Infection

### Shortest Transformation Sequences
- Given 2 words start and end, and a dictionary containing an array of words, return the LENGTH of the shortest transformation sequence that transforms start to end. A transformation sequence is a series of words in which
    * each word differs from the preceding word by exactly one letter
    * each word in the sequence exists in the dictionary
- If no such sequence exists, return 0
- Example
    * start=red, end=hit, dictionary=["red", "bed", "hat", "rod", "rad", "rat", "hit", "bad", "bat"] 
    * red -> rad -> rat -> hat -> hit
    * output = 5

### Shortest Transformation Sequences
- Given 2 words start and end, and a dictionary containing an array of words, return the LENGTH of the shortest transformation sequence that transforms start to end. A transformation sequence is a series of words in which
    * each word differs from the preceding word by exactly one letter
    * each word in the sequence exists in the dictionary
- If no such sequence exists, return 0
- Example
    * start=red, end=hit, dictionary=["red", "bed", "hat", "rod", "rad", "rat", "hit", "bad", "bat"] 
    * red -> rad -> rat -> hat -> hit
    * output = 5

#### V1: Less efficient

In [26]:
from collections import deque

# Let n = number of words in the dictionary, L = length of a word.
# Runtime: O(n * L * 26 * L) ~= O(n * L^2)
#   - every word is dequeued at most once (n)
#   - for each word, L positions x 26 letters are tried
#   - building and hashing each candidate word costs O(L)
# Space: O(n * L) for the sets and the queue
def shortest_transformation_sequence(start: str, end: str, dictionary: List[str]) -> int:
    """Return the length of the shortest transformation from `start` to `end`.

    Each step changes exactly one letter, and every word produced by a step
    must be in `dictionary`. The length counts words, so a sequence
    start -> w1 -> end has length 3.

    Args:
        start: the first word of the sequence.
        end: the word to reach.
        dictionary: the words that may appear in the sequence.

    Returns:
        The number of words in the shortest sequence, or 0 if none exists.
    """
    if not dictionary:
        return 0

    alphabet = "abcdefghijklmnopqrstuvwxyz"
    dictionary_set = set(dictionary)  # O(1) membership tests
    queue = deque([start])
    visited = {start}

    dist = 0

    while queue:
        # Why the for loop? We want to finish the current level before we
        # increment `dist` below.
        for _ in range(len(queue)):
            curr_word = queue.popleft()

            if curr_word == end:
                return dist + 1

            # Candidates: replace each position by each letter.
            for i in range(len(curr_word)):
                for c in alphabet:
                    next_word = curr_word[:i] + c + curr_word[i + 1:]
                    if next_word not in visited and next_word in dictionary_set:
                        # Mark on enqueue so a word is queued only once.
                        visited.add(next_word)
                        queue.append(next_word)

        dist += 1

    return 0

#### V2; With Trie Nodes
- Assuming k is the average length of words in dictionary, and n is number of words in dictionary
- RunTime: 
    * Build Trie: O(n * k)
    * Explore neighbors for each word: O(m * k) where m is the number of valid neighbors
    * Total: buildTrie + BFS( exploreNeighbors) = O(n*k) + O(n * (m*k) ) ~= O(n * m * k)
    * This is more efficient than V1 version if m < 26

In [34]:
class TrieNode:
    """One node of a trie keyed by single characters."""

    def __init__(self):
        self.children = {}  # char -> TrieNode
        self.word = None    # If not None, this node is the end of that word


start = "rat"
dictionary=["red", "bed", "hat", "rod"]
root = TrieNode()


def build_trie(dictionary):
    """Build a trie holding every word of `dictionary` and return its root."""
    root = TrieNode()

    for w in dictionary:
        curr_node = root
        for c in w:
            if c not in curr_node.children:
                curr_node.children[c] = TrieNode()
            curr_node = curr_node.children[c]
        curr_node.word = w
    return root


def shortest_transformation_length(start, end, dictionary):
    """Return the length of the shortest transformation from `start` to `end`.

    Each step changes exactly one letter and must land on a word of
    `dictionary`. The length counts words, so start -> w1 -> end has length 3.

    Returns:
        The number of words in the shortest sequence, or 0 if no sequence
        exists (same convention as the problem statement and the V1 solution).
    """
    if end not in dictionary:
        return 0

    trie_root = build_trie(dictionary)

    queue = deque([(start, 1)])  # (curr_word, transformation_length)
    visited = {start}

    while queue:  # O(n): every word is dequeued at most once
        curr_word, length = queue.popleft()

        if curr_word == end:
            return length

        for neighbor in get_valid_neighbors(curr_word, trie_root):
            if neighbor not in visited:
                # Mark on enqueue so a word is queued only once.
                visited.add(neighbor)
                queue.append((neighbor, length + 1))

    return 0


# Returns the list of words in the trie that are exactly 1 character apart
# from `word` (same length, one differing position).
# RT: O(m*k), where m is the number of valid neighbors and k the word length
def get_valid_neighbors(word, trie_root):
    """Collect the trie words that differ from `word` in exactly one position."""
    neighbors = []

    def dfs(node, depth, mismatches):
        # A full-length path has been consumed: keep it only if it is a real
        # word and exactly one character was substituted on the way.
        if depth == len(word):
            if node.word is not None and mismatches == 1:
                neighbors.append(node.word)
            return

        char = word[depth]
        # ONLY look at characters that actually exist in the trie.
        for next_char, child_node in node.children.items():
            if next_char == char:
                dfs(child_node, depth + 1, mismatches)
            elif mismatches == 0:
                # Spend the single allowed substitution at this position.
                dfs(child_node, depth + 1, 1)

    dfs(trie_root, 0, 0)
    return neighbors


# Backward-compatible alias for the original (misspelled) function name.
get_valid_neigbors = get_valid_neighbors
            

# 4 Topological Sort
- The indegree of nodeA measures how many edges come into nodeA. Ex: node1 has an indegree of 2.
    * an indegree of 0 means that all prereqs have been resolved
    * at each iteration, the queue contains only nodes with an indegree of 0. Intuitively, these are the nodes that have satisfied all prereqs.

### Prerequisites
- Given an integer n representing the number of courses labeled from 0 to n-1 and an array of prereq pairs, determine if it is possible to enroll in all courses
- Each prereq is a pair [a,b] indicating that course a must be taken before course b
- Example:
    * Input: n=3, prereqs = [[0,1], [1,2], [2,1] ]
    * Output: False bc course 1 cannot be taken without completing course 2, and vice versa

In [36]:
from typing import List
from collections import defaultdict

def prequisites(n: int, prereqs: List[ List[int]] ) -> bool:
    """Return True if all `n` courses can be taken given the prerequisites.

    Uses a queue-based topological sort (Kahn's algorithm): repeatedly enroll
    in courses with no outstanding prerequisites; a cycle leaves some courses
    that can never be enrolled in.

    Args:
        n: number of courses, labeled 0 .. n-1.
        prereqs: pairs [a, b] meaning course a must be taken before course b.

    Returns:
        True when every course can be enrolled in, False otherwise.
    """
    graph = defaultdict(list)  # graph[prereqA] = [courses that require prereqA]
    in_degrees = [0] * n  # index is the course #; value is its number of unmet prereqs

    for pre, course in prereqs:
        graph[pre].append(course)
        in_degrees[course] += 1

    # Start with every course that has no prereqs at all.
    queue = deque(i for i in range(n) if in_degrees[i] == 0)

    enrolled_classes = 0

    # The queue only ever contains courses whose prereqs are all satisfied.
    while queue:
        node = queue.popleft()
        enrolled_classes += 1

        # c is a class which depended on prereq `node`
        for c in graph[node]:
            in_degrees[c] -= 1

            if in_degrees[c] == 0:
                queue.append(c)

    return enrolled_classes == n

# 5 Union Find

### Merging Communities

### Connect the Dots [BONUS]

# 6 Dijkstra Algorithm [BONUS]