In [1]:
import sys; sys.setrecursionlimit(100)

# Reminder: Stacks and Queues

We give here a reminder about two basic data structures: stacks and queues. Stacks and queues are list-like data structures in which one can push and pull values, and that impose an (implicit) order on the pulled values:

 - In a queue, elements are accessed on a first-in-first-out (FIFO) basis, i.e. when you pull an element from a queue, you obtain the first inserted one that has not been already removed. It follows the same logic as (real life) queues: the earlier you arrive, the earlier you leave.
 - In a stack, elements are accessed on a last-in-first-out (LIFO) basis, i.e. when you pull an element from a stack, you obtain the last inserted one.

We can graphically represent stacks and queues as follows. The first graphic shows the evolution of the state of a stack after some operations, where we see the LIFO nature of the structure:

<img src="files/stack.png" style="max-height: 300px;"></img>

Whereas in the following graphic, we show the evolution of the state of a queue after some operations, where we see the FIFO nature of the structure:

<img src="files/queue.png" style="max-height: 300px;"></img>

Queues and stacks can be easily implemented in Python using lists. We give here one implementation for each data structure:

In [2]:
class Stack:
    """ LIFO container backed by a Python list

        The top of the stack is the last element of the underlying list.
        `push` and `pop` print a trace of the operation they perform. """

    def __init__(self, elements = None):
        """ Create a stack, empty by default

            If `elements` is given, the stack is initialized with a copy
            of its items; the last item is the top of the stack """
        # We copy the input: pushing/popping must never mutate a list
        # owned by the caller (this also lets us accept any iterable)
        self._stack = [] if elements is None else list(elements)

    def __len__(self):
        """ Return the number of elements in the stack """
        return len(self._stack)

    def __bool__(self):
        """ Return `True` iff the stack is not empty """
        return bool(self._stack)

    def clear(self):
        """ Clear the stack, i.e. remove all its elements """
        self._stack.clear()

    def push(self, x):
        """ Push an element on top of the stack """
        print(f'Push {x}')
        self._stack.append(x)

    def pop(self):
        """ Pop an element from the top of the stack and return it

            Raise [IndexError] if the stack is empty """
        print('Pop')
        return self._stack.pop()

    def top(self):
        """ Return the top-element, but do not pop it

            Raise [IndexError] if the stack is empty """
        return self._stack[-1]

    def __repr__(self):
        return f'Stack({self._stack})'
    
class Queue:
    """ FIFO container backed by a Python list

        The front of the queue is the first element of the underlying
        list. `enqueue` and `dequeue` print a trace of the operation
        they perform. """

    def __init__(self, elements = None):
        """ Create a queue, empty by default

            If `elements` is given, the queue is initialized with a copy
            of its items; the first item is the front of the queue """
        # We copy the input: enqueuing/dequeuing must never mutate a list
        # owned by the caller (this also lets us accept any iterable)
        self._queue = [] if elements is None else list(elements)

    def __len__(self):
        """ Return the number of elements in the queue """
        return len(self._queue)

    def __bool__(self):
        """ Return `True` iff the queue is not empty """
        return bool(self._queue)

    def clear(self):
        """ Clear the queue, i.e. remove all its elements """
        self._queue.clear()

    def enqueue(self, x):
        """ Enqueue an element at the back of the queue """
        print(f'Enqueue: {x}')
        self._queue.append(x)

    def dequeue(self):
        """ Dequeue the front element from the queue and return it

            Raise [IndexError] if the queue is empty """

        # Note that the `pop(0)` operation is linear in the size of [self._queue]
        # We could use a double-linked list to obtain a better complexity
        # (See for instance the class `deque` in the `collections` Python module)
        print('Dequeue')
        return self._queue.pop(0)

    def front(self):
        """ Return the front element from the queue without removing it

            Raise [IndexError] if the queue is empty """
        return self._queue[0]

    def __repr__(self):
        return f'Queue({self._queue})'

In [3]:
import collections

help(collections.deque)

Help on class deque in module collections:

class deque(builtins.object)
 |  deque([iterable[, maxlen]]) --> deque object
 |  
 |  A list-like sequence optimized for data accesses near its endpoints.
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __bool__(self, /)
 |      True if self else False
 |  
 |  __contains__(self, key, /)
 |      Return key in self.
 |  
 |  __copy__(...)
 |      Return a shallow copy of a deque.
 |  
 |  __delitem__(self, key, /)
 |      Delete self[key].
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __getitem__(self, key, /)
 |      Return self[key].
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __iadd__(self, value, /)
 |      Implement self+=value.
 |  
 |  __imul__(self, value, /)
 |      Implement self*=value.
 |  
 |  __in

We here give a test case for `Queue`:

In [4]:
def test_queue():
    """ Demonstrate the FIFO behaviour of `Queue` by printing the
        values it returns """
    queue = Queue()
    # Fill the queue with 0, 1, 2, 3, 4 (in that order)
    for value in range(5):
        queue.enqueue(value)
    # The two oldest values come out first: 0, then 1
    for _ in range(2):
        print(queue.dequeue())
    # A late arrival (5) goes to the back of the queue...
    queue.enqueue(5)
    # ...so, when emptying the queue, it comes out last (after 2, 3, 4)
    while queue:
        print(queue.dequeue())
        
test_queue()

Enqueue: 0
Enqueue: 1
Enqueue: 2
Enqueue: 3
Enqueue: 4
Dequeue
0
Dequeue
1
Enqueue: 5
Dequeue
2
Dequeue
3
Dequeue
4
Dequeue
5


And for `Stack`:

In [5]:
def test_stack():
    """ Demonstrate the LIFO behaviour of `Stack` by printing the
        values it returns """
    stack = Stack()
    # Fill the stack with 0, 1, 2, 3, 4 (4 ends up on top)
    for value in range(5):
        stack.push(value)
    # The two most recent values come out first: 4, then 3
    for _ in range(2):
        print(stack.pop())
    # A late arrival (5) goes on top of the stack...
    stack.push(5)
    # ...so, when emptying the stack, it comes out first,
    # before the remaining values (`2`, `1` and `0`)
    while stack:
        print(stack.pop())
        
test_stack()

Push 0
Push 1
Push 2
Push 3
Push 4
Pop
4
Pop
3
Push 5
Pop
5
Pop
2
Pop
1
Pop
0


# Graphs

An (oriented) graph is a data-structure that consists of a finite set of vertices (or nodes), along with a set of ordered pairs of such elements. These pairs are called *edges* or *arcs*. A graph structure can also have a *value* or *label* associated with each edge, such as a label or numeric value (a cost, capacity, length, etc.).

In Python, following what we did for trees, we can represent a graph with instances of the following class `Node`. In this case, the class `Node` contains two data-attributes (one for the label of the node and one for the successor nodes) and a graph will be the data of the set (or list) of its nodes. The class uses a variadic parameter (`*nodes`), so we first illustrate how `*args` collects the extra positional arguments of a call into a tuple:

In [6]:
def f(x, y, *args):
    """ Print the two mandatory arguments `x` and `y`, followed by the
        tuple `args` of all the extra positional arguments """
    message = f'x={x}, y={y}, args={args}'
    print(message)
    
f(2, 4)
f(2, 3, 4, 5)
f("foo", None, True)

x=2, y=4, args=()
x=2, y=3, args=(4, 5)
x=foo, y=None, args=(True,)


In [7]:
class Node:
    """ A graph node: a label together with the list of its successors """

    def __init__(self, label):
        """ Create a node labelled `label`, without any successor """
        self.label = label
        self.successors = []

    def add(self, *nodes):
        """ Append each node of `nodes` (in order) to the successors
            of this node """
        # Edges are directed: `self` is not added to the successors
        # of the nodes given in `nodes`
        for node in nodes:
            self.successors.append(node)

    def __repr__(self):
        return f'Node[{self.label}]'

# If you install Graphviz (https://www.graphviz.org/)
# you can use that function to get a visual representation of
# a graph using the `Node` representation (see below)
#
# You also need to install the graphviz python module
def repr_graph(graph):
    """ Build a `graphviz.Digraph` out of `graph`, an iterable of nodes
        exposing the attributes `label` and `successors`

        One graphviz node is declared per node of `graph` and one edge
        per (node, successor) pair, all named after `str(label)`.
        The resulting `Digraph` object is returned. """
    # Local import: graphviz is an optional dependency
    import graphviz

    dot = graphviz.Digraph()
    dot.node_attr.update(
        margin='0', fontsize='12', width='0.5',
        shape='circle', style='filled')
    # First declare all the nodes (so that nodes without any edge are
    # part of the drawing as well)...
    for node in graph:
        dot.node(str(node.label))
    # ...then all the edges
    for node in graph:
        for succ in node.successors:
            dot.edge(str(node.label), str(succ.label))
    return dot


        
# Example graph: 7 nodes labelled 0, 1, ..., 6
graph = [Node(i) for i in range(7)]
# Cycle 0 -> 1 -> 2 -> 0
graph[0].add(graph[1])
graph[1].add(graph[2])
graph[2].add(graph[0])
# Node 3 points to 1, to itself (self-loop) and to 5
graph[3].add(graph[1], graph[3], graph[5])
graph[5].add(graph[6])
# Nodes 4 and 6 have no successors (and no edge points to node 4)

print(graph[3].successors)

# repr_graph(graph) # Uncomment if you have graphviz

[Node[1], Node[3], Node[5]]


The graph we just constructed (in `graph`) has the following representation:

<img src="files/graph.png" style="max-height: 200px;"></img>

However, we tend to use different, more compact and/or efficient representation of graphs. We below present two of them: the adjacency matrix representation and the adjacency list representation.

From now on, we assume that the nodes of our graphs are labelled with the integers $0$, $1$, ..., $n-1$ where $n$ is the number of nodes of the graph. In that case, we order the nodes by their label where the $i$-th node is the node labelled with $i$.

## Adjacency list

In the adjacency list form, we attach to node $i$ a list $l$ of labels s.t. $j \in l$ iff there exists an edge from $i$ to $j$. For example, our example graph is represented by the following adjacency list:

In [8]:
# Adjacency list of the example graph:
# `G[i]` is the list of the labels of the successors of node `i`
G = [
    [1],        # Successors of node 0
    [2],        # Successors of node 1
    [0],        # Successors of node 2
    [1, 3, 5],  # Successors of node 3
    [],         # Successors of node 4
    [6],        # Successors of node 5
    [],         # Successors of node 6
]

To exemplify the adjacency list representation, we give a function that converts a graph `G` in this representation to the one using the class `Node`:

In [9]:
def adjlst_to_nodes(G):
    """ Convert the adjacency list `G` into a list of `Node`

        The node at index `i` of the result is labelled `i` and its
        successors are the nodes whose labels are listed in `G[i]`
        (in the same order) """
    # One node per entry of `G`, labelled with its index
    nodes = [Node(label) for label in range(len(G))]
    # Pair each node with the labels of its successors...
    for source, successor_labels in zip(nodes, G):
        for target_label in successor_labels:
            # ...and add one edge `source -> target` per listed label
            source.add(nodes[target_label])
    return nodes

G2 = adjlst_to_nodes(G)
for i, node in enumerate(G2):
    print (i, node.successors)

# repr_graph(adjlst_to_nodes(G))

0 [Node[1]]
1 [Node[2]]
2 [Node[0]]
3 [Node[1], Node[3], Node[5]]
4 []
5 [Node[6]]
6 []


## Adjacency matrix

In the adjacency matrix representation, a matrix $m$ of booleans (or integers) of size $n \times n$ (where $n$ is the size of the graph) is used. This matrix is s.t. $m_{i,j}$ is `True` (or different from `0`) iff there exists an edge from $i$ to $j$. For example, our example graph is represented by the following adjacency matrix:

In [10]:
# Adjacency matrix of the example graph:
# `G[i][j]` is `1` if there is an edge from node `i` to node `j`, `0` otherwise
G = [
    [0, 1, 0, 0, 0, 0, 0],  # Node 0: edge to 1
    [0, 0, 1, 0, 0, 0, 0],  # Node 1: edge to 2
    [1, 0, 0, 0, 0, 0, 0],  # Node 2: edge to 0
    [0, 1, 0, 1, 0, 1, 0],  # Node 3: edges to 1, 3 (self-loop) and 5
    [0, 0, 0, 0, 0, 0, 0],  # Node 4: no outgoing edge
    [0, 0, 0, 0, 0, 0, 1],  # Node 5: edge to 6
    [0, 0, 0, 0, 0, 0, 0],  # Node 6: no outgoing edge
]

To exemplify the adjacency matrix representation, we give a function that converts a graph `G` in this representation to the one using the class `Node`:

In [11]:
def adjmx_to_nodes(G):
    """ Convert the adjacency matrix `G` into a list of `Node`

        The node at index `i` of the result is labelled `i`; it gets
        the node `j` as a successor whenever `G[i][j]` is truthy """
    size = len(G)
    # One node per row of the matrix, labelled with the row index
    nodes = [Node(label) for label in range(size)]
    for source, row in zip(nodes, G):
        # Scan the first `size` columns of the row of `source`
        for target in range(size):
            if row[target]:
                source.add(nodes[target])
    return nodes

G2 = adjmx_to_nodes(G)
for i, node in enumerate(G2):
    print (i, node.successors)

# repr_graph(adjmx_to_nodes(G))

0 [Node[1]]
1 [Node[2]]
2 [Node[0]]
3 [Node[1], Node[3], Node[5]]
4 []
5 [Node[6]]
6 []


## Undirected graph

Undirected graphs (i.e. when the presence of an edge from `i` to `j` implies the existence of an edge from `j` to `i`) can be represented using the aforementioned representations by *symmetrizing* them - i.e. by forcing the presence of back-edges in the representation.

E.g., in the case of the adjacency matrix representation, a graph is undirected iff its adjacency matrix is symmetric. In that case, as an optimization, we could only store the upper part of the adjacency matrix since it is always possible to recover the value $m_{j,i}$ (when $j > i$) from the one of $m_{i,j}$.

## Encapsulation

Throughout this notebook, in order to lighten the presentation, we are directly manipulating the representation of graphs. This would be a very bad idea in a real application, where the graph representation should be abstracted behind an API. Moreover, doing so would allow the API implementation to swap from one representation to another depending on the shape of the graphs --- without changing the way the application manipulates the graph. For instance, for graphs with numerous nodes but very few edges, one may want to switch to a sparse adjacency matrix representation.

# Traversing a Graph

In this section, we use the `Node` base representation of graphs.

Assume that we want to traverse all the nodes of a given graph. We are going to use the same methodology as we did for trees: we start from a node, we apply some operation on it (e.g., we print its value) and then, we recursively visit all the successors:

In [12]:
def visit_1(node):
    """ Naive recursive traversal from `node`: print the label of
        `node` (followed by `, `), then traverse each of its
        successors in turn.

        No track is kept of the nodes already visited: a node that is
        reachable through several paths is printed several times, and
        the recursion never ends by itself if a cycle is reachable
        from `node` """
    # Process the current node first...
    print(node.label, end = ', ')
    # ...then explore, one after the other, everything below it
    for child in node.successors:
        visit_1(child)

# Print all the labels of the graph `graph`, starting at node `3`
visit_1(graph[3])
    

3, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/strubpy/.pyenv/versions/3.10.4/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3442, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/7p/3n4lkkq94d915jm2fvz906ch0000gn/T/ipykernel_64211/349771589.py", line 10, in <module>
    visit_1(graph[3])
  File "/var/folders/7p/3n4lkkq94d915jm2fvz906ch0000gn/T/ipykernel_64211/349771589.py", line 7, in visit_1
    visit_1(successor)
  File "/var/folders/7p/3n4lkkq94d915jm2fvz906ch0000gn/T/ipykernel_64211/349771589.py", line 7, in visit_1
    visit_1(successor)
  File "/var/folders/7p/3n4lkkq94d915jm2fvz906ch0000gn/T/ipykernel_64211/349771589.py", line 7, in visit_1
    visit_1(successor)
  [Previous line repeated 66 more times]
  File "/var/folders/7p/3n4lkkq94d915jm2fvz906ch0000gn/T/ipykernel_64211/349771589.py", line 4, in visit_1
    print(node.label, end = ', ') # Visit the current node
  File "/Users/strubpy/.pyenv/versions/3.10.4/lib/

As you can see, we have a problem here: we are visiting the nodes $0$, $1$, $2$ over and over, in a cycle, up to a point where we exhaust the system stack. If we look more carefully, we indeed see that from the node $3$, following the edges, we reach a cycle $1 \rightarrow 2 \rightarrow 0 \rightarrow 1$. In trees, this scenario was not possible since trees are acyclic structures. For graphs, we may have cycles and we hence need to remember which nodes we already visited.

This gives the following implementation:

In [13]:
def visit(node, visited):
    """ Recursive depth-first traversal from `node`, printing the
        label of every node met for the first time.

        `visited` is a set holding the labels of the nodes already
        visited; it is updated in place, so that after the call it
        also contains the labels of all the nodes reached from `node` """
    label = node.label
    if label not in visited:
        # First time we reach this node: process it and remember it
        print(label)
        visited.add(label)
        # Only then explore what lies behind it (nodes that were
        # already visited are skipped by the test above)
        for successor in node.successors:
            visit(successor, visited)

S = set()
visit(graph[3], S)
print(S)


3
1
2
0
5
6
{0, 1, 2, 3, 5, 6}


Note that there is one node that hasn't been visited: the one labeled with `4`. This comes from the fact that `4` is not reachable from `3` - a graph, unlike a tree, might not be fully connected.

The strategy we used for graph traversal is called a Depth-first search (DFS) strategy. In DFS, the traversal starts at the considered node and explores as far as possible along each path in the graph before backtracking.

For example, in our case, starting from the node `3`, we have $3$ successors: `1`, `3` and `5`. We first go to `1` and will explore all the paths from `1` --- since we are in DFS, we will only consider the nodes `3` and `5` when we are done with the paths rooted at `1`. From `1`, there is only one path: `1` $\rightarrow$ `2` $\rightarrow$ `0`. Arrived at `0`, we stop since `1` has already been visited. Hence, we backtrack. The last choice has been made at the node `3`, where we decided to go for `1`, but we had two other choices: `3` and `5`. We then take the next one (`3`) that brings to a dead end (`3` has already been visited). We then backtrack again and follow `5`. The choice leads to the path `5` $\rightarrow$ `6`. At the end of that path, we cannot backtrack anymore (we explored all the possible choices) and we are done.

The animated image below shows the order in which nodes are visited. The node in red is the node we are visiting, whereas the nodes in yellow are the ones we fully visited (i.e. we followed all the paths starting at that node). Last, the nodes in blue are the nodes along the path we are currently on. At the top of the image, we display the list of nodes being visited (i.e. the active one in addition to the ones we still have to backtrack over). You can see that this list has the behaviour of a *stack*.

<img src="files/dfs.gif" style="max-height: 250px;"></img>

This gives us a hint that DFS could be implemented iteratively using a stack, as follows:

In [14]:
def dfs_iter(node):
    """ Iterative depth-first traversal from `node`, printing the
        label of every node met for the first time.

        The nodes that remain to be explored are kept in a `Stack` """
    pending = Stack([node])
    seen = set()
    while pending:
        current = pending.pop()
        if current.label not in seen:
            # First time we meet `current`: mark it and process it...
            seen.add(current.label)
            print(current.label)
            # ...then schedule its successors. They are pushed in
            # reverse order so that they are popped in their
            # original order
            for successor in reversed(current.successors):
                pending.push(successor)
        
dfs_iter(graph[3])


Pop
3
Push Node[5]
Push Node[3]
Push Node[1]
Pop
1
Push Node[2]
Pop
2
Push Node[0]
Pop
0
Push Node[1]
Pop
Pop
Pop
5
Push Node[6]
Pop
6


What happens if we use a queue instead of a stack?

In [15]:
def bfs_iter(node, cb):
    """ Iterative breadth-first traversal from `node`, calling `cb`
        on the label of every node met for the first time.

        The nodes that remain to be explored are kept in a `Queue` """
    pending = Queue([node])
    seen = set()
    while pending:
        current = pending.dequeue()
        if current.label not in seen:
            # First time we meet `current`: mark it and process it...
            seen.add(current.label)
            cb(current.label)
            # ...then register all its successors at the back of
            # the queue
            for successor in current.successors:
                pending.enqueue(successor)
        
bfs_iter(graph[3], print)


Dequeue
3
Enqueue: Node[1]
Enqueue: Node[3]
Enqueue: Node[5]
Dequeue
1
Enqueue: Node[2]
Dequeue
Dequeue
5
Enqueue: Node[6]
Dequeue
2
Enqueue: Node[0]
Dequeue
6
Dequeue
0
Enqueue: Node[1]
Dequeue


Then, we still obtain a graph traversal algorithm, but the order in which we visit the nodes is different. As an image, we obtain this:

<img src="files/bfs.gif" style="max-height: 250px;"></img>

As you can see, we here visit the starting node, then all the nodes that are at distance $1$ from the starting node, then all the nodes that are at distance $2$ from the starting node, etc, etc... This traversal is called Breadth-first search (BFS).

Depending on the algorithm you want to implement, you may need to use DFS or BFS. E.g., for searching the minimal path (in terms of number of edges) between two vertices, BFS is the way to go since during a BFS, we always reach a vertex from a given source using the minimum number of edges.

# Weighted graphs

Sometimes, we need to store data along with edges. E.g., to each edge, we may want to attach a weight representing the travel time between two nodes that in turn represent cities.

For example, in the adjacency matrix model, this is possible by storing the data directly in the matrix. If `m` is an adjacency matrix, then `m[i][j]` is `None` if there is no arc between `i` and `j`. Otherwise, there exists an arc between the two nodes and `m[i][j]` is the data attached to that arc.

Consider the following weighted (undirected) graph:

<img src="files/wgraph.png" style="max-width: 350px;"></img>

<!--
graph {
    node [margin=0 fontsize=12 width=0.5 shape=circle style=filled]
    rankdir="LR";
    1 -- 2[label="7"];
    1 -- 3[label="9"];
    1 -- 6[label="14"];
    2 -- 3[label="10"];
    2 -- 4[label="15"];
    3 -- 4[label="11"];
    3 -- 6[label="2"];
    4 -- 5[label="6"];
    5 -- 6[label="9"];
}
-->

Then, we can use the following matrix for its adjacency matrix representation:

In [16]:
# Weighted adjacency matrix of the example graph: `wG[i][j]` is the
# weight of the edge between `i` and `j`, or `None` if there is no
# such edge. Only the upper triangle (`i < j`) is filled in here.
# Row/column `k` stands for the node labelled `k + 1` in the figure.
wG = [
    [None,    7,    9, None, None,   14],
    [None, None,   10,   15, None, None],
    [None, None, None,   11, None,    2],
    [None, None, None, None,    6, None],
    [None, None, None, None, None,    9],
    [None, None, None, None, None, None],
]

# We symmetrize wG because the graph is undirected: each entry below
# the diagonal is copied from its mirror entry above the diagonal
for i in range(len(wG)):
    for j in range(i):
        wG[i][j] = wG[j][i]
# Remove the loop variables from the namespace
del i, j

print(wG)

[[None, 7, 9, None, None, 14], [7, None, 10, 15, None, None], [9, 10, None, 11, None, 2], [None, 15, 11, None, 6, None], [None, None, None, 6, None, 9], [14, None, 2, None, 9, None]]


Using this representation, we can implement Dijkstra's algorithm (seen in CSE103):

In [17]:
def dijkstra(graph, start):
    """ Compute the shortest distances from node `start` to all nodes

        `graph` is a weighted adjacency matrix: `graph[i][j]` is the
        weight of the edge from `i` to `j`, or `None` if there is no
        such edge. The result is a list whose `i`-th entry is the
        length of the shortest path found from `start` to `i`
        (`math.inf` if `i` cannot be reached from `start`) """
    import math

    size = len(graph)
    # done[i] tells whether node `i` has been fully processed
    done = [False] * size
    # Best distance known so far from `start` to each node
    distances = [math.inf] * size
    distances[start] = 0

    while True:
        # Select the closest node among the ones not processed yet
        # (the lowest index wins in case of a tie)
        current, best = None, math.inf
        for index in range(size):
            if not done[index] and distances[index] < best:
                current, best = index, distances[index]

        if current is None:
            # Nothing left to process: every remaining node (if any)
            # is at infinite distance, i.e. out of reach
            return distances

        # Relax all the edges leaving `current`
        for neighbor, weight in enumerate(graph[current]):
            if weight is None:
                # No edge from `current` to `neighbor`
                continue
            if distances[neighbor] > distances[current] + weight:
                # Going through `current` is shorter: keep that path
                distances[neighbor] = distances[current] + weight

        done[current] = True
        
print(dijkstra(wG, 0))


[0, 7, 9, 20, 20, 11]
