## BFS

In [2]:
class Node:
    """A graph vertex: a label, its outgoing neighbors and a visited flag."""

    def __init__(self, name):
        # traversal bookkeeping; starts out as "not seen yet"
        self.visited = False
        # direct references to the neighboring Node objects (outgoing edges)
        self.adjacency_list = []
        # label identifying this vertex
        self.name = name


def breadth_first_search(start_node):
    """Print the name of every node reachable from ``start_node`` in BFS order.

    Nodes are flagged as visited at the moment they are enqueued, so a node
    that is reachable through several parents is queued -- and therefore
    printed -- exactly once.

    Args:
        start_node: node to start from. It and every reachable node must
            expose ``name``, ``adjacency_list`` and ``visited``.

    Side effects:
        Prints each reached node's name and sets its ``visited`` flag to True.
    """
    # local import so the function does not rely on a file-level import
    from collections import deque

    # FIFO: first item we insert will be the first one to take out
    # (deque.popleft() is O(1), unlike list.pop(0))
    start_node.visited = True
    queue = deque([start_node])

    # we keep iterating (considering the neighbors) until the queue becomes empty
    while queue:
        actual_node = queue.popleft()
        print(actual_node.name)

        # let's consider the neighbors of the actual_node one by one
        for n in actual_node.adjacency_list:
            if not n.visited:
                # mark on enqueue: a second parent must not queue it again
                n.visited = True
                queue.append(n)


In [3]:
# build the five vertices of the sample graph
node1, node2, node3, node4, node5 = map(Node, ("A", "B", "C", "D", "E"))

# wire up the directed edges
node1.adjacency_list.extend([node2, node3])
node2.adjacency_list.append(node4)
node4.adjacency_list.append(node5)

# A -> B -> D -> E
#   -> C

# traverse the graph level by level, starting from A
breadth_first_search(node1)

A
B
C
D
E


### WebCrawler with BFS

In [5]:
import re
from collections import deque

import requests

class WebCrawler:
    """Breadth-first crawler that follows the http(s) links found in fetched pages.

    Only the scheme and host part of each link is extracted, so the crawl
    moves from site to site rather than from page to page.
    """

    # scheme + host only: the match stops at the first '/', '?', '#', ':' ...
    # (raw string so that \w and \d are regex escapes, not string escapes)
    URL_PATTERN = re.compile(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+")

    # seconds to wait on a server before the page is given up on;
    # without a timeout a single unresponsive host would stall the crawl forever
    REQUEST_TIMEOUT = 10

    def __init__(self, max=5):
        """Create a crawler that fetches at most ``max`` pages per ``crawl`` call."""
        # we want to avoid revisiting the same website over and over again;
        # kept as a list so the discovery order stays observable
        self.discovered_websites = []
        # upper bound on the number of pages fetched by one crawl() call
        self.max = max

    # BFS implementation
    def crawl(self, start_url):
        """Crawl breadth-first from ``start_url``, printing each URL as it is fetched.

        Stops when the queue is exhausted or once ``self.max`` pages have been
        fetched. Every newly found URL is appended to
        ``self.discovered_websites``.
        """
        queue = deque([start_url])
        self.discovered_websites.append(start_url)

        # set mirror of the list: O(1) membership tests instead of a list scan
        seen = set(self.discovered_websites)

        fetched = 0
        while queue:
            # for early stop
            if fetched == self.max:
                break
            fetched += 1

            actual_url = queue.popleft()
            print(actual_url)

            # this is the raw html representation of the given website (URL)
            actual_url_html = self.read_raw_html(actual_url)

            for url in self.get_links_from_html(actual_url_html):
                if url not in seen:
                    seen.add(url)
                    self.discovered_websites.append(url)
                    queue.append(url)

    def get_links_from_html(self, raw_html):
        """Return every http(s) URL prefix found in ``raw_html``, in order of appearance."""
        return self.URL_PATTERN.findall(raw_html)

    def read_raw_html(self, url):
        """Return the body of ``url`` as text, or ``''`` when it cannot be fetched."""
        try:
            return requests.get(url, timeout=self.REQUEST_TIMEOUT).text
        except (requests.RequestException, ValueError):
            # best effort: an unreachable or malformed URL simply contributes
            # no links. ValueError covers URL parsing/encoding failures that
            # are not wrapped in a RequestException.
            return ''

In [4]:

# crawl outward from the CNN home page with the default page limit;
# NOTE: this performs live HTTP requests
crawler = WebCrawler()
crawler.crawl('https://www.cnn.com')

https://www.cnn.com
https://lightning.cnn.com
https://aswpsdkus.com
https://media.cnn.com
https://client-api.arkoselabs.com
https://middycdn-a.akamaihd.net


# DFS

In [1]:
class Node:
    """A graph vertex: a label, its outgoing neighbors and a visited flag."""

    def __init__(self, name):
        # traversal bookkeeping; starts out as "not seen yet"
        self.visited = False
        # direct references to the neighboring Node objects (outgoing edges)
        self.adjacency_list = []
        # label identifying this vertex
        self.name = name


def depth_first_search(start_node):
    """Print the name of every node reachable from ``start_node`` in DFS order.

    Iterative version driven by an explicit stack. A node can be pushed
    several times before it is first popped (once per already-processed
    parent), so every popped node is re-checked and stale duplicates are
    skipped; each node is therefore printed exactly once.

    Args:
        start_node: node to start from. It and every reachable node must
            expose ``name``, ``adjacency_list`` and ``visited``.

    Side effects:
        Prints each reached node's name and sets its ``visited`` flag to True.
    """
    # we need a LIFO: last item we insert is the first one we take out
    stack = [start_node]

    # let's iterate until the stack becomes empty
    while stack:

        # the pop() function returns with the last item we have inserted - O(1)
        actual_node = stack.pop()

        # a duplicate entry whose node was already handled via another parent;
        # the start node is always processed (it sits on the stack only once)
        if actual_node.visited and actual_node is not start_node:
            continue

        actual_node.visited = True
        print(actual_node.name)

        for n in actual_node.adjacency_list:
            # if the node has not been visited so far
            if not n.visited:
                # insert the item into the stack
                stack.append(n)

In [2]:
# build the five vertices of the sample graph
node1, node2, node3, node4, node5 = map(Node, ("A", "B", "C", "D", "E"))

# wire up the directed edges: A -> B -> D -> E and A -> C
node1.adjacency_list.extend([node2, node3])
node2.adjacency_list.append(node4)
node4.adjacency_list.append(node5)

# traverse the graph depth first, starting from A
depth_first_search(node1)

A
C
B
D
E


In [3]:
class Node:
    """A graph vertex: a label, its outgoing neighbors and a visited flag."""

    def __init__(self, name):
        # traversal bookkeeping; starts out as "not seen yet"
        self.visited = False
        # direct references to the neighboring Node objects (outgoing edges)
        self.adjacency_list = []
        # label identifying this vertex
        self.name = name

# DFS with Recursion
def depth_first_search(node):
    """Recursively print ``node`` and everything reachable from it, depth first.

    The node is flagged as visited and printed before its neighbors are
    explored, so the call stack plays the role of the explicit LIFO stack.
    """
    node.visited = True
    print(node.name)

    # the flag is re-read for every neighbor because the recursive calls
    # below may already have reached later entries of the list
    for neighbor in node.adjacency_list:
        if neighbor.visited:
            continue
        depth_first_search(neighbor)


if __name__ == '__main__':

    # build the five vertices of the sample graph
    node1, node2, node3, node4, node5 = map(Node, ("A", "B", "C", "D", "E"))

    # wire up the directed edges: A -> B -> D -> E and A -> C
    node1.adjacency_list.extend([node2, node3])
    node2.adjacency_list.append(node4)
    node4.adjacency_list.append(node5)

    # traverse the graph depth first, starting from A
    depth_first_search(node1)

A
B
D
E
C


# BFS vs DFS

## BFS application
- Pathfinding (BFS or DFS)
- Maximum Flow: the Edmonds-Karp algorithm uses BFS
- Garbage Collection: Cheney's algorithm uses BFS
- Serialization (of tree-like structures), since the order matters

## DFS application
- Pathfinding (BFS or DFS)
- Topological Ordering (build tools)
- Strongly connected components
- Cycle Detection (in operating systems -> deadlock detection)

## Memory Comparison (DFS is preferred for its lower memory consumption)
- BFS: in the worst case we have to store all the leaf nodes in the queue
  - ex) balanced tree (N/2 leaves) -> O(N)
  - it finds closer items faster.
- DFS: in the worst case we have to store all the nodes along the current path on the stack until we hit a leaf node
  - ex) balanced tree -> the height of the tree -> O(log N)
  - it finds the outlier items faster (when a vertex is far away from the starting node)
