In [10]:
class Graph:
    """
        Directed graph stored as an adjacency dictionary, together with the two DFS passes of Kosaraju's
        algorithm for finding Strongly Connected Components (SCCs). The DFS is iterative (explicit stack).

        Example of the adjacency dictionary (vertex -> list of heads):
            {1: [4], 2: [8], 3: [6], 4: [7], 5: [2], 6: [9], 7: [1], 8: [5, 6], 9: [7, 3]}

        nodes: dictionary that has a vertex as key and the list of its heads as value; a head is a vertex that the
                given vertex is pointing at. Vertices without outgoing edges may be absent from the dictionary.
        size: largest vertex label; DFS_loop and find_scc iterate over the labels size, size-1, ..., 1
        checked: set of vertices that have been explored
        stack: stack (LIFO) of the vertices on the DFS path currently being explored
        finishedTime: dictionary that has a finishing time as key and the corresponding vertex as value. This is the
                      output of the first pass of the algorithm.
        time: counter used to assign the finishing time of each vertex, for the "magic ordering" of vertices
        scc: list holding the sizes of (at most) the 5 largest SCCs found so far, in descending order
        scc_count: counter for the size of the SCC currently being explored in the second pass
    """
    def __init__(self, nodes, size):
        self.nodes = nodes
        self.size = size
        self.checked = set()           # set is implemented as a hash table, so lookup/add is O(1) on average
        self.stack = []
        self.finishedTime = {}
        self.time = 0
        self.scc = []
        self.scc_count = 0


    def __str__(self):
        return str(self.nodes)


    ####################################################
    # Below are DFS and find SCC codes using stack     #
    ####################################################

    def DFS(self, start, pass_count=1):
        """
            Use a stack to do Depth First Search from `start`, exploring every vertex reachable from it that is
            not already in self.checked.
            start: the vertex to start DFS
            pass_count: either 1 or not 1, indicating it's the 1st pass or 2nd pass of DFS. 1st pass records
                        finishing times in self.finishedTime; 2nd pass counts the explored vertices in
                        self.scc_count.
        """
        checked = self.checked
        stack = self.stack
        nodes = self.nodes

        # mark start as explored and initiate the stack with it
        checked.add(start)
        stack.append(start)

        # cursors[k] is the index of the next head to examine for the k-th vertex pushed by this call.
        # Remembering the position means a vertex's head list is scanned only once in total, instead of being
        # rescanned from index 0 every time the search returns to that vertex.
        cursors = [0]

        while cursors:
            vertex = stack[-1]
            heads = nodes.get(vertex) or ()     # a missing vertex (or an empty list) has no heads
            position = cursors[-1]

            # skip the heads that are already explored
            while position < len(heads) and heads[position] in checked:
                position += 1

            if position < len(heads):
                # unexplored head found: push it, just like calling recursion on the head
                head = heads[position]
                cursors[-1] = position + 1
                checked.add(head)
                stack.append(head)
                cursors.append(0)
            else:
                # all heads of the vertex are explored: the vertex is finished, helper pops it from the stack
                cursors.pop()
                self.helper(vertex, pass_count)


    def helper(self, vertex, pass_count=1):
        """
            Helper function called when `vertex` (the top of the stack) is finished: do the bookkeeping that
            depends on pass_count, and then remove the vertex from the stack.
            vertex: the vertex that has just been finished
            pass_count: either 1 or not 1, indicating it's the 1st pass or 2nd pass of DFS. 1st pass increases the
                        time tracker by 1 and stores the finishing time and its vertex in self.finishedTime;
                        2nd pass increases scc_count by 1 to track the size of the current SCC.
        """
        if pass_count == 1:
            self.time += 1
            self.finishedTime[self.time] = vertex
        else:
            self.scc_count += 1

        self.stack.pop()


    def DFS_loop(self):
        """
            Loop through the Graph using Depth First Search to explore the whole Graph. This is used for the
            1st pass, on the reversed Graph, to get finishedTime; start from the largest vertex label, going
            descending down to 1.
        """
        for vertex in range(self.size, 0, -1):
            if vertex not in self.checked:
                self.DFS(vertex)


    def find_scc(self):
        """
            Loop through the Graph using Depth First Search to explore the whole Graph. This is used for the
            2nd pass, on the original Graph, to get the sizes of the SCCs; start from the vertex with the largest
            finishing time, going descending.
            self.finishedTime must already contain every finishing time from 1 to self.size (a missing time
            raises KeyError). The result is left in self.scc.
        """
        for finishing_time in range(self.size, 0, -1):
            vertex = self.finishedTime[finishing_time]

            # only unexplored vertices lead a new SCC
            if vertex in self.checked:
                continue

            self.DFS(vertex, 2)

            # always keep only the 5 largest SCC sizes in self.scc, sorted in descending order: while the list
            # is not full just add the new size, afterwards replace the smallest entry when the new size beats it
            if len(self.scc) < 5:
                self.scc.append(self.scc_count)
                self.scc.sort(reverse=True)
            elif self.scc_count > self.scc[-1]:
                self.scc[-1] = self.scc_count
                self.scc.sort(reverse=True)

            self.scc_count = 0            # reset scc_count once the SCC has been recorded
                
                 
    
def load(lis, output, order=0):
    """
        Add one edge to an adjacency dictionary, which can then be used as the nodes of a Graph.
        lis: the edge as a sequence of two vertices [tail, head]
        output: adjacency dictionary to update in place (vertex -> list of vertices it points at)
        order: has to be either 0 or 1. 0 stores the edge in its original direction (tail -> head);
               1 stores the reversed edge (head -> tail)
        Raises ValueError when order is neither 0 nor 1, so that an edge is never dropped silently.
    """
    if order not in (0, 1):
        raise ValueError(f"order must be 0 (original) or 1 (reversed), got {order!r}")

    # lis[order] is the vertex the edge leaves from, lis[1 - order] the vertex it points at
    output.setdefault(lis[order], []).append(lis[1 - order])
            
if __name__ == "__main__":

    ####################################################
    # Test case                                        #
    ####################################################

    # load the test file to nodes dictionary in original order and reversed order, and find the maximum vertex number
    nodes = {}
    nodes2 = {}
    maximum = 0
    with open("test.txt") as file:
        for line in file:
            # split() with no argument accepts any run of whitespace (spaces or tabs) as the separator
            edge = [int(token) for token in line.split()]
            if not edge:
                continue              # blank line (e.g. a trailing newline at the end of the file)
            maximum = max(maximum, *edge)
            load(edge, nodes)
            load(edge, nodes2, 1)

    # initiate Graph in original orders and reversed orders
    graph = Graph(nodes, maximum)
    print("Original graph is: \n", graph)
    print("   Its checked status is: \n", graph.checked)

    graph2 = Graph(nodes2, maximum)
    print("\n")
    print("Reversed graph is: \n", graph2)
    print("   Its checked status is: \n", graph2.checked)

    # run DFS loop on reversed graph to compute the finishing time of each vertex, and make a copy to original graph,
    # so that when running DFS loop on original graph, it processes nodes in decreasing order of finishing times
    graph2.DFS_loop()
    graph.finishedTime = dict(graph2.finishedTime)
    print("\n")
    print("After 1st pass, finishing time and its corresponding vertex is: \n", graph.finishedTime)
    print("    The checked status of reversed graph is: \n", graph2.checked)

    # find the count of SCC
    graph.find_scc()
    print("\n")
    print("After 2nd pass, list of SCC count in descending order is: \n", graph.scc)
    print("    The checked status of original graph is: \n", graph.checked)


Original graph is: 
 {1: [2], 2: [3, 4, 5], 3: [6], 4: [5, 7], 5: [2, 6, 7], 6: [3, 8], 7: [8, 10], 8: [7], 9: [7], 10: [9, 11], 11: [12], 12: [10]}
   Its checked status is: 
 set()


Reversed graph is: 
 {2: [1, 5], 3: [2, 6], 4: [2], 5: [2, 4], 6: [3, 5], 7: [4, 5, 8, 9], 8: [6, 7], 10: [7, 12], 9: [10], 11: [10], 12: [11]}
   Its checked status is: 
 set()


After 1st pass, finishing time and its corresponding vertex is: 
 {1: 1, 2: 5, 3: 2, 4: 4, 5: 3, 6: 6, 7: 8, 8: 9, 9: 7, 10: 10, 11: 11, 12: 12}
    The checked status of reversed graph is: 
 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}


After 2nd pass, list of SCC count in descending order is: 
 [6, 3, 2, 1]
    The checked status of original graph is: 
 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}


In [11]:
"""
    Programming Assignment #1
    https://www.coursera.org/learn/algorithms-graphs-data-structures/exam/rOtFq/programming-assignment-1/attempt
    
    The file contains the edges of a directed graph. Vertices are labeled as positive integers from 1 to 875714. 
    Every row indicates an edge, the vertex label in first column is the tail and the vertex label in second column 
    is the head (recall the graph is directed, and the edges are directed from the first column vertex to the second 
    column vertex). So for example, the 11th row looks like: "2 47646". This just means that the vertex with label 2 
    has an outgoing edge to the vertex with label 47646

    Your task is to code up the algorithm from the video lectures for computing strongly connected components (SCCs), 
    and to run this algorithm on the given graph.

    Output Format: You should output the sizes of the 5 largest SCCs in the given graph, in decreasing order of sizes, 
    separated by commas (avoid any spaces). 
"""


import time
    
# perf_counter is a monotonic clock meant for measuring elapsed time
start = time.perf_counter()

# build the adjacency dictionaries of the original graph (nodes) and of the reversed graph (nodes2)
nodes = {}
nodes2 = {}
with open("SCC.txt") as file:
    for line in file:
        # split() with no argument accepts any run of whitespace (spaces or tabs) as the separator
        edge = [int(token) for token in line.split()]
        if not edge:
            continue                  # blank line (e.g. a trailing newline at the end of the file)
        load(edge, nodes)
        load(edge, nodes2, 1)

#################################################################
# Below is Kosaraju's Two-Pass Algorithm using DFS with stack   #
#################################################################

# 1. initiate Graph in original orders and reversed orders
# 875714 is the largest vertex label in the assignment's data file
graph = Graph(nodes, 875714)
graph2 = Graph(nodes2, 875714)

end = time.perf_counter()
print(f"The run time to load data to Graph is {end - start} second(s)")


The run time to load data to Graph is 11.822657108306885 second(s)


In [12]:
# perf_counter is a monotonic clock meant for measuring elapsed time
start = time.perf_counter()

# 2. run DFS loop on reversed graph to compute the finishing time of each vertex, and make a copy to original graph,
# so that when running DFS loop on original graph, it processes nodes in decreasing order of finishing times
graph2.DFS_loop()
# dict(...) makes a real copy, so the two graphs do not share one finishedTime dictionary
graph.finishedTime = dict(graph2.finishedTime)

end = time.perf_counter()
print(f"The run time to do the 1st pass and copy result to original graph is {end - start} second(s)")

The run time to do the 1st pass and copy result to original graph is 26.995785236358643 second(s)


In [13]:
# perf_counter is a monotonic clock meant for measuring elapsed time
start = time.perf_counter()

# 3. run DFS loop on original graph to find the count of SCC and print the size of SCCs in descending orders
graph.find_scc()
print(graph.scc)

end = time.perf_counter()
print(f"The run time to do the 2nd pass to find SCC is {end - start} second(s)")

[434821, 968, 459, 313, 211]
The run time to do the 2nd pass to find SCC is 5.055362939834595 second(s)
