## TASK:
<b>Find Strongly Connected Components (SCC) in directed graph.</b><br>
The answer in array of 5 largest components' sizes<br>
<br>
Input file format: each line in the file is a directed edge.

In [272]:
import time

In [274]:
def read_graph_from_file(filename): #read edges from file into 2 arrays (direct and transposed)
    with open(f'{filename}.txt', 'r') as f:
        E = []
        for line in f:
            l = line.split()
            l_1 = [int(item) for item in l]
            E.append(l_1)
    return E

def edges_to_dict(e, V):
    edict = {}
    e_sorted = sorted(e, key=lambda x: x[0])
    for item in e_sorted:
        if item[0] not in edict:
            edict[item[0]] = [item[1]]
        else:
            edict[item[0]].append(item[1])
    for v in V:
        if v not in edict:
            edict[v] = None
    return edict

In [296]:
class Graph_i: #graph operations done iteratively
    def __init__(self, V, E, adjacency_list):
        self.V = V
        self.E = E
        self.adj = adjacency_list
        self.fdict = {}
        
    
    def get_transposed(self):
        E_t = [[edge[1],edge[0]] for edge in self.E]
        adj_t = edges_to_dict(E_t, V)
        g_t = Graph(self.V, E_t, adj_t)
        return g_t

        
    def DFS(self, s, visited, t):
        visited.add(s)
        S = [s]
        while S != []:
            v = S[-1]
            if self.adj[v] != None:
                if not set(self.adj[v]).issubset(visited):
                    i = 0
                    u = self.adj[v][i]
                    while u in visited:
                        u = self.adj[v][i+1]
                        i+= 1
                    visited.add(u)
                    S.append(u)
                else:
                    self.fdict[t[0]] = v
                    visited.add(v)
                    t.pop(0)
                    S.pop()
            else:
                self.fdict[t[0]] = v
                visited.add(v)
                t.pop(0)
                S.pop()
                  
    def DFS_t(self, s, visited, component):
        visited.add(s)
        S = [s]
        while S != []:
            v = S[-1]
            if self.adj[v] != None:
                if not set(self.adj[v]).issubset(visited):
                    i = 0
                    u = self.adj[v][i]
                    while u in visited:
                        u = self.adj[v][i+1]
                        i+= 1
                    visited.add(u)
                    S.append(u)
                else:
                    component.append(v)
                    visited.add(v)
                    S.pop()
            else:
                component.append(v)
                visited.add(v)
                S.pop()
        
    def GFSLoop(self):
        t = V[:]
        visited = set()
        for u in self.V:
            if u not in visited:
                self.DFS(u, visited, t)
    
    def GFSLoop_t(self):
        visited = set()
        components = []
        for i in reversed(self.fdict.keys()):
            u = self.fdict[i]
            if u not in visited:
                component = []
                self.DFS_t(u, visited, component)
                components.append(component)
#         print("components=",components)
        lens = [len(com) for com in components]
        print("lens=", sorted(lens, reverse=True)[:5])
    
        
    def SCC(self):
        self.GFSLoop()
        print("1 done")
        G_t = self.get_transposed()
        print("2 done")
        G_t.fdict = self.fdict
        G_t.GFSLoop_t()

In [294]:
n = 875714
# n = 20
V = [i for i in range(1,n+1)]
# E = read_graph_from_file("test_08_3")
E = read_graph_from_file("task")
adj = edges_to_dict(E, V) #Adjacency list 

In [295]:
G = Graph_i(V, E, adj)

start = time.time()
G.SCC()
end = time.time()
print("time=", end - start)

1 done
2 done
lens= [434821, 968, 459, 313, 211, 205]
time= 332.34663796424866


In [280]:
class Graph_r: #graph operations done recursively
    def __init__(self, V, E, adjacency_list):
        self.V = V
        self.E = E
        self.adj = adjacency_list
        self.fdict = {}
        
    
    def get_transposed(self):
        E_t = [[edge[1],edge[0]] for edge in self.E]
        adj_t = edges_to_dict(E_t, V)
        g_t = Graph(self.V, E_t, adj_t)
        return g_t
    
    def DFSR(self, s, visited, t):
        visited.add(s)
        if self.adj[s] != None:
            for u in self.adj[s]:
                if u not in visited:
                    self.DFSR(u, visited, t)
        self.fdict[t[0]] = s
        t.pop(0)
            
    def DFSR_t(self, s, visited, component):
        visited.add(s)
        component.append(s)
        if self.adj[s] != None:
            for u in self.adj[s]:
                if u not in visited:
                    self.DFSR_t(u, visited, component)
    
    def GFSLoopR(self):
        t = V[:]
        visited = set()
        self.fdict = {}
        for u in self.V:
            if u not in visited:
                self.DFSR(u, visited, t)
        
    def GFSLoopR_t(self):
        visited = set()
        components = []
        for i in reversed(self.fdict.keys()):
            u = self.fdict[i]
            if u not in visited:
                component = []
                self.DFSR_t(u, visited, component)
                components.append(component)
#         print("components=",components)
        lens = [len(com) for com in components]
        print("lens=", sorted(lens, reverse=True)[:6])
        
    def SCC(self):
        self.GFSLoopR()
        print("1 done")
        G_t = self.get_transposed()
        print("2 done")
        G_t.fdict = self.fdict
        G_t.GFSLoopR_t()

In [286]:
# n = 875714
n = 20
V = [i for i in range(1,n+1)]
E = read_graph_from_file("test_08_3")
adj = edges_to_dict(E, V) #Adjacency list 

In [293]:
G = Graph_r(V, E, adj)

start = time.time()
G.SCC()
end = time.time()
print("time=", end - start)

1 done
2 done
lens= [9, 4, 3, 2, 2]
time= 0.0009999275207519531
