In [2]:
import os
dirs = os.listdir('./Sample Datasets')
dirs

['shareholder sample data.xlsx',
 'primary ssic sample data.xlsx',
 'appointment sample data.xlsx']

In [68]:
import pandas as pd
from collections import defaultdict 

data = pd.read_excel("./Sample Datasets/shareholder sample data.xlsx") 
df = pd.DataFrame(data, columns= ['Entity UEN','Shareholder UEN'])
filter_dic = {"SSIC": "./Sample Datasets/primary ssic sample data.xlsx",
             "Shareholder" : "./Sample Datasets/shareholder sample data.xlsx",
             "Appointment" : "./Sample Datasets/appointment sample data.xlsx"}

# This class represents a directed graph 
# using adjacency list representation 
class Graph: 

    # Constructor 
    def __init__(self): 

        # default dictionary to store graph 
        self.graph = defaultdict(list) 
        self.immediateNeigh = defaultdict(set)

    # function to add an edge to graph 
    def addEdge(self,u,v): 
        self.graph[u].append(v) 
        self.graph[v].append(u) 
        self.immediateNeigh[u].add(v)
        self.immediateNeigh[v].add(u)

    # Function to print a BFS of graph 
    def BFS(self, s): 

        # Mark all the vertices as not visited 
        visited = [False] * (len(self.graph)) 

        # Create a queue for BFS 
        queue = [] 

        # Mark the source node as 
        # visited and enqueue it 
        queue.append(s) 
        visited[s] = True

        while queue: 
            # Dequeue a vertex from 
            # queue and print it 
            s = queue.pop(0) 
            print (s, end = " ") 

            # Get all adjacent vertices of the 
            # dequeued vertex s. If a adjacent 
            # has not been visited, then mark it 
            # visited and enqueue it 
            for i in self.graph[s]: 
                if visited[i] == False: 
                    queue.append(i) 
                    visited[i] = True
                else:
                    self.immediateNeigh[i].add(s)
                    self.immediateNeigh[s].add(i)
                    
    def depth(self, u, d): 
        #to find neighbours in depth of s and less
        s = set()  
        if (d == 1):
            for i in self.immediateNeigh[u]:
                s.add(i)
        else :
            for y in self.immediateNeigh[u]:
                s.add(y)
                for i in self.depth(y, d-1):
                    s.add(i)
        
        return s
    
    def filter(self, g, *filters):
        count = 0
        
        #merging process to those field that are wanted
        if filters:
            for key in filters:
                if count == 0:
                    result = pd.read_excel(filter_dic[key])
                    count += 1
                else:
                    result = pd.merge(result,  pd.read_excel(filter_dic[key]), on = "Entity UEN", how = "outer")
                    if key == "SSIC": ##ONLY KEEP THOSE SSIC required
                        result = result[result.SSIS == 1001.0]
        else:
            return g
        
        ##to create the new edges. Add only if there is a value that isnt null
        print(result)
        
        for i in range(len(result)):
            for col in result.columns:
                if col != "Entity UEN" and result.loc[i,col] != "NaN" :
                    g.addEdge(result.loc[i,"Entity UEN"], result.loc[i,col])
        
              
##MAIN                
# Create a graph given in 
# the above diagram 
g = Graph() 
g.filter(g) ##key in the attributes to filter() to make the new graph


    

print ("Following is Breadth First Traversal"
        " (starting from vertex 2)") 
#g.BFS(2) 
print("\n")

# for i in g.depth(1,3):
#     print (i, end = " ") 
    
for i in g.depth('en5', 1):
    print (i, end = " ") 

Following is Breadth First Traversal (starting from vertex 2)


