[Section 1](#section_1)

## Imports

In [1]:
import pandas as pd
import os
import numpy as np
from pyvis.network import Network
from neo4j import GraphDatabase
import re
from functools import reduce


## Functions 

### Find Each Table's primary key

In [2]:
def pk_finder(name, df):
    for i in range(len(df.columns)):
        col = df.iloc[:,i]
        #if(col.size == col.drop_duplicates().size):
        if(col.nunique() == col.size):
            pk[name] = df.columns[i] 
            break
        

### Find Each Table's foreign key


In [3]:
def fk_finder(name, df):
    for i in range(len(df.columns)):
        col_name = df.columns[i]
        col = df[col_name]
        for j in pk: #loops on the key j = table names
            if j != name:
                primary_key_column = All_dfs[j][(pk[j])]
                
                if primary_key_column.dtype == col.dtype:
                    status = col.isin(primary_key_column)
                    if status[status== False].size == 0:
                        fk[name][col_name] = j 
                        ref_in[j].add(name)
                        break

                elif (isinstance(col[0],list) and type(primary_key_column[0]) == type(col[0][0])):
                    status = col.explode().reset_index(drop=True).isin(primary_key_column)
                    if status[status== False].size == 0:
                        fk[name][col_name] = j 
                        ref_in[j].add(name)
                        break

In [4]:
# type(All_dfs["warehouses"].warehouse_id[0])

### Assign Tables to nodes or edges

In [5]:
def nodes_edges(fk):
    for f in fk:
        if len(fk[f]) == 2:
            inner_dict = list(fk[f])
            edges[f] = fk[f]
        elif len(fk[f]) == 1:
            properties[f] = fk[f]
        else:
            nodes[f] = All_dfs[f]
#         else:
#             if len(fk[f]) == 1:
#                 properties[f] = fk[f]
            
#             nodes[f] = All_dfs[f]
    # for f in fk:
    #     if((f not in edges) ):
    #         nodes[f] = All_dfs[f]
    #         #nodes.append(f)
        

### Create Nodes Table

In [6]:
def add_nodes(nodes, nodesTable):
    for n in range(len(nodes)):
        for index, row in nodes[list(nodes.keys())[n]].iterrows():
            column_names = list(nodes[list(nodes.keys())[n]].columns) #get column names
            att = {}
            for i in range(1, len(column_names)):
                att[column_names[i]] = nodes[list(nodes.keys())[n]].iloc[index, i]
            newRow = [{'Label': list(nodes.keys())[n] , 'ID': nodes[list(nodes.keys())[n]].iloc[index,0] , 'Attributes': att}]
            tmp = pd.DataFrame(newRow)
            nodesTable = pd.concat([nodesTable, tmp], ignore_index=True)
    return nodesTable


### Create Edges Table

In [7]:
def add_edges(edges,edgesTable,pk,fk,ref_in):
    for e in edges:
        df = All_dfs[e] #df of the cur edge
        for r in range(len(df)):
            label = list(edges[e].keys())
            from_id = nodesTable[(nodesTable["Label"] == edges[e][label[0]]) & (df.loc[r,label[0]] == nodesTable["ID"])]
            from_id = from_id.index[0]
            to_id = nodesTable[(nodesTable["Label"] == edges[e][label[1]]) & (df.loc[r,label[1]] == nodesTable["ID"])]
            to_id = to_id.index[0]
            pk_col = pk[e]
#             primary_key = df[pk_col].iloc[r]
            primary_key = df.loc[r,pk_col]
            att = convert_prop(ref_in,e,primary_key,fk)
            newRow = [{'From_Node_ID': from_id, 'To_Node_ID': to_id, 'order/service' : att}]
            tmp = pd.DataFrame(newRow)
            edgesTable = pd.concat([edgesTable, tmp], ignore_index=True)
    return edgesTable

### Properties of the Edge

In [8]:
def convert_prop(ref_in,edge_key, pk_value,fk):
    for referenced_table_name in ref_in[edge_key]:
        referenced_table = All_dfs[referenced_table_name]
        fk_of_ref_table = fk[referenced_table_name]
        for foreign_key in fk_of_ref_table:
            if fk_of_ref_table[foreign_key] == edge_key:
#                 referenced_table[referenced_table[foreign_key]== id]
                all_occurances_df = referenced_table[referenced_table[foreign_key]== pk_value].drop([foreign_key],axis=1)
                return all_occurances_df.to_dict('records')
                

### generate list of n colors

In [9]:
import random
get_colors = lambda n: list(map(lambda i: "#" + "%06x" % random.randint(0, 0xFFFFFF),range(n)))
# get_colors(5) # sample return:  ['#8af5da', '#fbc08c', '#b741d0', '#e599f1', '#bbcb59', '#a2a6c0']

### Draw Pyvis graph

In [10]:
def draw_graph_pyvis(nodes_table,edges_table):

    nodes_name = [(x[1]+'_'+str(x[0])).capitalize() for x in zip(nodes_table.ID,nodes_table.Label)]
    indices = list(map(lambda x: int(x),list(nodes_table.index)))
    nodes_attributes = list(map(lambda x: str(x).replace(',',"\n"),list(nodes_table.Attributes)))
    nodes_tables = list(nodes_df.Label.unique())
    colors = get_colors(len(nodes_name))
    nodes_color = list(map(lambda x: colors[nodes_tables.index(x)],list(nodes_table.Label)))
    
    g = Network(width='100%')
#     g.barnes_hut()
    g.add_nodes(indices,
                title= nodes_attributes,
                label=nodes_name,
                color=nodes_color,
                )
    for e in range(len(edges_table)):
        src = int(edges_table.iloc[e]["From"])
        dst = int(edges_table.iloc[e]["To"])
        g.add_edge(src,dst,weight=5)

    print("Number of Nodes: ",len(g.get_nodes()))
    print("Number of Edges: ",len(g.get_edges()))
    

    g.show('test.html')




### Draw Graph Neo4j

<a id='Neo4jFunction'></a>

[Noe4jCallFunction](#Neo4jCall)

#### prepare satements

In [11]:
def draw_graph_Neo4j(nodes_table,edges_table):

    transaction_execution_commands = []
    # creating a statement for each node (row in node_table)
    for i, node in nodes_table.iterrows():
        label = node["Label"].capitalize()
        index = i
        IDs = node["ID"]
        attributes = node["Attributes"]
        neo4j_create_statement = Create_Statement(label,i,IDs,attributes)
        transaction_execution_commands.append(neo4j_create_statement)

    for i, edge in edges_table.iterrows():
        from_node_id = edge['From']
        to_node_id = edge['To']
        from_node_name = edge['From_Table']
        to_node_name = edge['To_Table']
        neo4j_create_statement = create_relation_statement(from_node_id,to_node_id,from_node_name,to_node_name)
        transaction_execution_commands.append(neo4j_create_statement)

     
    execute_transactions(transaction_execution_commands)
 

#### Connect and execute statements

In [12]:
def execute_transactions(transaction_execution_commands):
    data_base_connection = GraphDatabase.driver(uri = "bolt://localhost:7687", auth=("neo4j", "password"))
    session = data_base_connection.session()    
    for i in transaction_execution_commands:
        session.run(i)

 

#### Create a "Create statement for nodes"

In [13]:
def Create_Statement(label,index,IDs,attributes):
#     att = destructure_dict(attributes)
#     return f"CREATE (x:{label} {'{'}name: {get_name(label,IDs)},index:{index},ID:{IDs} ,{destructure_dict(attributes)} {'}'})"
    att = (", " + destructure_dict(attributes)) if len(attributes) > 0 else ""
    return f"CREATE (x:{label} {'{'}name: {get_name(label,IDs)},index:{index},ID:{IDs} {att} {'}'})"


In [14]:
def get_name(label,ID):
    return '\"' + label.capitalize()+' '+str(ID)+'\"'

#### prepare attributes for create statement

In [15]:
def destructure_dict(dic):
    s = ""
    for key in dic:
        value = dic[key]
        if isinstance (value,str):
            value = "\"" + value +"\""
        s=s + re.sub("[^\w_]", '',str(key).replace(" ","_")) + ":" + str(value)+","
    return s[:-1]

In [16]:
def create_relation_statement(from_id, to_id,from_name,to_name):
        return f"Match (a:{from_name}),(b:{to_name}) WHERE a.index ={from_id} AND b.index = {to_id} CREATE (a) - [r:RELTYPE]->(b)"


In [17]:
def and_agg(series):
       return reduce(lambda x, y: x and y, series)

## Global Variables

In [18]:
# {Table name: df}
All_dfs = {}
# {Table name: Table's Primary key}
pk = {}
# {Table name: {Column Name: Referenced Table}}
fk = {}
# {Table name: set(Tables that references the table)}
ref_in = {}
# {Table name: df}
nodes = {}
# {Table name: {Column Name: Referenced Table}}
edges = {}
# _
properties = {}
# Nodes_df
nodesTable = pd.DataFrame(columns=['Label', 'ID', 'Attributes'])
# Edges_df
edgesTable = pd.DataFrame(columns = ['From_Node_ID', 'To_Node_ID', 'order/service'])
# Properties_df
propertiesTable = pd.DataFrame(columns = [['index','From', 'Label', 'Attributes']])
#Label = ['facilities'/ 'order' / 'service']


# Nodes
# Edges ['From_Node_ID', 'To_Node_ID']
# Properties ['Label', 'ID', 'Attributes']
# (^Nodes in graph)
# Relationship ["Relation_name","from","to"]


nodes_df = pd.DataFrame(columns=['Label', 'ID', 'Attributes'])
edges_df = pd.DataFrame(columns=['From','To','From_Table','To_Table'])


<a id='section_1'></a>
## Reading Data Set


 creating a dictionary where
 key: first word of the table name 
 value: df 

In [19]:
import os
path_of_the_directory = './DataSet/'
ext = ('.csv')
for file in os.listdir(path_of_the_directory):
    if file.endswith(ext):
        print(file) 
        temp = (file.replace("_"," ").replace("."," ").split(" ")[0].lower())
        All_dfs[temp] = pd.read_csv(path_of_the_directory+file)
    else:
        continue

customer_data.csv
ExternalOrders_data.csv
ExternalServices_data.csv
ExternalShipments_data.csv
ExternalTransactions.csv
facilities_data.csv
InternalOrders_data.csv
InternalServices_data.csv
InternalShipments_data.csv
InternalTransactions.csv
Manufacturing_data.csv
Products_data.csv
Retailer_data.csv
Supplier_data.csv
warehouses_data.csv


In [20]:
for table_nm in All_dfs:
    table = All_dfs[table_nm]
    for column_name in table.columns:
        column =  table[column_name]
        if column.dtype=='O' and isinstance(column[0],str):
            if  (isinstance(column[0],str) and column.apply(lambda x: (str(x).startswith('[') and str(x).endswith(']'))
                         or 
                         (str(x).startswith('(') and str(x).endswith(')'))).agg(and_agg)):
                
                s = column.apply(lambda x: x.strip("[](,)").split(','))
                if (s[0][0].isdigit()):
                    s = s.apply(lambda x: list(map(np.int64,x)))
                All_dfs[table_nm][column_name] = s
                


In [21]:
All_dfs["products"].warehouses.explode().isin(All_dfs["warehouses"].warehouse_id ).agg(and_agg)

False

In [22]:
# ware_house_id = All_dfs["warehouses"].warehouse_id 

# warehouse_product_isin_warehouse = All_dfs["products"].warehouses.explode().isin(ware_house_id)
# warehouse_product_not_in_warehouse_values = list(All_dfs["products"].warehouses.explode()[[not elem for elem in warehouse_product_isin_warehouse]].unique())
# print(len(warehouse_product_not_in_warehouse_values))
# warehouse_isin_warehouse_product =  ware_house_id.isin(All_dfs["products"].warehouses.explode())
# print(len( list(ware_house_id[[not elem for elem in warehouse_isin_warehouse_product]].unique())))
# warehouses_to_be_replaced = list(ware_house_id[[not elem for elem in warehouse_isin_warehouse_product]].unique())[:len(warehouse_product_not_in_warehouse_values)]

# All_dfs["warehouses"].warehouse_id  =  ware_house_id.replace(warehouses_to_be_replaced,warehouse_product_not_in_warehouse_values)

### Adding Missing Edges


In [23]:
initial_internalshipments_df = All_dfs["internalshipments"]
initial_internalshipments_df

Unnamed: 0,IntShip_id,listSuppIds,factoryIds,from_to_where
0,7281,90942,52160,SS
1,5897,36243,67361,SR
2,3477,87916,30692,SS
3,8796,90236,26533,SS
4,2360,89467,46954,SR
...,...,...,...,...
95,5797,11283,24646,SS
96,6673,19072,31857,SR
97,3632,29277,97142,SS
98,3089,72315,90349,SR


In [24]:
c = 9995
from_shipment = All_dfs["internalshipments"].listSuppIds
to_shipment = All_dfs["internalshipments"].factoryIds

for r in range(len(All_dfs["manufacturing"])):
    diff_supp = All_dfs["manufacturing"].loc[r,"Different_suppliers"]
    factory = All_dfs["manufacturing"].loc[r,"Factory_id"]
    for s in range(len(diff_supp)):
        
        supp = diff_supp[s]
        if supp == factory:
            print(factory)
        if len(All_dfs["internalshipments"].query(f"listSuppIds == {supp} and factoryIds == {factory}")) ==0:
            new_row = [{"IntShip_id":c, "listSuppIds":supp, "factoryIds": factory, "from_to_where": "SS"}] 
            df = pd.DataFrame(new_row)
            c = c+1
            All_dfs["internalshipments"] = pd.concat([All_dfs["internalshipments"], df], ignore_index=True)

#         if (supp not in from_shipmentfrom_shipment)  :
#                 if factory in All_dfs["supplier"].supp_id:
#                     i = "SS"

#         new_row = [{"IntShip_id":c, "listSuppIds":diff_supp[s], "factoryIds": factory, "from_to_where": i}] 
#         df = pd.DataFrame(new_row)
#         c = c+1
#         All_dfs["internalshipments"] = pd.concat([All_dfs["internalshipments"], df], ignore_index=True)
      

### Splitting Shipments tables

In [25]:
SRIntShip = All_dfs["internalshipments"].query('from_to_where == "SR"')
SRIntShip = SRIntShip.drop(['from_to_where'], axis=1).reset_index(drop = True)
#SRIntShip
RCExtShip = All_dfs["externalshipments"].query('from_to_where == "RC"')
RCExtShip = RCExtShip.drop(['from_to_where'], axis=1).reset_index(drop = True)
#RCExtShip
SSIntShip = All_dfs["internalshipments"].query('from_to_where == "SS"')
SSIntShip = SSIntShip.drop(['from_to_where'], axis=1).reset_index(drop = True)
#SSIntShip
SCExtShip = All_dfs["externalshipments"].query('from_to_where == "SC"')
SCExtShip = SCExtShip.drop(['from_to_where'], axis=1).reset_index(drop = True)
#SCExtShip
All_dfs["RCExtShip".lower()] = RCExtShip
All_dfs["SCExtShip".lower()] = SCExtShip
All_dfs["SRIntShip".lower()] = SRIntShip
All_dfs["SSIntShip".lower()] = SSIntShip

### Splitting Orders Tables

In [26]:
filter_list = All_dfs["rcextship"]["ExtShip_id"]
All_dfs["rcextorders"] = All_dfs["externalorders"][All_dfs["externalorders"].ExtShip_id.isin(filter_list)].reset_index(drop=True)
filter_list = All_dfs["scextship"]["ExtShip_id"]
All_dfs["scextorders"] = All_dfs["externalorders"][All_dfs["externalorders"].ExtShip_id.isin(filter_list)].reset_index(drop=True)
#All_dfs["scextorders"] = All_dfs["externalorders"].query("ExtShip_id.isin(@filter_list)").reset_index(drop=True)
filter_list = All_dfs["srintship"]["IntShip_id"]
All_dfs["srintorders"] = All_dfs["internalorders"][All_dfs["internalorders"].IntShip_id.isin(filter_list)].reset_index(drop=True)
#All_dfs["srintorders"] = All_dfs["internalorders"].query("IntShip_id.isin(@filter_list)").reset_index(drop=True)
filter_list = All_dfs["ssintship"]["IntShip_id"]
All_dfs["ssintorders"] = All_dfs["internalorders"][All_dfs["internalorders"].IntShip_id.isin(filter_list)].reset_index(drop=True)
#All_dfs["ssintorders"] = All_dfs["internalorders"].query("IntShip_id.isin(@filter_list)").reset_index(drop=True)

### Removing Redundant Tables

In [27]:
All_dfs.pop("internalshipments")
All_dfs.pop("externalshipments")
All_dfs.pop("externalorders")
All_dfs.pop("internalorders")
All_dfs.keys()

dict_keys(['customer', 'externalservices', 'externaltransactions', 'facilities', 'internalservices', 'internaltransactions', 'manufacturing', 'products', 'retailer', 'supplier', 'warehouses', 'rcextship', 'scextship', 'srintship', 'ssintship', 'rcextorders', 'scextorders', 'srintorders', 'ssintorders'])

In [28]:
# for table in All_dfs:
#     print(table)
#     print(All_dfs[table].isna().sum())
#     print("_________________________")

### Removing nan

In [29]:
for table in All_dfs:
    All_dfs[table].fillna('Unknown',inplace = True)


### Execlude Tables

In [30]:
# All_dfs = {key: value for key, value in All_dfs.items() 
#            if key not in ["products","warehouses","manufacturing"]}
# All_dfs.keys()



### Initialize fk and ref_in

In [31]:
table_name = list(All_dfs.keys())
for table_name in All_dfs:
    fk[table_name] = {}
    ref_in[table_name] = set()

### Get Primary key for each table

In [32]:
for t in All_dfs:
    pk_finder(t,All_dfs[t])
pk

{'customer': 'cust_id',
 'externalservices': 'ExtServ_id',
 'externaltransactions': 'ExtTran_id',
 'facilities': 'fac_id',
 'internalservices': 'IntServ_id',
 'internaltransactions': 'IntTran_id',
 'manufacturing': 'Manf_id',
 'products': 'prod_id',
 'retailer': 'retailer_id',
 'supplier': 'supp_id',
 'warehouses': 'capacity (NA)',
 'rcextship': 'ExtShip_id',
 'scextship': 'ExtShip_id',
 'srintship': 'IntShip_id',
 'ssintship': 'IntShip_id',
 'rcextorders': 'ExtOrders_id',
 'scextorders': 'ExtOrders_id',
 'srintorders': 'IntOrders_id',
 'ssintorders': 'IntOrders_id'}

In [33]:
ware_house_id = All_dfs["warehouses"].warehouse_id 

warehouse_product_isin_warehouse = All_dfs["products"].warehouses.explode().isin(ware_house_id)
warehouse_product_not_in_warehouse_values = list(All_dfs["products"].warehouses.explode()[[not elem for elem in warehouse_product_isin_warehouse]].unique())
warehouse_product_not_in_warehouse_values

[5886,
 3387,
 3044,
 2969,
 5071,
 3871,
 5330,
 9888,
 3031,
 1837,
 6960,
 1169,
 2930,
 5383,
 7400,
 3118,
 3760,
 4963,
 2194,
 5966,
 9084,
 9168,
 1080,
 7070,
 2764,
 7064,
 6085,
 4786,
 7146,
 5424,
 8527,
 5359,
 3108,
 2382,
 4703,
 8563,
 3365,
 2161,
 1817,
 3412,
 9988,
 6124,
 7794,
 7309,
 7571,
 9936,
 4083]

In [59]:
warehouse_isin_warehouse_product =  ware_house_id.isin(All_dfs["products"].warehouses.explode())
warehouses_to_be_replaced = list(ware_house_id[[not elem for elem in warehouse_isin_warehouse_product]].unique())[:len(warehouse_product_not_in_warehouse_values)]
warehouses_to_be_replaced

[8828,
 8677,
 6020,
 5965,
 8750,
 5024,
 2428,
 9659,
 5659,
 1606,
 4638,
 9319,
 9784,
 8480,
 1028,
 8060,
 3140,
 4563,
 2951,
 7224,
 9589,
 8568,
 8596,
 4018,
 4303,
 5349,
 4930,
 1666,
 4276,
 9455,
 4292,
 7189,
 4512,
 9277,
 3156,
 1234,
 8923,
 3437,
 9375,
 4655,
 5111,
 8387,
 4825,
 7899,
 4273,
 1470,
 6750]

### Get Foreing key for each table

In [56]:
x = All_dfs["warehouses"].warehouse_id.value_counts()
len(x[x>1])

49

In [34]:
for t in All_dfs:
    fk_finder(t,All_dfs[t])
fk

{'customer': {},
 'externalservices': {'ExtTrans_id': 'externaltransactions'},
 'externaltransactions': {'CompFrom': 'supplier', 'Custto': 'customer'},
 'facilities': {'supplier_id': 'supplier'},
 'internalservices': {'IntTrans_id': 'internaltransactions'},
 'internaltransactions': {'CompFrom': 'supplier', 'Compto': 'supplier'},
 'manufacturing': {'Different_suppliers': 'supplier',
  'Product_id': 'products',
  'Factory_id': 'supplier'},
 'products': {},
 'retailer': {},
 'supplier': {},
 'warehouses': {},
 'rcextship': {'factoryIds/retailerIds': 'retailer', 'idsTo': 'customer'},
 'scextship': {'factoryIds/retailerIds': 'supplier', 'idsTo': 'customer'},
 'srintship': {'listSuppIds': 'supplier', 'factoryIds': 'retailer'},
 'ssintship': {'listSuppIds': 'supplier', 'factoryIds': 'supplier'},
 'rcextorders': {'ExtShip_id': 'rcextship'},
 'scextorders': {'ExtShip_id': 'scextship'},
 'srintorders': {'IntShip_id': 'srintship'},
 'ssintorders': {'IntShip_id': 'ssintship'}}

In [34]:
asdadf

NameError: name 'asdadf' is not defined

In [None]:
ref_in

### Determine nodes and edges

In [None]:
nodes_edges(fk)

In [None]:
nodes.keys()

In [None]:
edges

In [None]:
properties

### Create NodesTable

In [None]:
nodesTable = add_nodes(nodes,nodesTable)
nodesTable

### Create Edges Table

In [None]:
edgesTable = add_edges(edges, edgesTable,pk,fk,ref_in)
edgesTable

In [None]:
# nodes_df = pd.DataFrame(columns=['Label', 'ID', 'Attributes'])
# edges_df = pd.DataFrame(columns=['From','To'])
all_nodes = list(nodes.keys())
for n in range(len(nodes)):
      for index, row in nodes[all_nodes[n]].iterrows():
          column_names = list(nodes[all_nodes[n]].columns) #get column names
          att = {}
          for i in range(1, len(column_names)):
              att[column_names[i]] = nodes[all_nodes[n]].iloc[index, i]
          newRow = [{'Label': all_nodes[n] , 'ID': nodes[all_nodes[n]].iloc[index,0] , 'Attributes': att}]
          tmp = pd.DataFrame(newRow)
          nodes_df = pd.concat([nodes_df, tmp], ignore_index=True)

In [None]:
all_edges = list(edges.keys())
for n in range(len(edges)):
      Edge_name = all_edges[n]

      foreign_keys = list(fk[Edge_name].keys())

      from_col = foreign_keys[0]
      from_table_name = fk[Edge_name][from_col]
      from_df = All_dfs[from_table_name]
      from_df_pk = pk[from_table_name]

      to_col = foreign_keys[-1]
      to_table_name = fk[Edge_name][to_col]
      to_df = All_dfs[to_table_name]
      to_df_pk = pk[to_table_name]

      column_names = list(All_dfs[Edge_name].columns) #get column names
      for index, _ in All_dfs[Edge_name].iterrows():
          att = {}
          from_ref_id,to_ref_id = None, None


          for i in range(1, len(column_names)):
            column_name = column_names[i]


            if column_name not in foreign_keys:
              att[column_name] = All_dfs[Edge_name].iloc[index, i]

            else:
              reference_id = All_dfs[Edge_name].iloc[index, i]
              if column_name == from_col:
                # from_ref_id = from_df[from_df[from_df_pk] == reference_id].index[0]
                from_ref_id = reference_id
              else:
                # to_ref_id = to_df[to_df[to_df_pk] == reference_id].index[0]
                to_ref_id = reference_id

          # Adding new entry to node tabel
          newRow = [{'Label': Edge_name , 'ID': All_dfs[Edge_name].iloc[index,0] , 'Attributes': att}]
          tmp = pd.DataFrame(newRow)
          nodes_df = pd.concat([nodes_df, tmp], ignore_index=True)
          edge_node_index = len(nodes_df)-1
          # print(nodes_df.iloc[len(nodes_df)-1],All_dfs[Edge_name].iloc[index,0] )

          # creating two edges, one from the from_node to the edge node and one from edge node to to_node
          from_node_id = nodes_df[(nodes_df['Label']==from_table_name) &  (nodes_df['ID']==from_ref_id)].index[0]
          to_node_id =  nodes_df[(nodes_df['Label']==to_table_name) &  (nodes_df['ID']==to_ref_id)].index[0] 

          # from ---> edge 
          new_from_edge_row = [{'From': from_node_id , 'To':edge_node_index,'From_Table': from_table_name.capitalize(),'To_Table': Edge_name.capitalize() }]
          tmp = pd.DataFrame(new_from_edge_row)
          edges_df = pd.concat([edges_df, tmp], ignore_index=True)
          # edge --->to
          new_to_edge_row = [{'From': edge_node_index , 'To':  to_node_id, 'From_Table':Edge_name.capitalize(), 'To_Table': to_table_name.capitalize()}]
          tmp = pd.DataFrame(new_to_edge_row)
          edges_df = pd.concat([edges_df, tmp], ignore_index=True)



In [None]:
all_properties = list(properties.keys())
for property_name in all_properties:
  property_df = All_dfs[property_name]
      
  foreign_keys = list(fk[property_name].keys())

  fk_col = foreign_keys[0]
  referenced_table_name = fk[property_name][fk_col]
  referenced_table_df = All_dfs[referenced_table_name]
  referenced_table_pk = pk[referenced_table_name]
  
  column_names = list(property_df.columns) #get column names
  
  for index, _ in property_df.iterrows():    
      att = {}
      reference_id = None;      
      
      for i in range(1, len(column_names)):
        column_name = column_names[i]
        
        
        if column_name not in foreign_keys:
          att[column_name] = property_df.iloc[index, i]
      
        else:
          # capturing foreign key value
          reference_id = property_df.iloc[index, i]

      # Adding new entry to node tabel
      newRow = [{'Label': property_name , 'ID': property_df.iloc[index,0] , 'Attributes': att}]
      tmp = pd.DataFrame(newRow)
      nodes_df = pd.concat([nodes_df, tmp], ignore_index=True)
      property_node_index = len(nodes_df)-1
      # print(nodes_df.iloc[len(nodes_df)-1],property_df.iloc[index,0] )

      # creating two edges, one from the from_node to the edge node and one from edge node to to_node
      referenced_node_id = nodes_df[(nodes_df['Label']==referenced_table_name) &  (nodes_df['ID']==reference_id)].index[0] 
      
      new_property_edge_row = [{'From': referenced_node_id , 'To':  property_node_index,'From_Table':referenced_table_name.capitalize(), 'To_Table': property_name.capitalize()}]
      tmp = pd.DataFrame(new_property_edge_row)
      edges_df = pd.concat([edges_df, tmp], ignore_index=True)



In [None]:
nodes_df

In [None]:
edges_df

In [None]:
draw_graph_pyvis(nodes_df,edges_df)

[Neo 4j Function](#Neo4jFunction)


<a id="Neo4jCall"></a>

In [None]:
# draw_graph_Neo4j(nodes_df,edges_df)

In [None]:
sum_nodes = 0
sum_edges = 0
for key in All_dfs:
    table_size = len(All_dfs[key])
    print(key, table_size)
    sum_nodes += table_size
    if(key in all_edges):
        sum_edges += table_size*2
    if(key in all_properties ):
        sum_edges += table_size
print("Number Of Nodes: ",sum_nodes)
print("Number Of Edges: ",sum_edges)