In [1]:
import pandas as pd
import pandas.io.sql as sqlio
import psycopg2
import sqlalchemy
import os

from dotenv import load_dotenv
load_dotenv()

import matplotlib as plt
%matplotlib inline

In [2]:
# Open the PostgreSQL connection used by every cell below.
# Credentials come from the environment; host, port and database are fixed.
conn = psycopg2.connect(
    host="127.0.0.1",
    port="54320",
    database="dcat",
    user=os.environ.get("POSTGRES_USER"),
    password=os.environ.get("POSTGRES_PASSWORD"),
)

In [3]:
# Cursor on conn; the cells below run their DDL/DML statements through it.
cur = conn.cursor()

In [4]:
# Show the parameters the live connection is using, followed by a blank line.
print(conn.get_dsn_parameters(), end=" \n\n")

{'user': 'dcat', 'dbname': 'dcat', 'host': '127.0.0.1', 'port': '54320', 'tty': '', 'options': '', 'sslmode': 'prefer', 'sslcompression': '0', 'krbsrvname': 'postgres', 'target_session_attrs': 'any'} 



In [5]:
# Sanity check: ask the server for its version string and echo the row.
cur.execute("SELECT version();")
record = cur.fetchone()
print("You are connected to - ", record, end=" \n\n")

You are connected to -  ('PostgreSQL 12.0 (Debian 12.0-1.pgdg100+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit',) 



In [None]:
# Remove both tbl_er indexes if they are present.
for drop_index_sql in (
    "drop index if exists idx_tbl_er_c1",
    "drop index if exists idx_tbl_er_c2",
):
    cur.execute(drop_index_sql)

# Node details table

In [6]:
# (Re)create the node table.
# CASCADE is needed for re-runs: tbl_er declares foreign keys onto
# tbl_detail(id), and PostgreSQL refuses to drop a referenced table without it.
# CASCADE removes only the dependent constraints, not tbl_er itself.
cur.execute("drop table if exists tbl_detail cascade")
cur.execute("""
create table tbl_detail 
(  
  id int primary key,    -- ID  
  info jsonb,            -- description  
  crt_time timestamp     -- Creation time  
);  
""")

In [7]:
# Load tbl_detail into a DataFrame and preview the first rows.
sql = "select * from tbl_detail"
df = pd.read_sql_query(sql=sql, con=conn)
df.head()

Unnamed: 0,id,info,crt_time


# Relationship description/attributes

In [8]:
# (Re)create the lookup table that describes each relationship id.
create_er_desc_sql = """
create table tbl_er_desc  -- Relationship description  
(  
  id int2 primary key,    -- Relationship ID  
  info text  -- Description  
);
"""
cur.execute("drop table if exists tbl_er_desc")
cur.execute(create_er_desc_sql)

In [None]:
# Insert relationship id 1 with an empty description.
# Values are passed as parameters so the driver handles the quoting; the
# original nested double quotes produced invalid Python and invalid SQL.
cur.execute("insert into tbl_er_desc (id, info) values (%s, %s);", (1, ""))

In [9]:
# Load tbl_er_desc into a DataFrame and preview the first rows.
sql = "select * from tbl_er_desc"
df = pd.read_sql_query(sql=sql, con=conn)
df.head()

Unnamed: 0,id,info


# Relationships/Edges 

In [10]:
# (Re)create the edge table: one row per directed edge c1 -> c2.
# Both endpoints must exist in tbl_detail, self-loops are rejected by the
# check constraint, and each ordered pair may appear only once.
# The array foreign key on prop is commented out in the DDL, so prop values
# are not validated against tbl_er_desc.
create_er_sql = """
create table tbl_er     -- What is relationship between ID1 to ID2      
(  
  c1 int references tbl_detail(id),    
  c2 int references tbl_detail(id),    
  prop int2[],  -- Multiple relationships may exist. Therefore, we use array storage. This is an edge. 
  crt_time timestamp,  
  check (c1<>c2),  
  unique (c1,c2)  
  -- FOREIGN KEY (EACH ELEMENT OF prop) REFERENCES tbl_er_desc(id)  -- Array foreign key, which is supported for PostgreSQL 11. The performance is good  
);  
"""
cur.execute("drop table if exists tbl_er")
cur.execute(create_er_sql)

In [11]:
# Load tbl_er into a DataFrame and preview the first rows.
sql = "select * from tbl_er"
df = pd.read_sql_query(sql=sql, con=conn)
df.head()

Unnamed: 0,c1,c2,prop,crt_time


In [12]:
# Rebuild the index on the edge source column c1, which graph_search1
# filters on (c1 = root) and joins on (g.c1 = sg.c2).
for index_c1_sql in (
    "drop index if exists idx_tbl_er_c1",
    "create index idx_tbl_er_c1 on tbl_er(c1)",
):
    cur.execute(index_c1_sql)

In [13]:
# Rebuild the index on the edge destination column c2.
for index_c2_sql in (
    "drop index if exists idx_tbl_er_c2",
    "create index idx_tbl_er_c2 on tbl_er(c2)",
):
    cur.execute(index_c2_sql)

In [14]:
# Populate tbl_detail with node ids 1..10000. Only the first column (id) is
# supplied, so info and crt_time are left NULL.
cur.execute("insert into tbl_detail select generate_series(1,10000);")

In [15]:
# Seed the demo graph. Each tuple is (c1, c2, prop): a directed edge c1 -> c2
# with its relationship ids. crt_time is stamped by the server via now().
# psycopg2 adapts a Python list to a PostgreSQL ARRAY[...] literal, so each row
# sends the same values as the former hand-written `array[n]` inserts.
edges = [
    (1, 2, [10]),
    (2, 1, [9]),
    (1, 3, [10]),
    (3, 1, [9]),
    (5, 2, [11]),
    (2, 5, [9]),
    (5, 3, [11]),
    (3, 5, [9]),
    (4, 1, [10]),
    (1, 4, [8]),
    (6, 5, [10]),
    (5, 6, [9]),
    (7, 1, [11]),
    (1, 7, [8]),
    (8, 5, [11]),
    (5, 8, [9]),
]
# The explicit column list keeps the insert valid if tbl_er's column order changes.
cur.executemany(
    "insert into tbl_er (c1, c2, prop, crt_time) values (%s, %s, %s, now())",
    edges,
)

In [16]:
# Commit the statements issued through cur so far (tables, indexes, seed rows).
conn.commit()

In [17]:
# Define graph_search1, a set-returning PL/pgSQL function that walks tbl_er
# outward from i_root with a recursive CTE and returns one row per traversed edge.
#   i_root  - node id whose outgoing edges form depth 1
#   i_depth - maximum depth of returned rows (for i_depth >= 1)
#   i_limit - LIMIT applied inside the root query and inside the recursive term
# Output columns: o_path (ids from the root to the current node), o_point1 and
# o_point2 (the current edge), o_link_prop (the current edge's prop),
# o_link_prop_all (text concatenation of every prop along the path), o_depth.
#
# Fix: the recursive term used `sg.depth <= i_depth`. It emits rows at
# sg.depth + 1, so rows of depth i_depth + 1 were returned. It now uses `<`,
# which makes i_depth the actual depth limit.
# The query text is built with format(); the spliced values are typed
# int/int8 function arguments.
sql = """
create or replace function graph_search1(      
  IN i_root int,                       -- The node that the search is based on        
  IN i_depth int  default 99999,       -- the tier to search (the depth limit)      
  IN i_limit int8 default 2000000000,  -- limit the number of records returned for each tier      
  OUT o_path int[],                    -- output: path, an array of IDs      
  OUT o_point1 int,                    -- output: point 1 ID      
  OUT o_point2 int,                    -- output: point 2 ID      
  OUT o_link_prop int2[],              -- output: the connection property between the two current points      
  OUT o_link_prop_all text,            -- output: the connection property from the starting node to the current node      
  OUT o_depth int                      -- output: current depth (tier)      
) returns setof record as 
$$
      
declare      
  sql text;      
begin      
sql := format($_$      
WITH RECURSIVE search_graph(        
  c1,     -- point 1        
  c2,     -- point 2        
  prop,   -- current edge property      
  all_prop,  -- properties of all edges  
  depth,  -- current depth, starting from 1         
  path    -- path, stored as an array         
) AS (        
        select c1,c2,prop,all_prop,depth,path from (        
        SELECT                               -- ROOT node query        
          g.c1,                              -- point 1        
          g.c2,                              -- point 2        
          g.prop,                            -- edge property        
      g.prop::text as all_prop,          -- properties of all edges  
          1 depth,                           -- initial depth=1        
          ARRAY[g.c1, g.c2] path             -- initial path        
        FROM tbl_er AS g         
        WHERE         
          c1 = %s                            -- ROOT node=?        
          limit %s                           -- How many records are limited at each tier?        
        ) t        
      UNION ALL        
        select c1,c2,prop,all_prop,depth,path from (        
        SELECT                               -- recursive clause         
          g.c1,                              -- point 1        
          g.c2,                              -- point 2        
          g.prop,                            -- edge property     
      sg.all_prop || g.prop::text as all_prop,    -- properties of all edges  
          sg.depth + 1 depth,                   -- depth +1        
          sg.path || g.c2 path                 -- Add a new point to the path        
        FROM tbl_er AS g, search_graph AS sg    -- circular INNER JOIN        
        WHERE         
          g.c1 = sg.c2                       -- recursive JOIN condition        
          AND (g.c2 <> ALL(sg.path))                      -- Prevent loop, determine whether it is a loop and judge if the new point is already in the previous path   
          AND sg.depth < %s                  -- only extend paths still below the depth limit, so emitted depth <= i_depth          
          limit %s                           -- How many records are limited at each tier?       
        ) t        
)        
SELECT path as o_path, c1 as o_point1, c2 as o_point2, prop as o_link_prop, all_prop as o_link_prop_all, depth as o_depth      
FROM search_graph;                           -- query a recursive table. You can add LIMIT output or use a cursor       
$_$, i_root, i_limit, i_depth, i_limit      
);      
      
return query execute sql;      
      
end;      

$$
 language plpgsql strict;   
"""

cur.execute(sql)

In [18]:
# Commit the graph_search1 definition created by the preceding cell.
conn.commit()

In [37]:
%%time
sql = "select * from graph_search1(1)"
df = sqlio.read_sql_query(sql, conn)

CPU times: user 3.57 ms, sys: 1.51 ms, total: 5.08 ms
Wall time: 12.5 ms


In [38]:
# Preview the first rows of the graph_search1(1) result held in df.
df.head()

Unnamed: 0,o_path,o_point1,o_point2,o_link_prop,o_link_prop_all,o_depth
0,"[1, 2]",1,2,[10],{10},1
1,"[1, 3]",1,3,[10],{10},1
2,"[1, 4]",1,4,[8],{8},1
3,"[1, 7]",1,7,[8],{8},1
4,"[1, 2, 5]",2,5,[9],{10}{9},2
