In [1]:
import getpass
import os
import time

from xpinyin import Pinyin
# Shared xpinyin converter (default settings); later cells call pin.get_pinyin() on single characters.
pin = Pinyin()

### Load File

In [2]:
# Parse the source: the first whitespace-separated field of each line is taken
# as the idiom, and only idioms of at most four characters are kept.
with open('THUOCL_chengyu.txt', 'r', encoding='utf-8') as source:
    idioms = []
    for line in source:
        fields = line.split()
        # A blank or whitespace-only line yields no fields; skip it instead of
        # failing with IndexError on fields[0].
        if fields and len(fields[0]) <= 4:
            idioms.append(fields[0])

# Every idiom is a directed edge from the pinyin of its first character to the
# pinyin of its last character.
edges = [{"src": pin.get_pinyin(i[0]), "dst": pin.get_pinyin(i[-1]), "idiom": i} for i in idioms]

# The node set is every syllable that appears at either end of an edge; deriving
# it from `edges` avoids converting each character to pinyin a second time.
nodes = {e["src"] for e in edges} | {e["dst"] for e in edges}

#### Create Graph

In [4]:
from py2neo import Graph
# Connection settings are read from the environment so that credentials are not
# stored in the notebook; the password is prompted for when NEO4J_PASSWORD is unset.
graph = Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    password=os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: "),
)

# One :Node per distinct pinyin syllable. Values travel as query parameters
# instead of being spliced into the Cypher text (no quoting/identifier issues),
# and MERGE keeps a re-run of this cell from duplicating the graph.
graph.run(
    "UNWIND $chars AS char MERGE (:Node {char: char})",
    {"chars": sorted(nodes)},
)

# One :idiom relationship per idiom, from its first-syllable node to its
# last-syllable node.
graph.run(
    "UNWIND $edges AS edge "
    "MATCH (src:Node {char: edge.src}), (dst:Node {char: edge.dst}) "
    "MERGE (src)-[:idiom {idiom: edge.idiom}]->(dst)",
    {"edges": edges},
)

<py2neo.database.Cursor at 0x10a4f3cf8>

___
### Query Graph
##### Find next word

In [5]:
def next_word(word):
    """Query the idioms that can directly follow ``word`` in the graph.

    Matches the ``:idiom`` relationship whose ``idiom`` property equals
    ``word`` and returns every ``:idiom`` relationship leaving its end node.

    Args:
        word: The idiom to continue from. It is sent as a query parameter, so
            quotes or other special characters cannot alter the Cypher text.

    Returns:
        Whatever ``graph.run`` returns for the query (a single column,
        ``e.idiom``); callers in this notebook call ``.to_data_frame()`` on it.
    """
    return graph.run(
        "match ()-[:idiom {idiom: $idiom}]->()-[e:idiom]->() return e.idiom",
        {"idiom": word},
    )

# Demo: collect every idiom that can follow a sample idiom into a DataFrame,
# report how many were found, and preview the first rows.
df_next = next_word("坚定不移").to_data_frame()

n_found = df_next.shape[0]
print("found %i idioms"%n_found)
df_next.head()

found 376 idioms


Unnamed: 0,e.idiom
0,一技之长
1,一本正经
2,一病不起
3,抑扬顿挫
4,一钱不值


##### Find path

In [6]:
def find_path(idiom, length, limit):
    """Find idiom chains that start with ``idiom``.

    Builds a Cypher pattern consisting of the ``:idiom`` relationship whose
    ``idiom`` property equals ``idiom`` followed by ``length`` further
    ``:idiom`` relationships, and returns the matched chains.

    Args:
        idiom: Starting idiom. Sent as a query parameter rather than
            interpolated into the Cypher text.
        length: Number of additional idioms after the starting one. ``0``
            returns only the starting idiom.
        limit: Maximum number of rows to return (formatted as an integer).

    Returns:
        The result of ``.to_data_frame()`` on the query cursor, with columns
        ``e.idiom, e0.idiom, ..., e{length-1}.idiom``.
    """
    hops = range(length)
    pattern = "match ()-[e:idiom {idiom: $idiom}]->()" + "".join(
        "-[e%i:idiom]->()" % i for i in hops
    )
    # Join all returned columns in one go so that length == 0 does not leave a
    # dangling comma after "e.idiom".
    columns = ", ".join(["e.idiom"] + ["e%i.idiom" % i for i in hops])
    query = "%s return %s limit %i" % (pattern, columns, limit)
    return graph.run(query, {"idiom": idiom}).to_data_frame()

In [7]:
# Demo: time a five-hop chain search from a sample idiom, capped at 100000 rows,
# then preview the first ten chains.
start = time.time()
df_paths = find_path(idiom="坚定不移", length=5, limit=100000)
elapsed = time.time() - start

print("found %i paths, time elapsed: %.2fs"%(len(df_paths), elapsed))
df_paths.head(10)

found 100000 paths, time elapsed: 3.05s


Unnamed: 0,e.idiom,e0.idiom,e1.idiom,e2.idiom,e3.idiom,e4.idiom
0,坚定不移,一本正经,惊叹不已,一尘不染,燃眉之急,急公好义
1,坚定不移,一钱不值,只知其一,一尘不染,燃眉之急,急公好义
2,坚定不移,一钱不值,直抒胸臆,一尘不染,燃眉之急,急公好义
3,坚定不移,一走了之,只知其一,一尘不染,燃眉之急,急公好义
4,坚定不移,一走了之,直抒胸臆,一尘不染,燃眉之急,急公好义
5,坚定不移,一叶知秋,求同存异,一尘不染,燃眉之急,急公好义
6,坚定不移,衣冠楚楚,出敌不意,一尘不染,燃眉之急,急公好义
7,坚定不移,衣冠楚楚,出其不意,一尘不染,燃眉之急,急公好义
8,坚定不移,一手包办,半信半疑,一尘不染,燃眉之急,急公好义
9,坚定不移,一字千金,尽如人意,一尘不染,燃眉之急,急公好义
