/
whoosh.py
81 lines (69 loc) · 2.46 KB
/
whoosh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
# fromtextindex()
#################
import petl as etl
import os
# set up an index and load some documents via the Whoosh API
from whoosh.index import create_in
from whoosh.fields import *
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
dirname = 'example.whoosh'
if not os.path.exists(dirname):
os.mkdir(dirname)
index = create_in(dirname, schema)
writer = index.writer()
writer.add_document(title=u"First document", path=u"/a",
content=u"This is the first document we've added!")
writer.add_document(title=u"Second document", path=u"/b",
content=u"The second one is even more interesting!")
writer.commit()
# extract documents as a table
table = etl.fromtextindex(dirname)
table
# totextindex()
###############
import petl as etl
import datetime
import os
# here is the table we want to load into an index
table = (('f0', 'f1', 'f2', 'f3', 'f4'),
('AAA', 12, 4.3, True, datetime.datetime.now()),
('BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)),
('CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25)))
# define a schema for the index
from whoosh.fields import *
schema = Schema(f0=TEXT(stored=True),
f1=NUMERIC(int, stored=True),
f2=NUMERIC(float, stored=True),
f3=BOOLEAN(stored=True),
f4=DATETIME(stored=True))
# load index
dirname = 'example.whoosh'
if not os.path.exists(dirname):
os.mkdir(dirname)
etl.totextindex(table, dirname, schema=schema)
# searchtextindex()
###################
import petl as etl
import os
# set up an index and load some documents via the Whoosh API
from whoosh.index import create_in
from whoosh.fields import *
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
dirname = 'example.whoosh'
if not os.path.exists(dirname):
os.mkdir(dirname)
index = create_in('example.whoosh', schema)
writer = index.writer()
writer.add_document(title=u"Oranges", path=u"/a",
content=u"This is the first document we've added!")
writer.add_document(title=u"Apples", path=u"/b",
content=u"The second document is even more "
u"interesting!")
writer.commit()
# demonstrate the use of searchtextindex()
table1 = etl.searchtextindex('example.whoosh', 'oranges')
table1
table2 = etl.searchtextindex('example.whoosh', 'doc*')
table2