/
Database.pm
541 lines (395 loc) · 11.9 KB
/
Database.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
# $Id$
=head1 NAME
ThirdLobe::Database - encapsulates ThirdLobe's low-level database operations
=head1 SYNOPSIS
No synopsis yet.
=head1 DESCRIPTION
ThirdLobe::Database abstracts the low-level database operations used
by ThirdLobe::ArcStore. This class is the reference implementation.
It may eventually move into ThirdLobe::Database::Postgres since it's
based heavily on that database engine.
=cut
package ThirdLobe::Database;
use warnings;
use strict;
use DBI;
use ThirdLobe::Node;
use ThirdLobe::Arc;
use Carp qw(croak);
use constant DBH => 0;
=head1 NODE METHODS
=head2 node_add ANCHOR, TEXT
Adds a node record to the database, associating an ANCHOR arc with
some TEXT. Returns a ThirdLobe::Node object on success.
TODO - Currently does not have a failure mode.
my $node_object = $db->node_add($arc_object, "some text");
=cut
sub node_add {
my ($self, $arc, $text) = @_;
my $dbh = $self->[DBH];
# Hash the node's text for its key.
my $key = $self->_node_hash($text);
# Insert the node.
my $sth = $dbh->prepare_cached(
"INSERT INTO node (arc_seq, val_key, val_text) VALUES (?, ?, ?)"
);
$sth->execute($arc->seq(), $key, $text) or die $sth->errstr();
$sth->finish();
# Return the node represented by the recently inserted row.
return ThirdLobe::Node->new(
{
seq => $dbh->last_insert_id(undef, undef, "node", undef),
arc_seq => $arc->seq(),
key => $key,
val => $text,
}
);
}
=head2 _node_hash TEXT
Accepts the TEXT for a node, and returns a version of it that's hashed
for fuzzy retrieval.
The current return value is TEXT that is folded to lowercase, and
whitespace normalized. More complex algorithms may emerge as usage
dictates.
my $key = $db->_node_hash("some text");
B<Changing the algorithm will invalidate all your nodes. Don't do
this lightly.>
=cut
sub _node_hash {
my ($self, $text) = @_;
# Simple hashing. We can do better later.
my $key = lc($text);
$key =~ s/\s+/ /g;
$key =~ s/^\s+//;
$key =~ s/\s+$//;
return $key;
}
=head2 node_from_text TEXT
Look up a node record in the database for a given piece of TEXT.
Returns a ThirdLobe::Node object representing the TEXT, or undef on
failure.
my $node_object = $db->node_from_text("some text");
=cut
sub node_from_text {
my ($self, $text) = @_;
my $dbh = $self->[DBH];
# Hash the node's text for its key.
my $key = $self->_node_hash($text);
my $sth = $dbh->prepare_cached("SELECT * FROM node WHERE val_key = ?");
$sth->execute($key);
my $row = $sth->fetchrow_hashref();
$sth->finish();
return unless $row;
return ThirdLobe::Node->new($row);
}
=head2 node_from_seq NODE_SEQ
Every node object has a unique, sequential ID assigned to it on
creation. This is the node table's primary key.
node_from_seq() fetches a node record by this ID and returns a
ThirdLobe::Node object representing it. Returns undef on failure.
my $node_object = $db->node_from_seq(42);
=cut
sub node_from_seq {
my ($self, $node_seq) = @_;
my $dbh = $self->[DBH];
my $sth = $dbh->prepare_cached("SELECT * FROM node WHERE seq = ?");
$sth->execute($node_seq);
my $row = $sth->fetchrow_hashref();
$sth->finish();
return unless $row;
return ThirdLobe::Node->new($row);
}
=head2 node_from_anchor ANCHOR
Retrieves the node record associated with an anchor arc, and returns a
ThirdLobe::Node representing the record. Returns undef if there's no
node for the anchor.
my $node_object = $db->node_from_anchor($arc_object);
=cut
sub node_from_anchor {
my ($self, $anchor) = @_;
my $dbh = $self->[DBH];
my $sth = $dbh->prepare_cached("SELECT * FROM node WHERE arc_seq = ?");
$sth->execute($anchor->seq());
my $row = $sth->fetchrow_hashref();
$sth->finish();
return unless $row;
return ThirdLobe::Node->new($row);
}
=head1 ARC METHODS
=head2 build_arc_query SUBJECT_ARC, PREDICATE_ARC, OBJECT_ARC
Builds the SQL WHERE clause and corresponding list of values for
fetching arcs that match up to three arc objects. Undefined arc
objects act as wildcards.
my ($where, @values) = $db->build_arc_query(
$subject_arc, $predicate_arc, $object_arc
);
=cut
sub build_arc_query {
my ($self, $sub_anchor, $prd_anchor, $obj_anchor) = @_;
my (@wheres, @values);
if (defined $sub_anchor) {
push @wheres, "sub_seq = ?";
push @values, $sub_anchor->seq();
}
if (defined $prd_anchor) {
push @wheres, "prd_seq = ?";
push @values, $prd_anchor->seq();
}
if (defined $obj_anchor) {
push @wheres, "obj_seq = ?";
push @values, $obj_anchor->seq();
}
my $where_clause;
if (@wheres) {
$where_clause = " WHERE " . join(" AND ", @wheres);
}
else {
$where_clause = "";
}
return $where_clause, @values;
}
=head2 arc_add SUBJECT_ARC, PREDICATE_ARC, OBJECT_ARC
Add an arc that associates three other arcs. Returns a new
ThirdLobe::Arc object, or undef on failure.
my $arc = $db->arc_add($subject_arc, $predicate_arc, $object_arc);
=cut
sub arc_add {
my ($self, $sub_arc, $prd_arc, $obj_arc) = @_;
my $dbh = $self->[DBH];
# Insert the arc.
my $sth = $dbh->prepare_cached(
"INSERT INTO arc (sub_seq, prd_seq, obj_seq) VALUES (?, ?, ?)"
);
$sth->execute($sub_arc->seq(), $prd_arc->seq(), $obj_arc->seq())
or die $sth->errstr();
$sth->finish();
# Return an arc that represents the newly inserted row.
# Since we know the arcs, we can pre-cache them.
return ThirdLobe::Arc->new(
{
db => $self,
seq => $dbh->last_insert_id(undef, undef, "arc", undef),
sub_seq => $sub_arc->seq(),
prd_seq => $prd_arc->seq(),
obj_seq => $obj_arc->seq(),
sub_arc => $sub_arc,
prd_arc => $prd_arc,
obj_arc => $obj_arc,
}
);
}
=head2 arc_from_arcs SUBJECT_ARC, PREDICATE_ARC, OBJECT_ARC
Fetch zero or more arcs that match a given SUBJECT_ARC, PREDICATE_ARC,
and OBJECT_ARC. The three arcs are usually but not always anchors.
Undefined parameters are treated as wildcards.
my $new_arc = $db->arc_from_arcs(
$subject_arc, $predicate_arc, $object_arc
);
=cut
sub arc_from_arcs {
my ($self, $sub_arc, $prd_arc, $obj_arc, $limit) = @_;
my $dbh = $self->[DBH];
my ($where_clause, @values) = $self->build_arc_query(
$sub_arc, $prd_arc, $obj_arc
);
my $sql = "SELECT * FROM arc " . $where_clause;
$sql .= " LIMIT $limit" if $limit;
warn $sql;
my $sth = $dbh->prepare_cached($sql) or die $dbh->errstr;
$sth->execute(@values) or die $sth->errstr;
my (%memo, @arcs);
while (my $row = $sth->fetchrow_hashref()) {
# The (0,0,0,0) arc doesn't officially exist.
next unless $row->{seq};
push @arcs, ThirdLobe::Arc->new({ db => $self, %$row });
}
$sth->finish();
return @arcs;
}
=head2 arc_count SUBJECT_ARC, PREDICATE_ARC, OBJECT_ARC
Counts the number of arcs that match up to three other arcs.
Undefined parameters are treated as wildcards. Returns the number of
arcs that were found.
my $number_found = $db->arc_count(
$subject_arc, $predicate_arc, $object_arc
);
=cut
sub arc_count {
my ($self, $sub_arc, $prd_arc, $obj_arc) = @_;
my $dbh = $self->[DBH];
my ($where_clause, @values) = $self->build_arc_query(
$sub_arc, $prd_arc, $obj_arc
);
my $sql = "SELECT count(seq) FROM arc" . $where_clause;
warn $sql;
my $sth = $dbh->prepare_cached($sql) or die $dbh->errstr;
$sth->execute(@values) or die $sth->errstr;
my @row = $sth->fetchrow_array();
$sth->finish();
return unless @row;
return $row[0];
}
=head2 anchor_add
Create a new anchor arc record, and return a ThirdLobe::Arc object to
represent it.
my $arc_object = $db->anchor_add();
=cut
sub anchor_add {
my $self = shift;
my $dbh = $self->[DBH];
# Insert the arc.
my $sth = $dbh->prepare_cached(
"INSERT INTO arc (sub_seq, prd_seq, obj_seq) VALUES (0, 0, 0)"
);
$sth->execute() or die $sth->errstr();
$sth->finish();
# Return an arc representing it.
return ThirdLobe::Arc->new(
{
db => $self,
seq => $dbh->last_insert_id(undef, undef, "arc", undef),
sub_seq => 0,
prd_seq => 0,
obj_seq => 0,
}
);
}
=head2 arc_from_seq ARC_SEQ
Every arc has a unique sequential ID assigned to it. These IDs are
used as the arc table's primary key.
arc_from_seq() returns a ThirdLobe::Arc object representing the arc
record with a given ARC_SEQ.
my $arc_object = $db->arc_from_seq(42);
=cut
sub arc_from_seq {
my ($self, $seq) = @_;
my $dbh = $self->[DBH];
my $sth = $dbh->prepare_cached("SELECT * FROM arc WHERE seq = ?");
$sth->execute($seq);
# TODO - Error checking. Return undef on failure.
my $row = $sth->fetchrow_hashref();
my $arc = ThirdLobe::Arc->new({ db => $self, %$row });
$sth->finish();
return $arc;
}
=head1 WHOLE DATABASE METHODS
=head2 rebuild
Destroy any data you have, and rebuild the tables and indices the
library will need to actually function. Must be called after the
database is connected.
$db->rebuild(); # [SFX: TOILET FLUSHING]
=cut
sub rebuild {
my $self = shift;
my $dbh = $self->[DBH];
warn(
"++ You may see NOTICEs about implicit triggers being dropped added.\n",
"++ They appear to be normal. Please inform us if they can be avoided.\n",
);
# Nodes.
$dbh->do("DROP TABLE node CASCADE");
$dbh->do("DROP SEQUENCE node_seq_seq");
$dbh->do("CREATE SEQUENCE node_seq_seq");
$dbh->do(
<<' END'
CREATE TABLE node (
seq INTEGER DEFAULT nextval('node_seq_seq') NOT NULL,
arc_seq INTEGER NOT NULL,
val_key CHARACTER VARYING NOT NULL,
val_text CHARACTER VARYING NOT NULL
)
END
);
$dbh->do("CREATE UNIQUE INDEX node_seq ON node USING BTREE (seq)");
$dbh->do("CREATE INDEX node_arc ON node USING BTREE (arc_seq)");
$dbh->do("CREATE UNIQUE INDEX node_val_key ON node USING BTREE (val_key)");
# Arcs.
$dbh->do("DROP TABLE arc CASCADE");
$dbh->do("DROP SEQUENCE arc_seq_seq");
$dbh->do("CREATE SEQUENCE arc_seq_seq");
$dbh->do(
<<' END'
CREATE TABLE arc (
seq INTEGER DEFAULT nextval('arc_seq_seq') NOT NULL,
sub_seq INTEGER NOT NULL,
prd_seq INTEGER NOT NULL,
obj_seq INTEGER NOT NULL
)
END
);
$dbh->do("CREATE UNIQUE INDEX arc_seq ON arc USING BTREE (seq)");
$dbh->do("CREATE INDEX arc_sub_seq ON arc USING BTREE (sub_seq)");
$dbh->do("CREATE INDEX arc_prd_seq ON arc USING BTREE (prd_seq)");
$dbh->do("CREATE INDEX arc_obj_seq ON arc USING BTREE (obj_seq)");
# Referential integrity.
$dbh->do(
"ALTER TABLE node " .
"ADD CONSTRAINT node_arc " .
"FOREIGN KEY (arc_seq) " .
"REFERENCES arc(seq) " .
"MATCH FULL"
);
$dbh->do(
"ALTER TABLE arc " .
"ADD CONSTRAINT arc_sub " .
"FOREIGN KEY (sub_seq) " .
"REFERENCES arc(seq) " .
"MATCH FULL"
);
$dbh->do(
"ALTER TABLE arc " .
"ADD CONSTRAINT arc_prd " .
"FOREIGN KEY (prd_seq) " .
"REFERENCES arc(seq) " .
"MATCH FULL"
);
$dbh->do(
"ALTER TABLE arc " .
"ADD CONSTRAINT arc_obj " .
"FOREIGN KEY (obj_seq) " .
"REFERENCES arc(seq) " .
"MATCH FULL"
);
# For referential integrity to work.
$dbh->do("INSERT INTO arc VALUES (0, 0, 0, 0)");
warn "++ End of the NOTICEs.\n";
}
=head2 connect DSN, USERNAME, PASSWORD
Connect to the database, using a DSN, USERNAME, and PASSWORD.
Actually, the parameters to connect() are passed verbatim to
DBI->connect(). Returns a ThirdLobe::Database object that can be used
to interact with the database on a low level.
my $dbh = ThirdLobe::Database->connect("dbi:pg:dbname=know");
=cut
sub connect {
my $class = shift;
my $dbh = DBI->connect(@_);
die "Could not connect to database: ", $dbh->errstr() if $dbh->err();
my $self = bless [
$dbh, # DBH
], $class;
}
=head2 DESTROY
The object destructor makes sure that the database is disconnected
from properly.
=cut
sub DESTROY {
my $self = shift;
if (defined $self->[DBH]) {
$self->[DBH]->disconnect();
$self->[DBH] = undef;
}
}
=head1 TODO
Many of these methods don't define failure modes if the underlying DBI
calls fail. They may die outright or return bogus values. Often the
DBI calls aren't checked for success or failure.
=head1 AUTHORS
ThirdLobe::Database was conceived and written by Rocco Caputo.
Thank you for using it.
=head1 COPYRIGHT
Copyright 2005, Rocco Caputo.
This library is free software; you can use, redistribute it, and/or
modify it under the same terms as Perl itself.
=cut
1;