-
Notifications
You must be signed in to change notification settings - Fork 0
/
ExtractorTest.php
204 lines (164 loc) · 8.12 KB
/
ExtractorTest.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
<?php
use mattstein\dekindler\KindleClipping;
use mattstein\dekindler\Extractor;
/**
 * Optional smoke test against a real Kindle export.
 *
 * Place a “My Clippings.txt” file next to this test file to have its content parsed.
 * The file must contain at least one valid item; when the file is absent, the test
 * is not registered at all.
 */
$testFile = __DIR__ . DIRECTORY_SEPARATOR . 'My Clippings.txt';

if (file_exists($testFile)) {
    test('extracts everything', function () use ($testFile) {
        $extractor = new Extractor();
        $parsed = $extractor->parse(file_get_contents($testFile));

        expect(count($parsed))->toBeGreaterThan(0);
    });
}
test('identifies duplicates', function () {
    // Longer capture of a passage (location 1367-1369).
    $longer = new KindleClipping("The Trees (Percival Everett)
- Your Highlight on page 90 | Location 1367-1369 | Added on Sunday, March 5, 2023 10:13:56 PM
“If you want to know a place, you talk to its history,” Mama Z said. 34 Red Jetty pulled over and turned off the engine of his squad car.
");

    // Shorter capture of the same passage, added eight seconds later (location 1367-1368).
    $shorter = new KindleClipping("The Trees (Percival Everett)
- Your Highlight on page 90 | Location 1367-1368 | Added on Sunday, March 5, 2023 10:14:04 PM
“If you want to know a place, you talk to its history,” Mama Z said.
");

    // A highlight from a different book altogether.
    $unrelated = new KindleClipping("A Pretend Book (Wordlesby, Samuel)
- Your Highlight on page 12 | Location 4061-4063 | Added on Monday, February 20, 2023 7:16:39 PM
We’re looking for highlights that were made and then corrected, which show up as separate chunks
");

    // Duplicate detection is expected to hold in both directions...
    expect($longer->isDuplicateOf($shorter))->toBeTrue();
    expect($shorter->isDuplicateOf($longer))->toBeTrue();

    // ...and to reject a clipping from another book.
    expect($longer->isDuplicateOf($unrelated))->toBeFalse();
    expect($shorter->isDuplicateOf($unrelated))->toBeFalse();
});
test('skips duplicates', function () {
    // The same highlight saved twice, six seconds apart; the second capture has the longer text.
    $fixture = "==========
A Pretend Book (Wordlesby, Samuel)
- Your Highlight on page 12 | Location 4061-4063 | Added on Monday, February 20, 2023 7:16:39 PM
We’re looking for highlights that were made and then corrected, which show up as separate chunks
==========
A Pretend Book (Wordlesby, Samuel)
- Your Highlight on page 12 | Location 4061-4063 | Added on Monday, February 20, 2023 7:16:45 PM
We’re looking for highlights that were made and then corrected, which show up as separate chunks in the clipping file.
==========
";

    // Default parse: only one of the two clippings is expected back.
    $deduplicating = new Extractor();
    $unique = $deduplicating->parse($fixture);

    expect(count($unique))->toEqual(1);
    expect($deduplicating->duplicateCount)->toEqual(1);
    // The *latest* clipping should be the one that stays
    expect($unique[0]->text)->toEqual('We’re looking for highlights that were made and then corrected, which show up as separate chunks in the clipping file.');

    // Third argument false (presumably disables duplicate removal — confirm against Extractor::parse):
    // both clippings come back, while the duplicate is still expected to be counted.
    $keepingAll = new Extractor();
    $everything = $keepingAll->parse($fixture, [], false);

    expect(count($everything))->toEqual(2);
    expect($keepingAll->duplicateCount)->toEqual(1);
});
test('filters types', function () {
    // Fixture: two highlights and one note from one book, plus a bookmark from another.
    $fixture = "==========
The Bullet Journal Method: Track Your Past, Order Your Present, Plan Your Future (Carroll, Ryder)
- Your Highlight on page 33 | Location 498-498 | Added on Saturday, December 21, 2019 3:54:53 PM
Being intentional about what you let into your life is a practice that shouldn’t be limited to the pages of your notebook.
==========
The Bullet Journal Method: Track Your Past, Order Your Present, Plan Your Future (Carroll, Ryder)
- Your Highlight on page 38 | Location 573-574 | Added on Saturday, December 21, 2019 5:20:41 PM
That’s right: The fact that it takes longer to write things out by hand gives handwriting its cognitive edge.
==========
The Bullet Journal Method: Track Your Past, Order Your Present, Plan Your Future (Carroll, Ryder)
- Your Note on page 38 | Location 574 | Added on Saturday, December 21, 2019 5:21:32 PM
Share with that guy that said handwriting is useless!
==========
Bird by Bird: Some Instructions on Writing and Life (Anne Lamott)
- Your Bookmark on page 70 | Location 1073 | Added on Friday, January 17, 2020 10:18:02 AM
==========
";

    // No type filter: all four clippings are returned.
    $all = (new Extractor())->parse($fixture);
    expect(count($all))->toEqual(4);

    // Notes only.
    $notes = (new Extractor())->parse($fixture, [KindleClipping::TYPE_NOTE]);
    expect(count($notes))->toEqual(1);
    expect($notes[0]->text)->toContain('Share with that guy');

    // Bookmarks only.
    $bookmarks = (new Extractor())->parse($fixture, [KindleClipping::TYPE_BOOKMARK]);
    expect(count($bookmarks))->toEqual(1);
    expect($bookmarks[0]->title)->toContain('Bird by Bird');

    // Highlights and notes together. The per-type counters are expected to tally
    // every clipping in the input, including the bookmark that was filtered out.
    $extractor = new Extractor();
    $wantedTypes = [KindleClipping::TYPE_NOTE, KindleClipping::TYPE_HIGHLIGHT];
    $highlightsAndNotes = $extractor->parse($fixture, $wantedTypes, false);

    expect(count($highlightsAndNotes))->toEqual(3);
    expect($extractor->noteCount)->toEqual(1);
    expect($extractor->bookmarkCount)->toEqual(1);
    expect($extractor->highlightCount)->toEqual(2);
    expect($extractor->duplicateCount)->toEqual(0);
});
test('parses location-only highlights', function () {
    // First entry has only a location; second entry has a page followed by a location.
    $content = "==========
A Pretend Book (Wordlesby, Samuel)
- Your Highlight on Location 587-588 | Added on Sunday, February 12, 2023 11:50:59 AM
For some reason highlights will sometimes come without a page number and only a location.
==========
A Pretend Book (Wordlesby, Samuel)
- Your Highlight on page 2 | Location 728-730 | Added on Sunday, February 12, 2023 12:06:43 PM
When they do have a page number, however, they’ll start with that and include location in another pipe-separated section.
==========
";

    $clippings = (new Extractor())->parse($content);

    // Guard the indexed access below: a missing element would read as null, which
    // would let the `page` null-check pass for the wrong reason.
    expect(count($clippings))->toEqual(2);

    $firstClipping = $clippings[0];
    $secondClipping = $clippings[1];

    // Location-only entry: no page, location range preserved as written.
    expect($firstClipping->page)->toBeNull();
    expect($firstClipping->location)->toEqual('587-588');

    // Page + location entry: both fields populated.
    expect($secondClipping->page)->toEqual(2);
    expect($secondClipping->location)->toEqual('728-730');
});
test('handles different date formats', function () {
    // Three metadata layouts: a 12-hour US-style date, a day-first 24-hour date,
    // and a two-digit-year date with a trailing GMT offset.
    $content = "==========
A Pretend Book (Wordlesby, Samuel)
- Your Highlight on Location 587-588 | Added on Sunday, February 12, 2023 11:50:59 AM
For some reason highlights will sometimes come without a page number and only a location.
==========
Fahrenheit 451 (Ray Bradbury)
- Your Highlight at location 784-785 | Added on Saturday, 26 March 2016 18:37:26
Who knows who might be the target of the well-read man?
==========
Zen and the Art of Motorcycle Maintenance (Robert M. Pirsig)
- Highlight on Page 6 | Loc. 190 | Added on Wednesday, 5 December 12 23:07:35 GMT+00:59
So we navigate mostly by dead reckoning,
==========
";

    $clippings = (new Extractor())->parse($content);

    // Guard the indexed access below so a parser regression is reported as a count
    // mismatch instead of a "member function on null" error.
    expect(count($clippings))->toEqual(3);

    $format = 'Y-m-d H:i:s';

    // Expected values keep the wall-clock time exactly as written in each entry.
    expect($clippings[0]->date->format($format))->toEqual('2023-02-12 11:50:59');
    expect($clippings[1]->date->format($format))->toEqual('2016-03-26 18:37:26');
    expect($clippings[2]->date->format($format))->toEqual('2012-12-05 23:07:35');
});
test('handles abbreviated highlight location format', function () {
    // Metadata line uses the short "Highlight Loc." form with no page section.
    $raw = "Dive Into Python (Mark Pilgrim)
- Highlight Loc. 1150-51 | Added on Wednesday, 5 December 12 06:48:00 GMT+00:59
[0, 1, 2, 3, 4, 5, 6] >>> (MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7)
";

    $highlight = new KindleClipping($raw);
    $timestamp = $highlight->date->format("Y-m-d H:i:s");

    expect($highlight->page)->toBeNull();
    expect($highlight->location)->toEqual('1150-51');
    expect($timestamp)->toEqual('2012-12-05 06:48:00');
});
test('handles abbreviated note location format', function () {
    // Metadata line uses the short "Note Loc." form with a single location and no page.
    $raw = "Jump Start Node.js (Don Nguyen)
- Note Loc. 2322 | Added on Wednesday, 26 December 12 00:16:53 GMT+00:59
aaa
";

    $note = new KindleClipping($raw);

    expect($note->page)->toBeNull();
    expect($note->type)->toEqual(KindleClipping::TYPE_NOTE);
    expect($note->location)->toEqual('2322');
    expect($note->date->format("Y-m-d H:i:s"))->toEqual('2012-12-26 00:16:53');
});
test('handles page highlight without location', function () {
    // Metadata line carries a page but no location section, and spells the timezone out in words.
    $raw = "Oreilly.Developing.Backbone.js.Applications.Apr.2012 (Addy Osmani)
- Highlight on Page 7 | Added on Monday, 3 December 12 19:42:30 Greenwich Mean Time
JavaScript templating libraries (such as Handlebars.js or Mustache)
";

    $highlight = new KindleClipping($raw);

    expect($highlight->page)->toEqual(7);
    expect($highlight->type)->toEqual(KindleClipping::TYPE_HIGHLIGHT);
    expect($highlight->location)->toBeNull();
    expect($highlight->date->format("Y-m-d H:i:s"))->toEqual('2012-12-03 19:42:30');
});