@@ -8,7 +8,8 @@ import Promise from 'bluebird';
8
8
9
9
/* eslint-disable no-console */
10
10
11
- const maxPages = 2000 ;
11
+ const MAX_PAGES = 2000 ;
12
+ const MAX_SCORES_PER_PAGE = 25 ;
12
13
13
14
const parseReview = ( db , $ ) => async ( i , review ) => {
14
15
const $r = $ ( review ) ;
@@ -28,51 +29,75 @@ const parseReview = (db, $) => async (i, review) => {
28
29
29
30
return db
30
31
. run (
31
- SQL `INSERT OR REPLACE INTO albums (name, date, image, url) VALUES (${ title } , ${ date } , ${ image } , ${ url } )`
32
+ SQL `INSERT OR IGNORE INTO albums (name, date, image, url) VALUES (${ title } , ${ date } , ${ image } , ${ url } )`
32
33
)
33
34
. then ( async ( { stmt } ) => {
34
- console . log ( 'Inserted:' , stmt . lastID ) ;
35
- const albumId = stmt . lastID ;
36
-
37
- // console.log(title, artists);
35
+ let albumId ;
36
+ if ( stmt . changes === 0 ) {
37
+ albumId = await db
38
+ . get ( SQL `SELECT album_id as id FROM albums WHERE name = ${ title } AND url = ${ url } ` )
39
+ . then ( ( { id } ) => id ) ;
40
+ } else {
41
+ albumId = stmt . lastID ;
42
+ }
38
43
39
- await Promise . all (
44
+ const artistIds = await Promise . all (
40
45
artists . map ( artist =>
41
46
db
42
- . run ( SQL `INSERT OR REPLACE INTO artists (name) VALUES (${ artist } )` )
43
- . then ( ( { stmt : st } ) => st . lastID )
47
+ . run ( SQL `INSERT OR IGNORE INTO artists (name) VALUES (${ artist } )` )
48
+ . then ( ( { stmt : st } ) => {
49
+ if ( st . changes === 0 ) {
50
+ return db
51
+ . get ( SQL `SELECT artist_id as id FROM artists WHERE name = ${ artist } ` )
52
+ . then ( ( { id } ) => id ) ;
53
+ }
54
+ return st . lastID ;
55
+ } )
44
56
)
45
- ) . then ( artistIds =>
46
- Promise . all (
47
- artistIds . map ( artistId =>
48
- db . run (
49
- SQL `INSERT OR REPLACE INTO album_artists (album_id, artist_id) VALUES (${ albumId } , ${ artistId } )`
57
+ ) ;
58
+
59
+ console . log ( albumId , title , artists , artistIds ) ;
60
+
61
+ await Promise . all (
62
+ artistIds . map ( artistId =>
63
+ db
64
+ . run (
65
+ SQL `INSERT OR IGNORE INTO album_artists (album_id, artist_id) VALUES (${ albumId } , ${ artistId } )`
50
66
)
51
- )
67
+ . then ( ( { stmt : st } ) => st . lastID )
52
68
)
53
69
) ;
54
70
55
- await Promise . all (
71
+ const genreIds = await Promise . all (
56
72
genres . map ( genre =>
57
73
db
58
- . run ( SQL `INSERT OR REPLACE INTO genres (name) VALUES (${ genre } )` )
59
- . then ( ( { stmt : st } ) => st . lastID )
74
+ . run ( SQL `INSERT OR IGNORE INTO genres (name) VALUES (${ genre } )` )
75
+ . then ( ( { stmt : st } ) => {
76
+ if ( st . changes === 0 ) {
77
+ return db
78
+ . get ( SQL `SELECT genre_id as id FROM genres WHERE name = ${ genre } ` )
79
+ . then ( ( { id } ) => id ) ;
80
+ }
81
+ return st . lastID ;
82
+ } )
60
83
)
61
- ) . then ( genreIds =>
62
- Promise . all (
63
- genreIds . map ( genreId =>
64
- db . run (
65
- SQL `INSERT OR REPLACE INTO album_genres (album_id, genre_id) VALUES (${ albumId } , ${ genreId } )`
84
+ ) ;
85
+
86
+ await Promise . all (
87
+ genreIds . map ( genreId =>
88
+ db
89
+ . run (
90
+ SQL `INSERT OR IGNORE INTO album_genres (album_id, genre_id) VALUES (${ albumId } , ${ genreId } )`
66
91
)
67
- )
92
+ . then ( ( { stmt : st } ) => st . lastID )
68
93
)
69
94
) ;
70
95
71
96
return Promise . resolve ( ) ;
72
97
} ) ;
73
98
} ;
74
99
75
- const request = ( db , base , i = 1 ) => {
100
+ const request = ( { db, base, maxPages , i = 1 } ) => {
76
101
const url = `${ base } ?page=${ i } ` ;
77
102
78
103
console . log ( 'Fetching:' , url ) ;
@@ -88,15 +113,15 @@ const request = (db, base, i = 1) => {
88
113
reviews . each ( iter ) ;
89
114
90
115
if ( i < maxPages ) {
91
- return request ( db , base , i + 1 ) ;
116
+ return request ( { db, base, maxPages , i : i + 1 } ) ;
92
117
}
93
118
return Promise . resolve ( ) ;
94
119
} ) ;
95
120
} ;
96
121
97
122
const pageRows = async ( db , rows ) => {
98
123
const queries = [ ] ;
99
- const promises = rows . splice ( 0 , 25 ) . map ( row =>
124
+ const promises = rows . splice ( 0 , MAX_SCORES_PER_PAGE ) . map ( row =>
100
125
fetch ( row . url )
101
126
. then ( r => r . text ( ) )
102
127
. then ( review => {
@@ -125,6 +150,7 @@ const pageRows = async (db, rows) => {
125
150
let doRequest = true ;
126
151
let offset = 0 ;
127
152
let limit = - 1 ;
153
+ const maxPages = argv . pages || MAX_PAGES ;
128
154
129
155
if ( argv . reviews ) {
130
156
base = 'https://pitchfork.com/reviews/albums/' ;
@@ -145,7 +171,7 @@ const pageRows = async (db, rows) => {
145
171
}
146
172
147
173
if ( doRequest ) {
148
- await request ( db , base ) ;
174
+ await request ( { db, base, maxPages } ) ;
149
175
}
150
176
151
177
const rows = await db . all (
0 commit comments