Skip to content

Commit

Permalink
DB Importer (#806)
Browse files Browse the repository at this point in the history
DB Importer
  • Loading branch information
Harjot1Singh authored May 18, 2019
2 parents 5d43422 + d76e3e6 commit 591e575
Show file tree
Hide file tree
Showing 6 changed files with 208 additions and 69 deletions.
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ A digital representation of Sikh Bani and other Panthic texts with a public logb
- [Build](#build)
- [Database](#database)
- [JSON](#json)
- [Import](#import)
- [Contributing](#contributing)
- [Folder structure](#folder-structure)
- [Releases](#releases)
Expand Down Expand Up @@ -74,7 +75,8 @@ Bani compilations can be added to the `bani.json`. To define the lines it contai

# Build

It is possible to make small changes and build a database from the JSON files and in reverse to make batch changes and build the JSON files from the database.
It is possible to make small changes and build a database from the JSON files and in reverse to make batch changes and build the JSON files from the database. Additionally,
*some* SQLite files can be imported.

## Database

Expand All @@ -94,6 +96,16 @@ It is best practice to build the database, make changes to `database.sqlite`, an

**Docker** - `docker-compose up build-json`

## Import

It is possible to import other SQLite files. Run `npm run import -- --help` to see all available options.

The importer will generate placeholder Sources, Translation Sources, and fill in `-1` for Shabad sections and Writer IDs. These must be corrected in `build/database.sqlite`. (Refer to [Database](#database) and [JSON](#json) above).

```bash
npm run import -- nandlal.sqlite nandlal -o ID -s ShabadID -2 ShabadID -S SourceID -t English -t Punjabi -p PageNo -l LineNo -g Gurmukhi
```

# Contributing

By making a pull request of changes to the `data` folder, others can submit spelling mistakes, grammar errors, and translation improvements. Format for errors should follow the guideline for git commits:
Expand Down
6 changes: 6 additions & 0 deletions lib/build-sqlite.js
Original file line number Diff line number Diff line change
Expand Up @@ -377,10 +377,16 @@ const main = async () => {

console.log( 'Generating SQLite database'.header )

// Disconnect database file
await knex.destroy()

// Create directory for DB file
await removeDirAsync( OUTPUT_PATH )
createDir( OUTPUT_PATH )

// Reconnect database file
await knex.initialize()

// Create tables from schema in a transaction
await initialiseDatabase()
await setSQLiteSettings()
Expand Down
34 changes: 0 additions & 34 deletions lib/ids.js

This file was deleted.

157 changes: 157 additions & 0 deletions lib/import.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
const program = require( 'commander' )
const sqlite = require( 'sqlite' )

const { Lines, Shabads, Sources, TranslationSources, Translations, knex } = require( '../' )

const { generateId } = require( './utils' )
require( './string-colors' )

const columnOpts = [ 'shabadId', 'translation', 'page', 'line', 'sttm', 'gurmukhi', 'orderBy', 'source' ]

/**
 * Generates `count` new ids of the given `length` that do not collide with
 * any id already stored for `model`.
 * @param model An Objection model whose existing `id` column is queried.
 * @param length Length of each generated id.
 * @param count Number of fresh ids to produce.
 * @returns An array of `count` unique, unused ids.
 */
const findIds = async ( model, length, count ) => {
  // Load every existing id up-front so collisions can be rejected locally
  const existingRows = await model.query().select( 'id' )
  const taken = new Set( existingRows.map( row => row.id ) )

  const generated = []
  while ( generated.length < count ) {
    const candidate = generateId( length )
    // Skip anything already present in the database or generated this run
    if ( taken.has( candidate ) ) continue
    taken.add( candidate )
    generated.push( candidate )
  }

  return generated
}

/**
 * Imports lines, shabads, sources, and translations from an external SQLite
 * database into the project database. Placeholder values (`-1`) are written
 * for fields the source data cannot provide (writers, sections, languages);
 * these must be corrected manually afterwards.
 */
const main = async () => {
  program
    .version( '0.0.1' )
    .arguments( '<input> <tableName>' )
    .option( '-o, --order-by <column>', 'order column name' )
    .option( '-s, --shabad-id <column>', 'Shabad ID column name' )
    .option( '-S, --source <column>', 'Source column name', -1 )
    .option( '-2, --sttm-id <column>', 'STTM2 Shabad ID column name' )
    .option( '-t, --translation <column>', 'column name of a translation', ( val, prev ) => [ ...prev, val ], [] )
    .option( '-p, --page <column>', 'Page number column name' )
    .option( '-l, --line <column>', 'Line number column name' )
    .option( '-g, --gurmukhi <column>', 'Gurmukhi column name' )
    .parse( process.argv )

  const [ filename, tableName ] = program.args

  if ( !( filename && tableName ) ) program.outputHelp()

  // Extract the column names, provided as options
  const { orderBy, shabadId, sttmId, source, gurmukhi, translation, line, page } = Object
    .entries( program.opts() )
    .filter( ( [ , value ] ) => !!value )
    .filter( ( [ key ] ) => columnOpts.includes( key ) )
    .reduce( ( opts, [ key, value ] ) => ( { ...opts, [ key ]: value } ), {} )

  console.log( `Running import CLI with options: ${JSON.stringify( program.opts() )}\n`.subheader )

  // Open target DB and fetch all the target data, then close it
  // NOTE: table/column names come straight from CLI arguments and are
  // interpolated into SQL. Acceptable for a local dev tool, but this must
  // never be exposed to untrusted input.
  console.log( `Opening database ${filename}`.header )
  const db = await sqlite.open( filename )
  const lines = await db.all( `SELECT * FROM ${tableName} ORDER BY ${orderBy}` )
  const shabadIds = ( await db.all( `SELECT DISTINCT ${shabadId} FROM ${tableName}` ) ).map( x => x[ shabadId ] )
  const sources = ( await db.all( `SELECT DISTINCT ${source} FROM ${tableName}` ) ).map( x => x[ source ] )
  await db.close()

  // Generate some Line and Shabad IDs and map them in
  console.log( 'Generating IDs'.header )
  const newShabadIDs = await findIds( Shabads, 3, shabadIds.length )
  const newLineIDs = await findIds( Lines, 4, lines.length )
  const newSourceId = ( await Sources.query().orderBy( 'id', 'desc' ).first() ).id + 1
  const newTranslationSourceId = ( await TranslationSources.query().orderBy( 'id', 'desc' ).first() ).id + 1

  // Get last order IDs, so new rows are appended after existing ones
  const { orderId: shabadOrderId } = await Shabads.query().orderBy( 'order_id', 'desc' ).first()
  const { orderId: lineOrderId } = await Lines.query().orderBy( 'order_id', 'desc' ).first()

  console.log( 'Mapping data'.header )
  // Map all the old shabad IDs to new ones
  const shabadIDMap = shabadIds.reduce( ( ids, id, index ) => {
    ids[ id ] = newShabadIDs[ index ]
    return ids
  }, {} )

  // Map each source name to the numeric id its generated Source row will use
  const sourceIdMap = sources.reduce( ( ids, name, index ) => {
    ids[ name ] = newSourceId + index
    return ids
  }, {} )

  // Map each original shabad id to its generated source id
  // [FIX] previously the arrow parameter shadowed the `source` column option,
  // so `line[ source ]` looked the line up by source *value* and the map
  // always fell back to -1
  const sourceMap = lines.reduce( ( map, lineData ) => {
    const sourceId = sourceIdMap[ lineData[ source ] ]
    map[ lineData[ shabadId ] ] = sourceId !== undefined ? sourceId : -1
    return map
  }, {} )

  // Map each original shabad id to its STTM2 id, taken from the shabad's first line
  // [FIX] previously `lines[ sttmId ]` indexed the lines *array* by a column
  // name, yielding undefined for every shabad
  const sttmIdMap = lines.reduce( ( map, lineData ) => {
    const id = lineData[ shabadId ]
    if ( map[ id ] === undefined ) map[ id ] = lineData[ sttmId ]
    return map
  }, {} )

  // Generate placeholder sources (names must be corrected manually afterwards)
  const newSources = sources.map( ( name, index ) => ( {
    name_english: `${name}-new`,
    name_gurmukhi: `${name}-new`,
    id: newSourceId + index,
    length: -1,
    page_name_english: -( index + 1 ),
    page_name_gurmukhi: -( index + 1 ),
  } ) )

  // Generate new Shabads, with placeholder writer and section ids
  const newShabads = shabadIds.map( ( id, index ) => ( {
    id: shabadIDMap[ id ],
    writer_id: -1,
    section_id: -1,
    sttm_id: sttmIdMap[ id ],
    order_id: shabadOrderId + index + 1,
    source_id: sourceMap[ id ],
  } ) )

  // Generate the new lines, defaulting missing page/line numbers to -1
  const newLines = lines.map( ( data, index ) => ( {
    id: newLineIDs[ index ],
    shabad_id: shabadIDMap[ data[ shabadId ] ],
    gurmukhi: data[ gurmukhi ],
    source_page: data[ page ] || -1,
    source_line: data[ line ] || -1,
    order_id: lineOrderId + 1 + index,
  } ) )

  // Generate translation sources for translations, with placeholder language ids
  const newTranslationSources = translation.map( ( name, index ) => ( {
    id: newTranslationSourceId + index,
    name_gurmukhi: name,
    name_english: name,
    source_id: newSourceId,
    language_id: -1,
  } ) )

  // Generate the translations, one row per (translation source, line) pair
  const newTranslations = translation
    .map( ( name, index ) => lines.map( ( data, lineIndex ) => ( {
      line_id: newLineIDs[ lineIndex ],
      translation_source_id: newTranslationSourceId + index,
      translation: data[ name ] || '',
      additional_information: '{}',
    } ) ) )
    .reduce( ( allTranslations, translations ) => allTranslations.concat( translations ), [] )

  // Now, insert all the data atomically - any failure rolls everything back
  console.log( 'Inserting into SQLite database'.header )
  await knex.transaction( async trx => {
    // Insert sources
    await Promise.all( newSources.map( source => Sources.query( trx ).insert( source ) ) )
    // Insert shabad IDs
    await Promise.all( newShabads.map( shabad => Shabads.query( trx ).insert( shabad ) ) )
    // Insert lines
    await Promise.all( newLines.map( line => Lines.query( trx ).insert( line ) ) )
    // Insert translation sources
    await Promise.all( newTranslationSources.map( source => (
      TranslationSources.query( trx ).insert( source )
    ) ) )
    // Insert translations
    await Promise.all( newTranslations.map( data => Translations.query( trx ).insert( data ) ) )
  } )

  console.log( 'Import complete. Please update the default values in `build/database.sqlite`. Run `npm run build-json`, followed by `npm run build-sqlite`.'.success )
}

// Run the importer, exiting with the appropriate status code
main()
  .then( () => process.exit( 0 ) )
  .catch( error => {
    console.error( error )
    process.exit( 1 )
  } )
Loading

0 comments on commit 591e575

Please sign in to comment.