-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
We want to allow users to cache queries in a more permanent way, so we persist query results into Parquet files. In this PR we introduce the functionality for writing Parquet files to the Source Manager. We also introduce the concept of batched queries in our connectors. This means they can now pull all the data from a query in a way that doesn't exhaust the memory of the running machine, which can otherwise happen with huge queries.
- Loading branch information
1 parent
aab4a4e
commit 203e297
Showing
39 changed files
with
2,176 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--- | ||
"@latitude-data/postgresql-connector": minor | ||
"@latitude-data/source-manager": minor | ||
--- | ||
|
||
- Add the ability to run batched queries to the PostgreSQL connector. | ||
- Allow the source manager to write the result of a query into a Parquet file. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,3 +33,5 @@ yarn-error.log* | |
|
||
sites/**/* | ||
!sites/package.json | ||
|
||
apps/server/scripts/dist/**/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import typescript from '@rollup/plugin-typescript' | ||
|
||
/**
 * Rollup build for the `materialize_queries` script. Compiles the TypeScript
 * entry point with `tsconfig.scripts.json` and emits one ES module plus its
 * source map under `scripts/dist`.
 *
 * @typedef {import('rollup').RollupOptions} RollupOptions
 * @type {RollupOptions}
 */
const config = {
  input: 'scripts/materialize_queries/index.ts',
  output: {
    file: 'scripts/dist/materialize_queries.js',
    format: 'esm',
    sourcemap: true,
  },
  plugins: [
    typescript({ tsconfig: './tsconfig.scripts.json', sourceMap: true }),
  ],
  // These modules are not bundled; they stay as imports in the output file.
  external: ['fs', 'ora', '@latitude-data/source-manager'],
}

export default config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import fs from 'fs' | ||
import { DiskDriver, StorageDriver } from '@latitude-data/source-manager' | ||
import sourceManager from '../../src/lib/server/sourceManager' | ||
import ora from 'ora' | ||
|
||
/** Options accepted by the materialize script on the command line. */
interface CommandArgs {
  /** True when the first positional argument is the literal `debug`. */
  debug: boolean
}

/**
 * Reads the script's command-line arguments.
 *
 * Only the first argument after the node binary and script path is inspected;
 * anything else is ignored.
 *
 * @returns The parsed command options.
 */
function getArgs(): CommandArgs {
  // argv[0] is the node executable and argv[1] the script path.
  const [, , firstArg] = process.argv

  return { debug: firstArg === 'debug' }
}
|
||
/**
 * Makes sure the base directory of a disk-backed storage exists.
 *
 * Storage drivers that are not a `DiskDriver` are ignored.
 *
 * @param storage - Storage driver used for materialized queries.
 */
function ensureMaterializeDirExists(storage: StorageDriver) {
  if (storage instanceof DiskDriver) {
    const dir = storage.basePath
    const alreadyThere = fs.existsSync(dir)
    if (alreadyThere) return

    // `recursive` also creates any missing parent directories.
    fs.mkdirSync(dir, { recursive: true })
  }
}
|
||
/**
 * Batch size handed to `writeParquet` for every query.
 * NOTE(review): presumably the number of rows fetched per batch — confirm
 * against the source manager.
 */
const BATCH_SIZE = 4096

/**
 * Formats a duration in milliseconds as `m:ss.mmm`.
 *
 * The value is rounded to whole milliseconds first so that the seconds field
 * can never render as `60` and the output never contains float noise.
 *
 * @param elapsedMs - Duration in milliseconds; negative values are treated as 0.
 * @returns The duration with zero-padded seconds and milliseconds.
 */
function formatElapsed(elapsedMs: number): string {
  const totalMs = Math.max(0, Math.round(elapsedMs))
  const minutes = Math.floor(totalMs / 60000)
  const seconds = Math.floor((totalMs % 60000) / 1000)
  const millis = totalMs % 1000

  const paddedSeconds = String(seconds).padStart(2, '0')
  const paddedMillis = String(millis).padStart(3, '0')
  return `${minutes}:${paddedSeconds}.${paddedMillis}`
}

/**
 * Writes every materializable query to a parquet file through the source
 * manager's materialize storage, then terminates the process.
 *
 * The process exits with code 0 when all queries were written and with code 1
 * if anything inside the materialization step throws.
 *
 * @param debug - When true, prints per-query details, memory usage and the
 *   elapsed time instead of only updating the spinner text.
 */
async function materializeQueries(debug = false) {
  // The clock is started once, so the time printed after each query is the
  // cumulative time since the run began, not the time of that single query.
  let startTime = 0
  if (debug) {
    startTime = performance.now()
  }
  const spinner = ora().start()
  const defaultArgs = debug
    ? {
        onDebug: ({ memoryUsageInMb }: { memoryUsageInMb: string }) => {
          spinner.text = `Memory: ${memoryUsageInMb}`
        },
      }
    : {}
  try {
    // TODO: This is faked. Do the logic to get only materializable queries
    const allQueries = ['postgresql/query']
    const storage = sourceManager.materializeStorage

    ensureMaterializeDirExists(storage)

    for (const [index, query] of allQueries.entries()) {
      const status = `${index + 1} of ${allQueries.length} ${query}`
      if (debug) {
        // In debug mode the spinner text is reserved for memory usage.
        console.log(status)
      } else {
        spinner.text = status
      }
      const url = await storage.writeParquet({
        ...defaultArgs,
        queryPath: query,
        params: {},
        batchSize: BATCH_SIZE,
      })

      if (debug) {
        console.table({ query, batchSize: BATCH_SIZE, url })
        const elapsedMs = performance.now() - startTime
        console.log(`Time: ${formatElapsed(elapsedMs)} minutes`)
      }
    }

    spinner.stop()
    console.log('\nMaterialization complete 🎉')
    process.exit(0)
  } catch (e) {
    spinner.fail('Error materializing')
    console.error(e)
    process.exit(1)
  }
}
|
||
// Script entry point: parse the CLI flags and start the materialization.
// The promise is deliberately not awaited; `materializeQueries` ends the
// process itself via `process.exit`.
const { debug } = getArgs()

void materializeQueries(debug)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"extends": "./tsconfig.json", | ||
"compilerOptions": { | ||
"module": "esnext", | ||
"esModuleInterop": true, | ||
"target": "ES2018", | ||
"moduleResolution": "node", | ||
"outDir": "./scripts/dist", | ||
"rootDir": ".", | ||
"baseUrl": "..", | ||
"paths": { | ||
"@/*": ["./packages/source_manager/src/*"] | ||
} | ||
}, | ||
"include": ["./scripts/**/*.ts"], | ||
"exclude": ["node_modules", "**/*.svelte"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
version: '3.1' | ||
services: | ||
db: | ||
image: postgres | ||
restart: always | ||
environment: | ||
POSTGRES_USER: 'latitude' | ||
POSTGRES_PASSWORD: 'secret' | ||
ports: | ||
- '5436:5432' | ||
volumes: | ||
- ./init-db.sh:/docker-entrypoint-initdb.d/init-db.sh |
Oops, something went wrong.