Conversation
01f18f0 to
ff3dae6
Compare
internal/app/testdata/wal.input
Outdated
| @@ -1,2 +1,2 @@ | |||
| {"commit_lsn":957393888,"records":[{"action":"I","xid":1055,"lsn":"0/3910AB70","nextlsn":"","timestamp":"2023-08-22 10:56:08.151-03","schema":"public","table":"a","columns":[{"name":"a","type":"integer","value":4}],"pk":[]}]} | |||
| {"commit_lsn":957398296,"records":[{"action":"I","xid":1058,"lsn":"0/3910B898","nextlsn":"","timestamp":"2023-08-22 14:44:02.043586-03","schema":"public","table":"t","columns":[{"name":"id","type":"integer","value":200232},{"name":"name","type":"text","value":"100"}],"pk":[{"name":"id","type":"integer"}]}]} No newline at end of file | |||
| {"commit_lsn":957398296,"records":[{"action":"I","xid":1058,"lsn":"0/3910B898","nextlsn":"","timestamp":"2023-08-22 14:44:02.043586-03","schema":"public","table":"t","columns":[{"name":"id","type":"integer","value":200232},{"name":"name","type":"text","value":"100"}],"pk":[{"name":"id","type":"integer"}]}]} | |||
There was a problem hiding this comment.
changed the first example to have the same schema for current test cases
| signature, err := b.sign(capnpTx) | ||
| if err != nil { | ||
| return fmt.Errorf("sign: %s", err) | ||
| if err := b.dbMngr.Replay(ctx, tx); err != nil { |
There was a problem hiding this comment.
local WAL replay before committing LSN
| return nil | ||
| } | ||
|
|
||
| func (b *BasinStreamer) sign(tx basincapnp.Tx) ([]byte, error) { |
There was a problem hiding this comment.
we wouldn't need sign and push because we can re use the file uploader's functions
internal/app/streamer.go
Outdated
|
|
||
| // Create sink table in local DB | ||
| if err := b.dbMngr.Setup(ctx); err != nil { | ||
| return fmt.Errorf("cannot setup db: %s", err) |
There was a problem hiding this comment.
creates and sets up a new local db before starting replication
| } | ||
|
|
||
| // queryFromWAL creates a query for a WAL TX records. | ||
| func (dbm *DBManager) queryFromWAL(tx *pgrepl.Tx) string { |
There was a problem hiding this comment.
pg -> ddb type remappings is a todo for a separate PR
| WHERE c.table_name = $1; | ||
| `, rel, | ||
| ) | ||
| columns, err := inspectTable(ctx, tx, rel) |
There was a problem hiding this comment.
extracted this logic to get table schema out into a new function
| } | ||
| defer rows.Close() | ||
|
|
||
| type column struct { |
There was a problem hiding this comment.
made the column public and moved to the app package since it needs to be shared with the local db
| func (dbm *DBManager) Replay(ctx context.Context, tx *pgrepl.Tx) error { | ||
| if dbm.windowPassed() { | ||
| slog.Info("replacing current db before replaying further txs") | ||
| if err := dbm.replace(ctx); err != nil { |
There was a problem hiding this comment.
If export or upload fails, the replicator will exit. the next time it starts it will first upload all existing db files and then start replicating on a fresh db
| createdAT time.Time | ||
| cols []Column | ||
| winSize time.Duration | ||
| uploader *BasinUploader |
There was a problem hiding this comment.
reuses file uploader to upload the exported files
| return strings.Join(queries, "\n") | ||
| } | ||
|
|
||
| func (dbm *DBManager) replace(ctx context.Context) error { |
There was a problem hiding this comment.
this is how the db is being replaced when the winSize exceeds
| return nil | ||
| } | ||
|
|
||
| func (bp *basinProviderMock) Upload( |
There was a problem hiding this comment.
the mock impl, makes a copy of the file that was uploaded. to test, we will import this file into a db and make assertions
d01db14 to
d7d0f6f
Compare
| } | ||
|
|
||
| // UploadAll uploads all db dumps in the db dir. | ||
| func (dbm *DBManager) UploadAll(ctx context.Context) error { |
There was a problem hiding this comment.
UploadAll is called when the replication starts to send any remaining data left.
| return fmt.Errorf("upload all: %s", err) | ||
| } | ||
|
|
||
| basinStreamer := app.NewBasinStreamer(ns, r, dbm) |
There was a problem hiding this comment.
Streamer doesn't bind to BasinProvider anymore because going forward, BasinProvider is only used for fetching and updating metadata. Uploader is used to upload files. DBManager is instantiated with an Uploader.
There was a problem hiding this comment.
Just a comment about an alternative more decoupled architecture. No need to change it.
One option would be to not have the Uploader as a dependency of DBManager. You could use a channel to synchronize both components and make them independent. Every time a file is ready to be uploaded, DBManager notifies the channel, Uploader picks up the notification and uploads the file. Notice that I'm not saying to make it async, it would still be blocking, but more decoupled and easier to test the parts independently.
| // BasinProvider ... | ||
| type BasinProvider interface { | ||
| Create(context.Context, string, string, basincapnp.Schema, common.Address) (bool, error) | ||
| Push(context.Context, string, string, basincapnp.Tx, []byte) error |
There was a problem hiding this comment.
removing Push from here as it will not be used now. I left it intact in the capnp definition maybe we can clean that up when we remove capnp otherwise we need to regenerate the files in this PR
| } | ||
|
|
||
| // Push pushes Postgres tx to the server. | ||
| func (bp *BasinProvider) Push(ctx context.Context, ns string, rel string, tx basincapnp.Tx, sig []byte) error { |
There was a problem hiding this comment.
Push will not be used anymore
brunocalza
left a comment
There was a problem hiding this comment.
Nice work! In general, it looks good and correct to me. Left some comments for your considerations. Nothing critical, just minor improvements from my perspective.
cmd/basin/publication.go
Outdated
|
|
||
| func newPublicationCreateCommand() *cli.Command { | ||
| var owner, dburi, provider string | ||
| var owner, dburi, provider, winSize string |
cmd/basin/publication.go
Outdated
| } | ||
|
|
||
| func mkDBDir(dir, pub string) error { | ||
| if err := os.Mkdir(path.Join(dir, pub), 0o755); err != nil { |
There was a problem hiding this comment.
Maybe better using MkdirAll so it ignores if the dir already exists?
MkdirAll creates a directory named path, along with any necessary parents, and returns nil, or else returns an error. The permission bits perm (before umask) are used for all directories that MkdirAll creates. If path is already a directory, MkdirAll does nothing and returns nil.
internal/app/db.go
Outdated
|
|
||
| cols := strings.Join(columnNames, ", ") | ||
| valsStr := strings.Join(vals, ", ") | ||
| query := fmt.Sprintf( |
There was a problem hiding this comment.
you don't have to create multiple insert stmts (one for each record), you can do all into one statement, e.g. INSERT INTO %s (%s) values (%s), (%s), (%s), ...
There was a problem hiding this comment.
nice! i didn't know that you could write an insert statement like that
internal/app/db.go
Outdated
| } | ||
|
|
||
| // Create a new db | ||
| db, err := dbm.NewDB() |
There was a problem hiding this comment.
this pattern is weird
db, err := dbm.NewDB()
...
dbm.db = db
you're returning a db and setting into in the dbm later. Why not simply do the dbm.db = db inside NewDB?
There was a problem hiding this comment.
also, if every time you call NewDB you have to call Setup, you can make Setup private and be called inside NewDb.
| return fmt.Errorf("upload all: %s", err) | ||
| } | ||
|
|
||
| basinStreamer := app.NewBasinStreamer(ns, r, dbm) |
There was a problem hiding this comment.
Just a comment about an alternative more decoupled architecture. No need to change it.
One option would be to not have the Uploader as a dependency of DBManager. You could use a channel to synchronize both components and make them independent. Every time a file is ready to be uploaded, DBManager notifies the channel, Uploader picks up the notification and uploads the file. Notice that I'm not saying to make it async, it would still be blocking, but more decoupled and easier to test the parts independently.
internal/app/db.go
Outdated
| return err | ||
| } | ||
|
|
||
| oldDBFname := dbm.dbFname |
There was a problem hiding this comment.
nit: simply do oldDBPath := path.Join(dbm.dbDir, dbm.dbFname) in L99
| return fmt.Errorf("cannot stat file: %s", err) | ||
| } | ||
|
|
||
| progress := progressbar.DefaultBytes( |
There was a problem hiding this comment.
not sure if the progress bar makes sense in a background routine, this is more useful for a user when executing a command
There was a problem hiding this comment.
can pass in nil to remove it but it looks okay to me since you can tell that the "replication is paused while this file uploads".
may we can change it later based on the user feedback
internal/app/streamer.go
Outdated
| func (b *BasinStreamer) Run(ctx context.Context) error { | ||
| txs, table, err := b.replicator.StartReplication(ctx) | ||
| // Open a local DB for replaying txs | ||
| db, err := b.dbMngr.NewDB() |
1dd5599 to
9ea6305
Compare
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
Signed-off-by: avichalp <hi@avichalp.me>
9ea6305 to
e1358d9
Compare
Summary
replaythe WAL records locally on a duck db instancereplication startcommand, all the existing db files are uploaded before we start replicating the target database to local duck db.DBManagerMaintains a time window. It is configurable. By default it is set to 3600 seconds. When the window is passed the current database is exported and uploaded to the provider.