Edit and add documentation #85

Merged
merged 1 commit on Feb 27, 2020
3 changes: 3 additions & 0 deletions kv/tikv/server.go
@@ -130,8 +130,11 @@ func (server *Server) RawScan(_ context.Context, req *kvrpcpb.RawScanRequest) (*

reader, err := server.innerServer.Reader(req.Context)
if !rawRegionError(err, response) {
// To scan, we need to get an iterator for the underlying storage.
it := reader.IterCF(req.Cf)
defer it.Close()
// Initialize the iterator. The loop keeps going while the iterator is still valid (i.e.,
// we have not reached the end of the DB) and we haven't exceeded the client-specified limit.
for it.Seek(req.StartKey); it.Valid() && len(response.Kvs) < int(req.Limit); it.Next() {
item := it.Item()
key := item.KeyCopy(nil)
38 changes: 33 additions & 5 deletions kv/tikv/transaction/doc.go
@@ -9,14 +9,42 @@ package transaction
// Note that there are two kinds of transactions in play: TinySQL transactions are collaborative between TinyKV and its
// client (e.g., TinySQL). They are implemented using multiple TinyKV commands and ensure that multiple SQL commands can
// be executed atomically. There are also mvcc transactions which are an implementation detail of this
// layer in TinyKV (represented by Txn in tikv/storage/exec/transaction.go). These ensure that a *single* TinyKV command
// layer in TinyKV (represented by MvccTxn in tikv/transaction/mvcc/transaction.go). These ensure that a *single* TinySQL command
// is executed atomically.
//
// *Locks* are used to implement TinySQL transactions. Setting or checking a lock in a TinySQL transaction is lowered to
// writing or reading a key and value in the InnerServer store. TODO explain this encoding in detail.
// writing or reading a key and value in the InnerServer store.
//
// *Latches* are used to implement mvcc transactions and are not visible to the client. They are stored outside the
// underlying storage (or equivalently, you can think of every key having its own latch). TODO explain latching in more detail.
// underlying storage (or equivalently, you can think of every key having its own latch). See the latches package for details.
//
// Within this package, `commands` contains code to lower TinySQL requests to mvcc transactions. `mvcc` contains code for interacting with the underlying storage
// (InnerServer).
// Within this package, `commands` contains code to lower TinySQL requests to mvcc transactions. `mvcc` contains code for
// interacting with the underlying storage (InnerServer).
//
// Each transactional command is represented by a type which implements the `Command` interface and is defined in `commands`.
// See the `Command` docs for details on how a command is executed. The gRPC layer will handle each request on its own thread.
// We execute the command to completion on its own thread, relying on latches for thread safety. In TiKV there is a scheduler
// to optimise execution.
//
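To make that execution flow concrete, here is a rough sketch of handling a single request: latch every key the command might write, run the command to completion against an mvcc transaction, then release the latches. The `Command` interface, `MvccTxn`, and the latches type do exist in the packages named above, but the method names and signatures below are assumptions for illustration only, not the actual API.

```go
package txnsketch

// Command stands in for the interface defined in `commands`: a request that knows
// which keys it may write and how to run itself inside an mvcc transaction.
// (Hypothetical method names.)
type Command interface {
	WillWrite() [][]byte                       // keys this command might modify
	Execute(txn *MvccTxn) (interface{}, error) // run the command to completion
}

// MvccTxn and Latches are placeholders for the types in the `mvcc` and `latches` packages.
type MvccTxn struct{ StartTS uint64 }

type Latches interface {
	Acquire(keys [][]byte)
	Release(keys [][]byte)
}

// handleCommand mirrors the description above: the gRPC layer hands each request to
// its own thread, which latches every key the command might write, executes the
// command to completion, and then releases the latches.
func handleCommand(latches Latches, cmd Command, startTS uint64) (interface{}, error) {
	keys := cmd.WillWrite()
	latches.Acquire(keys)
	defer latches.Release(keys)
	return cmd.Execute(&MvccTxn{StartTS: startTS})
}
```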
// Within the `mvcc` package, `Lock` and `Write` provide abstractions for lowering locks and writes into simple keys and values.
// `Scanner` is an abstraction for implementing the transactional scan command - this is complicated because we must scan
// as if we were iterating over user key/values, rather than over the encoded key/values which are stored in the DB.
//
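As a concrete illustration of "iterating over user key/values", a caller of `Scanner` should see at most one value per user key, even though the underlying CFs store one entry per (key, timestamp). The `Next` signature below is an assumption, not `Scanner`'s actual API; it only sketches the contract implied by the invariant documented in scanner.go.

```go
package scansketch

// KvPair is a user-visible key/value pair.
type KvPair struct{ Key, Value []byte }

// userScanner is a hypothetical view of Scanner: Next returns the next user
// key/value pair, or a nil key once the scanner is finished.
type userScanner interface {
	Next() (key []byte, value []byte, err error)
}

// collect drains a scanner into user-visible pairs, stopping at limit entries or
// when the scanner reports that it is finished.
func collect(s userScanner, limit int) ([]KvPair, error) {
	var pairs []KvPair
	for len(pairs) < limit {
		key, value, err := s.Next()
		if err != nil {
			return nil, err
		}
		if key == nil { // finished: the scanner cannot be used any further
			break
		}
		pairs = append(pairs, KvPair{Key: key, Value: value})
	}
	return pairs, nil
}
```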
// ## Encoding user key/values
//
// The mvcc strategy is essentially to store all data (committed and uncommitted) at every point in time. So for example, if we store
// a value for a key, then store another value (a logical overwrite) at a later time, both values are preserved in the underlying
// storage.
//
// This is implemented by encoding user keys with their timestamps (the starting timestamp of the transaction in which they are
// written) to make an encoded key (see codec.go). The `default` CF is a mapping from encoded keys to their values.
//
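A minimal sketch of such an encoding, assuming the timestamp is simply appended to the user key in big-endian order; the real byte layout (including how keys are made comparable and how versions are ordered) is defined in codec.go, so treat this as illustration only.

```go
package codecsketch

import "encoding/binary"

// encodeKey appends the big-endian timestamp to the user key, so every version of
// a key is stored under its own encoded key while all versions of that key sort
// together. Illustrative layout only; see codec.go for the real encoding.
func encodeKey(userKey []byte, ts uint64) []byte {
	encoded := append([]byte{}, userKey...)
	var buf [8]byte
	binary.BigEndian.PutUint64(buf[:], ts)
	return append(encoded, buf[:]...)
}

// decodeKey splits an encoded key back into the user key and its timestamp.
func decodeKey(encoded []byte) (userKey []byte, ts uint64) {
	n := len(encoded) - 8
	return encoded[:n], binary.BigEndian.Uint64(encoded[n:])
}
```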
// Locking a key means writing into the `lock` CF. In this CF, we use the user key (i.e., not the encoded key, so that a key is
// locked for all timestamps). The value in the `lock` CF consists of the 'primary key' for the transaction, the kind of lock
// ('put', 'delete', or 'rollback'), the start timestamp of the transaction, and the lock's TTL (time to live). See lock.go for
// the implementation.
//
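Based on that description, a lock record might look like the sketch below; the field and type names are assumptions, and lock.go holds the real definition.

```go
package locksketch

// LockKind is the kind of operation the lock protects: put, delete, or rollback.
type LockKind byte

const (
	LockKindPut LockKind = iota
	LockKindDelete
	LockKindRollback
)

// Lock is an illustrative version of the value written to the `lock` CF. It is
// stored under the plain user key (not the encoded key), so holding it locks the
// key for all timestamps.
type Lock struct {
	Primary []byte   // primary key of the transaction holding the lock
	Kind    LockKind // put, delete, or rollback
	StartTS uint64   // start timestamp of the locking transaction
	TTL     uint64   // time to live for the lock
}
```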
// The status of values is stored in the `write` CF. Here we map keys encoded with their commit timestamps (i.e., the time at
// which a transaction is committed) to a value containing the transaction's starting timestamp and the kind of write ('put',
// 'delete', or 'rollback'). Note that for transactions which are rolled back, the start timestamp is used as the commit
// timestamp in the encoded key.
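Putting the two CFs together, reading a key at some timestamp amounts to: find the newest `write` entry whose commit timestamp is at or below the read timestamp, then use the start timestamp recorded there to fetch the value from the `default` CF. The sketch below shows that lookup in a self-contained form; the map-based stand-ins for the column families and the function name are illustrative only.

```go
package writesketch

// WriteKind is the kind of write recorded in the `write` CF.
type WriteKind byte

const (
	WriteKindPut WriteKind = iota
	WriteKindDelete
	WriteKindRollback
)

// Write is an illustrative version of the value in the `write` CF: the key is the
// user key encoded with the *commit* timestamp, and the value records the start
// timestamp of the writing transaction and the kind of write.
type Write struct {
	StartTS uint64
	Kind    WriteKind
}

// latestVisible sketches an mvcc read for a single user key. writeCF maps commit
// timestamps to Write records and defaultCF maps start timestamps to values, both
// restricted to one user key so the sketch stays self-contained.
func latestVisible(writeCF map[uint64]Write, defaultCF map[uint64][]byte, readTS uint64) ([]byte, bool) {
	var bestTS uint64
	var bestWrite Write
	found := false
	for commitTS, w := range writeCF {
		if commitTS <= readTS && (!found || commitTS > bestTS) {
			bestTS, bestWrite, found = commitTS, w, true
		}
	}
	if !found || bestWrite.Kind != WriteKindPut {
		return nil, false // no committed put is visible at readTS
	}
	value, ok := defaultCF[bestWrite.StartTS]
	return value, ok
}
```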
13 changes: 13 additions & 0 deletions kv/tikv/transaction/latches/latches.go
@@ -6,6 +6,19 @@ import (
"github.com/pingcap-incubator/tinykv/kv/tikv/transaction/mvcc"
)

// Latching provides atomicity for TinyKV commands. This should not be confused with SQL transactions, which provide atomicity
// across multiple TinyKV commands. For example, consider two commit commands: these write to multiple keys/CFs, so if they race,
// inconsistent data could be written. By latching the keys each command might write, we ensure that the two commands will not
// race to write the same keys.
//
// A latch is a per-key lock. There is only one latch per user key, not one per CF or one for each encoded key. Latches are
// only needed for writing. Only one thread can hold a latch at a time and all keys that a command might write must be locked
// at once.
//
// Latching is implemented using a single map which maps keys to a Go WaitGroup. Access to this map is guarded by a mutex
// to ensure that latching is atomic and consistent. Since the mutex is a global lock, it would cause intolerable contention
// in a real system.

type Latches struct {
// Before modifying any property of a key, the thread must have the latch for that key. `Latches` maps each latched
// key to a WaitGroup. Threads who find a key locked should wait on that WaitGroup.
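The latching scheme described in this doc comment can be sketched as below: one map from key to WaitGroup, guarded by one mutex, with all-or-nothing acquisition of a command's keys. The method names and the wait-and-retry loop are assumptions; latches.go holds the real implementation.

```go
package latchsketch

import "sync"

// latches is an illustrative version of the scheme described above: a single map
// from user key to a WaitGroup, guarded by one mutex.
type latches struct {
	mu      sync.Mutex
	latched map[string]*sync.WaitGroup
}

func newLatches() *latches {
	return &latches{latched: map[string]*sync.WaitGroup{}}
}

// tryAcquire latches every key at once, or returns a WaitGroup held by another
// command so the caller can wait and retry. All-or-nothing acquisition is what
// prevents two commands from racing on an overlapping set of keys.
func (l *latches) tryAcquire(keys [][]byte) *sync.WaitGroup {
	l.mu.Lock()
	defer l.mu.Unlock()
	for _, k := range keys {
		if wg, ok := l.latched[string(k)]; ok {
			return wg // some key is already latched: wait on its WaitGroup, then retry
		}
	}
	wg := &sync.WaitGroup{}
	wg.Add(1)
	for _, k := range keys {
		l.latched[string(k)] = wg // all of this command's keys share one WaitGroup
	}
	return nil
}

// release drops the latches for keys and wakes every command waiting on them.
func (l *latches) release(keys [][]byte) {
	l.mu.Lock()
	defer l.mu.Unlock()
	var wg *sync.WaitGroup
	for _, k := range keys {
		wg = l.latched[string(k)]
		delete(l.latched, string(k))
	}
	if wg != nil {
		wg.Done()
	}
}

// acquire blocks until it has latched every key.
func (l *latches) acquire(keys [][]byte) {
	for {
		wg := l.tryAcquire(keys)
		if wg == nil {
			return
		}
		wg.Wait()
	}
}
```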
2 changes: 1 addition & 1 deletion kv/tikv/transaction/mvcc/scanner.go
@@ -7,7 +7,7 @@ import (

// Scanner is used for reading multiple sequential key/value pairs from the storage layer. It is aware of the implementation
// of the storage layer and returns results suitable for users.
// Invariant: either the scanner is finished and can not be used, or it is ready to return a value immediately.
// Invariant: either the scanner is finished and cannot be used, or it is ready to return a value immediately.
type Scanner struct {
writeIter engine_util.DBIterator
txn *RoTxn