diff --git a/examples/bert/bert_model.py b/examples/bert/bert_model.py
index 4569376aeb..3350fdc7dc 100644
--- a/examples/bert/bert_model.py
+++ b/examples/bert/bert_model.py
@@ -13,7 +13,11 @@
 # limitations under the License.
 """Bert model and layer implementations.
 
-We should work to replace this with components from the keras-nlp library.
+These components come from the tensorflow official model repository for BERT:
+https://github.com/tensorflow/models/tree/master/official/nlp/modeling
+
+This is to get us into a testable state. We should work to replace all of these
+components with components from the keras-nlp library.
 """
 
 import tensorflow as tf
diff --git a/examples/bert/create_pretraining_data.py b/examples/bert/create_pretraining_data.py
index 859503eac9..7bf07d37d9 100644
--- a/examples/bert/create_pretraining_data.py
+++ b/examples/bert/create_pretraining_data.py
@@ -24,6 +24,9 @@
 sentence pairs, as will the original paper. So a 20gb source of wikipedia and
 bookscorpus will result in a 400gb dataset.
 
+This script is adapted from the original BERT repository:
+https://github.com/google-research/bert/blob/master/create_pretraining_data.py
+
 Usage:
 python create_pretraining_data.py \
   --input_files ~/datasets/bert-sentence-split-data/shard_0.txt \