tensorflow · copybara-service · Jul 11, 2020
diff --git a/tensorflow_lite_support/metadata/cc/metadata_version.cc b/tensorflow_lite_support/metadata/cc/metadata_version.cc
@@ -44,6 +44,7 @@ enum class SchemaMembers {
   kProcessUnitOptionsSentencePieceTokenizerOptions = 4,
   kSubGraphMetadataInputTensorGroups = 5,
   kSubGraphMetadataOutputTensorGroups = 6,
+  kProcessUnitOptionsRegexTokenizerOptions = 7,
 };
 
 // Helper class to compare semantic versions in terms of three integers, major,
@@ -104,6 +105,8 @@ Version GetMemberVersion(SchemaMembers member) {
       return Version(1, 2, 0);
     case SchemaMembers::kSubGraphMetadataOutputTensorGroups:
       return Version(1, 2, 0);
+    case SchemaMembers::kProcessUnitOptionsRegexTokenizerOptions:
+      return Version(1, 2, 1);
     default:
       // Should never happen.
       TFLITE_LOG(FATAL) << "Unsupported schema member: "
@@ -165,6 +168,12 @@ void UpdateMinimumVersionForTable<tflite::ProcessUnit>(
             SchemaMembers::kProcessUnitOptionsSentencePieceTokenizerOptions),
         min_version);
   }
+  if (process_unit_type == ProcessUnitOptions_RegexTokenizerOptions) {
+    UpdateMinimumVersion(
+        GetMemberVersion(
+            SchemaMembers::kProcessUnitOptionsRegexTokenizerOptions),
+        min_version);
+  }
 }
 
 template <>

diff --git a/...e_support/metadata/java/src/java/org/tensorflow/lite/support/metadata/MetadataParser.java b/...e_support/metadata/java/src/java/org/tensorflow/lite/support/metadata/MetadataParser.java
@@ -21,7 +21,7 @@ public final class MetadataParser {
    * The version of the metadata parser that this metadata extractor library is depending on. The
    * value should match the value of "Schema Semantic version" in metadata_schema.fbs.
    */
-  public static final String VERSION = "1.2.0";
+  public static final String VERSION = "1.2.1";
 
   private MetadataParser() {}
 }
diff --git a/tensorflow_lite_support/metadata/metadata_schema.fbs b/tensorflow_lite_support/metadata/metadata_schema.fbs
@@ -50,7 +50,7 @@ namespace tflite;
 // for which they were added.
 //
 // LINT.IfChange
-// Schema Semantic version: 1.2.0
+// Schema Semantic version: 1.2.1
 // LINT.ThenChange(//tensorflow_lite_support/\
 //     metadata/java/src/java/org/tensorflow/lite/support/metadata/\
 //     MetadataParser.java)
@@ -68,6 +68,7 @@ file_identifier "M001";
 //         Added output_process_units to SubGraphMetadata.
 // 1.2.0 - Added input_tensor_group to SubGraphMetadata.
 //         Added output_tensor_group to SubGraphMetadata.
+// 1.2.1 - Added RegexTokenizerOptions to ProcessUnitOptions.
 
 // File extension of any written files.
 file_extension "tflitemeta";
@@ -448,6 +449,19 @@ table SentencePieceTokenizerOptions {
   vocab_file:[AssociatedFile];
 }
 
+// Splits strings by the occurrences of pattern and converts the tokens into
+// ids. For example, given
+//   pattern: "\W+",
+//   string: "Words, words, words.",
+// the tokens after split are: "Words", "words", "words", "".
+// The tokens can then be converted into ids according to the vocab_file.
+// Added in: 1.2.1
+table RegexTokenizerOptions {
+  pattern:string;  // The regex pattern whose occurrences split the string.
+  // The vocabulary files used to convert these tokens into ids.
+  vocab_file:[AssociatedFile];
+}
+
 // Options that are used when processing the tensor.
 union ProcessUnitOptions {
   NormalizationOptions,
@@ -456,7 +470,9 @@ union ProcessUnitOptions {
   // Added in: 1.1.0
   BertTokenizerOptions,
   // Added in: 1.1.0
-  SentencePieceTokenizerOptions
+  SentencePieceTokenizerOptions,
+  // Added in: 1.2.1
+  RegexTokenizerOptions
 }
 
 // A process unit that is used to process the tensor out-of-graph.