Skip to content
This repository
Browse code

API-CHANGE: i18nTextCollector can now extract the new translatable entities (<%t) from templates and populate them in language tables (uses PEG parser)
  • Loading branch information...
commit c314d0b6595f703f5391ee88ab6a9f265f443ff5 1 parent f926242
Julian Seidenberg authored April 16, 2012
224  i18n/i18nTextCollector.php
@@ -195,70 +195,134 @@ protected function processModule($module) {
195 195
 	}
196 196
 	
197 197
 	public function collectFromCode($content, $module) {
198  
-		$entities = array();
  198
+		$entitiesArr = array();
  199
+
  200
+		$newRegexRule = '/_t\s*\(\s*(.+?)\s*\)\s*;\s*/is';
  201
+
  202
+		$matchesArray = array();    //array for the matches to go into
  203
+		if (preg_match_all($newRegexRule, $content, $matchesArray) > 0) {   //we have at least one match
  204
+
  205
+			//take all the matched _t entities
  206
+			foreach($matchesArray[1] as $match) {
  207
+				//replace all commas with backticks (unique character to explode on later)
  208
+				$replacedMatch = preg_replace('/("|\'|_LOW|_MEDIUM|_HIGH)\s*,\s*([\'"]|"|\'|array|PR)/','$1`$2',$match);  //keep array text
  209
+
  210
+				//$replacedMatch = trim($replacedMatch," \"'\n");  //remove starting and ending quotes
  211
+				$replacedMatch = trim($replacedMatch," \n");  //remove starting and ending spaces and newlines
199 212
 
200  
-		$tokens = token_get_all("<?php\n" . $content);
201  
-		$inTransFn = false;
202  
-		$inConcat = false;
203  
-		$currentEntity = array();
204  
-		foreach($tokens as $token) {
205  
-			if(is_array($token)) {
206  
-				list($id, $text) = $token;
207  
-				if($id == T_STRING && $text == '_t') {
208  
-					// start definition
209  
-					$inTransFn = true;
210  
-				} elseif($inTransFn && $id == T_VARIABLE) {
211  
-					// Dynamic definition from provideEntities - skip
212  
-					$inTransFn = false;
213  
-					$inConcat = false;
214  
-					$currentEntity = array();
215  
-				} elseif($inTransFn && $id == T_CONSTANT_ENCAPSED_STRING) {
216  
-					// Fixed quoting escapes, and remove leading/trailing quotes
217  
-					if(preg_match('/^\'/', $text)) {
218  
-						$text = str_replace("\'", "'", $text);
219  
-						$text = preg_replace('/^\'/', '', $text);
220  
-						$text = preg_replace('/\'$/', '', $text);
221  
-					} else {
222  
-						$text = str_replace('\"', '"', $text);
223  
-						$text = preg_replace('/^"/', '', $text);
224  
-						$text = preg_replace('/"$/', '', $text);
  213
+				$parts = explode('`',$replacedMatch);   //cut up the _t call
  214
+
  215
+				$partsWOQuotes = array();
  216
+				foreach($parts as $part) {
  217
+					$part = trim($part,"\n");  //remove spaces and newlines from part
  218
+
  219
+					$firstChar = substr($part,0,1);
  220
+					if ($firstChar == "'" || $firstChar == '"') {
  221
+						//remove wrapping quotes
  222
+						$part = substr($part,1,-1);
  223
+
  224
+						//remove inner concatenation
  225
+						$part = preg_replace("/$firstChar\\s*\\.\\s*$firstChar/",'',$part);
225 226
 					}
226  
-					
227  
-					if($inConcat) {
228  
-						$currentEntity[count($currentEntity)-1] .= $text;
229  
-					} else {
230  
-						$currentEntity[] = $text;
231  
-					} 
232  
-				} 
233  
-			} elseif($inTransFn && $token == '.') {
234  
-				$inConcat = true;	
235  
-			} elseif($inTransFn && $token == ',') {
236  
-				$inConcat = false;	
237  
-			} elseif($inTransFn && $token == ')') {
238  
-				// finalize definition
239  
-				$inTransFn = false;
240  
-				$inConcat = false;
241  
-				$entity = array_shift($currentEntity);
242  
-				$entities[$entity] = $currentEntity;
243  
-				$currentEntity = array();
  227
+
  228
+					$partsWOQuotes[] = $part;  //remove starting and ending quotes from inner parts
  229
+				}
  230
+
  231
+				if ($parts && count($partsWOQuotes) > 0) {
  232
+
  233
+					$entitiesArr = array_merge($entitiesArr, (array)$this->entitySpecFromNewParts($partsWOQuotes));
  234
+				}
244 235
 			}
245 236
 		}
246  
-		
247  
-		foreach($entities as $entity => $spec) {
248  
-			// call without master language definition
249  
-			if(!$spec) {
250  
-				unset($entities[$entity]);
251  
-				continue; 
  237
+
  238
+		ksort($entitiesArr);
  239
+
  240
+		return $entitiesArr;
  241
+	}
  242
+
  243
+
  244
+	/**
  245
+	 * Test if one string starts with another
  246
+	 */
  247
+	protected function startsWith($haystack, $needle) {
  248
+        $length = strlen($needle);
  249
+        return (substr($haystack, 0, $length) === $needle);
  250
+	}
  251
+
  252
+	/**
  253
+	 * Converts a parts array from explode function into an array of entities for the i18n text collector
  254
+	 * @return array
  255
+	 */
  256
+	protected function entitySpecFromNewParts($parts, $namespace = null) {
  257
+		// first thing in the parts array will always be the entity
  258
+		// split fullname into entity parts
  259
+		//set defaults
  260
+		$value = "";
  261
+		$prio = null;
  262
+		$comment = null;
  263
+
  264
+		$entityParts = explode('.', $parts[0]);
  265
+		if(count($entityParts) > 1) {
  266
+			// templates don't have a custom namespace
  267
+			$entity = array_pop($entityParts);
  268
+			// namespace might contain dots, so we explode
  269
+			$namespace = implode('.',$entityParts);
  270
+		} else {
  271
+			$entity = array_pop($entityParts);
  272
+			$namespace = $namespace;
  273
+		}
  274
+
  275
+		//find the array (if found, then we are dealing with the new _t syntax)
  276
+		$newSyntax = false;
  277
+		$offset = 0;
  278
+		foreach($parts as $p) {
  279
+			if ($this->startsWith($p,'array')) {    //remove everything after (and including) the array
  280
+				$newSyntax = true;
  281
+				$parts = array_splice($parts,0,$offset);
  282
+				break;
252 283
 			}
  284
+			$offset++;
  285
+		}
253 286
 
254  
-			unset($entities[$entity]);
255  
-			$entities[$this->normalizeEntity($entity, $module)] = $spec;
  287
+		//2nd part of array is always "string"
  288
+		if (isset($parts[1])) $value = $parts[1];
  289
+
  290
+
  291
+		//3rd part can be either priority or context, depending on whether the old or new syntax is used
  292
+		if (isset($parts[2])) {
  293
+			if ($newSyntax) {
  294
+				$prio = 40; //default priority
  295
+				$comment = $parts[2];
  296
+			} else {
  297
+				if (stripos($parts[2], 'PR_LOW') !== false ||
  298
+				    stripos($parts[2], 'PR_MEDIUM') !== false ||
  299
+				    stripos($parts[2], 'PR_HIGH') !== false) {  //definitely old syntax
  300
+					$prio = $parts[2];
  301
+				} else {    //default to new syntax (3rd position is comment/context
  302
+					$prio = 40; //default priority
  303
+					$comment = $parts[2];
  304
+				}
  305
+			}
256 306
 		}
257  
-		ksort($entities);
258  
-		
259  
-		return $entities;
  307
+
  308
+		//if 4th position is set then this is old syntax and it is the context
  309
+		//it would be array in the new syntax and therefore should have already been spliced off
  310
+		if (isset($parts[3])) {
  311
+			$comment = $parts[3];
  312
+			$prio = $parts[2];  //3rd position is now definitely priority
  313
+		}
  314
+
  315
+		return array(
  316
+			"{$namespace}.{$entity}" => array(
  317
+				$value,
  318
+				$prio,
  319
+				$comment
  320
+			)
  321
+		);
260 322
 	}
261 323
 
  324
+
  325
+
262 326
 	public function collectFromTemplate($content, $fileName, $module) {
263 327
 		$entities = array();
264 328
 		
@@ -276,6 +340,11 @@ public function collectFromTemplate($content, $fileName, $module) {
276 340
 			// @todo Will get massively confused if you include the includer -> infinite loop
277 341
 		}
278 342
 
  343
+		// use parser to extract <%t style translatable entities
  344
+		$translatables = i18nTextCollector_Parser::GetTranslatables($content);
  345
+		$entities = array_merge($entities,(array)$translatables);
  346
+
  347
+		// use the old method of getting _t() style translatable entities
279 348
 		// Collect in actual template
280 349
 		if(preg_match_all('/<%\s*(_t\(.*)%>/ms', $content, $matches)) {
281 350
 			foreach($matches as $match) {
@@ -516,4 +585,49 @@ public function getYaml($entities, $locale) {
516 585
 		// TODO Dumper can't handle YAML comments, so the context information is currently discarded
517 586
 		return $yamlHandler->dump(array($locale => $entitiesNested), 99);
518 587
 	}
  588
+}
  589
+
  590
+/**
  591
+ * Parser that scans through a template and extracts the parameters to the _t and <%t calls
  592
+ */
  593
+class i18nTextCollector_Parser extends SSTemplateParser {
  594
+
  595
+	static $entities = array();
  596
+	static $currentEntity = array();
  597
+
  598
+	function Translate__construct(&$res) {
  599
+		self::$currentEntity = array(null,null,null); //start with empty array
  600
+	}
  601
+
  602
+	function Translate_Entity(&$res, $sub) {
  603
+		self::$currentEntity[0] = $sub['text']; //entity
  604
+	}
  605
+
  606
+	function Translate_Default(&$res, $sub) {
  607
+		self::$currentEntity[1] = $sub['String']['text']; //value
  608
+	}
  609
+
  610
+	function Translate_Context(&$res, $sub) {
  611
+		self::$currentEntity[2] = $sub['String']['text']; //comment
  612
+	}
  613
+
  614
+	function Translate__finalise(&$res) {
  615
+		// set the entity name and the value (default), as well as the context (comment)
  616
+		// priority is no longer used, so that is blank
  617
+		self::$entities[self::$currentEntity[0]] = array(self::$currentEntity[1],null,self::$currentEntity[2]);
  618
+	}
  619
+
  620
+	/**
  621
+	 * Parses a template and returns any translatable entities
  622
+	 */
  623
+	static function GetTranslatables($template) {
  624
+		self::$entities = array();
  625
+
  626
+		// Run the parser and throw away the result
  627
+		$parser = new i18nTextCollector_Parser($template);
  628
+		if(substr($template, 0,3) == pack("CCC", 0xef, 0xbb, 0xbf)) $parser->pos = 3;
  629
+		$parser->match_TopTemplate();
  630
+
  631
+		return self::$entities;
  632
+	}
519 633
 }
62  tests/i18n/i18nTextCollectorTest.php
@@ -58,7 +58,7 @@ function testConcatenationInEntityValues() {
58 58
 
59 59
 _t(
60 60
 'Test.CONCATENATED2',
61  
-"Line \"4\" and " . 
  61
+"Line "4" and " .
62 62
 "Line 5");
63 63
 PHP;
64 64
 		$this->assertEquals(
@@ -68,7 +68,36 @@ function testConcatenationInEntityValues() {
68 68
 				'Test.CONCATENATED2' => array("Line \"4\" and Line 5")
69 69
 			)
70 70
 		);
71  
-	}	
  71
+	}
  72
+
  73
+	function testCollectFromNewTemplateSyntaxUsingParserSubclass() {
  74
+			$c = new i18nTextCollector();
  75
+
  76
+			$html = <<<SS
  77
+			<% _t('Test.SINGLEQUOTE','Single Quote'); %>
  78
+<%t i18nTestModule.NEWMETHODSIG "New _t method signature test" %>
  79
+<%t i18nTestModule.INJECTIONS_0 "Hello {name} {greeting}. But it is late, {goodbye}" name="Mark" greeting="welcome" goodbye="bye" %>
  80
+<%t i18nTestModule.INJECTIONS_1 "Hello {name} {greeting}. But it is late, {goodbye}" name="Paul" greeting="good you are here" goodbye="see you" %>
  81
+<%t i18nTestModule.INJECTIONS_2 "Hello {name} {greeting}. But it is late, {goodbye}" is "New context (this should be ignored)" name="Steffen" greeting="willkommen" goodbye="wiedersehen" %>
  82
+<%t i18nTestModule.INJECTIONS_3 name="Cat" greeting='meow' goodbye="meow" %>
  83
+<%t i18nTestModule.INJECTIONS_4 name=\$absoluteBaseURL greeting=\$get_locale goodbye="global calls" %>
  84
+SS;
  85
+		$c->collectFromTemplate($html, 'mymodule', 'Test');
  86
+
  87
+		$this->assertEquals(
  88
+			$c->collectFromTemplate($html, 'mymodule', 'Test'),
  89
+			array(
  90
+				'Test.SINGLEQUOTE' => array('Single Quote',null,null),
  91
+				'i18nTestModule.NEWMETHODSIG' => array("New _t method signature test",null,null),
  92
+				'i18nTestModule.INJECTIONS_0' => array("Hello {name} {greeting}. But it is late, {goodbye}", null, null),
  93
+				'i18nTestModule.INJECTIONS_1' => array("Hello {name} {greeting}. But it is late, {goodbye}", null, null),
  94
+				'i18nTestModule.INJECTIONS_2' => array("Hello {name} {greeting}. But it is late, {goodbye}", null, "New context (this should be ignored)"),
  95
+				'i18nTestModule.INJECTIONS_3' => array(null, null, null),
  96
+				'i18nTestModule.INJECTIONS_4' => array(null, null, null),
  97
+			)
  98
+		);
  99
+	}
  100
+
72 101
 	function testCollectFromTemplateSimple() {
73 102
 		$c = new i18nTextCollector();
74 103
 
@@ -283,6 +312,35 @@ function testNewlinesInEntityValues() {
283 312
 	}
284 313
 
285 314
 	/**
  315
+	 * Test extracting entities from the new _t method signature
  316
+	 */
  317
+	function testCollectFromCodeNewSignature() {
  318
+		$c = new i18nTextCollector();
  319
+
  320
+		$php = <<<PHP
  321
+_t('i18nTestModule.NEWMETHODSIG',"New _t method signature test");
  322
+_t('i18nTestModule.INJECTIONS1','_DOES_NOT_EXIST', "Hello {name} {greeting}. But it is late, {goodbye}", array("name"=>"Mark", "greeting"=>"welcome", "goodbye"=>"bye"));
  323
+_t('i18nTestModule.INJECTIONS2', "Hello {name} {greeting}. But it is late, {goodbye}", array("name"=>"Paul", "greeting"=>"good you are here", "goodbye"=>"see you"));
  324
+_t("i18nTestModule.INJECTIONS3", "Hello {name} {greeting}. But it is late, {goodbye}", "New context (this should be ignored)", array("name"=>"Steffen", "greeting"=>"willkommen", "goodbye"=>"wiedersehen"));
  325
+_t('i18nTestModule.INJECTIONS4', array("name"=>"Cat", "greeting"=>"meow", "goodbye"=>"meow"));
  326
+PHP;
  327
+
  328
+		$collectedTranslatables = $c->collectFromCode($php, 'mymodule');
  329
+
  330
+		$expectedArray = (array(
  331
+			'i18nTestModule.NEWMETHODSIG' => array("New _t method signature test", null, null),
  332
+			'i18nTestModule.INJECTIONS1' => array("_DOES_NOT_EXIST", 40, "Hello {name} {greeting}. But it is late, {goodbye}"),
  333
+			'i18nTestModule.INJECTIONS2' => array("Hello {name} {greeting}. But it is late, {goodbye}", null, null),
  334
+			'i18nTestModule.INJECTIONS3' => array("Hello {name} {greeting}. But it is late, {goodbye}", 40, "New context (this should be ignored)"),
  335
+			'i18nTestModule.INJECTIONS4' => array(null, null, null),
  336
+		));
  337
+
  338
+		ksort($expectedArray);
  339
+
  340
+		$this->assertEquals($collectedTranslatables, $expectedArray);
  341
+	}
  342
+
  343
+	/**
286 344
 	 * Input for langArrayCodeForEntitySpec() should be suitable for insertion
287 345
 	 * into single-quoted strings, so needs to be escaped already.
288 346
 	 */

0 notes on commit c314d0b

Please sign in to comment.
Something went wrong with that request. Please try again.