Skip to content
Lemmatize text with PHP and the TreeTagger library
PHP
Branch: master
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Type Name Latest commit message Commit time
Failed to load latest commit information.
exemple
lib
.gitignore
LICENSE
README.md
composer.json

README.md

php-lemmatizer

Minimum PHP Version Latest Stable Version Total Downloads License

A simple lemmatizer tool based on TreeTagger for PHP.

Installing the TreeTagger library

See the TreeTagger website for installation instructions.

Install php-lemmatizer

You can install it with Composer:

composer require mbeurel/php-lemmatizer

Examples

Example scripts are available in a separate repository, php-lemmatizer/example.

Sample Code

include "vendor/autoload.php";
use PhpTreeTagger\TreeTagger;
$treeTaggerPath = __DIR__."/treeTagger"; // Path to the TreeTagger library
$nbProcess = 4;                          // Number of processes executed at the same time (example value, adjust to your machine)

try {

  // Initialise the library for the French language.
  $treeTagger = new TreeTagger("french", array(
      "treeTaggerPath"        =>  $treeTaggerPath,      // Path to TreeTagger Library
      "debug"                 =>  false,                // View Debug
      "wordUnique"            =>  true,                 // Keep only one occurrence of the word
      "wordRemoveAccent"      =>  true,                 // Remove all accent in word
      "nbProcess"             =>  $nbProcess            // Number of processes executed at the same time
    )
  );

  // Word types (TreeTagger tags) to remove from the lemmatized words.
  $treeTagger->setCleanTypeWords(
    array(
      "PRO:PER",
      "DET:ART",
      "DET:POS",
      "SENT",
      "PRP"
    )
  );

  // lemmatizer() accepts either a string or an array of strings, e.g.
  // ["La lemmatisation désigne un traitement lexical", "apporté à un texte en vue de son analyse"]
  $result = $treeTagger->lemmatizer("La lemmatisation désigne un traitement lexical apporté à un texte en vue de son analyse.");

  // Display the result:
  var_dump($result);

  // Example result:
  //
  //  $result = array(
  //    0  =>  array(
  //      "value"     =>  "lemmatisation designer traitement lexical apporter texte vue analyse",
  //      "detail"    =>  array(
  //        1           =>  array(
  //          "source"    =>  "lemmatisation",
  //          "type"      =>  "NOM",
  //          "dest"      =>  "lemmatisation"
  //        ),
  //        2           =>  array(
  //          "source"    =>  "désigne",
  //          "type"      =>  "VER:pres",
  //          "dest"      =>  "désigner"
  //        ),
  //        4           =>  array(
  //          "source"    =>  "traitement",
  //          "type"      =>  "NOM",
  //          "dest"      =>  "traitement"
  //        ),
  //        6           =>  array(
  //          "source"    =>  "apporté",
  //          "type"      =>  "VER:pper",
  //          "dest"      =>  "apporter"
  //        ),
  //        7           =>  array(
  //          "source"    =>  "à",
  //          "type"      =>  "PRP",
  //          "dest"      =>  "à"
  //        ),
  //        9           =>  array(
  //          "source"    =>  "texte",
  //          "type"      =>  "NOM",
  //          "dest"      =>  "texte"
  //        ),
  //        10          =>  array(
  //          "source"    =>  "en",
  //          "type"      =>  "PRP",
  //          "dest"      =>  "en"
  //        ),
  //        11          =>  array(
  //          "source"    =>  "vue",
  //          "type"      =>  "NOM",
  //          "dest"      =>  "vue"
  //        ),
  //        12          =>  array(
  //          "source"    =>  "de",
  //          "type"      =>  "PRP",
  //          "dest"      =>  "de"
  //        ),
  //        13          =>  array(
  //          "source"    =>  "son",
  //          "type"      =>  "DET:POS",
  //          "dest"      =>  "son"
  //        ),
  //        14          =>  array(
  //          "source"    =>  "analyse",
  //          "type"      =>  "NOM",
  //          "dest"      =>  "analyse"
  //        ),
  //        15          =>  array(
  //          "source"    =>  ".",
  //          "type"      =>  "SENT",
  //          "dest"      =>  "."
  //        )
  //      )
  //    )
  //  );
} catch(\Exception $e) {
  // Print the exception (message and stack trace) if anything fails.
  echo $e;
}

Credits

Created by Matthieu Beurel. Sponsored by Nexboard.

You can’t perform that action at this time.