Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

621 lines (501 sloc) 18.151 kb
<?php
/*
+-----------------------------------------------------------------------+
| program/include/rcube_spellchecker.php |
| |
| This file is part of the Roundcube Webmail client |
| Copyright (C) 2011, Kolab Systems AG |
| Copyright (C) 2008-2011, The Roundcube Dev Team |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
| See the README file for a full license statement. |
| |
| PURPOSE: |
| Spellchecking using different backends |
| |
+-----------------------------------------------------------------------+
| Author: Aleksander Machniak <machniak@kolabsys.com> |
| Author: Thomas Bruederli <roundcube@gmail.com> |
+-----------------------------------------------------------------------+
*/
/**
* Helper class for spellchecking with Googielspell and PSpell support.
*
* @package Core
*/
class rcube_spellchecker
{
private $matches = array();
private $engine;
private $lang;
private $rc;
private $error;
private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/';
private $options = array();
private $dict;
private $have_dict;
// default settings
const GOOGLE_HOST = 'ssl://www.google.com';
const GOOGLE_PORT = 443;
const MAX_SUGGESTIONS = 10;
/**
* Constructor
*
* @param string $lang Language code
*/
function __construct($lang = 'en')
{
$this->rc = rcube::get_instance();
$this->engine = $this->rc->config->get('spellcheck_engine', 'googie');
$this->lang = $lang ? $lang : 'en';
$this->options = array(
'ignore_syms' => $this->rc->config->get('spellcheck_ignore_syms'),
'ignore_nums' => $this->rc->config->get('spellcheck_ignore_nums'),
'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'),
'dictionary' => $this->rc->config->get('spellcheck_dictionary'),
);
}
/**
* Set content and check spelling
*
* @param string $text Text content for spellchecking
* @param bool $is_html Enables HTML-to-Text conversion
*
* @return bool True when no mispelling found, otherwise false
*/
function check($text, $is_html = false)
{
// convert to plain text
if ($is_html) {
$this->content = $this->html2text($text);
}
else {
$this->content = $text;
}
if ($this->engine == 'pspell') {
$this->matches = $this->_pspell_check($this->content);
}
else {
$this->matches = $this->_googie_check($this->content);
}
return $this->found() == 0;
}
/**
* Number of mispellings found (after check)
*
* @return int Number of mispellings
*/
function found()
{
return count($this->matches);
}
/**
* Returns suggestions for the specified word
*
* @param string $word The word
*
* @return array Suggestions list
*/
function get_suggestions($word)
{
if ($this->engine == 'pspell') {
return $this->_pspell_suggestions($word);
}
return $this->_googie_suggestions($word);
}
/**
* Returns misspelled words
*
* @param string $text The content for spellchecking. If empty content
* used for check() method will be used.
*
* @return array List of misspelled words
*/
function get_words($text = null, $is_html=false)
{
if ($this->engine == 'pspell') {
return $this->_pspell_words($text, $is_html);
}
return $this->_googie_words($text, $is_html);
}
/**
* Returns checking result in XML (Googiespell) format
*
* @return string XML content
*/
function get_xml()
{
// send output
$out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">';
foreach ($this->matches as $item) {
$out .= '<c o="'.$item[1].'" l="'.$item[2].'">';
$out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
$out .= '</c>';
}
$out .= '</spellresult>';
return $out;
}
/**
* Returns checking result (misspelled words with suggestions)
*
* @return array Spellchecking result. An array indexed by word.
*/
function get()
{
$result = array();
foreach ($this->matches as $item) {
if ($this->engine == 'pspell') {
$word = $item[0];
}
else {
$word = mb_substr($this->content, $item[1], $item[2], RCMAIL_CHARSET);
}
$result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
}
return $result;
}
/**
* Returns error message
*
* @return string Error message
*/
function error()
{
return $this->error;
}
/**
* Checks the text using pspell
*
* @param string $text Text content for spellchecking
*/
private function _pspell_check($text)
{
// init spellchecker
$this->_pspell_init();
if (!$this->plink) {
return array();
}
// tokenize
$text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
$diff = 0;
$matches = array();
foreach ($text as $w) {
$word = trim($w[0]);
$pos = $w[1] - $diff;
$len = mb_strlen($word);
// skip exceptions
if ($this->is_exception($word)) {
}
else if (!pspell_check($this->plink, $word)) {
$suggestions = pspell_suggest($this->plink, $word);
if (sizeof($suggestions) > self::MAX_SUGGESTIONS) {
$suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
}
$matches[] = array($word, $pos, $len, null, $suggestions);
}
$diff += (strlen($word) - $len);
}
return $matches;
}
/**
* Returns the misspelled words
*/
private function _pspell_words($text = null, $is_html=false)
{
$result = array();
if ($text) {
// init spellchecker
$this->_pspell_init();
if (!$this->plink) {
return array();
}
// With PSpell we don't need to get suggestions to return misspelled words
if ($is_html) {
$text = $this->html2text($text);
}
$text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
foreach ($text as $w) {
$word = trim($w[0]);
// skip exceptions
if ($this->is_exception($word)) {
continue;
}
if (!pspell_check($this->plink, $word)) {
$result[] = $word;
}
}
return $result;
}
foreach ($this->matches as $m) {
$result[] = $m[0];
}
return $result;
}
/**
* Returns suggestions for misspelled word
*/
private function _pspell_suggestions($word)
{
// init spellchecker
$this->_pspell_init();
if (!$this->plink) {
return array();
}
$suggestions = pspell_suggest($this->plink, $word);
if (sizeof($suggestions) > self::MAX_SUGGESTIONS)
$suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
return is_array($suggestions) ? $suggestions : array();
}
/**
* Initializes PSpell dictionary
*/
private function _pspell_init()
{
if (!$this->plink) {
if (!extension_loaded('pspell')) {
$this->error = "Pspell extension not available";
rcube::raise_error(array(
'code' => 500, 'type' => 'php',
'file' => __FILE__, 'line' => __LINE__,
'message' => $this->error), true, false);
return;
}
$this->plink = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST);
}
if (!$this->plink) {
$this->error = "Unable to load Pspell engine for selected language";
}
}
private function _googie_check($text)
{
// spell check uri is configured
$url = $this->rc->config->get('spellcheck_uri');
if ($url) {
$a_uri = parse_url($url);
$ssl = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl');
$port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80);
$host = ($ssl ? 'ssl://' : '') . $a_uri['host'];
$path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang;
}
else {
$host = self::GOOGLE_HOST;
$port = self::GOOGLE_PORT;
$path = '/tbproxy/spell?lang=' . $this->lang;
}
// Google has some problem with spaces, use \n instead
$gtext = str_replace(' ', "\n", $text);
$gtext = '<?xml version="1.0" encoding="utf-8" ?>'
.'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
.'<text>' . $gtext . '</text>'
.'</spellrequest>';
$store = '';
if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
$out = "POST $path HTTP/1.0\r\n";
$out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
$out .= "Content-Length: " . strlen($gtext) . "\r\n";
$out .= "Content-Type: application/x-www-form-urlencoded\r\n";
$out .= "Connection: Close\r\n\r\n";
$out .= $gtext;
fwrite($fp, $out);
while (!feof($fp))
$store .= fgets($fp, 128);
fclose($fp);
}
if (!$store) {
$this->error = "Empty result from spelling engine";
}
preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER);
// skip exceptions (if appropriate options are enabled)
if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums'])
|| !empty($this->options['ignore_caps']) || !empty($this->options['dictionary'])
) {
foreach ($matches as $idx => $m) {
$word = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
// skip exceptions
if ($this->is_exception($word)) {
unset($matches[$idx]);
}
}
}
return $matches;
}
private function _googie_words($text = null, $is_html=false)
{
if ($text) {
if ($is_html) {
$text = $this->html2text($text);
}
$matches = $this->_googie_check($text);
}
else {
$matches = $this->matches;
$text = $this->content;
}
$result = array();
foreach ($matches as $m) {
$result[] = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
}
return $result;
}
private function _googie_suggestions($word)
{
if ($word) {
$matches = $this->_googie_check($word);
}
else {
$matches = $this->matches;
}
if ($matches[0][4]) {
$suggestions = explode("\t", $matches[0][4]);
if (sizeof($suggestions) > self::MAX_SUGGESTIONS) {
$suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS);
}
return $suggestions;
}
return array();
}
private function html2text($text)
{
$h2t = new html2text($text, false, true, 0);
return $h2t->get_text();
}
/**
* Check if the specified word is an exception accoring to
* spellcheck options.
*
* @param string $word The word
*
* @return bool True if the word is an exception, False otherwise
*/
public function is_exception($word)
{
// Contain only symbols (e.g. "+9,0", "2:2")
if (!$word || preg_match('/^[0-9@#$%^&_+~*=:;?!,.-]+$/', $word))
return true;
// Contain symbols (e.g. "g@@gle"), all symbols excluding separators
if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word))
return true;
// Contain numbers (e.g. "g00g13")
if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word))
return true;
// Blocked caps (e.g. "GOOGLE")
if (!empty($this->options['ignore_caps']) && $word == mb_strtoupper($word))
return true;
// Use exceptions from dictionary
if (!empty($this->options['dictionary'])) {
$this->load_dict();
// @TODO: should dictionary be case-insensitive?
if (!empty($this->dict) && in_array($word, $this->dict))
return true;
}
return false;
}
/**
* Add a word to dictionary
*
* @param string $word The word to add
*/
public function add_word($word)
{
$this->load_dict();
foreach (explode(' ', $word) as $word) {
// sanity check
if (strlen($word) < 512) {
$this->dict[] = $word;
$valid = true;
}
}
if ($valid) {
$this->dict = array_unique($this->dict);
$this->update_dict();
}
}
/**
* Remove a word from dictionary
*
* @param string $word The word to remove
*/
public function remove_word($word)
{
$this->load_dict();
if (($key = array_search($word, $this->dict)) !== false) {
unset($this->dict[$key]);
$this->update_dict();
}
}
/**
* Update dictionary row in DB
*/
private function update_dict()
{
if (strcasecmp($this->options['dictionary'], 'shared') != 0) {
$userid = $this->rc->get_user_id();
}
$plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));
if (!empty($plugin['abort'])) {
return;
}
if ($this->have_dict) {
if (!empty($this->dict)) {
$this->rc->db->query(
"UPDATE ".$this->rc->db->table_name('dictionary')
." SET data = ?"
." WHERE user_id " . ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL")
." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
implode(' ', $plugin['dictionary']), $plugin['language']);
}
// don't store empty dict
else {
$this->rc->db->query(
"DELETE FROM " . $this->rc->db->table_name('dictionary')
." WHERE user_id " . ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL")
." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
$plugin['language']);
}
}
else if (!empty($this->dict)) {
$this->rc->db->query(
"INSERT INTO " .$this->rc->db->table_name('dictionary')
." (user_id, " . $this->rc->db->quoteIdentifier('language') . ", data) VALUES (?, ?, ?)",
$plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary']));
}
}
/**
* Get dictionary from DB
*/
private function load_dict()
{
if (is_array($this->dict)) {
return $this->dict;
}
if (strcasecmp($this->options['dictionary'], 'shared') != 0) {
$userid = $this->rc->get_user_id();
}
$plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));
if (empty($plugin['abort'])) {
$dict = array();
$this->rc->db->query(
"SELECT data FROM ".$this->rc->db->table_name('dictionary')
." WHERE user_id ". ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL")
." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
$plugin['language']);
if ($sql_arr = $this->rc->db->fetch_assoc($sql_result)) {
$this->have_dict = true;
if (!empty($sql_arr['data'])) {
$dict = explode(' ', $sql_arr['data']);
}
}
$plugin['dictionary'] = array_merge((array)$plugin['dictionary'], $dict);
}
if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
$this->dict = $plugin['dictionary'];
}
else {
$this->dict = array();
}
return $this->dict;
}
}
Jump to Line
Something went wrong with that request. Please try again.