Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

"More Like This" Query #63

Merged
merged 4 commits into from Aug 7, 2011
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
250 changes: 250 additions & 0 deletions lib/Elastica/Query/MoreLikeThis.php
@@ -0,0 +1,250 @@
<?php
/**
* More Like This query
*
* @uses Elastica_Query_Abstract
* @category Xodoa
* @package Elastica
* @author Raul Martinez, Jr <juneym@gmail.com>
* @link http://www.elasticsearch.org/guide/reference/query-dsl/mlt-query.html
*/
class Elastica_Query_MoreLikeThis extends Elastica_Query_Abstract
{
    /**
     * Field names the query is restricted to; omitted from the request when empty
     *
     * @var array
     */
    protected $_fields = array();

    /**
     * Text sent as "like_text"; omitted from the request when null or empty
     *
     * @var string
     */
    protected $_likeText = null;

    /**
     * Value sent as "percent_terms_to_match"
     *
     * @var float
     */
    protected $_percTermsToMatch = 0.3;

    /**
     * Value sent as "min_term_freq"
     *
     * @var int
     */
    protected $_minTermFreq = 2;

    /**
     * Value sent as "max_query_terms"
     *
     * @var int
     */
    protected $_maxQueryTerms = 25;

    /**
     * Value sent as "min_doc_freq"
     *
     * @var int
     */
    protected $_minDocFreq = 5;

    /**
     * Value sent as "max_doc_freq"; omitted from the request when null or 0
     *
     * @var int|null
     */
    protected $_maxDocFreq = null;

    /**
     * Value sent as "min_word_len"
     *
     * @var int
     */
    protected $_minWordLen = 0;

    /**
     * Value sent as "max_word_len"; omitted from the request when null or 0
     *
     * @var int|null
     */
    protected $_maxWordLen = null;

    /**
     * Value sent as "boost_terms"
     *
     * @var int|float
     */
    protected $_boostTerms = 1;

    /**
     * Value sent as "boost"
     *
     * @var float
     */
    protected $_boost = 1.0;

    /**
     * Analyzer name sent as "analyzer"; omitted from the request when not set
     *
     * @var string
     */
    protected $_analyzer = null;

    /**
     * Words sent as "stop_words"; omitted from the request when empty
     *
     * @var array
     */
    protected $_stopWords = null;

    /**
     * Sets the fields of the more_like_this query
     *
     * Despite the method name, the given list REPLACES any previously
     * configured fields; it is not merged with them.
     *
     * @param array $fields Field names
     * @return Elastica_Query_MoreLikeThis Current object
     */
    public function addFields(array $fields) {
        $this->_fields = $fields;

        return $this;
    }

    /**
     * Set the "like_text" value
     *
     * Surrounding whitespace is stripped before the text is stored.
     *
     * @param string $text
     * @return Elastica_Query_MoreLikeThis This current object
     */
    public function setLikeText($text) {
        $this->_likeText = trim($text);

        return $this;
    }

    /**
     * Set the "boost" value
     *
     * @param float $value Boost value
     * @return Elastica_Query_MoreLikeThis Query object
     */
    public function setBoost($value) {
        $this->_boost = (float) $value;

        return $this;
    }

    /**
     * Set max_query_terms
     *
     * @param int $value Max query terms value
     * @return Elastica_Query_MoreLikeThis
     */
    public function setMaxQueryTerms($value) {
        $this->_maxQueryTerms = (int) $value;

        return $this;
    }

    /**
     * Set percent_terms_to_match
     *
     * @param float $perc percentage
     * @return Elastica_Query_MoreLikeThis
     */
    public function setPercentTermsToMatch($perc) {
        $this->_percTermsToMatch = (float) $perc;

        return $this;
    }

    /**
     * Set min_term_freq; negative values are clamped to 0
     *
     * @param int $value
     * @return Elastica_Query_MoreLikeThis
     */
    public function setMinTermFrequency($value) {
        $this->_minTermFreq = max(0, (int) $value);

        return $this;
    }

    /**
     * Set min_doc_freq; a negative value falls back to 5
     *
     * @param int $value
     * @return Elastica_Query_MoreLikeThis
     */
    public function setMinDocFrequency($value) {
        $value = (int) $value;

        $this->_minDocFreq = ($value < 0) ? 5 : $value;

        return $this;
    }

    /**
     * Set max_doc_freq; a negative value clears the limit (stored as null)
     *
     * @param int $value
     * @return Elastica_Query_MoreLikeThis
     */
    public function setMaxDocFrequency($value) {
        $value = (int) $value;

        $this->_maxDocFreq = ($value < 0) ? null : $value;

        return $this;
    }

    /**
     * Set min_word_len; zero and negative values are stored as 0
     *
     * @param int $value
     * @return Elastica_Query_MoreLikeThis
     */
    public function setMinWordLength($value) {
        $this->_minWordLen = max(0, (int) $value);

        return $this;
    }

    /**
     * Set max_word_len; a negative value clears the limit (stored as null)
     *
     * @param int $value
     * @return Elastica_Query_MoreLikeThis
     */
    public function setMaxWordLength($value) {
        $value = (int) $value;

        $this->_maxWordLen = ($value < 0) ? null : $value;

        return $this;
    }

    /**
     * Set boost_terms; a negative value falls back to 1
     *
     * The value is kept as a float so that fractional boost factors such as
     * 1.5 are not truncated before they are sent to the server.
     *
     * @param float $value Boost factor
     * @link http://www.elasticsearch.org/guide/reference/query-dsl/mlt-query.html
     * @return Elastica_Query_MoreLikeThis
     */
    public function setBoostTerms($value) {
        $value = (float) $value;

        $this->_boostTerms = ($value < 0) ? 1.0 : $value;

        return $this;
    }

    /**
     * Set the analyzer name
     *
     * The name is trimmed; a blank name is ignored and leaves the current
     * analyzer untouched.
     *
     * @param string $value
     * @return Elastica_Query_MoreLikeThis
     */
    public function setAnalyzer($value) {
        $value = trim($value);

        // Compare against '' rather than using empty(): empty('0') is true.
        if ($value !== '') {
            $this->_analyzer = $value;
        }

        return $this;
    }

    /**
     * Set the stop words
     *
     * @param array $words
     * @return Elastica_Query_MoreLikeThis
     */
    public function setStopWords(array $words) {
        $this->_stopWords = $words;

        return $this;
    }

    /**
     * Converts more_like_this query to array
     *
     * The settings are wrapped under the "mlt" key. Optional settings
     * (fields, like_text, stop_words, analyzer, max_doc_freq, max_word_len)
     * are only included when they hold a value.
     *
     * @return array Query array
     * @see Elastica_Query_Abstract::toArray()
     */
    public function toArray() {
        $args = array();

        if (!empty($this->_fields)) {
            $args['fields'] = $this->_fields;
        }

        // A boost of 0 is a legitimate value and must not be dropped.
        if ($this->_boost !== null) {
            $args['boost'] = $this->_boost;
        }

        // Explicit checks instead of empty(), which would discard the text "0".
        if ($this->_likeText !== null && $this->_likeText !== '') {
            $args['like_text'] = $this->_likeText;
        }

        $args['max_query_terms'] = $this->_maxQueryTerms;
        $args['percent_terms_to_match'] = $this->_percTermsToMatch;
        $args['min_term_freq'] = $this->_minTermFreq;

        if (!empty($this->_stopWords)) {
            $args['stop_words'] = $this->_stopWords;
        }

        if ($this->_analyzer !== null && $this->_analyzer !== '') {
            $args['analyzer'] = $this->_analyzer;
        }

        $args['min_doc_freq'] = $this->_minDocFreq;

        // Loose comparison is intentional: both null and 0 mean "no upper limit set".
        if ($this->_maxDocFreq != null) {
            $args['max_doc_freq'] = $this->_maxDocFreq;
        }

        $args['min_word_len'] = $this->_minWordLen;

        // Loose comparison is intentional: both null and 0 mean "no upper limit set".
        if ($this->_maxWordLen != null) {
            $args['max_word_len'] = $this->_maxWordLen;
        }

        $args['boost_terms'] = $this->_boostTerms;

        return array('mlt' => $args);
    }
}
54 changes: 54 additions & 0 deletions test/lib/Elastica/Query/MoreLikeThisTest.php
@@ -0,0 +1,54 @@
<?php
require_once dirname(__FILE__) . '/../../../bootstrap.php';

class Elastica_Query_MoreLikeThisTest extends PHPUnit_Framework_TestCase
{
    /**
     * Nothing to prepare before each test
     */
    public function setUp() {
    }

    /**
     * Nothing to clean up after each test
     */
    public function tearDown() {
    }

    /**
     * Indexes two documents and runs a more_like_this query against them,
     * expecting both to be returned.
     */
    public function testSearch() {
        // (Re)create the "test" index without replicas
        $esClient = new Elastica_Client();
        $testIndex = new Elastica_Index($esClient, 'test');
        $testIndex->create(array(), true);
        $testIndex->getSettings()->setNumberOfReplicas(0);
        // The number of shards is left at the index default.

        // Both fields share the same stored, analyzed string definition
        $analyzedString = array('store' => 'yes', 'type' => 'string', 'index' => 'analyzed');

        $docType = new Elastica_Type($testIndex, 'helloworldmlt');
        $typeMapping = new Elastica_Type_Mapping($docType, array(
            'email' => $analyzedString,
            'content' => $analyzedString,
        ));
        $typeMapping->setSource(array('enabled' => false));
        $docType->setMapping($typeMapping);

        // Index the two sample documents
        $firstDoc = new Elastica_Document(1000, array(
            'email' => 'testemail@gmail.com',
            'content' => 'This is a sample post. Hello World Fuzzy Like This!',
        ));
        $docType->addDocument($firstDoc);

        $secondDoc = new Elastica_Document(1001, array(
            'email' => 'nospam@gmail.com',
            'content' => 'This is a fake nospam email address for gmail',
        ));
        $docType->addDocument($secondDoc);

        // Make the freshly indexed documents searchable
        $testIndex->refresh();

        // Every setter returns the query object, so the calls can be chained
        $mltQuery = new Elastica_Query_MoreLikeThis();
        $mltQuery
            ->setLikeText("fake gmail sample")
            ->addFields(array("email", "content"))
            ->setMaxQueryTerms(1)
            ->setMinDocFrequency(1)
            ->setMinTermFrequency(1);

        $searchQuery = new Elastica_Query();
        $searchQuery->setFields(array("email", "content"));
        $searchQuery->setQuery($mltQuery);

        $hits = $docType->search($searchQuery);
        $hits->getResponse()->getData();

        $this->assertEquals(2, $hits->count());
    }
}