From 8a700208e20e76159bad7a42f4c69c3697d0dc00 Mon Sep 17 00:00:00 2001 From: Nadav Harnik Date: Wed, 8 Nov 2017 02:05:23 +0200 Subject: [PATCH] KMS-14857: add language support to name and description in partial search + optimize partial query for captions --- .../caption/search/CaptionSearchPlugin.php | 2 +- .../config/mapping/category_mapping.json | 264 ++++++++++++++++++ .../config/mapping/entry_mapping.json | 264 ++++++++++++++++++ .../elastic_search/lib/elasticSearchUtils.php | 7 +- .../lib/model/ESearchQueryAttributes.php | 29 ++ .../lib/model/items/ESearchCaptionItem.php | 25 +- .../lib/model/items/ESearchCategoryItem.php | 27 +- .../lib/model/items/ESearchCuePointItem.php | 24 +- .../lib/model/items/ESearchEntryItem.php | 28 +- .../lib/model/items/ESearchItem.php | 6 +- .../lib/model/items/ESearchMetadataItem.php | 22 +- .../lib/model/items/ESearchOperator.php | 18 +- .../lib/model/items/ESearchUnifiedItem.php | 36 ++- .../lib/model/items/ESearchUserItem.php | 24 +- .../lib/model/kESearchQueryManager.php | 14 +- .../elastic_search/lib/search/kBaseSearch.php | 26 +- 16 files changed, 764 insertions(+), 52 deletions(-) create mode 100644 plugins/search/providers/elastic_search/lib/model/ESearchQueryAttributes.php diff --git a/plugins/content/caption/search/CaptionSearchPlugin.php b/plugins/content/caption/search/CaptionSearchPlugin.php index f367154a152..56684ccc57f 100644 --- a/plugins/content/caption/search/CaptionSearchPlugin.php +++ b/plugins/content/caption/search/CaptionSearchPlugin.php @@ -258,7 +258,7 @@ protected static function getElasticLines($items, $language, $assetId) $content = substr($content, 0, kElasticSearchManager::MAX_LENGTH); $line['content'] = $content; - $analyzedFieldName = elasticSearchUtils::getAnalyzedFieldName($language, 'content'); + $analyzedFieldName = elasticSearchUtils::getAnalyzedFieldName($language, 'content' ,elasticSearchUtils::UNDERSCORE_FIELD_DELIMITER); if($analyzedFieldName) $line[$analyzedFieldName] = $content; diff --git a/plugins/search/providers/elastic_search/config/mapping/category_mapping.json b/plugins/search/providers/elastic_search/config/mapping/category_mapping.json index 10b126bdff5..8d5662162d5 100644 --- a/plugins/search/providers/elastic_search/config/mapping/category_mapping.json +++ b/plugins/search/providers/elastic_search/config/mapping/category_mapping.json @@ -99,6 +99,138 @@ "raw" : { "type" : "text", "analyzer" : "kaltura_keyword" + }, + "english" : { + "type" : "text", + "analyzer": "english" + }, + "arabic" : { + "type" : "text", + "analyzer": "arabic" + }, + "armenian" : { + "type" : "text", + "analyzer" : "armenian" + }, + "basque" : { + "type" : "text", + "analyzer" : "basque" + }, + "brazilian" : { + "type" : "text", + "analyzer" : "brazilian" + }, + "bulgarian" : { + "type" : "text", + "analyzer" : "bulgarian" + }, + "catalan" : { + "type" : "text", + "analyzer" : "catalan" + }, + "cjk" : { + "type" : "text", + "analyzer" : "cjk" + }, + "czech" : { + "type" : "text", + "analyzer" : "czech" + }, + "danish" : { + "type" : "text", + "analyzer" : "danish" + }, + "dutch" : { + "type" : "text", + "analyzer" : "dutch" + }, + "finnish" : { + "type" : "text", + "analyzer" : "finnish" + }, + "french" : { + "type" : "text", + "analyzer" : "french" + }, + "galician" : { + "type" : "text", + "analyzer" : "galician" + }, + "german" : { + "type" : "text", + "analyzer" : "german" + }, + "greek" : { + "type" : "text", + "analyzer" : "greek" + }, + "hindi" : { + "type" : "text", + "analyzer" : "hindi" + }, + "hungarian" : { + "type" : "text", + "analyzer" : "hungarian" + }, + "indonesian" : { + "type" : "text", + "analyzer" : "indonesian" + }, + "irish" : { + "type" : "text", + "analyzer" : "irish" + }, + "italian" : { + "type" : "text", + "analyzer" : "italian" + }, + "latvian" : { + "type" : "text", + "analyzer" : "latvian" + }, + "lithuanian" : { + "type" : "text", + "analyzer" : "lithuanian" + }, + "norwegian" : { + "type" : "text", + "analyzer" : "norwegian" + }, + "persian" : { + "type" : "text", + "analyzer" : "persian" + }, + "portuguese" : { + "type" : "text", + "analyzer" : "portuguese" + }, + "romanian" : { + "type" : "text", + "analyzer" : "romanian" + }, + "russian" : { + "type" : "text", + "analyzer" : "russian" + }, + "sorani" : { + "type" : "text", + "analyzer" : "sorani" + }, + "spanish" : { + "type" : "text", + "analyzer" : "spanish" + }, + "swedish" : { + "type" : "text", + "analyzer" : "swedish" + }, + "turkish" : { + "type" : "text", + "analyzer" : "turkish" + }, + "thai" : { + "type" : "text", + "analyzer" : "thai" } } }, @@ -157,6 +289,138 @@ "raw" : { "type" : "text", "analyzer" : "kaltura_keyword" + }, + "english" : { + "type" : "text", + "analyzer": "english" + }, + "arabic" : { + "type" : "text", + "analyzer": "arabic" + }, + "armenian" : { + "type" : "text", + "analyzer" : "armenian" + }, + "basque" : { + "type" : "text", + "analyzer" : "basque" + }, + "brazilian" : { + "type" : "text", + "analyzer" : "brazilian" + }, + "bulgarian" : { + "type" : "text", + "analyzer" : "bulgarian" + }, + "catalan" : { + "type" : "text", + "analyzer" : "catalan" + }, + "cjk" : { + "type" : "text", + "analyzer" : "cjk" + }, + "czech" : { + "type" : "text", + "analyzer" : "czech" + }, + "danish" : { + "type" : "text", + "analyzer" : "danish" + }, + "dutch" : { + "type" : "text", + "analyzer" : "dutch" + }, + "finnish" : { + "type" : "text", + "analyzer" : "finnish" + }, + "french" : { + "type" : "text", + "analyzer" : "french" + }, + "galician" : { + "type" : "text", + "analyzer" : "galician" + }, + "german" : { + "type" : "text", + "analyzer" : "german" + }, + "greek" : { + "type" : "text", + "analyzer" : "greek" + }, + "hindi" : { + "type" : "text", + "analyzer" : "hindi" + }, + "hungarian" : { + "type" : "text", + "analyzer" : "hungarian" + }, + "indonesian" : { + "type" : "text", + "analyzer" : "indonesian" + }, + "irish" : { + "type" : "text", + "analyzer" : "irish" + }, + "italian" : { + "type" : "text", + "analyzer" : "italian" + }, + "latvian" : { + "type" : "text", + "analyzer" : "latvian" + }, + "lithuanian" : { + "type" : "text", + "analyzer" : "lithuanian" + }, + "norwegian" : { + "type" : "text", + "analyzer" : "norwegian" + }, + "persian" : { + "type" : "text", + "analyzer" : "persian" + }, + "portuguese" : { + "type" : "text", + "analyzer" : "portuguese" + }, + "romanian" : { + "type" : "text", + "analyzer" : "romanian" + }, + "russian" : { + "type" : "text", + "analyzer" : "russian" + }, + "sorani" : { + "type" : "text", + "analyzer" : "sorani" + }, + "spanish" : { + "type" : "text", + "analyzer" : "spanish" + }, + "swedish" : { + "type" : "text", + "analyzer" : "swedish" + }, + "turkish" : { + "type" : "text", + "analyzer" : "turkish" + }, + "thai" : { + "type" : "text", + "analyzer" : "thai" } } }, diff --git a/plugins/search/providers/elastic_search/config/mapping/entry_mapping.json b/plugins/search/providers/elastic_search/config/mapping/entry_mapping.json index e586dbcc47d..6b960292557 100644 --- a/plugins/search/providers/elastic_search/config/mapping/entry_mapping.json +++ b/plugins/search/providers/elastic_search/config/mapping/entry_mapping.json @@ -68,6 +68,138 @@ }, "keyword" : { "type" : "keyword" + }, + "english" : { + "type" : "text", + "analyzer": "english" + }, + "arabic" : { + "type" : "text", + "analyzer": "arabic" + }, + "armenian" : { + "type" : "text", + "analyzer" : "armenian" + }, + "basque" : { + "type" : "text", + "analyzer" : "basque" + }, + "brazilian" : { + "type" : "text", + "analyzer" : "brazilian" + }, + "bulgarian" : { + "type" : "text", + "analyzer" : "bulgarian" + }, + "catalan" : { + "type" : "text", + "analyzer" : "catalan" + }, + "cjk" : { + "type" : "text", + "analyzer" : "cjk" + }, + "czech" : { + "type" : "text", + "analyzer" : "czech" + }, + "danish" : { + "type" : "text", + "analyzer" : "danish" + }, + "dutch" : { + "type" : "text", + "analyzer" : "dutch" + }, + "finnish" : { + "type" : "text", + "analyzer" : "finnish" + }, + "french" : { + "type" : "text", + "analyzer" : "french" + }, + "galician" : { + "type" : "text", + "analyzer" : "galician" + }, + "german" : { + "type" : "text", + "analyzer" : "german" + }, + "greek" : { + "type" : "text", + "analyzer" : "greek" + }, + "hindi" : { + "type" : "text", + "analyzer" : "hindi" + }, + "hungarian" : { + "type" : "text", + "analyzer" : "hungarian" + }, + "indonesian" : { + "type" : "text", + "analyzer" : "indonesian" + }, + "irish" : { + "type" : "text", + "analyzer" : "irish" + }, + "italian" : { + "type" : "text", + "analyzer" : "italian" + }, + "latvian" : { + "type" : "text", + "analyzer" : "latvian" + }, + "lithuanian" : { + "type" : "text", + "analyzer" : "lithuanian" + }, + "norwegian" : { + "type" : "text", + "analyzer" : "norwegian" + }, + "persian" : { + "type" : "text", + "analyzer" : "persian" + }, + "portuguese" : { + "type" : "text", + "analyzer" : "portuguese" + }, + "romanian" : { + "type" : "text", + "analyzer" : "romanian" + }, + "russian" : { + "type" : "text", + "analyzer" : "russian" + }, + "sorani" : { + "type" : "text", + "analyzer" : "sorani" + }, + "spanish" : { + "type" : "text", + "analyzer" : "spanish" + }, + "swedish" : { + "type" : "text", + "analyzer" : "swedish" + }, + "turkish" : { + "type" : "text", + "analyzer" : "turkish" + }, + "thai" : { + "type" : "text", + "analyzer" : "thai" } } }, @@ -82,6 +214,138 @@ "raw" : { "type" : "text", "analyzer" : "kaltura_keyword" + }, + "english" : { + "type" : "text", + "analyzer": "english" + }, + "arabic" : { + "type" : "text", + "analyzer": "arabic" + }, + "armenian" : { + "type" : "text", + "analyzer" : "armenian" + }, + "basque" : { + "type" : "text", + "analyzer" : "basque" + }, + "brazilian" : { + "type" : "text", + "analyzer" : "brazilian" + }, + "bulgarian" : { + "type" : "text", + "analyzer" : "bulgarian" + }, + "catalan" : { + "type" : "text", + "analyzer" : "catalan" + }, + "cjk" : { + "type" : "text", + "analyzer" : "cjk" + }, + "czech" : { + "type" : "text", + "analyzer" : "czech" + }, + "danish" : { + "type" : "text", + "analyzer" : "danish" + }, + "dutch" : { + "type" : "text", + "analyzer" : "dutch" + }, + "finnish" : { + "type" : "text", + "analyzer" : "finnish" + }, + "french" : { + "type" : "text", + "analyzer" : "french" + }, + "galician" : { + "type" : "text", + "analyzer" : "galician" + }, + "german" : { + "type" : "text", + "analyzer" : "german" + }, + "greek" : { + "type" : "text", + "analyzer" : "greek" + }, + "hindi" : { + "type" : "text", + "analyzer" : "hindi" + }, + "hungarian" : { + "type" : "text", + "analyzer" : "hungarian" + }, + "indonesian" : { + "type" : "text", + "analyzer" : "indonesian" + }, + "irish" : { + "type" : "text", + "analyzer" : "irish" + }, + "italian" : { + "type" : "text", + "analyzer" : "italian" + }, + "latvian" : { + "type" : "text", + "analyzer" : "latvian" + }, + "lithuanian" : { + "type" : "text", + "analyzer" : "lithuanian" + }, + "norwegian" : { + "type" : "text", + "analyzer" : "norwegian" + }, + "persian" : { + "type" : "text", + "analyzer" : "persian" + }, + "portuguese" : { + "type" : "text", + "analyzer" : "portuguese" + }, + "romanian" : { + "type" : "text", + "analyzer" : "romanian" + }, + "russian" : { + "type" : "text", + "analyzer" : "russian" + }, + "sorani" : { + "type" : "text", + "analyzer" : "sorani" + }, + "spanish" : { + "type" : "text", + "analyzer" : "spanish" + }, + "swedish" : { + "type" : "text", + "analyzer" : "swedish" + }, + "turkish" : { + "type" : "text", + "analyzer" : "turkish" + }, + "thai" : { + "type" : "text", + "analyzer" : "thai" } } }, diff --git a/plugins/search/providers/elastic_search/lib/elasticSearchUtils.php b/plugins/search/providers/elastic_search/lib/elasticSearchUtils.php index f6f6a229fc9..cd97e4d5cc9 100644 --- a/plugins/search/providers/elastic_search/lib/elasticSearchUtils.php +++ b/plugins/search/providers/elastic_search/lib/elasticSearchUtils.php @@ -5,13 +5,16 @@ */ class elasticSearchUtils { + const UNDERSCORE_FIELD_DELIMITER ='_'; + const DOT_FIELD_DELIMITER = '.'; /** * return the analyzed language field name * @param $language * @param $fieldName + * @param $delimiter * @return null|string */ - public static function getAnalyzedFieldName($language, $fieldName) + public static function getAnalyzedFieldName($language, $fieldName, $delimiter) { $fieldMap = array( 'english' => 'english', @@ -52,7 +55,7 @@ public static function getAnalyzedFieldName($language, $fieldName) $language = strtolower($language); if(isset($fieldMap[$language])) - return $fieldName.'_'.$fieldMap[$language]; + return $fieldName.$delimiter.$fieldMap[$language]; return null; } diff --git a/plugins/search/providers/elastic_search/lib/model/ESearchQueryAttributes.php b/plugins/search/providers/elastic_search/lib/model/ESearchQueryAttributes.php new file mode 100644 index 00000000000..8654b558fb0 --- /dev/null +++ b/plugins/search/providers/elastic_search/lib/model/ESearchQueryAttributes.php @@ -0,0 +1,29 @@ +partnerLanguages; + } + + /** + * @param array $partnerLanguages + */ + public function setPartnerLanguages($partnerLanguages) + { + $this->partnerLanguages = $partnerLanguages; + } + +} diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchCaptionItem.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchCaptionItem.php index 8258f9967db..9ee706f59fc 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchCaptionItem.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchCaptionItem.php @@ -14,6 +14,10 @@ class ESearchCaptionItem extends ESearchItem 'caption_assets.lines.end_time' => array('ESearchItemType::RANGE'=>ESearchItemType::RANGE), ); + private static $multiLanguageFields = array( + ESearchCaptionFieldName::CAPTION_CONTENT, + ); + /** * @var string */ @@ -66,7 +70,7 @@ public static function getAllowedSearchTypesForField() return array_merge(self::$allowed_search_types_for_field, parent::getAllowedSearchTypesForField()); } - public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null) + public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null) { $innerHitsConfig = kConf::get('innerHits', 'elastic'); $innerHitsSize = isset($innerHitsConfig['captionInnerHitsSize']) ? $innerHitsConfig['captionInnerHitsSize'] : self::DEFAULT_INNER_HITS_SIZE; @@ -75,13 +79,13 @@ public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSear $allowedSearchTypes = ESearchCaptionItem::getAllowedSearchTypesForField(); foreach ($eSearchItemsArr as $eSearchCaptionItem) { - self::createSingleItemSearchQuery($eSearchCaptionItem, $boolOperator, $captionQuery, $allowedSearchTypes); + self::createSingleItemSearchQuery($eSearchCaptionItem, $boolOperator, $captionQuery, $allowedSearchTypes, $queryAttributes); } return array($captionQuery); } - public static function createSingleItemSearchQuery($eSearchCaptionItem, $boolOperator, &$captionQuery, $allowedSearchTypes) + public static function createSingleItemSearchQuery($eSearchCaptionItem, $boolOperator, &$captionQuery, $allowedSearchTypes, &$queryAttributes) { $eSearchCaptionItem->validateItemInput(); switch ($eSearchCaptionItem->getItemType()) @@ -92,7 +96,7 @@ public static function createSingleItemSearchQuery($eSearchCaptionItem, $boolOpe break; case ESearchItemType::PARTIAL: $captionQuery['nested']['query']['bool'][$boolOperator][] = - kESearchQueryManager::getMultiMatchQuery($eSearchCaptionItem, $eSearchCaptionItem->getFieldName(), true); + kESearchQueryManager::getMultiMatchQuery($eSearchCaptionItem, $eSearchCaptionItem->getFieldName(), $queryAttributes); break; case ESearchItemType::STARTS_WITH: $captionQuery['nested']['query']['bool'][$boolOperator][] = @@ -121,4 +125,17 @@ protected function validateItemInput() $this->validateEmptySearchTerm($this->getFieldName(), $this->getSearchTerm()); } + public function shouldAddLanguageSearch() + { + if(in_array($this->getFieldName(), self::$multiLanguageFields)) + return true; + + return false; + } + + public function getItemMappingFieldsDelimiter() + { + return elasticSearchUtils::UNDERSCORE_FIELD_DELIMITER; + } + } \ No newline at end of file diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchCategoryItem.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchCategoryItem.php index 8361ec794a0..a131e853578 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchCategoryItem.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchCategoryItem.php @@ -45,6 +45,11 @@ class ESearchCategoryItem extends ESearchItem 'updated_at' => array('ESearchItemType::RANGE' => ESearchItemType::RANGE), ); + private static $multiLanguageFields = array( + ESearchCategoryFieldName::CATEGORY_NAME, + ESearchCategoryFieldName::CATEGORY_DESCRIPTION, + ); + /** * @return ESearchCategoryFieldName */ @@ -87,19 +92,19 @@ public static function getAllowedSearchTypesForField() return array_merge(self::$allowed_search_types_for_field, parent::getAllowedSearchTypesForField()); } - public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null) + public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null) { $categoryQuery = array(); $allowedSearchTypes = ESearchCategoryItem::getAllowedSearchTypesForField(); foreach ($eSearchItemsArr as $categorySearchItem) { - self::createSingleItemSearchQuery($categorySearchItem, $categoryQuery, $allowedSearchTypes); + self::createSingleItemSearchQuery($categorySearchItem, $categoryQuery, $allowedSearchTypes, $queryAttributes); } return $categoryQuery; } - public static function createSingleItemSearchQuery($categorySearchItem, &$categoryQuery, $allowedSearchTypes) + public static function createSingleItemSearchQuery($categorySearchItem, &$categoryQuery, $allowedSearchTypes, &$queryAttributes) { $categorySearchItem->validateItemInput(); $categorySearchItem->translateSearchTerm(); @@ -109,7 +114,7 @@ public static function createSingleItemSearchQuery($categorySearchItem, &$catego $categoryQuery[] = kESearchQueryManager::getExactMatchQuery($categorySearchItem, $categorySearchItem->getFieldName(), $allowedSearchTypes); break; case ESearchItemType::PARTIAL: - $categoryQuery[] = kESearchQueryManager::getMultiMatchQuery($categorySearchItem, $categorySearchItem->getFieldName(), false); + $categoryQuery[] = kESearchQueryManager::getMultiMatchQuery($categorySearchItem, $categorySearchItem->getFieldName(), $queryAttributes); break; case ESearchItemType::STARTS_WITH: $categoryQuery[] = kESearchQueryManager::getPrefixQuery($categorySearchItem, $categorySearchItem->getFieldName(), $allowedSearchTypes); @@ -148,4 +153,18 @@ protected function translateSearchTerm() return; } } + + public function shouldAddLanguageSearch() + { + if(in_array($this->getFieldName(), self::$multiLanguageFields)) + return true; + + return false; + } + + public function getItemMappingFieldsDelimiter() + { + return elasticSearchUtils::DOT_FIELD_DELIMITER; + } + } \ No newline at end of file diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchCuePointItem.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchCuePointItem.php index 254fb58e831..c80174074cc 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchCuePointItem.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchCuePointItem.php @@ -32,6 +32,8 @@ class ESearchCuePointItem extends ESearchItem 'cue_points.cue_point_explanation' => array('ESearchItemType::EXACT_MATCH'=> ESearchItemType::EXACT_MATCH, 'ESearchItemType::PARTIAL'=> ESearchItemType::PARTIAL, 'ESearchItemType::STARTS_WITH'=> ESearchItemType::STARTS_WITH, "ESearchItemType::EXISTS"=> ESearchItemType::EXISTS, ESearchUnifiedItem::UNIFIED), ); + private static $multiLanguageFields = array(); + /** * @return ESearchEntryFieldName */ @@ -74,7 +76,7 @@ public static function getAllowedSearchTypesForField() return array_merge(self::$allowed_search_types_for_field, parent::getAllowedSearchTypesForField()); } - public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null) + public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null) { $innerHitsConfig = kConf::get('innerHits', 'elastic'); $innerHitsSize = isset($innerHitsConfig['cuePointsInnerHitsSize']) ? $innerHitsConfig['cuePointsInnerHitsSize'] : self::DEFAULT_INNER_HITS_SIZE; @@ -83,12 +85,12 @@ public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSear $allowedSearchTypes = ESearchCuePointItem::getAllowedSearchTypesForField(); foreach ($eSearchItemsArr as $cuePointSearchItem) { - self::createSingleItemSearchQuery($cuePointSearchItem, $boolOperator, $cuePointQuery, $allowedSearchTypes); + self::createSingleItemSearchQuery($cuePointSearchItem, $boolOperator, $cuePointQuery, $allowedSearchTypes, $queryAttributes); } return array($cuePointQuery); } - public static function createSingleItemSearchQuery($cuePointSearchItem, $boolOperator, &$cuePointQuery, $allowedSearchTypes) + public static function createSingleItemSearchQuery($cuePointSearchItem, $boolOperator, &$cuePointQuery, $allowedSearchTypes, &$queryAttributes) { $cuePointSearchItem->validateItemInput(); switch ($cuePointSearchItem->getItemType()) @@ -99,7 +101,7 @@ public static function createSingleItemSearchQuery($cuePointSearchItem, $boolOpe break; case ESearchItemType::PARTIAL: $cuePointQuery['nested']['query']['bool'][$boolOperator][] = - kESearchQueryManager::getMultiMatchQuery($cuePointSearchItem, $cuePointSearchItem->getFieldName(), false); + kESearchQueryManager::getMultiMatchQuery($cuePointSearchItem, $cuePointSearchItem->getFieldName(), $queryAttributes); break; case ESearchItemType::STARTS_WITH: $cuePointQuery['nested']['query']['bool'][$boolOperator][] = @@ -127,4 +129,18 @@ protected function validateItemInput() $this->validateAllowedSearchTypes($allowedSearchTypes, $this->getFieldName()); $this->validateEmptySearchTerm($this->getFieldName(), $this->getSearchTerm()); } + + public function shouldAddLanguageSearch() + { + if(in_array($this->getFieldName(), self::$multiLanguageFields)) + return true; + + return false; + } + + public function getItemMappingFieldsDelimiter() + { + + } + } diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchEntryItem.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchEntryItem.php index ea564bab52f..b8d27610783 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchEntryItem.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchEntryItem.php @@ -50,6 +50,11 @@ class ESearchEntryItem extends ESearchItem 'access_control_id' => array('ESearchItemType::EXACT_MATCH'=> ESearchItemType::EXACT_MATCH, 'ESearchItemType::EXISTS' => ESearchItemType::EXISTS), ); + private static $multiLanguageFields = array( + ESearchEntryFieldName::ENTRY_NAME, + ESearchEntryFieldName::ENTRY_DESCRIPTION, + ); + /** * @return ESearchEntryFieldName */ @@ -92,18 +97,18 @@ public static function getAllowedSearchTypesForField() return array_merge(self::$allowed_search_types_for_field, parent::getAllowedSearchTypesForField()); } - public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null) + public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null) { $entryQuery = array(); $allowedSearchTypes = ESearchEntryItem::getAllowedSearchTypesForField(); foreach ($eSearchItemsArr as $entrySearchItem) { - self::getSingleItemSearchQuery($entrySearchItem, $entryQuery, $allowedSearchTypes); + self::getSingleItemSearchQuery($entrySearchItem, $entryQuery, $allowedSearchTypes, $queryAttributes); } return $entryQuery; } - public static function getSingleItemSearchQuery($entrySearchItem, &$entryQuery, $allowedSearchTypes) + public static function getSingleItemSearchQuery($entrySearchItem, &$entryQuery, $allowedSearchTypes, &$queryAttributes) { $entrySearchItem->validateItemInput(); switch ($entrySearchItem->getItemType()) @@ -112,7 +117,7 @@ public static function getSingleItemSearchQuery($entrySearchItem, &$entryQuery, $entryQuery[] = kESearchQueryManager::getExactMatchQuery($entrySearchItem, $entrySearchItem->getFieldName(), $allowedSearchTypes); break; case ESearchItemType::PARTIAL: - $entryQuery[] = kESearchQueryManager::getMultiMatchQuery($entrySearchItem, $entrySearchItem->getFieldName(), false); + $entryQuery[] = kESearchQueryManager::getMultiMatchQuery($entrySearchItem, $entrySearchItem->getFieldName(), $queryAttributes); break; case ESearchItemType::STARTS_WITH: $entryQuery[] = kESearchQueryManager::getPrefixQuery($entrySearchItem, $entrySearchItem->getFieldName(), $allowedSearchTypes); @@ -134,5 +139,18 @@ protected function validateItemInput() $this->validateAllowedSearchTypes($allowedSearchTypes, $this->getFieldName()); $this->validateEmptySearchTerm($this->getFieldName(), $this->getSearchTerm()); } - + + public function shouldAddLanguageSearch() + { + if(in_array($this->getFieldName(), self::$multiLanguageFields)) + return true; + + return false; + } + + public function getItemMappingFieldsDelimiter() + { + return elasticSearchUtils::DOT_FIELD_DELIMITER; + } + } diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchItem.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchItem.php index a09d5f876c5..8899e3e686f 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchItem.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchItem.php @@ -77,6 +77,10 @@ public static function getAllowedSearchTypesForField() return array(); } - abstract public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null); + abstract public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null); + + abstract public function shouldAddLanguageSearch(); + + abstract public function getItemMappingFieldsDelimiter(); } diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchMetadataItem.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchMetadataItem.php index 21397d9168a..96a62f01d4d 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchMetadataItem.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchMetadataItem.php @@ -106,7 +106,7 @@ public static function getAllowedSearchTypesForField() return array_merge(self::$allowed_search_types_for_field, parent::getAllowedSearchTypesForField()); } - public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null) + public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null) { $innerHitsConfig = kConf::get('innerHits', 'elastic'); $innerHitsSize = isset($innerHitsConfig['metadataInnerHitsSize']) ? $innerHitsConfig['metadataInnerHitsSize'] : self::DEFAULT_INNER_HITS_SIZE; @@ -116,12 +116,12 @@ public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSear foreach ($eSearchItemsArr as $metadataESearchItem) { - self::createSingleItemSearchQuery($metadataESearchItem, $boolOperator, $metadataQuery, $allowedSearchTypes); + self::createSingleItemSearchQuery($metadataESearchItem, $boolOperator, $metadataQuery, $allowedSearchTypes, $queryAttributes); } return array($metadataQuery); } - public static function createSingleItemSearchQuery($metadataESearchItem, $boolOperator, &$metadataQuery, $allowedSearchTypes) + public static function createSingleItemSearchQuery($metadataESearchItem, $boolOperator, &$metadataQuery, $allowedSearchTypes, &$queryAttributes) { switch ($metadataESearchItem->getItemType()) { @@ -131,7 +131,7 @@ public static function createSingleItemSearchQuery($metadataESearchItem, $boolOp break; case ESearchItemType::PARTIAL: $metadataQuery['nested']['query']['bool'][$boolOperator][] = - self::getMetadataMultiMatchQuery($metadataESearchItem); + self::getMetadataMultiMatchQuery($metadataESearchItem, $queryAttributes); break; case ESearchItemType::STARTS_WITH: $metadataQuery['nested']['query']['bool'][$boolOperator][] = @@ -182,9 +182,9 @@ protected static function getMetadataExactMatchQuery($searchItem, $allowedSearch return $metadataExactMatch; } - protected static function getMetadataMultiMatchQuery($searchItem) + protected static function getMetadataMultiMatchQuery($searchItem, &$queryAttributes) { - $metadataMultiMatch = kESearchQueryManager::getMultiMatchQuery($searchItem, 'metadata.value_text', false); + $metadataMultiMatch = kESearchQueryManager::getMultiMatchQuery($searchItem, 'metadata.value_text', $queryAttributes); if(ctype_digit($searchItem->getSearchTerm()))//add metadata.value_int $metadataMultiMatch['bool']['should'][0]['multi_match']['fields'][] = 'metadata.value_int^3'; @@ -309,4 +309,14 @@ protected static function getMetadataFieldIdQuery($metadataESearchItem) return $metadataFieldIdQuery; } + public function shouldAddLanguageSearch() + { + return false; + } + + public function getItemMappingFieldsDelimiter() + { + + } + } \ No newline at end of file diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchOperator.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchOperator.php index 14619fc0f69..e32527418ba 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchOperator.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchOperator.php @@ -48,7 +48,7 @@ public function setSearchItems($searchItems) $this->searchItems = $searchItems; } - public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null) + public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null) { if (!$eSearchItemsArr || !count($eSearchItemsArr)) { @@ -71,7 +71,7 @@ public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSear } $categorizedSearchItems = self::getCategorizedSearchItems($eSearchItemsArr); - $outQuery = self::createSearchQueryForItems($categorizedSearchItems, $boolOperator, $eSearchOperatorType); + $outQuery = self::createSearchQueryForItems($categorizedSearchItems, $boolOperator, $queryAttributes, $eSearchOperatorType); return $outQuery; } @@ -107,7 +107,7 @@ private static function getCategorizedSearchItems($eSearchCaptionItemsArr) return $allCategorizedSearchItems; } - private static function createSearchQueryForItems($categorizedSearchItems, $boolOperator, $eSearchOperatorType) + private static function createSearchQueryForItems($categorizedSearchItems, $boolOperator, &$queryAttributes, $eSearchOperatorType) { $outQuery = array(); foreach ($categorizedSearchItems as $categorizedSearchItem) @@ -121,7 +121,7 @@ private static function createSearchQueryForItems($categorizedSearchItems, $bool $operatorType = $categorizedSearchItem['operatorType']; } - $subQuery = call_user_func(array($itemClassName, 'createSearchQuery'), $itemSearchItems, $boolOperator, $operatorType); + $subQuery = call_user_func(array($itemClassName, 'createSearchQuery'), $itemSearchItems, $boolOperator, $queryAttributes, $operatorType); foreach ($subQuery as $key => $value) { @@ -143,4 +143,14 @@ public function getType() return 'operator'; } + public function shouldAddLanguageSearch() + { + + } + + public function getItemMappingFieldsDelimiter() + { + + } + } diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchUnifiedItem.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchUnifiedItem.php index 19a50b897c9..bd6e7db5792 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchUnifiedItem.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchUnifiedItem.php @@ -34,7 +34,7 @@ public function getType() return self::UNIFIED; } - public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null) + public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null) { $outQuery = array(); @@ -43,10 +43,10 @@ public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSear self::validateUnifiedAllowedTypes($eSearchUnifiedItem); $subQuery = array(); $entryUnifiedQuery = array(); - self::addEntryFieldsToUnifiedQuery($eSearchUnifiedItem,$entryUnifiedQuery); - self::addCuePointFieldsToUnifiedQuery($eSearchUnifiedItem,$entryUnifiedQuery); - self::addCaptionFieldsToUnifiedQuery($eSearchUnifiedItem,$entryUnifiedQuery); - self::addMetadataFieldsToUnifiedQuery($eSearchUnifiedItem,$entryUnifiedQuery); + self::addEntryFieldsToUnifiedQuery($eSearchUnifiedItem, $entryUnifiedQuery, $queryAttributes); + self::addCuePointFieldsToUnifiedQuery($eSearchUnifiedItem,$entryUnifiedQuery, $queryAttributes); + self::addCaptionFieldsToUnifiedQuery($eSearchUnifiedItem,$entryUnifiedQuery, $queryAttributes); + self::addMetadataFieldsToUnifiedQuery($eSearchUnifiedItem,$entryUnifiedQuery, $queryAttributes); if(count($entryUnifiedQuery)) { @@ -60,7 +60,7 @@ public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSear return $outQuery; } - private static function addEntryFieldsToUnifiedQuery($eSearchUnifiedItem, &$entryUnifiedQuery) + private static function addEntryFieldsToUnifiedQuery($eSearchUnifiedItem, &$entryUnifiedQuery, &$queryAttributes) { $entryItems = array(); $entryAllowedFields = ESearchEntryItem::getAllowedSearchTypesForField(); @@ -81,11 +81,11 @@ private static function addEntryFieldsToUnifiedQuery($eSearchUnifiedItem, &$entr if(count($entryItems)) { - $entryUnifiedQuery = ESearchEntryItem::createSearchQuery($entryItems, 'should', null); + $entryUnifiedQuery = ESearchEntryItem::createSearchQuery($entryItems, 'should', $queryAttributes, null); } } - private static function addCuePointFieldsToUnifiedQuery($eSearchUnifiedItem, &$entryUnifiedQuery) + private static function addCuePointFieldsToUnifiedQuery($eSearchUnifiedItem, &$entryUnifiedQuery, &$queryAttributes) { $cuePointAllowedFields = ESearchCuePointItem::getAllowedSearchTypesForField(); $cuePointItems = array(); @@ -106,13 +106,13 @@ private static function addCuePointFieldsToUnifiedQuery($eSearchUnifiedItem, &$e if(count($cuePointItems)) { - $cuePointQuery = ESearchCuePointItem::createSearchQuery($cuePointItems, 'should', null); + $cuePointQuery = ESearchCuePointItem::createSearchQuery($cuePointItems, 'should', $queryAttributes, null); if(count($cuePointQuery)) $entryUnifiedQuery[] = $cuePointQuery; } } - private static function addCaptionFieldsToUnifiedQuery($eSearchUnifiedItem, &$entryUnifiedQuery) + private static function addCaptionFieldsToUnifiedQuery($eSearchUnifiedItem, &$entryUnifiedQuery, &$queryAttributes) { $captionItems = array(); $captionAllowedFields = ESearchCaptionItem::getAllowedSearchTypesForField(); @@ -132,13 +132,13 @@ private static function addCaptionFieldsToUnifiedQuery($eSearchUnifiedItem, &$en if(count($captionItems)) { - $captionQuery = ESearchCaptionItem::createSearchQuery($captionItems, 'should', null); + $captionQuery = ESearchCaptionItem::createSearchQuery($captionItems, 'should', $queryAttributes, null); if(count($captionQuery)) $entryUnifiedQuery[] = $captionQuery; } } - private static function addMetadataFieldsToUnifiedQuery($eSearchUnifiedItem, &$entryUnifiedQuery) + private static function addMetadataFieldsToUnifiedQuery($eSearchUnifiedItem, &$entryUnifiedQuery, &$queryAttributes) { //metadata is special case - we don't need to check for allowed field types $metadataItems = array(); @@ -149,7 +149,7 @@ private static function addMetadataFieldsToUnifiedQuery($eSearchUnifiedItem, &$e $metadataItem->setRange($eSearchUnifiedItem->getRange()); $metadataItems[] = $metadataItem; - $metadataQuery = ESearchMetadataItem::createSearchQuery($metadataItems, 'should', null); + $metadataQuery = ESearchMetadataItem::createSearchQuery($metadataItems, 'should', $queryAttributes, null); if(count($metadataQuery)) $entryUnifiedQuery[] = $metadataQuery; } @@ -164,4 +164,14 @@ protected static function validateUnifiedAllowedTypes($eSearchUnifiedItem) } } + public function shouldAddLanguageSearch() + { + + } + + public function getItemMappingFieldsDelimiter() + { + + } + } \ No newline at end of file diff --git a/plugins/search/providers/elastic_search/lib/model/items/ESearchUserItem.php b/plugins/search/providers/elastic_search/lib/model/items/ESearchUserItem.php index 19945f1009d..16d924751f5 100644 --- a/plugins/search/providers/elastic_search/lib/model/items/ESearchUserItem.php +++ b/plugins/search/providers/elastic_search/lib/model/items/ESearchUserItem.php @@ -30,6 +30,8 @@ class ESearchUserItem extends ESearchItem 'created_at' => array('ESearchItemType::RANGE'=>ESearchItemType::RANGE), ); + private static $multiLanguageFields = array(); + /** * @return ESearchUserFieldName */ @@ -72,18 +74,18 @@ public static function getAllowedSearchTypesForField() return array_merge(self::$allowed_search_types_for_field, parent::getAllowedSearchTypesForField()); } - public static function createSearchQuery($eSearchItemsArr, $boolOperator, $eSearchOperatorType = null) + public static function createSearchQuery($eSearchItemsArr, $boolOperator, &$queryAttributes, $eSearchOperatorType = null) { $userQuery = array(); $allowedSearchTypes = ESearchUserItem::getAllowedSearchTypesForField(); foreach ($eSearchItemsArr as $userSearchItem) { - self::getSingleItemSearchQuery($userSearchItem, $userQuery, $allowedSearchTypes); + self::getSingleItemSearchQuery($userSearchItem, $userQuery, $allowedSearchTypes, $queryAttributes); } return $userQuery; } - private static function getSingleItemSearchQuery($userSearchItem, &$userQuery, $allowedSearchTypes) + private static function getSingleItemSearchQuery($userSearchItem, &$userQuery, $allowedSearchTypes, &$queryAttributes) { switch ($userSearchItem->getItemType()) { @@ -91,7 +93,7 @@ private static function getSingleItemSearchQuery($userSearchItem, &$userQuery, $ $userQuery[] = kESearchQueryManager::getExactMatchQuery($userSearchItem, $userSearchItem->getFieldName(), $allowedSearchTypes); break; case ESearchItemType::PARTIAL: - $userQuery[] = kESearchQueryManager::getMultiMatchQuery($userSearchItem, $userSearchItem->getFieldName(), false); + $userQuery[] = kESearchQueryManager::getMultiMatchQuery($userSearchItem, $userSearchItem->getFieldName(), $queryAttributes); break; case ESearchItemType::STARTS_WITH: $userQuery[] = kESearchQueryManager::getPrefixQuery($userSearchItem, $userSearchItem->getFieldName(), $allowedSearchTypes); @@ -106,4 +108,18 @@ private static function getSingleItemSearchQuery($userSearchItem, &$userQuery, $ KalturaLog::log("Undefined item type[".$userSearchItem->getItemType()."]"); } } + + public function shouldAddLanguageSearch() + { + if(in_array($this->getFieldName(), self::$multiLanguageFields)) + return true; + + return false; + } + + public function getItemMappingFieldsDelimiter() + { + + } + } \ No newline at end of file diff --git a/plugins/search/providers/elastic_search/lib/model/kESearchQueryManager.php b/plugins/search/providers/elastic_search/lib/model/kESearchQueryManager.php index 25f73d3f8e0..206258bfa5d 100644 --- a/plugins/search/providers/elastic_search/lib/model/kESearchQueryManager.php +++ b/plugins/search/providers/elastic_search/lib/model/kESearchQueryManager.php @@ -37,7 +37,7 @@ class kESearchQueryManager const DEFAULT_TRIGRAM_PERCENTAGE = 80; - public static function getMultiMatchQuery($searchItem, $fieldName, $shouldAddLanguageFields = false) + public static function getMultiMatchQuery($searchItem, $fieldName, &$queryAttributes) { $multiMatch = array(); $multiMatch[self::BOOL_KEY][self::SHOULD_KEY][0][self::MULTI_MATCH_KEY][self::QUERY_KEY] = $searchItem->getSearchTerm(); @@ -47,8 +47,16 @@ public static function getMultiMatchQuery($searchItem, $fieldName, $shouldAddLan ); $multiMatch[self::BOOL_KEY][self::SHOULD_KEY][0][self::MULTI_MATCH_KEY][self::TYPE_KEY] = self::MOST_FIELDS; - if($shouldAddLanguageFields) - $multiMatch[self::BOOL_KEY][self::SHOULD_KEY][0][self::MULTI_MATCH_KEY][self::FIELDS_KEY][] = $fieldName.'_*^2'; + if($searchItem->shouldAddLanguageSearch()) + { + $languages = $queryAttributes->getPartnerLanguages(); + foreach ($languages as $language) + { + $mappingLanguageField = elasticSearchUtils::getAnalyzedFieldName($language, $fieldName, $searchItem->getItemMappingFieldsDelimiter()); + if($mappingLanguageField) + $multiMatch[self::BOOL_KEY][self::SHOULD_KEY][0][self::MULTI_MATCH_KEY][self::FIELDS_KEY][] = $mappingLanguageField.'^2'; + } + } $trigramFieldName = $fieldName.'.'.self::NGRAMS_FIELD_SUFFIX; $multiMatch[self::BOOL_KEY][self::SHOULD_KEY][1][self::MATCH_KEY][$trigramFieldName][self::QUERY_KEY] = $searchItem->getSearchTerm(); diff --git a/plugins/search/providers/elastic_search/lib/search/kBaseSearch.php b/plugins/search/providers/elastic_search/lib/search/kBaseSearch.php index 4e8299ff2a8..de5ebb028d7 100644 --- a/plugins/search/providers/elastic_search/lib/search/kBaseSearch.php +++ b/plugins/search/providers/elastic_search/lib/search/kBaseSearch.php @@ -8,10 +8,12 @@ abstract class kBaseSearch { protected $elasticClient; protected $query; + protected $queryAttributes; public function __construct() { $this->elasticClient = new elasticClient(); + $this->queryAttributes = new ESearchQueryAttributes(); } public abstract function doSearch(ESearchOperator $eSearchOperator, $statuses = array(),kPager $pager = null, ESearchOrderBy $order = null); @@ -20,7 +22,7 @@ public abstract function getPeerName(); protected function execSearch(ESearchOperator $eSearchOperator) { - $subQuery = $eSearchOperator->createSearchQuery($eSearchOperator->getSearchItems(), null, $eSearchOperator->getOperator()); + $subQuery = $eSearchOperator->createSearchQuery($eSearchOperator->getSearchItems(), null, $this->queryAttributes, $eSearchOperator->getOperator()); $this->applyElasticSearchConditions($subQuery); KalturaLog::debug("Elasticsearch query [".print_r($this->query, true)."]"); $result = $this->elasticClient->search($this->query); @@ -30,6 +32,7 @@ protected function execSearch(ESearchOperator $eSearchOperator) protected function initQuery(array $statuses, kPager $pager = null, ESearchOrderBy $order = null) { $partnerId = kBaseElasticEntitlement::$partnerId; + $this->initQueryAttributes($partnerId); $this->initBasePartnerFilter($partnerId, $statuses); $this->initPager($pager); $this->initOrderBy($order); @@ -100,4 +103,25 @@ protected function applyElasticSearchConditions($conditions) $this->query['body']['query']['bool']['must'] = array($conditions); } + protected function initQueryAttributes($partnerId) + { + $this->initPartnerLanguages($partnerId); + } + + protected function initPartnerLanguages($partnerId) + { + $partner = PartnerPeer::retrieveByPK($partnerId); + if(!$partner) + return; + + $partnerLanguages = $partner->getESearchLanguages(); + if(!count($partnerLanguages)) + { + //if no languages are set for partner - set the default to english + $partnerLanguages = array('english'); + } + + $this->queryAttributes->setPartnerLanguages($partnerLanguages); + } + }