In [1]:
import json

import sys
sys.path.insert(0,'..')

from ai_es_utils.queries.composers import DecayLastScrapedGroupedComposer, GroupByNameComposer
from ai_es_utils.queries.components import *
from ai_es_utils.queries.models.payload import RequestPayload, Query

from ai_es_utils.services.search.geolocation_service import GeoLocationService, ElasticSearchService
from ai_es_utils.services.enrichment.job2jobs_client import Job2JobsService
from ai_es_utils.services.enrichment.job2skills_client import Job2SkillsService
from ai_es_utils.search.search_executor import SearchExecutor

from ai_es_utils.queries.builder.query_expander import BANKING_JOB2JOBS
from ai_es_utils.queries.builder.queries import MATCH_EXECUTIVE_QUERY
from ai_es_utils.queries.builder.country_names import COUNTRY_NAMES

Build the search by selecting which query components to use:

In [2]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed right away
    instead of lingering until garbage collection, and it is decoded as UTF-8
    (the JSON interchange encoding) rather than the platform's locale default.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# Static lookup data consumed by the query components.
gender_map = _load_json("../data/map_to_gender_specific_job_titles.json")
country_names = _load_json("../data/country_names_map.json")
executive_query = _load_json("../data/match_executive_query.json")

# Hand-curated expansions handed to the expansion queries.
curated_skills = _load_json("../data/curated_skills_expansion.json")
curated_jobs = _load_json("../data/curated_jobs_expansion.json")

# Geolocation lookups go through the "coordinates" Elasticsearch service.
coordinates_service = ElasticSearchService("es-dev.internal.talentwunder.com", 80, "coordinates")
geolocation_service = GeoLocationService(coordinates_service)

# Both enrichment clients talk to the same API host.
_api_base_url = "https://api-dev.talentwunder.com"
job2jobs_service = Job2JobsService(_api_base_url, topn=20)
job2skills_service = Job2SkillsService(_api_base_url, topn=20)

In [3]:
# Ordered list of query components. Each component is later asked for its
# partial query via `.query(request, bearer_token=...)`, and the resulting list
# is handed to `query_composer`.
query_stack = [
    # Filter queries:
    IncludePortalsQuery(),
    GenderQuery(),
    IsStudentQuery(),
    IsFreelancerQuery(),
    IsRecruiterQuery(),
    IsConsultantQuery(),
    IsEntrepreneurQuery(),
    IsScientistQuery(),
    LanguagesQuery(),
    MonthsWorkingQuery(),
    MinMobilityQuery(),
    MinChangeProbabilityQuery(),
    HasPhoneQuery(),
    HasEmailQuery(),
    DistanceQuery(geolocation_service=geolocation_service),
    CountryQuery(country_names=country_names),
    # Must not:
    HideInSearchQuery(),
    ExecutiveOnlyQuery(curated_executive_sub_query=executive_query),
    ExcludePortalsQuery(),
    # Should:
    LocationQuery(),
    # Must:
    IndustryCodeQuery(),
    SkillQuery(),
    FunctionScoreQuery(),
    # The next four components share name="job_title_expansion";
    # presumably this is how the composer groups them into one named
    # sub-query -- TODO confirm against the composer implementation.
    JobTitleQuery(gender_map, name="job_title_expansion"),
    # Job-title expansion backed by the job2jobs service plus the curated list.
    JobTitleExpansionQuery(job2jobs_service=job2jobs_service, gender_normalization_map=gender_map, curated_jobs_expansion=curated_jobs, name="job_title_expansion"),
    # Skill expansion backed by the job2skills service plus the curated list.
    SkillExpansionQuery(job2skills_service=job2skills_service, gender_normalization_map=gender_map, curated_skills_expansion=curated_skills, name="job_title_expansion"),
    MinimumShouldMatchQuery(name="job_title_expansion", bool_key="must"),
    WorksAtQuery(),
    WorksNotAtQuery(),
    WorksAtPreviouslyQuery(),
    WorksNotAtPreviouslyQuery(),
    # Other:
    SortQuery(),
    SizeAndOffsetQuery(),
    HighlightQuery()
]

# Composer that merges the partial queries into the final request body.
query_composer = DecayLastScrapedGroupedComposer()

In [4]:
import os

# SECURITY: never paste a real access token into the notebook. A JWT embeds
# the user's identity and permissions and ends up in version control and in
# every shared copy of this file. Any token that was committed here before
# should be treated as leaked and revoked if it is still valid.
#
# Export the token before starting Jupyter, e.g.
#   export TW_BEARER_TOKEN="<jwt>"
_raw_token = os.environ.get("TW_BEARER_TOKEN", "").strip()
if not _raw_token:
    raise RuntimeError(
        "Environment variable TW_BEARER_TOKEN is not set; "
        "export a valid API token before running this notebook."
    )

# Keep the exact "Bearer: " prefix the previously hard-coded value used, but
# accept a variable that already contains the full header value.
if _raw_token.startswith("Bearer"):
    bearer_token = _raw_token
else:
    bearer_token = "Bearer: " + _raw_token

# Minimal example request: male Java developers.
request = RequestPayload(
    query=Query(
        jobTitle="developer",
        skills=["java"],
        isMale=True,
    )
)



In [5]:
# Load the reference fixture; the context manager closes the file handle
# instead of leaving it open until garbage collection.
with open("test_full_payload.json", encoding="utf-8") as fixture_file:
    test_source = json.load(fixture_file)

# NOTE: this rebinds `request`, replacing any request built in an earlier cell.
request = RequestPayload(**test_source["payload"])

# Ask every component for its partial query, in stack order.
query_list = [
    component.query(request, bearer_token=bearer_token).query
    for component in query_stack
]

# Merge the partial queries into the final search body and display it.
query = query_composer(query_list, timeout="59s", track_scores=True)
query

{'query': {'function_score': {'functions': [{'gauss': {'lastScraped': {'decay': 0.3,
       'offset': '30d',
       'origin': 'now',
       'scale': '90d'}}}],
   'query': {'bool': {'filter': [{'terms': {'_class': ['bintray', 'techgig']}},
      {'term': {'gender': 'm'}},
      {'term': {'isStudent': True}},
      {'term': {'isFreelancer': True}},
      {'term': {'isRecruiter': True}},
      {'term': {'isConsultant': True}},
      {'term': {'isEntrepreneur': True}},
      {'term': {'isScientist': True}},
      {'bool': {'should': [{'query_string': {'query': 'spanish AND deutsch',
           'fields': ['languagesString^1.0'],
           'default_operator': 'or',
           'type': 'best_fields'}}]}},
      {'range': {'monthsWorking': {'include_lower': True,
         'include_upper': True,
         'from': 12,
         'to': 48}}},
      {'range': {'mobility': {'gte': 55}}},
      {'range': {'changeProbability': {'gte': 89}}},
      {'term': {'hasPhone': True}},
      {'term': {'hasEmail

For the comparison, we need to disregard the additional `_name` fields that we introduced to group the queries.

In [6]:
from deepdiff import DeepDiff

# Compare the freshly composed query with the reference query stored in the
# fixture; list order is ignored for the comparison.
expected_query = test_source["autoquery"]
query_diff = DeepDiff(query, expected_query, ignore_order=True)
print(query_diff)

{'dictionary_item_removed': [root['query']['function_score']['query']['bool']['must'][3]['bool']['_name'], root['query']['function_score']['query']['bool']['must'][4]['bool']['_name'], root['query']['function_score']['query']['bool']['must'][5]['bool']['_name'], root['query']['function_score']['query']['bool']['must'][6]['bool']['_name'], root['query']['function_score']['query']['bool']['filter'][15]['bool']['_name']]}


We can safely ignore the reported `_name` items: the new implementation adds them to make the grouping work, so they are absent from the reference query.

In [7]:
# Persist the composed query for manual inspection. The context manager
# guarantees the file is flushed and closed; passing a bare open(...) to
# json.dump leaves the handle open and may leave the file incomplete on disk.
with open("test.json", "w") as output_file:
    json.dump(query, output_file, indent=2)

# Putting everything together

In [8]:
# Elasticsearch service the executor runs the composed query against.
# NOTE(review): third argument is "prod" although the host is the dev
# cluster -- confirm this is the intended target.
profile_search_service = ElasticSearchService("es-dev.internal.talentwunder.com", 80, "prod")

# Bundle the search backend, the component stack and the composer into a
# single callable.
smart_search = SearchExecutor(
    profile_search_service,
    query_stack=query_stack,
    query_composer=query_composer,
)

In [9]:
# Run the full search for the current `request`; the returned result is shown
# as the cell output. `request` here is whatever the most recently executed
# cell bound it to (it is rebound from the fixture payload further up).
smart_search(request, bearer_token=bearer_token, history_id="asdf")

{'profileInstanceList': [],
 'profileInstanceCount': 0,
 'location': GeoPoint(lat=54.32133, long=10.13489),
 'semanticExpansion': {'jobs': [{'name': 'senior java developer',
    'isChecked': True},
   {'name': 'senior java engineer', 'isChecked': True},
   {'name': 'scala developer', 'isChecked': True},
   {'name': 'junior java developer', 'isChecked': True},
   {'name': 'java team leader', 'isChecked': True},
   {'name': 'freelance java developer', 'isChecked': True},
   {'name': 'j2ee developer', 'isChecked': True},
   {'name': 'senior j2ee consultant', 'isChecked': True},
   {'name': 'java software developer', 'isChecked': True},
   {'name': 'java architect', 'isChecked': True},
   {'name': 'java analyst developer', 'isChecked': True},
   {'name': 'middleware developer', 'isChecked': True},
   {'name': 'java j2ee developer', 'isChecked': True},
   {'name': 'java full stack developer', 'isChecked': True},
   {'name': 'java analyst programmer', 'isChecked': True},
   {'name': 'java te

In [11]:
# Example: male software developers, location "Berlin" with distance=50.
berlin_developer_query = Query(
    jobTitle="Software Developer",
    location="Berlin",
    isMale=True,
    distance=50,
)
request = RequestPayload(query=berlin_developer_query)

# Execute the search and display the response as the cell output.
smart_search(request, bearer_token=bearer_token, history_id="asdf")

{'profileInstanceList': [{'_class': 'xing',
   'profileURL': 'https://www.xing.com/profile/Reza_Toorani',
   'uid': '5f18de83d7951f0caa556683',
   'handle': 'reza_toorani',
   'imageURL': 'https://profile-images.xing.com/images/9726cbd26fa369c2f1cfadc3fa01444c-9/reza-toorani.256x256.jpg',
   'fullName': 'Reza Toorani',
   'name': 'Reza Toorani',
   'firstName': 'Reza',
   'lastName': 'Toorani',
   'geoPoint': {'lat': 52.52000659999999, 'lon': 13.404954},
   'address': {'countryCode': 'de',
    'country': 'Deutschland',
    'state': '6.0',
    'city': 'Berlin'},
   'location': '<mark>Berlin</mark>, Germany',
   'jobTitle': 'Senior <mark>Backend</mark> <mark>Developer</mark>',
   'previousJobTitles': ['<mark>Senior</mark> <mark>Software</mark> <mark>Developer</mark>',
    'Full Stack <mark>Developer</mark>',
    'Mid VB.NET <mark>Developer</mark>',
    'Junior VB.NET <mark>Developer</mark>'],
   'industry': 'Employee, Senior Backend Developer, Sablono GmbH',
   'company': 'Sablono GmbH',