# End-to-end cognitive search pipeline with skills in pure Python

In [1]:
from azuresearch.indexers import IndexerParameters
from azuresearch.indexers.indexer import Indexer

from azuresearch.skills import Skillset
from azuresearch.skills.predefined.cognitive_skills import (
    EntityRecognitionSkill,
    KeyPhraseExtractionSkill,
    LanguageDetectionSkill,
    SplitSkill)

### 1. Define data source

In [2]:
import json,os
from azuresearch.data_source import DataSource

# Load the data source settings from blob_config.json (resolved against the
# current working directory). The context manager closes the file handle as
# soon as the JSON has been parsed instead of leaving it to the garbage collector.
with open("blob_config.json", encoding="utf-8") as config_file:
    config = json.load(config_file)

# Build the DataSource from the loaded settings and (re)create it: an existing
# one is deleted first (per the method name) before create() is called.
datasource = DataSource.load(config)
datasource.delete_if_exists()
datasource.create()

### 2. Create fields

In [3]:
from azuresearch.indexes import StringField, CollectionField, Field, Index

# Generic form: a field declared with an explicit name and EDM type.
field = Field(name="example", field_type="Edm.String")

# Fields used by this cognitive search pipeline.
id_field = StringField("id", sortable=True, key=True)
content_field = StringField("content")
language_code_field = StringField("languageCode", sortable=True)
key_phrases_field = CollectionField("keyPhrases")
organizations_field = CollectionField("organizations")
translated_text_field = StringField("translatedText")

# Ordered list of the field objects handed to the Index below.
fields = [
    id_field,
    content_field,
    language_code_field,
    key_phrases_field,
    organizations_field,
    translated_text_field,
]

In [4]:
# Show the dict (JSON-ready) representation of a field object; the cell output
# below lists the keys it produces for `translatedText`.
translated_text_field.to_dict()

{'name': 'translatedText',
 'type': 'Edm.String',
 'searchable': True,
 'filterable': False,
 'sortable': False,
 'facetable': False,
 'key': False,
 'retrievable': True}

In [5]:
# Round trip: rebuild a StringField from an existing dict definition, then
# serialize it again to compare with the input.
translated_text_field_definition = {
    'name': 'translatedText',
    'type': 'Edm.String',
    'searchable': True,
    'filterable': False,
    'sortable': False,
    'facetable': False,
    'key': False,
    'retrievable': True,
}
new__translated_text_field = StringField.load(translated_text_field_definition)

new__translated_text_field.to_dict()

{'name': 'translatedText',
 'type': 'Edm.String',
 'searchable': True,
 'filterable': False,
 'sortable': False,
 'facetable': False,
 'key': False,
 'retrievable': True}

### 3. Create index

In [6]:
from azuresearch.indexes import Index
# Declare the index "my-index" over the field objects collected in `fields`.
index = Index("my-index", fields=fields)
# delete_if_exists() runs before create(); presumably this removes a previous
# "my-index" so the cell can be re-run — confirm against the azuresearch package.
index.delete_if_exists()
index.create()

### 4. Define skills, including the matching field mappings
    

In [7]:
# Instantiate the four predefined cognitive skills used by this pipeline.
# Entity recognition is restricted to the "Organization" category.
ner_skill = EntityRecognitionSkill(categories=["Organization"])
language_detection_skill = LanguageDetectionSkill()
# Split the text with a maximum page length of 4000.
split_skill = SplitSkill(maximum_page_length=4000)
keyphrases_skill = KeyPhraseExtractionSkill()

In [8]:
# Map skill outputs to index fields (aka FieldOutputMapping): each call links
# one output of a skill to the field object that should receive it.
# Presumably these links are what skillset.get_output_field_mappings() returns
# when the indexer is defined — confirm against the azuresearch package.
keyphrases_skill.key_phrases.map_to(key_phrases_field)
ner_skill.organization.map_to(organizations_field)
language_detection_skill.language_code.map_to(language_code_field)

In [9]:
# Chain skills: key-phrase extraction takes its text from the split skill's
# output and its language code from the language detection skill's output.
keyphrases_skill.set_inputs(
    text=split_skill.text_items,
    language_code=language_detection_skill.language_code,
)


### 5. Define skillset and indexer

In [10]:
# Group the skills defined above into a single skillset and (re)create it.
# NOTE(review): the description says "one skill" although four skills are
# registered here — consider updating the text.
skillset = Skillset(
    name="my-skillset",
    description="skillset with one skill",
    skills=[ner_skill, language_detection_skill, split_skill, keyphrases_skill],
)
skillset.delete_if_exists()
skillset.create()

In [11]:
# Inspect the dict (JSON-ready) form of the skillset, including every skill's
# inputs, outputs and context as shown in the cell output.
skillset.to_dict()

{'name': 'my-skillset',
 'description': 'skillset with one skill',
 'skills': [{'@odata.type': '#Microsoft.Skills.Text.EntityRecognitionSkill',
   'inputs': [{'name': 'text', 'source': '/document/content'}],
   'outputs': [{'name': 'organizations', 'targetName': 'organizations'}],
   'context': '/document',
   'defaultLanguageCode': 'en',
   'includeTypelessEntities': False,
   'categories': ['Organization']},
  {'@odata.type': '#Microsoft.Skills.Text.LanguageDetectionSkill',
   'inputs': [{'name': 'text', 'source': '/document/content'}],
   'outputs': [{'name': 'languageCode', 'targetName': 'languageCode'},
    {'name': 'languageName', 'targetName': 'languageName'},
    {'name': 'score', 'targetName': 'score'}],
   'context': '/document'},
  {'@odata.type': 'Microsoft.Skills.Text.SplitSkill',
   'inputs': [{'name': 'text', 'source': '/document/content'}],
   'outputs': [{'name': 'textItems', 'targetName': 'textItems'}],
   'context': '/document',
   'textSplitMode': 'pages',
   'maxim

In [12]:
# Define the indexer that ties together the data source, the target index and
# the skillset, using the output field mappings provided by the skillset.
# NOTE(review): `config` is created here but is not passed to Indexer(...) in
# this cell, and it rebinds the `config` name that earlier held the JSON loaded
# from blob_config.json — confirm whether it should be handed to the Indexer
# or given a distinct name.
config = IndexerParameters()
indexer = Indexer(
    name="my-indexer",
    data_source_name=datasource.name,
    target_index_name=index.name,
    skillset_name=skillset.name,
    output_field_mappings=skillset.get_output_field_mappings()
)

# delete_if_exists() runs before create(), mirroring the data source, index and
# skillset cells above.
indexer.delete_if_exists()
indexer.create()

#### Evaluate indexer status

In [15]:
import time

POLL_INTERVAL_SECONDS = 3    # pause between two status checks
MAX_WAIT_SECONDS = 15 * 60   # upper bound so "Run All" cannot hang forever; raise for large data sets

# Poll the indexer until it reports an error or its last run has left the
# "inProgress" state. After the loop, `indexer_status` and `last_run_status`
# hold the final values that were reported.
indexer_status = ""
last_run_status = None
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    status = indexer.get_status()
    indexer_status = status.get("status")
    last_result = status.get("lastResult")
    if last_result is not None:
        last_run_status = last_result.get("status")
        # str() keeps the message printable even if the field is missing (None).
        print("last run status: " + str(last_run_status))
    else:
        last_run_status = None

    print("indexer status is: " + str(indexer_status))

    # Stop on an indexer error or once the last run reached a final state.
    if indexer_status == "error" or last_run_status not in (None, "inProgress"):
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            "indexer did not finish within " + str(MAX_WAIT_SECONDS) + " seconds"
        )
    # Only wait when another check is actually going to happen.
    time.sleep(POLL_INTERVAL_SECONDS)


last run status: inProgress
indexer status is: running
last run status: inProgress
indexer status is: running
last run status: inProgress
indexer status is: running
last run status: inProgress
indexer status is: running
last run status: inProgress
indexer status is: running
last run status: success
indexer status is: running


### Search

In [16]:
# Run a full-text query against the index and show the raw HTTP response.
res = index.search("Microsoft")

# print() joins its arguments with a single space, giving the same text as
# concatenating onto a prefix that ends in a space.
print("Search status:", res.status_code)
print("Results:", str(res.content, "utf-8"))

Search status: 200
Results: {"@odata.context":"https://om-azuresearch.search.windows.net/indexes('my-index')/$metadata#docs(*)","value":[{"@search.score":0.24530376,"id":"aHR0cHM6Ly9vbXNlYXJjaGJsb2IuYmxvYi5jb3JlLndpbmRvd3MubmV0L2Jhc2ljZGVtby9zYXR5YXNsZXR0ZXIudHh00","content":"Today is a very humbling day for me. It reminds me of my very first day at Microsoft, 22 years ago. Like you, I had a choice about where to come to work. I came here because I believed Microsoft was the best company in the world. I saw then how clearly we empower people to do magical things with our creations and ultimately make the world a better place. I knew there was no better company to join if I wanted to make a difference. This is the very same inspiration that continues to drive me today.\r\n\r\nIt is an incredible honor for me to lead and serve this great company of ours. Steve and Bill have taken it from an idea to one of the greatest and most universally admired companies in the world. I\u2019ve been fo