In [1]:
import os
import json
import requests
from dotenv import load_dotenv, find_dotenv
import spacy

In [2]:
# Locate the nearest .env file and load its entries into the process
# environment before any setting is read.
load_dotenv(find_dotenv())

# API key sent with every search request; None when the variable is not set.
search_key = os.getenv('AZURE_SEARCH_KEY')

In [98]:
# Memo of accepted lookups: maps the queried name to the search hit that
# resolve_service_name() stored for it.
# NOTE(review): this cell's execution count is In [98], i.e. it was last run
# long after the cell that first calls resolve_service_name (In [3]); on a
# fresh kernel it must be run before that cell or the call raises NameError.
service_cache: dict = {}

In [3]:
def resolve_service_name(name, threshold=0.8):
    """Resolve a free-text name against the ``services-v0`` search index.

    Accepted hits are memoised in the module-level ``service_cache`` dict,
    keyed by the exact ``name`` string, so repeated lookups skip the HTTP call.

    Args:
        name: Text to look up (e.g. an entity string produced by the NER model).
        threshold: The top hit is accepted only when its ``@search.score`` is
            strictly greater than this value.

    Returns:
        * the top search hit (``dict``) when it scores above ``threshold``;
        * ``None`` when the query succeeds but returns no hit, or only a hit
          at or below ``threshold``;
        * the raw response body (``str``) when the service answers with a
          non-200 status. Callers must not index this value like a hit.
    """
    if name in service_cache:
        print('Name in Cache:', name)
        # Cached hits are returned as stored; `threshold` is not re-applied.
        return service_cache[name]

    def run_search(search_term):
        """Query the index for ``search_term`` and return the raw response."""
        search_url = 'https://ccg.search.windows.net/indexes/services-v0/docs'
        params = {
            'api-version': '2017-11-11-preview',
            'search': search_term,
            '$top': 3,
            'scoringProfile': 'boostName',
        }
        headers = {
            'api-key': search_key
        }
        return requests.get(search_url, headers=headers, params=params)

    res = run_search(name)
    if res.status_code != 200:
        return res.text

    # A successful query may legitimately return an empty `value` list
    # (nonsense terms do), so never index into it blindly.
    hits = res.json().get('value') or []
    if not hits:
        return None

    top_res = hits[0]
    if top_res['@search.score'] > threshold:
        service_cache[name] = top_res
        return top_res
    return None
            
    
# Smoke-test the resolver with an abbreviated service name.
resolve_service_name('Azure AD')

NameError: name 'service_cache' is not defined

In [5]:
def run_search(search_term):
    """Send ``search_term`` to the suggest endpoint of the ``services-v0`` index.

    The request enables fuzzy matching, uses the ``suggest-name`` suggester and
    the ``boostName`` scoring profile, and sets ``$top`` to 3. The
    ``requests`` response is returned as-is; its status is not checked here.
    """
    endpoint = 'https://ccg.search.windows.net/indexes/services-v0/docs/suggest'

    # Keys are added in a fixed order so the generated query string is stable.
    query = {'api-version': '2017-11-11-preview', 'search': search_term}
    query.update({
        '$top': 3,
        'scoringProfile': 'boostName',
        'suggesterName': 'suggest-name',
        'fuzzy': True,
    })

    return requests.get(endpoint, headers={'api-key': search_key}, params=query)

# Probe the suggest endpoint with a nonsense term; the recorded response body
# shows that an unmatched query comes back with an empty `value` list.
run_search('asdf;lkasdfj').text

'{"@odata.context":"https://ccg.search.windows.net/indexes(\'services-v0\')/$metadata#docs(*)","value":[]}'

In [77]:
# Display the current contents of the lookup cache.
service_cache

{}

In [30]:
# Load the spaCy pipeline stored under the local directory models/ner_azure_v0/
# (presumably the custom Azure NER model — confirm against the training code).
nlp = spacy.load('models/ner_azure_v0/')

In [100]:
# Run NER over a slice of the example file (lines 100-119) and try to resolve
# every detected entity to a service in the search index.
# JSON text is UTF-8; being explicit keeps the read independent of the
# platform's default encoding.
with open('data/processed/azure_examples.jsonl', encoding='utf-8') as az_ex_file:
    for line in az_ex_file.readlines()[100:120]:
        text = json.loads(line)['text']
        doc = nlp(text)
        for ent in doc.ents:
            s = resolve_service_name(ent.text)
            if isinstance(s, dict):
                print(ent.text, 'resolved to:', s['name'])
            elif s:
                # resolve_service_name returns the raw response body (a str)
                # on a non-200 status; indexing it with ['name'] would raise.
                print(ent.text, 'lookup failed:', s)
            else:
                print(ent.text, 'is not an Azure service')
        print('=======================================')
        


Azure Advisor resolved to: Azure Advisor
Name in Cache: Azure Advisor
Azure Advisor resolved to: Azure Advisor
REST is not an Azure service
Name in Cache: Azure Advisor
Azure Advisor resolved to: Azure Advisor
Azure Analysis Services resolved to: Azure Analysis Services
Name in Cache: Azure Analysis Services
Azure Analysis Services resolved to: Azure Analysis Services
Name in Cache: Azure Analysis Services
Azure Analysis Services resolved to: Azure Analysis Services
Azure Resource Manager resolved to: Azure Resource Manager
Azure Functions resolved to: Azure Functions
Azure Automation resolved to: Azure Automation
Name in Cache: Azure Analysis Services
Azure Analysis Services resolved to: Azure Analysis Services
Azure Active Directory resolved to: Azure Active Directory
Azure Data Factory resolved to: Azure Data Factory
Name in Cache: Azure Automation
Azure Automation resolved to: Azure Automation
Name in Cache: Azure Functions
Azure Functions resolved to: Azure Functions
Name in Cache

In [10]:
import spacy
from spacy import displacy

In [3]:
# Rebind `nlp` to the pipeline named 'en_ner_azure_lg'. This replaces the
# pipeline loaded earlier from 'models/ner_azure_v0/', so every later cell
# uses this model.
nlp = spacy.load('en_ner_azure_lg')

In [31]:
# Sample sentence used by the relation-extraction cells below.
# NOTE(review): the recorded output of the final VERB-listing cell ("learn",
# "migrating") matches the commented-out sentence, not this one — that output
# looks stale; re-run to confirm.
doc = nlp('I want to migrate from SQL Server 2008 to Azure SQL Database')
# doc = nlp('In this article, you learn about the primary methods for migrating a SQL Server 2005 or later database to a single or pooled database in Azure SQL Database')

In [23]:
# List every recognised entity in `doc` as a (label, text) pair.
[(ent.label_, ent.text) for ent in doc.ents]

[('AZURE_SERVICE', 'Azure SQL Database')]

In [46]:
def extract_relations_and_root_verb(doc, ent_label):
    """Find the first ``ent_label`` entity in ``doc`` and what it relates to.

    Entities and noun chunks are first merged into single tokens (this
    mutates ``doc``). Only the first token whose ``ent_type_`` equals
    ``ent_label`` is examined:

    * if it is an ``attr``/``dobj``, it is paired with the ``nsubj`` found
      to the left of its head (when there is one);
    * if it is a ``pobj`` of a ``prep``, it is paired with the preposition's
      head, and the tree is walked upwards from that token to the nearest
      ``VERB``, whose lemma becomes ``root_verb``.

    Args:
        doc: A parsed spaCy ``Doc``.
        ent_label: Entity label to look for, e.g. ``'AZURE_SERVICE'``.

    Returns:
        ``(svc, relations, root_verb)`` where ``svc`` is the matching token,
        ``relations`` is a list of ``(related_token, svc)`` pairs (possibly
        empty) and ``root_verb`` is a lemma string or ``None``. When no token
        carries ``ent_label`` the result is ``(None, [], None)`` so callers
        can always unpack three values.
    """
    # NOTE(review): Span.merge() is deprecated since spaCy 2.1 and removed in
    # 3.0. When upgrading, switch to `with doc.retokenize() as r: r.merge(span)`
    # (overlapping spans have to be filtered out first).
    spans = list(doc.ents) + list(doc.noun_chunks)
    for span in spans:
        span.merge()

    for svc in doc:
        if svc.ent_type_ != ent_label:
            continue

        relations = []
        # Bound up front: only the pobj branch below can set a verb.
        root_verb = None

        if svc.dep_ in ('attr', 'dobj'):
            subjects = [w for w in svc.head.lefts if w.dep_ == 'nsubj']
            if subjects:
                relations.append((subjects[0], svc))
        elif svc.dep_ == 'pobj' and svc.head.dep_ == 'prep':
            relation = svc.head.head
            relations.append((relation, svc))

            cur = relation
            while True:
                if cur.pos_ == 'VERB':
                    root_verb = cur.lemma_
                    break
                # The root token is its own head; stop there instead of
                # looping forever when no verb sits above the relation.
                if cur.head == cur:
                    break
                cur = cur.head

        # Only the first matching entity is reported.
        return svc, relations, root_verb

    return None, [], None
            
        
# Run the extraction on the sample sentence, looking for AZURE_SERVICE entities.
# Note that the call merges entities/noun chunks in `doc` as a side effect.
svc, relations, root_verb = extract_relations_and_root_verb(doc, 'AZURE_SERVICE')

In [39]:
from spacy.tokens import Span

In [53]:
# Wrap the service token in a one-token span to read the character offset at
# which it starts in the document text.
doc[svc.i:svc.i+1].start_char

42

In [42]:
# Show the document the service token belongs to.
svc.doc

I want to migrate from SQL Server 2008 to Azure SQL Database

In [25]:
# Draw the dependency parse of `doc` inline in the notebook.
displacy.render(doc, style='dep', jupyter=True)

In [16]:
# For every verb in `doc`, print its text, dependency label, lemma, the text
# and part of speech of its head, and its direct children.
verbs = (tok for tok in doc if tok.pos_ == 'VERB')
for verb in verbs:
    governor = verb.head
    print(verb.text, verb.dep_, verb.lemma_, governor.text, governor.pos_,
          list(verb.children))

learn ROOT learn learn VERB [In, ,, you, about]
migrating pcomp migrate for ADP [a SQL Server]
