In [1]:
from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.authoring.models import ApplicationCreateObject, ExampleLabelObject
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials
from functools import reduce

import json, time, uuid

import keys

import re

In [2]:
authoringKey = keys.authoringKey
authoringEndpoint = keys.authoringEndpoint
predictionKey = keys.predictionKey
predictionEndpoint = keys.predictionEndpoint

In [4]:
client = LUISAuthoringClient(authoringEndpoint, CognitiveServicesCredentials(authoringKey))

In [5]:
app_id = client.apps.add({
            'name': "FlyMe",
            'initial_version_id':  "0.1",
            'description': "Flight booking chatbot app",
            'culture': 'en-us',
        })

print("Created app.")

Created app.


In [3]:
app_id = "db70b5ef-3103-46a0-aa92-7cbfd39c1439"

In [5]:
luis_app = client.apps.get(app_id=app_id)

app_version = luis_app.active_version

In [7]:
entities = ['or_city', 'dst_city', 'str_date', 'end_date', 'budget']
prebuilt_entities = ['datetimeV2']


In [8]:
# Create intent into the model
intent_name = "BookFlight"

client.model.add_intent(app_id, "0.1", "BookFlight")
print("Added intent BookFlight")

Added intent BookFlight


In [9]:
# add a prebuilt entity "datetimeV2" to the model

client.model.add_prebuilt(
    app_id=app_id,
    version_id=app_version,
    prebuilt_extractor_names=["datetimeV2"]
)
print("Added prebuilt entity datetimeV2")

Added prebuilt entity datetimeV2


In [11]:
# Create entities and entity features into the model
for entity in entities:
    client.model.add_entity(app_id, "0.1", name=entity)
    print("Added entity {}".format(entity))

Added entity or_city
Added entity dst_city
Added entity str_date
Added entity end_date
Added entity budget


In [12]:
with open("../data/train.json") as f:
    train_data = json.load(f)

# send the training data to the service in chunks of 100

for i in range(0, len(train_data), 100):
    j = i + 100
    if j > len(train_data):
        j = len(train_data)

        client.examples.batch(
                    app_id,
                    app_version,
                    train_data[i:j]
                )

In [13]:
async_training = client.train.train_version(app_id, app_version)
is_trained = async_training.status == "UpToDate"

trained_status = ["UpToDate", "Success"]
while not is_trained:
    time.sleep(1)
    status = client.train.get_status(app_id, app_version)
    is_trained = all(
        m.details.status in trained_status for m in status)

print("Your app is trained. You can now go to the LUIS portal and test it!")

Your app is trained. You can now go to the LUIS portal and test it!


In [15]:
# Publish the app
client.apps.update_settings(app_id, is_public=True)
publish_result = client.apps.publish(app_id, app_version, is_staging=False)

endpoint = publish_result.endpoint_url + \
            "?subscription-key=" + predictionKey + "&q="

print("The app is published.")

The app is published.


In [42]:
runtime_client = LUISRuntimeClient(predictionEndpoint, CognitiveServicesCredentials(predictionKey))

request = {
    "query" : "I want to book a flight from Strasbourg to London from 10/10/2019 to 15/10/2019 for 200 euros"
}
result = runtime_client.prediction.get_slot_prediction(app_id=app_id, slot_name="Production", prediction_request=request)

print("\nDetected intent: {}".format(
            result.prediction.top_intent
        ))
print("Detected entities:")
for entity in result.prediction.entities:
    print("\t-> Entity '{}' (type: {})".format(
        result.prediction.entities.get(entity),
        entity
    ))
print("\nComplete result object as dictionnary")
print(result.as_dict())


Detected intent: BookFlight
Detected entities:
	-> Entity '['Strasbourg']' (type: or_city)
	-> Entity '['London']' (type: dst_city)
	-> Entity '[{'type': 'daterange', 'values': [{'timex': '(2019-10-10,2019-10-15,P5D)', 'resolution': [{'start': '2019-10-10', 'end': '2019-10-15'}]}]}]' (type: datetimeV2)
	-> Entity '['200 euros']' (type: budget)

Complete result object as dictionnary
{'query': 'I want to book a flight from Strasbourg to London from 10/10/2019 to 15/10/2019 for 200 euros', 'prediction': {'top_intent': 'BookFlight', 'intents': {'BookFlight': {'score': 0.99955165}}, 'entities': {'or_city': ['Strasbourg'], 'dst_city': ['London'], 'datetimeV2': [{'type': 'daterange', 'values': [{'timex': '(2019-10-10,2019-10-15,P5D)', 'resolution': [{'start': '2019-10-10', 'end': '2019-10-15'}]}]}], 'budget': ['200 euros']}}}


Since LUIS is not so good at extracting str_date and end_date, we create a function to extract these values using the datetimeV2 prebuilt entity and search_dates from dateparser.

In [59]:
def date_extractor(luis_res):
    res_dict = luis_res.as_dict()
    dt = res_dict['prediction']['entities'].get('datetimeV2')

    if dt[0]['type'] == 'daterange':
        print("Start date: {}".format(dt[0].get('values')[0].get('resolution')[0].get('start')))
        print("End date: {}".format(dt[0].get('values')[0].get('resolution')[0].get('end')))

date_extractor(result)

Start date: 2019-10-10
End date: 2019-10-15


## Evaluating the model

In [45]:
import json

In [9]:
runtime_client = LUISRuntimeClient(predictionEndpoint, CognitiveServicesCredentials(predictionKey))

In [16]:
import requests

In [78]:
url = predictionEndpoint + "luis/v3.0-preview/apps/" + app_id + "/versions/" + app_version + "/evaluations"
body = test_data
resp = requests.post(url, json=val_batch, headers={'Ocp-Apim-Subscription-Key': predictionKey, 'Content-Type': 'application/json'} )
resp

<Response [202]>

In [79]:
resp.text

'{"operationId":"cdc5495c-4b3b-42d6-b57b-d7a29e0fd239_638133120000000000","status":"notstarted","createdDateTime":"03/02/2023 17:30:51","lastActionDateTime":"03/02/2023 17:30:51"}'

'https://luisp10.cognitiveservices.azure.com//luis/v3.0-preview/apps/db70b5ef-3103-46a0-aa92-7cbfd39c1439/versions/0.1/evaluations'

In [13]:
response = runtime_client.prediction.get_slot_prediction(app_id=app_id, slot_name="Production", prediction_request = test_data)

SerializationError: Unable to build a model: Unable to deserialize to object: type, AttributeError: 'dict' object has no attribute 'lower', DeserializationError: Unable to deserialize to object: type, AttributeError: 'dict' object has no attribute 'lower'

In [87]:
test_data[5]

{'text': "hi! i'd like to go to boston from mos eisley on august 15th.",
 'intentName': 'BookFlight',
 'entityLabels': [{'entityName': 'dst_city',
   'startCharIndex': 22,
   'endCharIndex': 28},
  {'entityName': 'or_city', 'startCharIndex': 34, 'endCharIndex': 44},
  {'entityName': 'str_date', 'startCharIndex': 48, 'endCharIndex': 59}]}

In [88]:
request = {
    "query" : test_data[5]["text"]
}
result = runtime_client.prediction.get_slot_prediction(app_id=app_id, slot_name="Production", prediction_request=request)

print(result.as_dict())

{'query': "hi! i'd like to go to boston from mos eisley on august 15th.", 'prediction': {'top_intent': 'BookFlight', 'intents': {'BookFlight': {'score': 0.9990596}}, 'entities': {'dst_city': ['boston'], 'or_city': ['mos eisley'], 'datetimeV2': [{'type': 'date', 'values': [{'timex': 'XXXX-08-15', 'resolution': [{'value': '2022-08-15'}, {'value': '2023-08-15'}]}]}], 'str_date': ['august 15th.']}}}


In [89]:
true_y = test_data[5]
pred_y = result.as_dict()

In [90]:
for entity in true_y.get('entityLabels'):

    name = entity.get('entityName')
    value = true_y.get('text')[entity.get('startCharIndex'):entity.get('endCharIndex')]

    pred_value = re.sub(r'[^\w\s]','',pred_y.get('prediction').get('entities').get(name)[0])

    print("Name: ", name)
    print("True value: ", value)
    print("Predicted value: ", pred_value)

Name:  dst_city
True value:  boston
Predicted value:  boston
Name:  or_city
True value:  mos eisley
Predicted value:  mos eisley
Name:  str_date
True value:  august 15th
Predicted value:  august 15th


In [162]:
scores = []
wrong = []

for conversation in test_data:
    true_y = conversation
    pred_y = runtime_client.prediction.get_slot_prediction(app_id=app_id, slot_name="Production", prediction_request={"query" : conversation["text"]}).as_dict()

    for entity in true_y.get('entityLabels'):

        name = entity.get('entityName')
        value = true_y.get('text')[entity.get('startCharIndex'):entity.get('endCharIndex')]

        if pred_y.get('prediction').get('entities').get(name) is not None:
            pred_value = re.sub(r'[^\w\s]','',pred_y.get('prediction').get('entities').get(name)[0])
        else: pred_value = None

        if value == pred_value:
            scores.append(1)
        else:
            wrong.append(value)
            wrong.append(pred_value)
            scores.append(0)

In [157]:
index = 3

true_y = test_data[index]
pred_y = runtime_client.prediction.get_slot_prediction(app_id=app_id, slot_name="Production", prediction_request={"query" : true_y["text"]}).as_dict()

for entity in true_y.get('entityLabels'):

    name = entity.get('entityName')
    value = true_y.get('text')[entity.get('startCharIndex'):entity.get('endCharIndex')]

    if pred_y.get('prediction').get('entities').get(name) is not None:
        pred_value = re.sub(r'[^\w\s]','',pred_y.get('prediction').get('entities').get(name)[0])
    else: pred_value = None

    print(value)
    print(pred_value)

sept 2
sept 2
9
None
5800
5800
la paz
la paz
ulsan
None


In [158]:
true_y

{'text': 'i am looking to explore some options for an upcoming internship. i have a week off from sept 2-9 to see where i’d like to spend my next 6 months\ni have a 5800 budget and im leaving from la paz\ncan you tell me whats available in ulsan',
 'intentName': 'BookFlight',
 'entityLabels': [{'entityName': 'str_date',
   'startCharIndex': 88,
   'endCharIndex': 94},
  {'entityName': 'end_date', 'startCharIndex': 95, 'endCharIndex': 96},
  {'entityName': 'budget', 'startCharIndex': 154, 'endCharIndex': 158},
  {'entityName': 'or_city', 'startCharIndex': 186, 'endCharIndex': 192},
  {'entityName': 'dst_city', 'startCharIndex': 228, 'endCharIndex': 233}]}

In [159]:
pred_y

{'query': 'i am looking to explore some options for an upcoming internship. i have a week off from sept 2-9 to see where i’d like to spend my next 6 months\ni have a 5800 budget and im leaving from la paz\ncan you tell me whats available in ulsan',
 'prediction': {'top_intent': 'BookFlight',
  'intents': {'BookFlight': {'score': 0.99992037}},
  'entities': {'datetimeV2': [{'type': 'duration',
     'values': [{'timex': 'P1W', 'resolution': [{'value': '604800'}]}]},
    {'type': 'daterange',
     'values': [{'timex': '(XXXX-09-02,XXXX-09-09,P7D)',
       'resolution': [{'start': '2022-09-02', 'end': '2022-09-09'},
        {'start': '2023-09-02', 'end': '2023-09-09'}]}]},
    {'type': 'daterange',
     'values': [{'timex': '(2023-03-01,2023-09-01,P6M)',
       'resolution': [{'start': '2023-03-01', 'end': '2023-09-01'}]}]}],
   'str_date': ['sept 2-'],
   'budget': ['5800'],
   'or_city': ['la paz']}}}

In [163]:
sum(scores)/len(scores)

0.5824022346368715

In [93]:
val_batch = [
    {
      'text': 'please book me a flight',
      'intent': 'BookFlight',
      'entities': []
    }
]

data = str(val_batch)

headers = {
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': predictionKey
}

slot_name = "Production"
start_batch_endpoint = f'{predictionEndpoint}luis/v3.0-preview/apps/{app_id}/slots/{slot_name}/evaluations'

start_response = requests.post(start_batch_endpoint, data=data, headers=headers)
start_response.text


'{"error":{"code":"BadArgument","message":"labeled utterances cannot be null or empty."}}'

In [76]:
print(start_response.json())

{'operationId': '64aa0133-97c4-4d09-aa54-1951f31196a5_638133120000000000', 'status': 'notstarted', 'createdDateTime': '03/02/2023 17:29:39', 'lastActionDateTime': '03/02/2023 17:29:39'}


In [71]:
start_response.text

'{"operationId":"55164c2b-aaa2-4b7d-8972-dcc01843b31f_638133120000000000","status":"notstarted","createdDateTime":"03/02/2023 17:27:22","lastActionDateTime":"03/02/2023 17:27:22"}'

In [104]:
json = {
    "LabeledTestSetUtterances": [
        {
            "text": "book a ticket",
            "intent": "BookFlight",
            "entities": []
        }
    ]
}

In [9]:
import requests

In [31]:
data = str(json)

headers = {
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': predictionKey
}

slot_name = "Production"
start_batch_endpoint = f'{predictionEndpoint}luis/v3.0-preview/apps/{app_id}/slots/{slot_name}/evaluations'

start_response = requests.post(start_batch_endpoint, data=data, headers=headers)
start_response.text

'{"operationId":"c74cb33c-9eda-49dd-a3eb-fdb91b433303_638133984000000000","status":"notstarted","createdDateTime":"03/03/2023 14:47:22","lastActionDateTime":"03/03/2023 14:47:22"}'

In [40]:
start_batch_endpoint = f'{predictionEndpoint}luis/v3.0-preview/apps/{app_id}/slots/{slot_name}/evaluations/c74cb33c-9eda-49dd-a3eb-fdb91b433303_638133984000000000/status'

start_response = requests.get(start_batch_endpoint, headers=headers)
start_response.text

'{"operationId":"c74cb33c-9eda-49dd-a3eb-fdb91b433303_638133984000000000","status":"succeeded","createdDateTime":"03/03/2023 14:47:22","lastActionDateTime":"03/03/2023 14:47:28"}'

In [41]:
start_response

<Response [200]>

In [42]:
start_response.json()

{'operationId': 'c74cb33c-9eda-49dd-a3eb-fdb91b433303_638133984000000000',
 'status': 'succeeded',
 'createdDateTime': '03/03/2023 14:47:22',
 'lastActionDateTime': '03/03/2023 14:47:28'}

In [43]:
start_batch_endpoint = f'{predictionEndpoint}luis/v3.0-preview/apps/{app_id}/slots/{slot_name}/evaluations/c74cb33c-9eda-49dd-a3eb-fdb91b433303_638133984000000000/result'

start_response = requests.get(start_batch_endpoint, headers=headers)
start_response.text

'{"intentModelsStats":[{"modelName":"BookFlight","modelType":"Intent Classifier","precision":1.0,"recall":1.0,"fScore":1.0},{"modelName":"None","modelType":"Intent Classifier","precision":"NaN","recall":"NaN","fScore":"NaN"}],"entityModelsStats":[{"modelName":"budget","modelType":"Entity Extractor","precision":"NaN","recall":"NaN","fScore":"NaN"},{"modelName":"dst_city","modelType":"Entity Extractor","precision":"NaN","recall":"NaN","fScore":"NaN"},{"modelName":"end_date","modelType":"Entity Extractor","precision":"NaN","recall":"NaN","fScore":"NaN"},{"modelName":"or_city","modelType":"Entity Extractor","precision":"NaN","recall":"NaN","fScore":"NaN"},{"modelName":"str_date","modelType":"Entity Extractor","precision":"NaN","recall":"NaN","fScore":"NaN"}],"utterancesStats":[{"text":"book a ticket","predictedIntentName":"BookFlight","labeledIntentName":"BookFlight","falsePositiveEntities":[],"falseNegativeEntities":[]}]}'

In [19]:
start_response

<Response [202]>

In [26]:
# Poll the server for status
status = "notstarted"
while status != "succeeded":
    response = requests.post(start_batch_endpoint, data=data, headers=headers)
    if response.status_code == 200:
        status = response.json().get("status")
        if status == "succeeded":
            print("Operation succeeded!")
        elif status == "failed":
            print("Operation failed.")
        else:
            print("Operation still running. Status: {}".format(status))
        time.sleep(5)  # Wait for 5 seconds before checking again
    elif response.status_code == 202:
        print("Operation still in progress. Checking again in 30 seconds.")
        print(response.json().get("status"))
        time.sleep(30)  # Wait for 30 seconds before checking again
    else:
        print("Error checking operation status. Status code: {}".format(response.status_code))
        break

Operation still in progress. Checking again in 30 seconds.
notstarted
Operation still in progress. Checking again in 30 seconds.
notstarted
Operation still in progress. Checking again in 30 seconds.
notstarted
Operation still in progress. Checking again in 30 seconds.
notstarted
Operation still in progress. Checking again in 30 seconds.
notstarted
Operation still in progress. Checking again in 30 seconds.
notstarted
Operation still in progress. Checking again in 30 seconds.
notstarted


KeyboardInterrupt: 

In [92]:
with open("../data/test.json", encoding="utf-8") as f:
    test_data = json.load(f)

In [171]:
def convert_data_for_batch(data):
    
    for convo in data:
        convo = str(convo)
        convo = convo.replace("intentName", "intent")
        convo = convo.replace("entityLabels", "entities")
        convo = convo.replace("entityName", "entity")
        convo = convo.replace("startCharIndex", "startPos")
        convo = convo.replace("endCharIndex", "endPos")
        print(convo)
    
    return data

    data = str(data)
    #data = data.replace("intentName", "intent")
    #data = data.replace("entityLabels", "entities")
    #data = data.replace("entityName", "entity")
    #data = data.replace("startCharIndex", "startPos")
    #data = data.replace("endCharIndex", "endPos")

    #data = "{\"LabeledTestSetUtterances\": [" + data + "]}"

    return data

In [172]:
convert_data_for_batch(test_data[0])

text
intent
entities


{'text': "hi there...this is my first time traveling and i'm so excited! i've lived in boston all my life and want to see the world",
 'intentName': 'BookFlight',
 'entityLabels': [{'entityName': 'or_city',
   'startCharIndex': 77,
   'endCharIndex': 83}]}

In [160]:
convert_data_for_batch(test_data[1])

{'text': 'hey there, i need to book a trip to get away!',
 'intentName': 'BookFlight',
 'entityLabels': []}

In [168]:
test_data[2]

{'text': 'hi there! got anything from kabul to tel aviv?',
 'intentName': 'BookFlight',
 'entityLabels': [{'entityName': 'or_city',
   'startCharIndex': 28,
   'endCharIndex': 33},
  {'entityName': 'dst_city', 'startCharIndex': 37, 'endCharIndex': 45}]}

In [164]:
convert_data_for_batch(test_data[0:2])

[{'text': "hi there...this is my first time traveling and i'm so excited! i've lived in boston all my life and want to see the world",
  'intentName': 'BookFlight',
  'entityLabels': [{'entityName': 'or_city',
    'startCharIndex': 77,
    'endCharIndex': 83}]},
 {'text': 'hey there, i need to book a trip to get away!',
  'intentName': 'BookFlight',
  'entityLabels': []}]

In [146]:
test_data[1]

{'text': 'hey there, i need to book a trip to get away!',
 'intentName': 'BookFlight',
 'entityLabels': []}

In [147]:
convert_data_for_batch(test_data[0:2])


'{"LabeledTestSetUtterances": [[{\'text\': "hi there...this is my first time traveling and i\'m so excited! i\'ve lived in boston all my life and want to see the world", \'intent\': \'BookFlight\', \'entities\': [{\'entity\': \'or_city\', \'startPos\': 77, \'endPos\': 83}]}, {\'text\': \'hey there, i need to book a trip to get away!\', \'intent\': \'BookFlight\', \'entities\': []}]]}'

In [138]:
test_data_converted = convert_data_for_batch(test_data[0:1])

headers = {
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': predictionKey
}

slot_name = "Production"
start_batch_endpoint = f'{predictionEndpoint}luis/v3.0-preview/apps/{app_id}/slots/{slot_name}/evaluations'

start_response = requests.post(start_batch_endpoint, data=test_data_converted.encode('utf-8'), headers=headers)
start_response

<Response [400]>

In [139]:
start_response.text

'{"error":{"code":"BadArgument","message":"labeled utterances cannot be null or empty."}}'

In [95]:
operationId = json.loads(start_response.text)["operationId"]

KeyError: 'operationId'

In [None]:
status_endpoint = f'{predictionEndpoint}luis/v3.0-preview/apps/{app_id}/slots/{slot_name}/evaluations/{operationId}/status'

status_response = requests.get(status_endpoint, headers=headers)
status_response.text