# Import des librairies

In [1]:
import json
import os
import random
import warnings
from pathlib import Path

import pandas as pd

from tqdm.notebook import tqdm_notebook as tqdm
import time

!pip install azure-core
from azure.core.credentials import AzureKeyCredential

!pip install azure-ai-language-conversations
from azure.ai.language.conversations import ConversationAnalysisClient
from azure.ai.language.conversations.authoring import ConversationAuthoringClient

!pip install python-dotenv
from dotenv import load_dotenv

Collecting plotly
  Downloading plotly-5.11.0-py2.py3-none-any.whl (15.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.3/15.3 MB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting tenacity>=6.2.0
  Downloading tenacity-8.1.0-py3-none-any.whl (23 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.11.0 tenacity-8.1.0
[0mCollecting pandas_profiling
  Downloading pandas_profiling-3.6.2-py2.py3-none-any.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m328.7/328.7 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<2.14,>=2.13.2
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Collecting htmlmin==0.1.12
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting phik<0.13,>=0.11.1
  Downloading phik-0.12.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (679 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# Chargement des fichiers

In [2]:
# Hide UserWarning / FutureWarning messages so they do not clutter cell outputs.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Read settings from a .env file; override=True lets values from the file
# replace variables already present in the process environment.
load_dotenv(override=True)

# Azure Conversational Language Understanding (CLU) resource credentials.
# Both are None when the corresponding environment variable is not set.
AZURE_CLU_ENDPOINT = os.getenv("AZURE_CLU_ENDPOINT")
AZURE_CLU_KEY = os.getenv("AZURE_CLU_KEY")

AZURE_CLU_PROJECT_NAME = "FlyMeBot"
AZURE_CLU_API_VERSION = "2022-10-01-preview"

# Aliases for cells of this notebook that spell these settings AZURE_LANGUAGE_*.
# Without them those cells raise NameError on a freshly restarted kernel.
AZURE_LANGUAGE_ENDPOINT = AZURE_CLU_ENDPOINT
AZURE_LANGUAGE_KEY = AZURE_CLU_KEY
AZURE_LANGUAGE_PROJECT_NAME = AZURE_CLU_PROJECT_NAME
AZURE_LANGUAGE_API_VERSION = AZURE_CLU_API_VERSION

# Probability that a labelled utterance is assigned to the "Train" dataset.
TRAIN_TEST_RATIO = 0.8

In [3]:
# Folder holding the dataset. Defaults to the original location; set the
# FLYME_DATA_PATH environment variable to run the notebook on another machine
# without editing this cell.
DATA_PATH = Path(os.getenv("FLYME_DATA_PATH", "/storage/P10/"))

# JSON file with the dialogue frames, read by the data-preparation cell.
FRAMES_PATH = DATA_PATH / "frames.json"

In [5]:
raw_data = pd.read_json(FRAMES_PATH)

# Dedicated, seeded generator so the Train/Test assignment is identical on
# every run and does not depend on the state of the global `random` module.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)


def _utf16_len(text):
    """Return the number of UTF-16 code units needed to encode ``text``.

    ``surrogatepass`` keeps lone surrogates (possible in decoded JSON) from
    raising; each of them counts as one code unit.
    """
    return len(text.encode("utf-16-le", "surrogatepass")) // 2


# Project payload: the label schema (intents, entities) plus the labelled
# utterances collected by the loop below.
assets = {
    "stringIndexType": "utf16CodeUnit",
    "projectKind": "Conversation",
    "intents": [
        {"category": "Book"},
        {"category": "Info"},
    ],
    "entities": [
        {"category": "or_city"},
        {"category": "dst_city"},
        {"category": "str_date"},
        {"category": "end_date"},
        {"category": "budget"},
    ],
    "utterances": [],
}

# Computed once instead of being rebuilt for every argument of every act.
entity_categories = {entity["category"] for entity in assets["entities"]}

# `unique_utterances` keeps the texts in first-seen order; `seen_texts` mirrors
# it as a set so the duplicate check is O(1) instead of a list scan.
unique_utterances = []
seen_texts = set()

for turn in tqdm(raw_data["turns"]):
    for frame in turn:
        text = frame["text"]

        # Skip wizard turns and texts that were already processed.
        if frame["author"] == "wizard" or text in seen_texts:
            continue

        seen_texts.add(text)
        unique_utterances.append(text)

        is_book = False
        entities = []

        for act in frame["labels"]["acts_without_refs"]:
            for arg in act["args"]:
                key = arg["key"]
                # .get: an argument without a "val" entry is treated like a None value.
                val = arg.get("val")

                if key == "intent" and val == "book":
                    is_book = True

                # Only non-empty string values can be located in the text.
                if key not in entity_categories or not isinstance(val, str) or not val:
                    continue

                start = text.find(val)  # first occurrence only
                if start == -1:
                    continue

                # `assets` declares stringIndexType "utf16CodeUnit", so offset and
                # length are expressed in UTF-16 code units. They differ from Python
                # string indices when the text contains characters outside the BMP.
                entities.append(
                    {
                        "category": key,
                        "offset": _utf16_len(text[:start]),
                        "length": _utf16_len(val),
                    }
                )

        # Utterances in which no entity could be located are not exported.
        if entities:
            assets["utterances"].append(
                {
                    "text": text,
                    "language": "en-us",
                    "intent": "Book" if is_book else "Info",
                    "entities": entities,
                    "dataset": "Train"
                    if split_rng.random() < TRAIN_TEST_RATIO
                    else "Test",
                }
            )

  0%|          | 0/1369 [00:00<?, ?it/s]

In [9]:
def _span(category, offset, length):
    """Build one labelled entity span of an utterance."""
    return {"category": category, "offset": offset, "length": length}


def _sample(text, intent, dataset, *spans):
    """Build one "en-us" utterance record carrying the given entity spans."""
    return {
        "text": text,
        "language": "en-us",
        "intent": intent,
        "entities": list(spans),
        "dataset": dataset,
    }


# Small fixed payload with the same label schema as `assets` and 26 labelled utterances.
assets_light = {
    "projectKind": "Conversation",
    "intents": [{"category": label} for label in ("Book", "Info")],
    "entities": [
        {"category": label}
        for label in ("or_city", "dst_city", "str_date", "end_date", "budget")
    ],
    "utterances": [
        _sample(
            "I'd like to book a trip to Atlantis from Caprica on Saturday, August 13, 2016 for 8 adults. I have a tight budget of 1700.",
            "Book",
            "Train",
            _span("dst_city", 27, 8),
            _span("or_city", 41, 7),
            _span("str_date", 52, 25),
            _span("budget", 117, 4),
        ),
        _sample(
            "Yes, how about going to Neverland from Caprica on August 13, 2016 for 5 adults. For this trip, my budget would be 1900.",
            "Info",
            "Train",
            _span("dst_city", 24, 9),
            _span("budget", 114, 4),
            _span("or_city", 39, 7),
            _span("str_date", 50, 15),
        ),
        _sample(
            "I have no flexibility for dates... but I can leave from Atlantis rather than Caprica. How about that?",
            "Info",
            "Train",
            _span("or_city", 56, 8),
            _span("or_city", 77, 7),
        ),
        _sample(
            "Hello, I am looking to book a vacation from Gotham City to Mos Eisley for $2100.",
            "Book",
            "Test",
            _span("dst_city", 59, 10),
            _span("or_city", 44, 11),
            _span("budget", 75, 4),
        ),
        _sample(
            "What about a trip from Gotham City to Neverland for the same budget?",
            "Info",
            "Train",
            _span("dst_city", 38, 9),
            _span("or_city", 23, 11),
        ),
        _sample(
            "Would any packages to Mos Eisley be available if I increase my budget to $2500?",
            "Info",
            "Train",
            _span("dst_city", 22, 10),
            _span("budget", 73, 5),
        ),
        _sample(
            "You know what, I'd like to try and visit Neverland",
            "Info",
            "Train",
            _span("dst_city", 41, 9),
        ),
        _sample(
            "Do you have any trips from Gotham City to Kobe for my original budget of $2100?",
            "Info",
            "Train",
            _span("dst_city", 42, 4),
            _span("budget", 73, 5),
            _span("or_city", 27, 11),
        ),
        _sample(
            "No, that's too far for me. I need a flight that leaves from Birmingham.",
            "Info",
            "Train",
            _span("or_city", 60, 10),
        ),
        _sample(
            "How many days would I be in Kobe?",
            "Info",
            "Train",
            _span("dst_city", 28, 4),
        ),
        _sample(
            "Hello there i am looking to go on a vacation with my family to Gotham City, can you help me?",
            "Book",
            "Train",
            _span("dst_city", 63, 11),
        ),
        _sample(
            "yes i do, it is around $2200",
            "Info",
            "Train",
            _span("budget", 23, 5),
        ),
        _sample(
            "We are from Neverland",
            "Info",
            "Train",
            _span("or_city", 12, 9),
        ),
        _sample(
            "we can depart from Toronto",
            "Info",
            "Train",
            _span("or_city", 19, 7),
        ),
        _sample(
            "hmm what options would i have out of Toronto?",
            "Info",
            "Train",
            _span("or_city", 37, 7),
        ),
        _sample(
            "Hi I'd like to go to Caprica from Busan, between Sunday August 21, 2016 and Wednesday August 31, 2016",
            "Book",
            "Test",
            _span("dst_city", 21, 7),
            _span("or_city", 34, 5),
            _span("str_date", 49, 22),
            _span("end_date", 76, 25),
        ),
        _sample(
            "Do you have anything for San Antonio as a destination?",
            "Info",
            "Train",
            _span("dst_city", 25, 11),
        ),
        _sample(
            "That sounds great. 1:00 am return on Sunday August 28th is very early in the morning... ",
            "Info",
            "Train",
            _span("end_date", 37, 18),
        ),
        _sample(
            "Hello, I am looking to book a trip for 2 adults and 6 children for $21,300 or less. We are departing from Kochi for Denver.",
            "Book",
            "Train",
            _span("budget", 67, 7),
            _span("or_city", 106, 5),
            _span("dst_city", 116, 6),
        ),
        _sample(
            "I do not have any dates in mind. I would like to spend as much time in Denver as my budget will allow.",
            "Info",
            "Train",
            _span("dst_city", 71, 6),
        ),
        _sample(
            "Hey, i Want to go to St. Louis on the 17th of August",
            "Book",
            "Train",
            _span("dst_city", 21, 9),
            _span("str_date", 38, 14),
        ),
        _sample(
            "I need to back by the 31st",
            "Info",
            "Train",
            _span("end_date", 22, 4),
        ),
        _sample(
            "I’m from Calgary",
            "Info",
            "Train",
            _span("or_city", 9, 7),
        ),
        _sample(
            "I'm looking for a trip to Gotham City leaving from Kakariko Village on Saturday, August 13, 2016. 3 adults for no more than $2400 USD.",
            "Book",
            "Train",
            _span("dst_city", 26, 11),
            _span("or_city", 51, 16),
            _span("str_date", 71, 25),
            _span("budget", 124, 9),
        ),
        _sample(
            "I'd like to adjust the departure city to Caprica.",
            "Info",
            "Train",
            _span("or_city", 41, 7),
        ),
        _sample(
            "The dates cannot be changed. How about going to Theed with 14 adults, leaving from Kakariko Village, on a budget of $2900?",
            "Info",
            "Test",
            _span("dst_city", 48, 5),
            _span("or_city", 83, 16),
            _span("budget", 116, 5),
        ),
    ],
}

In [17]:
# Authoring client for the CLU resource. It is built from the AZURE_CLU_*
# settings of the configuration cell; the AZURE_LANGUAGE_* names previously
# used here are not defined by any cell, so a fresh kernel raised NameError.
client = ConversationAuthoringClient(
    AZURE_CLU_ENDPOINT, AzureKeyCredential(AZURE_CLU_KEY)
)

# Import the small `assets_light` payload into the project.
poller = client.begin_import_project(
    project_name=AZURE_CLU_PROJECT_NAME,
    project={
        "stringIndexType": "Utf16CodeUnit",
        "assets": assets_light,
        "metadata": {
            "projectKind": "Conversation",
            "settings": {"confidenceThreshold": 0.7},
            "projectName": AZURE_CLU_PROJECT_NAME,
            "multilingual": True,
            "description": "Language understanding for FlyMe Bot",
            "language": "en-us",
        },
        "projectFileVersion": AZURE_CLU_API_VERSION,
    },
)

# Wait for the import job to finish and show its final status.
response = poller.result()
print(response)

{'jobId': '4735f0df-beec-4504-80fc-f882b67dafbf_638087328000000000', 'createdDateTime': '2023-01-08T21:14:01Z', 'lastUpdatedDateTime': '2023-01-08T21:14:03Z', 'expirationDateTime': '2023-01-15T21:14:01Z', 'status': 'succeeded'}


In [20]:
# Delete the project from the service (the next cell imports it again with
# the full `assets` payload). The project name comes from the configuration
# cell's AZURE_CLU_PROJECT_NAME; AZURE_LANGUAGE_PROJECT_NAME is not defined
# by any cell and raised NameError on a fresh kernel.
poller2 = client.begin_delete_project(
    project_name=AZURE_CLU_PROJECT_NAME,
)

# Wait for the deletion job to finish and show its final status.
response2 = poller2.result()
print(response2)

{'jobId': '9ab9ce4f-9a8b-4d2a-9c0f-f21cb907ef64_638087328000000000', 'createdDateTime': '2023-01-08T22:12:07Z', 'lastUpdatedDateTime': '2023-01-08T22:12:07Z', 'expirationDateTime': '2023-01-15T22:12:07Z', 'status': 'succeeded'}


In [21]:
# Import the full `assets` payload built from the frames dataset.
# Settings come from the configuration cell's AZURE_CLU_* constants; the
# AZURE_LANGUAGE_* names previously used here are not defined by any cell.
poller = client.begin_import_project(
    project_name=AZURE_CLU_PROJECT_NAME,
    project={
        "stringIndexType": "Utf16CodeUnit",
        "assets": assets,
        "metadata": {
            "projectKind": "Conversation",
            "settings": {"confidenceThreshold": 0.7},
            "projectName": AZURE_CLU_PROJECT_NAME,
            "multilingual": True,
            "description": "Language understanding for FlyMe Bot",
            "language": "en-us",
        },
        "projectFileVersion": AZURE_CLU_API_VERSION,
    },
)

# Wait for the import job to finish and show its final status.
response = poller.result()
print(response)

{'jobId': '01c8f837-1189-4509-8dd4-8938654dd7bf_638087328000000000', 'createdDateTime': '2023-01-08T22:14:00Z', 'lastUpdatedDateTime': '2023-01-08T22:14:14Z', 'expirationDateTime': '2023-01-15T22:14:00Z', 'status': 'succeeded'}


In [24]:
# Start a training job for the imported project and label the resulting model
# "ProdModel". The project name comes from AZURE_CLU_PROJECT_NAME (configuration
# cell); AZURE_LANGUAGE_PROJECT_NAME is not defined by any cell.
poller3 = client.begin_train(
    project_name=AZURE_CLU_PROJECT_NAME,
    configuration={
        "modelLabel": "ProdModel",
        "trainingMode": "standard",
        "evaluationOptions": {
            "kind": "manual",
        },
    },
)

# Wait for the training job to finish and show its final status.
response3 = poller3.result()
print(response3)

{'result': {'modelLabel': 'ProdModel', 'trainingConfigVersion': '2022-09-01', 'trainingMode': 'standard', 'trainingStatus': {'percentComplete': 100, 'startDateTime': '2023-01-09T00:03:49.4845885Z', 'endDateTime': '2023-01-09T00:07:04.6156276Z', 'status': 'succeeded'}, 'evaluationStatus': {'percentComplete': 100, 'startDateTime': '2023-01-09T00:07:04.6408021Z', 'endDateTime': '2023-01-09T00:08:20.4031794Z', 'status': 'succeeded'}}, 'jobId': 'f3c6843f-c419-41fd-8148-2d460ee11004_638088192000000000', 'createdDateTime': '2023-01-09T00:03:47Z', 'lastUpdatedDateTime': '2023-01-09T00:08:25Z', 'expirationDateTime': '2023-01-16T00:03:47Z', 'status': 'succeeded'}


In [26]:
# Deploy the model labelled "ProdModel" under the deployment name 'apiprod'.
# The project name comes from AZURE_CLU_PROJECT_NAME (configuration cell);
# AZURE_LANGUAGE_PROJECT_NAME is not defined by any cell.
poller4 = client.begin_deploy_project(
    project_name=AZURE_CLU_PROJECT_NAME,
    deployment_name='apiprod',
    deployment={
        "trainedModelLabel": "ProdModel",  # Represents the trained model label. Required.
    },
)

# Wait for the deployment job to finish and show its final status.
response4 = poller4.result()
print(response4)

{'deploymentName': 'apiprod', 'modelId': 'ProdModel-20230109T120825-f33589a5934c4c18a2b4fb034200f5df', 'lastTrainedDateTime': '2023-01-09T00:08:25.3696069Z', 'lastDeployedDateTime': '2023-01-09T00:40:33Z', 'deploymentExpirationDate': '2024-02-28', 'modelTrainingConfigVersion': '2022-09-01'}


In [37]:
# Runtime client used to query the 'apiprod' deployment. It is built from the
# AZURE_CLU_* settings of the configuration cell; the AZURE_LANGUAGE_* names
# previously used here are not defined by any cell.
client2 = ConversationAnalysisClient(
    AZURE_CLU_ENDPOINT, AzureKeyCredential(AZURE_CLU_KEY)
)

# The context manager closes the client once the request has completed.
with client2:
    query = "I want to book a ticket from Lyon to New York City with a budget of 500 euros maximum."
    result = client2.analyze_conversation(
        task={
            "kind": "Conversation",
            "analysisInput": {
                "conversationItem": {
                    "participantId": "testing",
                    "id": "testing",
                    "modality": "text",
                    "language": "en-us",
                    "text": query
                },
            },
            "parameters": {
                "projectName": AZURE_CLU_PROJECT_NAME,
                "deploymentName": 'apiprod',
                "verbose": True
            }
        }
    )
    # Pretty-print the raw prediction returned by the service.
    print(json.dumps(result, indent=4))
        

{
    "kind": "ConversationResult",
    "result": {
        "query": "I want to book a ticket from Lyon to New York City with a budget of 500 euros maximum.",
        "prediction": {
            "topIntent": "Book",
            "projectKind": "Conversation",
            "intents": [
                {
                    "category": "Book",
                    "confidenceScore": 0.9183872
                },
                {
                    "category": "Info",
                    "confidenceScore": 0.91747797
                },
                {
                    "category": "None",
                    "confidenceScore": 0
                }
            ],
            "entities": [
                {
                    "category": "or_city",
                    "text": "Lyon",
                    "offset": 29,
                    "length": 4,
                    "confidenceScore": 1
                },
                {
                    "category": "dst_city",
                   