In [1]:
from rasa_nlu.model import Interpreter
import json

In [2]:
# Rasa NLU pipeline configuration, kept as a YAML string and written to
# nlu_config.yml at the end of this cell (the training command reads that file).

# Disabled alternative: the `tensorflow_embedding` pipeline template.
# nlu_config = """
# language: fr
# pipeline: tensorflow_embedding
# """

# Active configuration: an explicit list of pipeline components.
# It includes `intent_entity_featurizer_regex`; the training data declares
# `## regex:` and `## lookup:` sections.
nlu_config = """
language: fr
pipeline:
- name: "nlp_spacy"
- name: "tokenizer_spacy"
- name: "intent_featurizer_spacy"
- name: "intent_entity_featurizer_regex"
- name: "ner_crf"
- name: "ner_synonyms"
- name: "intent_classifier_sklearn"
"""

# Disabled alternative: the `spacy_sklearn` pipeline template.
# nlu_config = """
# language: fr
# pipeline: "spacy_sklearn"
# """

# IPython magic: writes the content of the string variable to the given file.
%store nlu_config > nlu_config.yml

Writing 'nlu_config' (str) to file 'nlu_config.yml'.


In [3]:
# Training data in Rasa NLU Markdown format: one `## intent:<name>` section per
# intent, entity annotations written as [text](entity), two lookup tables that
# point to text files, and one regex feature for four-digit years.
# The lookup files (organisations.txt, datasets.txt) are generated by later
# cells, so they must exist before training is launched.
nlu_md = """
## intent:hello
- salut
- bonjour
- hey
- hello
- coucou
- yo

## intent:search_dataset
- statistiques de [la ville de paris](organization)
- je cherche [les données](object) du [ministère de l'intérieur](organization)
- j'aimerai connaitre la [qualité de l'air](topic) à Paris
- Je voudrai avoir [les chiffres](object) de la [population française](topic)
- quel est [mon numéro](object) [RNA](topic) ?
- poule 2010
- je veux des données
- [vigicrue](topic) 2018
- base [SIRENE](topic)

## intent:confirm
- oui
- ouais
- ouaip
- ui
- hum oui
- hmm ui
- ok
- c'est ça
- yes

## intent:deny
- non
- pas du tout
- nope
- oui mais non
- hm non
- hum non
- no

## intent:thankyou
- merci
- c'est chouette
- cool !
- super !

## intent:bye
- au revoir
- à plus
- bye
- ciao
- bonne nuit

## lookup:organization
organisations.txt

## lookup:topic
datasets.txt

## regex:year
- [0-9]{4}
"""
# IPython magic: writes the content of the string variable to nlu.md.
# NOTE(review): %store appears to open the file with the platform default
# encoding; the examples contain accented characters, so confirm the file ends
# up as UTF-8 on the machine that runs the training.
%store nlu_md > nlu.md

Writing 'nlu_md' (str) to file 'nlu.md'.


## Importation des noms d'organisations et des jeux de données

In [4]:
import pandas as pd

In [5]:
# Load the organisations export; the file is semicolon-delimited.
# The `name` and `acronym` columns are used below to build organisations.txt.
df1 = pd.read_csv('../pilotage/data/organizations.csv', sep=";")

In [6]:
names = [ x for x in list(df1['name'].astype(str).values) if x != 'nan' ]
accronyms = [ x for x in list(df1['acronym'].astype(str).values) if x != 'nan' ]

txt = "\n".join(names + accronyms)
%store txt > organisations.txt

Writing 'txt' (str) to file 'organisations.txt'.


In [7]:
# Load the dataset catalogue; the file is semicolon-delimited.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up with mixed Python types.
# NOTE(review): the truncated warning previously printed under this cell looks
# like pandas' DtypeWarning (mixed types) - confirm it disappears on re-run.
df2 = pd.read_csv('../pilotage/data/datasets.csv', sep=";", low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [8]:
df2.head()

names = [ x for x in list(df2['title'].astype(str).values) if x != 'nan' ]
accronyms = [ x for x in list(df2['acronym'].astype(str).values) if x != 'nan' ]

txt = "\n".join(names + accronyms)
%store txt > datasets.txt

Writing 'txt' (str) to file 'datasets.txt'.


## Entraînement

In [9]:
# Train the NLU model from nlu_config.yml and nlu.md (both written by earlier
# cells). With `-o models --project current --fixed_model_name nlu` the result
# is the directory loaded afterwards: ./models/current/nlu.
# NOTE(review): `python` is whichever interpreter is first on PATH - confirm it
# is the environment where rasa_nlu and the French spaCy model are installed.
!python -m rasa_nlu.train -c nlu_config.yml --data nlu.md -o models --fixed_model_name nlu --project current --verbose

2019-01-31 12:13:51 [1;30mINFO    [0m [34mrasa_nlu.utils.spacy_utils[0m  - Trying to load spacy model with name 'fr'
2019-01-31 12:13:58 [1;30mINFO    [0m [34mrasa_nlu.components[0m  - Added 'nlp_spacy' to component cache. Key 'nlp_spacy-fr'.
2019-01-31 12:13:58 [1;30mINFO    [0m [34mrasa_nlu.training_data.loading[0m  - Training data format of nlu.md is md
2019-01-31 12:13:58 [1;30mINFO    [0m [34mrasa_nlu.training_data.training_data[0m  - Training data stats: 
	- intent examples: 40 (6 distinct intents)
	- Found intents: 'confirm', 'thankyou', 'hello', 'deny', 'bye', 'search_dataset'
	- entity examples: 7 (3 distinct entities)
	- found entities: 'topic', 'organization', 'object'

2019-01-31 12:13:58 [1;30mINFO    [0m [34mrasa_nlu.model[0m  - Starting to train component nlp_spacy
2019-01-31 12:13:59 [1;30mINFO    [0m [34mrasa_nlu.model[0m  - Finished training component.
2019-01-31 12:13:59 [1;30mINFO    [0m [34mrasa_nlu.model[0m  - Starting to train componen

In [10]:
# Load the trained model; the path matches the output directory, project and
# fixed model name passed to the training command (models/current/nlu).
interpreter = Interpreter.load("./models/current/nlu")

In [11]:
# Parse a sample search query and pretty-print the full result
# (intent, entities, intent ranking, original text).
message = "Pays de Morlaix 2017"
result = interpreter.parse(message)
# ensure_ascii=False keeps accented French characters readable in the output
# instead of escaping them as \uXXXX sequences.
print(json.dumps(result, indent=2, ensure_ascii=False))

{
  "intent": {
    "name": "search_dataset",
    "confidence": 0.37504581892005423
  },
  "entities": [],
  "intent_ranking": [
    {
      "name": "search_dataset",
      "confidence": 0.37504581892005423
    },
    {
      "name": "deny",
      "confidence": 0.16469573942343668
    },
    {
      "name": "confirm",
      "confidence": 0.1479097845543902
    },
    {
      "name": "hello",
      "confidence": 0.1188168084682127
    },
    {
      "name": "thankyou",
      "confidence": 0.1121113661456479
    },
    {
      "name": "bye",
      "confidence": 0.0814204824882585
    }
  ],
  "text": "Pays de Morlaix 2017"
}


In [12]:
# Parse a sample confirmation message and pretty-print the full result
# (intent, entities, intent ranking, original text).
message = "oui"
result = interpreter.parse(message)
# ensure_ascii=False keeps accented French characters readable in the output
# instead of escaping them as \uXXXX sequences.
print(json.dumps(result, indent=2, ensure_ascii=False))

{
  "intent": {
    "name": "confirm",
    "confidence": 0.28726857647984166
  },
  "entities": [],
  "intent_ranking": [
    {
      "name": "confirm",
      "confidence": 0.28726857647984166
    },
    {
      "name": "deny",
      "confidence": 0.22217696580285237
    },
    {
      "name": "hello",
      "confidence": 0.20043180588287685
    },
    {
      "name": "bye",
      "confidence": 0.1288506139988975
    },
    {
      "name": "thankyou",
      "confidence": 0.10419230006202315
    },
    {
      "name": "search_dataset",
      "confidence": 0.05707973777350827
    }
  ],
  "text": "oui"
}


In [13]:
# Parse a sample refusal message and pretty-print the full result
# (intent, entities, intent ranking, original text).
message = "non"
result = interpreter.parse(message)
# ensure_ascii=False keeps accented French characters readable in the output
# instead of escaping them as \uXXXX sequences.
print(json.dumps(result, indent=2, ensure_ascii=False))

{
  "intent": {
    "name": "deny",
    "confidence": 0.2639537437459718
  },
  "entities": [],
  "intent_ranking": [
    {
      "name": "deny",
      "confidence": 0.2639537437459718
    },
    {
      "name": "confirm",
      "confidence": 0.23351412650327888
    },
    {
      "name": "hello",
      "confidence": 0.14857869023961762
    },
    {
      "name": "bye",
      "confidence": 0.13550775547498314
    },
    {
      "name": "thankyou",
      "confidence": 0.11207979421263056
    },
    {
      "name": "search_dataset",
      "confidence": 0.10636588982351801
    }
  ],
  "text": "non"
}
