Set up the imports

In [1]:
import json
import collections
import dataclasses
from typing import *

In [2]:
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

import nltk
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords

In [3]:
import npdl
from npdl import activations
from npdl import layers
from npdl import objectives
from npdl import optimizers

## Load the data

In [4]:
# JSON text is UTF-8 by specification; without an explicit encoding, open()
# falls back to the platform locale and can mis-decode non-ASCII patterns.
with open('intents.json', 'r', encoding='utf-8') as file:
    intents = json.load(file)

## Preprocess the data

In [5]:
# Alias: a token is represented as a plain string.
Token = str


@dataclasses.dataclass
class Document:
    """One tokenized pattern together with the class label it belongs to."""
    # Class label of the pattern.
    # NOTE(review): the field name is spelled "classs_" (three s's); it is
    # kept as-is because renaming it would break keyword/attribute access.
    classs_: str
    # Tokens extracted from the pattern text.
    tokens: Iterable[Token]

def tokenize(sentence: str,
             language: str = 'english') -> Iterable[Token]:
    """Turn a sentence into stemmed, lowercase, alphabetic, non-stop-word tokens.

    Args:
        sentence: Raw text to tokenize.
        language: Language name handed to the NLTK word tokenizer, the
            stop-word corpus and the Snowball stemmer.

    Returns:
        A NumPy string array holding the stemmed tokens in sentence order
        (empty, but still string-typed, when nothing survives the filters).
    """
    # A set gives constant-time membership tests instead of scanning the list
    # once per word.
    stop_words = set(stopwords.words(language))
    # Build the stemmer once rather than once per word.
    stemmer = SnowballStemmer(language)

    stems = []
    for word in nltk.word_tokenize(sentence, language):
        lowered = word.lower()
        # Compare the lowercased form: the stop-word list is lowercase, so a
        # capitalized stop word ("The", "What") would otherwise slip through.
        if word.isalpha() and lowered not in stop_words:
            stems.append(stemmer.stem(lowered))

    # Explicit string dtype keeps an empty result from defaulting to float64.
    return np.array(stems, dtype=str)

Initialize the **vocabulary**, the **classes** and the **corpus**.

In [6]:
# Accumulators filled while walking the intents:
#   classes    - class label of every intent document
#   corpus     - one Document per pattern
#   vocabulary - every token seen across all patterns (duplicates included)
classes: List[str] = []
corpus: List[Document] = []
vocabulary: List[Token] = []

In [7]:
# Start from empty containers so that re-running this cell rebuilds the
# corpus instead of appending a second copy of every document.
vocabulary.clear()
corpus.clear()
classes.clear()

for document in intents:
    class_ = document['class']
    classes.append(class_)
    for pattern in document['patterns']:
        # Each pattern becomes one Document; its tokens also feed the
        # (not yet deduplicated) vocabulary.
        tokens = tokenize(pattern)
        vocabulary.extend(tokens)
        corpus.append(Document(class_, tokens))

In [8]:
# Drop duplicates, then sort so both lists have a deterministic order.
vocabulary = list(set(vocabulary))
vocabulary.sort()
classes = list(set(classes))
classes.sort()