## Entity Recognition for Ecommerce Chatbot

In [1]:
# Lookup lists used for the gazetteer-style features. All entries are
# lowercase, so callers lowercase a token before testing membership.
feature_dict = {
    # Brand names
    "brands": [
        "redmi",
        "samsung",
        "casio",
        "apple",
    ],
    # Product category words
    "category": [
        "smartphone",
        "mobile",
        "phone",
        "watch",
        "calculator",
        "laptop",
        "television",
        "tv",
        "computer",
        "headphone",
    ],
}

In [2]:
# str.isdigit() is True only when every character is a digit, so a token
# that mixes letters and digits yields False.
"ORD123".isdigit()

False

## Features

In [3]:
def get_word_shape(word):
    """
    Builds a compact "shape" string describing the character classes in a word.

    Each character is mapped to a symbol: "X" for uppercase, "x" for
    lowercase, "d" for a digit, and the character itself for anything else.
    Consecutive characters that map to the same symbol contribute that symbol
    only once.

    Args:
    word: the token to describe
    Returns:
    str: the collapsed shape, e.g. "Xd" for "ORD123"; "" for an empty word
    """
    def symbol_for(ch):
        # The order of checks matters only for readability: a single
        # character cannot be both uppercase and lowercase or a digit.
        if ch.isupper():
            return "X"
        if ch.islower():
            return "x"
        if ch.isdigit():
            return "d"
        return ch

    symbols = []
    for ch in word:
        current = symbol_for(ch)
        # Append only when the symbol differs from the previous one, so each
        # run of identical symbols is represented once.
        if not symbols or symbols[-1] != current:
            symbols.append(current)

    return "".join(symbols)

In [6]:
# Word shapes for three sample tokens; each run of the same character class
# collapses to a single symbol.
tuple(get_word_shape(token) for token in ("ORD123", "ORD", "ORD123abc"))

('Xd', 'X', 'Xdx')

In [8]:
# Alternating character classes: every change of class adds a new symbol,
# while the repeated digits "11" collapse to a single "d".
get_word_shape("oO11o1o")

'xXdxdx'

In [10]:
import re

def _neighbor_features(prefix: str, neighbor: str) -> dict:
    """Returns the lowercase form and casing/digit flags of a neighbouring word, keyed under prefix."""
    return {
        f"{prefix}.lower": neighbor.lower(),
        f"{prefix}.istitle": neighbor.istitle(),
        f"{prefix}.isupper": neighbor.isupper(),
        f"{prefix}.isdigit": neighbor.isdigit(),
    }


def get_ecommerce_features(sentence: list[str], index: int) -> dict:
    """
    Creates a feature dictionary for a given word in a sentence.

    The features cover the word itself (whether it contains a digit, whether
    its lowercase form is listed in feature_dict["brands"] or
    feature_dict["category"], and its word shape) plus the lowercase form and
    istitle/isupper/isdigit flags of the next and previous words. When there
    is no next word the key "EOS" is set to True; when there is no previous
    word the key "BOS" is set to True.

    Args:
    sentence: list of words in a sentence
    index: index of the word in the sentence; a negative value counts from
        the end, as in ordinary list indexing
    Returns:
    dict: features
    Raises:
    IndexError: if index does not refer to a word in the sentence
    """
    n_words = len(sentence)
    if not -n_words <= index < n_words:
        raise IndexError(
            f"index {index} is out of range for a sentence of {n_words} words"
        )
    # sentence[-1] is a valid lookup, but the boundary comparisons below only
    # make sense for a non-negative position: without this normalization the
    # last word addressed as -1 would get sentence[0] as its "next" word and
    # would never be marked EOS.
    if index < 0:
        index += n_words

    word = sentence[index]
    lowered = word.lower()

    features = {
        "has_number": bool(re.search(r"\d", word)),
        "is_brand": lowered in feature_dict["brands"],
        "is_category": lowered in feature_dict["category"],
        "word_shape": get_word_shape(word),
    }

    if index < n_words - 1:
        features.update(_neighbor_features("next_word", sentence[index + 1]))
    else:
        features["EOS"] = True

    if index > 0:
        features.update(_neighbor_features("prev_word", sentence[index - 1]))
    else:
        features["BOS"] = True

    return features