In [4]:
import spacy
from spacy.matcher import Matcher

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# Define the queries
queries = [
    "Show me watches under Rs. 500.",
    "Can you list watches between Rs. 1000 and Rs. 2000?",
    "Show me all Samsung watches available.",
    "Which watches do you have from Jianuo?",
    "List watches sold by New-Gen.",
    "Show me the top-rated sellers who sell watches.",
    "Show me watches from sellers with more than 90% positive ratings.",
    "Which watches are sold by highly rated sellers?",
    "Can you find watches that always ship on time?",
    "I'm looking for sports watches priced between Rs. 1500 and Rs. 2500, sold by top-rated sellers.",
    "What are the latest smartwatches available under Rs. 5000?",
    "List all luxury watches.",
    "Help me find a watch for a gift under Rs. 2000."
]

# Initialize the Matcher
matcher = Matcher(nlp.vocab)

# Define more comprehensive patterns
price_patterns = [
    {"LOWER": "under", "OP": "?"},
    {"LOWER": "rs.", "OP": "?"},
    {"LIKE_NUM": True, "OP": "+"},
    {"TEXT": {"REGEX": "^(and|to|or)$"}, "OP": "*"},
    {"LOWER": "rs.", "OP": "?"},
    {"LIKE_NUM": True, "OP": "*"},
]

brand_patterns = [
    {"LOWER": "samsung"},
    {"LOWER": "jianuo"}
]

seller_patterns = [
    {"LOWER": "sold"},
    {"LOWER": "by"},
    {"IS_ASCII": True}
]

rating_patterns = [
    {"TEXT": {"REGEX": "^[0-9]+%$"}},
    {"LOWER": "positive", "OP": "?"},
    {"LOWER": "ratings"}
]

product_patterns = [
    {"LOWER": {"IN": ["watches", "watch", "smartwatches", "luxury watches", "sports watches"]}}
]

# Add patterns to matcher
matcher.add("PRICE", [price_patterns])
matcher.add("BRAND", [brand_patterns])
matcher.add("SELLER", [seller_patterns])
matcher.add("RATING", [rating_patterns])
matcher.add("PRODUCT", [product_patterns])

# Function to analyze queries
def analyze_queries(queries):
    for query in queries:
        doc = nlp(query)
        matches = matcher(doc)
        results = {}
        for match_id, start, end in matches:
            rule_id = nlp.vocab.strings[match_id]
            span = doc[start:end].text
            if rule_id in results:
                results[rule_id].append(span)
            else:
                results[rule_id] = [span]
        print(f"Query: '{query}'")
        for key, value in results.items():
            print(f"  {key}: {value}")
        print()

# Analyze the queries
analyze_queries(queries)


Query: 'Show me watches under Rs. 500.'
  PRODUCT: ['watches']
  PRICE: ['500']

Query: 'Can you list watches between Rs. 1000 and Rs. 2000?'
  PRODUCT: ['watches']
  PRICE: ['1000', '1000 and', '2000']

Query: 'Show me all Samsung watches available.'
  PRODUCT: ['watches']

Query: 'Which watches do you have from Jianuo?'
  PRODUCT: ['watches']

Query: 'List watches sold by New-Gen.'
  PRODUCT: ['watches']
  SELLER: ['sold by New']

Query: 'Show me the top-rated sellers who sell watches.'
  PRODUCT: ['watches']

Query: 'Show me watches from sellers with more than 90% positive ratings.'
  PRODUCT: ['watches']
  PRICE: ['90']

Query: 'Which watches are sold by highly rated sellers?'
  PRODUCT: ['watches']
  SELLER: ['sold by highly']

Query: 'Can you find watches that always ship on time?'
  PRODUCT: ['watches']

Query: 'I'm looking for sports watches priced between Rs. 1500 and Rs. 2500, sold by top-rated sellers.'
  PRODUCT: ['watches']
  PRICE: ['1500', '1500 and', '2500']
  SELLER: [

In [5]:
import spacy

# Load the SpaCy model
nlp = spacy.load("en_core_web_sm")

# Sample queries
queries = [
    "Show me watches under Rs. 500.",
    "Can you list watches between Rs. 1000 and Rs. 2000?",
    "Show me all Samsung watches available.",
    "Which watches do you have from Jianuo?",
    "List watches sold by New-Gen.",
    "Show me the top-rated sellers who sell watches.",
    "Show me watches from sellers with more than 90% positive ratings.",
    "Which watches are sold by highly rated sellers?",
    "Can you find watches that always ship on time?",
    "I'm looking for sports watches priced between Rs. 1500 and Rs. 2500, sold by top-rated sellers.",
    "What are the latest smartwatches available under Rs. 5000?",
    "List all luxury watches.",
    "Help me find a watch for a gift under Rs. 2000."
]

def analyze_queries(queries):
    for query in queries:
        doc = nlp(query)
        subjects = []
        limitations = []

        # Check for product names and categories
        for token in doc:
            # If token is a noun, it could be a subject (product)
            if token.pos_ in ['NOUN', 'PROPN'] and 'watch' in token.lemma_:
                subjects.append(token.text)
            
            # If token is a numeral or related to numbers, it's part of a limitation
            if token.dep_ in ['nummod', 'appos'] or token.head.lemma_ in ['between', 'under', 'over']:
                limitations.append(token.text_with_ws + token.head.text_with_ws.strip())
            
            # Check for named entities for brands and prices
            if token.ent_type_ in ['ORG', 'MONEY']:
                limitations.append(token.text)

        # Print the results
        print(f"Query: '{query}'")
        print(f"  Subjects: {subjects}")
        print(f"  Limitations: {limitations}")
        print()

# Analyze the queries
analyze_queries(queries)


Query: 'Show me watches under Rs. 500.'
  Subjects: ['watches']
  Limitations: ['Rsunder', '. under', '500.']

Query: 'Can you list watches between Rs. 1000 and Rs. 2000?'
  Subjects: ['watches']
  Limitations: ['Rsbetween', '. between', '1000 .', '2000.']

Query: 'Show me all Samsung watches available.'
  Subjects: ['watches']
  Limitations: ['Samsung']

Query: 'Which watches do you have from Jianuo?'
  Subjects: []
  Limitations: []

Query: 'List watches sold by New-Gen.'
  Subjects: ['watches']
  Limitations: ['New', '-', 'Gen.']

Query: 'Show me the top-rated sellers who sell watches.'
  Subjects: ['watches']
  Limitations: []

Query: 'Show me watches from sellers with more than 90% positive ratings.'
  Subjects: []
  Limitations: ['90%']

Query: 'Which watches are sold by highly rated sellers?'
  Subjects: ['watches']
  Limitations: []

Query: 'Can you find watches that always ship on time?'
  Subjects: ['watches']
  Limitations: []

Query: 'I'm looking for sports watches priced b