In [3]:
import spacy
from spacy import displacy

# Load medium English model
nlp = spacy.load("en_core_web_md")

In [13]:

# Your e-commerce query
query = "case for iphone"

# Process the query
doc = nlp(query)

# Print dependency tree details
print(f"{'Token':<10} {'Dep':<10} {'Head':<10} {'POS':<10} {'Children'}")
print("-" * 50)
for token in doc:
    children = [child.text for child in token.children]
    print(f"{token.text:<10} {token.dep_:<10} {token.head.text:<10} {token.pos_:<10} {children}")

# OPTIONAL: Visualize dependency tree (works in Jupyter or web contexts)
displacy.serve(doc, style="dep")

Token      Dep        Head       POS        Children
--------------------------------------------------
case       ROOT       case       NOUN       ['for']
for        prep       case       ADP        ['iphone']
iphone     pobj       for        NOUN       []



Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


# POS Approach

In [15]:
import spacy

#nlp = spacy.load("en_core_web_sm")

# Define connectors with grammar-based rules
left_primary = {"with", "for", "under"}
right_primary = { "of", "to", "from", "about", "on", "by"}

def extract_info(text):
    doc = nlp(text)

    # Initialize outputs
    primary = {"main_noun": "", "prev_nouns": "", "adj": "", "adp": ""}
    secondary = {"main_noun": "", "prev_nouns": "", "adj": "", "adp": ""}

    connector_token = None

    # Step 1: Find the first relevant connector
    for token in doc:
        if token.text.lower() in left_primary.union(right_primary) and token.pos_ == "ADP":
            connector_token = token
            break

    if connector_token:
        # Process left side
        left_tokens = list(doc[:connector_token.i])
        left_main_noun, left_prev_nouns = "", ""
        for i in reversed(range(len(left_tokens))):
            token = left_tokens[i]
            if token.pos_ in {"NOUN", "PROPN","NUM"}:
                left_main_noun = token.text
                prev_nouns = []
                for j in reversed(range(i)):
                    if left_tokens[j].pos_ in {"NOUN", "PROPN","NUM"}:
                        prev_nouns.insert(0, left_tokens[j].text)
                    else:
                        break
                left_prev_nouns = " ".join(prev_nouns)
                break

        # Process right side
        right_tokens = list(doc[connector_token.i + 1:])
        right_main_noun, right_prev_nouns = "", ""
        for i, token in enumerate(right_tokens):
            if token.pos_ in {"NOUN", "PROPN","NUM"}:
                right_main_noun = token.text
                prev_nouns = []
                for j in reversed(range(i)):
                    if right_tokens[j].pos_ in {"NOUN", "PROPN","NUM"}:
                        prev_nouns.insert(0, right_tokens[j].text)
                    else:
                        break
                right_prev_nouns = " ".join(prev_nouns)
                break

        connector = connector_token.text.lower()

        # Assign primary and secondary based on connector
        if connector in left_primary:
            primary["main_noun"] = left_main_noun
            primary["prev_nouns"] = left_prev_nouns
            secondary["main_noun"] = right_main_noun
            secondary["prev_nouns"] = right_prev_nouns
        elif connector in right_primary:
            primary["main_noun"] = right_main_noun
            primary["prev_nouns"] = right_prev_nouns
            secondary["main_noun"] = left_main_noun
            secondary["prev_nouns"] = left_prev_nouns

        # Step 2: Find adjectives, adpositions, referring to nouns
        for token in doc:
            if token.head.text == primary["main_noun"]:
                if token.pos_ == "ADJ":
                    primary["adj"] = token.text
                if token.pos_ == "ADP":
                    primary["adp"] = token.text
            if token.head.text == secondary["main_noun"]:
                if token.pos_ == "ADJ":
                    secondary["adj"] = token.text
                if token.pos_ == "ADP":
                    secondary["adp"] = token.text

    return {
        "Primary main noun": primary["main_noun"],
        "Primary prev nouns": primary["prev_nouns"],
        "Adj ref Primary noun": primary["adj"],
        "ADP ref Primary noun": primary["adp"],
        "Secondary main noun": secondary["main_noun"],
        "Secondary prev nouns": secondary["prev_nouns"],
        "Adj ref Secondary noun": secondary["adj"],
        "ADP ref Secondary noun": secondary["adp"],
    }

# Test Example
text = "best s24 galaxy phone with 50 mp camera"
result = extract_info(text)
for key, value in result.items():
    print(f"{key}: {value if value else 'N/A'}")

Primary main noun: phone
Primary prev nouns: s24 galaxy
Adj ref Primary noun: best
ADP ref Primary noun: with
Secondary main noun: 50
Secondary prev nouns: N/A
Adj ref Secondary noun: N/A
ADP ref Secondary noun: N/A


In [25]:
import spacy

#nlp = spacy.load("en_core_web_sm")

# Define connectors with grammar-based rules
left_primary = {"with", "for", "under"}
right_primary = { "of", "to", "from", "about", "on", "by"}

noun_pos_tags = {"NOUN", "PROPN", "NUM"}

def extract_info(text):
    doc = nlp(text)

    primary = {"main_noun": "", "prev_nouns": "", "adj": "", "adp": ""}
    secondary = {"main_noun": "", "next_nouns": "", "adj": "", "adp": ""}

    connector_token = None

    # Find connector
    for token in doc:
        if token.text.lower() in left_primary.union(right_primary) and token.pos_ == "ADP":
            connector_token = token
            break

    if connector_token:
        left_tokens = list(doc[:connector_token.i])
        right_tokens = list(doc[connector_token.i + 1:])

        left_main_noun, left_prev_nouns = "", ""
        right_main_noun, right_next_nouns = "", ""

        # Find left main noun & previous consecutive nouns
        for i in reversed(range(len(left_tokens))):
            token = left_tokens[i]
            if token.pos_ in noun_pos_tags:
                left_main_noun = token.text
                prev_nouns = []
                for j in reversed(range(i)):
                    if left_tokens[j].pos_ in noun_pos_tags:
                        prev_nouns.insert(0, left_tokens[j].text)
                    else:
                        break
                left_prev_nouns = " ".join(prev_nouns)
                break

        # Find right main noun & next consecutive nouns
        for i, token in enumerate(right_tokens):
            if token.pos_ in noun_pos_tags:
                right_main_noun = token.text
                next_nouns = []
                for j in range(i + 1, len(right_tokens)):
                    if right_tokens[j].pos_ in noun_pos_tags:
                        next_nouns.append(right_tokens[j].text)
                    else:
                        break
                right_next_nouns = " ".join(next_nouns)
                break

        connector = connector_token.text.lower()

        if connector in left_primary:
            primary["main_noun"] = left_main_noun
            primary["prev_nouns"] = left_prev_nouns
            secondary["main_noun"] = right_main_noun
            secondary["next_nouns"] = right_next_nouns
        elif connector in right_primary:
            primary["main_noun"] = right_main_noun
            primary["prev_nouns"] = right_prev_nouns
            secondary["main_noun"] = left_main_noun
            secondary["next_nouns"] = left_prev_nouns

        # Find adjectives, adpositions, verbs referring to nouns
        for token in doc:
            if token.head.text == primary["main_noun"]:
                if token.pos_ == "ADJ":
                    primary["adj"] = token.text
                if token.pos_ == "ADP":
                    primary["adp"] = token.text

            if token.head.text == secondary["main_noun"]:
                if token.pos_ == "ADJ":
                    secondary["adj"] = token.text
                if token.pos_ == "ADP":
                    secondary["adp"] = token.text


    return {
        "Primary main noun": primary["main_noun"],
        "Primary prev nouns": primary["prev_nouns"],
        "Adj ref Primary noun": primary["adj"],
        "ADP ref Primary noun": primary["adp"],
        "Secondary main noun": secondary["main_noun"],
        "Secondary next nouns": secondary["next_nouns"],
        "Adj ref Secondary noun": secondary["adj"],
        "ADP ref Secondary noun": secondary["adp"],
    }

# Test Example
text = "cheap phone with charger for gaming"
result = extract_info(text)

# Pretty print
for key, value in result.items():
    print(f"{key}: {value if value else 'N/A'}")


Primary main noun: phone
Primary prev nouns: N/A
Adj ref Primary noun: cheap
ADP ref Primary noun: for
Secondary main noun: charger
Secondary next nouns: N/A
Adj ref Secondary noun: N/A
ADP ref Secondary noun: N/A


In [21]:
import random
import json

# Word banks
nouns = ["phone", "laptop", "charger", "headphones", "tablet", "watch", "camera", "speaker", "mic", "router", "printer", "monitor", "keyboard", "mouse", "projector", "powerbank", "case", "screen", "earbuds", "gamepad"]
adjs = ["cheap", "fast", "wireless", "affordable", "powerful", "compatible", "new", "slim", "durable", "portable", "lightweight", "stylish", "reliable", "advanced", "smart"]
nums = ["10000", "500", "65w", "256gb", "2tb", "50", "100", "2000", "300", "150"]
adps = ["with", "for", "under", "above", "beside", "without"]

# 50 sentence templates
templates = [
    "Give me {adj1} {noun1} with {adj2} {noun2}.",
    "I want {noun1} for {adj2} {noun2}.",
    "Find {adj1} {noun1} under {num}.",
    "Looking for {adj1} {noun1} with {adj2} {noun2}.",
    "Buy {adj1} {noun1} with {adj2} {noun2}.",
    "Get {noun1} for {adj2} {noun2}.",
    "Search {adj1} {noun1} under {num}.",
    "Order {adj1} {noun1} with {adj2} {noun2}.",
    "Cheap {noun1} for {adj2} {noun2}.",
    "Looking for {adj1} {noun1} above {num}.",
    "I need {adj1} {noun1} beside {adj2} {noun2}.",
    "Get {noun1} without {adj2} {noun2}.",
    "Find {adj1} {noun1} above {num}.",
    "Order {adj1} {noun1} without {adj2} {noun2}.",
    "New {noun1} with {adj2} {noun2}.",
    "Affordable {noun1} for {adj2} {noun2}.",
    "Looking for {adj1} {noun1} with {adj2} {noun2}.",
    "Buy {adj1} {noun1} for {adj2} {noun2}.",
    "Search for {adj1} {noun1} under {num}.",
    "Get {adj1} {noun1} above {num}.",
    "Order {adj1} {noun1} beside {adj2} {noun2}.",
    "Find {adj1} {noun1} without {adj2} {noun2}.",
    "Need {adj1} {noun1} with {adj2} {noun2}.",
    "Looking for {adj1} {noun1} for {adj2} {noun2}.",
    "Cheap {adj1} {noun1} under {num}.",
    "Buy {adj1} {noun1} with {adj2} {noun2}.",
    "Order {adj1} {noun1} for {adj2} {noun2}.",
    "I want {adj1} {noun1} with {adj2} {noun2}.",
    "Need {adj1} {noun1} under {num}.",
    "Searching for {adj1} {noun1} above {num}.",
    "Purchase {adj1} {noun1} without {adj2} {noun2}.",
    "Get {adj1} {noun1} with {adj2} {noun2}.",
    "Find {adj1} {noun1} beside {adj2} {noun2}.",
    "Affordable {adj1} {noun1} above {num}.",
    "New {adj1} {noun1} for {adj2} {noun2}.",
    "Order {adj1} {noun1} under {num}.",
    "Need {adj1} {noun1} with {adj2} {noun2}.",
    "Searching {adj1} {noun1} for {adj2} {noun2}.",
    "Get {adj1} {noun1} under {num}.",
    "Looking for {adj1} {noun1} without {adj2} {noun2}.",
    "Purchase {adj1} {noun1} for {adj2} {noun2}.",
    "I want {adj1} {noun1} above {num}.",
    "Find {adj1} {noun1} beside {adj2} {noun2}.",
    "Buy {adj1} {noun1} without {adj2} {noun2}.",
    "Affordable {adj1} {noun1} with {adj2} {noun2}.",
    "Order {adj1} {noun1} for {adj2} {noun2}.",
    "Looking for {adj1} {noun1} beside {adj2} {noun2}.",
    "Purchase {adj1} {noun1} above {num}.",
    "Need {adj1} {noun1} without {adj2} {noun2}.",
    "Cheap {adj1} {noun1} for {adj2} {noun2}."
]

test_dataset = []

for template in templates:
    adj1 = random.choice(adjs)
    adj2 = random.choice(adjs)
    noun1 = random.choice(nouns)
    noun2 = random.choice(nouns)
    num = random.choice(nums)

    sentence = template.format(adj1=adj1, adj2=adj2, noun1=noun1, noun2=noun2, num=num)

    # Basic rule-based expected output logic
    expected = {}

    if " with " in template and " for " not in template:
        expected = {
            "Primary main noun": noun1,
            "Primary prev nouns": "",
            "Adj ref Primary noun": adj1,
            "ADP ref Primary noun": "",
            "Verb ref Primary noun": "",
            "Secondary main noun": noun2,
            "Secondary next nouns": "",
            "Adj ref Secondary noun": adj2,
            "ADP ref Secondary noun": "",
            "Verb ref Secondary noun": ""
        }
    elif " for " in template:
        expected = {
            "Primary main noun": noun2,
            "Primary prev nouns": "",
            "Adj ref Primary noun": adj2,
            "ADP ref Primary noun": "",
            "Verb ref Primary noun": "",
            "Secondary main noun": noun1,
            "Secondary next nouns": "",
            "Adj ref Secondary noun": adj1,
            "ADP ref Secondary noun": "",
            "Verb ref Secondary noun": ""
        }
    elif " under " in template or " above " in template:
        adp = "under" if " under " in template else "above"
        expected = {
            "Primary main noun": noun1,
            "Primary prev nouns": "",
            "Adj ref Primary noun": adj1,
            "ADP ref Primary noun": adp,
            "Verb ref Primary noun": "",
            "Secondary main noun": num,
            "Secondary next nouns": "",
            "Adj ref Secondary noun": "",
            "ADP ref Secondary noun": "",
            "Verb ref Secondary noun": ""
        }
    else:
        expected = {
            "Primary main noun": noun1,
            "Primary prev nouns": "",
            "Adj ref Primary noun": adj1,
            "ADP ref Primary noun": "",
            "Verb ref Primary noun": "",
            "Secondary main noun": noun2,
            "Secondary next nouns": "",
            "Adj ref Secondary noun": adj2,
            "ADP ref Secondary noun": "",
            "Verb ref Secondary noun": ""
        }

    test_dataset.append({
        "text": sentence,
        "expected": expected
    })

# Save as JSON
with open("generated_ground_truth_dataset.json", "w") as f:
    json.dump(test_dataset, f, indent=4)

print("Generated 50 ground truth examples saved to 'generated_ground_truth_dataset.json'")


Generated 50 ground truth examples saved to 'generated_ground_truth_dataset.json'
