# NLP test

In [30]:
import spacy

# Load spaCy's small English model.
nlp = spacy.load("en_core_web_sm")

# Sample user query. Uncomment one of the alternatives to try a different input.
# text = "What is the tire wear for driver Hamilton on lap 50?"
text = "Who had the highest tire wear in lap 12, Max or Charles?"
# text = "Which team is better in the championship, Red Bull or Ferrari?"

# Run the pipeline; the named entities it finds are exposed as `doc.ents`.
doc = nlp(text)

# Print every recognized entity together with its label.
print("Entities detected:")
for ent in doc.ents:
    print(f"{ent.text} - {ent.label_}")

# Simple keyword-based intent identification.
# `intent` is reset on every run so that a query without the keyword cannot
# leave it unbound, or silently keep the value from an earlier execution of
# this cell (notebook variables survive between runs).
intent = None
if "tire wear" in text.lower():
    intent = "analyze_tire_wear"
    print(f"Identified intent: {intent}")

# Explain the entities in domain terms.
# NOTE: every CARDINAL/ORDINAL is reported as a lap number; this heuristic only
# holds for queries in which the lap is the only number mentioned.
print("\nEntity Explanation:")
for ent in doc.ents:
    if ent.label_ == "PERSON":
        print(f"Driver identified: {ent.text}")
    elif ent.label_ in ("CARDINAL", "ORDINAL"):
        print(f"Lap number identified: {ent.text}")


Entities detected:
12 - CARDINAL
Max - PERSON
Charles - PERSON
Identified intent: analyze_tire_wear

Entity Explanation:
Lap number identified: 12
Driver identified: Max
Driver identified: Charles


To create custom rules that recognize domain-specific terms (e.g., “lap”, specific drivers, or team names), you can use spaCy's `EntityRuler`:

In [24]:
from spacy.pipeline import EntityRuler

# Create an EntityRuler and add it before the NER component in the pipeline.
# Per the spaCy docs, a ruler placed before "ner" sets its entities first and
# the statistical recognizer then predicts around those spans.
#
# add_pipe() raises a ValueError when a component named "entity_ruler" already
# exists, so re-running this cell on the same `nlp` object would fail. Drop any
# previous ruler first: the cell then always starts from a fresh, empty ruler
# and can be executed repeatedly without accumulating duplicate patterns.
if "entity_ruler" in nlp.pipe_names:
    nlp.remove_pipe("entity_ruler")
ruler = nlp.add_pipe("entity_ruler", before="ner")

# Custom token patterns for the EntityRuler. {"LOWER": ...} compares against the
# lowercased token text, so matching is case-insensitive.
# Each pattern is listed exactly once. Short forms ("Red Bull", "Mercedes") are
# kept alongside the full team names; per the spaCy docs, when matches overlap
# the one covering the most tokens takes priority, so the full name wins.
patterns = [
    {"label": "DRIVER", "pattern": [{"LOWER": "hamilton"}]},
    {"label": "LAP", "pattern": [{"LOWER": "lap"}]},
    {"label": "TEAM", "pattern": [{"LOWER": "red"}, {"LOWER": "bull"}, {"LOWER": "racing"}]},
    {"label": "TEAM", "pattern": [{"LOWER": "red"}, {"LOWER": "bull"}]},  # Short form
    {"label": "TEAM", "pattern": [{"LOWER": "mercedes"}, {"LOWER": "amg"}, {"LOWER": "petronas"}]},
    {"label": "TEAM", "pattern": [{"LOWER": "mercedes"}]},  # Short form
]

# Add patterns to the ruler
ruler.add_patterns(patterns)


In [25]:
# Run a sample query through the pipeline that now includes the custom ruler.
doc = nlp("Show me the tire wear for driver Hamilton on lap 50.")

# One "<text> - <label>" line per detected entity, printed under a header.
entity_lines = [f"{ent.text} - {ent.label_}" for ent in doc.ents]
print("Entities detected:", *entity_lines, sep="\n")

# Expected output:
# Hamilton - DRIVER   (DRIVER pattern of the ruler)
# lap - LAP           (LAP pattern of the ruler)
# 50 - <label chosen by the statistical NER>
#   No ruler pattern covers the number, so its label comes from the model;
#   the recorded run shows DATE here rather than CARDINAL.


Entities detected:
Hamilton - DRIVER
lap - LAP
50 - DATE


Test the TEAM patterns on a query that mentions Red Bull Racing and Mercedes:

In [28]:
# Check the TEAM rules: run a query naming two teams and list what was tagged.
text = "Which team is better in the championship, Red Bull Racing or Mercedes?"
doc = nlp(text)

# Format each entity as "<text> - <label>", then print one entity per line.
entity_lines = [f"{ent.text} - {ent.label_}" for ent in doc.ents]
for line in entity_lines:
    print(line)

Red Bull Racing - TEAM
Mercedes - TEAM
