<a href="https://colab.research.google.com/github/nicolejulietta/NER_Customer_Support/blob/main/Project_Phase1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell commands: install the required packages, then download
# spaCy's small English pipeline.
# NOTE(review): pandas and PyPDF2 are not used by any cell visible here;
# confirm they are needed elsewhere before keeping them.
!pip install spacy pandas PyPDF2
!python -m spacy download en_core_web_sm #Helps recognize common entities


In [None]:
import spacy
import random
from spacy.training.example import Example


In [None]:
# Load spaCy's small pretrained English pipeline.
# NOTE(review): `nlp` is rebound to spacy.blank("en") in the model-setup cell
# further down, so this pretrained pipeline is not the one that gets trained.
nlp = spacy.load("en_core_web_sm")


In [None]:
# Full inventory of entity types for the project: 20 customer-support labels
# followed by 10 customer-service labels.
LABELS = [
    # --- Customer support ---
    "CUSTOMER_NAME",
    "SUPPORT_AGENT",
    "PRODUCT_NAME",
    "ORDER_NUMBER",
    "TICKET_NUMBER",
    "ISSUE_DESCRIPTION",
    "CUSTOMER_FEEDBACK",
    "SERVICE_REQUEST",
    "PRODUCT_MODEL",
    "SUPPORT_CHANNEL",
    "LOCATION",
    "COMPLAINT_TYPE",
    "TICKET_STATUS",
    "TIME_DATE",
    "ESCALATION_REQUEST",
    "REFUND_REQUEST",
    "SHIPPING_INFO",
    "DISCOUNT_PROMO",
    "CUSTOMER_SATISFACTION",
    "SURVEY_REQUEST",
    # --- Customer service ---
    "COMPANY_NAME",
    "PRODUCT_SERVICE_NAME",
    "SLA_TERMS",
    "CUSTOMER_QUERY",
    "SERVICE_TYPE",
    "PAYMENT_ISSUE",
    "SHIPPING_DELIVERY_INFO",
    "RETURN_REQUEST",
    "FEEDBACK_REVIEW",
    "DISCOUNT_CODE",
]

print("All 30 entity labels are set up!")


In [None]:
# Training examples for the NER model, one per entity label.
#
# Each item is (text, {"entities": [(start, end, label), ...]}) where `start`
# and `end` are character offsets into `text` with `end` exclusive, i.e.
# text[start:end] is exactly the entity. spaCy only learns from spans that
# line up with token boundaries, so a span must not include surrounding
# spaces or quotes, or cut a word in half. The trailing comment on each
# entry shows the text the offsets cover.
TRAIN_DATA = [
    # Customer Support Entities
    ("Marissa Velez contacted support about a missing order.",
     {"entities": [(0, 13, "CUSTOMER_NAME")]}),  # Marissa Velez

    ("Support agent Juan Rodríguez resolved the issue quickly.",
     {"entities": [(14, 28, "SUPPORT_AGENT")]}),  # Juan Rodríguez

    ("The customer requested a refund for their Sony PlayStation 5.",
     {"entities": [(42, 60, "PRODUCT_NAME")]}),  # Sony PlayStation 5

    ("Order #B78234 has been shipped and will arrive soon.",
     {"entities": [(6, 13, "ORDER_NUMBER")]}),  # #B78234

    ("Ticket #R90765 is still open and awaiting an agent’s response.",
     {"entities": [(7, 14, "TICKET_NUMBER")]}),  # #R90765

    ("The issue described was 'connectivity problems on Wi-Fi'.",
     {"entities": [(25, 55, "ISSUE_DESCRIPTION")]}),  # connectivity problems on Wi-Fi

    ("Customer feedback: 'The service was terrible, very frustrating!'",
     {"entities": [(20, 63, "CUSTOMER_FEEDBACK")]}),  # The service was terrible, very frustrating!

    ("The user submitted a warranty claim for their broken smartphone.",
     {"entities": [(21, 35, "SERVICE_REQUEST")]}),  # warranty claim

    ("They need help with their new Google Pixel 8 Pro model.",
     {"entities": [(30, 48, "PRODUCT_MODEL")]}),  # Google Pixel 8 Pro

    ("They contacted support via phone support for a technical issue.",
     {"entities": [(27, 40, "SUPPORT_CHANNEL")]}),  # phone support

    ("The issue was reported from their address in São Paulo, Brazil.",
     {"entities": [(45, 62, "LOCATION")]}),  # São Paulo, Brazil

    ("Customer complaint: 'I received the wrong color for my sneakers!'",
     {"entities": [(21, 64, "COMPLAINT_TYPE")]}),  # I received the wrong color for my sneakers!

    ("The ticket status has been changed to 'pending review'.",
     {"entities": [(39, 53, "TICKET_STATUS")]}),  # pending review

    ("The last update on this case was on February 19, 2025.",
     {"entities": [(36, 53, "TIME_DATE")]}),  # February 19, 2025

    ("The customer asked to escalate the issue to the regional manager.",
     {"entities": [(22, 64, "ESCALATION_REQUEST")]}),  # escalate the issue to the regional manager

    ("A refund request was submitted for the defective washing machine.",
     {"entities": [(2, 16, "REFUND_REQUEST")]}),  # refund request

    ("Shipping information shows that the package is 'delayed in transit'.",
     {"entities": [(48, 66, "SHIPPING_INFO")]}),  # delayed in transit

    ("The customer used a 30% discount promo during their purchase.",
     {"entities": [(20, 38, "DISCOUNT_PROMO")]}),  # 30% discount promo

    ("Customer satisfaction ratings indicate a 2.5-star average.",
     {"entities": [(41, 57, "CUSTOMER_SATISFACTION")]}),  # 2.5-star average

    ("The survey request asked: 'Would you recommend our service?'",
     {"entities": [(4, 18, "SURVEY_REQUEST")]}),  # survey request

    # Customer Service Entities
    ("Toyota has been known for excellent customer service.",
     {"entities": [(0, 6, "COMPANY_NAME")]}),  # Toyota

    ("Customers often complain about connectivity issues with Verizon 5G.",
     {"entities": [(56, 66, "PRODUCT_SERVICE_NAME")]}),  # Verizon 5G

    ("The service level agreement guarantees a resolution within 72 hours.",
     {"entities": [(41, 67, "SLA_TERMS")]}),  # resolution within 72 hours

    ("Customer query: 'How do I change my billing address?'",
     {"entities": [(0, 14, "CUSTOMER_QUERY")]}),  # Customer query

    ("They reached out to WhatsApp support to track their missing package.",
     {"entities": [(20, 36, "SERVICE_TYPE")]}),  # WhatsApp support

    ("A failed payment issue occurred due to an expired credit card.",
     {"entities": [(2, 22, "PAYMENT_ISSUE")]}),  # failed payment issue

    ("Shipping and delivery information states that customs clearance is pending.",
     {"entities": [(0, 33, "SHIPPING_DELIVERY_INFO")]}),  # Shipping and delivery information

    ("A return request was submitted for a defective espresso machine.",
     {"entities": [(2, 16, "RETURN_REQUEST")]}),  # return request

    ("Their feedback review stated: 'Support took over 5 hours to respond!'",
     {"entities": [(6, 21, "FEEDBACK_REVIEW")]}),  # feedback review

    ("The discount code SUMMERDEAL50 was successfully applied.",
     {"entities": [(18, 30, "DISCOUNT_CODE")]}),  # SUMMERDEAL50
]

print("Training data set up!")


In [None]:
import spacy

# Start from an empty English pipeline; this rebinds `nlp`, replacing any
# pipeline assigned to it earlier in the notebook.
nlp = spacy.blank("en")

# Attach a new, untrained entity recognizer to the pipeline.
ner = nlp.add_pipe("ner")

# Register every label that appears in the training annotations, in the
# order the examples list them.
for _text, annotation in TRAIN_DATA:
    for _start, _end, label in annotation["entities"]:
        ner.add_label(label)


In [None]:
import random
from spacy.training.example import Example

# Initialise the pipeline's weights and obtain the optimizer passed to
# nlp.update(). `initialize()` is the spaCy v3 name for the deprecated
# `begin_training()`, which only remains as a warning-emitting alias.
optimizer = nlp.initialize()

for epoch in range(30):
    # Shuffle in place each epoch so the examples are not always presented
    # in the same order.
    random.shuffle(TRAIN_DATA)
    # Filled in by nlp.update() and printed once per epoch.
    losses = {}
    for text, annotations in TRAIN_DATA:
        # Pair the tokenized text with its gold entity spans; one example
        # per update step.
        doc = nlp.make_doc(text)
        example = Example.from_dict(doc, annotations)
        nlp.update([example], sgd=optimizer, losses=losses)
    print(f"Epoch {epoch} - Loss: {losses}")

# Save the trained model to disk
nlp.to_disk("trained_ner_model")
print("Model saved to 'trained_ner_model' folder!")
