In [1]:
import pandas as pd
import spacy
import re
import json
from collections import defaultdict

### Load Testing data

In [2]:
# Load the validation titles and inspect the frame's size and schema.
test_df = pd.read_excel('/content/Product_NER_Validation.xlsx')
print("Shape of dataframe:", test_df.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line after the report.
test_df.info()
test_df.head(3)

Shape of dataframe: (99, 1)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   title   99 non-null     object
dtypes: object(1)
memory usage: 920.0+ bytes
None


Unnamed: 0,title
0,the alpha of all trimmers
1,irobot roomba pro intelligent floorvac robotic...
2,everlast classic heavyweight duffel black


In [3]:
import zipfile

# Unpack the packaged model archive into the directory the model is loaded from.
with zipfile.ZipFile('/content/Product_Name_NER_Model.zip', mode='r') as model_archive:
    model_archive.extractall(path='/content/Product_Name_NER_Model')

In [4]:
# Directory holding the trained spaCy pipeline (the same location the model
# archive is extracted to earlier in this notebook).
model_path = "/content/Product_Name_NER_Model"

# Load the pipeline from disk; `nlp` is what later cells call to annotate titles.
nlp = spacy.load(model_path)

In [5]:
def preprocess_text(text):
    """Lower-case `text` and collapse every run of whitespace to one space.

    Leading and trailing whitespace is dropped as part of the collapse.
    """
    # A no-argument split() already discards leading/trailing whitespace.
    tokens = text.lower().split()
    return ' '.join(tokens)

In [6]:
# Normalise every title up front so the whole set can be streamed through the
# model in one go; the raw titles are kept for the output frame.
titles = test_df['title'].tolist()
processed_titles = [preprocess_text(title) for title in titles]

results = []

# nlp.pipe() processes the texts in batches and yields Docs in input order,
# avoiding the per-row overhead of iterrows() plus one nlp() call per title.
for title, doc in zip(titles, nlp.pipe(processed_titles)):
    grouped_entities = defaultdict(list)  # label -> list of entity texts
    ner_entities = []  # (start_char, end_char, label) per entity

    for ent in doc.ents:
        grouped_entities[ent.label_].append(ent.text)
        # Character offsets index into the preprocessed text (lower-cased,
        # whitespace-collapsed), not necessarily into the raw title.
        ner_entities.append((ent.start_char, ent.end_char, ent.label_))

    results.append({
        'title': title,
        'extracted_attributes': json.dumps(dict(grouped_entities)),
        'ner_entities': str(ner_entities)
    })

submission_df = pd.DataFrame(results)

In [7]:
# Write the submission to CSV in the current working directory; index=False
# keeps the DataFrame's row index out of the file.
submission_df.to_csv('Product_Name_NER_Submission.csv', index=False)

In [8]:
# Preview up to the first 50 submission rows to eyeball the extracted entities.
submission_df.head(50)

Unnamed: 0,title,extracted_attributes,ner_entities
0,the alpha of all trimmers,"{""features_specifications"": [""the alpha of all...","[(0, 25, 'features_specifications')]"
1,irobot roomba pro intelligent floorvac robotic...,"{""brand"": [""irobot roomba""], ""features_specifi...","[(0, 13, 'brand'), (18, 38, 'features_specific..."
2,everlast classic heavyweight duffel black,"{""brand"": [""everlast""], ""features_specificatio...","[(0, 8, 'brand'), (9, 16, 'features_specificat..."
3,activetool airman cordless multi-purpose air ...,"{""brand"": [""activetool""], ""features_specificat...","[(0, 10, 'brand'), (11, 17, 'features_specific..."
4,apple iphone 3g digitizer touch screen replace...,"{""brand"": [""apple iphone""], ""features_specific...","[(0, 12, 'brand'), (60, 74, 'features_specific..."
5,carolina cottage 42-inch round drop leaf table...,"{""brand"": [""carolina cottage""], ""quantity"": [""...","[(0, 16, 'brand'), (17, 24, 'quantity'), (25, ..."
6,"martex 300-thread-count queen flat sheet, heather","{""brand"": [""martex""], ""features_specifications...","[(0, 6, 'brand'), (7, 10, 'features_specificat..."
7,home decor 5202-42 easel adjustable clr.wht 3....,"{""brand"": [""home""], ""features_specifications"":...","[(0, 4, 'brand'), (11, 24, 'features_specifica..."
8,sprite 1litre lemon drink,"{""brand"": [""sprite""], ""features_specifications...","[(0, 6, 'brand'), (7, 13, 'features_specificat..."
9,c2g 29550 2-port uxga monitor splitter/extende...,"{""brand"": [""c2g""], ""features_specifications"": ...","[(0, 3, 'brand'), (4, 9, 'features_specificati..."


# My observations
1. The model performs well on all attributes except dimensions, which it confuses with quantity (e.g. "42-inch" in row 5 is labelled `quantity`).
2. Overall, the brand and feature-specification results are good.
