In [1]:
import pandas as pd 
import nltk 
from nltk.corpus import stopwords 
from nltk.stem import WordNetLemmatizer 
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.model_selection import train_test_split 
from sklearn.naive_bayes import MultinomialNB, BernoulliNB 
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, auc 
import string 

In [2]:
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Asus\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
data = pd.read_csv('ecommerceDataset.csv')
data.head(10)


Unnamed: 0,Household,"Paper Plane Design Framed Wall Hanging Motivational Office Decor Art Prints (8.7 X 8.7 inch) - Set of 4 Painting made up in synthetic frame with uv textured print which gives multi effects and attracts towards it. This is an special series of paintings which makes your wall very beautiful and gives a royal touch. This painting is ready to hang, you would be proud to possess this unique painting that is a niche apart. We use only the most modern and efficient printing technology on our prints, with only the and inks and precision epson, roland and hp printers. This innovative hd printing technique results in durable and spectacular looking prints of the highest that last a lifetime. We print solely with top-notch 100% inks, to achieve brilliant and true colours. Due to their high level of uv resistance, our prints retain their beautiful colours for many years. Add colour and style to your living space with this digitally printed painting. Some are for pleasure and some for eternal bliss.so bring home this elegant print that is lushed with rich colors that makes it nothing but sheer elegance to be to your friends and family.it would be treasured forever by whoever your lucky recipient is. Liven up your place with these intriguing paintings that are high definition hd graphic digital prints for home, office or any room."
0,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
1,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
2,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
3,Household,Incredible Gifts India Wooden Happy Birthday U...
4,Household,Pitaara Box Romantic Venice Canvas Painting 6m...
5,Household,Paper Plane Design Starry Night Vangoh Wall Ar...
6,Household,Pitaara Box Romantic Venice Canvas Painting 6m...
7,Household,SAF 'Ganesh Modern Art Print' Painting (Synthe...
8,Household,Paintings Villa UV Textured Modern Art Print F...
9,Household,Painting Mantra Art Street - Jardin Bird Frame...


In [4]:
data.columns =['class name', 'description']

In [5]:
data.head(10)

Unnamed: 0,class name,description
0,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
1,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
2,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
3,Household,Incredible Gifts India Wooden Happy Birthday U...
4,Household,Pitaara Box Romantic Venice Canvas Painting 6m...
5,Household,Paper Plane Design Starry Night Vangoh Wall Ar...
6,Household,Pitaara Box Romantic Venice Canvas Painting 6m...
7,Household,SAF 'Ganesh Modern Art Print' Painting (Synthe...
8,Household,Paintings Villa UV Textured Modern Art Print F...
9,Household,Painting Mantra Art Street - Jardin Bird Frame...


In [6]:
stopword = set(stopwords.words('english')) 
stopword

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'only',
 'or',
 'other',
 'our',
 'ours',
 'ourselves',
 'out',
 'over',
 'own',
 'r

In [7]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [8]:
data = data.dropna()
print(data.isnull().sum())

class name     0
description    0
dtype: int64


In [9]:
data['description']

0        SAF 'Floral' Framed Painting (Wood, 30 inch x ...
1        SAF 'UV Textured Modern Art Print Framed' Pain...
2        SAF Flower Print Framed Painting (Synthetic, 1...
3        Incredible Gifts India Wooden Happy Birthday U...
4        Pitaara Box Romantic Venice Canvas Painting 6m...
                               ...                        
50419    Strontium MicroSD Class 10 8GB Memory Card (Bl...
50420    CrossBeats Wave Waterproof Bluetooth Wireless ...
50421    Karbonn Titanium Wind W4 (White) Karbonn Titan...
50422    Samsung Guru FM Plus (SM-B110E/D, Black) Colou...
50423                     Micromax Canvas Win W121 (White)
Name: description, Length: 50423, dtype: object

In [10]:
def preprocess_text(text):
    # Remove punctuation
    remove_punc = [char for char in text if char not in string.punctuation]
    clean_words = ''.join(remove_punc) # char joining
    
    # Remove stopwords
    text = ([word for word in clean_words.split() if word.lower() not in stopword])
    return text

In [11]:
data.description = data.description.apply(preprocess_text)
data.description

0        [SAF, Floral, Framed, Painting, Wood, 30, inch...
1        [SAF, UV, Textured, Modern, Art, Print, Framed...
2        [SAF, Flower, Print, Framed, Painting, Synthet...
3        [Incredible, Gifts, India, Wooden, Happy, Birt...
4        [Pitaara, Box, Romantic, Venice, Canvas, Paint...
                               ...                        
50419    [Strontium, MicroSD, Class, 10, 8GB, Memory, C...
50420    [CrossBeats, Wave, Waterproof, Bluetooth, Wire...
50421    [Karbonn, Titanium, Wind, W4, White, Karbonn, ...
50422    [Samsung, Guru, FM, Plus, SMB110ED, Black, Col...
50423                 [Micromax, Canvas, Win, W121, White]
Name: description, Length: 50423, dtype: object

In [12]:
data.description[2]

['SAF',
 'Flower',
 'Print',
 'Framed',
 'Painting',
 'Synthetic',
 '135',
 'inch',
 'x',
 '22',
 'inch',
 'UV',
 'Textured',
 'Set',
 '3',
 'SANFSW4951',
 'ColorMulticolor',
 'Size35',
 'cm',
 'x',
 '50',
 'cm',
 'x',
 '2',
 'cm',
 'beautiful',
 'painting',
 'involves',
 'action',
 'skill',
 'using',
 'paint',
 'right',
 'manner',
 'hence',
 'end',
 'product',
 'picture',
 'speak',
 'thousand',
 'words',
 'say',
 'Arts',
 'trend',
 'quite',
 'time',
 'give',
 'different',
 'viewer',
 'different',
 'meanings',
 'style',
 'design',
 'saf',
 'wood',
 'matte',
 'painting',
 'frame',
 'quite',
 'abstract',
 'mysteriously',
 'beautiful',
 'painting',
 'nice',
 'frame',
 'gift',
 'family',
 'friend',
 'painting',
 'various',
 'forms',
 'certain',
 'figures',
 'seen',
 'image',
 'add',
 'good',
 'set',
 'lights',
 'place',
 'painting',
 'decor',
 'give',
 'different',
 'feel',
 'look',
 'place',
 'Quality',
 'durability',
 'painting',
 'matte',
 'finish',
 'includes',
 'good',
 'quality',
 'f

In [13]:
lemmatizer = WordNetLemmatizer()

def lemmatize_text(text):
    lemmatized_text = ' '.join([lemmatizer.lemmatize(word) for word in text])
    return lemmatized_text

data.description = data.description.apply(lemmatize_text)

In [14]:
vectorizer = TfidfVectorizer()

x = vectorizer.fit_transform(data.description)
y = data['class name']

In [15]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

In [16]:
models = [
    MultinomialNB(),
    BernoulliNB()
]

for model in models:
    model.fit(xtrain, ytrain)

    ypred = model.predict(xtest)
    ypred_proba = model.predict_proba(xtest)[:, 1]

    print(f"Model: {type(model).__name__}")
    print('Accuracy Score =',model.score(xtest, ytest))
    print("Confusion Matrix:")
    print(confusion_matrix(ytest, ypred))

    print('\n')

Model: MultinomialNB
Accuracy Score = 0.9399107585523054
Confusion Matrix:
[[2149   17   26  143]
 [   4 1641    6  108]
 [  41    5 1873  193]
 [  20   13   30 3816]]


Model: BernoulliNB
Accuracy Score = 0.8777392166584036
Confusion Matrix:
[[1641  515   22  157]
 [   5 1716    9   29]
 [  39   91 1761  221]
 [  21   74   50 3734]]




In [None]:
random_text = input()

preprocessed_text = preprocess_text(random_text)
lemmatized_text = lemmatize_text(preprocessed_text)
text_vector = vectorizer.transform([lemmatized_text])

for model in models:
    prediction = model.predict(text_vector)
    print(f"Model: {type(model).__name__}")
    print("Prediction:", prediction)
    print('\n')