In [9]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [10]:
!pip install Faker



In [11]:
import random
from faker import Faker

# Initialize Faker
fake = Faker()

# Define product categories
categories = ["Electronics", "Personal Care", "Sports & Outdoors", "Home Appliances", "Furniture"]

# Define sample product names and descriptions by category
product_samples = {
    "Electronics": [
        {"name": "Laptop", "description": "High-performance laptop with {}-inch display"},
        {"name": "Smartphone", "description": "Latest model smartphone with {} battery"},
        {"name": "Headphones", "description": "Noise-cancelling {} headphones"},
        {"name": "Tablet", "description": "Portable tablet with {}-inch screen"},
        {"name": "Smartwatch", "description": "Wearable smartwatch with {} features"}
    ],
    "Personal Care": [
        {"name": "Toothbrush", "description": "Rechargeable electric toothbrush with {} modes"},
        {"name": "Hair Dryer", "description": "Professional hair dryer with {} settings"},
        {"name": "Electric Shaver", "description": "Rechargeable electric shaver with {} blades"},
        {"name": "Facial Cleanser", "description": "Electric facial cleanser with {} speed settings"},
        {"name": "Hair Straightener", "description": "Ceramic hair straightener with {} temperature settings"}
    ],
    "Sports & Outdoors": [
        {"name": "Yoga Mat", "description": "Eco-friendly yoga mat with {} surface"},
        {"name": "Running Shoes", "description": "Lightweight running shoes with {} material"},
        {"name": "Tent", "description": "Waterproof tent for {} people"},
        {"name": "Fitness Tracker", "description": "Wearable fitness tracker with {} features"},
        {"name": "Bicycle", "description": "Mountain bicycle with {} gears"}
    ],
    "Home Appliances": [
        {"name": "Blender", "description": "High-speed blender with {} settings"},
        {"name": "Coffee Maker", "description": "Programmable coffee maker with {} capacity"},
        {"name": "Vacuum Cleaner", "description": "Bagless vacuum cleaner with {} suction power"},
        {"name": "Microwave Oven", "description": "Countertop microwave oven with {} presets"},
        {"name": "Air Purifier", "description": "HEPA air purifier with {} speed settings"}
    ],
    "Furniture": [
        {"name": "Desk Chair", "description": "Ergonomic desk chair with {} support"},
        {"name": "Dining Table", "description": "Modern dining table with seating for {}"},
        {"name": "Sofa", "description": "Comfortable sofa with {} cushions"},
        {"name": "Bookshelf", "description": "Wooden bookshelf with {} shelves"},
        {"name": "Bed Frame", "description": "King-size bed frame with {} design"}
    ],
}

# Function to generate a random product
def generate_random_product():
    category = random.choice(categories)
    product_sample = random.choice(product_samples[category])
    name = f"{product_sample['name']} {fake.word().capitalize()}"
    description = product_sample["description"].format(fake.word())
    price = round(random.uniform(10.0, 2000.0), 2)
    return {"name": name, "description": description, "price": price, "category": category}

# Generate 10,000 random products
products = [generate_random_product() for _ in range(10000)]

# Convert to DataFrame
df = pd.DataFrame(products)

# Display the DataFrame
print(df.head())

# Save to CSV for further use
df.to_csv("dummy_products.csv", index=False)


                   name                                   description  \
0     Air Purifier Task     HEPA air purifier with nor speed settings   
1  Fitness Tracker Pull  Wearable fitness tracker with alone features   
2   Microwave Oven Push   Countertop microwave oven with just presets   
3   Air Purifier Better  HEPA air purifier with middle speed settings   
4     Hair Dryer Happen  Professional hair dryer with though settings   

     price           category  
0   108.73    Home Appliances  
1  1515.07  Sports & Outdoors  
2   540.26    Home Appliances  
3  1739.78    Home Appliances  
4  1130.79      Personal Care  


In [12]:
df.sample(5)

Unnamed: 0,name,description,price,category
6508,Facial Cleanser If,Electric facial cleanser with four speed settings,1205.67,Personal Care
6507,Vacuum Cleaner Hospital,Bagless vacuum cleaner with activity suction p...,98.84,Home Appliances
6639,Toothbrush Continue,Rechargeable electric toothbrush with sport modes,1549.99,Personal Care
1662,Hair Dryer Fire,Professional hair dryer with item settings,122.84,Personal Care
1905,Bicycle Fill,Mountain bicycle with city gears,1250.86,Sports & Outdoors


In [13]:
#converting to lowercase
#removing special characters, punctuation

import re

def clean_text(text):
    text = text.lower() 
    text = re.sub(r'[^\w\s]', '', text) 
    return text

df['name'] = df['name'].apply(clean_text)
df['description'] = df['description'].apply(clean_text)

In [14]:
!pip install nltk



In [15]:
#tokenizing data
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize

df['name'] = df['name'].apply(word_tokenize)
df['description'] = df['description'].apply(word_tokenize)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [16]:
df.head()

Unnamed: 0,name,description,price,category
0,"[air, purifier, task]","[hepa, air, purifier, with, nor, speed, settings]",108.73,Home Appliances
1,"[fitness, tracker, pull]","[wearable, fitness, tracker, with, alone, feat...",1515.07,Sports & Outdoors
2,"[microwave, oven, push]","[countertop, microwave, oven, with, just, pres...",540.26,Home Appliances
3,"[air, purifier, better]","[hepa, air, purifier, with, middle, speed, set...",1739.78,Home Appliances
4,"[hair, dryer, happen]","[professional, hair, dryer, with, though, sett...",1130.79,Personal Care


In [17]:
#removing stopwords
from nltk.corpus import stopwords
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

def remove_stop_words(tokens):
    return [word for word in tokens if word not in stop_words]

df['name'] = df['name'].apply(remove_stop_words)
df['description'] = df['description'].apply(remove_stop_words)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [18]:
df.head()

Unnamed: 0,name,description,price,category
0,"[air, purifier, task]","[hepa, air, purifier, speed, settings]",108.73,Home Appliances
1,"[fitness, tracker, pull]","[wearable, fitness, tracker, alone, features]",1515.07,Sports & Outdoors
2,"[microwave, oven, push]","[countertop, microwave, oven, presets]",540.26,Home Appliances
3,"[air, purifier, better]","[hepa, air, purifier, middle, speed, settings]",1739.78,Home Appliances
4,"[hair, dryer, happen]","[professional, hair, dryer, though, settings]",1130.79,Personal Care


In [19]:
#lemmatization - converting words to their root word or base form
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

def lemmatize_tokens(tokens):
    return [lemmatizer.lemmatize(word) for word in tokens]

df['name'] = df['name'].apply(lemmatize_tokens)
df['description'] = df['description'].apply(lemmatize_tokens)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [20]:
df.head()

Unnamed: 0,name,description,price,category
0,"[air, purifier, task]","[hepa, air, purifier, speed, setting]",108.73,Home Appliances
1,"[fitness, tracker, pull]","[wearable, fitness, tracker, alone, feature]",1515.07,Sports & Outdoors
2,"[microwave, oven, push]","[countertop, microwave, oven, presets]",540.26,Home Appliances
3,"[air, purifier, better]","[hepa, air, purifier, middle, speed, setting]",1739.78,Home Appliances
4,"[hair, dryer, happen]","[professional, hair, dryer, though, setting]",1130.79,Personal Care


In [21]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()

df['name'] = df['name'].astype(str)
df['description'] = df['description'].astype(str)

# Concatenate text data from both columns
all_text = (df['name'].apply(lambda x: ' '.join(x)) + ' ' +
            df['description'].apply(lambda x: ' '.join(x)))
tokenizer.fit_on_texts(all_text)

# Convert text data in each column to sequences of integers
sequences_col1 = tokenizer.texts_to_sequences(df['name'].apply(lambda x: ' '.join(x)))
sequences_col2 = tokenizer.texts_to_sequences(df['description'].apply(lambda x: ' '.join(x)))

# Pad sequences to ensure uniform length
max_length = max(max(len(seq) for seq in sequences_col1), max(len(seq) for seq in sequences_col2))
padded_sequences_col1 = pad_sequences(sequences_col1, maxlen=max_length, padding='post')
padded_sequences_col2 = pad_sequences(sequences_col2, maxlen=max_length, padding='post')

# Replace original text data in the columns with padded sequences
df['name'] = padded_sequences_col1.tolist()
df['description'] = padded_sequences_col2.tolist()

In [22]:
df.head()

Unnamed: 0,name,description,price,category
0,"[1, 3, 6, 4, 1, 1, 13, 19, 4, 6, 17, 6, 2, 4, ...","[1, 12, 2, 13, 3, 1, 1, 3, 6, 4, 1, 1, 13, 19,...",108.73,Home Appliances
1,"[1, 17, 6, 5, 7, 2, 9, 9, 1, 1, 5, 4, 3, 10, 2...","[1, 21, 2, 3, 4, 3, 18, 11, 2, 1, 1, 17, 6, 5,...",1515.07,Sports & Outdoors
2,"[1, 14, 6, 10, 4, 8, 21, 3, 23, 2, 1, 1, 8, 23...","[1, 10, 8, 19, 7, 5, 2, 4, 5, 8, 13, 1, 1, 14,...",540.26,Home Appliances
3,"[1, 3, 6, 4, 1, 1, 13, 19, 4, 6, 17, 6, 2, 4, ...","[1, 12, 2, 13, 3, 1, 1, 3, 6, 4, 1, 1, 13, 19,...",1739.78,Home Appliances
4,"[1, 12, 3, 6, 4, 1, 1, 16, 4, 20, 2, 4, 1, 1, ...","[1, 13, 4, 8, 17, 2, 9, 9, 6, 8, 7, 3, 11, 1, ...",1130.79,Personal Care


In [23]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
df['category'] = label_encoder.fit_transform(df['category'])
y = df['category']

In [24]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

scaler = MinMaxScaler(feature_range=(1, 10))
df['price'] = scaler.fit_transform(df[['price']])
df['combined'] = df.apply(lambda row: row['name'] + row['description'] + [int(row['price'])], axis=1)

max_seq_length = max(df['combined'].apply(len))
X = pad_sequences(df['combined'], maxlen=max_seq_length, padding='post')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [25]:
df.head()

Unnamed: 0,name,description,price,category,combined
0,"[1, 3, 6, 4, 1, 1, 13, 19, 4, 6, 17, 6, 2, 4, ...","[1, 12, 2, 13, 3, 1, 1, 3, 6, 4, 1, 1, 13, 19,...",1.445791,2,"[1, 3, 6, 4, 1, 1, 13, 19, 4, 6, 17, 6, 2, 4, ..."
1,"[1, 17, 6, 5, 7, 2, 9, 9, 1, 1, 5, 4, 3, 10, 2...","[1, 21, 2, 3, 4, 3, 18, 11, 2, 1, 1, 17, 6, 5,...",7.80673,4,"[1, 17, 6, 5, 7, 2, 9, 9, 1, 1, 5, 4, 3, 10, 2..."
2,"[1, 14, 6, 10, 4, 8, 21, 3, 23, 2, 1, 1, 8, 23...","[1, 10, 8, 19, 7, 5, 2, 4, 5, 8, 13, 1, 1, 14,...",3.397621,2,"[1, 14, 6, 10, 4, 8, 21, 3, 23, 2, 1, 1, 8, 23..."
3,"[1, 3, 6, 4, 1, 1, 13, 19, 4, 6, 17, 6, 2, 4, ...","[1, 12, 2, 13, 3, 1, 1, 3, 6, 4, 1, 1, 13, 19,...",8.823104,2,"[1, 3, 6, 4, 1, 1, 13, 19, 4, 6, 17, 6, 2, 4, ..."
4,"[1, 12, 3, 6, 4, 1, 1, 16, 4, 20, 2, 4, 1, 1, ...","[1, 13, 4, 8, 17, 2, 9, 9, 6, 8, 7, 3, 11, 1, ...",6.068615,3,"[1, 12, 3, 6, 4, 1, 1, 16, 4, 20, 2, 4, 1, 1, ..."


In [26]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, BatchNormalization, Embedding, LSTM, Flatten
from tensorflow.keras.regularizers import l2

model = Sequential([
    Embedding(input_dim=100, output_dim=50, input_length=max_seq_length),
    Flatten(),
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Use 'categorical_crossentropy' for multiclass
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])




In [34]:
model.summary()

In [27]:
history = model.fit(X_train, y_train,
                    epochs=10,  # Adjust the number of epochs
                    batch_size=128,  # Adjust the batch size
                    validation_split=0.2)

Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.6753 - loss: 2.4634 - val_accuracy: 0.7000 - val_loss: 2.2861
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9694 - loss: 1.1875 - val_accuracy: 0.9906 - val_loss: 1.6180
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9874 - loss: 0.7225 - val_accuracy: 0.9912 - val_loss: 1.2803
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9938 - loss: 0.4653 - val_accuracy: 0.9681 - val_loss: 1.0447
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9973 - loss: 0.3129 - val_accuracy: 1.0000 - val_loss: 0.8495
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9965 - loss: 0.2160 - val_accuracy: 1.0000 - val_loss: 0.6657
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━

In [28]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

train_loss, train_accuracy = model.evaluate(X_train, y_train)
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Training Loss: {train_loss}, Training Accuracy: {train_accuracy}")
print(f"Testing Loss: {test_loss}, Testing Accuracy: {test_accuracy}")

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 0.1103
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 0.1123
Training Loss: 0.11096637696027756, Training Accuracy: 1.0
Testing Loss: 0.11139015853404999, Testing Accuracy: 1.0


In [29]:
# Predict on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate evaluation metrics
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_classes))

print("\nClassification Report:")
target_names = list(map(str, label_encoder.classes_))
print("Target names:", target_names)  # Debugging line to verify target names

print("Encoded Label -> Actual Category")
for encoded_label in range(len(label_encoder.classes_)):
    actual_category = label_encoder.inverse_transform([encoded_label])
    print(f"{encoded_label} -> {actual_category[0]}")
print(classification_report(y_test, y_pred_classes, target_names=target_names))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Confusion Matrix:
[[419   0   0   0   0]
 [  0 395   0   0   0]
 [  0   0 383   0   0]
 [  0   0   0 395   0]
 [  0   0   0   0 408]]

Classification Report:
Target names: ['Electronics', 'Furniture', 'Home Appliances', 'Personal Care', 'Sports & Outdoors']
Encoded Label -> Actual Category
0 -> Electronics
1 -> Furniture
2 -> Home Appliances
3 -> Personal Care
4 -> Sports & Outdoors
                   precision    recall  f1-score   support

      Electronics       1.00      1.00      1.00       419
        Furniture       1.00      1.00      1.00       395
  Home Appliances       1.00      1.00      1.00       383
    Personal Care       1.00      1.00      1.00       395
Sports & Outdoors       1.00      1.00      1.00       408

         accuracy                           1.00      2000
        macro avg       1.00      1.00      1.00      2000
     weighted avg       1.00      1.00      1.00      2000



In [30]:
!pip install nbconvert flake8



In [31]:
#!jupyter nbconvert --to script payever_task.ipynb

[NbConvertApp] Converting notebook payever_task.ipynb to script
[NbConvertApp] Writing 9306 bytes to payever_task.py


In [51]:
!flake8 payever_task.py

In [33]:
!pip install pipreqs



In [7]:
!pipreqs . --use-local

INFO: Not scanning for jupyter notebooks.
INFO: Successfully saved requirements file in .\requirements.txt
