In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
!pip install Faker



In [3]:
import random
from faker import Faker

# Initialize Faker
fake = Faker()

# Define product categories
categories = ["Electronics", "Personal Care", "Sports & Outdoors", "Home Appliances", "Furniture"]

# Define sample product names and descriptions by category
product_samples = {
    "Electronics": [
        {"name": "Laptop", "description": "High-performance laptop with {}-inch display"},
        {"name": "Smartphone", "description": "Latest model smartphone with {} battery"},
        {"name": "Headphones", "description": "Noise-cancelling {} headphones"},
        {"name": "Tablet", "description": "Portable tablet with {}-inch screen"},
        {"name": "Smartwatch", "description": "Wearable smartwatch with {} features"}
    ],
    "Personal Care": [
        {"name": "Toothbrush", "description": "Rechargeable electric toothbrush with {} modes"},
        {"name": "Hair Dryer", "description": "Professional hair dryer with {} settings"},
        {"name": "Electric Shaver", "description": "Rechargeable electric shaver with {} blades"},
        {"name": "Facial Cleanser", "description": "Electric facial cleanser with {} speed settings"},
        {"name": "Hair Straightener", "description": "Ceramic hair straightener with {} temperature settings"}
    ],
    "Sports & Outdoors": [
        {"name": "Yoga Mat", "description": "Eco-friendly yoga mat with {} surface"},
        {"name": "Running Shoes", "description": "Lightweight running shoes with {} material"},
        {"name": "Tent", "description": "Waterproof tent for {} people"},
        {"name": "Fitness Tracker", "description": "Wearable fitness tracker with {} features"},
        {"name": "Bicycle", "description": "Mountain bicycle with {} gears"}
    ],
    "Home Appliances": [
        {"name": "Blender", "description": "High-speed blender with {} settings"},
        {"name": "Coffee Maker", "description": "Programmable coffee maker with {} capacity"},
        {"name": "Vacuum Cleaner", "description": "Bagless vacuum cleaner with {} suction power"},
        {"name": "Microwave Oven", "description": "Countertop microwave oven with {} presets"},
        {"name": "Air Purifier", "description": "HEPA air purifier with {} speed settings"}
    ],
    "Furniture": [
        {"name": "Desk Chair", "description": "Ergonomic desk chair with {} support"},
        {"name": "Dining Table", "description": "Modern dining table with seating for {}"},
        {"name": "Sofa", "description": "Comfortable sofa with {} cushions"},
        {"name": "Bookshelf", "description": "Wooden bookshelf with {} shelves"},
        {"name": "Bed Frame", "description": "King-size bed frame with {} design"}
    ],
}

# Function to generate a random product
def generate_random_product():
    category = random.choice(categories)
    product_sample = random.choice(product_samples[category])
    name = f"{product_sample['name']} {fake.word().capitalize()}"
    description = product_sample["description"].format(fake.word())
    price = round(random.uniform(10.0, 2000.0), 2)
    return {"name": name, "description": description, "price": price, "category": category}

# Generate 10,000 random products
products = [generate_random_product() for _ in range(10000)]

# Convert to DataFrame
df = pd.DataFrame(products)

# Display the DataFrame
print(df.head())

# Save to CSV for further use
df.to_csv("dummy_products.csv", index=False)


                  name                                     description  \
0           Tent Under               Waterproof tent for remain people   
1   Running Shoes Wait  Lightweight running shoes with result material   
2  Yoga Mat Understand      Eco-friendly yoga mat with discuss surface   
3    Dining Table Deal    Modern dining table with seating for medical   
4    Blender President           High-speed blender with drug settings   

     price           category  
0  1308.44  Sports & Outdoors  
1   641.22  Sports & Outdoors  
2   863.17  Sports & Outdoors  
3  1403.64          Furniture  
4  1276.71    Home Appliances  


In [4]:
df.sample(5)

Unnamed: 0,name,description,price,category
4341,Air Purifier Do,HEPA air purifier with fall speed settings,1009.97,Home Appliances
7579,Microwave Oven No,Countertop microwave oven with quality presets,1904.44,Home Appliances
7705,Electric Shaver Their,Rechargeable electric shaver with leave blades,1093.36,Personal Care
3815,Smartwatch Money,Wearable smartwatch with line features,838.64,Electronics
2290,Yoga Mat Sound,Eco-friendly yoga mat with machine surface,1449.76,Sports & Outdoors


In [5]:
#converting to lowercase
#removing special characters, punctuation

import re

def clean_text(text):
    text = text.lower() 
    text = re.sub(r'[^\w\s]', '', text) 
    return text

df['name'] = df['name'].apply(clean_text)
df['description'] = df['description'].apply(clean_text)

In [6]:
!pip install nltk



In [7]:
#tokenizing data
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize

df['name'] = df['name'].apply(word_tokenize)
df['description'] = df['description'].apply(word_tokenize)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [8]:
df.head()

Unnamed: 0,name,description,price,category
0,"[tent, under]","[waterproof, tent, for, remain, people]",1308.44,Sports & Outdoors
1,"[running, shoes, wait]","[lightweight, running, shoes, with, result, ma...",641.22,Sports & Outdoors
2,"[yoga, mat, understand]","[ecofriendly, yoga, mat, with, discuss, surface]",863.17,Sports & Outdoors
3,"[dining, table, deal]","[modern, dining, table, with, seating, for, me...",1403.64,Furniture
4,"[blender, president]","[highspeed, blender, with, drug, settings]",1276.71,Home Appliances


In [9]:
#removing stopwords
from nltk.corpus import stopwords
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

def remove_stop_words(tokens):
    return [word for word in tokens if word not in stop_words]

df['name'] = df['name'].apply(remove_stop_words)
df['description'] = df['description'].apply(remove_stop_words)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [10]:
df.head()

Unnamed: 0,name,description,price,category
0,[tent],"[waterproof, tent, remain, people]",1308.44,Sports & Outdoors
1,"[running, shoes, wait]","[lightweight, running, shoes, result, material]",641.22,Sports & Outdoors
2,"[yoga, mat, understand]","[ecofriendly, yoga, mat, discuss, surface]",863.17,Sports & Outdoors
3,"[dining, table, deal]","[modern, dining, table, seating, medical]",1403.64,Furniture
4,"[blender, president]","[highspeed, blender, drug, settings]",1276.71,Home Appliances


In [11]:
#lemmatization - converting words to their root word or base form
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

def lemmatize_tokens(tokens):
    return [lemmatizer.lemmatize(word) for word in tokens]

df['name'] = df['name'].apply(lemmatize_tokens)
df['description'] = df['description'].apply(lemmatize_tokens)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [12]:
df.head()

Unnamed: 0,name,description,price,category
0,[tent],"[waterproof, tent, remain, people]",1308.44,Sports & Outdoors
1,"[running, shoe, wait]","[lightweight, running, shoe, result, material]",641.22,Sports & Outdoors
2,"[yoga, mat, understand]","[ecofriendly, yoga, mat, discus, surface]",863.17,Sports & Outdoors
3,"[dining, table, deal]","[modern, dining, table, seating, medical]",1403.64,Furniture
4,"[blender, president]","[highspeed, blender, drug, setting]",1276.71,Home Appliances


In [15]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()

df['name'] = df['name'].astype(str)
df['description'] = df['description'].astype(str)

# Concatenate text data from both columns
all_text = (df['name'].apply(lambda x: ' '.join(x)) + ' ' +
            df['description'].apply(lambda x: ' '.join(x)))
tokenizer.fit_on_texts(all_text)

# Convert text data in each column to sequences of integers
sequences_col1 = tokenizer.texts_to_sequences(df['name'].apply(lambda x: ' '.join(x)))
sequences_col2 = tokenizer.texts_to_sequences(df['description'].apply(lambda x: ' '.join(x)))

# Pad sequences to ensure uniform length
max_length = max(max(len(seq) for seq in sequences_col1), max(len(seq) for seq in sequences_col2))
padded_sequences_col1 = pad_sequences(sequences_col1, maxlen=max_length, padding='post')
padded_sequences_col2 = pad_sequences(sequences_col2, maxlen=max_length, padding='post')

# Replace original text data in the columns with padded sequences
df['name'] = padded_sequences_col1.tolist()
df['description'] = padded_sequences_col2.tolist()

In [16]:
df.head()

Unnamed: 0,name,description,price,category
0,"[1, 5, 2, 7, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 21, 3, 5, 2, 4, 13, 4, 8, 8, 17, 1, 1, 5, ...",0.535569,Sports & Outdoors
1,"[1, 4, 19, 7, 7, 6, 7, 15, 1, 1, 9, 12, 8, 2, ...","[1, 11, 6, 15, 12, 5, 21, 2, 6, 15, 12, 5, 1, ...",-0.619322,Sports & Outdoors
2,"[1, 20, 8, 15, 3, 1, 1, 14, 3, 5, 1, 1, 19, 7,...","[1, 2, 10, 8, 17, 4, 6, 2, 7, 16, 11, 20, 1, 1...",-0.235149,Sports & Outdoors
3,"[1, 16, 6, 7, 6, 7, 15, 1, 1, 5, 3, 18, 11, 2,...","[1, 14, 8, 16, 2, 4, 7, 1, 1, 16, 6, 7, 6, 7, ...",0.700351,Furniture
4,"[1, 18, 11, 2, 7, 16, 2, 4, 1, 1, 13, 4, 2, 9,...","[1, 12, 6, 15, 12, 9, 13, 2, 2, 16, 1, 1, 18, ...",0.480648,Home Appliances


In [17]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
df['category'] = label_encoder.fit_transform(df['category'])
y = df['category']

In [18]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

scaler = MinMaxScaler(feature_range=(1, 10))
df['price'] = scaler.fit_transform(df[['price']])
df['combined'] = df.apply(lambda row: row['name'] + row['description'] + [int(row['price'])], axis=1)

max_seq_length = max(df['combined'].apply(len))
X = pad_sequences(df['combined'], maxlen=max_seq_length, padding='post')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, BatchNormalization, Embedding, LSTM, Flatten
from tensorflow.keras.regularizers import l2

model = Sequential([
    Embedding(input_dim=100, output_dim=50, input_length=max_seq_length),
    Flatten(),
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Use 'categorical_crossentropy' for multiclass
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])




In [20]:
history = model.fit(X_train, y_train,
                    epochs=10,  # Adjust the number of epochs
                    batch_size=128,  # Adjust the batch size
                    validation_split=0.2)

Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.6584 - loss: 2.6218 - val_accuracy: 0.9631 - val_loss: 2.3680
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9617 - loss: 1.2987 - val_accuracy: 1.0000 - val_loss: 1.6704
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9834 - loss: 0.8365 - val_accuracy: 1.0000 - val_loss: 1.3114
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9933 - loss: 0.5606 - val_accuracy: 0.9975 - val_loss: 1.0387
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9926 - loss: 0.3914 - val_accuracy: 1.0000 - val_loss: 0.7557
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9944 - loss: 0.2760 - val_accuracy: 0.9906 - val_loss: 0.6159
Epoch 7/10
[1m50/50[0m [32m━━━━

In [26]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

train_loss, train_accuracy = model.evaluate(X_train, y_train)
test_loss, test_accuracy = model.evaluate(X_test, y_test)

print(f"Training Loss: {train_loss}, Training Accuracy: {train_accuracy}")
print(f"Testing Loss: {test_loss}, Testing Accuracy: {test_accuracy}")

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9988 - loss: 0.2288
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9985 - loss: 0.2333
Training Loss: 0.22557556629180908, Training Accuracy: 0.9991250038146973
Testing Loss: 0.2326909303665161, Testing Accuracy: 0.9980000257492065


In [27]:
# Predict on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate evaluation metrics
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_classes))

print("\nClassification Report:")
target_names = list(map(str, label_encoder.classes_))
print("Target names:", target_names)  # Debugging line to verify target names

print("Encoded Label -> Actual Category")
for encoded_label in range(len(label_encoder.classes_)):
    actual_category = label_encoder.inverse_transform([encoded_label])
    print(f"{encoded_label} -> {actual_category[0]}")
print(classification_report(y_test, y_pred_classes, target_names=target_names))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Confusion Matrix:
[[391   0   0   0   0]
 [  0 402   0   0   0]
 [  0   0 405   0   0]
 [  1   0   3 399   0]
 [  0   0   0   0 399]]

Classification Report:
Target names: ['Electronics', 'Furniture', 'Home Appliances', 'Personal Care', 'Sports & Outdoors']
Encoded Label -> Actual Category
0 -> Electronics
1 -> Furniture
2 -> Home Appliances
3 -> Personal Care
4 -> Sports & Outdoors
                   precision    recall  f1-score   support

      Electronics       1.00      1.00      1.00       391
        Furniture       1.00      1.00      1.00       402
  Home Appliances       0.99      1.00      1.00       405
    Personal Care       1.00      0.99      1.00       403
Sports & Outdoors       1.00      1.00      1.00       399

         accuracy                           1.00      2000
        macro avg       1.00      1.00      1.00      2000
     weighted avg       1.00      1.00      1.00      2000



In [1]:
!pip install nbconvert flake8



In [2]:
!jupyter nbconvert --to script payever_task.ipynb

[NbConvertApp] Converting notebook payever_task.ipynb to script
[NbConvertApp] Writing 9169 bytes to payever_task.py


In [54]:
!flake8 payever_task.py

In [1]:
!pip install pipreqs

Collecting pipreqs
  Downloading pipreqs-0.5.0-py3-none-any.whl.metadata (7.9 kB)
Collecting docopt==0.6.2 (from pipreqs)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting ipython==8.12.3 (from pipreqs)
  Downloading ipython-8.12.3-py3-none-any.whl.metadata (5.7 kB)
Collecting yarg==0.1.9 (from pipreqs)
  Downloading yarg-0.1.9-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting backcall (from ipython==8.12.3->pipreqs)
  Downloading backcall-0.2.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting pickleshare (from ipython==8.12.3->pipreqs)
  Downloading pickleshare-0.7.5-py2.py3-none-any.whl.metadata (1.5 kB)
Downloading pipreqs-0.5.0-py3-none-any.whl (33 kB)
Downloading ipython-8.12.3-py3-none-any.whl (798 kB)
   ---------------------------------------- 0.0/798.3 kB ? eta -:--:--
   -- ------------------------------------- 41.0/798.3 kB ? eta -:--:--
   ----------- ------------------

In [4]:
!pipreqs . --use-local

INFO: Not scanning for jupyter notebooks.
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "D:\ProgramFiles\Miniconda\Scripts\pipreqs.exe\__main__.py", line 7, in <module>
  File "D:\ProgramFiles\Miniconda\Lib\site-packages\pipreqs\pipreqs.py", line 609, in main
    init(args)
  File "D:\ProgramFiles\Miniconda\Lib\site-packages\pipreqs\pipreqs.py", line 599, in init
    generate_requirements_file(path, imports, symbol)
  File "D:\ProgramFiles\Miniconda\Lib\site-packages\pipreqs\pipreqs.py", line 209, in generate_requirements_file
    with _open(path, "w") as out_file:
  File "D:\ProgramFiles\Miniconda\Lib\contextlib.py", line 137, in __enter__
    return next(self.gen)
           ^^^^^^^^^^^^^^
  File "D:\ProgramFiles\Miniconda\Lib\site-packages\pipreqs\pipreqs.py", line 91, in _open
    file = open(filename, mode)
           ^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file o