<a href="https://colab.research.google.com/github/nasare34/Interview_Task/blob/main/Interview_Task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
from sklearn.model_selection import train_test_split


In [None]:
import pandas as pd

# I Created a small datasets
data = {
    'name': ['iPhone 13', 'MacBook Pro', 'Levi\'s Jeans', 'Dining Table'],
    'description': [
        'Latest model of iPhone with advanced features',
        'Powerful laptop with M1 chip and Retina display',
        'Comfortable and stylish jeans from Levi\'s',
        'Wooden dining table with modern design'
    ],
    'price': [799.67, 1299.55, 59, 499.79],
    'category': ['Electronics', 'Electronics', 'Clothing', 'Furniture']
}

df = pd.DataFrame(data)
print(df)


           name                                      description    price  \
0     iPhone 13    Latest model of iPhone with advanced features   799.67   
1   MacBook Pro  Powerful laptop with M1 chip and Retina display  1299.55   
2  Levi's Jeans        Comfortable and stylish jeans from Levi's    59.00   
3  Dining Table           Wooden dining table with modern design   499.79   

      category  
0  Electronics  
1  Electronics  
2     Clothing  
3    Furniture  


In [None]:
# Working on Tokenization of text attributes (name and description)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['name'] + df['description'])
name_sequences = tokenizer.texts_to_sequences(df['name'])
description_sequences = tokenizer.texts_to_sequences(df['description'])

In [None]:
# Padding sequences to ensure they have the same length
max_len = max(max(len(seq) for seq in name_sequences), max(len(seq) for seq in description_sequences))
name_sequences = pad_sequences(name_sequences, maxlen=max_len)
description_sequences = pad_sequences(description_sequences, maxlen=max_len)

In [None]:
# Okay, Let's work on the category by Encoding categorical attributes (category)
label_encoder = LabelEncoder()
categories = label_encoder.fit_transform(df['category'])

In [None]:
# Combining all features into a single array
features = np.hstack((name_sequences, description_sequences, df[['price']].values))
labels = categories

In [None]:
print("Features:\n", features)
print("Labels:\n", labels)

Features:
 [[0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
  2.00000e+00 0.00000e+00 7.00000e+00 8.00000e+00 2.00000e+00 1.00000e+00
  9.00000e+00 1.00000e+01 7.99670e+02]
 [0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
  1.10000e+01 1.30000e+01 1.00000e+00 1.40000e+01 1.50000e+01 3.00000e+00
  1.60000e+01 1.70000e+01 1.29955e+03]
 [0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 4.00000e+00
  2.00000e+01 0.00000e+00 0.00000e+00 3.00000e+00 1.90000e+01 2.00000e+01
  2.10000e+01 4.00000e+00 5.90000e+01]
 [0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 5.00000e+00
  2.30000e+01 0.00000e+00 0.00000e+00 5.00000e+00 2.30000e+01 1.00000e+00
  2.40000e+01 2.50000e+01 4.99790e+02]]
Labels:
 [1 1 0 2]


In [None]:

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)


In [None]:
# I defined the Define the model
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=8, input_length=features.shape[1]),
    Flatten(),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Training the model
model.fit(features, labels, epochs=30, batch_size=2, verbose=1)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7f5a1f3250f0>

In [None]:
# Let's Evaluate the model
loss, accuracy = model.evaluate(features, labels, verbose=1)
print(f"Loss: {loss}, Accuracy: {accuracy}")

Loss: 0.8849900960922241, Accuracy: 0.75


In [None]:
# Okay, again, Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")


Test Loss: 0.8106189370155334, Test Accuracy: 1.0


                                                  PREDICTION

---



In [58]:
# Prepare input data for prediction
new_product_name = df['name'][3]  # Selecting the name of the first product in the dataset
new_product_description = df['description'][3]  # Selecting the description of the first product in the dataset
new_product_price = df['price'][3]  # Selecting the price of the first product in the dataset


In [59]:
# Tokenize and pad the input text data
new_name_sequence = tokenizer.texts_to_sequences([new_product_name])
new_description_sequence = tokenizer.texts_to_sequences([new_product_description])
new_name_padded = pad_sequences(new_name_sequence, maxlen=max_len)
new_description_padded = pad_sequences(new_description_sequence, maxlen=max_len)


In [60]:
# Combine features into a single array
new_features = np.hstack((new_name_padded, new_description_padded, np.array([[new_product_price]])))


In [61]:
# Use the model to make predictions
predicted_probabilities = model.predict(new_features)
predicted_category = np.argmax(predicted_probabilities)



In [62]:
# Decode the predicted category
predicted_category_name = label_encoder.inverse_transform([predicted_category])


In [63]:
# Print the predicted category
print("Predicted Category:", predicted_category_name)

Predicted Category: ['Furniture']
