In [20]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, LSTM, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Example data
data = {
    'price': [1000, 1200, 900, 1100, 1050, 950],
    'average_rating': [4.5, 4.0, 4.7, 4.3, 4.6, 4.2],
    'total_ratings': [200, 150, 300, 180, 220, 170],
    'description': ["Lightweight and durable", "Affordable and reliable", "High performance and comfort", "Lightweight and affordable", "Durable and high performance", "Comfortable and reliable"],
    'label': ["Bike A", "Bike B", "Bike C", "Bike A", "Bike B", "Bike C"]
}

df = pd.DataFrame(data)

print(df)

# Tokenize and pad text data
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(df['description'])
sequences = tokenizer.texts_to_sequences(df['description'])
max_text_length = max(len(seq) for seq in sequences)
text_data_padded = pad_sequences(sequences, maxlen=max_text_length)


# Encode labels
label_encoder = LabelEncoder()
label_data = label_encoder.fit_transform(df['label'])
num_bikes = len(label_encoder.classes_)

print(label_data)

# Extract numerical data
numerical_data = df[['price', 'average_rating', 'total_ratings']].values

# Numerical Input
numerical_input = Input(shape=(3,), name='numerical_input')
x_num = Dense(64, activation='relu')(numerical_input)
x_num = Dense(32, activation='relu')(x_num)

# Text Input (Description)
vocab_size = 10000  
text_input = Input(shape=(max_text_length,), name='text_input')
x_text = Embedding(input_dim=vocab_size, output_dim=128)(text_input)
x_text = LSTM(128)(x_text)

# Concatenate
x = concatenate([x_num, x_text])
x = Dense(64, activation='relu')(x)
output = Dense(num_bikes, activation='softmax')(x)

# Create Model
model = Model(inputs=[numerical_input, text_input], outputs=output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Split data into training and validation sets
numerical_train, numerical_val, text_train, text_val, label_train, label_val = train_test_split(
    numerical_data, text_data_padded, label_data, test_size=0.2, random_state=42)



   price  average_rating  total_ratings                   description   label
0   1000             4.5            200       Lightweight and durable  Bike A
1   1200             4.0            150       Affordable and reliable  Bike B
2    900             4.7            300  High performance and comfort  Bike C
3   1100             4.3            180    Lightweight and affordable  Bike A
4   1050             4.6            220  Durable and high performance  Bike B
5    950             4.2            170      Comfortable and reliable  Bike C
[0 1 2 0 1 2]


In [23]:
# Train the model
history = model.fit(
    [numerical_train, text_train], label_train,
    validation_data=([numerical_val, text_val], label_val),
    epochs=10, batch_size=32
)

# Evaluate the model
loss, accuracy = model.evaluate([numerical_val, text_val], label_val)
print(f'Validation Loss: {loss}')
print(f'Validation Accuracy: {accuracy}')

# Example user input
user_numerical_input = np.array([[1100, 4.3, 180]])
user_text_input = ["Lightweight and affordable"]
user_text_sequences = tokenizer.texts_to_sequences(user_text_input)
user_text_padded = pad_sequences(user_text_sequences, maxlen=max_text_length)

# Predict
predictions = model.predict([user_numerical_input, user_text_padded])
predicted_bike_index = np.argmax(predictions, axis=1)
predicted_bike_name = label_encoder.inverse_transform(predicted_bike_index)

print(f'Recommended Bike: {predicted_bike_name[0]}')

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 245ms/step - accuracy: 0.5000 - loss: 10.7024 - val_accuracy: 0.0000e+00 - val_loss: 19.8272
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step - accuracy: 0.5000 - loss: 9.9271 - val_accuracy: 0.0000e+00 - val_loss: 15.0370
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 296ms/step - accuracy: 0.5000 - loss: 7.8480 - val_accuracy: 0.0000e+00 - val_loss: 8.0499
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 267ms/step - accuracy: 0.5000 - loss: 4.6214 - val_accuracy: 0.5000 - val_loss: 5.2398
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step - accuracy: 0.2500 - loss: 4.4877 - val_accuracy: 0.5000 - val_loss: 5.7082
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step - accuracy: 0.2500 - loss: 7.3768 - val_accuracy: 0.5000 - val_loss: 4.4575
Epoch 7/10
[1m1/1[0m [