In [47]:
import os

import pandas as pd
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate
from tensorflow.keras.models import Model

# --- Configuration -----------------------------------------------------------
# Location of the training CSV. Defaults to the path this notebook was
# originally written against, but can be overridden with the
# PRODUCT_RELEVANCE_DATA_PATH environment variable so the cell runs on other
# machines without being edited.
DATA_PATH = os.environ.get(
    'PRODUCT_RELEVANCE_DATA_PATH',
    '/Users/sayeedkhan/Desktop/product_search_relevence/main_model/generated_data.csv',
)
SEARCH_COL = 'Searched Product'
RESULT_COL = 'Result Product'
TARGET_COL = 'Relevance'
MAX_TFIDF_FEATURES = 5000   # vocabulary cap applied to each vectorizer
TEST_FRACTION = 0.2
SPLIT_SEED = 42

# --- Load the data -----------------------------------------------------------
data = pd.read_csv(DATA_PATH)

# Rows without a target value cannot be trained on: a single NaN label turns
# the MSE loss into NaN. Drop them up front (a no-op when the column is complete).
labelled = data.dropna(subset=[TARGET_COL])

# TfidfVectorizer only accepts string documents; a missing value or a cell that
# pandas parsed as a number would make fit_transform raise. Missing text becomes
# an empty document, everything else is coerced to str.
search_text = labelled[SEARCH_COL].fillna('').astype(str)
result_text = labelled[RESULT_COL].fillna('').astype(str)

# Define the target variable
y = labelled[TARGET_COL]

# --- Split into training and testing sets ------------------------------------
# All three arrays are split together so rows stay aligned across them.
X_train_search, X_test_search, X_train_result, X_test_result, y_train, y_test = train_test_split(
    search_text, result_text, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

# --- TF-IDF features ---------------------------------------------------------
# One vectorizer per text field, so each learns its own vocabulary.
tfidf_search = TfidfVectorizer(max_features=MAX_TFIDF_FEATURES)
tfidf_result = TfidfVectorizer(max_features=MAX_TFIDF_FEATURES)

# Fit on the training split only; the test split is transformed with the
# vocabulary/IDF weights learned from training data.
X_train_search_tfidf = tfidf_search.fit_transform(X_train_search)
X_train_result_tfidf = tfidf_result.fit_transform(X_train_result)

X_test_search_tfidf = tfidf_search.transform(X_test_search)
X_test_result_tfidf = tfidf_result.transform(X_test_result)

# --- Model definition --------------------------------------------------------
# Two-input regression network: each TF-IDF vector goes through its own
# Dense+Dropout branch, the branches are concatenated, and a small dense head
# produces a single linear output trained with MSE.

MODEL_SEED = 42
BRANCH_UNITS = 128
HEAD_UNITS = (64, 32)
DROPOUT_RATE = 0.2

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling are
# repeatable across "Restart & Run All".
# NOTE(review): bit-exact repeatability on GPU may additionally require
# tf.config.experimental.enable_op_determinism() - confirm if that matters here.
tf.keras.utils.set_random_seed(MODEL_SEED)

# One input per text field; widths follow the fitted TF-IDF vocabularies.
input_search = Input(shape=(X_train_search_tfidf.shape[1],))
input_result = Input(shape=(X_train_result_tfidf.shape[1],))

# Search-query branch
x_search = Dense(BRANCH_UNITS, activation='relu')(input_search)
x_search = Dropout(DROPOUT_RATE)(x_search)

# Result-product branch
x_result = Dense(BRANCH_UNITS, activation='relu')(input_result)
x_result = Dropout(DROPOUT_RATE)(x_result)

merged = Concatenate()([x_search, x_result])

# Shared head: Dense -> Dropout for each configured width.
x = merged
for units in HEAD_UNITS:
    x = Dense(units, activation='relu')(x)
    x = Dropout(DROPOUT_RATE)(x)

# Single unbounded output for the relevance score (regression).
output = Dense(1, activation='linear')(x)

model = Model(inputs=[input_search, input_result], outputs=output)

# Compile the model: MSE is optimised, MAE is reported alongside it.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Print model summary
model.summary()

# --- Training ----------------------------------------------------------------
EPOCHS = 10
BATCH_SIZE = 32

# The model takes [search, result] feature matrices, in that order.
train_inputs = [X_train_search_tfidf, X_train_result_tfidf]
test_inputs = [X_test_search_tfidf, X_test_result_tfidf]

# Per-epoch validation metrics are computed on the test split.
# NOTE(review): if these metrics are ever used for early stopping or tuning,
# carve a separate validation split out of the training data instead.
history = model.fit(
    x=train_inputs,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(test_inputs, y_test),
)

# --- Final evaluation on the test split --------------------------------------
# evaluate() returns the loss followed by the compiled metrics (here: MAE).
loss, mae = model.evaluate(x=test_inputs, y=y_test)
print(f'Test Mean Absolute Error: {mae}')




Epoch 1/10
[1m44688/44688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 3ms/step - loss: 0.7891 - mae: 0.6575 - val_loss: 0.1972 - val_mae: 0.2739
Epoch 2/10
[1m44688/44688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 2ms/step - loss: 0.3250 - mae: 0.3911 - val_loss: 0.1712 - val_mae: 0.2562
Epoch 3/10
[1m44688/44688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 2ms/step - loss: 0.2854 - mae: 0.3557 - val_loss: 0.1618 - val_mae: 0.2495
Epoch 4/10
[1m44688/44688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 2ms/step - loss: 0.2657 - mae: 0.3385 - val_loss: 0.1620 - val_mae: 0.2630
Epoch 5/10
[1m44688/44688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 2ms/step - loss: 0.2539 - mae: 0.3288 - val_loss: 0.1531 - val_mae: 0.2337
Epoch 6/10
[1m44688/44688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 2ms/step - loss: 0.2514 - mae: 0.3261 - val_loss: 0.1501 - val_mae: 0.2280
Epoch 7/10
[1m44688/44688[0m [32m━━━━━━━━━━━━━━━━



Test Mean Absolute Error: 0.2215205281972885


In [50]:
import joblib

# --- Persist the trained artifacts -------------------------------------------
# Inference needs all three files: the network plus both fitted vectorizers.
MODEL_FILE = 'your_model_name.h5'
SEARCH_VECTORIZER_FILE = 'tfidf_search.pkl'
RESULT_VECTORIZER_FILE = 'tfidf_result.pkl'

# NOTE(review): the '.h5' suffix selects Keras' HDF5 format; recent Keras
# releases recommend the native '.keras' format. Left unchanged because
# downstream loaders may expect this exact file name - confirm before renaming.
model.save(MODEL_FILE)

# The vectorizers are serialised with joblib, one file per text field.
joblib.dump(tfidf_search, SEARCH_VECTORIZER_FILE)
joblib.dump(tfidf_result, RESULT_VECTORIZER_FILE)




['tfidf_result.pkl']