In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import linear_kernel

# Load and preprocess the data
df = pd.read_csv(r"C:\Users\Moksh\Dropbox\PC\Downloads\catalog_product_20210807_043549.csv")
df_cleaned = df.dropna(subset=['name', 'description', 'short_description', 'meta_keywords', 'meta_description']).copy()  # Explicit copy
df_cleaned.reset_index(drop=True, inplace=True)
df_cleaned['combined_text'] = df_cleaned['name'] + ' ' + df_cleaned['description'] + ' ' + df_cleaned['short_description'] + ' ' + df_cleaned['meta_keywords'] + ' ' + df_cleaned['meta_description']

# TF-IDF vectorization
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df_cleaned['combined_text'])

# Dimensionality reduction using TruncatedSVD
svd = TruncatedSVD(n_components=500)
tfidf_matrix_reduced = svd.fit_transform(tfidf_matrix)

# Normalize the reduced matrix
tfidf_matrix_normalized = normalize(tfidf_matrix_reduced)

# Compute cosine similarity
cosine_sim = linear_kernel(tfidf_matrix_normalized, tfidf_matrix_normalized)

# Recommendation function
def recommend_products(product_name):
    idx = df_cleaned.index[df_cleaned['name'] == product_name].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    product_indices = [i[0] for i in sim_scores]
    recommended_products = df_cleaned['name'].iloc[product_indices].tolist()
    recommended_scores = [score[1] for score in sim_scores]
    return list(zip(recommended_products, recommended_scores))

# Sample usage
recommendations = recommend_products("Parker Jotter SS Ball Pen Gt A")
print(recommendations)


[('Parker Jotter StainleSS Steel Ball Pen Gt', 0.8686502970910411), ('Parker Jotter SS Gt Ball Pen M-4 P911 E-Com', 0.8442278174628025), ('Parker Jotter SS Gt Ball Pen M-2P911 E-Com', 0.8254284133114953), ('Parker Jotter SS Gt Ball Pen M-3P911E-Com', 0.8254284133114953), ('Parker Jotter SS Ball Pen Gt M-1P911 E-Com', 0.8141581414563762), ('Parker Jotter Standard Ball Pen Black', 0.8077905898695089), ('Parker Jotter StainleSS Steel Ball Pen CtGl', 0.7924973560104087), ('Parker Jotter Standard Ball Pen Red', 0.7856075513960034), ('Parker Jotter Standard Ball Pen GtGl', 0.7389121780486185), ('Parker Jotter Gold Ball Pen', 0.7358549596873445)]


In [4]:
# Sample usage
recommendations = recommend_products("Parker Vector SS Ct Roller Ball Pen With D-4")
print(recommendations)

[('Parker Vector SS Ct Roller Ball Pen With D-2', 0.9999999999999999), ('Parker Vector SS Ct Roller Ball Pen With D-3', 0.9999999999999999), ('Parker Vector SS Ct Roller Ball Pen With D-4', 0.9999999999999999), ('Parker Vector SS Ct Roller Ball Pen With D-5', 0.9999999999999999), ('Parker Vector SS Ct Roller Ball Pen With D-6', 0.9999999999999999), ('Parker Vector SS Ct Roller Ball Pen With D-7', 0.9999999999999999), ('Parker Vector SS Gt Roller Ball Pen B', 0.9252192082063389), ('Parker Vector SS Roller Ball Pen GtGl', 0.8940801321457456), ('Parker Vector SS Roller Ball Pen CtGl', 0.8805140414077203), ('Parker Vector SS Ct Roller Ball Pen M-3P911E-Com', 0.8486799382991345)]
