In [27]:
import os
import string
from collections import defaultdict

import mysql.connector
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

# Fetch the NLTK resources this script requests: the punkt tokenizer models,
# the stop-word lists and the WordNet corpus.
for resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(resource)

# Connection settings can be overridden through environment variables so the
# script can target another server without editing the source. The defaults
# reproduce the values that were previously hard-coded.
# NOTE(review): the fallback password is a credential committed to source. It is
# kept only so existing runs keep working -- rotate it, set
# ECOMMERCE_DB_PASSWORD instead, and then drop the default.
db_connection = mysql.connector.connect(
    host=os.environ.get("ECOMMERCE_DB_HOST", "localhost"),
    user=os.environ.get("ECOMMERCE_DB_USER", "root"),
    password=os.environ.get("ECOMMERCE_DB_PASSWORD", "vfgetew2234*Wew"),
    database=os.environ.get("ECOMMERCE_DB_NAME", "ecommerce"),
    # The port is numeric; convert explicitly so a malformed override fails here.
    port=int(os.environ.get("ECOMMERCE_DB_PORT", "3307")),
)

# dictionary=True is requested so rows can be read by column name / alias
# (product['id'], product['name'], product['category'], product['description']).
cursor = db_connection.cursor(dictionary=True)
cursor.execute(
    "SELECT Product.id, Product.name, Category.name as category, Product.description "
    "FROM Product INNER JOIN Category ON Category.id=Product.CategoryId"
)
products = cursor.fetchall()

# English stop words held in a set for fast membership tests.
stop_words = {*stopwords.words('english')}
# Single lemmatizer instance reused for every token.
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Return *text* as a space-separated string of lemmatized tokens.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens made
    up entirely of punctuation characters and tokens found in ``stop_words``
    are dropped, and the remaining tokens are lemmatized.

    Args:
        text: The string to normalize. ``None`` or an empty string (for
            example a NULL database column) is accepted.

    Returns:
        The normalized tokens joined by single spaces, or ``""`` when *text*
        is ``None`` or empty.
    """
    # A NULL column arrives as None; treat it as "no text" instead of failing
    # on None.lower().
    if not text:
        return ""

    punctuation = set(string.punctuation)
    kept = []
    for token in word_tokenize(text.lower()):
        # Check every character so multi-character punctuation tokens
        # (e.g. "``", "''", "...", "--") are removed too, not only tokens that
        # happen to be a substring of string.punctuation.
        if all(char in punctuation for char in token):
            continue
        if token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)

# product id -> list of keyword tags.
tags_per_product = defaultdict(list)

# One document per product: its preprocessed name, category and description
# joined by single spaces, in the same order as `products`.
all_texts = [
    " ".join(
        preprocess_text(product[column])
        for column in ('name', 'category', 'description')
    )
    for product in products
]

# TF-IDF Vectorization: one matrix row per entry of all_texts.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_texts)

# Extract keywords for each product: keep the highest-weighted terms of its row.
TOP_KEYWORDS_PER_PRODUCT = 5
feature_names = vectorizer.get_feature_names_out()
for row_index, product in enumerate(products):
    # Read the row's stored column indices and weights as two parallel arrays
    # in one step, rather than doing one scalar sparse lookup per term.
    row = tfidf_matrix[row_index, :].tocoo()
    # sorted() is stable, so terms with equal weight keep their stored order.
    ranked = sorted(zip(row.col, row.data), key=lambda pair: pair[1], reverse=True)
    tags_per_product[product['id']] = [
        feature_names[column] for column, _ in ranked[:TOP_KEYWORDS_PER_PRODUCT]
    ]

# Persist the tags as a comma-separated string per product. All updates are sent
# in one transaction and committed once; the cursor and connection are closed
# even when an update fails.
try:
    for product_id, product_tags in tags_per_product.items():
        tags = ", ".join(product_tags)
        cursor.execute("UPDATE Product SET tags = %s WHERE id = %s", (tags, product_id))
    db_connection.commit()
except mysql.connector.Error:
    # Undo the partial batch so the table is not left half-updated, then
    # surface the original error.
    db_connection.rollback()
    raise
finally:
    cursor.close()
    db_connection.close()


[nltk_data] Downloading package punkt to /home/tek/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/tek/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/tek/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
