In [7]:
import warnings 
warnings.filterwarnings('ignore')
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import pandas as pd
df = pd.read_csv("fashion_products.csv")
df.head()

Unnamed: 0,User ID,Product ID,Product Name,Brand,Category,Price,Rating,Color,Size
0,19,1,Dress,Adidas,Men's Fashion,40,1.043159,Black,XL
1,97,2,Shoes,H&M,Women's Fashion,82,4.026416,Black,L
2,25,3,Dress,Adidas,Women's Fashion,44,3.337938,Yellow,XL
3,57,4,Shoes,Zara,Men's Fashion,23,1.049523,White,S
4,79,5,T-shirt,Adidas,Men's Fashion,79,4.302773,Black,M


In [8]:
df.shape

(1000, 9)

In [9]:
df.info() # tüm değerler dolu 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   User ID       1000 non-null   int64  
 1   Product ID    1000 non-null   int64  
 2   Product Name  1000 non-null   object 
 3   Brand         1000 non-null   object 
 4   Category      1000 non-null   object 
 5   Price         1000 non-null   int64  
 6   Rating        1000 non-null   float64
 7   Color         1000 non-null   object 
 8   Size          1000 non-null   object 
dtypes: float64(1), int64(3), object(5)
memory usage: 70.4+ KB


In [10]:
# Özelliklerin birleştirilmesi
df['Features'] = df['Product Name'] + ' ' + df['Brand'] + ' ' + df['Category'] + ' ' + df['Color'] + ' ' + df['Size']


In [11]:
# TF-IDF Vektörizer
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(df['Features'])


In [12]:
# Benzerlik matrisinin hesaplanması
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [20]:
def recommend_products(product_name, cosine_sim=cosine_sim):
    # Ürünün veri çerçevesinde olup olmadığını kontrol et
    if product_name not in df['Product Name'].values:
        return f"Product '{product_name}' not found in the dataset."

    # Ürünün indeksini bulma
    idx = df[df['Product Name'] == product_name].index[0]
    
    # Benzerlik skorlarını hesaplama
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Benzerlik skorlarına göre sıralama
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # En yüksek 10 benzer ürün
    sim_scores = sim_scores[1:11]
    
    # Ürün indekslerini alma
    product_indices = [i[0] for i in sim_scores]
    
    # Önerilen ürünleri döndürme
    return df.iloc[product_indices][['Product Name', 'Brand', 'Category', 'Price', 'Rating']]

In [21]:
# Örnek ürünle öneri yapma
recommend_products("Dress")

Unnamed: 0,Product Name,Brand,Category,Price,Rating
828,Dress,Adidas,Men's Fashion,19,4.257085
488,Dress,Adidas,Men's Fashion,37,3.560668
755,Jeans,Adidas,Men's Fashion,34,1.669924
239,Dress,H&M,Men's Fashion,10,2.234527
677,Dress,H&M,Men's Fashion,85,2.862981
760,Dress,H&M,Men's Fashion,92,3.643456
440,Sweater,Adidas,Men's Fashion,50,1.841606
458,Dress,Adidas,Kids' Fashion,10,2.342763
294,Dress,H&M,Kids' Fashion,56,2.424543
208,Dress,H&M,Women's Fashion,30,1.02867
