In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Location of the raw CSV. This is the Colab upload directory; point it
# elsewhere when running outside Colab.
DATA_PATH = '/content/fashion_products.csv'

df = pd.read_csv(DATA_PATH)
df.head(10)

In [None]:
# (rows, columns) of the loaded DataFrame
df.shape

In [None]:
# Per-column dtype and non-null count summary
df.info()

In [None]:
# Number of missing values in each column
df.isna().sum()

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build TF-IDF vectors from the 'Product Name' text column, ignoring English
# stop words.
# NOTE(review): tfidf_matrix is not referenced by any later cell shown here
# (the feature matrix is assembled from the one-hot and scaled numeric
# columns only) - confirm whether it should be added to the features or removed.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['Product Name'])



In [None]:
# Normalization using MinMaxScaler.
# The scaled values are written back into df, so the raw 'Price' and
# 'Rating' columns are overwritten from this cell onward.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
numerical_features = ['Price','Rating']

df[numerical_features] = scaler.fit_transform(df[numerical_features])

In [None]:
# One-hot encode the 'Category' and 'Brand' columns.
# Despite its name, categorical_features holds the encoded matrix, not a list
# of column names; it is converted with .toarray() when the feature matrix is built.

from sklearn.preprocessing import OneHotEncoder
en = OneHotEncoder()
categorical_features = en.fit_transform(df[['Category','Brand']])


In [None]:
# Combine the one-hot encoded columns and the scaled numeric columns side by
# side into one dense array with one row per row of df.
feature_matrix = np.hstack((categorical_features.toarray(), df[numerical_features].values))

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of feature_matrix; recommend()
# reads row i as the similarity of the i-th row of df to every row.
cosine_sim= cosine_similarity(feature_matrix)

In [None]:
def recommend(product_id, cosine_sim=cosine_sim, top_n=5):
    """Return the IDs of the products most similar to ``product_id``.

    Parameters
    ----------
    product_id
        Value to look up in ``df['Product ID']``. If several rows share the
        ID, the first matching row is used as the query.
    cosine_sim
        Similarity matrix indexed by row position; assumed to be square and in
        the same row order as ``df``. Defaults to the module-level
        ``cosine_sim`` as bound when this cell is run.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Up to ``top_n`` product IDs, most similar first. Rows with equal
        similarity keep their original row order.

    Raises
    ------
    IndexError
        If ``product_id`` does not occur in ``df['Product ID']``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the query row by *position*, not by index label: cosine_sim is
    # addressed positionally, and labels only match positions for a default
    # RangeIndex.
    positions = np.flatnonzero((df['Product ID'] == product_id).to_numpy())
    if positions.size == 0:
        raise IndexError(f"Product ID {product_id!r} not found in df['Product ID']")
    idx = int(positions[0])

    scores = np.asarray(cosine_sim[idx], dtype=float)
    # A stable sort on the negated scores gives descending similarity with
    # ties left in row order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the query row explicitly instead of assuming it sorts first: a
    # row with identical features ties with it and may come earlier.
    ranked = ranked[ranked != idx][:top_n]
    return df['Product ID'].iloc[ranked].tolist()

In [None]:
# Use the product in the first row of df as the example query
eg_product_id = df['Product ID'].iloc[0]

# Get and print the recommendations for the example product
recommended_products = recommend(eg_product_id)
print(f"Recommendations for Products {eg_product_id}: {recommended_products}")

In [None]:
## Recommendation evaluation
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as test data; the fixed random_state keeps the
# split reproducible across runs.
train_data, test_data = train_test_split(df, test_size=0.3, random_state=42)


In [None]:
# precision@k
def precision_at_k(recommended_items, actual_items, k=5):
    """Fraction of the top-``k`` recommendations that are relevant.

    Parameters
    ----------
    recommended_items : sequence
        Recommended item IDs, best first. Only the first ``k`` are considered.
    actual_items : iterable
        Item IDs regarded as relevant (ground truth).
    k : int, default 5
        Cut-off rank; must be positive.

    Returns
    -------
    float
        Number of distinct relevant items among the first ``k``
        recommendations, divided by ``k``. The denominator is always ``k``,
        even when fewer than ``k`` recommendations are supplied.

    Raises
    ------
    ValueError
        If ``k`` is not positive. A zero ``k`` would divide by zero and a
        negative ``k`` would slice from the end and give a negative score.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k}")
    hits = set(recommended_items[:k]) & set(actual_items)
    return len(hits) / k

# Ground truth: products associated with user 1 in the held-out test split.
user_actual_items = test_data[test_data['User ID'] ==1]['Product ID'].tolist()
# Recommendations are generated for product 1000.
# NOTE(review): user 1 and product 1000 are hard-coded; recommend() fails if
# 1000 is not present in df['Product ID'] - confirm both IDs exist in the data.
user_recommended_items = recommend(product_id =1000)
precision = precision_at_k(user_recommended_items, user_actual_items, k=5)
print(f"Precision@5: {precision}")

In [None]:
def recall_at_k(recommended_items, actual_items, k=5):
    """Fraction of the relevant items that appear in the top-``k`` recommendations.

    Parameters
    ----------
    recommended_items : sequence
        Recommended item IDs, best first. Only the first ``k`` are considered.
    actual_items : iterable
        Item IDs regarded as relevant (ground truth). Duplicates are counted once.
    k : int, default 5
        Cut-off rank; must be non-negative.

    Returns
    -------
    float
        Distinct relevant items found in the first ``k`` recommendations
        divided by the number of distinct relevant items. Returns ``0.0`` when
        there are no relevant items, since nothing can be recalled.

    Raises
    ------
    ValueError
        If ``k`` is negative (a negative slice bound would drop items from the
        end of the list instead of limiting it).
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")
    relevant = set(actual_items)
    if not relevant:
        return 0.0
    hits = set(recommended_items[:k]) & relevant
    # Divide by the de-duplicated ground truth so that retrieving every
    # relevant item yields exactly 1.0 even if actual_items repeats IDs.
    return len(hits) / len(relevant)

# Recall@5 for the same recommendation list and ground-truth list that were
# built in the precision cell.
recall = recall_at_k(user_recommended_items, user_actual_items, k=5)
print(f"Recall@5: {recall}")

In [None]:
def coverage(recommendations, all_items):
    """Share of the catalog that appears in ``recommendations``.

    Parameters
    ----------
    recommendations : iterable
        Recommended item IDs; duplicates are counted once.
    all_items : iterable
        Item IDs that make up the catalog; duplicates are counted once.

    Returns
    -------
    float
        Number of distinct catalog items that were recommended divided by the
        number of distinct catalog items, always within ``[0, 1]``. Returns
        ``0.0`` for an empty catalog.
    """
    catalog = set(all_items)
    if not catalog:
        return 0.0
    # Intersect with the catalog so IDs outside it cannot push the score above 1.
    covered = set(recommendations) & catalog
    return len(covered) / len(catalog)

# Catalog coverage: share of products that show up in at least one
# recommendation list. The lists are pooled over every product, because a
# single product's list can cover at most a handful of items and says nothing
# about the catalog as a whole.
all_items = df['Product ID'].unique()
all_recommendations = [
    item for product_id in all_items for item in recommend(product_id)
]
coverage_score = coverage(all_recommendations, all_items)
print(f"Coverage: {coverage_score}")