## Building a recommendation system that suggests outfits based on occasion using GloVe embeddings and cosine similarity.

In [1]:
!pip install datasets



In [2]:
import string

import nltk
import numpy as np
import pandas as pd
from datasets import load_dataset
from gensim.models import KeyedVectors
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Fetch the "nextai-team/fashion" dataset through the `datasets` library
dataset = load_dataset(path="nextai-team/fashion")

In [4]:
# Materialize the 'train' split as a pandas DataFrame for the steps below
df = pd.DataFrame(data=dataset["train"])

In [5]:
# NLP Preprocessing
# Fetch the NLTK data used by this notebook so it runs on a fresh kernel:
# tokenizer models for word_tokenize, the stop-word lists, and WordNet for the
# lemmatizer. Resources that are already installed are not downloaded again.
for nltk_resource in ("punkt", "punkt_tab", "stopwords", "wordnet", "omw-1.4"):
    nltk.download(nltk_resource, quiet=True)

# Shared by preprocess_text: one lemmatizer instance and a set of English stop
# words (a set gives constant-time membership tests).
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

In [6]:
def preprocess_text(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    The text is lower-cased and split with ``word_tokenize``. English stop
    words and tokens made up solely of punctuation characters are discarded;
    every remaining token is lemmatized with the module-level ``lemmatizer``.

    Args:
        text: Raw text to clean. Non-string values (for example ``None`` from
            a missing dataset cell) are treated as empty text.

    Returns:
        str: The surviving lemmas joined by single spaces, or ``""`` when no
        token survives.
    """
    if not isinstance(text, str):
        return ""

    punctuation_chars = set(string.punctuation)

    def is_punctuation(token):
        # `token in string.punctuation` would be a *substring* test, which lets
        # multi-character punctuation tokens such as "``", "''" or "..." slip
        # through. Check character by character instead.
        return all(char in punctuation_chars for char in token)

    # Tokenization
    tokens = word_tokenize(text.lower())
    # Lemmatization and remove stop words / punctuation-only tokens
    tokens = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words and not is_punctuation(token)
    ]
    return " ".join(tokens)

In [7]:
# Clean both text columns in place with the shared preprocessing function
df['input'] = df['input'].map(preprocess_text)
df['output'] = df['output'].map(preprocess_text)


In [8]:
# Read the GloVe vectors from the plain-text file; no_header=True tells gensim
# that the file does not start with a word2vec-style header line.
word_vectors = KeyedVectors.load_word2vec_format(
    fname='glove.6B.100d.txt',
    no_header=True,
    binary=False,
)

In [9]:
# Function to calculate the average word embedding of a sentence
def sentence_embedding(sentence):
    """Embed a sentence as the mean of its known word vectors.

    The sentence is split on whitespace and only words present in
    ``word_vectors`` contribute to the average.

    Args:
        sentence: Whitespace-separated text.

    Returns:
        numpy.ndarray: The element-wise mean of the found word vectors, or a
        zero vector of length ``word_vectors.vector_size`` when no word of the
        sentence is in the vocabulary.
    """
    known_vectors = []
    for token in sentence.split():
        if token in word_vectors:
            known_vectors.append(word_vectors[token])

    # No in-vocabulary word: fall back to an all-zero vector
    if not known_vectors:
        return np.zeros(word_vectors.vector_size)
    return np.mean(known_vectors, axis=0)


In [10]:
# Embed every preprocessed 'input' text once and keep the vector on its row
df['input_embedding'] = df['input'].map(sentence_embedding)


In [11]:
# Function to recommend similar items
def recommend(text, n=1):
    """Return the 'output' texts of the ``n`` rows most similar to ``text``.

    The query is preprocessed and embedded the same way as the dataset's
    'input' column, then compared with every stored ``input_embedding`` by
    cosine similarity.

    Args:
        text: Free-text query.
        n: Number of top-ranked rows to return (default 1).

    Returns:
        list: The 'output' values of the best-matching rows, most similar
        first. Empty when the query embedding is all zeros or ``df`` has no
        rows.

    Side effects:
        Writes the per-row scores to ``df['similarity']`` (kept so existing
        code that reads that column keeps working).
    """
    text_embedding = sentence_embedding(preprocess_text(text))
    # An all-zero query vector has no direction to compare against, and an
    # empty frame has nothing to rank.
    if not np.any(text_embedding) or df.empty:
        return []

    # Score all rows with one cosine_similarity call instead of one call per
    # row; the single-row query yields a (1, n_rows) matrix, hence the [0].
    embedding_matrix = np.vstack(df['input_embedding'].tolist())
    df['similarity'] = cosine_similarity([text_embedding], embedding_matrix)[0]

    recommendations = df.sort_values(by='similarity', ascending=False).head(n)
    return recommendations['output'].tolist()

# Smoke-test the recommender with a fixed, short query
input_text = "job interview"
recommendations = recommend(text=input_text)
print(recommendations)

['well-fitted suit/dress closed-toe shoe minimal accessory groomed appearance']


In [24]:
# Try the recommender on a query typed in by the user
input_text = input("Enter your query: ")
recommendations = recommend(text=input_text)
print(recommendations)

Enter your query: i have job interview and my favourite color is blue
['well-fitted suit/dress closed-toe shoe minimal accessory groomed appearance']


In [32]:
# Prompt for another query and print the top recommendation
input_text = input("Enter your query: ")
recommendations = recommend(text=input_text)
print(recommendations)

Enter your query: to party with freinds
['men casual attire like jeans/chinos stylish shirt polo woman chic dress trendy outfit suitable party theme']


In [33]:
# Prompt for another query and print the top recommendation
input_text = input("Enter your query: ")
recommendations = recommend(text=input_text)
print(recommendations)

Enter your query: to play basketball
['shorts/leggings moisture-wicking top supportive sneakers/athletic shoe']


In [34]:
# Prompt for another query and print the top recommendation
input_text = input("Enter your query: ")
recommendations = recommend(text=input_text)
print(recommendations)

Enter your query: to present my research paper to highly qualified scientists
['well-fitted suit/dress closed-toe shoe minimal accessory groomed appearance']
