## Clothing Suggestion using NLP

Import required libraries

In [12]:
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string
import pandas as pd
import numpy as np

Preprocessing

In [13]:
# Preprocessing

# Fetch NLTK's English stop-word list (the, an, and, or, etc.).
# quiet=True silences the downloader log so the machine-specific nltk_data
# directory is not written into the saved cell output.
nltk.download('stopwords', quiet=True)
# Kept as a set because preprocess_text does one membership check per word.
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Normalise a free-text query for TF-IDF matching.

    The text is lower-cased, split on whitespace, stripped of every character
    that is not alphanumeric, and filtered against the module-level
    ``stop_words`` set.

    Stop words are checked *before* punctuation is removed as well as after.
    Checking only afterwards misses contractions: "don't" would be reduced to
    "dont", which is not in the stop-word list, and so would be kept.

    Args:
        text: The raw sentence typed by the user.

    Returns:
        The remaining words joined by single spaces (an empty string when
        nothing survives the filtering).
    """
    kept_words = []
    for token in text.lower().split():
        # Trim punctuation from the edges only, so "don't," still matches the
        # stop word "don't" while the inner apostrophe is preserved.
        if token.strip(string.punctuation) in stop_words:
            continue

        # Remove punctuation and special characters inside the token.
        cleaned = ''.join(char for char in token if char.isalnum())

        # Skip tokens that were pure punctuation or that reduce to a stop word.
        if cleaned and cleaned not in stop_words:
            kept_words.append(cleaned)

    # Join the processed words back into a sentence
    return ' '.join(kept_words)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\soori\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Load the dataset and preprocess it

In [14]:
# Load the product catalogue; the 'desc' column holds the free-text descriptions.
df = pd.read_csv('dataset.csv')

# Delete punctuation from every description in one vectorised pass.
# fillna('') keeps a missing description from crashing the cell: a NaN is a
# float and cannot be iterated character by character.
punctuation_table = str.maketrans('', '', string.punctuation)
processed_desc = (
    df['desc']
    .fillna('')
    .astype(str)
    .str.translate(punctuation_table)
    .tolist()
)

# Store the cleaned text next to the raw columns (inserted at position 2).
df.insert(2, "processed_desc", processed_desc, True)

# The cleaned descriptions are the corpus the TF-IDF model is fitted on.
corpus = df.processed_desc

In [15]:
# Ask the user to describe the collection requirement they are looking for
input_sentence = input("Input the collection requrirement that you want: ")

# Preprocess the input sentence with preprocess_text and echo the cleaned
# query so the user can see what will actually be matched
processed_input = preprocess_text(input_sentence)
print(processed_input)

pink full sleeve top


In [16]:
# Number of best-matching products to display
TOP_N = 10

# Create a TF-IDF vectorizer and fit it on the cleaned product descriptions
vectorizer = TfidfVectorizer()
corpus_vectors = vectorizer.fit_transform(corpus)

# Vectorize the processed input sentence with the vocabulary learned above
input_vector = vectorizer.transform([processed_input])

# Similarity Modeling: one row per product, one column for the single query
similarity_scores = cosine_similarity(corpus_vectors, input_vector)

# Flatten to one score per product and label the scores with the corpus index,
# so pandas aligns them to the right rows even if df was re-ordered by an
# earlier run of this cell.
sim_scores = pd.Series(similarity_scores.ravel(), index=corpus.index)

# Drop the result of a previous run so the cell can be re-executed; inserting
# a second "similarity" column would make the sort below fail on an ambiguous
# label and shift the column positions used when printing.
if "similarity" in df.columns:
    df = df.drop(columns="similarity")
df.insert(2, "similarity", sim_scores)

# Sorting by column 'similarity', best match first
df = df.sort_values(by=['similarity'], ascending=False)

# Print the top similar rows. Column 0 and column 4 are the product name and
# product link according to the recorded output. Capping at len(df) avoids an
# IndexError when the catalogue has fewer than TOP_N rows.
for rank in range(1, min(TOP_N, len(df)) + 1):
    print(rank, ":  ", df.iloc[rank - 1, 0], "\t\t", df.iloc[rank - 1, 4])


1 :   Dusty Pink Buttoned Smocked Waist Top 		 https://www.faballey.com/dusty-pink-buttoned-smocked-waist-top-78/prdt
2 :   Navy Pleated Wrap Top 		 https://www.faballey.com/navy-pleated-wrap-top-78/prdt
3 :   Pink Printed Ruffle Sleeve Peplum Top 		 https://www.faballey.com/pink-printed-ruffle-sleeve-peplum-top-78/prdt
4 :   Light Green Collared Satin Shirt 		 https://www.faballey.com/light-green-collared-satin-shirt-78/prdt
5 :   Hot Pink Floral Print Puff Sleeve Cotton Top 		 https://www.faballey.com/hot-pink-floral-print-puff-sleeve-cotton-top-78/prdt
6 :   Purple Floral Boat Neck Tie Up Blouse 		 https://www.faballey.com/purple-floral-boat-neck-tie-up-blouse-78/prdt
7 :   Wine Polka Frilled Sleeve Crop Top 		 https://www.faballey.com/wine-polka-frilled-sleeve-crop-top-78/prdt
8 :   Blush Wrinkled Chiffon Embellished Top 		 https://www.faballey.com/blush-wrinkled-chiffon-embellished-top-78/prdt
9 :   Hot Pink Schiffli Puff Sleeve Top 		 https://www.faballey.com/hot-pink-schiffli-to

In [17]:
def clothingSuggestion(text, dataset_path='dataset.csv', top_n=10):
    """Print the catalogue items whose description best matches ``text``.

    The catalogue is read from ``dataset_path``, its 'desc' column is stripped
    of punctuation and used to fit a TF-IDF model, and every product is scored
    against the preprocessed query with cosine similarity. The cleaned query
    is printed first, followed by one ranked line per match showing column 0
    and column 4 of the scored frame (product name and link, judging by the
    notebook's recorded output).

    Args:
        text: Free-text description of the wanted item.
        dataset_path: CSV file holding the catalogue; must contain a 'desc'
            column. Defaults to the notebook's 'dataset.csv'.
        top_n: Maximum number of matches to print. Fewer are printed when the
            catalogue is smaller.

    Returns:
        None. Results are written to standard output.
    """
    df = pd.read_csv(dataset_path)

    # Delete punctuation from every description in one vectorised pass.
    # fillna('') keeps a missing description (NaN) from raising a TypeError.
    punctuation_table = str.maketrans('', '', string.punctuation)
    processed_desc = (
        df['desc']
        .fillna('')
        .astype(str)
        .str.translate(punctuation_table)
        .tolist()
    )
    df.insert(2, "processed_desc", processed_desc, True)

    corpus = df.processed_desc

    # Preprocess the input sentence and echo what will actually be matched
    processed_input = preprocess_text(text)
    print(processed_input)

    # Create a TF-IDF vectorizer fitted on the catalogue descriptions
    vectorizer = TfidfVectorizer()
    corpus_vectors = vectorizer.fit_transform(corpus)

    # Vectorize the processed input sentence with the same vocabulary
    input_vector = vectorizer.transform([processed_input])

    # Similarity Modeling: one row per product, one column for the single
    # query, flattened to one score per product.
    similarity_scores = cosine_similarity(corpus_vectors, input_vector)
    df.insert(2, "similarity", similarity_scores.ravel(), True)

    # Sorting by column 'similarity', best match first
    ranked = df.sort_values(by=['similarity'], ascending=False)

    # Print the top similar rows; capping at len(ranked) avoids an IndexError
    # on catalogues with fewer than top_n products.
    for rank in range(1, min(top_n, len(ranked)) + 1):
        print(rank, ":  ", ranked.iloc[rank - 1, 0], "\t\t", ranked.iloc[rank - 1, 4])


In [18]:
# Read a query from the user and print the best-matching catalogue items
clothingSuggestion(input())

green sleeveless top
1 :   Green Lace Strappy Belted Peplum Top 		 https://www.faballey.com/green-lace-strappy-belted-peplum-top-78/prdt
2 :   Green V Neck Blouse 		 https://www.faballey.com/green-v-neck-blouse-78/prdt
3 :   Green Lace Smocked Waist Crop Top 		 https://www.faballey.com/green-lace-smocked-waist-crop-top-78/prdt
4 :   Green Floral Print Puff Sleeve Blouse 		 https://www.faballey.com/green-floral-print-puff-sleeve-blouse-78/prdt
5 :   Light Green Collared Satin Shirt 		 https://www.faballey.com/light-green-collared-satin-shirt-78/prdt
6 :   Blush Wrinkled Chiffon Embellished Top 		 https://www.faballey.com/blush-wrinkled-chiffon-embellished-top-78/prdt
7 :   Wine Polka Frilled Sleeve Crop Top 		 https://www.faballey.com/wine-polka-frilled-sleeve-crop-top-78/prdt
8 :   Blue Frilled Sleeve Back Tie Top 		 https://www.faballey.com/blue-frilled-sleeve-back-tie-top-78/prdt
9 :   Peach Frilled Sleeve Crop Top 		 https://www.faballey.com/peach-frilled-sleeve-crop-top-78/prdt
10 