In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# Load the product table and keep only the columns used further down.
df = pd.read_csv("wayfair_uniq.csv")

# Take an explicit copy: the statements below add and overwrite columns, and
# doing that on a selection of `df` triggers pandas' SettingWithCopyWarning
# (silenced in this notebook by warnings.filterwarnings('ignore')).
df_new = df.iloc[:, [3, 4, 5, 7, 9, 11, 13, 15, 17, 18, 19]].copy()

# Expose the row label as an ordinary column so it can be read back by name.
df_new['index'] = df_new.index
df_new.shape

df_new.head()


# Turn "$1,234.56"-style text into floats.
# regex=False treats "$" as a literal character on every pandas version
# ("$" is an end-of-string anchor when interpreted as a regular expression).
df_new['original_price'] = (
    df_new['original_price']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# "price" holds text such as "from $38.02; $38.02"; keep what follows "; $".
# Raw strings avoid Python's invalid-escape warning for "\s" and "\$".
new_price = df_new["price"].str.split(r";\s\$", n=1, expand=True)
df_new["new_price"] = new_price[1]
df_new.drop(columns=["price"], inplace=True)

# Some prices contain a "-" (presumably a "low - high" range -- confirm against
# the data); for those keep only the text before the first whitespace + "-".
# na=False skips rows whose price did not match the pattern above instead of
# failing on a mask that contains NaN.
has_dash = df_new["new_price"].str.contains("-", regex=False, na=False)
price_range = df_new[has_dash].copy()
price_range['new_price'] = price_range['new_price'].str.split(r"\s-", n=1).str[0]
# Only the price column changed, so write back just that column.
df_new.loc[price_range.index, 'new_price'] = price_range['new_price']
df_new['new_price'] = df_new['new_price'].str.replace(',', '', regex=False).astype(float)


# Bracket edges in dollars. The last edge is open-ended so that every price
# above 4500 lands in ">$4500"; a finite cap would leave higher prices with a
# NaN bracket, which combine_features cannot concatenate.
bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, np.inf]
labels = ["<$500","$500-$1000","$1000-$1500","$1500-$2000","$2000-$2500","$2500-$3000","$3000-$3500","$3500-$4000","$4000-$4500",">$4500"]
# Brackets are right-inclusive (a price of exactly 500 is labelled "<$500");
# include_lowest=True additionally keeps a price of exactly 0 in the first one.
df_new['price_bracket'] = pd.cut(
    df_new['new_price'], bins, labels=labels, include_lowest=True
)


# Free-text columns that are concatenated into the similarity input.
features = [
    "product_data1",
    "product_data2",
    "product_descrip",
    "manufacturer",
]

# Replace missing text with an empty string in all of them at once.
df_new[features] = df_new[features].fillna('')

def combine_features(row):
    """Join the descriptive fields of one product into a single string.

    The fields are taken in the order ``product_data1``, ``product_data2``,
    ``product_descrip``, ``price_bracket``, ``manufacturer`` and separated by
    single spaces.

    Args:
        row: Mapping-like record (for example a DataFrame row) that can be
            indexed by the five field names above.

    Returns:
        The space-joined text. A missing value (``None``/NaN, e.g. a price
        that fell outside every bracket) contributes an empty string, so the
        result is always a ``str`` and later string operations on it cannot
        fail on a ``None`` entry.

    Raises:
        KeyError: If ``row`` lacks one of the fields.
    """
    columns = (
        "product_data1",
        "product_data2",
        "product_descrip",
        "price_bracket",
        "manufacturer",
    )
    parts = []
    for column in columns:
        value = row[column]
        # str() also covers non-string cells such as categorical labels.
        parts.append("" if pd.isna(value) else str(value))
    return " ".join(parts)
    
# One text blob per product, built from its descriptive columns.
df_new["combined_features"] = df_new.apply(combine_features, axis=1)

# NLTK returns the stop words as a list. Testing membership against a set is a
# constant-time lookup instead of a linear scan for every token of every row;
# the filtered result is the same.
stop_words = stopwords.words('english')
stop_word_set = set(stop_words)

# Lower-case and split on whitespace; from here on "combined_features" holds
# token lists rather than strings.
df_new['combined_features'] = df_new['combined_features'].str.lower().str.split()

# Drop stop words and glue the remaining tokens back into one string per row.
df_new["features"] = df_new["combined_features"].apply(
    lambda tokens: " ".join(word for word in tokens if word not in stop_word_set)
)

# Token-count matrix of the cleaned text, then the similarity of every product
# to every other product.
cv = CountVectorizer()
count_matrix = cv.fit_transform(df_new["features"])

cosine_sim = cosine_similarity(count_matrix)



def get_title_from_index(index):
    """Return ``product_name`` for the first ``df_new`` row labelled *index*.

    Raises IndexError when no row carries that index label.
    """
    names = df_new.loc[df_new.index == index, "product_name"]
    return names.values[0]

def get_home(index):
    """Return ``product_name_link`` for the first ``df_new`` row labelled *index*.

    Raises IndexError when no row carries that index label.
    """
    links = df_new.loc[df_new.index == index, "product_name_link"]
    return links.values[0]

def get_index_from_title(title):
    """Return the ``index`` column value of the first row named *title*.

    Raises IndexError when no ``product_name`` equals *title*.
    """
    is_match = df_new["product_name"] == title
    return df_new.loc[is_match, "index"].values[0]

# Product the recommendations are based on, and its row in cosine_sim.
product_user_likes = 'Rosalie Sofa'
product_index = get_index_from_title(product_user_likes)

# (row position, similarity score) pairs for that product, best score first.
similar_products = list(enumerate(cosine_sim[product_index]))
sorted_similar_products = sorted(
    similar_products, key=lambda pair: pair[1], reverse=True
)

# Print name and link of the 51 highest-scoring rows.
for i, element in enumerate(sorted_similar_products[:51], start=1):
    row_label = element[0]
    print(get_title_from_index(row_label), get_home(row_label))
        


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

(17209, 12)

Unnamed: 0,product_name,product_name_link,manufacturer,price,original_price,review_count,review_data,shipping_time,product_descrip,product_data1,product_data2,index
0,"Ariana 63"" Column Floor Lamp",https://www.wayfair.com/lighting/pdp/zipcode-d...,by Zipcode Design,from $38.02; $38.02,$69.99,3878.0,Rated 4.5 out of 5 stars.3878 total votes.,,Short on space? Make the most of your square f...,Switch Type: On/Off switch,,0
1,"Dale 63.75"" Arched/Arc Floor Lamp",https://www.wayfair.com/v/wayup/sponsored_skus...,by Langley Street,$73.99; $73.99,$99.99,1382.0,Rated 4.5 out of 5 stars.1382 total votes.,FREE 2-Day Shipping,"With a slender silhouette, this floor lamp bri...",Switch Type: Foot Switch,Maximum Wattage (per Bulb): 100 Watt,1
2,"Carlisle Douthit 70"" Swing Arm Floor Lamp",https://www.wayfair.com/lighting/pdp/williston...,by Williston Forge,from $104.99; $104.99,$399.99,1084.0,Rated 4.5 out of 5 stars.1084 total votes.,FREE 2-Day Shipping,"Part decor and part luminary, we love floor la...",Switch Type: Rotary socket,Maximum Wattage (per Bulb): 100 Watt,2
3,"Morrill 82"" Tree Floor Lamp",https://www.wayfair.com/lighting/pdp/brayden-s...,by Brayden Studio,$143.99; $143.99,$230.00,3476.0,Rated 4.5 out of 5 stars.3476 total votes.,FREE 2-Day Shipping,When your ensemble is in need of a little illu...,Switch Type: 4-Way,Maximum Wattage (per Bulb): 26 Watt,3
4,"Shipton Crystal and Metal 61"" Floor Lamp",https://www.wayfair.com/lighting/pdp/willa-arl...,by Willa Arlo Interiors,$62.99; $62.99,$170.04,2099.0,Rated 4.5 out of 5 stars.2099 total votes.,FREE 2-Day Shipping,Equally alluring sitting sofa-side for a littl...,Switch Type: On/Off switch,Maximum Wattage (per Bulb): 60 Watt,4


Rosalie Sofa https://www.wayfair.com/furniture/pdp/rosalie-sofa-lrfy2382.html
Rosalie Loveseat https://www.wayfair.com/furniture/pdp/rosalie-loveseat-lrfy2381.html
Lauryn Sofa https://www.wayfair.com/furniture/pdp/lauryn-sofa-w001370014.html
Zander Tufted Sofa https://www.wayfair.com/furniture/pdp/orren-ellis-zander-tufted-sofa-orel4706.html
Marbleton Sofa https://www.wayfair.com/furniture/pdp/loon-peak-marbleton-sofa-loon1825.html
Grosvenor Chesterfield Sofa https://www.wayfair.com/furniture/pdp/willa-arlo-interiors-grosvenor-chesterfield-sofa-wrlo1738.html
Logan 3 Seater Sofa https://www.wayfair.com/furniture/pdp/brayden-studio-logan-3-seater-sofa-brys2211.html
Shullsburg Reclining Sofa https://www.wayfair.com/furniture/pdp/winston-porter-shullsburg-reclining-sofa-w001517407.html
Chisolm Loveseat https://www.wayfair.com/furniture/pdp/charlton-home-chisolm-loveseat-chlh6419.html
Cecilia Sofa https://www.wayfair.com/furniture/pdp/beverly-fine-furniture-cecilia-sofa-qrmg1038.html
Bora L