# Import Libraries

In [28]:
import pandas as pd 
import numpy as np
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
pd.set_option('display.max_colwidth', None) 
import math

# Loading Dataframe

In [41]:
df=pd.read_csv('../data/dwr.csv')

In [42]:
df.head()

Unnamed: 0,id,name,brand,price_min,price_max,color_options_count,color_options,image_path
0,2579525,Avio Sectional with Table,Knoll®,"$31,255.00","$43,126.00",,"['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579525.jpg
1,2579423,Avio Sectional,Knoll®,"$17,970.00","$25,450.00",,"['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579423.jpg
2,2579424-2,"Avio Sofa, Three Seat",Knoll®,"$14,899.00","$21,254.00",,"['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579424-2.jpg
3,2195266-1,"Kelston Sofa, Fabric",Case,"$7,745.00","$11,795.00",,"['Bark', 'Basil', 'Blue', 'Blush']",dwr\2195266-1.jpg
4,2195267-2,"Kelston Sectional, Leather",Case,"$17,695.00","$18,895.00",,"['Balsa', 'Black', 'Bruno', 'Canyon']",dwr\2195267-2.jpg


# Renaming `color_options` to Colors

In [43]:
df.rename(columns={
        "color_options": "Colors",
        "name": "Title"
    },inplace=True)

# Droping `color_options_count` column

In [44]:
df.drop(columns=['color_options_count'],inplace=True)

In [45]:
df.head()

Unnamed: 0,id,Title,brand,price_min,price_max,Colors,image_path
0,2579525,Avio Sectional with Table,Knoll®,"$31,255.00","$43,126.00","['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579525.jpg
1,2579423,Avio Sectional,Knoll®,"$17,970.00","$25,450.00","['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579423.jpg
2,2579424-2,"Avio Sofa, Three Seat",Knoll®,"$14,899.00","$21,254.00","['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579424-2.jpg
3,2195266-1,"Kelston Sofa, Fabric",Case,"$7,745.00","$11,795.00","['Bark', 'Basil', 'Blue', 'Blush']",dwr\2195266-1.jpg
4,2195267-2,"Kelston Sectional, Leather",Case,"$17,695.00","$18,895.00","['Balsa', 'Black', 'Bruno', 'Canyon']",dwr\2195267-2.jpg


# Fixing `price_min` and `price_max` Columns

In [46]:
df['price_min']=df['price_min'].str.replace(pat='$',repl='')
df['price_max']=df['price_max'].str.replace(pat='$',repl='')
df['price_min']=df['price_min'].str.replace(pat=',',repl='')
df['price_max']=df['price_max'].str.replace(pat=',',repl='')

In [47]:
df['price_min']=df['price_min'].astype('float')
df['price_max']=df['price_max'].astype('float')

In [48]:
df['Price']=(df['price_min']+df['price_max'])/2

In [49]:
df.drop(columns=['price_min','price_max'],inplace=True)

In [50]:
df.head()

Unnamed: 0,id,Title,brand,Colors,image_path,Price
0,2579525,Avio Sectional with Table,Knoll®,"['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579525.jpg,37190.5
1,2579423,Avio Sectional,Knoll®,"['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579423.jpg,21710.0
2,2579424-2,"Avio Sofa, Three Seat",Knoll®,"['Aviator', 'Black', 'Chartreuse', 'Daffodil']",dwr\2579424-2.jpg,18076.5
3,2195266-1,"Kelston Sofa, Fabric",Case,"['Bark', 'Basil', 'Blue', 'Blush']",dwr\2195266-1.jpg,9770.0
4,2195267-2,"Kelston Sectional, Leather",Case,"['Balsa', 'Black', 'Bruno', 'Canyon']",dwr\2195267-2.jpg,18295.0


# Cleaing Color Column

In [51]:
df['Colors']=df['Colors'].str.replace(r"[\[\]',/]",repl='',regex=True)

In [52]:
df.head()

Unnamed: 0,id,Title,brand,Colors,image_path,Price
0,2579525,Avio Sectional with Table,Knoll®,Aviator Black Chartreuse Daffodil,dwr\2579525.jpg,37190.5
1,2579423,Avio Sectional,Knoll®,Aviator Black Chartreuse Daffodil,dwr\2579423.jpg,21710.0
2,2579424-2,"Avio Sofa, Three Seat",Knoll®,Aviator Black Chartreuse Daffodil,dwr\2579424-2.jpg,18076.5
3,2195266-1,"Kelston Sofa, Fabric",Case,Bark Basil Blue Blush,dwr\2195266-1.jpg,9770.0
4,2195267-2,"Kelston Sectional, Leather",Case,Balsa Black Bruno Canyon,dwr\2195267-2.jpg,18295.0


# Droping Duplicate

In [53]:
df.drop_duplicates(subset = "Title",inplace=True)

In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2017 entries, 0 to 2755
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          2017 non-null   object 
 1   Title       2017 non-null   object 
 2   brand       2006 non-null   object 
 3   Colors      2017 non-null   object 
 4   image_path  2010 non-null   object 
 5   Price       1176 non-null   float64
dtypes: float64(1), object(5)
memory usage: 110.3+ KB


# Making the Links Right

In [39]:
df['Link']=df['Link'].str.replace('https://www.example.com/','https://www.containerstore.com/')

# Replacing Commas in Colors Column

In [18]:
df['Colors']=df['Colors'].str.replace(',','')

In [19]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,The Container Store Samson Faux Leather Bin,94.99,css-1jtiraa css-1xx16ea,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-samson-faux-leather-bin/12d?productId=11023148
1,Zafferano Poldina Pro Wireless Lamp,169.0,css-abqf3x css-1ljdx5i css-1xx16ea,https://www.example.com/s/home-decor/home-decor-new-arrivals/zafferano-poldina-pro-wireless-lamp/12d?productId=11024636
2,The Container Store Rattan Bins with Lid,144.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-rattan-bins-with-lid/12d?productId=11023133
3,The Container Store Pacific Bin,34.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-pacific-bin/12d?productId=11023713
4,Yamazaki Tower Narrow Entryway Console Table,79.0,css-1xx16ea css-abqf3x,https://www.example.com/s/home-decor/home-decor-new-arrivals/yamazaki-tower-narrow-entryway-console-table/12d?productId=11024476


In [20]:
scaler = StandardScaler()

In [21]:
df['Price'] = scaler.fit_transform(df[['Price']])

In [22]:
df['Colors'] = pd.factorize(df['Colors'])[0]

In [23]:
# Creating feature vectors
features = df[['Colors', 'Price']].values

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(features, features)

In [41]:
def recommend_items(item_name, similarity_matrix, df, top_n=10):
    if item_name not in df['Title'].values:
        return f"Item '{item_name}' not found in the dataset."

    item_index = df.index[df['Title'] == item_name].tolist()[0]

    similarity_scores = list(enumerate(similarity_matrix[item_index]))

    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    top_similar_items = similarity_scores[1:top_n+1]

    recommended_items = [df.iloc[item[0]]['Title'] for item in top_similar_items]

    return recommended_items

# Example usage
item_name = 'YouCopia ReStickable 3D Fridge Labels Pack of 10'
recommendations = recommend_items(item_name, similarity_matrix, df)
print(f"Because you viewed {item_name}, you may also like: {', '.join(recommendations)}")


Because you viewed YouCopia ReStickable 3D Fridge Labels Pack of 10, you may also like: Command Large Double Hook, 6-Pocket Mesh Shower Caddy, Chrome Metal Purse Hangers Pkg/6, The Container Store All-Purpose Storage Bag, Large Tall Cabinet Shelf, White Long Grid Stackable Shelf, Erasable Food Storage Labels, Built NY Stainless Steel Utensil Set, Progressive Magnetic Kitchen Scissors, RSVP Silicone Straws with Cleaner Pkg/6


In [42]:
recommendations

['Command Large Double Hook',
 '6-Pocket Mesh Shower Caddy',
 'Chrome Metal Purse Hangers Pkg/6',
 'The Container Store All-Purpose Storage Bag',
 'Large Tall Cabinet Shelf',
 'White Long Grid Stackable Shelf',
 'Erasable Food Storage Labels',
 'Built NY Stainless Steel Utensil Set',
 'Progressive Magnetic Kitchen Scissors',
 'RSVP Silicone Straws with Cleaner Pkg/6']

In [43]:
df[df['Title']==item_name]

Unnamed: 0,Title,Price,Colors,Link
1441,YouCopia ReStickable 3D Fridge Labels Pack of 10,-0.433485,-1,https://www.containerstore.com/s/elfa/complete-your-elfa-space/youcopia-restickable-3d-fridge-labels-pkg~10/12d?productId=11019795


In [44]:
df[df['Title']==item_name]['Link']

1441    https://www.containerstore.com/s/elfa/complete-your-elfa-space/youcopia-restickable-3d-fridge-labels-pkg~10/12d?productId=11019795
Name: Link, dtype: object

In [45]:
[df[df['Title']==rec]['Link'] for rec in recommendations]

[215    https://www.containerstore.com/s/office/new-office-arrivals/command-large-double-hook/12d?productId=11024883
 Name: Link, dtype: object,
 238    https://www.containerstore.com/s/bath/bathroom-new-arrivals/6_pocket-mesh-shower-caddy/12d?productId=11024855
 Name: Link, dtype: object,
 1281    https://www.containerstore.com/s/elfa/complete-your-elfa-space/chrome-metal-purse-hangers/12d?productId=10037214
 Name: Link, dtype: object,
 1282    https://www.containerstore.com/s/elfa/complete-your-elfa-space/the-container-store-all_purpose-storage-bag/12d?productId=11023043
 Name: Link, dtype: object,
 1341    https://www.containerstore.com/s/elfa/complete-your-elfa-space/large-tall-cabinet-shelf/12d?productId=10035747
 Name: Link, dtype: object,
 1367    https://www.containerstore.com/s/elfa/complete-your-elfa-space/white-long-grid-stackable-shelf/12d?productId=11002124
 Name: Link, dtype: object,
 1446    https://www.containerstore.com/s/elfa/complete-your-elfa-space/erasable-food-sto

In [22]:
import re

# Example string
text = "['Aviator', 'Black', 'Chartreuse', 'Daffodil']"

# Extract words using regex
words = re.findall(r'\b\w+\b', text)

print(words)

['Aviator', 'Black', 'Chartreuse', 'Daffodil']


In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Example dataset (replace with your actual dataset loading)
data = {
    'furniture_name': ['Sofa', 'Table', 'Chair', 'Cushion', 'Bed', 'Desk'],
    'color': ['Red', 'Brown', 'White', 'Blue', 'Black', 'Brown'],
    'price': [500, 300, 150, 50, 800, 400]
}

df = pd.DataFrame(data)

# Encoding categorical variables (color)
df['color_encoded'] = pd.factorize(df['color'])[0]

# Scaling numerical features (price)
scaler = StandardScaler()
df['price_scaled'] = scaler.fit_transform(df[['price']])

# Creating feature vectors
features = df[['color_encoded', 'price_scaled']].values

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(features, features)

# Function to recommend items
def recommend_items(item_name, similarity_matrix, df, top_n=3):
    item_index = df.index[df['furniture_name'] == item_name][0]
    similarity_scores = list(enumerate(similarity_matrix[item_index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    top_similar_items = similarity_scores[1:top_n+1]  # Exclude itself, get top N similar items
    recommended_items = [df.iloc[item[0]]['furniture_name'] for item in top_similar_items]
    return recommended_items

# Example usage
item_name = 'Sofa'
recommendations = recommend_items(item_name, similarity_matrix, df)
print(f"Because you viewed {item_name}, you may also like: {', '.join(recommendations)}")
