# Import Libraries

In [1]:
import pandas as pd 
import numpy as np
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
pd.set_option('display.max_colwidth', None) 

# Loading Dataframe

In [2]:
df=pd.read_csv('../data/PotteryBarn_data.csv')

In [3]:
df.isna().sum()

Title               0
Link                0
Sale Price          0
Regular Price    4538
Image Path         14
Swatch Colors    6415
dtype: int64

In [4]:
df = df[['Title','Sale Price','Swatch Colors','Link']]

In [5]:
df.isna().sum()

Title               0
Sale Price          0
Swatch Colors    6415
Link                0
dtype: int64

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11014 entries, 0 to 11013
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Title          11014 non-null  object
 1   Sale Price     11014 non-null  object
 2   Swatch Colors  4599 non-null   object
 3   Link           11014 non-null  object
dtypes: object(4)
memory usage: 344.3+ KB


In [7]:
df.rename(columns={'Sale Price': 'Price',
                   'Swatch Colors': 'Colors'},
          inplace=True, errors='raise')

# Displaying Dataset

In [8]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,"Big Sur Square Arm Sofa (76""–105"")",1614,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-sofa-collection/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
1,"Big Sur Square Arm Chaise Sectional (114""–156"")",3229,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-sofa-with-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
2,"Big Sur Square Arm Deep Seat Chaise Sectional (105""–147"")",3739,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-deep-seat-upholstered-sofa-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
3,"Big Sur Square Arm Double Chaise Sectional (146""–198"")",4584,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-u-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
4,"Big Sur Square Arm Deep Seat Sofa (77""–105"")",1869,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-deep-seat-upholstered-sofa-collection/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant


# Droping Duplicate

In [9]:
df.drop_duplicates(subset = "Title",inplace=True)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4631 entries, 0 to 11013
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Title   4631 non-null   object
 1   Price   4631 non-null   object
 2   Colors  2285 non-null   object
 3   Link    4631 non-null   object
dtypes: object(4)
memory usage: 180.9+ KB


# Using Regex to Clean `Title` Column

In [11]:
title=df['Title']

In [12]:
title.head()

0                           Big Sur Square Arm Sofa (76"–105")
1              Big Sur Square Arm Chaise Sectional (114"–156")
2    Big Sur Square Arm Deep Seat Chaise Sectional (105"–147")
3       Big Sur Square Arm Double Chaise Sectional (146"–198")
4                 Big Sur Square Arm Deep Seat Sofa (77"–105")
Name: Title, dtype: object

In [13]:
df['Title'] = df['Title'].str.replace(r'\s*\([^)]*\)', '',regex=True)

In [14]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,Big Sur Square Arm Sofa,1614,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-sofa-collection/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
1,Big Sur Square Arm Chaise Sectional,3229,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-sofa-with-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
2,Big Sur Square Arm Deep Seat Chaise Sectional,3739,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-deep-seat-upholstered-sofa-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
3,Big Sur Square Arm Double Chaise Sectional,4584,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-u-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
4,Big Sur Square Arm Deep Seat Sofa,1869,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-deep-seat-upholstered-sofa-collection/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant


# Replacing Commas With Nothing in Price Column

In [15]:
df['Price'] = df['Price'].str.replace(',', '',regex=True)

In [16]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,Big Sur Square Arm Sofa,1614,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-sofa-collection/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
1,Big Sur Square Arm Chaise Sectional,3229,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-sofa-with-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
2,Big Sur Square Arm Deep Seat Chaise Sectional,3739,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-deep-seat-upholstered-sofa-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
3,Big Sur Square Arm Double Chaise Sectional,4584,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-upholstered-u-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
4,Big Sur Square Arm Deep Seat Sofa,1869,"Performance Boucle, Oatmeal, Performance Boucle, Ivory, Performance Boucle, Pebble, Performance Boucle, Metal",https://www.potterybarn.com/products/big-sur-square-arm-deep-seat-upholstered-sofa-collection/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant


# Replacing Commas in Colors Column

In [17]:
df['Colors']=df['Colors'].str.replace(',','')

In [18]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,Big Sur Square Arm Sofa,1614,Performance Boucle Oatmeal Performance Boucle Ivory Performance Boucle Pebble Performance Boucle Metal,https://www.potterybarn.com/products/big-sur-square-arm-upholstered-sofa-collection/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
1,Big Sur Square Arm Chaise Sectional,3229,Performance Boucle Oatmeal Performance Boucle Ivory Performance Boucle Pebble Performance Boucle Metal,https://www.potterybarn.com/products/big-sur-square-arm-upholstered-sofa-with-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
2,Big Sur Square Arm Deep Seat Chaise Sectional,3739,Performance Boucle Oatmeal Performance Boucle Ivory Performance Boucle Pebble Performance Boucle Metal,https://www.potterybarn.com/products/big-sur-square-arm-deep-seat-upholstered-sofa-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
3,Big Sur Square Arm Double Chaise Sectional,4584,Performance Boucle Oatmeal Performance Boucle Ivory Performance Boucle Pebble Performance Boucle Metal,https://www.potterybarn.com/products/big-sur-square-arm-upholstered-u-chaise-sectional/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant
4,Big Sur Square Arm Deep Seat Sofa,1869,Performance Boucle Oatmeal Performance Boucle Ivory Performance Boucle Pebble Performance Boucle Metal,https://www.potterybarn.com/products/big-sur-square-arm-deep-seat-upholstered-sofa-collection/?pkey=cbig-sur-sofas-sectionals&cm_sp=ossa-17548_bestseller_variant


In [19]:
df['Colors'] = pd.factorize(df['Colors'])[0]

# Converting Price From String To Float

In [20]:
df['Price'] = df['Price'].str.replace(r'\S*[a-zA-Z]\S*', '0',regex=True)

In [21]:
df['Price']=df['Price'].astype('float')

In [22]:
scaler = StandardScaler()

In [23]:
df['Price'] = scaler.fit_transform(df[['Price']])

In [24]:
# Creating feature vectors
features = df[['Colors', 'Price']].values

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(features, features)

In [25]:
def recommend_items(item_name, similarity_matrix, df, top_n=10):
    if item_name not in df['Title'].values:
        return f"Item '{item_name}' not found in the dataset."

    item_index = df.index[df['Title'] == item_name].tolist()[0]

    similarity_scores = list(enumerate(similarity_matrix[item_index]))

    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    top_similar_items = similarity_scores[1:top_n+1]

    recommended_items = [df.iloc[item[0]]['Title'] for item in top_similar_items]

    return recommended_items

# Example usage
item_name = 'Buchanan Square Arm Chair'
recommendations = recommend_items(item_name, similarity_matrix, df)
print(f"Because you viewed {item_name}, you may also like: {', '.join(recommendations)}")


Because you viewed Buchanan Square Arm Chair, you may also like: Big Sur Leather Sectional Ottoman, Pacifica Trundle Sleeper Sofa, Jake Leather Seadrift Wood Base Swivel Chair, Jake Leather Brindle Wood Base Swivel Chair, Turner Storage Ottoman with Pull Out Table, Layton Low Upholstered Bed, Baldwin Leather Swivel Desk Chair, Shasta Roll Arm Leather Chair, Shasta Square Arm Leather Chair, Carmel Leather Ottoman


In [26]:
recommendations

['Big Sur Leather Sectional Ottoman',
 'Pacifica Trundle Sleeper Sofa',
 'Jake Leather Seadrift Wood Base Swivel Chair',
 'Jake Leather Brindle Wood Base Swivel Chair',
 'Turner Storage Ottoman with Pull Out Table',
 'Layton Low Upholstered Bed',
 'Baldwin Leather Swivel Desk Chair',
 'Shasta Roll Arm Leather Chair',
 'Shasta Square Arm Leather Chair',
 'Carmel Leather Ottoman']

In [27]:
df[df['Title']=="Big Sur Leather Sectional Ottoman"]

Unnamed: 0,Title,Price,Colors,Link
2146,Big Sur Leather Sectional Ottoman,-0.428692,14,https://www.potterybarn.com/products/big-sur-leather-sectional-ottoman/?pkey=cliving-room-benches


In [28]:
df[df['Title']=="Big Sur Leather Sectional Ottoman"]['Link']

2146    https://www.potterybarn.com/products/big-sur-leather-sectional-ottoman/?pkey=cliving-room-benches
Name: Link, dtype: object

In [29]:
[df[df['Title']==rec]['Link'] for rec in recommendations]

[2146    https://www.potterybarn.com/products/big-sur-leather-sectional-ottoman/?pkey=cliving-room-benches
 Name: Link, dtype: object,
 1115    https://www.potterybarn.com/products/pacifica-square-arm-upholstered-trundle-sleeper-sofa/?pkey=cpacifica-sofas-sectionals
 Name: Link, dtype: object,
 576    https://www.potterybarn.com/products/jake-leather-wood-legs-swivel-armchair/?pkey=cjake-wooden-base-collection
 Name: Link, dtype: object,
 586    https://www.potterybarn.com/products/jake-leather-dark-wood-base-swivel-armchair/?pkey=cjake-wooden-base-collection
 Name: Link, dtype: object,
 204    https://www.potterybarn.com/products/turner-upholstered-storage-ottoman-pull-out-table/?pkey=cturner-sofas-sectionals
 Name: Link, dtype: object,
 3687    https://www.potterybarn.com/products/layton-low-upholstered-bed/?pkey=clayton-collection
 Name: Link, dtype: object,
 7372    https://www.potterybarn.com/products/baldwin-leather-swivel-desk-chair/?pkey=coffice-chairs
 Name: Link, dtype: objec

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Example dataset (replace with your actual dataset loading)
data = {
    'furniture_name': ['Sofa', 'Table', 'Chair', 'Cushion', 'Bed', 'Desk'],
    'color': ['Red', 'Brown', 'White', 'Blue', 'Black', 'Brown'],
    'price': [500, 300, 150, 50, 800, 400]
}

df = pd.DataFrame(data)

# Encoding categorical variables (color)
df['color_encoded'] = pd.factorize(df['color'])[0]

# Scaling numerical features (price)
scaler = StandardScaler()
df['price_scaled'] = scaler.fit_transform(df[['price']])

# Creating feature vectors
features = df[['color_encoded', 'price_scaled']].values

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(features, features)

# Function to recommend items
def recommend_items(item_name, similarity_matrix, df, top_n=3):
    item_index = df.index[df['furniture_name'] == item_name][0]
    similarity_scores = list(enumerate(similarity_matrix[item_index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    top_similar_items = similarity_scores[1:top_n+1]  # Exclude itself, get top N similar items
    recommended_items = [df.iloc[item[0]]['furniture_name'] for item in top_similar_items]
    return recommended_items

# Example usage
item_name = 'Sofa'
recommendations = recommend_items(item_name, similarity_matrix, df)
print(f"Because you viewed {item_name}, you may also like: {', '.join(recommendations)}")
