# Import Libraries

In [1]:
import pandas as pd 
import numpy as np
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
pd.set_option('display.max_colwidth', None) 

# Loading Dataframe

In [3]:
df=pd.read_csv('../data/containerstore.csv')

In [3]:
df.head()

Unnamed: 0,Title,Price,Link,Image Path,Colors
0,The Container Store Samson Faux Leather Bin,$89.99 – $99.99,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-samson-faux-leather-bin/12d?productId=11023148,images/thecontainerstoresamsonfauxleatherbin.jpg,"css-1jtiraa, css-1xx16ea"
1,Zafferano Poldina Pro Wireless Lamp,$169.00,https://www.example.com/s/home-decor/home-decor-new-arrivals/zafferano-poldina-pro-wireless-lamp/12d?productId=11024636,images/zafferanopoldinaprowirelesslamp.jpg,"css-abqf3x, css-1ljdx5i, css-1xx16ea"
2,The Container Store Rattan Bins with Lid,$109.99 – $179.99,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-rattan-bins-with-lid/12d?productId=11023133,images/thecontainerstorerattanbinswithlid.jpg,
3,The Container Store Pacific Bin,$29.99 – $39.99,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-pacific-bin/12d?productId=11023713,images/thecontainerstorepacificbin.jpg,
4,Yamazaki Tower Narrow Entryway Console Table,$79.00,https://www.example.com/s/home-decor/home-decor-new-arrivals/yamazaki-tower-narrow-entryway-console-table/12d?productId=11024476,images/yamazakitowernarrowentrywayconsoletable.jpg,"css-1xx16ea, css-abqf3x"


In [4]:
df.isna().sum()

Title            0
Price            0
Link             0
Image Path       0
Colors        6138
dtype: int64

In [5]:
df = df[['Title','Price','Colors','Link']]

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8170 entries, 0 to 8169
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Title   8170 non-null   object
 1   Price   8170 non-null   object
 2   Colors  2032 non-null   object
 3   Link    8170 non-null   object
dtypes: object(4)
memory usage: 255.4+ KB


# Displaying Dataset

In [7]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,The Container Store Samson Faux Leather Bin,$89.99 – $99.99,"css-1jtiraa, css-1xx16ea",https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-samson-faux-leather-bin/12d?productId=11023148
1,Zafferano Poldina Pro Wireless Lamp,$169.00,"css-abqf3x, css-1ljdx5i, css-1xx16ea",https://www.example.com/s/home-decor/home-decor-new-arrivals/zafferano-poldina-pro-wireless-lamp/12d?productId=11024636
2,The Container Store Rattan Bins with Lid,$109.99 – $179.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-rattan-bins-with-lid/12d?productId=11023133
3,The Container Store Pacific Bin,$29.99 – $39.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-pacific-bin/12d?productId=11023713
4,Yamazaki Tower Narrow Entryway Console Table,$79.00,"css-1xx16ea, css-abqf3x",https://www.example.com/s/home-decor/home-decor-new-arrivals/yamazaki-tower-narrow-entryway-console-table/12d?productId=11024476


# Droping Duplicate

In [8]:
df.drop_duplicates(subset = "Title",inplace=True)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4252 entries, 0 to 8107
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Title   4252 non-null   object
 1   Price   4252 non-null   object
 2   Colors  975 non-null    object
 3   Link    4252 non-null   object
dtypes: object(4)
memory usage: 166.1+ KB


# Drop Rows in Price Column where it says `See New Price In Cart`

In [10]:
df = df[df['Price'] != "See New Price In Cart"]

# Replacing `$` With Nothing in Price Column

In [11]:
df['Price'] = df['Price'].str.replace('$', '')

In [12]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,The Container Store Samson Faux Leather Bin,89.99 – 99.99,"css-1jtiraa, css-1xx16ea",https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-samson-faux-leather-bin/12d?productId=11023148
1,Zafferano Poldina Pro Wireless Lamp,169.00,"css-abqf3x, css-1ljdx5i, css-1xx16ea",https://www.example.com/s/home-decor/home-decor-new-arrivals/zafferano-poldina-pro-wireless-lamp/12d?productId=11024636
2,The Container Store Rattan Bins with Lid,109.99 – 179.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-rattan-bins-with-lid/12d?productId=11023133
3,The Container Store Pacific Bin,29.99 – 39.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-pacific-bin/12d?productId=11023713
4,Yamazaki Tower Narrow Entryway Console Table,79.00,"css-1xx16ea, css-abqf3x",https://www.example.com/s/home-decor/home-decor-new-arrivals/yamazaki-tower-narrow-entryway-console-table/12d?productId=11024476


# Calculating Average of the Prices Where `-` is present

In [13]:
df['Price'][0]

'89.99 – 99.99'

In [14]:
def clean_and_average_price(price):
    # Regular expression pattern to capture numbers before and after the hyphen
    range_pattern = r'(\d+\.\d+)\s*–\s*(\d+\.\d+)'
    single_price_pattern = r'(\d+\.\d+|\d+)'

    # Check if the price is in range format
    match_range = re.search(range_pattern, price)
    if match_range:
        before_hyphen = float(match_range.group(1))
        after_hyphen = float(match_range.group(2))
        average = (before_hyphen + after_hyphen) / 2
        return average
    
    # Check if the price contains single number with text
    match_single = re.search(single_price_pattern, price)
    if match_single:
        return float(match_single.group(1))
    
    return (price)  # Convert to float if it's a single number without text

# Apply the function to the 'Price' column
df['Price'] = df['Price'].apply(clean_and_average_price)

In [16]:
df['Price']=df['Price'].astype('float')

In [17]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,The Container Store Samson Faux Leather Bin,94.99,"css-1jtiraa, css-1xx16ea",https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-samson-faux-leather-bin/12d?productId=11023148
1,Zafferano Poldina Pro Wireless Lamp,169.0,"css-abqf3x, css-1ljdx5i, css-1xx16ea",https://www.example.com/s/home-decor/home-decor-new-arrivals/zafferano-poldina-pro-wireless-lamp/12d?productId=11024636
2,The Container Store Rattan Bins with Lid,144.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-rattan-bins-with-lid/12d?productId=11023133
3,The Container Store Pacific Bin,34.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-pacific-bin/12d?productId=11023713
4,Yamazaki Tower Narrow Entryway Console Table,79.0,"css-1xx16ea, css-abqf3x",https://www.example.com/s/home-decor/home-decor-new-arrivals/yamazaki-tower-narrow-entryway-console-table/12d?productId=11024476


# Making the Links Right

In [39]:
df['Link']=df['Link'].str.replace('https://www.example.com/','https://www.containerstore.com/')

# Replacing Commas in Colors Column

In [18]:
df['Colors']=df['Colors'].str.replace(',','')

In [19]:
df.head()

Unnamed: 0,Title,Price,Colors,Link
0,The Container Store Samson Faux Leather Bin,94.99,css-1jtiraa css-1xx16ea,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-samson-faux-leather-bin/12d?productId=11023148
1,Zafferano Poldina Pro Wireless Lamp,169.0,css-abqf3x css-1ljdx5i css-1xx16ea,https://www.example.com/s/home-decor/home-decor-new-arrivals/zafferano-poldina-pro-wireless-lamp/12d?productId=11024636
2,The Container Store Rattan Bins with Lid,144.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-rattan-bins-with-lid/12d?productId=11023133
3,The Container Store Pacific Bin,34.99,,https://www.example.com/s/home-decor/home-decor-new-arrivals/the-container-store-pacific-bin/12d?productId=11023713
4,Yamazaki Tower Narrow Entryway Console Table,79.0,css-1xx16ea css-abqf3x,https://www.example.com/s/home-decor/home-decor-new-arrivals/yamazaki-tower-narrow-entryway-console-table/12d?productId=11024476


In [20]:
scaler = StandardScaler()

In [21]:
df['Price'] = scaler.fit_transform(df[['Price']])

In [22]:
df['Colors'] = pd.factorize(df['Colors'])[0]

In [23]:
# Creating feature vectors
features = df[['Colors', 'Price']].values

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(features, features)

In [41]:
def recommend_items(item_name, similarity_matrix, df, top_n=10):
    if item_name not in df['Title'].values:
        return f"Item '{item_name}' not found in the dataset."

    item_index = df.index[df['Title'] == item_name].tolist()[0]

    similarity_scores = list(enumerate(similarity_matrix[item_index]))

    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    top_similar_items = similarity_scores[1:top_n+1]

    recommended_items = [df.iloc[item[0]]['Title'] for item in top_similar_items]

    return recommended_items

# Example usage
item_name = 'YouCopia ReStickable 3D Fridge Labels Pack of 10'
recommendations = recommend_items(item_name, similarity_matrix, df)
print(f"Because you viewed {item_name}, you may also like: {', '.join(recommendations)}")


Because you viewed YouCopia ReStickable 3D Fridge Labels Pack of 10, you may also like: Command Large Double Hook, 6-Pocket Mesh Shower Caddy, Chrome Metal Purse Hangers Pkg/6, The Container Store All-Purpose Storage Bag, Large Tall Cabinet Shelf, White Long Grid Stackable Shelf, Erasable Food Storage Labels, Built NY Stainless Steel Utensil Set, Progressive Magnetic Kitchen Scissors, RSVP Silicone Straws with Cleaner Pkg/6


In [42]:
recommendations

['Command Large Double Hook',
 '6-Pocket Mesh Shower Caddy',
 'Chrome Metal Purse Hangers Pkg/6',
 'The Container Store All-Purpose Storage Bag',
 'Large Tall Cabinet Shelf',
 'White Long Grid Stackable Shelf',
 'Erasable Food Storage Labels',
 'Built NY Stainless Steel Utensil Set',
 'Progressive Magnetic Kitchen Scissors',
 'RSVP Silicone Straws with Cleaner Pkg/6']

In [43]:
df[df['Title']==item_name]

Unnamed: 0,Title,Price,Colors,Link
1441,YouCopia ReStickable 3D Fridge Labels Pack of 10,-0.433485,-1,https://www.containerstore.com/s/elfa/complete-your-elfa-space/youcopia-restickable-3d-fridge-labels-pkg~10/12d?productId=11019795


In [44]:
df[df['Title']==item_name]['Link']

1441    https://www.containerstore.com/s/elfa/complete-your-elfa-space/youcopia-restickable-3d-fridge-labels-pkg~10/12d?productId=11019795
Name: Link, dtype: object

In [45]:
[df[df['Title']==rec]['Link'] for rec in recommendations]

[215    https://www.containerstore.com/s/office/new-office-arrivals/command-large-double-hook/12d?productId=11024883
 Name: Link, dtype: object,
 238    https://www.containerstore.com/s/bath/bathroom-new-arrivals/6_pocket-mesh-shower-caddy/12d?productId=11024855
 Name: Link, dtype: object,
 1281    https://www.containerstore.com/s/elfa/complete-your-elfa-space/chrome-metal-purse-hangers/12d?productId=10037214
 Name: Link, dtype: object,
 1282    https://www.containerstore.com/s/elfa/complete-your-elfa-space/the-container-store-all_purpose-storage-bag/12d?productId=11023043
 Name: Link, dtype: object,
 1341    https://www.containerstore.com/s/elfa/complete-your-elfa-space/large-tall-cabinet-shelf/12d?productId=10035747
 Name: Link, dtype: object,
 1367    https://www.containerstore.com/s/elfa/complete-your-elfa-space/white-long-grid-stackable-shelf/12d?productId=11002124
 Name: Link, dtype: object,
 1446    https://www.containerstore.com/s/elfa/complete-your-elfa-space/erasable-food-sto

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Example dataset (replace with your actual dataset loading)
data = {
    'furniture_name': ['Sofa', 'Table', 'Chair', 'Cushion', 'Bed', 'Desk'],
    'color': ['Red', 'Brown', 'White', 'Blue', 'Black', 'Brown'],
    'price': [500, 300, 150, 50, 800, 400]
}

df = pd.DataFrame(data)

# Encoding categorical variables (color)
df['color_encoded'] = pd.factorize(df['color'])[0]

# Scaling numerical features (price)
scaler = StandardScaler()
df['price_scaled'] = scaler.fit_transform(df[['price']])

# Creating feature vectors
features = df[['color_encoded', 'price_scaled']].values

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(features, features)

# Function to recommend items
def recommend_items(item_name, similarity_matrix, df, top_n=3):
    item_index = df.index[df['furniture_name'] == item_name][0]
    similarity_scores = list(enumerate(similarity_matrix[item_index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    top_similar_items = similarity_scores[1:top_n+1]  # Exclude itself, get top N similar items
    recommended_items = [df.iloc[item[0]]['furniture_name'] for item in top_similar_items]
    return recommended_items

# Example usage
item_name = 'Sofa'
recommendations = recommend_items(item_name, similarity_matrix, df)
print(f"Because you viewed {item_name}, you may also like: {', '.join(recommendations)}")
