## DATA IMPORT & BASIC EDA

In [1]:
import pandas as pd
import numpy as np

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import spacy

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import hstack
from sklearn.preprocessing import MaxAbsScaler

In [2]:
df = pd.read_csv("product_reviews_dataset.csv")

In [3]:
df.shape

(34660, 9)

In [4]:
df.head(10)

Unnamed: 0,product,source,categories,date,didPurchase,doRecommend,rating,reviews,title
0,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-13T00:00:00.000Z,,True,5.0,This product so far has not disappointed. My c...,brand name
1,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-13T00:00:00.000Z,,True,5.0,great for beginner or experienced person. Boug...,very fast
2,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-13T00:00:00.000Z,,True,5.0,Inexpensive tablet for him to use and learn on...,Beginner tablet for our 9 year old son.
3,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-13T00:00:00.000Z,,True,4.0,I've had my XYZ brand HD 8 two weeks now and I...,Good!!!
4,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-12T00:00:00.000Z,,True,5.0,I bought this for my grand daughter when she c...,Fantastic Tablet for kids
5,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-12T00:00:00.000Z,,True,5.0,This Target XYZ brand 8 inch tablet is the per...,Just what we expected
6,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-12T00:00:00.000Z,,True,4.0,"Great for e-reading on the go, nice and light ...",great e-reader tablet
7,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-12T00:00:00.000Z,,True,5.0,"I gave this as a Christmas gift to my inlaws, ...",Great for gifts
8,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-23T00:00:00.000Z,,True,5.0,Great as a device to read books. I like that i...,Great for reading
9,electronics brand product name Tablet A 10.1 T...,Target,"Electronics,iPad & Tablets,All Tablets,XYZ bra...",2021-01-23T00:00:00.000Z,,True,5.0,I love ordering books and reading them with th...,Great and lightweight reader


In [5]:
df.tail(10)

Unnamed: 0,product,source,categories,date,didPurchase,doRecommend,rating,reviews,title
34650,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2016-03-07T00:00:00.000Z,,,2.0,Although the description states that this will...,Disappointed!
34651,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2021-07-12T00:00:00.000Z,,,5.0,Connects snugly!,Nice contact
34652,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2012-11-13T00:00:00Z,,,1.0,So I was already not too pleased with Target's...,Ridiculous
34653,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2012-11-02T00:00:00Z,,,1.0,"As a charger, it works but is nothing special....",Not As Expected
34654,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2012-10-16T00:00:00Z,,,1.0,This is exactly like any other usb power charg...,Not Necessary
34655,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2012-09-18T00:00:00Z,,,3.0,This is not appreciably faster than any other ...,Not appreciably faster than any other 1.8A cha...
34656,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2012-11-21T00:00:00Z,,,1.0,Target should include this charger with the br...,Should be included
34657,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2012-10-19T00:00:00Z,,,1.0,Love my brand name XYZ brand but I am really d...,Disappointing Charger
34658,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2012-10-31T00:00:00Z,,,1.0,I was surprised to find it did not come with a...,Not worth the money
34659,,Target,"Computers/Tablets & Networking,Tablet & eBook ...",2012-12-23T00:00:00Z,,,1.0,to spite the fact that i have nothing but good...,as with everyone else


In [6]:
df.dtypes

product         object
source          object
categories      object
date            object
didPurchase     object
doRecommend     object
rating         float64
reviews         object
title           object
dtype: object

In [7]:
# Checking number of unique values present in each column.

column_names = df.columns
unique_values = df.nunique()

print("\nNumber of Unique Values in each Column:")
for col, count in unique_values.items():
    print(f"{col}: {count}")


Number of Unique Values in each Column:
product: 60
source: 6
categories: 44
date: 1078
didPurchase: 1
doRecommend: 2
rating: 5
reviews: 34659
title: 19686


In [8]:
# Checking unique value labels and their occurences for columns with least number of unique values

cols = ["source", "didPurchase", "doRecommend", "rating"]

for col in cols:
    print("\n------- {0} -------\n".format(col))
    print(df[col].value_counts())


------- source -------

source
Target                          28701
Target XYZ brand Tv              5056
Target Echo                       636
Target XYZ brand                  256
Target Digital Services Inc.       10
Target Coco T                       1
Name: count, dtype: int64

------- didPurchase -------

didPurchase
True    1
Name: count, dtype: int64

------- doRecommend -------

doRecommend
True     32682
False     1384
Name: count, dtype: int64

------- rating -------

rating
5.0    23775
4.0     8541
3.0     1499
1.0      410
2.0      402
Name: count, dtype: int64


__All the labels seem meaningful, although the count in the "didPurchase" column is very low.__

__'Date' column can be used to see which products were recommended to users, and if they ended up being purchased after recommendation.__

# Handling missing values.

In [9]:
df.isnull().sum()

product         6760
source             0
categories         0
date              39
didPurchase    34659
doRecommend      594
rating            33
reviews            1
title              6
dtype: int64

__As observed before, the "didPurchase" column have more than 99% missing values. Also, there are significant number of missing__
__values in the "product" column.__

In [10]:
# Dropping 'didPurchase' column
df = df.drop("didPurchase", axis=1)

In [11]:
# Dropping rows with NaN values in 'product' column

df = df.dropna(subset=['product'])

In [12]:
df.isnull().sum()

product          0
source           0
categories       0
date            27
doRecommend    491
rating          32
reviews          1
title            4
dtype: int64

In [13]:
# Impute missing values in 'rating' with median
df['rating'] = df['rating'].fillna(df['rating'].median())

In [14]:
# Imputing NaN values using mode imputation (most frequent value)

mode_value = df['doRecommend'].mode().iloc[0]
df['doRecommend'] = df['doRecommend'].fillna(mode_value)

In [15]:
# Dropping rows with NaN values in 'date, 'reviews' and 'title' columns

df = df.dropna(subset=['date', 'reviews', 'title'])

In [16]:
df.isnull().sum()

product        0
source         0
categories     0
date           0
doRecommend    0
rating         0
reviews        0
title          0
dtype: int64

__Except 'ratings' column, all columns contain textual data. The next step will be to preprocess this data.__

## PREPROCESSING TEXTUAL DATA

In [17]:
# Convert text data to lowercase

text_cols = ['product', 'source', 'categories', 'reviews', 'title']

for col in text_cols:
    df[col] = df[col].apply(lambda x: str(x).lower() if pd.notnull(x) else x)

In [18]:
df.head()

Unnamed: 0,product,source,categories,date,doRecommend,rating,reviews,title
0,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,this product so far has not disappointed. my c...,brand name
1,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,great for beginner or experienced person. boug...,very fast
2,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,inexpensive tablet for him to use and learn on...,beginner tablet for our 9 year old son.
3,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,4.0,i've had my xyz brand hd 8 two weeks now and i...,good!!!
4,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-12T00:00:00.000Z,True,5.0,i bought this for my grand daughter when she c...,fantastic tablet for kids


__There are 55 different products, as shown below.__

In [19]:
print(df['product'].value_counts())

product
xyz brand tablet, 7 display, wi-fi, 8 gb - includes special offers, magenta                                                                                                                                                   10966
retail brand brand name paperwhite - ebook reader - 4 gb - 6 monochrome paperwhite - touchscreen - wi-fi - black,,,                                                                                                            3176
electronics brand product name tablet a 10.1 tablet, 8 hd display, wi-fi, 16 gb - includes special offers, magenta                                                                                                             2814
retail brand xyz brand tv,,,\r\nretail brand xyz brand tv,,,                                                                                                                                                                   2525
electonics brand home\r\nelectonics brand home                                  

__Using regular expressions (regex) to set a format for extracting common entities from different products.__

In [20]:
import re

# function for extracting product features
def extract_product_entities(text):
    entities = []
    
    # Regular expressions for specific entities
    display_regex = r"(\d+in|\d+\.\d+in|\d+ (?:in|inch|display))"  # Display size
    numbers_regex = r"(\d+|\d+\.\d+|\d+ (?:w|ft|th|release))" # Power/wattage
    memory_regex = r"(\d+ gb|\d+gb)"  # Memory
    # color_regex = r"(?i)(?:(?:black|white|red|blue|yellow|green|cyan|tangerine|purple|violet))"  # Color
    device_regex = r"(?i)(?:(?:ipad|tablet|ebook-reader|ebook\s+reader|speaker|tv|charging\s+cover|leather\s+cover|protective\s+case|micro|usb|cable|usb\s+charger|power\s+adapter|echo|keyboard))"  # Device type (case-insensitive)
    other_regex = r"(?i)(?:\b(touchscreen|wi-fi|bluetooth|high-resolution|pagepress\s+sensors|home|portable|kid-proof|android|kids\s+edition||refurbished|generation|alexa)\b)"  # Other keywords
    
    # Extract entities using regex
    entities.extend(re.findall(display_regex, text))
    entities.extend(re.findall(numbers_regex, text))
    entities.extend(re.findall(memory_regex, text))
    # entities.extend(re.findall(color_regex, text))
    entities.extend(re.findall(device_regex, text))
    entities.extend(re.findall(other_regex, text))

    # Clean and lowercase entities
    entities = [e.lower() for e in entities if e]
    
    return entities

In [21]:
# function for extracting source entities
def extract_source_entities(text):
    entities = []
    
    source_regex = r"(?i)(?:(?:target|target\s+coco\s+t|target\s+digital\s+services\s+inc.|target\s+echo|target\s+xyz\s+brand|target\s+xyz\s+brand\s+tv))"
    entities.extend(re.findall(source_regex, text))

    # Clean and lowercase entities
    entities = [e.lower() for e in entities if e]
    
    return entities

In [22]:
# Extracting entitiles from 'product' and 'source' columns

df['product_entities'] = df['product'].apply(extract_product_entities)
df['source_entities'] = df['source'].apply(extract_source_entities)

In [23]:
print("Product Entities:\n", df['product_entities'], "\n ------------------------------------------------ \n")
print("Source Entities:\n", df['source_entities'], "\n ------------------------------------------------ \n")

Product Entities:
 0        [10, 1, 8, 16, 16 gb, tablet, tablet, wi-fi]
1        [10, 1, 8, 16, 16 gb, tablet, tablet, wi-fi]
2        [10, 1, 8, 16, 16 gb, tablet, tablet, wi-fi]
3        [10, 1, 8, 16, 16 gb, tablet, tablet, wi-fi]
4        [10, 1, 8, 16, 16 gb, tablet, tablet, wi-fi]
                             ...                     
27895    [9, 9, micro, usb, cable, micro, usb, cable]
27896    [9, 9, micro, usb, cable, micro, usb, cable]
27897    [9, 9, micro, usb, cable, micro, usb, cable]
27898    [9, 9, micro, usb, cable, micro, usb, cable]
27899                          [9, micro, usb, cable]
Name: product_entities, Length: 27868, dtype: object 
 ------------------------------------------------ 

Source Entities:
 0        [target]
1        [target]
2        [target]
3        [target]
4        [target]
           ...   
27895    [target]
27896    [target]
27897    [target]
27898    [target]
27899    [target]
Name: source_entities, Length: 27868, dtype: object 
 -----------

In [24]:
# Joining entities as strings

df['product_entities'] = df['product_entities'].apply(lambda x: ' '.join(x))
df['source_entities'] = df['source_entities'].apply(lambda x: ' '.join(x))

In [25]:
df.head()

Unnamed: 0,product,source,categories,date,doRecommend,rating,reviews,title,product_entities,source_entities
0,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,this product so far has not disappointed. my c...,brand name,10 1 8 16 16 gb tablet tablet wi-fi,target
1,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,great for beginner or experienced person. boug...,very fast,10 1 8 16 16 gb tablet tablet wi-fi,target
2,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,inexpensive tablet for him to use and learn on...,beginner tablet for our 9 year old son.,10 1 8 16 16 gb tablet tablet wi-fi,target
3,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,4.0,i've had my xyz brand hd 8 two weeks now and i...,good!!!,10 1 8 16 16 gb tablet tablet wi-fi,target
4,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-12T00:00:00.000Z,True,5.0,i bought this for my grand daughter when she c...,fantastic tablet for kids,10 1 8 16 16 gb tablet tablet wi-fi,target


__Next, calculating sentiment scores for product reviews.__

In [26]:
# Function to perform sentiment analysis

sid = SentimentIntensityAnalyzer()

def get_sentiment(text):
    sentiment = sid.polarity_scores(text)
    return sentiment['compound']  # Using compound score as an overall sentiment indicator

In [27]:
# Convert columns to type str 
str_cols = ['product', 'source', 'reviews', 'title']

for col in str_cols:
    df[col] = df[col].astype(str)

__Reason for converting columns to type str:__
__Vader Lexicon's SentimentIntensityAnalyzer() is meant for working with text data. The columns in our dataframe have__
__alphanumeric data, i.e. words and numbers. The numbers are considered as type float() by default in this function.__
__We need these numbers to be considered as part of the text by SentimentIntensityAnalyzer().__

__That's precisely why we have to convert the columns, before calculating polarity scores.__

In [28]:
# Apply sentiment analysis to 'Review' and 'Title' columns
df['review_sentiment'] = df['reviews'].apply(get_sentiment)
df['title_sentiment'] = df['title'].apply(get_sentiment)

__Calculating sentiment scores for the 'title' column may not be beneficial since it has very few words, and might return '0' for most cases. Let's see..__

In [29]:
df.head() 

Unnamed: 0,product,source,categories,date,doRecommend,rating,reviews,title,product_entities,source_entities,review_sentiment,title_sentiment
0,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,this product so far has not disappointed. my c...,brand name,10 1 8 16 16 gb tablet tablet wi-fi,target,0.9194,0.0
1,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,great for beginner or experienced person. boug...,very fast,10 1 8 16 16 gb tablet tablet wi-fi,target,0.8934,0.0
2,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,inexpensive tablet for him to use and learn on...,beginner tablet for our 9 year old son.,10 1 8 16 16 gb tablet tablet wi-fi,target,0.4404,0.0
3,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,4.0,i've had my xyz brand hd 8 two weeks now and i...,good!!!,10 1 8 16 16 gb tablet tablet wi-fi,target,0.9903,0.5826
4,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-12T00:00:00.000Z,True,5.0,i bought this for my grand daughter when she c...,fantastic tablet for kids,10 1 8 16 16 gb tablet tablet wi-fi,target,0.765,0.5574


In [30]:
# # calculating the count of occurrences of '0' in 'title_sentiment'
print(df['title_sentiment'].value_counts().get(0.0000, 0))

4371


__As theorized, about 6.5% of the 'title_sentiment' scores are zero. So, I'll be combining both 'review_sentiment' and 'title_sentiment' scores to create a single feature, with more weightage given to product reviews as they possess more information and returning more accurate scores.__

In [31]:
title_weight = 0.2
review_weight = 0.8

# Calculate weighted average sentiment score
df['combined_sentiment'] = (title_weight * df['title_sentiment'] + review_weight * df['review_sentiment']) / (title_weight + review_weight)

In [32]:
df.head(5)

Unnamed: 0,product,source,categories,date,doRecommend,rating,reviews,title,product_entities,source_entities,review_sentiment,title_sentiment,combined_sentiment
0,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,this product so far has not disappointed. my c...,brand name,10 1 8 16 16 gb tablet tablet wi-fi,target,0.9194,0.0,0.73552
1,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,great for beginner or experienced person. boug...,very fast,10 1 8 16 16 gb tablet tablet wi-fi,target,0.8934,0.0,0.71472
2,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,5.0,inexpensive tablet for him to use and learn on...,beginner tablet for our 9 year old son.,10 1 8 16 16 gb tablet tablet wi-fi,target,0.4404,0.0,0.35232
3,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-13T00:00:00.000Z,True,4.0,i've had my xyz brand hd 8 two weeks now and i...,good!!!,10 1 8 16 16 gb tablet tablet wi-fi,target,0.9903,0.5826,0.90876
4,electronics brand product name tablet a 10.1 t...,target,"electronics,ipad & tablets,all tablets,xyz bra...",2021-01-12T00:00:00.000Z,True,5.0,i bought this for my grand daughter when she c...,fantastic tablet for kids,10 1 8 16 16 gb tablet tablet wi-fi,target,0.765,0.5574,0.72348


__The main features for model building will be 'rating', 'product_entities' and 'combined_sentiment'.__

__Not using 'source_entities' since it only has a single value ("Target") which isn't useful for recommendation in this case.__

__As for categories, they could be used similarly to product entities for content-based filtering. However, product entities might provide a more granular representation of product features.__

# Content-Based Filtering

In [50]:
text_features = df['product_entities'] + ' ' + df['reviews'] + ' ' + df['title']
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(text_features)
content_cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [64]:
# Function to get recommendations based on content similarity
def content_filtering(product_name, cosine_sim, top_n=5):
    idx = df[df['product'] == product_name].index[0]
    
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    product_indices = [i[0] for i in sim_scores]

    # Filter out the input product name from recommendations
    similar_products_content = df['product'].iloc[product_indices]
    similar_products_content = similar_products_content[similar_products_content != product_name]
    
    return similar_products_content.unique()[:top_n]

In [65]:
# Example usage
product_name = 'electronics brand product name tablet a 10.1 tablet, 8 hd display, wi-fi, 16 gb - includes special offers, magenta'
recommendations = content_filtering(product_name, content_cosine_sim)
print("Recommendations for", product_name, ": \n", recommendations.tolist())

Recommendations for electronics brand product name tablet a 10.1 tablet, 8 hd display, wi-fi, 16 gb - includes special offers, magenta : 
 ['xyz brand tablet, 7 display, wi-fi, 8 gb - includes special offers, magenta', 'xyz brand kids edition tablet, 7 display, wi-fi, 16 gb, green kid-proof case']


__The function returns 2 unique products, related to the product name given.__

# Collaborative Filtering

In [62]:
def collaborative_filtering(product_name, data, top_n=5):
    # Create a rating matrix
    rating_matrix = data.groupby('product')['rating'].apply(list).apply(pd.Series).fillna(0)
    
    # Extracting the combined sentiment scores for all products
    product_sentiments = data.set_index('product')['combined_sentiment']
    
    # Calculate cosine similarity between product ratings
    cosine_sim = cosine_similarity(rating_matrix, rating_matrix)
    
    # Get index of the input product
    product_index = rating_matrix.index.get_loc(product_name)
    
    # Get indices of the top similar products (excluding the input product itself)
    similar_product_indices = cosine_sim[product_index].argsort()[::-1][1:top_n+1]
    
    # Get names of the top similar products
    similar_products = rating_matrix.iloc[similar_product_indices].index.tolist()
    
    return similar_products

In [63]:
# Example usage
product_name = 'retail brand brand name paperwhite - ebook reader - 4 gb - 6 monochrome paperwhite - touchscreen - wi-fi - black,,,'
similar_products = collaborative_filtering(product_name, df)
print("Top similar products to", product_name, ":\n", similar_products)

Top similar products to retail brand brand name paperwhite - ebook reader - 4 gb - 6 monochrome paperwhite - touchscreen - wi-fi - black,,, :
 ['electronics brand product name tablet a 10.1 tablet, 8 hd display, wi-fi, 16 gb - includes special offers, magenta', 'retail brand xyz brand tv,,,\r\nretail brand xyz brand tv,,,', 'electonics brand home\r\nelectonics brand home', 'xyz brand kids edition tablet, 7 display, wi-fi, 16 gb, green kid-proof case', '\nelectonics brand home']


__The function returns 5 unique products.__

# Hybrid model

In [66]:
def hybrid_recommendations(product_name, df, top_n=5):
    
    # Content-Based Filtering
    text_features = df['product_entities'] + ' ' + df['reviews'] + ' ' + df['title']
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(text_features)
    content_cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
    content_recommendations = content_filtering(product_name, content_cosine_sim, top_n)
    
    # Collaborative Filtering
    collaborative_recommendations = collaborative_filtering(product_name, df, top_n)
    
    # Combine recommendations
    hybrid_recommendations = list(set(content_recommendations) | set(collaborative_recommendations))
    
    # Exclude the input product from recommendations
    hybrid_recommendations = [prod for prod in hybrid_recommendations if prod != product_name]
    
    return hybrid_recommendations[:top_n]

In [68]:
# Example usage
product_name = 'brand new electronics brand ipad16gb 7 ips display tablet wifi 16 gb blue,,,'
hybrid_recs = hybrid_recommendations(product_name, df)
print("Hybrid Recommendations for", product_name, ":\n", hybrid_recs)

Hybrid Recommendations for brand new electronics brand ipad16gb 7 ips display tablet wifi 16 gb blue,,, :
 ['\nelectonics brand home', 'electronics brand product name tablet a 10.1 tablet, 8 hd display, wi-fi, 16 gb - includes special offers, magenta', 'xyz brand kids edition tablet, 7 display, wi-fi, 16 gb, green kid-proof case', 'electonics brand home\r\nelectonics brand home', 'brand name voyage e-reader, 6 high-resolution display (300 ppi) with adaptive built-in light, pagepress sensors, wi-fi - includes special offers,']


__Successfully recommended products using the hybrid approach - combining content-based and collaborative-based filtering.__