In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from fuzzywuzzy import process
from collections import Counter
from collections import defaultdict
from ipywidgets import interact, widgets
from IPython.display import display

# User table, indexed by user_id.
users_df = pd.read_csv('users.csv').set_index('user_id')

# Shoe catalogue with every column coerced to text.
shoes_data = pd.read_csv('shoes.csv').astype(str)

# One searchable text field per shoe: "<name> <brand> <description>".
name_text = shoes_data['shoe_name']
brand_text = shoes_data['shoe_brand']
description_text = shoes_data['shoe_description']
shoes_data['combined_text'] = name_text + ' ' + brand_text + ' ' + description_text

# TF-IDF representation of the combined text, English stop words removed.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(shoes_data['combined_text'])

# Pairwise shoe-to-shoe cosine similarity over the TF-IDF vectors.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


#====================Part 1: Rating and VoteCount ======================================
def high_rating(path='shoes.csv', top_n=5):
    """Rank shoes by an IMDB-style weighted rating and return the best ones.

    A shoe with rating ``R`` and ``v`` votes scores
    ``v / (v + m) * R + m / (v + m) * C``, where ``C`` is the mean rating of
    the whole catalogue and ``m`` is the smallest vote count in it.

    Args:
        path: CSV file to read. It must provide the columns ``shoe_id``,
            ``shoe_name``, ``shoe_brand``, ``shoe_rating`` and ``rate_count``.
        top_n: How many shoes to return.

    Returns:
        A tuple ``(rows, ids)``. ``rows`` is a list of
        ``[shoe_id, shoe_name, shoe_brand, score]`` lists for the ``top_n``
        best-scoring shoes, best first; ``ids`` holds the ``shoe_id`` values
        of those same shoes in the same order.
    """
    shoes = pd.read_csv(path)

    votes = shoes['rate_count']
    ratings = shoes['shoe_rating']
    mean_rating = ratings.mean()
    min_votes = votes.min()

    # Weighted rating of each shoe, computed on whole columns rather than
    # with a row-by-row apply; the formula itself is unchanged.
    total_weight = votes + min_votes
    shoes['score'] = (votes / total_weight * ratings) + (min_votes / total_weight * mean_rating)

    # Stable sort: shoes with equal scores keep their order from the file.
    ranked = shoes.sort_values('score', ascending=False, kind='stable')
    best = ranked.head(top_n)

    top_rows = best[['shoe_id', 'shoe_name', 'shoe_brand', 'score']].values.tolist()
    return top_rows, best['shoe_id'].tolist()

#===================Part 2: Content-based ===================================

def get_recommendations(query, cosine_sim=cosine_sim):
    """Return up to five catalogue shoes that match a free-text query.

    Each whitespace-separated keyword of the lower-cased query is
    fuzzy-matched to its closest ``combined_text`` entry. The matched texts
    are projected into the fitted TF-IDF space and compared with every shoe;
    per-shoe similarities are summed over the matched texts. Shoes are then
    visited from the highest summed similarity down, and a shoe is kept only
    if at least one keyword occurs literally in its lower-cased description
    or brand.

    Args:
        query: Free-text search string.
        cosine_sim: Not used by this function; kept so existing callers that
            pass it continue to work.

    Returns:
        A tuple ``(shoes, shoe_ids)``. ``shoes`` is a list of dicts with the
        keys ``shoe_id``, ``shoe_name``, ``shoe_brand`` and
        ``shoe_description``; ``shoe_ids`` lists the matching ``shoe_id``
        values in the same order. Both are empty when nothing matches or the
        query contains no keywords.
    """
    keywords = query.lower().split()
    if not keywords:
        # A blank query would hand an empty document list to the vectorizer
        # and the similarity computation; there is nothing to rank anyway.
        return [], []

    top_matches = []
    for keyword in keywords:
        best = process.extractOne(keyword, shoes_data['combined_text'])
        # extractOne yields None when it has no candidates to choose from.
        if best is not None:
            top_matches.append(best[0])
    if not top_matches:
        return [], []

    similarity_scores = cosine_similarity(tfidf_vectorizer.transform(top_matches), tfidf_matrix)
    combined_scores = similarity_scores.sum(axis=0)

    top_shoes = []
    top_shoe_ids = []
    for idx in combined_scores.argsort()[::-1]:
        if not combined_scores[idx] > 0:
            # Shoes without any similarity to the query are never returned.
            continue

        shoe = shoes_data.iloc[idx]
        description = shoe['shoe_description'].lower()
        brand = shoe['shoe_brand'].lower()
        if not any(keyword in description or keyword in brand for keyword in keywords):
            continue

        top_shoes.append({
            'shoe_id': shoe['shoe_id'],
            'shoe_name': shoe['shoe_name'],
            'shoe_brand': shoe['shoe_brand'],
            'shoe_description': shoe['shoe_description'],
        })
        top_shoe_ids.append(shoe['shoe_id'])
        if len(top_shoes) == 5:
            break

    return top_shoes, top_shoe_ids
    

#===========================Part 3: User characteristics ============================================
# Replace the categorical user columns with integer codes, keeping each
# fitted encoder so the codes can be produced and reversed later.
label_encoders = {}
for column in ['gender', 'race']:
    encoder = LabelEncoder()
    users_df[column] = encoder.fit_transform(users_df[column])
    label_encoders[column] = encoder

def recommend_shoe(gender_input, race_input, age):
    """Recommend shoes bought by users who resemble the given profile.

    The target profile ``[age, gender code, race code]`` is compared with
    every row of ``users_df`` using cosine similarity. Similarities are then
    averaged per ``shoe_id`` and shoes are ranked by that average, highest
    first. The full ranking is printed and displayed as a table.

    Args:
        gender_input: ``'male'`` or ``'female'`` (case-insensitive).
        race_input: ``'indian'``, ``'chinese'`` or ``'malay'``
            (case-insensitive).
        age: Age of the target user.

    Returns:
        The (up to) five ``shoe_id`` values with the highest average
        similarity, best first.

    Raises:
        ValueError: If ``gender_input`` or ``race_input`` is not one of the
            supported values.
    """
    gender_map = {'male': 'Male', 'female': 'Female'}
    race_map = {'indian': 'Indian', 'chinese': 'Chinese', 'malay': 'Malay'}

    gender = gender_map.get(gender_input.lower())
    race = race_map.get(race_input.lower())
    if gender is None:
        # Fail here with a clear message rather than passing None to the encoder.
        raise ValueError("Unsupported gender: {!r}".format(gender_input))
    if race is None:
        raise ValueError("Unsupported race: {!r}".format(race_input))

    encode_gender = label_encoders['gender'].transform([gender])[0]
    encode_race = label_encoders['race'].transform([race])[0]

    target_user_features = [[age, encode_gender, encode_race]]

    # NOTE(review): the features are raw age plus small integer category
    # codes, so age dominates the cosine similarity and scores cluster near
    # 1.0. Scaling or one-hot encoding may be worth considering -- confirm
    # the intended behaviour before changing it.
    if users_df.empty:
        similarities = []
    else:
        # A single call compares the target with every past buyer at once.
        user_features = users_df[['age', 'gender', 'race']].values
        similarities = cosine_similarity(target_user_features, user_features)[0]

    # Group the per-user similarities by the shoe each user is linked to.
    shoe_similarity_scores = defaultdict(list)
    for shoe_id, similarity_score in zip(users_df['shoe_id'].tolist(), similarities):
        shoe_similarity_scores[shoe_id].append(similarity_score)

    # Calculate average similarity score for each shoe ID
    average_similarity_scores = {
        shoe_id: sum(scores) / len(scores)
        for shoe_id, scores in shoe_similarity_scores.items()
    }

    # sorted() is stable, so shoes with equal averages keep the order in
    # which they first appear in users_df.
    sorted_shoe_ids = sorted(
        average_similarity_scores,
        key=average_similarity_scores.get,
        reverse=True,
    )
    top_recommended_shoes = sorted_shoe_ids[:5]

    similarity_table = pd.DataFrame({
        'Shoe ID': sorted_shoe_ids,
        'Average Similarity Score': [average_similarity_scores[shoe_id] for shoe_id in sorted_shoe_ids]
    })

    print("\nSimilarity Scores Table:")
    display(similarity_table)
    return top_recommended_shoes

#================output top 5 highest rating shoes=======================================

# Compute the top-rated shoes once; the ID list is reused for the final ranking.
high_rating_shoe, high_rating_shoe_id = high_rating()

# Tabulate the rows and give the columns their display headers.
df_highest_rating = pd.DataFrame(high_rating_shoe).set_axis(
    ['Shoe ID', 'Name', 'Brand', 'Weighted Rating'],
    axis=1,
)

banner_rule = "===================================================="
print(banner_rule)
print("||\t   The Top 5 Highest Rating Shoes \t  ||")
print(banner_rule)
display(df_highest_rating)
print("The top 5 highest rating shoes are : {}".format(high_rating_shoe_id))

#================input criteria======================================================
print("\n--------------------------------------xxx--------------------------------------\n")

# Ask for a free-text query and look up matching shoes.
query = input("Enter what kind of shoes you are looking for : ")
recommendations, top_shoe_ids = get_recommendations(query)

# Show the keyword-related shoes, or say that nothing matched.
if not recommendations:
    print("\nNo shoes found matching the query.")
else:
    df_recommendations = pd.DataFrame(recommendations)
    df_recommendations.columns = ['Shoe ID', 'Name', 'Brand', 'Description']
    print("\n==================================================================")
    print("||\t Top 5 Recommended Shoes Based on your looking \t\t||")
    print("==================================================================")
    display(df_recommendations)
    print("Top 5 Shoe IDs:", ', '.join(map(str, top_shoe_ids)))

# Separator before the user-information inputs.
print("\n--------------------------------------xxx--------------------------------------\n")

# Function to get final recommendations
def get_final_recommendations(recommended_shoe_ids, high_rating_shoe_id, top_shoe_ids, shoes_data):
    """Merge the three recommendation lists and display the best ten shoes.

    Every distinct shoe ID is ranked by:

    1. present in all three lists (counted more than twice);
    2. counted twice;
    3. present only in the keyword results (``top_shoe_ids``);
    4. present only in the user-based results (``recommended_shoe_ids``);
    5. present only in the high-rating results (``high_rating_shoe_id``).

    Ties are broken by the first position of the ID in the concatenation
    high-rating + keyword + user-based.

    Args:
        recommended_shoe_ids: Shoe IDs from the user-similarity step.
        high_rating_shoe_id: Shoe IDs from the weighted-rating step.
        top_shoe_ids: Shoe IDs from the keyword search.
        shoes_data: Catalogue DataFrame with the columns ``shoe_id``,
            ``shoe_name``, ``shoe_brand`` and ``shoe_description``.

    Returns:
        None. The resulting table is printed and displayed.
    """
    # Normalise every ID to int so the same shoe is counted once even when
    # the lists carry it as a string, a Python int or a NumPy integer.
    high_ids = [int(shoe_id) for shoe_id in high_rating_shoe_id]
    keyword_ids = [int(shoe_id) for shoe_id in top_shoe_ids]
    user_ids = [int(shoe_id) for shoe_id in recommended_shoe_ids]

    # Combine all shoe IDs
    all_shoe_ids = high_ids + keyword_ids + user_ids
    shoe_id_counts = Counter(all_shoe_ids)

    # First occurrence of each ID, recorded once instead of calling
    # list.index() for every comparison key.
    first_position = {}
    for position, shoe_id in enumerate(all_shoe_ids):
        first_position.setdefault(shoe_id, position)

    keyword_set = set(keyword_ids)
    user_set = set(user_ids)

    # Priority key function
    def priority_key(shoe_id):
        count = shoe_id_counts[shoe_id]
        if count > 2:
            return 1  # Highest priority
        if count > 1:
            return 2
        if shoe_id in keyword_set:
            return 3
        if shoe_id in user_set:
            return 4
        # Every ID comes from one of the three lists, so what remains here
        # is a shoe that appears only in the high-rating list.
        return 5

    sorted_shoe_ids = sorted(
        shoe_id_counts,
        key=lambda shoe_id: (priority_key(shoe_id), first_position[shoe_id]),
    )

    # Compare IDs as text so the lookup works whether the catalogue stores
    # them as strings or as numbers.
    catalogue_ids = shoes_data['shoe_id'].astype(str)

    shoe_details_list = []
    for shoe_id in sorted_shoe_ids:
        matches = shoes_data.loc[catalogue_ids == str(shoe_id)]
        if matches.empty:
            # An ID without a catalogue entry cannot be shown; skip it
            # rather than fail on the row lookup.
            continue
        shoe_details = matches.iloc[0]
        shoe_details_list.append({
            'Shoe ID': shoe_id,
            'Name': shoe_details['shoe_name'],
            'Brand': shoe_details['shoe_brand'],
            'Description': shoe_details['shoe_description']
        })
        if len(shoe_details_list) == 10:
            break
    df_final_recommendations = pd.DataFrame(shoe_details_list)

    # Display the DataFrame
    print("\n--------------------------------------xxx--------------------------------------\n")
    print("==================================================================")
    print("||\t \t Final 10 Shoes Recommendations \t \t||")
    print("==================================================================")
    display(df_final_recommendations)
    print("\nxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")

# Create widgets for user input
age_widget = widgets.IntSlider(
    description='Age:',
    min=20,
    max=99,
    value=20,
    continuous_update=False,
)
gender_widget = widgets.Dropdown(
    description='Gender:',
    options=['Male', 'Female'],
)
race_widget = widgets.Dropdown(
    description='Race:',
    options=['Malay', 'Chinese', 'Indian'],
)

# Button that triggers the recommendation run
button = widgets.Button(description="Get Recommendations")

# Function to handle button click event
def on_button_clicked(b):
    """Handle a click on the recommendation button.

    Reads the age, gender and race widgets, asks ``recommend_shoe`` for the
    user-based shoe IDs, displays each recommended shoe together with the
    users in ``users_df`` linked to it, and finally displays the merged
    ranking via ``get_final_recommendations``.

    Args:
        b: The clicked button (not used).
    """
    age = age_widget.value
    gender = gender_widget.value
    race = race_widget.value

    # Bail out early when any input is missing
    if age is None or gender is None or race is None:
        print("Please provide all inputs.")
        return

    recommended_shoe_ids = recommend_shoe(gender, race, age)

    gender_encoder = label_encoders['gender']
    race_encoder = label_encoders['race']

    recommendations_list = []
    for shoe_id in recommended_shoe_ids:
        buyer_ids = []
        buyer_genders = []
        buyer_ages = []
        buyer_races = []

        buyers = users_df[users_df['shoe_id'] == shoe_id]
        for buyer_id in buyers.index:
            profile = users_df.loc[buyer_id]
            # Stored codes are turned back into their original labels
            buyer_ids.append(buyer_id)
            buyer_genders.append(gender_encoder.inverse_transform([profile['gender']])[0])
            buyer_ages.append(profile['age'])
            buyer_races.append(race_encoder.inverse_transform([profile['race']])[0])

        recommendations_list.append({
            'Shoe ID': shoe_id,
            'User ID': buyer_ids,
            'Gender': buyer_genders,
            'Age': buyer_ages,
            'Race': buyer_races,
        })

    df_user_recommendations = pd.DataFrame(recommendations_list)

    # Show the per-shoe buyer table
    print("\n==================================================================")
    print("||\t Recommended Shoes with Past Buyers's Information \t||")
    print("==================================================================")
    display(df_user_recommendations)
    print("Top 5 recommended shoe IDs based on your characteristics: {}".format(recommended_shoe_ids))

    # Merge with the rating-based and keyword-based lists and show the result
    get_final_recommendations(recommended_shoe_ids, high_rating_shoe_id, top_shoe_ids, shoes_data)

# Attach the button click event handler
button.on_click(on_button_clicked)

# Disable the button only while an input is actually missing. All three
# input widgets start with a value, so unconditionally disabling the button
# here would leave it unusable until the user changes one of the inputs.
button.disabled = any(
    widget.value is None
    for widget in (age_widget, gender_widget, race_widget)
)

# Function to enable/disable the button based on inputs
def validate_inputs(change):
    """Enable the button only while every input widget holds a value.

    Args:
        change: Change notification passed by the widget (not used).
    """
    inputs_ready = (
        age_widget.value is not None
        and gender_widget.value is not None
        and race_widget.value is not None
    )
    button.disabled = not inputs_ready

# Re-validate the inputs whenever the value of any input widget changes
for input_widget in (age_widget, gender_widget, race_widget):
    input_widget.observe(validate_inputs, 'value')

# Render the input widgets followed by the button
display(age_widget, gender_widget, race_widget, button)

||	   The Top 5 Highest Rating Shoes 	  ||


Unnamed: 0,Shoe ID,Name,Brand,Weighted Rating
0,8,Trail Running Shoes,Salomon,4.752432
1,18,Formal Dress Shoes,Allen Edmonds,4.739412
2,13,Basketball Shoes,Jordan,4.715745
3,2,Running Shoes,Nike,4.712
4,20,Trail Hiking Shoes,Keen,4.64766


The top 5 highest rating shoes are : [8, 18, 13, 2, 20]

--------------------------------------xxx--------------------------------------

Enter what kind of shoes you are looking for : leather

||	 Top 5 Recommended Shoes Based on your looking 		||


Unnamed: 0,Shoe ID,Name,Brand,Description
0,3,Casual Loafers,Sperry,Comfortable slip-on loafers crafted from premi...
1,24,Slip-On Boots,Timberland,Easy-to-wear slip-on boots featuring premium l...
2,14,Boat Shoes,Sperry,Classic boat shoes crafted from durable leathe...
3,23,Leather Sandals,Birkenstock,Premium leather sandals with contoured footbed...
4,18,Formal Dress Shoes,Allen Edmonds,Handcrafted dress shoes made from premium leat...


Top 5 Shoe IDs: 3, 24, 14, 23, 18

--------------------------------------xxx--------------------------------------



IntSlider(value=20, continuous_update=False, description='Age:', max=99, min=20)

Dropdown(description='Gender:', options=('Male', 'Female'), value='Male')

Dropdown(description='Race:', options=('Malay', 'Chinese', 'Indian'), value='Malay')

Button(description='Get Recommendations', disabled=True, style=ButtonStyle())


Similarity Scores Table:


Unnamed: 0,Shoe ID,Average Similarity Score
0,2,0.999889
1,1,0.999795
2,5,0.999649
3,3,0.999633
4,16,0.999628
5,4,0.999612
6,24,0.999463
7,23,0.999172
8,25,0.998755
9,12,0.998455



||	 Recommended Shoes with Past Buyers's Information 	||


Unnamed: 0,Shoe ID,User ID,Gender,Age,Race
0,2,"[3, 13, 27]","[Female, Female, Female]","[27, 25, 28]","[Malay, Malay, Malay]"
1,1,"[2, 11]","[Male, Female]","[45, 29]","[Malay, Malay]"
2,5,"[9, 19, 25]","[Female, Female, Female]","[22, 31, 21]","[Malay, Malay, Malay]"
3,3,[6],[Male],[40],[Malay]
4,16,[26],[Male],[44],[Malay]


Top 5 recommended shoe IDs based on your characteristics: [2, 1, 5, 3, 16]

--------------------------------------xxx--------------------------------------

||	 	 Final 10 Shoes Recommendations 	 	||


Unnamed: 0,Shoe ID,Name,Brand,Description
0,18,Formal Dress Shoes,Allen Edmonds,Handcrafted dress shoes made from premium leat...
1,2,Running Shoes,Nike,High-performance running shoes featuring light...
2,3,Casual Loafers,Sperry,Comfortable slip-on loafers crafted from premi...
3,24,Slip-On Boots,Timberland,Easy-to-wear slip-on boots featuring premium l...
4,14,Boat Shoes,Sperry,Classic boat shoes crafted from durable leathe...
5,23,Leather Sandals,Birkenstock,Premium leather sandals with contoured footbed...
6,1,Classic Sneakers,Adidas,"Classic sneakers with a timeless design, perfe..."
7,5,Formal Oxfords,Johnston & Murphy,Elegant leather oxfords suitable for formal oc...
8,16,Running Sneakers,New Balance,Versatile running sneakers featuring breathabl...
9,8,Trail Running Shoes,Salomon,Trail-ready running shoes featuring aggressive...



xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

