In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
import numpy as np

In [2]:
def getModel():
    user_df = pd.read_csv("UserData.csv", index_col=0)
    cv = CountVectorizer(max_features=5000,stop_words='english')

    def getSimiliarity(column):
        vector = cv.fit_transform(column).toarray()
        return cosine_similarity(vector)

    def getDistance(column):
        col = column/10
        n = col.size
        res = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                res[i][j] = (col[i] - col[j]) ** 2
        return res

    prod_sim = getSimiliarity(user_df.prod_name)
    cat_sim = getSimiliarity(user_df.category)
    age_dist = getDistance(user_df.Age)
    price_dist = getDistance(user_df.avg_price)

    #Calculates user similarity
    W_PROD = 7
    W_CAT = 8
    W_AGE = 2
    W_PRICE = 5
    print(age_dist)
    usr_sim = (W_PROD * prod_sim + W_CAT*cat_sim - W_AGE*age_dist - W_PRICE*price_dist) / (W_PROD + W_CAT + W_AGE + W_PRICE)

    prod_df = pd.read_csv("FlipDatasetProcessed.csv", index_col=0)
    WEIGHT = 0.1
    prod_sim = (1 - WEIGHT) * getSimiliarity(prod_df.desc) + WEIGHT * getSimiliarity(prod_df.product_category_tree)
    return (usr_sim, prod_sim)

In [3]:
model = getModel()

[[0.   0.04 0.04 ... 0.01 0.04 0.  ]
 [0.04 0.   0.   ... 0.09 0.   0.04]
 [0.04 0.   0.   ... 0.09 0.   0.04]
 ...
 [0.01 0.09 0.09 ... 0.   0.09 0.01]
 [0.04 0.   0.   ... 0.09 0.   0.04]
 [0.   0.04 0.04 ... 0.01 0.04 0.  ]]


In [4]:
import itertools
prod_df = pd.read_csv("FlipDatasetProcessed.csv", index_col=0)
user_df = pd.read_csv("UserData.csv", index_col=0)

def get_recommendation(model, usr_index):
    user_df = pd.read_csv("UserData.csv", index_col=0)
    prod_df = pd.read_csv("FlipDatasetProcessed.csv", index_col=0)
    sim_users = [index for index, _ in sorted(list(enumerate(model[0][usr_index])),reverse=True,key = lambda x: x[1])[:5]]
    items = []
    gender = user_df.iloc[usr_index]['Gender']
    done = {}

    def is_ok(idx, gender):
        if not prod_df.iloc[idx].show:
            return False
        if prod_df.iloc[idx].product_name in done:
            return False
        desc = prod_df['desc'][idx]
        male = False
        female = False
        for word in desc.split(' '):
            if word in ["woman", "women", "womens", "women's", "girl", "girls", "lady", "ladies"]:
                female = True
            if word in ["man", "men", "mens" "boy", "men's", "boys", "guy", "guys"]:
                male = True
        return male or not female if gender == 'Male' else female or not male

    for sim_usr_ind in sim_users:
        prod_ind = user_df.iloc[sim_usr_ind]['item_index']
        lst = sorted(list(enumerate(model[1][prod_ind])),reverse=True,key = lambda x: x[1])
        filtered = filter(lambda x: is_ok(x[0], gender), lst)
        top10 = itertools.islice(filtered, 5) 

        for ind, _ in top10:
            items.append(ind)
            done[prod_df.iloc[ind].product_name] = True
    return items
        
usr_ind = 4 
print(f"User: {user_df.iloc[usr_ind]}")
print()
rec = get_recommendation(model, usr_ind)
for idx in rec:
    print(f'{prod_df.iloc[idx]["product_name"]} {idx}')


User: Name                                                   Saman Bhoyar
Gender                                                         Male
Age                                                              21
Product Name 1                  Oneplus wireless bullets z neckband
Product Name 2                                          Smartwatch 
Product Name 3                                              Mobile 
Product Name 4                                                Shoes
Product Name 5                                               Laptop
Email address                                  samanb3012@gmail.com
prod_name         Oneplus wireless bullets z neckband Smartwatch...
avg_price                                                   20359.8
category          Footwear Clothes ElectronicGadgets Mobile Comp...
item_index                                                     8866
Name: 16/08/2023 00:03:25, dtype: object

 Amaze Mobile Car Mobile Holder Stand for Xolo A800 2602
 WorldWearF

In [5]:
import pickle as pkl

pkl.dump(model,open('Model.pkl', 'wb'))