# Recommending Retail Products with Memory-based model
> Fitting KNN on a retail dataset to recommend retail items to customers

- toc: true
- badges: true
- comments: true
- categories: [KNN, Retail, MongoDB, PrivateData]
- image:

## Setup

In [None]:
!pip install -q dnspython

In [None]:
import os
import dns
import json
import pickle
import numpy as np
import pandas as pd
from pymongo import MongoClient
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

## Data ingestion

In [None]:
client = MongoClient("mongodb+srv://<username>:<password>@cluster0.xxxxx.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")
db = client.get_database("OnlineGroceryDB")

for col in db.collection_names():
  cursor = db[col].find()
  pd.DataFrame(list(cursor)).to_csv('{}.csv'.format(col))

!zip retail_data.zip ./*.csv

## Data schema

Let's analyze the full data schema

In [70]:
pd.read_csv("Customer.csv", index_col=0).columns

Index(['_id', 'Customer_id', 'Customer name', 'Date of Birth', 'Gender',
       'Education', 'City', 'STATE', 'ZIP_Code', 'Phone', 'credit_card?',
       'customer_id', 'customer name', 'phone'],
      dtype='object')

In [76]:
import glob
import pandas as pd
from pathlib import Path

pd.set_option('display.expand_frame_repr', False)

schema = pd.DataFrame()

for i, filepath in enumerate(glob.glob("./*.csv")):
  df = pd.read_csv(filepath, index_col=0)
  schema.loc[i,"filename"] = Path(filepath).stem
  schema.loc[i,"shape"] = str(df.shape)
  schema.loc[i,"columns"] = str(list(df.columns))

schema

Unnamed: 0,filename,shape,columns
0,User_details,"(53297, 4)","['_id', 'User_Id', 'password', 'customer_id']"
1,Product_Review,"(2854, 10)","['_id', 'review_id', 'order_id', 'customer_id'..."
2,Customer,"(33639, 14)","['_id', 'Customer_id', 'Customer name', 'Date ..."
3,Order,"(53299, 10)","['_id', 'customer_id', 'order_id', 'order_purc..."
4,Return_Product,"(495, 8)","['_id', 'order_id', '_id (order_return_id)', '..."
5,Order_Item,"(350009, 10)","['_id', 'product_id', 'seller_id', 'order_id',..."
6,Payment,"(53220, 6)","['_id', 'order_id', 'payment_value', 'payment_..."
7,Product_Category,"(75, 5)","['_id', 'product_category_id', 'product_catego..."
8,Offer,"(52256, 8)","['_id', 'order_id', 'product_id', 'offer_name'..."
9,customer_cart,"(8, 4)","['_id', 'customer_id', 'product_id', 'created_..."


> Note: We are only using a small part of the full dataset

## Preprocessing and Modeling

In [None]:
data_path = '/content'

df_Order = pd.read_csv(os.path.join(data_path,'Order.csv'), usecols=['customer_id', 'order_id'])
df_Order_Item = pd.read_csv(os.path.join(data_path,'Order_Item.csv'), usecols=['product_id', 'order_id'])
df_Product_Review = pd.read_csv(os.path.join(data_path,'Product_Review.csv'), usecols=['product_id', 'ratings'])
df_order_order_item = df_Order.merge(df_Order_Item, on='order_id')
Product_rating_by_Customer = df_order_order_item.merge(df_Product_Review, on='product_id') # Change the join key to customer_id once data available
Product_rating_by_Customer.drop_duplicates(subset=['customer_id', 'product_id'], keep='first', inplace=True)
df_User_interaction_mat = Product_rating_by_Customer.pivot(index='product_id', columns='customer_id', values='ratings').fillna(0)
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(csr_matrix(df_User_interaction_mat.values))

def getPrediction(product_id):
  df_product = pd.read_csv(os.path.join(data_path,'Product.csv'))
  query_index = df_User_interaction_mat.index.get_loc(product_id)
  distances, indices = model_knn.kneighbors(df_User_interaction_mat.iloc[query_index, :].values.reshape(1, -1), n_neighbors=6)
  lst=[]
  for i in range(1, len(distances.flatten())):
      lst.append(df_product[df_product.product_id == df_User_interaction_mat.index[indices.flatten()[i]]].set_index('product_id').to_dict(orient="index"))
  return lst

## Inference

In [62]:
getPrediction("PRO012")

[{'PRO571': {'product_category_id': 'CAT020',
   'product_image_path': nan,
   'product_name': 'BERGNER Argent Tri-ply 20 cm, 3.1 L Casserole/ Dutch Ov...',
   'product_price': 3025.0,
   'product_qty': 133}},
 {'PRO716': {'product_category_id': 'CAT022',
   'product_image_path': nan,
   'product_name': 'Wingreens Farms Oregano Leaves (30g)',
   'product_price': 449.0,
   'product_qty': 221}},
 {'PRO119': {'product_category_id': 'CAT009',
   'product_image_path': 'https://rukminim1.flixcart.com/image/612/612/kmf7ki80/toast-rack/0/w/d/1pc-non-stick-wire-cookie-cooling-rack-for-baking-oven-safe-tool-original-imagfbmxkgauhpza.jpeg?q=70',
   'product_name': 'Cooling rack',
   'product_price': 39.0,
   'product_qty': 230}},
 {'PRO449': {'product_category_id': 'CAT018',
   'product_image_path': nan,
   'product_name': 'Eco Kraft Brown Packaging Paper Roll 30 Inch * 5 Mtr 12...',
   'product_price': 210.0,
   'product_qty': 146}},
 {'PRO621': {'product_category_id': 'CAT021',
   'product_imag

In [63]:
getPrediction("PRO010")

[{'PRO707': {'product_category_id': 'CAT022',
   'product_image_path': nan,
   'product_name': 'DietGro Fresh Chilli Flakes & pizza Seasoning',
   'product_price': 198.0,
   'product_qty': 147}},
 {'PRO708': {'product_category_id': 'CAT022',
   'product_image_path': nan,
   'product_name': "chef's art Hot Chilli Flakes Seasoning",
   'product_price': 261.0,
   'product_qty': 151}},
 {'PRO872': {'product_category_id': 'CAT027',
   'product_image_path': nan,
   'product_name': 'Aum Fresh Pav Bhaji | Freeze Dried - Instant Pav Bhaji',
   'product_price': 220.0,
   'product_qty': 191}},
 {'PRO812': {'product_category_id': 'CAT025',
   'product_image_path': nan,
   'product_name': 'MCaffeine Naked & Raw Mattifying Coffee Hand Cream',
   'product_price': 285.0,
   'product_qty': 238}},
 {'PRO534': {'product_category_id': 'CAT019',
   'product_image_path': nan,
   'product_name': 'indulekha Bhringa Hair Oil',
   'product_price': 337.0,
   'product_qty': 219}}]

In [64]:
getPrediction("PRO015")

[{'PRO112': {'product_category_id': 'CAT009',
   'product_image_path': 'https://www.bigbasket.com/media/uploads/p/m/40186892_5-anjali-power-free-blender-green-grey.jpg',
   'product_name': 'Blender',
   'product_price': 149.0,
   'product_qty': 215}},
 {'PRO837': {'product_category_id': 'CAT026',
   'product_image_path': nan,
   'product_name': 'Pack of 12 Safety Prevention Posters for Hospital Offic',
   'product_price': 875.0,
   'product_qty': 204}},
 {'PRO563': {'product_category_id': 'CAT020',
   'product_image_path': nan,
   'product_name': 'Flipkart SmartBuy Induction Bottom Cookware Set of 3',
   'product_price': 899.0,
   'product_qty': 159}},
 {'PRO042': {'product_category_id': 'CAT003',
   'product_image_path': 'https://rukminim1.flixcart.com/image/612/612/kn3i1zk0/pulses/d/5/m/500-chana-dal-500-gram-organic-bengal-gram-chana-dal-chana-dal-original-imagfurtnxwpepvb.jpeg?q=70',
   'product_name': 'Bengal Gram',
   'product_price': 65.0,
   'product_qty': 158}},
 {'PRO492': {'