In [1]:
import pandas as pd
from surprise import SVDpp
from surprise import Reader, Dataset
import random
import numpy as np
from surprise import SVDk 
from datetime import datetime
from sklearn import metrics
import matplotlib
matplotlib.use('nbagg')
import pickle
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import seaborn as sns
sns.set_style('whitegrid')
import os
from sklearn.preprocessing import MinMaxScaler
from scipy import sparse
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
import random

In [2]:
#reading given order_data.csv provided to us
df_order=pd.read_csv('order_data.csv')
df_order.head()

Unnamed: 0,customer_id,order_id,restaurant_id,ordered_time,pickedup_time,delivered_time,discount_flag,customer_geohash,gmv_total,items,post_status
0,13858744,234741430,337897,2024-03-22 21:09:46.000,2024-03-22 21:26:04.200,2024-03-22 21:33:43.885,0,4nxfvt5,465.0,"[106613733, 100356997]",Completed
1,22274504,281286557,139754,2024-03-22 11:33:14.000,2024-03-22 11:49:09.361,2024-03-22 12:02:12.945,0,4nxfvsm,252.0,[102024570],Completed
2,20035394,308688142,266960,2024-03-22 12:27:16.000,2024-03-22 12:52:49.123,2024-03-22 13:20:11.044,0,4nxfu3e,388.0,"[105044875, 104512619, 107196374]",Completed
3,16663800,292290919,204367,2024-03-22 10:10:27.000,2024-03-22 10:33:56.868,2024-03-22 11:03:18.933,0,4nxfeq3,378.0,"[107422570, 102948856]",Completed
4,18044708,243253068,232022,2024-03-22 20:31:48.000,2024-03-22 20:38:48.520,2024-03-22 20:49:23.977,0,4nxfw5y,303.0,[107962930],Completed


In [3]:
#size of data provided
print(df_order.shape)

(950457, 11)


In [4]:
#reading given items_metadata.csv provided to us
df_item=pd.read_csv('items_metadata.csv')
df_item.head(10)

Unnamed: 0,item_id,dish_family,category,cuisine
0,102927323,pav,snacks - evening,north indian
1,102250682,soft drinks,beverage - branded cola/cold,others
2,103846009,breakfast,snacks- others,north indian
3,105415872,dips,sides,others
4,100610198,combos,combos,continental
5,107638608,chocolates,desserts - others,others
6,105296249,dips,sides,others
7,101319083,chocolates,desserts - pastries,others
8,100331713,mousse,desserts - ice cream,others
9,100805961,vada pav,snacks - evening,south indian


In [5]:
#removing ambiguity of Item_id
df_item.drop_duplicates(subset='item_id')



Unnamed: 0,item_id,dish_family,category,cuisine
0,102927323,pav,snacks - evening,north indian
1,102250682,soft drinks,beverage - branded cola/cold,others
2,103846009,breakfast,snacks- others,north indian
3,105415872,dips,sides,others
4,100610198,combos,combos,continental
5,107638608,chocolates,desserts - others,others
6,105296249,dips,sides,others
7,101319083,chocolates,desserts - pastries,others
8,100331713,mousse,desserts - ice cream,others
9,100805961,vada pav,snacks - evening,south indian


In [6]:
#creating item_lookup dataframe that link each Item_id to its description
item_lookup=pd.DataFrame({})
item_lookup['item']=df_item['item_id']
item_lookup['Description']= df_item['item_id'].map(str)+' '+(df_item['dish_family'])+ '-----'+ df_item['category']+' ('+df_item['cuisine']+')'


In [7]:
#converting item_lookup to dictionary for fast lookup while generating recommendations
item_desc_dict=dict(zip(list(item_lookup['item']),list(item_lookup['Description'])))


In [8]:
#dumping 
pickle.dump(item_desc_dict, open("item_desc.p", "wb")) 

In [9]:
#reading preprocessed(train+test) dataset 
df_full=pd.read_csv('full.csv')
df_full.head()


Unnamed: 0,item,user
0,100004385,10000010
1,101719622,10000010
2,103209770,10000010
3,106442689,10000010
4,106442689,10000010


In [None]:
#incorporating count of irder of same item by same user
df_full.groupby(('user','item')).item.agg('count')
u_i_c=pd.DataFrame({'count':df_full.groupby(('user','item')).item.agg('count')}).reset_index()

In [11]:
#binning counts to make counts less skewed
bins = [0, 1,2, 3,8,120]
labels = [1,2,3,4,5]
u_i_c['count'] = pd.cut(u_i_c['count'], bins=bins, labels=labels)


In [12]:
#final dataset format neede to feed to our model
u_i_c[55:65]

Unnamed: 0,user,item,count
55,10000690,101593327,1
56,10000690,102438804,1
57,10000690,105742169,1
58,10000690,106119148,4
59,10000690,108163887,2
60,10000779,105247104,1
61,10000779,105705022,1
62,10000840,100780831,2
63,10000840,107070205,2
64,10001010,100372382,1


In [None]:
#generating SPARSE MATRIX 

user= list(np.sort(u_i_c.user.unique())) # Get our unique customers
item = list(u_i_c.item.unique()) # Get our unique products that were purchased
count = list(u_i_c['count']) # All of our purchases

rows = u_i_c.user.astype('category', categories = user).cat.codes 
# Get the associated row indices
cols = u_i_c.item.astype('category', categories = item).cat.codes 
# Get the associated column indices
purchases_sparse = sparse.csr_matrix((count, (rows, cols)), shape=(len(user), len(item)))

In [14]:
(purchases_sparse)

<170779x160154 sparse matrix of type '<type 'numpy.float64'>'
	with 1412789 stored elements in Compressed Sparse Row format>

In [15]:
#TRAINING DATA MAKING


def make_train(ratings, pct_test = 0.2):
    test_set = ratings.copy() # Make a copy of the original set to be the test set. 
    test_set[test_set != 0] = 1 # Store the test set as a binary preference matrix
    training_set = ratings.copy() # Make a copy of the original data we can alter as our training set. 
    nonzero_inds = training_set.nonzero() # Find the indices in the ratings data where an interaction exists
    nonzero_pairs = list(zip(nonzero_inds[0], nonzero_inds[1])) # Zip these pairs together of user,item index into list
    random.seed(0) # Set the random seed to zero for reproducibility
    num_samples = int(np.ceil(pct_test*len(nonzero_pairs))) # Round the number of samples needed to the nearest integer
    samples = random.sample(nonzero_pairs, num_samples) # Sample a random number of user-item pairs without replacement
    user_inds = [index[0] for index in samples] # Get the user row indices
    item_inds = [index[1] for index in samples] # Get the item column indices
    training_set[user_inds, item_inds] = 0 # Assign all of the randomly chosen user-item pairs to zero
    training_set.eliminate_zeros() # Get rid of zeros in sparse array storage after update to save space
    return training_set, test_set, list(set(user_inds)) # Output the unique list of user rows that were altered  

In [16]:
product_train, product_test, product_users_altered = make_train(purchases_sparse, pct_test = 0.2)

In [17]:
matrix_size = purchases_sparse.shape[0]*purchases_sparse.shape[1] # Number of possible interactions in the matrix
num_purchases = len(purchases_sparse.nonzero()[0]) # Number of items interacted with
sparsity = 100*(1 - (num_purchases/matrix_size))
sparsity

100

In [18]:
import implicit

In [19]:
#TRAINING MODEL

alpha = 15
user_vecs, item_vecs = implicit.alternating_least_squares((product_train*alpha).astype('double'), 
                                                          factors=40, 
                                                          regularization = 2, 
                                                         iterations = 100)
# training is very fast

No handlers could be found for logger "implicit"
100%|██████████| 100.0/100 [00:54<00:00,  2.02it/s]


In [20]:
customers_arr = np.array(user) # Array of customer IDs from the ratings matrix
products_arr = np.array(item) # Array of product IDs from the ratings matrix

In [21]:
customers_arr

array([10000010, 10000033, 10000130, ..., 24995288, 24995290, 24995439])

In [22]:

def auc_score(predictions, test):

    fpr, tpr, thresholds = metrics.roc_curve(test, predictions)
    return metrics.auc(fpr, tpr)   

def calc_mean_auc(training_set, altered_users, predictions, test_set):
    store_auc = [] 
    popularity_auc = [] 
    pop_items = np.array(test_set.sum(axis = 0)).reshape(-1) 
    item_vecs = predictions[1]
    for user in altered_users: 
        training_row = training_set[user,:].toarray().reshape(-1) 
        zero_inds = np.where(training_row == 0) 
        user_vec = predictions[0][user,:]
        pred = user_vec.dot(item_vecs).toarray()[0,zero_inds].reshape(-1)
        actual = test_set[user,:].toarray()[0,zero_inds].reshape(-1) 
        pop = pop_items[zero_inds] 
        store_auc.append(auc_score(pred,actual))
        popularity_auc.append(auc_score(pop, actual))
        return float('%.3f'%np.mean(store_auc)), float('%.3f'%np.mean(popularity_auc))  


In [23]:
calc_mean_auc(product_train, product_users_altered, 
              [sparse.csr_matrix(user_vecs), sparse.csr_matrix(item_vecs.T)], product_test)


(0.859, 0.736)

In [24]:
def get_items_purchased(customer_id, mf_train, customers_list, products_list, item_lookup):
    cust_ind = np.where(customers_list == customer_id)[0][0] # Returns the index row of our customer id
    purchased_ind = mf_train[cust_ind,:].nonzero()[1] # Get column indices of purchased items
    prod_codes = products_list[purchased_ind] # Get the stock codes for our purchased items
    return item_lookup.loc[item_lookup.item.isin(prod_codes)]

In [25]:
customers_arr[10:20]


array([10000690, 10000779, 10000840, 10001010, 10001073, 10001147,
       10001163, 10001220, 10001491, 10001756])

In [26]:
get_items_purchased(10001756, product_train, customers_arr, products_arr, item_lookup)


Unnamed: 0,item,Description
56252,105597752,105597752 fry-----starters (mexican)
65919,100842798,100842798 tart-----desserts - pastries (others)
91341,103397969,103397969 tandoor-----starters (north indian)
127904,101680907,101680907 lollipops-----starters (chinese)
133790,101238606,101238606 thalis-----thali (north indian)


In [27]:


def rec_items(customer_id, mf_train, user_vecs, item_vecs, customer_list, item_list, item_lookup, num_items = 10):
    
    cust_ind = np.where(customer_list == customer_id)[0][0] # Returns the index row of our customer id
    pref_vec = mf_train[cust_ind,:].toarray() # Get the ratings from the training set ratings matrix
    pref_vec = pref_vec.reshape(-1) + 1 # Add 1 to everything, so that items not purchased yet become equal to 1
    pref_vec[pref_vec > 1] = 0 # Make everything already purchased zero
    rec_vector = user_vecs[cust_ind,:].dot(item_vecs.T) # Get dot product of user vector and all item vectors
    # Scale this recommendation vector between 0 and 1
    min_max = MinMaxScaler()
    rec_vector_scaled = min_max.fit_transform(rec_vector.reshape(-1,1))[:,0] 
    recommend_vector = pref_vec*rec_vector_scaled 
    # Items already purchased have their recommendation multiplied by zero
    product_idx = np.argsort(recommend_vector)[::-1][:num_items] # Sort the indices of the items into order 
    # of best recommendations
    rec_list = [] # start empty list to store items
    for index in product_idx:
        code = item_list[index]
        rec_list.append([code, item_lookup.Description.loc[item_lookup.item == code].iloc[0]]) 
        # Append our descriptions to the list
    codes = [item[0] for item in rec_list]
    descriptions = [item[1] for item in rec_list]
    final_frame = pd.DataFrame({'item': codes, 'Description': descriptions}) # Create a dataframe 
    return final_frame[['item', 'Description']] # Switch order of columns around

In [28]:
recomm=rec_items(10001756, product_train, user_vecs, item_vecs, customers_arr, products_arr, item_lookup,
                       num_items = 10)


In [29]:
#generating recommendations
print(recomm)

        item                                        Description
0  102059685  102059685 shawarma-----snacks - evening (arabian)
1  103708886  103708886 biryani-----main course - standalone...
2  102465089  102465089 burgers & hotdogs-----main course - ...
3  105932517          105932517 others-----starters (chettinad)
4  107366620  107366620 indian breads-----main course - comp...
5  103497749    103497749 sweets-----desserts - sweets (others)
6  104191855   104191855 brownie-----desserts - others (others)
7  102623496      102623496 tandoor-----starters (north indian)
8  102207981  102207981 burgers & hotdogs-----main course - ...
9  102509979  102509979 sweets-----main course - complement/...


## SERVER RUNNING

In [30]:
file = open("item_desc.p",'r')
item_desc = pickle.load(file)

file = open("item_image.p",'r')
item_image = pickle.load(file)

In [31]:
from flask import Flask, render_template, redirect, url_for, request
import numpy as np

In [32]:
app = Flask(__name__)
@app.route("/index1/<value>")
def index1(value):
    pastOrder = get_items_purchased(int(value), product_train, customers_arr, products_arr, item_lookup)
    recomm = rec_items(int(value), product_train, user_vecs, item_vecs, customers_arr, products_arr, item_lookup,
                       num_items = 10)
    image_name_r={}
    image_name_po={}
    des_r = {}
    des_po={}
    for i in range(len(recomm['item'])):
        indx1 = int(recomm['item'].iloc[i])
        image_name_r[i] = '/'+item_image[indx1]
        des_r[i]=recomm['Description'].iloc[i]
    for i in range(len(pastOrder['item'])):
        indx2 = (pastOrder['item'].iloc[i])
        image_name_po[i] = '/'+item_image[indx2] 
        des_po[i]=pastOrder['Description'].iloc[i]
        
    return render_template('index1.html',filename=image_name_r,filename1=image_name_po,vals=value,des=des_r,des1=des_po)

In [33]:
# Route for handling the login page logic
@app.route('/', methods=['GET', 'POST'])
def login():
    error = None
    if request.method == 'POST':
        val = request.form['username']
        return redirect(url_for('index1',value=val))
    return render_template('login.html', error=error)

In [None]:
if __name__ == "__main__":
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


ERROR:flask.app:Exception on /index1/alt="Northern [GET]
Traceback (most recent call last):
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 2292, in wsgi_app
    response = self.full_dispatch_request()
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1815, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1718, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1813, in full_dispatch_request
    rv = self.dispatch_request()
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1799, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "<ipython-input-32-6a5477173cde>", line 4, in index1
    pastOrder = get_items_purchased(int(value), product_train, customers_arr, products_arr

ERROR:flask.app:Exception on /index1/alt="Northern [GET]
Traceback (most recent call last):
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 2292, in wsgi_app
    response = self.full_dispatch_request()
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1815, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1718, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1813, in full_dispatch_request
    rv = self.dispatch_request()
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1799, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "<ipython-input-32-6a5477173cde>", line 4, in index1
    pastOrder = get_items_purchased(int(value), product_train, customers_arr, products_arr

ERROR:flask.app:Exception on /index1/alt="Cinque [GET]
Traceback (most recent call last):
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 2292, in wsgi_app
    response = self.full_dispatch_request()
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1815, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1718, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1813, in full_dispatch_request
    rv = self.dispatch_request()
  File "/home/chaikesh/anaconda2/lib/python2.7/site-packages/flask/app.py", line 1799, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "<ipython-input-32-6a5477173cde>", line 4, in index1
    pastOrder = get_items_purchased(int(value), product_train, customers_arr, products_arr, 