In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import pandas as pd 
import numpy as np

# Importing db of food items across all canteens registered on the platform
df1=pd.read_csv('./db/food.csv')
df1.columns = ['food_id','title','canteen_id','price', 'num_orders', 'category', 'avg_rating', 'num_rating', 'tags']
df1

Unnamed: 0,food_id,title,canteen_id,price,num_orders,category,avg_rating,num_rating,tags
0,1,Chole,1,30,35,Indian,3.9,10,"veg, spicy, healthy, hot"
1,2,Masala Dosa,1,25,40,Indian,3.8,15,"veg, healthy, hot"
2,3,Vadapav,1,25,70,Indian,3.0,10,"veg, spicy, hot"
3,4,Gujarati Thali,1,30,25,Indian,2.5,5,"veg, healthy"
4,5,Manchurian,1,60,50,Chinese,4.6,30,"veg, spicy, hot"
5,6,Noodles,1,80,40,Chinese,4.2,28,"veg, spicy, hot"
6,7,Chai,1,10,70,Beverage,4.0,100,"drink, sweet, hot"
7,8,Cofee,1,30,25,Beverage,3.5,50,"drink, sweet, hot"
8,9,Sp.Cofee,1,40,50,Beverage,4.8,30,"drink, sweet, cold"
9,10,Cold Drink,1,20,65,Beverage,4.2,28,"drink, sweet, cold"


In [3]:
# mean of average ratings of all items
C= df1['avg_rating'].mean()

# the minimum number of votes required to appear in recommendation list, i.e, 60th percentile among 'num_rating'
m= df1['num_rating'].quantile(0.6)

# items that qualify the criteria of minimum num of votes
q_items = df1.copy().loc[df1['num_rating'] >= m]

# Calculation of weighted rating based on the IMDB formula
def weighted_rating(x, m=m, C=C):
    v = x['num_rating']
    R = x['avg_rating']
    return (v/(v+m) * R) + (m/(m+v) * C)

# Applying weighted_rating to qualified items
q_items['score'] = q_items.apply(weighted_rating, axis=1)
#print("q_items",q_items)

# Shortlisting the top rated items and popular items
top_rated_items = q_items.sort_values('score', ascending=False)
pop_items= df1.sort_values('num_orders', ascending=False)
#print("top_rated_items",top_rated_items)
#print("pop_items",pop_items)

In [4]:
# Display results of demographic filtering
top_rated_items[['title', 'num_rating', 'avg_rating', 'score']].head()
pop_items[['title', 'num_orders']].head()
Highestrated=top_rated_items.to_csv('C:/Users/Lenovo/Desktop/Recommendations/Highestrated.csv', index=False)
maximumOrdered=pop_items.to_csv('C:/Users/Lenovo/Desktop/Recommendations/maximumOrdered.csv', index=False)

Unnamed: 0,title,num_rating,avg_rating,score
8,Sp.Cofee,30,4.8,4.334694
4,Manchurian,30,4.6,4.232653
6,Chai,100,4.0,3.96646
7,Cofee,50,3.5,3.627919


Unnamed: 0,title,num_orders
2,Vadapav,70
6,Chai,70
9,Cold Drink,65
4,Manchurian,50
8,Sp.Cofee,50


In [5]:
# TODO: clean data

# Creating soup string for each item
def create_soup(x):            
    tags = x['tags'].lower().split(', ')
    tags.extend(x['title'].lower().split())
    tags.extend(x['category'].lower().split())
    print(tags)
    return " ".join(sorted(set(tags), key=tags.index))

df1['soup'] = df1.apply(create_soup, axis=1)
df1.head(3)

['veg', 'spicy', 'healthy', 'hot', 'chole', 'indian']
['veg', 'healthy', 'hot', 'masala', 'dosa', 'indian']
['veg', 'spicy', 'hot', 'vadapav', 'indian']
['veg', 'healthy', 'gujarati', 'thali', 'indian']
['veg', 'spicy', 'hot', 'manchurian', 'chinese']
['veg', 'spicy', 'hot', 'noodles', 'chinese']
[' drink', 'sweet', 'hot', 'chai', 'beverage']
['drink', 'sweet', 'hot', 'cofee', 'beverage']
['drink', 'sweet', 'cold', 'sp.cofee', 'beverage']
['drink', 'sweet', 'cold', 'cold', 'drink', 'beverage']


Unnamed: 0,food_id,title,canteen_id,price,num_orders,category,avg_rating,num_rating,tags,soup
0,1,Chole,1,30,35,Indian,3.9,10,"veg, spicy, healthy, hot",veg spicy healthy hot chole indian
1,2,Masala Dosa,1,25,40,Indian,3.8,15,"veg, healthy, hot",veg healthy hot masala dosa indian
2,3,Vadapav,1,25,70,Indian,3.0,10,"veg, spicy, hot",veg spicy hot vadapav indian


In [6]:
# Import CountVectorizer and create the count matrix
from sklearn.feature_extraction.text import CountVectorizer
count = CountVectorizer(stop_words='english')

df1['soup']
count_matrix = count.fit_transform(df1['soup'])

# Compute the Cosine Similarity matrix based on the count_matrix
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(count_matrix, count_matrix)

indices_from_title = pd.Series(df1.index, index=df1['title'])
indices_from_food_id = pd.Series(df1.index, index=df1['food_id'])

0    veg spicy healthy hot chole indian
1    veg healthy hot masala dosa indian
2          veg spicy hot vadapav indian
3     veg healthy gujarati thali indian
4      veg spicy hot manchurian chinese
5         veg spicy hot noodles chinese
6         drink sweet hot chai beverage
7        drink sweet hot cofee beverage
8    drink sweet cold sp.cofee beverage
9             drink sweet cold beverage
Name: soup, dtype: object

In [7]:
# Function that takes in food title or food id as input and outputs most similar dishes 
def get_recommendations(title="", cosine_sim=cosine_sim, idx=-1):
    # Get the index of the item that matches the title
    if idx == -1 and title != "":
        idx = indices_from_title[title]

    # Get the pairwsie similarity scores of all dishes with that dish
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the dishes based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the scores of the 10 most similar dishes
    sim_scores = sim_scores[1:3]

    # Get the food indices
    food_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar dishes
    return food_indices

In [8]:
rec=df1.loc[get_recommendations(title="Chai")]
Recommendationfrom1dish=rec.to_csv('C:/Users/Lenovo/Desktop/Recommendations/Recommendationfrom1dish.csv', index=False)

In [9]:
# fetch few past orders of a user, based on which personalized recommendations are to be made
def get_latest_user_orders(user_id, orders, num_orders=3):
    counter = num_orders
    order_indices = []
    
    for index, row in orders[['user_id']].iterrows():
        if row.user_id == user_id:
            counter = counter -1
            order_indices.append(index)
        if counter == 0:
            break
            
    return order_indices

# utility function that returns a DataFrame given the food_indices to be recommended
def get_recomms_df(food_indices, df1, columns, comment):
    row = 0
    df = pd.DataFrame(columns=columns)
    
    for i in food_indices:
        df.loc[row] = df1[['title', 'canteen_id', 'price']].loc[i]
        df.loc[row].comment = comment
        row = row+1
    return df

# return food_indices for accomplishing personalized recommendation using Count Vectorizer
def personalised_recomms(orders, df1, user_id, columns, comment="based on your past orders"):
    order_indices = get_latest_user_orders(user_id, orders)
    food_ids = []
    food_indices = []
    recomm_indices = []
    
    for i in order_indices:
        food_ids.append(orders.loc[i].food_id)
    for i in food_ids:
        food_indices.append(indices_from_food_id[i])
    for i in food_indices:
        recomm_indices.extend(get_recommendations(idx=i))
        
    return get_recomms_df(set(recomm_indices), df1, columns, comment)

# Simply fetch new items added by vendor or today's special at home canteen
def get_new_and_specials_recomms(new_and_specials, users, df1, canteen_id, columns, comment="new/today's special item  in your home canteen"):
    food_indices = []
    
    for index, row in new_and_specials[['canteen_id']].iterrows():
        if row.canteen_id == canteen_id:
            food_indices.append(indices_from_food_id[new_and_specials.loc[index].food_id])
            
    return get_recomms_df(set(food_indices), df1, columns, comment)

# utility function to get the home canteen given a user id
def get_user_home_canteen(users, user_id):
    for index, row in users[['user_id']].iterrows():
        if row.user_id == user_id:
            return users.loc[index].home_canteen
    return -1

# fetch items from previously calculated top_rated_items list
def get_top_rated_items(top_rated_items, df1, columns, comment="top rated items across canteens"):
    food_indices = []
    
    for index, row in top_rated_items.iterrows():
        food_indices.append(indices_from_food_id[top_rated_items.loc[index].food_id])
        
    return get_recomms_df(food_indices, df1, columns, comment)

# fetch items from previously calculated pop_items list
def get_popular_items(pop_items, df1, columns, comment="most popular items across canteens"):
    food_indices = []
    
    for index, row in pop_items.iterrows():
        food_indices.append(indices_from_food_id[pop_items.loc[index].food_id])
        
    return get_recomms_df(food_indices, df1, columns, comment)
    

In [10]:
orders = pd.read_csv('./db/orders.csv')
new_and_specials = pd.read_csv('./db/new_and_specials.csv')
users = pd.read_csv('./db/users.csv')

columns = ['title', 'canteen_id', 'price', 'comment']
current_user = 2
current_canteen = get_user_home_canteen(users, current_user)


personalised_recomms(orders, df1, current_user, columns)
get_new_and_specials_recomms(new_and_specials, users, df1, current_canteen, columns)
get_top_rated_items(top_rated_items, df1, columns)
get_popular_items(pop_items, df1, columns)

Unnamed: 0,title,canteen_id,price,comment
0,Vadapav,1,25,based on your past orders
1,Manchurian,1,60,based on your past orders
2,Noodles,1,80,based on your past orders


Unnamed: 0,title,canteen_id,price,comment
0,Masala Dosa,1,25,new/today's special item in your home canteen
1,Manchurian,1,60,new/today's special item in your home canteen


Unnamed: 0,title,canteen_id,price,comment
0,Sp.Cofee,1,40,top rated items across canteens
1,Manchurian,1,60,top rated items across canteens
2,Chai,1,10,top rated items across canteens
3,Cofee,1,30,top rated items across canteens


Unnamed: 0,title,canteen_id,price,comment
0,Vadapav,1,25,most popular items across canteens
1,Chai,1,10,most popular items across canteens
2,Cold Drink,1,20,most popular items across canteens
3,Manchurian,1,60,most popular items across canteens
4,Sp.Cofee,1,40,most popular items across canteens
5,Masala Dosa,1,25,most popular items across canteens
6,Noodles,1,80,most popular items across canteens
7,Chole,1,30,most popular items across canteens
8,Gujarati Thali,1,30,most popular items across canteens
9,Cofee,1,30,most popular items across canteens


In [11]:
a=personalised_recomms(orders, df1, current_user, columns)
b=get_new_and_specials_recomms(new_and_specials, users, df1, current_canteen, columns)
c=get_top_rated_items(top_rated_items, df1, columns)
d=get_popular_items(pop_items, df1, columns)

a.to_csv('C:/Users/Lenovo/Desktop/Recommendations/personalised_recomms.csv', index=False)
b.to_csv('C:/Users/Lenovo/Desktop/Recommendations/get_new_and_specials_recomms.csv', index=False)
c.to_csv('C:/Users/Lenovo/Desktop/Recommendations/get_top_rated_items.csv', index=False)
d.to_csv('C:/Users/Lenovo/Desktop/Recommendations/get_popular_items.csv', index=False)

In [12]:
personalised_recomms(orders, df1, current_user, columns)
get_new_and_specials_recomms(new_and_specials, users, df1, current_canteen, columns)
get_top_rated_items(top_rated_items, df1, columns)
get_popular_items(pop_items, df1, columns)

Unnamed: 0,title,canteen_id,price,comment
0,Vadapav,1,25,based on your past orders
1,Manchurian,1,60,based on your past orders
2,Noodles,1,80,based on your past orders


Unnamed: 0,title,canteen_id,price,comment
0,Masala Dosa,1,25,new/today's special item in your home canteen
1,Manchurian,1,60,new/today's special item in your home canteen


Unnamed: 0,title,canteen_id,price,comment
0,Sp.Cofee,1,40,top rated items across canteens
1,Manchurian,1,60,top rated items across canteens
2,Chai,1,10,top rated items across canteens
3,Cofee,1,30,top rated items across canteens


Unnamed: 0,title,canteen_id,price,comment
0,Vadapav,1,25,most popular items across canteens
1,Chai,1,10,most popular items across canteens
2,Cold Drink,1,20,most popular items across canteens
3,Manchurian,1,60,most popular items across canteens
4,Sp.Cofee,1,40,most popular items across canteens
5,Masala Dosa,1,25,most popular items across canteens
6,Noodles,1,80,most popular items across canteens
7,Chole,1,30,most popular items across canteens
8,Gujarati Thali,1,30,most popular items across canteens
9,Cofee,1,30,most popular items across canteens
