<a href="https://colab.research.google.com/github/neha702/Collaborative-shopping/blob/main/User_user_based_Collaborative_filtering_recommendation_engine_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**User-user based collaborative filtering recommendation system**

For this challenge,we have prepared a user-user based collaborative filtering recommendation system for shopping that will provide us the top similar users to a particular user and the recommended items to that user other than what he/she has liked which will help in promortion of more and more items that it has not chosen before but may like now.

**Data preprocessing**

In [1]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#Data outlook
data =pd.read_csv('/content/Users_likes_Dislikes_list.csv')
data.head(15)

Unnamed: 0,Timestamp,Name (with surname),Gender,For males: [Formal shirts],For males: [Jackets],For males: [Casual shirts],For males: [Jeans],For females: [Skirts],For females: [Kurtis],For females: [Trousers],For females: [Tops]
0,4/4/2021 15:40:01,Neha Das,Female,,,,,,1.0,,1.0
1,4/4/2021 15:48:59,Vinayak raj,Male,,1.0,1.0,1.0,,,,
2,4/4/2021 15:57:04,Prapti parasar,Female,,,,,,,1.0,
3,4/4/2021 16:10:51,Tarpan Bhattacharya,Male,1.0,,,1.0,,,,
4,4/4/2021 16:24:48,Ryan,Male,,,1.0,1.0,,,,
5,4/4/2021 17:00:09,Aritra Mondal,Male,,,1.0,1.0,,,,
6,4/4/2021 17:14:00,Sparsh Mehta,Male,1.0,1.0,,1.0,,1.0,1.0,1.0
7,4/4/2021 17:14:13,Rishabh Anand,Male,,,,1.0,,,,
8,4/4/2021 17:34:19,Tarishi Jain,Female,,,,,,,1.0,1.0
9,4/4/2021 17:41:48,Prerna Bhendarkar,Female,,,,,1.0,1.0,1.0,1.0


In [3]:
#Timestamp column is of no use to us, so we will drop it
data.drop(['Timestamp'],inplace=True,axis=1)

In [4]:
#Renaming the columns
data.columns=['Users','Gender','Formal Shirts','Jackets','Casual shirts','Jeans','Skirts','Kurtis',
               'Trousers','Tops']

In [5]:
#Check for missing data in items columns and fill it with 0.
data.isnull().sum()
data=data.fillna(0)
data.isnull().sum()

Users            0
Gender           0
Formal Shirts    0
Jackets          0
Casual shirts    0
Jeans            0
Skirts           0
Kurtis           0
Trousers         0
Tops             0
dtype: int64

In [6]:
#Data overlook after preprocessing
data.head(15)

Unnamed: 0,Users,Gender,Formal Shirts,Jackets,Casual shirts,Jeans,Skirts,Kurtis,Trousers,Tops
0,Neha Das,Female,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
1,Vinayak raj,Male,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
2,Prapti parasar,Female,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,Tarpan Bhattacharya,Male,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,Ryan,Male,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
5,Aritra Mondal,Male,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
6,Sparsh Mehta,Male,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0
7,Rishabh Anand,Male,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
8,Tarishi Jain,Female,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
9,Prerna Bhendarkar,Female,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0


In [7]:
#Function to find relationship(similarity) between two users using pearson correlation coefficient.
def weight_factor(x, y): 
    t1, t2, t3 = 0, 0, 0 
    for i, j in zip(x, y):
        t1+=i*j
        t2+=i*i
        t3+=j*j
    return t1/(np.sqrt(t2) * np.sqrt(t3))

In [8]:
#First one is active user (Neha Das),we find similarity with other users
import numpy as np
x = data.iloc[0,2:]
similarity = np.array([(data.iloc[i,0],weight_factor(x,data.iloc[i, 2:])) for i in range(1,data.shape[0],1)])
similarity

array([['Vinayak raj', '0.0'],
       ['Prapti parasar', '0.0'],
       ['Tarpan Bhattacharya', '0.0'],
       ['Ryan', '0.0'],
       ['Aritra Mondal', '0.0'],
       ['Sparsh Mehta', '0.5773502691896258'],
       ['Rishabh Anand', '0.0'],
       ['Tarishi Jain', '0.4999999999999999'],
       ['Prerna Bhendarkar', '0.7071067811865475'],
       ['Sakshat Jain', '0.0'],
       ['Nayan Jyoti Bordoloi', '0.0'],
       ['Monalisa Nath', '0.40824829046386296'],
       ['Ramchandra Nath', '0.0'],
       ['Jugami Brahma', '0.6324555320336759'],
       ['Chinmayee Ray', '0.0'],
       ['Rituparna Rajbongshi', '0.7071067811865475'],
       ['Maria Basumatary', '0.7071067811865475'],
       ['Jayashree Devi', '0.4999999999999999'],
       ['Rupshikha Devi', '0.7071067811865475'],
       ['Puja Bhendarkar', '0.40824829046386296'],
       ['Mandira Sharma', '0.4999999999999999'],
       ['Baby Devi Nath', '0.7071067811865475'],
       ['Bhabna Borah', '0.8164965809277259'],
       ['Munu chamuah',

**Sorting neighbours based on similarity weights**

In [9]:
ind = np.argsort( similarity[:,1] )
similarity = similarity[ind]
similarity

array([['Vinayak raj', '0.0'],
       ['Dipesh Roy', '0.0'],
       ['Sheetal Sahu', '0.0'],
       ['Monfroline Gogoi', '0.0'],
       ['Sabuj Saikia', '0.0'],
       ['Debanga Bhuya', '0.0'],
       ['Jabed Akhtar', '0.0'],
       ['Dibyajyoti Patgiri', '0.0'],
       ['Pratik', '0.0'],
       ['Rishiraj Chowdhuri', '0.0'],
       ['Sai Charan', '0.0'],
       ['Nitya Chandak', '0.0'],
       ['Tribhuban Panani', '0.0'],
       ['Aditya Vikram Das', '0.0'],
       ['Nishant Sharma', '0.0'],
       ['Pathikrit Chanda', '0.0'],
       ['Saranga K.Mahanta', '0.0'],
       ['Athul Raj', '0.0'],
       ['Dinesh Pamidi', '0.0'],
       ['Tonoy Boruah', '0.0'],
       ['Snehal Nayan', '0.0'],
       ['Bishal Das', '0.0'],
       ['Rahul Jain', '0.0'],
       ['Purobi Sorong', '0.0'],
       ['Amlan Bora', '0.0'],
       ['Tonmoy Baruah', '0.0'],
       ['Abhishek Jha', '0.0'],
       ['Suman Bhowal', '0.0'],
       ['Debojit Bora', '0.0'],
       ['Rohit Ravi Kumar', '0.0'],
       ['Ram De

**Neigbours based on threshold(Similarity matrix)**

In [10]:
#Sorting the weights in descending order giving us the top neighbours first.
index= np.argsort( similarity[:,1] )
index=index[::-1]
similarity=similarity[index]

In [11]:
#We will be taking neighbours having similarity value(weights) more than 0.5
neighbours = similarity[similarity[:,1].astype(float) > 0.5] 

In [12]:
#Printing the top 10 neighbours of any user( taking here Neha Das)
print('Suggested friends for Neha Das are(based on your choices):')
if (len(neighbours)>10):
   for i in range(0,10,1):  #Top 10 neighbours
       print(neighbours[i][0])   
else:
    for i in range(len(neighbours)):
       print(neighbours[i][0])

Suggested friends for Neha Das are(based on your choices):
Devjani Devi
Roseleen Baruah
Nisha Das
Himasmita das
Himashree Barman
Diksha Nath
Neelima Saharia
Rituparna Roy
Rashmita Das
Bhabna Borah


In [14]:
#Function to find top users and similar items
def model(user):
    recommended_list=[]
    top_list=[]
    x = data.iloc[data.loc[data.Users == user].index[0],2:]
    similar = np.array([(data.iloc[i,0],weight_factor(x,data.iloc[i, 2:])) for i in range(0,data.shape[0],1)])
    index= np.argsort( similar[:,1] )
    index=index[::-1]
    similar=similar[index] 
    neighbours = similar[similar[:,1].astype(float) > 0.6]  #Taking threshold as 0.6
    for i in range(0,len(neighbours),1):
        for j in range(2,len(data.columns),1):
            if data.iloc[data.loc[data.Users == neighbours[i][0]].index[0],j]==1 and data.iloc[data.loc[data.Users == user].index[0],j]==0:
               recommended_list.append(data.columns[j])
    if (len(neighbours)>10):
       for i in range(0,10,1):  #Top 10 neighbours
           top_list.append(neighbours[i][0])
    else:
       for i in range(len(neighbours)):
            top_list.append(neighbours[i][0])
    if user in top_list: #Remove the user of which we are asked to find neighbours,each user is always strongly correlated with itself and its of no use to us.
       top_list.remove(user) #
    
    recommended_array=np.unique(np.array(recommended_list))#
    return top_list,recommended_array
  

In [15]:
#Displaying the model result.
user = input("Enter the user name: ")
top_lists,recommendation_list=model(user)
print(" ")
print("Top users similar to this user are:")
print(" ")
for i in range(0,len(top_lists),1):
    print(top_lists[i])
print(" ")
print("Users similar to this user liked these products too:")
print(" ")
for i in range(0,len(recommendation_list),1):
  print(recommendation_list[i])

Enter the user name: Prapti parasar
 
Top users similar to this user are:
 
Chandni brahma
Bobita Sonuwal
Monfroline Gogoi
Sheetal Sahu
Swarjima Boro
Chinmayee Ray
Mandira Sharma
Sreeja Kadari
P. Choudhury
 
Users similar to this user liked these products too:
 
Formal Shirts
Kurtis
Skirts
Tops


**Pickling the model**

In [16]:
import pickle

In [17]:
#pickling the model
pickle.dump(model,open("model.pkl", 'wb'))
c='Sreeja Kadari'
jb=pickle.load(open("model.pkl","rb"))
top_list,recommended_list=jb(c)
print(" ")
print("Top users similar to this user are:")
print(" ")
for i in range(0,len(top_list),1):
    print(top_list[i])
print(" ")
print("Users similar to this user liked these products too:")
print(" ")
for i in range(0,len(recommended_list),1):
  print(recommended_list[i])

 
Top users similar to this user are:
 
Mandira Sharma
Angita Kar
P. Choudhury
Sandhani Boruah
Tarishi Jain
Bhabna Borah
Neelima Saharia
Kizie Basu
Kavya Chopra
 
Users similar to this user liked these products too:
 
Casual shirts
Jeans
Kurtis
Skirts


**Deploying model**

In [18]:
pip install flask-ngrok

Collecting flask-ngrok
  Downloading https://files.pythonhosted.org/packages/af/6c/f54cb686ad1129e27d125d182f90f52b32f284e6c8df58c1bae54fa1adbc/flask_ngrok-0.0.25-py3-none-any.whl
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [29]:
#app.py
from flask import Flask, jsonify, request, render_template
from flask_ngrok import run_with_ngrok
import pickle

# load model
models = pickle.load(open("model.pkl",'rb'))
# app
app = Flask(__name__,template_folder='template')
run_with_ngrok(app)
@app.route('/')
def home():
    return render_template('recommendation.html')
# routes
@app.route('/pred', methods=['POST'])

def predict():
  if request.method=='POST':
     result=request.form
     query_user_name=result["user"]
     top_neighbours,recommend_list=models(query_user_name)
     return render_template('prediction.html',prediction_text1=top_neighbours,prediction_text2=recommend_list)

if __name__ == '__main__':
    app.run()   

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://f6930f7bdd8f.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [09/Apr/2021 09:15:00] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [09/Apr/2021 09:15:18] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
[2021-04-09 09:18:14,947] ERROR in app: Exception on /pred [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "/usr/local/lib/python3.7/dist-packages/flask/_compat.py", line 39, in reraise
    raise value
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1936, in dispatch_request
    return self.view_fu

**Finding requirements.txt**

In [28]:
pip freeze > requirements.txt

**NOTE:** This is only feasible for user names in database.