In [1]:
import os
import numpy as np
import pandas as pd
import scipy.sparse
from scipy.spatial.distance import correlation

In [2]:
data=pd.read_csv('clean_travel_data.csv')
data.head()

Unnamed: 0,location,place,state,location_rating,age,season,affordable,types_of_travel,religion,itemId
0,Bhoramdeo Temple,"Chaura village, Kawardha town",chhattisgarh,4.0,0,winter,Yes,family,hindu,1
1,maitri bagh,"Near to Maroda, Bhilai",chhattisgarh,4.0,20,"summer,spring,winter,rainy,autumn",Yes,friends,any,2
2,Tirathgarh Falls,Bastar district,chhattisgarh,4.6,25,monsoon,Yes,friends,any,3
3,Sirpur Heritage Site,bank of holy Mahanadi river,chhattisgarh,5.0,0,winter,Yes,family,hindu,4
4,Rajeev Smriti Van,Raipur,chhattisgarh,3.9,0,"summer,spring,winter,rainy,autumn",Yes,solo,any,5


In [3]:
data1=pd.read_csv('collaborative.csv')
data1.head()

Unnamed: 0,userId,itemId,rating,timestamp
0,1,1,,881250949
1,1,2,3.0,891717742
2,1,3,1.0,878887116
3,1,4,,880606923
4,1,5,1.0,886397596


In [4]:
data=pd.merge(data,data1,left_on='itemId',right_on="itemId")
data.head()

Unnamed: 0,location,place,state,location_rating,age,season,affordable,types_of_travel,religion,itemId,userId,rating,timestamp
0,Bhoramdeo Temple,"Chaura village, Kawardha town",chhattisgarh,4.0,0,winter,Yes,family,hindu,1,1,,881250949
1,Bhoramdeo Temple,"Chaura village, Kawardha town",chhattisgarh,4.0,0,winter,Yes,family,hindu,1,2,3.0,891717742
2,Bhoramdeo Temple,"Chaura village, Kawardha town",chhattisgarh,4.0,0,winter,Yes,family,hindu,1,3,1.0,878887116
3,Bhoramdeo Temple,"Chaura village, Kawardha town",chhattisgarh,4.0,0,winter,Yes,family,hindu,1,4,,880606923
4,Bhoramdeo Temple,"Chaura village, Kawardha town",chhattisgarh,4.0,0,winter,Yes,family,hindu,1,5,1.0,886397596


In [5]:
data=pd.DataFrame.sort_values(data,['userId','itemId'])
data

Unnamed: 0,location,place,state,location_rating,age,season,affordable,types_of_travel,religion,itemId,userId,rating,timestamp
0,Bhoramdeo Temple,"Chaura village, Kawardha town",chhattisgarh,4.0,0,winter,Yes,family,hindu,1,1,,881250949
10,maitri bagh,"Near to Maroda, Bhilai",chhattisgarh,4.0,20,"summer,spring,winter,rainy,autumn",Yes,friends,any,2,1,3.0,891717742
20,Tirathgarh Falls,Bastar district,chhattisgarh,4.6,25,monsoon,Yes,friends,any,3,1,1.0,878887116
30,Sirpur Heritage Site,bank of holy Mahanadi river,chhattisgarh,5.0,0,winter,Yes,family,hindu,4,1,,880606923
40,Rajeev Smriti Van,Raipur,chhattisgarh,3.9,0,"summer,spring,winter,rainy,autumn",Yes,solo,any,5,1,1.0,886397596
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2819,Jal Mandir,Pawapuri,Bihar,4.1,30,monsoon,Yes,"solo,family,friends",jains,284,10,,891352093
2829,Hajipur,Hajipur,Bihar,3.9,25,"summer,spring,winter,rainy,autumn",Yes,"friends,family,solo","hindus,muslims,buddhists,jains",285,10,4.0,886122952
2839,Karkat Waterfall,Karkathgarh,Bihar,4.5,20,monsoon,Yes,friends,any,286,10,1.0,878961814
2849,Dhungeswari hills,Larpur,Bihar,4.0,20,"summer,spring,winter,rainy,autumn",Yes,"friends,family",buddhists,287,10,1.0,884140579


In [7]:
userItemRatingMatrix=pd.pivot_table(data, values='rating',index=['userId'], columns=['itemId'])

In [8]:
def similarity(user1,user2):
    try:
        user1=np.array(user1)-np.nanmean(user1)
        user2=np.array(user2)-np.nanmean(user2)
        commonItemIds=[i for i in range(len(user1)) if user1[i]>0 and user2[i]>0]
        if len(commonItemIds)==0:
           return 0
        else:
           user1=np.array([user1[i] for i in commonItemIds])
           user2=np.array([user2[i] for i in commonItemIds])
           return correlation(user1,user2)
    except ZeroDivisionError:
        print("You can't divide by zero!")

In [9]:
def nearestNeighbourRatings(activeUser,K):
    try:
        similarityMatrix=pd.DataFrame(index=userItemRatingMatrix.index,columns=['Similarity'])
        for i in userItemRatingMatrix.index:
            similarityMatrix.loc[i]=similarity(userItemRatingMatrix.loc[activeUser],userItemRatingMatrix.loc[i])
        similarityMatrix=pd.DataFrame.sort_values(similarityMatrix,['Similarity'],ascending=[0])
        nearestNeighbours=similarityMatrix[:K]
        neighbourItemRatings=userItemRatingMatrix.loc[nearestNeighbours.index]
        predictItemRating=pd.DataFrame(index=userItemRatingMatrix.columns, columns=['Rating'])
        for i in userItemRatingMatrix.columns:
            predictedRating=np.nanmean(userItemRatingMatrix.loc[activeUser])
            for j in neighbourItemRatings.index:
                if userItemRatingMatrix.loc[j,i]>0:
                   predictedRating += (userItemRatingMatrix.loc[j,i]-np.nanmean(userItemRatingMatrix.loc[j]))*nearestNeighbours.loc[j,'Similarity']
                predictItemRating.loc[i,'Rating']=predictedRating
    except ZeroDivisionError:
        print("You can't divide by zero!")            
    return predictItemRating

In [10]:
def topNRecommendations(activeUser,N):
    try:
        predictItemRating=nearestNeighbourRatings(activeUser,N)
        placeAlreadyWatched=list(userItemRatingMatrix.loc[activeUser].loc[userItemRatingMatrix.loc[activeUser]>0].index)
        predictItemRating=predictItemRating.drop(placeAlreadyWatched)
        topRecommendations=pd.DataFrame.sort_values(predictItemRating,['Rating'],ascending=[0])[:N]
        topRecommendationTitles=(data.loc[data1.itemId.isin(topRecommendations.index)])
    except ZeroDivisionError:
        print("You can't divide by zero!")
    return list([topRecommendationTitles.location,topRecommendationTitles.place,topRecommendationTitles.state,topRecommendationTitles.location_rating])

In [11]:
activeUser=int(input("Enter userid: "))

Enter userid: 4


In [12]:
print("The recommended places for you are: ")
topN=pd.DataFrame(topNRecommendations(activeUser,4))
topN=topN.T
topN=topN.sort_values(by='location_rating', ascending=False)
topN

The recommended places for you are: 


Unnamed: 0,location,place,state,location_rating
58,Palolem Beach,Canacona,Goa,5.0
2081,Rajbari National Park,Belonia,Tripura,5.0
419,Tirthan Valley,Kullu,Himachal pradesh,5.0
997,Panna National Park,"Gwalior,Bhopal",Madhya Pradesh,4.8
2370,Srisailam Dam,Srisailam,Andhra Pradesh,4.7
2391,SHAR,Srihari Kota,Andhra Pradesh,4.7
2153,mecca masjid,hyderabad,telangana,4.7
1286,Shirui Kashong,Ukhrul,Manipur,4.6
1792,mukteswara temple,bhudaneswar,odisha,4.6
1780,golden beach,puri,odisha,4.5


In [13]:
def favoritePlace(activeUser,N):
    topPlace=pd.DataFrame.sort_values(data[data.userId==activeUser],['rating'],ascending=[0])[:N]
    return list([topPlace.location,topPlace.place,topPlace.state,topPlace.location_rating])

In [14]:
print("The recommended places for you are: ")
fav_place=pd.DataFrame(favoritePlace(activeUser,4))
fav_place=fav_place.T
fav_place=fav_place.sort_values(by='location_rating', ascending=False)
fav_place

The recommended places for you are: 


Unnamed: 0,location,place,state,location_rating
743,bijapur,bijapur,Karnataka,4.5
503,Pahalgam,Anantnag,Jammu & Kashmir,4.5
533,Sonamarg,Ganderbal,Jammu & Kashmir,4.5
963,Kanha National Park,jabalpur,Madhya Pradesh,4.4
