In [51]:
import pandas as pd
import nltk
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import scipy.sparse
from scipy.spatial.distance import correlation

# Content Based Recommendation System Model

In [52]:
df = pd.read_csv('C:/Users/dell/Desktop/Artificial Intelligence/Project/data_content.csv',encoding='cp1252')

In [53]:
#creating lemmatizer object
lemmatizer = WordNetLemmatizer()

In [54]:
#pre processing function
def pre_process(text):
    #lowercase 
    text = text.lower()
    #remove commas,full stops etc
    text = re.sub('[^a-zA-Z]',' ',text)
    #remove stop words and lemmatize 
    words = nltk.word_tokenize(text)
    #join the words
    text = " ".join([lemmatizer.lemmatize(word) for word in words if word not in set(stopwords.words('english'))])
    return text

In [55]:
#combining the category and overview enteris
df['new_overview'] = df['overview'] 

In [56]:
#applying the pre-process function
df['cleaned_text'] = df['new_overview'].apply(pre_process, 1)

In [57]:
df['cleaned_text']

0       chicago food planet gateway west loop spotligh...
1       take san francisco two famous bridge minute bo...
2       instead traditional tour bus vantigo transport...
3       walked city street sailed bay water next step ...
4       eat way one san francisco famous neighborhood ...
                              ...                        
1807    seeing washington landmark foot ambitious goal...
1808    walk town one man show run semiretired former ...
1809    u street tour run blue fern travel formerly ca...
1810    adult older evening pub crawl take traveler ci...
1811    blue orb savannah shadow tour based book name ...
Name: cleaned_text, Length: 1812, dtype: object

In [58]:
#creating a corpus
corpus = np.array(df['cleaned_text'])

In [59]:
#convert corpus to tfidf vectors
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer()
tfidf.fit(corpus)
vectors = tfidf.transform(corpus).toarray()
#creating a dictionary of vectors

In [60]:
def recommendations(query,top = 10):
    #pre process the query
    query = pre_process(query)
    #convert the query into tfidf vector
    query_vector =tfidf.transform([query]).toarray()
    #create a dictionary to keep track of scores
    scores = {i:0 for i in range(len(vectors))}
    for i,vector in enumerate(vectors):
        scores[i] = np.dot(query_vector,vector) #cosine 
    #sort the dictionary according to the scores
    scores = {k: v for k, v in sorted(scores.items(), key=lambda item: item[1],reverse = True)}
    recs = []
    for i in list(scores.keys())[0:top]:
        recs.append(df.iloc[i]['itemId'])
    return recs
/Cosine Simlartiy

In [61]:
query = input("Enter your query: ")
recommendations(query)

Enter your query: askfhvbk


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Collaborative Based Recommendation System Model

In [62]:
data=pd.read_csv('C:/Users/dell/Desktop/Artificial IntelligenceProject/data_collaborative.csv',encoding='cp1252')
placeInfo=pd.read_csv('C:/Users/dell/Desktop/Artificial Intelligence/Project/data_content.csv',encoding='cp1252')

In [63]:
data=pd.merge(data,placeInfo,left_on="itemId",right_on="itemId")
userIds=data.userId
userIds2=data[['userId']]

In [64]:
data.loc[0:10,['userId']]
data=pd.DataFrame.sort_values(data,['userId','itemId'],ascending=[0,1])

In [65]:
def favoritePlace(activeUser,N):
    topPlace=pd.DataFr.ame.sort_values(
        data[data.userId==activeUser],['rating'],ascending=[0])[:N]
    return list(topPlace.itemId)

userItemRatingMatrix=pd.pivot_table(data, values='rating',index=['userId'], columns=['itemId'])

In [66]:
def similarity(user1,user2):
    try:
        user1=np.array(user1)-np.nanmean(user1)
        user2=np.array(user2)-np.nanmean(user2)
        commonItemIds=[i for i in range(len(user1)) if user1[i]>0 and user2[i]>0]
        if len(commonItemIds)==0:
           return 0
        else:
           user1=np.array([user1[i] for i in commonItemIds])
           user2=np.array([user2[i] for i in commonItemIds])
           return correlation(user1,user2)
    except ZeroDivisionError:
        print("You can't divide by zero!")

In [67]:
def nearestNeighbourRatings(activeUser,K):
    try:
        similarityMatrix=pd.DataFrame(index=userItemRatingMatrix.index,columns=['Similarity'])
        for i in userItemRatingMatrix.index:
            similarityMatrix.loc[i]=similarity(userItemRatingMatrix.loc[activeUser],userItemRatingMatrix.loc[i])
        similarityMatrix=pd.DataFrame.sort_values(similarityMatrix,['Similarity'],ascending=[0])
        nearestNeighbours=similarityMatrix[:K]
        neighbourItemRatings=userItemRatingMatrix.loc[nearestNeighbours.index]
        predictItemRating=pd.DataFrame(index=userItemRatingMatrix.columns, columns=['Rating'])
        for i in userItemRatingMatrix.columns:
            predictedRating=np.nanmean(userItemRatingMatrix.loc[activeUser])
            for j in neighbourItemRatings.index:
                if userItemRatingMatrix.loc[j,i]>0:
                   predictedRating += (userItemRatingMatrix.loc[j,i]-np.nanmean(userItemRatingMatrix.loc[j]))*nearestNeighbours.loc[j,'Similarity']
                predictItemRating.loc[i,'Rating']=predictedRating
    except ZeroDivisionError:
        print("You can't divide by zero!")            
    return predictItemRating

In [68]:
def topNRecommendations(activeUser,N):
    try:
        predictItemRating=nearestNeighbourRatings(activeUser,10)
        placeAlreadyWatched=list(userItemRatingMatrix.loc[activeUser]
                              .loc[userItemRatingMatrix.loc[activeUser]>0].index)
        predictItemRating=predictItemRating.drop(placeAlreadyWatched)
        topRecommendations=pd.DataFrame.sort_values(predictItemRating,
                                                ['Rating'],ascending=[0])[:N]
        topRecommendationTitles=(placeInfo.loc[placeInfo.itemId.isin(topRecommendations.index)])
    except ZeroDivisionError:
        print("You can't divide by zero!")
    return list(topRecommendationTitles.itemId)

In [69]:
activeUser=int(input("Enter userid: "))
print("The user's favorite places are: ")
print(favoritePlace(activeUser,10))

print("The recommended places for you are: ")
print(topNRecommendations(activeUser,10))


Enter userid: 5
The user's favorite places are: 
[9, 10, 20, 32, 31, 29, 5, 28, 12, 23]
The recommended places for you are: 
[3, 6, 7, 11, 15, 17, 21, 22, 26, 30]
