In [1]:
import numpy as np
import pandas as pd 
import sklearn
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel

# Importing scipy Packages
from scipy.sparse.linalg import svds

# Location of the CSV dataset passed to load_data(). Despite the "URL" in the
# name, this is a relative file path, not a web address.
toronto_URL= "data/decreased.csv"
def load_data(url):
    """Read a CSV into a DataFrame.

    Parameters
    ----------
    url : str or file-like
        Anything ``pandas.read_csv`` accepts: a path, a URL or an open buffer.

    Returns
    -------
    pandas.DataFrame
        The parsed contents, using ``read_csv`` defaults.
    """
    return pd.read_csv(url)

# Read the dataset into `toronto_data`, the frame cos_matrix() is later called with.
toronto_data = load_data(toronto_URL)
def mean_center_rows(df):
    """Subtract each row's mean from every value in that row.

    Row means come from ``DataFrame.mean(axis=1)``, which skips NaN cells by
    default; NaN cells stay NaN in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame to center.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``df``.
    """
    row_means = df.mean(axis=1)
    # axis=0 aligns the per-row means with the row labels.
    return df.sub(row_means, axis=0)
def score(data):
    """Add a ``super_score`` column to ``data`` and return the same frame.

    ``super_score`` is ``polarity * compound + stars``, computed row by row.
    The frame is modified in place; the return value is that same object.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``polarity``, ``compound`` and ``stars``.

    Returns
    -------
    pandas.DataFrame
        ``data`` itself, now carrying the ``super_score`` column.
    """
    sentiment_term = data['polarity'] * data['compound']
    data['super_score'] = sentiment_term + data['stars']
    return data
def cos_matrix(data, n_factors=15):
    """Build the SVD-smoothed restaurant-by-user predicted-rating matrix.

    Despite its name, this function computes no cosine similarity; that is
    done by ``item_matrix`` on the frame returned here.

    Steps: pivot ``data`` into a user x restaurant matrix of ``super_score``
    (``pivot_table`` averages duplicate user/restaurant pairs), mean-center
    each user's row, fill unrated cells with 0, take a truncated SVD and
    multiply the factors back together.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``user_id``, ``name`` and ``super_score``.
    n_factors : int, default 15
        Requested number of latent factors. ``svds`` only accepts
        ``1 <= k < min(n_users, n_restaurants)``, so the request is capped at
        ``min(n_users, n_restaurants) - 1`` instead of failing on small data.

    Returns
    -------
    pandas.DataFrame
        Reconstructed ratings with one row per restaurant name and one column
        per user_id.

    Raises
    ------
    ValueError
        If ``n_factors`` is below 1, or the rating matrix has fewer than two
        users or fewer than two restaurants.
    """
    if n_factors < 1:
        raise ValueError("n_factors must be at least 1, got {}".format(n_factors))

    # User x restaurant matrix of super scores.
    ratings = data.pivot_table(index='user_id', columns='name', values='super_score')

    # Center every user's ratings on that user's own mean, then treat
    # unrated restaurants as "exactly average" (0 after centering).
    ratings = mean_center_rows(ratings).fillna(0)

    max_factors = min(ratings.shape) - 1
    if max_factors < 1:
        raise ValueError(
            "cos_matrix needs at least 2 users and 2 restaurants to factorize, "
            "got a {} x {} rating matrix".format(*ratings.shape)
        )
    k = min(n_factors, max_factors)

    # Truncated SVD of the rating matrix.
    U, sigma, Vt = svds(ratings.to_numpy(), k=k)

    # Scaling the columns of U by sigma is the same as U @ diag(sigma).
    predicted = np.dot(U * sigma, Vt)

    # Back to a labelled frame, flipped so restaurants are the rows.
    return pd.DataFrame(
        predicted,
        columns=ratings.columns,
        index=list(ratings.index),
    ).transpose()

def item_matrix(preds_df=None):
    """Build the restaurant-to-restaurant cosine-similarity matrix.

    Parameters
    ----------
    preds_df : pandas.DataFrame, optional
        Frame whose rows are compared with each other. When omitted, the
        module-level ``cf_preds_df`` is used, so existing zero-argument calls
        keep working.

    Returns
    -------
    pandas.DataFrame
        Square frame whose rows and columns are both labelled with
        ``preds_df.index``; each cell holds the cosine similarity between the
        two corresponding rows of ``preds_df``.
    """
    if preds_df is None:
        # Resolved at call time: the global must be assigned before the call.
        preds_df = cf_preds_df

    similarity = cosine_similarity(preds_df)
    return pd.DataFrame(similarity, columns=preds_df.index, index=preds_df.index)

# Build the two frames cf_recommender() reads. `toronto_data` was already
# loaded above, so the CSV is not parsed a second time here.
# NOTE(review): score() is never called in this cell, so cos_matrix() relies on
# the CSV already containing a 'super_score' column - confirm against the data.
cf_preds_df = cos_matrix(toronto_data)
item_item_matrix = item_matrix()


# Creating Collaborative Filtering Function for Restaurant-Restaurant Recommendation System
def cf_recommender(restaurant, top_n=10):
    """Return the restaurants most similar to ``restaurant``.

    Reads the ``restaurant`` column of the module-level ``item_item_matrix``,
    keeps only strictly positive similarities, removes the query restaurant
    itself and returns the highest remaining scores.

    Parameters
    ----------
    restaurant : str
        Column label in ``item_item_matrix`` to find neighbours for.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by restaurant name, sorted from most to
        least similar, with at most ``top_n`` entries.

    Raises
    ------
    KeyError
        If ``restaurant`` is not a column of ``item_item_matrix``.
    """
    similarities = item_item_matrix[restaurant]

    # Only positively related restaurants are worth recommending.
    positive = similarities[similarities > 0]

    # Remove the query restaurant by label rather than assuming it sorts
    # first: a tie at the top, or a self-similarity that is not positive,
    # would otherwise make a positional slice drop a genuine recommendation.
    candidates = positive.drop(labels=restaurant, errors='ignore')

    return candidates.sort_values(ascending=False).head(top_n)
# Example query: recommendations for 'Birrieria La Plaza', kept in `a` for the cells below.
a=cf_recommender('Birrieria La Plaza')

In [6]:
# Display the recommendations returned for 'Birrieria La Plaza'.
a

name
Pita Pit                          0.954921
Cascadia Coffee Pub               0.946316
MF Tasty                          0.941666
Pepe Chile Taqueria & Catering    0.929970
The Spot 79                       0.928068
Kim Jong Grillin'                 0.925859
Smokin Fire Fish                  0.921577
Cartlandia - Food Carts           0.920334
Jacqueline                        0.917757
Pollo Bravo                       0.916757
Name: Birrieria La Plaza, dtype: float64

In [8]:
# First index label of `a`, i.e. the name of the top-ranked recommendation.
a.index[0]

'Pita Pit'