This notebook focuses on producing the recommender's output for a chosen user and city. Some pre-processing is needed first to get the desired columns into DataFrames.
You can run this notebook to see the outcome if you do not want to use the Streamlit app.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the three CSV tables that the output is built from.
yelp_POI = pd.read_csv('Yelp_data_1EDA.csv')
TM_events = pd.read_csv('TM_data_EDA.csv')
foursq_POI = pd.read_csv('FourSq_data_EDA.csv')

In [None]:
# Preview the first rows of foursq_POI as loaded, before its columns are selected.
foursq_POI.head()

In [None]:
# Prepare the three tables for output: drop the columns that are not needed,
# put the remaining columns in display order, and rename 'Address_x'/'State_y'
# to plain 'Address'/'State'.

# --- Yelp table ---
columns_to_drop1 = [
    "Business ID", "State_x", "Latitude", "Longitude",
    "latitude", "longitude", "Address_y",
]
desired_columns_1 = [
    'Name', 'Category', 'Ratings', 'Pricing', 'Address_x',
    'City', 'State_y', 'Distance_From_Venue', 'Venue',
]
column_mapping = {'Address_x': 'Address', 'State_y': 'State'}
yelp_POI = (
    yelp_POI
    .drop(columns=columns_to_drop1)[desired_columns_1]
    .rename(columns=column_mapping)
)

# --- TM events table ---
columns_to_drop_2 = ["Latitude", "Longitude"]
desired_columns_2 = [
    'Event_name', 'Event_type', 'Event_dates', 'Event_start_times',
    'Last_Purchase_Date', 'Event_time_zone', 'Venues', 'City', 'States',
    'Address', 'Additional_info', 'Ticket_limits', 'Age_restrictions',
    'Parking', 'Accesibility',
]
TM_events = TM_events.drop(columns=columns_to_drop_2)[desired_columns_2]

# --- Foursquare table ---
desired_columns_3 = [
    "POI", "Category", "Address", "City", "State",
    "Distance From Venue", "Venue",
]
# Renumber the rows from 0 before selecting the display columns.
foursq_POI = foursq_POI.reset_index(drop=True)[desired_columns_3]




In [None]:
# Preview the first rows of foursq_POI after it has been reduced to the display columns.
foursq_POI.head()

In [None]:
# Load the venue review ratings used as input to the recommender.
venue_ratings=pd.read_csv('Venue_Reviews.csv')
#venue_ratings.head()

In [None]:
# Use 'Category' as the column name so it lines up with the 'Category'
# column that the recommender pivots on later.
venue_ratings = venue_ratings.rename(columns={'Event_type': 'Category'})
#venue_ratings.columns

In [None]:
# Load the point-of-interest ratings used as input to the recommender.
POI_ratings=pd.read_csv('POI_Ratings.csv')

In [None]:
# Stack the venue ratings and the POI ratings row-wise into a single table.
# ignore_index is not set, so each input keeps its own row labels and the
# combined index may contain repeated labels.
ratings_df=pd.concat([venue_ratings,POI_ratings])

In [None]:
#ratings_df.shape

In [None]:
# Preview the ratings with duplicate rows removed. The result is not assigned
# back, so ratings_df itself is left unchanged by this cell.
ratings_df.drop_duplicates()#.shape

In [None]:
# Remove the columns the recommender does not use.
columns_to_drop = ['Address_y', 'latitude', 'longitude']
ratings_df = ratings_df.drop(columns=columns_to_drop)

In [None]:
# Fill missing 'city' values from the 'City' column.
# The result is assigned back rather than calling fillna(inplace=True) on the
# ratings_df['city'] selection: an in-place call on a selected column is
# chained assignment, which pandas warns about and which does not update
# ratings_df when Copy-on-Write is enabled.
ratings_df['city'] = ratings_df['city'].fillna(ratings_df['City'])

In [None]:
# Remove duplicate rows, then drop 'City' now that its values have been
# folded into 'city'.
ratings_df = ratings_df.drop_duplicates().drop(columns=['City'])

Change the NaN ratings to -1 to indicate that they are missing, and replace missing `user_id` and `user_name` values with the string "Ratings not available" so that there are no NaN values in the matrix.


In [None]:
# Mark missing ratings with -1 and missing user identifiers with a text label.
missing_user_label = "Ratings not available"
ratings_df['ratings'] = ratings_df['ratings'].fillna(-1)
ratings_df[['user_id', 'user_name']] = (
    ratings_df[['user_id', 'user_name']].fillna(missing_user_label)
)


# Build Recommender

In [None]:
# Build the user x category matrix: one row per user_id, one column per
# Category, each cell holding that user's mean rating for the category.
# NOTE(review): the -1 placeholders written for missing ratings take part in
# these means -- confirm that is intended.
recommender_matrix = pd.pivot_table(
    ratings_df,
    index='user_id',
    columns='Category',
    values='ratings',
    aggfunc='mean',
)


In [None]:
# Inputs for this run.
user_city = "Toronto"  # only options in this city are shown
user = "Colin S."      # user_name of the person to recommend for

# All rating rows for that user_name, and the distinct user_ids behind them.
matched_user = ratings_df.loc[ratings_df['user_name'] == user]
matched_id = matched_user['user_id'].unique()
#matched_id

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the users' rating rows; categories a
# user has no rating for are treated as 0.
similarity_matrix = cosine_similarity(recommender_matrix.fillna(0))

# Label both axes with the user ids so rows can be looked up by user_id.
matrix_user_ids = recommender_matrix.index
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=matrix_user_ids,
    columns=matrix_user_ids,
)


In [None]:
# Find users similar to the selected user, most similar first.
if len(matched_id) == 0:
    # Without this guard, matched_id[0] fails with a bare "index 0 is out of
    # bounds" that does not say which user was missing. The exception type is
    # kept as IndexError.
    raise IndexError(f"No ratings found for user {user!r}; cannot build recommendations.")
# NOTE: if several user_ids share this user_name, only the first one is used.
similarities_to_given_user = similarity_df.loc[matched_id[0]].sort_values(ascending=False)
# similarities_to_given_user

In [None]:
# Gather the rating rows of every user whose similarity to the selected user
# is non-zero, keeping users in the order of similarities_to_given_user.
has_similarity = similarities_to_given_user.ne(0)
similar_users_data = [
    ratings_df[ratings_df['user_id'] == similar_id]
    for similar_id in similarities_to_given_user[has_similarity].index
]

# One frame per user -> a single DataFrame.
similar_users_df = pd.concat(similar_users_data)


In [None]:
# Build the fallback "suggestions" table: rating rows of users whose
# similarity to the selected user is exactly 0. These are not personalised
# matches; they are offered as general suggestions.
no_similarity = similarities_to_given_user == 0
dis_similar = [
    ratings_df[ratings_df['user_id'] == other_id]
    for other_id in similarities_to_given_user[no_similarity].index
]

if dis_similar:
    # One frame per user -> a single DataFrame.
    suggestions = pd.concat(dis_similar)
else:
    # pd.concat raises ValueError on an empty list. When every user has some
    # similarity, fall back to an empty table with the same columns so the
    # later cells still run.
    suggestions = ratings_df.iloc[0:0].copy()

In [None]:
# Mean rating per venue over the suggestion rows, rounded to one decimal.
average_ratings_by_venue = (
    suggestions
    .groupby('venue', as_index=False)['ratings']
    .mean()
    .rename(columns={'ratings': 'average_ratings'})
)
average_ratings_by_venue['average_ratings'] = (
    average_ratings_by_venue['average_ratings'].round(1)
)

# Attach each venue's average to its rows, then list the best-rated venues first.
suggestions_with_avg_ratings = suggestions.merge(
    average_ratings_by_venue, on='venue', how='left'
)
sorted_suggestions = suggestions_with_avg_ratings.sort_values(
    by='average_ratings', ascending=False
)


In [None]:
# Keep only the similar users' rows whose 'city' is the selected city.
similar_in_city = similar_users_df['city'].eq(user_city)
similar_users_df = similar_users_df.loc[similar_in_city]
#similar_users_df['venue'].unique()

In [None]:
# Restrict the rating-sorted suggestions to the selected city; the
# highest-rated places stay at the top.
suggestions_in_city = sorted_suggestions['city'].eq(user_city)
sorted_suggestions = sorted_suggestions.loc[suggestions_in_city]

# Recommender output

In [None]:
# Accumulators for the matched rows of each kind (three separate empty lists).
restaurants, Events, POI = [], [], []

In [None]:
def _first_match(frame, column, needle):
    """Return the first row of *frame* whose text in *column* contains *needle*, or None."""
    for _, row in frame.iterrows():
        value = row[column]
        # Missing cells are NaN (a float); only text can be substring-matched.
        if isinstance(value, str) and needle in value:
            return row
    return None


# Only rows in the selected city can match, so filter each table once here
# instead of re-checking the city for every row of every venue.
yelp_in_city = yelp_POI[yelp_POI['City'] == user_city]
foursq_in_city = foursq_POI[foursq_POI['City'] == user_city]
events_in_city = TM_events[TM_events['City'] == user_city]

# For each venue rated by similar users, keep the first restaurant, POI and
# event in the city whose name contains the venue name.
for item in similar_users_df['venue'].unique():
    if not isinstance(item, str):
        continue  # a missing venue (NaN) cannot be matched against names
    for matches, frame, column in (
        (restaurants, yelp_in_city, 'Name'),
        (POI, foursq_in_city, 'POI'),
        (Events, events_in_city, 'Venues'),
    ):
        found = _first_match(frame, column, item)
        if found is not None:
            matches.append(found)
                
    
    
#print(len(restaurants),len(Events),len(POI))    

In [None]:
# Accumulators for the fallback suggestions of each kind (three separate empty lists).
suggested_restaurant, suggested_POI, suggested_events = [], [], []

In [None]:
def _first_match(frame, column, needle):
    """Return the first row of *frame* whose text in *column* contains *needle*, or None."""
    for _, row in frame.iterrows():
        value = row[column]
        # Missing cells are NaN (a float); only text can be substring-matched.
        if isinstance(value, str) and needle in value:
            return row
    return None


# Only rows in the selected city can match, so filter each table once here
# instead of re-checking the city for every row of every venue.
yelp_in_city = yelp_POI[yelp_POI['City'] == user_city]
foursq_in_city = foursq_POI[foursq_POI['City'] == user_city]
events_in_city = TM_events[TM_events['City'] == user_city]

# For each suggested venue (best average rating first), keep the first
# restaurant, POI and event in the city whose name contains the venue name.
for items in sorted_suggestions['venue'].unique():
    if not isinstance(items, str):
        continue  # a missing venue (NaN) cannot be matched against names
    for matches, frame, column in (
        (suggested_restaurant, yelp_in_city, 'Name'),
        (suggested_POI, foursq_in_city, 'POI'),
        (suggested_events, events_in_city, 'Venues'),
    ):
        found = _first_match(frame, column, items)
        if found is not None:
            matches.append(found)
                
    
    
#print(len(suggested_restaurant),len(suggested_events),len(suggested_POI))  

In [None]:
# Turn the collected rows into tables: R/E/P hold the matches from similar
# users, SR/SE/SP hold the fallback suggestions.
R, E, P = (pd.DataFrame(rows) for rows in (restaurants, Events, POI))
SR, SE, SP = (
    pd.DataFrame(rows)
    for rows in (suggested_restaurant, suggested_events, suggested_POI)
)



In [None]:
# Display recommendations
def _show_section(primary, fallback, primary_title, fallback_title):
    """Print one section of the recommender output.

    Shows up to 10 rows of *primary* when it has rows, otherwise up to 5 rows
    of *fallback*, otherwise a "No recommendations." message. The table that
    is shown is re-indexed in place so its rows are numbered from 1.
    """
    if len(primary) > 0:
        table, title, limit = primary, primary_title, 10
    elif len(fallback) > 0:
        table, title, limit = fallback, fallback_title, 5
    else:
        print("No recommendations.")
        return

    table.reset_index(drop=True, inplace=True)  # Reset index
    table.index = table.index + 1               # Number rows starting from 1
    print(title)
    # A bare `table.head()` inside a function or if-block displays nothing,
    # so the table is printed explicitly.
    print(table.head(limit))


print("Top Recommendations for : ", user)
_show_section(R, SR, "Restaurants in the area :", "People have also liked :")
_show_section(E, SE, "Events to attend : ", "Other options : ")
_show_section(P, SP, "Places to visit :", "People have also visited :")