In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
import plotly as px
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
import re
import pickle
import nltk
nltk.download('punkt')
nltk.download('omw-1.4')
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
from nltk import word_tokenize, pos_tag
from nltk import WordNetLemmatizer
from nltk.corpus import stopwords 
from nltk.corpus import wordnet
! pip install wordcloud
from wordcloud import WordCloud
from sklearn.preprocessing import OneHotEncoder

[nltk_data] Downloading package punkt to /Users/raghavg/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/raghavg/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/raghavg/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/raghavg/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/raghavg/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!




In [2]:
rest = pd.read_csv('restaurants.csv')
review = pd.read_csv('ratings.csv')
user = pd.read_csv('users.csv')

In [3]:
merge_df = rest.merge(review, on='business_id').merge(user, on='user_id')

In [None]:
merge_df.columns

Index(['business_id', 'business_name', 'business_address', 'business_city',
       'business_state', 'business_latitude', 'business_longitude', 'stars',
       'review_counts', 'is_open', 'categories', 'user_id', 'rating', 'date',
       'text', 'user_name', 'user_review_count', 'user_yelp_since', 'friends',
       'useful_reviews', 'funny_reviews', 'cool_reviews', 'n_fans',
       'average_stars'],
      dtype='object')

In [None]:
merge_df.duplicated().sum()

0

In [None]:
merge_df.duplicated(keep='first').sum()

0

In [None]:
merge_df.isna().sum()

business_id           0
business_name         0
business_address      0
business_city         0
business_state        0
business_latitude     0
business_longitude    0
stars                 0
review_counts         0
is_open               0
categories            0
user_id               0
rating                0
date                  0
text                  0
user_name             0
user_review_count     0
user_yelp_since       0
friends               0
useful_reviews        0
funny_reviews         0
cool_reviews          0
n_fans                0
average_stars         0
dtype: int64

In [None]:
merge_df['categories'] = merge_df['categories'].astype(str)

In [37]:
user.columns

Index(['user_id', 'user_name', 'user_review_count', 'user_yelp_since',
       'friends', 'useful_reviews', 'funny_reviews', 'cool_reviews', 'n_fans',
       'years_elite', 'average_stars'],
      dtype='object')

## Data Preprocessing

In [None]:
mb = pd.read_csv('MBTI 500.csv')

In [None]:
def data_preprocessing(df, column):
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    contract_mapping = {
        "don't": "do not",
        "won't": "will not",
        "can't": "cannot",
        "couldn't": "could not",
        "shouldn't": "should not",
        "wouldn't": "would not",
        "doesn't": "does not",
        "isn't": "is not",
        "aren't": "are not",
        "wasn't": "was not",
        "weren't": "were not",
        "haven't": "have not",
        "hasn't": "has not",
        "hadn't": "had not",
        "ain't": "am not",
        "I'm": "I am",
        "you're": "you are",
        "he's": "he is",
        "she's": "she is",
        "it's": "it is",
        "we're": "we are",
        "they're": "they are",
        "I'll": "I will",
        "you'll": "you will",
        "he'll": "he will",
        "she'll": "she will",
        "it'll": "it will",
        "we'll": "we will",
        "they'll": "they will",
        "I've": "I have",
        "you've": "you have",
        "we've": "we have",
        "they've": "they have",
        "I'd": "I would",
        "you'd": "you would",
        "he'd": "he would",
        "she'd": "she would",
        "it'd": "it would",
        "we'd": "we would",
        "they'd": "they would",
        "I'd've": "I would have",
        "you'd've": "you would have",
        "he'd've": "he would have",
        "she'd've": "she would have",
        "it'd've": "it would have",
        "we'd've": "we would have",
        "they'd've": "they would have",
        "I'm": "I am",
        "you're": "you are",
        "he's": "he is",
        "she's": "she is",
        "it's": "it is",
        "we're": "we are",
        "they're": "they are"
    }

    def expand_contractions(text, mapping):
        for key, value in mapping.items():
            text = text.replace(key, value)
        return text

    df_clean = df.copy()

    df_clean['cleaned_text'] = ''  # Create a new empty column for cleaned text

    for i in range(len(df)):
        # Removing Punctuations, Special Characters, and Urls
        df_clean.loc[i, column] = re.sub(r'[^\w\s]|http\S+|_', '', df.loc[i, column])

        # Removing Numbers 
        df_clean.loc[i, column] = re.sub(r'\d+', '', df_clean.loc[i, column])

        # Converting Text to lower case
        df_clean.loc[i, column] = df_clean.loc[i, column].lower() 

        # Tokenization 
        tokens = word_tokenize(df_clean.loc[i, column])

        # Stop words removal 
        tokens = [word for word in tokens if word not in stop_words]

        # Lemmatization
        tokens = [lemmatizer.lemmatize(word) for word in tokens]

        # Contractual Expansion of Text Abbrevations(i.e, don't to do not)
        cleaned_text = expand_contractions(' '.join(tokens), contract_mapping)

        df_clean.at[i, 'cleaned_text'] = cleaned_text
  
    return df_clean


In [140]:
exp = merge_df[['business_name', 'business_id', 'categories', 'text', 'business_city', 'average_stars', 'review_counts']]

In [127]:
exp

Unnamed: 0,business_name,business_id,categories,text,business_city,average_stars,review_counts
0,St Honore Pastries,MTSW4McQd7CbVtyjqoe9mw,"Restaurants, Food, Bubble Tea, Coffee & Tea, B...",This is the bakery I usually go to in Chinatow...,Philadelphia,3.60,80
1,Mood Cafe,U30ggGzFpXvc2NZYwOW3qg,"Cafes, Pakistani, Juice Bars & Smoothies, Rest...","Average Yelp rating of 5 stars, after 112 revi...",Philadelphia,3.60,458
2,Village Whiskey,EtKSTHV5Qx_Q7Aur9o4kQQ,"Bars, Nightlife, Whiskey Bars, Burgers, Restau...",....the best burger I've had in my life. Just...,Philadelphia,3.60,1553
3,Bui's,IH_ZeeTh13jQURbh31d1Kw,"Restaurants, Vietnamese, Sandwiches, Local Fla...",Food (8.5/10) = After reading several yelp rev...,Philadelphia,3.60,104
4,South Philadelphia Tap Room,YqmClmk7oLcDzPWKHzgA-Q,"Breakfast & Brunch, Bars, Nightlife, Restauran...",Visited this place with my brother for a late ...,Philadelphia,3.60,530
...,...,...,...,...,...,...,...
2674000,First Watch,Scd-rcsQCn60t1sHHFv-og,"Cafes, Restaurants, Breakfast & Brunch, Americ...",VERY slow service. Very noisy and tables all ...,St. Petersburg,1.42,183
2674001,Trader Joe's,mzTTdvQGkUHZ8ii7OnZC5w,"Beer, Wine & Spirits, Shopping, Grocery, Flori...",I really wish they would build another Trader ...,Philadelphia,4.50,494
2674002,El Cap Restaurant,8MzF1Tlgz0pOkxmhP5dYzA,"American (Traditional), Burgers, Restaurants","No high chairs/boosters, they charge for refil...",St. Petersburg,3.91,414
2674003,Sage Mediterranean,N8fK2E6YNyo04DbVNvgIQw,"Restaurants, Mediterranean",I had a disappointing experience at Sage. To s...,Phoenixville,4.00,118


In [141]:
exp = exp[:100000]

In [142]:
exp

Unnamed: 0,business_name,business_id,categories,text,business_city,average_stars,review_counts
0,St Honore Pastries,MTSW4McQd7CbVtyjqoe9mw,"Restaurants, Food, Bubble Tea, Coffee & Tea, B...",This is the bakery I usually go to in Chinatow...,Philadelphia,3.60,80
1,Mood Cafe,U30ggGzFpXvc2NZYwOW3qg,"Cafes, Pakistani, Juice Bars & Smoothies, Rest...","Average Yelp rating of 5 stars, after 112 revi...",Philadelphia,3.60,458
2,Village Whiskey,EtKSTHV5Qx_Q7Aur9o4kQQ,"Bars, Nightlife, Whiskey Bars, Burgers, Restau...",....the best burger I've had in my life. Just...,Philadelphia,3.60,1553
3,Bui's,IH_ZeeTh13jQURbh31d1Kw,"Restaurants, Vietnamese, Sandwiches, Local Fla...",Food (8.5/10) = After reading several yelp rev...,Philadelphia,3.60,104
4,South Philadelphia Tap Room,YqmClmk7oLcDzPWKHzgA-Q,"Breakfast & Brunch, Bars, Nightlife, Restauran...",Visited this place with my brother for a late ...,Philadelphia,3.60,530
...,...,...,...,...,...,...,...
99995,Gyro House,BJXK4B2N8CLoc__SoDNPjg,"Mediterranean, Greek, Restaurants","After a long day at work, there really isn't a...",Saint Louis,3.66,119
99996,Straub's,n16QipPz3Akn-uBQTSBiwA,"Grocery, Food",Yeah this Straub's is definitely the nicer of ...,Webster Groves,3.66,38
99997,Salume Beddu,PXCNGIxi_kQ5_V77j3FrNQ,"Restaurants, Delis, Food, Meat Shops, Sandwich...",I'm still exhaling through my nose to experien...,Saint Louis,3.66,72
99998,Deer Creek Coffee,3H_MN2ecWlBLsUBWrjs8xA,"Coffee & Tea, Breakfast & Brunch, Restaurants,...",Solid Neighborhood Coffee Spot.\n\nI'd like to...,St Louis,3.66,78


In [143]:
exp = data_preprocessing(exp, 'text')

In [None]:
mb = data_preprocessing(mb, 'posts')

In [None]:
mb

Unnamed: 0,posts,type,cleaned_text
0,know intj tool use interaction people excuse a...,INTJ,know intj tool use interaction people excuse a...
1,rap music ehh opp yeah know valid well know fa...,INTJ,rap music ehh opp yeah know valid well know fa...
2,preferably p hd low except wew lad video p min...,INTJ,preferably p hd low except wew lad video p min...
3,drink like wish could drink red wine give head...,INTJ,drink like wish could drink red wine give head...
4,space program ah bad deal meing freelance max ...,INTJ,space program ah bad deal meing freelance max ...
...,...,...,...
106062,stay frustrate world life want take long nap w...,INFP,stay frustrate world life want take long nap w...
106063,fizzle around time mention sure mistake thing ...,INFP,fizzle around time mention sure mistake thing ...
106064,schedule modify hey w intp strong wing underst...,INFP,schedule modify hey w intp strong wing underst...
106065,enfj since january busy schedule able spend li...,INFP,enfj since january busy schedule able spend li...


In [None]:
# Remove rows where 'cleaned_text' column in empty
mb = mb[mb['cleaned_text'] != '']

Unnamed: 0,posts,type,cleaned_text
0,know intj tool use interaction people excuse a...,INTJ,know intj tool use interaction people excuse a...
1,rap music ehh opp yeah know valid well know fa...,INTJ,rap music ehh opp yeah know valid well know fa...
2,preferably p hd low except wew lad video p min...,INTJ,preferably p hd low except wew lad video p min...
3,drink like wish could drink red wine give head...,INTJ,drink like wish could drink red wine give head...
4,space program ah bad deal meing freelance max ...,INTJ,space program ah bad deal meing freelance max ...


In [None]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
mb['encoded_labels'] = label_encoder.fit_transform(mb['type']) 

X = mb['cleaned_text'] 
y = mb['encoded_labels']

In [None]:
from concurrent.futures import ProcessPoolExecutor
from transformers import BertTokenizer
import torch
from tqdm.notebook import tqdm
from multiprocessing import Pool

from tqdm import tqdm

def encode_text(sent, tokenizer, max_len):
    encoded = tokenizer.encode_plus(
        sent,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        return_attention_mask=True,
        truncation=True
    )
    return encoded['input_ids'], encoded['attention_mask']

def bert_encode(data, max_len):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    input_ids = []
    attention_masks = []
    for sent in tqdm(data, desc="Encoding"):
        ids, masks = encode_text(sent, tokenizer, max_len)
        input_ids.append(ids)
        attention_masks.append(masks)
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)
    return input_ids, attention_masks

input_ids, attention_masks = bert_encode(exp['cleaned_text'], max_len=64)

Encoding: 100%|██████████| 50000/50000 [00:47<00:00, 1059.01it/s]


In [None]:
from nltk import tokenize 

# Filter rows based on token length
exp = exp[exp['cleaned_text'].apply(lambda x: len(tokenize.word_tokenize(x)) < 500)]


In [None]:
exp.shape

(100, 8)

In [144]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_encoder.classes_))
best_model_path = '/Users/raghavg/Desktop/USML_Projects/Final_Project/Pre_trained_models/results/path/to/save/models/best_model.pth'
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# def predict_personality_type(text):
#     encoded_input = tokenizer.encode_plus(
#         text,
#         add_special_tokens=True,
#         max_length=64,
#         padding='max_length',
#         return_attention_mask=True,
#         truncation=True,
#         return_tensors='pt'
#     )
#     input_ids = encoded_input['input_ids'].to(device)
#     attention_mask = encoded_input['attention_mask'].to(device)

#     with torch.no_grad():
#         outputs = model(input_ids, attention_mask=attention_mask)
#         logits = outputs.logits

#     predicted_label_idx = torch.argmax(logits, dim=1).cpu().numpy()[0]
#     predicted_label = label_encoder.inverse_transform([predicted_label_idx])[0]
#     return predicted_label

# exp['personality_type'] = exp['cleaned_text'].apply(predict_personality_type)


def predict_personality_types(texts, batch_size=32):
    predicted_personality_types = []

    # Process texts in batches
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i:i + batch_size]
        encoded_inputs = tokenizer(batch_texts, padding=True, truncation=True, max_length=64, return_tensors="pt")

        if len(batch_texts) == 0:
            continue

        # Move inputs to appropriate device
        input_ids = encoded_inputs["input_ids"].to(device)
        attention_mask = encoded_inputs["attention_mask"].to(device)

        # Perform inference
        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
        
        # Get predicted labels for each sample in the batch
        predicted_label_idxs = torch.argmax(logits, dim=1).cpu().numpy()
        predicted_labels = label_encoder.inverse_transform(predicted_label_idxs)
        
        predicted_personality_types.extend(predicted_labels.tolist())

    return predicted_personality_types

# Predict personality types for each batch
exp['personality_type'] = predict_personality_types(exp['cleaned_text'].tolist())

# Alternatively, you can use the apply function with a lambda function:
# exp['personality_type'] = exp['cleaned_text'].apply(lambda x: predict_personality_types([x])[0])


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



To align the personality options with the Myers-Briggs Type Indicator (MBTI), here are the corresponding personality codes for each option:

1. The Adventurous Foodie: ENTP (The Debater), ESTP (The Entrepreneur)
2. The Comfort Food Connoisseur: ISFJ (The Defender), ESFJ (The Consul)
3. The Health-Conscious Eater: INFJ (The Advocate), INFP (The Mediator)
4. The Culinary Explorer: INTJ (The Architect), ENFP (The Campaigner)
5. The Social Foodie: ESFP (The Entertainer), ENFJ (The Protagonist)

In [3]:
p = pd.read_csv('personality.csv')

In [21]:
p = pd.read_csv('personality.csv')
p = p[:30000]

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

# Calculate TF-IDF vectors for restaurant descriptions
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(p['cleaned_text'])

n_components = 20
svd = TruncatedSVD(n_components=n_components)
svd_matrix = svd.fit_transform(tfidf_matrix)

# Calculate cosine similarity between restaurant descriptions
cosine_sim = cosine_similarity(svd_matrix, svd_matrix)

selected_personality = 'ENTP'
# Get indices of restaurants similar to the selected personality type
similar_indices = p[p['personality_type'] == selected_personality].index

# Calculate average cosine similarity for each restaurant
avg_similarity = cosine_sim[similar_indices].mean(axis=0)

# Recommend restaurants with highest average similarity
recommended_indices = avg_similarity.argsort()[:-6:-1]  # Top 5 recommendations
recommended_restaurants = p.iloc[recommended_indices][['business_name', 'text']]
print("Recommended Restaurants:")
print(recommended_restaurants)


# def evaluate_recommendations(true_indices, recommended_indices):
#     # Calculate precision
#     precision = len(set(recommended_indices) & set(true_indices)) / len(recommended_indices)
    
#     # Calculate recall
#     recall = len(set(recommended_indices) & set(true_indices)) / len(true_indices)
    
#     # Calculate F1-score
#     if precision + recall == 0:
#         f1_score = 0
#     else:
#         f1_score = 2 * (precision * recall) / (precision + recall)
    
#     return precision, recall, f1_score

# # Define true indices of relevant restaurants for the selected personality type
# selected_personality = 'ISFP'
# true_indices = p[p['personality_type'] == selected_personality].index

# # Evaluate recommendations
# precision, recall, f1_score = evaluate_recommendations(true_indices, similar_indices)

# print("Precision:", precision)
# print("Recall:", recall)
# print("F1-score:", f1_score)


Recommended Restaurants:
           business_name                                               text
321             Choolaah  choolah is my favorite place to grab a quick b...
19681   Buutchiis Grille  what a great takeout meal i enjoyed from here ...
4537       Hot On D Spot  so in all transparency ive never eaten trinida...
20974      Chubby Cattle  ive been waiting for this darn place to open f...
9534   Beck's Cajun Cafe  tucked away in reading terminal market  this p...
Precision: 0.0
Recall: 0.0
F1-score: 0


# Food Personality Based Recommendation

In [4]:
p = pd.read_csv('personality.csv')
m1 = merge_df.iloc[:100000][['user_id', 'average_stars']]
m1_unique = m1.groupby('average_stars')['user_id'].first().reset_index()
p = p.merge(m1_unique, on='average_stars', how='left')

### Food Personality Based Recommendation Without SVD

In [9]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
def personality_recommendation(df):
    df_filtered = df[:50000]
    loc = input('Please Enter Your City: ')
    persona_options = {"1": "The Adventurous Foodie", 
                        "2": "The Comfort Food Connoisseur", 
                        "3": "The Health-Conscious Eater", 
                        "4": "The Culinary Explorer",
                        "5": "The Social Foodie"}
    options = ["I'm always eager to try new and adventurous dishes, exploring diverse cuisines and dining atmospheres.", 
    "I prefer cozy, familiar settings where I can enjoy classic comfort foods and unwind with my favorite dishes.",
    "I prioritize health and wellness in my dining choices, opting for nutritious options and mindful eating experiences.",
    "I love experimenting with different flavors and culinary techniques, seeking out unique dining experiences and culinary challenges.",
    "Dining out is a social occasion for me, and I enjoy vibrant, lively atmospheres where I can connect with friends and family over great food."]
    print("What type of dining do you wanna experience today?")
    for i, option in enumerate(options, start=1):
        print(f"{i}. {option}")

    selection = int(input("Please make a selection from the above options"))

    if 1 <= selection <= len(option):
        selected_option = options[selection - 1]
        selected_personality = list(persona_options.values())[selection - 1]
        print(f"We see you are {selected_personality}")
    else:
        print("Invalid Selection. Please choose a number between 1 and", len(options))

    if selection == 1:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'ENTP') | (df['personality_type'] == 'ESTP')]
    elif selection == 2:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'ISFJ') | (df['personality_type'] == 'ESFJ')]
    elif selection == 3:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'INFJ') | (df['personality_type'] == 'INFP')]
    elif selection == 4:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'INTJ') | (df['personality_type'] == 'ENFP')]
    elif selection == 5:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'ESFP') | (df['personality_type'] == 'ENFJ')]

    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(df_filtered['cleaned_text'])
    cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    avg_similarity = cos_sim.mean(axis=0)

    # Recommend restaurants with highest average similarity
    recommended_indices = avg_similarity.argsort()[:-11:-1]  # Top 5 recommendations
    recommended_restaurants = df_filtered.iloc[recommended_indices][['business_name', 'text']]
    print("Here are some of the restaurants you might like: ")
    return recommended_restaurants[['business_name', 'text']]

In [10]:
personality_recommendation(p)

What type of dining do you wanna experience today?
1. I'm always eager to try new and adventurous dishes, exploring diverse cuisines and dining atmospheres.
2. I prefer cozy, familiar settings where I can enjoy classic comfort foods and unwind with my favorite dishes.
3. I prioritize health and wellness in my dining choices, opting for nutritious options and mindful eating experiences.
4. I love experimenting with different flavors and culinary techniques, seeking out unique dining experiences and culinary challenges.
5. Dining out is a social occasion for me, and I enjoy vibrant, lively atmospheres where I can connect with friends and family over great food.
We see you are The Social Foodie
Here are some of the restaurants you might like: 


Unnamed: 0,business_name,text
98789,Noble Crust,first time dining here in person the first tim...
50676,The Lemon Bar,great food and nice atmosphere await at this r...
45072,Mama Carolla's,its like stepping into a tuscan cottage the at...
77579,Barracuda,tldr really cute tiny taco shop with a large s...
3493,Dim Sum House,this place sold me on their decorations i came...
44571,Pomodoro East,the food is great you dont want to miss happy ...
25583,Chihuahua's Cantina & Grill,went out to dinner with friends and this was m...
263,México Lindo,dont let the outside of this place scare you a...
25457,Great Basin Brewing,i have been trying to come here for awhile now...
62657,Milktooth,so happy to finally enjoy this gem after heari...


In [11]:
recommended_df = personality_recommendation(p)
print(recommended_df)

What type of dining do you wanna experience today?
1. I'm always eager to try new and adventurous dishes, exploring diverse cuisines and dining atmospheres.
2. I prefer cozy, familiar settings where I can enjoy classic comfort foods and unwind with my favorite dishes.
3. I prioritize health and wellness in my dining choices, opting for nutritious options and mindful eating experiences.
4. I love experimenting with different flavors and culinary techniques, seeking out unique dining experiences and culinary challenges.
5. Dining out is a social occasion for me, and I enjoy vibrant, lively atmospheres where I can connect with friends and family over great food.
We see you are The Social Foodie
Here are some of the restaurants you might like: 
                     business_name  \
98789                  Noble Crust   
50676                The Lemon Bar   
45072               Mama Carolla's   
77579                    Barracuda   
3493                 Dim Sum House   
44571                

In [12]:
for text in recommended_df['text']:
    print(text)

first time dining here in person the first time we got delivery the food and service here never disappoints it was a saturday night when we went so the place was very packed we sat outside at the picnic benches which were spaced  feet apart 

the food came out pretty slow but thats because it was very busy with the people dining in and the takeout the appetizers were excellent flavorful good portions and cooked so well i loved the fried tomato pork belly and the meatballs the fries were really well seasoned too the prices are average here around  for an appetizer but you get a decent size 

i got the fried chicken parm with spaghetti and it was one of the best juicy tender chicken ive ever had the sauce was delicious pasta was cooked just right not too soft or hard i also ordered the house salad and the house vinaigrette was very good i am picky about salad dressings but theirs was amazing

this restaurant followed all the rules social distancing staff wore masks and gloves the whole t

In [25]:
def personality_recommendation_eval(df, loc, personality_type):
    df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == personality_type)]
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(df_filtered['cleaned_text'])
    cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    avg_similarity = cos_sim.mean(axis=0)

    # Recommend restaurants with highest average similarity
    recommended_indices = avg_similarity.argsort()[:-11:-1]  # Top 10 recommendations
    recommended_restaurants = df_filtered.iloc[recommended_indices]['business_name'].tolist()
    return recommended_restaurants

def get_recommendations(df, city, personality, user_id):
    # Get recommendations based on user's city and personality
    recommendations = personality_recommendation_eval(df, city, personality)
    
    # Get relevant restaurants from ratings DataFrame for the same user
    relevant_restaurants = p[(p['user_id'] == user_id) & (p['average_stars'] >= 3.5)]['business_name'].tolist()
    
    return recommendations, relevant_restaurants

# Call the function with a specific user ID
user_id = 'tCXElwhzekJEH6QJe3xs7Q'
user_recommendations, user_relevant_restaurants = get_recommendations(p, 'Philadelphia', 'INTP', user_id)

# Print the recommendations and relevant restaurants
print("Recommendations based on user's city and personality:", user_recommendations)
print("Relevant restaurants based on user's ratings:", user_relevant_restaurants)


from sklearn.metrics import precision_score, recall_score, f1_score

# Ensure both lists have the same length
min_length = min(len(user_relevant_restaurants), len(user_recommendations))
user_relevant_restaurants = user_relevant_restaurants[:min_length]
user_recommendations = user_recommendations[:min_length]

# Calculate precision, recall, and F1-score
precision = precision_score(user_relevant_restaurants, user_recommendations, average='micro')
recall = recall_score(user_relevant_restaurants, user_recommendations, average='micro')
f1 = f1_score(user_relevant_restaurants, user_recommendations, average='micro')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)



Recommendations based on user's city and personality: ['El Jarocho', 'Solo Skewer Bar', 'Alma de Cuba', 'Zahav', 'El Camino Real', 'Chengdu Famous Food', 'Craft Hall', 'Sang Kee Noodle House', 'Revolution House', 'Kaffa Crossing']
Relevant restaurants based on user's ratings: ['St Honore Pastries', 'Mood Cafe', 'Village Whiskey', "Bui's", 'South Philadelphia Tap Room', 'Tyson Bees', 'Parc', 'Brauhaus Schmitz', 'Fu-Wah Mini Market', 'Fu-Wah Mini Market', "Koch's Deli", 'Zahav', 'Hummus Grill', 'Ocean City', 'Ten Stone Bar & Restaurant', 'Sang Kee Noodle House', 'Sang Kee Noodle House', 'Manakeesh Cafe Bakery & Grill', 'The Gold Standard Cafe', 'Ocean Harbor', 'Tea Do', 'Knockbox Cafe', 'Knockbox Cafe', "Xi'an Sizzling Woks", 'Sang Kee Peking Duck House', 'Nan Zhou Hand Drawn Noodle House', 'Magic Carpet', "Monk's Cafe", 'Han Dynasty', 'Han Dynasty', 'Mong Shil Tong Tong', 'Hibachi Express', 'El Taco Torro', 'Convivio Italian Artisan Cuisine', 'The Garden Table', 'Asaka Japanese Restaura

### Food Personality Recommendation using SVD

In [51]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
def personality_recommendation_with_svd(df):
    df_filtered = df[:10000]
    loc = input('Please Enter Your City: ')
    persona_options = {"1": "The Adventurous Foodie", 
                        "2": "The Comfort Food Connoisseur", 
                        "3": "The Health-Conscious Eater", 
                        "4": "The Culinary Explorer",
                        "5": "The Social Foodie"}
    options = ["I'm always eager to try new and adventurous dishes, exploring diverse cuisines and dining atmospheres.", 
    "I prefer cozy, familiar settings where I can enjoy classic comfort foods and unwind with my favorite dishes.",
    "I prioritize health and wellness in my dining choices, opting for nutritious options and mindful eating experiences.",
    "I love experimenting with different flavors and culinary techniques, seeking out unique dining experiences and culinary challenges.",
    "Dining out is a social occasion for me, and I enjoy vibrant, lively atmospheres where I can connect with friends and family over great food."]
    print("What type of dining experiences resonates most with you?")
    for i, option in enumerate(options, start=1):
        print(f"{i}. {option}")

    selection = int(input("Please make a selection from the above options"))

    if 1 <= selection <= len(option):
        selected_option = options[selection - 1]
        selected_personality = list(persona_options.values())[selection - 1]
        print(f"We see you are {selected_personality}")
    else:
        print("Invalid Selection. Please choose a number between 1 and", len(options))

    if selection == 1:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'ENTP') | (df['personality_type'] == 'ESTP')]
    elif selection == 2:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'ISFJ') | (df['personality_type'] == 'ESFJ')]
    elif selection == 3:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'INFJ') | (df['personality_type'] == 'INFP')]
    elif selection == 4:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'INTJ') | (df['personality_type'] == 'ENFP')]
    elif selection == 5:
        df_filtered = df[(df['business_city'] == loc) & (df['personality_type'] == 'ESFP') | (df['personality_type'] == 'ENFJ')]

    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(df_filtered['cleaned_text'])

    n_components = 100
    svd = TruncatedSVD(n_components=n_components)
    svd_matrix = svd.fit_transform(tfidf_matrix)

    cos_sim = cosine_similarity(svd_matrix, svd_matrix)
    avg_similarity = cos_sim.mean(axis=0)

    # Recommend restaurants with highest average similarity
    recommended_indices = avg_similarity.argsort()[:-11:-1]  # Top 5 recommendations
    recommended_restaurants = df_filtered.iloc[recommended_indices][['business_name', 'text']]
    print("Here are some of the restaurants you might like: ")
    return recommended_restaurants[['business_name', 'text']]

In [59]:
personality_recommendation_with_svd(p)

What type of dining experiences resonates most with you?
1. I'm always eager to try new and adventurous dishes, exploring diverse cuisines and dining atmospheres.
2. I prefer cozy, familiar settings where I can enjoy classic comfort foods and unwind with my favorite dishes.
3. I prioritize health and wellness in my dining choices, opting for nutritious options and mindful eating experiences.
4. I love experimenting with different flavors and culinary techniques, seeking out unique dining experiences and culinary challenges.
5. Dining out is a social occasion for me, and I enjoy vibrant, lively atmospheres where I can connect with friends and family over great food.
We see you are The Social Foodie
Here are some of the restaurants you might like: 


Unnamed: 0,business_name,text
36051,Osteria,i really love osteria i always forget about it...
45072,Mama Carolla's,its like stepping into a tuscan cottage the at...
46307,Seoul Garden,this review is going to include a bit of backg...
50676,The Lemon Bar,great food and nice atmosphere await at this r...
14922,Thai Chef & Noodle Fusion,this is definitely one of my favorite thai res...
98789,Noble Crust,first time dining here in person the first tim...
44571,Pomodoro East,the food is great you dont want to miss happy ...
38708,Blood & Sand,passion x blood sand \n\nblood sand is whe...
17085,IndeBlue Modern Indian Food & Spirits,i have been meaning to go to indeblue with my ...
93972,Commander's Palace,fun place apparently a staple in the nola comm...


In [60]:
recom_restaurants = personality_recommendation_with_svd(p)
print(recom_restaurants)

What type of dining experiences resonates most with you?
1. I'm always eager to try new and adventurous dishes, exploring diverse cuisines and dining atmospheres.
2. I prefer cozy, familiar settings where I can enjoy classic comfort foods and unwind with my favorite dishes.
3. I prioritize health and wellness in my dining choices, opting for nutritious options and mindful eating experiences.
4. I love experimenting with different flavors and culinary techniques, seeking out unique dining experiences and culinary challenges.
5. Dining out is a social occasion for me, and I enjoy vibrant, lively atmospheres where I can connect with friends and family over great food.
We see you are The Social Foodie
Here are some of the restaurants you might like: 
                   business_name  \
45072             Mama Carolla's   
36051                    Osteria   
46307               Seoul Garden   
98789                Noble Crust   
50676              The Lemon Bar   
14922  Thai Chef & Noodle F

In [62]:
print(recom_restaurants['text'].iloc[3])

first time dining here in person the first time we got delivery the food and service here never disappoints it was a saturday night when we went so the place was very packed we sat outside at the picnic benches which were spaced  feet apart 

the food came out pretty slow but thats because it was very busy with the people dining in and the takeout the appetizers were excellent flavorful good portions and cooked so well i loved the fried tomato pork belly and the meatballs the fries were really well seasoned too the prices are average here around  for an appetizer but you get a decent size 

i got the fried chicken parm with spaghetti and it was one of the best juicy tender chicken ive ever had the sauce was delicious pasta was cooked just right not too soft or hard i also ordered the house salad and the house vinaigrette was very good i am picky about salad dressings but theirs was amazing

this restaurant followed all the rules social distancing staff wore masks and gloves the whole t