In [3]:
import os
from urllib.parse import quote_plus

import numpy as np #conda install -c anaconda numpy
import pandas as pd #conda install -c anaconda pandas
import seaborn as sns #conda install -c anaconda seaborn
from numpy import dot
from numpy.linalg import norm
from sklearn.metrics.pairwise import cosine_similarity #conda install -c anaconda scikit-learn
from sqlalchemy import create_engine #conda install -c anaconda sqlalchemy

In [4]:
# Connection settings. Each value can be overridden through an environment
# variable so real credentials never have to be saved inside the notebook;
# the fallbacks reproduce the original local-development setup.
host = os.environ.get('DB_HOST', 'localhost')  # defaults to the local machine
user = os.environ.get('DB_USER', 'root')
password = os.environ.get('DB_PASSWORD', '')
db_name = os.environ.get('DB_NAME', 'user_info')
port = os.environ.get('DB_PORT', '3306')  # kept as a string; only interpolated into the URL

# Percent-encode user and password so characters such as '@', ':' or '/'
# cannot be mistaken for URL delimiters.
connection_string = (
    f'mysql+mysqlconnector://{quote_plus(user)}:{quote_plus(password)}'
    f'@{host}:{port}/{db_name}'
)
engine = create_engine(connection_string)

In [6]:
# Restaurant table: id, name and a comma-separated label string.
sql_query = 'SELECT rID,rName,all_label FROM res_info;'
res_info = pd.read_sql_query(sql=sql_query, con=engine)

# Ratings table: which user gave which rating to which restaurant.
sql_query = 'SELECT uID,rID,rating FROM cost_detail;'
cost_detail = pd.read_sql_query(sql=sql_query, con=engine)

# Attach each rated restaurant's label string to its rating rows (join on rID).
res_rating = cost_detail[["uID","rID","rating"]].merge(
    res_info[["rID","all_label"]],
    on='rID',
)

# Split the label string on ',' into one 0/1 indicator column per label and
# replace the raw label column with those indicators.
dummies = res_rating["all_label"].str.get_dummies(sep=',')
df = pd.concat([res_rating, dummies], axis=1).drop(columns=['all_label'])

# Preview: first five rows, first ten columns.
print(df.iloc[:5,:10])

   uID  rID  rating  三明治  中式  丼飯  便當  健康餐  冰淇淋  咖哩飯
0   16   52       1    0   1   0   0    1    0    0
1   14   52       2    0   1   0   0    1    0    0
2   16   52       4    0   1   0   0    1    0    0
3   16   52       4    0   1   0   0    1    0    0
4   10   52       1    0   1   0   0    1    0    0


In [9]:
def find_common_res(user1, user2):
    """Return the set of restaurant IDs (``rID``) that both users have rated.

    Reads the notebook-level ``df``; a user with no rows in ``df`` simply
    contributes an empty set, so the result is then empty.
    """
    rated_by_first = df.loc[df["uID"] == user1, "rID"].values
    rated_by_second = df.loc[df["uID"] == user2, "rID"].values
    return set(rated_by_first) & set(rated_by_second)

def cal_similarity_for_res_ratings(user1, user2, res_id, method="cosine", ratings=None):
    """Calculate the similarity between two users' ratings of the given restaurants.

    Parameters
    ----------
    user1, user2 : user IDs, matched against the ``uID`` column.
    res_id : iterable of restaurant IDs (``rID``) to compare on.
    method : only ``"cosine"`` is implemented; any other value returns ``None``.
    ratings : optional DataFrame with ``uID``, ``rID`` and ``rating`` columns.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    Cosine similarity of the two rating vectors, ``0.0`` when it is undefined
    (no shared restaurant or a zero-length vector), or ``None`` for an
    unsupported ``method``.

    A user may have rated the same restaurant more than once, so ratings are
    first averaged per restaurant and the two vectors are aligned on the
    restaurants both users actually rated. Without that step the vectors can
    have different lengths or compare ratings of different restaurants.
    """
    if method != "cosine":
        return None
    if ratings is None:
        ratings = df

    wanted = list(res_id)

    def _mean_rating_per_restaurant(uid):
        # One value per rID: the mean of all ratings this user gave it.
        rows = ratings[(ratings["uID"] == uid) & ratings["rID"].isin(wanted)]
        return rows.groupby("rID")["rating"].mean()

    first = _mean_rating_per_restaurant(user1)
    second = _mean_rating_per_restaurant(user2)

    # Keep only restaurants present for both users, in one fixed order, so
    # position i in both vectors refers to the same restaurant.
    shared = first.index.intersection(second.index).sort_values()
    vec1 = first.reindex(shared).to_numpy(dtype=float)
    vec2 = second.reindex(shared).to_numpy(dtype=float)

    denominator = norm(vec1) * norm(vec2)
    if denominator == 0:
        # Empty or all-zero vectors: cosine is undefined, report "no similarity"
        # rather than NaN.
        return 0.0
    return dot(vec1, vec2) / denominator

def find_the_most_similar_users(user, num, min_common=10):
    """Return the IDs of the ``num`` users whose ratings are most similar to ``user``.

    Parameters
    ----------
    user : ID of the target user (``uID``).
    num : maximum number of user IDs to return.
    min_common : minimum number of restaurants both users must have rated for
        the similarity to be computed; pairs below it get similarity 0.

    Returns
    -------
    numpy array of user IDs ordered from most to least similar. It is shorter
    than ``num`` when there are fewer other users.

    Reads the notebook-level ``df`` and relies on ``find_common_res`` and
    ``cal_similarity_for_res_ratings``.
    """
    similarities = []
    user_ids = []
    for other_user in df.uID.unique():
        if other_user == user:
            continue

        common_res = find_common_res(user, other_user)
        if len(common_res) < min_common:
            # Too little overlap for the score to be meaningful.
            sim = 0
        else:
            sim = cal_similarity_for_res_ratings(user, other_user, common_res)
            # An undefined score must not win the ranking: argsort places NaN
            # last, so the reversal below would list it first, and a None
            # would make the array unsortable.
            if sim is None or np.isnan(sim):
                sim = 0

        similarities.append(sim)
        user_ids.append(other_user)

    # Sort ascending, reverse for descending similarity, keep the top ``num``.
    similarities, user_ids = np.array(similarities), np.array(user_ids)
    sorted_index = (np.argsort(similarities)[::-1][:num]).tolist()
    most_similar_users = user_ids[sorted_index]
    return most_similar_users

In [10]:
# Target user and how many neighbours to look up for the demo run.
user, num = 1, 15
similar_users = find_the_most_similar_users(user=user, num=num)

print(f"user: {user}")
print(f"The most similar users: {similar_users}")

user: 1
The most similar users: [ 3 12 14 10  6 13 15  5  2  7 17 20  9  8 18]


In [11]:
def recommend(user, similar_users, top_n=10):
    """Rank restaurants ``user`` has not rated by the similar users' mean rating.

    Parameters
    ----------
    user : ID of the user to recommend for.
    similar_users : collection of user IDs whose ratings are averaged.
    top_n : number of rows to keep.

    Returns
    -------
    DataFrame with columns ``rID`` and ``rating`` (mean rating among
    ``similar_users``), sorted by ``rating`` descending, with a fresh 0-based
    index. Reads the notebook-level ``df``.
    """
    # Restaurants the target user already has ratings for.
    already_rated = np.unique(df.loc[df["uID"] == user, "rID"].values)

    # Rating rows from the similar users for restaurants new to the target user.
    is_new = ~df["rID"].isin(already_rated)
    from_neighbours = df["uID"].isin(similar_users)
    candidates = df.loc[is_new & from_neighbours, ["rID", "rating"]]

    # Mean rating per restaurant, best first, cut to the requested size.
    ranked = (
        candidates.groupby("rID")
        .mean()
        .reset_index()
        .sort_values(by="rating", ascending=False)
        .iloc[:top_n]
        .reset_index(drop=True)
    )
    return ranked

In [12]:
# Number of recommendations to show.
top_n = 10
top_ratings = recommend(user,similar_users, top_n)
print(f"Top-{top_n} average ratings by the most similar users:")
# Look up each recommended restaurant's name for display.
print(pd.merge(top_ratings, res_info[["rID","rName"]], on='rID'))

Top-10 average ratings by the most simiilar users:
   rID    rating                                              rName
0   51  5.000000                                            Mica 蔬食
1   10  5.000000                                          18度C巧克力工房
2   20  4.000000                                              520餐廚
3   36  4.000000                            Bon Voyage Coffee 樂悠遊咖啡
4   49  4.000000                                     La Bello 美妍冰淇淋
5   48  3.833333  L Kitchen 佬廚坊 (寵物餐廳，港式餐點) [座位跟人手限制，一組客人超過6位，無法...
6   54  3.800000        Montréal Reservation Restaurant (蒙特利爾預約制餐廳)
7    9  3.666667                                           132巷咖啡簡餐
8   12  3.500000                                             24鬆餅小舖
9   18  3.500000                                          50嵐 埔里中山店
