In [1]:
# 라이브러리 호출
import ast
import datetime
import pandas as pd
from argparse import ArgumentParser

import warnings

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# 1. 기존 데이터 프레임 불러오기
csv_path = "../data/cleansing_game_data.csv"
games_df = pd.read_csv(csv_path, index_col=0)
recommend_num = 10

In [3]:
games_df

Unnamed: 0,id,platforms,esrb_rating,rating,mode,merge
0,741344,PC,Not Rated,0.00,,"Shooter, ,"
1,374441,Web,Not Rated,0.00,Singleplayer,"Shooter, ,Singleplayer,Space,shotter"
2,97470,PC,Not Rated,0.00,Boss Rush,"Action,Platformer,Shooter,Sci-Fi,2D,Sci-fi,Pix..."
3,306287,iOS,Not Rated,0.00,,"Action,Arcade,Casual, ,friends,fun,scratch"
4,176964,"PC,MacOS,Linux,Web",Not Rated,0.00,,"Action, ,2D,Score Attack,Black and White"
...,...,...,...,...,...,...
550559,881,"PC,Xbox One",Everyone,4.36,"Multiplayer,Singleplayer","Arcade,Open world,Racing,Open world,Singleplay..."
550560,795517,"Linux,MacOS,PC",Not Rated,0.00,"Adventure,Singleplayer,Story","Adventure,Horror,Anime,Comedy,Horror,Magic,Mys..."
550561,54811,Game Boy Color,Not Rated,0.00,"Multiplayer,Singleplayer","Puzzle, ,Singleplayer,exclusive,true exclusive..."
550562,283966,PlayStation Portable,Rating Pending,0.00,,"Racing, ,"


In [30]:
selected_values = {"platforms":["PC"],
                  "birth_year":[1954],
                  "rating":[3.4],
                  "mode":[None],
                  "genre":["Adventure,Indie"],
                  "theme":["Anime,Fantasy,Strategy"],
                  "tags":["2D,TRPG"]}

In [31]:
# 2. user_data
# 2-1) 입력값 데이터프레임으로 불러오기
user_data = pd.DataFrame(selected_values)

In [32]:
user_data

Unnamed: 0,platforms,birth_year,rating,mode,genre,theme,tags
0,PC,1954,3.4,,"Adventure,Indie","Anime,Fantasy,Strategy","2D,TRPG"


In [33]:
# 2-2) 결측치 처리
user_data["rating"] = user_data["rating"].fillna(3.5)
user_data["birth_year"] = user_data["birth_year"].fillna(datetime.datetime.now().year)
user_data = user_data.fillna("")

In [34]:
user_data

Unnamed: 0,platforms,birth_year,rating,mode,genre,theme,tags
0,PC,1954,3.4,,"Adventure,Indie","Anime,Fantasy,Strategy","2D,TRPG"


In [35]:
# 2-3) 변수 설정
user_platforms = user_data["platforms"][0]
user_birth_year = user_data["birth_year"][0]
user_rating = user_data["rating"][0]
user_mode = user_data["mode"][0]
user_genres = user_data["genre"][0]
user_themes = user_data["theme"][0]
user_tags = user_data["tags"][0]

In [36]:
user_tags

'2D,TRPG'

In [37]:
# 3. 데이터 필터링
# 3-1) Platforms
def filter_by_platforms(df, platform_filter):
    """
    default = ""
    값을 입력하지 않으면 모든 게임을 반환하고,
    값을 입력하면 입력된 platform이 포함된 데이터를 필터링한다.
    """
    if platform_filter == "":
        df = df
    else:
        platforms_filter_list = platform_filter.split(",")
        df = df[
            df["platforms"].apply(
                lambda x: all(
                    platform in x.split(",") for platform in platforms_filter_list
                )
            )
        ]
    return df


filter_p_df = filter_by_platforms(games_df, user_platforms)

In [38]:
filter_p_df

Unnamed: 0,id,platforms,esrb_rating,rating,mode,merge
0,741344,PC,Not Rated,0.00,,"Shooter, ,"
2,97470,PC,Not Rated,0.00,Boss Rush,"Action,Platformer,Shooter,Sci-Fi,2D,Sci-fi,Pix..."
4,176964,"PC,MacOS,Linux,Web",Not Rated,0.00,,"Action, ,2D,Score Attack,Black and White"
6,957105,"Linux,MacOS,PC",Not Rated,0.00,Singleplayer,"Adventure, ,Singleplayer,Atmospheric,3D,Unity,..."
8,698979,PC,Not Rated,0.00,,"Strategy,Strategy,Real time strategy"
...,...,...,...,...,...,...
550551,5662,"PC,iOS",Mature,3.39,,"Action,Adventure, ,Blood,nightmare,terror"
550552,349557,"PC,MacOS",Not Rated,3.97,Singleplayer,"Adventure,Indie,Comedy,Detective,Mystery,Singl..."
550556,849517,"PlayStation 5,PC,Xbox Series X|S",Mature,3.51,,"Action,Adventure,Horror,Horror,Noir,Horror,Cin..."
550559,881,"PC,Xbox One",Everyone,4.36,"Multiplayer,Singleplayer","Arcade,Open world,Racing,Open world,Singleplay..."


In [39]:
# 3-2) esrb_rating
def filter_by_age(birth_year, df):
    """
    default=current_year
    값을 입력하지 않으면 전연령 게임만 반환하고,
    값을 입력하면 입력된 나이에 맞는 esrb_rating에 따라 데이터를 필터링한다.
    """
    current_year = datetime.datetime.now().year
    age = current_year - birth_year

    if age < 10:
        df = df[df["esrb_rating"] == "Everyone"]
    elif age < 13:
        df = df[df["esrb_rating"].isin(["Everyone 10+", "Everyone"])]
    elif age < 17:
        df = df[df["esrb_rating"].isin(["Teen", "Everyone 10+", "Everyone"])]
    elif age < 18:
        df = df[df["esrb_rating"].isin(["Mature", "Teen", "Everyone 10+", "Everyone"])]
    else:
        df = df
        age = "20+"
    return df


filter_pa_df = filter_by_age(user_birth_year, filter_p_df)

In [40]:
filter_pa_df

Unnamed: 0,id,platforms,esrb_rating,rating,mode,merge
0,741344,PC,Not Rated,0.00,,"Shooter, ,"
2,97470,PC,Not Rated,0.00,Boss Rush,"Action,Platformer,Shooter,Sci-Fi,2D,Sci-fi,Pix..."
4,176964,"PC,MacOS,Linux,Web",Not Rated,0.00,,"Action, ,2D,Score Attack,Black and White"
6,957105,"Linux,MacOS,PC",Not Rated,0.00,Singleplayer,"Adventure, ,Singleplayer,Atmospheric,3D,Unity,..."
8,698979,PC,Not Rated,0.00,,"Strategy,Strategy,Real time strategy"
...,...,...,...,...,...,...
550551,5662,"PC,iOS",Mature,3.39,,"Action,Adventure, ,Blood,nightmare,terror"
550552,349557,"PC,MacOS",Not Rated,3.97,Singleplayer,"Adventure,Indie,Comedy,Detective,Mystery,Singl..."
550556,849517,"PlayStation 5,PC,Xbox Series X|S",Mature,3.51,,"Action,Adventure,Horror,Horror,Noir,Horror,Cin..."
550559,881,"PC,Xbox One",Everyone,4.36,"Multiplayer,Singleplayer","Arcade,Open world,Racing,Open world,Singleplay..."


In [41]:
# 3-3) rating
def filter_by_rating(rating, df):
    """
    default = 3.5
    입력된 rating 이상의 데이터만 남도록 필터링한다.
    """
    df = df[df["rating"] >= rating]
    return df


filter_par_df = filter_by_rating(user_rating, filter_pa_df)

In [42]:
filter_par_df

Unnamed: 0,id,platforms,esrb_rating,rating,mode,merge
43,10813,PC,Teen,3.67,"Singleplayer,Story","Adventure,Horror,Horror,Mystery,Singleplayer,S..."
340,619475,"PC,PlayStation 4,Xbox One,Nintendo DS",Not Rated,3.71,"Multiplayer,PvP,Singleplayer,Split screen,Story","Action,Adventure,Arcade,Indie,RPG,Sports,Anime..."
345,379556,"Xbox One,Nintendo DS,PC",Teen,3.74,"Roguelike,Role-Playing,Singleplayer,Survival","Adventure,RPG,Role-Playing,Simulation,Strategy..."
434,42570,"PC,PlayStation 3",Not Rated,3.45,,"Action,RPG,Shooter, ,"
438,58637,"Xbox One,PC,PlayStation 4",Not Rated,3.54,"Co-operative,Multiplayer,Singleplayer","Action,Action,RPG,RPG,Action,Anime,Fantasy,Sin..."
...,...,...,...,...,...,...
550546,3759,"Xbox 360,PC,PlayStation 3,Xbox One",Adults Only,3.77,"Singleplayer,Story","Action,Action,RPG,Adventure,Horror,Metroidvani..."
550549,12908,PC,Not Rated,4.11,"Competitive,Multiplayer,Sandbox,Singleplayer,S...","Competitive,Racing,Sandbox,Sports,Sandbox,VR,S..."
550552,349557,"PC,MacOS",Not Rated,3.97,Singleplayer,"Adventure,Indie,Comedy,Detective,Mystery,Singl..."
550556,849517,"PlayStation 5,PC,Xbox Series X|S",Mature,3.51,,"Action,Adventure,Horror,Horror,Noir,Horror,Cin..."


In [43]:
# 3-4) mode
def filter_by_mode(mode, df):
    """
    default = " "
    값을 입력하지 않으면 데이터 전부를 반환하고,
    값을 입력하면 입력된 mode에 맞게 데이터를 필터링한다.
    """
    if mode == " ":
        df = df
    else:
        df = df[df["mode"].apply(lambda x: mode in x)]
    return df


filter_parm_df = filter_by_mode(user_mode, filter_par_df)

In [44]:
filter_parm_df

Unnamed: 0,id,platforms,esrb_rating,rating,mode,merge
43,10813,PC,Teen,3.67,"Singleplayer,Story","Adventure,Horror,Horror,Mystery,Singleplayer,S..."
340,619475,"PC,PlayStation 4,Xbox One,Nintendo DS",Not Rated,3.71,"Multiplayer,PvP,Singleplayer,Split screen,Story","Action,Adventure,Arcade,Indie,RPG,Sports,Anime..."
345,379556,"Xbox One,Nintendo DS,PC",Teen,3.74,"Roguelike,Role-Playing,Singleplayer,Survival","Adventure,RPG,Role-Playing,Simulation,Strategy..."
434,42570,"PC,PlayStation 3",Not Rated,3.45,,"Action,RPG,Shooter, ,"
438,58637,"Xbox One,PC,PlayStation 4",Not Rated,3.54,"Co-operative,Multiplayer,Singleplayer","Action,Action,RPG,RPG,Action,Anime,Fantasy,Sin..."
...,...,...,...,...,...,...
550546,3759,"Xbox 360,PC,PlayStation 3,Xbox One",Adults Only,3.77,"Singleplayer,Story","Action,Action,RPG,Adventure,Horror,Metroidvani..."
550549,12908,PC,Not Rated,4.11,"Competitive,Multiplayer,Sandbox,Singleplayer,S...","Competitive,Racing,Sandbox,Sports,Sandbox,VR,S..."
550552,349557,"PC,MacOS",Not Rated,3.97,Singleplayer,"Adventure,Indie,Comedy,Detective,Mystery,Singl..."
550556,849517,"PlayStation 5,PC,Xbox Series X|S",Mature,3.51,,"Action,Adventure,Horror,Horror,Noir,Horror,Cin..."


In [45]:
# 4. 인덱스 리셋
final_df = filter_parm_df.reset_index(drop=True)

In [46]:
final_df

Unnamed: 0,id,platforms,esrb_rating,rating,mode,merge
0,10813,PC,Teen,3.67,"Singleplayer,Story","Adventure,Horror,Horror,Mystery,Singleplayer,S..."
1,619475,"PC,PlayStation 4,Xbox One,Nintendo DS",Not Rated,3.71,"Multiplayer,PvP,Singleplayer,Split screen,Story","Action,Adventure,Arcade,Indie,RPG,Sports,Anime..."
2,379556,"Xbox One,Nintendo DS,PC",Teen,3.74,"Roguelike,Role-Playing,Singleplayer,Survival","Adventure,RPG,Role-Playing,Simulation,Strategy..."
3,42570,"PC,PlayStation 3",Not Rated,3.45,,"Action,RPG,Shooter, ,"
4,58637,"Xbox One,PC,PlayStation 4",Not Rated,3.54,"Co-operative,Multiplayer,Singleplayer","Action,Action,RPG,RPG,Action,Anime,Fantasy,Sin..."
...,...,...,...,...,...,...
4526,3759,"Xbox 360,PC,PlayStation 3,Xbox One",Adults Only,3.77,"Singleplayer,Story","Action,Action,RPG,Adventure,Horror,Metroidvani..."
4527,12908,PC,Not Rated,4.11,"Competitive,Multiplayer,Sandbox,Singleplayer,S...","Competitive,Racing,Sandbox,Sports,Sandbox,VR,S..."
4528,349557,"PC,MacOS",Not Rated,3.97,Singleplayer,"Adventure,Indie,Comedy,Detective,Mystery,Singl..."
4529,849517,"PlayStation 5,PC,Xbox Series X|S",Mature,3.51,,"Action,Adventure,Horror,Horror,Noir,Horror,Cin..."


In [47]:
# 5. 벡터화
cv = CountVectorizer()
cv_matrix = cv.fit_transform(final_df["merge"])

In [48]:
# 6. 사용자 벡터
# 6-1) genres, themes 가중치를 주어 병합하기
def merge_column(g_weight, t_weight):
    temp_str = ""
    for j in range(len(user_genres.split(","))):
        for k in range(g_weight - j):
            temp_str += user_genres.split(",")[j] + ","
    for j in range(len(user_themes.split(","))):
        for k in range(t_weight - j):
            temp_str += user_themes.split(",")[j] + ","
    for j in range(len(user_tags.split(","))):
        temp_str += user_tags.split(",")[j] + ","

    return [temp_str]


user_taste = merge_column(3, 3)

In [49]:
user_taste

['Adventure,Adventure,Adventure,Indie,Indie,Anime,Anime,Anime,Fantasy,Fantasy,Strategy,2D,TRPG,']

In [50]:
# 6-2) 사용자 취향 벡터화
user_vector = cv.transform(user_taste)

In [51]:
# 7. 게임 추천
# 7-1) 코사인 유사도 계산하기
cos_sim = cosine_similarity(user_vector, cv_matrix)

In [52]:
# 7-2) 게임 추천 리스트
def reommendation(recommend_num, df, cos_sim):
    """
    코사인 유사도가 높은 게임을 recommend_num 개수만큼 추천한다.
    final_df 데이터가 recommend_num보다 적으면 final_df를 모두 반환한다.
    """
    pred_sim_games = list(enumerate(cos_sim[0]))
    sorted_pred_sim_games = sorted(pred_sim_games, key=lambda x: x[1], reverse=True)[1:]

    reommendation_list = []
    similarity_list =  []
    max_recommendations = min(recommend_num, len(df))

    for i, item in enumerate(sorted_pred_sim_games):
        if i >= max_recommendations:
            break
        recommendation_game = df[df.index == item[0]]["id"].values[0]
        similarity = str(round(item[1] * 100, 3)) + "%"
        reommendation_list.append(recommendation_game)
        similarity_list.append(similarity)

    return reommendation_list, similarity_list


recommendation_list, similarity_list = reommendation(recommend_num, final_df, cos_sim)

In [53]:
recommendation_list, similarity_list

([41561, 30843, 32142, 451523, 36505, 34647, 41449, 31397, 43090, 34977],
 ['68.229%',
  '65.653%',
  '65.653%',
  '64.594%',
  '64.327%',
  '55.709%',
  '55.709%',
  '55.709%',
  '55.709%',
  '55.709%'])

In [54]:
final_df

Unnamed: 0,id,platforms,esrb_rating,rating,mode,merge
0,10813,PC,Teen,3.67,"Singleplayer,Story","Adventure,Horror,Horror,Mystery,Singleplayer,S..."
1,619475,"PC,PlayStation 4,Xbox One,Nintendo DS",Not Rated,3.71,"Multiplayer,PvP,Singleplayer,Split screen,Story","Action,Adventure,Arcade,Indie,RPG,Sports,Anime..."
2,379556,"Xbox One,Nintendo DS,PC",Teen,3.74,"Roguelike,Role-Playing,Singleplayer,Survival","Adventure,RPG,Role-Playing,Simulation,Strategy..."
3,42570,"PC,PlayStation 3",Not Rated,3.45,,"Action,RPG,Shooter, ,"
4,58637,"Xbox One,PC,PlayStation 4",Not Rated,3.54,"Co-operative,Multiplayer,Singleplayer","Action,Action,RPG,RPG,Action,Anime,Fantasy,Sin..."
...,...,...,...,...,...,...
4526,3759,"Xbox 360,PC,PlayStation 3,Xbox One",Adults Only,3.77,"Singleplayer,Story","Action,Action,RPG,Adventure,Horror,Metroidvani..."
4527,12908,PC,Not Rated,4.11,"Competitive,Multiplayer,Sandbox,Singleplayer,S...","Competitive,Racing,Sandbox,Sports,Sandbox,VR,S..."
4528,349557,"PC,MacOS",Not Rated,3.97,Singleplayer,"Adventure,Indie,Comedy,Detective,Mystery,Singl..."
4529,849517,"PlayStation 5,PC,Xbox Series X|S",Mature,3.51,,"Action,Adventure,Horror,Horror,Noir,Horror,Cin..."


In [55]:
import pandas as pd

# recommendation_list에 있는 id를 그대로 순서대로 정렬
final_df['id'] = pd.Categorical(final_df['id'], categories=recommendation_list, ordered=True)
result_df = final_df[final_df['id'].isin(recommendation_list)].sort_values('id')
result_df['similarity'] = similarity_list
print(recommendation_list)
result_df


[41561, 30843, 32142, 451523, 36505, 34647, 41449, 31397, 43090, 34977]


Unnamed: 0,id,platforms,esrb_rating,rating,mode,merge,similarity
820,41561,PC,Not Rated,4.09,,"Strategy,Action,Adventure,Anime,Fantasy,Magic,",68.229%
372,30843,PC,Not Rated,4.0,,"Adventure,Fantasy,",65.653%
3103,32142,PC,Not Rated,4.0,,"Adventure,Fantasy,",65.653%
3175,451523,"PC,Xbox 360,PlayStation 3",Teen,3.65,,"Action,Adventure,Action,Adventure,Anime,Fantasy,",64.594%
2405,36505,PC,Not Rated,3.7,,"Adventure,Arcade,Anime,",64.327%
7,34647,PC,Not Rated,3.83,,"Adventure, ,",55.709%
242,41449,"PC,PlayStation 3,Android",Mature,4.08,,"Adventure, ,",55.709%
268,31397,"Classic Macintosh,PC",Teen,4.38,,"Adventure, ,",55.709%
512,43090,"PC,PlayStation 4",Not Rated,3.67,,"Adventure, ,",55.709%
640,34977,PC,Not Rated,3.57,,"Adventure, ,",55.709%
