# Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
import seaborn as sns
import csv
from ast import literal_eval

#  &#187; METACRITIC DATASET  &#171;

In [2]:
# Load the Metacritic export and keep only the columns this notebook uses
metacritic_df = pd.read_csv("metacritic.csv")[[
    'game_title', 'genre', 'platforms', 'developers',
    'release_year', 'release_month', 'release_day',
    'average_metascore_ratings',
]]

# These columns hold string values; evaluate each one into a Python list
for column in ['genre', 'platforms', 'developers']:
    metacritic_df[column] = metacritic_df[column].apply(literal_eval)


# Preview of the first 5 rows
metacritic_df.head(5)

Unnamed: 0,game_title,genre,platforms,developers,release_year,release_month,release_day,average_metascore_ratings
0,Journey,"[Action, Platformer, 3D]","[PlayStation 4, PC, PlayStation 3]",[Tricky Pixels],2015,7,21,92.0
1,Celeste,"[Action, Platformer, 2D]","[PlayStation 4, iPhone/iPad, PC, Switch, Xbox ...",[Matt Makes Games Inc.],2018,1,25,91.25
2,INSIDE,"[Action, Platformer, 2D]","[PlayStation 4, iPhone/iPad, PC, Switch, Xbox ...",[PLAYDEAD],2016,8,23,90.5
3,Overwatch,"[Tactical, Shooter, Action, First-Person]","[PlayStation 4, PC, Xbox One]",[Blizzard Entertainment],2016,5,23,90.67
4,Shovel Knight,"[Action, Platformer, 2D]","[PlayStation 4, 3DS, PC, PlayStation 3, PlaySt...",[Yacht Club Games],2015,4,21,88.22


#  &#187; USER INPUT ANALYSIS &#171;

<strong>Idea:</strong> A user inputs three video game titles, which serve as the basis for the recommendation engine.

## Example of user input

In [3]:
# Example titles; rows are looked up by exact match on `game_title`
user_input = [
    "DOOM",
    "League of Legends",
    "The Elder Scrolls V: Skyrim Special Edition",
]

def user_game_data(df, titles):
    """Return the rows of ``df`` whose ``game_title`` is one of ``titles``.

    Rows keep the order they have in ``df`` (not the order of ``titles``),
    titles with no matching row are simply absent from the result, and the
    result is renumbered 0..n-1.
    """
    is_requested = df["game_title"].isin(titles)
    return df[is_requested].reset_index(drop=True)

# Rows of the Metacritic frame that correspond to the example titles
user_df = user_game_data(df=metacritic_df, titles=user_input)
user_df

Unnamed: 0,game_title,genre,platforms,developers,release_year,release_month,release_day,average_metascore_ratings
0,DOOM,"[Arcade, Third-Person, Action, First-Person, S...","[PlayStation 4, PC, Switch, Xbox One]",[id Software],2016,5,13,84.45
1,The Elder Scrolls V: Skyrim Special Edition,"[Western-Style, Role-Playing]","[PlayStation 4, PC, Switch, Xbox One]",[Bethesda Game Studios],2016,10,28,79.0
2,League of Legends,"[Role-Playing, MOBA, Real-Time, Strategy, Acti...",[PC],[Riot Games],2009,10,27,78.0


## Analysis of user's inputted video games

#### Function that counts how often each unique item occurs in a column and returns the counts as a new DataFrame

In [4]:
def count_items(df, title):
    """Tally how often each item occurs in the list-valued column ``title``.

    Every cell of ``df[title]`` is iterated and each element it yields is
    counted once per occurrence.

    Returns a DataFrame with two columns, ``title`` (the distinct items) and
    ``"count"`` (their number of occurrences), sorted by count, largest first.
    """
    tally = {}
    for entries in df[title]:
        for entry in entries:
            tally[entry] = tally.get(entry, 0) + 1

    counts_df = pd.DataFrame({title: list(tally), "count": list(tally.values())})
    return counts_df.sort_values(by='count', ascending=False)

In [5]:
# One tally per list-valued column of the user's games
user_genres, user_platforms, user_developers = (
    count_items(user_df, column_name)
    for column_name in ('genre', 'platforms', 'developers')
)

In [6]:
# Show the genre tally built from the user's games
user_genres

Unnamed: 0,genre,count
7,Role-Playing,2
0,Arcade,1
1,Third-Person,1
2,Action,1
3,First-Person,1
4,Shooter,1
5,Sci-Fi,1
6,Western-Style,1
8,MOBA,1
9,Real-Time,1


## &#187; RECOMMEND SIMILAR GAMES (UNWEIGHTED) &#171;

<strong>Idea:</strong> Find games that share genres, platforms, and developers with the inputted video game titles, without taking into account how prominent each of them is among those titles

#### Function to find distance/count of similarities for each category (genres, platforms, developers)

In [7]:
def unweighted_distances(u_df, meta_df):
    """Score every game in ``meta_df`` against the user's games, ignoring item frequency.

    For each of the ``genre``, ``platforms`` and ``developers`` columns, the
    distinct items found in ``u_df`` are collected and, for every row of
    ``meta_df``, the number of those items present in the row is counted.
    A row's distance is the Euclidean norm of the three
    "distinct user items minus items matched" gaps, so 0 means the row
    contains every distinct item of the user's games.

    Returns a DataFrame with columns ``game_title`` and ``distance``, sorted
    by ascending distance.
    """
    categories = ('genre', 'platforms', 'developers')

    # Distinct items per category across the user's games
    wanted = {name: list(count_items(u_df, name).iloc[:, 0]) for name in categories}

    def matches_per_game(name):
        # For each candidate game: how many of the user's distinct items it contains
        return np.array([
            sum(1 for value in wanted[name] if value in entry)
            for entry in list(meta_df[name])
        ])

    matched = {name: matches_per_game(name) for name in categories}

    distances = []
    for pos in range(len(meta_df)):
        squared_gap = sum(
            np.square(len(wanted[name]) - matched[name][pos]) for name in categories
        )
        distances.append(np.sqrt(squared_gap))

    scored = pd.DataFrame({'game_title': list(meta_df['game_title']), 'distance': distances})
    return scored.sort_values(by='distance')

In [8]:
def recommend_unweighted(u_df, meta_df, min_rating=75):
    """Recommend games from ``meta_df`` ranked by unweighted distance to the user's games.

    Parameters
    ----------
    u_df : DataFrame
        Rows describing the user's games; their ``game_title`` values are
        excluded from the recommendations.
    meta_df : DataFrame
        Candidate games.
    min_rating : number, default 75
        Minimum ``average_metascore_ratings`` a candidate must have.

    Returns
    -------
    DataFrame
        A single ``game_title`` column, closest match first.
    """
    # Take the titles to exclude from u_df itself, not from the notebook-level
    # `user_input` list, so the function works for any input frame.
    already_chosen = meta_df["game_title"].isin(u_df["game_title"])
    suggest_df = meta_df[~already_chosen]
    suggest_df = suggest_df[suggest_df.average_metascore_ratings >= min_rating]
    distances_df = unweighted_distances(u_df, suggest_df)
    return distances_df[['game_title']]

### Top Recommended Games (Based on: "DOOM", "League of Legends", "TESV: Skyrim")

NOTE: MINIMUM METASCORE RATING SET TO 75

In [9]:
# Keep the 20 closest matches from the unweighted ranking
unweighted_df = recommend_unweighted(u_df=user_df, meta_df=metacritic_df).head(20)
unweighted_df

Unnamed: 0,game_title
1217,Super Monday Night Combat
1245,Iron Brigade
2447,ZombiU
1206,Metro: Last Light Redux
868,Metro: 2033 Redux
227,Evolve
568,Titanfall
878,Borderlands 2
106,Helldivers
222,LawBreakers


## &#187; RECOMMEND SIMILAR GAMES (WEIGHTED) &#171;

<strong>Idea:</strong> Find games similar to the inputted video game titles, taking into account how prominent each genre, platform, and developer is among those titles

#### Function to find distance/count of similarities for each category (genres, platforms, developers) (subject to change)


In [10]:
def weighted_distances(u_df, meta_df):
    """Score every game in ``meta_df`` against the user's games, weighting items by frequency.

    For each of the ``genre``, ``platforms`` and ``developers`` columns, every
    distinct item in ``u_df`` gets a weight equal to the number of times it
    occurs there (as tallied by ``count_items``). A row of ``meta_df`` scores
    the sum of the weights of the user items it contains. Its distance is the
    Euclidean norm of the three "total user weight minus row score" gaps, so
    0 means the row contains every item of the user's games.

    Returns a DataFrame with columns ``game_title`` and ``distance``, sorted
    by ascending distance.
    """

    def weighted_gaps(column_name):
        # Build the item -> weight lookup once per category instead of
        # filtering the tally DataFrame for every matching item of every row.
        tally = count_items(u_df, column_name)
        weights = dict(zip(tally.iloc[:, 0], tally['count']))
        total_weight = sum(weights.values())  # the same for every row, so computed once

        row_scores = np.array(
            [
                sum(weight for value, weight in weights.items() if value in entry)
                for entry in meta_df[column_name]
            ],
            dtype=float,
        )
        return total_weight - row_scores

    squared_gaps = sum(
        np.square(weighted_gaps(name)) for name in ('genre', 'platforms', 'developers')
    )
    distance = np.sqrt(squared_gaps)

    scored = pd.DataFrame({'game_title': list(meta_df['game_title']), 'distance': distance})
    return scored.sort_values(by='distance')

In [11]:
def recommend_weighted(u_df, meta_df, min_rating=75):
    """Recommend games from ``meta_df`` ranked by weighted distance to the user's games.

    Parameters
    ----------
    u_df : DataFrame
        Rows describing the user's games; their ``game_title`` values are
        excluded from the recommendations.
    meta_df : DataFrame
        Candidate games.
    min_rating : number, default 75
        Minimum ``average_metascore_ratings`` a candidate must have.

    Returns
    -------
    DataFrame
        A single ``game_title`` column, closest match first.
    """
    # Take the titles to exclude from u_df itself, not from the notebook-level
    # `user_input` list, so the function works for any input frame.
    already_chosen = meta_df["game_title"].isin(u_df["game_title"])
    suggest_df = meta_df[~already_chosen]
    suggest_df = suggest_df[suggest_df.average_metascore_ratings >= min_rating]
    distances_df = weighted_distances(u_df, suggest_df)
    return distances_df[['game_title']]

### Top Recommended Games (Based on: "DOOM", "League of Legends", "TESV: Skyrim" )

NOTE: MINIMUM METASCORE RATING SET TO 75

In [12]:
# Keep the 20 closest matches from the weighted ranking
weighted_df = recommend_weighted(u_df=user_df, meta_df=metacritic_df).head(20)
weighted_df

Unnamed: 0,game_title
1206,Metro: Last Light Redux
227,Evolve
2447,ZombiU
868,Metro: 2033 Redux
878,Borderlands 2
222,LawBreakers
106,Helldivers
568,Titanfall
178,PlanetSide 2
232,DOOM: Unto The Evil


## &#187; WEIGHTED AND UNWEIGHTED COMPARISON &#171;

In [13]:
# Put the two rankings side by side, position by position
weighted_and_unweighted_df = pd.DataFrame({
    label: list(ranking['game_title'])
    for label, ranking in (('weighted', weighted_df), ('unweighted', unweighted_df))
})
weighted_and_unweighted_df

Unnamed: 0,weighted,unweighted
0,Metro: Last Light Redux,Super Monday Night Combat
1,Evolve,Iron Brigade
2,ZombiU,ZombiU
3,Metro: 2033 Redux,Metro: Last Light Redux
4,Borderlands 2,Metro: 2033 Redux
5,LawBreakers,Evolve
6,Helldivers,Titanfall
7,Titanfall,Borderlands 2
8,PlanetSide 2,Helldivers
9,DOOM: Unto The Evil,LawBreakers
