In [1]:
import os
import pprint

import numpy as np
import pandas as pd
import pymongo
import requests

In [61]:
# Shell escape: print the working directory the notebook kernel is running in.
! pwd

/Users/nick0lish/galvanize/Video-Game-Recommender


In [7]:
class ScrapingException(Exception):
    """Raised by api_request when the API answers with a status code other than 200."""

def api_request(url, params=None, timeout=30):
    """Make an IGDB API request and return the decoded JSON body.

    The API key is read from the IGDB_USER_KEY environment variable so that
    the credential is not stored in the notebook.

    Args:
        url: Full endpoint URL to GET.
        params: Optional query-string parameters, passed through to requests.get.
        timeout: Seconds to wait for the server before requests gives up;
            without it a stalled connection would block the notebook forever.

    Returns:
        The response body as parsed by response.json().

    Raises:
        ScrapingException: if IGDB_USER_KEY is not set, or if the response
            status code is not 200.
    """
    user_key = os.environ.get("IGDB_USER_KEY")
    if not user_key:
        raise ScrapingException("Error: the IGDB_USER_KEY environment variable is not set")

    headers = {'Accept': 'application/json',
               "user-key": user_key}
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if response.status_code != 200:
        raise ScrapingException(f"Error: status code {response.status_code}\n\nContent:\n{response.content}")
    return response.json()

In [8]:
def api_games(game_id=""):
    """Request the games endpoint for game_id via api_request.

    With the default empty game_id the URL ends at ".../games/".
    """
    endpoint = "https://api-2445582011268.apicast.io/games/{}".format(game_id)
    return api_request(endpoint)

In [9]:
def api_platforms(platform_id=""):
    """Request the platforms endpoint for platform_id via api_request.

    With the default empty platform_id the URL ends at ".../platforms/".
    """
    endpoint = "https://api-2445582011268.apicast.io/platforms/{}".format(platform_id)
    return api_request(endpoint)

In [10]:
# Connect to MongoDB with pymongo's default client settings (no host/port given).
mc = pymongo.MongoClient()
# Database that holds everything this notebook stores.
db = mc['ps4_game_data']

In [11]:
# Collection handles: `games` is what get_game_info queries and what is loaded
# into the DataFrame; `platform` receives the platform record.
games = db['games']
platform = db['platform']

In [13]:
# Fetch the platform record for id 48 from the API.
ps4 = api_platforms(48)

In [14]:
# Display the first record of the platform response.
ps4[0]

{'id': 48,
 'name': 'PlayStation 4',
 'logo': {'url': '//images.igdb.com/igdb/image/upload/t_thumb/kjp5zjzy8omfm8kgxarx.jpg',
  'cloudinary_id': 'kjp5zjzy8omfm8kgxarx',
  'width': 600,
  'height': 102},
 'slug': 'ps4--1',
 'url': 'https://www.igdb.com/platforms/ps4--1',
 'created_at': 1326544944299,
 'updated_at': 1433175597593,
 'website': 'http://www.playstation.com/ps4/',
 'summary': 'The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. PS4 puts gamers first with an astounding launch line-up and over 180 games in development. Play amazing top-tier blockbusters and innovative indie hits on PS4. Developer inspired, gamer focused.',
 'alternative_name': 'PS4',
 'generation': 8,
 'games': [2033,
  8500,
  2957,
  1979,
  2920,
  3152,
  16735,
  3256,
  18655,
  3094,
  4,
  16765,
  6632,
  4756,
  4754,
  1887,
  6030,
  4755,
  21729,
  4071,
  386,
  8992,
  5895,
  17522,
  1968,
  3042,
  7574

In [15]:
# Store the platform record in the `platform` collection.
# NOTE(review): this is a plain insert, so running it again from a fresh kernel
# presumably adds a second copy of the record — confirm before re-running.
platform.insert_one(ps4[0])

<pymongo.results.InsertOneResult at 0x116796f48>

In [16]:
# Game ids listed under the 'games' key of the platform record.
ps4_ids = ps4[0]['games']

In [17]:
# Number of game ids attached to the platform record.
len(ps4_ids)

2470

In [36]:
# Disabled: fetch every id in ps4_ids from the API and insert the results into `games`.
# for game_id in ps4_ids:
#     games.insert_many(api_games(game_id))

In [18]:
def get_game_info(key="name", value=""):
    """Return one document from the `games` collection whose `key` field equals `value`.

    The lookup is delegated to games.find_one, so its result is returned as-is.
    """
    query = {key: value}
    return games.find_one(query)

In [19]:
# Look up the stored document whose name is 'Persona 5'.
get_game_info(value='Persona 5')

{'_id': ObjectId('5bf5d5aa933deaf65348d408'),
 'id': 9927,
 'name': 'Persona 5',
 'slug': 'persona-5',
 'url': 'https://www.igdb.com/games/persona-5',
 'created_at': 1430168520169,
 'updated_at': 1542639538774,
 'summary': 'Persona 5 is a role-playing game developed by Atlus. It is chronologically the sixth installment in the Persona series, which is part of the larger Megami Tensei franchise. Persona 5 is a role-playing game in which players live out a year in the life of a high school boy who gains the ability to summon facets of his psyche, known as Personas. Dungeon exploration features additional elements from previous iterations, such as jumping across gaps or dashing between cover. Dungeons feature a mixture of fixed environments tied into the plot and theme, and randomly generated environments. Battles are based on a turn-based attack system, with the characters wielding both their Personas and two weapon types: a gun and a melee weapon. The Social Link element from Persona 3 a

For more information on any key in the game dict, see:

https://igdb.github.io/api/endpoints/game/

In [20]:
# Game fields that are turned into similarity features further down.
useful_columns = ['player_perspectives', 'game_modes', 'themes', 'genres']

In [21]:
# Same lookup, this time by the numeric 'id' field instead of the name.
get_game_info(key='id', value=2933)

{'_id': ObjectId('5bf5d544933deaf65348d350'),
 'id': 2933,
 'name': 'Kingdom Hearts III',
 'slug': 'kingdom-hearts-iii',
 'url': 'https://www.igdb.com/games/kingdom-hearts-iii',
 'created_at': 1378667202138,
 'updated_at': 1542541061940,
 'summary': 'Kingdom Hearts III is the tenth main installment in the Kingdom Hearts series. It focuses on a boy named Sora and his friends Donald and Goofy, as they travel to many different worlds, many of them Disney-inspired.',
 'collection': 272,
 'franchise': 26,
 'franchises': [26],
 'hypes': 96,
 'popularity': 1459.333333333333,
 'games': [18812, 68441, 1223, 28168, 22387, 1864, 9611, 28309, 55092, 27209],
 'tags': [1,
  268435468,
  536870990,
  536871208,
  536871557,
  536871872,
  536871938,
  536871987,
  536873066,
  536873154,
  536873384,
  536873422,
  536874398,
  536874745,
  536875011,
  536875043,
  536875073,
  536875074,
  536875081,
  536875265,
  536875437,
  536875819,
  536875830,
  536875886,
  536875934,
  536876007,
  536876

In [22]:
# Load every document of the `games` collection into a DataFrame (one row per document).
df = pd.DataFrame(list(games.find()))

In [23]:
def make_id_list(column):
    """Return the sorted distinct entries found in the list-valued cells of `column`.

    Args:
        column: Iterable of cells. Cells that are lists contribute their
            entries; any other cell (e.g. NaN or 0) is ignored.

    Returns:
        A sorted list with each entry appearing once. Entries must be hashable
        and mutually comparable.
    """
    # A set makes de-duplication linear instead of re-scanning a growing list
    # for every entry.
    unique_ids = {
        entry
        for item in column
        if isinstance(item, list)
        for entry in item
    }
    return sorted(unique_ids)

In [24]:
def fill_nans(df, columns=None):
    """Return a copy of `df` with missing values in the feature columns replaced by 0.

    Args:
        df: DataFrame containing every column named in `columns`.
        columns: Column names to fill. Defaults to 'player_perspectives',
            'game_modes', 'themes' and 'genres'.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.

    Raises:
        KeyError: if one of the requested columns is not present in `df`.
    """
    if columns is None:
        columns = ['player_perspectives',
                   'game_modes',
                   'themes',
                   'genres']

    filled = df.copy()
    for col in columns:
        # Series.fillna returns a new Series; without assigning it back the
        # call has no effect on the frame.
        filled[col] = filled[col].fillna(0)
    return filled

In [25]:
# Run the feature-column NaN fill over the full games frame.
new_df = fill_nans(df)

In [26]:
# Print the sorted distinct ids found in each feature column.
# (The helper defined in this notebook is make_id_list; `make_list` is not defined.)
for column in useful_columns:
    print(column, make_id_list(new_df[column]))

player_perspectives [1, 2, 3, 4, 5, 6, 7]
game_modes [1, 2, 3, 4, 5]
themes [1, 17, 18, 19, 20, 21, 22, 23, 27, 28, 31, 32, 33, 34, 35, 38, 39, 40, 41, 42, 43]
genres [2, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 30, 31, 32, 33]


In [27]:
# Genre ids; same values as the `genres` line printed by the previous cell.
genres = [
    2, 4, 5, 7, 8, 9, 10, 11, 12, 13,
    14, 15, 16, 24, 25, 26, 30, 31, 32, 33,
]


It looks like most of the games in the database are just purchasable DLC skin packs.

In [28]:
#new_df[new_df.genres==0]['name']

In [29]:
def is_val_in_col(column, i):
    """Return 1 if `column` is exactly a list that contains `i`, otherwise 0.

    Anything that is not a plain list (NaN, 0, a tuple, ...) yields 0.
    """
    return 1 if type(column) == list and i in column else 0

In [30]:
# One-hot encode each feature column: add one 0/1 column per distinct id,
# named "<column>_<id>". Uses make_id_list, the helper defined in this
# notebook (`make_list` is not defined).
for column in useful_columns:
    for i in make_id_list(new_df[column]):
        # apply() calls the lambda right away, so it sees the current value of i.
        new_df[f'{column}_{i}'] = new_df[column].apply(lambda col: is_val_in_col(col, i))

Dropping rows where the category is 1, 2 or 3:
1 = DLC/Add-On
2 = Expansion
3 = Bundle

In [31]:
 # Remove bundles (category 3); rows with any other category, or none, are kept.
 drop_df = new_df[new_df.category != 3]

In [32]:
# Remove expansions (category 2).
drop_df = drop_df[drop_df.category != 2]

In [33]:
# Remove DLC / add-ons (category 1).
drop_df = drop_df[drop_df.category != 1]

In [34]:
# Keep only rows that have no version_parent value.
has_no_parent = drop_df['version_parent'].isna()
drop_df = drop_df.loc[has_no_parent]

In [35]:
# Select the one-hot feature columns by name ("<feature>_<id>") instead of by a
# fixed position: a positional slice silently picks the wrong columns as soon
# as the API returns one field more or fewer. Column order is preserved.
onehot_columns = [
    col for col in drop_df.columns
    if any(col.startswith(f'{base}_') and col[len(base) + 1:].isdigit()
           for base in useful_columns)
]
# Copy so that adding columns later does not write into a slice of drop_df.
similarity_df = drop_df[onehot_columns].copy()

In [36]:
# Attach each game's name as the last column; pandas aligns the values on the row index.
similarity_df = similarity_df.assign(name=new_df['name'])

In [37]:
# Check the column layout: the feature columns come first, 'name' last.
similarity_df.columns

Index(['player_perspectives_1', 'player_perspectives_2',
       'player_perspectives_3', 'player_perspectives_4',
       'player_perspectives_5', 'player_perspectives_6',
       'player_perspectives_7', 'game_modes_1', 'game_modes_2', 'game_modes_3',
       'game_modes_4', 'game_modes_5', 'themes_1', 'themes_17', 'themes_18',
       'themes_19', 'themes_20', 'themes_21', 'themes_22', 'themes_23',
       'themes_27', 'themes_28', 'themes_31', 'themes_32', 'themes_33',
       'themes_34', 'themes_35', 'themes_38', 'themes_39', 'themes_40',
       'themes_41', 'themes_42', 'themes_43', 'genres_2', 'genres_4',
       'genres_5', 'genres_7', 'genres_8', 'genres_9', 'genres_10',
       'genres_11', 'genres_12', 'genres_13', 'genres_14', 'genres_15',
       'genres_16', 'genres_24', 'genres_25', 'genres_26', 'genres_30',
       'genres_31', 'genres_32', 'genres_33', 'name'],
      dtype='object')

In [38]:
# Feature matrix: every column except the last one ('name').
sim_matrix = similarity_df.to_numpy()[:, :-1]

In [39]:
def find_most_similar(title=""):
    """Return the name of the game whose features are most similar to `title`'s.

    Similarity is the Jaccard score (shared features / combined features)
    computed over every column of `similarity_df` except the last one, which
    holds the name.

    Args:
        title: Exact value to look up in the 'name' column. If several rows
            share it, the first one is used.

    Returns:
        The 'name' of the best-scoring row other than the queried row. Ties go
        to the row that appears first in `similarity_df`.

    Raises:
        IndexError: if `title` is not in `similarity_df`, or if there is no
            other row to compare it with.
    """
    features = similarity_df.values[:, :-1].astype(bool)

    matches = np.flatnonzero((similarity_df['name'] == title).to_numpy())
    if matches.size == 0:
        raise IndexError(f"no game named {title!r} in similarity_df")
    if len(features) < 2:
        raise IndexError("similarity_df has no other game to compare against")
    target_pos = matches[0]
    target = features[target_pos]

    shared = (features & target).sum(axis=1)
    combined = (features | target).sum(axis=1)

    # Two rows without any feature have an empty union; score them 0 instead
    # of dividing by zero.
    scores = np.zeros(len(features), dtype=float)
    has_features = combined > 0
    scores[has_features] = shared[has_features] / combined[has_features]

    # Exclude the queried row explicitly. Taking "second place" of a sort is
    # not enough: another game with identical features also scores 1.0 and
    # the queried game itself could then be returned.
    scores[target_pos] = -np.inf

    best_pos = int(np.argmax(scores))
    return similarity_df.iloc[best_pos, :]['name']

In [40]:
# Example query: closest match for "Everybody's Golf".
find_most_similar("Everybody's Golf")

"New Everybody's Golf"

In [42]:
# Look up the stored document whose name is "Overwatch".
get_game_info(key='name', value="Overwatch")

{'_id': ObjectId('5bf5d4cc933deaf65348d271'),
 'id': 8173,
 'name': 'Overwatch',
 'slug': 'overwatch',
 'url': 'https://www.igdb.com/games/overwatch',
 'created_at': 1415390671692,
 'updated_at': 1542833606140,
 'summary': 'In Overwatch, you control one of several heroes in competitive 6-person team shooting matches. Battle over objectives, take down the other team, and achieve victory. \n \nIn Overwatch, heroes do battle in diverse locations around the world. From the technological marvel of Numbani to the manufacturing powerhouse of Volskaya, each map has a unique layout and specific win conditions that your team must meet in order to secure victory.',
 'storyline': "Soldiers. Scientists. Adventurers. Oddities. \n \nIn a time of global crisis, an international task force of heroes banded together to restore peace to a war-torn world: \n \nOVERWATCH. \n \nIt ended the crisis and helped to maintain peace in the decades that followed, inspiring an era of exploration, innovation, and dis