In [1]:
import re
import shutil

import lxml
import pandas as pd
from bs4 import BeautifulSoup as soup
from bs4 import NavigableString
from splinter import Browser

# Goal: gather rating data for every game that has no data in the ratings_df built in the
# exploratory analysis notebook. For each game we need the positive, neutral, and negative
# rating counts for both users and critics, as well as the metascore and userscore.

# Presumably the list of games that still need ratings (going by the file name).
# NOTE(review): no_rating_df is not referenced again in the cells shown here - confirm it is still needed.
no_rating_df = pd.read_csv("../Resources/games_to_get_ratings.csv")

In [2]:
# Ask the shell where the chromedriver executable lives on PATH.
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Start a headless Chrome session for scraping.
# chromedriver is looked up on PATH (the same lookup `which chromedriver` performs) so the
# notebook is not tied to one machine's layout; the previously hard-coded location is kept
# as a fallback for when the executable is not found on PATH.
chromedriver_path = shutil.which("chromedriver") or "/usr/local/bin/chromedriver"
browser = Browser("chrome", executable_path=chromedriver_path, headless=True)

In [4]:
# Load the video game sales data from the zipped CSV and keep every game name in a plain list.
games_df = pd.read_csv("../Resources/vgsales.csv.zip")
list_of_games = games_df["Name"].tolist()

In [5]:
# Navigating to the correct page through the search bar with splinter is too slow, so I will
# rewrite the code to build the URL directly using string interpolation.
# The console name must be interpolated into the URL as well. Metacritic's URL structure is:
# "https://www.metacritic.com/game/{console}/{game}"

In [6]:
# Platform abbreviations from the sales data must be changed into the platform names
# used in the URL, so that they can be interpolated into it.

# Maps each platform abbreviation to the platform segment of the URL.
game_conversions = {
    "PSV": "playstation-vita",
    "DC": "dreamcast",
    "WiiU": "wii-u",
    "GC": "gamecube",
    "N64": "nintendo-64",
    "XB": "xbox",
    "PSP": "psp",
    "PS4": "playstation-4",
    "PS": "playstation",
    "Wii": "wii",
    "PS3": "playstation-3",
    "PS2": "playstation-2",
    "GBA": "game-boy-advance",
    "DS": "ds",
    "X360": "xbox-360",
}


def convert_to_name(platform_abb):
    """Return the URL platform name for a platform abbreviation.

    Parameters
    ----------
    platform_abb : hashable
        Platform abbreviation to look up, e.g. "PS4".

    Returns
    -------
    str
        The matching value from ``game_conversions``, or "other" when the
        abbreviation is not one of its keys.
    """
    return game_conversions.get(platform_abb, "other")
    

In [7]:
# Translate every platform abbreviation into its URL platform name, then drop the games
# that fall into the "other" category. The vast majority of our data belongs to the
# consoles listed on Metacritic (its "legacy consoles"), so little is lost.
# .copy() makes the filtered frame independent of the one it was sliced from, so that
# re-running this cell does not assign a column on a slice (SettingWithCopyWarning).
games_df["full_platform_name"] = games_df["Platform"].map(convert_to_name)
games_df = games_df.loc[games_df["full_platform_name"] != "other"].copy()


In [8]:
# Show the names of the games that remain after the platform filter.
# NOTE(review): this displays the entire column; a preview (e.g. .head()) would keep the output short.
games_df["Name"].tolist()

['Wii Sports',
 'Mario Kart Wii',
 'Wii Sports Resort',
 'New Super Mario Bros.',
 'Wii Play',
 'New Super Mario Bros. Wii',
 'Nintendogs',
 'Mario Kart DS',
 'Wii Fit',
 'Wii Fit Plus',
 'Kinect Adventures!',
 'Grand Theft Auto V',
 'Grand Theft Auto: San Andreas',
 'Brain Age: Train Your Brain in Minutes a Day',
 'Pokemon Diamond/Pokemon Pearl',
 'Grand Theft Auto V',
 'Grand Theft Auto: Vice City',
 'Pokemon Ruby/Pokemon Sapphire',
 'Pokemon Black/Pokemon White',
 'Brain Age 2: More Training in Minutes a Day',
 'Gran Turismo 3: A-Spec',
 'Call of Duty: Modern Warfare 3',
 'Call of Duty: Black Ops',
 'Call of Duty: Black Ops 3',
 'Call of Duty: Black Ops II',
 'Call of Duty: Black Ops II',
 'Call of Duty: Modern Warfare 2',
 'Call of Duty: Modern Warfare 3',
 'Grand Theft Auto III',
 'Super Smash Bros. Brawl',
 'Call of Duty: Black Ops',
 'Animal Crossing: Wild World',
 'Halo 3',
 'Grand Theft Auto V',
 'Pokemon HeartGold/Pokemon SoulSilver',
 'Super Mario 64',
 'Gran Turismo 4',
 'S

In [9]:
# This worked pretty well, but I will alter the code further to see if I can increase my scraping success-to-failure ratio.

In [10]:

# Scrape one slice of the games list. Every game handled ends up in exactly one list:
#   game_info_list             - all fields were scraped, side details included
#   abberant_side_detail_games - the scores were scraped but the side details were not
#   problem_games              - (names only) a score, release date or review count was missing
problem_games = []
abberant_side_detail_games = []
game_info_list = []
counter = 1

# Positions in the games list that this run covers.
SCRAPE_START = 6000
SCRAPE_STOP = 9000

# Characters that are replaced with a dash when a title is turned into a URL slug.
# Compiled once, as a raw string so the regex escapes are not read as string escapes.
TITLE_SEPARATORS = re.compile(r"[.:;&\s]")


def _title_to_slug(title):
    """Turn a game title into the slug interpolated into the game's URL.

    Separator characters become dashes, the text is lower-cased, runs of three
    and then two dashes are collapsed, apostrophes are removed, and a single
    trailing dash is dropped. An empty result is returned as-is.
    """
    slug = TITLE_SEPARATORS.sub("-", title).lower()
    slug = slug.replace("---", "-").replace("--", "-").replace("'", "")
    # A period at the end of the title shows up as a trailing dash; get rid of it.
    if slug.endswith("-"):
        slug = slug[:-1]
    return slug


def _review_counts(section):
    """Return the text of the first three review-count spans found in ``section``.

    Raises AttributeError when ``section`` is None and IndexError when fewer
    than three counts are present.
    """
    counts = [
        span.get_text()
        for span in section.select("ol.score_counts.hover_none span.count")
    ]
    return counts[0], counts[1], counts[2]


def _scrape_scores(page):
    """Collect the scores, release date and review counts from a parsed game page.

    Returns a dict whose keys are, in order: metascore, user_score, release_date,
    positive/neutral/negative_critics and positive/neutral/negative_users.
    Raises AttributeError or IndexError when an expected element is missing.
    """
    scores = {}

    # Metascore
    scores["metascore"] = page.find("a", class_="metascore_anchor").get_text()

    # Userscore
    first_side_details = page.find("div", class_="details side_details")
    scores["user_score"] = first_side_details.select("div.metascore_w")[0].get_text()

    # Release date
    product_data = page.find("div", class_="product_data")
    release_data = product_data.find("li", class_="summary_detail release_data")
    scores["release_date"] = release_data.find("span", class_="data").get_text()

    # Positive, mixed, and negative critic review numbers
    # (taken from the first distribution block on the page).
    critic_section = page.find("div", class_="distribution_wrap")
    (
        scores["positive_critics"],
        scores["neutral_critics"],
        scores["negative_critics"],
    ) = _review_counts(critic_section)

    # Positive, mixed, and negative user review numbers
    # (taken from the distribution block inside the user reviews module).
    user_module = page.find("div", class_="user_reviews_module")
    user_section = user_module.find("div", class_="distribution_wrap")
    (
        scores["positive_users"],
        scores["neutral_users"],
        scores["negative_users"],
    ) = _review_counts(user_section)

    return scores


def _add_side_details(page, game_scores):
    """Add developer, number_players and rating to ``game_scores`` in place.

    The values are read by position from the list items of the second
    "details side_details" block. They are stored one at a time, so values found
    before a failure stay in ``game_scores``.
    Raises AttributeError or IndexError when an expected element is missing.
    """
    side_details = page.find_all("div", class_="details side_details")[1]
    items = side_details.select("ul.summary_details li")
    for key, position in (("developer", 0), ("number_players", 2), ("rating", 4)):
        game_scores[key] = items[position].find("span", class_="data").get_text()


# Pair every game's URL platform name with its title.
zipped_list = list(zip(games_df["full_platform_name"].tolist(), games_df["Name"].tolist()))

for platform, name in zipped_list[SCRAPE_START:SCRAPE_STOP]:
    if not isinstance(name, str):
        # A missing title (e.g. NaN) cannot be turned into a URL; record it and move on.
        print(f"\nGAME TITLE: {name}")
        problem_games.append(name)
        print("THIS IS A PROBLEM CHILD")
        continue

    edited_game_title = _title_to_slug(name)
    print(f"\nGAME TITLE: {edited_game_title}")
    # Go straight to the game's page instead of using the site's search bar.
    browser.visit(f"https://www.metacritic.com/game/{platform}/{edited_game_title}")

    game_scores = {"Name": name}
    my_soup = soup(browser.html, "lxml")

    # Scores, release date and review counts: all are required, so the game is
    # skipped as soon as one of them is missing.
    # `except Exception` (not a bare except) lets Ctrl-C still stop the run.
    try:
        game_scores.update(_scrape_scores(my_soup))
    except Exception:
        problem_games.append(name)
        print("THIS IS A PROBLEM CHILD")
        continue

    # Side details are optional: keep what was scraped so far when they are missing.
    try:
        _add_side_details(my_soup, game_scores)
    except Exception:
        abberant_side_detail_games.append(game_scores)
        print("SUCCESS but no side details")
        continue

    game_info_list.append(game_scores)
    print("SUCCESSFUL")
    print(f"This is the {counter}th game in the list.")
    counter += 1
        
            
        

        
        


GAME TITLE: nascar-thunder-2004
SUCCESSFUL
This is the 1th game in the list.

GAME TITLE: buffy-the-vampire-slayer
SUCCESSFUL
This is the 2th game in the list.

GAME TITLE: lego-marvels-avengers
SUCCESS but no side details

GAME TITLE: star-wars-the-force-unleashed-ii
SUCCESS but no side details

GAME TITLE: harry-potter-and-the-goblet-of-fire
SUCCESSFUL
This is the 3th game in the list.

GAME TITLE: saint-seiya-sanctuary-battle
SUCCESS but no side details

GAME TITLE: meteos
SUCCESSFUL
This is the 4th game in the list.

GAME TITLE: virtua-tennis-4
SUCCESSFUL
This is the 5th game in the list.

GAME TITLE: kung-fu-panda-2
THIS IS A PROBLEM CHILD

GAME TITLE: cross-edge
SUCCESSFUL
This is the 6th game in the list.

GAME TITLE: hotel-for-dogs
THIS IS A PROBLEM CHILD

GAME TITLE: cabelas-big-game-hunter-2012
SUCCESS but no side details

GAME TITLE: pro-yakyuu-team-o-tsukurou!
THIS IS A PROBLEM CHILD

GAME TITLE: me-my-katamari
SUCCESSFUL
This is the 7th game in the list.

GAME TITLE: asph

THIS IS A PROBLEM CHILD

GAME TITLE: tiger-woods-pga-tour-2003
SUCCESSFUL
This is the 58th game in the list.

GAME TITLE: blowout
SUCCESSFUL
This is the 59th game in the list.

GAME TITLE: nfl-blitz-pro
SUCCESSFUL
This is the 60th game in the list.

GAME TITLE: mysims-party
SUCCESSFUL
This is the 61th game in the list.

GAME TITLE: tron-evolution-battle-grids
THIS IS A PROBLEM CHILD

GAME TITLE: the-raiden-project
THIS IS A PROBLEM CHILD

GAME TITLE: hooked!-real-motion-fishing
SUCCESSFUL
This is the 62th game in the list.

GAME TITLE: pocket-fighter
THIS IS A PROBLEM CHILD

GAME TITLE: bicycle-casino-2005
THIS IS A PROBLEM CHILD

GAME TITLE: valhalla-knights-2
SUCCESSFUL
This is the 63th game in the list.

GAME TITLE: -hack//g-u-vol-2//reminisce
THIS IS A PROBLEM CHILD

GAME TITLE: disgaea-afternoon-of-darkness
SUCCESSFUL
This is the 64th game in the list.

GAME TITLE: ratchet-clank-full-frontal-assault
SUCCESS but no side details

GAME TITLE: ncaa-football-09
SUCCESS but no side deta

SUCCESS but no side details

GAME TITLE: ecw-hardcore-revolution
THIS IS A PROBLEM CHILD

GAME TITLE: mega-man-64
SUCCESS but no side details

GAME TITLE: chou-kuukan-night-pro-yakyuu-king(higher-jp-sales)
THIS IS A PROBLEM CHILD

GAME TITLE: rampage-2-universal-tour
THIS IS A PROBLEM CHILD

GAME TITLE: ms-pac-man-maze-madness
SUCCESS but no side details

GAME TITLE: tales-of-graces
THIS IS A PROBLEM CHILD

GAME TITLE: age-of-empires-mythologies
SUCCESSFUL
This is the 115th game in the list.

GAME TITLE: viewtiful-joe-2
SUCCESSFUL
This is the 116th game in the list.

GAME TITLE: wheres-waldo?-the-fantastic-journey
THIS IS A PROBLEM CHILD

GAME TITLE: atv-quad-kings
SUCCESS but no side details

GAME TITLE: mary-kate-and-ashley-sweet-16-licenced-to-drive
THIS IS A PROBLEM CHILD

GAME TITLE: jewel-quest-expeditions
THIS IS A PROBLEM CHILD

GAME TITLE: nba-jam-extreme
THIS IS A PROBLEM CHILD

GAME TITLE: dragon-ball-origins-(jp-incomplete-us-sales)
THIS IS A PROBLEM CHILD

GAME TITLE: touc

SUCCESSFUL
This is the 175th game in the list.

GAME TITLE: momotarou-dentetsu-16
THIS IS A PROBLEM CHILD

GAME TITLE: robotech-the-macross-saga
SUCCESSFUL
This is the 176th game in the list.

GAME TITLE: yu-yu-hakusho-dark-tournament
SUCCESSFUL
This is the 177th game in the list.

GAME TITLE: mtv-celebrity-deathmatch
THIS IS A PROBLEM CHILD

GAME TITLE: gekijouban-macross-f-itsuwarino-utahime-hybrid-pack
THIS IS A PROBLEM CHILD

GAME TITLE: conflict-zone
SUCCESSFUL
This is the 178th game in the list.

GAME TITLE: tony-hawks-american-sk8land
SUCCESSFUL
This is the 179th game in the list.

GAME TITLE: de-blob-2
SUCCESS but no side details

GAME TITLE: the-bigs-2
SUCCESSFUL
This is the 180th game in the list.

GAME TITLE: bloodrayne
SUCCESSFUL
This is the 181th game in the list.

GAME TITLE: hannah-montana-the-movie
SUCCESSFUL
This is the 182th game in the list.

GAME TITLE: 50-cent-blood-on-the-sand
SUCCESSFUL
This is the 183th game in the list.

GAME TITLE: bratz-girlz-really-rock
SUCC

THIS IS A PROBLEM CHILD

GAME TITLE: nhl-2k3
SUCCESSFUL
This is the 232th game in the list.

GAME TITLE: heisei-kyouiku-linkai-ds
THIS IS A PROBLEM CHILD

GAME TITLE: spongebob-squarepants-game-boy-advance-video-volume-3
THIS IS A PROBLEM CHILD

GAME TITLE: pro-yaky?-spirits-2010
THIS IS A PROBLEM CHILD

GAME TITLE: the-naked-brothers-band-the-video-game
THIS IS A PROBLEM CHILD

GAME TITLE: final-fight-one
SUCCESSFUL
This is the 233th game in the list.

GAME TITLE: rune-factory-tides-of-destiny
SUCCESSFUL
This is the 234th game in the list.

GAME TITLE: the-amazing-spider-man-(console-version)
THIS IS A PROBLEM CHILD

GAME TITLE: ncaa-football-10
SUCCESSFUL
This is the 235th game in the list.

GAME TITLE: gallop-racer-2003-a-new-breed
SUCCESSFUL
This is the 236th game in the list.

GAME TITLE: dawn-of-discovery
SUCCESSFUL
This is the 237th game in the list.

GAME TITLE: beyblade-vforce-super-tournament-battle
SUCCESSFUL
This is the 238th game in the list.

GAME TITLE: outlaw-golf-2
SUC

SUCCESSFUL
This is the 283th game in the list.

GAME TITLE: mdk
THIS IS A PROBLEM CHILD

GAME TITLE: monster-jam
SUCCESSFUL
This is the 284th game in the list.

GAME TITLE: afro-samurai
SUCCESSFUL
This is the 285th game in the list.

GAME TITLE: rock-band-track-pack-classic-rock
THIS IS A PROBLEM CHILD

GAME TITLE: zone-of-the-enders-hd-collection
SUCCESSFUL
This is the 286th game in the list.

GAME TITLE: ty-the-tasmanian-tiger-2-bush-rescue
SUCCESSFUL
This is the 287th game in the list.

GAME TITLE: speed-punks
THIS IS A PROBLEM CHILD

GAME TITLE: the-legend-of-heroes-vii-the-trail-of-blue
THIS IS A PROBLEM CHILD

GAME TITLE: no-more-heroes-heroes-paradise
SUCCESSFUL
This is the 288th game in the list.

GAME TITLE: mahjongg-mysteries-ancient-egypt
THIS IS A PROBLEM CHILD

GAME TITLE: injustice-gods-among-us
SUCCESSFUL
This is the 289th game in the list.

GAME TITLE: mvp-baseball-2003
SUCCESSFUL
This is the 290th game in the list.

GAME TITLE: csi-unsolved!
THIS IS A PROBLEM CHILD

GA

KeyboardInterrupt: 

In [None]:
# Show the records of the games for which every field, side details included, was scraped.
game_info_list

In [None]:
#scraping_df = pd.DataFrame(game_info_list)
#scraping_df.to_csv("../Resources/scraped_ratings2")

In [None]:
# get developer, genre, number_players, rating, release_date

In [None]:
# Number of games recorded as problems (skipped without a complete set of scores).
len(problem_games)

In [None]:
#problem_ratings = pd.DataFrame(problem_games)
#problem_ratings.to_csv("../Resources/problem_ratings2")

In [None]:
#'& III ! Pokémon

In [None]:
# Number of games whose scores were scraped but whose side details
# (developer, number of players, rating) were not.
len(abberant_side_detail_games)