In [1]:
import re
import shutil

import lxml
import pandas as pd
from bs4 import BeautifulSoup as soup
from bs4 import NavigableString
from splinter import Browser

# Gather rating data for every game that has no data in ratings_df (see the exploratory analysis notebook).
# For each game, collect the positive, neutral, and negative review counts for both users and critics,
# as well as the metascore and the user score.

# Read the list of games that still need ratings.
no_rating_df = pd.read_csv("../Resources/games_to_get_ratings.csv")

In [2]:
# Shell escape: print the location of the chromedriver executable found on PATH.
!which chromedriver

/usr/local/bin/chromedriver


In [12]:
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# layout; fall back to the previously hard-coded location when it is not on PATH.
CHROMEDRIVER_PATH = shutil.which("chromedriver") or "/usr/local/bin/chromedriver"
browser = Browser("chrome", executable_path=CHROMEDRIVER_PATH, headless=True)

In [4]:
# Load the video game sales dataset and collect every game title into a plain Python list.
games_df = pd.read_csv("../Resources/vgsales.csv.zip")
list_of_games = games_df["Name"].values.tolist()

In [6]:
# Navigating to the correct page via the search bar using splinter is too slow. I will rewrite the code to
# build the URL directly using string interpolation.
# The correct console name must be interpolated into the URL as well. Metacritic's URL structure is:
# "https://www.metacritic.com/game/{console}/{game}"

In [7]:
# Platform abbreviations from the sales data must be converted into the full
# platform names used in the URL before they can be interpolated.

game_conversions = {
    "PSV": "playstation-vita",
    "DC": "dreamcast",
    "WiiU": "wii-u",
    "GC": "gamecube",
    "N64": "nintendo-64",
    "XB": "xbox",
    "PSP": "psp",
    "PS4": "playstation-4",
    "PS": "playstation",
    "Wii": "wii",
    "PS3": "playstation-3",
    "PS2": "playstation-2",
    "GBA": "game-boy-advance",
    "DS": "ds",
    "X360": "xbox-360",
}


def convert_to_name(platform_abb):
    """Return the URL platform name for a platform abbreviation.

    Parameters
    ----------
    platform_abb : hashable
        Platform abbreviation to look up in ``game_conversions`` (e.g. "PS4").

    Returns
    -------
    str
        The matching value from ``game_conversions``, or "other" when the
        abbreviation has no entry.
    """
    return game_conversions.get(platform_abb, "other")
    

In [8]:
# Attach the full platform name to every row, then keep only the rows whose
# platform has one (anything convert_to_name labels "other" is dropped).
games_df["full_platform_name"] = games_df["Platform"].map(convert_to_name)
has_known_platform = games_df["full_platform_name"].ne("other")
games_df = games_df.loc[has_known_platform]


In [9]:
# Display the titles that remain after the platform filter.
games_df["Name"].tolist()

['Wii Sports',
 'Mario Kart Wii',
 'Wii Sports Resort',
 'New Super Mario Bros.',
 'Wii Play',
 'New Super Mario Bros. Wii',
 'Nintendogs',
 'Mario Kart DS',
 'Wii Fit',
 'Wii Fit Plus',
 'Kinect Adventures!',
 'Grand Theft Auto V',
 'Grand Theft Auto: San Andreas',
 'Brain Age: Train Your Brain in Minutes a Day',
 'Pokemon Diamond/Pokemon Pearl',
 'Grand Theft Auto V',
 'Grand Theft Auto: Vice City',
 'Pokemon Ruby/Pokemon Sapphire',
 'Pokemon Black/Pokemon White',
 'Brain Age 2: More Training in Minutes a Day',
 'Gran Turismo 3: A-Spec',
 'Call of Duty: Modern Warfare 3',
 'Call of Duty: Black Ops',
 'Call of Duty: Black Ops 3',
 'Call of Duty: Black Ops II',
 'Call of Duty: Black Ops II',
 'Call of Duty: Modern Warfare 2',
 'Call of Duty: Modern Warfare 3',
 'Grand Theft Auto III',
 'Super Smash Bros. Brawl',
 'Call of Duty: Black Ops',
 'Animal Crossing: Wild World',
 'Halo 3',
 'Grand Theft Auto V',
 'Pokemon HeartGold/Pokemon SoulSilver',
 'Super Mario 64',
 'Gran Turismo 4',
 'S

In [10]:
# This worked pretty well, but I will alter the code further to see if I can increase my scraping success-to-failure ratio.

In [13]:
# Result accumulators. They live at cell level so that a partially completed
# run (e.g. after a manual interrupt) can still be inspected or saved.
side_details = []  # kept for backward compatibility; the loop below no longer rebinds it
problem_games = []  # names of games whose page lacked one of the required fields
abberant_side_detail_games = []  # score dicts for pages missing developer/players/rating
game_info_list = []  # score dicts for pages where every field was found
counter = 1  # 1-based count of fully successful scrapes

# Slice of the (platform, name) pairs processed by this run.
BATCH_START = 3000
BATCH_END = 6000

# Characters replaced by "-" when building the URL slug. Compiled once, and
# written as a raw string so that "\." and "\s" are regex escapes rather than
# invalid string escapes.
TITLE_SEPARATORS = re.compile(r"\.|:|;|\s|&")

# Pair every game's platform slug with its title.
zipped_list = list(zip(games_df["full_platform_name"].tolist(), games_df["Name"].tolist()))


def _title_to_slug(title):
    """Turn a game title into the slug interpolated into the game URL.

    Periods, colons, semicolons, whitespace and ampersands become dashes, the
    result is lower-cased, runs of three and then two dashes are collapsed,
    apostrophes are removed and a single trailing dash is dropped.

    NOTE(review): characters such as "?" and "/" are left untouched; inside a
    URL they act as query/path delimiters, so titles containing them probably
    cannot resolve - confirm the intended handling.
    """
    slug = (
        TITLE_SEPARATORS.sub("-", title)
        .lower()
        .replace("---", "-")
        .replace("--", "-")
        .replace("'", "")
    )
    # A period at the end of the title shows up as a trailing dash. endswith()
    # is also safe for an empty slug, where indexing with [-1] would raise.
    if slug.endswith("-"):
        slug = slug[:-1]
    return slug


def _review_counts(container):
    """Return the first three review-count texts found inside ``container``.

    Raises AttributeError when ``container`` is None and IndexError when fewer
    than three counts are present.
    """
    counts = [tag.get_text() for tag in container.select("ol.score_counts.hover_none span.count")]
    return counts[0], counts[1], counts[2]


def _add_required_fields(page_soup, game_scores):
    """Add the metascore, user score, release date and review counts to ``game_scores``.

    Any element missing from the page surfaces as an exception (typically
    AttributeError on a ``None`` lookup result, or IndexError), which the
    caller treats as "problem game".
    """
    # Metascore
    game_scores["metascore"] = page_soup.find("a", class_="metascore_anchor").get_text()

    # Userscore: first "metascore_w" div inside the first side-details panel
    score_panel = page_soup.find("div", class_="details side_details")
    game_scores["user_score"] = score_panel.select("div.metascore_w")[0].get_text()

    # Release date
    product_data = page_soup.find("div", class_="product_data")
    release_item = product_data.find("li", class_="summary_detail release_data")
    game_scores["release_date"] = release_item.find("span", class_="data").get_text()

    # Positive, mixed, and negative critic review numbers (first distribution block)
    positive, neutral, negative = _review_counts(page_soup.find("div", class_="distribution_wrap"))
    game_scores["positive_critics"] = positive
    game_scores["neutral_critics"] = neutral
    game_scores["negative_critics"] = negative

    # Positive, mixed, and negative user review numbers (block inside the user reviews module)
    user_module = page_soup.find("div", class_="user_reviews_module")
    positive, neutral, negative = _review_counts(user_module.find("div", class_="distribution_wrap"))
    game_scores["positive_users"] = positive
    game_scores["neutral_users"] = neutral
    game_scores["negative_users"] = negative


def _add_side_details(page_soup, game_scores):
    """Add developer, number_players and rating from the second side-details panel.

    The values are read from list items 0, 2 and 4 of ``ul.summary_details``.
    Keys are written one by one, so on failure ``game_scores`` keeps whatever
    was found before the missing element.
    """
    details_panel = page_soup.find_all("div", class_="details side_details")[1]
    items = details_panel.select("ul.summary_details li")
    game_scores["developer"] = items[0].find("span", class_="data").get_text()
    game_scores["number_players"] = items[2].find("span", class_="data").get_text()
    game_scores["rating"] = items[4].find("span", class_="data").get_text()


for platform, name in zipped_list[BATCH_START:BATCH_END]:
    edited_game_title = _title_to_slug(name)
    print(f"\nGAME TITLE: {edited_game_title}")
    browser.visit(f"https://www.metacritic.com/game/{platform}/{edited_game_title}")

    game_scores = {"Name": name}
    my_soup = soup(browser.html, "lxml")

    # `except Exception` (not a bare `except`) keeps the best-effort behaviour
    # for pages with missing elements while letting KeyboardInterrupt stop the run.
    try:
        _add_required_fields(my_soup, game_scores)
    except Exception:
        problem_games.append(name)
        print("THIS IS A PROBLEM CHILD")
        continue

    try:
        _add_side_details(my_soup, game_scores)
    except Exception:
        # Scores were found, only the side details are incomplete.
        abberant_side_detail_games.append(game_scores)
        print("SUCCESS but no side details")
        continue

    game_info_list.append(game_scores)
    print('SUCCESSFUL')
    print(f"This is the {counter}th game in the list.")
    counter += 1
        
            
        

        
        


GAME TITLE: bob-the-builder-can-we-fix-it?
THIS IS A PROBLEM CHILD

GAME TITLE: xiii
SUCCESSFUL
This is the 1th game in the list.

GAME TITLE: nhl-14
SUCCESSFUL
This is the 2th game in the list.

GAME TITLE: lego-rock-band
SUCCESS but no side details

GAME TITLE: bolt
SUCCESSFUL
This is the 3th game in the list.

GAME TITLE: top-spin-3
SUCCESSFUL
This is the 4th game in the list.

GAME TITLE: tom-clancys-hawx
SUCCESS but no side details

GAME TITLE: lego-dimensions
SUCCESS but no side details

GAME TITLE: breath-of-fire-iv
SUCCESSFUL
This is the 5th game in the list.

GAME TITLE: true-crime-streets-of-la
SUCCESSFUL
This is the 6th game in the list.

GAME TITLE: odin-sphere
SUCCESSFUL
This is the 7th game in the list.

GAME TITLE: donkey-kong-jungle-climber
THIS IS A PROBLEM CHILD

GAME TITLE: the-incredible-hulk
THIS IS A PROBLEM CHILD

GAME TITLE: ms-pac-man-maze-madness
SUCCESSFUL
This is the 8th game in the list.

GAME TITLE: arena-football
SUCCESSFUL
This is the 9th game in the li

SUCCESSFUL
This is the 72th game in the list.

GAME TITLE: tak-and-the-power-of-juju
SUCCESSFUL
This is the 73th game in the list.

GAME TITLE: dynasty-warriors-gundam-3
SUCCESSFUL
This is the 74th game in the list.

GAME TITLE: mortal-kombat-3
THIS IS A PROBLEM CHILD

GAME TITLE: turok-evolution
SUCCESSFUL
This is the 75th game in the list.

GAME TITLE: jak-x-combat-racing
SUCCESSFUL
This is the 76th game in the list.

GAME TITLE: metro-2033
SUCCESSFUL
This is the 77th game in the list.

GAME TITLE: are-you-smarter-than-a-5th-grader?-make-the-grade
THIS IS A PROBLEM CHILD

GAME TITLE: american-idol
SUCCESSFUL
This is the 78th game in the list.

GAME TITLE: top-gun-fire-at-will!
THIS IS A PROBLEM CHILD

GAME TITLE: red-dead-revolver
SUCCESSFUL
This is the 79th game in the list.

GAME TITLE: dragon-ball-z-shin-budokai
SUCCESSFUL
This is the 80th game in the list.

GAME TITLE: lego-the-hobbit
SUCCESSFUL
This is the 81th game in the list.

GAME TITLE: iron-man
SUCCESSFUL
This is the 82th 

SUCCESS but no side details

GAME TITLE: we-cheer
SUCCESSFUL
This is the 139th game in the list.

GAME TITLE: tom-clancys-ghost-recon-advanced-warfighter-2
SUCCESSFUL
This is the 140th game in the list.

GAME TITLE: x-men-legends
SUCCESSFUL
This is the 141th game in the list.

GAME TITLE: alice-in-wonderland
SUCCESSFUL
This is the 142th game in the list.

GAME TITLE: littlest-pet-shop-3-biggest-stars-blue-/-pink-/-purple-team
THIS IS A PROBLEM CHILD

GAME TITLE: front-mission-2
THIS IS A PROBLEM CHILD

GAME TITLE: bulletstorm
SUCCESSFUL
This is the 143th game in the list.

GAME TITLE: f-e-a-r-2-project-origin
THIS IS A PROBLEM CHILD

GAME TITLE: tiger-woods-pga-tour-11
SUCCESSFUL
This is the 144th game in the list.

GAME TITLE: stuntman-ignition
SUCCESSFUL
This is the 145th game in the list.

GAME TITLE: are-you-smarter-than-a-5th-grader?
SUCCESS but no side details

GAME TITLE: f1-2009
SUCCESSFUL
This is the 146th game in the list.

GAME TITLE: oni
SUCCESSFUL
This is the 147th game in

SUCCESSFUL
This is the 204th game in the list.

GAME TITLE: wwe-smackdown-vs-raw-2007
SUCCESSFUL
This is the 205th game in the list.

GAME TITLE: ace-combat-zero-the-belkan-war
SUCCESSFUL
This is the 206th game in the list.

GAME TITLE: r-u-s-e
THIS IS A PROBLEM CHILD

GAME TITLE: disgaea-3-absence-of-justice
SUCCESSFUL
This is the 207th game in the list.

GAME TITLE: test-drive-unlimited
SUCCESSFUL
This is the 208th game in the list.

GAME TITLE: lego-dimensions
SUCCESSFUL
This is the 209th game in the list.

GAME TITLE: picross-ds
SUCCESSFUL
This is the 210th game in the list.

GAME TITLE: final-fantasy-ii-anniversary-edition
SUCCESSFUL
This is the 211th game in the list.

GAME TITLE: nhl-07
SUCCESSFUL
This is the 212th game in the list.

GAME TITLE: harry-potter-and-the-half-blood-prince
SUCCESS but no side details

GAME TITLE: nba-2k11
SUCCESS but no side details

GAME TITLE: world-championship-poker
SUCCESSFUL
This is the 213th game in the list.

GAME TITLE: the-bigs
SUCCESSFUL
Th

SUCCESSFUL
This is the 270th game in the list.

GAME TITLE: bakugan-battle-brawlers-defenders-of-the-core
SUCCESSFUL
This is the 271th game in the list.

GAME TITLE: doom-3-bfg-edition
SUCCESSFUL
This is the 272th game in the list.

GAME TITLE: terraria
SUCCESSFUL
This is the 273th game in the list.

GAME TITLE: backyard-baseball
SUCCESSFUL
This is the 274th game in the list.

GAME TITLE: wipeout-pulse
THIS IS A PROBLEM CHILD

GAME TITLE: barbie-horse-adventures-wild-horse-rescue
SUCCESSFUL
This is the 275th game in the list.

GAME TITLE: lego-marvel-super-heroes
SUCCESS but no side details

GAME TITLE: taiko-no-tatsujin-appare-sandaime
THIS IS A PROBLEM CHILD

GAME TITLE: alone-in-the-dark
SUCCESSFUL
This is the 276th game in the list.

GAME TITLE: one-piece-grand-battle!
THIS IS A PROBLEM CHILD

GAME TITLE: transformers-revenge-of-the-fallen-(xbox-360,-ps3,-pc-versions)
THIS IS A PROBLEM CHILD

GAME TITLE: football-manager-handheld-2009
THIS IS A PROBLEM CHILD

GAME TITLE: crazy-taxi

SUCCESSFUL
This is the 331th game in the list.

GAME TITLE: hot-wheels-velocity-x
SUCCESSFUL
This is the 332th game in the list.

GAME TITLE: shadowrun
SUCCESSFUL
This is the 333th game in the list.

GAME TITLE: fifa-soccer-13
SUCCESSFUL
This is the 334th game in the list.

GAME TITLE: virtua-tennis-4-world-tour
SUCCESSFUL
This is the 335th game in the list.

GAME TITLE: crazy-taxi-fare-wars
SUCCESSFUL
This is the 336th game in the list.

GAME TITLE: clash-of-elementalists
SUCCESS but no side details

GAME TITLE: spider-man-3
SUCCESSFUL
This is the 337th game in the list.

GAME TITLE: disney-infinity-3-0
THIS IS A PROBLEM CHILD

GAME TITLE: l-a-rush
THIS IS A PROBLEM CHILD

GAME TITLE: bomberman-hero
THIS IS A PROBLEM CHILD

GAME TITLE: automobili-lamborghini
THIS IS A PROBLEM CHILD

GAME TITLE: quest-64
THIS IS A PROBLEM CHILD

GAME TITLE: momotarou-dentetsu-v
THIS IS A PROBLEM CHILD

GAME TITLE: doom-64
THIS IS A PROBLEM CHILD

GAME TITLE: boom-blox-bash-party
SUCCESS but no side det

KeyboardInterrupt: 

In [None]:
# Inspect the records of the games for which every field was scraped.
game_info_list

In [None]:
#scraping_df = pd.DataFrame(game_info_list)
#scraping_df.to_csv("../Resources/scraped_ratings2")

In [None]:
# get developer, genre, number_players, rating, release_date

In [None]:
# Number of games whose page was missing a score, release date, or review count.
len(problem_games)

In [None]:
#problem_ratings = pd.DataFrame(problem_games)
#problem_ratings.to_csv("../Resources/problem_ratings2")

In [None]:
#'& III ! Pokémon

In [None]:
# Number of games scraped successfully except for the side details (developer, players, rating).
len(abberant_side_detail_games)