In [1]:
import re
import shutil

import lxml
import pandas as pd
from bs4 import BeautifulSoup as soup
from bs4 import NavigableString
from splinter import Browser

# Goal: gather rating data for every game that has no data in ratings_df (see the exploratory analysis notebook).
# For each game we need the positive, neutral, and negative review counts for both users and critics,
# as well as the metascore and userscore.

# Games that still need ratings.
# NOTE(review): no_rating_df is not referenced again in the visible cells — confirm it is still needed.
no_rating_df = pd.read_csv("../Resources/games_to_get_ratings.csv")

In [2]:
# Confirm chromedriver is installed and print where it lives.
!which chromedriver

/usr/local/bin/chromedriver


In [11]:
# Start a headless Chrome session for scraping.
# Prefer whichever chromedriver is on PATH so the notebook is not tied to one
# machine's layout; fall back to the previously hard-coded location.
chromedriver_path = shutil.which("chromedriver") or "/usr/local/bin/chromedriver"
browser = Browser("chrome", executable_path=chromedriver_path, headless=True)

In [4]:
# Load the full sales dataset and keep a plain Python list of every game name.
games_df = pd.read_csv("../Resources/vgsales.csv.zip")
list_of_games = games_df["Name"].tolist()

In [5]:
# Navigating to the correct page via the search bar using splinter is too slow, so I will instead
# build the URL directly using string interpolation.
# The console name must be interpolated into the URL as well. Metacritic's URL structure is:
# "https://www.metacritic.com/game/{console}/{game}"

In [6]:
# Platform abbreviations from the sales data must be converted into the slug
# Metacritic uses for that platform before they can be interpolated into the URL.

game_conversions = {
    "PSV": "playstation-vita",
    "DC": "dreamcast",
    "WiiU": "wii-u",
    "GC": "gamecube",
    "N64": "nintendo-64",
    "XB": "xbox",
    "PSP": "psp",
    "PS4": "playstation-4",
    "PS": "playstation",
    "Wii": "wii",
    "PS3": "playstation-3",
    "PS2": "playstation-2",
    "GBA": "game-boy-advance",
    "DS": "ds",
    "X360": "xbox-360",
}


def convert_to_name(platform_abb):
    """Return the Metacritic URL slug for a platform abbreviation.

    Parameters
    ----------
    platform_abb : hashable
        Platform abbreviation as it appears in the sales data, e.g. ``"PS2"``.

    Returns
    -------
    str
        The matching slug from ``game_conversions``, or ``"other"`` when the
        abbreviation has no entry there.
    """
    return game_conversions.get(platform_abb, "other")
    

In [7]:
# The vast majority of our data belongs to consoles that Metacritic lists (its "legacy consoles").
# Label every row with its Metacritic platform slug, then drop the rows labelled "other".
games_df["full_platform_name"] = games_df["Platform"].map(convert_to_name)
games_df = games_df[games_df["full_platform_name"].ne("other")]


In [8]:
# Names of the games that remain after the platform filter.
games_df.loc[:, "Name"].tolist()

['Wii Sports',
 'Mario Kart Wii',
 'Wii Sports Resort',
 'New Super Mario Bros.',
 'Wii Play',
 'New Super Mario Bros. Wii',
 'Nintendogs',
 'Mario Kart DS',
 'Wii Fit',
 'Wii Fit Plus',
 'Kinect Adventures!',
 'Grand Theft Auto V',
 'Grand Theft Auto: San Andreas',
 'Brain Age: Train Your Brain in Minutes a Day',
 'Pokemon Diamond/Pokemon Pearl',
 'Grand Theft Auto V',
 'Grand Theft Auto: Vice City',
 'Pokemon Ruby/Pokemon Sapphire',
 'Pokemon Black/Pokemon White',
 'Brain Age 2: More Training in Minutes a Day',
 'Gran Turismo 3: A-Spec',
 'Call of Duty: Modern Warfare 3',
 'Call of Duty: Black Ops',
 'Call of Duty: Black Ops 3',
 'Call of Duty: Black Ops II',
 'Call of Duty: Black Ops II',
 'Call of Duty: Modern Warfare 2',
 'Call of Duty: Modern Warfare 3',
 'Grand Theft Auto III',
 'Super Smash Bros. Brawl',
 'Call of Duty: Black Ops',
 'Animal Crossing: Wild World',
 'Halo 3',
 'Grand Theft Auto V',
 'Pokemon HeartGold/Pokemon SoulSilver',
 'Super Mario 64',
 'Gran Turismo 4',
 'S

In [9]:
# This worked pretty well, but I will alter the code further to see if I can increase my scraping success-to-failure ratio.

In [10]:

# Scrape scores, review counts, release date and side details from Metacritic
# for one slice of the game list.
problem_games = []               # names of games missing a required score/review element
abberant_side_detail_games = []  # records that scraped fine except for the side details
game_info_list = []              # complete records
counter = 1

# Compiled once instead of on every iteration. Raw strings keep "\." and "\s"
# from being read as (invalid) string escapes.
separator_pattern = re.compile(r"\.|:|;|\s|&")
dash_run_pattern = re.compile(r"-{2,}")

# What a missing page element raises: find() returns None (AttributeError on the
# next call) or a result list is too short (IndexError). Catching only these,
# rather than using a bare except, lets KeyboardInterrupt stop the loop.
missing_element_errors = (AttributeError, IndexError)

# Slice of zipped_list handled by this run.
batch_start, batch_end = 12000, 15000

# Pair each game's platform slug with its name.
zipped_list = list(zip(games_df["full_platform_name"].tolist(), games_df["Name"].tolist()))

for platform, name in zipped_list[batch_start:batch_end]:
    if not isinstance(name, str):
        # A non-string name (e.g. NaN from a blank CSV field) cannot be turned into a URL slug.
        problem_games.append(name)
        print("THIS IS A PROBLEM CHILD")
        continue

    game_scores = {}

    # Build the URL slug: separators become dashes, runs of dashes collapse to
    # one, and apostrophes are dropped.
    edited_game_title = separator_pattern.sub("-", name).lower()
    edited_game_title = dash_run_pattern.sub("-", edited_game_title).replace("'", "")
    # A period at the end of the title shows up as a trailing dash; remove it.
    # endswith() is also safe when the slug is empty.
    if edited_game_title.endswith("-"):
        edited_game_title = edited_game_title[:-1]
    print(f"\nGAME TITLE: {edited_game_title}")
    browser.visit(f"https://www.metacritic.com/game/{platform}/{edited_game_title}")

    game_scores["Name"] = name

    html = browser.html
    my_soup = soup(html, "lxml")

    # Scores, release date and review counts are all required: if any of them
    # is missing, the game is recorded as a problem and skipped.
    try:
        # Metascore
        game_scores["metascore"] = my_soup.find("a", class_="metascore_anchor").get_text()

        # Userscore
        userscore_first = my_soup.find("div", class_="details side_details")
        game_scores["user_score"] = userscore_first.select("div.metascore_w")[0].get_text()

        # Release date
        product_data = my_soup.find("div", class_="product_data")
        release_data = product_data.find("li", class_="summary_detail release_data")
        game_scores["release_date"] = release_data.find("span", class_="data").get_text()

        # Positive, mixed, and negative critic review numbers
        critic_distribution = my_soup.find("div", class_="distribution_wrap")
        critic_reviews = [
            count.get_text()
            for count in critic_distribution.select("ol.score_counts.hover_none span.count")
        ]
        game_scores["positive_critics"] = critic_reviews[0]
        game_scores["neutral_critics"] = critic_reviews[1]
        game_scores["negative_critics"] = critic_reviews[2]

        # Positive, mixed, and negative user review numbers
        user_module = my_soup.find("div", class_="user_reviews_module")
        user_distribution = user_module.find("div", class_="distribution_wrap")
        user_reviews = [
            count.get_text()
            for count in user_distribution.select("ol.score_counts.hover_none span.count")
        ]
        game_scores["positive_users"] = user_reviews[0]
        game_scores["neutral_users"] = user_reviews[1]
        game_scores["negative_users"] = user_reviews[2]
    except missing_element_errors:
        problem_games.append(name)
        print("THIS IS A PROBLEM CHILD")
        continue

    # Side details: when they cannot be read, whatever was scraped so far is
    # kept in a separate list instead of being thrown away.
    try:
        side_details = my_soup.find_all("div", class_="details side_details")[1]
        unordered_list = side_details.select("ul.summary_details li")
        game_scores["developer"] = unordered_list[0].find("span", class_="data").get_text()
        game_scores["number_players"] = unordered_list[2].find("span", class_="data").get_text()
        game_scores["rating"] = unordered_list[4].find("span", class_="data").get_text()
    except missing_element_errors:
        abberant_side_detail_games.append(game_scores)
        print("SUCCESS but no side details")
        continue

    game_info_list.append(game_scores)
    print("SUCCESSFUL")
    print(f"This is the {counter}th game in the list.")
    counter += 1
        
            
        

        
        


GAME TITLE: kevin-van-dam-big-bass-challenge
THIS IS A PROBLEM CHILD

GAME TITLE: stolen
SUCCESSFUL
This is the 1th game in the list.

GAME TITLE: monster-high-new-ghoul-in-school
SUCCESS but no side details

GAME TITLE: glacier2
THIS IS A PROBLEM CHILD

GAME TITLE: rugby-challenge-3
SUCCESSFUL
This is the 2th game in the list.

GAME TITLE: memories-off-6-t-wave
THIS IS A PROBLEM CHILD

GAME TITLE: superstars-v8-racing
SUCCESS but no side details

GAME TITLE: hatsune-miku-project-diva-x
SUCCESSFUL
This is the 3th game in the list.

GAME TITLE: bullet-girls-2
THIS IS A PROBLEM CHILD

GAME TITLE: pro-race-driver
SUCCESSFUL
This is the 4th game in the list.

GAME TITLE: sleepover-party
THIS IS A PROBLEM CHILD

GAME TITLE: 100-classic-games
THIS IS A PROBLEM CHILD

GAME TITLE: ide-yosuke-no-mahjong-kazoku
THIS IS A PROBLEM CHILD

GAME TITLE: wrc-fia-world-rally-championship
SUCCESSFUL
This is the 5th game in the list.

GAME TITLE: quiz-mobile-gundam-toi-senshi-dx
THIS IS A PROBLEM CHILD



THIS IS A PROBLEM CHILD

GAME TITLE: flow-urban-dance-uprising
SUCCESS but no side details

GAME TITLE: ailu-de-puzzle
THIS IS A PROBLEM CHILD

GAME TITLE: true-pinball
THIS IS A PROBLEM CHILD

GAME TITLE: janes-hotel
THIS IS A PROBLEM CHILD

GAME TITLE: stadium-games
THIS IS A PROBLEM CHILD

GAME TITLE: rlh-run-like-hell
SUCCESSFUL
This is the 31th game in the list.

GAME TITLE: power-play-pool
SUCCESSFUL
This is the 32th game in the list.

GAME TITLE: obscure
SUCCESSFUL
This is the 33th game in the list.

GAME TITLE: super-robot-taisen-scramble-commander-the-2nd
THIS IS A PROBLEM CHILD

GAME TITLE: mahjong-taikai
THIS IS A PROBLEM CHILD

GAME TITLE: espgaluda-ii-black-label
THIS IS A PROBLEM CHILD

GAME TITLE: bratz-kidz
THIS IS A PROBLEM CHILD

GAME TITLE: hot-potato!
SUCCESS but no side details

GAME TITLE: kiniro-no-corda-2-f
THIS IS A PROBLEM CHILD

GAME TITLE: swords
THIS IS A PROBLEM CHILD

GAME TITLE: winter-sports-3-the-great-tournament
THIS IS A PROBLEM CHILD

GAME TITLE: de

THIS IS A PROBLEM CHILD

GAME TITLE: special-forces-nemesis-strike
SUCCESSFUL
This is the 58th game in the list.

GAME TITLE: womens-volleyball-championship
SUCCESSFUL
This is the 59th game in the list.

GAME TITLE: junior-island-adventure
THIS IS A PROBLEM CHILD

GAME TITLE: jikkyou-powerful-pro-yakyuu-wii-ketteiban
THIS IS A PROBLEM CHILD

GAME TITLE: mx-vs-atv-supercross
SUCCESSFUL
This is the 60th game in the list.

GAME TITLE: rakushou!-pachi-slot-sengen-5-rio-paradise
THIS IS A PROBLEM CHILD

GAME TITLE: battle-assault-3-featuring-gundam-seed
SUCCESSFUL
This is the 61th game in the list.

GAME TITLE: crimson-tears
SUCCESSFUL
This is the 62th game in the list.

GAME TITLE: hayate-no-gotoku!-ojousama-produce-daisakusen-boku-iro-ni-somare!-oyashiki-hen
THIS IS A PROBLEM CHILD

GAME TITLE: batman-forever-the-arcade-game
THIS IS A PROBLEM CHILD

GAME TITLE: electroplankton
SUCCESSFUL
This is the 63th game in the list.

GAME TITLE: cabelas-outdoor-adventures
SUCCESS but no side details

THIS IS A PROBLEM CHILD

GAME TITLE: exstetra
THIS IS A PROBLEM CHILD

GAME TITLE: konami-classics-series-arcade-hits
SUCCESSFUL
This is the 85th game in the list.

GAME TITLE: dodge-racing-charger-vs-challenger
THIS IS A PROBLEM CHILD

GAME TITLE: gokujou!!-mecha-mote-iinchou-mm-my-best-friend!
THIS IS A PROBLEM CHILD

GAME TITLE: atelier-sophie-the-alchemist-of-the-mysterious-book
THIS IS A PROBLEM CHILD

GAME TITLE: singstar-chartbreaker
THIS IS A PROBLEM CHILD

GAME TITLE: hiiro-no-kakera-2-hisui-no-shizuku
THIS IS A PROBLEM CHILD

GAME TITLE: pro-yakyuu-spirits-5-kanzenban
THIS IS A PROBLEM CHILD

GAME TITLE: sorcery-saga-the-curse-of-the-great-curry-god
SUCCESSFUL
This is the 86th game in the list.

GAME TITLE: singstar-fussballhits
THIS IS A PROBLEM CHILD

GAME TITLE: a-good-librarian-like-a-good-shepherd-library-party
THIS IS A PROBLEM CHILD

GAME TITLE: dance-factory
SUCCESSFUL
This is the 87th game in the list.

GAME TITLE: lego-legends-of-chima-lavals-journey
SUCCESS but no 

THIS IS A PROBLEM CHILD

GAME TITLE: lux-pain-(jp-sales)
THIS IS A PROBLEM CHILD

GAME TITLE: smashing-drive
SUCCESSFUL
This is the 113th game in the list.

GAME TITLE: senko-no-ronde-duo
THIS IS A PROBLEM CHILD

GAME TITLE: starshot-space-circus-fever
THIS IS A PROBLEM CHILD

GAME TITLE: charlie-blasts-territory
THIS IS A PROBLEM CHILD

GAME TITLE: big-mountain-2000
SUCCESS but no side details

GAME TITLE: tokimeki-memorial-girls-side-1st-love-plus
THIS IS A PROBLEM CHILD

GAME TITLE: cima-the-enemy
SUCCESSFUL
This is the 114th game in the list.

GAME TITLE: n+
SUCCESSFUL
This is the 115th game in the list.

GAME TITLE: katekyoo-hitman-reborn!-kizuna-no-tag-battle
THIS IS A PROBLEM CHILD

GAME TITLE: code-geass-hangyaku-no-lelouch
THIS IS A PROBLEM CHILD

GAME TITLE: hudson-x-greeeen-live!?-deeees!?
THIS IS A PROBLEM CHILD

GAME TITLE: marble-madness-/-klax
THIS IS A PROBLEM CHILD

GAME TITLE: downstream-panic!
SUCCESSFUL
This is the 116th game in the list.

GAME TITLE: dj-max-portabl

THIS IS A PROBLEM CHILD

GAME TITLE: hajime-no-ippo-the-fighting
THIS IS A PROBLEM CHILD

GAME TITLE: merv-griffins-crosswords
THIS IS A PROBLEM CHILD

GAME TITLE: spectral-souls-resurrection-of-the-ethereal-empires
SUCCESSFUL
This is the 146th game in the list.

GAME TITLE: sokukoku-no-kusabi-hiiro-no-kakera-3-portable
THIS IS A PROBLEM CHILD

GAME TITLE: syberia-ii
SUCCESSFUL
This is the 147th game in the list.

GAME TITLE: popcap-hits!-vol-2
THIS IS A PROBLEM CHILD

GAME TITLE: ao-no-kanata-no-four-rhythm
THIS IS A PROBLEM CHILD

GAME TITLE: motor-trend-presents-lotus-challenge
SUCCESSFUL
This is the 148th game in the list.

GAME TITLE: trine-2
THIS IS A PROBLEM CHILD

GAME TITLE: bleach-ds-4th-flame-bringer
THIS IS A PROBLEM CHILD

GAME TITLE: pacific-liberator
SUCCESS but no side details

GAME TITLE: parfait
THIS IS A PROBLEM CHILD

GAME TITLE: tago-akira-no-atama-no-taisou-dai-2-shuu-ginga-oudan-nazotoki-adventure
THIS IS A PROBLEM CHILD

GAME TITLE: the-king-of-fighters-portable

KeyboardInterrupt: 

In [12]:
# Second pass: collect only the side details (developer, number of players,
# rating) for every game, keeping whichever of the three fields are present.
game_side_details = []           # one record per game whose side-details panel was found
problem_games = []               # names that could not be turned into a URL slug
abberant_side_detail_games = []  # names of games with no side-details panel
game_info_list = []
counter = 1

# Compiled once instead of on every iteration. Raw strings keep "\." and "\s"
# from being read as (invalid) string escapes.
separator_pattern = re.compile(r"\.|:|;|\s|&")
dash_run_pattern = re.compile(r"-{2,}")

# What a missing page element raises: find() returns None (AttributeError on the
# next call) or a result list is too short (IndexError). Catching only these,
# rather than using a bare except, lets KeyboardInterrupt stop the loop.
missing_element_errors = (AttributeError, IndexError)

# (record key, position of that field in the side-details list)
side_detail_fields = (("developer", 0), ("number_players", 2), ("rating", 4))

# Pair each game's platform slug with its name.
zipped_list = list(zip(games_df["full_platform_name"].tolist(), games_df["Name"].tolist()))


for platform, name in zipped_list:
    if not isinstance(name, str):
        # A non-string name (e.g. NaN from a blank CSV field) cannot be turned into a URL slug.
        problem_games.append(name)
        print("THIS IS A PROBLEM CHILD")
        continue

    my_details = {}

    # Build the URL slug: separators become dashes, runs of dashes collapse to
    # one, and apostrophes are dropped.
    edited_game_title = separator_pattern.sub("-", name).lower()
    edited_game_title = dash_run_pattern.sub("-", edited_game_title).replace("'", "")
    # A period at the end of the title shows up as a trailing dash; remove it.
    # endswith() is also safe when the slug is empty.
    if edited_game_title.endswith("-"):
        edited_game_title = edited_game_title[:-1]
    print(f"\nGAME TITLE: {edited_game_title}")
    browser.visit(f"https://www.metacritic.com/game/{platform}/{edited_game_title}")
    my_details["Name"] = name

    html = browser.html
    my_soup = soup(html, "lxml")

    # Locate the side-details panel; without it there is nothing to record.
    try:
        side_details = my_soup.find_all("div", class_="details side_details")[1]
        unordered_list = side_details.select("ul.summary_details li")
    except missing_element_errors:
        abberant_side_detail_games.append(name)
        print("SUCCESS but no side details")
        continue

    for field, position in side_detail_fields:
        try:
            my_details[field] = unordered_list[position].find("span", class_="data").get_text()
        except missing_element_errors:
            # Best effort: leave this field out of the record when the page lacks it.
            pass

    game_side_details.append(my_details)
    print("SUCCESSFUL")
    print(f"This is the {counter}th game in the list.")
    counter += 1
        
            
        

        


GAME TITLE: wii-sports
SUCCESSFUL
This is the 1th game in the list.

GAME TITLE: mario-kart-wii
SUCCESSFUL
This is the 2th game in the list.

GAME TITLE: wii-sports-resort
SUCCESSFUL
This is the 3th game in the list.

GAME TITLE: new-super-mario-bros
SUCCESSFUL
This is the 4th game in the list.

GAME TITLE: wii-play
SUCCESSFUL
This is the 5th game in the list.

GAME TITLE: new-super-mario-bros-wii
SUCCESSFUL
This is the 6th game in the list.

GAME TITLE: nintendogs
SUCCESS but no side details

GAME TITLE: mario-kart-ds
SUCCESSFUL
This is the 7th game in the list.

GAME TITLE: wii-fit
SUCCESSFUL
This is the 8th game in the list.

GAME TITLE: wii-fit-plus
SUCCESSFUL
This is the 9th game in the list.

GAME TITLE: kinect-adventures!
SUCCESSFUL
This is the 10th game in the list.

GAME TITLE: grand-theft-auto-v
SUCCESSFUL
This is the 11th game in the list.

GAME TITLE: grand-theft-auto-san-andreas
SUCCESSFUL
This is the 12th game in the list.

GAME TITLE: brain-age-train-your-brain-in-minut

KeyboardInterrupt: 

In [None]:
# Inspect the scraped records.
# NOTE(review): the side-details cell re-initialises game_info_list to [] and never appends to it,
# so this shows an empty list if that cell was the last one run.
game_info_list

In [None]:
#scraping_df = pd.DataFrame(game_info_list)
#scraping_df.to_csv("../Resources/scraped_ratings2")

In [None]:
# get developer, genre, number_players, rating, release_date

In [None]:
# How many games were flagged as problems.
# NOTE(review): problem_games is reset to [] at the top of each scraping cell, so the count
# only reflects whichever of those cells ran most recently.
len(problem_games)

In [None]:
#problem_ratings = pd.DataFrame(problem_games)
#problem_ratings.to_csv("../Resources/problem_ratings2")

In [None]:
#'& III ! Pokémon

In [None]:
# How many games had no parsable side details in the most recently run scraping cell.
len(abberant_side_detail_games)