In [1]:
import re
from bs4 import BeautifulSoup
import requests
import pandas as pd
from howlongtobeatpy import HowLongToBeat as hltb

In [2]:
list_of_games = pd.read_csv(r"C:\Users\ricar\Documents\Python Scripts\Game Backlog\game_list.csv")

In [3]:
def extract_mc_data(series):
    url = 'https://www.metacritic.com/game/'
    name = series['name'].replace('.','').replace(',','').replace('-','').replace('\'','').replace(':','').replace(' ','-')
    session = requests.Session()
    session.headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
    full_url = url + series['platform'] + '/' + name.lower() + '/'
    response = session.get(full_url)
    parser = BeautifulSoup(response.content, 'html.parser')
    
    review_count_elements = parser.find("span", class_="count")
    if review_count_elements is not None:
        review_count_text = review_count_elements.text.strip()
        review_count = int(re.findall(r'[\d]+', review_count_text)[0])
    else:
        review_count = 0
    
    score_elements = parser.find("span", itemprop="ratingValue")
    if score_elements is not None:
        score_text = score_elements.text.strip()
        score = int(score_text)
    else:
        score = 0
    return pd.Series([series['name'], series['platform'], score, review_count], index =['name', 'platform', 'score', 'total_reviews'])

In [4]:
print(extract_mc_data(list_of_games.iloc[34]))

name             Enter the Gungeon
platform             playstation-4
score                           82
total_reviews                   33
dtype: object


In [5]:
subset_10 = list_of_games.head(10)

In [6]:
subset_10 = subset_10.apply(extract_mc_data, axis='columns')

In [7]:
print(subset_10)

                              name       platform  score  total_reviews
0                             Abzu  playstation-4     78             72
1                  Alien Isolation  playstation-3      0              0
2  Animal Crossing Amiibo Festival          wii-u     46             20
3     Animal Crossing New Horizons         switch     90            111
4                 Assassin's Creed             pc      0              0
5                        Bayonetta          wii-u     86             18
6                      Bayonetta 2          wii-u     91             80
7                    Balloon Fight            NES      0              0
8             Batman Arkham Asylum  playstation-3     91             70
9                       BloodBorne  playstation-4     92            100


In [8]:
list_of_games = list_of_games.apply(extract_mc_data, axis='columns')

In [9]:
list_of_games.head(10)

Unnamed: 0,name,platform,score,total_reviews
0,Abzu,playstation-4,78,72
1,Alien Isolation,playstation-3,0,0
2,Animal Crossing Amiibo Festival,wii-u,46,20
3,Animal Crossing New Horizons,switch,90,111
4,Assassin's Creed,pc,0,0
5,Bayonetta,wii-u,86,18
6,Bayonetta 2,wii-u,91,80
7,Balloon Fight,NES,0,0
8,Batman Arkham Asylum,playstation-3,91,70
9,BloodBorne,playstation-4,92,100


In [10]:
list_of_games.tail(10)

Unnamed: 0,name,platform,score,total_reviews
207,Uncharted 4: A Thief's End,playstation-4,93,113
208,Uncharted: A Lost Legacy,playstation-4,0,0
209,Uncharted: Drake's Fortune,playstation-3,88,66
210,Uncharted: Drake's Fortune,playstation-4,0,0
211,Watch Dogs,playstation-4,80,80
212,World Of Warcraft,pc,93,57
213,World Of Warcraft Burning Crusade,pc,0,0
214,WWF King of the Ring,GB,0,0
215,Xenoblade Chronicles X,wii-u,84,87
216,Zelda II: The Adventure of Link,NES,0,0


In [11]:
def extract_hltb_data(name):
    matching_games = hltb().search(name)
    gametime = [0, 0, 0]
    if matching_games is not None:
        for game in matching_games:
            if name == game.game_name:
                gametime[0] = game.main_story
                gametime[1] = game.main_extra
                
    gametime[2] = (gametime[0] + gametime[1]) / 2
    return pd.Series(gametime)

In [12]:
print(extract_hltb_data(list_of_games.iloc[216]['name']))

0    10.78
1    10.84
2    10.81
dtype: float64


In [13]:
list_of_games[['main_story', 'main_extra', 'avg_playthrough']] = list_of_games['name'].apply(extract_hltb_data)

In [14]:
list_of_games.tail(10)

Unnamed: 0,name,platform,score,total_reviews,main_story,main_extra,avg_playthrough
207,Uncharted 4: A Thief's End,playstation-4,93,113,15.11,17.58,16.345
208,Uncharted: A Lost Legacy,playstation-4,0,0,0.0,0.0,0.0
209,Uncharted: Drake's Fortune,playstation-3,88,66,8.11,9.66,8.885
210,Uncharted: Drake's Fortune,playstation-4,0,0,8.11,9.66,8.885
211,Watch Dogs,playstation-4,80,80,19.37,33.21,26.29
212,World Of Warcraft,pc,93,57,0.0,0.0,0.0
213,World Of Warcraft Burning Crusade,pc,0,0,0.0,0.0,0.0
214,WWF King of the Ring,GB,0,0,0.29,0.0,0.145
215,Xenoblade Chronicles X,wii-u,84,87,68.0,108.73,88.365
216,Zelda II: The Adventure of Link,NES,0,0,10.78,10.84,10.81


In [15]:
list_of_games['normalized_score'] = list_of_games['score'].apply(
    lambda x: (x - list_of_games['score'].min())/(list_of_games['score'].max() - list_of_games['score'].min()))

In [16]:
list_of_games['normalized_reviews'] = list_of_games['total_reviews'].apply(
    lambda x: (x - list_of_games['total_reviews'].min())/(list_of_games['total_reviews'].max() - list_of_games['total_reviews'].min()))

In [17]:
list_of_games['normalized_time'] = list_of_games['avg_playthrough'].apply(
    lambda x: (x - list_of_games['avg_playthrough'].min())/(list_of_games['avg_playthrough'].max() - list_of_games['avg_playthrough'].min()))

In [18]:
list_of_games['playability_score'] = list_of_games.apply(
    lambda x: ((x['normalized_score']*0.5)+(x['normalized_reviews']*0.3)+(x['normalized_time']*0.2)) * 100,
    axis='columns')

In [19]:
list_of_games.sort_values(['playability_score'], inplace=True, ascending=False)
list_of_games[['name', 'platform', 'playability_score']].head(15)

Unnamed: 0,name,platform,playability_score
172,Super Mario Odyssey,switch,81.357335
215,Xenoblade Chronicles X,wii-u,79.820361
207,Uncharted 4: A Thief's End,playstation-4,78.720208
184,The Last Of Us Part II,playstation-4,77.938144
53,God Of War,playstation-4,77.709807
203,Uncharted 2: Among Thieves,playstation-3,77.450676
187,The Legend of Zelda Breath of the Wild,switch,77.024793
102,Ni no Kuni: Wrath of the White Witch,playstation-3,74.404723
3,Animal Crossing New Horizons,switch,73.912414
183,The Last Of Us,playstation-3,73.266593


In [20]:
list_of_games.to_csv(r"C:\Users\ricar\Documents\Python Scripts\Game Backlog\backlog.csv")