### 개요

dictionary 구조

- platform : 발매 플랫폼 string
- title : 게임명 string
- publisher : 퍼블리셔명 string
- 0~99 : 내부 dict (100개 이하)
  - score : 유저 점수
  - review : 리뷰 내용 ex) 3번 리뷰 내용 : dict[3]['review']
- static
  - critic : 평론가 관련 수치 dict
    - mean : 평점
    - total : 전체 리뷰 수
    - positive : 긍정 수
    - mixed : 복합적 평가 수(so so 함)
    - negative : 부정 수
  - user : 유저 관련 수치 dict, 평론가와 동일
- detail
  - summary : 게임 소개
  - 이 외에도 게임 등급, 장르 등을 table에서 긁어와 key : value 형태로 결합
- review number : 마지막 리뷰의 index

### Import

예전 코드 재활용 + joblib 추가

[참고사이트(메타크리틱)](https://www.metacritic.com)

In [1]:
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

from bs4 import BeautifulSoup 
import time
import pandas as pd
import numpy as np
import json
import re
import csv
import requests
import pickle
import tqdm

import sqlite3

import warnings
warnings.filterwarnings("ignore") # 불필요한 Warning 메시지를 꺼줍니다.

options = Options()
options.headless = True

import joblib

### Run Webdriver

In [2]:
service = Service(executable_path=ChromeDriverManager().install()) 

driver = webdriver.Chrome(service=service, options=options)

driver.maximize_window()



Current google-chrome version is 104.0.5112
Get LATEST chromedriver version for 104.0.5112 google-chrome
Driver [C:\Users\piakp\.wdm\drivers\chromedriver\win32\104.0.5112.79\chromedriver.exe] found in cache


### Best games

In [3]:
metacritic_base_url = 'https://www.metacritic.com/browse/games/score/userscore/all/all/filtered?page='

In [4]:
driver.get(metacritic_base_url + str(0))

time.sleep(3)

In [5]:
game_list_page = BeautifulSoup(driver.page_source, 'html.parser')

best_game_anchor_list = game_list_page.select('.clamp-summary-wrap > a')

In [6]:
driver.get(metacritic_base_url + str(1))

time.sleep(3)

In [7]:
game_list_page = BeautifulSoup(driver.page_source, 'html.parser')

best_game_anchor_list.extend(game_list_page.select('.clamp-summary-wrap > a')[:50])

In [8]:
start = 0
end = len(best_game_anchor_list)
file_number = start

for anchor in tqdm.tqdm(best_game_anchor_list[start:end], total=end - start):
    
    # Open Detail & Credits page
    driver.get(f"https://www.metacritic.com{anchor.attrs['href']}/details")
    
    time.sleep(3)
    
    # Initail value input
    
    review_dict = {}
    
    review_dict['platform'] = ''
    review_dict['title'] = ''
    review_dict['publisher'] = ''
    
    review_dict['static'] = {}
    
    review_dict['static']['user'] = {
        'positive' : 0
        , 'mixed' : 0
        , 'negative' : 0
        , 'total' : 0
        , 'mean' : 0
    }
    
    review_dict['static']['critic'] = {
        'positive' : 0
        , 'mixed' : 0
        , 'negative' : 0
        , 'total' : 0
        , 'mean' : 0
    }
    
    review_dict['detail'] = {}
    
    # Beautiful Soup parse
    
    detail_page = BeautifulSoup(driver.page_source, 'html.parser')
    
    product_score_section = detail_page.select('div.product_scores.section')
    
    # Scrape Critic score, Critic review count, User score, Summary discription
    
    try:
        review_dict['static']['critic']['mean'] = int(detail_page.select('a.metascore_anchor span')[0].get_text())
        review_dict['static']['critic']['total'] = int(detail_page.select('p a span')[0].get_text())
    except:
        pass
    
    try:
        review_dict['static']['user']['mean'] = float(detail_page.select('a.metascore_anchor div')[1].get_text())
    except:
        pass
    
    try:
        review_dict['detail']['summary'] = detail_page.select_one('div.summary_detail .data').get_text()
        
        for row in detail_page.select('.product_details tr.alt'):
            review_dict['detail'][row.select_one('th').get_text()] = row.select_one('td').get_text()
    except:
        pass
    
    # Open User Review page
    
    driver.get(f"https://www.metacritic.com{anchor.attrs['href']}/user-reviews")
    
    time.sleep(3)
    
    # Beautiful Soup Parse
    
    review_page = BeautifulSoup(driver.page_source, 'html.parser')
    
    # Scrape Platform, Title, Publisher
    
    try:
        review_dict['platform'] = review_page.select_one(".product_title .platform").get_text().strip()
    except:
        pass
    try:
        review_dict['title'] = review_page.select_one(".product_title h1").get_text()
    except:
        pass
    try:
        review_dict['publisher'] = review_page.select_one(".publisher a").get_text().strip()
    except:
        pass
    
    # Scrape Review's Positive/Mixed/Negative Distibution and User review count

    try:
        score_distribution = review_page.select(".score_counts")
        
        scores = score_distribution[0].select('span.count')
        review_dict['static']['user']['positive'] = int(scores[0].get_text().replace(',',''))
        review_dict['static']['user']['mixed'] = int(scores[1].get_text().replace(',',''))
        review_dict['static']['user']['negative'] = int(scores[2].get_text().replace(',',''))
        
        scores = score_distribution[1].select('span.count')
        review_dict['static']['critic']['positive'] = int(scores[0].get_text().replace(',',''))
        review_dict['static']['critic']['mixed'] = int(scores[1].get_text().replace(',',''))
        review_dict['static']['critic']['negative'] = int(scores[2].get_text().replace(',',''))
        
        review_dict['static']['user']['total'] = int(review_page.select_one('div.score_summary strong').get_text().split()[0])
    except:
        pass

    # Scrape User reviews and scores
    
    try:
        index = 0
        
        for index, review in enumerate(review_page.select_one('.user_reviews').select('div.review_content')):

            review = review.select_one('.review_section')
            wrapped_reviews = review.select('div.review_body span')

            review_text = ''
            review_score = int(review.select_one('div.review_grade div').get_text())

            if len(wrapped_reviews) > 2:
                review_text = wrapped_reviews[2].get_text()
                if not review_text:
                    review_text = wrapped_reviews[1].get_text()
            elif wrapped_reviews:
                review_text = wrapped_reviews[0].get_text()
            else:
                continue

            review_dict[index] = {'score' : review_score, 'review' : review_text}
            
        review_dict['review number'] = index
    except:
        pass
    
    # File dump(pickle)
    
    joblib.dump(review_dict, f'BestReview/{file_number}.pkl')
    
    file_number += 1

100%|████████████████████████████████████████████████████████████████████████████████| 150/150 [26:03<00:00, 10.42s/it]


In [9]:
# dict_check = joblib.load(f'{5}.pkl')
# dict_check

### Worst games

In [10]:
driver.get(metacritic_base_url + str(181))

time.sleep(3)

In [11]:
game_list_page = BeautifulSoup(driver.page_source, 'html.parser')

game_anchor_list = game_list_page.select('.clamp-summary-wrap > a')

game_anchor_list.reverse()

In [12]:
worst_game_anchor_list = game_anchor_list

In [13]:
start = 0
end = 50
file_number = start

for anchor in tqdm.tqdm(best_game_anchor_list[start:end], total=end - start):
    
    # Open Detail & Credits page
    driver.get(f"https://www.metacritic.com{anchor.attrs['href']}/details")
    
    time.sleep(3)
    
    # Initail value input
    
    review_dict = {}
    
    review_dict['platform'] = ''
    review_dict['title'] = ''
    review_dict['publisher'] = ''
    
    review_dict['static'] = {}
    
    review_dict['static']['user'] = {
        'positive' : 0
        , 'mixed' : 0
        , 'negative' : 0
        , 'total' : 0
        , 'mean' : 0
    }
    
    review_dict['static']['critic'] = {
        'positive' : 0
        , 'mixed' : 0
        , 'negative' : 0
        , 'total' : 0
        , 'mean' : 0
    }
    
    review_dict['detail'] = {}
    
    # Beautiful Soup parse
    
    detail_page = BeautifulSoup(driver.page_source, 'html.parser')
    
    product_score_section = detail_page.select('div.product_scores.section')
    
    # Scrape Critic score, Critic review count, User score, Summary discription
    
    try:
        review_dict['static']['critic']['mean'] = int(detail_page.select('a.metascore_anchor span')[0].get_text())
        review_dict['static']['critic']['total'] = int(detail_page.select('p a span')[0].get_text())
    except:
        pass
    
    try:
        review_dict['static']['user']['mean'] = float(detail_page.select('a.metascore_anchor div')[1].get_text())
    except:
        pass
    
    try:
        review_dict['detail']['summary'] = detail_page.select_one('div.summary_detail .data').get_text()
        
        for row in detail_page.select('.product_details tr.alt'):
            review_dict['detail'][row.select_one('th').get_text()] = row.select_one('td').get_text()
    except:
        pass
    
    # Open User Review page
    
    driver.get(f"https://www.metacritic.com{anchor.attrs['href']}/user-reviews")
    
    time.sleep(3)
    
    # Beautiful Soup Parse
    
    review_page = BeautifulSoup(driver.page_source, 'html.parser')
    
    # Scrape Platform, Title, Publisher
    
    try:
        review_dict['platform'] = review_page.select_one(".product_title .platform").get_text().strip()
    except:
        pass
    try:
        review_dict['title'] = review_page.select_one(".product_title h1").get_text()
    except:
        pass
    try:
        review_dict['publisher'] = review_page.select_one(".publisher a").get_text().strip()
    except:
        pass
    
    # Scrape Review's Positive/Mixed/Negative Distibution and User review count

    try:
        score_distribution = review_page.select(".score_counts")
        
        scores = score_distribution[0].select('span.count')
        review_dict['static']['user']['positive'] = int(scores[0].get_text().replace(',',''))
        review_dict['static']['user']['mixed'] = int(scores[1].get_text().replace(',',''))
        review_dict['static']['user']['negative'] = int(scores[2].get_text().replace(',',''))
        
        scores = score_distribution[1].select('span.count')
        review_dict['static']['critic']['positive'] = int(scores[0].get_text().replace(',',''))
        review_dict['static']['critic']['mixed'] = int(scores[1].get_text().replace(',',''))
        review_dict['static']['critic']['negative'] = int(scores[2].get_text().replace(',',''))
        
        review_dict['static']['user']['total'] = int(review_page.select_one('div.score_summary strong').get_text().split()[0])
    except:
        pass

    # Scrape User reviews and scores
    
    try:
        index = 0
        
        for index, review in enumerate(review_page.select_one('.user_reviews').select('div.review_content')):

            review = review.select_one('.review_section')
            wrapped_reviews = review.select('div.review_body span')

            review_text = ''
            review_score = int(review.select_one('div.review_grade div').get_text())

            if len(wrapped_reviews) > 2:
                review_text = wrapped_reviews[2].get_text()
                if not review_text:
                    review_text = wrapped_reviews[1].get_text()
            elif wrapped_reviews:
                review_text = wrapped_reviews[0].get_text()
            else:
                continue

            review_dict[index] = {'score' : review_score, 'review' : review_text}
            
        review_dict['review number'] = index
    except:
        pass
    
    # File dump(pickle)
    
    joblib.dump(review_dict, f'WorstReview/{file_number}.pkl')
    
    file_number += 1

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [07:17<00:00,  8.74s/it]


### Close Webdriver

In [14]:
driver.close()
driver.quit()

### Check Result(random)

In [17]:
result_check = joblib.load('WorstReview/22.pkl')
result_check

{'platform': 'PC',
 'title': 'Final Fantasy XIV: Shadowbringers',
 'publisher': 'Square Enix',
 'static': {'user': {'positive': 1209,
   'mixed': 26,
   'negative': 74,
   'total': 1309,
   'mean': 9.2},
  'critic': {'positive': 24,
   'mixed': 1,
   'negative': 0,
   'total': 25,
   'mean': 90}},
 'detail': {'summary': 'Take part in the next saga of FINAL FANTASY® XIV Online with the next legendary expansion pack—SHADOWBRINGERS.\n\nNew jobs: Gunbreaker and Dancer\nNew races: Viera and Hrothgar\nLevel cap increased: 70 to 80\nNew cities: The Crystarium and Eulmore\nJourney through expansive new areas, such as the Rak’tika Greatwood, Amh Araeng, Il Mheg and Lakeland.\nNew trails, including harrowing encounters with Titania and Innocence.\nNew beast tribes: pixies, Nu Mou, and dwarves\nNew 8-player raid: Eden\nAn exciting new series of alliance raids YoRHa:Dark Apocalypse\nThe Restoration of the Holy See of Ishgard - In the aftermath of the Dragonsong War, Disciples of the Hand and Land 