# PROJETO ANÁLISE - WEB SCRAPING - STEAM

In [1]:
# !python3 -m pip install BeautifulSoup4==4.13.3
# !python3 -m pip install scikit-learn==1.6.1
# !python3 -m pip install selenium==4.29.0
# !python3 -m pip install webdriver_manager

In [2]:
# Imports necessários
import requests
import bs4
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from functions import *
import os
import pprint
import time

In [3]:
# Testando funcionamento da função baixar página para um jogo qualquer
url = "https://store.steampowered.com/app/570940/DARK_SOULS_REMASTERED/"
pagina = baixar_pagina(url)

lista_generos = []
# Testando funcionamento da função extrair gênero para o mesmo jogo qualquer
extrair_genero(pagina, lista_generos)

['Souls-like',
 'Dark Fantasy',
 'RPG',
 'Difficult',
 'Action',
 'Dark',
 'Fantasy',
 'Atmospheric',
 'Great Soundtrack',
 'Action RPG',
 'Lore-Rich',
 'Adventure',
 'Multiplayer',
 'Exploration',
 'Story Rich',
 'Third Person',
 'Replay Value',
 'Character Customization',
 'Soundtrack',
 'Co-op']

## Inicializando o driver do Selenium

In [4]:
try:
    driver.quit()
    print('Fechando o driver')
except Exception as e:
    pass

print('Inicializando o driver')

# Inicializa o driver do Selenium
chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless') # ensure GUI is off
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
driver.set_page_load_timeout(60)

Inicializando o driver


## Pega informação dos jogos do carrossel principal - "Destaque"

In [5]:
# Pega pagina inicial da Steam
url = "https://store.steampowered.com/"
driver.get(url)
driver.implicitly_wait(10)

In [6]:
# Inicializa o Beautiful Soup
soup = BeautifulSoup(driver.page_source, 'html.parser')

carousel_items = {}

all_carousel = soup.find_all('a', class_="store_main_capsule")

g_id = 0
for game in all_carousel:
    g_name = game.find('div', class_="app_name").text
    g_reason = game.find('div', class_="reason").text
    g_status = check_if_available(g_reason)
    href = game['href']

    carousel_items[g_id] = {
        'name': g_name,
        'page': href,
        'status': g_status,
    }

    g_id += 1

pprint.pp(carousel_items)

{0: {'name': 'Counter-Strike 2',
     'page': 'https://store.steampowered.com/app/730/CounterStrike_2/?snr=1_4_4__129_1',
     'status': None},
 1: {'name': 'THRONE AND LIBERTY',
     'page': 'https://store.steampowered.com/app/2429640/THRONE_AND_LIBERTY/?snr=1_4_4__129_2',
     'status': None},
 2: {'name': 'GrandChase',
     'page': 'https://store.steampowered.com/app/985810/GrandChase/?snr=1_4_4__129_3',
     'status': None},
 3: {'name': 'Battlefield™ 2042',
     'page': 'https://store.steampowered.com/app/1517290/Battlefield_2042/?snr=1_4_4__129_4',
     'status': None},
 4: {'name': 'Dota 2',
     'page': 'https://store.steampowered.com/app/570/Dota_2/?snr=1_4_4__129_5',
     'status': None},
 5: {'name': "Baldur's Gate 3",
     'page': 'https://store.steampowered.com/app/1086940/Baldurs_Gate_3/?snr=1_4_4__129_6',
     'status': None},
 6: {'name': 'Diablo® IV',
     'page': 'https://store.steampowered.com/app/2344520/Diablo_IV/?snr=1_4_4__129_7',
     'status': None},
 7: {'name

## Pega informação dos jogos mais vendidos por país

In [7]:
# Pega pagina de jogos mais vendidos globalmente da Steam
url_mais_vendidos_global = "https://store.steampowered.com/charts/topselling/global"
driver.get(url_mais_vendidos_global)
driver.implicitly_wait(10)

In [8]:
# Testando captura dos jogos apenas para o ranking global
soup = BeautifulSoup(driver.page_source, 'html.parser')

top50_global = {}

all_games = soup.find_all('div', class_="_1n_4-zvf0n4aqGEksbgW9N")

limite = 50
for i, game in enumerate(all_games[:limite]):
    top50_global[i] = game.text
top50_global

{}

In [9]:
siglas_paises = {
    'Global': 'global', 
    'Alemanha': 'DE',
    'Austrália': 'AU',
    'Brasil': 'BR',
    'Bélgica': 'BE',
    'Canadá': 'CA',
    'China': 'CN',
    'Coreia do Sul': 'KR',
    'Dinamarca': 'DK',
    'Espanha': 'ES',
    'Estados Unidos': 'US',
    'Rússia': 'RU',
    'Finlândia': 'FI',
    'França': 'FR',
    'Hong Kong': 'HK',
    'Itália': 'IT',
    'Japão': 'JP',
    'Noruega': 'NO',
    'Nova Zelândia': 'NZ',
    'Países Baixos': 'NL',
    'Polônia': 'PL',
    'Reino Unido': 'GB',
    'República Tcheca': 'CZ',
    'Singapura': 'SG',
    'Suécia': 'SE',
    'Suíça': 'CH',
    'Tailândia': 'TH',
    'Taiwan': 'TW',
    'Turquia': 'TR',
    'Áustria': 'AT'
}

top50_por_pais = {}

for pais, sigla in siglas_paises.items():
    # print(f'Lendo página: {pais}')
    url_pais = f"https://store.steampowered.com/charts/topselling/{sigla}"
    driver.get(url_pais)    
    driver.implicitly_wait(10)

    time.sleep(1.7)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    all_games = soup.find_all('div', class_="_1n_4-zvf0n4aqGEksbgW9N")

    limite = 50
    dict_pais = {}
    for i, game in enumerate(all_games[:limite]):
        dict_pais[i] = game.text
        
    top50_por_pais[pais] = dict_pais

# top50_por_pais

In [10]:
for g in all_games:
    print(g.text)

Counter-Strike 2
Steam Deck
DOOM: The Dark Ages
Clair Obscur: Expedition 33
Schedule I
War Thunder
EA SPORTS FC™ 25
Stellaris
The Elder Scrolls IV: Oblivion Remastered
EVERSPACE™ 2
Drive Beyond Horizons
R.E.P.O.
Cash Cleaner Simulator
ELDEN RING NIGHTREIGN
Dune: Awakening
Kingdom Come: Deliverance II
The Elder Scrolls® Online
Destiny 2
EVERSPACE™ 2 - Wrath of the Ancients
Aviassembly
Tower Dominion
Apex Legends™
No Rest for the Wicked
THRONE AND LIBERTY
EVERSPACE™ 2: Expansion Bundle
Hearts of Iron IV
V Rising
STAR WARS Jedi: Survivor™
Timberborn
Crusader Kings III
Rust
Farming Simulator 25
Tom Clancy's Rainbow Six® Siege
PUBG: BATTLEGROUNDS
Path of Exile 2
Age of Wonders 4
Yu-Gi-Oh! Master Duel
Europa Universalis IV
Baldur's Gate 3
Dead by Daylight
Cyberpunk 2077
Train Sim World® 5
RoadCraft
Call of Duty®: Warzone™
It Takes Two
Black Mesa
DARK SOULS™ III
iRacing
Car Mechanic Simulator 2021
HELLDIVERS™ 2
ELDEN RING
FOUNDRY
Marvel Rivals
Stellaris: Ultimate Bundle
Frostpunk 2
METAL GEAR

In [11]:
# Verifica se algum pais ficou com jogos a menos
for pais, jogos in top50_por_pais.items():
    if len(jogos) < 50:
        print(f'Erro na leitura de: {pais}')

In [12]:
# Montando dataframe a partir do scraping feito
df_ranking = pd.DataFrame.from_dict(top50_por_pais, orient='index').transpose()
df_ranking['Ranking'] = [i for i in range(1, 51)]
cols = ['Ranking'] + [col for col in df_ranking.columns if col != 'Ranking']
df_ranking = df_ranking[cols]
df_ranking.head(10)

Unnamed: 0,Ranking,Global,Alemanha,Austrália,Brasil,Bélgica,Canadá,China,Coreia do Sul,Dinamarca,...,Polônia,Reino Unido,República Tcheca,Singapura,Suécia,Suíça,Tailândia,Taiwan,Turquia,Áustria
0,1,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,DOOM: The Dark Ages,Steam Deck,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,...,Counter-Strike 2,Steam Deck,Counter-Strike 2,Path of Exile 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2
1,2,DOOM: The Dark Ages,Steam Deck,DOOM: The Dark Ages,EA SPORTS FC™ 25,Clair Obscur: Expedition 33,DOOM: The Dark Ages,Delta Force,PUBG: BATTLEGROUNDS,DOOM: The Dark Ages,...,Steam Deck,Counter-Strike 2,DOOM: The Dark Ages,Counter-Strike 2,DOOM: The Dark Ages,Clair Obscur: Expedition 33,NARAKA: BLADEPOINT,Clair Obscur: Expedition 33,EA SPORTS FC™ 25,Steam Deck
2,3,Steam Deck,DOOM: The Dark Ages,Steam Deck,R.E.P.O.,Steam Deck,Counter-Strike 2,DOOM: The Dark Ages,Once Human,Clair Obscur: Expedition 33,...,DOOM: The Dark Ages,DOOM: The Dark Ages,Kingdom Come: Deliverance II,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,DOOM: The Dark Ages,Clair Obscur: Expedition 33,Once Human,DOOM: The Dark Ages,DOOM: The Dark Ages
3,4,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,eFootball™,Schedule I,Clair Obscur: Expedition 33,PUBG: BATTLEGROUNDS,Clair Obscur: Expedition 33,Schedule I,...,Clair Obscur: Expedition 33,Schedule I,War Thunder,DOOM: The Dark Ages,Stellaris,Schedule I,eFootball™,DOOM: The Dark Ages,NBA 2K25,Clair Obscur: Expedition 33
4,5,Schedule I,Schedule I,Destiny 2,NBA 2K25,Destiny 2,The Elder Scrolls IV: Oblivion Remastered,雀魂麻將(MahjongSoul)(indisponível na sua região),Eternal Return,Steam Deck,...,EA SPORTS FC™ 25,Clair Obscur: Expedition 33,Steam Deck,EA SPORTS FC™ 25,Steam Deck,War Thunder,The Sims™ 4,PUBG: BATTLEGROUNDS,PUBG: BATTLEGROUNDS,Schedule I
5,6,Destiny 2,Stellaris,The Elder Scrolls IV: Oblivion Remastered,Clair Obscur: Expedition 33,Cash Cleaner Simulator,Schedule I,NBA 2K25,DOOM: The Dark Ages,Stellaris,...,War Thunder,Stellaris,Clair Obscur: Expedition 33,Dota 2,War Thunder,EA SPORTS FC™ 25,Path of Exile 2,NBA 2K25,Kingdom Come: Deliverance II,War Thunder
6,7,Stellaris,War Thunder,Stellaris,DOOM: The Dark Ages,Stellaris,Destiny 2,Apex Legends™,R.E.P.O.,EA SPORTS FC™ 25,...,Schedule I,Destiny 2,Schedule I,NARAKA: BLADEPOINT,Schedule I,Stellaris,R.E.P.O.,雀魂麻將(MahjongSoul)(indisponível na sua região),eFootball™,EA SPORTS FC™ 25
7,8,The Elder Scrolls IV: Oblivion Remastered,EVERSPACE™ 2,War Thunder,V Rising,Drive Beyond Horizons,Stellaris,WUCHANG: Fallen Feathers,GrandChase,War Thunder,...,R.E.P.O.,War Thunder,Stellaris,R.E.P.O.,Stellaris: Ultimate Bundle,Kingdom Come: Deliverance II,DOOM: The Dark Ages,V Rising,R.E.P.O.,Stellaris
8,9,R.E.P.O.,EA SPORTS FC™ 25,Marvel Rivals,Cuphead,Counter-Strike 2,V Rising,Clair Obscur: Expedition 33,eFootball™,Dune: Awakening,...,Stellaris,EA SPORTS FC™ 25,Cash Cleaner Simulator,Schedule I,R.E.P.O.,Drive Beyond Horizons,Once Human,NARAKA: BLADEPOINT,NARAKA: BLADEPOINT,The Elder Scrolls IV: Oblivion Remastered
9,10,EA SPORTS FC™ 25,Aviassembly,V Rising,The Last of Us™ Parte II Remastered,R.E.P.O.,Dune: Awakening,Kingdom Come: Deliverance II,Sephiria,R.E.P.O.,...,STAR WARS Jedi: Survivor™,Dune: Awakening,Mafia: The Old Country,The Elder Scrolls IV: Oblivion Remastered,Destiny 2,R.E.P.O.,EA SPORTS FC™ 25,Apex Legends™,Clair Obscur: Expedition 33,EVERSPACE™ 2


In [19]:
url_pais = f"https://store.steampowered.com/charts/topselling/global"
driver.get(url_pais)
driver.implicitly_wait(10)

botoes_jogos = driver.find_elements(By.CLASS_NAME, "_2-RN6nWOY56sNmcDHu069P")
jogos_generos = {}
lista_generos = []

count = 0
for i, _ in enumerate(botoes_jogos[:limite]):
    if count == 20: 
        break

    time.sleep(2)
    botoes_jogos = driver.find_elements(By.CLASS_NAME, "_2-RN6nWOY56sNmcDHu069P")
    botoes_jogos[i].click()
    time.sleep(2)

    driver.implicitly_wait(10)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    if 'steamdeck' not in driver.current_url.split('/'):
        pagina = baixar_pagina(driver.current_url)
        top5_generos = extrair_genero(pagina, lista_generos)[0:4]

        # Automatiza colocar uma data de nascimento valida
        # Para jogos +18
        try:
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            nome_jogo = soup.find('div', class_="apphub_AppName").text
        except Exception as e:
            # Insere data valida (1996)
            botao_1996 = driver.find_element(By.XPATH, '//*[@id="ageYear"]/option[97]')
            botao_1996.click()
            # Entra na pagina
            botao_ViewPage = driver.find_element(By.XPATH, '//*[@id="view_product_page_btn"]/span')
            botao_ViewPage.click()
            time.sleep(1)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            nome_jogo = soup.find('div', class_="apphub_AppName").text

        jogos_generos[nome_jogo] = top5_generos
    else:
        count -= 1

    lista_generos = []
    top5_generos = []
    count += 1
    
    url_pais = f"https://store.steampowered.com/charts/topselling/global"
    driver.get(url_pais)
    driver.implicitly_wait(10)

In [20]:
jogos_generos

{'Counter-Strike 2': ['FPS', 'Shooter', 'Multiplayer', 'Competitive'],
 'DOOM: The Dark Ages': ['Action', 'FPS', 'Demons', 'Dark Fantasy'],
 'Clair Obscur: Expedition 33': ['Turn-Based Combat',
  'Story Rich',
  'Fantasy',
  'Exploration'],
 'Schedule I': ['Simulation', 'Co-op', 'Crime', 'Multiplayer'],
 'Destiny 2': ['Free to Play', 'Open World', 'Looter Shooter', 'Multiplayer'],
 'Stellaris': ['Space', 'Grand Strategy', 'Strategy', 'Sci-fi'],
 'The Elder Scrolls IV: Oblivion Remastered': ['RPG',
  'Open World',
  'Singleplayer',
  'Fantasy'],
 'R.E.P.O.': ['Horror', 'Online Co-Op', 'Multiplayer', 'Comedy'],
 'EA SPORTS FC™ 25': ['Sports',
  'Simulation',
  'Football (Soccer)',
  'Controller'],
 'Apex Legends™': ['Free to Play', 'Battle Royale', 'Multiplayer', 'FPS'],
 'War Thunder': ['Free to Play', 'Simulation', 'Vehicular Combat', 'Combat'],
 'Marvel Rivals': ['Free to Play',
  'Multiplayer',
  'Hero Shooter',
  'Third-Person Shooter'],
 'NBA 2K25': ['Sports', 'Basketball', 'Simula

In [21]:
df_generos_top50 = pd.DataFrame.from_dict(jogos_generos, orient='index')
df_generos_top50['Jogo'] = df_generos_top50.index
cols = ['Gênero 1', 'Gênero 2', 'Gênero 3', 'Gênero 4', 'Jogo']
df_generos_top50.columns = cols
df_generos_top50 = df_generos_top50.reset_index(drop=True)
df_generos_top50 = df_generos_top50.iloc[:, [4, 0, 1, 2, 3]] 

In [22]:
df_generos_top50

Unnamed: 0,Jogo,Gênero 1,Gênero 2,Gênero 3,Gênero 4
0,Counter-Strike 2,FPS,Shooter,Multiplayer,Competitive
1,DOOM: The Dark Ages,Action,FPS,Demons,Dark Fantasy
2,Clair Obscur: Expedition 33,Turn-Based Combat,Story Rich,Fantasy,Exploration
3,Schedule I,Simulation,Co-op,Crime,Multiplayer
4,Destiny 2,Free to Play,Open World,Looter Shooter,Multiplayer
5,Stellaris,Space,Grand Strategy,Strategy,Sci-fi
6,The Elder Scrolls IV: Oblivion Remastered,RPG,Open World,Singleplayer,Fantasy
7,R.E.P.O.,Horror,Online Co-Op,Multiplayer,Comedy
8,EA SPORTS FC™ 25,Sports,Simulation,Football (Soccer),Controller
9,Apex Legends™,Free to Play,Battle Royale,Multiplayer,FPS
