# PROJETO ANÁLISE - WEB SCRAPING - STEAM

In [1]:
# Optional one-time setup: uncomment the lines below to install the scraping
# dependencies used by this notebook.
# NOTE(review): webdriver_manager is the only package without a pinned
# version — consider pinning it like the others for reproducibility.
# !python3 -m pip install BeautifulSoup4==4.13.3
# !python3 -m pip install scikit-learn==1.6.1
# !python3 -m pip install selenium==4.29.0
# !python3 -m pip install webdriver_manager

In [None]:
# Imports necessários
import requests
import bs4
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from functions import *
import os
import pprint
import time

In [79]:
# Smoke test: run the page-download helper on an arbitrary game page.
# `baixar_pagina` comes from the star import of the local `functions` module.
url = "https://store.steampowered.com/app/570940/DARK_SOULS_REMASTERED/"
pagina = baixar_pagina(url)

lista_generos = []
# Smoke test: run the genre-extraction helper on the same page. The call is
# the cell's last expression, so its return value (a list of tag names) is
# what the notebook displays.
# NOTE(review): presumably the helper also appends to `lista_generos` —
# confirm against functions.py.
extrair_genero(pagina, lista_generos)

['Souls-like',
 'Dark Fantasy',
 'RPG',
 'Difficult',
 'Action',
 'Dark',
 'Fantasy',
 'Atmospheric',
 'Great Soundtrack',
 'Action RPG',
 'Lore-Rich',
 'Adventure',
 'Multiplayer',
 'Exploration',
 'Story Rich',
 'Third Person',
 'Replay Value',
 'Character Customization',
 'Soundtrack',
 'Co-op']

## Inicializando o driver do Selenium

In [95]:
# Close any driver left over from a previous run of this cell so that
# re-running it does not pile up browser sessions.
try:
    driver.quit()
    print('Fechando o driver')
except NameError:
    # Fresh kernel: no `driver` has been created yet, nothing to close.
    pass
except Exception as e:
    # Best effort: the previous session may already be dead. Report the
    # problem instead of hiding it, then carry on with a new driver.
    print(f'Não foi possível fechar o driver anterior: {e}')

print('Inicializando o driver')

# Start a new Chrome session driven by Selenium.
chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')  # uncomment to run without a browser window
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
# Give up on any single page load after 60 seconds.
driver.set_page_load_timeout(60)

Fechando o driver
Inicializando o driver


## Pega informação dos jogos do carrossel principal - "Destaque"

In [4]:
# Open the Steam store front page in the Selenium-driven browser.
url = "https://store.steampowered.com/"
driver.get(url)
# NOTE(review): implicitly_wait sets the timeout used by later element
# lookups; it does not by itself pause for the page to finish rendering —
# confirm that is the intended effect here.
driver.implicitly_wait(10)

In [5]:
# Parse the page currently loaded in the browser and collect every capsule
# of the main ("featured") carousel.
soup = BeautifulSoup(driver.page_source, 'html.parser')
all_carousel = soup.find_all('a', class_="store_main_capsule")

# Position in the carousel -> name, store page and availability status.
carousel_items = {}
for g_id, game in enumerate(all_carousel):
    g_name = game.find('div', class_="app_name").text
    # The "reason" label is handed to the project helper, which turns it
    # into the status string stored below.
    g_status = check_if_available(game.find('div', class_="reason").text)
    carousel_items[g_id] = {
        'name': g_name,
        'page': game['href'],
        'status': g_status,
    }

pprint.pp(carousel_items)

{0: {'name': 'DARK SOULS™: REMASTERED',
     'page': 'https://store.steampowered.com/app/570940/DARK_SOULS_REMASTERED/?snr=1_4_4__129_1',
     'status': 'Full Release'},
 1: {'name': 'Destiny 2',
     'page': 'https://store.steampowered.com/app/1085660/Destiny_2/?snr=1_4_4__129_2',
     'status': 'Full Release'},
 2: {'name': 'NBA 2K25',
     'page': 'https://store.steampowered.com/app/2878980/NBA_2K25/?snr=1_4_4__129_3',
     'status': 'Full Release'},
 3: {'name': 'Cuphead',
     'page': 'https://store.steampowered.com/app/268910/Cuphead/?snr=1_4_4__129_4',
     'status': 'Full Release'},
 4: {'name': 'Crusader Kings III',
     'page': 'https://store.steampowered.com/app/1158310/Crusader_Kings_III/?snr=1_4_4__129_5',
     'status': 'Full Release'},
 5: {'name': 'Clair Obscur: Expedition 33',
     'page': 'https://store.steampowered.com/app/1903340/Clair_Obscur_Expedition_33/?snr=1_4_4__129_6',
     'status': 'Full Release'},
 6: {'name': 'GrandChase',
     'page': 'https://store.stea

## Pega informação dos jogos mais vendidos por país

In [49]:
# Open Steam's global top-sellers chart in the Selenium-driven browser.
url_mais_vendidos_global = "https://store.steampowered.com/charts/topselling/global"
driver.get(url_mais_vendidos_global)
# NOTE(review): implicitly_wait sets the timeout used by later element
# lookups; it does not by itself pause for the chart to render — confirm
# that is the intended effect here.
driver.implicitly_wait(10)

In [51]:
# Trial run: extract the game titles for the global ranking only, from the
# page currently loaded in the browser.
soup = BeautifulSoup(driver.page_source, 'html.parser')
all_games = soup.find_all('div', class_="_1n_4-zvf0n4aqGEksbgW9N")

# Keep at most `limite` entries, keyed by zero-based chart position.
limite = 50
top50_global = {posicao: jogo.text for posicao, jogo in enumerate(all_games[:limite])}
top50_global

{0: 'Counter-Strike 2',
 1: 'DOOM: The Dark Ages',
 2: 'Steam Deck',
 3: 'Clair Obscur: Expedition 33',
 4: 'Schedule I',
 5: 'Destiny 2',
 6: 'Stellaris',
 7: 'The Elder Scrolls IV: Oblivion Remastered',
 8: 'R.E.P.O.',
 9: 'EA SPORTS FC™ 25',
 10: 'Apex Legends™',
 11: 'War Thunder',
 12: 'V Rising',
 13: 'NBA 2K25',
 14: 'PUBG: BATTLEGROUNDS',
 15: 'Dune: Awakening',
 16: 'Crusader Kings III',
 17: 'Marvel Rivals',
 18: 'Warframe',
 19: 'Drive Beyond Horizons',
 20: 'Cash Cleaner Simulator',
 21: 'Kingdom Come: Deliverance II',
 22: 'Delta Force',
 23: 'ELDEN RING NIGHTREIGN',
 24: 'Dead by Daylight',
 25: "Baldur's Gate 3",
 26: 'Once Human',
 27: 'Stellaris: Ultimate Bundle',
 28: 'Rust',
 29: 'The Sims™ 4',
 30: 'Dota 2',
 31: 'Hearts of Iron IV',
 32: 'The Elder Scrolls® Online',
 33: 'Split Fiction',
 34: "Tom Clancy's Rainbow Six® Siege",
 35: 'STAR WARS Jedi: Survivor™',
 36: 'DARK SOULS™ III',
 37: 'HELLDIVERS™ 2',
 38: 'Blue Prince',
 39: 'ELDEN RING',
 40: 'Mafia Trilogy',

In [None]:
# Display name of each chart -> path segment used in the chart URL.
siglas_paises = {
    'Global': 'global',
    'Alemanha': 'DE',
    'Austrália': 'AU',
    'Brasil': 'BR',
    'Bélgica': 'BE',
    'Canadá': 'CA',
    'China': 'CN',
    'Coreia do Sul': 'KR',
    'Dinamarca': 'DK',
    'Espanha': 'ES',
    'Estados Unidos': 'US',
    'Rússia': 'RU',
    'Finlândia': 'FI',
    'França': 'FR',
    'Hong Kong': 'HK',
    'Itália': 'IT',
    'Japão': 'JP',
    'Noruega': 'NO',
    'Nova Zelândia': 'NZ',
    'Países Baixos': 'NL',
    'Polônia': 'PL',
    'Reino Unido': 'GB',
    'República Tcheca': 'CZ',
    'Singapura': 'SG',
    'Suécia': 'SE',
    'Suíça': 'CH',
    'Tailândia': 'TH',
    'Taiwan': 'TW',
    'Turquia': 'TR',
    'Áustria': 'AT',
}

# Chart name -> {zero-based position: game title}.
top50_por_pais = {}
# Maximum number of titles kept per chart.
limite = 50

for pais, sigla in siglas_paises.items():
    url_pais = f"https://store.steampowered.com/charts/topselling/{sigla}"
    driver.get(url_pais)
    driver.implicitly_wait(10)

    # Fixed pause before reading the page source.
    # NOTE(review): presumably gives the chart time to render — confirm.
    time.sleep(1.7)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    all_games = soup.find_all('div', class_="_1n_4-zvf0n4aqGEksbgW9N")

    dict_pais = {posicao: jogo.text for posicao, jogo in enumerate(all_games[:limite])}
    top50_por_pais[pais] = dict_pais

In [54]:
# Print the title of every element currently held in `all_games`, without
# the 50-entry cap applied elsewhere.
# NOTE(review): `all_games` is whatever the most recently executed scraping
# cell left behind (hidden kernel state), so this output depends on the
# order in which cells were run.
for g in all_games:
    print(g.text)

Counter-Strike 2
DOOM: The Dark Ages
Clair Obscur: Expedition 33
Steam Deck
Schedule I
Stellaris
The Elder Scrolls IV: Oblivion Remastered
War Thunder
EA SPORTS FC™ 25
Drive Beyond Horizons
R.E.P.O.
Kingdom Come: Deliverance II
Cash Cleaner Simulator
Destiny 2
Crusader Kings III
Lost Ark
Baldur's Gate 3
V Rising
Dune: Awakening
Aviassembly
THRONE AND LIBERTY
Hearts of Iron IV
EVERSPACE™ 2
Apex Legends™
ELDEN RING NIGHTREIGN
Anno Franchise
Tower Dominion
Yu-Gi-Oh! Master Duel
The Elder Scrolls® Online
Clair Obscur: Expedition 33 + Dead Cells
Age of Wonders 4
The Sims™ 4
Rust
Tom Clancy's Rainbow Six® Siege
Car Mechanic Simulator 2021
No Rest for the Wicked
EVERSPACE™ 2 - Wrath of the Ancients
Timberborn
inZOI
Equinox: Homecoming
Farming Simulator 25
Train Sim World® 5
The Universim - Collector's Edition Bundle (Vol 1)
Warframe
STAR WARS Jedi: Survivor™
Frostpunk 2
STAR WARS™ Jedi Bundle
Blue Prince
Cyberpunk 2077
The Hundred Line -Last Defense Academy-
Euro Truck Simulator 2
Drop Duchy


In [42]:
# Report every chart that was scraped with fewer than 50 titles.
paises_incompletos = [
    nome_pais
    for nome_pais, titulos in top50_por_pais.items()
    if len(titulos) < 50
]
for nome_pais in paises_incompletos:
    print(f'Erro na leitura de: {nome_pais}')

In [55]:
# Build the ranking table from the scraped data: one column per chart
# (country or "Global"), one row per chart position.
df_ranking = pd.DataFrame.from_dict(top50_por_pais, orient='index').transpose()

# Add a 1-based "Ranking" column as the first column. Its length follows the
# frame instead of a hard-coded 50, so the cell still works when every chart
# came back with fewer than 50 titles.
df_ranking.insert(0, 'Ranking', list(range(1, len(df_ranking) + 1)))
df_ranking.head(10)

Unnamed: 0,Ranking,Global,Alemanha,Austrália,Brasil,Bélgica,Canadá,China,Coreia do Sul,Dinamarca,...,Polônia,Reino Unido,República Tcheca,Singapura,Suécia,Suíça,Tailândia,Taiwan,Turquia,Áustria
0,1,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,DOOM: The Dark Ages,Steam Deck,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,...,Counter-Strike 2,Steam Deck,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2
1,2,DOOM: The Dark Ages,Steam Deck,DOOM: The Dark Ages,EA SPORTS FC™ 25,Steam Deck,Counter-Strike 2,Delta Force,PUBG: BATTLEGROUNDS,DOOM: The Dark Ages,...,Steam Deck,Counter-Strike 2,DOOM: The Dark Ages,Path of Exile 2,DOOM: The Dark Ages,Clair Obscur: Expedition 33,NARAKA: BLADEPOINT,Clair Obscur: Expedition 33,EA SPORTS FC™ 25,DOOM: The Dark Ages
2,3,Steam Deck,DOOM: The Dark Ages,Steam Deck,R.E.P.O.,Clair Obscur: Expedition 33,DOOM: The Dark Ages,DOOM: The Dark Ages,Once Human,Clair Obscur: Expedition 33,...,Clair Obscur: Expedition 33,DOOM: The Dark Ages,Steam Deck,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,DOOM: The Dark Ages,Clair Obscur: Expedition 33,Once Human,DOOM: The Dark Ages,Clair Obscur: Expedition 33
3,4,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,NBA 2K25,Schedule I,Clair Obscur: Expedition 33,PUBG: BATTLEGROUNDS,Clair Obscur: Expedition 33,Schedule I,...,DOOM: The Dark Ages,Schedule I,Kingdom Come: Deliverance II,DOOM: The Dark Ages,Steam Deck,Schedule I,R.E.P.O.,V Rising,NBA 2K25,Steam Deck
4,5,Schedule I,Schedule I,Destiny 2,V Rising,The Elder Scrolls IV: Oblivion Remastered,The Elder Scrolls IV: Oblivion Remastered,雀魂麻將(MahjongSoul)(not available in your region),Eternal Return,Stellaris,...,EA SPORTS FC™ 25,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,Marvel Rivals,Stellaris,Stellaris,V Rising,DOOM: The Dark Ages,PUBG: BATTLEGROUNDS,Schedule I
5,6,Destiny 2,Stellaris,Stellaris,Clair Obscur: Expedition 33,Drive Beyond Horizons,Stellaris,NBA 2K25,DOOM: The Dark Ages,Apex Legends™,...,War Thunder,Stellaris,War Thunder,EA SPORTS FC™ 25,Schedule I,Kingdom Come: Deliverance II,The Sims™ 4,PUBG: BATTLEGROUNDS,R.E.P.O.,Stellaris
6,7,Stellaris,War Thunder,The Elder Scrolls IV: Oblivion Remastered,eFootball™,Cash Cleaner Simulator,Destiny 2,Apex Legends™,R.E.P.O.,Steam Deck,...,Stellaris,Destiny 2,Schedule I,Stellaris: Ultimate Bundle,War Thunder,EA SPORTS FC™ 25,Path of Exile 2,NBA 2K25,Kingdom Come: Deliverance II,The Elder Scrolls IV: Oblivion Remastered
7,8,The Elder Scrolls IV: Oblivion Remastered,EA SPORTS FC™ 25,War Thunder,DOOM: The Dark Ages,Destiny 2,Schedule I,WUCHANG: Fallen Feathers,Sephiria,Cash Cleaner Simulator,...,Schedule I,War Thunder,Stellaris,R.E.P.O.,V Rising,Crusader Kings III,eFootball™,雀魂麻將(MahjongSoul)(not available in your region),War Thunder,War Thunder
8,9,R.E.P.O.,Cash Cleaner Simulator,Marvel Rivals,Cuphead,Stellaris,V Rising,EA SPORTS FC™ 25,eFootball™,Dune: Awakening,...,R.E.P.O.,EA SPORTS FC™ 25,Cash Cleaner Simulator,Apex Legends™,R.E.P.O.,War Thunder,PUBG: BATTLEGROUNDS,Nine Sols,eFootball™,EA SPORTS FC™ 25
9,10,EA SPORTS FC™ 25,EVERSPACE™ 2,V Rising,Dead by Daylight,Counter-Strike 2,Crusader Kings III,Clair Obscur: Expedition 33,Apex Legends™,R.E.P.O.,...,Crusader Kings III,Crusader Kings III,EA SPORTS FC™ 25,Schedule I,Stellaris: Ultimate Bundle,Stellaris: Ultimate Bundle,EA SPORTS FC™ 25,NARAKA: BLADEPOINT,Schedule I,Drive Beyond Horizons


In [105]:
# Click through the entries of the global top-sellers chart and record the
# first four tags of each game until five games have been collected.
# Entries that lead to the Steam Deck page are skipped and do not count.
# NOTE(review): `limite` is defined by an earlier scraping cell; run those
# cells first.
url_pais = "https://store.steampowered.com/charts/topselling/global"
driver.get(url_pais)
# NOTE(review): per the Selenium docs the implicit wait is a session-wide
# setting for element lookups, so it is set once here rather than after
# every navigation.
driver.implicitly_wait(10)

botoes_jogos = driver.find_elements(By.CLASS_NAME, "_2-RN6nWOY56sNmcDHu069P")
jogos_generos = {}  # game title -> list with its first four tags

count = 0  # games successfully recorded so far
for i in range(len(botoes_jogos[:limite])):
    if count == 5:
        break

    time.sleep(2)
    # Look the buttons up again on every pass.
    # NOTE(review): presumably needed because references obtained before
    # navigating away no longer point at live elements — confirm.
    botoes_jogos = driver.find_elements(By.CLASS_NAME, "_2-RN6nWOY56sNmcDHu069P")
    botoes_jogos[i].click()
    time.sleep(2)

    print(driver.current_url)

    if 'steamdeck' not in driver.current_url.split('/'):
        lista_generos = []
        pagina = baixar_pagina(driver.current_url)
        top5_generos = extrair_genero(pagina, lista_generos)[0:4]

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        titulo = soup.find('div', class_="apphub_AppName")
        if titulo is None:
            # No title element: treat the page as the age gate shown for
            # 18+ games. Pick a valid birth year (option[97], 1996 according
            # to the original author's note) and continue to the store page.
            botao_1996 = driver.find_element(By.XPATH, '//*[@id="ageYear"]/option[97]')
            botao_1996.click()
            botao_ViewPage = driver.find_element(By.XPATH, '//*[@id="view_product_page_btn"]/span')
            botao_ViewPage.click()
            time.sleep(1)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            titulo = soup.find('div', class_="apphub_AppName")
        # Still raises AttributeError if the title is missing after the gate.
        nome_jogo = titulo.text

        print(nome_jogo)
        print(top5_generos)
        print("*"*50 + '\n')
        jogos_generos[nome_jogo] = top5_generos
        count += 1

    # Leave the scratch lists empty between passes, as the cell always did.
    lista_generos = []
    top5_generos = []

    # Go back to the chart before handling the next entry.
    driver.get(url_pais)

https://store.steampowered.com/app/730/CounterStrike_2/
Counter-Strike 2
['FPS', 'Shooter', 'Multiplayer', 'Competitive']
**************************************************

https://store.steampowered.com/app/3017860/DOOM_The_Dark_Ages/
DOOM: The Dark Ages
['Action', 'FPS', 'Demons', 'Dark Fantasy']
**************************************************

https://store.steampowered.com/steamdeck
https://store.steampowered.com/app/1903340/Clair_Obscur_Expedition_33/
Clair Obscur: Expedition 33
['Turn-Based Combat', 'Story Rich', 'Fantasy', 'Exploration']
**************************************************

https://store.steampowered.com/app/3164500/Schedule_I/
Schedule I
['Simulation', 'Co-op', 'Crime', 'Multiplayer']
**************************************************

https://store.steampowered.com/app/1085660/Destiny_2/
Destiny 2
['Free to Play', 'Open World', 'Looter Shooter', 'Multiplayer']
**************************************************



In [106]:
# Display the game title -> tag list mapping built by the scraping loop.
jogos_generos

{'Counter-Strike 2': ['FPS', 'Shooter', 'Multiplayer', 'Competitive'],
 'DOOM: The Dark Ages': ['Action', 'FPS', 'Demons', 'Dark Fantasy'],
 'Clair Obscur: Expedition 33': ['Turn-Based Combat',
  'Story Rich',
  'Fantasy',
  'Exploration'],
 'Schedule I': ['Simulation', 'Co-op', 'Crime', 'Multiplayer'],
 'Destiny 2': ['Free to Play', 'Open World', 'Looter Shooter', 'Multiplayer']}

In [107]:
# Turn the game -> tags mapping into a table with one row per game: a "Jogo"
# column followed by "Gênero 1", "Gênero 2", ... columns.
# Column names are derived from the data instead of a fixed five-name list,
# so the cell no longer fails when the number of tag columns is not exactly
# four (for example when `jogos_generos` is empty).
df_generos_top50 = (
    pd.DataFrame.from_dict(jogos_generos, orient='index')
    .rename(columns=lambda posicao: f'Gênero {posicao + 1}')  # 0-based label -> 1-based name
    .rename_axis('Jogo')  # name the index so reset_index emits a "Jogo" column
    .reset_index()        # move the titles into the first column, fresh 0..n-1 index
)

In [116]:
# Display the genre table.
# NOTE(review): the output saved with this cell does not match the
# `jogos_generos` mapping shown above (it has a "Steam Deck" row and rows
# with missing tags), so it appears to come from an earlier run — re-run
# the notebook top to bottom to refresh it.
df_generos_top50

Unnamed: 0,Jogo,Gênero 1,Gênero 2,Gênero 3,Gênero 4
0,Counter-Strike 2,FPS,Shooter,Multiplayer,Competitive
1,DOOM: The Dark Ages,,,,
2,Clair Obscur: Expedition 33,,,,
3,Steam Deck,Turn-Based Combat,Story Rich,Fantasy,Exploration
4,Schedule I,Simulation,Co-op,Crime,Multiplayer
