# PROJETO ANÁLISE - WEB SCRAPING - STEAM

In [1]:
# !python3 -m pip install BeautifulSoup4==4.13.3
# !python3 -m pip install scikit-learn==1.6.1
# !python3 -m pip install selenium==4.29.0
# !python3 -m pip install webdriver_manager

In [47]:
# Imports necessários
import requests
import bs4
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from functions import *
import pprint
import time

In [2]:
# Testando funcionamento da função baixar página para um jogo qualquer
url = "https://store.steampowered.com/app/570940/DARK_SOULS_REMASTERED/"
pagina = baixar_pagina(url)

lista_generos = []
# Testando funcionamento da função extrair gênero para o mesmo jogo qualquer
extrair_genero(pagina, lista_generos)

['Souls-like',
 'Dark Fantasy',
 'RPG',
 'Difficult',
 'Action',
 'Dark',
 'Fantasy',
 'Atmospheric',
 'Great Soundtrack',
 'Action RPG',
 'Lore-Rich',
 'Adventure',
 'Multiplayer',
 'Exploration',
 'Story Rich',
 'Third Person',
 'Replay Value',
 'Character Customization',
 'Soundtrack',
 'Co-op']

## Inicializando o driver do Selenium

In [25]:
# Inicializa o driver do Selenium
chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless') # ensure GUI is off
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
driver.set_page_load_timeout(60)

## Pega informação dos jogos do carrossel principal - "Destaque"

In [4]:
# Pega pagina inicial da Steam
url = "https://store.steampowered.com/"
driver.get(url)
driver.implicitly_wait(10)

In [16]:
# Inicializa o Beautiful Soup
soup = BeautifulSoup(driver.page_source, 'html.parser')

carousel_items = {}

all_carousel = soup.find_all('a', class_="store_main_capsule")

g_id = 0
for game in all_carousel:
    g_name = game.find('div', class_="app_name").text
    g_reason = game.find('div', class_="reason").text
    g_status = check_if_available(g_reason)
    href = game['href']

    carousel_items[g_id] = {
        'name': g_name,
        'page': href,
        'status': g_status,
    }

    g_id += 1

pprint.pp(carousel_items)

{0: {'name': 'DARK SOULS™ III',
     'page': 'https://store.steampowered.com/app/374320/DARK_SOULS_III/?snr=1_4_4__129_1',
     'status': None},
 1: {'name': 'Dota 2',
     'page': 'https://store.steampowered.com/app/570/Dota_2/?snr=1_4_4__129_2',
     'status': None},
 2: {'name': 'ELDEN RING',
     'page': 'https://store.steampowered.com/app/1245620/ELDEN_RING/?snr=1_4_4__129_3',
     'status': None},
 3: {'name': 'Mortal Kombat 1',
     'page': 'https://store.steampowered.com/app/1971870/Mortal_Kombat_1/?snr=1_4_4__129_4',
     'status': None},
 4: {'name': 'GrandChase',
     'page': 'https://store.steampowered.com/app/985810/GrandChase/?snr=1_4_4__129_5',
     'status': None},
 5: {'name': 'Counter-Strike 2',
     'page': 'https://store.steampowered.com/app/730/CounterStrike_2/?snr=1_4_4__129_6',
     'status': None},
 6: {'name': 'EA SPORTS FC™ 25',
     'page': 'https://store.steampowered.com/app/2669320/EA_SPORTS_FC_25/?snr=1_4_4__129_7',
     'status': None},
 7: {'name': 'V Ri

## Pega informação dos jogos mais vendidos por país

In [26]:
# Pega pagina de jogos mais vendidos globalmente da Steam
url_mais_vendidos_global = "https://store.steampowered.com/charts/topselling/global"
driver.get(url_mais_vendidos_global)
driver.implicitly_wait(10)

In [41]:
# Testando captura dos jogos apenas para o ranking global
soup = BeautifulSoup(driver.page_source, 'html.parser')

top50_global = {}

all_games = soup.find_all('div', class_="_1n_4-zvf0n4aqGEksbgW9N")

limite = 50
for i, game in enumerate(all_games[:limite]):
    top50_global[i] = game.text
top50_global

{0: 'Steam Deck',
 1: 'Counter-Strike 2',
 2: 'DOOM: The Dark Ages',
 3: 'Clair Obscur: Expedition 33',
 4: 'The Elder Scrolls IV: Oblivion Remastered',
 5: 'Stellaris',
 6: 'Schedule I',
 7: 'Destiny 2',
 8: 'V Rising',
 9: 'Crusader Kings III',
 10: 'Marvel Rivals',
 11: 'Apex Legends™',
 12: 'War Thunder',
 13: 'Dune: Awakening',
 14: 'Kingdom Come: Deliverance II',
 15: 'R.E.P.O.',
 16: 'Warframe',
 17: 'Cash Cleaner Simulator',
 18: 'Drive Beyond Horizons',
 19: 'ELDEN RING NIGHTREIGN',
 20: 'Blue Prince',
 21: 'Dead by Daylight',
 22: 'The Sims™ 4',
 23: "Baldur's Gate 3",
 24: 'Clair Obscur: Expedition 33 + Dead Cells',
 25: 'MapleStory',
 26: 'Age of Wonders 4',
 27: 'Crusader Kings III: Collection',
 28: "Tom Clancy's Rainbow Six® Siege",
 29: 'The Elder Scrolls® Online',
 30: 'Rust',
 31: 'HELLDIVERS™ 2',
 32: 'STAR WARS Jedi: Survivor™',
 33: 'FINAL FANTASY VII REBIRTH',
 34: 'Deadzone: Rogue',
 35: 'EA SPORTS FC™ 25',
 36: 'Path of Exile 2',
 37: 'No Rest for the Wicked',
 

In [None]:
siglas_paises = {
    'Global': 'global', 
    'Alemanha': 'DE',
    'Austrália': 'AU',
    'Brasil': 'BR',
    'Bélgica': 'BE',
    'Canadá': 'CA',
    'China': 'CN',
    'Coreia do Sul': 'KR',
    'Dinamarca': 'DK',
    'Espanha': 'ES',
    'Estados Unidos': 'US',
    'Rússia': 'RU',
    'Finlândia': 'FI',
    'França': 'FR',
    'Hong Kong': 'HK',
    'Itália': 'IT',
    'Japão': 'JP',
    'Noruega': 'NO',
    'Nova Zelândia': 'NZ',
    'Países Baixos': 'NL',
    'Polônia': 'PL',
    'Reino Unido': 'GB',
    'República Tcheca': 'CZ',
    'Singapura': 'SG',
    'Suécia': 'SE',
    'Suíça': 'CH',
    'Tailândia': 'TH',
    'Taiwan': 'TW',
    'Turquia': 'TR',
    'Áustria': 'AT'
}

top50_por_pais = {}

for pais, sigla in siglas_paises.items():
    url_pais = f"https://store.steampowered.com/charts/topselling/{sigla}"
    driver.get(url_pais)
    driver.implicitly_wait(10)
    dict_pais = {}

    time.sleep(1)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    all_games = soup.find_all('div', class_="_1n_4-zvf0n4aqGEksbgW9N")

    limite = 50
    for i, game in enumerate(all_games[:limite]):
        dict_pais[i] = game.text 
    top50_por_pais[pais] = dict_pais

top50_por_pais

{'Global': {0: 'Counter-Strike 2',
  1: 'DOOM: The Dark Ages',
  2: 'Steam Deck',
  3: 'Clair Obscur: Expedition 33',
  4: 'Destiny 2',
  5: 'Schedule I',
  6: 'Stellaris',
  7: 'The Elder Scrolls IV: Oblivion Remastered',
  8: 'R.E.P.O.',
  9: 'EA SPORTS FC™ 25',
  10: 'Apex Legends™',
  11: 'War Thunder',
  12: 'V Rising',
  13: 'Crusader Kings III',
  14: 'PUBG: BATTLEGROUNDS',
  15: 'NBA 2K25',
  16: 'Dune: Awakening',
  17: 'Delta Force',
  18: 'Warframe',
  19: 'Marvel Rivals',
  20: 'Kingdom Come: Deliverance II',
  21: 'Cash Cleaner Simulator',
  22: 'Drive Beyond Horizons',
  23: 'Dead by Daylight',
  24: 'ELDEN RING NIGHTREIGN',
  25: 'Once Human',
  26: 'Stellaris: Ultimate Bundle',
  27: 'Hearts of Iron IV',
  28: 'Dota 2',
  29: "Baldur's Gate 3",
  30: 'Rust',
  31: 'Split Fiction',
  32: 'The Sims™ 4',
  33: 'The Elder Scrolls® Online',
  34: 'Crusader Kings III: Collection',
  35: "Tom Clancy's Rainbow Six® Siege",
  36: 'DARK SOULS™ III',
  37: 'STAR WARS Jedi: Survivo

In [61]:
# Montando dataframe a partir do scraping feito
df_ranking = pd.DataFrame.from_dict(top50_por_pais, orient='index').transpose()
df_ranking['Ranking'] = [i for i in range(1, 51)]
cols = ['Ranking'] + [col for col in df_ranking.columns if col != 'Ranking']
df_ranking = df_ranking[cols]
df_ranking.head(10)

Unnamed: 0,Ranking,Global,Alemanha,Austrália,Brasil,Bélgica,Canadá,China,Coreia do Sul,Dinamarca,...,Polônia,Reino Unido,República Tcheca,Singapura,Suécia,Suíça,Tailândia,Taiwan,Turquia,Áustria
0,1,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Steam Deck,Steam Deck,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,...,Counter-Strike 2,Steam Deck,Counter-Strike 2,Path of Exile 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2,Counter-Strike 2
1,2,DOOM: The Dark Ages,Steam Deck,DOOM: The Dark Ages,EA SPORTS FC™ 25,Clair Obscur: Expedition 33,Counter-Strike 2,Delta Force,PUBG: BATTLEGROUNDS,DOOM: The Dark Ages,...,Clair Obscur: Expedition 33,Counter-Strike 2,DOOM: The Dark Ages,Counter-Strike 2,DOOM: The Dark Ages,Clair Obscur: Expedition 33,R.E.P.O.,Clair Obscur: Expedition 33,EA SPORTS FC™ 25,DOOM: The Dark Ages
2,3,Steam Deck,DOOM: The Dark Ages,Steam Deck,R.E.P.O.,DOOM: The Dark Ages,DOOM: The Dark Ages,DOOM: The Dark Ages,Once Human,Clair Obscur: Expedition 33,...,Steam Deck,DOOM: The Dark Ages,Kingdom Come: Deliverance II,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,DOOM: The Dark Ages,V Rising,V Rising,DOOM: The Dark Ages,Clair Obscur: Expedition 33
3,4,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,Clair Obscur: Expedition 33,V Rising,The Elder Scrolls IV: Oblivion Remastered,Clair Obscur: Expedition 33,PUBG: BATTLEGROUNDS,Clair Obscur: Expedition 33,Schedule I,...,DOOM: The Dark Ages,Schedule I,Steam Deck,DOOM: The Dark Ages,Steam Deck,Schedule I,Clair Obscur: Expedition 33,Once Human,NBA 2K25,Steam Deck
4,5,Destiny 2,Stellaris,Destiny 2,NBA 2K25,Schedule I,The Elder Scrolls IV: Oblivion Remastered,雀魂麻將(MahjongSoul)(indisponível na sua região),Eternal Return,Stellaris,...,War Thunder,Stellaris,Clair Obscur: Expedition 33,R.E.P.O.,Stellaris,War Thunder,NARAKA: BLADEPOINT,DOOM: The Dark Ages,Crusader Kings III,Schedule I
5,6,Schedule I,Schedule I,Stellaris,Clair Obscur: Expedition 33,Cash Cleaner Simulator,Stellaris,NBA 2K25,R.E.P.O.,War Thunder,...,EA SPORTS FC™ 25,Clair Obscur: Expedition 33,War Thunder,EA SPORTS FC™ 25,Schedule I,Stellaris,Path of Exile 2,NBA 2K25,PUBG: BATTLEGROUNDS,Stellaris
6,7,Stellaris,War Thunder,The Elder Scrolls IV: Oblivion Remastered,eFootball™,Drive Beyond Horizons,Schedule I,Apex Legends™,Sephiria,EA SPORTS FC™ 25,...,Schedule I,Destiny 2,Stellaris,Stellaris: Ultimate Bundle,V Rising,EA SPORTS FC™ 25,The Sims™ 4,PUBG: BATTLEGROUNDS,R.E.P.O.,War Thunder
7,8,The Elder Scrolls IV: Oblivion Remastered,EA SPORTS FC™ 25,War Thunder,Cuphead,Stellaris,Destiny 2,EA SPORTS FC™ 25,DOOM: The Dark Ages,Cash Cleaner Simulator,...,Stellaris,War Thunder,Cash Cleaner Simulator,Schedule I,Stellaris: Ultimate Bundle,Drive Beyond Horizons,eFootball™,Nine Sols,Kingdom Come: Deliverance II,EA SPORTS FC™ 25
8,9,R.E.P.O.,Cash Cleaner Simulator,V Rising,Dead by Daylight,Counter-Strike 2,V Rising,WUCHANG: Fallen Feathers,eFootball™,Stellaris: Ultimate Bundle,...,Crusader Kings III,Crusader Kings III,Apex Legends™,Stellaris,War Thunder,Ori: The Collection,EA SPORTS FC™ 25,雀魂麻將(MahjongSoul)(indisponível na sua região),eFootball™,The Elder Scrolls IV: Oblivion Remastered
9,10,EA SPORTS FC™ 25,Aviassembly,Marvel Rivals,DOOM: The Dark Ages,Destiny 2,Crusader Kings III,Clair Obscur: Expedition 33,V Rising,The Elder Scrolls IV: Oblivion Remastered,...,R.E.P.O.,The Elder Scrolls IV: Oblivion Remastered,Schedule I,The Elder Scrolls IV: Oblivion Remastered,The Elder Scrolls IV: Oblivion Remastered,Dune: Awakening,NBA 2K25,R.E.P.O.,Clair Obscur: Expedition 33,Drive Beyond Horizons


In [82]:
url_pais = f"https://store.steampowered.com/charts/topselling/global"
driver.get(url_pais)
driver.implicitly_wait(10)

botoes_jogos = driver.find_elements(By.CLASS_NAME, "_2-RN6nWOY56sNmcDHu069P")
jogos_generos = {}

for i, _ in enumerate(botoes_jogos[:limite]):
    if i == 50: 
        break

    time.sleep(1)
    botoes_jogos = driver.find_elements(By.CLASS_NAME, "_2-RN6nWOY56sNmcDHu069P")
    botoes_jogos[i].click()
    time.sleep(1)

    pagina = baixar_pagina(driver.current_url)
    top5_generos = extrair_genero(pagina, lista_generos)[0:4]
    jogos_generos[soup.find_all('div', class_="_1n_4-zvf0n4aqGEksbgW9N")[i].text] = top5_generos

    url_pais = f"https://store.steampowered.com/charts/topselling/global"
    driver.get(url_pais)
    driver.implicitly_wait(10)

In [83]:
jogos_generos

{'Counter-Strike 2': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'DOOM: The Dark Ages': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'Clair Obscur: Expedition 33': ['Souls-like',
  'Dark Fantasy',
  'RPG',
  'Difficult'],
 'Steam Deck': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'Schedule I': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'Stellaris': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'War Thunder': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'EA SPORTS FC™ 25': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'The Elder Scrolls IV: Oblivion Remastered': ['Souls-like',
  'Dark Fantasy',
  'RPG',
  'Difficult'],
 'Drive Beyond Horizons': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'Apex Legends™': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'Crusader Kings III': ['Souls-like', 'Dark Fantasy', 'RPG', 'Difficult'],
 'Kingdom Come: Deliverance II': ['Souls-like',
  'Dark Fantasy',
  'RPG',
  'Difficult'],
 '