## Bibliotecas

In [13]:
from io import StringIO

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

### Configurações para trazer os dados necessários


In [2]:
# Start a Chrome session; webdriver_manager installs a matching chromedriver
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

try:
    # Open the SteamDB sales page
    driver.get('https://steamdb.info/sales/')

    # implicitly_wait only applies to element lookups, so it never delays
    # page_source. Wait explicitly (up to 20 s) until the sales table is in the
    # DOM; a TimeoutException here is clearer than a parse error further down.
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'div.dataTable_table_wrap table')),
        message='Sales table did not appear on https://steamdb.info/sales/',
    )

    # Grab the rendered HTML of the page
    html = driver.page_source
finally:
    # Always close the browser, even when loading or waiting fails
    driver.quit()

# Parse the HTML with BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

# Keep only the part of the HTML that will be turned into a DataFrame
table = soup.find('div', class_='dataTable_table_wrap')

In [3]:
# DataFrame built from the first table found inside the scraped `table` element.
# The markup is wrapped in StringIO because passing literal HTML to read_html
# is deprecated since pandas 2.1.
df_table = pd.read_html(StringIO(str(table)))[0]
df_table

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Name,%,Price,Rating,Release,Ends,Started
0,,,Machinika: Museum Free To Keepnew historical low,-100%,"R$ 0,00",87.87%,Mar 2021,,
1,,,ENDLESS™ Legend Free To Keepnew historical low,-100%,"R$ 0,00",82.23%,Sep 2014,,
2,,,100% Orange Juice Free To Keep,-100%,"R$ 0,00",90.34%,Sep 2013,,
3,,,Baldur's Gate 3 Weekend DealTop Sellernew hist...,-15%,"R$ 169,99",95.93%,Aug 2023,,
4,,,ANIMAL WELL Introductory OfferTop Sellernew hi...,-10%,"R$ 67,49",93.32%,May 2024,,
...,...,...,...,...,...,...,...,...,...
95,,,BlazBlue Entropy Effect,-22%,"R$ 46,79",91.34%,Feb 2024,,
96,,,Torchlight II,-80%,"R$ 6,99",91.16%,Sep 2012,,
97,,,Death's Door,-75%,"R$ 14,99",91.15%,Jul 2021,,
98,,,"Clustertruck Midweek Deal all-time low: R$ 6,9...",-80%,"R$ 9,39",91.02%,Sep 2016,,


## Tratando os dados

In [4]:
# Drop the columns that will not be used
df_table_atual = df_table.drop(
    columns=['Unnamed: 0', 'Unnamed: 1', 'Ends', 'Started'],
)

In [5]:
# Rename the '%' column to 'Discount_percentage'
df_table_atual = df_table_atual.rename(columns={'%': 'Discount_percentage'})

# Strip the '%' character and store the discount as an integer
discount_text = df_table_atual['Discount_percentage'].astype(str)
df_table_atual['Discount_percentage'] = discount_text.str.replace('%', '').astype(int)

In [11]:
# Convert Price from Brazilian-formatted text (e.g. "R$ 1.299,99") to float.
# - Reads from the untouched df_table so the cell gives the same result when re-run.
# - regex=False matches "R$ " literally; as a regex, "$" is an end-of-string
#   anchor, so the pattern would never match on pandas < 2.0.
df_table_atual['Price'] = (
    df_table['Price']
    .str.replace('R$ ', '', regex=False)
    .str.replace('.', '', regex=False)   # drop thousands separators
    .str.replace(',', '.', regex=False)  # decimal comma -> decimal point
    .astype(float)
)

Unnamed: 0,Name,Discount_percentage,Price,Rating,Release
0,Machinika: Museum Free To Keepnew historical low,-100,0.00,87.87%,Mar 2021
1,ENDLESS™ Legend Free To Keepnew historical low,-100,0.00,82.23%,Sep 2014
2,100% Orange Juice Free To Keep,-100,0.00,90.34%,Sep 2013
3,Baldur's Gate 3 Weekend DealTop Sellernew hist...,-15,169.99,95.93%,Aug 2023
4,ANIMAL WELL Introductory OfferTop Sellernew hi...,-10,67.49,93.32%,May 2024
...,...,...,...,...,...
95,BlazBlue Entropy Effect,-22,46.79,91.34%,Feb 2024
96,Torchlight II,-80,6.99,91.16%,Sep 2012
97,Death's Door,-75,14.99,91.15%,Jul 2021
98,"Clustertruck Midweek Deal all-time low: R$ 6,9...",-80,9.39,91.02%,Sep 2016


In [14]:
# Convert Rating from text (e.g. "87.87%") to float; '—' placeholders become NaN.
# astype(str) keeps the cell re-runnable: once the column is float, calling
# .str on it directly would raise.
df_table_atual['Rating'] = (
    df_table_atual['Rating']
    .replace('—', np.nan)
    .astype(str)
    .str.replace('%', '', regex=False)
    .astype(float)
)

In [15]:
# Display the DataFrame to inspect the converted columns
df_table_atual

Unnamed: 0,Name,Discount_percentage,Price,Rating,Release
0,Machinika: Museum Free To Keepnew historical low,-100,0.00,87.87,Mar 2021
1,ENDLESS™ Legend Free To Keepnew historical low,-100,0.00,82.23,Sep 2014
2,100% Orange Juice Free To Keep,-100,0.00,90.34,Sep 2013
3,Baldur's Gate 3 Weekend DealTop Sellernew hist...,-15,169.99,95.93,Aug 2023
4,ANIMAL WELL Introductory OfferTop Sellernew hi...,-10,67.49,93.32,May 2024
...,...,...,...,...,...
95,BlazBlue Entropy Effect,-22,46.79,91.34,Feb 2024
96,Torchlight II,-80,6.99,91.16,Sep 2012
97,Death's Door,-75,14.99,91.15,Jul 2021
98,"Clustertruck Midweek Deal all-time low: R$ 6,9...",-80,9.39,91.02,Sep 2016


In [16]:
# Save the table to SteamDB.csv without the index column.
# to_csv returns None when it is given a path, so the result is not assigned.
df_table_atual.to_csv('SteamDB.csv', index=False)