# Importar librerías necesarias

In [1]:
import time
import re
import sys
import requests
from pathlib import Path

import numpy as np
import pandas as pd

from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

### Función para scrollear en la página

In [2]:
def random_keypress_generator():
    keys = [Keys.DOWN, Keys.END, Keys.PAGE_UP, Keys.PAGE_DOWN, Keys.UP]
    action = np.random.choice(keys, p=[0.5, 0.1, 0.1, 0.1, 0.2])
    return action

### Función para obtener url y objeto Soup

In [3]:
def page_bs4(url):
    random = np.random.choice
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    options.add_argument('--incognito')
    driver = webdriver.Chrome(r'C:\Program Files\ChromeDriver\chromedriver.exe',
                             chrome_options=options)
    driver.get(url)
    print('Obtaining data from web source. Please stand by.')
    print('-----------'*10)
    time.sleep(random(5))
    for ran in range(random(10)):
        ActionChains(driver).send_keys(random_keypress_generator()).perform()
        time.sleep(random(5))
    page_source = driver.page_source
    driver.quit()
    print('Data succesfully obtained from web source')
    print('-----------'*10)
    return page_source

### Sacar datos de objeto Soup

In [4]:
def games_data(page_source):
    print("Converting source into Bs4 codification data")
    print('-----------'*10)
    soup = BeautifulSoup(page_source, 'lxml')
    game_title = [title.text for title in soup.select('span.title')]
    release_date = [date.text for date in soup.select('div.col.search_released.responsive_secondrow')]
    discount = [discount.text.strip() for discount in soup.select('div.col.search_discount.responsive_secondrow')]
    price = [price.text.strip() for price in soup.select('div.col.search_price.responsive_secondrow')]
    game_link = [link['href'] for link in soup.select('div[data-panel="[]"][id="search_resultsRows"] a[href]')]
    print("Data into Bs4 done")
    print('-----------'*10)
    return game_title, release_date, discount, price, game_link

### Obtener DataFrame

In [5]:
def dataframe(game_title, release_date, discount, price, game_link):
    tuples = list(zip(game_title, release_date, discount, price, game_link))
    print("Making DataFrame of games data")
    df = pd.DataFrame(tuples, columns=["Game_title","Release_date","Discount","Price","Link"])
    return df

In [6]:
### Función principal

In [7]:
def web_scraping():
    print("Obtaining url and starting scraping")
    url = 'https://store.steampowered.com/search/?filter=topsellers'
    page_source = page_bs4(url)
    game_title, release_date, discount, price, game_link = games_data(page_source)
    df = dataframe(game_title, release_date, discount, price, game_link)
    return df

In [8]:
web_scraping()

Obtaining url and starting scraping


  driver = webdriver.Chrome(r'C:\Program Files\ChromeDriver\chromedriver.exe',
  driver = webdriver.Chrome(r'C:\Program Files\ChromeDriver\chromedriver.exe',


Obtaining data from web source. Please stand by.
--------------------------------------------------------------------------------------------------------------
Data succesfully obtained from web source
--------------------------------------------------------------------------------------------------------------
Converting source into Bs4 codification data
--------------------------------------------------------------------------------------------------------------
Data into Bs4 done
--------------------------------------------------------------------------------------------------------------
Making DataFrame of games data


Unnamed: 0,Game_title,Release_date,Discount,Price,Link
0,ELDEN RING,24 FEB 2022,,"Mex$ 1,200.00",https://store.steampowered.com/app/1245620/ELD...
1,LEGO® Star Wars™: La Saga Skywalker,5 ABR 2022,,Mex$ 799.00,https://store.steampowered.com/app/920210/LEGO...
2,No Man's Sky,12 AGO 2016,-50%,Mex$ 509.99Mex$ 254.99,https://store.steampowered.com/app/275850/No_M...
3,LEGO® Star Wars™: La Saga Skywalker,5 ABR 2022,,Mex$ 799.00,https://store.steampowered.com/app/920210/LEGO...
4,Squad,23 SEP 2020,-25%,Mex$ 454.99Mex$ 341.24,https://store.steampowered.com/app/393380/Squa...
...,...,...,...,...,...
145,Call of Duty®: Modern Warfare® 3,8 NOV 2011,-50%,Mex$ 355.99Mex$ 177.99,https://store.steampowered.com/app/115300/Call...
146,Destiny 2: Paquete del 30 aniv. de Bungie,7 DIC 2021,,Mex$ 399.00,https://store.steampowered.com/app/1656370/Des...
147,Cat Cafe Manager,14 ABR 2022,-20%,Mex$ 185.99Mex$ 148.79,https://store.steampowered.com/app/1354830/Cat...
148,"Warhammer 40,000: Chaos Gate - Daemonhunters",5 MAY 2022,,Mex$ 539.99,https://store.steampowered.com/app/1611910/War...
