In [None]:
import requests
import pandas as pd
import time
import os
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive so files written under /content/drive persist in Drive
drive.mount('/content/drive')

# Configuration
DRIVE_PATH = '/content/drive/MyDrive/SteamCharts_Data'  # Output folder in Drive
# CSV that accumulates one row per game
CSV_FILE = os.path.join(DRIVE_PATH, 'steamcharts_data.csv')
# Plain-text file of already-processed IDs, one per line, used to resume a run
RESUME_FILE = os.path.join(DRIVE_PATH, 'scraped_ids.txt')
API_BASE_URL = 'https://steamcharts.com/api'

# Headers sent with each API request
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'application/json'
}

# Create the Drive folder if it does not exist
os.makedirs(DRIVE_PATH, exist_ok=True)

# Load the IDs that have already been processed
def load_scraped_ids():
    """Return the set of game IDs recorded in the resume file.

    Each non-blank line of ``RESUME_FILE`` is one ID, returned as a stripped
    string. Blank or whitespace-only lines are ignored so they do not show up
    as an empty-string ID.

    Returns:
        set[str]: The recorded IDs; an empty set when the file does not exist.
    """
    if not os.path.exists(RESUME_FILE):
        return set()
    with open(RESUME_FILE, 'r') as f:
        stripped = (line.strip() for line in f)
        return {entry for entry in stripped if entry}

# Record a processed ID
def save_id(game_id):
    """Append ``game_id`` to ``RESUME_FILE`` as its own line."""
    with open(RESUME_FILE, mode='a') as resume_log:
        entry = f"{game_id}\n"
        resume_log.write(entry)
# Fetch data from the API
def get_game_data(game_id, timeout=10):
    """Fetch the JSON payload for one game from the API.

    Args:
        game_id: Identifier interpolated into ``{API_BASE_URL}/app/{game_id}``.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a single stalled connection would block the whole run.

    Returns:
        The decoded JSON body when the server answers with status 200;
        ``None`` for any other status, for a network/timeout failure, or when
        the body is not valid JSON (the error is printed in those cases).
    """
    url = f"{API_BASE_URL}/app/{game_id}"
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        if response.status_code != 200:
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a 200 response whose body fails JSON decoding.
        print(f"Error en ID {game_id}: {e}")
        return None

# Persist buffered rows: CSV first, then the resume file
def _persist_rows(df, pending_rows):
    """Append ``pending_rows`` to ``df``, write the CSV, then record their IDs.

    IDs are written to the resume file only after the CSV write succeeded, so
    an ID is never marked as scraped while its row is missing from the CSV.

    Returns:
        The DataFrame including the newly appended rows.
    """
    if pending_rows:
        df = pd.concat([df, pd.DataFrame(pending_rows)], ignore_index=True)
    df.to_csv(CSV_FILE, index=False)
    for row in pending_rows:
        save_id(str(row['game_id']))
    return df


# Main function
def main(start_id=1, end_id=100, checkpoint_every=25):
    """Query the API for every ID in ``[start_id, end_id]`` and save to CSV.

    IDs already listed in the resume file are skipped. Rows are buffered in
    memory and flushed to ``CSV_FILE`` every ``checkpoint_every`` new rows, and
    once more when the loop ends -- including when it ends because of an
    exception or a keyboard interrupt -- so collected data is not lost.

    Args:
        start_id: First game ID to query (inclusive).
        end_id: Last game ID to query (inclusive).
        checkpoint_every: Number of newly fetched rows between intermediate
            saves of the CSV and resume file.
    """
    # Load the existing CSV or start a new, empty table
    if os.path.exists(CSV_FILE):
        df = pd.read_csv(CSV_FILE)
    else:
        df = pd.DataFrame(columns=['game_id', 'name', 'current_players', 'peak_players', 'hours_played', 'last_updated'])

    scraped_ids = load_scraped_ids()
    pending_rows = []  # rows fetched since the last checkpoint

    try:
        # Process every ID in the range
        for game_id in tqdm(range(start_id, end_id + 1), desc="Consultando API"):
            str_id = str(game_id)
            if str_id in scraped_ids:
                continue

            data = get_game_data(game_id)
            # Only JSON objects expose .get(); skip empty or non-dict payloads.
            if isinstance(data, dict) and data:
                pending_rows.append({
                    'game_id': game_id,
                    'name': data.get('name', 'N/A'),
                    'current_players': data.get('current_players', 0),
                    'peak_players': data.get('peak_players', 0),
                    'hours_played': data.get('hours_played', 0),
                    'last_updated': pd.Timestamp.now()
                })
                if len(pending_rows) >= checkpoint_every:
                    df = _persist_rows(df, pending_rows)
                    pending_rows = []

            time.sleep(1)  # Avoid rate limiting
    finally:
        # Save the CSV to Drive, even if the loop was interrupted
        df = _persist_rows(df, pending_rows)
        print(f"Datos guardados en Drive: {CSV_FILE}")

# Run the scraper for game IDs 1 through 100 (both ends inclusive)
main(start_id=1, end_id=100)

Mounted at /content/drive


Consultando API: 100%|██████████| 100/100 [01:56<00:00,  1.17s/it]

Datos guardados en Drive: /content/drive/MyDrive/SteamCharts_Data/steamcharts_data.csv



