Codi per descarregar les dades de l'API de Steam, usant el dataset de jocs del 2025

El dataset inicial dels jocs s'ha obtingut de Kaggle: https://www.kaggle.com/datasets/srgiomanhes/steam-games-dataset-2025

In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the notebook can
# read from and write to paths under that directory.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import requests
import pandas as pd
import time
import json
from typing import Dict, List, Optional
import os
from datetime import datetime
import concurrent.futures
import threading
from queue import Queue

class SteamAchievementsCollector:
    """Collect global achievement percentages for Steam games.

    For every game the collector queries the
    ``ISteamUserStats/GetGlobalAchievementPercentagesForApp`` endpoint and
    produces one flat record per achievement, enriched with metadata copied
    from the game's row in the input dataset.
    """

    # Game columns copied verbatim into every achievement record, in the
    # order they appear in the output.
    _GAME_FIELDS = (
        'developers',
        'publishers',
        'categories',
        'genres',
        'required_age',
        'n_achievements',
        'platforms',
    )

    def __init__(self, api_key: str):
        """Store the API key and open a reusable HTTP session.

        Args:
            api_key: Steam Web API key. It is stored on the instance; the
                request built in ``get_all_game_data`` does not send it.
        """
        self.api_key = api_key
        self.base_url = "http://api.steampowered.com"
        self.session = requests.Session()

    def get_all_game_data(self, appid: int, game_data: dict) -> List[dict]:
        """Fetch every achievement of one game as a list of flat records.

        Args:
            appid: Steam application id, sent as the ``gameid`` parameter.
            game_data: Row of the games dataset as a dict. It must contain
                ``name`` plus every key listed in ``_GAME_FIELDS``.

        Returns:
            One dict per achievement. The list is empty when the HTTP status
            is not 200, when the payload has no achievements, or when any
            error occurs while fetching or building the records.
        """
        url = f"{self.base_url}/ISteamUserStats/GetGlobalAchievementPercentagesForApp/v0000002/"
        params = {'gameid': appid, 'format': 'json'}

        try:
            response = self.session.get(url, params=params, timeout=10)
            if response.status_code != 200:
                return []

            # Walk the payload defensively so a malformed or partial answer
            # simply yields no achievements.
            payload = response.json()
            section = payload.get('achievementpercentages') if isinstance(payload, dict) else None
            achievements = section.get('achievements') if isinstance(section, dict) else None
            if not achievements:
                return []

            # The game metadata is identical for every achievement: read it once.
            game_name = game_data['name']
            game_fields = {key: game_data[key] for key in self._GAME_FIELDS}

            return [
                {
                    'steam_appid': appid,
                    'game_name': game_name,
                    'achievement_name': achievement['name'],
                    'achievement_percent': achievement['percent'],
                    **game_fields,
                }
                for achievement in achievements
            ]
        except Exception:
            # Best effort per game: one failing game must not abort a long
            # collection run. Catching Exception (instead of a bare except)
            # still lets KeyboardInterrupt and SystemExit stop the run.
            return []

    def process_batch(self, games_batch: List[tuple], delay: float = 0.3) -> List[dict]:
        """Fetch the achievements of every game in a batch, sequentially.

        Args:
            games_batch: ``(appid, game_data)`` pairs.
            delay: Seconds to sleep after each request.

        Returns:
            The achievement records of all games in the batch, concatenated.
        """
        all_achievements = []

        for appid, game_data in games_batch:
            all_achievements.extend(self.get_all_game_data(appid, game_data))
            time.sleep(delay)

        return all_achievements

    def collect_achievements_data(self, steam_df: pd.DataFrame,
                                 delay: float = 0.3,
                                 batch_size: int = 200,
                                 save_path: str = '/content/drive/MyDrive/GoogleColab/',
                                 checkpoint_every: int = 500) -> pd.DataFrame:
        """Collect achievements for all released games that have any.

        Games are processed in batches. A progress CSV
        (``steam_achievements_progress.csv``) is rewritten periodically and a
        timestamped final CSV is written at the end, both inside ``save_path``.

        Args:
            steam_df: Games dataset. Must contain ``n_achievements``,
                ``is_released``, ``steam_appid`` and the columns read by
                ``get_all_game_data``.
            delay: Seconds to sleep after each request.
            batch_size: Number of games per batch; must be positive.
            save_path: Output directory. It is created if it does not exist.
            checkpoint_every: Write the progress file once at least this many
                games were processed since the previous checkpoint (checked
                at batch boundaries), and always after the last batch.

        Returns:
            DataFrame with one row per achievement; empty if nothing was
            collected.
        """
        games_with_achievements = steam_df[
            (steam_df['n_achievements'] > 0) &
            (steam_df['is_released'] == True)
        ].copy().reset_index(drop=True)

        total_games = len(games_with_achievements)
        print(f"Processant {total_games} jocs")

        # Make sure the output directory exists before hours of downloading.
        if save_path:
            os.makedirs(save_path, exist_ok=True)
        progress_file = os.path.join(save_path, 'steam_achievements_progress.csv')

        # Pair every app id with its full row as a plain dict.
        games_data = [
            (record['steam_appid'], record)
            for record in games_with_achievements.to_dict('records')
        ]

        all_achievements = []
        processed = 0
        last_checkpoint = 0

        # Process in batches
        for start in range(0, total_games, batch_size):
            batch = games_data[start:start + batch_size]
            all_achievements.extend(self.process_batch(batch, delay))

            processed += len(batch)
            print(f"Progres: {processed}/{total_games} ({len(all_achievements)} assoliments)")

            # Checkpoint on games processed since the last save. A plain
            # `processed % 500 == 0` test only fires when the batch size
            # happens to divide evenly into 500.
            if processed - last_checkpoint >= checkpoint_every or processed >= total_games:
                pd.DataFrame(all_achievements).to_csv(progress_file, index=False)
                last_checkpoint = processed

        # Save the final result
        final_df = pd.DataFrame(all_achievements)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        final_filename = f'steam_achievements_complete_{timestamp}.csv'
        final_path = os.path.join(save_path, final_filename)
        final_df.to_csv(final_path, index=False)

        # An empty result has no columns at all, so guard the column lookup.
        if 'steam_appid' in final_df.columns:
            games_collected = final_df['steam_appid'].nunique()
        else:
            games_collected = 0

        print(f"Guardat: {final_filename}")
        print(f"Total assoliments: {len(final_df):,}")
        print(f"Jocs processats: {games_collected:,}")

        return final_df

def main():
    """Load the games dataset and run the full achievements collection.

    The three values below are placeholders to edit before running: the Steam
    API key, the path of the games CSV, and the output directory.
    """
    # Replace with your own API key; the real one was redacted for safety.
    steam_api_key = "XXXXXXXXXXXXXXXXXXXXXXXXXX"
    # Path of the CSV holding the games dataset; replace with your own.
    games_csv = '/steam_games.csv'
    # Directory where the achievements dataset is written; replace with your own.
    output_dir = '/GoogleColab/'

    games = pd.read_csv(games_csv)
    achievements_collector = SteamAchievementsCollector(steam_api_key)

    # The collector writes its CSV files itself, so the returned DataFrame
    # is not kept here.
    achievements_collector.collect_achievements_data(
        games,
        delay=0.3,
        batch_size=200,
        save_path=output_dir,
    )

# Start the collection only when this code is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Processant 31533 jocs
Progres: 200/31533 (3789 assoliments)
Progres: 400/31533 (8590 assoliments)
Progres: 600/31533 (12888 assoliments)
Progres: 800/31533 (17091 assoliments)
Progres: 1000/31533 (21360 assoliments)
Progres: 1200/31533 (25754 assoliments)
Progres: 1400/31533 (29834 assoliments)
Progres: 1600/31533 (33707 assoliments)
Progres: 1800/31533 (38213 assoliments)
Progres: 2000/31533 (42392 assoliments)
Progres: 2200/31533 (46604 assoliments)
Progres: 2400/31533 (50839 assoliments)
Progres: 2600/31533 (56249 assoliments)
Progres: 2800/31533 (61242 assoliments)
Progres: 3000/31533 (66614 assoliments)
Progres: 3200/31533 (71586 assoliments)
Progres: 3400/31533 (76537 assoliments)
Progres: 3600/31533 (81920 assoliments)
Progres: 3800/31533 (86540 assoliments)
Progres: 4000/31533 (90979 assoliments)
Progres: 4200/31533 (96095 assoliments)
Progres: 4400/31533 (100901 assoliments)
Progres: 4600/31533 (105925 assoliments)
Progres: 4800/31533 (110964 assoliments)
Progres: 5000/31533 (