
# IMT2200 — Proyecto: Conexión y Exploración IGDB API  
## Descarga y exploración inicial de datos de videojuegos (IGDB)

**Autor:** Andres Gonzales  
**Curso:** IMT2200 - Introducción a Ciencia de Datos (2025-2)

---

### ⚙️ Descripción
Este notebook conecta directamente a la **API de IGDB**, descarga un conjunto de juegos con campos relevantes y los guarda en `data/igdb_games.csv`.

Luego, realiza una **limpieza básica y un análisis exploratorio inicial** (EDA).

> 💡 El objetivo es dejar automatizado el flujo de adquisición de datos para integrarlo después con la API de Steam/SteamCharts.



## 1. Configuración e importación de librerías


In [1]:

import os
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Configuraciones generales
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 120)

# Crear carpeta data si no existe
os.makedirs('data', exist_ok=True)



## 2. Autenticación y parámetros de la API IGDB

Pega tus credenciales de forma local (no las subas al repositorio público).


In [5]:

client_id = "uksbpu6oftndjav5xb8wzk1a8j63fm"
client_secret = "zj3i336wtqr4qc9mizsubfcpi32xlt"  # <-- pega el secreto real de Twitch aquí

url = "https://id.twitch.tv/oauth2/token"
params = {
    "client_id": client_id,
    "client_secret": client_secret,
    "grant_type": "client_credentials"
}

resp = requests.post(url, params=params)
print(resp.json())


{'access_token': 'lcw4wgqn7ltr8rr0b85l65rtawn2dq', 'expires_in': 5314779, 'token_type': 'bearer'}



## 3. Descarga de datos desde la API


In [None]:
import os, time, json, pathlib, datetime as dt, requests, pandas as pd

CLIENT_ID     = "lbosp4quzjq9oe0nhgd7k291j7lf7y"
CLIENT_SECRET = "0qlq0q84p2zfzucsxa0ocgg167cnsb"

RAW_DIR   = pathlib.Path("datos/no_limpios");  RAW_DIR.mkdir(parents=True, exist_ok=True)
CLEAN_DIR = pathlib.Path("datos/limpios");     CLEAN_DIR.mkdir(parents=True, exist_ok=True)

def epoch(d: dt.date) -> int:
    return int(dt.datetime(d.year, d.month, d.day, tzinfo=dt.timezone.utc).timestamp())

def igdb_token():
    r = requests.post(
        "https://id.twitch.tv/oauth2/token",
        data={"client_id": CLIENT_ID,
              "client_secret": CLIENT_SECRET,
              "grant_type": "client_credentials"},
        timeout=10)
    r.raise_for_status()
    return r.json()["access_token"]

# Descargar juegos IGDB (último año) 
def fetch_igdb_last_year():
    today  = dt.date.today()
    start  = today.replace(year=today.year - 1)            # hace 12 meses
    ini, fin = epoch(start), epoch(today)

    token = igdb_token()
    hdrs  = {"Client-ID": CLIENT_ID, "Authorization": f"Bearer {token}"}
    api   = "https://api.igdb.com/v4/games"

    STEP = 500
    query = f"""
    fields id,name,first_release_date,
           genres.name,platforms.name,
           rating,rating_count,aggregated_rating,total_rating,
           follows,popularity,hypes,
           involved_companies.company.name,
           involved_companies.developer,
           involved_companies.publisher,
           external_games;
    where first_release_date >= {ini}
      & first_release_date <  {fin};
    limit {STEP};
    offset %d;
    """

    data, offset = [], 0
    while True:
        batch = requests.post(api, headers=hdrs, data=query % offset, timeout=30).json()
        if not batch:
            break
        data.extend(batch)
        offset += STEP
        print(f"IGDB +{len(batch):3d}  (offset {offset})")
        time.sleep(0.35)
    RAW_DIR.joinpath("igdb_last_year.json").write_text(json.dumps(data, indent=2))
    return data, hdrs

def steam_ids_from_igdb(igdb_data, hdrs):
    api_ext = "https://api.igdb.com/v4/external_games"
    CHUNK, mapping = 200, {}
    ids_list = [g["id"] for g in igdb_data]
    for chunk in [ids_list[i:i+CHUNK] for i in range(0, len(ids_list), CHUNK)]:
        q = f"fields game, uid, category; where game = ({','.join(map(str,chunk))}) & category = 1;"  # 1=Steam
        for eg in requests.post(api_ext, headers=hdrs, data=q, timeout=30).json():
            mapping[eg["game"]] = int(eg["uid"])
        time.sleep(0.35)
    RAW_DIR.joinpath("steam_appids_last_year.json").write_text(json.dumps(mapping, indent=2))
    return mapping

def fetch_steam(appids, throttle=0.25):
    players, details = {}, {}
    for aid in appids:
        p = requests.get(
            "https://api.steampowered.com/ISteamUserStats/GetNumberOfCurrentPlayers/v1/",
            params={"appid": aid}, timeout=10).json()
        players[aid] = p["response"].get("player_count", 0)

        d = requests.get(
            "https://store.steampowered.com/api/appdetails",
            params={"appids": aid, "cc": "us", "l": "en"}, timeout=10).json()
        if d[str(aid)]["success"]:
            details[aid] = d[str(aid)]["data"]

        print(f"Steam {aid} OK")
        time.sleep(throttle)
    RAW_DIR.joinpath("steam_players_last_year.json").write_text(json.dumps(players, indent=2))
    RAW_DIR.joinpath("steam_details_last_year.json").write_text(json.dumps(details, indent=2))
    return players, details

def save_csv_igdb(games):
    df = pd.json_normalize(games, sep="_")
    df.to_csv(CLEAN_DIR / "igdb_games_last_year.csv", index=False)

def save_csv_steam(players, details):
    df_players = pd.Series(players, name="player_count").to_frame()
    df_details = pd.json_normalize(details).set_index("steam_appid")
    df_players.join(df_details, how="left").to_csv(CLEAN_DIR / "steam_last_year.csv")
if __name__ == "__main__":
    igdb_raw, hdrs = fetch_igdb_last_year()
    steam_map      = steam_ids_from_igdb(igdb_raw, hdrs)

    players, details = fetch_steam(list(steam_map.values()))
    save_csv_igdb(igdb_raw)
    save_csv_steam(players, details)

    print("\n✅ Proceso completo: CSV listos en  datos/limpios/")


IGDB +  1  (offset 500)
IGDB +  1  (offset 1000)
IGDB +  1  (offset 1500)
IGDB +  1  (offset 2000)
IGDB +  1  (offset 2500)
IGDB +  1  (offset 3000)
IGDB +  1  (offset 3500)
IGDB +  1  (offset 4000)
IGDB +  1  (offset 4500)
IGDB +  1  (offset 5000)
IGDB +  1  (offset 5500)
IGDB +  1  (offset 6000)
IGDB +  1  (offset 6500)
IGDB +  1  (offset 7000)
IGDB +  1  (offset 7500)
IGDB +  1  (offset 8000)
IGDB +  1  (offset 8500)
IGDB +  1  (offset 9000)
IGDB +  1  (offset 9500)
IGDB +  1  (offset 10000)
IGDB +  1  (offset 10500)
IGDB +  1  (offset 11000)
IGDB +  1  (offset 11500)
IGDB +  1  (offset 12000)
IGDB +  1  (offset 12500)
IGDB +  1  (offset 13000)
IGDB +  1  (offset 13500)
IGDB +  1  (offset 14000)
IGDB +  1  (offset 14500)
IGDB +  1  (offset 15000)
IGDB +  1  (offset 15500)
IGDB +  1  (offset 16000)
IGDB +  1  (offset 16500)
IGDB +  1  (offset 17000)
IGDB +  1  (offset 17500)
IGDB +  1  (offset 18000)
IGDB +  1  (offset 18500)
IGDB +  1  (offset 19000)
IGDB +  1  (offset 19500)
IGDB +


## 5. Exploración básica (EDA)
