In [1]:
!git clone https://github.com/oktay-atakan/Dsa-210-project.git

Cloning into 'Dsa-210-project'...
remote: Enumerating objects: 108, done.[K
remote: Counting objects: 100% (108/108), done.[K
remote: Compressing objects: 100% (106/106), done.[K
remote: Total 108 (delta 52), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (108/108), 428.97 KiB | 7.94 MiB/s, done.
Resolving deltas: 100% (52/52), done.


In [2]:
import os
import sys

os.chdir("/content/Dsa-210-project")
print("Current working directory:", os.getcwd())

Current working directory: /content/Dsa-210-project


In [3]:
sys.path.append(os.path.abspath("src"))

In [5]:
import os
import requests
import pandas as pd
from tqdm import tqdm
import time
import json
from rawg_api import get_rawg_games
from steam_api import (get_steam_id_by_name,get_steam_store_data,get_tags_from_steamspy)

In [6]:
import requests
import pandas as pd
import time
import re

API_KEY = "YOUR_API_KEY_HERE"



def get_rawg_games(api_key, pages=10):
    games = []
    for page in range(1, pages+1):
        url = f"https://api.rawg.io/api/games?key={api_key}&dates=2015-01-01,2024-12-31&page={page}"
        resp = requests.get(url).json()

        if "results" not in resp:
            print("RAWG API hata:", resp)
            break

        for g in resp["results"]:
            games.append({
                "rawg_id": g["id"],
                "name": g["name"],
                "released": g.get("released"),
                "metacritic": g.get("metacritic"),
                "rating": g.get("rating"),
                "ratings_count": g.get("ratings_count"),
                "genres": ", ".join([x["name"] for x in g["genres"]])
            })
        time.sleep(0.2)
    return pd.DataFrame(games)


rawg_df = get_rawg_games(API_KEY, pages=10)
print("Number of RAWG games:", len(rawg_df))


def get_steam_id_by_name(name):
    try:
        query = name.replace(" ", "%20")
        url = f"https://steamcommunity.com/actions/SearchApps/{query}"
        data = requests.get(url).json()

        if len(data) == 0:
            return None

        return data[0]["appid"]
    except:
        return None


rawg_df["steam_id"] = rawg_df["name"].apply(get_steam_id_by_name)
rawg_df = rawg_df.dropna(subset=["steam_id"])
rawg_df["steam_id"] = rawg_df["steam_id"].astype(int)

print("Games that Steam AppID is found:", len(rawg_df))


def get_steam_store_data(appid):
    url = f"https://store.steampowered.com/api/appdetails?appids={appid}"
    data = requests.get(url).json()

    if not data.get(str(appid), {}).get("success"):
        return None

    d = data[str(appid)]["data"]

    tags = d.get("categories", [])
    tag_names = [t["description"] for t in tags]

    return {
        "appid": appid,
        "steam_name": d.get("name"),
        "is_free": d.get("is_free"),
        "price": d.get("price_overview", {}).get("final", None),
        "metacritic_score": d.get("metacritic", {}).get("score"),
        "tags": ", ".join(tag_names),
        "has_great_soundtrack": 1 if "Great Soundtrack" in tag_names else 0
    }


steam_data = []
print("Fetching Steam Store data...")

for appid in rawg_df["steam_id"]:
    d = get_steam_store_data(appid)
    if d:
        steam_data.append(d)
    time.sleep(0.2)

steam_df = pd.DataFrame(steam_data)
print("Games with Steam Store data:", len(steam_df))



final_df = rawg_df.merge(steam_df, left_on="steam_id", right_on="appid", how="inner")
print("Final merged games:", len(final_df))

final_df.to_csv("final_games.csv", index=False)
final_df.head()

Number of RAWG games: 200
Games that Steam AppID is found: 188
Fetching Steam Store data...
Games with Steam Store data: 188
Final merged games: 200


Unnamed: 0,rawg_id,name,released,metacritic,rating,ratings_count,genres,steam_id,appid,steam_name,is_free,price,metacritic_score,tags,has_great_soundtrack
0,3328,The Witcher 3: Wild Hunt,2015-05-18,92.0,4.64,7101,"Action, RPG",292030,292030,The Witcher 3: Wild Hunt,False,399.0,93.0,"Single-player, Steam Achievements, Steam Tradi...",0
1,28,Red Dead Redemption 2,2018-10-26,96.0,4.59,5419,Action,1174180,1174180,Red Dead Redemption 2,False,1499.0,93.0,"Single-player, Multi-player, PvP, Online PvP, ...",0
2,3439,Life is Strange,2015-01-29,83.0,4.12,3751,Adventure,1265920,1265920,Life is Strange Remastered,False,1199.0,,"Single-player, Steam Achievements, Full contro...",0
3,3439,Life is Strange,2015-01-29,83.0,4.12,3751,Adventure,1265920,1265920,Life is Strange Remastered,False,1199.0,,"Single-player, Steam Achievements, Full contro...",0
4,32,Destiny 2,2017-09-06,82.0,3.52,2659,"Shooter, Action",1085660,1085660,Destiny 2,True,,83.0,"Single-player, Multi-player, PvP, Online PvP, ...",0


In [8]:



df = pd.read_csv("final_games.csv")

results = df["appid"].apply(get_tags_from_steamspy)

df["has_great_soundtrack"] = results.apply(lambda x: x["great"]).astype(int)
df["has_story_rich"]       = results.apply(lambda x: x["story"]).astype(int)
df["has_atmospheric"]      = results.apply(lambda x: x["atmo"]).astype(int)

df.to_csv("final_games_extended.csv", index=False)

df.head()

Unnamed: 0,rawg_id,name,released,metacritic,rating,ratings_count,genres,steam_id,appid,steam_name,is_free,price,metacritic_score,tags,has_great_soundtrack,has_story_rich,has_atmospheric
0,3328,The Witcher 3: Wild Hunt,2015-05-18,92.0,4.64,7101,"Action, RPG",292030,292030,The Witcher 3: Wild Hunt,False,399.0,93.0,"Single-player, Steam Achievements, Steam Tradi...",1,1,1
1,28,Red Dead Redemption 2,2018-10-26,96.0,4.59,5419,Action,1174180,1174180,Red Dead Redemption 2,False,1499.0,93.0,"Single-player, Multi-player, PvP, Online PvP, ...",1,1,1
2,3439,Life is Strange,2015-01-29,83.0,4.12,3751,Adventure,1265920,1265920,Life is Strange Remastered,False,1199.0,,"Single-player, Steam Achievements, Full contro...",1,1,0
3,3439,Life is Strange,2015-01-29,83.0,4.12,3751,Adventure,1265920,1265920,Life is Strange Remastered,False,1199.0,,"Single-player, Steam Achievements, Full contro...",1,1,0
4,32,Destiny 2,2017-09-06,82.0,3.52,2659,"Shooter, Action",1085660,1085660,Destiny 2,True,,83.0,"Single-player, Multi-player, PvP, Online PvP, ...",0,0,1
