# Dataset Analysis

In [25]:
import json

# Carica il dataset JSON
with open('./../dataset/games_dataset_reviews_transformed.json', 'r') as file:
    json_data = json.load(file)

## List of the fields

Before diving into the code, let's give a look to the fields and the type of data that each of them contains (The following code performs a kind of SELECT DISTINCT of the fields):

**NOTE!** 🟢
The field "*Example Content*" contains 5 random samples of the field content. These samples are not aggregated.

| **FIELD** 	| **TYPE** 	| **TO PARSE** 	| **Are we going to use it?** 	| **Example Content** 	|
|---	|:---:	|:---:	|:---:	|---	|
| age_ratings 	|  	| 🟠 	| ✅ 	| [[114607, 114609, 126972, 126973, 126974, 127011], [39395, 54394], [65532], [67799], [57121, 58326, 91135, 93494, 110841, 126135]] 	|
| aggregated_rating 	| float 	|  	| ❌ 	| [65.0, 73.25, 82.5, 69.0, 70.93333333333334] 	|
| aggregated_rating_count 	|  	|  	| ❌ 	| [3, 4, 3, 1, 17] 	|
| alternative_names 	|  	|  	| ❌ 	| [[36941, 36942, 127468], [65648, 105850, 117666], [120224], [97398], [106570, 116583]] 	|
| artworks 	|  	|  	| ❌ 	| [[96775], [14195], [9728], [115992], [11731, 68424]] 	|
| bundles 	|  	|  	| ❌ 	| [[263897], [230749, 230823, 230826], [251322], [29208], [136262, 164674, 164778, 219003, 230498, 230702]] 	|
| category 	|  	| 🟠 	| ✅ 	| [0, 0, 0, 0, 0] 	|
| checksum 	|  	|  	| ❌ 	| ['4e87d634-9146-2025-b09b-9e4ea1e04764', '00e38d21-91cd-3082-4737-7978c44e33e2', '40916ead-d6e4-99f3-ab54-d653605a984b', '489ad4d0-e2f2-09e0-3bae-a76091559c44', 'bc043ffb-ad60-6bd0-801b-55a6141e773c'] 	|
| collection 	|  	| 🟠 	|  	| [602, 2449, 7103, 7856, 559] 	|
| collections 	|  	| 🟠 	|  	| [[602], [18], [18], [39, 7197], [39]] 	|
| cover 	|  	|  	| ❌ 	| [280467, 247255, 272120, 192106, 326638] 	|
| created_at 	|  	|  	| ❌ 	| [1673878007, 1621112209, 1499434349, 1517405808, 1521818623] 	|
| dlcs 	| game_id 	|  	|  	| [[171610], [8219, 222679, 239352], [172092], [212363, 212364, 212365], [140056, 140057, 140058]] 	|
| expanded_games (?) 	| game_id 	|  	|  	| [[135286], [30251], [78807], [57658], [124492]] 	|
| expansions (?) 	| game_id 	|  	|  	| [[171428], [19540, 19551, 26852], [44596], [171147], [18442]] 	|
| external_games 	|  	| game_id of other platforms 	| ❌ 	| [[2639959, 2677882, 2677897, 2677919, 2678595], [2011978], [51807, 154169, 1867507], [221231, 1748471, 1991656], [1989881]] 	|
| first_release_date 	|  	|  	| ✅ 	| [1677110400, 1610928000, 1019001600, 1559952000, 1644796800] 	|
| follows 	|  	| followers of the game 	|  	| [1, 1, 1, 63, 244] 	|
| forks (?) 	|  	|  	|  	| [[79074, 109134, 133635, 147176, 151170, 176876, 197828, 246736, 255413], [235313], [141686], [67248], [142289]] 	|
| franchise 	|  	| 🟠 	|  	| [361, 9, 102, 299, 105] 	|
| franchises 	|  	| 🟠 	|  	| [[1651, 3134], [872, 874], [3390], [509, 977], [1351]] 	|
| game_engines 	|  	| 🟠 	| ❌ 	| [[1543], [427], [439], [72], [10]] 	|
| game_localizations 	|  	| 🟠 	| ❌ 	| [[2026, 13450], [4006, 19440], [14535], [1863, 3984], [11783, 20873]] 	|
| game_modes 	|  	| 🟢 	| ✅ 	| [[1, 2], [1, 2], [1], [1], [1]] 	|
| genres 	|  	| 🟢 	| ✅ 	| [[15], [4, 31, 33], [7, 13, 33], [9, 33], [31, 32]] 	|
| hypes 	|  	| number of followers before release date 	| ❌ 	| [1, 1, 26, 2, 2] 	|
| id 	| game_id 	| id_of_IGDB 	| ✅ 	| [231577, 147666, 44711, 85450, 95080] 	|
| involved_companies 	|  	| 🟠 	| ✅ 	| [[102978, 102979], [85230], [154613], [229692, 229693], [194995, 194996]] 	|
| keywords 	|  	|  	| ❌ 	| [[1699, 2152, 4137, 4438, 5029, 5188, 5189, 10098, 13115], [101], [78, 394, 960, 1033, 2458], [30750, 33125], [1308, 3257]] 	|
| language_supports 	|  	| 🟠 	| ✅ 	| [[504844], [784416, 784417, 784418, 784419], [108775, 108776], [430843, 430844, 430845], [46076, 46078, 46080, 46082, 46084, 46086, 46088, 46090, 46092, 46094, 46096, 46098, 46100, 46102, 46104, 46106, 46108, 46110, 46112, 46114, 148503, 148505, 148507, 148513, 148515]] 	|
| multiplayer_modes 	|  	|  	| ❌ 	| [[11174, 11175], [18654], [20006], [7037, 7982, 12750, 21945], [22267]] 	|
| name 	|  	|  	| ✅ 	| ['Blood Bowl 3: Black Orcs Edition', 'Shinobi Blade', 'DDRMax2: Dance Dance Revolution', 'Transformers Prime: The Game', 'Dotra'] 	|
| parent_game 	| game_id 	|  	|  	| [110980, 46584, 198295, 231152, 107843] 	|
| platforms 	|  	| 🟠 	| ✅ 	| [[49, 169], [130], [8, 52], [34, 39], [6]] 	|
| player_perspectives 	|  	|  	| ❌ 	| [[6], [2, 4], [1, 7], [1], [2]] 	|
| ports 	|  	|  	| ❌ 	| [[73949, 144894], [10], [251353], [122871], [8809]] 	|
| rating 	|  	|  	| ❌ 	| [70.0, 78.83709512164174, 60.0, 40.0, 50.0] 	|
| rating_count 	|  	|  	| ❌ 	| [0, 102, 0, 0, 0] 	|
| release_dates 	|  	|  	| ❌ 	| [[455316, 455317], [249798, 340666], [201923, 201924, 201925], [511087], [345917, 387770, 387772, 387773, 389364, 406085]] 	|
| remakes 	|  	|  	| ❌ 	| [[18189], [222995], [93192], [1181], [245057]] 	|
| remasters 	|  	|  	| ❌ 	| [[265060], [145817, 158981], [11218], [203435], [267307]] 	|
| reviews.author 	|  	|  	| ✅ 	| ['maskedgingerjock', 'MisterEcho', 'Ziggy The Adventurer', 'Obey the Fist!', 'Sixth'] 	|
| reviews.postedDate 	|  	|  	| ✅ 	| ['18 September', '4 September', '3 September', '31 March, 2022\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tSteam Key', '21 September, 2022'] 	|
| reviews.recommended 	|  	|  	| ✅ 	| [False, True, None, False, True] 	|
| reviews.review 	|  	|  	| ✅ 	| ['This game is horrible. This game is boring.You can get both endings in under 10 minutes.There is no ...', 'https://youtu.be/dbbmoI0O5UgPretty nice game if you like indie walking simulator games with some jum...', "Plays fine, looks okay, very limited options, no body, can't jump or crouch, can be finished in abou...", 'Warning: Asset Flip!Pet Puzzle is a Unity Asset flip, what Valve calls a "fake game". 	|
| screenshots 	|  	|  	| ❌ 	| [[1002202, 1002203, 1002204, 1002205, 1002206, 1002207], [447897, 447898, 447899, 447900, 447901], [338676, 338677, 338678, 390639, 390640], [1128097, 1128099, 1128104, 1128107, 1128111], [311510, 311511, 311512, 311513, 311514]] 	|
| similar_games 	| game_id 	|  	|  	| [[17613, 24620, 36269, 36346, 65827, 76340, 77038, 77597, 111043, 112754], [10605, 56033, 87622, 96217, 101608, 103292, 103369, 111130, 114145, 117533], [3679, 18981, 27378, 38967, 43218, 43488, 43680, 63933, 73042, 76901], [18115, 19222, 25905, 41349, 85804, 87170, 87507, 90788, 90965, 95776], [13189, 25222, 27266, 55282, 75948, 87975, 96217, 106992, 111130, 121217]] 	|
| slug 	|  	|  	| ❌ 	| ['blood-bowl-3-black-orcs-edition', 'shinobi-blade', 'ddrmax2-dance-dance-revolution', 'transformers-prime-the-game', 'dotra'] 	|
| standalone_expansions 	|  	|  	| ❌ 	| [[146969], [3735], [27898], [203628], [128415]] 	|
| status 	|  	| 🟠 	| ✅ 	| [8, 4, 4, 4, 4] 	|
| steam 	|  	|  	| ❌ 	| ['2557720', '1092080', '1667954', '1851020', '2009730'] 	|
| storyline 	|  	|  	| ❌ 	| ['In the ancient, mysterious world of the Jade Empire, you train under your master’s watchful eye and ...', 'From the frigid climes of Norsca to the arid wastes of the southern Badlands, The Old World echoes t...', 'You live in a peaceful world that has triumphed over a giant alien invasion. To pay off your debts, ...', "You who are unqualified to drink Meng Po's soup, you have been chosen for the game of life. Three wo...", 'Following the end of the Galactic Civil War, the bright lights of The Arena burst into life and offe...'] 	|
| summary 	|  	|  	| ✅ 	| ["Fashion is not exactly a priority for Black Orcs… but intimidation, on the other hand, now we're tal...", 'Shinobi Blade is an action-packed game, lets you play the role of a teenage ninja who sneaked out of...', 'The dance floor kicks into overdrive with DDRMAX2 Dance Dance Revolution. It is the latest installme...', 'Shoot bubbles and match colors to pop your way up to victory in this bubble shooting adventure, win ...', 'A missing father, a mysterious murder, paintings depicting missing children. Horror game in which th...'] 	|
| tags 	|  	| 🟠 	| ❌ 	| [[268435471], [1, 268435460, 268435487, 268435489], [40, 268435463, 268435469, 268435489, 536872611, 536873064, 536875049, 536875350, 536875941, 536876100, 536876101, 536881010, 536884027], [268435465, 268435489], [268435487, 268435488]] 	|
| themes 	|  	|  	| ❌ 	| [[1], [40], [17, 22, 27], [1], [1]] 	|
| total_rating 	|  	|  	| ✅ 	| [65.0, 70.0, 73.25, 80.66854756082087, 60.0] 	|
| total_rating_count 	|  	|  	| ✅ 	| [3, 0, 4, 105, 0] 	|
| updated_at 	|  	|  	| ❌ 	| [1699266775, 1698219001, 1696215459, 1670993154, 1670992528] 	|
| url 	|  	|  	| ❌ 	| ['https://www.igdb.com/games/blood-bowl-3-black-orcs-edition', 'https://www.igdb.com/games/shinobi-blade', 'https://www.igdb.com/games/ddrmax2-dance-dance-revolution', 'https://www.igdb.com/games/transformers-prime-the-game', 'https://www.igdb.com/games/dotra'] 	|
| version_parent 	|  	|  	| ❌ 	| [138137, 24115, 139736, 138137, 28856] 	|
| version_title 	|  	|  	| ❌ 	| ['Black Orcs Edition', 'Platinum Edition', "Collector's Edition", 'Imperial Nobility Edition', 'Digital Deluxe Edition'] 	|
| videos 	|  	|  	| ❌ 	| [[37984], [64610], [28099], [35464], [28553]] 	|
| websites 	|  	|  	| ❌ 	| [[455726], [145653, 145654, 145655, 354472], [445624], [429116], [407422]] 	|

In [None]:
def explore_keys_with_examples(data, path='', examples=None):
    """
    Funzione ricorsiva per esplorare le chiavi in un dataset JSON e raccogliere esempi.
    
    :param data: Il dataset o una parte di esso.
    :param path: Il percorso corrente delle chiavi.
    :param examples: Un dizionario che tiene traccia degli esempi per ogni chiave.
    :return: None. Modifica il dizionario `examples` direttamente.
    """
    if examples is None:
        examples = {}

    if isinstance(data, dict):
        for k, v in data.items():
            new_path = f"{path}.{k}" if path else k
            if new_path not in examples:
                examples[new_path] = []
            if len(examples[new_path]) < 5:
                # Tronca il testo se è troppo lungo
                if isinstance(v, str) and len(v) > 100:
                    v = v[:100] + "..."
                examples[new_path].append(v)
            explore_keys_with_examples(v, new_path, examples)

    elif isinstance(data, list):
        for item in data:
            explore_keys_with_examples(item, path, examples)

    return examples

# Esplora le chiavi e raccogli esempi
examples = explore_keys_with_examples(json_data)

# Stampa i risultati
for key in sorted(examples):
    print(f"{key}: {examples[key]}")
    #print(f"{examples[key]}")


#### Count objects with particular fields

In [None]:
count_games = sum(1 for item in json_data)
count_without_companies = sum(1 for item in json_data if 'companies' not in item)
count_without_involved_companies = sum(1 for item in json_data if 'involved_companies' not in item)

print(f"Numero di giochi: {count_games}")
print(f"Numero di oggetti senza la chiave 'companies': {count_without_companies}")
print(f"Numero di oggetti senza la chiave 'involved_companies': {count_without_involved_companies}")


### Distinct list of elements of a particular field

In [None]:
def extract_unique_elements_from_field(json_data, field_name):
    """
    Estrae elementi unici da un campo specificato in un dataset JSON.

    :param json_data: Il dataset JSON (lista di dizionari).
    :param field_name: Il nome del campo da cui estrarre gli elementi.
    :return: Un set di elementi unici.
    """
    unique_elements = set()

    for item in json_data:
        if field_name in item and isinstance(item[field_name], list):
            unique_elements.update(item[field_name])

    return unique_elements


# Scegli il campo da cui estrarre gli elementi
field_to_extract = "involved_companies"

# Estrai gli elementi unici
unique_elements = extract_unique_elements_from_field(json_data, field_to_extract)

# Stampa i risultati
print(f"{unique_elements}")


### API request with wrapper

In [23]:
import os
from dotenv import load_dotenv
from igdb.wrapper import IGDBWrapper
from google.protobuf.json_format import MessageToJson
#from igdb.igdbapi_pb2 import GameResult

# Carica le variabili d'ambiente
load_dotenv("./.env")

wrapper = IGDBWrapper(os.getenv('LSD_CLIENT_ID').strip(), os.getenv('LSD_ACCESS_TOKEN').strip())


b'[\n  {\n    "id": 119066,\n    "name": "Left Alive: Day One Edition"\n  },\n  {\n    "id": 26095,\n    "name": "Assassin\\u0027s Creed IV Black Flag: Aveline"\n  },\n  {\n    "id": 26086,\n    "name": "Assassin\\u0027s Creed IV Black Flag: Kraken Ship Pack"\n  },\n  {\n    "id": 58921,\n    "name": "ArmaGallant: Decks of Destiny"\n  },\n  {\n    "id": 120546,\n    "name": "Gravity Ghost: Deluxe Edition"\n  },\n  {\n    "id": 222072,\n    "name": "Tantei Jinguuji Saburou: Prism of Eyes - Shisha ni Sasageru Ishi"\n  },\n  {\n    "id": 42927,\n    "name": "Worms WMD All-Stars"\n  },\n  {\n    "id": 111057,\n    "name": "Phobos Vector Prime: The First Ring"\n  },\n  {\n    "id": 229079,\n    "name": "Project: Summer Ice - Pinball: Pammy"\n  },\n  {\n    "id": 42923,\n    "name": "Tales of Berseria: Collector\\u0027s Edition"\n  }\n]'


In [24]:
import requests

def fetch_company_names(ids):
    """
    Effettua richieste API per recuperare i nomi delle compagnie basandosi sui loro ID.

    :param ids: Una lista di ID.
    :return: Un dizionario con ID come chiavi e nomi delle compagnie come valori.
    """
    #games_message = GameResult()
    company_names = {}

    # Suddividi gli ID in gruppi di 500
    step = 500
    for i in range(27932, len(ids), step):
        batch_ids = ids[i:i + step]
        id_list = ', '.join(map(str, batch_ids))
        payload = f"fields id, name;\r\nlimit 500;\r\nwhere id = ({id_list});\r\nsort id asc;"

        try:
            result = wrapper.api_request(
                'games', 
                'fields id, name; offset 0; where platforms=48;'
            ).decode('utf-8')
        except Exception as e:
            print("Errore nella richiesta API:", e)
            print("Payload:", payload)
            break
        
        #print(games_message.ParseFromString(result).companies)

        companies = result
        print(companies)
        for company in companies:
            company_names[str(company["id"])] = company["name"]
            
    return company_names

# Esempio di lista di ID
ids_to_fetch = list(unique_elements)  # Aggiungi qui tutti gli ID necessari

# Ottieni i nomi delle compagnie
company_names = fetch_company_names(ids_to_fetch)

# Stampa i risultati
print(company_names)

[
  {
    "id": 119066,
    "name": "Left Alive: Day One Edition"
  },
  {
    "id": 26095,
    "name": "Assassin\u0027s Creed IV Black Flag: Aveline"
  },
  {
    "id": 26086,
    "name": "Assassin\u0027s Creed IV Black Flag: Kraken Ship Pack"
  },
  {
    "id": 58921,
    "name": "ArmaGallant: Decks of Destiny"
  },
  {
    "id": 120546,
    "name": "Gravity Ghost: Deluxe Edition"
  },
  {
    "id": 222072,
    "name": "Tantei Jinguuji Saburou: Prism of Eyes - Shisha ni Sasageru Ishi"
  },
  {
    "id": 42927,
    "name": "Worms WMD All-Stars"
  },
  {
    "id": 111057,
    "name": "Phobos Vector Prime: The First Ring"
  },
  {
    "id": 229079,
    "name": "Project: Summer Ice - Pinball: Pammy"
  },
  {
    "id": 42923,
    "name": "Tales of Berseria: Collector\u0027s Edition"
  }
]


TypeError: string indices must be integers, not 'str'

## Setup and Dataset Import

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json

# Carica il dataset
with open('./../dataset/games_dataset_reviews_transformed.json', 'r') as file:
    data = json.load(file)
    
df = pd.json_normalize(data)

## Preliminar analysis

In [4]:
# Visualizza le prime righe del dataset
print(df.head())

# Informazioni sul dataset
print(df.info())

# Statistiche descrittive
print(df.describe())

# Conteggio valori nulli
print(df.isnull().sum())

       id                                       age_ratings artworks  \
0  231577  [114607, 114609, 126972, 126973, 126974, 127011]  [96775]   
1  147666                                    [39395, 54394]  [14195]   
2   44711                                           [65532]   [9728]   
3   85450                                           [67799]      NaN   
4   95080                                               NaN      NaN   

   category     cover  created_at  \
0         0  280467.0  1673878007   
1         0  247255.0  1621112209   
2         0  272120.0  1499434349   
3         0       NaN  1517405808   
4         0       NaN  1521818623   

                                  external_games  first_release_date  \
0  [2639959, 2677882, 2677897, 2677919, 2678595]        1.677110e+09   
1                                      [2011978]        1.610928e+09   
2                       [51807, 154169, 1867507]        1.019002e+09   
3                     [221231, 1748471, 1991656]        

## Enetities Analysis
### Games

In [None]:
# Analisi dei generi
genre_counts = df['genres'].explode().value_counts()
plt.figure(figsize=(10, 6))
sns.barplot(x=genre_counts.index, y=genre_counts.values)
plt.title('Distribuzione dei Generi di Giochi')
plt.xlabel('Genere')
plt.ylabel('Conteggio')
plt.show()

# Analisi delle piattaforme
# Codice simile può essere usato per analizzare le piattaforme