In [2]:
# imports
import pandas as pd
import glob
import os
import json

In [14]:
# Path to games_by_year folder
folder_path = './data/global_games_by_year/'

# get all files
csv_files = glob.glob(os.path.join(folder_path, '*.csv'))

# Read and combine the CSV's
df_list = []

for file in csv_files:
    df = pd.read_csv(file)
    df_list.append(df)

# Combine all into DataFrame
games_df = pd.concat(df_list, ignore_index=True)

# Show head
games_df.head()


Unnamed: 0,id,slug,name,released,metacritic,rating,ratings_count,playtime,added,platforms,genres,year
0,22991,akalabeth-world-of-doom-2,Akalabeth: World of Doom,1979-12-31,,2.09,81,1,1525,"PC, iOS, macOS, Linux, Apple II","Adventure, RPG",1979
1,52423,galaxian,Galaxian,1979-01-01,,3.88,80,0,173,"Wii, Game Boy, NES, Apple II, Commodore / Amig...","Shooter, Arcade",1979
2,52394,asteroids-1979,Asteroids (1979),1979-11-01,,3.76,32,0,81,"PC, Xbox 360, PlayStation, Game Boy Advance, G...","Shooter, Arcade, Action",1979
3,52391,adventure-game-atari,Adventure,1979-01-01,,4.04,23,0,65,"iOS, Atari 2600, Atari Flashback","Action, Puzzle",1979
4,53426,lunar-lander,Lunar Lander,1979-01-01,,3.25,7,0,32,Game Boy Advance,,1979


In [11]:
# Path to company top games
json_folder = './data/company top games/'

# get all files
json_files = glob.glob(os.path.join(json_folder, '*.json'))

# List to hold all game entries
company_games_list = []

for file in json_files:
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)
        
        # Only process files with actual game results
        if 'results' in data and isinstance(data['results'], list):
            # Extract company name from filename
            filename = os.path.basename(file)
            company_name = filename.replace('_top_games.json', '').replace('_', ' ')
            
            for game in data['results']:
                game['company'] = company_name
                company_games_list.append(game)

# Convert to DataFrame
company_games_df = pd.json_normalize(company_games_list)

# Get the first 5 unique companies
first_5_companies = company_games_df['company'].unique()[:5]

# Filter the DataFrame to only show rows from these companies
top_5_company_games = company_games_df[company_games_df['company'].isin(first_5_companies)]

# Show a preview of their games
top_5_company_games[['company', 'name', 'released', 'rating']].sort_values(by='company').head(20)

Unnamed: 0,company,name,released,rating
0,343 Industries,Halo: The Master Chief Collection,2014-11-11,4.25
8,343 Industries,Halo: Spartan Assault,2013-07-18,3.12
7,343 Industries,Halo: Spartan Strike,2015-04-16,3.28
6,343 Industries,Halo Infinite,2021-12-08,3.65
5,343 Industries,Halo Wars: Definitive Edition,2017-04-20,3.72
9,343 Industries,Halo Waypoint,2010-09-09,2.72
3,343 Industries,Halo 4,2012-11-06,3.82
2,343 Industries,Halo: Combat Evolved Anniversary,2011-11-15,3.97
1,343 Industries,Halo 2: Anniversary,2014-11-11,4.21
4,343 Industries,Halo 5: Guardians,2015-10-27,3.74


In [12]:
# Path to top 50 games per genre
genre_folder = './data/top 50 games per genre/'
genre_files = glob.glob(os.path.join(genre_folder, '*.json'))

# List to hold all game data
genre_games_list = []

for file in genre_files:
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

        # Extract genre from the first genre tag in each game
        for game in data:
            if 'genres' in game and isinstance(game['genres'], list) and len(game['genres']) > 0:
                main_genre = game['genres'][0].get('name', 'Unknown')
            else:
                main_genre = 'Unknown'

            game['main_genre'] = main_genre
            genre_games_list.append(game)

# Convert to DataFrame
genre_games_df = pd.json_normalize(genre_games_list)

# Preview
genre_games_df[['main_genre', 'name', 'released', 'rating']].head()

Unnamed: 0,main_genre,name,released,rating
0,Shooter,Portal 2,2011-04-18,4.59
1,Shooter,Counter-Strike: Global Offensive,2012-08-21,3.56
2,Shooter,Left 4 Dead 2,2009-11-17,4.09
3,Shooter,BioShock Infinite,2013-03-26,4.38
4,Shooter,Half-Life 2,2004-11-16,4.48


In [13]:
# Path to the file (adjust if needed)
file_path = './data/top 5000 critic reviewed games/top_5000_critic_reviewed_games.json'

# Load the JSON data
with open(file_path, 'r', encoding='utf-8') as f:
    critic_data = json.load(f)

# Convert to DataFrame
critic_df = pd.json_normalize(critic_data)

# Preview
critic_df[['name', 'released', 'metacritic', 'rating', 'genres']].head()

Unnamed: 0,name,released,metacritic,rating,genres
0,The Legend of Zelda: Ocarina of Time,1998-11-21,99,4.39,"[{'id': 4, 'name': 'Action', 'slug': 'action',..."
1,Soulcalibur (1998),1998-07-30,98,0.0,"[{'id': 6, 'name': 'Fighting', 'slug': 'fighti..."
2,Soulcalibur,1998-07-30,98,4.37,"[{'id': 4, 'name': 'Action', 'slug': 'action',..."
3,Baldur's Gate III,2023-08-03,97,4.47,"[{'id': 3, 'name': 'Adventure', 'slug': 'adven..."
4,Metroid Prime,2002-11-17,97,4.37,"[{'id': 4, 'name': 'Action', 'slug': 'action',..."
