# 📌 Data Collection
---
Testing the IGDB API.


In [1]:
# install IGDB API wrapper
# (presumably the package behind the `igdb.wrapper` import used in the next cell;
#  `%pip` installs into the environment of the running kernel)
%pip install igdb-api-v4

Note: you may need to restart the kernel to use updated packages.


In [2]:
# imports
import json

import pandas as pd
from igdb.wrapper import IGDBWrapper

from config import *

In [3]:
# initialize wrapper
# CLIENT_ID and ACCESS_TOKEN are not defined in this notebook; they presumably come
# from the `from config import *` import, which keeps the credentials out of the notebook.
wrapper = IGDBWrapper(CLIENT_ID, ACCESS_TOKEN)

> 📅 **1/21/2023**  
> From the new updates: instead of removing null values (for `summary` and `genres`) from the data after requesting it, filter them out in the request itself.  
> This is shown below!

In [4]:
# API request using wrapper
# Window of game ids to fetch: min_id is exclusive, max_id is inclusive.
min_id, max_id = 500, 1000
limit = 500

# Only games that have both a summary and at least one genre are requested,
# so no null clean-up is needed afterwards.
games_query = (
    'fields name, summary, genres, genres.name; '
    f'limit {limit}; '
    f'where id > {min_id} & id <= {max_id} & summary != null & genres != null; '
    'sort id asc;'
)

byte_array = wrapper.api_request('games', games_query)

In [5]:
# Decode the raw API response into Python objects
# (`json` is imported in the imports cell at the top of the notebook).
json_array = json.loads(byte_array)

# Number of games returned by the request
print(len(json_array))

470


In [6]:
# confirmation if above number checks out:
# send the same query to the 'games/count' endpoint and compare its answer
# with the number of records that were actually fetched.
# NOTE(review): the count endpoint presumably only needs the `where` filter;
# the fields/limit/sort clauses are kept unchanged - confirm against the IGDB docs.
total_games = wrapper.api_request(
    'games/count',
    f'fields name, summary, genres, genres.name; limit {limit}; where id > {min_id} & id <= {max_id} & summary != null & genres != null; sort id asc;'
)
count_response = json.loads(total_games)
print(count_response)

# Make the check explicit instead of comparing the two printed numbers by eye.
assert count_response['count'] == len(json_array), (
    f"count endpoint reports {count_response['count']} games, "
    f"but {len(json_array)} were fetched (page truncated by limit {limit}?)"
)

{'count': 470}


In [7]:
# Load the decoded API records into a DataFrame
# (`pd` is imported in the imports cell at the top of the notebook).
videogame_df = pd.DataFrame(json_array)

# Snapshot of the unprocessed frame for manual inspection.
videogame_df.to_csv('./debug.csv')

# Preview as the cell's last expression; in the middle of the cell the
# result was discarded and nothing was rendered.
videogame_df.head(10)

In [8]:
# check for missing values:
# the "Non-Null Count" of every column should equal the number of rows,
# since null `summary` / `genres` were filtered out in the API query.
videogame_df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 470 entries, 0 to 469
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   id       470 non-null    int64 
 1   genres   470 non-null    object
 2   name     470 non-null    object
 3   summary  470 non-null    object
dtypes: int64(1), object(3)
memory usage: 14.8+ KB


> 📅 **1/19/2023**  
> *mental note*: `summary` can have null values.  
> *mental note update*: `genres` as well (a missing value turns into a float, i.e. `NaN`)

In [9]:
# Inspect the genres of the first game. Each entry of the `genres` column is a
# list of dicts with 'id' and 'name' keys (not a string).
first_game_genres = videogame_df['genres'].iloc[0]  # explicit positional access

genre_list = [genre['name'] for genre in first_game_genres]
genre_ids = [genre['id'] for genre in first_game_genres]

print(genre_list)
print(genre_ids)

["Hack and slash/Beat 'em up", 'Adventure']
[25, 31]


In [10]:
# Scratch frame holding only the `genres` column, used to try out the
# genre transformations without touching videogame_df.
test_df = videogame_df[['genres']].copy()
test_df.head()

Unnamed: 0,genres
0,"[{'id': 25, 'name': 'Hack and slash/Beat 'em u..."
1,"[{'id': 5, 'name': 'Shooter'}]"
2,"[{'id': 13, 'name': 'Simulator'}, {'id': 14, '..."
3,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i..."
4,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i..."


In [11]:
# Derive three helper columns from the list of genre dicts of each game:
# how many genres it has, their names, and their ids.
genres_column = test_df['genres']

test_df['genre_count'] = genres_column.map(len)
test_df['genre_list'] = genres_column.map(
    lambda entries: [entry['name'] for entry in entries]
)
test_df['genre_ids'] = genres_column.map(
    lambda entries: [entry['id'] for entry in entries]
)

In [12]:
# Preview the first 10 rows to check the derived genre columns against the raw `genres` column.
test_df.head(10)

Unnamed: 0,genres,genre_count,genre_list,genre_ids
0,"[{'id': 25, 'name': 'Hack and slash/Beat 'em u...",2,"[Hack and slash/Beat 'em up, Adventure]","[25, 31]"
1,"[{'id': 5, 'name': 'Shooter'}]",1,[Shooter],[5]
2,"[{'id': 13, 'name': 'Simulator'}, {'id': 14, '...",2,"[Simulator, Sport]","[13, 14]"
3,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",5,"[Role-playing (RPG), Simulator, Strategy, Hack...","[12, 13, 15, 25, 31]"
4,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",3,"[Role-playing (RPG), Hack and slash/Beat 'em u...","[12, 25, 31]"
5,"[{'id': 8, 'name': 'Platform'}, {'id': 31, 'na...",2,"[Platform, Adventure]","[8, 31]"
6,"[{'id': 8, 'name': 'Platform'}, {'id': 31, 'na...",2,"[Platform, Adventure]","[8, 31]"
7,"[{'id': 10, 'name': 'Racing'}, {'id': 14, 'nam...",2,"[Racing, Sport]","[10, 14]"
8,"[{'id': 5, 'name': 'Shooter'}, {'id': 10, 'nam...",2,"[Shooter, Racing]","[5, 10]"
9,"[{'id': 4, 'name': 'Fighting'}]",1,[Fighting],[4]


Finally, apply the same transformations we tried on `test_df` to `videogame_df`.

In [13]:
# Replace the raw `genres` column (a list of {'id', 'name'} dicts per game) with
# three derived columns: genre_count, genre_list and genre_ids.
# The guard keeps the cell idempotent: once `genres` has been dropped, running
# the cell again is a no-op instead of raising a KeyError.
if 'genres' in videogame_df.columns:
    raw_genres = videogame_df['genres']
    videogame_df = videogame_df.assign(
        genre_count=raw_genres.map(len),
        genre_list=raw_genres.map(lambda entries: [entry['name'] for entry in entries]),
        genre_ids=raw_genres.map(lambda entries: [entry['id'] for entry in entries]),
    ).drop(columns='genres')

videogame_df.head()

Unnamed: 0,id,name,summary,genre_count,genre_list,genre_ids
0,501,Batman: Arkham City,After the events of Batman: Arkham Asylum (200...,2,"[Hack and slash/Beat 'em up, Adventure]","[25, 31]"
1,502,Brink,Brink is an immersive first-person shooter tha...,1,[Shooter],[5]
2,503,FIFA Soccer 11,The best-selling and most critically-acclaimed...,2,"[Simulator, Sport]","[13, 14]"
3,504,Fable III,"Set 50 years after the events of Fable II, the...",5,"[Role-playing (RPG), Simulator, Strategy, Hack...","[12, 13, 15, 25, 31]"
4,505,Hunted: The Demon's Forge,It will take the combined efforts of two great...,3,"[Role-playing (RPG), Hack and slash/Beat 'em u...","[12, 25, 31]"


In [15]:
# save as csv
# The DataFrame index is written as the first (unnamed) column.
sample_csv_path = './sample_data.csv'
videogame_df.to_csv(sample_csv_path)