# 📌 Data Collection
---
Testing the IGDB API.


In [118]:
# install IGDB API wrapper
# NOTE(review): the package version is not pinned, so a fresh install may pull
# a different release than the one this notebook was originally run with.
%pip install igdb-api-v4

Note: you may need to restart the kernel to use updated packages.


In [119]:
# imports
from igdb.wrapper import IGDBWrapper

# Import the credentials by name instead of with `*`, so it is obvious where
# CLIENT_ID and ACCESS_TOKEN come from and nothing else defined in config.py
# leaks into the notebook namespace.
from config import ACCESS_TOKEN, CLIENT_ID

In [120]:
# initialize wrapper
# CLIENT_ID and ACCESS_TOKEN are not defined in this notebook; they come in
# through the `config` import.
wrapper = IGDBWrapper(CLIENT_ID, ACCESS_TOKEN)

In [121]:
# Fetch one page of games from the `games` endpoint, selected by an id window.
min_id = 0
max_id = 500
limit = 500

# Query body: requested fields, page size, id filter, then ordering.
games_query = (
    'fields name, summary, genres, genres.name; '
    f'limit {limit}; '
    f'where id > {min_id} & id <= {max_id}; '
    'sort id asc;'
)
byte_array = wrapper.api_request('games', games_query)

In [122]:
import json

# Parse the API response into Python objects and report how many records came back.
json_array = json.loads(byte_array)
record_total = len(json_array)
print(record_total)

497


In [123]:
# Cross-check the number of fetched records against the count endpoint, using
# the same query (and therefore the same id window) as the fetch.
total_games = wrapper.api_request(
    'games/count',
    f'fields name, summary, genres, genres.name; limit {limit}; where id > {min_id} & id <= {max_id}; sort id asc;'
)
count_response = json.loads(total_games)
print(count_response)

# Actually enforce the check instead of comparing the two printed numbers by
# eye: a mismatch means the fetch did not return every matching game.
assert count_response['count'] == len(json_array), (
    f"fetched {len(json_array)} games but the API reports {count_response['count']}"
)

{'count': 497}


In [124]:
import pandas as pd

# Build a table with one row per game record, then preview the first ten rows.
videogame_df = pd.DataFrame(data=json_array)
videogame_df.head(n=10)

Unnamed: 0,id,genres,name,summary
0,1,"[{'id': 5, 'name': 'Shooter'}, {'id': 13, 'nam...",Thief II: The Metal Age,The ultimate thief is back! Tread softly as yo...
1,2,"[{'id': 13, 'name': 'Simulator'}, {'id': 31, '...",Thief: The Dark Project,Thief is a first-person stealth game that like...
2,3,"[{'id': 5, 'name': 'Shooter'}, {'id': 13, 'nam...",Thief: Deadly Shadows,"In the third instalment of the Thief series, m..."
3,4,"[{'id': 5, 'name': 'Shooter'}, {'id': 31, 'nam...",Thief,There is a rising tide of fear in The City. Ha...
4,5,"[{'id': 12, 'name': 'Role-playing (RPG)'}]",Baldur's Gate,Baldur's Gate is a fantasy role-playing video ...
5,6,"[{'id': 12, 'name': 'Role-playing (RPG)'}]",Baldur's Gate II: Shadows of Amn,Every World has conflict. Good and evil. Frien...
6,7,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",Jagged Alliance,Desperately you are called to the island of Me...
7,8,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",Jagged Alliance: Deadly Games,The enemy is on the run. One more mortar shell...
8,9,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",Jagged Alliance 2,Jagged Alliance 2 is a perfect blend of strate...
9,10,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",Jade Empire: Special Edition,Step into the role of an aspiring martial-arts...


In [125]:
# check for missing values
# info() lists the non-null count per column; comparing each count with the
# total number of entries shows which columns contain nulls.
videogame_df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 497 entries, 0 to 496
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   id       497 non-null    int64 
 1   genres   497 non-null    object
 2   name     497 non-null    object
 3   summary  488 non-null    object
dtypes: int64(1), object(3)
memory usage: 15.7+ KB


> *mental note*: `summary` can have null values.

In [126]:
# Prototype the genre extraction on a single game before applying it to the
# whole column.
genres_column = videogame_df['genres']

# Each cell holds a list of {'id': ..., 'name': ...} dicts (not a string).
# .iloc[0] takes the first row by position, independent of the index labels.
first_game_genres = genres_column.iloc[0]
genre_list = [genre['name'] for genre in first_game_genres]
genre_ids = [genre['id'] for genre in first_game_genres]

print(genre_list)
print(genre_ids)

['Shooter', 'Simulator', 'Adventure']
[5, 13, 31]


In [127]:
# Scratch frame holding only the `genres` column, so the experiments below
# add their columns here rather than to videogame_df.
test_df = videogame_df[['genres']].copy()
test_df.head(n=5)

Unnamed: 0,genres
0,"[{'id': 5, 'name': 'Shooter'}, {'id': 13, 'nam..."
1,"[{'id': 13, 'name': 'Simulator'}, {'id': 31, '..."
2,"[{'id': 5, 'name': 'Shooter'}, {'id': 13, 'nam..."
3,"[{'id': 5, 'name': 'Shooter'}, {'id': 31, 'nam..."
4,"[{'id': 12, 'name': 'Role-playing (RPG)'}]"


In [134]:
# Derive three helper columns from each row's list of genre dicts:
# how many genres there are, their names, and their ids.
genre_records = test_df['genres']
test_df['genre_count'] = genre_records.map(len)
test_df['genre_list'] = genre_records.map(lambda genres: [genre['name'] for genre in genres])
test_df['genre_ids'] = genre_records.map(lambda genres: [genre['id'] for genre in genres])

In [135]:
# Preview the first ten rows to eyeball the derived genre columns.
test_df.head(10)

Unnamed: 0,genres,genre_count,genre_list,genre_ids
0,"[{'id': 5, 'name': 'Shooter'}, {'id': 13, 'nam...",3,"[Shooter, Simulator, Adventure]","[5, 13, 31]"
1,"[{'id': 13, 'name': 'Simulator'}, {'id': 31, '...",2,"[Simulator, Adventure]","[13, 31]"
2,"[{'id': 5, 'name': 'Shooter'}, {'id': 13, 'nam...",3,"[Shooter, Simulator, Adventure]","[5, 13, 31]"
3,"[{'id': 5, 'name': 'Shooter'}, {'id': 31, 'nam...",2,"[Shooter, Adventure]","[5, 31]"
4,"[{'id': 12, 'name': 'Role-playing (RPG)'}]",1,[Role-playing (RPG)],[12]
5,"[{'id': 12, 'name': 'Role-playing (RPG)'}]",1,[Role-playing (RPG)],[12]
6,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",4,"[Role-playing (RPG), Strategy, Turn-based stra...","[12, 15, 16, 24]"
7,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",4,"[Role-playing (RPG), Strategy, Turn-based stra...","[12, 15, 16, 24]"
8,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",5,"[Role-playing (RPG), Strategy, Turn-based stra...","[12, 15, 16, 24, 31]"
9,"[{'id': 12, 'name': 'Role-playing (RPG)'}, {'i...",2,"[Role-playing (RPG), Adventure]","[12, 31]"


Finally, apply the same transformations we tried out on `test_df` to `videogame_df`.

In [136]:
def _as_genre_records(value):
    """Return the list of genre dicts for one game, or [] when there is none.

    When a record lacks the `genres` key, pandas fills the cell with NaN (a
    float), which has no len() and cannot be iterated. Every row in this
    sample has genres, but `summary` shows that fields can be absent, so
    other id ranges may well contain games without genres.
    """
    return value if isinstance(value, list) else []


# Guarding on the column keeps the cell re-runnable: once `genres` has been
# dropped, a second run simply skips the transformation instead of raising
# KeyError.
if 'genres' in videogame_df.columns:
    genre_records = videogame_df['genres'].map(_as_genre_records)

    # Number of genres, their names, and their ids, one list per game.
    videogame_df['genre_count'] = genre_records.map(len)
    videogame_df['genre_list'] = genre_records.map(lambda genres: [genre['name'] for genre in genres])
    videogame_df['genre_ids'] = genre_records.map(lambda genres: [genre['id'] for genre in genres])

    # The raw nested column is redundant once the flat columns exist.
    videogame_df = videogame_df.drop(columns='genres')

videogame_df.head()

Unnamed: 0,id,name,summary,genre_count,genre_list,genre_ids
0,1,Thief II: The Metal Age,The ultimate thief is back! Tread softly as yo...,3,"[Shooter, Simulator, Adventure]","[5, 13, 31]"
1,2,Thief: The Dark Project,Thief is a first-person stealth game that like...,2,"[Simulator, Adventure]","[13, 31]"
2,3,Thief: Deadly Shadows,"In the third instalment of the Thief series, m...",3,"[Shooter, Simulator, Adventure]","[5, 13, 31]"
3,4,Thief,There is a rising tide of fear in The City. Ha...,2,"[Shooter, Adventure]","[5, 31]"
4,5,Baldur's Gate,Baldur's Gate is a fantasy role-playing video ...,1,[Role-playing (RPG)],[12]


In [137]:
# save as csv
# NOTE(review): to_csv is called with its defaults, so the row index is
# written as an extra leading column; pass index=False if that is not wanted.
# genre_list / genre_ids hold Python lists and presumably end up as their
# string form in the file; confirm that is what downstream readers expect.
videogame_df.to_csv('./sample_data.csv')