# Video Game Recommender Project

## Part 2: Cleaning the Data for Modeling

In [1]:
import pandas as pd
import re
import requests

In [2]:
df = pd.read_csv('./Data/games.csv', low_memory=False)
df

Unnamed: 0.1,Unnamed: 0,id,age_ratings,alternative_names,category,cover,created_at,external_games,first_release_date,game_modes,...,aggregated_rating_count,ports,dlcs,remakes,franchise,expansions,standalone_expansions,forks,remasters,expanded_games
0,0,131913,[101730],"[40085, 40086, 40087]",0,267633.0,1584788069,[1977891],1.474416e+09,[1],...,,,,,,,,,,
1,1,88308,,,0,64849.0,1519237439,"[243459, 1960113]",1.478650e+09,,...,,,,,,,,,,
2,2,63308,[14739],,0,78658.0,1505087910,"[12596, 118080, 1185144]",1.372810e+09,"[2, 3]",...,,,,,,,,,,
3,3,95080,,,0,,1521818623,[1989881],,,...,,,,,,,,,,
4,4,104748,,,0,,1530519587,"[1155919, 1972586]",,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274115,2738495,270311,"[155339, 156812, 156813, 156814, 156815, 156816]",,0,338712.0,1696471845,"[2749772, 2749823, 2749915, 2749948, 2749957, ...",1.697674e+09,[1],...,,,,,,,,,,
274116,2738496,26778,"[12738, 12739, 74252]","[23222, 103500, 103531]",0,345907.0,1484338599,"[46568, 148728, 215876, 246558, 402671, 459935...",5.311872e+08,[1],...,,[109597],,[134561],,,,,,
274117,2738497,287107,[167130],,0,,1708046589,"[2888238, 2888279, 2888301]",1.707782e+09,,...,,,,,,,,,,
274118,2738498,172430,"[46359, 55966, 95862, 118055]",[102314],8,180918.0,1632435987,"[2124213, 2124220, 2124674, 2135085, 2160840, ...",1.632355e+09,[1],...,2.0,,,,,,,,,


The first thing I notice about the data is that there are rows for dlc, remakes, expansions, remasters, etc... These rows contain an ID number, which means that those ids hold their own row in the dataframe. I do not want my recommender to recommend add-ons to games, so we have to drop the rows in which these ids appear. 

In [4]:
df.columns

Index(['Unnamed: 0', 'id', 'age_ratings', 'alternative_names', 'category',
       'cover', 'created_at', 'external_games', 'first_release_date',
       'game_modes', 'genres', 'involved_companies', 'keywords', 'name',
       'platforms', 'player_perspectives', 'release_dates', 'screenshots',
       'similar_games', 'slug', 'storyline', 'summary', 'tags', 'themes',
       'updated_at', 'url', 'videos', 'websites', 'checksum',
       'game_localizations', 'collection', 'multiplayer_modes', 'status',
       'language_supports', 'collections', 'hypes', 'artworks',
       'version_parent', 'version_title', 'parent_game', 'bundles',
       'franchises', 'follows', 'rating', 'rating_count', 'total_rating',
       'total_rating_count', 'game_engines', 'aggregated_rating',
       'aggregated_rating_count', 'ports', 'dlcs', 'remakes', 'franchise',
       'expansions', 'standalone_expansions', 'forks', 'remasters',
       'expanded_games'],
      dtype='object')

In [5]:
def extract_ids(string):
    return [int(x) for x in re.findall(r'\d+', string)] 
#extracts the digits, in this case the ids, in the df
#('\d+') looks for 1 or more digits in the string provided
#return [int(x) for x... returns it as an int

In [6]:
non_null_values_dlcs = df['dlcs'].dropna()
unique_values_dlcs = non_null_values_dlcs.unique().tolist()
dlcs_ids = [id_ for sublist in map(extract_ids, unique_values_dlcs) for id_ in sublist]
# get all non nan values, so we only have games that do have dlcs, make a list of the ids that are dlcs
# so then we can remove them from the dataframe
# map() executes the process for each item that it is given, in this case, extract id in the list created
# by the variables in the dlcs column, and turn them into int so we can identify them in the id column. 

In [7]:
non_null_values_remakes = df['remakes'].dropna()
unique_values_remakes = non_null_values_remakes.unique().tolist()
remakes_ids = [id_ for sublist in map(extract_ids, unique_values_remakes) for id_ in sublist]

In [8]:
non_null_values_expansions = df['expansions'].dropna()
unique_values_expansions = non_null_values_expansions.unique().tolist()
expansions_ids = [id_ for sublist in map(extract_ids, unique_values_expansions) for id_ in sublist]

In [9]:
non_null_values_standalone_expansions = df['standalone_expansions'].dropna()
unique_values_standalone_expansions = non_null_values_standalone_expansions.unique().tolist()
standalone_expansions_ids = [id_ for sublist in map(extract_ids, unique_values_standalone_expansions) for id_ in sublist]

In [10]:
non_null_values_remasters = df['remasters'].dropna()
unique_values_remasters = non_null_values_remasters.unique().tolist()
remasters_ids = [id_ for sublist in map(extract_ids, unique_values_remasters) for id_ in sublist]

In [11]:
non_null_values_ports = df['ports'].dropna()
unique_values_ports = non_null_values_ports.unique().tolist()
ports_ids = [id_ for sublist in map(extract_ids, unique_values_ports) for id_ in sublist]

In [12]:
non_null_values_expanded_games = df['expanded_games'].dropna()
unique_values_expanded_games = non_null_values_expanded_games.unique().tolist()
expanded_games_ids = [id_ for sublist in map(extract_ids, unique_values_expanded_games) for id_ in sublist]

In [13]:
all_ids = dlcs_ids + remakes_ids + expansions_ids + standalone_expansions_ids + remasters_ids + ports_ids + expanded_games_ids
df = df[~df['id'].isin(all_ids)] 
df

# finally we have all the ids we need to drop so we transform the dataframe
# ~ makes it so we have the whole dataframe except for all_ids, negates all_ids

Unnamed: 0.1,Unnamed: 0,id,age_ratings,alternative_names,category,cover,created_at,external_games,first_release_date,game_modes,...,aggregated_rating_count,ports,dlcs,remakes,franchise,expansions,standalone_expansions,forks,remasters,expanded_games
0,0,131913,[101730],"[40085, 40086, 40087]",0,267633.0,1584788069,[1977891],1.474416e+09,[1],...,,,,,,,,,,
1,1,88308,,,0,64849.0,1519237439,"[243459, 1960113]",1.478650e+09,,...,,,,,,,,,,
2,2,63308,[14739],,0,78658.0,1505087910,"[12596, 118080, 1185144]",1.372810e+09,"[2, 3]",...,,,,,,,,,,
3,3,95080,,,0,,1521818623,[1989881],,,...,,,,,,,,,,
4,4,104748,,,0,,1530519587,"[1155919, 1972586]",,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274114,2738494,148382,"[58294, 95857, 110147, 118294]",[108185],0,213777.0,1621638326,"[2056735, 2242321, 2242372, 2581765, 2583302, ...",1.645402e+09,[1],...,,,"[201018, 231236, 231241, 231242]",,,,,,,
274115,2738495,270311,"[155339, 156812, 156813, 156814, 156815, 156816]",,0,338712.0,1696471845,"[2749772, 2749823, 2749915, 2749948, 2749957, ...",1.697674e+09,[1],...,,,,,,,,,,
274116,2738496,26778,"[12738, 12739, 74252]","[23222, 103500, 103531]",0,345907.0,1484338599,"[46568, 148728, 215876, 246558, 402671, 459935...",5.311872e+08,[1],...,,[109597],,[134561],,,,,,
274117,2738497,287107,[167130],,0,,1708046589,"[2888238, 2888279, 2888301]",1.707782e+09,,...,,,,,,,,,,


Now, we can select the features we want to model with. The most important features are summary, platforms, genres and game modes. I could have used more features, but that came with caveats. For each feature I kept, it significantly decreased the total of my data set. This is because there was data with NaN values, and there was no correct way to deal with them other than to drop the rows that had NaN values. I can not put 0's or the average of the age rating because then it would not be an accurate or relative representation of the specific game. A lot of games did not have age ratings, so if I wanted to model with this feature, i would have had to drop over 50 000 games. It was like this with a lot of features, and these features I kept were the happy medium. (I also enjoy small indie games, so I wanted to keep a large dataset so that my recommender can recommend even lesser budget/indie games. If I dropped more games with missing values, than those games would have been weeded out of the data.) So finally we end up with a dataset of just over 100 000!

In [13]:
df = df[['name', 'summary', 'platforms', 'genres', 'game_modes', 'url']]
df

Unnamed: 0,name,summary,platforms,genres,game_modes,url
0,Maji Kyun! Renaissance,A cross media collaboration project between Su...,[46],[34],[1],https://www.igdb.com/games/maji-kyun-renaissance
1,Hey Duggee: The Big Outdoor App,"Welcome to the Big Outdoors, Squirrels! Introd...",[39],,,https://www.igdb.com/games/hey-duggee-the-big-...
2,Soldier Front 2,A free-to-play multiplayer FPS currently in cl...,[6],[5],"[2, 3]",https://www.igdb.com/games/soldier-front-2
3,Dotra,,,,,https://www.igdb.com/games/dotra
4,Space station - build your own ISS,,,,,https://www.igdb.com/games/space-station-build...
...,...,...,...,...,...,...
274114,Sol Cresta,"DOCK, SPLIT, and FORM UP!\nPilot three ships a...","[6, 48, 130]","[5, 31, 32, 33]",[1],https://www.igdb.com/games/sol-cresta
274115,Animal Hospital,Young animal lovers are warmly invited to come...,"[48, 49, 130, 167, 169]",[13],[1],https://www.igdb.com/games/animal-hospital--1
274116,Alex Kidd in Miracle World,Journey through the planet Aries to the beauti...,"[5, 9, 64]","[8, 25, 31]",[1],https://www.igdb.com/games/alex-kidd-in-miracl...
274117,Is It Wrong to Try to Pick Up Girls in a Dunge...,,"[48, 167]",[12],,https://www.igdb.com/games/is-it-wrong-to-try-...


In [14]:
df = df.dropna()
df

Unnamed: 0,name,summary,platforms,genres,game_modes,url
0,Maji Kyun! Renaissance,A cross media collaboration project between Su...,[46],[34],[1],https://www.igdb.com/games/maji-kyun-renaissance
2,Soldier Front 2,A free-to-play multiplayer FPS currently in cl...,[6],[5],"[2, 3]",https://www.igdb.com/games/soldier-front-2
5,Bubble Whirl Shooter,Shoot bubbles and match colors to pop your way...,"[34, 39]","[9, 33]",[1],https://www.igdb.com/games/bubble-whirl-shooter
9,Blood Bowl 3: Black Orcs Edition,Fashion is not exactly a priority for Black Or...,"[49, 169]",[15],"[1, 2]",https://www.igdb.com/games/blood-bowl-3-black-...
10,Pet Puzzle,Pet Puzzle is a relaxing and leisurely three-i...,[6],[13],[1],https://www.igdb.com/games/pet-puzzle
...,...,...,...,...,...,...
274113,.T.E.S.T: Expected Behaviour,.T.E.S.T: Expected Behavior is a 3D first-pers...,"[6, 14, 48, 49, 130]","[9, 13, 31, 32]",[1],https://www.igdb.com/games/dot-t-e-dot-s-t-exp...
274114,Sol Cresta,"DOCK, SPLIT, and FORM UP!\nPilot three ships a...","[6, 48, 130]","[5, 31, 32, 33]",[1],https://www.igdb.com/games/sol-cresta
274115,Animal Hospital,Young animal lovers are warmly invited to come...,"[48, 49, 130, 167, 169]",[13],[1],https://www.igdb.com/games/animal-hospital--1
274116,Alex Kidd in Miracle World,Journey through the planet Aries to the beauti...,"[5, 9, 64]","[8, 25, 31]",[1],https://www.igdb.com/games/alex-kidd-in-miracl...


In [15]:
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,name,summary,platforms,genres,game_modes,url
0,Maji Kyun! Renaissance,A cross media collaboration project between Su...,[46],[34],[1],https://www.igdb.com/games/maji-kyun-renaissance
1,Soldier Front 2,A free-to-play multiplayer FPS currently in cl...,[6],[5],"[2, 3]",https://www.igdb.com/games/soldier-front-2
2,Bubble Whirl Shooter,Shoot bubbles and match colors to pop your way...,"[34, 39]","[9, 33]",[1],https://www.igdb.com/games/bubble-whirl-shooter
3,Blood Bowl 3: Black Orcs Edition,Fashion is not exactly a priority for Black Or...,"[49, 169]",[15],"[1, 2]",https://www.igdb.com/games/blood-bowl-3-black-...
4,Pet Puzzle,Pet Puzzle is a relaxing and leisurely three-i...,[6],[13],[1],https://www.igdb.com/games/pet-puzzle
...,...,...,...,...,...,...
111590,.T.E.S.T: Expected Behaviour,.T.E.S.T: Expected Behavior is a 3D first-pers...,"[6, 14, 48, 49, 130]","[9, 13, 31, 32]",[1],https://www.igdb.com/games/dot-t-e-dot-s-t-exp...
111591,Sol Cresta,"DOCK, SPLIT, and FORM UP!\nPilot three ships a...","[6, 48, 130]","[5, 31, 32, 33]",[1],https://www.igdb.com/games/sol-cresta
111592,Animal Hospital,Young animal lovers are warmly invited to come...,"[48, 49, 130, 167, 169]",[13],[1],https://www.igdb.com/games/animal-hospital--1
111593,Alex Kidd in Miracle World,Journey through the planet Aries to the beauti...,"[5, 9, 64]","[8, 25, 31]",[1],https://www.igdb.com/games/alex-kidd-in-miracl...


So now we have our dataset. As we can see, there are id's in the platforms, genres, and game_modes columns. To get the values associated with those id's, I had to get the data from the API and put it in a dataframe. I ran the same initial code that gave me the all the games, just had to change the url and create new values to match the specifications that I needed. Once the data is in a dataframe, I can create a dictionary with the values associated with each id, and transfer the information over to the main dataframe and replace the ids with the names associated with the id number.

In [16]:
client_id = "8m485qqc89htq84deio57c7tryri4v"
client_secret = "6ci2zwn9i9seth4ldmpwf053hcvoj9"
access_token = 'ok2zlmcqawhs5qsxk3nolcsupe51nl'

In [17]:
url = "https://api.igdb.com/v4/platforms"

# initialize an empty list to store all games
all_platforms = []

# define initial query parameters
params = {
    "fields": "*",
    "limit": 500,  # adjust as needed
    "offset": 0   # start from the beginning
}

# include access token in the headers for authentication
headers = {
    "Client-ID": client_id,
    "Authorization": f"Bearer {access_token}"
}

# set a cap for the total number of games
cap_platforms = 10000000

# make the GET requests until the cap is reached
while len(all_platforms) < cap_platforms:
    response = requests.get(url, headers=headers, params=params)
    
    # check if the request was successful
    if response.status_code == 200:
        # convert the response to JSON
        platforms_data = response.json()
        
        # add the retrieved games to the list
        all_platforms.extend(platforms_data)
        
        # check if there are more games to fetch
        if len(platforms_data) < 50:
            break  # exit the loop if there are no more games to fetch
        else:
            # increment the offset to fetch the next page of results
            params["offset"] += 50
    else:
        print("Failed to retrieve data:", response.status_code)
        break

all_platforms = all_platforms[:cap_platforms]

# create df from the collected platforms
platforms_df = pd.DataFrame(all_platforms)

platforms_df = platforms_df[['name', 'id']]

In [18]:
platforms_response = []

# iterate over each row in the DataFrame
for index, row in platforms_df.iterrows():
    # create a dictionary for each platform
    platform_dict = {'id': row['id'], 'name': row['name']}
    # append the dictionary to the list
    platforms_response.append(platform_dict)

platforms_response

[{'id': 123, 'name': 'WonderSwan Color'},
 {'id': 128, 'name': 'PC Engine SuperGrafx'},
 {'id': 129, 'name': 'Texas Instruments TI-99'},
 {'id': 133, 'name': 'Odyssey 2 / Videopac G7000'},
 {'id': 134, 'name': 'Acorn Electron'},
 {'id': 135, 'name': 'Hyper Neo Geo 64'},
 {'id': 136, 'name': 'Neo Geo CD'},
 {'id': 142, 'name': 'PC-50X Family'},
 {'id': 144, 'name': 'AY-3-8710'},
 {'id': 146, 'name': 'AY-3-8605'},
 {'id': 147, 'name': 'AY-3-8606'},
 {'id': 148, 'name': 'AY-3-8607'},
 {'id': 149, 'name': 'PC-9800 Series'},
 {'id': 6, 'name': 'PC (Microsoft Windows)'},
 {'id': 8, 'name': 'PlayStation 2'},
 {'id': 23, 'name': 'Dreamcast'},
 {'id': 25, 'name': 'Amstrad CPC'},
 {'id': 35, 'name': 'Sega Game Gear'},
 {'id': 39, 'name': 'iOS'},
 {'id': 44, 'name': 'Tapwave Zodiac'},
 {'id': 50, 'name': '3DO Interactive Multiplayer'},
 {'id': 62, 'name': 'Atari Jaguar'},
 {'id': 65, 'name': 'Atari 8-bit'},
 {'id': 88, 'name': 'Odyssey'},
 {'id': 89, 'name': 'Microvision'},
 {'id': 90, 'name': 'C

In [19]:
# for genres
url = "https://api.igdb.com/v4/genres"
all_genres = []
params = {
    "fields": "*",
    "limit": 500, 
    "offset": 0   
}
headers = {
    "Client-ID": client_id,
    "Authorization": f"Bearer {access_token}"
}

cap_genres = 10000000

while len(all_genres) < cap_genres:
    response = requests.get(url, headers=headers, params=params)
       
    if response.status_code == 200:
        genres_data = response.json()
        all_genres.extend(genres_data)
        
        if len(genres_data) < 50:
            break  
        else:
            params["offset"] += 50
    else:
        print("Failed to retrieve data:", response.status_code)
        break

all_genres = all_genres[:cap_genres]
genres_df = pd.DataFrame(all_genres)
genres_df = genres_df[['name', 'id']]

In [20]:
genres_response = []
for index, row in genres_df.iterrows():
    genre_dict = {'id': row['id'], 'name': row['name']}
    genres_response.append(genre_dict)

genres_response

[{'id': 2, 'name': 'Point-and-click'},
 {'id': 4, 'name': 'Fighting'},
 {'id': 5, 'name': 'Shooter'},
 {'id': 7, 'name': 'Music'},
 {'id': 8, 'name': 'Platform'},
 {'id': 9, 'name': 'Puzzle'},
 {'id': 10, 'name': 'Racing'},
 {'id': 11, 'name': 'Real Time Strategy (RTS)'},
 {'id': 12, 'name': 'Role-playing (RPG)'},
 {'id': 13, 'name': 'Simulator'},
 {'id': 14, 'name': 'Sport'},
 {'id': 15, 'name': 'Strategy'},
 {'id': 16, 'name': 'Turn-based strategy (TBS)'},
 {'id': 24, 'name': 'Tactical'},
 {'id': 25, 'name': "Hack and slash/Beat 'em up"},
 {'id': 26, 'name': 'Quiz/Trivia'},
 {'id': 30, 'name': 'Pinball'},
 {'id': 31, 'name': 'Adventure'},
 {'id': 32, 'name': 'Indie'},
 {'id': 33, 'name': 'Arcade'},
 {'id': 34, 'name': 'Visual Novel'},
 {'id': 35, 'name': 'Card & Board Game'},
 {'id': 36, 'name': 'MOBA'}]

In [21]:
# for game modes
url = "https://api.igdb.com/v4/game_modes"
all_game_modes = []
params = {
    "fields": "*",
    "limit": 500,  
    "offset": 0   
}
headers = {
    "Client-ID": client_id,
    "Authorization": f"Bearer {access_token}"
}


cap_game_modes = 10000000


while len(all_game_modes) < cap_game_modes:
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:
        game_modes_data = response.json()
        all_game_modes.extend(game_modes_data)
        
        if len(game_modes_data) < 50:
            break  
        else:
            params["offset"] += 50
    else:
        print("Failed to retrieve data:", response.status_code)
        break


all_game_modes = all_game_modes[:cap_game_modes]
game_modes_df = pd.DataFrame(all_game_modes)
game_modes_df = game_modes_df[['name', 'id']]

In [22]:
game_modes_response = []

for index, row in game_modes_df.iterrows():
    game_modes_dict = {'id': row['id'], 'name': row['name']}
    game_modes_response.append(game_modes_dict)

game_modes_response

[{'id': 1, 'name': 'Single player'},
 {'id': 2, 'name': 'Multiplayer'},
 {'id': 3, 'name': 'Co-operative'},
 {'id': 4, 'name': 'Split screen'},
 {'id': 5, 'name': 'Massively Multiplayer Online (MMO)'},
 {'id': 6, 'name': 'Battle Royale'}]

In [23]:
import ast

df['genres'] = df['genres'].apply(ast.literal_eval) 
# type in df was object, so had to convert to list

genre_mapping = {genre['id']: genre['name'] for genre in genres_response} 
# creates relationship between id number and name

df['genres'] = df['genres'].apply(lambda x: [genre_mapping[genre_id] for genre_id in x]) 
# replaces numbers in the 'genres' column with genre names, applying to each list using lambda function
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['genres'] = df['genres'].apply(ast.literal_eval)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['genres'] = df['genres'].apply(lambda x: [genre_mapping[genre_id] for genre_id in x])


Unnamed: 0,name,summary,platforms,genres,game_modes,url
0,Maji Kyun! Renaissance,A cross media collaboration project between Su...,[46],[Visual Novel],[1],https://www.igdb.com/games/maji-kyun-renaissance
1,Soldier Front 2,A free-to-play multiplayer FPS currently in cl...,[6],[Shooter],"[2, 3]",https://www.igdb.com/games/soldier-front-2
2,Bubble Whirl Shooter,Shoot bubbles and match colors to pop your way...,"[34, 39]","[Puzzle, Arcade]",[1],https://www.igdb.com/games/bubble-whirl-shooter
3,Blood Bowl 3: Black Orcs Edition,Fashion is not exactly a priority for Black Or...,"[49, 169]",[Strategy],"[1, 2]",https://www.igdb.com/games/blood-bowl-3-black-...
4,Pet Puzzle,Pet Puzzle is a relaxing and leisurely three-i...,[6],[Simulator],[1],https://www.igdb.com/games/pet-puzzle
...,...,...,...,...,...,...
111590,.T.E.S.T: Expected Behaviour,.T.E.S.T: Expected Behavior is a 3D first-pers...,"[6, 14, 48, 49, 130]","[Puzzle, Simulator, Adventure, Indie]",[1],https://www.igdb.com/games/dot-t-e-dot-s-t-exp...
111591,Sol Cresta,"DOCK, SPLIT, and FORM UP!\nPilot three ships a...","[6, 48, 130]","[Shooter, Adventure, Indie, Arcade]",[1],https://www.igdb.com/games/sol-cresta
111592,Animal Hospital,Young animal lovers are warmly invited to come...,"[48, 49, 130, 167, 169]",[Simulator],[1],https://www.igdb.com/games/animal-hospital--1
111593,Alex Kidd in Miracle World,Journey through the planet Aries to the beauti...,"[5, 9, 64]","[Platform, Hack and slash/Beat 'em up, Adventure]",[1],https://www.igdb.com/games/alex-kidd-in-miracl...


In [24]:
df['platforms'] = df['platforms'].apply(ast.literal_eval)
platform_mapping = {platform['id']: platform['name'] for platform in platforms_response}
df['platforms'] = df['platforms'].apply(lambda x: [platform_mapping[platform_id] for platform_id in x])
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['platforms'] = df['platforms'].apply(ast.literal_eval)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['platforms'] = df['platforms'].apply(lambda x: [platform_mapping[platform_id] for platform_id in x])


Unnamed: 0,name,summary,platforms,genres,game_modes,url
0,Maji Kyun! Renaissance,A cross media collaboration project between Su...,[PlayStation Vita],[Visual Novel],[1],https://www.igdb.com/games/maji-kyun-renaissance
1,Soldier Front 2,A free-to-play multiplayer FPS currently in cl...,[PC (Microsoft Windows)],[Shooter],"[2, 3]",https://www.igdb.com/games/soldier-front-2
2,Bubble Whirl Shooter,Shoot bubbles and match colors to pop your way...,"[Android, iOS]","[Puzzle, Arcade]",[1],https://www.igdb.com/games/bubble-whirl-shooter
3,Blood Bowl 3: Black Orcs Edition,Fashion is not exactly a priority for Black Or...,"[Xbox One, Xbox Series X|S]",[Strategy],"[1, 2]",https://www.igdb.com/games/blood-bowl-3-black-...
4,Pet Puzzle,Pet Puzzle is a relaxing and leisurely three-i...,[PC (Microsoft Windows)],[Simulator],[1],https://www.igdb.com/games/pet-puzzle
...,...,...,...,...,...,...
111590,.T.E.S.T: Expected Behaviour,.T.E.S.T: Expected Behavior is a 3D first-pers...,"[PC (Microsoft Windows), Mac, PlayStation 4, X...","[Puzzle, Simulator, Adventure, Indie]",[1],https://www.igdb.com/games/dot-t-e-dot-s-t-exp...
111591,Sol Cresta,"DOCK, SPLIT, and FORM UP!\nPilot three ships a...","[PC (Microsoft Windows), PlayStation 4, Ninten...","[Shooter, Adventure, Indie, Arcade]",[1],https://www.igdb.com/games/sol-cresta
111592,Animal Hospital,Young animal lovers are warmly invited to come...,"[PlayStation 4, Xbox One, Nintendo Switch, Pla...",[Simulator],[1],https://www.igdb.com/games/animal-hospital--1
111593,Alex Kidd in Miracle World,Journey through the planet Aries to the beauti...,"[Wii, PlayStation 3, Sega Master System/Mark III]","[Platform, Hack and slash/Beat 'em up, Adventure]",[1],https://www.igdb.com/games/alex-kidd-in-miracl...


In [25]:
df['game_modes'] = df['game_modes'].apply(ast.literal_eval)
game_mode_mapping = {mode['id']: mode['name'] for mode in game_modes_response}
df['game_modes'] = df['game_modes'].apply(lambda x: [game_mode_mapping[mode_id] for mode_id in x])
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['game_modes'] = df['game_modes'].apply(ast.literal_eval)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['game_modes'] = df['game_modes'].apply(lambda x: [game_mode_mapping[mode_id] for mode_id in x])


Unnamed: 0,name,summary,platforms,genres,game_modes,url
0,Maji Kyun! Renaissance,A cross media collaboration project between Su...,[PlayStation Vita],[Visual Novel],[Single player],https://www.igdb.com/games/maji-kyun-renaissance
1,Soldier Front 2,A free-to-play multiplayer FPS currently in cl...,[PC (Microsoft Windows)],[Shooter],"[Multiplayer, Co-operative]",https://www.igdb.com/games/soldier-front-2
2,Bubble Whirl Shooter,Shoot bubbles and match colors to pop your way...,"[Android, iOS]","[Puzzle, Arcade]",[Single player],https://www.igdb.com/games/bubble-whirl-shooter
3,Blood Bowl 3: Black Orcs Edition,Fashion is not exactly a priority for Black Or...,"[Xbox One, Xbox Series X|S]",[Strategy],"[Single player, Multiplayer]",https://www.igdb.com/games/blood-bowl-3-black-...
4,Pet Puzzle,Pet Puzzle is a relaxing and leisurely three-i...,[PC (Microsoft Windows)],[Simulator],[Single player],https://www.igdb.com/games/pet-puzzle
...,...,...,...,...,...,...
111590,.T.E.S.T: Expected Behaviour,.T.E.S.T: Expected Behavior is a 3D first-pers...,"[PC (Microsoft Windows), Mac, PlayStation 4, X...","[Puzzle, Simulator, Adventure, Indie]",[Single player],https://www.igdb.com/games/dot-t-e-dot-s-t-exp...
111591,Sol Cresta,"DOCK, SPLIT, and FORM UP!\nPilot three ships a...","[PC (Microsoft Windows), PlayStation 4, Ninten...","[Shooter, Adventure, Indie, Arcade]",[Single player],https://www.igdb.com/games/sol-cresta
111592,Animal Hospital,Young animal lovers are warmly invited to come...,"[PlayStation 4, Xbox One, Nintendo Switch, Pla...",[Simulator],[Single player],https://www.igdb.com/games/animal-hospital--1
111593,Alex Kidd in Miracle World,Journey through the planet Aries to the beauti...,"[Wii, PlayStation 3, Sega Master System/Mark III]","[Platform, Hack and slash/Beat 'em up, Adventure]",[Single player],https://www.igdb.com/games/alex-kidd-in-miracl...


In [26]:
df['platforms'] = df['platforms'].apply(lambda x: ', '.join(x))
df['genres'] = df['genres'].apply(lambda x: ', '.join(x))
df['game_modes'] = df['game_modes'].apply(lambda x: ', '.join(x))
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['platforms'] = df['platforms'].apply(lambda x: ', '.join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['genres'] = df['genres'].apply(lambda x: ', '.join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['game_modes'] = df['game_modes'].apply(lambda x: ', '.join(x))


Unnamed: 0,name,summary,platforms,genres,game_modes,url
0,Maji Kyun! Renaissance,A cross media collaboration project between Su...,PlayStation Vita,Visual Novel,Single player,https://www.igdb.com/games/maji-kyun-renaissance
1,Soldier Front 2,A free-to-play multiplayer FPS currently in cl...,PC (Microsoft Windows),Shooter,"Multiplayer, Co-operative",https://www.igdb.com/games/soldier-front-2
2,Bubble Whirl Shooter,Shoot bubbles and match colors to pop your way...,"Android, iOS","Puzzle, Arcade",Single player,https://www.igdb.com/games/bubble-whirl-shooter
3,Blood Bowl 3: Black Orcs Edition,Fashion is not exactly a priority for Black Or...,"Xbox One, Xbox Series X|S",Strategy,"Single player, Multiplayer",https://www.igdb.com/games/blood-bowl-3-black-...
4,Pet Puzzle,Pet Puzzle is a relaxing and leisurely three-i...,PC (Microsoft Windows),Simulator,Single player,https://www.igdb.com/games/pet-puzzle
...,...,...,...,...,...,...
111590,.T.E.S.T: Expected Behaviour,.T.E.S.T: Expected Behavior is a 3D first-pers...,"PC (Microsoft Windows), Mac, PlayStation 4, Xb...","Puzzle, Simulator, Adventure, Indie",Single player,https://www.igdb.com/games/dot-t-e-dot-s-t-exp...
111591,Sol Cresta,"DOCK, SPLIT, and FORM UP!\nPilot three ships a...","PC (Microsoft Windows), PlayStation 4, Nintend...","Shooter, Adventure, Indie, Arcade",Single player,https://www.igdb.com/games/sol-cresta
111592,Animal Hospital,Young animal lovers are warmly invited to come...,"PlayStation 4, Xbox One, Nintendo Switch, Play...",Simulator,Single player,https://www.igdb.com/games/animal-hospital--1
111593,Alex Kidd in Miracle World,Journey through the planet Aries to the beauti...,"Wii, PlayStation 3, Sega Master System/Mark III","Platform, Hack and slash/Beat 'em up, Adventure",Single player,https://www.igdb.com/games/alex-kidd-in-miracl...


Finally, there were some big name games in the data that had their own rows for their own season. Dropping the rows really had an impacting on the recommendations, so I could not do that. I had to change the names of these rows to the names of the original game. I also changed the link, so that if it is recommended, they will be directed to the main game as well. 

In [27]:
result = df[df['name'].str.contains('Rocket League')]
df.loc[result.index, 'name'] = result['name'].str.split(':').str[0]
df.loc[result.index, 'url'] = 'https://www.igdb.com/games/rocket-league'
df[df['name'] == 'Rocket League']

Unnamed: 0,name,summary,platforms,genres,game_modes,url
3003,Rocket League,Season 5 is the nineteenth competitive season ...,"Linux, PC (Microsoft Windows), Mac, PlayStatio...","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
7158,Rocket League,"Welcome, esteemed Rocketeers! Season 7 feature...","Linux, PC (Microsoft Windows), Mac, PlayStatio...","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
7757,Rocket League,Season 4 is the eighteenth competitive season ...,"Linux, PC (Microsoft Windows), Mac, PlayStatio...","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
18688,Rocket League,It's time to take it outside!\nFar from the br...,"Linux, PC (Microsoft Windows), Mac, PlayStatio...","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
42355,Rocket League,Rocket League equips players with booster-rigg...,"PlayStation 4, Xbox One, Nintendo Switch","Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
47522,Rocket League,Welcome to the animated world of Rocket League...,"Linux, PC (Microsoft Windows), Mac, PlayStatio...","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
49800,Rocket League,Rocket League is a high-powered hybrid of arca...,"Linux, PC (Microsoft Windows), Mac, PlayStatio...","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
56885,Rocket League,Turn it up! Rocket League Season 2 is ready to...,"Linux, PC (Microsoft Windows), Mac, PlayStatio...","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
59312,Rocket League,Expand your Rocket League customization option...,"Linux, PC (Microsoft Windows), Mac, PlayStation 4","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league
75201,Rocket League,The Season 1 Series include more than a dozen ...,"Linux, PC (Microsoft Windows), Mac, PlayStatio...","Racing, Sport, Indie","Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/rocket-league


In [28]:
result1 = df[df['name'].str.contains('Fortnite')]
df.loc[result1.index, 'name'] = result1['name'].str.split(':').str[0]
df.loc[result1.index, 'url'] = 'https://www.igdb.com/games/fortnite'
df[df['name'] == 'Fortnite']

Unnamed: 0,name,summary,platforms,genres,game_modes,url
3023,Fortnite,"Chapter 2: Season 8, is the eighteenth season ...","PC (Microsoft Windows), PlayStation 4, Xbox On...","Shooter, Role-playing (RPG), Strategy","Multiplayer, Co-operative, Battle Royale",https://www.igdb.com/games/fortnite
6427,Fortnite,Explore the Fortilla and build your own umbrel...,"PC (Microsoft Windows), PlayStation 4, Xbox One",Shooter,"Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/fortnite
7681,Fortnite,Become everyone’s nemesis with The Last Laugh ...,"PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,"Single player, Multiplayer",https://www.igdb.com/games/fortnite
8594,Fortnite,The volcano has erupted and forever changed so...,"PC (Microsoft Windows), PlayStation 4, Xbox One",Shooter,"Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/fortnite
9797,Fortnite,Season 8 has arrived and a monstrous volcano h...,"PC (Microsoft Windows), PlayStation 4, Xbox One",Shooter,"Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/fortnite
11897,Fortnite,Are you daring enough to enter the Wailing Woo...,"PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,"Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/fortnite
11937,Fortnite,"Chapter 2: Season 7, is the seventeenth season...","PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,"Multiplayer, Co-operative, Battle Royale",https://www.igdb.com/games/fortnite
12370,Fortnite,"Drop into Season 5, where we’re introducing al...","PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,"Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/fortnite
12784,Fortnite,Season 4 is starting off with a BANG. Shards o...,"PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,"Single player, Multiplayer, Co-operative, Spli...",https://www.igdb.com/games/fortnite
20844,Fortnite,Find out what Fortnite Wilds is all about when...,"PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,Battle Royale,https://www.igdb.com/games/fortnite


In [30]:
result2 = df[df['name'].str.contains('Overwatch 2')]
df.loc[result2.index, 'name'] = result2['name'].str.split(':').str[0]
df.loc[result2.index, 'url'] = 'https://www.igdb.com/games/overwatch-2'
df[df['name'] == 'Overwatch 2']

Unnamed: 0,name,summary,platforms,genres,game_modes,url
13093,Overwatch 2,Overwatch 2 Season 9: Champions launches Feb 1...,"PC (Microsoft Windows), PlayStation 4, Xbox On...","Shooter, Strategy",Multiplayer,https://www.igdb.com/games/overwatch-2
17384,Overwatch 2,Overwatch 2 Season 8: Call of the Hunt adds th...,"PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,Multiplayer,https://www.igdb.com/games/overwatch-2
17386,Overwatch 2,Embark on a mythic adventure!\n\nJoin Tracer a...,"PC (Microsoft Windows), PlayStation 4, Xbox On...","Shooter, Strategy",Multiplayer,https://www.igdb.com/games/overwatch-2
17429,Overwatch 2,Overwatch 2’s first tempo tank Ramattra joins ...,"PC (Microsoft Windows), PlayStation 4, Xbox On...","Shooter, Strategy","Single player, Multiplayer, Co-operative",https://www.igdb.com/games/overwatch-2
17463,Overwatch 2,"Get ready for the spookiest event of the year,...","PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,Multiplayer,https://www.igdb.com/games/overwatch-2
20791,Overwatch 2,Blossom onto the battlefield with our newest S...,"PC (Microsoft Windows), PlayStation 4, Xbox On...","Shooter, Strategy","Single player, Multiplayer",https://www.igdb.com/games/overwatch-2
27797,Overwatch 2,Drop into season three to explore the depths o...,"PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,"Single player, Multiplayer, Co-operative",https://www.igdb.com/games/overwatch-2
30161,Overwatch 2,Overwatch 2 is a free-to-play shooter featurin...,"PC (Microsoft Windows), PlayStation 4, Xbox On...",Shooter,"Multiplayer, Co-operative",https://www.igdb.com/games/overwatch-2
30223,Overwatch 2,"A totally real, totally non-canon dating sim h...",Web browser,"Simulator, Visual Novel",Single player,https://www.igdb.com/games/overwatch-2
65376,Overwatch 2,"Introducing Overwatch 2: Invasion, an all-new ...","PC (Microsoft Windows), PlayStation 4, Xbox On...","Shooter, Strategy",Multiplayer,https://www.igdb.com/games/overwatch-2


In [34]:
df.to_csv('games_clean.csv', index=False)