# ProcessGameMetadata
1. Clean the metadata loaded from the Steam games JSON file.
2. Add bundle data to the games metadata as a new column.

In [1]:
import pandas as pd
import numpy as np
import json

## Clean steam games metadata

In [2]:
# Load the Steam games metadata JSON file into a DataFrame.
games_metadata = pd.read_json('./json_data/steam_games_fixed.json')

In [3]:
# (rows, columns) of the metadata as loaded, before any cleaning.
games_metadata.shape

(32135, 16)

In [4]:
# Discard rows that are missing either the game id or the app name.
games_metadata = games_metadata.dropna(
    subset=["id", "app_name"],
    how="any",
)

In [5]:
# Keep only the first row encountered for each game id.
games_metadata = games_metadata.drop_duplicates(subset=["id"], keep="first")

In [9]:
# Eyeball the raw price column: numbers are mixed with label strings
# (e.g. 'Free To Play', 'Free') and NaN, so it needs cleaning before use.
# NOTE(review): this prints every value; a shorter summary such as
# .unique() would keep the notebook output small.
list(games_metadata["price"])

[4.99,
 'Free To Play',
 'Free to Play',
 0.99,
 2.99,
 3.99,
 9.99,
 18.99,
 29.99,
 nan,
 nan,
 'Free',
 10.99,
 3.99,
 2.99,
 1.5899999999999999,
 14.99,
 1.99,
 59.99,
 4.99,
 2.99,
 0.99,
 0.99,
 2.99,
 2.99,
 4.99,
 4.99,
 9.99,
 9.99,
 1.99,
 3.99,
 nan,
 nan,
 0.99,
 nan,
 nan,
 nan,
 8.99,
 0.99,
 6.99,
 6.99,
 9.99,
 nan,
 nan,
 nan,
 nan,
 7.99,
 nan,
 10.99,
 9.99,
 9.99,
 29.99,
 1.99,
 nan,
 0.99,
 9.99,
 9.99,
 9.99,
 9.99,
 39.99,
 'Free',
 4.99,
 4.99,
 4.99,
 19.99,
 9.99,
 7.49,
 14.99,
 9.99,
 4.99,
 9.99,
 4.99,
 9.99,
 9.99,
 4.99,
 4.99,
 4.99,
 8.99,
 19.99,
 19.99,
 4.99,
 nan,
 19.99,
 4.99,
 14.99,
 4.99,
 4.99,
 4.99,
 2.99,
 2.99,
 9.99,
 2.99,
 4.99,
 4.99,
 12.99,
 19.99,
 12.99,
 12.99,
 9.99,
 4.99,
 6.99,
 nan,
 nan,
 9.99,
 9.99,
 9.99,
 6.99,
 9.99,
 19.99,
 9.99,
 9.99,
 9.99,
 9.99,
 9.99,
 5.99,
 7.99,
 9.99,
 9.99,
 4.99,
 2.99,
 19.99,
 2.99,
 14.99,
 9.99,
 4.99,
 9.99,
 9.99,
 9.99,
 9.99,
 9.99,
 9.99,
 4.99,
 9.99,
 9.99,
 9.99,
 9.99,
 9.99

In [10]:
# Any price label mentioning "free" or "demo" means the game costs nothing.
# The column is cast to str first so .str.contains works on the numeric and
# NaN entries too (they simply do not match the pattern).
free_or_demo = (
    games_metadata['price']
    .astype(str)
    .str.contains("Free|free|Demo|demo")
)
games_metadata.loc[free_or_demo, 'price'] = 0

In [12]:
# Coerce price to numeric. errors='coerce' turns every remaining non-numeric
# value (any label string that was not matched as free/demo) into NaN -- not
# 0 -- and values that were already NaN stay NaN.
# The column is assigned directly instead of through .loc[:, 'price']: on
# recent pandas versions the .loc form writes into the existing column in
# place, which can leave it with object dtype instead of float.
games_metadata['price'] = pd.to_numeric(games_metadata['price'], errors='coerce')

## Bundle data: create item-bundle mapping and add to games metadata

In [21]:
# Read the raw bundle records; the file only needs to stay open for the parse.
with open('./json_data/bundle_data_fixed.json') as f:
    df_items = json.load(f)

# Flatten to one row per bundled item, repeating the bundle-level fields
# listed in `meta` on each of that bundle's item rows.
bundle_df = pd.json_normalize(
    data=df_items,
    record_path='items',
    meta=[
        'bundle_final_price',
        'bundle_url',
        'bundle_price',
        'bundle_name',
        'bundle_id',
        'bundle_discount',
    ],
)

In [28]:
# (rows, columns) of the flattened item-level bundle table.
bundle_df.shape

(3500, 11)

In [23]:
# Keep a single row per (item, bundle) pair.
# The result is rebound instead of using inplace=True, matching how
# games_metadata is cleaned earlier in this notebook.
bundle_df = bundle_df.drop_duplicates(subset=["item_id", "bundle_id"])

In [30]:
# Convert item_id to a numeric type so it can later be matched against the
# numeric id column of games_metadata. errors='coerce' replaces unparsable
# ids with NaN, and a column that may hold NaN has to be float, so the ids
# are floats at this point even though they are conceptually integers.
bundle_df['item_id'] = pd.to_numeric(bundle_df['item_id'], errors='coerce')

In [31]:
# Remove rows whose item_id is missing (NaN).
# The result is rebound instead of using inplace=True, matching how
# games_metadata is cleaned earlier in this notebook.
bundle_df = bundle_df.dropna(subset=["item_id"])

In [32]:
# Rows with a missing item_id were dropped in the previous cell, so the
# column can now be cast to integer ids.
bundle_df['item_id'] = bundle_df['item_id'].astype(int)
bundle_df

Unnamed: 0,genre,item_id,discounted_price,item_url,item_name,bundle_final_price,bundle_url,bundle_price,bundle_name,bundle_id,bundle_discount
0,"Adventure, Indie, RPG",326950,$8.99,http://store.steampowered.com/app/326950,Sword of Asumi,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%
1,"Adventure, Indie, RPG",331490,$2.99,http://store.steampowered.com/app/331490,Sword of Asumi - Soundtrack,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%
2,"Adventure, Indie, RPG",331491,$1.99,http://store.steampowered.com/app/331491,Sword of Asumi - Graphic Novel,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%
3,"Adventure, Indie, RPG",331492,$0.99,http://store.steampowered.com/app/331492,Sword of Asumi - Character Creator,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%
4,"Adventure, Casual, Indie",348540,$12.99,http://store.steampowered.com/app/348540,Divine Slice of Life,$66.46,http://store.steampowered.com/bundle/450/?utm_...,$73.86,Dharker Studio 2015 Complete,450,10%
...,...,...,...,...,...,...,...,...,...,...,...
3520,"Action, Adventure, Casual, Indie",467220,$4.99,http://store.steampowered.com/app/467220,Dyna Bomb,$5.58,http://store.steampowered.com/bundle/588/?utm_...,$6.98,Dyna Bomb - Game + OST Pack #1,588,20%
3521,"Action, Adventure, Casual, Indie",485090,$1.99,http://store.steampowered.com/app/485090,Dyna Bomb - Soundtrack OST,$5.58,http://store.steampowered.com/bundle/588/?utm_...,$6.98,Dyna Bomb - Game + OST Pack #1,588,20%
3522,"Action, Adventure, Indie",385230,$1.99,http://store.steampowered.com/app/385230,Ninjahtic,$4.77,http://store.steampowered.com/bundle/594/?utm_...,$5.97,The Ninjahtic Series,594,20%
3523,"Action, Adventure, Indie",387880,$1.99,http://store.steampowered.com/app/387880,Ninjahtic Mind Tricks,$4.77,http://store.steampowered.com/bundle/594/?utm_...,$5.97,The Ninjahtic Series,594,20%


In [33]:
# For every bundle name, collect the ids of the items that belong to it.
bundle_itemid_df = (
    bundle_df
    .groupby('bundle_name')['item_id']
    .apply(list)
    .reset_index(name='items')
)

In [34]:
# Preview the bundle name -> list of item ids table.
bundle_itemid_df

Unnamed: 0,bundle_name,items
0,"""Bye-Bye, Wacky Planet"": Shoot and Dance Edition","[492130, 509410]"
1,.EXE - Game + OST,"[471640, 489990]"
2,100% Orange Juice - All Stars Collection,"[282800, 327340, 334080, 338360, 376200, 38879..."
3,10tons Shooters,"[262830, 428750]"
4,10tons Total,"[262830, 370550, 394290, 428750, 525450]"
...,...,...
607,Zonitron Productions Mixed Pack,"[368900, 410590, 431270, 432150]"
608,Zotrix Bundle,"[343280, 351720]"
609,fault complete edition,"[286260, 344770, 408360, 441270]"
610,onechanbara Z2 :Chaos All DLC Bundle,"[464510, 464511, 464512, 464513, 464520, 46452..."


In [35]:
# For every item id, collect the names of all bundles that include it.
item_bundle_df = (
    bundle_df
    .groupby('item_id')['bundle_name']
    .apply(list)
    .reset_index(name='bundles')
)

In [36]:
# Preview the item id -> list of bundle names table.
item_bundle_df

Unnamed: 0,item_id,bundles
0,20,"[Half-Life Complete, Valve Complete Pack, Half..."
1,30,"[Valve Complete Pack, Counter-Strike 1 Anthology]"
2,40,"[Valve Complete Pack, Counter-Strike 1 Anthology]"
3,50,"[Half-Life Complete, Valve Complete Pack, Half..."
4,60,"[Valve Complete Pack, Counter-Strike 1 Anthology]"
...,...,...
2782,528389,"[Naruto Shippuden Uncut Season 4, Vol. 4]"
2783,528390,"[Naruto Shippuden Uncut Season 4, Vol. 4]"
2784,528391,"[Naruto Shippuden Uncut Season 4, Vol. 4]"
2785,528392,"[Naruto Shippuden Uncut Season 4, Vol. 4]"


## Merge bundle info into metadata

In [214]:
# Attach each game's bundle list. A left join keeps every game; games that
# appear in no bundle get NaN in the added item_id and bundles columns.
games_metadata = pd.merge(
    games_metadata,
    item_bundle_df,
    how='left',
    left_on='id',
    right_on='item_id',
)

In [217]:
# Inspect the merged result; item_id and bundles are NaN for games that
# matched no bundle.
games_metadata

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore,item_id,bundles
0,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",4.49,http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,False,761140.0,Kotoshiro,,,,
1,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",,http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",0.00,False,643980.0,Secret Level SRL,Mostly Positive,,,
2,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",,http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",0.00,False,670290.0,Poolians.com,Mostly Positive,,,
3,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,"[Action, Adventure, Casual]",0.83,http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,False,767400.0,彼岸领域,,,,
4,,,Log Challenge,,http://store.steampowered.com/app/773570/Log_C...,,"[Action, Indie, Casual, Sports]",1.79,http://steamcommunity.com/app/773570/reviews/?...,"[Single-player, Full controller support, HTC V...",2.99,False,773570.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32126,Ghost_RUS Games,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,Colony On Mars,http://store.steampowered.com/app/773640/Colon...,2018-01-04,"[Strategy, Indie, Casual, Simulation]",1.49,http://steamcommunity.com/app/773640/reviews/?...,"[Single-player, Steam Achievements]",1.99,False,773640.0,"Nikita ""Ghost_RUS""",,,,
32127,Sacada,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,LOGistICAL: South Africa,http://store.steampowered.com/app/733530/LOGis...,2018-01-04,"[Strategy, Indie, Casual]",4.24,http://steamcommunity.com/app/733530/reviews/?...,"[Single-player, Steam Achievements, Steam Clou...",4.99,False,733530.0,Sacada,,,,
32128,Laush Studio,"[Indie, Racing, Simulation]",Russian Roads,Russian Roads,http://store.steampowered.com/app/610660/Russi...,2018-01-04,"[Indie, Simulation, Racing]",1.39,http://steamcommunity.com/app/610660/reviews/?...,"[Single-player, Steam Achievements, Steam Trad...",1.99,False,610660.0,Laush Dmitriy Sergeevich,,,,
32129,SIXNAILS,"[Casual, Indie]",EXIT 2 - Directions,EXIT 2 - Directions,http://store.steampowered.com/app/658870/EXIT_...,2017-09-02,"[Indie, Casual, Puzzle, Singleplayer, Atmosphe...",,http://steamcommunity.com/app/658870/reviews/?...,"[Single-player, Steam Achievements, Steam Cloud]",4.99,False,658870.0,"xropi,stev3ns",1 user reviews,,,


In [218]:
# Persist the cleaned, bundle-enriched metadata without the row index.
games_metadata.to_csv(path_or_buf="games_metadata.csv", index=False)

### Create a full list of game id-to-name pairs for games present in both user-items and games metadata