# EDA of Merged Steam Data

## Preliminary

### Import Modules

In [1]:
import sys
sys.path.append('../../') # add path of project root directory to sys.path

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import utilities

### Set Display Options

In [2]:
# Widen the notebook's pandas display limits and fix the float rendering.
pd.options.display.max_rows = 70  # show up to 70 rows before truncating
pd.options.display.max_columns = 50  # show up to 50 columns before truncating
pd.options.display.float_format = '{:.2f}'.format  # two fixed decimals, no scientific notation

## Load Data

In [3]:
# Locations of the two raw inputs, relative to this notebook.
steamdb_path = r'../../data/steamdb.json'
game_data_path = r'../../data/game_data_all.csv'

# The CSV's first column is used as the row index rather than as data.
df_steamdb = pd.read_json(steamdb_path)
df_game_data = pd.read_csv(game_data_path, index_col=0)

## Merge Data

In [4]:
# Combine the two sources into one frame using the project helper; presumably rows are
# matched on df_steamdb['store_url'] and df_game_data['link'] — TODO confirm in utilities.URLMerge.
# NOTE(review): the describe() output further down reports 51697 unique store_url values
# across 52560 rows, so the merged frame contains repeated games — confirm this is intended.
df = utilities.URLMerge(df_steamdb, 'store_url', df_game_data, 'link')

## Check Datatypes

In [5]:
# Print each column's dtype and non-null count for the merged frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52560 entries, 0 to 52559
Data columns (total 66 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   sid                     52560 non-null  int64  
 1   store_url               52560 non-null  object 
 2   store_promo_url         7353 non-null   object 
 3   store_uscore            33350 non-null  float64
 4   published_store         52470 non-null  object 
 5   published_meta          32834 non-null  object 
 6   published_stsp          19209 non-null  object 
 7   published_hltb          25798 non-null  object 
 8   published_igdb          20666 non-null  object 
 9   image                   52560 non-null  object 
 10  name                    52560 non-null  object 
 11  description             52529 non-null  object 
 12  full_price              45460 non-null  float64
 13  current_price           45460 non-null  float64
 14  discount                6465 non-null 

In [6]:
# Preview the first three rows, transposed so each column is shown as a row.
df.head(n=3).transpose()

Unnamed: 0,0,1,2
sid,360,380,400
store_url,https://store.steampowered.com/app/360,https://store.steampowered.com/app/380,https://store.steampowered.com/app/400
store_promo_url,,https://www.youtube.com/watch?v=DL_mPw7KEU8,https://www.youtube.com/watch?v=nA9ChSA6wV4
store_uscore,76.00,95.00,98.00
published_store,2006-05-01,2006-06-01,2007-10-10
published_meta,2006-05-01,2006-06-01,2007-10-10
published_stsp,2006-05-01,2006-06-01,2007-10-10
published_hltb,2005-07-02,2006-06-01,2007-10-09
published_igdb,2006-05-01,2006-06-01,2007-10-09
image,https://steamcdn-a.akamaihd.net/steam/apps/360...,https://steamcdn-a.akamaihd.net/steam/apps/380...,https://steamcdn-a.akamaihd.net/steam/apps/400...


## Check for Missing Values

### Published Store

In [7]:
# Show every row that has no value in 'published_store'.
missing_store_date = df['published_store'].isna()
df[missing_store_date]

Unnamed: 0,sid,store_url,store_promo_url,store_uscore,published_store,published_meta,published_stsp,published_hltb,published_igdb,image,name,description,full_price,current_price,discount,platforms,developers,publishers,languages,voiceovers,categories,genres,tags,achievements,gfq_url,...,igdb_single,igdb_complete,igdb_score,igdb_uscore,igdb_popularity,merge_col,game,link,release,peak_players,positive_reviews,negative_reviews,total_reviews,rating,primary_genre,store_genres,publisher,developer,detected_technologies,store_asset_mod_time,review_percentage,players_right_now,24_hour_peak,all_time_peak,all_time_peak_date
554,29160,https://store.steampowered.com/app/29160,https://www.youtube.com/watch?v=AOcSUDVsdu4,77.00,,2009-06-10,2009-06-01,2009-06-10,2009-06-10,https://steamcdn-a.akamaihd.net/steam/apps/291...,Blueberry Garden,Congratulations Blueberry Garden! Winner of t...,499.00,499.00,,"WIN,MAC,LNX",Erik Svedäng,Erik Svedäng,English,,Single-player,"Adventure,Indie","Adventure,Indie,Short,Puzzle",,https://gamefaqs.gamespot.com/pc/960934-bluebe...,...,,,80.00,60.00,1.00,29160,Blueberry Garden,/app/29160/,2009-06-10,14,114,34,148,71.03,Indie (23),"Adventure (25), Indie (23)",Erik Svedäng,Erik Svedäng,Engine.FNA,,77.00,0,1,14,2012-09-01
562,31210,https://store.steampowered.com/app/31210,,88.00,,2010-02-01,2009-12-30,,,https://steamcdn-a.akamaihd.net/steam/apps/312...,Tales of Monkey Island Complete Pack: Chapter ...,About Chapter One<br>\r\n\t\t\t\t\tThe saga be...,,,,WIN,Telltale Games,Telltale Games,English,,Single-player,Adventure,Adventure,,,...,,,,,,31210,Tales of Monkey Island: Chapter 5 - Rise of th...,/app/31210/,2009-12-09,194,115,14,129,80.10,Adventure (25),"Adventure (25), RPG (3)",Telltale,Telltale,Engine.TelltaleTool; SDK.FMOD,2023-06-13,88.00,0,5,194,2009-12-12
563,31270,https://store.steampowered.com/app/31270,https://www.youtube.com/watch?v=XtbMF5pHOEs,88.00,,2010-06-30,2010-07-01,2010-06-30,2010-06-04,https://steamcdn-a.akamaihd.net/steam/apps/312...,Puzzle Agent,When White House inquiries to the Scoggins Era...,499.00,499.00,,"WIN,MAC","LCG Entertainment, Inc.","Athlon Games, Inc.",English,,Single-player,"Action,Adventure,Casual","Puzzle,Adventure,Point & Click,Detective,Myste...",,https://gamefaqs.gamespot.com/pc/995862-puzzle...,...,,5.00,,61.00,1.45,31270,Puzzle Agent,/app/31270/,2010-07-07,228,2129,282,2411,84.63,Adventure (25),"Action (1), Adventure (25), Casual (4)",Telltale,Telltale,Engine.TelltaleTool; SDK.FMOD,2023-06-13,88.00,1,10,228,2013-06-01
604,32740,https://store.steampowered.com/app/32740,,81.00,,1998-06-23,2009-06-01,1998-09-07,,https://steamcdn-a.akamaihd.net/steam/apps/327...,MiG-29 Fulcrum,The Mikoyan-Gurevich MiG-29 Fulcrum is one of ...,999.00,999.00,,WIN,NovaLogic,"NovaLogic,THQ Nordic",English,,"Single-player,Multi-player",Simulation,"Simulation,Flight",,https://gamefaqs.gamespot.com/pc/915731-mig-29...,...,,,,,,32740,MiG-29 Fulcrum,/app/32740/,2009-06-18,6,68,11,79,76.43,Simulation (28),Simulation (28),NovaLogic,NovaLogic,,2019-07-15,86.00,0,2,6,2020-04-12
689,37700,https://store.steampowered.com/app/37700,,71.00,,2009-09-07,2009-09-01,2009-09-08,2009-09-07,https://steamcdn-a.akamaihd.net/steam/apps/377...,Darkest of Days,<p>Have you ever wondered what could happen if...,1999.00,1999.00,,WIN,8monkey Labs,Phantom EFX,"English,French,German,Italian,Russian,Spanish ...",,"Single-player,Partial Controller Support",Action,"Action,FPS,Time Travel,Historical,World War I,...",,https://gamefaqs.gamespot.com/pc/941526-darkes...,...,,,64.00,69.00,1.00,37700,Darkest of Days,/app/37700/,2010-02-08,177,230,74,304,71.07,Action (1),Action (1),Phantom EFX,8monkey Labs,SDK.Bink_Video; SDK.NVIDIA_APEX; SDK.NVIDIA_Ph...,,75.00,0,2,177,2009-09-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50763,1841630,https://store.steampowered.com/app/1841630,,,,,,,,https://cdn.akamai.steamstatic.com/steam/apps/...,Armor Clash 1 Remake [RTS],Armor Clash 1 Remake is an old-school modern w...,1499.00,1499.00,,WIN,Windforce,Windforce,English,English,Single-player,"Indie,Simulation,Strategy,Early Access",,,,...,,,,,,1841630,Armor Clash 2022,/app/1841630/,2022-06-06,4,8,13,21,42.79,Strategy (2),"Action (1), Indie (23), Simulation (28), Strat...",Windforce,Windforce,Engine.Unity; SDK.UnityBurst; SDK.UnityHDRP; S...,2022-09-24,38.00,0,0,4,2022-06-09
51779,1896750,https://store.steampowered.com/app/1896750,,,,,,2016-06-17,,https://cdn.akamai.steamstatic.com/steam/apps/...,Cursed,"In a small unnamed rural town, a girl has been...",,,,WIN,Disaster Squad Productions,Disaster Squad Productions,English,,Single-player,"Action,Adventure,Indie",,,,...,,,,,,1896750,Cursed,/app/1896750/,2022-02-18,10,69,9,78,78.14,Adventure (25),"Action (1), Adventure (25), Free to Play (37)",Llama Games,Llama Games,Engine.Unity; SDK.cURL,2022-12-20,51.00,0,2,23,2023-01-10
51780,1896750,https://store.steampowered.com/app/1896750,,,,,,2016-06-17,,https://cdn.akamai.steamstatic.com/steam/apps/...,Cursed,"In a small unnamed rural town, a girl has been...",,,,WIN,Disaster Squad Productions,Disaster Squad Productions,English,,Single-player,"Action,Adventure,Indie",,,,...,,,,,,1896750,Cursed,/app/1896750/,2022-02-18,10,69,9,78,78.14,Adventure (25),"Action (1), Adventure (25), Indie (23)",Disaster Squad Productions,Disaster Squad Productions,,2022-02-04,87.00,0,2,10,2022-11-12
51781,1896750,https://store.steampowered.com/app/1896750,,,,,,2016-06-17,,https://cdn.akamai.steamstatic.com/steam/apps/...,Cursed,"In a small unnamed rural town, a girl has been...",,,,WIN,Disaster Squad Productions,Disaster Squad Productions,English,,Single-player,"Action,Adventure,Indie",,,,...,,,,,,1896750,Cursed,/app/1896750/,2022-02-18,10,69,9,78,78.14,Adventure (25),"Adventure (25), Indie (23)",Jetdogs Studios,Jetdogs Studios,SDK.BASS,2016-06-06,80.00,1,2,211,2016-08-01


In [8]:
# Preview the first five rows, transposed so each column is shown as a row.
df.head(n=5).transpose()

Unnamed: 0,0,1,2,3,4
sid,360,380,400,420,440
store_url,https://store.steampowered.com/app/360,https://store.steampowered.com/app/380,https://store.steampowered.com/app/400,https://store.steampowered.com/app/420,https://store.steampowered.com/app/440
store_promo_url,,https://www.youtube.com/watch?v=DL_mPw7KEU8,https://www.youtube.com/watch?v=nA9ChSA6wV4,https://www.youtube.com/watch?v=n0l5N6Exjz0,https://www.youtube.com/watch?v=C4cfo0f88Ug
store_uscore,76.00,95.00,98.00,96.00,94.00
published_store,2006-05-01,2006-06-01,2007-10-10,2007-10-10,2007-10-10
published_meta,2006-05-01,2006-06-01,2007-10-10,2007-10-10,2007-10-10
published_stsp,2006-05-01,2006-06-01,2007-10-10,2007-10-10,2007-10-10
published_hltb,2005-07-02,2006-06-01,2007-10-09,2007-10-10,2007-10-10
published_igdb,2006-05-01,2006-06-01,2007-10-09,2007-10-09,2007-10-09
image,https://steamcdn-a.akamaihd.net/steam/apps/360...,https://steamcdn-a.akamaihd.net/steam/apps/380...,https://steamcdn-a.akamaihd.net/steam/apps/400...,https://steamcdn-a.akamaihd.net/steam/apps/420...,https://steamcdn-a.akamaihd.net/steam/apps/440...


## Format Data

In [9]:
# Replace df with the result of the project helper; judging by the info() output that
# follows, the published_* columns come back as datetime64[ns].
# NOTE(review): df is overwritten with its own transformed copy, so re-running this cell
# feeds the already-converted frame back in — confirm SetDatetimeCols is safe to apply twice.
df = utilities.SetDatetimeCols(df)

In [10]:
# Re-print dtypes and non-null counts to verify the datetime conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52560 entries, 0 to 52559
Data columns (total 66 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   sid                     52560 non-null  int64         
 1   store_url               52560 non-null  object        
 2   store_promo_url         7353 non-null   object        
 3   store_uscore            33350 non-null  float64       
 4   published_store         52470 non-null  datetime64[ns]
 5   published_meta          32834 non-null  datetime64[ns]
 6   published_stsp          19209 non-null  datetime64[ns]
 7   published_hltb          25798 non-null  datetime64[ns]
 8   published_igdb          20666 non-null  datetime64[ns]
 9   image                   52560 non-null  object        
 10  name                    52560 non-null  object        
 11  description             52529 non-null  object        
 12  full_price              45460 non-null  float6

In [11]:
# Summary statistics for every column (numeric and non-numeric), one column per row.
df.describe(include='all').transpose()

Unnamed: 0,count,unique,top,freq,mean,min,25%,50%,75%,max,std
sid,52560.0,,,,959760.86,360.00,580002.50,936775.00,1312745.00,1979280.00,464752.18
store_url,52560.0,51697.0,https://store.steampowered.com/app/871870,6.0,,,,,,,
store_promo_url,7353.0,6914.0,https://www.youtube.com/watch?v=VoyzGIZ6xLY,16.0,,,,,,,
store_uscore,33350.0,,,,70.26,1.00,56.00,75.00,86.00,100.00,19.76
published_store,52470.0,,,,2018-11-22 04:13:41.680960512,2004-03-17 00:00:00,2017-07-30 06:00:00,2019-03-30 12:00:00,2020-10-15 00:00:00,2022-04-27 00:00:00,
published_meta,32834.0,,,,2017-01-19 22:06:19.362855168,1979-02-01 00:00:00,2016-05-03 06:00:00,2017-11-24 00:00:00,2019-01-25 00:00:00,2020-10-01 00:00:00,
published_stsp,19209.0,,,,2015-11-06 08:37:46.937372928,1970-07-04 00:00:00,2015-06-25 00:00:00,2016-12-05 00:00:00,2017-10-30 00:00:00,2019-11-16 00:00:00,
published_hltb,25798.0,,,,2016-02-24 07:04:16.484998912,1976-04-01 00:00:00,2015-07-31 00:00:00,2017-06-13 00:00:00,2019-01-24 00:00:00,2022-06-10 00:00:00,
published_igdb,20666.0,,,,2015-12-17 17:03:31.477789696,1982-12-31 00:00:00,2015-06-05 00:00:00,2016-11-18 00:00:00,2017-10-24 00:00:00,2029-12-31 00:00:00,
image,52560.0,51697.0,https://steamcdn-a.akamaihd.net/steam/apps/871...,6.0,,,,,,,


In [12]:
# Whole-day difference between 'release' and 'published_store'
# (positive when 'release' is the later date).
release_gap = df['release'] - df['published_store']
df['release_delta'] = release_gap.dt.days

In [13]:
# Distribution of the day difference between 'release' and 'published_store'.
df['release_delta'].describe()

count   52470.00
mean       18.95
std       144.84
min     -5052.00
25%         0.00
50%         0.00
75%         0.00
max      3197.00
Name: release_delta, dtype: float64

In [14]:
# List the games whose 'release' and 'published_store' dates disagree, sorted by ascending delta.
# Rows with a missing delta also pass the filter (NaN != 0 evaluates to True) and sort last.
delta_columns = ['name', 'release', 'published_store', 'published_meta', 'release_delta']
mismatched = df.loc[df['release_delta'].ne(0), delta_columns]
mismatched.sort_values('release_delta')

Unnamed: 0,name,release,published_store,published_meta,release_delta
42453,Stubbs the Zombie in Rebel Without a Pulse,2007-05-17,2021-03-16,NaT,-5052.00
42454,Second Sight,2009-02-13,2021-04-08,NaT,-4437.00
303,Grand Theft Auto IV: The Complete Edition,2008-12-02,2020-03-24,2008-12-02,-4130.00
750,Serious Sam Classic: The First Encounter,2010-03-22,2019-08-30,2001-03-21,-3448.00
751,Serious Sam Classic: The Second Encounter,2010-09-22,2019-08-30,2002-02-04,-3264.00
...,...,...,...,...,...
50763,Armor Clash 1 Remake [RTS],2022-06-06,NaT,NaT,
51779,Cursed,2022-02-18,NaT,NaT,
51780,Cursed,2022-02-18,NaT,NaT,
51781,Cursed,2022-02-18,NaT,NaT,


In [15]:
# Re-print the column summary now that 'release_delta' has been added to the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52560 entries, 0 to 52559
Data columns (total 67 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   sid                     52560 non-null  int64         
 1   store_url               52560 non-null  object        
 2   store_promo_url         7353 non-null   object        
 3   store_uscore            33350 non-null  float64       
 4   published_store         52470 non-null  datetime64[ns]
 5   published_meta          32834 non-null  datetime64[ns]
 6   published_stsp          19209 non-null  datetime64[ns]
 7   published_hltb          25798 non-null  datetime64[ns]
 8   published_igdb          20666 non-null  datetime64[ns]
 9   image                   52560 non-null  object        
 10  name                    52560 non-null  object        
 11  description             52529 non-null  object        
 12  full_price              45460 non-null  float6