# EDA - SteamDB

Data source: https://github.com/leinstay/steamdb

## Import Modules

In [1]:
## general imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
from IPython.display import display
import statsmodels.formula.api as smf


## Set Options

In [2]:
# Notebook-wide pandas display settings: show up to 50 rows and 50 columns,
# and render floats with exactly two decimal places.
pandas_display_options = {
    'display.max_rows': 50,
    'display.max_columns': 50,
    'display.float_format': '{:.2f}'.format,
}
for option_name, option_value in pandas_display_options.items():
    pd.set_option(option_name, option_value)

## Load Data

In [3]:
# Load the SteamDB JSON file into the working DataFrame `df`.
# The path is relative, so it resolves against the kernel's working directory.
df = pd.read_json(r'../../data/steamdb.json')

## Data - First Look

In [4]:
# Show the (rows, columns) dimensions of the freshly loaded frame.
display(df.shape)

(53981, 46)

In [5]:
# List every column name present in the raw data.
print(df.columns)

Index(['sid', 'store_url', 'store_promo_url', 'store_uscore',
       'published_store', 'published_meta', 'published_stsp', 'published_hltb',
       'published_igdb', 'image', 'name', 'description', 'full_price',
       'current_price', 'discount', 'platforms', 'developers', 'publishers',
       'languages', 'voiceovers', 'categories', 'genres', 'tags',
       'achievements', 'gfq_url', 'gfq_difficulty', 'gfq_difficulty_comment',
       'gfq_rating', 'gfq_rating_comment', 'gfq_length', 'gfq_length_comment',
       'stsp_owners', 'stsp_mdntime', 'hltb_url', 'hltb_single',
       'hltb_complete', 'meta_url', 'meta_score', 'meta_uscore', 'grnk_score',
       'igdb_url', 'igdb_single', 'igdb_complete', 'igdb_score', 'igdb_uscore',
       'igdb_popularity'],
      dtype='object')


In [6]:
# Preview the first ten rows of the raw data.
display(df.head(10))

Unnamed: 0,sid,store_url,store_promo_url,store_uscore,published_store,published_meta,published_stsp,published_hltb,published_igdb,image,name,description,full_price,current_price,discount,platforms,developers,publishers,languages,voiceovers,categories,genres,tags,achievements,gfq_url,gfq_difficulty,gfq_difficulty_comment,gfq_rating,gfq_rating_comment,gfq_length,gfq_length_comment,stsp_owners,stsp_mdntime,hltb_url,hltb_single,hltb_complete,meta_url,meta_score,meta_uscore,grnk_score,igdb_url,igdb_single,igdb_complete,igdb_score,igdb_uscore,igdb_popularity
0,10,https://store.steampowered.com/app/10,https://www.youtube.com/watch?v=oKC9SAF4JAc,97.0,2000-11-01,2000-11-08,2000-11-01,1999-06-12,1999-06-12,https://steamcdn-a.akamaihd.net/steam/apps/10/...,Counter-Strike,Play the world's number 1 online action game. ...,999.0,999.0,,"WIN,MAC,LNX",Valve,Valve,"English,French,German,Italian,Spanish - Spain,...","English,French,German,Italian,Spanish - Spain,...","Multi-player,PvP,Online PvP,Shared/Split Scree...",Action,"Action,FPS,Multiplayer,Shooter,Classic,Team-Ba...",,https://gamefaqs.gamespot.com/pc/429818-counte...,Just Right-Tough,"<a href=""/games/rankings?platform=19&amp;genre...",3.9,"<a href=""/games/rankings?platform=19&amp;genre...",64.5,"<a href=""/games/rankings?platform=19&amp;genre...",15000000.0,200.0,https://howlongtobeat.com/game?id=1953,,,https://www.metacritic.com/game/pc/counter-str...,88.0,92.0,,https://www.igdb.com/games/counter-strike,,,70.0,83.0,25.74
1,20,https://store.steampowered.com/app/20,,84.0,1999-04-01,1999-04-07,1999-04-01,1999-04-07,1999-04-07,https://steamcdn-a.akamaihd.net/steam/apps/20/...,Team Fortress Classic,One of the most popular online action games of...,499.0,499.0,,"WIN,MAC,LNX",Valve,Valve,"English,French,German,Italian,Spanish - Spain,...",,"Multi-player,PvP,Online PvP,Shared/Split Scree...",Action,"Action,FPS,Multiplayer,Classic,Shooter,Class-B...",,https://gamefaqs.gamespot.com/pc/562917-team-f...,Just Right-Tough,"<a href=""/games/rankings?platform=19&amp;genre...",3.47,"<a href=""/games/rankings?platform=19&amp;genre...",50.6,"<a href=""/games/rankings?platform=19&amp;genre...",3500000.0,20.0,https://howlongtobeat.com/game?id=9634,,,https://www.metacritic.com/game/pc/team-fortre...,,71.0,,https://www.igdb.com/games/team-fortress-classic,,,,70.0,1.67
2,30,https://store.steampowered.com/app/30,https://www.youtube.com/watch?v=j4MCo89bTWE,90.0,2003-05-01,2003-05-06,2003-05-01,2000-08-04,2003-05-01,https://steamcdn-a.akamaihd.net/steam/apps/30/...,Day of Defeat,Enlist in an intense brand of Axis vs. Allied ...,499.0,499.0,,"WIN,MAC,LNX",Valve,Valve,"English,French,German,Italian,Spanish - Spain",,"Multi-player,Valve Anti-Cheat enabled",Action,"FPS,World War II,Multiplayer,Action,Shooter,Wa...",,https://gamefaqs.gamespot.com/pc/562741-day-of...,Just Right-Tough,"<a href=""/games/rankings?platform=19&amp;genre...",3.69,"<a href=""/games/rankings?platform=19&amp;genre...",53.1,"<a href=""/games/rankings?platform=19&amp;genre...",7500000.0,28.0,https://howlongtobeat.com/game?id=2282,,,https://www.metacritic.com/game/pc/day-of-defe...,79.0,91.0,,https://www.igdb.com/games/day-of-defeat,,,71.0,76.0,1.45
3,40,https://store.steampowered.com/app/40,https://www.youtube.com/watch?v=jN_18uhiMS8,82.0,2001-06-01,2001-06-07,2001-06-01,2001-06-01,2001-07-01,https://steamcdn-a.akamaihd.net/steam/apps/40/...,Deathmatch Classic,Enjoy fast-paced multiplayer gaming with Death...,499.0,499.0,,"WIN,MAC,LNX",Valve,Valve,"English,French,German,Italian,Spanish - Spain,...",,"Multi-player,PvP,Online PvP,Shared/Split Scree...",Action,"Action,FPS,Multiplayer,Classic,Shooter,First-P...",,https://gamefaqs.gamespot.com/pc/637185-deathm...,Just Right-Tough,"<a href=""/games/rankings?platform=19&amp;genre...",3.15,"<a href=""/games/rankings?platform=19&amp;genre...",2.9,"<a href=""/games/rankings?platform=19&amp;genre...",7500000.0,11.0,https://howlongtobeat.com/game?id=2373,,,https://www.metacritic.com/game/pc/deathmatch-...,,68.0,,https://www.igdb.com/games/deathmatch-classic,,,,75.0,1.0
4,50,https://store.steampowered.com/app/50,https://www.youtube.com/watch?v=ZNZsWm-Ulk4,95.0,1999-11-01,1999-10-31,1999-11-01,1999-11-01,1999-11-10,https://steamcdn-a.akamaihd.net/steam/apps/50/...,Half-Life: Opposing Force,Return to the Black Mesa Research Facility as ...,499.0,499.0,,"WIN,MAC,LNX",Gearbox Software,Valve,"English,French,German,Korean",,"Single-player,Multi-player,Valve Anti-Cheat en...",Action,"FPS,Action,Sci-fi,Singleplayer,Classic,Shooter...",,https://gamefaqs.gamespot.com/pc/149551-half-l...,Just Right,"<a href=""/games/rankings?platform=19&amp;genre...",3.88,"<a href=""/games/rankings?platform=19&amp;genre...",10.7,"<a href=""/games/rankings?platform=19&amp;genre...",7500000.0,235.0,https://howlongtobeat.com/game?id=4256,5.0,7.0,https://www.metacritic.com/game/pc/half-life-o...,,86.0,,https://www.igdb.com/games/half-life-opposing-...,6.0,4.0,70.0,82.0,3.45
5,60,https://store.steampowered.com/app/60,https://www.youtube.com/watch?v=0NxAdFF0Pv0,81.0,2000-11-01,2000-11-01,2000-11-01,2000-11-01,2000-11-01,https://steamcdn-a.akamaihd.net/steam/apps/60/...,Ricochet,A futuristic action game that challenges your ...,499.0,499.0,,"WIN,MAC,LNX",Valve,Valve,"English,French,German,Italian,Spanish - Spain,...",,"Multi-player,PvP,Online PvP,Valve Anti-Cheat e...",Action,"Action,FPS,Multiplayer,First-Person,Classic,Cy...",,https://gamefaqs.gamespot.com/pc/582495-ricochet,Just Right,"<a href=""/games/rankings?platform=19&amp;genre...",2.65,"<a href=""/games/rankings?platform=19&amp;genre...",14.5,"<a href=""/games/rankings?platform=19&amp;genre...",3500000.0,4.0,https://howlongtobeat.com/game?id=7799,,,https://www.metacritic.com/game/pc/ricochet,,68.0,,https://www.igdb.com/games/ricochet,,,,72.0,2.56
6,70,https://store.steampowered.com/app/70,https://www.youtube.com/watch?v=qobDF0w5qJc,96.0,1998-11-08,1998-11-19,1998-11-08,1998-11-19,1998-11-27,https://steamcdn-a.akamaihd.net/steam/apps/70/...,Half-Life,Named Game of the Year by over 50 publications...,999.0,999.0,,"WIN,MAC,LNX",Valve,Valve,"English,French,German,Italian,Spanish - Spain,...","English,French,German,Italian,Spanish - Spain,...","Single-player,Multi-player,PvP,Online PvP,Stea...",Action,"FPS,Classic,Action,Sci-fi,Singleplayer,Shooter...",,https://gamefaqs.gamespot.com/pc/43362-half-li...,Just Right-Tough,"<a href=""/games/rankings?platform=19&amp;genre...",4.23,"<a href=""/games/rankings?platform=19&amp;genre...",21.1,"<a href=""/games/rankings?platform=19&amp;genre...",7500000.0,152.0,https://howlongtobeat.com/game?id=4247,12.0,15.0,https://www.metacritic.com/game/pc/half-life?f...,96.0,90.0,,https://www.igdb.com/games/half-life,12.0,15.0,80.0,90.0,14.82
7,80,https://store.steampowered.com/app/80,https://www.youtube.com/watch?v=cEoXkWMRD2I,90.0,2004-03-01,2004-01-03,2004-03-01,2004-03-23,2004-03-23,https://steamcdn-a.akamaihd.net/steam/apps/80/...,Counter-Strike: Condition Zero,"With its extensive Tour of Duty campaign, a ne...",999.0,999.0,,"WIN,MAC,LNX",Valve,Valve,"English,French,German,Italian,Spanish - Spain,...",,"Single-player,Multi-player,Valve Anti-Cheat en...",Action,"Action,FPS,Shooter,Multiplayer,Singleplayer,Ta...",,https://gamefaqs.gamespot.com/pc/533806-counte...,Just Right,"<a href=""/games/rankings?platform=19&amp;genre...",3.5,"<a href=""/games/rankings?platform=19&amp;genre...",36.2,"<a href=""/games/rankings?platform=19&amp;genre...",15000000.0,40.0,https://howlongtobeat.com/game?id=1955,,,https://www.metacritic.com/game/pc/counter-str...,65.0,87.0,,https://www.igdb.com/games/counter-strike-cond...,,,66.0,75.0,4.12
8,130,https://store.steampowered.com/app/130,https://www.youtube.com/watch?v=vdJwW_0X0Ug,90.0,2001-06-01,2001-06-11,2001-06-01,2001-06-12,2001-06-12,https://steamcdn-a.akamaihd.net/steam/apps/130...,Half-Life: Blue Shift,Made by Gearbox Software and originally releas...,499.0,499.0,,"WIN,MAC,LNX",Gearbox Software,Valve,"English,French,German",,"Single-player,Remote Play Together",Action,"FPS,Action,Sci-fi,Singleplayer,Shooter,Aliens,...",,https://gamefaqs.gamespot.com/pc/475304-half-l...,Just Right,"<a href=""/games/rankings?platform=19&amp;genre...",3.55,"<a href=""/games/rankings?platform=19&amp;genre...",8.5,"<a href=""/games/rankings?platform=19&amp;genre...",7500000.0,148.0,https://howlongtobeat.com/game?id=4254,3.0,3.0,https://www.metacritic.com/game/pc/half-life-b...,71.0,82.0,,https://www.igdb.com/games/half-life-blue-shift,4.0,2.0,60.0,72.0,3.67
9,220,https://store.steampowered.com/app/220,https://www.youtube.com/watch?v=ID1dWN3n7q4,97.0,2004-11-16,2004-11-16,2004-11-16,2004-11-16,2004-11-16,https://steamcdn-a.akamaihd.net/steam/apps/220...,Half-Life 2,1998. HALF-LIFE sends a shock through the game...,999.0,999.0,,"WIN,MAC,LNX",Valve,Valve,"English,French,German,Italian,Korean,Spanish -...","English,French,German,Italian,Korean,Spanish -...","Single-player,Steam Achievements,Steam Trading...",Action,"FPS,Action,Sci-fi,Singleplayer,Classic,Story R...",33.0,https://gamefaqs.gamespot.com/pc/914642-half-l...,Just Right,"<a href=""/games/rankings?platform=19&amp;genre...",4.4,"<a href=""/games/rankings?platform=19&amp;genre...",21.8,"<a href=""/games/rankings?platform=19&amp;genre...",15000000.0,396.0,https://howlongtobeat.com/game?id=4248,13.0,19.0,https://www.metacritic.com/game/pc/half-life-2...,96.0,91.0,,https://www.igdb.com/games/half-life-2,14.0,28.0,91.0,91.0,23.74


In [7]:
# Print per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53981 entries, 0 to 53980
Data columns (total 46 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   sid                     53981 non-null  int64  
 1   store_url               53981 non-null  object 
 2   store_promo_url         7546 non-null   object 
 3   store_uscore            33462 non-null  float64
 4   published_store         53831 non-null  object 
 5   published_meta          33603 non-null  object 
 6   published_stsp          19616 non-null  object 
 7   published_hltb          25831 non-null  object 
 8   published_igdb          20965 non-null  object 
 9   image                   53981 non-null  object 
 10  name                    53981 non-null  object 
 11  description             53932 non-null  object 
 12  full_price              46817 non-null  float64
 13  current_price           46817 non-null  float64
 14  discount                6529 non-null 

In [8]:
# Summary statistics for every column (numeric and non-numeric), transposed so
# that each original column becomes one row of the summary table.
df.describe(include='all').transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
sid,53981.0,,,,959052.05,470186.77,10.0,577760.0,938680.0,1316230.0,1979280.0
store_url,53981.0,53981.0,https://store.steampowered.com/app/10,1.0,,,,,,,
store_promo_url,7546.0,7189.0,https://www.youtube.com/watch?v=,6.0,,,,,,,
store_uscore,33462.0,,,,70.45,19.69,1.0,57.0,75.0,86.0,100.0
published_store,53831.0,3763.0,2020-01-17,87.0,,,,,,,
published_meta,33603.0,4155.0,2020-01-17,79.0,,,,,,,
published_stsp,19616.0,3152.0,2016-04-05,57.0,,,,,,,
published_hltb,25831.0,4808.0,2020-04-18,207.0,,,,,,,
published_igdb,20965.0,3845.0,2018-12-31,147.0,,,,,,,
image,53981.0,53981.0,https://steamcdn-a.akamaihd.net/steam/apps/10/...,1.0,,,,,,,


In [9]:
# Every distinct raw value of the comma-separated `genres` column, as a list.
df['genres'].unique().tolist()

['Action',
 'Action,Free to Play',
 'Action,Free to Play,Strategy',
 'Action,Adventure',
 'Indie',
 'Indie,Strategy',
 'Strategy',
 'RPG',
 'Action,RPG',
 'Action,Indie,RPG',
 'Casual,Indie',
 'Indie,RPG',
 'Simulation,Strategy',
 'Action,Indie,Strategy',
 'Action,Simulation,Strategy',
 'Action,Simulation',
 'Racing',
 'RPG,Strategy',
 'Adventure,Casual',
 'Casual',
 'Casual,Strategy',
 'Action,Casual',
 'Indie,Simulation',
 'Casual,Strategy,Indie',
 'Action,Indie',
 'Action,Strategy',
 'Adventure',
 'Action,Adventure,Racing,Simulation,Strategy',
 'Adventure,RPG',
 'Action,Adventure,RPG',
 'Action,Free to Play,Massively Multiplayer',
 'Indie,Simulation,Strategy',
 'Nudity,Violent,Action,Adventure,RPG',
 'Action,Free to Play,Massively Multiplayer,RPG,Strategy',
 'Racing,Simulation,Sports',
 'Action,Adventure,Indie',
 'Simulation',
 'Free to Play,Massively Multiplayer,RPG',
 'Sports',
 'Casual,Indie,Racing',
 'Action,Casual,Indie',
 'Adventure,Indie',
 'Action,Racing',
 'Action,Free to P

## Data Formatting

### Datetime conversion

In [10]:
# Parse the five published_* columns into pandas datetime values.
published_columns = [
    'published_store',
    'published_meta',
    'published_stsp',
    'published_hltb',
    'published_igdb',
]
for published_column in published_columns:
    df[published_column] = pd.to_datetime(df[published_column])

### Genre

In [11]:
# genres
# Turn each comma-separated genre string into a list of genre names.
# (Not re-runnable on its own: once converted, the values are lists, not strings.)
df['genres'] = df['genres'].map(lambda genre_text: genre_text.split(','))

In [12]:
# Spot-check the split result on the row labelled 50000.
df.loc[50000, 'genres']

['Casual', 'Indie', 'Strategy']

In [13]:
# replace genres
# Translate the Russian genre labels to their English equivalents and remove
# duplicates within each row (a row may already contain the English label).
# dict.fromkeys() de-duplicates while keeping first-seen order, so the resulting
# lists are identical on every run; the previous list(set(...)) round-trip left
# the order dependent on Python's per-process string hashing.
genre_translations = {
    'Инди': 'Indie',
    'Приключенческие игры': 'Adventure',
}
df['genres'] = df['genres'].apply(
    lambda genres: list(dict.fromkeys(genre_translations.get(genre, genre) for genre in genres))
)

## Data Cleaning

In [14]:
# Remove the columns that are not needed for the analyses: the id, URL and image
# columns, the non-store publication dates, current price / discount and the
# GameFAQs comment fields.
unused_columns = [
    'sid', 'store_url', 'store_promo_url',
    'published_meta', 'published_stsp', 'published_hltb', 'published_igdb',
    'image', 'current_price', 'discount',
    'gfq_url', 'gfq_difficulty_comment', 'gfq_rating_comment', 'gfq_length_comment',
    'hltb_url', 'meta_url', 'igdb_url',
]
df = df.drop(columns=unused_columns)

In [15]:
## missing data 1: languages and voiceover

## Rows without a languages / voiceovers entry get the placeholder "One_unknown"
for text_column in ("languages", "voiceovers"):
    df[text_column] = df[text_column].fillna("One_unknown")

In [16]:
## delete games without English as language:
## One vectorized, case-insensitive substring test replaces the row-by-row loop,
## which copied the whole frame on every single drop.
## Note: any value that does not contain "english" is removed, which includes
## rows that were filled with the "One_unknown" placeholder.
has_english = df["languages"].str.lower().str.contains("english", regex=False, na=False)
count_no_en = int((~has_english).sum())

## .copy() gives an independent frame so later column assignments do not warn.
## The original index labels are kept (no reset), exactly as with row-wise drop.
df = df.loc[has_english].copy()
print(f"No English: {count_no_en}")

No English: 1931


In [17]:
## use only number of languages and voiceovers
## (a comma-separated string with k commas lists k + 1 entries, so the
## "One_unknown" placeholder counts as 1)
for list_column in ("languages", "voiceovers"):
    df[list_column] = df[list_column].map(lambda text: text.count(",") + 1)

display(df["languages"].unique())

array([ 8,  9,  5,  4,  3, 18, 12, 11,  1, 17, 26, 19, 22, 28,  2,  6, 10,
       25,  7, 15, 13, 14, 16, 27, 24, 20, 21, 23, 29], dtype=int64)

In [18]:
## missing data 2: delete columns with high percentage of missing values

## check missing values: number of NaN entries per column, used to decide
## which columns are too sparse to keep
display(df.isna().sum())

store_uscore       19433
published_store      141
name                   0
description           36
full_price          6733
platforms              0
developers            92
publishers             0
languages              0
voiceovers             0
categories            63
genres                 0
tags               31902
achievements       23905
gfq_difficulty     39746
gfq_rating         38616
gfq_length         41380
stsp_owners        17519
stsp_mdntime       42863
hltb_single        37464
hltb_complete      39640
meta_score         48354
meta_uscore        46046
grnk_score         46923
igdb_single        51814
igdb_complete      51860
igdb_score         47551
igdb_uscore        39485
igdb_popularity    31195
dtype: int64

In [20]:
## drop columns with more than 75% missing data:
missing_per_column = df.isna().sum()
is_too_sparse = missing_per_column > df.shape[0]*0.75
df = df.drop(columns=missing_per_column.index[is_too_sparse])

## remaining columns and their missing-value counts
display(df.isna().sum())

store_uscore       19433
published_store      141
name                   0
description           36
full_price          6733
platforms              0
developers            92
publishers             0
languages              0
voiceovers             0
categories            63
genres                 0
tags               31902
achievements       23905
gfq_rating         38616
stsp_owners        17519
hltb_single        37464
igdb_popularity    31195
dtype: int64

In [21]:
## Row-wise removal of incomplete rows, df.dropna(how="any"), is left disabled here.
## NOTE(review): confirm that keeping rows with missing values is intended.

## Shape and contents of the cleaned frame, followed by the column dtypes.
display(df.shape, df)

df.dtypes

(52050, 18)

Unnamed: 0,store_uscore,published_store,name,description,full_price,platforms,developers,publishers,languages,voiceovers,categories,genres,tags,achievements,gfq_rating,stsp_owners,hltb_single,igdb_popularity
0,97.00,2000-11-01,Counter-Strike,Play the world's number 1 online action game. ...,999.00,"WIN,MAC,LNX",Valve,Valve,8,8,"Multi-player,PvP,Online PvP,Shared/Split Scree...",[Action],"Action,FPS,Multiplayer,Shooter,Classic,Team-Ba...",,3.90,15000000.00,,25.74
1,84.00,1999-04-01,Team Fortress Classic,One of the most popular online action games of...,499.00,"WIN,MAC,LNX",Valve,Valve,9,1,"Multi-player,PvP,Online PvP,Shared/Split Scree...",[Action],"Action,FPS,Multiplayer,Classic,Shooter,Class-B...",,3.47,3500000.00,,1.67
2,90.00,2003-05-01,Day of Defeat,Enlist in an intense brand of Axis vs. Allied ...,499.00,"WIN,MAC,LNX",Valve,Valve,5,1,"Multi-player,Valve Anti-Cheat enabled",[Action],"FPS,World War II,Multiplayer,Action,Shooter,Wa...",,3.69,7500000.00,,1.45
3,82.00,2001-06-01,Deathmatch Classic,Enjoy fast-paced multiplayer gaming with Death...,499.00,"WIN,MAC,LNX",Valve,Valve,9,1,"Multi-player,PvP,Online PvP,Shared/Split Scree...",[Action],"Action,FPS,Multiplayer,Classic,Shooter,First-P...",,3.15,7500000.00,,1.00
4,95.00,1999-11-01,Half-Life: Opposing Force,Return to the Black Mesa Research Facility as ...,499.00,"WIN,MAC,LNX",Gearbox Software,Valve,4,1,"Single-player,Multi-player,Valve Anti-Cheat en...",[Action],"FPS,Action,Sci-fi,Singleplayer,Classic,Shooter...",,3.88,7500000.00,5.00,3.45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53976,,2022-04-22,Kuggs,Kuggs is a fast-paced shooter with some light ...,199.00,WIN,Forrest Powell,Self,1,1,Single-player,[Adventure],,,,,,
53977,,2022-04-22,Whispered Secrets: Tying the Knot Collector's ...,Grandma Studios invites you on the latest chil...,1399.00,WIN,GrandMa Studios,Big Fish Games,1,1,Single-player,"[Casual, Adventure]",,,,,,
53978,,2022-04-21,Grey Eyes of Death,Embark on a deadly journey for survival. Your ...,599.00,"WIN,MAC,LNX",Hosted Games,Hosted Games,1,1,"Single-player,Steam Achievements,Steam Cloud","[Adventure, Indie, RPG]",,30.00,,,,
53979,,2022-04-24,Memory Puzzle - Futanari Threesome,"Train your brain with this memory game, reveal...",199.00,WIN,EroticGamesClub,EroticGamesClub,1,1,"Single-player,Steam Achievements,Steam Leaderb...","[Casual, Indie]",,3.00,,,,


store_uscore              float64
published_store    datetime64[ns]
name                       object
description                object
full_price                float64
platforms                  object
developers                 object
publishers                 object
languages                   int64
voiceovers                  int64
categories                 object
genres                     object
tags                       object
achievements              float64
gfq_rating                float64
stsp_owners               float64
hltb_single               float64
igdb_popularity           float64
dtype: object

In [23]:
## One-Hot Encoding
## Build one 0/1 indicator column per genre, aligned on the row *labels* of df.
## The previous loop used enumerate(), i.e. positions 0..n-1, as .loc labels;
## after rows were deleted the index has gaps, so flags were written to the
## wrong rows (or to labels that no longer exist).
one_hot_df = (
    pd.get_dummies(df['genres'].explode(), dtype='int64')  # one row per (game, genre) pair
    .groupby(level=0)                                      # collapse back to one row per game
    .max()
    .reindex(df.index, fill_value=0)                       # same row order as df
)
all_genres = list(one_hot_df.columns)
df = df.drop(columns=['genres']).join(one_hot_df)

In [24]:
## Rename columns
## Replace the spaces in multi-word column names with underscores.
underscore_names = {
    'Game Development': 'Game_Development',
    'Free to Play': 'Free_to_Play',
    'Massively Multiplayer': 'Massively_Multiplayer',
    'Early Access': 'Early_Access',
    'Sexual Content': 'Sexual_Content',
}
df = df.rename(columns=underscore_names)

In [25]:
## Platforms
## Turn each comma-separated platform string into a list of platform codes.
df['platforms'] = df['platforms'].map(lambda platform_text: platform_text.split(','))

In [27]:
## One-Hot Encoding
## Build one 0/1 indicator column per platform, aligned on the row *labels* of df.
## The previous loop used enumerate(), i.e. positions 0..n-1, as .loc labels;
## after rows were deleted the index has gaps, so flags were written to the
## wrong rows (or to labels that no longer exist).
one_hot_df = (
    pd.get_dummies(df['platforms'].explode(), dtype='int64')  # one row per (game, platform) pair
    .groupby(level=0)                                         # collapse back to one row per game
    .max()
    .reindex(df.index, fill_value=0)                          # same row order as df
)
all_platforms = list(one_hot_df.columns)
## The original cell stored the platform names in `all_genres`; the name is
## still reassigned here so that any later cell relying on it sees the same
## kind of value. NOTE(review): switch such cells to `all_platforms` if possible.
all_genres = all_platforms
df = df.drop(columns=['platforms']).join(one_hot_df)

## Feature Engineering

In [28]:
## publish date as timedelta
## Whole days between the store release date and the reference date 1997-01-01;
## rows without a release date yield a missing value.
reference_date = pd.Timestamp(1997, 1, 1)
df["published_store_timedelta"] = (pd.to_datetime(df["published_store"]) - reference_date).dt.days
