### Import Libraries

In [None]:
import pandas as pd
import numpy as np
import ast

### Load Dataset

In [None]:
# Read the raw CSV and work on a copy, so the frame held in `data` is not
# modified by the in-place cleaning steps applied to `df` below.
data = pd.read_csv("steam_games.csv", delimiter=",")
df = data.copy()

# Bare expression: the notebook renders the frame as the cell output.
df

### Gather Information

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Descriptive statistics for the frame's columns.
df.describe()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

### Cleaning Data

#### Drop Unnecessary Columns

In [None]:
# Columns to remove from the frame. Labels that are not present are
# skipped instead of raising, so the cell can safely be run more than once.
cols_drop = ["n_achievements", "additional_content", "required_age"]
df.drop(columns=cols_drop, errors="ignore", inplace=True)

#### Clean Date Column

In [None]:
# Remove the row-display limit so the full frequency table is shown.
# NOTE: this changes a global pandas option and stays in effect for later cells.
pd.options.display.max_rows=None
display(df["release_date"].value_counts())

In [None]:
# Replace the placeholder "Not Released" with NaT (Not a Time), pandas'
# missing-value marker for datetimes.
df["release_date"] = df["release_date"].replace("Not Released", pd.NaT)

In [None]:
# Verify the replacement: list the distinct values left in the column.
df["release_date"].unique()

In [None]:
# Drop the time part: keep only the first whitespace-separated token.
# NOTE(review): assumes the date itself contains no spaces (e.g.
# "2008-10-21 00:00:00"); a format like "Oct 21, 2008" would be cut down
# to "Oct" -- confirm against the raw data.
df["release_date"] = df["release_date"].str.split().str[0]

In [None]:
# Verify the split: list the distinct date strings that remain.
display(df["release_date"].unique())

In [None]:
# Convert "release_date" to datetime; values that cannot be parsed become
# NaT (errors="coerce") instead of raising.
df["release_date"] = pd.to_datetime(df["release_date"], errors="coerce")

In [None]:
# Verify the conversion: list the distinct values after parsing.
df["release_date"].unique()

#### Clean Categories Column

In [None]:
# Inspect the distinct raw values of the "categories" column.
df["categories"].unique()

In [None]:
# Inspect the Python type of the first entry. `.iloc[0]` selects by position,
# so this works even when the index has no label 0 (for example after rows
# were filtered out or the index was replaced).
type(df["categories"].iloc[0])

In [None]:
# Parse each string value as a Python literal (e.g. "['a', 'b']" -> list) so
# the next step can work on real lists. Non-string values (such as NaN) are
# passed through unchanged.
df["categories"] = df["categories"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

def assign_mode_value(categories):
    """Map a game's categories to a numeric game-mode code.

    Codes:
        4 -- both "Single-player" and "Multi-player" are present
        2 -- only "Single-player" is present
        3 -- only "Multi-player" is present
        1 -- neither is present, or the value cannot be turned into a set
             (e.g. a missing value such as None or NaN)

    Args:
        categories: Iterable of category names (normally a list).

    Returns:
        int: One of 1, 2, 3 or 4, as described above.
    """
    try:
        categories_set = set(categories)
    except TypeError:
        # None and float NaN are not iterable. Treat them as "no known mode"
        # rather than letting one missing value abort the whole apply().
        return 1

    has_single = "Single-player" in categories_set
    has_multi = "Multi-player" in categories_set
    if has_single and has_multi:
        return 4
    if has_single:
        return 2
    if has_multi:
        return 3
    return 1

# Store the numeric game-mode code of every row in a new column.
df["game_mode_value"] = df["categories"].apply(assign_mode_value)

In [None]:
# Verify the result: count how many rows fall into each game-mode code.
df["game_mode_value"].value_counts()

#### Clean Genres Column

In [None]:
# Inspect the distinct raw values of the "genres" column.
df["genres"].unique()

In [None]:
def get_first_genres(x):
    """Return the first genre of a raw "genres" value.

    Args:
        x: Either a string holding a Python list literal
            (e.g. "['Action', 'Indie']") or an already-parsed list.
            Any other value (None, NaN, ...) is treated as missing.

    Returns:
        The first element of the list, or None when the value is missing,
        cannot be parsed, is not a list, or is an empty list.
    """
    if isinstance(x, str):
        try:
            x = ast.literal_eval(x)  # convert the string to a Python object
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # These are the errors literal_eval raises for malformed input;
            # anything else is a real bug and should not be swallowed.
            return None
    if isinstance(x, list) and x:
        return x[0]
    return None  # not a list, or an empty list

# Store the first genre of every row in a new column.
df["first_genres"] = df["genres"].apply(get_first_genres)

# Preview the first ten rows to check the new column.
df.head(10)

In [None]:
# Inspect the distinct values extracted into "first_genres".
df["first_genres"].unique()

In [None]:
# Remove the original "genres" column from the frame.
# NOTE: this raises KeyError if the cell is run again after the column is gone.
df.drop(columns=["genres"], inplace=True )