In [7]:
import pandas as pd

# First dataset: read steam.csv, discard the listed columns, and rename the
# owner/rating columns in a single chain.
# "owners" becomes "max_owners" here; at this point it still holds the raw
# "low-high" range text and is reduced to the upper bound further down.
df = (
    pd.read_csv('steam.csv')
    .drop(columns=[
        "appid",
        "english",
        "required_age",
        "genres",
        "steamspy_tags",
        "achievements",
    ])
    .rename(columns={
        "owners": "max_owners",
        "positive_ratings": "positive_reviews",
        "negative_ratings": "negative_reviews",
    })
)


def _capitalize_items(raw):
    """Convert a ';'-separated string into a ','-separated one.

    Each item is passed through ``str.capitalize`` (first character
    upper-cased, the rest lower-cased), e.g. ``"windows;mac"`` becomes
    ``"Windows,Mac"``.
    """
    return ",".join(item.capitalize() for item in raw.split(";"))


# Keep only the upper bound of each "low-high" owners range.
# The whole column is processed at once instead of cell by cell through
# df.at. The result is still text, exactly as before; cast it explicitly
# wherever a number is required. Missing values stay missing, and a value
# without a "-" becomes missing instead of aborting the cell.
df["max_owners"] = df["max_owners"].str.split("-").str[1]

# Replace ; by , and capitalize every platform / category.
# na_action="ignore" leaves missing values untouched rather than failing on
# them with an AttributeError.
df["platforms"] = df["platforms"].map(_capitalize_items, na_action="ignore")
df["categories"] = df["categories"].map(_capitalize_items, na_action="ignore")
    
# Second dataset: read steam_games.csv and discard the listed columns.
# NOTE(review): judging by the merged output, presumably only "name",
# "popular_tags" and "languages" remain afterwards — confirm against the file.
df2 = pd.read_csv('steam_games.csv').drop(columns=[
    "achievements",
    "game_description",
    "mature_content",
    "minimum_requirements",
    "recommended_requirements",
    "discount_price",
    "url",
    "types",
    "desc_snippet",
    "game_details",
    "genre",
    "release_date",
    "developer",
    "publisher",
    "recent_reviews",
    "all_reviews",
    "original_price",
])

# Inner-join both datasets on "name", so only games present in both are kept,
# then make "name" the index of the combined frame.
# NOTE(review): if a name occurs more than once in either dataset, the join
# yields one row per matching pair and the index will not be unique — verify.
df_final = pd.merge(df, df2, how='inner', on="name").set_index("name")


# Split the merged frame: "positive_reviews" is the target, every remaining
# column is a feature.
features_positive_reviews = df_final.drop(columns=["positive_reviews"])
target_positive_reviews = df_final["positive_reviews"]

# Final expression of the cell, so the notebook renders the merged frame.
df_final

Unnamed: 0_level_0,release_date,developer,publisher,platforms,categories,positive_reviews,negative_reviews,average_playtime,median_playtime,max_owners,price,popular_tags,languages
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Counter-Strike,2000-11-01,Valve,Valve,"Windows,Mac,Linux","Multi-player,Online multi-player,Local multi-p...",124534,3339,17612,317,20000000,7.19,"Action,FPS,Multiplayer,Shooter,Classic,Team-Ba...","English,French,German,Italian,Spanish - Spain,..."
Team Fortress Classic,1999-04-01,Valve,Valve,"Windows,Mac,Linux","Multi-player,Online multi-player,Local multi-p...",3318,633,277,62,10000000,3.99,"Action,FPS,Multiplayer,Classic,Shooter,Team-Ba...","English,French,German,Italian,Spanish - Spain,..."
Day of Defeat,2003-05-01,Valve,Valve,"Windows,Mac,Linux","Multi-player,Valve anti-cheat enabled",3416,398,187,34,10000000,3.99,"FPS,World War II,Multiplayer,Shooter,Action,Wa...","English,French,German,Italian,Spanish - Spain"
Deathmatch Classic,2001-06-01,Valve,Valve,"Windows,Mac,Linux","Multi-player,Online multi-player,Local multi-p...",1273,267,258,184,10000000,3.99,"Action,FPS,Classic,Multiplayer,Shooter,First-P...","English,French,German,Italian,Spanish - Spain,..."
Half-Life: Opposing Force,1999-11-01,Gearbox Software,Valve,"Windows,Mac,Linux","Single-player,Multi-player,Valve anti-cheat en...",5250,288,624,415,10000000,3.99,"FPS,Action,Classic,Sci-fi,Singleplayer,Shooter...","English,French,German,Korean"
Ricochet,2000-11-01,Valve,Valve,"Windows,Mac,Linux","Multi-player,Online multi-player,Valve anti-ch...",2758,684,175,10,10000000,3.99,"Action,FPS,Multiplayer,Masterpiece,Classic,Fir...","English,French,German,Italian,Spanish - Spain,..."
Half-Life,1998-11-08,Valve,Valve,"Windows,Mac,Linux","Single-player,Multi-player,Online multi-player...",27755,1100,1300,83,10000000,7.19,"FPS,Classic,Action,Sci-fi,Singleplayer,Masterp...","English,French,German,Italian,Spanish - Spain,..."
Half-Life: Blue Shift,2001-06-01,Gearbox Software,Valve,"Windows,Mac,Linux",Single-player,3822,420,361,205,10000000,3.99,"FPS,Action,Sci-fi,Singleplayer,Shooter,Aliens,...","English,French,German"
Half-Life 2,2004-11-16,Valve,Valve,"Windows,Mac,Linux","Single-player,Steam achievements,Steam trading...",67902,2419,691,402,20000000,7.19,"FPS,Action,Sci-fi,Classic,Singleplayer,Masterp...","English,French,German,Italian,Korean,Spanish -..."
Counter-Strike: Source,2004-11-01,Valve,Valve,"Windows,Mac,Linux","Multi-player,Cross-platform multiplayer,Steam ...",76640,3497,6842,400,20000000,7.19,"Shooter,Action,FPS,Multiplayer,Team-Based,Firs...","English,French,German,Italian,Japanese,Korean,..."
