In [5]:
import pandas as pd

# Load the raw BoardGameGeek export; the file is semicolon-delimited.
df = pd.read_csv('data/bgg_dataset.csv', sep=';')

# Maps the CSV's human-readable headers to snake_case column names.
COLUMN_MAPPING = {
    "ID": "id",
    "Name": "name",
    "Year Published": "year_published",
    "Min Players": "min_players",
    "Max Players": "max_players",
    "Play Time": "play_time",
    "Min Age": "min_age",
    "Users Rated": "users_rated",
    "Rating Average": "rating_average",
    "BGG Rank": "bgg_rank",
    "Complexity Average": "complexity_average",
    "Owned Users": "owned_users",
    "Mechanics": "mechanics",
    "Domains": "domains"
}

# Clean data
# Rename columns to follow Python naming conventions.
df = df.rename(columns=COLUMN_MAPPING)

# Convert European decimal commas to decimal points, then parse as floats.
# regex=False makes the literal (non-regex) replacement explicit.
df['rating_average'] = df['rating_average'].str.replace(',', '.', regex=False).astype(float)
df['complexity_average'] = df['complexity_average'].str.replace(',', '.', regex=False).astype(float)

# Fill missing values with placeholder markers (this does not remove duplicates).
# NOTE(review): 'none' is a string; if owned_users is otherwise numeric this
# yields a mixed-type column -- confirm downstream cells expect that, or
# consider leaving NaN / using a nullable integer dtype instead.
df = df.fillna({
    'domains': 'missing',
    'mechanics': 'missing',
    'owned_users': 'none',
})

# Create a new column holding the mechanics as a list of names.
# The source strings separate entries with ", ", so each piece is stripped;
# a bare split(',') would leave a leading space on every entry after the first.
# Rows without mechanics become the one-element list ["missing"].
df["mechanics_list"] = df["mechanics"].str.split(',').apply(
    lambda mechanics: [mechanic.strip() for mechanic in mechanics]
)

# Years should be integers, not floats. Missing years are replaced by 1900.
# NOTE(review): 1900 looks like a placeholder for "unknown" -- confirm it cannot
# be mistaken for a real publication year in later analysis.
df["year_published"] = df["year_published"].fillna(1900).astype(int)

# The id column is dropped from the cleaned frame.
df = df.drop(columns=["id"])


In [17]:
def _without_missing_placeholder(mechanics):
    """Return an empty list for the ["missing"] placeholder, else the value unchanged."""
    if mechanics == ["missing"]:
        return []
    return mechanics


# Games with no recorded mechanics get an empty list instead of ["missing"].
df["mechanics_list"] = df["mechanics_list"].apply(_without_missing_placeholder)


In [12]:
# Display the rows at integer positions 1 through 4 (the first row is skipped).
sample_positions = [1, 2, 3, 4]
df.iloc[sample_positions]

Unnamed: 0,name,year_published,min_players,max_players,play_time,min_age,users_rated,rating_average,bgg_rank,complexity_average,owned_users,mechanics,domains,mechanics_list
1,Pandemic Legacy: Season 1,2015,2,4,60,13,41643,8.61,2,2.84,65294.0,"Action Points, Cooperative Game, Hand Manageme...","Strategy Games, Thematic Games","[Action Points, Cooperative Game, Hand Manag..."
2,Brass: Birmingham,2018,2,4,120,14,19217,8.66,3,3.91,28785.0,"Hand Management, Income, Loans, Market, Networ...",Strategy Games,"[Hand Management, Income, Loans, Market, N..."
3,Terraforming Mars,2016,1,5,120,12,64864,8.43,4,3.24,87099.0,"Card Drafting, Drafting, End Game Bonuses, Han...",Strategy Games,"[Card Drafting, Drafting, End Game Bonuses, ..."
4,Twilight Imperium: Fourth Edition,2017,3,6,480,14,13468,8.7,5,4.22,16831.0,"Action Drafting, Area Majority / Influence, Ar...","Strategy Games, Thematic Games","[Action Drafting, Area Majority / Influence, ..."


In [10]:
# Preview the first five rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,name,year_published,min_players,max_players,play_time,min_age,users_rated,rating_average,bgg_rank,complexity_average,owned_users,mechanics,domains,mechanics_list
0,Gloomhaven,2017,1,4,120,14,42055,8.79,1,3.86,68323.0,"Action Queue, Action Retrieval, Campaign / Bat...","Strategy Games, Thematic Games","[Action Queue, Action Retrieval, Campaign / ..."
1,Pandemic Legacy: Season 1,2015,2,4,60,13,41643,8.61,2,2.84,65294.0,"Action Points, Cooperative Game, Hand Manageme...","Strategy Games, Thematic Games","[Action Points, Cooperative Game, Hand Manag..."
2,Brass: Birmingham,2018,2,4,120,14,19217,8.66,3,3.91,28785.0,"Hand Management, Income, Loans, Market, Networ...",Strategy Games,"[Hand Management, Income, Loans, Market, N..."
3,Terraforming Mars,2016,1,5,120,12,64864,8.43,4,3.24,87099.0,"Card Drafting, Drafting, End Game Bonuses, Han...",Strategy Games,"[Card Drafting, Drafting, End Game Bonuses, ..."
4,Twilight Imperium: Fourth Edition,2017,3,6,480,14,13468,8.7,5,4.22,16831.0,"Action Drafting, Area Majority / Influence, Ar...","Strategy Games, Thematic Games","[Action Drafting, Area Majority / Influence, ..."
