# 1. Install/Import required libraries

In [None]:
#!pip install seaborn

import pandas as pd
import ast
import seaborn as sns

# 2. Import data

In [None]:
# Load the cleaned dataset from disk.
df = pd.read_csv('../data/cleaned2.csv')

# These two columns arrive from the CSV as text; ast.literal_eval turns each
# cell's Python-literal text back into the object it describes
# (presumably lists of labels -- confirm against the cleaning step).
for list_col in ('categories', 'mechanics'):
    df[list_col] = df[list_col].apply(ast.literal_eval)

df.head()

# 3. Investigate Relationship Between Game Categories and Game Mechanics

## 3.1. Plot heatmap of categories vs mechanics

In [None]:
# Take an independent copy of just the two columns of interest.
exploded_subset = df[['categories', 'mechanics']].copy()

# Unroll both columns in turn so every row ends up holding a single
# (category, mechanic) pair.
exploded_subset = exploded_subset.explode('categories')
exploded_subset = exploded_subset.explode('mechanics')

# Renumber the rows, discard pairs with a missing side, and attach a constant
# column of ones so that summing it counts occurrences.
exploded_subset = (
    exploded_subset
    .reset_index(drop=True)
    .dropna()
    .assign(val=1)
)

# Co-occurrence matrix: categories down the rows, mechanics across the
# columns, each cell the number of times the pair occurs (0 if never).
matrix = exploded_subset.pivot_table(
    values='val',
    index='categories',
    columns='mechanics',
    aggfunc='sum',
    fill_value=0,
)
# Bare expression: only echoed if the notebook is configured to display
# non-final expressions of a cell.
matrix
sns.heatmap(matrix)

## 3.2. Impose a minimum threshold of 1500

Drop every category (row) and mechanic (column) whose maximum co-occurrence count is below 1500.

In [None]:
# Keep only the categories (rows) and mechanics (columns) whose largest cell
# is at least `threshold`.
threshold = 1500

# Build both masks up front instead of dropping labels one at a time in place
# while looping over the frame's own labels.
# A row holding a cell >= threshold always has that cell's column kept as
# well, so computing the row mask on the full matrix gives the same rows as
# computing it after the column filter.
# Rows/columns that are entirely NaN compare False and are dropped too.
keep_columns = matrix.max(axis='index') >= threshold
keep_rows = matrix.max(axis='columns') >= threshold
matrix_filtered = matrix.loc[keep_rows, keep_columns].copy()

hm = sns.heatmap(matrix_filtered)
# Tilt the x tick labels so they stay legible; assigning to `_` keeps the
# returned label list out of the cell output.
_ = hm.set_xticklabels(hm.get_xticklabels(), rotation=45, ha='right')
