# Precomputing the Game Values
This notebook contains all code that was used to pre-compute the game values for the benchmark games in `shapiq`.

In [2]:
# general imports used for different games
import os
import sys
import random

import numpy as np
import pandas as pd

from shapiq.games.benchmark import pre_compute_and_store, pre_compute_and_store_from_list, get_game_files
from shapiq.games import Game

## Local Explanation Games
This section showcases how the local explanation games can be pre-computed. The following games are pre-computed:
- `AdultCensusLocalXAI`
- `CaliforniaHousingLocalXAI`
- `BikeSharingLocalXAI`
- `SentimentAnalysisLocalXAI`

### SentimentAnalysis Game (Language Model Examples)

In [7]:
from shapiq.games.benchmark import SentimentAnalysisLocalXAI


def pre_compute_imdb(n_games: int, n_players: int, n_jobs: int = 1) -> None:
    """Pre-computes sentiment analysis games for randomly chosen IMDB texts.

    Reads ``data/simplified_imdb.csv``, keeps the rows whose ``length`` column equals
    ``n_players``, and derives a ``game_id`` for every text from the first letter of each
    of its words. Texts whose ``game_id`` already appears among the files returned by
    ``get_game_files`` are skipped. From the remaining texts up to ``n_games`` are sampled
    at random, turned into ``SentimentAnalysisLocalXAI`` games and handed to
    ``pre_compute_and_store_from_list``.

    If fewer than ``n_games`` texts are still available, all of them are used and a notice
    is printed instead of failing inside ``DataFrame.sample``.

    Args:
        n_games: The number of games to pre-compute the values for.
        n_players: The number of players in the game.
        n_jobs: The number of parallel jobs to run. Default is 1.
    """
    # load the IMDB dataset; .copy() yields an independent frame so that adding the
    # game_id column below does not write into a slice of the full dataset
    imdb_data = pd.read_csv(os.path.join("data", "simplified_imdb.csv"))
    imdb_data = imdb_data[imdb_data["length"] == n_players].copy()

    # make text column into an identifier by taking the first letter of each word
    imdb_data["game_id"] = imdb_data["text"].apply(
        lambda text: "".join(word[0] for word in text.split())
    )

    # read the games that have already been pre-computed and extract their ids
    all_game_files = get_game_files(SentimentAnalysisLocalXAI, n_players=n_players)
    all_game_ids = {file.split(".")[0] for file in all_game_files}
    print(f"Found {len(all_game_ids)} games precomputed.")

    # keep only the games that have not been pre-computed yet
    imdb_data = imdb_data[~imdb_data["game_id"].isin(all_game_ids)]

    # different texts can share the same first-letter id; keep one text per id so that
    # no id is submitted twice in the same run
    imdb_data = imdb_data.drop_duplicates(subset="game_id")

    # never request more samples than there are rows left
    n_selected = min(n_games, len(imdb_data))
    if n_selected < n_games:
        print(f"Only {n_selected} of the requested {n_games} games are available.")

    # sample random games
    imdb_data = imdb_data.sample(n=n_selected)

    # get the games
    games, game_ids = [], []
    for _, row in imdb_data.iterrows():
        games.append(SentimentAnalysisLocalXAI(input_text=row["text"], verbose=True))
        game_ids.append(row["game_id"])

    # pre-compute the values for the games
    print(f"Precomputing {n_selected} games with {n_players} players.")
    pre_compute_and_store_from_list(games, game_ids=game_ids, n_jobs=n_jobs)

In [3]:
# run the sentiment analysis pre-computation: 2 games with 10 players each, single job

pre_compute_imdb(n_players=10, n_games=2, n_jobs=1)

Found 8 games precomputed.
Precomputing 2 games with 10 players.


Evaluating game:   0%|          | 0/1024 [00:00<?, ? coalition/s]

Evaluating game:   0%|          | 0/1024 [00:00<?, ? coalition/s]

### Tabular Dataset Games

In [7]:
from shapiq.games.benchmark import AdultCensusLocalXAI, CaliforniaHousingLocalXAI, BikeSharingLocalXAI


def pre_compute_tabular_local_xai(dataset_name: str, model_name: str, n_games: int, n_jobs: int = 1) -> None:
    """Pre-computes local explanation games for one of the tabular datasets.

    A setup game is created for the requested dataset and model. Its test set size
    (``setup_game.setup.x_test``) defines the pool of data point indices, and every game is
    identified by ``"{model_name}_{index}"``. Indices whose id already appears among the
    files returned by ``get_game_files`` are excluded. Up to ``n_games`` of the remaining
    indices are drawn at random without replacement, so no data point is selected twice in
    one run. If fewer than ``n_games`` indices remain, all of them are used and a notice is
    printed.

    Args:
        dataset_name: The name of the dataset to pre-compute the values for. One of
            ``"adult_census"``, ``"california_housing"`` or ``"bike_sharing"``.
        model_name: The name of the model to use for the game.
        n_games: The number of games to pre-compute the values for.
        n_jobs: The number of parallel jobs to run. Default is 1.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
    """
    # load the correct game
    game_classes = {
        "adult_census": AdultCensusLocalXAI,
        "california_housing": CaliforniaHousingLocalXAI,
        "bike_sharing": BikeSharingLocalXAI,
    }
    game_class = game_classes.get(dataset_name)
    if game_class is None:
        raise ValueError(f"Unknown dataset name: {dataset_name}")

    setup_game = game_class(model_name=model_name)

    # read the games that have already been pre-computed and extract their ids
    all_game_files = get_game_files(setup_game, n_players=setup_game.n_players)
    all_game_ids = {file.split(".")[0] for file in all_game_files}
    print(f"Found {len(all_game_ids)} games precomputed.")

    # get n_samples from the test set
    n_test = len(setup_game.setup.x_test)

    # collect every test index that has not been pre-computed yet
    candidates = [
        data_i for data_i in range(n_test) if f"{model_name}_{data_i}" not in all_game_ids
    ]

    # cap the request at what is available (and at zero for non-positive requests)
    n_selected = max(0, min(n_games, len(candidates)))
    if n_selected < n_games:
        print(f"Only {n_selected} of the requested {n_games} games are available.")

    # draw without replacement so that every selected data point is unique
    games, game_ids = [], []
    for data_i in random.sample(candidates, n_selected):
        games.append(game_class(model_name=model_name, x=data_i, verbose=False, random_state=42))
        game_ids.append(f"{model_name}_{data_i}")

    # pre-compute the values for the games
    print(f"Precomputing {n_selected} games for {setup_game.game_name} with {model_name}.")
    pre_compute_and_store_from_list(games, game_ids=game_ids, n_jobs=n_jobs)

In [9]:
# settings for the local explanation run on a tabular dataset

DATASET_NAME = "adult_census"
MODEL_NAME = "decision_tree"
N_GAMES = 10
N_JOBS = 1

# start the pre-computation with the settings above
pre_compute_tabular_local_xai(
    dataset_name=DATASET_NAME,
    model_name=MODEL_NAME,
    n_games=N_GAMES,
    n_jobs=N_JOBS,
)

Trained model decision_tree for the adult_census dataset.
Score on training data: 0.8140203449800973
Found 0 games precomputed.
Precomputing 10 games for AdultCensus_LocalExplanation_Game with decision_tree.


## Global Explanation Games
This section showcases how the global explanation games can be pre-computed. The following games are pre-computed:
- `AdultCensusGlobalXAI`
- `CaliforniaHousingGlobalXAI`
- `BikeSharingGlobalXAI`

In [3]:
from shapiq.games.benchmark import AdultCensusGlobalXAI, CaliforniaHousingGlobalXAI, BikeSharingGlobalXAI

def pre_compute_tabular_global_xai(dataset_name: str, model_name: str, loss_function: str, n_games: int, n_jobs: int = 1) -> None:
    """Pre-computes global explanation games for one of the tabular datasets.

    Every game is created with its own ``random_state`` taken from the range 0..1000
    (inclusive) and is identified by ``"{model_name}_{loss_function}_{random_state}"``.
    Random states whose id already appears among the files returned by ``get_game_files``
    are excluded. Up to ``n_games`` of the remaining random states are drawn at random
    without replacement, so no id is selected twice in one run. If fewer than ``n_games``
    random states remain, all of them are used and a notice is printed.

    Args:
        dataset_name: The name of the dataset to pre-compute the values for. One of
            ``"adult_census"``, ``"california_housing"`` or ``"bike_sharing"``.
        model_name: The name of the model to use for the game.
        loss_function: The loss function to use for the game.
        n_games: The number of games to pre-compute the values for.
        n_jobs: The number of parallel jobs to run. Default is 1.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
    """
    # load the correct game
    game_classes = {
        "adult_census": AdultCensusGlobalXAI,
        "california_housing": CaliforniaHousingGlobalXAI,
        "bike_sharing": BikeSharingGlobalXAI,
    }
    game_class = game_classes.get(dataset_name)
    if game_class is None:
        raise ValueError(f"Unknown dataset name: {dataset_name}")

    setup_game = game_class(model_name=model_name, loss_function=loss_function)

    # read the games that have already been pre-computed and extract their ids
    all_game_files = get_game_files(setup_game, n_players=setup_game.n_players)
    all_game_ids = {file.split(".")[0] for file in all_game_files}
    print(f"Found {len(all_game_ids)} games precomputed.")

    # random states are taken from 0..max_random_state (inclusive)
    max_random_state = 1000
    candidates = [
        random_state
        for random_state in range(max_random_state + 1)
        if f"{model_name}_{loss_function}_{random_state}" not in all_game_ids
    ]

    # cap the request at what is available (and at zero for non-positive requests)
    n_selected = max(0, min(n_games, len(candidates)))
    if n_selected < n_games:
        print(f"Only {n_selected} of the requested {n_games} games are available.")

    # draw without replacement so that every selected random state is unique
    games, game_ids = [], []
    for random_state in random.sample(candidates, n_selected):
        games.append(
            game_class(
                model_name=model_name,
                loss_function=loss_function,
                verbose=False,
                random_state=random_state,
            )
        )
        game_ids.append(f"{model_name}_{loss_function}_{random_state}")

    # pre-compute the values for the games
    print(f"Precomputing {n_selected} games for {setup_game.game_name} with {model_name} and {loss_function}.")
    pre_compute_and_store_from_list(games, game_ids=game_ids, n_jobs=n_jobs)

In [6]:
# settings for the global explanation run on a tabular dataset

# dataset options: "california_housing", "bike_sharing", "adult_census"
DATASET_NAME = "california_housing"
# model options: "decision_tree", "random_forest", "gradient_boosting"
MODEL_NAME = "decision_tree"
# loss options: "r2_score", "mean_squared_error", "mean_absolute_error", "log_loss",
# "accuracy", "f1_score", "roc_auc_score"
LOSS_FUNCTION = "r2_score"
N_GAMES = 8
N_JOBS = 1

# start the pre-computation with the settings above
pre_compute_tabular_global_xai(
    dataset_name=DATASET_NAME,
    model_name=MODEL_NAME,
    loss_function=LOSS_FUNCTION,
    n_games=N_GAMES,
    n_jobs=N_JOBS,
)

Trained model decision_tree for the california_housing dataset.
Score on training data: 0.6144619626794067
Found 2 games precomputed.
Precomputing 8 games for CaliforniaHousing_GlobalExplanation_Game with decision_tree and r2_score.
