# Recourse Generation

The key purpose of this library is to implement recourse methods.

## Usage

Here is how to use a RecourseGenerator:

In [3]:
# Import necessary components
from rocelib.recourse_methods.BinaryLinearSearch import BinaryLinearSearch
from rocelib.models.pytorch_models.TrainablePyTorchModel import TrainablePyTorchModel
from rocelib.datasets.ExampleDatasets import get_example_dataset
from rocelib.models.sklearn_models.TrainableLogisticRegressionModel import TrainableLogisticRegressionModel
from rocelib.tasks.ClassificationTask import ClassificationTask
from rocelib.recourse_methods.STCE import TrexNN

# Load the example dataset and apply its default preprocessing
dl = get_example_dataset("ionosphere")
dl.default_preprocess()

# Load model. Note that some RecourseGenerators may only work with a certain type of model,
# e.g., MCE only works with a TrainablePyTorchModel
# model = TrainableLogisticRegressionModel()
# NOTE(review): the arguments are presumably (input dim, hidden layer sizes, output dim) - the
# printed outputs show 34 feature columns (feature_0..feature_33); confirm against TrainablePyTorchModel
model = TrainablePyTorchModel(34, [10], 1)
# Train the model on the dataset
model.train(dl)
# Create the task from the trained model and the dataset
task = ClassificationTask(model, dl)


In [4]:
# Each RecourseGenerator takes the task on creation; it can also take a custom distance function
#recourse_gen = BinaryLinearSearch(task)
recourse_gen = TrexNN(task)
# Get the negative instances. The default column_name is "target", but you can set it to the
# name of your dataset's target variable
negs = dl.get_negative_instances(neg_value=0, column_name="target")

# You can generate CEs for a set of instances stored in a DataFrame (here, the first five negatives)
print(recourse_gen.generate(negs.head(5)))

   feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
1   0.348433        0.0   0.721648  -0.527811   0.634308  -1.037587   
3   0.348433        0.0   0.721648  -1.125172   0.768477   1.921340   
5   0.348433        0.0  -1.243407  -0.114091  -1.349028  -0.511523   
7  -2.869990        0.0  -1.290430  -0.100661  -1.157858  -0.251849   
9   0.348433        0.0  -1.327935  -0.292560  -1.157858  -0.251849   

   feature_6  feature_7  feature_8  feature_9  ...  feature_24  feature_25  \
1  -1.339106  -2.029452   0.964074  -0.469482  ...   -1.037790   -0.383054   
3   0.329433  -2.152585  -1.010873  -0.375331  ...    1.045426    1.926340   
5  -1.133699  -0.456917  -0.720437  -0.237965  ...   -0.624980    0.109965   
7   0.914531  -2.152585  -1.010873  -0.375331  ...    1.045426    2.109592   
9  -1.118190  -0.229536  -0.784346  -0.930218  ...   -0.942070    0.114081   

   feature_26  feature_27  feature_28  feature_29  feature_30  feature_31  \
1   -1.447849   -0.208419  

In [17]:
# You can also generate for a single instance, which can be passed as a Series
print(recourse_gen.generate_for_instance(negs.iloc[0]))

   feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
1   0.348433        0.0   0.721648  -0.374822   0.668592  -0.754725   

   feature_6  feature_7  feature_8  feature_9  ...  feature_26  feature_27  \
1  -0.727327  -1.443417   0.939151  -0.597236  ...   -1.156499    0.016656   

   feature_28  feature_29  feature_30  feature_31  feature_32  feature_33  \
1   -0.762151    0.021424   -0.716593    0.036511   -0.761113    0.046193   

   target      loss  
1       1  0.058028  

[1 rows x 36 columns]


In [10]:
# You can also generate for all negative instances in one call
# (here: rows whose "target" column equals 0)
print(recourse_gen.generate_for_all(neg_value=0, column_name="target"))

     feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
1     0.348433        0.0   0.690526  -0.398488   0.657984  -0.812093   
3     0.348433        0.0   0.503254  -0.649715   0.649199   0.939155   
5     0.348433        0.0  -0.468945  -0.096833  -0.394883  -0.354418   
7    -0.581892        0.0  -0.001999  -0.072066   0.211646  -0.279242   
9     0.348433        0.0  -0.248192  -0.010496  -0.142386   0.058144   
..         ...        ...        ...        ...        ...        ...   
242   0.348433        0.0  -0.278011  -1.062065  -0.902017  -0.241978   
244   0.348433        0.0  -1.074052  -0.052282   0.737076   0.085947   
246   0.348433        0.0  -0.358022  -0.085565  -0.144406  -0.248432   
250   0.348433        0.0  -0.200273   0.409059  -0.290839   0.653651   
252   0.348433        0.0  -0.320657   0.063514  -0.254427   0.117673   

     feature_6  feature_7  feature_8  feature_9  ...  feature_26  feature_27  \
1    -0.693998  -1.507096   0.940103  -0.43

## Implementing your own RecourseGenerator

Here is an example of creating your own RecourseGenerator. Let's make a simple one that draws
n random positive instances and returns one of them at random. It also accepts an optional random seed.

In [18]:
from rocelib.recourse_methods.RecourseGenerator import RecourseGenerator
import pandas as pd

# Implement the RecourseGenerator class
class RandomCE(RecourseGenerator):
    """Toy RecourseGenerator whose CE is a randomly chosen positive instance."""

    # You must implement the _generation_method function; it returns the CE for a given
    # instance. If you take any extra arguments, specify them before **kwargs and give
    # them a default value, as we have done for n and seed.
    def _generation_method(self, instance,
                           column_name="target", neg_value=0, n=50, seed=None, **kwargs):
        """
        Draw n random positive instances and return one of them as the CE.

        Args:
            instance: The instance a CE is requested for (not used by this toy method).
            column_name: Name of the target column, forwarded to the task.
            neg_value: Target value treated as negative, forwarded to the task.
            n: Number of random positive instances to draw as candidates.
            seed: Optional random_state for the final pick. It only seeds the choice
                among the candidates; the candidate draws themselves are not seeded here.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A one-row sample of the drawn candidates (the result must be a DataFrame).
        """
        # Remember, the RecourseGenerator has access to its Task! Use this to get access to your
        # dataset or model, or to use any of their methods; here we use the ClassificationTask's
        # get_random_positive_instance() method
        candidates = pd.concat([
            self.task.get_random_positive_instance(neg_value=neg_value, column_name=column_name)
            for _ in range(n)
        ])

        # random_state=None is already the default of sample(), so a seed of None needs no
        # separate branch: it behaves exactly like an unseeded sample
        return candidates.sample(n=1, random_state=seed)

Within the RecourseGenerator you can access:

- The Task - self.task
- The DatasetLoader - self.task.training_data
- The TrainableModel - self.task.model

and their respective methods. If your method needs additional arguments, you can put them in the function signature
but do NOT remove any other arguments (including **kwargs). Remember to return a DataFrame!

Here is our new CE in use below:

In [19]:
# Create the RecourseGenerator from the task defined earlier
random_ce = RandomCE(task)

# Test it: generate CEs with generate_for_all() using its default arguments
ces = random_ce.generate_for_all()
print(ces)

     feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
1     0.348433        0.0  -0.287006  -0.059418  -0.327280  -0.460692   
3     0.348433        0.0   0.484384  -0.432757   0.641493  -0.492138   
5     0.348433        0.0   0.721648  -0.240904   0.768477  -0.187913   
7     0.348433        0.0   0.545410  -0.002046   0.656576  -0.202300   
9     0.348433        0.0   0.613620   0.296294   0.594067   0.468411   
..         ...        ...        ...        ...        ...        ...   
242   0.348433        0.0  -0.619744  -0.667804  -0.301718   0.231077   
244   0.348433        0.0  -0.104511  -1.482835   0.487810   0.949881   
246   0.348433        0.0   0.721648   0.105575   0.680925  -0.449413   
250   0.348433        0.0   0.591607   0.208886   0.647426   0.299250   
252   0.348433        0.0   0.642915   0.094890   0.706064   0.167120   

     feature_6  feature_7  feature_8  feature_9  ...  feature_24  feature_25  \
1    -0.088637  -0.309458  -0.020832  -0.17

We can even verify this by checking that the model's predictions for all of the CEs are positive.

In [20]:
# Predict the class of each generated CE; every printed label should be 1 (positive)
print(model.predict(ces))

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
