In [52]:
import joblib
import src.helpers.metrics as evaluation

In [152]:

# Specify the path to the file

path = 'trained_models/'
version = path + '18_06_2024__13-10'

dataset = 'student_addiction'

# Load the models

for model in ['rf']: #['rf', 'rbf_svm', 'dnn']:
    
    # Load the model
    loaded_model = joblib.load(version + '/' + dataset + '_' + model + '.joblib')

In [153]:
import src.data.api.models as data_models
import dice_ml
from dice_ml.utils import helpers  # helper functions
from dice_ml import Dice


student = data_models.StudentAddiction()


df = student.get_data()

#print dtypes
print(df.dtypes)


Experimentation                       int64
Academic_Performance_Decline          int64
Social_Isolation                      int64
Financial_Issues                      int64
Physical_Mental_Health_Problems       int64
Legal_Consequences                    int64
Relationship_Strain                   int64
Risk_Taking_Behavior                  int64
Withdrawal_Symptoms                   int64
Denial_and_Resistance_to_Treatment    int64
Addiction_Class                       int64
dtype: object


In [154]:
X_train, X_test, y_train, y_test = student.train_test_split(student.getX(), student.getY())

Xy_train_merged = X_train.copy()
Xy_train_merged[student.y] = y_train

print(len(Xy_train_merged))

d = dice_ml.Data(dataframe=df, continuous_features=[], outcome_name='Addiction_Class')

# Experimentation                       int64
# Academic_Performance_Decline          int64
# Social_Isolation                      int64
# Financial_Issues                      int64
# Physical_Mental_Health_Problems       int64
# Legal_Consequences                    int64
# Relationship_Strain                   int64
# Risk_Taking_Behavior                  int64
# Withdrawal_Symptoms                   int64
# Denial_and_Resistance_to_Treatment    int64
# Addiction_Class                       int64
# dtype: object

deeeed = dice_ml.data.Data(features={
            'Experimentation': [0,1],
            'Academic_Performance_Decline': [0,1],
            'Social_Isolation': [0,1],
            'Financial_Issues': [0,1],
            'Physical_Mental_Health_Problems': [0,1],
            'Legal_Consequences': [0,1],
            'Relationship_Strain': [0,1],
            'Risk_Taking_Behavior': [0,1],
            'Withdrawal_Symptoms': [0,1],
            'Denial_and_Resistance_to_Treatment': [0,1]},
         outcome_name='Addiction_Class',)


30072


In [162]:
m = dice_ml.Model(model=loaded_model, backend='sklearn')
exp = dice_ml.Dice(d, m, method='random')

query_instance = X_test[0:1]
dice_exp = exp.generate_counterfactuals(query_instance, total_CFs=1, desired_class=1, verbose=True,
                                        diversity_weight=1,
                                        proximity_weight=1,
                                        sparsity_weight=2)

# Visualize counterfactual explanation
dice_exp.visualize_as_dataframe(show_only_changes=True)

100%|██████████| 1/1 [00:00<00:00,  8.14it/s]

Diverse Counterfactuals found! total time taken: 00 min 00 sec
Query instance (original outcome : 1)





Unnamed: 0,Experimentation,Academic_Performance_Decline,Social_Isolation,Financial_Issues,Physical_Mental_Health_Problems,Legal_Consequences,Relationship_Strain,Risk_Taking_Behavior,Withdrawal_Symptoms,Denial_and_Resistance_to_Treatment,Addiction_Class
0,1,1,1,0,1,0,0,0,1,0,1



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,Experimentation,Academic_Performance_Decline,Social_Isolation,Financial_Issues,Physical_Mental_Health_Problems,Legal_Consequences,Relationship_Strain,Risk_Taking_Behavior,Withdrawal_Symptoms,Denial_and_Resistance_to_Treatment,Addiction_Class
0,-,-,-,-,-,-,-,-,-,-,-


In [165]:
# count class imbalance

print(y_train.value_counts())

0    20953
1     9119
Name: Addiction_Class, dtype: int64


In [None]:
analysis = evaluation.CounterfactualAnalysis(model='DICE', factual=query_instance, counterfactual=dice_exp)

In [None]:
import src.helpers.helpers as helpers
import src.helpers.metrics as evaluation
import src.data.api.models as data_models
import src.cf_methods.models as cf_algs
import src.helpers.gridsearch as gridsearch
import pandas as pd
import numpy as np

datasets = helpers.load_json_file('training_settings.json')
params = helpers.load_json_file('experiment_params.json')

random_state = 42

#Repeat for all active datasets


for dataset in datasets:
    
    dataset_results = None
    
    if datasets[dataset]['active'] == False:
        continue
    
    log = f"CF evaluation for {dataset}"
    
    dataset_model_name = datasets[dataset]['data_model']
    dataset_model_class = getattr(data_models, dataset_model_name, None)
    
    if dataset_model_name == None:
        print(f"Dataset {dataset} has no model name")
        continue
    
    if dataset_model_class is not None:
        
        # Load the dataset with the smote parameter (e.g. False or 'auto')and random state
        
        smote = datasets[dataset]['smote']
        cleaned_data = dataset_model_class(smote=smote, random_state=random_state)
        
    else:
        print(f"Dataset {dataset} ({dataset_model_name}) not found")
        continue
    
    
    # Split the data into target variable and features
    
    X = cleaned_data['X']
    y = cleaned_data['y']
        
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = cleaned_data.train_test_split(X, y)
    

    # Repeat for all models
    
    models = [
        'NICE_minmax', 'NICE_standard'
        'DICE_random', 'DICE_genetic', 'DICE_kdtree',
        'CE-OCL', 'CE-OCL_trust'
        ]
    
    experiments = ["diversity-proximity", "sparsity-proximity", "computational-efficiency"]
    
    for experiment in experiments:
    
        for model in models: 
        
        # Set FIXED parameters for the model
        
        
            match model:
                
                case 'NICE_minmax' | 'NICE_standard':
                    
                    params = params[experiment]['NICE']
                    
                case 'DICE_random' | 'DICE_genetic' | 'DICE_kdtree':
                    
                    params = params[experiment]['DICE']
                
                
                case 'CE_OCL' | 'CE_OCL_trust':
                    
                    params = params[experiment]['CE_OCL']
                    
                    model_instance = cf_algs.CE_OCL(X_train)
                    
                    model_instance.set_data_restrictions(
                    real_features = [],
                    binary_features = [],
                    integer_features = [],
                    categorical_encodings = {},
                    
                    # Empty in order to turn off actionability constraints
                    only_positive_features = [],
                    only_increasing_features = [],
                    immutable_features = [],
                    conditionally_mutable_features = []
                    )
                    
                    # Trust regions require at least 1 observation in the target class (of the COUNTERfactual)
                    ref = np.where(y_test==1)
                    
                    model_instance.set_metric_constraints(
                        sparsity_constraint = False,
                        trust_region_constraint = False,
                        trust_region_reference=None,
                    )
            
                case _:
                    raise ValueError('Model not supported')
            
            
            # Set the scoring function for the grid search
            if experiment == "diversity-proximity":
            
                def custom_scoring(metrics):
                    return ( metrics['diversity'] + metrics['proximity']) / 2
                
            elif experiment == "sparsity-proximity":
                
                def custom_scoring(metrics):
                    return ( metrics['sparsity'] + metrics['proximity'] ) / 2
                
            search = gridsearch.GridSearch(model=model_instance,
                                        param_grid=params,
                                        scoring=custom_scoring, # can also be 'diversity', 'proximity', 'sparsity',....
                                        )      
            
            # Find optimal parameters using grid search on the TEST dataset
            search.optimize(X_test, y_test)
            
            best_params = search.self.best_params_
            
            # Print best params using some nice markup  and ###
            
            print(f"Best parameters for {model} in {dataset} given scoring function '{scoring}' are:")
            print(f"{'#'*20}")
            print(best_params)
            
            analysis = evaluation.CounterfactualAnalysis(model_instance, dataset)
            metrics = analysis.evaluate()
            
            if dataset_results is None:
                dataset_results = metrics
            else:
                dataset_results = pd.concat([dataset_results, metrics])
            
    #Save results
    dataset_results.to_csv(f"output/results/{experiment}-{dataset}_results.csv")
            
            
        
        
            