## Imports

In [1]:
import os
import sys

import io
from contextlib import redirect_stdout

sys.path.insert(0, os.getcwd() + '/python_scripts')

from transform_data import csv_to_clingo, undersample_csv_to_clingo
from single_proxy import get_single_proxies
from multi_proxy_choice_rules import get_proxy_clusters_choice_rules
from multi_proxy_hardcoded import get_proxy_clusters_hardcoded
from multi_proxy_undersampled import process_potential_implications, check_implication


## General info

* Whenever default values are mentioned, they are as follows:

| Attribute | Default Value |
|---|---|
| Minimum implication probability | 80 |
| Minimum incidence probability | 5 |
| Minimum proxy cluster size | 1 |
| Maximum proxy cluster size | 3 |

* All the above mentioned attribute values should be **integers**
* There may be issues with running clingo programs in a Jupyter notebook. For example, an `IOPub data rate exceeded` error might occur. In this case, it can be useful to run the required instructions from an external Python file or from the command line
* Calls to functions from files in the `python_scripts` directory should be made from the root of this repo

---

## .csv data transformation

 **⚠ Edit cell below** to use preferred dataset

In [2]:
# Folders used by the conversion step: `sourcedatafolder` is passed to
# csv_to_clingo as the input location, `outdatafolder` is where the
# generated clingo data file is expected.
sourcedatafolder = "example_datasets_no_ordinals/"
outdatafolder = "clingo_data/"

# Dataset selection: keep exactly ONE (dataset, protected_attributes,
# outcome_attribute) triple uncommented. `dataset` is also used to build the
# name of the generated data file, so it must match the source data name.
# NOTE(review): attribute names presumably have to match the .csv column
# headers exactly (including case) - confirm in transform_data.

#dataset = "student-performance-mat"
#protected_attributes = ["sex"]
#outcome_attribute = "G3"

#dataset = "student-performance-por"
#protected_attributes = ["sex"]
#outcome_attribute = "G3"

#dataset = "adult"
#protected_attributes = ["gender", "race"]
#outcome_attribute = "income"

#dataset = "bank-marketing"
#protected_attributes = ["marital"]
#outcome_attribute = "deposit"

#dataset = "compas"
#protected_attributes = ["race", "sex"]
#outcome_attribute = "is_violent_recid"

# Currently active selection.
dataset = "german-credit"
protected_attributes = ["age_cat"]
outcome_attribute = "class"

#dataset = "credit-card-clients"
#protected_attributes = ["SEX", "MARRIAGE"]
#outcome_attribute = "default.payment.next.month"

#dataset = "diabetes"
#protected_attributes = ["gender"]
#outcome_attribute = "readmitted"

#dataset = "kdd-adult-census-income"
#protected_attributes = ["sex", "race"]
#outcome_attribute = "income"

#dataset = "law-school"
#protected_attributes = ["sex", "race", "race1", "race2"]
#outcome_attribute = "gpa"

#dataset = "open-university-learning-analytics"
#protected_attributes = ["gender"]
#outcome_attribute = "final_result"

---

Creating output directory if it does not exist already

In [4]:
# Create the output folder (and any missing parents) without relying on a
# POSIX `mkdir` being available to the shell; a no-op if it already exists.
os.makedirs(outdatafolder, exist_ok=True)

Creating data file readable by clingo programs

In [3]:
# Convert the selected dataset into a data file readable by the clingo
# programs, using the folders and attributes chosen in the configuration cell.
csv_to_clingo(sourcedatafolder, dataset, outdatafolder, protected_attributes, outcome_attribute)

 **⚠ Resulting file name** should be the following `datafile`:

In [5]:
# Path of the clingo data file for the selected dataset: <outdatafolder>data-<dataset>.lp
datafile = f"{outdatafolder}data-{dataset}.lp"

---

# Single proxy discovery

## Alternative 1 - running clingo directly

In [6]:
# Run the clingo CLI on the generated data file together with the default
# single-proxy rules; `$datafile` is expanded from the Python variable.
!clingo $datafile clingo_scripts/single_proxy_default.lp

clingo version 5.6.2
Reading from clingo_data/data-german-credit.lp ...
Solving...
Answer: 1
protected("age_cat") implication("credit_history","critical/other existing credit","age_cat","old",88,29) implication("savings_status","no known savings","age_cat","old",84,18) implication("employment",">=7","age_cat","old",94,25) implication("personal_status","male single","age_cat","old",89,54) implication("other_parties","none","age_cat","old",81,90) implication("other_payment_plans","none","age_cat","old",80,81) implication("housing","own","age_cat","old",85,71) implication("own_telephone","yes","age_cat","old",88,40) implication("foreign_worker","yes","age_cat","old",80,96) implication("checking_status","no checking","age_cat","old",87,39) implication("purpose","education","age_cat","old",86,5) implication("housing","for free","age_cat","old",95,10) implication("credit_history","delayed previously","age_cat","old",92,8) implication("purpose","new car","age_cat","old",86,23) implication("pr

## Alternative 2 - running clingo through python 

```
get_single_proxies(
    datafile: str, 
    min_implication_probability: optional int, 
    min_incidence_probability: optional int,
)
```

### Alternative 2.1 - using default values

In [7]:
# No thresholds are passed, so the function's default values are used.
get_single_proxies(datafile)

protected("age_cat") outcome("class") count_items(1000) implication("credit_history","critical/other existing credit","age_cat","old",88,29) implication("savings_status","no known savings","age_cat","old",84,18) implication("employment",">=7","age_cat","old",94,25) implication("personal_status","male single","age_cat","old",89,54) implication("other_parties","none","age_cat","old",81,90) implication("other_payment_plans","none","age_cat","old",80,81) implication("housing","own","age_cat","old",85,71) implication("own_telephone","yes","age_cat","old",88,40) implication("foreign_worker","yes","age_cat","old",80,96) implication("checking_status","no checking","age_cat","old",87,39) implication("purpose","education","age_cat","old",86,5) implication("housing","for free","age_cat","old",95,10) implication("credit_history","delayed previously","age_cat","old",92,8) implication("purpose","new car","age_cat","old",86,23) implication("property_magnitude","no known property","age_cat","old",92,1

### Alternative 2.2 - customizing minimum implication and incidence probabilities

In [8]:
# Positional thresholds, in the order of the documented signature:
# minimum implication probability = 85, minimum incidence probability = 1.
get_single_proxies(datafile, 85, 1)

protected("age_cat") outcome("class") count_items(1000) implication("credit_history","critical/other existing credit","age_cat","old",88,29) implication("employment",">=7","age_cat","old",94,25) implication("personal_status","male single","age_cat","old",89,54) implication("housing","own","age_cat","old",85,71) implication("own_telephone","yes","age_cat","old",88,40) implication("checking_status","no checking","age_cat","old",87,39) implication("purpose","education","age_cat","old",86,5) implication("housing","for free","age_cat","old",95,10) implication("credit_history","delayed previously","age_cat","old",92,8) implication("purpose","new car","age_cat","old",86,23) implication("property_magnitude","no known property","age_cat","old",92,15) implication("purpose","used car","age_cat","old",86,10) implication("job","high qualif/self emp/mgmt","age_cat","old",96,14) implication("savings_status",">=1000","age_cat","old",87,4) implication("personal_status","male div/sep","age_cat","old",96

---

# Multiple proxy discovery

## Choice Rules method

### Alternative 1 - running clingo directly
⚠ This method is **not advised** since it potentially takes a longer runtime.
It will use the mentioned default values.

In [9]:
# `-W none` turns off clingo's warnings; the trailing `0` asks clingo to
# enumerate all answer sets instead of stopping after the first one.
!clingo -W none $datafile clingo_scripts/multi_proxy_choice_rules_default.lp 0

clingo version 5.6.2
Reading from clingo_data/data-german-credit.lp ...
Solving...
Answer: 1
proxy("foreign_worker","yes") implication("age_cat","old",80,96) count_attributes_in_cluster(1)
Answer: 2
proxy("other_parties","none") proxy("foreign_worker","yes") implication("age_cat","old",80,88) count_attributes_in_cluster(2)
Answer: 3
proxy("other_parties","none") implication("age_cat","old",81,90) count_attributes_in_cluster(1)
Answer: 4
proxy("other_parties","none") proxy("other_payment_plans","none") implication("age_cat","old",80,74) count_attributes_in_cluster(2)
Answer: 5
proxy("other_payment_plans","none") implication("age_cat","old",80,81) count_attributes_in_cluster(1)
Answer: 6
proxy("checking_status","<0") proxy("other_parties","none") proxy("housing","own") implication("age_cat","old",80,14) count_attributes_in_cluster(3)
Answer: 7
proxy("other_parties","none") proxy("housing","own") proxy("foreign_worker","yes") implication("age_cat","old",84,62) count_attributes_in_cluster(

Answer: 54
proxy("savings_status","<100") proxy("other_parties","none") proxy("housing","own") implication("age_cat","old",84,37) count_attributes_in_cluster(3)
Answer: 55
proxy("own_telephone","none") proxy("credit_history","critical/other existing credit") proxy("other_parties","none") implication("age_cat","old",86,15) count_attributes_in_cluster(3)
Answer: 56
proxy("own_telephone","none") proxy("credit_history","critical/other existing credit") proxy("other_payment_plans","none") implication("age_cat","old",84,13) count_attributes_in_cluster(3)
Answer: 57
proxy("own_telephone","none") proxy("credit_history","critical/other existing credit") implication("age_cat","old",85,16) count_attributes_in_cluster(2)
Answer: 58
proxy("savings_status","<100") proxy("own_telephone","none") proxy("credit_history","critical/other existing credit") implication("age_cat","old",83,10) count_attributes_in_cluster(3)
Answer: 59
proxy("own_telephone","none") proxy("credit_history","critical/other existi

Answer: 104
proxy("checking_status","<0") proxy("personal_status","male single") implication("age_cat","old",85,14) count_attributes_in_cluster(2)
Answer: 105
proxy("checking_status","<0") proxy("personal_status","male single") proxy("foreign_worker","yes") implication("age_cat","old",85,14) count_attributes_in_cluster(3)
Answer: 106
proxy("own_telephone","none") proxy("checking_status","<0") proxy("personal_status","male single") implication("age_cat","old",83,9) count_attributes_in_cluster(3)
Answer: 107
proxy("checking_status","<0") proxy("personal_status","male single") proxy("job","skilled") implication("age_cat","old",85,8) count_attributes_in_cluster(3)
Answer: 108
proxy("savings_status","<100") proxy("personal_status","male single") implication("age_cat","old",87,32) count_attributes_in_cluster(2)
Answer: 109
proxy("savings_status","<100") proxy("personal_status","male single") proxy("foreign_worker","yes") implication("age_cat","old",87,30) count_attributes_in_cluster(3)
Answe

Answer: 158
proxy("personal_status","male single") proxy("own_telephone","yes") proxy("foreign_worker","yes") implication("age_cat","old",93,24) count_attributes_in_cluster(3)
Answer: 159
proxy("personal_status","male single") proxy("own_telephone","yes") proxy("other_payment_plans","none") implication("age_cat","old",92,18) count_attributes_in_cluster(3)
Answer: 160
proxy("personal_status","male single") proxy("own_telephone","yes") proxy("other_parties","none") implication("age_cat","old",93,22) count_attributes_in_cluster(3)
Answer: 161
proxy("credit_history","existing paid") proxy("own_telephone","yes") proxy("job","skilled") implication("age_cat","old",80,11) count_attributes_in_cluster(3)
Answer: 162
proxy("own_telephone","yes") proxy("job","skilled") implication("age_cat","old",84,24) count_attributes_in_cluster(2)
Answer: 163
proxy("own_telephone","yes") proxy("job","skilled") proxy("housing","own") implication("age_cat","old",87,17) count_attributes_in_cluster(3)
Answer: 164
p

Answer: 210
proxy("checking_status","no checking") proxy("credit_history","critical/other existing credit") proxy("own_telephone","yes") implication("age_cat","old",92,6) count_attributes_in_cluster(3)
Answer: 211
proxy("savings_status","<100") proxy("checking_status","no checking") proxy("own_telephone","yes") implication("age_cat","old",90,8) count_attributes_in_cluster(3)
Answer: 212
proxy("checking_status","no checking") proxy("own_telephone","yes") implication("age_cat","old",93,17) count_attributes_in_cluster(2)
Answer: 213
proxy("checking_status","no checking") proxy("own_telephone","yes") proxy("job","skilled") implication("age_cat","old",89,11) count_attributes_in_cluster(3)
Answer: 214
proxy("checking_status","no checking") proxy("own_telephone","yes") proxy("housing","own") implication("age_cat","old",95,13) count_attributes_in_cluster(3)
Answer: 215
proxy("checking_status","no checking") proxy("own_telephone","yes") proxy("foreign_worker","yes") implication("age_cat","old",

Answer: 259
proxy("employment","1<=X<4") proxy("personal_status","male single") proxy("own_telephone","yes") implication("age_cat","old",89,6) count_attributes_in_cluster(3)
Answer: 260
proxy("employment","1<=X<4") proxy("own_telephone","yes") implication("age_cat","old",86,11) count_attributes_in_cluster(2)
Answer: 261
proxy("employment","1<=X<4") proxy("own_telephone","yes") proxy("foreign_worker","yes") implication("age_cat","old",85,11) count_attributes_in_cluster(3)
Answer: 262
proxy("employment","1<=X<4") proxy("own_telephone","yes") proxy("other_parties","none") implication("age_cat","old",85,10) count_attributes_in_cluster(3)
Answer: 263
proxy("employment","1<=X<4") proxy("own_telephone","yes") proxy("other_payment_plans","none") implication("age_cat","old",87,9) count_attributes_in_cluster(3)
Answer: 264
proxy("employment","1<=X<4") proxy("own_telephone","yes") proxy("housing","own") implication("age_cat","old",88,8) count_attributes_in_cluster(3)
Answer: 265
proxy("savings_st

Answer: 308
proxy("property_magnitude","car") proxy("own_telephone","yes") proxy("other_parties","none") implication("age_cat","old",88,14) count_attributes_in_cluster(3)
Answer: 309
proxy("property_magnitude","car") proxy("credit_history","critical/other existing credit") implication("age_cat","old",87,9) count_attributes_in_cluster(2)
Answer: 310
proxy("property_magnitude","car") proxy("credit_history","critical/other existing credit") proxy("foreign_worker","yes") implication("age_cat","old",87,8) count_attributes_in_cluster(3)
Answer: 311
proxy("property_magnitude","car") proxy("credit_history","critical/other existing credit") proxy("housing","own") implication("age_cat","old",90,8) count_attributes_in_cluster(3)
Answer: 312
proxy("savings_status","<100") proxy("property_magnitude","car") proxy("credit_history","critical/other existing credit") implication("age_cat","old",87,5) count_attributes_in_cluster(3)
Answer: 313
proxy("property_magnitude","car") proxy("credit_history","cri

Answer: 354
proxy("own_telephone","yes") proxy("property_magnitude","real estate") proxy("other_payment_plans","none") implication("age_cat","old",85,6) count_attributes_in_cluster(3)
Answer: 355
proxy("credit_history","existing paid") proxy("personal_status","male single") proxy("property_magnitude","real estate") implication("age_cat","old",80,5) count_attributes_in_cluster(3)
Answer: 356
proxy("checking_status","no checking") proxy("personal_status","male single") proxy("property_magnitude","real estate") implication("age_cat","old",94,5) count_attributes_in_cluster(3)
Answer: 357
proxy("own_telephone","yes") proxy("other_parties","none") proxy("property_magnitude","real estate") implication("age_cat","old",82,6) count_attributes_in_cluster(3)
Answer: 358
proxy("checking_status","no checking") proxy("property_magnitude","real estate") implication("age_cat","old",85,11) count_attributes_in_cluster(2)
Answer: 359
proxy("own_telephone","none") proxy("checking_status","no checking") pro

Answer: 401
proxy("checking_status","0<=X<200") proxy("credit_history","critical/other existing credit") implication("age_cat","old",87,5) count_attributes_in_cluster(2)
Answer: 402
proxy("checking_status","0<=X<200") proxy("credit_history","critical/other existing credit") proxy("foreign_worker","yes") implication("age_cat","old",87,5) count_attributes_in_cluster(3)
Answer: 403
proxy("checking_status","0<=X<200") proxy("personal_status","male single") implication("age_cat","old",87,13) count_attributes_in_cluster(2)
Answer: 404
proxy("checking_status","0<=X<200") proxy("personal_status","male single") proxy("foreign_worker","yes") implication("age_cat","old",87,13) count_attributes_in_cluster(3)
Answer: 405
proxy("checking_status","0<=X<200") proxy("personal_status","male single") proxy("other_payment_plans","none") implication("age_cat","old",86,10) count_attributes_in_cluster(3)
Answer: 406
proxy("checking_status","0<=X<200") proxy("personal_status","male single") proxy("housing","o

Answer: 462
proxy("employment",">=7") proxy("personal_status","male single") proxy("job","skilled") implication("age_cat","old",95,12) count_attributes_in_cluster(3)
Answer: 463
proxy("own_telephone","none") proxy("employment",">=7") proxy("job","skilled") implication("age_cat","old",90,9) count_attributes_in_cluster(3)
Answer: 464
proxy("employment",">=7") proxy("purpose","radio/tv") proxy("job","skilled") implication("age_cat","old",92,5) count_attributes_in_cluster(3)
Answer: 465
proxy("own_telephone","none") proxy("credit_history","critical/other existing credit") proxy("employment",">=7") implication("age_cat","old",96,5) count_attributes_in_cluster(3)
Answer: 466
proxy("credit_history","critical/other existing credit") proxy("employment",">=7") proxy("job","skilled") implication("age_cat","old",95,7) count_attributes_in_cluster(3)
Answer: 467
proxy("employment",">=7") proxy("housing","own") implication("age_cat","old",95,17) count_attributes_in_cluster(2)
Answer: 468
proxy("emplo

Answer: 518
proxy("savings_status","<100") proxy("employment",">=7") proxy("housing","own") implication("age_cat","old",94,8) count_attributes_in_cluster(3)
Answer: 519
proxy("employment","1<=X<4") proxy("property_magnitude","life insurance") implication("age_cat","old",82,6) count_attributes_in_cluster(2)
Answer: 520
proxy("employment","1<=X<4") proxy("property_magnitude","life insurance") proxy("foreign_worker","yes") implication("age_cat","old",81,6) count_attributes_in_cluster(3)
Answer: 521
proxy("employment","1<=X<4") proxy("property_magnitude","life insurance") proxy("housing","own") implication("age_cat","old",91,5) count_attributes_in_cluster(3)
Answer: 522
proxy("employment","1<=X<4") proxy("property_magnitude","life insurance") proxy("other_parties","none") implication("age_cat","old",82,5) count_attributes_in_cluster(3)
Answer: 523
proxy("employment","1<=X<4") proxy("property_magnitude","life insurance") proxy("other_payment_plans","none") implication("age_cat","old",81,5) 

Answer: 564
proxy("property_magnitude","life insurance") proxy("own_telephone","yes") proxy("other_payment_plans","none") implication("age_cat","old",86,6) count_attributes_in_cluster(3)
Answer: 565
proxy("purpose","new car") implication("age_cat","old",86,23) count_attributes_in_cluster(1)
Answer: 566
proxy("own_telephone","none") proxy("purpose","new car") implication("age_cat","old",82,14) count_attributes_in_cluster(2)
Answer: 567
proxy("purpose","new car") proxy("other_parties","none") implication("age_cat","old",85,21) count_attributes_in_cluster(2)
Answer: 568
proxy("own_telephone","none") proxy("purpose","new car") proxy("other_parties","none") implication("age_cat","old",82,13) count_attributes_in_cluster(3)
Answer: 569
proxy("purpose","new car") proxy("other_payment_plans","none") implication("age_cat","old",86,19) count_attributes_in_cluster(2)
Answer: 570
proxy("own_telephone","none") proxy("purpose","new car") proxy("other_payment_plans","none") implication("age_cat","old"

Answer: 615
proxy("employment","1<=X<4") proxy("purpose","new car") proxy("foreign_worker","yes") implication("age_cat","old",83,6) count_attributes_in_cluster(3)
Answer: 616
proxy("employment","1<=X<4") proxy("purpose","new car") proxy("other_payment_plans","none") implication("age_cat","old",85,6) count_attributes_in_cluster(3)
Answer: 617
proxy("purpose","new car") proxy("job","skilled") implication("age_cat","old",85,13) count_attributes_in_cluster(2)
Answer: 618
proxy("purpose","new car") proxy("job","skilled") proxy("foreign_worker","yes") implication("age_cat","old",84,12) count_attributes_in_cluster(3)
Answer: 619
proxy("purpose","new car") proxy("job","skilled") proxy("other_parties","none") implication("age_cat","old",84,11) count_attributes_in_cluster(3)
Answer: 620
proxy("savings_status","<100") proxy("purpose","new car") proxy("job","skilled") implication("age_cat","old",86,7) count_attributes_in_cluster(3)
Answer: 621
proxy("purpose","new car") proxy("personal_status","ma

Answer: 671
proxy("employment","1<=X<4") proxy("job","unskilled resident") proxy("housing","own") implication("age_cat","old",81,6) count_attributes_in_cluster(3)
Answer: 672
proxy("checking_status","no checking") proxy("job","unskilled resident") implication("age_cat","old",88,7) count_attributes_in_cluster(2)
Answer: 673
proxy("checking_status","no checking") proxy("job","unskilled resident") proxy("foreign_worker","yes") implication("age_cat","old",87,6) count_attributes_in_cluster(3)
Answer: 674
proxy("own_telephone","none") proxy("checking_status","no checking") proxy("job","unskilled resident") implication("age_cat","old",87,6) count_attributes_in_cluster(3)
Answer: 675
proxy("checking_status","no checking") proxy("job","unskilled resident") proxy("housing","own") implication("age_cat","old",100,5) count_attributes_in_cluster(3)
Answer: 676
proxy("checking_status","no checking") proxy("job","unskilled resident") proxy("other_parties","none") implication("age_cat","old",87,6) coun

Answer: 723
proxy("savings_status","no known savings") proxy("employment",">=7") implication("age_cat","old",93,6) count_attributes_in_cluster(2)
Answer: 724
proxy("savings_status","no known savings") proxy("employment",">=7") proxy("foreign_worker","yes") implication("age_cat","old",93,6) count_attributes_in_cluster(3)
Answer: 725
proxy("savings_status","no known savings") proxy("employment",">=7") proxy("other_payment_plans","none") implication("age_cat","old",94,5) count_attributes_in_cluster(3)
Answer: 726
proxy("checking_status","no checking") proxy("savings_status","no known savings") proxy("other_parties","none") implication("age_cat","old",89,9) count_attributes_in_cluster(3)
Answer: 727
proxy("own_telephone","none") proxy("savings_status","no known savings") proxy("housing","own") implication("age_cat","old",80,6) count_attributes_in_cluster(3)
Answer: 728
proxy("savings_status","no known savings") proxy("employment",">=7") proxy("other_parties","none") implication("age_cat","

Answer: 772
proxy("employment","4<=X<7") proxy("own_telephone","yes") proxy("other_payment_plans","none") implication("age_cat","old",84,6) count_attributes_in_cluster(3)
Answer: 773
proxy("employment","4<=X<7") proxy("own_telephone","yes") proxy("housing","own") implication("age_cat","old",88,5) count_attributes_in_cluster(3)
Answer: 774
proxy("checking_status","no checking") proxy("employment","4<=X<7") implication("age_cat","old",80,7) count_attributes_in_cluster(2)
Answer: 775
proxy("checking_status","no checking") proxy("employment","4<=X<7") proxy("other_parties","none") implication("age_cat","old",80,7) count_attributes_in_cluster(3)
Answer: 776
proxy("checking_status","no checking") proxy("employment","4<=X<7") proxy("housing","own") implication("age_cat","old",85,6) count_attributes_in_cluster(3)
Answer: 777
proxy("credit_history","existing paid") proxy("employment","4<=X<7") proxy("personal_status","male single") implication("age_cat","old",84,5) count_attributes_in_cluster(3

Answer: 819
proxy("credit_history","existing paid") proxy("property_magnitude","no known property") implication("age_cat","old",92,7) count_attributes_in_cluster(2)
Answer: 820
proxy("credit_history","existing paid") proxy("property_magnitude","no known property") proxy("foreign_worker","yes") implication("age_cat","old",92,7) count_attributes_in_cluster(3)
Answer: 821
proxy("credit_history","existing paid") proxy("property_magnitude","no known property") proxy("personal_status","male single") implication("age_cat","old",94,5) count_attributes_in_cluster(3)
Answer: 822
proxy("credit_history","existing paid") proxy("property_magnitude","no known property") proxy("other_parties","none") implication("age_cat","old",93,6) count_attributes_in_cluster(3)
Answer: 823
proxy("credit_history","existing paid") proxy("property_magnitude","no known property") proxy("other_payment_plans","none") implication("age_cat","old",94,5) count_attributes_in_cluster(3)
Answer: 824
proxy("job","high qualif/sel

Answer: 873
proxy("other_payment_plans","bank") proxy("other_parties","none") implication("age_cat","old",86,12) count_attributes_in_cluster(2)
Answer: 874
proxy("other_payment_plans","bank") proxy("other_parties","none") proxy("foreign_worker","yes") implication("age_cat","old",86,11) count_attributes_in_cluster(3)
Answer: 875
proxy("own_telephone","none") proxy("other_payment_plans","bank") proxy("other_parties","none") implication("age_cat","old",80,7) count_attributes_in_cluster(3)
Answer: 876
proxy("other_payment_plans","bank") proxy("job","skilled") proxy("other_parties","none") implication("age_cat","old",84,6) count_attributes_in_cluster(3)
Answer: 877
proxy("other_payment_plans","bank") proxy("other_parties","none") proxy("housing","own") implication("age_cat","old",90,8) count_attributes_in_cluster(3)
Answer: 878
proxy("savings_status","<100") proxy("other_payment_plans","bank") implication("age_cat","old",83,8) count_attributes_in_cluster(2)
Answer: 879
proxy("savings_status

Answer: 924
proxy("housing","for free") proxy("own_telephone","yes") proxy("other_parties","none") implication("age_cat","old",96,5) count_attributes_in_cluster(3)
Answer: 925
proxy("housing","for free") proxy("own_telephone","yes") implication("age_cat","old",96,6) count_attributes_in_cluster(2)
Answer: 926
proxy("housing","for free") proxy("own_telephone","yes") proxy("foreign_worker","yes") implication("age_cat","old",96,6) count_attributes_in_cluster(3)
Answer: 927
proxy("housing","for free") proxy("property_magnitude","no known property") proxy("own_telephone","yes") implication("age_cat","old",96,5) count_attributes_in_cluster(3)
Answer: 928
proxy("savings_status","100<=X<500") proxy("housing","own") implication("age_cat","old",83,7) count_attributes_in_cluster(2)
Answer: 929
proxy("savings_status","100<=X<500") proxy("housing","own") proxy("foreign_worker","yes") implication("age_cat","old",82,7) count_attributes_in_cluster(3)
Answer: 930
proxy("savings_status","100<=X<500") pro

Answer: 983
proxy("purpose","business") proxy("job","skilled") proxy("other_parties","none") implication("age_cat","old",89,5) count_attributes_in_cluster(3)
Answer: 984
proxy("credit_history","delayed previously") proxy("foreign_worker","yes") implication("age_cat","old",92,8) count_attributes_in_cluster(2)
Answer: 985
proxy("credit_history","delayed previously") implication("age_cat","old",92,8) count_attributes_in_cluster(1)
Answer: 986
proxy("credit_history","delayed previously") proxy("other_parties","none") proxy("foreign_worker","yes") implication("age_cat","old",93,8) count_attributes_in_cluster(3)
Answer: 987
proxy("credit_history","delayed previously") proxy("other_parties","none") implication("age_cat","old",93,8) count_attributes_in_cluster(2)
Answer: 988
proxy("credit_history","delayed previously") proxy("job","skilled") proxy("foreign_worker","yes") implication("age_cat","old",87,5) count_attributes_in_cluster(3)
Answer: 989
proxy("credit_history","delayed previously") pr

### Alternative 2 - running clingo through python

```
get_proxy_clusters_choice_rules(
    datafile: str, 
    min_implication_probability: optional int, 
    min_incidence_probability: optional int,
    min_cluster_size: optional int,
    max_cluster_size: optional int
)
```

#### Alternative 2.1 - using default values

In [None]:
get_proxy_clusters_choice_rules(datafile)
# Omitting the optional arguments uses the default values, i.e. this is the same as:
# get_proxy_clusters_choice_rules(datafile, 80, 5, 1, 3)

#### Alternative 2.2 - customizing values

In [None]:
# Positional arguments, in the order of the documented signature:
# min implication probability = 80, min incidence probability = 1,
# min cluster size = 1, max cluster size = 1.
get_proxy_clusters_choice_rules(datafile, 80, 1, 1, 1)

## Hardcoded method

This method uses default values.

⚠ The `get_proxy_clusters_hardcoded` function should only be called in the root of this repo. 

The minimum implication and incidence probability values **can** be changed but they require some hardcoding. The clingo rules for this method are in the three following files:
* `clingo_scripts/multi_proxy_hardcoded_1.lp`
* `clingo_scripts/multi_proxy_hardcoded_2.lp`
* `clingo_scripts/multi_proxy_hardcoded_3.lp`

To change the minimum **implication** probability, the following code line must be updated in each of the above-mentioned files:
> `    P >= 80,` >> `    P >= <new-minimum-implication>, `


To change the minimum **incidence** probability, the following code line must be updated in each of the above-mentioned files:
> `    I >= 5,` >> `    I >= <new-minimum-incidence>, `

```
get_proxy_clusters_hardcoded(
    datafile: str
)
```

In [10]:
# Takes only the data file: the probability thresholds are hardcoded in the
# clingo_scripts/multi_proxy_hardcoded_{1,2,3}.lp rule files.
get_proxy_clusters_hardcoded(datafile)

protected("age_cat") implication("credit_history","critical/other existing credit","age_cat","old",88,29) implication("savings_status","no known savings","age_cat","old",84,18) implication("employment",">=7","age_cat","old",94,25) implication("personal_status","male single","age_cat","old",89,54) implication("other_parties","none","age_cat","old",81,90) implication("other_payment_plans","none","age_cat","old",80,81) implication("housing","own","age_cat","old",85,71) implication("own_telephone","yes","age_cat","old",88,40) implication("foreign_worker","yes","age_cat","old",80,96) implication("checking_status","no checking","age_cat","old",87,39) implication("purpose","education","age_cat","old",86,5) implication("housing","for free","age_cat","old",95,10) implication("credit_history","delayed previously","age_cat","old",92,8) implication("purpose","new car","age_cat","old",86,23) implication("property_magnitude","no known property","age_cat","old",92,15) implication("savings_status","50

protected("age_cat") implication("checking_status","no checking","credit_history","critical/other existing credit","purpose","radio/tv","age_cat","old",92,5) implication("checking_status","no checking","credit_history","existing paid","purpose","radio/tv","age_cat","old",85,6) implication("checking_status","no checking","credit_history","critical/other existing credit","employment",">=7","age_cat","old",98,5) implication("checking_status","no checking","purpose","radio/tv","employment",">=7","age_cat","old",98,5) implication("checking_status","<0","savings_status","<100","employment",">=7","age_cat","old",92,5) implication("credit_history","critical/other existing credit","savings_status","<100","employment",">=7","age_cat","old",98,5) implication("credit_history","existing paid","savings_status","<100","employment",">=7","age_cat","old",90,5) implication("checking_status","no checking","credit_history","critical/other existing credit","personal_status","male single","age_cat","old",97

## Undersampled Hardcoded method

⚠ This method requires previous data transformation (undersampling) and subsequent verifications.

### .csv data transformation
 **⚠ Edit cell below** if needed


In [11]:
# Same source folder value as in the first configuration cell.
sourcedatafolder = "example_datasets_no_ordinals/"
# Folder where the undersampled clingo data file is expected.
undersampleddatafolder = "undersampled_clingo_data/"
# Passed to undersample_csv_to_clingo and embedded in the output file name
# ("recs-<n_records>-data-<dataset>.lp"); presumably the number of records
# kept by the undersampling - confirm in transform_data.
n_records = 500

In [12]:
# Create the undersampled-data folder (and any missing parents) without
# relying on a POSIX `mkdir` being available; a no-op if it already exists.
os.makedirs(undersampleddatafolder, exist_ok=True)

In [13]:
# Same inputs as the full conversion, plus `n_records`; writes into
# `undersampleddatafolder` instead of `outdatafolder`.
undersample_csv_to_clingo(sourcedatafolder, dataset, undersampleddatafolder, protected_attributes, outcome_attribute, n_records)

 **⚠ Resulting file name** should be the following `undersampleddatafile`:

In [14]:
# Path of the undersampled data file: <undersampleddatafolder>recs-<n_records>-data-<dataset>.lp
undersampleddatafile = f"{undersampleddatafolder}recs-{n_records}-data-{dataset}.lp"

### Hardcoded regular usage
Same usage as before, except that the clingo output is redirected into a string variable so that it can be processed in the next step.

In [15]:
# Run the hardcoded search on the undersampled data file and keep whatever it
# prints in `clingo_output` (stdout is redirected into an in-memory buffer).
clingo_output = ""

with io.StringIO() as captured_stdout:
    with redirect_stdout(captured_stdout):
        get_proxy_clusters_hardcoded(undersampleddatafile)
        # Read the buffer before the `with` block closes it.
        clingo_output = captured_stdout.getvalue()

print(clingo_output)

protected("age_cat") implication("checking_status",">=200","age_cat","old",82,5) implication("credit_history","critical/other existing credit","age_cat","old",91,29) implication("purpose","new car","age_cat","old",86,23) implication("savings_status","500<=X<1000","age_cat","old",82,6) implication("personal_status","male single","age_cat","old",90,59) implication("other_parties","none","age_cat","old",82,88) implication("property_magnitude","no known property","age_cat","old",92,15) implication("other_payment_plans","bank","age_cat","old",91,13) implication("housing","for free","age_cat","old",94,11) implication("foreign_worker","yes","age_cat","old",82,95) implication("savings_status","<100","age_cat","old",80,60) implication("employment","4<=X<7","age_cat","old",84,15) implication("other_parties","co applicant","age_cat","old",85,5) implication("property_magnitude","life insurance","age_cat","old",82,23) implication("other_payment_plans","none","age_cat","old",80,81) implication("hous

### Verifying proxies against full dataset

Processing potential proxies from previous step

In [16]:
# Process the captured clingo output and capture what the helper prints to
# stdout (the `potential_implication(...)` facts shown below) as a string.
proxy_capture = io.StringIO()
potential_proxy_string = ""

with proxy_capture, redirect_stdout(proxy_capture):
    process_potential_implications(clingo_output)
    # Read the buffer before the `with` block closes it.
    potential_proxy_string = proxy_capture.getvalue()

print(potential_proxy_string)


potential_implication("checking_status",">=200","other_parties","none","age_cat","old",81,5) .
potential_implication("checking_status",">=200","foreign_worker","yes","age_cat","old",81,5) .
potential_implication("credit_history","critical/other existing credit","purpose","new car","age_cat","old",95,8) .
potential_implication("credit_history","critical/other existing credit","personal_status","male single","age_cat","old",94,19) .
potential_implication("credit_history","critical/other existing credit","other_parties","none","age_cat","old",91,26) .
potential_implication("credit_history","critical/other existing credit","property_magnitude","no known property","age_cat","old",92,5) .
potential_implication("credit_history","critical/other existing credit","own_telephone","none","age_cat","old",89,15) .
potential_implication("credit_history","critical/other existing credit","foreign_worker","yes","age_cat","old",90,28) .
potential_implication("credit_history","critical/other existing cred

The minimum implication and incidence probability values can be changed as previously explained. The clingo rules for this method are in the three following files:

* `clingo_scripts/multi_proxy_hardcoded_check_1.lp`
* `clingo_scripts/multi_proxy_hardcoded_check_2.lp`
* `clingo_scripts/multi_proxy_hardcoded_check_3.lp`


```
check_implication(
    potential_proxy_string: str,
    datafile: str
)
```


In [17]:
# Verify the potential proxies found on the undersampled data against the full dataset.
# NOTE: `datafile` is not defined in this section; it must already exist from an earlier cell of the notebook.
check_implication(potential_proxy_string, datafile)

cluster size = 1
protected("age_cat")




cluster size = 2
protected("age_cat") implication("checking_status",">=200","other_parties","none","age_cat","old",85,6) implication("checking_status",">=200","foreign_worker","yes","age_cat","old",83,5) implication("credit_history","critical/other existing credit","purpose","new car","age_cat","old",97,7) implication("credit_history","critical/other existing credit","personal_status","male single","age_cat","old",95,18) implication("credit_history","critical/other existing credit","other_parties","none","age_cat","old",89,26) implication("credit_history","critical/other existing credit","own_telephone","none","age_cat","old",85,16) implication("credit_history","critical/other existing credit","foreign_worker","yes","age_cat","old",88,27) implication("credit_history","critical/other existing credit","savings_status","<100","age_cat","old",86,18) implication("credit_history","critical/other existing credit","employment","4<=X<7","age_cat","old",

protected("age_cat") implication("checking_status","no checking","credit_history","critical/other existing credit","personal_status","male single","age_cat","old",97,9) implication("credit_history","existing paid","purpose","new car","personal_status","male single","age_cat","old",90,6) implication("credit_history","critical/other existing credit","savings_status","<100","personal_status","male single","age_cat","old",94,10) implication("purpose","new car","savings_status","<100","personal_status","male single","age_cat","old",96,8) implication("checking_status","<0","savings_status","<100","personal_status","male single","age_cat","old",85,12) implication("checking_status","no checking","savings_status","<100","personal_status","male single","age_cat","old",92,10) implication("checking_status","0<=X<200","savings_status","<100","personal_status","male single","age_cat","old",85,7) implication("credit_history","existing paid","savings_status","<100","personal_status","male single","age

---

⚠ If the previous cell yields **Notebook errors**, do the following steps instead:

In [None]:
# Fallback for when check_implication cannot be run inside the notebook:
# dump the potential proxies to a file that the command-line script can read.
auxfilename = "potential_proxies_" + dataset + ".lp"
# Use a context manager so the file is flushed and closed before the
# external script is run against it.
with open(auxfilename, "w") as f:
    f.write(potential_proxy_string)
# Print the values needed to fill in the command-line call described below.
print("datafile:", datafile)
print("dataset:", dataset)

Run the following in a command line in the root of the repo:

```python3 python_scripts/multi_proxy_undersampled.py potential_proxies_<dataset>.lp <datafile>```

For example:

```python3 python_scripts/multi_proxy_undersampled.py potential_proxies_student-performance-por.lp clingo_data/data-student-performance-por.lp```

---