Using HoloClean to clean the adult data from the fairness_and_repair_exploratory_analysis notebook.

Based on https://github.com/HoloClean/holoclean/blob/master/examples/holoclean_repair_example.py .

In [3]:
import sys
sys.path.append('holoclean/')
import holoclean

In [5]:
from detect import NullDetector, ViolationDetector
from repair.featurize import *

# Step 1: open a HoloClean session.
# Every session option is collected in one mapping and forwarded unchanged
# as keyword arguments, so the configuration of this run can be read (and
# compared with the other runs in this notebook) in a single place.
session_options = {
    'db_name': 'holo',
    'domain_thresh_1': 0,
    'domain_thresh_2': 0,
    'weak_label_thresh': 0.99,
    'max_domain': 10000,
    'cor_strength': 0.6,
    'nb_cor_strength': 0.8,
    'epochs': 10,
    'weight_decay': 0.01,
    'learning_rate': 0.001,
    'threads': 1,
    'batch_size': 1,
    'verbose': True,
    'timeout': 3 * 60000,
    'feature_norm': False,
    'weight_norm': False,
    'print_fw': True,
}
hc = holoclean.HoloClean(**session_options).session

# Step 2: load the table to repair ('adult', read from adult.csv) and the
# denial constraints, then register the parsed constraints on the dataset.
hc.load_data('adult', 'adult.csv')
hc.load_dcs('holoclean/testdata/adult_constraints.txt')
hc.ds.set_constraints(hc.get_dcs())

# Step 3: detect erroneous cells with the two detectors below.
detectors = [NullDetector(), ViolationDetector()]
hc.detect_errors(detectors)

# Step 4: build the cell domains, then repair the detected errors using
# the four featurizers listed here.
hc.setup_domain()
featurizers = [
    InitAttrFeaturizer(), OccurAttrFeaturizer(),
    FreqFeaturizer(), ConstraintFeaturizer(),
]
hc.repair_errors(featurizers)

15:22:26 - [DEBUG] - initiating session with parameters: {'db_user': 'holocleanuser', 'db_pwd': 'abcd1234', 'db_host': 'localhost', 'db_name': 'holo', 'threads': 1, 'timeout': 180000, 'seed': 45, 'learning_rate': 0.001, 'optimizer': 'adam', 'epochs': 10, 'weight_decay': 0.01, 'momentum': 0.0, 'batch_size': 1, 'weak_label_thresh': 0.99, 'domain_thresh_1': 0, 'domain_thresh_2': 0, 'max_domain': 10000, 'cor_strength': 0.6, 'nb_cor_strength': 0.8, 'feature_norm': False, 'weight_norm': False, 'estimator_epochs': 3, 'estimator_batch_size': 32, 'verbose': True, 'bias': False, 'print_fw': True, 'debug_mode': False}
15:22:27 - [ INFO] - Loaded 32392 rows with 421096 cells
15:22:32 - [DEBUG] - Time to create index: 0.00 secs
15:22:32 - [DEBUG] - Time to create index: 0.00 secs
15:22:32 - [DEBUG] - Time to create index: 0.00 secs
15:22:33 - [DEBUG] - Time to create index: 0.00 secs
15:22:33 - [DEBUG] - Time to create index: 0.00 secs
15:22:33 - [DEBUG] - Time to create index: 0.00 secs
15:22:33 -

22062it [00:06, 3254.62it/s]7 [00:06<00:42, 3251.43it/s][A
22397it [00:06, 3275.25it/s]7 [00:06<00:41, 3276.58it/s][A
22727it [00:07, 3281.11it/s]7 [00:06<00:41, 3282.24it/s][A
23062it [00:07, 3299.65it/s]7 [00:07<00:41, 3296.31it/s][A
23405it [00:07, 3328.62it/s]7 [00:07<00:40, 3327.71it/s][A
23740it [00:07, 3326.79it/s]7 [00:07<00:40, 3322.36it/s][A
24077it [00:07, 3331.61it/s]7 [00:07<00:40, 3329.60it/s][A
24412it [00:07, 3333.32it/s]7 [00:07<00:40, 3330.64it/s][A
24746it [00:07, 3311.05it/s]7 [00:07<00:40, 3330.49it/s][A
25078it [00:07, 3310.24it/s]7 [00:07<00:40, 3302.99it/s][A
25415it [00:07, 3325.27it/s]7 [00:07<00:40, 3326.75it/s][A
25750it [00:07, 3323.34it/s]7 [00:07<00:40, 3321.21it/s][A
26083it [00:08, 3302.51it/s]7 [00:07<00:40, 3302.12it/s][A
26414it [00:08, 3302.07it/s]7 [00:08<00:40, 3301.85it/s][A
26745it [00:08, 3279.88it/s]7 [00:08<00:40, 3279.82it/s][A
27076it [00:08, 3288.75it/s]7 [00:08<00:40, 3284.69it/s][A
27413it [00:08, 3301.19it/s]7 [00:08<00:

67826it [00:20, 3335.14it/s]7 [00:20<00:27, 3339.99it/s][A
68165it [00:20, 3343.74it/s]7 [00:20<00:27, 3348.93it/s][A
68501it [00:20, 3340.58it/s]7 [00:20<00:27, 3342.18it/s][A
68843it [00:20, 3356.35it/s]7 [00:20<00:26, 3353.40it/s][A
69179it [00:21, 2864.76it/s]7 [00:20<00:31, 2868.37it/s][A
69516it [00:21, 2997.07it/s]7 [00:21<00:29, 2996.21it/s][A
69847it [00:21, 3080.25it/s]7 [00:21<00:28, 3083.80it/s][A
70175it [00:21, 3125.84it/s]7 [00:21<00:28, 3123.43it/s][A
70497it [00:21, 3153.41it/s]7 [00:21<00:28, 3148.11it/s][A
70822it [00:21, 3168.36it/s]7 [00:21<00:27, 3177.59it/s][A
71148it [00:21, 3193.61it/s]7 [00:21<00:27, 3193.11it/s][A
71491it [00:21, 3249.34it/s]7 [00:21<00:26, 3245.19it/s][A
71824it [00:21, 3273.12it/s]7 [00:21<00:26, 3260.15it/s][A
72164it [00:21, 3295.53it/s]7 [00:21<00:26, 3295.75it/s][A
72495it [00:22, 3289.29it/s]7 [00:21<00:26, 3297.25it/s][A
72829it [00:22, 3294.76it/s]7 [00:22<00:26, 3296.30it/s][A
73176it [00:22, 3338.17it/s]7 [00:22<00:

113278it [00:34, 3288.27it/s]7 [00:34<00:13, 3304.40it/s][A
113609it [00:34, 3281.74it/s]7 [00:34<00:13, 3289.31it/s][A
113939it [00:34, 3284.29it/s]7 [00:34<00:13, 3288.26it/s][A
114268it [00:34, 3248.77it/s]7 [00:34<00:13, 3245.30it/s][A
114594it [00:34, 3199.01it/s]7 [00:34<00:13, 3186.61it/s][A
114915it [00:34, 3149.99it/s]7 [00:34<00:13, 3157.76it/s][A
115246it [00:34, 3188.65it/s]7 [00:34<00:13, 3181.48it/s][A
115566it [00:35, 3186.22it/s]7 [00:34<00:13, 3180.40it/s][A
115893it [00:35, 3200.64it/s]7 [00:35<00:13, 3200.92it/s][A
116224it [00:35, 3229.30it/s]7 [00:35<00:13, 3229.90it/s][A
116557it [00:35, 3257.94it/s]7 [00:35<00:12, 3256.37it/s][A
116894it [00:35, 3285.50it/s]7 [00:35<00:12, 3284.01it/s][A
117237it [00:35, 3325.23it/s]7 [00:35<00:12, 3322.22it/s][A
117575it [00:35, 3328.70it/s]7 [00:35<00:12, 3328.36it/s][A
117909it [00:35, 3300.03it/s]7 [00:35<00:12, 3308.99it/s][A
118240it [00:35, 3267.62it/s]7 [00:35<00:12, 3266.74it/s][A
118567it [00:35, 3264.13

156987it [00:48, 2877.38it/s]7 [00:47<00:00, 3049.53it/s][A
157278it [00:48, 2884.40it/s]7 [00:48<00:00, 2874.08it/s][A
157611it [00:48, 2998.89it/s]7 [00:48<00:00, 2886.16it/s][A
157914it [00:48, 3002.74it/s]7 [00:48<00:00, 2997.93it/s][A
158216it [00:48, 2959.53it/s]7 [00:48<00:00, 2986.87it/s][A
158514it [00:48, 2884.95it/s]7 [00:48<00:00, 2963.14it/s][A
158809it [00:48, 2893.18it/s]7 [00:48<00:00, 2887.33it/s][A
100%|█████████▉| 158809/158857 [00:48<00:00, 2896.17it/s][A
158857it [00:48, 3244.95it/s]7 [00:48<00:00, 3263.25it/s][A
15:24:09 - [DEBUG] - DONE assembling cell domain table in 52.63s
15:24:09 - [ INFO] - number of (additional) weak labels assigned from posterior model: 391
15:24:09 - [DEBUG] - DONE generating domain and weak labels
15:24:37 - [DEBUG] - Time to create index: 0.00 secs
15:24:37 - [DEBUG] - Time to create index: 0.00 secs
15:24:37 - [DEBUG] - Time to create index: 0.00 secs
15:24:42 - [DEBUG] - Time to create table: 0.00 secs
15:24:55 - [DEBUG] - Ti

15:41:03 - [DEBUG] - Time to store featurizer weights: 0.00 secs


'featurizer InitAttrFeaturizer,size 12,max 1.0000,min 1.0000,avg 1.0000,abs_avg 1.0000,weights:\nAge 1.0\nWorkclass 1.0\nEducation 1.0\nMaritalstatus 1.0\nOccupation 1.0\nRelationship 1.0\nRace 1.0\nSex 1.0\nHoursPerWeek 1.0\nCountry 1.0\nIncome 1.0\nIncomeBinary 1.0\nfeaturizer OccurAttrFeaturizer,size 144,max 0.4481,min -0.0054,avg 0.0938,abs_avg 0.0939,weights:\nAge X Age 0.0\nAge X Workclass 0.0\nAge X Education -0.0\nAge X Maritalstatus 0.0\nAge X Occupation -0.0\nAge X Relationship -0.0\nAge X Race -0.0\nAge X Sex -0.0\nAge X HoursPerWeek -0.0\nAge X Country -0.0\nAge X Income -0.0\nAge X IncomeBinary -0.0\nWorkclass X Age 0.213\nWorkclass X Workclass 0.0\nWorkclass X Education 0.219\nWorkclass X Maritalstatus 0.214\nWorkclass X Occupation 0.189\nWorkclass X Relationship 0.215\nWorkclass X Race 0.213\nWorkclass X Sex 0.213\nWorkclass X HoursPerWeek 0.223\nWorkclass X Country 0.209\nWorkclass X Income 0.213\nWorkclass X IncomeBinary 0.213\nEducation X Age -0.0\nEducation X Workcla

For the original data (the same pipeline, run on adult_data.csv):

In [5]:
from detect import NullDetector, ViolationDetector
from repair.featurize import *

# Step 1: open a HoloClean session.
# The options are kept in a single mapping and expanded into keyword
# arguments; the values are the same ones used for the other runs in this
# notebook, only the input table differs.
holoclean_config = {
    'db_name': 'holo',
    'domain_thresh_1': 0,
    'domain_thresh_2': 0,
    'weak_label_thresh': 0.99,
    'max_domain': 10000,
    'cor_strength': 0.6,
    'nb_cor_strength': 0.8,
    'epochs': 10,
    'weight_decay': 0.01,
    'learning_rate': 0.001,
    'threads': 1,
    'batch_size': 1,
    'verbose': True,
    'timeout': 3 * 60000,
    'feature_norm': False,
    'weight_norm': False,
    'print_fw': True,
}
hc = holoclean.HoloClean(**holoclean_config).session

# Step 2: load the table to repair ('adult_data', read from adult_data.csv)
# and the denial constraints, then register the parsed constraints on the
# dataset.
hc.load_data('adult_data', 'adult_data.csv')
hc.load_dcs('adult_constraints.txt')
hc.ds.set_constraints(hc.get_dcs())

# Step 3: detect erroneous cells with the two detectors below.
detectors = [NullDetector(), ViolationDetector()]
hc.detect_errors(detectors)

# Step 4: build the cell domains, then repair the detected errors using
# the four featurizers listed here.
hc.setup_domain()
featurizers = [
    InitAttrFeaturizer(), OccurAttrFeaturizer(),
    FreqFeaturizer(), ConstraintFeaturizer(),
]
hc.repair_errors(featurizers)

01:27:07 - [DEBUG] - initiating session with parameters: {'db_user': 'holocleanuser', 'db_pwd': 'abcd1234', 'db_host': 'localhost', 'db_name': 'holo', 'threads': 1, 'timeout': 180000, 'seed': 45, 'learning_rate': 0.001, 'optimizer': 'adam', 'epochs': 10, 'weight_decay': 0.01, 'momentum': 0.0, 'batch_size': 1, 'weak_label_thresh': 0.99, 'domain_thresh_1': 0, 'domain_thresh_2': 0, 'max_domain': 10000, 'cor_strength': 0.6, 'nb_cor_strength': 0.8, 'feature_norm': False, 'weight_norm': False, 'estimator_epochs': 3, 'estimator_batch_size': 32, 'verbose': True, 'bias': False, 'print_fw': True, 'debug_mode': False}
01:27:07 - [ INFO] - Loaded 32392 rows with 421096 cells
01:27:13 - [DEBUG] - Time to create index: 0.00 secs
01:27:13 - [DEBUG] - Time to create index: 0.00 secs
01:27:13 - [DEBUG] - Time to create index: 0.00 secs
01:27:13 - [DEBUG] - Time to create index: 0.00 secs
01:27:13 - [DEBUG] - Time to create index: 0.00 secs
01:27:13 - [DEBUG] - Time to create index: 0.00 secs
01:27:13 -

59463it [00:20, 2974.52it/s]3 [00:20<00:33, 2969.54it/s][A
59764it [00:20, 2984.86it/s]3 [00:20<00:33, 2968.49it/s][A
60063it [00:20, 2967.62it/s]3 [00:20<00:32, 2985.71it/s][A
60360it [00:20, 2967.05it/s]3 [00:20<00:33, 2954.54it/s][A
60662it [00:21, 2979.87it/s]3 [00:20<00:32, 2982.64it/s][A
60963it [00:21, 2976.99it/s]3 [00:20<00:32, 2973.55it/s][A
61269it [00:21, 2989.52it/s]3 [00:21<00:32, 2999.49it/s][A
61571it [00:21, 2989.58it/s]3 [00:21<00:32, 2989.22it/s][A
61870it [00:21, 2962.04it/s]3 [00:21<00:32, 2984.39it/s][A
62179it [00:21, 2988.11it/s]3 [00:21<00:32, 2975.89it/s][A
62482it [00:21, 2993.20it/s]3 [00:21<00:31, 3000.61it/s][A
62794it [00:21, 3029.96it/s]3 [00:21<00:31, 3020.74it/s][A
63098it [00:21, 3017.90it/s]3 [00:21<00:31, 3022.54it/s][A
63400it [00:21, 3016.98it/s]3 [00:21<00:31, 3017.95it/s][A
63702it [00:22, 3008.42it/s]3 [00:21<00:31, 3017.99it/s][A
64003it [00:22, 2995.29it/s]3 [00:21<00:31, 2992.91it/s][A
64307it [00:22, 3006.71it/s]3 [00:22<00:

138731it [00:48, 2907.34it/s]3 [00:47<00:06, 2903.76it/s][A
139024it [00:48, 2909.27it/s]3 [00:48<00:06, 2917.58it/s][A
139315it [00:48, 2901.26it/s]3 [00:48<00:06, 2911.07it/s][A
139611it [00:48, 2912.89it/s]3 [00:48<00:06, 2910.86it/s][A
139903it [00:48, 2908.39it/s]3 [00:48<00:06, 2916.50it/s][A
140194it [00:48, 2894.12it/s]3 [00:48<00:06, 2892.70it/s][A
140484it [00:48, 2893.20it/s]3 [00:48<00:06, 2890.51it/s][A
140774it [00:48, 2880.80it/s]3 [00:48<00:05, 2901.91it/s][A
141064it [00:48, 2886.18it/s]3 [00:48<00:05, 2891.85it/s][A
141353it [00:49, 2839.84it/s]3 [00:48<00:05, 2831.17it/s][A
141643it [00:49, 2852.23it/s]3 [00:49<00:05, 2847.43it/s][A
141939it [00:49, 2877.40it/s]3 [00:49<00:05, 2874.22it/s][A
142228it [00:49, 2874.01it/s]3 [00:49<00:05, 2874.47it/s][A
142516it [00:49, 2865.68it/s]3 [00:49<00:05, 2870.43it/s][A
142809it [00:49, 2881.08it/s]3 [00:49<00:05, 2877.84it/s][A
143099it [00:49, 2879.53it/s]3 [00:49<00:05, 2876.54it/s][A
143389it [00:49, 2881.86

 80%|████████  | 8/10 [09:26<02:21, 70.64s/it]01:44:40 - [DEBUG] - Epoch 9, cost = 0.280715, acc = 98.25%
 90%|█████████ | 9/10 [10:36<01:10, 70.41s/it]01:45:56 - [DEBUG] - Epoch 10, cost = 0.280715, acc = 98.25%
100%|██████████| 10/10 [11:51<00:00, 71.98s/it]
01:45:56 - [ INFO] - DONE training repair model.
01:45:56 - [DEBUG] - Time to fit repair model: 713.18 secs
01:45:56 - [ INFO] - inferring on 583 examples (cells)
01:46:04 - [DEBUG] - Time to execute query: 5.31 secs
01:46:07 - [DEBUG] - Time to create index: 0.00 secs
01:46:07 - [DEBUG] - Time to create index: 0.00 secs
01:46:07 - [ INFO] - DONE inferring repairs.
01:46:07 - [DEBUG] - Time to infer correct cell values: 8.47 secs
01:46:08 - [DEBUG] - Time to create table: 0.00 secs
01:46:08 - [DEBUG] - Time to create index: 0.00 secs
01:46:08 - [DEBUG] - Time to create index: 0.00 secs
01:46:08 - [ INFO] - DONE collecting the inferred values.
01:46:08 - [DEBUG] - Time to collect inferred values: 0.08 secs
01:46:16 - [ INFO] - DON

'featurizer InitAttrFeaturizer,size 12,max 1.0000,min 1.0000,avg 1.0000,abs_avg 1.0000,weights:\nAge 1.0\nWorkclass 1.0\nEducation 1.0\nMaritalstatus 1.0\nOccupation 1.0\nRelationship 1.0\nRace 1.0\nSex 1.0\nHoursPerWeek 1.0\nCountry 1.0\nIncome 1.0\nIncomeBinary 1.0\nfeaturizer OccurAttrFeaturizer,size 144,max 0.9271,min -0.0115,avg 0.1021,abs_avg 0.1023,weights:\nAge X Age 0.0\nAge X Workclass 0.0\nAge X Education -0.0\nAge X Maritalstatus 0.0\nAge X Occupation -0.0\nAge X Relationship -0.0\nAge X Race -0.0\nAge X Sex -0.0\nAge X HoursPerWeek -0.0\nAge X Country -0.0\nAge X Income -0.0\nAge X IncomeBinary -0.0\nWorkclass X Age 0.253\nWorkclass X Workclass 0.0\nWorkclass X Education 0.263\nWorkclass X Maritalstatus 0.234\nWorkclass X Occupation 0.385\nWorkclass X Relationship 0.227\nWorkclass X Race 0.21\nWorkclass X Sex 0.211\nWorkclass X HoursPerWeek 0.251\nWorkclass X Country 0.209\nWorkclass X Income 0.209\nWorkclass X IncomeBinary 0.209\nEducation X Age -0.0\nEducation X Workclas

In [8]:
import sys
sys.path.append('../')
import holoclean
# NullDetector and ViolationDetector are the detectors this cell actually
# instantiates in step 3, so they are imported here explicitly; otherwise the
# cell only works when an earlier cell has already put them in the kernel's
# namespace and fails with NameError when run on its own.
# ErrorsLoaderDetector is not used below; its import is kept as it was.
from detect import ErrorsLoaderDetector, NullDetector, ViolationDetector
from repair.featurize import *


# 1. Setup a HoloClean session.
# Same session options as the other repair runs in this notebook; only the
# input table loaded in step 2 differs.
hc = holoclean.HoloClean(
    db_name='holo',
    domain_thresh_1=0,
    domain_thresh_2=0,
    weak_label_thresh=0.99,
    max_domain=10000,
    cor_strength=0.6,
    nb_cor_strength=0.8,
    epochs=10,
    weight_decay=0.01,
    learning_rate=0.001,
    threads=1,
    batch_size=1,
    verbose=True,
    timeout=3*60000,
    feature_norm=False,
    weight_norm=False,
    print_fw=True
).session

# 2. Load training data and denial constraints.
# The table is named 'adult2' and read from adult2.csv; the parsed
# constraints are then registered on the session's dataset.
hc.load_data('adult2', 'adult2.csv')
hc.load_dcs('adult_constraints.txt')
hc.ds.set_constraints(hc.get_dcs())

# 3. Detect erroneous cells.
detectors = [NullDetector(), ViolationDetector()]
hc.detect_errors(detectors)

# 4. Repair errors utilizing the defined features.
# The domain is set up first, then the featurizers are created and passed to
# the repair step.
hc.setup_domain()
featurizers = [
    InitAttrFeaturizer(),
    OccurAttrFeaturizer(),
    FreqFeaturizer(),
    ConstraintFeaturizer(),
]

hc.repair_errors(featurizers)

13:56:06 - [DEBUG] - initiating session with parameters: {'db_user': 'holocleanuser', 'db_pwd': 'abcd1234', 'db_host': 'localhost', 'db_name': 'holo', 'threads': 1, 'timeout': 180000, 'seed': 45, 'learning_rate': 0.001, 'optimizer': 'adam', 'epochs': 10, 'weight_decay': 0.01, 'momentum': 0.0, 'batch_size': 1, 'weak_label_thresh': 0.99, 'domain_thresh_1': 0, 'domain_thresh_2': 0, 'max_domain': 10000, 'cor_strength': 0.6, 'nb_cor_strength': 0.8, 'feature_norm': False, 'weight_norm': False, 'estimator_epochs': 3, 'estimator_batch_size': 32, 'verbose': True, 'bias': False, 'print_fw': True, 'debug_mode': False}
13:56:07 - [ INFO] - Loaded 32392 rows with 421096 cells
13:56:15 - [DEBUG] - Time to create index: 0.00 secs
13:56:15 - [DEBUG] - Time to create index: 0.00 secs
13:56:16 - [DEBUG] - Time to create index: 0.00 secs
13:56:16 - [DEBUG] - Time to create index: 0.00 secs
13:56:16 - [DEBUG] - Time to create index: 0.00 secs
13:56:16 - [DEBUG] - Time to create index: 0.00 secs
13:56:16 -

67058it [00:20, 3303.44it/s]2 [00:20<00:27, 3308.58it/s][A
67393it [00:20, 3308.34it/s]2 [00:20<00:27, 3302.38it/s][A
67734it [00:21, 3333.91it/s]2 [00:20<00:27, 3329.02it/s][A
68068it [00:21, 3319.86it/s]2 [00:21<00:27, 3315.42it/s][A
68412it [00:21, 3349.10it/s]2 [00:21<00:27, 3351.72it/s][A
68748it [00:21, 3335.93it/s]2 [00:21<00:27, 3320.83it/s][A
69085it [00:21, 3334.65it/s]2 [00:21<00:26, 3339.09it/s][A
69421it [00:21, 3338.76it/s]2 [00:21<00:26, 3354.86it/s][A
69766it [00:21, 3360.25it/s]2 [00:21<00:26, 3356.98it/s][A
70106it [00:21, 3368.48it/s]2 [00:21<00:26, 3368.75it/s][A
70443it [00:21, 3360.21it/s]2 [00:21<00:26, 3368.66it/s][A
70780it [00:21, 3361.45it/s]2 [00:21<00:26, 3358.56it/s][A
71120it [00:22, 3362.62it/s]2 [00:21<00:26, 3356.97it/s][A
71457it [00:22, 3353.79it/s]2 [00:22<00:26, 3347.98it/s][A
71796it [00:22, 3352.48it/s]2 [00:22<00:25, 3362.87it/s][A
72132it [00:22, 3340.67it/s]2 [00:22<00:26, 3333.71it/s][A
72469it [00:22, 3343.33it/s]2 [00:22<00:

156424it [00:48, 3269.55it/s]2 [00:48<00:00, 3250.72it/s][A
156759it [00:48, 3285.10it/s]2 [00:48<00:00, 3283.16it/s][A
157089it [00:48, 3284.91it/s]2 [00:48<00:00, 3277.10it/s][A
157433it [00:48, 3319.61it/s]2 [00:48<00:00, 3309.32it/s][A
157766it [00:48, 3280.24it/s]2 [00:48<00:00, 3288.77it/s][A
158107it [00:48, 3308.37it/s]2 [00:48<00:00, 3301.17it/s][A
158439it [00:48, 3244.19it/s]2 [00:48<00:00, 3276.44it/s][A
158765it [00:48, 3237.60it/s]2 [00:48<00:00, 3241.68it/s][A
158882it [00:49, 3231.86it/s]2 [00:48<00:00, 3250.14it/s][A
13:57:52 - [DEBUG] - DONE assembling cell domain table in 52.74s
13:57:52 - [ INFO] - number of (additional) weak labels assigned from posterior model: 370
13:57:52 - [DEBUG] - DONE generating domain and weak labels
13:58:35 - [DEBUG] - Time to create index: 0.00 secs
13:58:35 - [DEBUG] - Time to create index: 0.00 secs
13:58:35 - [DEBUG] - Time to create index: 0.00 secs
13:58:45 - [DEBUG] - Time to create table: 0.00 secs
13:59:02 - [DEBUG] - Ti

14:15:01 - [DEBUG] - Time to store featurizer weights: 0.00 secs


'featurizer InitAttrFeaturizer,size 12,max 1.0000,min 1.0000,avg 1.0000,abs_avg 1.0000,weights:\nAge 1.0\nWorkclass 1.0\nEducation 1.0\nMaritalstatus 1.0\nOccupation 1.0\nRelationship 1.0\nRace 1.0\nSex 1.0\nHoursPerWeek 1.0\nCountry 1.0\nIncome 1.0\nIncomeBinary 1.0\nfeaturizer OccurAttrFeaturizer,size 144,max 0.4395,min -0.0080,avg 0.0903,abs_avg 0.0905,weights:\nAge X Age 0.0\nAge X Workclass 0.0\nAge X Education -0.0\nAge X Maritalstatus 0.0\nAge X Occupation -0.0\nAge X Relationship -0.0\nAge X Race -0.0\nAge X Sex -0.0\nAge X HoursPerWeek -0.0\nAge X Country -0.0\nAge X Income -0.0\nAge X IncomeBinary -0.0\nWorkclass X Age 0.195\nWorkclass X Workclass 0.0\nWorkclass X Education 0.198\nWorkclass X Maritalstatus 0.197\nWorkclass X Occupation 0.163\nWorkclass X Relationship 0.194\nWorkclass X Race 0.195\nWorkclass X Sex 0.194\nWorkclass X HoursPerWeek 0.204\nWorkclass X Country 0.193\nWorkclass X Income 0.197\nWorkclass X IncomeBinary 0.197\nEducation X Age -0.0\nEducation X Workcla