In [19]:
import json
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Datasets evaluated in every experiment below; each name is interpolated
# into the result-file names that the later cells open.
dataset_names = [
    'adult',
    'heloc',
    'house',
    'magic',
]

## Experiment 1

In [17]:
# Experiment 1: for each dataset, scatter the per-run (# conditions, test
# accuracy) pairs of DR-Net against R2N-Tab at lambda = 1e-<rate>, overlay the
# mean of each method as an 'X' marker, and save one PDF per (dataset, rate).

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old aliases, so pick whichever name this installation has.
# The style does not depend on the dataset or rate, so it is applied once.
darkgrid_style = next(
    (style for style in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
     if style in plt.style.available),
    'default',
)
plt.style.use(darkgrid_style)

for name in dataset_names:

    print('dataset:', name)

    with open(f'exp1-accuracies-{name}.json') as file:
        accuracies = json.load(file)

    with open(f'exp1-sparsities-{name}.json') as file:
        sparsities = json.load(file)

    # The DR-Net baseline is the same for every rate, so its means are
    # computed once per dataset rather than once per rate.
    drnet_accuracies = accuracies['drnet']
    drnet_sparsities = sparsities['drnet']
    mean_accuracy_drnet = sum(drnet_accuracies) / len(drnet_accuracies)
    mean_sparsity_drnet = sum(drnet_sparsities) / len(drnet_sparsities)

    for rate in ['2', '4', '6']:

        print('  rate: 1e-', rate)

        r2ntab_accuracies = accuracies[f'r2ntab{rate}']
        r2ntab_sparsities = sparsities[f'r2ntab{rate}']
        mean_accuracy_r2ntab = sum(r2ntab_accuracies) / len(r2ntab_accuracies)
        mean_sparsity_r2ntab = sum(r2ntab_sparsities) / len(r2ntab_sparsities)

        print('    mean accuracy:', mean_accuracy_r2ntab)
        print('    mean sparsity:', mean_sparsity_r2ntab)

        # A fresh figure per plot (closed after saving) keeps plots independent
        # and avoids leaving an empty figure behind as the cell's output.
        fig, ax = plt.subplots()

        l1 = ax.scatter(drnet_sparsities, drnet_accuracies, c='red', label='DR-Net')
        # Raw f-string: '\l' in a normal string literal is an invalid escape
        # sequence; the resulting label text is unchanged.
        l2 = ax.scatter(r2ntab_sparsities, r2ntab_accuracies, c='blue',
                        label=rf'R2N-Tab $\lambda$=1e-{rate}')

        # Per-method means, drawn as larger black-edged 'X' markers.
        ax.scatter(mean_sparsity_drnet, mean_accuracy_drnet, marker='X', edgecolors='black', s=120, c='red')
        ax.scatter(mean_sparsity_r2ntab, mean_accuracy_r2ntab, marker='X', edgecolors='black', s=120, c='blue')

        # Proxy artist so the legend gets a single, colour-neutral 'Mean' entry.
        combined_legend = plt.Line2D([0], [0], marker='X', color='w', label='Mean', markerfacecolor='black', markersize=12)

        ax.set_xlabel('# Conditions', fontsize=15)
        ax.set_ylabel('Test accuracy', fontsize=15)
        # Y-range is anchored on the two means: 0.1 below the R2N-Tab mean up
        # to 0.04 above the DR-Net mean.
        ax.set_ylim([mean_accuracy_r2ntab - 0.1, mean_accuracy_drnet + 0.04])
        ax.legend(handles=[l1, l2, combined_legend], loc='lower right', fontsize=12)
        fig.savefig(f'{name}-{rate}.pdf')
        plt.close(fig)

dataset: adult
  rate: 1e- 2
    mean accuracy: 0.8125642300679596
    mean sparsity: 77.6
  rate: 1e- 4
    mean accuracy: 0.8266202552627216
    mean sparsity: 188.8
  rate: 1e- 6
    mean accuracy: 0.8258743576993203
    mean sparsity: 166.2
dataset: heloc
  rate: 1e- 2
    mean accuracy: 0.7177820267686423
    mean sparsity: 724.9
  rate: 1e- 4
    mean accuracy: 0.7130975143403441
    mean sparsity: 836.1
  rate: 1e- 6
    mean accuracy: 0.7164913957934991
    mean sparsity: 809.8
dataset: house
  rate: 1e- 2
    mean accuracy: 0.8273206056616195
    mean sparsity: 184.6
  rate: 1e- 4
    mean accuracy: 0.8425499231950845
    mean sparsity: 182.3
  rate: 1e- 6
    mean accuracy: 0.8473557164801406
    mean sparsity: 202.0
dataset: magic
  rate: 1e- 2
    mean accuracy: 0.8370662460567824
    mean sparsity: 216.3
  rate: 1e- 4
    mean accuracy: 0.8435331230283912
    mean sparsity: 304.6
  rate: 1e- 6
    mean accuracy: 0.8417192429022082
    mean sparsity: 295.3


<Figure size 640x480 with 0 Axes>

## Experiment 2

In [None]:
# Experiment 2: per dataset, plot three curves (one per cancel rate, matching
# the legend order 1e-2, 1e-4, 1e-6) of uninformative features removed over
# the epochs, with a dashed horizontal line at the total number of dummies.
#
# NOTE(review): `data`, `index` and `n_dummies` are not defined in any visible
# cell, and this cell has no execution count, so it cannot run from a fresh
# kernel as written. Load/define them before this loop.
# NOTE(review): `index` is never advanced inside the loop, so every dataset
# would plot the same three series unless it is updated per dataset - confirm
# the layout of `data` and derive `index` from the dataset position.
for name in dataset_names:
    plt.plot(data[index])
    plt.plot(data[index+1])
    plt.plot(data[index+2])
    plt.axhline(y=n_dummies, color='r', linestyle='dashed', label='total dummies')
    # The loop variable is `name`; the original referenced an undefined
    # `dataset` in the title and output filename.
    plt.title(f"Feature removal over time on the {name} dataset")
    plt.ylabel("# Uninformative features removed")
    plt.xlabel("Epoch")
    # Labels are assigned positionally to the three curves and the dashed line.
    plt.legend(['cancelrate 1e-2', 'cancelrate 1e-4', 'cancelrate 1e-6', 'total dummies'])
    # Save before show(): showing may release the figure.
    plt.savefig(f'dummy_{name}.pdf')
    plt.show()

## Experiment 3

In [23]:
# Experiment 3: for each dataset, scatter the per-run (# conditions, test
# accuracy) pairs of the 'gb' (Gradient Boosting) and 'pca' results from the
# exp3 files against the R2N-Tab results from the exp1 files, overlay each
# method's mean as an 'X' marker, and save one PDF per dataset.

# lambda exponent (1e-<rate>) of the R2N-Tab run used for each dataset.
# NOTE(review): presumably the preferred setting from Experiment 1 - confirm.
# A dict lookup raises KeyError for an unknown dataset instead of silently
# reusing the rate left over from the previous loop iteration.
r2ntab_rate_by_dataset = {
    'adult': '2',
    'heloc': '2',
    'house': '4',
    'magic': '6',
}

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old aliases, so pick whichever name this installation has.
darkgrid_style = next(
    (style for style in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
     if style in plt.style.available),
    'default',
)
plt.style.use(darkgrid_style)

for name in dataset_names:

    with open(f'exp3-accuracies-{name}.json') as file:
        accuracies = json.load(file)

    with open(f'exp3-sparsities-{name}.json') as file:
        sparsities = json.load(file)

    with open(f'exp1-accuracies-{name}.json') as file:
        accuracies_r2ntab = json.load(file)

    with open(f'exp1-sparsities-{name}.json') as file:
        sparsities_r2ntab = json.load(file)

    rate = r2ntab_rate_by_dataset[name]

    r2ntab_accuracies = accuracies_r2ntab[f'r2ntab{rate}']
    r2ntab_sparsities = sparsities_r2ntab[f'r2ntab{rate}']

    mean_accuracy_gb = sum(accuracies['gb']) / len(accuracies['gb'])
    mean_accuracy_pca = sum(accuracies['pca']) / len(accuracies['pca'])
    mean_accuracy_r2ntab = sum(r2ntab_accuracies) / len(r2ntab_accuracies)
    mean_sparsity_gb = sum(sparsities['gb']) / len(sparsities['gb'])
    mean_sparsity_pca = sum(sparsities['pca']) / len(sparsities['pca'])
    # Divide by the number of sparsity values; the original divided by the
    # length of the accuracy list, which is only right when both lists have
    # the same length.
    mean_sparsity_r2ntab = sum(r2ntab_sparsities) / len(r2ntab_sparsities)

    # A fresh figure per dataset (closed after saving) keeps plots independent
    # and avoids leaving an empty figure behind as the cell's output.
    fig, ax = plt.subplots()

    l1 = ax.scatter(sparsities['gb'], accuracies['gb'], c='red', label='Gradient Boosting')
    l2 = ax.scatter(sparsities['pca'], accuracies['pca'], c='orange', label='PCA')
    # Raw f-string: '\l' in a normal string literal is an invalid escape
    # sequence; the resulting label text is unchanged.
    l3 = ax.scatter(r2ntab_sparsities, r2ntab_accuracies, c='blue',
                    label=rf'R2N-Tab $\lambda$=1e-{rate}')

    # Per-method means, drawn as larger black-edged 'X' markers.
    ax.scatter(mean_sparsity_gb, mean_accuracy_gb, marker='X', edgecolors='black', s=120, c='red')
    ax.scatter(mean_sparsity_pca, mean_accuracy_pca, marker='X', edgecolors='black', s=120, c='orange')
    ax.scatter(mean_sparsity_r2ntab, mean_accuracy_r2ntab, marker='X', edgecolors='black', s=120, c='blue')

    # Proxy artist so the legend gets a single, colour-neutral 'Mean' entry.
    combined_legend = plt.Line2D([0], [0], marker='X', color='w', label='Mean', markerfacecolor='black', markersize=12)

    ax.set_xlabel('# Conditions', fontsize=15)
    ax.set_ylabel('Test accuracy', fontsize=15)
    # Y-range: 0.1 below the lowest of the three means up to 0.04 above the
    # Gradient Boosting mean.
    minimum = min(mean_accuracy_gb, mean_accuracy_pca, mean_accuracy_r2ntab)
    ax.set_ylim([minimum - 0.1, mean_accuracy_gb + 0.04])
    ax.legend(handles=[l1, l2, l3, combined_legend], loc='lower right', fontsize=12)
    fig.savefig(f'exp3{name}-{rate}.pdf')
    plt.close(fig)

<Figure size 640x480 with 0 Axes>

## Experiment 4

In [22]:
# Experiment 4: print the mean and standard deviation of accuracy and
# sparsity for the 'r2ntab' and 'classy' results, dataset by dataset.

# Each result list is flat: consecutive chunks of this many runs belong to
# consecutive datasets, in the order of `dataset_names`.
RUNS_PER_DATASET = 10

with open('exp4accs.json') as file:
    accuracies = json.load(file)

with open('exp4spars.json') as file:
    sparsities = json.load(file)

# Iterate over the dataset list itself instead of a hard-coded range(4), so
# the loop stays in step with `dataset_names`.
for i, name in enumerate(dataset_names):

    print('dataset:', name)

    # Positions of this dataset's runs inside every flat result list.
    runs = slice(i * RUNS_PER_DATASET, (i + 1) * RUNS_PER_DATASET)

    # Same order as before: accuracy for both models, then sparsity for both.
    for metric, results in (('accuracy', accuracies), ('sparsity', sparsities)):
        for model in ('r2ntab', 'classy'):
            values = results[model][runs]
            print(f'  {model} mean {metric}:', sum(values) / len(values), ', std:', np.std(values))

dataset: adult
  r2ntab mean accuracy: 0.8199237526935189 , std: 0.01769005973062399
  classy mean accuracy: 0.8350737609812697 , std: 0.0
  r2ntab mean sparsity: 35.7 , std: 12.41813190459821
  classy mean sparsity: 195.0 , std: 0.0
dataset: heloc
  r2ntab mean accuracy: 0.6951242829827915 , std: 0.016573699354165722
  classy mean accuracy: 0.709847036328872 , std: 1.1102230246251565e-16
  r2ntab mean sparsity: 28.3 , std: 15.84960567332828
  classy mean sparsity: 60.0 , std: 0.0
dataset: house
  r2ntab mean accuracy: 0.8247092385341233 , std: 0.02540702018492649
  classy mean accuracy: 0.8591178406846611 , std: 1.1102230246251565e-16
  r2ntab mean sparsity: 23.0 , std: 7.810249675906654
  classy mean sparsity: 342.0 , std: 0.0
dataset: magic
  r2ntab mean accuracy: 0.8381440588853838 , std: 0.008047814337102475
  classy mean accuracy: 0.8593585699263933 , std: 0.0
  r2ntab mean sparsity: 38.9 , std: 6.28410693734599
  classy mean sparsity: 205.0 , std: 0.0
