# Representation Fairness
As seen in (past experiment), the DumbCNN with the quadratic activation fails to learn a linear labelling (add plot). Here we search for a minimal CNN architecture that matches the FCNN's test accuracy for a fixed training-set size of $d$-dimensional images, and then plot its sample complexity.

In [1]:
from IPython import get_ipython
from IPython.core.magic import register_cell_magic
ipython = get_ipython()

# New cell magic to use python variables in cell magic
@register_cell_magic
def format(line, cell):
    """Run another cell magic after substituting Python globals into the cell.

    Usage: ``%%format <magic> [args...]``. The cell body is passed through
    ``str.format(**globals())``, so ``{name}`` placeholders are replaced by the
    value of the global variable ``name`` (literal braces must be written as
    ``{{`` and ``}}``). The formatted text is then handed to the cell magic
    ``<magic>`` with ``[args...]`` as its argument line.

    Args:
        line: Text following ``%%format`` on the magic line; the first
            whitespace-separated token is the target magic, the rest (possibly
            empty) is forwarded to it unchanged.
        cell: Raw cell body used as the format template.

    Note:
        The function name defines the magic name (``%%format``), so it
        intentionally keeps the name even though it shadows the ``format``
        builtin in this namespace.
    """
    # partition() tolerates a magic without arguments and arguments that
    # themselves contain spaces; split(' ') only worked for exactly two tokens.
    magic, _, program = line.strip().partition(' ')
    ipython.run_cell_magic(magic, program.strip(), cell.format(**globals()))

In [6]:
# Experiment control
# When True, the results file for the current parameter set is overwritten with
# an empty dict before training, discarding results of earlier runs.
delete_results  = True # Delete results of previous runs for that parameter set (all models).

In [7]:
# (TODO) Turn this into a .py with options.
# Imports
import os
import pickle
import sys

import numpy as np
import torch

# Local python scripts
from helpers import calc_label, train_model
from models import ModelLoader

# Use GPU if available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device: ", device, "\n")

# Seed the torch and numpy random number generators
torch.manual_seed(0)
np.random.seed(0)

# Model loader
loader = ModelLoader()

# Global constants
learning_rate = 0.01
tolerance = 0.01  # Max |CNN accuracy - FCNN accuracy| accepted as a match by the channel search
batch_size = 64
max_epochs = 100
window = 10  # Window size for convergence crit.
rel_conv_crit = 0.01
abs_conv_crit = 0.01

# Input shape
channels = 3  # RGB images
img_size = 8  # Image side length
input_shape = (channels, img_size, img_size)
# Flattened input dimension; derived from `channels` so it stays consistent
# with `input_shape` if the number of channels is changed.
input_size = channels * img_size * img_size

# Data size
mag = 5  # The training set holds 10**mag samples
splits = 1
N_tr = 10**mag
N_te = 10000

# Full training and test sets: every entry is drawn i.i.d. from N(0, 1).
# The training set is sampled before the test set; keep this order so the
# seeded numpy stream yields the same data on every run.
gauss_x_tr = torch.tensor(np.random.normal(0, 1, size=(N_tr, *input_shape)), dtype=torch.float32, device=device)
gauss_x_te = torch.tensor(np.random.normal(0, 1, size=(N_te, *input_shape)), dtype=torch.float32, device=device)

# Full training and test labels, computed by calc_label with parameter p.
# NOTE(review): p=1 presumably selects the "h1" labelling - confirm in helpers.calc_label.
p_norm = 1
gauss_y_tr = calc_label(gauss_x_tr, p=p_norm).to(device)
gauss_y_te = calc_label(gauss_x_te, p=p_norm).to(device)

# Results file for this parameter set (training-set magnitude, image side
# length, max epochs). It holds a pickled dict of recorded test accuracies.
file_path = f'results/week4/mag{mag}_l{img_size}_e{max_epochs}.pkl'

# Make sure the results directory exists; open() does not create it.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Option del - delete contents of results file. The file is also initialised
# when it does not exist yet, so that it can always be read back later.
if delete_results or not os.path.exists(file_path):
    with open(file_path, 'wb') as file:
        pickle.dump(dict(), file)

# Train a single FCNN on the dataset and record its test accuracy. (TODO)
# `epsilon` is the second value returned by train_model; it is used below as
# the target accuracy the CNN has to match.
fcnn = loader.load("FCNN", "Quad", {'input_shape': input_shape})
_, epsilon = train_model(fcnn, batch_size, learning_rate, gauss_x_tr, gauss_y_tr, gauss_x_te, gauss_y_te, rel_conv_crit, abs_conv_crit, max_epochs, window, N_te)

# Find a CNN by bisection on out_channels such that it matches the FCNN's
# test accuracy to within `tolerance`.
#   ch_curr - number of out_channels tried in the current iteration
#   ch_hist - sorted list of all values tried since the last reset (0 is the lower bound)
#   ch_prev - previous candidate, used to detect a stalled search
found = False
ch_curr = np.random.randint(10, 50)
ch_hist = [0]
ch_prev = 0
iterate = 1
name = "DumbCNN+Quad"  # Key under which results of this model are stored
while not found:
    print("Iterate ", iterate, "Out channels: ", ch_curr)

    # DumbCNN to adjust
    model = loader.load("DumbCNN", "Quad", {'input_shape': input_shape, 'out_channels': ch_curr})

    _, accuracy = train_model(model, batch_size, learning_rate, gauss_x_tr, gauss_y_tr, gauss_x_te, gauss_y_te, rel_conv_crit, abs_conv_crit, max_epochs, window, N_te)
    found = abs(accuracy - epsilon) < tolerance

    # Finished training
    print("Finished training. Acc: ", accuracy)

    # Read file contents; start from an empty record if no results file
    # exists yet (e.g. first run with delete_results disabled).
    try:
        with open(file_path, 'rb') as file:
            test_acc = pickle.load(file)
    except FileNotFoundError:
        test_acc = {}

    # Add experiment to results
    test_acc[(name, ch_curr)] = accuracy

    # Write accuracy to file after every iteration so partial results
    # survive an interrupted run.
    with open(file_path, 'wb') as file:
        pickle.dump(test_acc, file)

    # Finish if found.
    if found:
        break

    # Bisection step on the number of out_channels: move towards the nearest
    # already-tried neighbour in the direction that should close the gap.
    ch_hist.append(ch_curr)
    ch_hist.sort()
    idx = ch_hist.index(ch_curr)
    if accuracy > epsilon:
        # CNN is more accurate than the FCNN: try fewer channels.
        ch_curr = ch_curr // 2 if idx == 0 else (ch_curr + ch_hist[idx - 1]) // 2
    else:
        # CNN is less accurate than the FCNN: try more channels
        # (double when there is no larger value tried yet).
        ch_curr = 2 * ch_curr if idx == len(ch_hist) - 1 else (ch_curr + ch_hist[idx + 1]) // 2

    # If converged (not found) then reset the search from a new random start.
    # NOTE(review): ch_prev starts at 0 rather than at the initial ch_curr, so
    # the first comparison after a (re)start is against 0 - confirm intended.
    if abs(ch_prev - ch_curr) < 4:
        ch_curr = np.random.randint(10, 50)
        ch_hist = [0]
        ch_prev = 0
    else:
        ch_prev = ch_curr

    # Try again with a different number of out_channels
    iterate += 1

print(f"Finished after {iterate} iterations.")


Windows PowerShell
Copyright (C) Microsoft Corporation. All rights reserved.

Try the new cross-platform PowerShell https://aka.ms/pscore6

PS C:\Users\oilio\Documents\School Documents\EPFL\MA3\Semester Project\Tasks> python tr_size_vs_te_acc.py -actv "Quad" -arch "ParamFairCNN" -min 4 -max 4 -f results/week4/prop0.5mag4_l6_e10.pkl -s 1 -e 10 -p 1 -l 6 -del True -prop 0.5
Using device:  cpu 



In [None]:
# (TODO) Print the data.
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Load data: the dict of test accuracies keyed by (model name, x value),
# read from the results file defined in the experiment cell (`file_path`).
# Only unpickle files produced by these experiments (pickle is unsafe on
# untrusted input).
with open(file_path, 'rb') as file:
    test_acc = pickle.load(file)

# Extract data to plot: the distinct model names present in the results
names = {name for name, _ in test_acc}

# For every model, make line plot (sorted for a stable legend order)
for name in sorted(names):
    # Collect this model's (x, accuracy) pairs and order them by x
    points = sorted((x_val, te_acc) for (_name, x_val), te_acc in test_acc.items() if _name == name)
    x, y = zip(*points)
    plt.semilogx(x, y, marker='.', linestyle='-', label=name)

# Plot graphics
# NOTE(review): the search cell stores results keyed by (name, out_channels),
# so for its results file the x axis is the number of out_channels rather than
# the training set size - confirm which label is intended.
plt.xlabel('Training set size')
plt.ylabel('Test accuracy')
plt.title('Entry-wise normal images, p={:d}'.format(p_norm))
plt.legend()
plt.grid(True)

# Show the plot
plt.show()