In [1]:
import pdb
from tqdm import tqdm

import numpy as np
import pandas as pd

from numpy.random import shuffle

from matplotlib import pyplot as plt
%matplotlib widget

from sklearn.decomposition import PCA

from keras.models import Sequential
from keras.layers import Dense, Activation

Using TensorFlow backend.


In [2]:
import os, sys
# Put the `../methods` directory first on the import path so that the
# project-local modules imported right after (`models`, `data_tools`) resolve.
# NOTE: the relative path is resolved against the current working directory,
# so this only works when the kernel is started from this notebook's folder.
lib_path = os.path.abspath('../methods')
sys.path.insert(0, lib_path)

from models import Model
from data_tools import ImageDataset
import data_tools as dt

In [3]:
import sys
from PyQt5.QtWidgets import QApplication
app = QApplication(sys.argv)
screen = app.screens()[0]
dpi = screen.physicalDotsPerInch()
app.quit()

ModuleNotFoundError: No module named 'PyQt5'

In [None]:
# Sizes of the train and test splits.
n_train = 1350
n_test = 450

# Ask the project's generator for 18 classes of 56-pixel-wide images.
# It receives the combined sample count plus the number of test samples.
dataset_splits = dt.generate_nclasses_dataset(
    nb_samples=n_train + n_test,
    nb_classes=18,
    width_in_pixels=56,
    noise=30,
    free_location=True,
    verbose=False,
    nb_test_samples=n_test,
)
(x_train, y_train), (x_test, y_test) = dataset_splits

In [None]:
# Wrap the generated train/test arrays in the project's ImageDataset container.
# Later cells read `tot_dim`, `n_train` and `train` from this object and
# register label sets on it.
shapes_18 = ImageDataset(x_train, y_train, x_test, y_test)

In [None]:
# Network hyperparameters shared by the models below.
w_in = shapes_18.tot_dim   # input width, taken from the dataset object
w_1 = w_2 = 100            # widths of the two hidden layers
# NOTE(review): max_epochs is not referenced by any fit() call in the cells
# shown here (they hard-code epochs=10 or epochs=20).
max_epochs = 400

In [None]:
# Two binary partitions (dichotomies) of the 18 class labels 0..17:
#   shapes_parity    -> even labels vs. odd labels
#   shapes_smallness -> labels 0..8 vs. labels 9..17
# Currently dichotomies will only be binary.
shapes_parity = [list(range(0, 18, 2)), list(range(1, 18, 2))]
shapes_smallness = [range(9), range(9, 18)]

In [None]:
# Register both binary labelings on the dataset (same order as before:
# smallness first, then parity) ...
for dich_groups, dich_label in ((shapes_smallness, 'smaller_than_9'),
                                (shapes_parity, 'parity')):
    shapes_18.build_dichLabels(dich_groups, dich_label)

# ... then build their horizontally stacked combination.
# NOTE(review): presumably stored as 'parity_hstack_smaller_than_9', by analogy
# with the 'parity_hstack_range_bins' name used later — confirm in data_tools.
shapes_18.hstack_dichs('parity', 'smaller_than_9')

In [None]:
# Draw a random subset of 75 training indices.  No random seed is set in the
# cells shown here, so the subset differs from run to run.
spl_size = 75
spl_ids = np.arange(shapes_18.n_train)
shuffle(spl_ids)  # permutes the index array in place
spl_ids = spl_ids[:spl_size]

# Attach the sampled inputs ('x') and targets ('y') to the dataset object.
shapes_18.spl = {key: shapes_18.train[key][spl_ids] for key in ('x', 'y')}

## 2) Studying the effect of supervision format for representation in hidden layers

### 2.2) Product supervision

In [None]:
# Product of the two dichotomies: one label set per (smallness, parity) pair,
# obtained by intersecting the groups.  The smallness group varies slowest.
shapes_prod = [
    set(parity_grp) & set(size_grp)
    for size_grp in shapes_smallness
    for parity_grp in shapes_parity
]
shapes_18.build_dichLabels(shapes_prod, 'parity_prod_smaller_than_9')

In [None]:
# Two tanh hidden layers (w_1 and w_2 units) followed by a 4-unit tanh output.
# Activations are kept as separate layers, so the stack still has six layers.
dnn_prod = Sequential()
dnn_prod.add(Dense(w_1, input_shape=(w_in,)))
dnn_prod.add(Activation('tanh'))
dnn_prod.add(Dense(w_2))
dnn_prod.add(Activation('tanh'))
dnn_prod.add(Dense(4))
dnn_prod.add(Activation('tanh'))

# Wrap the Keras network in the project's Model helper.
model = Model(dnn_prod)

# Train on the product labels for 10 epochs in mini-batches of 32 samples,
# then evaluate; the evaluation result is the cell's displayed value.
prod_dich_name = 'parity_prod_smaller_than_9'
model.fit(shapes_18, dich_name=prod_dich_name, epochs=10, batch_size=32)

model.evaluate(shapes_18, dich_name=prod_dich_name, batch_size=128)

In [None]:
%matplotlib widget
# Evaluate the trained product-supervision model on the dataset.
# NOTE(review): the second argument (2) is presumably a sample count —
# confirm against Model.sample_eval.
model.sample_eval(shapes_18, 2)

In [None]:
%matplotlib widget
pca = PCA(n_components=2)
reduced_prod_2d, fig3 = model.get_repr(shapes_18, pca, dim=2, plotting=True)
fig3.canvas.layout.width = '80%'
fig3.canvas.layout.height = '5000px'

In [None]:
# For every entry: total variance explained by the two components, then each
# component's individual share, all as percentages.
row_2d = 'Layer {0:d} - {1:.1f}% 2d var - {2:.1f}% + {3:.1f}%'
for lay_id, red in enumerate(reduced_prod_2d):
    var_1, var_2 = red['expl_var'][0], red['expl_var'][1]
    print(row_2d.format(lay_id, 100*(var_1+var_2), 100*var_1, 100*var_2))

In [None]:
# Same reduction as the 2-D cell, this time onto three principal components.
pca3 = PCA(n_components=3)
reduced_prod_3d, fig4 = model.get_repr(shapes_18, pca3, dim=3, plotting=True)

# Resize the interactive canvas (width first, then height).
for layout_attr, layout_value in (('width', '80%'), ('height', '5000px')):
    setattr(fig4.canvas.layout, layout_attr, layout_value)

In [None]:
# Report, per layer, the variance explained by the 3-component PCA: the total
# over the three components, then each component's share, as percentages.
# The label previously read "2d var" (copied from the 2-D cell) although the
# figure is the sum of three components.
for lay_id, red in enumerate(reduced_prod_3d):
    expl_var = red['expl_var']
    print('Layer {0:d} - {1:.1f}% 3d var - {2:.1f}% + {3:.1f}% + {4:.1f}%'.format(
        lay_id,
        100*(expl_var[0]+expl_var[1]+expl_var[2]),
        100*expl_var[0], 100*expl_var[1], 100*expl_var[2]))

### 2.3) Compact hstacking supervision

In [None]:
# Build the "compact stack" combination of the two dichotomies on the dataset.
# The training cell that follows refers to it as 'parity_compstack_smaller_than_9'.
shapes_18.compstack_dichs('parity', 'smaller_than_9')

In [None]:
# Same two tanh hidden layers as before, but with a 2-unit tanh output layer.
compstack_layers = [
    Dense(w_1, input_shape=(w_in,)),
    Activation('tanh'),
    Dense(w_2),
    Activation('tanh'),
    Dense(2),
    Activation('tanh'),
]
dnn_compstack = Sequential(compstack_layers)

compstack = Model(dnn_compstack)

# Train on the compact-stack labels for 10 epochs in mini-batches of 32 samples,
# then evaluate; the evaluation result is the cell's displayed value.
compstack_dich_name = 'parity_compstack_smaller_than_9'
compstack.fit(shapes_18, dich_name=compstack_dich_name, epochs=10, batch_size=32)

compstack.evaluate(shapes_18, dich_name=compstack_dich_name, batch_size=128)

In [None]:
%matplotlib widget
pca = PCA(n_components=2)
reduced_compstack_2d, fig5 = compstack.get_repr(shapes_18, pca, dim=2, plotting=True)
fig5.canvas.layout.width = '80%'
fig5.canvas.layout.height = '5000px'

In [None]:
# Explained-variance report for the 2-D projection: total of the two
# components, then each component's share, as percentages.
for lay_id, red in enumerate(reduced_compstack_2d):
    first_pc = red['expl_var'][0]
    second_pc = red['expl_var'][1]
    report_line = 'Layer {0:d} - {1:.1f}% 2d var - {2:.1f}% + {3:.1f}%'.format(
        lay_id, 100*(first_pc+second_pc), 100*first_pc, 100*second_pc)
    print(report_line)

In [None]:
# 3-component PCA view of the compact-stack model's representations.
# The report cell after this one treats layers with fewer than three projected
# columns as "not projectable in 3d".
pca3 = PCA(n_components=3)
reduced_compstack_3d, fig6 = compstack.get_repr(shapes_18, pca3, dim=3, plotting=True)

# Resize the interactive canvas: 80% of the cell width, 5000px tall.
canvas_layout = fig6.canvas.layout
canvas_layout.width, canvas_layout.height = '80%', '5000px'

In [None]:
# Report, per layer, the variance explained by the 3-component PCA.
# Layers whose projected representation has fewer than three columns are
# reported as not projectable instead of being indexed out of range.
# The label previously read "2d var" (copied from the 2-D cell) although the
# figure is the sum of three components.
for lay_id, red in enumerate(reduced_compstack_3d):
    if red['avg_lbl_repr'].shape[1] > 2:
        expl_var = red['expl_var']
        print('Layer {0:d} - {1:.1f}% 3d var - {2:.1f}% + {3:.1f}% + {4:.1f}%'.format(
            lay_id,
            100*(expl_var[0]+expl_var[1]+expl_var[2]),
            100*expl_var[0], 100*expl_var[1], 100*expl_var[2]))
    else:
        print('Layer {0:d} could not be projected in 3d'.format(lay_id))

## 3) Testing representation for categorical features

### 3.1) hstack

In [None]:
# Three contiguous bins of six class labels each: 0..5, 6..11 and 12..17.
# This is a three-way split, unlike the two-way parity and smallness splits.
shapes_range = [range(start, start + 6) for start in range(0, 18, 6)]

In [None]:
# Register the three-bin split as dichotomy-style labels, then combine it with
# parity in both stacked forms (hstack first, compstack second).
shapes_18.build_dichLabels(shapes_range, 'range_bins')
for stack_dichs in (shapes_18.hstack_dichs, shapes_18.compstack_dichs):
    stack_dichs('parity', 'range_bins')

# Register the same bins through build_catLabels under a separate name and
# combine that version with parity as a compact stack.
shapes_18.build_catLabels(shapes_range, 'cat_range_bins')
shapes_18.compstack_dichs('parity', 'cat_range_bins')

In [None]:
# Two tanh hidden layers followed by a 5-unit tanh output layer.
# Activations stay as separate layers so the layer count is unchanged.
dnn_ctg_hstack = Sequential()
dnn_ctg_hstack.add(Dense(w_1, input_shape=(w_in,)))
dnn_ctg_hstack.add(Activation('tanh'))
dnn_ctg_hstack.add(Dense(w_2))
dnn_ctg_hstack.add(Activation('tanh'))
dnn_ctg_hstack.add(Dense(5))
dnn_ctg_hstack.add(Activation('tanh'))

ctg_hstack = Model(dnn_ctg_hstack)

# Train on the hstacked parity / range-bin labels for 20 epochs in mini-batches
# of 32 samples, then evaluate; the result is the cell's displayed value.
ctg_hstack_dich_name = 'parity_hstack_range_bins'
ctg_hstack.fit(shapes_18, dich_name=ctg_hstack_dich_name, epochs=20, batch_size=32)

ctg_hstack.evaluate(shapes_18, dich_name=ctg_hstack_dich_name, batch_size=128)

In [None]:
# 2-component PCA view of the hstack model's representations.
# NOTE(review): `dim` is left at get_repr's default here, whereas the earlier
# 2-D cells pass dim=2 explicitly — confirm the default is 2.
pca = PCA(n_components=2)
reduced_ctg_hstack_2d, fig7 = ctg_hstack.get_repr(shapes_18, pca, plotting=True)

# Resize the interactive canvas: 80% of the cell width, 5000px tall.
canvas_layout = fig7.canvas.layout
canvas_layout.width, canvas_layout.height = '80%', '5000px'

In [None]:
# Explained-variance report for the 2-D projection of the hstack model.
row_2d = 'Layer {0:d} - {1:.1f}% 2d var - {2:.1f}% + {3:.1f}%'
for lay_id, red in enumerate(reduced_ctg_hstack_2d):
    var_1, var_2 = red['expl_var'][0], red['expl_var'][1]
    print(row_2d.format(lay_id, 100*(var_1+var_2), 100*var_1, 100*var_2))

In [None]:
# 3-component PCA view of the hstack model's representations.
pca3 = PCA(n_components=3)
reduced_ctg_hstack_3d, fig8 = ctg_hstack.get_repr(shapes_18, pca3, dim=3, plotting=True)

# Resize the interactive canvas (width first, then height).
for layout_attr, layout_value in (('width', '80%'), ('height', '5000px')):
    setattr(fig8.canvas.layout, layout_attr, layout_value)

In [None]:
# Report, per layer, the variance explained by the 3-component PCA.
# Layers whose projected representation has fewer than three columns are
# reported as not projectable.
# The label previously read "2d var" (copied from the 2-D cell) although the
# figure is the sum of three components.
for lay_id, red in enumerate(reduced_ctg_hstack_3d):
    if red['avg_lbl_repr'].shape[1] > 2:
        expl_var = red['expl_var']
        print('Layer {0:d} - {1:.1f}% 3d var - {2:.1f}% + {3:.1f}% + {4:.1f}%'.format(
            lay_id,
            100*(expl_var[0]+expl_var[1]+expl_var[2]),
            100*expl_var[0], 100*expl_var[1], 100*expl_var[2]))
    else:
        print('Layer {0:d} could not be projected in 3d'.format(lay_id))

### 3.2) Product

In [None]:
# Product of the range bins and parity: one label set per (bin, parity) pair,
# obtained by intersecting the groups.  The range bin varies slowest.
shapes_ctg_prod = [
    set(parity_grp) & set(range_grp)
    for range_grp in shapes_range
    for parity_grp in shapes_parity
]
shapes_18.build_dichLabels(shapes_ctg_prod, 'parity_prod_range_bins')

In [None]:
# Hyperparameters for this section; the values repeat those set earlier in the
# notebook.
w_in = shapes_18.tot_dim   # input width, taken from the dataset object
w_1, w_2 = 100, 100        # widths of the two hidden layers
# NOTE(review): max_epochs is not referenced by any fit() call in the cells
# shown here.
max_epochs = 400

In [None]:
# Two tanh hidden layers followed by a 6-unit tanh output layer.
ctg_prod_layers = [
    Dense(w_1, input_shape=(w_in,)),
    Activation('tanh'),
    Dense(w_2),
    Activation('tanh'),
    Dense(6),
    Activation('tanh'),
]
dnn_ctg_prod = Sequential(ctg_prod_layers)

ctg_prod = Model(dnn_ctg_prod)

# Train on the parity x range-bin product labels for 10 epochs in mini-batches
# of 32 samples, then evaluate; the result is the cell's displayed value.
ctg_prod_dich_name = 'parity_prod_range_bins'
ctg_prod.fit(shapes_18, dich_name=ctg_prod_dich_name, epochs=10, batch_size=32)

ctg_prod.evaluate(shapes_18, dich_name=ctg_prod_dich_name, batch_size=128)

In [None]:
# 2-component PCA view of the product model's representations.
# NOTE(review): `dim` is left at get_repr's default here, whereas the earlier
# 2-D cells pass dim=2 explicitly — confirm the default is 2.
pca = PCA(n_components=2)
reduced_ctg_prod_2d, fig9 = ctg_prod.get_repr(shapes_18, pca, plotting=True)

# Resize the interactive canvas (width first, then height).
for layout_attr, layout_value in (('width', '80%'), ('height', '5000px')):
    setattr(fig9.canvas.layout, layout_attr, layout_value)

In [None]:
# Explained-variance report for the 2-D projection of the product model.
for lay_id, red in enumerate(reduced_ctg_prod_2d):
    first_pc = red['expl_var'][0]
    second_pc = red['expl_var'][1]
    report_line = 'Layer {0:d} - {1:.1f}% 2d var - {2:.1f}% + {3:.1f}%'.format(
        lay_id, 100*(first_pc+second_pc), 100*first_pc, 100*second_pc)
    print(report_line)

In [None]:
# 3-component PCA view of the product model's representations.
pca3 = PCA(n_components=3)
reduced_ctg_prod_3d, fig10 = ctg_prod.get_repr(shapes_18, pca3, dim=3, plotting=True)

# Resize the interactive canvas: 80% of the cell width, 5000px tall.
canvas_layout = fig10.canvas.layout
canvas_layout.width, canvas_layout.height = '80%', '5000px'

In [None]:
# Report, per layer, the variance explained by the 3-component PCA.
# Layers whose projected representation has fewer than three columns are
# reported as not projectable.
# The label previously read "2d var" (copied from the 2-D cell) although the
# figure is the sum of three components.
for lay_id, red in enumerate(reduced_ctg_prod_3d):
    if red['avg_lbl_repr'].shape[1] > 2:
        expl_var = red['expl_var']
        print('Layer {0:d} - {1:.1f}% 3d var - {2:.1f}% + {3:.1f}% + {4:.1f}%'.format(
            lay_id,
            100*(expl_var[0]+expl_var[1]+expl_var[2]),
            100*expl_var[0], 100*expl_var[1], 100*expl_var[2]))
    else:
        print('Layer {0:d} could not be projected in 3d'.format(lay_id))

### 3.3) compstack

In [None]:
# Hyperparameters for this section; the values repeat those set earlier in the
# notebook.
w_in = shapes_18.tot_dim   # input width, taken from the dataset object
w_1 = w_2 = 100            # widths of the two hidden layers
# NOTE(review): max_epochs is not referenced by any fit() call in the cells
# shown here.
max_epochs = 400

In [None]:
# Two tanh hidden layers followed by a 2-unit tanh output layer.
# Layers are created and appended in the original order.
dnn_ctg_compstack = Sequential()
for ctg_compstack_layer in (
        Dense(w_1, input_shape=(w_in,)),
        Activation('tanh'),
        Dense(w_2),
        Activation('tanh'),
        Dense(2),
        Activation('tanh')):
    dnn_ctg_compstack.add(ctg_compstack_layer)

ctg_compstack = Model(dnn_ctg_compstack)

# Train on the compact stack of parity and the categorical range bins for 10
# epochs in mini-batches of 32 samples, then evaluate; the result is the
# cell's displayed value.
ctg_compstack_dich_name = 'parity_compstack_cat_range_bins'
ctg_compstack.fit(shapes_18, dich_name=ctg_compstack_dich_name, epochs=10, batch_size=32)

ctg_compstack.evaluate(shapes_18, dich_name=ctg_compstack_dich_name, batch_size=128)

In [None]:
%matplotlib widget
pca = PCA(n_components=2)
reduced_ctg_compstack_2d, fig11 = ctg_compstack.get_repr(shapes_18, pca, plotting=True)
fig11.canvas.layout.width = '80%'
fig11.canvas.layout.height = '5000px'

In [None]:
# Explained-variance report for the 2-D projection of the categorical
# compact-stack model.
row_2d = 'Layer {0:d} - {1:.1f}% 2d var - {2:.1f}% + {3:.1f}%'
for lay_id, red in enumerate(reduced_ctg_compstack_2d):
    var_1, var_2 = red['expl_var'][0], red['expl_var'][1]
    print(row_2d.format(lay_id, 100*(var_1+var_2), 100*var_1, 100*var_2))

In [None]:
# 3-component PCA view of the categorical compact-stack model's representations.
pca3 = PCA(n_components=3)
reduced_ctg_compstack_3d, fig12 = ctg_compstack.get_repr(shapes_18, pca3, dim=3, plotting=True)

# Resize the interactive canvas (width first, then height).
for layout_attr, layout_value in (('width', '80%'), ('height', '5000px')):
    setattr(fig12.canvas.layout, layout_attr, layout_value)

In [None]:
# Report, per layer, the variance explained by the 3-component PCA.
# Layers whose projected representation has fewer than three columns are
# reported as not projectable.
# The label previously read "2d var" (copied from the 2-D cell) although the
# figure is the sum of three components.
for lay_id, red in enumerate(reduced_ctg_compstack_3d):
    if red['avg_lbl_repr'].shape[1] > 2:
        expl_var = red['expl_var']
        print('Layer {0:d} - {1:.1f}% 3d var - {2:.1f}% + {3:.1f}% + {4:.1f}%'.format(
            lay_id,
            100*(expl_var[0]+expl_var[1]+expl_var[2]),
            100*expl_var[0], 100*expl_var[1], 100*expl_var[2]))
    else:
        print('Layer {0:d} could not be projected in 3d'.format(lay_id))