In [1]:
import os

import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
import dionysus as dion
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

from pt_activation.models.cifar_alexnet import AlexNet

%load_ext autoreload
%autoreload 2

In [2]:
def create_filtrations(model, batch_size, up_to):
    """Compute one activation filtration per CIFAR-10 test image.

    Iterates over the CIFAR-10 test split in order (no shuffling), runs
    ``model`` on each batch with ``hiddens=True`` and, for each of the first
    ``up_to`` images, calls ``model.compute_dynamic_filtration`` on the image
    and its per-layer hidden activations.

    Parameters
    ----------
    model : torch.nn.Module
        Network whose forward pass accepts ``hiddens=True`` and returns
        ``(output, hiddens)``, and which provides
        ``compute_dynamic_filtration``. ``output`` is treated as per-class
        log-probabilities (it is fed to ``F.nll_loss``).
    batch_size : int
        Batch size of the test data loader.
    up_to : int
        Maximum number of test images to process.

    Returns
    -------
    pandas.DataFrame
        One row per processed image with columns ``filtration``, ``loss``
        (the sample's negative log-likelihood), ``class`` (true label) and
        ``prediction`` (arg-max label).
    """
    device = torch.device("cpu")
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    testset = datasets.CIFAR10(root='../data', train=False,
                                           download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=2)

    model.eval()
    t = 0  # number of images consumed so far
    res_df = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output, hiddens = model(data, hiddens=True)
            # Per-sample loss: reduction='none' keeps one NLL value per image,
            # so the 'loss' column really is the loss of that image.
            losses = F.nll_loss(output, target, reduction='none').cpu().numpy()
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            # Convert once per batch instead of once per sample.
            preds = pred.cpu().numpy()
            targets = target.cpu().numpy()
            # Never process more than `up_to` images, even mid-batch.
            n_take = min(data.shape[0], up_to - t)
            for s in range(n_take):
                this_hiddens = [h[s] for h in hiddens]
                print('Filtration: {}'.format(s+t))
                # NOTE(review): only channel 0 of the image is passed here --
                # confirm that compute_dynamic_filtration expects a single channel.
                f = model.compute_dynamic_filtration(data[s,0], this_hiddens, percentile=50)
                row = {'filtration':f, 'loss':losses[s], 'class':targets[s], 'prediction':preds[s][0]}
                res_df.append(row)

            t += data.shape[0]
            if t >= up_to:
                break

    return pd.DataFrame(res_df)

In [3]:
# NOTE: absolute, machine-specific checkpoint path -- adjust for your environment.
model_location = '/home/tgebhart/projects/pt_activation/logdir/models/cifar_alexnet.pt'
model = AlexNet()
# map_location='cpu' lets the checkpoint load on CPU-only machines even if it
# was saved from a GPU; the filtrations are computed on the CPU anyway.
model.load_state_dict(torch.load(model_location, map_location='cpu'))

In [4]:
# One batch of ten test images -> ten filtrations.
res_df = create_filtrations(model, batch_size=10, up_to=10)

Files already downloaded and verified
Filtration: 0


AttributeError: 'MaxPool2d' object has no attribute 'weight'

In [None]:
# For every sample, turn the two-entry columns of the reduced persistence
# matrix into weighted edges and collect them into networkx graphs, stored in
# a dict keyed by the vertex id under which each graph was first created.
sample_graphs = []
for s in range(res_df.shape[0]):
    print(s)
    subgraphs = {}
    f = res_df['filtration'].iloc[s]
    m = dion.homology_persistence(f)
    dgms = dion.init_diagrams(m,f)
    for i, chain in enumerate(m):
        if len(chain) != 2:
            continue
        # Vertex ids of the two simplices in the chain, and the filtration
        # value of simplex i used as the edge weight.
        src = f[chain[0].index][0]
        dst = f[chain[1].index][0]
        weight = f[i].data
        if src in subgraphs:
            subgraphs[src].add_edge(src, dst, weight=weight)
            continue
        # Otherwise attach the edge to the first graph already containing
        # `src`; if there is none, start a new graph keyed by `src`.
        for graph in subgraphs.values():
            if graph.has_node(src):
                graph.add_edge(src, dst, weight=weight)
                break
        else:
            fresh = nx.Graph()
            fresh.add_edge(src, dst, weight=weight)
            subgraphs[src] = fresh

    sample_graphs.append(subgraphs)

In [None]:
# Preview the first rows of the per-sample results table.
res_df.head()

In [None]:
# Rows where the network's prediction disagrees with the true class.
res_df.loc[res_df['prediction'].ne(res_df['class'])]

In [None]:
# options = {
#     'node_color': 'red',
#     'node_size': 2,
#     'width': 3,
#     'with_labels':True}
# nx.draw_random(subgraphs[243], **options)

In [None]:
# Ask the model for its layer-wise ids and display them.
# NOTE(review): presumably these relate graph node labels to network layers;
# the structure of the return value is defined in the model class -- confirm.
ids = model.layerwise_ids()
ids

In [None]:
# Index into sample_graphs of the sample whose graphs are inspected below.
# NOTE(review): create_filtrations(model, 10, 10) yields only 10 samples, so
# 445 is out of range on a fresh run -- confirm the intended sample count/index.
goi = 445

In [None]:
# Keys of the selected sample's subgraph dict (the vertex id under which each
# subgraph was first created).
print(sample_graphs[goi].keys())

In [None]:
# Spring-layout drawing of a single subgraph (key 3709) of the selected sample.
options = dict(node_color='red', node_size=2, width=3, with_labels=True)
nx.draw_spring(sample_graphs[goi][3709], **options)

In [None]:
# All subgraphs of the selected sample, in dict order.
gois = list(sample_graphs[goi].values())

In [None]:
# Merge the selected sample's subgraphs into a single graph.
all_goi = nx.compose_all(gois)

In [None]:
# Spring-layout drawing of the merged graph.
options = dict(node_color='red', node_size=2, width=2, with_labels=True)
nx.draw_spring(all_goi, **options)

In [None]:
from node2vec import Node2Vec

In [None]:
# Embed the nodes of (at most) the first `take` subgraphs of every sample with
# node2vec and record each subgraph's "lifetime" (largest minus smallest edge
# weight) alongside its embedding table.
take = 10
emb_dim = 64  # node2vec embedding size; also the number of columns read back
embedding_info = []
for i in range(len(sample_graphs)):
    print('Sample: {}/{}'.format(i,len(sample_graphs)))
    subs = []
    for k in list(sample_graphs[i].keys())[:take]:
        graph = sample_graphs[i][k]
        node2vec = Node2Vec(graph, dimensions=emb_dim, walk_length=5, num_walks=5, workers=4, quiet=True)
        # Deliberately not called `model`: that name holds the AlexNet, and
        # overwriting it would break the model-dependent cells on re-run.
        n2v_model = node2vec.fit(window=10, min_count=1, batch_words=4)
        # Round-trip through the word2vec text format; the first line of that
        # file is a "<count> <dim>" header, hence the drop of row 0.
        n2v_model.wv.save_word2vec_format('../data/node2vec_temp')
        embedding = pd.read_csv('../data/node2vec_temp', delimiter=' ', names=['node']+list(range(emb_dim))).drop(0, axis=0)
        weights = [attrs['weight'] for _, _, attrs in graph.edges(data=True)]
        life = max(weights) - min(weights)
        subs.append({'sample':i, 'generator':k, 'lifetime':life,'embedding':embedding})
    embedding_info.append(subs)

In [None]:
# Spot check: mean embedding vector (averaged over nodes) of the sixth
# generator of sample 0.
embedding_info[0][5]['embedding'][list(range(64))].values.mean(axis=0)

In [None]:
# Spot check: the full record (sample, generator, lifetime, embedding) of the
# sixth generator of sample 0.
embedding_info[0][5]

In [None]:
# Collapse every sample to a single `dim`-vector: the lifetime-weighted
# average of its generators' mean node embeddings.
dim = 64
avg_embds = np.zeros(shape=(len(embedding_info),dim))
for i in range(len(embedding_info)):
    print('{}/{}'.format(i, len(embedding_info)))
    gs = embedding_info[i]
    if not gs:
        # No generators for this sample: keep the all-zero row rather than
        # dividing 0 by 0.
        continue
    ws = np.array([entry['lifetime'] for entry in gs], dtype=float)
    embds = np.stack([entry['embedding'][list(range(dim))].values.mean(axis=0) for entry in gs])
    total = ws.sum()
    if total > 0:
        avg_embds[i,:] = np.dot(ws,embds)/total
    else:
        # Every lifetime is 0 (e.g. single-edge subgraphs): the weights carry
        # no information, so fall back to the plain mean instead of NaN.
        avg_embds[i,:] = embds.mean(axis=0)

In [None]:
from sklearn.cluster import KMeans
# fit_predict returns the cluster label assigned to each row of avg_embds, so
# despite its name `centers` holds per-sample labels, not centroid coordinates.
centers = KMeans(n_clusters=10, random_state=0).fit_predict(avg_embds)

In [None]:
# NOTE(review): bare attribute reference with no call -- it does nothing beyond
# echoing the attribute and looks like leftover scratch work; confirm it can be
# removed.
res_df.append

In [None]:
# Attach the k-means labels to the per-sample results (join aligns on the
# index). The new column is named 0 because the labels are wrapped in an
# unnamed DataFrame.
cluster_df = res_df.join(pd.DataFrame(centers))
cluster_df.head()

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Features are the averaged embeddings, targets the true classes; the last
# `holdout` samples are set aside as a test split.
X = avg_embds
y = res_df['class'].values

holdout = 100
X_train, X_test = X[:-holdout, :], X[-holdout:, :]
y_train, y_test = y[:-holdout], y[-holdout:]

clf = LogisticRegression(multi_class='multinomial', solver='lbfgs')


In [None]:
# cross_val_score is first used in this cell, so import it here; without this
# a fresh-kernel "Run All" stops with a NameError.
from sklearn.model_selection import cross_val_score

# 10-fold cross-validated accuracy of the classifier on (X, y).
cross_val_score(clf, X, y, cv=10)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA

In [None]:
# Same 10-fold evaluation with a (seeded) decision tree.
clf = DecisionTreeClassifier(random_state=0)
cross_val_score(estimator=clf, X=X, y=y, cv=10)

In [None]:
# Project the features to 2-D with PCA, cluster them with k-means and label
# every point of a regular grid so the cluster regions can be painted.
h = 0.2  # grid step in PCA units
reduced_data = PCA(n_components=2).fit_transform(X)
# Seeded so the clustering (and the figure built from it) is reproducible.
kmeans = KMeans(init='k-means++', n_clusters=10, n_init=10, random_state=0)
kmeans.fit(reduced_data)
# Pad the plotting window by one unit on every side.
x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Cluster label of every grid point (flattened; reshaped before plotting).
Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

In [None]:
# Paint the k-means regions over the PCA plane, overlay the samples as black
# dots and the centroids as white crosses.
Z = Z.reshape(xx.shape)
plt.figure(1)
plt.clf()
plt.imshow(Z, interpolation='nearest',
           extent=(xx.min(), xx.max(), yy.min(), yy.max()),
           cmap=plt.cm.Paired,
           aspect='auto', origin='lower')

plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
# Plot the centroids as a white X
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1],
            marker='x', s=169, linewidths=3,
            color='w', zorder=10)
# The points are embeddings of CIFAR-10 test samples, not the digits dataset.
plt.title('K-means clustering on the averaged node2vec embeddings (PCA-reduced data)\n'
          'Centroids are marked with white cross')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.show()

In [None]:
# Collect the vocabulary of edge names ("u-v") over all subgraphs of all
# samples.
# `take = None` makes the `[:take]` slice keep every subgraph; a value of -1
# would silently drop the last subgraph of each sample.
take = None
edges = set()
for i in range(len(sample_graphs)):
    for k in list(sample_graphs[i].keys())[:take]:
        for x in sample_graphs[i][k].edges(data=True):
            edge_name = str(x[0])+'-'+str(x[1])
            edges.add(edge_name)

In [None]:
# One row per sample, one column per edge name; each cell holds the weight of
# that edge in the sample (0 where the edge does not occur).
edf = pd.DataFrame(np.zeros((len(sample_graphs),len(edges))), columns=list(edges))
for i in range(len(sample_graphs)):
    print('Sample: {}/{}'.format(i,len(sample_graphs)))
    for k in list(sample_graphs[i].keys())[:take]:
        for x in sample_graphs[i][k].edges(data=True):
            edge_name = str(x[0])+'-'+str(x[1])
            # Single-step .at write: chained `edf.iloc[i][edge_name] = ...` may
            # assign into a temporary copy and leave edf unchanged. edf has the
            # default integer index, so label i is row i.
            edf.at[i, edge_name] = x[2]['weight']

In [None]:
# Preview the first rows of the edge-weight feature table.
edf.head()

In [None]:
# Switch the feature matrix to the edge weights; targets stay the true classes.
X, y = edf.values, res_df['class'].values

In [None]:
from sklearn import svm

In [None]:
# 10-fold cross-validation of a one-vs-one SVM on the edge-weight features.
clf = svm.SVC(decision_function_shape='ovo', gamma='scale')
cross_val_score(estimator=clf, X=X, y=y, cv=10)

In [None]:
# Split the results by whether the network classified the sample correctly.
is_correct = res_df['class'] == res_df['prediction']
y_incorrect = res_df[~is_correct]
y_correct = res_df[is_correct]
y_incorrect

In [None]:
# Edge-weight rows of the correctly / incorrectly classified samples.
# The index labels of res_df are used as positions here, which lines up because
# both res_df and edf were built with the default integer index.
X_correct = edf.iloc[y_correct.index]
X_incorrect = edf.iloc[y_incorrect.index]

In [None]:
# Train the SVM on the correctly classified samples only.
clf.fit(X=X_correct.values, y=y_correct['class'].values)

In [None]:
# Classes the SVM assigns to the samples the network got wrong.
inc_preds = clf.predict(X_incorrect.values)

In [None]:
# Display the SVM's predictions for the misclassified samples.
inc_preds

In [None]:
# (number of support vectors, number of features) of the fitted SVM.
clf.support_vectors_.shape