In [None]:
% load_ext autoreload
% autoreload 2
% matplotlib inline

In [None]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import sys
import os

In [None]:
# make the local gcnn package importable: it is looked up in the parent directory
sys.path.append('..')

# bigger default figure size, given as (width, height) in inches
plt.rcParams['figure.figsize'] = 18, 5

In [None]:
# seed the numpy and torch random generators for reproducibility
np.random.seed(0)
# the trailing ';' only suppresses the cell output
torch.manual_seed(0);

In [None]:
# restrict this process to a specific GPU (device 0);
# this has to be set before CUDA is first initialised to take effect
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [None]:
from gcnn.datasets import load_icebergs

# train = all labelled cases from Kaggle
measures = load_icebergs('train')
measures.head(4)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split row positions (not the rows themselves) into 85% train / 15% test,
# stratified on the label so both parts keep the same class balance.
# `train` and `test` are therefore positional indices, usable with .iloc and numpy arrays.
train, test = train_test_split(range(len(measures)), test_size=0.15, stratify=measures.is_iceberg, random_state=0)

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
band_scaler = MinMaxScaler()
angle_scaler = MinMaxScaler()

In [None]:
# Fit the scalers on the training rows only, so no test statistics leak into the scaling.
# Each band is reshaped to a single row (one sample, one feature per value), so
# band_scaler accumulates a min/max per position, shared by band_1 and band_2.
for _, e in measures.iloc[train].iterrows():
    band_scaler.partial_fit(e.band_1.reshape(1, -1))
    band_scaler.partial_fit(e.band_2.reshape(1, -1))
    
# the angle scaler is fitted on the non-missing training angles only
angle_scaler.fit(measures.iloc[train].inc_angle.dropna().values.reshape(-1, 1));

Usable data contains the two bands, the angle and the target (label).

In [None]:
# Scale inc_angle for every row (train and test) as a single-column array.
# NOTE(review): missing angles are replaced by 0 before scaling, while the scaler
# was fitted on the non-missing training angles only, so these rows may land
# outside the [0, 1] range — confirm this is the intended encoding for "missing".
angles = angle_scaler.transform(measures.inc_angle.fillna(0).values.reshape(-1, 1))

In [None]:
targets = measures.is_iceberg.values.reshape(-1, 1)

In [None]:
from gcnn import graph

In [None]:
# commented as slow [nx.average_node_connectivity(g) for g in small_graphs]

In [None]:
from gcnn.coarsening import graclus

In [None]:
# Empty results table: one row per model (indexed by its name), one column per metric.
score_columns = ['name', 'accuracy', 'precision', 'recall', 'f1']
scores = pd.DataFrame(data=[], columns=score_columns)
scores = scores.set_index('name')
scores

In [None]:
# Scale both bands with the shared scaler and stack them as two channels:
# two (n_samples, n_values) arrays -> (n_samples, 2, n_values) -> (n_samples, 2, 75, 75).
# Assumes every band entry is a flat array of 75 * 75 values — TODO confirm against load_icebergs.
bands = np.stack([
    band_scaler.transform(np.stack(measures.band_1)),
    band_scaler.transform(np.stack(measures.band_2)),
], axis=1).reshape(-1, 2, 75, 75)

In [None]:
flat_features = np.c_[bands.reshape(-1, 2 * 75 * 75), angles.reshape(-1, 1)]
flat_features.shape

In [None]:
from gcnn.utils import score_classification

In [None]:
cuda = torch.cuda.is_available()
cuda

In [None]:
import torch.utils.data as data_utils

Prepare PyTorch tensors.

In [None]:
from gcnn.nets import BaselineCNNSimple
from gcnn.utils import NNplusplus
from skorch import NeuralNet

skorch provides a scikit-learn-compatible interface over PyTorch models.

In [None]:
# Baseline CNN wrapped in the project's NNplusplus estimator.
# batch_size and lr given here are only defaults: both are overridden by the
# grid search over param_dist_cnn further down.
# BCEWithLogitsLoss applies the sigmoid itself, so the module is expected to
# output raw logits — TODO confirm against BaselineCNNSimple.
cnn = NNplusplus(
    BaselineCNNSimple,
    use_cuda=cuda,
    optimizer=torch.optim.Adam,
    criterion=nn.BCEWithLogitsLoss, 
    batch_size=50, # candidate values noted by the author: 25, 50
    max_epochs=30, 
    lr=0.001, # default learning rate
)

In [None]:
features = bands[train].astype(np.float32)
labels = targets[train].astype(np.float32)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
param_dist_cnn = {
    'batch_size': [25,50],
    'lr':[0.001,0.0001,0.00001],
}

In [None]:
# NOTE: despite the variable name, GridSearchCV evaluates every combination in
# param_dist_cnn exhaustively (2 batch sizes x 3 learning rates), not a random sample.
# No `scoring` is passed, so the estimator's own score method is used
# (NNplusplus is assumed to provide one — TODO confirm).
random_search_cnn = GridSearchCV(cnn,param_dist_cnn)

In [None]:
random_search_cnn.fit(features, labels)

In [None]:
# best hyper-parameter combination found by the CNN grid search
# (GridSearchCV exposes `best_params_`; it has no `best` attribute)
random_search_cnn.best_params_

In [None]:
import scipy.sparse
from scipy.sparse import csgraph
from gcnn.nets import GraphCNNSimple
from gcnn.utils import NNplusplus

In [None]:
grid = graph.knn(graph.grid_coordinates(75), k=8, metric='cityblock')
#grid = nx.adjacency_matrix(graph, sorted(graph.nodes))

In [None]:
def coarsen_permute(graph, bands, levels=3):
    """Coarsen a graph and reorder the node signals to match the coarsening.

    Parameters
    ----------
    graph : networkx graph
        Graph whose nodes, once sorted, correspond in order to the columns of
        ``bands``. Inside this function the name shadows the ``gcnn.graph``
        module imported by the notebook.
    bands : array
        Node signals passed on to ``graclus.perm_data``; the notebook passes an
        ``(n_samples, 75 * 75)`` array (one column per node).
    levels : int, optional
        Number of coarsening levels requested from ``graclus.coarsen``.
        Defaults to 3, the value that was previously hard-coded.

    Returns
    -------
    laps : list
        Normalised Laplacians of every graph returned by ``graclus.coarsen``
        except the last one.
    pbands : array of float32
        ``bands`` after ``graclus.perm_data`` applied the permutation returned
        by the coarsening.
    """
    # nodes are sorted so that adjacency rows/columns follow the band data order
    adjacency = nx.adjacency_matrix(graph, sorted(graph.nodes))

    # use the public constructor: the scipy.sparse.csr submodule path is a
    # deprecated private namespace
    adjacency = sp.sparse.csr_matrix(adjacency)

    # coarsen the grid
    coarsened, perms = graclus.coarsen(adjacency, levels=levels, self_connections=False)
    # blank line, presumably to separate whatever graclus.coarsen prints from later output
    print()

    # normalised Laplacian of every returned graph but the last
    # (presumably the coarsest one, which is not needed — TODO confirm)
    laps = [csgraph.laplacian(g, normed=True) for g in coarsened[:-1]]

    # reorder the data so that it matches the permuted node order
    pbands = graclus.perm_data(bands, perms).astype(np.float32)

    return laps, pbands

In [None]:
laps, pbands = coarsen_permute(grid, bands[:, 0, :, :].reshape(-1, 75 * 75))

In [None]:
# Same grid as the one defined for the CNN search, redefined here and reused
# for the graph network search.
param_dist_cnn = {
    'batch_size': [25,50],
    'lr':[0.001,0.0001,0.00001],
}
# NOTE(review): n_iter_search_cnn is not referenced by any cell visible here;
# GridSearchCV takes no iteration count (the grid has 2 x 3 = 6 combinations).
n_iter_search_cnn = 6

In [None]:
# Dense float32 copies of the Laplacians at index 0 and 2 of `laps`, handed to the
# module as constructor arguments.
# toarray() yields a plain ndarray, whereas todense() returns an np.matrix,
# a class NumPy no longer recommends.
l0 = torch.from_numpy(laps[0].toarray()).float()
l2 = torch.from_numpy(laps[2].toarray()).float()

# keep the Laplacians on the same device as the model
if cuda:
    l0 = l0.cuda()
    l2 = l2.cuda()

# Graph CNN wrapped in the project's NNplusplus estimator; `module__*` arguments
# are forwarded to the GraphCNNSimple constructor (skorch naming convention —
# assumes NNplusplus keeps it, TODO confirm).
# batch_size and lr are only defaults: the grid search over param_dist_cnn overrides them.
net = NNplusplus(
    GraphCNNSimple,
    module__k=25,
    module__lf0=l0,
    module__lf2=l2,
    use_cuda=cuda,
    optimizer=torch.optim.Adam,
    criterion=nn.BCEWithLogitsLoss,
    batch_size=50,
    max_epochs=15,
    lr=0.001,
)

In [None]:
features = pbands[train]
labels = targets[train].astype(np.float32)

In [None]:
# NOTE: despite the variable name, this is an exhaustive grid search over
# param_dist_cnn (the same batch_size / lr grid as for the CNN), using the
# estimator's default scoring since no `scoring` is passed.
random_search_net = GridSearchCV(net,param_dist_cnn)

In [None]:
features.shape

In [None]:
random_search_net.fit(features,labels)

In [None]:
random_search_net.best_params_

In [None]:
random_search_net.best_score_

In [None]:
random_search_cnn.best_score_

In [None]:
random_search_net.best_score_