# Semisupervised Learning

In [63]:
import numpy as np
import pandas as pd
from keras import utils as ku
from sklearn.semi_supervised import LabelSpreading

### Data Import

In [55]:
def read_hdf5(filename, dataname):
    train_i = ku.HDF5Matrix(filename, '%s/axis1' % dataname)
    train_x = ku.HDF5Matrix(filename, '%s/block0_values' % dataname)
    try:
        train_y = ku.HDF5Matrix(filename, '%s/block1_values' % dataname)
    except KeyError:
        train_y = None

    train_i = np.array(train_i.data).astype('int')
    train_x = np.array(train_x.data)
    if train_y is not None:
        train_y = np.array(train_y.data).astype('int')
    else:
        train_y = np.ones((len(train_x), 1)) * -1

    return train_i, train_x, train_y

In [58]:
# Data import
labeled_i, labeled_x, labeled_y = read_hdf5('./data/train_labeled.h5', 'train')
unlabeled_i, unlabeled_x, unlabeled_y = read_hdf5('./data/train_unlabeled.h5', 'train')

# Suffling of data
...

# Split in train and validation set
...

Ellipsis

In [72]:
# Combine both data sets
X = np.concatenate((labeled_x, unlabeled_x), axis=0)
y = np.concatenate((labeled_y, unlabeled_y), axis=0)

print(y)

[[ 7.]
 [ 8.]
 [ 1.]
 ..., 
 [-1.]
 [-1.]
 [-1.]]


In [67]:
test_i, test_x, tmp = read_hdf5('./data/test.h5', 'test')

**Semi-supervised graph inference algorithms** work by constructing a similarity graph over all items in the input dataset. LabelPropagation and LabelSpreading differ in modifications to the similarity matrix that graph and the clamping effect on the label distributions. Clamping allows the algorithm to change the weight of the true ground labeled data to some degree. 

In [74]:
model = LabelSpreading(kernel='rbf', gamma=20,  
                       alpha=0.2, max_iter=30, tol=0.001, n_jobs=4)
model.fit(X, y)

  y = column_or_1d(y, warn=True)
  self.label_distributions_ /= normalizer


LabelSpreading(alpha=0.2, gamma=20, kernel='rbf', max_iter=30, n_jobs=4,
        n_neighbors=7, tol=0.001)

In [78]:
y_pred = model.predict(test_x)

In [79]:
y_pred

array([ 0.,  0.,  0., ...,  0.,  0.,  0.])