In [1]:
import numpy as np
import numbers
from copy import deepcopy
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits, load_iris, load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold

In [26]:
class DemocraticCo:
    """
    Work-in-progress Democratic Co-learning wrapper around several classifiers.

    The instance keeps the untrained base classifiers; ``fit`` trains deep
    copies of them on the labeled data and tallies their predictions on the
    unlabeled data. The step that would turn those votes into pseudo-labels
    is not implemented yet, so ``fit`` currently performs a single round and
    retains neither the fitted copies nor the vote table.
    """

    def __init__(self, base_cls, random_state=None):
        """
        Constructor. Creates the co-training instance.

        Parameters
        ----------
        base_cls:
            Indexable collection of classifiers exposing ``fit`` and
            ``predict``.
        random_state:
            None, int seed or np.random.RandomState instance.

        Raises
        ------
        ValueError
            If ``random_state`` is none of the supported kinds.
        """

        self.n = len(base_cls)
        self.classes = []
        # Stored for later use; nothing in this class draws from it yet.
        self.rd = self.check_random_state(random_state)
        self.classifiers = {i: base_cls[i] for i in range(self.n)}

    def fit(self, L, y, U):
        """
        Trains the Democratic-Co.

        Parameters
        ----------
        L: np.array
            Labeled data used for training
        y: np.array
            Labeled data tags used for training
        U: np.array
            Unlabeled data used for training
        """

        self.classes = np.unique(y)

        # Predicted labels are translated to vote-table columns through this
        # mapping, so labels need not be the integers 0..k-1 (strings or
        # arbitrary integers work as well).
        class_index = {
            label: column for column, label in enumerate(self.classes.tolist())
        }

        # Per classifier: (pseudo-labeled samples, their pseudo-labels).
        cls_pseudo_updates = [([], []) for _ in range(self.n)]

        changes = True

        while changes:

            # Train fresh copies so the stored base classifiers stay untouched.
            hyps = deepcopy(self.classifiers)

            for i in range(self.n):
                pseudo_X, pseudo_y = cls_pseudo_updates[i]

                if len(pseudo_X) > 0:
                    X_train = np.concatenate((L, pseudo_X))
                    y_train = np.concatenate((y, pseudo_y))
                else:
                    X_train, y_train = L, y

                hyps[i] = hyps[i].fit(X_train, y_train)

            # votes[x][c] is the set of classifier ids that predicted class
            # column c for unlabeled sample x.
            votes = [
                [set() for _ in range(len(self.classes))] for _ in range(len(U))
            ]

            # Predict once per classifier over all of U instead of once per
            # sample; skipped for empty U, which estimators may reject.
            if len(U) > 0:
                for i in range(self.n):
                    predictions = np.asarray(hyps[i].predict(U)).tolist()
                    for x, label in enumerate(predictions):
                        votes[x][class_index[label]].add(i)

            # Choose which examples to propose for labeling.
            # TODO: not implemented yet - `votes` is not consumed, every
            # classifier's pseudo-label set is reset to empty and the loop
            # stops after this first round.
            for i in range(self.n):
                cls_pseudo_updates[i] = ([], [])

            changes = False

    def check_random_state(self, seed=None):
        """
        Turn seed into a np.random.RandomState instance.
        Source: SkLearn

        Parameters
        ----------
        seed : None, int or instance of RandomState
            If None, return the RandomState singleton.
            If int, return a new RandomState seeded with seed.
            If RandomState instance, return it.

        Returns
        -------
        numpy.random.RandomState
            The random state object based on seed parameter.

        Raises
        ------
        ValueError
            If seed is none of the supported kinds.
        """

        if seed is None or seed is np.random:
            return np.random.mtrand._rand

        if isinstance(seed, numbers.Integral):
            return np.random.RandomState(seed)

        if isinstance(seed, np.random.RandomState):
            return seed

        # Fail loudly instead of silently handing back None.
        raise ValueError(
            "%r cannot be used to seed a numpy.random.RandomState instance" % seed
        )

In [24]:
# Three different base learners that will form the ensemble.
h_0, h_1, h_2 = DecisionTreeClassifier(), GaussianNB(), KNeighborsClassifier()

# Wine dataset as plain NumPy arrays (features, targets).
dataset = load_wine()
X, y = np.array(dataset.data), np.array(dataset.target)

# A single RandomState instance is shared by the fold splitter and by every
# labeled/unlabeled split below, so each call advances the same stream.
rd = np.random.RandomState(5)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=rd)

# Every pass overwrites the previous split, so once the loop finishes these
# names hold the data of the last fold only.
for train_index, test_index in skf.split(X, y):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # The 0.8 "test" fraction is bound to U_train / Uy_train; the remaining
    # part is bound to L_train / Ly_train.
    L_train, U_train, Ly_train, Uy_train = train_test_split(
        X_train,
        y_train,
        test_size=0.8,
        random_state=rd,
        stratify=y_train,
    )

In [27]:
# Build the ensemble from the three base learners and run one fit on the
# labeled / unlabeled split produced by the previous cell.
democratic_co = DemocraticCo(base_cls=[h_0, h_1, h_2], random_state=5)
democratic_co.fit(L=L_train, y=Ly_train, U=U_train)