### Testing for Co-regression with co-training

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor

In [2]:
# Neighbourhood size shared by the two per-view kNN regressors.
n_neighbors = 3

# Build one independent regressor per view, both configured with the same k.
estimator1, estimator2 = (
    KNeighborsRegressor(n_neighbors=n_neighbors) for _ in range(2)
)

In [None]:
class CTRegressor():
    """Two-view co-training regressor (co-regression).

    Each view has its own regressor and its own labeled set. On every
    round each regressor looks at a small pool of unlabeled samples, finds
    the one whose pseudo-label most reduces its error on that sample's
    labeled neighbourhood, and hands that sample (with the pseudo-label)
    to the *other* view's labeled set. The final prediction is the mean of
    the two regressors.

    Parameters
    ----------
    estimator1, estimator2 : estimator objects or None
        Regressors for view 1 and view 2. They must provide ``fit``,
        ``predict`` and ``kneighbors`` and be cloneable with
        ``sklearn.base.clone``. ``None`` selects a default
        ``KNeighborsRegressor()``.
    k_neighbors : int, default 3
        Number of labeled neighbours used to score a candidate sample.
    random_state : int or None, default 0
        Seed for shuffling the unlabeled samples in ``fit``.
    """

    def __init__(
                 self,
                 estimator1=None,
                 estimator2=None,
                 k_neighbors=3,
                 random_state=0
                 ):
        if k_neighbors < 1:
            raise ValueError("k_neighbors must be a positive integer")

        # This class has no base class in scope, so the shared state is
        # stored here directly; object.__init__ accepts no arguments.
        self.estimator1 = estimator1
        self.estimator2 = estimator2
        self.random_state = random_state

        # if not given, fall back to k-nearest-neighbour regressors
        if self.estimator1 is None:
            self.estimator1 = KNeighborsRegressor()
        if self.estimator2 is None:
            self.estimator2 = KNeighborsRegressor()

        self.n_views_ = 2  # only 2 view learning supported currently

        self.class_name = "CTRegressor"

        self.k_neighbors_ = k_neighbors

    def estimate_delta_MSE_(self, estimator, new_estimator, X, y):
        """Return MSE of ``estimator`` minus MSE of ``new_estimator`` on (X, y).

        A positive value means ``new_estimator`` fits the given samples
        better than ``estimator`` does.
        """
        old_mse = np.mean((y - estimator.predict(X)) ** 2)
        new_mse = np.mean((y - new_estimator.predict(X)) ** 2)
        return old_mse - new_mse

    def select_sample_(self, estimator, X, labeled, y, candidates):
        """Pick the candidate whose pseudo-label helps ``estimator`` the most.

        Parameters
        ----------
        estimator : fitted regressor for this view
            Must already be fitted on ``(X[labeled], y[labeled])``.
        X : ndarray
            All samples of this view.
        labeled : list of int
            Row indices currently labeled for this view.
        y : ndarray
            Targets for this view (real or pseudo-labels at ``labeled``).
        candidates : list of int
            Row indices of the unlabeled pool.

        Returns
        -------
        tuple (index, y_hat) or None
            The best candidate and its pseudo-label, or ``None`` when no
            candidate yields a strictly positive error reduction.
        """
        if not candidates:
            return None

        # Local import: only needed here, and sklearn is already a
        # dependency of this file.
        from sklearn.base import clone

        X_labeled = X[labeled]
        y_labeled = y[labeled]
        X_candidates = X[candidates]

        # Pseudo-labels and labeled neighbourhoods for the whole pool at once.
        y_hat = estimator.predict(X_candidates)
        k = min(self.k_neighbors_, len(labeled))
        # Indices returned here are positions within the training set,
        # i.e. positions into X_labeled / y_labeled.
        omega = estimator.kneighbors(
            X_candidates, n_neighbors=k, return_distance=False
        )

        best = None
        best_delta = 0.0  # only strictly positive improvements qualify
        for pos, u in enumerate(candidates):
            # Same model and hyper-parameters, trained with one extra sample.
            new_estimator = clone(estimator)
            new_estimator.fit(
                np.vstack([X_labeled, X_candidates[pos:pos + 1]]),
                np.append(y_labeled, y_hat[pos]),
            )
            near = omega[pos]
            delta = self.estimate_delta_MSE_(
                estimator, new_estimator, X_labeled[near], y_labeled[near]
            )
            if delta > best_delta:
                best_delta = delta
                best = (u, y_hat[pos])
        return best

    # each view keeps its own labeled set; the pool U' is shared by both views
    def fit(
            self,
            Xs,
            y,
            p=1,
            n=1,
            unlabeled_pool_size=50,
            num_iter=50
            ):
        """Fit both view regressors with co-training.

        Parameters
        ----------
        Xs : sequence of two array-likes
            The two views; both must have one row per entry of ``y``.
        y : array-like of float
            Targets, with ``np.nan`` marking unlabeled samples.
        p, n : int
            Stored as ``p_`` / ``n_`` but not used by the regression
            algorithm.
        unlabeled_pool_size : int, default 50
            Size of the pool of unlabeled samples examined each round.
        num_iter : int, default 50
            Maximum number of co-training rounds.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the number of views is not 2, the views and ``y`` disagree
            in length, or no labeled sample is given.
        """
        if len(Xs) != self.n_views_:
            raise ValueError(
                "Expected %d views, got %d" % (self.n_views_, len(Xs))
            )

        y = np.array(y, dtype=float)

        # extract the multiple views given
        X1 = np.asarray(Xs[0])
        X2 = np.asarray(Xs[1])
        if len(X1) != len(y) or len(X2) != len(y):
            raise ValueError("Each view must have one row per target in y")

        self.p_, self.n_ = p, n

        self.unlabeled_pool_size_ = unlabeled_pool_size
        self.num_iter_ = num_iter

        missing = np.isnan(y)

        # the labeled samples
        L = np.flatnonzero(~missing).tolist()
        if not L:
            raise ValueError("At least one labeled sample is required")

        # the full set of unlabeled samples, shuffled for random access
        U = np.flatnonzero(missing).tolist()
        np.random.RandomState(self.random_state).shuffle(U)

        # the small pool of unlabeled samples to draw from in training;
        # explicit split avoids the `[-0:]` slice returning the whole list
        pool_size = max(0, min(len(U), unlabeled_pool_size))
        split = len(U) - pool_size
        unlabeled_pool = U[split:]
        U = U[:split]

        views = (X1, X2)
        estimators = (self.estimator1, self.estimator2)
        # per-view labeled indices and targets (pseudo-labels differ by view)
        labeled_sets = [list(L), list(L)]
        targets = [y.copy(), y.copy()]

        # fit each model to its respective view, even when there is nothing
        # to co-train on
        for view in range(self.n_views_):
            idx = labeled_sets[view]
            estimators[view].fit(views[view][idx], targets[view][idx])

        for _ in range(num_iter):
            if not unlabeled_pool:
                break

            # Each view picks its most helpful pool sample; a sample picked
            # by the first view is withdrawn before the second view chooses.
            picks = []
            for view in range(self.n_views_):
                pick = self.select_sample_(
                    estimators[view],
                    views[view],
                    labeled_sets[view],
                    targets[view],
                    unlabeled_pool,
                )
                picks.append(pick)
                if pick is not None:
                    unlabeled_pool.remove(pick[0])

            # if neither view found a helpful sample, stop early
            if all(pick is None for pick in picks):
                break

            # hand each pick to the labeled set of the other view and refit
            for view, pick in enumerate(picks):
                if pick is None:
                    continue
                other = 1 - view
                u, y_hat = pick
                labeled_sets[other].append(u)
                targets[other][u] = y_hat
                idx = labeled_sets[other]
                estimators[other].fit(views[other][idx], targets[other][idx])

            # replenish the pool from the remaining unlabeled samples
            while U and len(unlabeled_pool) < pool_size:
                unlabeled_pool.append(U.pop())

        return self

    def predict(self, Xs):
        """Predict as the mean of the two view regressors.

        Parameters
        ----------
        Xs : sequence of two array-likes
            The two views of the samples to predict.

        Returns
        -------
        ndarray
            ``0.5 * (h1(X1) + h2(X2))``.
        """
        if len(Xs) != self.n_views_:
            raise ValueError(
                "Expected %d views, got %d" % (self.n_views_, len(Xs))
            )
        y_hat1 = self.estimator1.predict(Xs[0])
        y_hat2 = self.estimator2.predict(Xs[1])
        return 0.5 * (y_hat1 + y_hat2)
            
            
        
        
    