In [48]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.mixture import GaussianMixture


In [49]:
d = np.load("drors_for_all.npy")
X = d[0:-1:10,:4]
Y = d[0:-1:10,4]

In [50]:


class PipelineGrid:

    def __init__(self, dim_reducer, param_grid, classifier):
        self.dim_reducer = dim_reducer
        self.classifier = classifier
        self.param_grid = param_grid
        self.pipeline = Pipeline(steps=[("reducer", dim_reducer), ("classifier", classifier)])
        self.grid_search = GridSearchCV(self.pipeline, param_grid=param_grid)

    def fit(self, X): self.grid_search.fit(X)

    def get_best_estimator(self, X):
        self.fit(X)
        return self.grid_search.best_estimator_

    def get_all_estimators(self): return self.grid_search.cv_results_["params"]

    def fit_subpipes(self, X):
        self.fit(X)
        pipes = []
        perms = self.get_all_estimators()
        for perm in perms:
            subpipe = Pipeline(steps=[("reducer", self.dim_reducer), ("classifier", self.classifier)])
            for k, v in perm.items():
                subpipe.set_params(**{k: v})
            preds = subpipe.fit_predict(X)
            perm["preds"] = preds
            pipes.append(perm)

        return pipes

    def get_populations(self, X, Y):
        fitted_subpipes = self.fit_subpipes(X)
        for fit in fitted_subpipes:
            populations = {}
            for system in set(Y):
                system_indices = np.where(Y == system)
                fit_preds = fit["preds"]
                system_preds = fit_preds[system_indices]
                populations[system] = [system_preds[system_preds == i].shape[0] / system_preds.shape[0] for i in set(system_preds)]
            fit["populations"] = populations
        return fitted_subpipes



In [51]:
dr = PCA()
c = GaussianMixture()
params = {"reducer__n_components": [1,2,3], "classifier__n_components": [2,3,4,5]}

pipe = PipelineGrid(dim_reducer=dr, param_grid=params, classifier=c)

print(pipe.get_populations(X, Y))

[{'classifier__n_components': 2, 'reducer__n_components': 1, 'preds': array([0, 0, 0, ..., 0, 0, 0]), 'populations': {1.0: [1.0], 2.0: [0.1217564870259481, 0.8782435129740519], 3.0: [0.982, 0.018], 4.0: [0.91, 0.09], 5.0: [0.904, 0.096], 6.0: [0.956, 0.044], 7.0: [0.845691382765531, 0.15430861723446893], 8.0: [1.0]}}, {'classifier__n_components': 2, 'reducer__n_components': 2, 'preds': array([1, 1, 1, ..., 1, 1, 1]), 'populations': {1.0: [0.002, 0.998], 2.0: [0.8862275449101796, 0.11377245508982035], 3.0: [0.026, 0.974], 4.0: [0.084, 0.916], 5.0: [0.104, 0.896], 6.0: [0.044, 0.956], 7.0: [0.1623246492985972, 0.8376753507014028], 8.0: [1.0]}}, {'classifier__n_components': 2, 'reducer__n_components': 3, 'preds': array([1, 1, 1, ..., 1, 1, 1]), 'populations': {1.0: [1.0], 2.0: [0.8802395209580839, 0.11976047904191617], 3.0: [0.032, 0.968], 4.0: [0.084, 0.916], 5.0: [0.104, 0.896], 6.0: [0.046, 0.954], 7.0: [0.1623246492985972, 0.8376753507014028], 8.0: [1.0]}}, {'classifier__n_components'