# SMuDGE tutorial :  multiple monoview generators. 

In this tutorial, we will learn how to use different monoview generators and configure them. 
Let us first define the visualization functions:


In [1]:
from sklearn.datasets import make_classification, make_gaussian_quantiles
import numpy as np
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.colors import DEFAULT_PLOTLY_COLORS

from multiview_generator.multiple_sub_problems import MultiViewSubProblemsGenerator



def plot_3d(X, y):
    """Build a 3D scatter figure of ``X`` with one trace per class label.

    Parameters
    ----------
    X : array supporting ``X[rows, column]`` indexing
        Columns 0, 1 and 2 are used as the x, y and z coordinates.
    y : array
        Labels; one trace is added per distinct value returned by
        ``np.unique(y)``.  Assumed to be one-dimensional and aligned with
        the rows of ``X`` -- TODO confirm for other shapes.

    Returns
    -------
    The ``go.Figure`` holding the traces.  It is not displayed here; call
    ``.show()`` on the result.
    """
    figure = go.Figure()
    for class_label in np.unique(y):
        # Row positions of the samples carrying this label.
        rows = np.flatnonzero(y == class_label)
        trace = go.Scatter3d(
            x=X[rows, 0],
            y=X[rows, 1],
            z=X[rows, 2],
            # Legend entries are numbered from 1 (label value + 1).
            name="Class {}".format(class_label + 1),
            mode="markers",
            marker={"size": 1},
        )
        figure.add_trace(trace)
    return figure

def plot_3d_4_views(generator, n_views=4, n_classes=3):
    """Plot the first three features of each view in a two-column grid.

    One 3D scatter subplot is drawn per view, with one trace per class.
    Traces of the same class share a legend group and a colour, and only
    the traces of the first view create legend entries.

    Parameters
    ----------
    generator : object
        Must expose ``view_data`` (indexable by view, each item indexable as
        ``[rows, column]``), ``y`` (labels compared against
        ``range(n_classes)``) and ``example_ids`` (indexable by sample
        position; used as hover text).
    n_views : int, default 4
        Number of views to plot.  Up to four views use a 2x2 grid; more
        views add rows instead of failing on a missing grid cell.
    n_classes : int, default 3
        Number of classes; labels ``0 .. n_classes - 1`` are plotted.

    Returns
    -------
    The figure created by ``make_subplots``, not yet displayed.
    """
    n_cols = 2
    # Keep the 2x2 layout for up to four views, and grow downwards beyond
    # that (ceil division without importing math).
    n_rows = max(2, -(-n_views // n_cols))
    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=["View {}".format(view_index)
                        for view_index in range(n_views)],
        specs=[[{'type': 'scatter3d'} for _ in range(n_cols)]
               for _ in range(n_rows)])

    # The class membership does not depend on the view: compute it once.
    examples_per_class = [np.where(generator.y == lab_index)[0]
                          for lab_index in range(n_classes)]
    n_colors = len(DEFAULT_PLOTLY_COLORS)

    # Plot the data for each view and each label
    for view_index in range(n_views):
        # Fill the grid row by row; plotly rows/cols are 1-based.
        grid_row, grid_col = divmod(view_index, n_cols)
        view = generator.view_data[view_index]
        for lab_index, concerned_examples in enumerate(examples_per_class):
            fig.add_trace(
                go.Scatter3d(
                    x=view[concerned_examples, 0],
                    y=view[concerned_examples, 1],
                    z=view[concerned_examples, 2],
                    text=[generator.example_ids[ind]
                          for ind in concerned_examples],
                    hoverinfo='text',
                    legendgroup="Class {}".format(lab_index),
                    mode='markers',
                    marker=dict(
                        size=1,
                        # Cycle through the palette so that having more
                        # classes than colours does not raise IndexError.
                        color=DEFAULT_PLOTLY_COLORS[lab_index % n_colors],
                        opacity=0.8),
                    name="Class {}".format(lab_index),
                    # Only the first view contributes legend entries.
                    showlegend=(view_index == 0)),
                row=grid_row + 1, col=grid_col + 1)
    return fig


## [`make_classification`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html) : the hyper-cube vertices

The first and simplest generator we use is `make_classification`, provided by `scikit-learn`. It generates a hypercube and, for each class, places `n_clusters_per_class` clusters of samples on its vertices.  

Let's visualize it in a 3D example: 

In [2]:
# Four classes with two clusters each, described by three informative
# features; class_sep=10 is large and flip_y=0 disables random label flips.
X, y = make_classification(
    n_samples=1000,
    n_classes=4,
    n_clusters_per_class=2,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    class_sep=10,
    flip_y=0,
    random_state=42,
)

plot_3d(X, y).show()

It is clear that this problem can be solved with a simple decision tree. 
However, the fact that `make_classification` is very customizable is a huge advantage for the generator. 
So if we change just the `hypercube` argument : 

In [3]:
# Same problem as in the previous cell, except that hypercube=False is
# passed to make_classification.
X, y = make_classification(n_samples=1000, n_features=3,
                           n_informative=3, n_redundant=0,
                           n_classes=4, n_clusters_per_class=2,
                           class_sep=10, flip_y=0, random_state=42,
                           hypercube=False)

# Display explicitly, like the other cells, instead of relying on the
# notebook echoing the last expression of the cell.
plot_3d(X, y).show()

Setting `hypercube=False` means that the vertices are those of a random polytope. It is therefore less controllable, but it adds some difficulty to the problem. However, we see that despite the huge `class_sep`, the classes overlap, which can be a huge drawback depending on the situation. 

## [`make_gaussian_quantiles`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html#sklearn.datasets.make_gaussian_quantiles), the concentric spheres

This generator, also from `scikit-learn`, generates `n_classes` concentric (possibly multi-dimensional) spheres. 

In [4]:
# Four classes over three features; mean=None keeps scikit-learn's default
# mean, and shuffle=False leaves the samples unshuffled.
X, y = make_gaussian_quantiles(
    n_samples=1000,
    n_features=3,
    n_classes=4,
    mean=None,
    cov=0.5,
    shuffle=False,
    random_state=42,
)
plot_3d(X, y).show()

This problem requires a more complex algorithm than a decision tree to be solved. 

## SMuDGE usage : generator configuration

In order to use these generators in SMuDGE at their full potential, we need to set some configuration.
First, let us initialize SMuDGE with the configuration of the previous tutorial: 

In [5]:
# --- Dataset identity and dimensions ---------------------------------------
name = "tuto"
n_samples = 1000
n_features = 3
n_views = 4
n_classes = 3

# --- Class balance and per-view error ---------------------------------------
# One weight per class (they sum to 0.999, not exactly 1).
class_weights = [0.333] * n_classes
# 3 rows x 4 columns, matching n_classes and n_views -- presumably one row
# per class and one column per view; confirm against the SMuDGE docs.
error_matrix = [
    [0.4, 0.4, 0.4, 0.4],
    [0.55, 0.4, 0.4, 0.4],
    [0.4, 0.5, 0.52, 0.55],
]

# --- Relations between views -------------------------------------------------
redundancy = 0.2
complementarity = 0.3
mutual_error = 0.1

# Seeded NumPy random state, so the tutorial is reproducible.
random_state = np.random.RandomState(42)

Then, let's configure the four monoview generators that will build our multiview dataset :

In [6]:
# One generator name per view (four entries, matching n_views = 4).
sub_problem_type = [
    "make_classification",
    "make_gaussian_quantiles",
    "make_classification",
    "make_gaussian_quantiles",
]

# One keyword dictionary per view, presumably paired by position with
# sub_problem_type -- confirm against the SMuDGE docs.
sub_problem_configuration = [
    # View 0: make_classification with hypercube=True.
    {
        "n_informative": n_features,
        "n_redundant": 0,
        "n_repeated": 0,
        "class_sep": 10,
        "hypercube": True,
    },
    # View 1: make_gaussian_quantiles with a mean of 1.0 on every feature.
    {
        "mean": [1.0] * n_features,
        "cov": 1.0,
    },
    # View 2: same as view 0, but with hypercube=False.
    {
        "n_informative": n_features,
        "n_redundant": 0,
        "n_repeated": 0,
        "class_sep": 10,
        "hypercube": False,
    },
    # View 3: make_gaussian_quantiles with mean=None and a larger cov.
    {
        "mean": None,
        "cov": 10.0,
    },
]

Now let us generate the dataset:

In [7]:
# Build the generator from the configuration defined in the cells above.
generator = MultiViewSubProblemsGenerator(
    name=name,
    random_state=random_state,
    # Dataset dimensions
    n_samples=n_samples,
    n_features=n_features,
    n_views=n_views,
    n_classes=n_classes,
    # Class balance and error configuration
    class_weights=class_weights,
    error_matrix=error_matrix,
    # Relations between views
    redundancy=redundancy,
    complementarity=complementarity,
    mutual_error=mutual_error,
    # Monoview generators and their settings
    sub_problem_type=sub_problem_type,
    sub_problem_configurations=sub_problem_configuration,
)
# Generate the dataset; the call returns the view data and the labels.
view_data, y = generator.generate_multi_view_dataset()

Let us plot the dataset views : 

In [8]:
# Pass the configured sizes explicitly so the plot follows the configuration
# instead of relying on the plotting function's defaults (4 views, 3 classes).
plot_3d_4_views(generator, n_views=n_views, n_classes=n_classes).show()