
# Tabular Data Classifier

Train and upload different classifiers to Unbox. 

Modified from https://scikit-learn.org/stable/auto_examples/classification/plot_classification_probability.html#sphx-glr-auto-examples-classification-plot-classification-probability-py

We use a 3-class dataset and classify it with a support vector classifier,
L1- and L2-penalized logistic regression in either a One-vs-Rest or multinomial
setting, and a Gaussian process classifier.

Linear SVC is not a probabilistic classifier by default, but it has a built-in
calibration option, which is enabled in this example (`probability=True`).

Logistic regression in the One-vs-Rest setting is not a multiclass classifier
out of the box. As a result, it has more trouble separating classes 2 and 3
than the other estimators.


In [None]:
# Carried over from the script version of the scikit-learn example this
# notebook is modified from.
# NOTE(review): in a script `__doc__` is the module docstring; confirm that
# printing it from a notebook cell still shows something useful.
print(__doc__)

# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Modified by Rishab Ramanathan on 10/11/2021
# License: BSD 3 clause

import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn import datasets

# Load iris and keep only the first two features so the problem stays 2-D
# (the probability grid built below is two-dimensional as well).
iris = datasets.load_iris()
X = iris.data[:, 0:2]
y = iris.target

n_features = X.shape[1]
# Count classes from the ground-truth labels rather than from one model's
# predictions: a classifier that never predicts some class would otherwise
# under-report the number of classes.
n_classes = np.unique(y).size

C = 10  # value passed as `C=` to every logistic-regression model and the SVC
kernel = 1.0 * RBF([1.0, 1.0])  # for GPC

# Create different classifiers, keyed by a human-readable display name.
# NOTE(review): newer scikit-learn releases deprecate the `multi_class`
# argument -- confirm against the scikit-learn version this notebook targets.
classifiers = {
    'L1 logistic': LogisticRegression(C=C, penalty='l1',
                                      solver='saga',
                                      multi_class='multinomial',
                                      max_iter=10000),
    'L2 logistic (Multinomial)': LogisticRegression(C=C, penalty='l2',
                                                    solver='saga',
                                                    multi_class='multinomial',
                                                    max_iter=10000),
    'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2',
                                            solver='saga',
                                            multi_class='ovr',
                                            max_iter=10000),
    'Linear SVC': SVC(kernel='linear', C=C, probability=True,
                      random_state=0),
    'GPC': GaussianProcessClassifier(kernel),
}

n_classifiers = len(classifiers)

# Test dataset: a regular 100 x 100 grid spanning [3, 9] on the first axis
# and [1, 5] on the second, flattened to one (x, y) row per grid point.
xx = np.linspace(3, 9, 100)
yy = np.linspace(1, 5, 100)
xx, yy = np.meshgrid(xx, yy)
Xfull = np.c_[xx.ravel(), yy.ravel()]

# Fit every classifier on the full dataset and report its accuracy.
for name, classifier in classifiers.items():
    classifier.fit(X, y)

    # Accuracy is measured on the same data the model was fitted on, so it
    # is a training score, not a held-out estimate.
    y_pred = classifier.predict(X)
    accuracy = accuracy_score(y, y_pred)
    print(f"Accuracy (train) for {name}: {accuracy * 100:0.1f}% ")

    # View probabilities over the grid. `probas` is overwritten on every
    # iteration, so after the loop it holds the last classifier's output.
    probas = classifier.predict_proba(Xfull)

In [None]:
import pandas as pd

# Labels for the target classes and for the two feature columns kept in X.
class_names = iris.target_names.tolist()
feature_names = iris.feature_names[:2]

# Training frame: one column per retained feature plus the label column
# stored under "target".
df = pd.DataFrame(data=X, columns=feature_names).assign(target=y)

# UNBOX

In [None]:
import unboxapi
from unboxapi.tasks import TaskType
from unboxapi.models import ModelType
# Replace the placeholder with your own Unbox API key before running.
api_key = "YOUR_API_KEY_HERE"
client = unboxapi.UnboxClient(api_key)

## Create function

In [None]:
import numpy as np

def predict_proba(model, input_features: np.ndarray):
    """Return ``model.predict_proba(input_features)`` unchanged.

    This is the prediction function handed to Unbox together with the model
    object when the model is uploaded.

    Args:
        model: Any object exposing a ``predict_proba`` method.
        input_features: Feature rows to score; forwarded to the model as-is.

    Returns:
        Whatever ``model.predict_proba`` returns for ``input_features``.
    """
    class_probabilities = model.predict_proba(input_features)
    return class_probabilities

In [None]:
# First run: create the project. On later runs, disable this call and use
# the `load_project` snippet kept in the string literal below instead.
project = client.create_project(
    description="Project for Iris Tabular data",
    name="Sklearn Iris Tabular",
)

# Use this for loading the project on subsequent runs
'''
project = client.load_project(
    name="Sklearn Iris Tabular"
)
'''

# Upload the training DataFrame, telling Unbox which column holds the label
# and which columns are features.
dataset = project.add_dataframe(
    name="Iris Sklearn Train",
    description="this is my iris training demo dataset",
    task_type=TaskType.TabularClassification,
    df=df,
    feature_names=feature_names,
    label_column_name="target",
    class_names=class_names,
)
# Last expression of the cell, so the notebook displays the result.
dataset.to_dict()

In [None]:
# Upload the fitted L1 logistic-regression model together with the
# `predict_proba` wrapper and the training DataFrame passed as the sample.
model = project.add_model(
    name="Iris - L1 Logistic",
    description="this is my first tabular classification model",
    task_type=TaskType.TabularClassification,
    model_type=ModelType.sklearn,
    model=classifiers["L1 logistic"],
    function=predict_proba,
    class_names=class_names,
    feature_names=feature_names,
    train_sample_df=df,
    train_sample_label_column_name="target",
)
# Last expression of the cell, so the notebook displays the result.
model.to_dict()