In [1]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn import datasets
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
df = pd.read_csv("fetal_health.csv")

In [3]:
df['fetal_health'] = df.fetal_health.astype(int)
df['fetal_health'] = df['fetal_health'].map({3: 0, 1:1, 2:2})

In [4]:
df.fetal_health.unique()

array([2, 1, 0])

In [5]:
train, test = train_test_split(df, test_size=0.2)

In [6]:
x_train = train.loc[:, train.columns != 'fetal_health'].to_numpy()
y_train = train['fetal_health'].to_numpy()
x_test = test.loc[:, test.columns != 'fetal_health'].to_numpy()
y_test = test['fetal_health'].to_numpy()

In [7]:
clf = LogisticRegression(C=10, penalty='l1',
                                      solver='saga',
                                      multi_class='multinomial',
                                      max_iter=10000)

In [8]:
clf.fit(x_train, y_train)

LogisticRegression(C=10, max_iter=10000, multi_class='multinomial',
                   penalty='l1', solver='saga')

In [9]:
print(classification_report(y_test, clf.predict(x_test)))

              precision    recall  f1-score   support

           0       0.78      0.84      0.81        38
           1       0.90      0.96      0.93       321
           2       0.71      0.43      0.54        67

    accuracy                           0.87       426
   macro avg       0.80      0.75      0.76       426
weighted avg       0.86      0.87      0.86       426



# UNBOX

In [10]:
import unboxapi
from unboxapi.tasks import TaskType
from unboxapi.models import ModelType
client = unboxapi.UnboxClient("22791537-9377-4328-bafb-e7b013c32e0c")

## Create function

In [11]:
import numpy as np

def predict_proba(model, input_features: np.ndarray):
    return model.predict_proba(input_features)

In [12]:
class_names = ["Pathological", "Normal", "Suspect"]

In [13]:
feature_names = test.loc[:, test.columns != 'fetal_health'].columns.values.tolist()

In [14]:
from unboxapi.tasks import TaskType

dataset = client.add_dataframe(
    df=test,
    class_names=class_names,
    label_column_name='fetal_health',
    name="Fetal Health Sklearn Train - N3",
    description='this is my fetal health training demo dataset',
    task_type=TaskType.TabularClassification,
    feature_names=feature_names,
)
dataset.to_dict()

{'_links': {'models': '/api/datasets/22/models',
  'rows': '/api/datasets/22/rows',
  'self': '/api/datasets/22',
  'tags': '/api/datasets/22/tags'},
 'classNameCounts': None,
 'classNames': ['Pathological', 'Normal', 'Suspect'],
 'columnNames': None,
 'dateCreated': '2021-10-25T22:18:47.736745Z',
 'description': 'this is my fetal health training demo dataset',
 'featureNames': ['baseline value',
  'accelerations',
  'fetal_movement',
  'uterine_contractions',
  'light_decelerations',
  'severe_decelerations',
  'prolongued_decelerations',
  'abnormal_short_term_variability',
  'mean_value_of_short_term_variability',
  'percentage_of_time_with_abnormal_long_term_variability',
  'mean_value_of_long_term_variability',
  'histogram_width',
  'histogram_min',
  'histogram_max',
  'histogram_number_of_peaks',
  'histogram_number_of_zeroes',
  'histogram_mode',
  'histogram_mean',
  'histogram_median',
  'histogram_variance',
  'histogram_tendency'],
 'id': 22,
 'inProgressNotifications': ['

In [18]:
model = client.add_model(
    function=predict_proba, 
    model=clf,
    model_type=ModelType.sklearn,
    task_type=TaskType.TabularClassification,
    class_names=class_names,
    name='Fetal Classifier - N3',
    description='this is my second tabular classification model',
    feature_names=feature_names,
    preprocessed_train_sample_df=train[:100],
)
model.to_dict()

Bundling model and artifacts...
Uploading model to Unbox...


{'_links': {'datasets': '/api/models/9e716108-801d-4fea-80ea-55e0b92981f7/datasets',
  'inferenceRuns': '/api/models/9e716108-801d-4fea-80ea-55e0b92981f7/inference-runs',
  'runReports': '/api/models/9e716108-801d-4fea-80ea-55e0b92981f7/run-reports',
  'self': '/api/models/9e716108-801d-4fea-80ea-55e0b92981f7'},
 'classNames': ['Pathological', 'Normal', 'Suspect'],
 'datasetCount': 0,
 'dateCreated': '2021-10-25T22:19:56.333728Z',
 'description': 'this is my second tabular classification model',
 'featureNames': ['baseline value',
  'accelerations',
  'fetal_movement',
  'uterine_contractions',
  'light_decelerations',
  'severe_decelerations',
  'prolongued_decelerations',
  'abnormal_short_term_variability',
  'mean_value_of_short_term_variability',
  'percentage_of_time_with_abnormal_long_term_variability',
  'mean_value_of_long_term_variability',
  'histogram_width',
  'histogram_min',
  'histogram_max',
  'histogram_number_of_peaks',
  'histogram_number_of_zeroes',
  'histogram_mo