In [None]:
import pandas as pd
import numpy as np
import json
import pickle

from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score,KFold
from sklearn.preprocessing import StandardScaler  
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,VotingClassifier,BaggingClassifier

from sklearn.metrics import classification_report

In [None]:
# Location of the raw churn CSV, resolved relative to the notebook's working directory.
DATA_PATH = "Churn_Modelling.csv"
data = pd.read_csv(DATA_PATH)

In [None]:
# Positional split of the raw frame: the last column is the target, and the
# features are every column from position 3 up to (not including) the target.
# NOTE(review): the three skipped leading columns are presumably row/customer
# identifiers -- confirm against the CSV header.
target_position = -1
y = data.iloc[:, target_position]
X = data.iloc[:, 3:target_position]

In [None]:
def data_encode(df):
    """Integer-encode every object/category column of a DataFrame.

    Works on a deep copy, so the caller's frame is left untouched.

    Args:
        df: DataFrame whose 'category' and 'object' dtype columns should be
            replaced by integer codes. Other columns pass through unchanged.

    Returns:
        A tuple ``(encoded, categorical_map)`` where ``encoded`` is the copy
        with each selected column replaced by its ``factorize(sort=True)``
        codes, and ``categorical_map`` maps each encoded column name to the
        list of its unique values in sorted order (the position of a value in
        that list is its code).
    """
    encoded = df.copy(deep=True)
    categorical_map = {}
    label_columns = encoded.select_dtypes(include=['category', 'object']).columns
    for name in label_columns:
        codes, uniques = encoded[name].factorize(sort=True)
        # Record the code -> value lookup before overwriting the column.
        categorical_map[name] = list(uniques)
        encoded[name] = codes
    return encoded, categorical_map

In [None]:
# Encode the string-valued feature columns as integers and keep the
# per-column code -> value lookup for the upload calls further down.
X_enc, categorical_map = data_encode(df=X)

In [None]:
# Fixed seed so the fitted model is reproducible across runs.
# NOTE(review): the features are fed to the model unscaled -- confirm the
# default solver converges on this data without a ConvergenceWarning.
MODEL_SEED = 1300
sklearn_model = LogisticRegression(random_state=MODEL_SEED)

In [None]:
# Hold out 20% of the encoded rows for validation; the seed pins the shuffle.
x_train, x_val, y_train, y_val = train_test_split(
    X_enc,
    y,
    random_state=0,
    test_size=0.2,
)

In [None]:
# Fit on the training split; the fitted estimator is the cell's displayed output.
sklearn_model.fit(X=x_train, y=y_train)

In [None]:
# Per-class precision/recall/F1 on the held-out validation split.
val_predictions = sklearn_model.predict(x_val)
print(classification_report(y_val, val_predictions))

In [None]:
# Column names of the validation features, captured as a plain list.
feature_names = x_val.columns.tolist()
# Human-readable class labels passed to the upload calls.
# NOTE(review): presumably index 0 -> "Retained" and 1 -> "Exited", matching
# the integer values of the target column -- confirm.
class_names = ["Retained", "Exited"]

# UNBOX

In [None]:
import os

import unboxapi
from unboxapi.tasks import TaskType
from unboxapi.models import ModelType

# Read the API key from the UNBOX_API_KEY environment variable so a real key
# never has to be pasted into (and committed with) the notebook. The original
# placeholder is kept as the fallback, so behaviour is unchanged when the
# variable is not set.
client = unboxapi.UnboxClient(os.environ.get("UNBOX_API_KEY", "YOUR_API_KEY_HERE"))

## Create the prediction function

In [None]:
def predict_proba(model, input_features: np.ndarray):
    """Return the model's class probabilities for a batch of feature rows.

    Args:
        model: Any object exposing a ``predict_proba`` method.
        input_features: Array of feature rows, forwarded to the model as-is.

    Returns:
        Whatever ``model.predict_proba(input_features)`` returns.
    """
    probabilities = model.predict_proba(input_features)
    return probabilities

In [None]:
# Smoke-test the wrapper on the first ten validation rows. Selecting by
# feature_names means only the model's feature columns are passed.
sample_features = x_val[feature_names].head(10).to_numpy()
predict_proba(sklearn_model, sample_features)

In [None]:
# Attach the validation labels as a 'churn' column. `assign` builds a new
# frame instead of writing into the one returned by train_test_split, which
# avoids pandas' SettingWithCopyWarning; re-running the cell just overwrites
# the column with the same values.
x_val = x_val.assign(churn=y_val.values)

In [None]:
# Upload the validation frame (features plus the 'churn' label column).
# TaskType is already imported alongside the Unbox client, so it is not
# re-imported in this cell.
dataset = client.add_dataframe(
    df=x_val,
    class_names=class_names,
    label_column_name='churn',
    name="Churn Validation",
    description='this is my churn dataset',
    task_type=TaskType.TabularClassification,
    feature_names=feature_names,
    categorical_features_map=categorical_map,
)
# Display the uploaded dataset's metadata as the cell output.
dataset.to_dict()

In [None]:
# Upload the fitted classifier together with its prediction wrapper and a
# 100-row sample of the training features.
model = client.add_model(
    name='Churn Classifier',
    description='this is my churn classification model',
    task_type=TaskType.TabularClassification,
    model_type=ModelType.sklearn,
    model=sklearn_model,
    function=predict_proba,
    class_names=class_names,
    feature_names=feature_names,
    categorical_features_map=categorical_map,
    train_sample_df=x_train.head(100),
)
# Display the uploaded model's metadata as the cell output.
model.to_dict()