# IMPORT

In [None]:
import tempfile
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.pipeline import Pipeline

# LOAD DATA, TRAIN MODEL

In [None]:
# Load the labelled examples; later cells read the `text` and `category` columns.
df = pd.read_csv("./training.csv")
# Shuffle every row before the positional train/test split further down.
# The seed is fixed so the split, the reported metrics and the uploaded test
# set are identical on every run instead of changing with each execution.
df = df.sample(frac=1, random_state=42)

In [None]:
# Turn the label column into a pandas categorical, then store the integer code
# of each row's category in `label_code` (the target the model is trained on).
df["category"] = df["category"].astype("category")
df["label_code"] = df["category"].cat.codes

In [None]:
# Map each integer label code to its category name.
# The categorical's `categories` index is already ordered by code, so it is
# enumerated directly instead of zipping over every row of the frame. This also
# keeps the missing-value code (-1) out of the mapping, which previously added
# a spurious NaN entry at the end of `label_list`.
label_dict = dict(enumerate(df["category"].cat.categories))

# Class names ordered by label code: label_list[code] is the name for `code`.
label_list = list(label_dict.values())

In [None]:
# Positional split of the shuffled frame: the first 7000 rows are used for
# training, every remaining row is held out for evaluation.
df_train = df.iloc[:7000]
df_test = df.iloc[7000:]

In [None]:
# Display the training split as the cell output for a quick visual check.
df_train

In [None]:
# Text classifier: count features over unigrams and bigrams (English stop
# words removed) feeding a logistic regression with default settings.
model = Pipeline(
    steps=[
        (
            "count_vect",
            CountVectorizer(stop_words="english", ngram_range=(1, 2)),
        ),
        ("lr", LogisticRegression()),
    ]
)
# Train on the raw text column against the integer label codes.
model.fit(df_train["text"], df_train["label_code"])

In [None]:
# Held-out texts and their integer label codes.
x_test = df_test["text"]
y_test = df_test["label_code"]
# Print the classification report for the held-out rows.
print(classification_report(y_true=y_test, y_pred=model.predict(x_test)))

In [None]:
# Smoke-test the fitted pipeline on two short strings; the output is the
# predicted label code for each string (the model was fit on `label_code`).
model.predict(['good', 'bad'])

# UNBOX

In [None]:
import unboxapi
from unboxapi.models import ModelType
from unboxapi.tasks import TaskType

# Client object used by the `add_model` / `add_dataframe` calls below.
# NOTE: "YOUR_API_KEY_HERE" is a placeholder - substitute your own API key
# before running this cell.
client = unboxapi.UnboxClient("YOUR_API_KEY_HERE")

## Create function

In [None]:
def predict_function(model, text_list):
    """Return the class probabilities that `model` assigns to each text.

    Args:
        model: Any object exposing ``predict_proba`` (in this notebook, the
            fitted scikit-learn pipeline).
        text_list: The texts to score; forwarded unchanged to
            ``model.predict_proba``.

    Returns:
        Whatever ``model.predict_proba(text_list)`` returns.
    """
    class_probabilities = model.predict_proba(text_list)
    return class_probabilities

In [None]:
# A few ad-hoc inputs to check the prediction function locally before upload.
texts = [
    "some new text, sweet noodles",
    "happy time",
    "sad day",
]

# Cell output: the value returned by predict_function for the three texts.
predict_function(model=model, text_list=texts)

# Package (function, model) & Upload to Unbox Server

In [None]:
# Register the trained pipeline together with its prediction function through
# the Unbox client. `class_names` is the code-ordered list of category names,
# so entry i names the class with label code i.
unbox_model = client.add_model(
    name="Banking Model",
    description="this is my sklearn banking model",
    task_type=TaskType.TextClassification,
    model_type=ModelType.sklearn,
    function=predict_function,
    model=model,
    class_names=label_list,
)
# Cell output: the dictionary form of the object returned by add_model.
unbox_model.to_dict()

In [None]:
# Register the held-out rows as a dataset through the Unbox client, pointing
# it at the text column and the integer label column of `df_test`.
dataset = client.add_dataframe(
    name="Banking Test Dataset",
    description="my banking validation dataset",
    task_type=TaskType.TextClassification,
    df=df_test,
    text_column_name="text",
    label_column_name="label_code",
    class_names=label_list,
)
# Cell output: the dictionary form of the object returned by add_dataframe.
dataset.to_dict()