129 changes: 82 additions & 47 deletions examples/sentiment-analysis/sentiment-unbox.ipynb
@@ -67,19 +67,24 @@
"outputs": [],
"source": [
"columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']\n",
"dftrain = pd.read_csv('./data/training.1600000.processed.noemoticon.csv',\n",
" header = None,\n",
" encoding ='ISO-8859-1')\n",
"dftest = pd.read_csv('./data/testdata.manual.2009.06.14.csv',\n",
" header = None,\n",
" encoding ='ISO-8859-1')\n",
"dftrain.columns = columns\n",
"dftest.columns = columns"
"df_train_file_path = './data/training.1600000.processed.noemoticon.csv'\n",
"df_train_name = 'training.1600000.processed.noemoticon'\n",
"df_train = pd.read_csv(df_train_file_path,\n",
" header=None,\n",
" encoding='ISO-8859-1')\n",
"\n",
"df_test_file_path = './data/testdata.manual.2009.06.14.csv'\n",
"df_test_name = 'testdata.manual.2009.06.14'\n",
"df_test = pd.read_csv(df_test_file_path,\n",
" header=None,\n",
" encoding='ISO-8859-1')\n",
"df_train.columns = columns\n",
"df_test.columns = columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "multiple-disability",
"metadata": {},
"outputs": [
@@ -106,23 +111,23 @@
" ('lr', LogisticRegression())])"
]
},
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentiment_lr = Pipeline([\n",
" ('count_vect', CountVectorizer(min_df = 100,\n",
" ngram_range = (1,2),\n",
" stop_words = 'english')), \n",
" ('count_vect', CountVectorizer(min_df=100,\n",
" ngram_range=(1,2),\n",
" stop_words='english')), \n",
" ('lr', LogisticRegression())])\n",
"sentiment_lr.fit(dftrain.text, dftrain.polarity)"
"sentiment_lr.fit(df_train.text, df_train.polarity)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"id": "civilian-auditor",
"metadata": {},
"outputs": [
@@ -143,13 +148,13 @@
}
],
"source": [
"Xtest, ytest = dftest.text[dftest.polarity!=2], dftest.polarity[dftest.polarity!=2]\n",
"print(classification_report(ytest,sentiment_lr.predict(Xtest)))"
"x_test, y_test = df_test.text[df_test.polarity != 2], df_test.polarity[df_test.polarity != 2]\n",
"print(classification_report(y_test, sentiment_lr.predict(x_test)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"id": "numerous-ability",
"metadata": {},
"outputs": [
@@ -159,18 +164,18 @@
"array([4])"
]
},
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentiment_lr.predict([Xtest[0]])"
"sentiment_lr.predict([x_test[0]])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"id": "electronic-princess",
"metadata": {},
"outputs": [
@@ -180,13 +185,13 @@
"array([4, 0])"
]
},
"execution_count": 7,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentiment_lr.predict([\"good\", \"bad\"])"
"sentiment_lr.predict(['good', 'bad'])"
]
},
{
@@ -199,22 +204,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"id": "medium-field",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"What is your Unbox email?me@vikasnair.com\n",
"What is your Unbox password?········\n"
]
}
],
"outputs": [],
"source": [
"import unboxapi\n",
"client = unboxapi.UnboxClient()"
"client = unboxapi.UnboxClient(email='me@vikasnair.com', password='00000000')"
]
},
{
@@ -227,19 +223,19 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"id": "maritime-writing",
"metadata": {},
"outputs": [],
"source": [
"class_dict = {4: \"positive\", 0: \"negative\", 2: \"neutral\"}\n",
"class_dict = { 4: 'positive', 0: 'negative', 2: 'neutral' }\n",
"def predict_function(model, text_list):\n",
" return [class_dict[d] for d in model.predict(text_list)]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 12,
"id": "bored-treasury",
"metadata": {},
"outputs": [
@@ -249,13 +245,13 @@
"['positive', 'positive', 'negative']"
]
},
"execution_count": 10,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"texts = [\"some new text, sweet noodles\", \"happy time\", \"sad day\"]\n",
"texts = ['some new text, sweet noodles', 'happy time', 'sad day']\n",
"\n",
"predict_function(sentiment_lr, texts)"
]
@@ -270,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "present-seating",
"metadata": {
"scrolled": true
@@ -280,16 +276,47 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[2021-03-01 04:47:41,045] WARNING - pip package requirement pandas already exist\n",
"[2021-03-01 04:47:41,052] WARNING - pip package requirement scikit-learn already exist\n"
"Uploading model...\n",
"[2021-03-07 04:11:30,623] WARNING - Using BentoML installed in `editable` model, the local BentoML repository including all code changes will be packaged together with saved bundle created, under the './bundled_pip_dependencies' directory of the saved bundle.\n",
"[2021-03-07 04:12:02,814] INFO - Detected non-PyPI-released BentoML installed, copying local BentoML modulefiles to target saved bundle path..\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"warning: no previously-included files matching '*~' found anywhere in distribution\n",
"warning: no previously-included files matching '*.pyo' found anywhere in distribution\n",
"warning: no previously-included files matching '.git' found anywhere in distribution\n",
"warning: no previously-included files matching '.ipynb_checkpoints' found anywhere in distribution\n",
"warning: no previously-included files matching '__pycache__' found anywhere in distribution\n",
"warning: no directories found matching 'bentoml/yatai/web/dist'\n",
"no previously-included directories found matching 'e2e_tests'\n",
"no previously-included directories found matching 'tests'\n",
"no previously-included directories found matching 'benchmark'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"UPDATING BentoML-0.11.0+33.g7e83376/bentoml/_version.py\n",
"set BentoML-0.11.0+33.g7e83376/bentoml/_version.py to '0.11.0+33.g7e83376'\n"
]
}
],
"source": [
"client.add(\n",
" function=predict_function,\n",
" model=sentiment_lr\n",
")"
"print('Uploading model...')\n",
"client.add_model(function=predict_function, model=sentiment_lr)\n",
"print('Complete.')\n",
"\n",
"print('\\nUploading dataset (from file)...')\n",
"response_i = client.add_dataset(df_train_file_path, df_train_name)\n",
"print(f'Complete. Response: {response_i}')\n",
"\n",
"print('\\nUploading dataset (from data frame)...')\n",
"response_j = client.add_dataframe(df_test, df_test_name)\n",
"print(f'Complete. Response: {response_j}')"
]
},
{
@@ -299,6 +326,14 @@
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "corporate-azerbaijan",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
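Taken together, the notebook changes above boil down to the workflow sketched below: train the scikit-learn pipeline, then push the model and a dataset through the new `UnboxClient` API. This is a minimal sketch, assuming this PR's `unboxapi` package is importable and the Sentiment140 CSVs sit under `./data/`; the credentials are placeholders, not real values.

# Minimal sketch of the notebook's new workflow (assumes this PR's unboxapi
# package is installed and the Sentiment140 CSVs exist under ./data/).
import pandas as pd
import unboxapi
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

df_train = pd.read_csv('./data/training.1600000.processed.noemoticon.csv',
                       header=None, encoding='ISO-8859-1')
df_train.columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']

sentiment_lr = Pipeline([
    ('count_vect', CountVectorizer(min_df=100, ngram_range=(1, 2),
                                   stop_words='english')),
    ('lr', LogisticRegression())])
sentiment_lr.fit(df_train.text, df_train.polarity)

# Credentials now go to the constructor instead of interactive prompts.
client = unboxapi.UnboxClient(email='me@example.com', password='********')

# Map Sentiment140's numeric polarity labels to strings for the upload.
class_dict = {4: 'positive', 0: 'negative', 2: 'neutral'}
def predict_function(model, text_list):
    return [class_dict[d] for d in model.predict(text_list)]

client.add_model(function=predict_function, model=sentiment_lr)
response = client.add_dataframe(df_train, 'training.1600000.processed.noemoticon')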
80 changes: 44 additions & 36 deletions unboxapi/__init__.py
@@ -1,51 +1,59 @@
import os
import getpass
import tarfile
import pyrebase
import bentoml
import bentoml
import getpass
import os
import pandas as pd
import tarfile
import tempfile
import uuid

from bentoml.saved_bundle.bundler import _write_bento_content_to_dir
from bentoml.utils.tempdir import TempDirectory

from .lib.network import FlaskAPI, FirebaseAPI
from .template import create_template_model


class UnboxClient(object):
def __init__(self):
self.authenticate()

def add(self, function, model):
bento_service = create_template_model("sklearn", "text")
# Public functions

def __init__(self, email: str = None, password: str = None):
self.flask_api = FlaskAPI()
self.firebase_api = FirebaseAPI(email=email, password=password)


def add_model(self, function, model):
bento_service = create_template_model('sklearn', 'text')
bento_service.pack('model', model)
bento_service.pack('function', function)

with TempDirectory() as temp_dir:
_write_bento_content_to_dir(bento_service, temp_dir)

with TempDirectory() as tarfile_dir:
file_name = f'{bento_service.name}.tar'
tarfile_path = f'{tarfile_dir}/{file_name}'
with tarfile.open(tarfile_path, mode="w:gz") as tar:
model_id = str(uuid.uuid1())
tarfile_path = f'{tarfile_dir}/{model_id}'

with tarfile.open(tarfile_path, mode='w:gz') as tar:
tar.add(temp_dir, arcname=bento_service.name)
self.upload(
f"users/{self.user['localId']}/models/{file_name}", tarfile_path)

def upload(self, remote_path, file_path):
storage = self.firebase.storage()
storage.child(remote_path).put(file_path, self.user['idToken'])

def authenticate(self):
config = {
"apiKey": "AIzaSyAKlGQOmXTjPQhL1Uvj-Jr-_jUtNWmpOgs",
"authDomain": "unbox-ai.firebaseapp.com",
"databaseURL": "https://unbox-ai.firebaseio.com",
"storageBucket": "unbox-ai.appspot.com"
}

self.firebase = pyrebase.initialize_app(config)

# Get a reference to the auth service
auth = self.firebase.auth()

# Log the user in
email = input("What is your Unbox email?")
password = getpass.getpass("What is your Unbox password?")
self.user = auth.sign_in_with_email_and_password(email, password)

user_id = self.firebase_api.user['localId']
remote_path = f'users/{user_id}/models/{model_id}'
self.firebase_api.upload(remote_path, tarfile_path)


def add_dataset(self, file_path: str, name: str):
# For now, let's upload straight to Firebase Storage from here
user_id = self.firebase_api.user['localId']
dataset_id = str(uuid.uuid1())
remote_path = f'users/{user_id}/datasets/{dataset_id}'
self.firebase_api.upload(remote_path, file_path)

# And then set the metadata via request to our Flask API
id_token = self.firebase_api.user['idToken']
response = self.flask_api.upload_dataset_metadata(user_id,
dataset_id,
name,
id_token)
return response.json()


def add_dataframe(self, df: pd.DataFrame, name: str):
with tempfile.TemporaryDirectory() as tmp_dir:
dataset_file_path = os.path.join(tmp_dir, str(uuid.uuid1()))
df.to_csv(dataset_file_path, index=False)
return self.add_dataset(dataset_file_path, name)
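

The `FlaskAPI` and `FirebaseAPI` helpers this file now delegates to live in `unboxapi/lib/network.py`, which is not part of this diff. Reconstructed from the pyrebase logic deleted above, `FirebaseAPI` plausibly looks like the sketch below; treat the interactive fallback prompts, and everything about `FlaskAPI.upload_dataset_metadata`, as assumptions rather than the module's actual contents.

# Hypothetical reconstruction of FirebaseAPI (unboxapi/lib/network.py is not
# shown in this diff); based on the authenticate()/upload() methods removed above.
import getpass
import pyrebase


class FirebaseAPI:
    def __init__(self, email: str = None, password: str = None):
        config = {
            "apiKey": "AIzaSyAKlGQOmXTjPQhL1Uvj-Jr-_jUtNWmpOgs",
            "authDomain": "unbox-ai.firebaseapp.com",
            "databaseURL": "https://unbox-ai.firebaseio.com",
            "storageBucket": "unbox-ai.appspot.com"
        }
        self.firebase = pyrebase.initialize_app(config)
        auth = self.firebase.auth()
        # Assumed fallback: prompt interactively when credentials are omitted,
        # preserving the old UnboxClient.authenticate() behavior.
        email = email or input("What is your Unbox email?")
        password = password or getpass.getpass("What is your Unbox password?")
        self.user = auth.sign_in_with_email_and_password(email, password)

    def upload(self, remote_path: str, file_path: str):
        # Same Firebase Storage call the old UnboxClient.upload() made.
        storage = self.firebase.storage()
        storage.child(remote_path).put(file_path, self.user["idToken"])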