From 249f1d1b3f4a307de9560f5709d2fcdc3926a9e4 Mon Sep 17 00:00:00 2001
From: Bayomi <gabriel@tryunbox.ai>
Date: Sun, 7 Mar 2021 00:29:54 -0500
Subject: [PATCH 1/2] adding dataset support

---
 unboxapi/__init__.py    | 36 +++++++++++++++++++++++++++++++-----
 unboxapi/lib/network.py | 18 ++++++++++++++++++
 unboxapi/template.py    |  2 +-
 3 files changed, 50 insertions(+), 6 deletions(-)
 create mode 100644 unboxapi/lib/network.py

diff --git a/unboxapi/__init__.py b/unboxapi/__init__.py
index 1d3895d5..4c50e024 100644
--- a/unboxapi/__init__.py
+++ b/unboxapi/__init__.py
@@ -5,13 +5,19 @@
 import bentoml
 from bentoml.saved_bundle.bundler import _write_bento_content_to_dir
 from bentoml.utils.tempdir import TempDirectory
+import pandas as pd
+import uuid
+from .lib.network import FlaskAPIRequest
 
 from .template import create_template_model
 
 
 class UnboxClient(object):
-    def __init__(self):
-        self.authenticate()
+    def __init__(self, email=None, password=None):
+        self.firebase = None
+        self.user = None
+        self.flask_api_request = FlaskAPIRequest()
+        self.authenticate(email, password)
 
     def add(self, function, model):
         bento_service = create_template_model("sklearn", "text")
@@ -32,7 +38,12 @@ def upload(self, remote_path, file_path):
         storage = self.firebase.storage()
         storage.child(remote_path).put(file_path, self.user['idToken'])
 
-    def authenticate(self):
+    def authenticate(self, email, password):
+
+        if not email or not password:
+            email = input("What is your Unbox email?")
+            password = getpass.getpass("What is your Unbox password?")
+
         config = {
             "apiKey": "AIzaSyAKlGQOmXTjPQhL1Uvj-Jr-_jUtNWmpOgs",
             "authDomain": "unbox-ai.firebaseapp.com",
@@ -46,6 +57,21 @@ def authenticate(self):
         auth = self.firebase.auth()
 
         # Log the user in
-        email = input("What is your Unbox email?")
-        password = getpass.getpass("What is your Unbox password?")
         self.user = auth.sign_in_with_email_and_password(email, password)
+
+    def add_dataset(self, file_path: str):
+        self.flask_api_request.upload_dataset(file_path)
+
+    def add_dataframe(self, df: pd.DataFrame, file_path: str):
+        df.to_csv(file_path, index=False)
+        self.add_dataset(file_path)
+
+
+    # def add_dataset(self, file_name: str):
+    #     self.upload(
+    #         f"users/{self.user['localId']}/datasets/{file_name}", file_name)
+    #
+    # def add_dataframe(self, df: pd.DataFrame, file_name: str):
+    #     df.to_csv(file_name, index=False)
+    #     self.upload(
+    #         f"users/{self.user['localId']}/datasets/{file_name}", file_name)
diff --git a/unboxapi/lib/network.py b/unboxapi/lib/network.py
new file mode 100644
index 00000000..e576d1d5
--- /dev/null
+++ b/unboxapi/lib/network.py
@@ -0,0 +1,18 @@
+import requests
+
+
+class FlaskAPIRequest:
+
+    def __init__(self):
+        self.url = "http://localhost:5000"
+
+    def post(self, file, data, endpoint="/"):
+        return requests.post(
+            self.url + endpoint,
+            files={"file": file},
+            data=data,
+        )
+
+    def upload_dataset(self, filename):
+        file = (filename, open(filename, 'rb'))
+        return self.post(file, {}, endpoint="/upload_dataset")
diff --git a/unboxapi/template.py b/unboxapi/template.py
index 8623c01a..44b1af52 100644
--- a/unboxapi/template.py
+++ b/unboxapi/template.py
@@ -5,7 +5,7 @@
     "sklearn": "SklearnModelArtifact",
     "pytorch": "PytorchModelArtifact",
     "tensorflow": "TensorflowSavedModelArtifact",
-    "transformers": "TransformersModelArtifac"
+    "transformers": "TransformersModelArtifact"
 }
 
 

From 228cac463580a6659810fb3211d65b92eed27b22 Mon Sep 17 00:00:00 2001
From: vikasnair <me@vikasnair.com>
Date: Sun, 7 Mar 2021 04:17:33 -0800
Subject: [PATCH 2/2] Integrate datasets v1 with Firebase Firestore & Storage

---
 .../sentiment-analysis/sentiment-unbox.ipynb  | 129 +++++++++++-------
 unboxapi/__init__.py                          |  96 ++++++-------
 unboxapi/lib/network.py                       |  73 ++++++++--
 unboxapi/template.py                          |  27 ++--
 4 files changed, 194 insertions(+), 131 deletions(-)

diff --git a/examples/sentiment-analysis/sentiment-unbox.ipynb b/examples/sentiment-analysis/sentiment-unbox.ipynb
index 948e85a5..f2136a30 100644
--- a/examples/sentiment-analysis/sentiment-unbox.ipynb
+++ b/examples/sentiment-analysis/sentiment-unbox.ipynb
@@ -67,19 +67,24 @@
    "outputs": [],
    "source": [
     "columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']\n",
-    "dftrain = pd.read_csv('./data/training.1600000.processed.noemoticon.csv',\n",
-    "                      header = None,\n",
-    "                      encoding ='ISO-8859-1')\n",
-    "dftest = pd.read_csv('./data/testdata.manual.2009.06.14.csv',\n",
-    "                     header = None,\n",
-    "                     encoding ='ISO-8859-1')\n",
-    "dftrain.columns = columns\n",
-    "dftest.columns = columns"
+    "df_train_file_path = './data/training.1600000.processed.noemoticon.csv'\n",
+    "df_train_name = 'training.1600000.processed.noemoticon'\n",
+    "df_train = pd.read_csv(df_train_file_path,\n",
+    "                      header=None,\n",
+    "                      encoding='ISO-8859-1')\n",
+    "\n",
+    "df_test_file_path = './data/testdata.manual.2009.06.14.csv'\n",
+    "df_test_name = 'testdata.manual.2009.06.14'\n",
+    "df_test = pd.read_csv(df_test_file_path,\n",
+    "                     header=None,\n",
+    "                     encoding='ISO-8859-1')\n",
+    "df_train.columns = columns\n",
+    "df_test.columns = columns"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
    "id": "multiple-disability",
    "metadata": {},
    "outputs": [
@@ -106,23 +111,23 @@
        "                ('lr', LogisticRegression())])"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "sentiment_lr = Pipeline([\n",
-    "                         ('count_vect', CountVectorizer(min_df = 100,\n",
-    "                                                        ngram_range = (1,2),\n",
-    "                                                        stop_words = 'english')), \n",
+    "                         ('count_vect', CountVectorizer(min_df=100,\n",
+    "                                                        ngram_range=(1,2),\n",
+    "                                                        stop_words='english')), \n",
     "                         ('lr', LogisticRegression())])\n",
-    "sentiment_lr.fit(dftrain.text, dftrain.polarity)"
+    "sentiment_lr.fit(df_train.text, df_train.polarity)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "id": "civilian-auditor",
    "metadata": {},
    "outputs": [
@@ -143,13 +148,13 @@
     }
    ],
    "source": [
-    "Xtest, ytest = dftest.text[dftest.polarity!=2], dftest.polarity[dftest.polarity!=2]\n",
-    "print(classification_report(ytest,sentiment_lr.predict(Xtest)))"
+    "x_test, y_test = df_test.text[df_test.polarity != 2], df_test.polarity[df_test.polarity != 2]\n",
+    "print(classification_report(y_test, sentiment_lr.predict(x_test)))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "numerous-ability",
    "metadata": {},
    "outputs": [
@@ -159,18 +164,18 @@
        "array([4])"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "sentiment_lr.predict([Xtest[0]])"
+    "sentiment_lr.predict([x_test[0]])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
    "id": "electronic-princess",
    "metadata": {},
    "outputs": [
@@ -180,13 +185,13 @@
        "array([4, 0])"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "sentiment_lr.predict([\"good\", \"bad\"])"
+    "sentiment_lr.predict(['good', 'bad'])"
    ]
   },
   {
@@ -199,22 +204,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 10,
    "id": "medium-field",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "What is your Unbox email?me@vikasnair.com\n",
-      "What is your Unbox password?········\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import unboxapi\n",
-    "client = unboxapi.UnboxClient()"
+    "client = unboxapi.UnboxClient(email='me@vikasnair.com', password='00000000')"
    ]
   },
   {
@@ -227,19 +223,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 11,
    "id": "maritime-writing",
    "metadata": {},
    "outputs": [],
    "source": [
-    "class_dict = {4: \"positive\", 0: \"negative\", 2: \"neutral\"}\n",
+    "class_dict = { 4: 'positive', 0: 'negative', 2: 'neutral' }\n",
     "def predict_function(model, text_list):\n",
     "    return [class_dict[d] for d in model.predict(text_list)]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 12,
    "id": "bored-treasury",
    "metadata": {},
    "outputs": [
@@ -249,13 +245,13 @@
        "['positive', 'positive', 'negative']"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "texts = [\"some new text, sweet noodles\", \"happy time\", \"sad day\"]\n",
+    "texts = ['some new text, sweet noodles', 'happy time', 'sad day']\n",
     "\n",
     "predict_function(sentiment_lr, texts)"
    ]
@@ -270,7 +266,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "id": "present-seating",
    "metadata": {
     "scrolled": true
@@ -280,16 +276,47 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[2021-03-01 04:47:41,045] WARNING - pip package requirement pandas already exist\n",
-      "[2021-03-01 04:47:41,052] WARNING - pip package requirement scikit-learn already exist\n"
+      "Uploading model...\n",
+      "[2021-03-07 04:11:30,623] WARNING - Using BentoML installed in `editable` model, the local BentoML repository including all code changes will be packaged together with saved bundle created, under the './bundled_pip_dependencies' directory of the saved bundle.\n",
+      "[2021-03-07 04:12:02,814] INFO - Detected non-PyPI-released BentoML installed, copying local BentoML modulefiles to target saved bundle path..\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "warning: no previously-included files matching '*~' found anywhere in distribution\n",
+      "warning: no previously-included files matching '*.pyo' found anywhere in distribution\n",
+      "warning: no previously-included files matching '.git' found anywhere in distribution\n",
+      "warning: no previously-included files matching '.ipynb_checkpoints' found anywhere in distribution\n",
+      "warning: no previously-included files matching '__pycache__' found anywhere in distribution\n",
+      "warning: no directories found matching 'bentoml/yatai/web/dist'\n",
+      "no previously-included directories found matching 'e2e_tests'\n",
+      "no previously-included directories found matching 'tests'\n",
+      "no previously-included directories found matching 'benchmark'\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "UPDATING BentoML-0.11.0+33.g7e83376/bentoml/_version.py\n",
+      "set BentoML-0.11.0+33.g7e83376/bentoml/_version.py to '0.11.0+33.g7e83376'\n"
      ]
     }
    ],
    "source": [
-    "client.add(\n",
-    "    function=predict_function,\n",
-    "    model=sentiment_lr\n",
-    ")"
+    "print('Uploading model...')\n",
+    "client.add_model(function=predict_function, model=sentiment_lr)\n",
+    "print('Complete.')\n",
+    "\n",
+    "print('\\nUploading dataset (from file)...')\n",
+    "response_i = client.add_dataset(df_train_file_path, df_train_name)\n",
+    "print(f'Complete. Response: {response_i}')\n",
+    "\n",
+    "print('\\nUploading dataset (from data frame)...')\n",
+    "response_j = client.add_dataframe(df_test, df_test_name)\n",
+    "print(f'Complete. Response: {response_j}')"
    ]
   },
   {
@@ -299,6 +326,14 @@
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "corporate-azerbaijan",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/unboxapi/__init__.py b/unboxapi/__init__.py
index 4c50e024..fdc79387 100644
--- a/unboxapi/__init__.py
+++ b/unboxapi/__init__.py
@@ -1,77 +1,59 @@
-import os
-import getpass
-import tarfile
-import pyrebase
-import bentoml
+import bentoml, getpass, os, pandas as pd, tarfile, tempfile, uuid
+
 from bentoml.saved_bundle.bundler import _write_bento_content_to_dir
 from bentoml.utils.tempdir import TempDirectory
-import pandas as pd
-import uuid
-from .lib.network import FlaskAPIRequest
 
+from .lib.network import FlaskAPI, FirebaseAPI
 from .template import create_template_model
 
 
 class UnboxClient(object):
-    def __init__(self, email=None, password=None):
-        self.firebase = None
-        self.user = None
-        self.flask_api_request = FlaskAPIRequest()
-        self.authenticate(email, password)
-
-    def add(self, function, model):
-        bento_service = create_template_model("sklearn", "text")
+
+    # Public functions
+
+    def __init__(self, email: str = None, password: str = None):
+        self.flask_api = FlaskAPI()
+        self.firebase_api = FirebaseAPI(email=email, password=password)
+
+
+    def add_model(self, function, model):
+        bento_service = create_template_model('sklearn', 'text')
         bento_service.pack('model', model)
         bento_service.pack('function', function)
 
         with TempDirectory() as temp_dir:
             _write_bento_content_to_dir(bento_service, temp_dir)
-            with TempDirectory() as tarfile_dir:
-                file_name = f'{bento_service.name}.tar'
-                tarfile_path = f'{tarfile_dir}/{file_name}'
-                with tarfile.open(tarfile_path, mode="w:gz") as tar:
-                    tar.add(temp_dir, arcname=bento_service.name)
-                self.upload(
-                    f"users/{self.user['localId']}/models/{file_name}", tarfile_path)
-
-    def upload(self, remote_path, file_path):
-        storage = self.firebase.storage()
-        storage.child(remote_path).put(file_path, self.user['idToken'])
-
-    def authenticate(self, email, password):
 
-        if not email or not password:
-            email = input("What is your Unbox email?")
-            password = getpass.getpass("What is your Unbox password?")
-
-        config = {
-            "apiKey": "AIzaSyAKlGQOmXTjPQhL1Uvj-Jr-_jUtNWmpOgs",
-            "authDomain": "unbox-ai.firebaseapp.com",
-            "databaseURL": "https://unbox-ai.firebaseio.com",
-            "storageBucket": "unbox-ai.appspot.com"
-        }
+            with TempDirectory() as tarfile_dir:
+                model_id = str(uuid.uuid1())
+                tarfile_path = f'{tarfile_dir}/{model_id}'
 
-        self.firebase = pyrebase.initialize_app(config)
+                with tarfile.open(tarfile_path, mode='w:gz') as tar:
+                    tar.add(temp_dir, arcname=bento_service.name)
 
-        # Get a reference to the auth service
+                # Upload only once the archive is closed and fully flushed to disk
+                user_id = self.firebase_api.user['localId']
+                remote_path = f'users/{user_id}/models/{model_id}'
+                self.firebase_api.upload(remote_path, tarfile_path)
 
-        # Log the user in
-        self.user = auth.sign_in_with_email_and_password(email, password)
 
-    def add_dataset(self, file_path: str):
-        self.flask_api_request.upload_dataset(file_path)
+    def add_dataset(self, file_path: str, name: str):
+        # For now, let's upload straight to Firebase Storage from here
+        user_id = self.firebase_api.user['localId']
+        dataset_id = str(uuid.uuid1())
+        remote_path = f'users/{user_id}/datasets/{dataset_id}'
+        self.firebase_api.upload(remote_path, file_path)
 
-    def add_dataframe(self, df: pd.DataFrame, file_path: str):
-        df.to_csv(file_path, index=False)
-        self.add_dataset(file_path)
+        # And then set the metadata via request to our Flask API
+        id_token = self.firebase_api.user['idToken']
+        response = self.flask_api.upload_dataset_metadata(user_id,
+                                                          dataset_id,
+                                                          name,
+                                                          id_token)
+        return response.json()
 
 
-    # def add_dataset(self, file_name: str):
-    #     self.upload(
-    #         f"users/{self.user['localId']}/datasets/{file_name}", file_name)
-    #
-    # def add_dataframe(self, df: pd.DataFrame, file_name: str):
-    #     df.to_csv(file_name, index=False)
-    #     self.upload(
-    #         f"users/{self.user['localId']}/datasets/{file_name}", file_name)
+    def add_dataframe(self, df: pd.DataFrame, name: str):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            dataset_file_path = os.path.join(tmp_dir, str(uuid.uuid1()))
+            df.to_csv(dataset_file_path, index=False)
+            return self.add_dataset(dataset_file_path, name)
diff --git a/unboxapi/lib/network.py b/unboxapi/lib/network.py
index e576d1d5..7fbbc0f0 100644
--- a/unboxapi/lib/network.py
+++ b/unboxapi/lib/network.py
@@ -1,18 +1,63 @@
-import requests
+import getpass, pyrebase, requests
 
 
-class FlaskAPIRequest:
+class FlaskAPI:
 
     def __init__(self):
-        self.url = "http://localhost:5000"
-
-    def post(self, file, data, endpoint="/"):
-        return requests.post(
-            self.url + endpoint,
-            files={"file": file},
-            data=data,
-        )
-
-    def upload_dataset(self, filename):
-        file = (filename, open(filename, 'rb'))
-        return self.post(file, {}, endpoint="/upload_dataset")
+        self.url = 'http://localhost:5000'
+
+
+    def post(self, endpoint: str = '/', data: any = None, file: any = None):
+        # requests drops `json=` when `files=` is set; send form fields with a file
+        payload = {'data': data, 'files': {'file': file}} if file else {'json': data}
+        return requests.post(self.url + endpoint, **payload)
+
+
+    def upload_dataset_metadata(self,
+                                user_id: str,
+                                dataset_id: str,
+                                name: str,
+                                id_token: str):
+        data = {
+            'dataset_id': dataset_id,
+            'id_token': id_token,
+            'name': name,
+            'user_id': user_id
+            }
+        return self.post(endpoint='/dataset', data=data)
+
+
+    def upload_dataset(self, file_path: str, name: str, id_token: str):
+        data = { 'name' : name, 'id_token' : id_token }
+        with open(file_path, 'rb') as file:  # close the handle once the request is sent
+            return self.post(endpoint='/dataset', data=data, file=file)
+
+
+class FirebaseAPI:
+
+    def __init__(self, email: str = None, password: str = None):
+        if not email or not password:
+            email = input('What is your Unbox email?')
+            password = getpass.getpass('What is your Unbox password?')
+
+        config = {
+            'apiKey': 'AIzaSyAKlGQOmXTjPQhL1Uvj-Jr-_jUtNWmpOgs',
+            'authDomain': 'unbox-ai.firebaseapp.com',
+            'databaseURL': 'https://unbox-ai.firebaseio.com',
+            'storageBucket': 'unbox-ai.appspot.com'
+        }
+
+        # Initialize Pyrebase instance
+        self.firebase = pyrebase.initialize_app(config)
+
+        # Get a reference to the auth service
+        auth = self.firebase.auth()
+
+        # Login
+        self.user = auth.sign_in_with_email_and_password(email, password)
+
+
+    def upload(self, remote_path: str, file_path: str):
+        storage = self.firebase.storage()
+        id_token = self.user['idToken']
+        storage.child(remote_path).put(file_path, id_token)
\ No newline at end of file
diff --git a/unboxapi/template.py b/unboxapi/template.py
index 44b1af52..290e3d4b 100644
--- a/unboxapi/template.py
+++ b/unboxapi/template.py
@@ -2,17 +2,17 @@
 
 
 modelTypes = {
-    "sklearn": "SklearnModelArtifact",
-    "pytorch": "PytorchModelArtifact",
-    "tensorflow": "TensorflowSavedModelArtifact",
-    "transformers": "TransformersModelArtifact"
+    'sklearn': 'SklearnModelArtifact',
+    'pytorch': 'PytorchModelArtifact',
+    'tensorflow': 'TensorflowSavedModelArtifact',
+    'transformers': 'TransformersModelArtifact'
 }
 
 
-def create_template_model(model_type, input_type):
-    # return TemplateModel([modelTypes[model_type]('model'), PickleArtifact('function')])
+# TODO: in dire need of cleanup
+def create_template_model(model_type: str, input_type: str):
     with open('template_model.py', 'w') as python_file:
-        file_contents = f"""\
+        file_contents = f'''\
         from typing import List
         from bentoml import env, artifacts, api, BentoService
         from bentoml.frameworks.{model_type} import {modelTypes[model_type]}
@@ -25,9 +25,9 @@ def create_template_model(model_type, input_type):
         class TemplateModel(BentoService):
 
             @api(input=DataframeInput(
-                orient="records",
-                columns=["text"],
-                dtype={{"text": "str"}},
+                orient='records',
+                columns=['text'],
+                dtype={{'text': 'str'}},
             ), batch=True)
             def batch(self, df):
                 text = df['text'].tolist()
@@ -40,17 +40,18 @@ def batch(self, df):
             def predict(self, parsed_json_list: List[JsonSerializable], tasks: List[InferenceTask]):
                 text = []
                 for json, task in zip(parsed_json_list, tasks):
-                    if "text" in json:
+                    if 'text' in json:
                         text.append(json['text'])
                     else:
                         task.discard(http_status=400,
-                                    err_msg="input json must contain `text` field")
+                                    err_msg='input json must contain `text` field')
 
                 return self.artifacts.function(
                     self.artifacts.model,
                     text
                 )
-        """
+        '''
         python_file.write(textwrap.dedent(file_contents))
+
     from template_model import TemplateModel
     return TemplateModel()