129 changes: 82 additions & 47 deletions examples/sentiment-analysis/sentiment-unbox.ipynb
@@ -67,19 +67,24 @@
"outputs": [],
"source": [
"columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']\n",
"dftrain = pd.read_csv('./data/training.1600000.processed.noemoticon.csv',\n",
" header = None,\n",
" encoding ='ISO-8859-1')\n",
"dftest = pd.read_csv('./data/testdata.manual.2009.06.14.csv',\n",
" header = None,\n",
" encoding ='ISO-8859-1')\n",
"dftrain.columns = columns\n",
"dftest.columns = columns"
"df_train_file_path = './data/training.1600000.processed.noemoticon.csv'\n",
"df_train_name = 'training.1600000.processed.noemoticon'\n",
"df_train = pd.read_csv(df_train_file_path,\n",
" header=None,\n",
" encoding='ISO-8859-1')\n",
"\n",
"df_test_file_path = './data/testdata.manual.2009.06.14.csv'\n",
"df_test_name = 'testdata.manual.2009.06.14'\n",
"df_test = pd.read_csv(df_test_file_path,\n",
" header=None,\n",
" encoding='ISO-8859-1')\n",
"df_train.columns = columns\n",
"df_test.columns = columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "multiple-disability",
"metadata": {},
"outputs": [
@@ -106,23 +111,23 @@
" ('lr', LogisticRegression())])"
]
},
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentiment_lr = Pipeline([\n",
" ('count_vect', CountVectorizer(min_df = 100,\n",
" ngram_range = (1,2),\n",
" stop_words = 'english')), \n",
" ('count_vect', CountVectorizer(min_df=100,\n",
" ngram_range=(1,2),\n",
" stop_words='english')), \n",
" ('lr', LogisticRegression())])\n",
"sentiment_lr.fit(dftrain.text, dftrain.polarity)"
"sentiment_lr.fit(df_train.text, df_train.polarity)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"id": "civilian-auditor",
"metadata": {},
"outputs": [
@@ -143,13 +148,13 @@
}
],
"source": [
"Xtest, ytest = dftest.text[dftest.polarity!=2], dftest.polarity[dftest.polarity!=2]\n",
"print(classification_report(ytest,sentiment_lr.predict(Xtest)))"
"x_test, y_test = df_test.text[df_test.polarity != 2], df_test.polarity[df_test.polarity != 2]\n",
"print(classification_report(y_test, sentiment_lr.predict(x_test)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"id": "numerous-ability",
"metadata": {},
"outputs": [
@@ -159,18 +164,18 @@
"array([4])"
]
},
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentiment_lr.predict([Xtest[0]])"
"sentiment_lr.predict([x_test[0]])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"id": "electronic-princess",
"metadata": {},
"outputs": [
@@ -180,13 +185,13 @@
"array([4, 0])"
]
},
"execution_count": 7,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentiment_lr.predict([\"good\", \"bad\"])"
"sentiment_lr.predict(['good', 'bad'])"
]
},
{
@@ -199,22 +204,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"id": "medium-field",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"What is your Unbox email?me@vikasnair.com\n",
"What is your Unbox password?········\n"
]
}
],
"outputs": [],
"source": [
"import unboxapi\n",
"client = unboxapi.UnboxClient()"
"client = unboxapi.UnboxClient(email='me@vikasnair.com', password='00000000')"
]
},
{
@@ -227,19 +223,19 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"id": "maritime-writing",
"metadata": {},
"outputs": [],
"source": [
"class_dict = {4: \"positive\", 0: \"negative\", 2: \"neutral\"}\n",
"class_dict = { 4: 'positive', 0: 'negative', 2: 'neutral' }\n",
"def predict_function(model, text_list):\n",
" return [class_dict[d] for d in model.predict(text_list)]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 12,
"id": "bored-treasury",
"metadata": {},
"outputs": [
@@ -249,13 +245,13 @@
"['positive', 'positive', 'negative']"
]
},
"execution_count": 10,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"texts = [\"some new text, sweet noodles\", \"happy time\", \"sad day\"]\n",
"texts = ['some new text, sweet noodles', 'happy time', 'sad day']\n",
"\n",
"predict_function(sentiment_lr, texts)"
]
@@ -270,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "present-seating",
"metadata": {
"scrolled": true
@@ -280,16 +276,47 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[2021-03-01 04:47:41,045] WARNING - pip package requirement pandas already exist\n",
"[2021-03-01 04:47:41,052] WARNING - pip package requirement scikit-learn already exist\n"
"Uploading model...\n",
"[2021-03-07 04:11:30,623] WARNING - Using BentoML installed in `editable` model, the local BentoML repository including all code changes will be packaged together with saved bundle created, under the './bundled_pip_dependencies' directory of the saved bundle.\n",
"[2021-03-07 04:12:02,814] INFO - Detected non-PyPI-released BentoML installed, copying local BentoML modulefiles to target saved bundle path..\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"warning: no previously-included files matching '*~' found anywhere in distribution\n",
"warning: no previously-included files matching '*.pyo' found anywhere in distribution\n",
"warning: no previously-included files matching '.git' found anywhere in distribution\n",
"warning: no previously-included files matching '.ipynb_checkpoints' found anywhere in distribution\n",
"warning: no previously-included files matching '__pycache__' found anywhere in distribution\n",
"warning: no directories found matching 'bentoml/yatai/web/dist'\n",
"no previously-included directories found matching 'e2e_tests'\n",
"no previously-included directories found matching 'tests'\n",
"no previously-included directories found matching 'benchmark'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"UPDATING BentoML-0.11.0+33.g7e83376/bentoml/_version.py\n",
"set BentoML-0.11.0+33.g7e83376/bentoml/_version.py to '0.11.0+33.g7e83376'\n"
]
}
],
"source": [
"client.add(\n",
" function=predict_function,\n",
" model=sentiment_lr\n",
")"
"print('Uploading model...')\n",
"client.add_model(function=predict_function, model=sentiment_lr)\n",
"print('Complete.')\n",
"\n",
"print('\\nUploading dataset (from file)...')\n",
"response_i = client.add_dataset(df_train_file_path, df_train_name)\n",
"print(f'Complete. Response: {response_i}')\n",
"\n",
"print('\\nUploading dataset (from data frame)...')\n",
"response_j = client.add_dataframe(df_test, df_test_name)\n",
"print(f'Complete. Response: {response_j}')"
]
},
{
@@ -299,6 +326,14 @@
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "corporate-azerbaijan",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
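Taken together, the notebook changes above boil down to the workflow sketched below: train the scikit-learn pipeline, then push the model and a dataset through the new `UnboxClient` API. This is a minimal sketch, assuming this PR's `unboxapi` package is importable and the Sentiment140 CSVs sit under `./data/`; the credentials are placeholders, not real values.

# Minimal sketch of the notebook's new workflow (assumes this PR's unboxapi
# package is installed and the Sentiment140 CSVs exist under ./data/).
import pandas as pd
import unboxapi
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

df_train = pd.read_csv('./data/training.1600000.processed.noemoticon.csv',
                       header=None, encoding='ISO-8859-1')
df_train.columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']

sentiment_lr = Pipeline([
    ('count_vect', CountVectorizer(min_df=100, ngram_range=(1, 2),
                                   stop_words='english')),
    ('lr', LogisticRegression())])
sentiment_lr.fit(df_train.text, df_train.polarity)

# Credentials now go to the constructor instead of interactive prompts.
client = unboxapi.UnboxClient(email='me@example.com', password='********')

# Map Sentiment140's numeric polarity labels to strings for the upload.
class_dict = {4: 'positive', 0: 'negative', 2: 'neutral'}
def predict_function(model, text_list):
    return [class_dict[d] for d in model.predict(text_list)]

client.add_model(function=predict_function, model=sentiment_lr)
response = client.add_dataframe(df_train, 'training.1600000.processed.noemoticon')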
80 changes: 44 additions & 36 deletions unboxapi/__init__.py
@@ -1,51 +1,59 @@
import os
import getpass
import tarfile
import pyrebase
import bentoml
import bentoml
import getpass
import os
import pandas as pd
import tarfile
import tempfile
import uuid

from bentoml.saved_bundle.bundler import _write_bento_content_to_dir
from bentoml.utils.tempdir import TempDirectory

from .lib.network import FlaskAPI, FirebaseAPI
from .template import create_template_model


class UnboxClient(object):
def __init__(self):
self.authenticate()

def add(self, function, model):
bento_service = create_template_model("sklearn", "text")
# Public functions

def __init__(self, email: str = None, password: str = None):
self.flask_api = FlaskAPI()
self.firebase_api = FirebaseAPI(email=email, password=password)


def add_model(self, function, model):
bento_service = create_template_model('sklearn', 'text')
bento_service.pack('model', model)
bento_service.pack('function', function)

with TempDirectory() as temp_dir:
_write_bento_content_to_dir(bento_service, temp_dir)

with TempDirectory() as tarfile_dir:
file_name = f'{bento_service.name}.tar'
tarfile_path = f'{tarfile_dir}/{file_name}'
with tarfile.open(tarfile_path, mode="w:gz") as tar:
model_id = str(uuid.uuid1())
tarfile_path = f'{tarfile_dir}/{model_id}'

with tarfile.open(tarfile_path, mode='w:gz') as tar:
tar.add(temp_dir, arcname=bento_service.name)
self.upload(
f"users/{self.user['localId']}/models/{file_name}", tarfile_path)

def upload(self, remote_path, file_path):
storage = self.firebase.storage()
storage.child(remote_path).put(file_path, self.user['idToken'])

def authenticate(self):
config = {
"apiKey": "AIzaSyAKlGQOmXTjPQhL1Uvj-Jr-_jUtNWmpOgs",
"authDomain": "unbox-ai.firebaseapp.com",
"databaseURL": "https://unbox-ai.firebaseio.com",
"storageBucket": "unbox-ai.appspot.com"
}

self.firebase = pyrebase.initialize_app(config)

# Get a reference to the auth service
auth = self.firebase.auth()

# Log the user in
email = input("What is your Unbox email?")
password = getpass.getpass("What is your Unbox password?")
self.user = auth.sign_in_with_email_and_password(email, password)

user_id = self.firebase_api.user['localId']
remote_path = f'users/{user_id}/models/{model_id}'
self.firebase_api.upload(remote_path, tarfile_path)


def add_dataset(self, file_path: str, name: str):
# For now, let's upload straight to Firebase Storage from here
user_id = self.firebase_api.user['localId']
dataset_id = str(uuid.uuid1())
remote_path = f'users/{user_id}/datasets/{dataset_id}'
self.firebase_api.upload(remote_path, file_path)

# And then set the metadata via request to our Flask API
id_token = self.firebase_api.user['idToken']
response = self.flask_api.upload_dataset_metadata(user_id,
dataset_id,
name,
id_token)
return response.json()


def add_dataframe(self, df: pd.DataFrame, name: str):
with tempfile.TemporaryDirectory() as tmp_dir:
dataset_file_path = os.path.join(tmp_dir, str(uuid.uuid1()))
df.to_csv(dataset_file_path, index=False)
return self.add_dataset(dataset_file_path, name)
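

The `FlaskAPI` and `FirebaseAPI` helpers this file now delegates to live in `unboxapi/lib/network.py`, which is not part of this diff. Reconstructed from the pyrebase logic deleted above, `FirebaseAPI` plausibly looks like the sketch below; treat the interactive fallback prompts, and everything about `FlaskAPI.upload_dataset_metadata`, as assumptions rather than the module's actual contents.

# Hypothetical reconstruction of FirebaseAPI (unboxapi/lib/network.py is not
# shown in this diff); based on the authenticate()/upload() methods removed above.
import getpass
import pyrebase


class FirebaseAPI:
    def __init__(self, email: str = None, password: str = None):
        config = {
            "apiKey": "AIzaSyAKlGQOmXTjPQhL1Uvj-Jr-_jUtNWmpOgs",
            "authDomain": "unbox-ai.firebaseapp.com",
            "databaseURL": "https://unbox-ai.firebaseio.com",
            "storageBucket": "unbox-ai.appspot.com"
        }
        self.firebase = pyrebase.initialize_app(config)
        auth = self.firebase.auth()
        # Assumed fallback: prompt interactively when credentials are omitted,
        # preserving the old UnboxClient.authenticate() behavior.
        email = email or input("What is your Unbox email?")
        password = password or getpass.getpass("What is your Unbox password?")
        self.user = auth.sign_in_with_email_and_password(email, password)

    def upload(self, remote_path: str, file_path: str):
        # Same Firebase Storage call the old UnboxClient.upload() made.
        storage = self.firebase.storage()
        storage.child(remote_path).put(file_path, self.user["idToken"])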