|
67 | 67 | "outputs": [], |
68 | 68 | "source": [ |
69 | 69 | "columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']\n", |
70 | | - "dftrain = pd.read_csv('./data/training.1600000.processed.noemoticon.csv',\n", |
71 | | - " header = None,\n", |
72 | | - " encoding ='ISO-8859-1')\n", |
73 | | - "dftest = pd.read_csv('./data/testdata.manual.2009.06.14.csv',\n", |
74 | | - " header = None,\n", |
75 | | - " encoding ='ISO-8859-1')\n", |
76 | | - "dftrain.columns = columns\n", |
77 | | - "dftest.columns = columns" |
| 70 | + "df_train_file_path = './data/training.1600000.processed.noemoticon.csv'\n", |
| 71 | + "df_train_name = 'training.1600000.processed.noemoticon'\n", |
| 72 | + "df_train = pd.read_csv(df_train_file_path,\n", |
| 73 | + " header=None,\n", |
| 74 | + " encoding='ISO-8859-1')\n", |
| 75 | + "\n", |
| 76 | + "df_test_file_path = './data/testdata.manual.2009.06.14.csv'\n", |
| 77 | + "df_test_name = 'testdata.manual.2009.06.14'\n", |
| 78 | + "df_test = pd.read_csv(df_test_file_path,\n", |
| 79 | + " header=None,\n", |
| 80 | + " encoding='ISO-8859-1')\n", |
| 81 | + "df_train.columns = columns\n", |
| 82 | + "df_test.columns = columns" |
78 | 83 | ] |
79 | 84 | }, |
80 | 85 | { |
81 | 86 | "cell_type": "code", |
82 | | - "execution_count": 4, |
| 87 | + "execution_count": 6, |
83 | 88 | "id": "multiple-disability", |
84 | 89 | "metadata": {}, |
85 | 90 | "outputs": [ |
|
106 | 111 | " ('lr', LogisticRegression())])" |
107 | 112 | ] |
108 | 113 | }, |
109 | | - "execution_count": 4, |
| 114 | + "execution_count": 6, |
110 | 115 | "metadata": {}, |
111 | 116 | "output_type": "execute_result" |
112 | 117 | } |
113 | 118 | ], |
114 | 119 | "source": [ |
115 | 120 | "sentiment_lr = Pipeline([\n", |
116 | | - " ('count_vect', CountVectorizer(min_df = 100,\n", |
117 | | - " ngram_range = (1,2),\n", |
118 | | - " stop_words = 'english')), \n", |
| 121 | + " ('count_vect', CountVectorizer(min_df=100,\n", |
| 122 | + " ngram_range=(1,2),\n", |
| 123 | + " stop_words='english')), \n", |
119 | 124 | " ('lr', LogisticRegression())])\n", |
120 | | - "sentiment_lr.fit(dftrain.text, dftrain.polarity)" |
| 125 | + "sentiment_lr.fit(df_train.text, df_train.polarity)" |
121 | 126 | ] |
122 | 127 | }, |
123 | 128 | { |
124 | 129 | "cell_type": "code", |
125 | | - "execution_count": 5, |
| 130 | + "execution_count": 7, |
126 | 131 | "id": "civilian-auditor", |
127 | 132 | "metadata": {}, |
128 | 133 | "outputs": [ |
|
143 | 148 | } |
144 | 149 | ], |
145 | 150 | "source": [ |
146 | | - "Xtest, ytest = dftest.text[dftest.polarity!=2], dftest.polarity[dftest.polarity!=2]\n", |
147 | | - "print(classification_report(ytest,sentiment_lr.predict(Xtest)))" |
| 151 | + "x_test, y_test = df_test.text[df_test.polarity != 2], df_test.polarity[df_test.polarity != 2]\n", |
| 152 | + "print(classification_report(y_test, sentiment_lr.predict(x_test)))" |
148 | 153 | ] |
149 | 154 | }, |
150 | 155 | { |
151 | 156 | "cell_type": "code", |
152 | | - "execution_count": 6, |
| 157 | + "execution_count": 8, |
153 | 158 | "id": "numerous-ability", |
154 | 159 | "metadata": {}, |
155 | 160 | "outputs": [ |
|
159 | 164 | "array([4])" |
160 | 165 | ] |
161 | 166 | }, |
162 | | - "execution_count": 6, |
| 167 | + "execution_count": 8, |
163 | 168 | "metadata": {}, |
164 | 169 | "output_type": "execute_result" |
165 | 170 | } |
166 | 171 | ], |
167 | 172 | "source": [ |
168 | | - "sentiment_lr.predict([Xtest[0]])" |
| 173 | + "sentiment_lr.predict([x_test[0]])" |
169 | 174 | ] |
170 | 175 | }, |
171 | 176 | { |
172 | 177 | "cell_type": "code", |
173 | | - "execution_count": 7, |
| 178 | + "execution_count": 9, |
174 | 179 | "id": "electronic-princess", |
175 | 180 | "metadata": {}, |
176 | 181 | "outputs": [ |
|
180 | 185 | "array([4, 0])" |
181 | 186 | ] |
182 | 187 | }, |
183 | | - "execution_count": 7, |
| 188 | + "execution_count": 9, |
184 | 189 | "metadata": {}, |
185 | 190 | "output_type": "execute_result" |
186 | 191 | } |
187 | 192 | ], |
188 | 193 | "source": [ |
189 | | - "sentiment_lr.predict([\"good\", \"bad\"])" |
| 194 | + "sentiment_lr.predict(['good', 'bad'])" |
190 | 195 | ] |
191 | 196 | }, |
192 | 197 | { |
|
199 | 204 | }, |
200 | 205 | { |
201 | 206 | "cell_type": "code", |
202 | | - "execution_count": 8, |
| 207 | + "execution_count": 10, |
203 | 208 | "id": "medium-field", |
204 | 209 | "metadata": {}, |
205 | | - "outputs": [ |
206 | | - { |
207 | | - "name": "stdout", |
208 | | - "output_type": "stream", |
209 | | - "text": [ |
210 | | - "What is your Unbox email?me@vikasnair.com\n", |
211 | | - "What is your Unbox password?········\n" |
212 | | - ] |
213 | | - } |
214 | | - ], |
| 210 | + "outputs": [], |
215 | 211 | "source": [ |
216 | 212 | "import unboxapi\n", |
217 | | - "client = unboxapi.UnboxClient()" |
| 213 | + "client = unboxapi.UnboxClient(email='me@vikasnair.com', password='00000000')" |
218 | 214 | ] |
219 | 215 | }, |
220 | 216 | { |
|
227 | 223 | }, |
228 | 224 | { |
229 | 225 | "cell_type": "code", |
230 | | - "execution_count": 9, |
| 226 | + "execution_count": 11, |
231 | 227 | "id": "maritime-writing", |
232 | 228 | "metadata": {}, |
233 | 229 | "outputs": [], |
234 | 230 | "source": [ |
235 | | - "class_dict = {4: \"positive\", 0: \"negative\", 2: \"neutral\"}\n", |
| 231 | + "class_dict = { 4: 'positive', 0: 'negative', 2: 'neutral' }\n", |
236 | 232 | "def predict_function(model, text_list):\n", |
237 | 233 | " return [class_dict[d] for d in model.predict(text_list)]" |
238 | 234 | ] |
239 | 235 | }, |
240 | 236 | { |
241 | 237 | "cell_type": "code", |
242 | | - "execution_count": 10, |
| 238 | + "execution_count": 12, |
243 | 239 | "id": "bored-treasury", |
244 | 240 | "metadata": {}, |
245 | 241 | "outputs": [ |
|
249 | 245 | "['positive', 'positive', 'negative']" |
250 | 246 | ] |
251 | 247 | }, |
252 | | - "execution_count": 10, |
| 248 | + "execution_count": 12, |
253 | 249 | "metadata": {}, |
254 | 250 | "output_type": "execute_result" |
255 | 251 | } |
256 | 252 | ], |
257 | 253 | "source": [ |
258 | | - "texts = [\"some new text, sweet noodles\", \"happy time\", \"sad day\"]\n", |
| 254 | + "texts = ['some new text, sweet noodles', 'happy time', 'sad day']\n", |
259 | 255 | "\n", |
260 | 256 | "predict_function(sentiment_lr, texts)" |
261 | 257 | ] |
|
270 | 266 | }, |
271 | 267 | { |
272 | 268 | "cell_type": "code", |
273 | | - "execution_count": 14, |
| 269 | + "execution_count": null, |
274 | 270 | "id": "present-seating", |
275 | 271 | "metadata": { |
276 | 272 | "scrolled": true |
|
280 | 276 | "name": "stdout", |
281 | 277 | "output_type": "stream", |
282 | 278 | "text": [ |
283 | | - "[2021-03-01 04:47:41,045] WARNING - pip package requirement pandas already exist\n", |
284 | | - "[2021-03-01 04:47:41,052] WARNING - pip package requirement scikit-learn already exist\n" |
| 279 | + "Uploading model...\n", |
| 280 | + "[2021-03-07 04:11:30,623] WARNING - Using BentoML installed in `editable` model, the local BentoML repository including all code changes will be packaged together with saved bundle created, under the './bundled_pip_dependencies' directory of the saved bundle.\n", |
| 281 | + "[2021-03-07 04:12:02,814] INFO - Detected non-PyPI-released BentoML installed, copying local BentoML modulefiles to target saved bundle path..\n" |
| 282 | + ] |
| 283 | + }, |
| 284 | + { |
| 285 | + "name": "stderr", |
| 286 | + "output_type": "stream", |
| 287 | + "text": [ |
| 288 | + "warning: no previously-included files matching '*~' found anywhere in distribution\n", |
| 289 | + "warning: no previously-included files matching '*.pyo' found anywhere in distribution\n", |
| 290 | + "warning: no previously-included files matching '.git' found anywhere in distribution\n", |
| 291 | + "warning: no previously-included files matching '.ipynb_checkpoints' found anywhere in distribution\n", |
| 292 | + "warning: no previously-included files matching '__pycache__' found anywhere in distribution\n", |
| 293 | + "warning: no directories found matching 'bentoml/yatai/web/dist'\n", |
| 294 | + "no previously-included directories found matching 'e2e_tests'\n", |
| 295 | + "no previously-included directories found matching 'tests'\n", |
| 296 | + "no previously-included directories found matching 'benchmark'\n" |
| 297 | + ] |
| 298 | + }, |
| 299 | + { |
| 300 | + "name": "stdout", |
| 301 | + "output_type": "stream", |
| 302 | + "text": [ |
| 303 | + "UPDATING BentoML-0.11.0+33.g7e83376/bentoml/_version.py\n", |
| 304 | + "set BentoML-0.11.0+33.g7e83376/bentoml/_version.py to '0.11.0+33.g7e83376'\n" |
285 | 305 | ] |
286 | 306 | } |
287 | 307 | ], |
288 | 308 | "source": [ |
289 | | - "client.add(\n", |
290 | | - " function=predict_function,\n", |
291 | | - " model=sentiment_lr\n", |
292 | | - ")" |
| 309 | + "print('Uploading model...')\n", |
| 310 | + "client.add_model(function=predict_function, model=sentiment_lr)\n", |
| 311 | + "print('Complete.')\n", |
| 312 | + "\n", |
| 313 | + "print('\\nUploading dataset (from file)...')\n", |
| 314 | + "response_i = client.add_dataset(df_train_file_path, df_train_name)\n", |
| 315 | + "print(f'Complete. Response: {response_i}')\n", |
| 316 | + "\n", |
| 317 | + "print('\\nUploading dataset (from data frame)...')\n", |
| 318 | + "response_j = client.add_dataframe(df_test, df_test_name)\n", |
| 319 | + "print(f'Complete. Response: {response_j}')" |
293 | 320 | ] |
294 | 321 | }, |
295 | 322 | { |
|
299 | 326 | "metadata": {}, |
300 | 327 | "outputs": [], |
301 | 328 | "source": [] |
| 329 | + }, |
| 330 | + { |
| 331 | + "cell_type": "code", |
| 332 | + "execution_count": null, |
| 333 | + "id": "corporate-azerbaijan", |
| 334 | + "metadata": {}, |
| 335 | + "outputs": [], |
| 336 | + "source": [] |
302 | 337 | } |
303 | 338 | ], |
304 | 339 | "metadata": { |
|