practical-nlp · kartikay-bagla · Jun 21, 2021 · Jun 13, 2021 · Jun 13, 2021 · Jun 13, 2021
diff --git a/Ch10/01_BioBERT_Demo.ipynb b/Ch10/01_BioBERT_Demo.ipynb
diff --git a/Ch10/02_LexNLP.ipynb b/Ch10/02_LexNLP.ipynb
diff --git a/Ch10/03_FinBERT.ipynb b/Ch10/03_FinBERT.ipynb
diff --git a/Ch11/01_AutoML_Demo.ipynb b/Ch11/01_AutoML_Demo.ipynb
diff --git a/Ch4/05_DeepNN_Example.ipynb b/Ch4/05_DeepNN_Example.ipynb
@@ -77,7 +77,7 @@
     "except ModuleNotFoundError:\n",
     "    \n",
     "    if not os.path.exists(os.getcwd()+'\\\\Data\\\\glove.6B'):\n",
-    "        os.makdir(os.getcwd()+'\\\\Data\\\\glove.6B')\n",
+    "        os.mkdir(os.getcwd()+'\\\\Data\\\\glove.6B')\n",
     "        \n",
     "        url='http://nlp.stanford.edu/data/glove.6B.zip' \n",
     "        path=os.getcwd()+'\\Data' \n",
@@ -549,7 +549,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.0"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

diff --git a/Ch5/01_KPE.ipynb b/Ch5/01_KPE.ipynb
diff --git a/Ch5/02_NERTraining.ipynb b/Ch5/02_NERTraining.ipynb
@@ -22,7 +22,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": 1,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -37,7 +37,9 @@
     "from pprint import pprint\n",
     "from sklearn.metrics import f1_score,classification_report\n",
     "from sklearn.pipeline import Pipeline\n",
-    "import string\n"
+    "import string\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')"
    ]
   },
   {
@@ -52,7 +54,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": 2,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -79,12 +81,12 @@
     "            words.append(word)\n",
     "            tags.append(tag)\n",
     "    fh.close()\n",
-    "    return myoutput\n"
+    "    return myoutput"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": 3,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -150,7 +152,7 @@
     "            wordfeats[\"nextNextTag\"] = sen_tags[i + 2][1]\n",
     "        #That is it! You can add whatever you want!\n",
     "        feats.append(wordfeats)\n",
-    "    return feats\n"
+    "    return feats"
    ]
   },
   {
@@ -165,7 +167,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": 4,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -195,7 +197,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": 5,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -217,7 +219,7 @@
     "    print(metrics.flat_f1_score(Y_dev, y_pred,average='weighted', labels=labels))\n",
     "    print(metrics.flat_classification_report(Y_dev, y_pred, labels=sorted_labels, digits=3))\n",
     "    #print(metrics.sequence_accuracy_score(Y_dev, y_pred))\n",
-    "    get_confusion_matrix(Y_dev, y_pred,labels=sorted_labels)\n"
+    "    get_confusion_matrix(Y_dev, y_pred,labels=sorted_labels)"
    ]
   },
   {
@@ -232,7 +234,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": 6,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -258,12 +260,12 @@
     "            cell = \"%{0}.0f\".format(columnwidth) % cm[i, j]\n",
     "            sum =  sum + int(cell)\n",
     "            print(cell, end=\" \")\n",
-    "        print(sum) #Prints the total number of instances per cat at the end.\n"
+    "        print(sum) #Prints the total number of instances per cat at the end."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": 7,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -278,7 +280,7 @@
     "    for yseq_true, yseq_pred in zip(y_true, y_pred):\n",
     "        trues.extend(yseq_true)\n",
     "        preds.extend(yseq_pred)\n",
-    "    print_cm(confusion_matrix(trues,preds,labels),labels)\n"
+    "    print_cm(confusion_matrix(trues,preds,labels),labels)"
    ]
   },
   {
@@ -293,7 +295,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 0,
+   "execution_count": 8,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -307,19 +309,21 @@
      "text": [
       "Training a Sequence classification model with CRF\n",
       "0.9255103670420659\n",
-      "             precision    recall  f1-score   support\n",
+      "              precision    recall  f1-score   support\n",
       "\n",
-      "          O      0.973     0.981     0.977     38323\n",
-      "      B-LOC      0.694     0.765     0.728      1668\n",
-      "      I-LOC      0.738     0.482     0.584       257\n",
-      "     B-MISC      0.648     0.309     0.419       702\n",
-      "     I-MISC      0.626     0.505     0.559       216\n",
-      "      B-ORG      0.670     0.561     0.611      1661\n",
-      "      I-ORG      0.551     0.704     0.618       835\n",
-      "      B-PER      0.773     0.766     0.769      1617\n",
-      "      I-PER      0.819     0.886     0.851      1156\n",
+      "           O      0.973     0.981     0.977     38323\n",
+      "       B-LOC      0.694     0.765     0.728      1668\n",
+      "       I-LOC      0.738     0.482     0.584       257\n",
+      "      B-MISC      0.648     0.309     0.419       702\n",
+      "      I-MISC      0.626     0.505     0.559       216\n",
+      "       B-ORG      0.670     0.561     0.611      1661\n",
+      "       I-ORG      0.551     0.704     0.618       835\n",
+      "       B-PER      0.773     0.766     0.769      1617\n",
+      "       I-PER      0.819     0.886     0.851      1156\n",
       "\n",
-      "avg / total      0.926     0.928     0.926     46435\n",
+      "    accuracy                          0.928     46435\n",
+      "   macro avg      0.721     0.662     0.679     46435\n",
+      "weighted avg      0.926     0.928     0.926     46435\n",
       "\n",
       "\n",
       "\n",
@@ -338,10 +342,18 @@
     }
    ],
    "source": [
-    "\n",
     "def main():\n",
-    "    train_path = 'conlldata/train.txt'\n",
-    "    test_path = 'conlldata/test.txt'\n",
+    "    \n",
+    "    try:\n",
+    "        from google.colab import files\n",
+    "        uploaded = files.upload()\n",
+    "        # Colab: upload train.txt and test.txt (found in Data/conlldata); they are read below by bare file name\n",
+    "        train_path = 'train.txt'\n",
+    "        test_path = 'test.txt'\n",
+    "    except ImportError:  # google.colab is not importable -> not on Colab, use the repo-relative copies\n",
+    "        train_path = 'Data/conlldata/train.txt'\n",
+    "        test_path = 'Data/conlldata/test.txt'\n",
+    "        \n",
     "    conll_train = load__data_conll(train_path)\n",
     "    conll_dev = load__data_conll(test_path)\n",
     "    \n",
@@ -352,7 +364,7 @@
     "    print(\"Done with sequence model\")\n",
     "\n",
     "if __name__==\"__main__\":\n",
-    "    main()\n"
+    "    main()"
    ]
   },
   {
@@ -364,17 +376,6 @@
    "source": [
     "This is pretty good. We already have a model which has an F-score of 92%!!!"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "colab": {},
-    "colab_type": "code",
-    "id": "reO-wnZocnB2"
-   },
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -383,21 +384,21 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 2",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "python2"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.17"
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,