Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,811 changes: 1,914 additions & 1,897 deletions Ch10/01_BioBERT_Demo.ipynb

Large diffs are not rendered by default.

564 changes: 126 additions & 438 deletions Ch10/02_LexNLP.ipynb

Large diffs are not rendered by default.

3,031 changes: 1,545 additions & 1,486 deletions Ch10/03_FinBERT.ipynb

Large diffs are not rendered by default.

648 changes: 318 additions & 330 deletions Ch11/01_AutoML_Demo.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions Ch4/05_DeepNN_Example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
"except ModuleNotFoundError:\n",
" \n",
" if not os.path.exists(os.getcwd()+'\\\\Data\\\\glove.6B'):\n",
" os.makdir(os.getcwd()+'\\\\Data\\\\glove.6B')\n",
" os.mkdir(os.getcwd()+'\\\\Data\\\\glove.6B')\n",
" \n",
" url='http://nlp.stanford.edu/data/glove.6B.zip' \n",
" path=os.getcwd()+'\\Data' \n",
Expand Down Expand Up @@ -549,7 +549,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
"version": "3.7.4"
}
},
"nbformat": 4,
Expand Down
396 changes: 291 additions & 105 deletions Ch5/01_KPE.ipynb

Large diffs are not rendered by default.

91 changes: 46 additions & 45 deletions Ch5/02_NERTraining.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": 1,
"metadata": {
"colab": {},
"colab_type": "code",
Expand All @@ -37,7 +37,9 @@
"from pprint import pprint\n",
"from sklearn.metrics import f1_score,classification_report\n",
"from sklearn.pipeline import Pipeline\n",
"import string\n"
"import string\n",
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
Expand All @@ -52,7 +54,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": 2,
"metadata": {
"colab": {},
"colab_type": "code",
Expand All @@ -79,12 +81,12 @@
" words.append(word)\n",
" tags.append(tag)\n",
" fh.close()\n",
" return myoutput\n"
" return myoutput"
]
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": 3,
"metadata": {
"colab": {},
"colab_type": "code",
Expand Down Expand Up @@ -150,7 +152,7 @@
" wordfeats[\"nextNextTag\"] = sen_tags[i + 2][1]\n",
" #That is it! You can add whatever you want!\n",
" feats.append(wordfeats)\n",
" return feats\n"
" return feats"
]
},
{
Expand All @@ -165,7 +167,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": 4,
"metadata": {
"colab": {},
"colab_type": "code",
Expand Down Expand Up @@ -195,7 +197,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": 5,
"metadata": {
"colab": {},
"colab_type": "code",
Expand All @@ -217,7 +219,7 @@
" print(metrics.flat_f1_score(Y_dev, y_pred,average='weighted', labels=labels))\n",
" print(metrics.flat_classification_report(Y_dev, y_pred, labels=sorted_labels, digits=3))\n",
" #print(metrics.sequence_accuracy_score(Y_dev, y_pred))\n",
" get_confusion_matrix(Y_dev, y_pred,labels=sorted_labels)\n"
" get_confusion_matrix(Y_dev, y_pred,labels=sorted_labels)"
]
},
{
Expand All @@ -232,7 +234,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": 6,
"metadata": {
"colab": {},
"colab_type": "code",
Expand All @@ -258,12 +260,12 @@
" cell = \"%{0}.0f\".format(columnwidth) % cm[i, j]\n",
" sum = sum + int(cell)\n",
" print(cell, end=\" \")\n",
" print(sum) #Prints the total number of instances per cat at the end.\n"
" print(sum) #Prints the total number of instances per cat at the end."
]
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": 7,
"metadata": {
"colab": {},
"colab_type": "code",
Expand All @@ -278,7 +280,7 @@
" for yseq_true, yseq_pred in zip(y_true, y_pred):\n",
" trues.extend(yseq_true)\n",
" preds.extend(yseq_pred)\n",
" print_cm(confusion_matrix(trues,preds,labels),labels)\n"
" print_cm(confusion_matrix(trues,preds,labels),labels)"
]
},
{
Expand All @@ -293,7 +295,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": 8,
"metadata": {
"colab": {},
"colab_type": "code",
Expand All @@ -307,19 +309,21 @@
"text": [
"Training a Sequence classification model with CRF\n",
"0.9255103670420659\n",
" precision recall f1-score support\n",
" precision recall f1-score support\n",
"\n",
" O 0.973 0.981 0.977 38323\n",
" B-LOC 0.694 0.765 0.728 1668\n",
" I-LOC 0.738 0.482 0.584 257\n",
" B-MISC 0.648 0.309 0.419 702\n",
" I-MISC 0.626 0.505 0.559 216\n",
" B-ORG 0.670 0.561 0.611 1661\n",
" I-ORG 0.551 0.704 0.618 835\n",
" B-PER 0.773 0.766 0.769 1617\n",
" I-PER 0.819 0.886 0.851 1156\n",
" O 0.973 0.981 0.977 38323\n",
" B-LOC 0.694 0.765 0.728 1668\n",
" I-LOC 0.738 0.482 0.584 257\n",
" B-MISC 0.648 0.309 0.419 702\n",
" I-MISC 0.626 0.505 0.559 216\n",
" B-ORG 0.670 0.561 0.611 1661\n",
" I-ORG 0.551 0.704 0.618 835\n",
" B-PER 0.773 0.766 0.769 1617\n",
" I-PER 0.819 0.886 0.851 1156\n",
"\n",
"avg / total 0.926 0.928 0.926 46435\n",
" accuracy 0.928 46435\n",
" macro avg 0.721 0.662 0.679 46435\n",
"weighted avg 0.926 0.928 0.926 46435\n",
"\n",
"\n",
"\n",
Expand All @@ -338,10 +342,18 @@
}
],
"source": [
"\n",
"def main():\n",
" train_path = 'conlldata/train.txt'\n",
" test_path = 'conlldata/test.txt'\n",
" \n",
" try:\n",
" from google.colab import files\n",
" uploaded = files.upload()\n",
" # files are present in Data/conlldata\n",
" train_path = 'train.txt'\n",
" test_path = 'test.txt'\n",
" except:\n",
" train_path = 'Data/conlldata/train.txt'\n",
" test_path = 'Data/conlldata/test.txt'\n",
" \n",
" conll_train = load__data_conll(train_path)\n",
" conll_dev = load__data_conll(test_path)\n",
" \n",
Expand All @@ -352,7 +364,7 @@
" print(\"Done with sequence model\")\n",
"\n",
"if __name__==\"__main__\":\n",
" main()\n"
" main()"
]
},
{
Expand All @@ -364,17 +376,6 @@
"source": [
"This is pretty good. We already have a model which has an F-score of 92%!!!"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "reO-wnZocnB2"
},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -383,21 +384,21 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 2",
"display_name": "Python 3",
"language": "python",
"name": "python2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.17"
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
Expand Down
Loading