diff --git a/Ch7/01_ElasticSearch.ipynb b/Ch7/01_ElasticSearch.ipynb index 7f68040..52c048b 100644 --- a/Ch7/01_ElasticSearch.ipynb +++ b/Ch7/01_ElasticSearch.ipynb @@ -1,273 +1,284 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.6" - }, - "colab": { - "name": "ElasticSearch.ipynb", - "provenance": [] - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "GVXrddQRLrQU" + }, + "source": [ + "This notebook shows how to use Elastic Search to index and search through data. We will use a dataset called CMU Book summaries [dataset](http://www.cs.cmu.edu/~dbamman/booksummaries.html). Alternatively, the dataset's link can be found in the `BookSummaries_Link.md` file under the Data folder in Ch7. \n", + "\n", + "For this code to work, an Elasticsearch instance has to be running in the background. \n", + "For this you need to follow these steps :\n", + "\n", + "Linux :\n", + "\n", + " 1. Go to the elasticsearch-X.Y.Z/bin folder on your machine\n", + " 2. Run ./elasticsearch. \n", + " \n", + "Windows :\n", + "\n", + " 1. Download the latest [release](https://www.elastic.co/guide/en/elasticsearch/reference/current/windows.html)\n", + " 2. Run .\\bin\\elasticsearch.bat\n", + " \n", + "[ElasticSearch Documentation](https://www.elastic.co/guide/index.html)\n", + " \n", + "You should now be able to access this instance on localhost:9200\n", + "\n" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "GVXrddQRLrQU", - "colab_type": "text" - }, - "source": [ - "This notebook shows how to use Elastic Search to index and search through data. 
We will use a dataset called CMU Book summaries [dataset](http://www.cs.cmu.edu/~dbamman/booksummaries.html). Alternateively, the dataset's link can be found in the `BookSummaries_Link.md` file under the Data folder in Ch7. \n", - "\n", - "For this code to work, elastic search instance has to be running in the background. For this you need to follow these steps:\n", - "1. Go to the elasticsearch-X.Y.Z/bin folder on your machine\n", - "2. run ./elasticsearch. \n", - "You should now be able to access this instance on localhost:9200\n", - "Note: This notebook might not work on windows.\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZbO4arQuLrQe", - "colab_type": "code", - "colab": {} - }, - "source": [ - "from elasticsearch import Elasticsearch \n", - "from datetime import datetime" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "G7gy5xXpLrQq", - "colab_type": "code", - "colab": {} - }, - "source": [ - "#elastic search instance has to be running on the machine. Default port is 9200. \n", - "\n", - "#Call the Elastic Search instance, and delete any pre-existing index\n", - "es=Elasticsearch([{'host':'localhost','port':9200}])\n", - "if es.indices.exists(index=\"myindex\"):\n", - " es.indices.delete(index='myindex', ignore=[400, 404]) #Deleting existing index for now " - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "riA5ep9yLrQ1", - "colab_type": "code", - "colab": {}, - "outputId": "78d1be5e-3c28-4342-8d36-754146b357ae" - }, - "source": [ - "#Build an index from booksummaries dataset. 
I am using only 500 documents for now.\n", - "path = \"booksummaries.txt\" #Add your path.\n", - "count = 1\n", - "for line in open(path):\n", - " fields = line.split(\"\\t\")\n", - " doc = {'id' : fields[0],\n", - " 'title': fields[2],\n", - " 'author': fields[3],\n", - " 'summary': fields[6]\n", - " }\n", - "\n", - " res = es.index(index=\"myindex\", id=fields[0], body=doc)\n", - " count = count+1\n", - " if count%100 == 0:\n", - " print(\"indexed 100 documents\")\n", - " if count == 501:\n", - " break" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "indexed 100 documents\n", - "indexed 100 documents\n", - "indexed 100 documents\n", - "indexed 100 documents\n", - "indexed 100 documents\n" - ], - "name": "stdout" - } - ] - }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ZbO4arQuLrQe", + "scrolled": true + }, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch \n", + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "G7gy5xXpLrQq" + }, + "outputs": [], + "source": [ + "#elastic search instance has to be running on the machine. Default port is 9200. 
\n", + "\n", + "#Call the Elastic Search instance, and delete any pre-existing index\n", + "es=Elasticsearch([{'host':'localhost','port':9200}])\n", + "if es.indices.exists(index=\"myindex\"):\n", + " es.indices.delete(index='myindex', ignore=[400, 404]) #Deleting existing index for now " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "riA5ep9yLrQ1", + "outputId": "78d1be5e-3c28-4342-8d36-754146b357ae" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "RGZjVbWlLrRB", - "colab_type": "code", - "colab": {}, - "outputId": "0867238c-15aa-4a01-bbe1-0fce2abc628a" - }, - "source": [ - "#Check to see how big is the index\n", - "res = es.search(index=\"myindex\", body={\"query\": {\"match_all\": {}}})\n", - "print(\"Your index has %d entries\" % res['hits']['total']['value'])" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Your index has 500 entries\n" - ], - "name": "stdout" - } - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "indexed 100 documents\n", + "indexed 100 documents\n", + "indexed 100 documents\n", + "indexed 100 documents\n", + "indexed 100 documents\n" + ] + } + ], + "source": [ + "#Build an index from booksummaries dataset. 
I am using only 500 documents for now.\n", + "path = \"booksummaries.txt\" #Add your path.\n", + "count = 0 #number of documents indexed so far\n", + "for line in open(path, encoding=\"utf-8\"): #the dataset is UTF-8; do not rely on the platform default encoding\n", + " fields = line.split(\"\\t\")\n", + " doc = {'id' : fields[0],\n", + " 'title': fields[2],\n", + " 'author': fields[3],\n", + " 'summary': fields[6]\n", + " }\n", + "\n", + " res = es.index(index=\"myindex\", id=fields[0], body=doc)\n", + " count = count+1\n", + " if count%100 == 0:\n", + " print(\"indexed 100 documents\")\n", + " if count == 500:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "RGZjVbWlLrRB", + "outputId": "0867238c-15aa-4a01-bbe1-0fce2abc628a" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "Ajt1YhMbLrRI", - "colab_type": "code", - "colab": {}, - "outputId": "679721a1-a4a6-45e3-8775-33247e5a1a39" - }, - "source": [ - "#Try a test query. 
The query searches \"summary\" field which contains the text\n", - "#and does a full text query on that field.\n", - "res = es.search(index=\"myindex\", body={\"query\": {\"match\": {\"summary\": \"animal\"}}})\n", - "print(\"Your search returned %d results.\" % res['hits']['total']['value'])" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Your search returned 16 results.\n" - ], - "name": "stdout" - } - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Your index has 500 entries\n" + ] + } + ], + "source": [ + "#Check to see how big is the index\n", + "res = es.search(index=\"myindex\", body={\"query\": {\"match_all\": {}}})\n", + "print(\"Your index has %d entries\" % res['hits']['total']['value'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Ajt1YhMbLrRI", + "outputId": "679721a1-a4a6-45e3-8775-33247e5a1a39" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "tZcEXsQYLrRP", - "colab_type": 
"code", - "colab": {}, - "outputId": "794c9038-9bbe-4dc4-dd73-9283ec42c2f6" - }, - "source": [ - "#Printing the title field and summary field's first 100 characters for 2nd result\n", - "print(res[\"hits\"][\"hits\"][2][\"_source\"][\"title\"])\n", - "print(res[\"hits\"][\"hits\"][2][\"_source\"][\"summary\"][:100])\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Dead Air\n", - " The first person narrative begins on 11 September 2001, and Banks uses the protagonist's conversati\n" - ], - "name": "stdout" - } - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Your search returned 16 results.\n" + ] + } + ], + "source": [ + "#Try a test query. The query searches \"summary\" field which contains the text\n", + "#and does a full text query on that field.\n", + "res = es.search(index=\"myindex\", body={\"query\": {\"match\": {\"summary\": \"animal\"}}})\n", + "print(\"Your search returned %d results.\" % res['hits']['total']['value'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "tZcEXsQYLrRP", + "outputId": "794c9038-9bbe-4dc4-dd73-9283ec42c2f6" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "nkYJBQaHLrRW", - "colab_type": "code", - "colab": {}, - "outputId": "4ecd428d-fc38-4347-9521-df3f400e6ea4" - }, - "source": [ - "#match query considers both exact matches, and fuzzy matches and works as a OR query. 
\n", - "#match_phrase looks for exact matches.\n", - "while True:\n", - " query = input(\"Enter your search query: \")\n", - " if query == \"STOP\":\n", - " break\n", - " res = es.search(index=\"myindex\", body={\"query\": {\"match_phrase\": {\"summary\": query}}})\n", - " print(\"Your search returned %d results:\" % res['hits']['total']['value'])\n", - " for hit in res[\"hits\"][\"hits\"]:\n", - " print(hit[\"_source\"][\"title\"])\n", - " #to get a snippet 100 characters before and after the match\n", - " loc = hit[\"_source\"][\"summary\"].lower().index(query)\n", - " print(hit[\"_source\"][\"summary\"][:100])\n", - " print(hit[\"_source\"][\"summary\"][loc-100:loc+100])\n", - "\n", - " " - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Enter your search query: countess\n", - "Your search returned 7 results:\n", - "All's Well That Ends Well\n", - "71\n", - " Helena, the orphan daughter of a famous physician, is the ward of the Countess of Rousillon, and ho\n", - "\n", - "The Last Man\n", - "904\n", - " Mary Shelley states in the introduction that in 1818 she discovered, in the Sibyl's cave near Naple\n", - "ng leaves the throne, the monarchy come to an end and a republic is created. When the king dies the Countess attempts to raise their son, Adrian, to reclaim the throne, but Adrian opposes his mother a\n", - "The Luck of Barry Lyndon\n", - "2624\n", - " Redmond Barry of Bally Barry, born to a genteel but ruined Irish family, fancies himself a gentlema\n", - "chy, where they win considerable sums of money and Redmond cleverly sets up a plan to marry a young countess of some means. 
Again, fortune turns against him, and a series of circumstances undermines h\n", - "Carmilla\n", - "2554\n", - " The story is presented by Le Fanu as part of the casebook of Dr Hesselius, whose departures from me\n", - "ily heirloom restored portraits arrives at the castle, Laura finds one of her ancestors, \"Mircalla, Countess Karnstein\", dated 1698. The portrait resembles Carmilla exactly, down to the mole on her ne\n", - "Anna Karenina\n", - "1330\n", - " The novel is divided into eight parts. Its epigraph is Vengeance is mine, I will repay, from Romans\n", - " at the railway station to meet Anna, Stiva bumps into Vronsky; he is there to meet his mother, the Countess Vronskaya. Anna and Vronskaya have traveled and talked together in the same carriage. As th\n", - "Murder on the Orient Express\n", - "5569\n", - " Hercule Poirot boards the Orient Express in Constantinople. The train is unusually crowded for the \n", - "us tragic actress of the New York stage, and was Sonia Armstrong's mother and Daisy's grandmother; *Countess Andrenyi (née Helena Goldenberg) was Sonia Armstrong's sister; *Count Andryeni was the husb\n", - "War and Peace\n", - "2612\n", - " War and Peace has a large cast of characters, the majority of whom are introduced in the first book\n", - "is impetuous and eager to join the army when of age. 
The heads of the family, Count Ilya Rostov and Countess Natalya Rostova, are an affectionate couple but forever worried about their disordered fina\n", - "Enter your search query: STOP\n" - ], - "name": "stdout" - } - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Dead Air\n", + " The first person narrative begins on 11 September 2001, and Banks uses the protagonist's conversati\n" + ] + } + ], + "source": [ + "#Printing the title field and summary field's first 100 characters for 2nd result\n", + "print(res[\"hits\"][\"hits\"][2][\"_source\"][\"title\"])\n", + "print(res[\"hits\"][\"hits\"][2][\"_source\"][\"summary\"][:100])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "nkYJBQaHLrRW", + "outputId": "4ecd428d-fc38-4347-9521-df3f400e6ea4" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "XU7OUNXLLrRd", - "colab_type": "code", - "colab": {} - }, - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter your search query: countess\n", + "Your search returned 7 results:\n", + "All's Well That Ends Well\n", + "71\n", + " Helena, the orphan daughter of a famous physician, is the ward of the Countess of Rousillon, and ho\n", + "\n", + "The Last Man\n", + "904\n", + " Mary Shelley states in the introduction that in 1818 she discovered, in the Sibyl's cave near Naple\n", + "ng leaves the throne, the monarchy come to an end and a republic is created. When the king dies the Countess attempts to raise their son, Adrian, to reclaim the throne, but Adrian opposes his mother a\n", + "The Luck of Barry Lyndon\n", + "2624\n", + " Redmond Barry of Bally Barry, born to a genteel but ruined Irish family, fancies himself a gentlema\n", + "chy, where they win considerable sums of money and Redmond cleverly sets up a plan to marry a young countess of some means. 
Again, fortune turns against him, and a series of circumstances undermines h\n", + "Carmilla\n", + "2554\n", + " The story is presented by Le Fanu as part of the casebook of Dr Hesselius, whose departures from me\n", + "ily heirloom restored portraits arrives at the castle, Laura finds one of her ancestors, \"Mircalla, Countess Karnstein\", dated 1698. The portrait resembles Carmilla exactly, down to the mole on her ne\n", + "Anna Karenina\n", + "1330\n", + " The novel is divided into eight parts. Its epigraph is Vengeance is mine, I will repay, from Romans\n", + " at the railway station to meet Anna, Stiva bumps into Vronsky; he is there to meet his mother, the Countess Vronskaya. Anna and Vronskaya have traveled and talked together in the same carriage. As th\n", + "Murder on the Orient Express\n", + "5569\n", + " Hercule Poirot boards the Orient Express in Constantinople. The train is unusually crowded for the \n", + "us tragic actress of the New York stage, and was Sonia Armstrong's mother and Daisy's grandmother; *Countess Andrenyi (née Helena Goldenberg) was Sonia Armstrong's sister; *Count Andryeni was the husb\n", + "War and Peace\n", + "2612\n", + " War and Peace has a large cast of characters, the majority of whom are introduced in the first book\n", + "is impetuous and eager to join the army when of age. The heads of the family, Count Ilya Rostov and Countess Natalya Rostova, are an affectionate couple but forever worried about their disordered fina\n", + "Enter your search query: STOP\n" + ] } - ] + ], + "source": [ + "#match query considers both exact matches, and fuzzy matches and works as a OR query. 
\n", + "#match_phrase looks for exact matches.\n", + "while True:\n", + " query = input(\"Enter your search query: \")\n", + " if query == \"STOP\":\n", + " break\n", + " res = es.search(index=\"myindex\", body={\"query\": {\"match_phrase\": {\"summary\": query}}})\n", + " print(\"Your search returned %d results:\" % res['hits']['total']['value'])\n", + " for hit in res[\"hits\"][\"hits\"]:\n", + " print(hit[\"_source\"][\"title\"])\n", + " #snippet of up to 100 characters before and after the match (start of the summary if the phrase is not found verbatim)\n", + " loc = hit[\"_source\"][\"summary\"].lower().find(query.lower())\n", + " print(hit[\"_source\"][\"summary\"][:100])\n", + " print(hit[\"_source\"][\"summary\"][max(loc-100, 0):loc+100])\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "XU7OUNXLLrRd" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "name": "ElasticSearch.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } diff --git a/Ch7/02_TopicModelling.ipynb b/Ch7/02_TopicModelling.ipynb index f9f1454..74f2829 100644 --- a/Ch7/02_TopicModelling.ipynb +++ b/Ch7/02_TopicModelling.ipynb @@ -10,41 +10,35 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: nltk in /home/etherealenvy/.local/lib/python3.6/site-packages (3.5)\n", - "Requirement already satisfied: tqdm in /home/etherealenvy/.local/lib/python3.6/site-packages 
(from nltk) (4.46.0)\n", - "Requirement already satisfied: click in 
/home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (7.1.2)\n", - "Requirement already satisfied: joblib in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (0.14.1)\n", - "Requirement already satisfied: regex in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (2020.4.4)\n", - "Requirement already satisfied: gensim in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (3.8.3)\n", - "Requirement already satisfied: smart-open>=1.8.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (2.0.0)\n", - "Requirement already satisfied: six>=1.5.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.14.0)\n", - "Requirement already satisfied: numpy>=1.11.3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.18.4)\n", - "Requirement already satisfied: scipy>=0.18.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.4.1)\n", - "Requirement already satisfied: requests in /home/etherealenvy/.local/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (2.23.0)\n", - "Requirement already satisfied: boto in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (2.49.0)\n", - "Requirement already satisfied: boto3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (1.13.4)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (2020.4.5.1)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in 
/home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (3.0.4)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (2.9)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (0.9.5)\n", - "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (0.3.3)\n", - "Requirement already satisfied: botocore<1.17.0,>=1.16.4 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (1.16.4)\n", - "Requirement already satisfied: docutils<0.16,>=0.10 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->smart-open>=1.8.1->gensim) (0.15.2)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->smart-open>=1.8.1->gensim) (2.8.1)\n" + "Requirement already satisfied: gensim in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.0.1)\n", + "Requirement already satisfied: scipy>=0.18.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.6.3)\n", + "Requirement already satisfied: Cython==0.29.21 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (0.29.21)\n", + "Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (5.0.0)\n", + "Requirement already satisfied: numpy>=1.11.3 in 
c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.20.2)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" ] } ], "source": [ - "!pip install nltk\n", - "!pip install gensim" + "# Import OS \n", + "import os\n", + "# For NLTK, virtual environments are highly recommended; it requires Python versions higher than 3.5\n", + "!pip install gensim\n", + "!pip install nltk" ] }, { @@ -302,21 +296,32 @@ " tokens = word_tokenize(textstring)\n", " return [token.lower() for token in tokens if token.isalpha() and token not in stops]\n", "\n", - "data_path = \"/home/etherealenvy/Downloads/booksummaries/booksummaries.txt\"\n", + "# Path to the downloaded data set. By default booksummaries.txt is expected in the notebook's working directory. 
\n", + "# Please update it to your actual download path regardless of your choice of operating system \n", + "\n", + "data_path = os.path.join(os.getcwd(), \"booksummaries.txt\")\n", + "\n", "summaries = []\n", "for line in open(data_path, encoding=\"utf-8\"):\n", " temp = line.split(\"\\t\")\n", " summaries.append(preprocess(temp[6]))\n", "\n", "# Create a dictionary representation of the documents.\n", + "\n", "dictionary = Dictionary(summaries)\n", + "\n", "# Filter infrequent or too frequent words.\n", + "\n", "dictionary.filter_extremes(no_below=10, no_above=0.5)\n", "corpus = [dictionary.doc2bow(summary) for summary in summaries]\n", + "\n", "# Make a index to word dictionary.\n", + "\n", "temp = dictionary[0] # This is only to \"load\" the dictionary.\n", "id2word = dictionary.id2token\n", + "\n", "#Train the topic model\n", + "\n", "model = LdaModel(corpus=corpus, id2word=id2word,iterations=400, num_topics=10)\n", "top_topics = list(model.top_topics(corpus))\n", "pprint(top_topics)\n" @@ -411,7 +416,9 @@ { "cell_type": "code", "execution_count": 19, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", @@ -437,27 +444,6 @@ " \n", "print(\"=\" * 20)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -476,7 +462,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.10" + "version": "3.7.9" } }, "nbformat": 4, diff --git a/Ch7/03_TextSummarization.ipynb b/Ch7/03_TextSummarization.ipynb index f4dc4f6..b710e53 100644 --- a/Ch7/03_TextSummarization.ipynb +++ b/Ch7/03_TextSummarization.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 0, + 
"execution_count": 1, "metadata": { "colab": {}, "colab_type": "code", @@ -53,31 +53,64 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: sumy in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (0.8.1)\n", - "Requirement already satisfied: breadability>=0.1.20 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from sumy) (0.1.20)\n", - "Requirement already satisfied: pycountry>=18.2.23 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from sumy) (19.8.18)\n", - "Requirement already satisfied: requests>=2.7.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from sumy) (2.23.0)\n", - "Requirement already satisfied: docopt<0.7,>=0.6.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from sumy) (0.6.2)\n", - "Requirement already satisfied: nltk>=3.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from sumy) (3.5)\n", - "Requirement already satisfied: lxml>=2.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from breadability>=0.1.20->sumy) (4.5.0)\n", - "Requirement already satisfied: chardet in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from breadability>=0.1.20->sumy) (3.0.4)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests>=2.7.0->sumy) (1.25.9)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests>=2.7.0->sumy) (2.9)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests>=2.7.0->sumy) (2018.8.24)\n", - "Requirement already satisfied: tqdm in 
/home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from nltk>=3.0.2->sumy) (4.46.0)\n", - "Requirement already satisfied: click in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from nltk>=3.0.2->sumy) (7.1.2)\n", - "Requirement already satisfied: regex in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from nltk>=3.0.2->sumy) (2020.4.4)\n", - "Requirement already satisfied: joblib in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from nltk>=3.0.2->sumy) (0.14.1)\n" + "Collecting sumy\n", + " Using cached sumy-0.8.1-py2.py3-none-any.whl (83 kB)\n", + "Requirement already satisfied: requests>=2.7.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from sumy) (2.25.1)\n", + "Requirement already satisfied: nltk>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from sumy) (3.6.2)\n", + "Collecting docopt<0.7,>=0.6.1\n", + " Using cached docopt-0.6.2.tar.gz (25 kB)\n", + "Collecting pycountry>=18.2.23\n", + " Downloading pycountry-20.7.3.tar.gz (10.1 MB)\n", + "Collecting breadability>=0.1.20\n", + " Using cached breadability-0.1.20.tar.gz (32 kB)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests>=2.7.0->sumy) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests>=2.7.0->sumy) (2020.12.5)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests>=2.7.0->sumy) (4.0.0)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests>=2.7.0->sumy) (1.25.9)\n", + "Requirement already 
satisfied: regex in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk>=3.0.2->sumy) (2021.4.4)" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Requirement already satisfied: joblib in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk>=3.0.2->sumy) (1.0.1)\n", + "Requirement already satisfied: tqdm in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk>=3.0.2->sumy) (4.46.1)\n", + "Requirement already satisfied: click in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk>=3.0.2->sumy) (7.1.2)\n", + "Collecting lxml>=2.0\n", + " Downloading lxml-4.6.3-cp37-cp37m-win_amd64.whl (3.5 MB)\n", + "Using legacy setup.py install for docopt, since package 'wheel' is not installed.\n", + "Using legacy setup.py install for pycountry, since package 'wheel' is not installed.\n", + "Using legacy setup.py install for breadability, since package 'wheel' is not installed.\n", + "Installing collected packages: docopt, pycountry, lxml, breadability, sumy\n", + " Running setup.py install for docopt: started\n", + " Running setup.py install for docopt: finished with status 'done'\n", + " Running setup.py install for pycountry: started\n", + " Running setup.py install for pycountry: finished with status 'done'\n", + " Running setup.py install for breadability: started\n", + " Running setup.py install for breadability: finished with status 'done'\n", + "Successfully installed breadability-0.1.20 docopt-0.6.2 lxml-4.6.3 pycountry-20.7.3 sumy-0.8.1\n" ] } ], "source": [ - "!pip 
install sumy #install sumy" + "# Install sumy\n", + "\n", + "!pip install sumy" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -109,12 +142,12 @@ ], "source": [ "import nltk\n", - "# nltk.download('punkt')" + "# For NLTK, virtual environments are highly recommended; it requires Python versions higher than 3.5 on Windows" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -182,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": 2, "metadata": { "colab": {}, "colab_type": "code", @@ -194,39 +227,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "Collecting gensim\n", - " Downloading gensim-3.8.3-cp36-cp36m-manylinux1_x86_64.whl (24.2 MB)\n", - "\u001b[K |████████████████████████████████| 24.2 MB 117 kB/s eta 0:00:01\n", - "\u001b[?25hRequirement already satisfied: scipy>=0.18.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.4.1)\n", - "Requirement already satisfied: numpy>=1.11.3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.18.4)\n", - "Collecting smart-open>=1.8.1\n", - " Using cached smart_open-2.0.0.tar.gz (103 kB)\n", - "Requirement already satisfied: six>=1.5.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.14.0)\n", - "Requirement already satisfied: requests in /home/etherealenvy/.local/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (2.23.0)\n", - "Collecting boto\n", - " Using cached boto-2.49.0-py2.py3-none-any.whl (1.4 MB)\n", - "Requirement already satisfied: boto3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (1.13.4)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages 
(from requests->smart-open>=1.8.1->gensim) (2.9)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (2020.4.5.1)\n", - "Requirement already satisfied: botocore<1.17.0,>=1.16.4 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (1.16.4)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (0.9.5)\n", - "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (0.3.3)\n", - "Requirement already satisfied: docutils<0.16,>=0.10 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->smart-open>=1.8.1->gensim) (0.15.2)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->smart-open>=1.8.1->gensim) (2.8.1)\n", - "Building wheels for collected packages: smart-open\n", - " Building wheel for smart-open (setup.py) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for smart-open: filename=smart_open-2.0.0-py3-none-any.whl size=101341 sha256=ea090b3b65b0b537ff5b71ea0f1f683dc02f3a05bb489f90eb65da4f3c619a9a\n", - " Stored in directory: /home/etherealenvy/.cache/pip/wheels/16/64/85/f3205b74e01a98fb81e081c0d61c2ecd04e4645a986db3726e\n", - "Successfully built smart-open\n", - "Installing collected packages: boto, smart-open, gensim\n", - "Successfully installed boto-2.49.0 gensim-3.8.3 smart-open-2.0.0\n" + "Requirement already satisfied: gensim in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.0.1)\n", + "Requirement already satisfied: Cython==0.29.21 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (0.29.21)\n", + "Requirement already satisfied: numpy>=1.11.3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.20.2)\n", + "Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (5.0.0)\n", + "Requirement already satisfied: scipy>=0.18.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.6.3)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" ] } ], "source": [ - "!pip install gensim #installation of the library" + "!pip install gensim" ] }, { @@ -241,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -311,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": 3, "metadata": { "colab": {}, "colab_type": "code", @@ -323,9 +341,23 @@ 
"name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: summa in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (1.2.0)\r\n", - "Requirement already satisfied: scipy>=0.19 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from summa) (1.4.1)\r\n", - "Requirement already satisfied: numpy>=1.13.3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from scipy>=0.19->summa) (1.18.4)\r\n" + "Collecting summa\n", + " Using cached summa-1.2.0.tar.gz (54 kB)\n", + "Requirement already satisfied: scipy>=0.19 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from summa) (1.6.3)\n", + "Requirement already satisfied: numpy<1.23.0,>=1.16.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from scipy>=0.19->summa) (1.20.2)\n", + "Using legacy setup.py install for summa, since package 'wheel' is not installed.\n", + "Installing collected packages: summa\n", + " Running setup.py install for summa: started\n", + " Running setup.py install for summa: finished with status 'done'\n", + "Successfully installed summa-1.2.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" ] } ], @@ -335,7 +367,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -375,177 +407,315 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": 4, "metadata": { "colab": {}, "colab_type": "code", - "id": "jN3afk2nB0Ut", - "outputId": "232f50c2-7a72-4ff9-88c8-63626564177c" + "id": "iEGOmpwjB0VE", + "outputId": "a13ba70e-859f-467a-d2c7-48ab3db91c56" }, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ - "Python 3.6.10 :: Anaconda, Inc.\r\n" + "Collecting bert-extractive-summarizer\n", + " Downloading bert_extractive_summarizer-0.7.1-py3-none-any.whl (18 kB)\n", + "Collecting spacy\n", + " Downloading spacy-3.0.6-cp37-cp37m-win_amd64.whl (11.7 MB)\n", + "Requirement already satisfied: transformers in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from bert-extractive-summarizer) (2.11.0)\n", + "Collecting scikit-learn\n", + " Downloading scikit_learn-0.24.2-cp37-cp37m-win_amd64.whl (6.8 MB)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy->bert-extractive-summarizer) (4.46.1)\n", + "Collecting typer<0.4.0,>=0.3.0\n", + " Using cached typer-0.3.2-py3-none-any.whl (21 kB)\n", + "Collecting pydantic<1.8.0,>=1.7.1\n", + " Downloading pydantic-1.7.4-cp37-cp37m-win_amd64.whl (1.7 MB)\n", + "Requirement already satisfied: numpy>=1.15.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy->bert-extractive-summarizer) (1.20.2)\n", + "Collecting catalogue<2.1.0,>=2.0.3\n", + " Using cached catalogue-2.0.4-py3-none-any.whl (16 kB)\n", + "Collecting cymem<2.1.0,>=2.0.2\n", + " Downloading cymem-2.0.5-cp37-cp37m-win_amd64.whl (35 kB)\n", + "Collecting thinc<8.1.0,>=8.0.3\n", + " Downloading thinc-8.0.3-cp37-cp37m-win_amd64.whl (1.0 MB)\n", + "Requirement already satisfied: typing-extensions<4.0.0.0,>=3.7.4; python_version < \"3.8\" in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy->bert-extractive-summarizer) (3.10.0.0)\n", + "Collecting spacy-legacy<3.1.0,>=3.0.4\n", + " Using cached spacy_legacy-3.0.5-py2.py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: jinja2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy->bert-extractive-summarizer) 
(2.11.3)\n", + "Collecting preshed<3.1.0,>=3.0.2\n", + " Downloading preshed-3.0.5-cp37-cp37m-win_amd64.whl (108 kB)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy->bert-extractive-summarizer) (2.25.1)\n", + "Requirement already satisfied: setuptools in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy->bert-extractive-summarizer) (47.1.0)\n", + "Collecting murmurhash<1.1.0,>=0.28.0\n", + " Downloading murmurhash-1.0.5-cp37-cp37m-win_amd64.whl (20 kB)\n", + "Requirement already satisfied: packaging>=20.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy->bert-extractive-summarizer) (20.9)\n", + "Collecting blis<0.8.0,>=0.4.0\n", + " Downloading blis-0.7.4-cp37-cp37m-win_amd64.whl (6.5 MB)\n", + "Collecting wasabi<1.1.0,>=0.8.1\n", + " Using cached wasabi-0.8.2-py3-none-any.whl (23 kB)\n", + "Collecting srsly<3.0.0,>=2.4.1\n", + " Downloading srsly-2.4.1-cp37-cp37m-win_amd64.whl (450 kB)\n", + "Collecting pathy>=0.3.5\n", + " Using cached pathy-0.5.2-py3-none-any.whl (42 kB)\n", + "Requirement already satisfied: filelock in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers->bert-extractive-summarizer) (3.0.12)\n", + "Requirement already satisfied: regex!=2019.12.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers->bert-extractive-summarizer) (2021.4.4)\n", + "Requirement already satisfied: sentencepiece in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers->bert-extractive-summarizer) (0.1.95)\n", + "Requirement already satisfied: tokenizers==0.7.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers->bert-extractive-summarizer) (0.7.0)\n", + "Requirement already 
satisfied: sacremoses in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers->bert-extractive-summarizer) (0.0.45)\n", + "Requirement already satisfied: joblib>=0.11 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from scikit-learn->bert-extractive-summarizer) (1.0.1)\n", + "Requirement already satisfied: scipy>=0.19.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from scikit-learn->bert-extractive-summarizer) (1.6.3)\n", + "Collecting threadpoolctl>=2.0.0\n", + " Using cached threadpoolctl-2.1.0-py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: click<7.2.0,>=7.1.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from typer<0.4.0,>=0.3.0->spacy->bert-extractive-summarizer) (7.1.2)\n", + "Requirement already satisfied: zipp>=0.5; python_version < \"3.8\" in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from catalogue<2.1.0,>=2.0.3->spacy->bert-extractive-summarizer) (3.1.0)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from jinja2->spacy->bert-extractive-summarizer) (1.1.1)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy->bert-extractive-summarizer) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy->bert-extractive-summarizer) (2020.12.5)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy->bert-extractive-summarizer) (1.25.9)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in 
c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy->bert-extractive-summarizer) (4.0.0)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from packaging>=20.0->spacy->bert-extractive-summarizer) (2.4.7)\n", + "Collecting smart-open<4.0.0,>=2.2.0\n", + " Using cached smart_open-3.0.0.tar.gz (113 kB)\n", + "Requirement already satisfied: six in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from sacremoses->transformers->bert-extractive-summarizer) (1.15.0)\n", + "Using legacy setup.py install for smart-open, since package 'wheel' is not installed.\n", + "Installing collected packages: typer, pydantic, catalogue, cymem, murmurhash, preshed, srsly, blis, wasabi, thinc, spacy-legacy, smart-open, pathy, spacy, threadpoolctl, scikit-learn, bert-extractive-summarizer\n", + " Attempting uninstall: wasabi\n", + " Found existing installation: wasabi 0.7.0\n", + " Uninstalling wasabi-0.7.0:\n", + " Successfully uninstalled wasabi-0.7.0\n", + " Attempting uninstall: smart-open\n", + " Found existing installation: smart-open 5.0.0\n", + " Uninstalling smart-open-5.0.0:\n", + " Successfully uninstalled smart-open-5.0.0\n", + " Running setup.py install for smart-open: started\n", + " Running setup.py install for smart-open: finished with status 'done'\n", + "Successfully installed bert-extractive-summarizer-0.7.1 blis-0.7.4 catalogue-2.0.4 cymem-2.0.5 murmurhash-1.0.5 pathy-0.5.2 preshed-3.0.5 pydantic-1.7.4 scikit-learn-0.24.2 smart-open-3.0.0 spacy-3.0.6 spacy-legacy-3.0.5 srsly-2.4.1 thinc-8.0.3 threadpoolctl-2.1.0 typer-0.3.2 wasabi-0.8.2\n" ] - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "iEGOmpwjB0VE", - "outputId": "a13ba70e-859f-467a-d2c7-48ab3db91c56" - }, - "outputs": [ + }, + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: bert-extractive-summarizer in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (0.4.2)\n", - "Requirement already satisfied: transformers in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from bert-extractive-summarizer) (2.2.2)\n", - "Requirement already satisfied: spacy in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from bert-extractive-summarizer) (2.1.3)\n", - "Requirement already satisfied: scikit-learn in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from bert-extractive-summarizer) (0.22.2.post1)\n", - "Requirement already satisfied: boto3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from transformers->bert-extractive-summarizer) (1.13.4)\n", - "Requirement already satisfied: regex in /home/etherealenvy/.local/lib/python3.6/site-packages (from transformers->bert-extractive-summarizer) (2020.4.4)\n", - "Requirement already satisfied: numpy in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from transformers->bert-extractive-summarizer) (1.18.4)\n", - "Requirement already satisfied: sacremoses in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from transformers->bert-extractive-summarizer) (0.0.43)\n", - "Requirement already satisfied: tqdm in /home/etherealenvy/.local/lib/python3.6/site-packages (from transformers->bert-extractive-summarizer) (4.46.0)\n", - "Requirement already satisfied: requests in /home/etherealenvy/.local/lib/python3.6/site-packages 
(from transformers->bert-extractive-summarizer) (2.23.0)\n", - "Requirement already satisfied: sentencepiece in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from transformers->bert-extractive-summarizer) (0.1.86)\n", - "Requirement already satisfied: blis<0.3.0,>=0.2.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (0.2.4)\n", - "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (0.6.0)\n", - "Requirement already satisfied: thinc<7.1.0,>=7.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (7.0.8)\n", - "Requirement already satisfied: plac<1.0.0,>=0.9.6 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (0.9.6)\n", - "Requirement already satisfied: preshed<2.1.0,>=2.0.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (2.0.1)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (1.0.2)\n", - "Requirement already satisfied: jsonschema<3.0.0,>=2.6.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (2.6.0)\n", - "Requirement already satisfied: srsly<1.1.0,>=0.0.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (1.0.2)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy->bert-extractive-summarizer) (2.0.3)\n", - "Requirement already satisfied: joblib>=0.11 in 
/home/etherealenvy/.local/lib/python3.6/site-packages (from scikit-learn->bert-extractive-summarizer) (0.14.1)\n", - "Requirement already satisfied: scipy>=0.17.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from scikit-learn->bert-extractive-summarizer) (1.4.1)\n", - "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->transformers->bert-extractive-summarizer) (0.3.3)\n", - "Requirement already satisfied: botocore<1.17.0,>=1.16.4 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->transformers->bert-extractive-summarizer) (1.16.4)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->transformers->bert-extractive-summarizer) (0.9.5)\n", - "Requirement already satisfied: six in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from sacremoses->transformers->bert-extractive-summarizer) (1.14.0)\n", - "Requirement already satisfied: click in /home/etherealenvy/.local/lib/python3.6/site-packages (from sacremoses->transformers->bert-extractive-summarizer) (7.1.2)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->transformers->bert-extractive-summarizer) (1.25.9)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->transformers->bert-extractive-summarizer) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->transformers->bert-extractive-summarizer) (2020.4.5.1)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages (from 
requests->transformers->bert-extractive-summarizer) (2.9)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->transformers->bert-extractive-summarizer) (2.8.1)\n", - "Requirement already satisfied: docutils<0.16,>=0.10 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->transformers->bert-extractive-summarizer) (0.15.2)\n", - "Requirement already satisfied: spacy==2.1.3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (2.1.3)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (1.0.2)\n", - "Requirement already satisfied: blis<0.3.0,>=0.2.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (0.2.4)\n", - "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/etherealenvy/.local/lib/python3.6/site-packages (from spacy==2.1.3) (2.23.0)\n", - "Requirement already satisfied: numpy>=1.15.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (1.18.4)\n", - "Requirement already satisfied: thinc<7.1.0,>=7.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (7.0.8)\n", - "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (0.6.0)\n", - "Requirement already satisfied: plac<1.0.0,>=0.9.6 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (0.9.6)\n", - "Requirement already satisfied: srsly<1.1.0,>=0.0.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (1.0.2)\n", - "Requirement already satisfied: 
cymem<2.1.0,>=2.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (2.0.3)\n", - "Requirement already satisfied: jsonschema<3.0.0,>=2.6.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (2.6.0)\n", - "Requirement already satisfied: preshed<2.1.0,>=2.0.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy==2.1.3) (2.0.1)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.1.3) (1.25.9)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.1.3) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.1.3) (2020.4.5.1)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.1.3) (2.9)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in /home/etherealenvy/.local/lib/python3.6/site-packages (from thinc<7.1.0,>=7.0.2->spacy==2.1.3) (4.46.0)\n" + "Collecting spacy==2.1.3\n", + " Downloading spacy-2.1.3-cp37-cp37m-win_amd64.whl (26.9 MB)\n", + "Collecting preshed<2.1.0,>=2.0.1\n", + " Downloading preshed-2.0.1-cp37-cp37m-win_amd64.whl (73 kB)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy==2.1.3) (1.0.5)\n", + "Collecting plac<1.0.0,>=0.9.6" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 
'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: transformers==2.2.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (2.2.2)\n", - "Requirement already satisfied: requests in /home/etherealenvy/.local/lib/python3.6/site-packages (from transformers==2.2.2) (2.23.0)\n", - "Requirement already satisfied: sentencepiece in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from transformers==2.2.2) (0.1.86)\n", - "Requirement already satisfied: sacremoses in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from transformers==2.2.2) (0.0.43)\n", - "Requirement already satisfied: regex in /home/etherealenvy/.local/lib/python3.6/site-packages (from transformers==2.2.2) (2020.4.4)\n", - "Requirement already satisfied: numpy in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from transformers==2.2.2) (1.18.4)\n", - "Requirement already satisfied: boto3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from transformers==2.2.2) (1.13.4)\n", - "Requirement already satisfied: tqdm in /home/etherealenvy/.local/lib/python3.6/site-packages (from transformers==2.2.2) (4.46.0)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->transformers==2.2.2) (2.9)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->transformers==2.2.2) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->transformers==2.2.2) (2020.4.5.1)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from 
requests->transformers==2.2.2) (1.25.9)\n", - "Requirement already satisfied: click in /home/etherealenvy/.local/lib/python3.6/site-packages (from sacremoses->transformers==2.2.2) (7.1.2)\n", - "Requirement already satisfied: six in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from sacremoses->transformers==2.2.2) (1.14.0)\n", - "Requirement already satisfied: joblib in /home/etherealenvy/.local/lib/python3.6/site-packages (from sacremoses->transformers==2.2.2) (0.14.1)\n", - "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->transformers==2.2.2) (0.3.3)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->transformers==2.2.2) (0.9.5)\n", - "Requirement already satisfied: botocore<1.17.0,>=1.16.4 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->transformers==2.2.2) (1.16.4)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->transformers==2.2.2) (2.8.1)\n", - "Requirement already satisfied: docutils<0.16,>=0.10 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->transformers==2.2.2) (0.15.2)\n", - "Requirement already satisfied: neuralcoref in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (4.0)\n", - "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/etherealenvy/.local/lib/python3.6/site-packages (from neuralcoref) (2.23.0)\n", - "Requirement already satisfied: numpy>=1.15.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from neuralcoref) (1.18.4)\n", - "Requirement already satisfied: boto3 in 
/home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from neuralcoref) (1.13.4)\n", - "Requirement already satisfied: spacy>=2.1.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from neuralcoref) (2.1.3)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (2.9)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (2020.4.5.1)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (1.25.9)\n", - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->neuralcoref) (0.9.5)\n", - "Requirement already satisfied: botocore<1.17.0,>=1.16.4 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->neuralcoref) (1.16.4)\n", - "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->neuralcoref) (0.3.3)\n", - "Requirement already satisfied: blis<0.3.0,>=0.2.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (0.2.4)\n", - "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (0.6.0)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from 
spacy>=2.1.0->neuralcoref) (1.0.2)\n", - "Requirement already satisfied: srsly<1.1.0,>=0.0.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (1.0.2)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (2.0.3)\n", - "Requirement already satisfied: preshed<2.1.0,>=2.0.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (2.0.1)\n", - "Requirement already satisfied: jsonschema<3.0.0,>=2.6.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (2.6.0)\n", - "Requirement already satisfied: plac<1.0.0,>=0.9.6 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (0.9.6)\n", - "Requirement already satisfied: thinc<7.1.0,>=7.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (7.0.8)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->neuralcoref) (2.8.1)\n", - "Requirement already satisfied: docutils<0.16,>=0.10 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->neuralcoref) (0.15.2)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in /home/etherealenvy/.local/lib/python3.6/site-packages (from thinc<7.1.0,>=7.0.2->spacy>=2.1.0->neuralcoref) (4.46.0)\n", - "Requirement already satisfied: six>=1.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.17.0,>=1.16.4->boto3->neuralcoref) (1.14.0)\n", - "Requirement already satisfied: torch in 
/home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (1.5.0)\n", - "Requirement already satisfied: numpy in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from torch) (1.18.4)\n", - "Requirement already satisfied: future in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from torch) (0.18.2)\n", - "Requirement already satisfied: neuralcoref in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (4.0)\n", - "Requirement already satisfied: boto3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from neuralcoref) (1.13.4)\n", - "Requirement already satisfied: numpy>=1.15.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from neuralcoref) (1.18.4)\n", - "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/etherealenvy/.local/lib/python3.6/site-packages (from neuralcoref) (2.23.0)\n", - "Requirement already satisfied: spacy>=2.1.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from neuralcoref) (2.1.3)\n" + "\n", + " Downloading plac-0.9.6-py2.py3-none-any.whl (20 kB)\n", + "Collecting thinc<7.1.0,>=7.0.2\n", + " Downloading thinc-7.0.8-cp37-cp37m-win_amd64.whl (1.9 MB)\n", + "Collecting jsonschema<3.0.0,>=2.6.0\n", + " Downloading jsonschema-2.6.0-py2.py3-none-any.whl (39 kB)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy==2.1.3) (0.8.2)\n", + "Collecting blis<0.3.0,>=0.2.2\n", + " Downloading blis-0.2.4-cp37-cp37m-win_amd64.whl (3.1 MB)\n", + "Collecting srsly<1.1.0,>=0.0.5\n", + " Downloading srsly-1.0.5-cp37-cp37m-win_amd64.whl (176 kB)\n", + "Requirement already satisfied: numpy>=1.15.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy==2.1.3) (1.20.2)\n", + "Requirement already satisfied: 
requests<3.0.0,>=2.13.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy==2.1.3) (2.25.1)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy==2.1.3) (2.0.5)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from thinc<7.1.0,>=7.0.2->spacy==2.1.3) (4.46.1)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy==2.1.3) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy==2.1.3) (2020.12.5)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy==2.1.3) (4.0.0)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy==2.1.3) (1.25.9)\n", + "Installing collected packages: preshed, plac, srsly, blis, thinc, jsonschema, spacy\n", + " Attempting uninstall: preshed\n", + " Found existing installation: preshed 3.0.5\n", + " Uninstalling preshed-3.0.5:\n", + " Successfully uninstalled preshed-3.0.5\n", + " Attempting uninstall: srsly\n", + " Found existing installation: srsly 2.4.1\n", + " Uninstalling srsly-2.4.1:\n", + " Successfully uninstalled srsly-2.4.1\n", + " Attempting uninstall: blis\n", + " Found existing installation: blis 0.7.4\n", + " Uninstalling blis-0.7.4:\n", + " Successfully uninstalled blis-0.7.4\n", + " Attempting uninstall: thinc\n", + " Found existing installation: thinc 8.0.3\n", + " Uninstalling thinc-8.0.3:\n", + " 
Successfully uninstalled thinc-8.0.3\n", + " Attempting uninstall: jsonschema\n", + " Found existing installation: jsonschema 3.2.0\n", + " Uninstalling jsonschema-3.2.0:\n", + " Successfully uninstalled jsonschema-3.2.0\n", + " Attempting uninstall: spacy\n", + " Found existing installation: spacy 3.0.6\n", + " Uninstalling spacy-3.0.6:\n", + " Successfully uninstalled spacy-3.0.6\n", + "Successfully installed blis-0.2.4 jsonschema-2.6.0 plac-0.9.6 preshed-2.0.1 spacy-2.1.3 srsly-1.0.5 thinc-7.0.8\n", + "Collecting transformers==2.2.2\n", + " Downloading transformers-2.2.2-py3-none-any.whl (387 kB)\n", + "Requirement already satisfied: sacremoses in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers==2.2.2) (0.0.45)\n", + "Requirement already satisfied: sentencepiece in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers==2.2.2) (0.1.95)\n", + "Requirement already satisfied: regex in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers==2.2.2) (2021.4.4)\n", + "Requirement already satisfied: tqdm in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers==2.2.2) (4.46.1)\n", + "Collecting boto3\n", + " Downloading boto3-1.17.74.tar.gz (98 kB)\n", + "Requirement already satisfied: numpy in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers==2.2.2) (1.20.2)\n", + "Requirement already satisfied: requests in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers==2.2.2) (2.25.1)\n", + "Requirement already satisfied: joblib in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from sacremoses->transformers==2.2.2) (1.0.1)\n", + "Requirement already satisfied: click in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from 
sacremoses->transformers==2.2.2) (7.1.2)\n", + "Requirement already satisfied: six in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from sacremoses->transformers==2.2.2) (1.15.0)\n", + "Collecting botocore<1.21.0,>=1.20.74\n", + " Downloading botocore-1.20.74-py2.py3-none-any.whl (7.5 MB)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->neuralcoref) (0.9.5)\n", - "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->neuralcoref) (0.3.3)\n", - "Requirement already satisfied: botocore<1.17.0,>=1.16.4 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->neuralcoref) (1.16.4)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (1.25.9)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (2020.4.5.1)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (2.9)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (3.0.4)\n", - "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in 
/home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (2.0.3)\n", - "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (0.6.0)\n", - "Requirement already satisfied: srsly<1.1.0,>=0.0.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (1.0.2)\n", - "Requirement already satisfied: plac<1.0.0,>=0.9.6 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (0.9.6)\n", - "Requirement already satisfied: jsonschema<3.0.0,>=2.6.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (2.6.0)\n", - "Requirement already satisfied: thinc<7.1.0,>=7.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (7.0.8)\n", - "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (1.0.2)\n", - "Requirement already satisfied: preshed<2.1.0,>=2.0.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (2.0.1)\n", - "Requirement already satisfied: blis<0.3.0,>=0.2.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from spacy>=2.1.0->neuralcoref) (0.2.4)\n", - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->neuralcoref) (2.8.1)\n", - "Requirement already satisfied: docutils<0.16,>=0.10 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->neuralcoref) (0.15.2)\n", - "Requirement already satisfied: 
tqdm<5.0.0,>=4.10.0 in /home/etherealenvy/.local/lib/python3.6/site-packages (from thinc<7.1.0,>=7.0.2->spacy>=2.1.0->neuralcoref) (4.46.0)\n", - "Requirement already satisfied: six>=1.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.17.0,>=1.16.4->boto3->neuralcoref) (1.14.0)\n", - "Requirement already satisfied: en_core_web_sm==2.1.0 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz#egg=en_core_web_sm==2.1.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (2.1.0)\n", - "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", + "Collecting jmespath<1.0.0,>=0.7.1\n", + " Using cached jmespath-0.10.0-py2.py3-none-any.whl (24 kB)\n", + "Collecting s3transfer<0.5.0,>=0.4.0\n", + " Downloading s3transfer-0.4.2-py2.py3-none-any.whl (79 kB)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->transformers==2.2.2) (1.25.9)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->transformers==2.2.2) (2.10)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->transformers==2.2.2) (4.0.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->transformers==2.2.2) (2020.12.5)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from botocore<1.21.0,>=1.20.74->boto3->transformers==2.2.2) (2.8.1)\n", + "Using legacy setup.py install for boto3, since package 'wheel' is not installed.\n", + "Installing 
collected packages: jmespath, botocore, s3transfer, boto3, transformers\n", + " Running setup.py install for boto3: started\n", + " Running setup.py install for boto3: finished with status 'done'\n", + " Attempting uninstall: transformers\n", + " Found existing installation: transformers 2.11.0\n", + " Uninstalling transformers-2.11.0:\n", + " Successfully uninstalled transformers-2.11.0\n", + "Successfully installed boto3-1.17.74 botocore-1.20.74 jmespath-0.10.0 s3transfer-0.4.2 transformers-2.2.2\n", + "Collecting neuralcoref\n", + " Using cached neuralcoref-4.0-cp37-cp37m-win_amd64.whl (227 kB)\n", + "Requirement already satisfied: numpy>=1.15.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from neuralcoref) (1.20.2)\n", + "Requirement already satisfied: spacy>=2.1.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from neuralcoref) (2.1.3)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from neuralcoref) (2.25.1)\n", + "Requirement already satisfied: boto3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from neuralcoref) (1.17.74)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (1.0.5)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (0.8.2)\n", + "Requirement already satisfied: thinc<7.1.0,>=7.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (7.0.8)\n", + "Requirement already satisfied: jsonschema<3.0.0,>=2.6.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) 
(2.6.0)\n", + "Requirement already satisfied: plac<1.0.0,>=0.9.6 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (0.9.6)\n", + "Requirement already satisfied: srsly<1.1.0,>=0.0.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (1.0.5)\n", + "Requirement already satisfied: blis<0.3.0,>=0.2.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (0.2.4)\n", + "Requirement already satisfied: preshed<2.1.0,>=2.0.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (2.0.1)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (2.0.5)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (1.25.9)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (2.10)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (4.0.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (2020.12.5)\n", + "Requirement already satisfied: botocore<1.21.0,>=1.20.74 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->neuralcoref) (1.20.74)\n", + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in 
c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->neuralcoref) (0.10.0)\n", + "Requirement already satisfied: s3transfer<0.5.0,>=0.4.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->neuralcoref) (0.4.2)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from thinc<7.1.0,>=7.0.2->spacy>=2.1.0->neuralcoref) (4.46.1)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from botocore<1.21.0,>=1.20.74->boto3->neuralcoref) (2.8.1)\n", + "Requirement already satisfied: six>=1.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.21.0,>=1.20.74->boto3->neuralcoref) (1.15.0)\n", + "Installing collected packages: neuralcoref\n", + "Successfully installed neuralcoref-4.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n", + "ERROR: Invalid requirement: '#you'\n", + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: neuralcoref in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.0)\n", + "Requirement already satisfied: boto3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from neuralcoref) 
(1.17.74)\n", + "Requirement already satisfied: spacy>=2.1.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from neuralcoref) (2.1.3)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from neuralcoref) (2.25.1)\n", + "Requirement already satisfied: numpy>=1.15.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from neuralcoref) (1.20.2)\n", + "Requirement already satisfied: botocore<1.21.0,>=1.20.74 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->neuralcoref) (1.20.74)\n", + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->neuralcoref) (0.10.0)\n", + "Requirement already satisfied: s3transfer<0.5.0,>=0.4.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->neuralcoref) (0.4.2)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (1.0.5)\n", + "Requirement already satisfied: srsly<1.1.0,>=0.0.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (1.0.5)\n", + "Requirement already satisfied: blis<0.3.0,>=0.2.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (0.2.4)\n", + "Requirement already satisfied: plac<1.0.0,>=0.9.6 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (0.9.6)\n", + "Requirement already satisfied: jsonschema<3.0.0,>=2.6.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (2.6.0)\n", + "Requirement 
already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (2.0.5)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.2.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (0.8.2)\n", + "Requirement already satisfied: preshed<2.1.0,>=2.0.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (2.0.1)\n", + "Requirement already satisfied: thinc<7.1.0,>=7.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from spacy>=2.1.0->neuralcoref) (7.0.8)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (1.25.9)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (2020.12.5)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests<3.0.0,>=2.13.0->neuralcoref) (4.0.0)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from botocore<1.21.0,>=1.20.74->boto3->neuralcoref) (2.8.1)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from thinc<7.1.0,>=7.0.2->spacy>=2.1.0->neuralcoref) (4.46.1)\n", + "Requirement already satisfied: six>=1.5 in 
c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.21.0,>=1.20.74->boto3->neuralcoref) (1.15.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting en_core_web_sm==2.1.0\n", + " Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz (11.1 MB)\n", + "Using legacy setup.py install for en-core-web-sm, since package 'wheel' is not installed.\n", + "Installing collected packages: en-core-web-sm\n", + " Running setup.py install for en-core-web-sm: started\n", + " Running setup.py install for en-core-web-sm: finished with status 'done'\n", + "Successfully installed en-core-web-sm-2.1.0\n", + "[+] Download and installation successful\n", "You can now load the model via spacy.load('en_core_web_sm')\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'C:\\Users\\sukee\\AppData\\Local\\Programs\\Python\\Python37\\python.exe -m pip install --upgrade pip' command.\n" + ] } ], "source": [ @@ -561,7 +731,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -624,7 +794,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -639,26 +809,58 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: transformers in 
/usr/local/lib/python3.6/dist-packages (2.9.0)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n", - "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n", - "Requirement already satisfied: tokenizers==0.7.0 in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n", - "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (from transformers) (0.1.86)\n", - "Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.4)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n", - "Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers) (0.0.43)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.9)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.4.5.1)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.14.1)\n", - 
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.12.0)\n", - "Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (1.5.0+cu101)\n", - "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch) (0.16.0)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from torch) (1.18.4)\n" + "Requirement already satisfied: transformers in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (2.2.2)" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Requirement already satisfied: numpy in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers) (1.20.2)\n", + "Requirement already satisfied: requests in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers) (2.25.1)\n", + "Requirement already satisfied: sentencepiece in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers) (0.1.95)\n", + "Requirement already satisfied: boto3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers) (1.17.74)\n", + "Requirement already satisfied: tqdm in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers) (4.46.1)\n", + "Requirement already satisfied: sacremoses in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers) (0.0.45)\n", + "Requirement already satisfied: regex in 
c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from transformers) (2021.4.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->transformers) (2.10)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->transformers) (1.25.9)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->transformers) (4.0.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->transformers) (2020.12.5)\n", + "Requirement already satisfied: botocore<1.21.0,>=1.20.74 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->transformers) (1.20.74)\n", + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->transformers) (0.10.0)\n", + "Requirement already satisfied: s3transfer<0.5.0,>=0.4.0 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from boto3->transformers) (0.4.2)\n", + "Requirement already satisfied: joblib in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from sacremoses->transformers) (1.0.1)\n", + "Requirement already satisfied: six in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from sacremoses->transformers) (1.15.0)\n", + "Requirement already satisfied: click in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from sacremoses->transformers) (7.1.2)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in 
c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from botocore<1.21.0,>=1.20.74->boto3->transformers) (2.8.1)\n", + "Requirement already satisfied: torch in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (1.5.1+cpu)" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Requirement already satisfied: future in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from torch) (0.18.2)\n", + "Requirement already satisfied: numpy in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from torch) (1.20.2)\n" ] } ], @@ -730,17 +932,6 @@ "\n", "print (\"\\n\\nSummarized text: \\n\",output)" ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yvpff-KJH84c" - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -750,23 +941,23 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.17" + "pygments_lexer": "ipython3", + "version": "3.7.9" } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 } diff --git a/Ch7/04_RecommenderSystems.ipynb b/Ch7/04_RecommenderSystems.ipynb index e9578c1..a9eaa8f 100644 --- a/Ch7/04_RecommenderSystems.ipynb +++ b/Ch7/04_RecommenderSystems.ipynb @@ 
-9,49 +9,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Collecting gensim\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/1d/69/1262ed0050c21f5054702b8e96a2d8c310d4cd059e4a08c9a2fe6a5dae65/gensim-3.8.3-cp35-cp35m-manylinux1_x86_64.whl (24.2MB)\n", - "\u001b[K 100% |████████████████████████████████| 24.2MB 930kB/s ta 0:00:011 41% |█████████████▎ | 10.1MB 5.2MB/s eta 0:00:03\n", - "\u001b[?25hCollecting smart-open>=1.8.1 (from gensim)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/77/744c79da6e66691e3500b6dffff29bdd787015eae817d594791edc7b719b/smart_open-2.0.0.tar.gz (103kB)\n", - "\u001b[K 100% |████████████████████████████████| 112kB 3.4MB/s ta 0:00:01\n", - "\u001b[?25hRequirement already satisfied: six>=1.5.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from gensim) (1.14.0)\n", - "Collecting scipy>=0.18.1 (from gensim)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c1/60/8cbf00c0deb50a971e6e3a015fb32513960a92867df979870a454481817c/scipy-1.4.1-cp35-cp35m-manylinux1_x86_64.whl (26.0MB)\n", - "\u001b[K 100% |████████████████████████████████| 26.0MB 1.0MB/s ta 0:00:011\n", - "\u001b[?25hCollecting numpy>=1.11.3 (from gensim)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/38/92/fa5295d9755c7876cb8490eab866e1780154033fa45978d9cf74ffbd4c68/numpy-1.18.4-cp35-cp35m-manylinux1_x86_64.whl (20.0MB)\n", - "\u001b[K 100% |████████████████████████████████| 20.0MB 1.8MB/s eta 0:00:01\n", - "\u001b[?25hRequirement already satisfied: requests in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from smart-open>=1.8.1->gensim) (2.23.0)\n", - "Collecting boto (from smart-open>=1.8.1->gensim)\n", - "\u001b[?25l Downloading 
https://files.pythonhosted.org/packages/23/10/c0b78c27298029e4454a472a1919bde20cb182dab1662cec7f2ca1dcc523/boto-2.49.0-py2.py3-none-any.whl (1.4MB)\n", - "\u001b[K 100% |████████████████████████████████| 1.4MB 4.6MB/s eta 0:00:01\n", - "\u001b[?25hCollecting boto3 (from smart-open>=1.8.1->gensim)\n", - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/36/9e/e963605983fc1188c200ce84e2e07a1882c84a9e4c71cba80076b21441bb/boto3-1.13.4-py2.py3-none-any.whl (128kB)\n", - "\u001b[K 100% |████████████████████████████████| 133kB 6.7MB/s ta 0:00:01\n", - "\u001b[?25hRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n", - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (2.9)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (2018.8.24)\n", - "Collecting jmespath<1.0.0,>=0.7.1 (from boto3->smart-open>=1.8.1->gensim)\n", - " Using cached https://files.pythonhosted.org/packages/a3/43/1e939e1fcd87b827fe192d0c9fc25b48c5b3368902bfb913de7754b0dc03/jmespath-0.9.5-py2.py3-none-any.whl\n", - "Collecting s3transfer<0.4.0,>=0.3.0 (from boto3->smart-open>=1.8.1->gensim)\n", - " Using cached https://files.pythonhosted.org/packages/69/79/e6afb3d8b0b4e96cefbdc690f741d7dd24547ff1f94240c997a26fa908d3/s3transfer-0.3.3-py2.py3-none-any.whl\n", - "Collecting botocore<1.17.0,>=1.16.4 (from boto3->smart-open>=1.8.1->gensim)\n" + "Requirement already satisfied: gensim in 
c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.0.1)\n", + "Requirement already satisfied: numpy>=1.11.3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.20.2)\n", + "Requirement already satisfied: Cython==0.29.21 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (0.29.21)\n", + "Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (3.0.0)\n", + "Requirement already satisfied: scipy>=0.18.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.6.3)\n", + "Requirement already satisfied: requests in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from smart-open>=1.8.1->gensim) (2.25.1)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (4.0.0)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (2.10)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (2020.12.5)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, 
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: nltk in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (3.6.2)" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Requirement already satisfied: joblib in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (1.0.1)\n", + "Requirement already satisfied: tqdm in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (4.46.1)\n", + "Requirement already satisfied: regex in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (2021.4.4)\n", + "Requirement already satisfied: click in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (7.1.2)\n" ] } ], "source": [ "!pip install gensim\n", - "!pip install nltk\n", - "#todo: add pip for downloading nltk data?" + "!pip install nltk" ] }, { @@ -61,7 +75,7 @@ "outputs": [], "source": [ "from nltk.tokenize import word_tokenize\n", - "from gensim.models.doc2vec import Doc2Vec, TaggedDocument\n" + "from gensim.models.doc2vec import Doc2Vec, TaggedDocument" ] }, { @@ -70,13 +84,13 @@ "metadata": {}, "outputs": [], "source": [ - "#Read the dataset’s README to understand the data format. \n", + "# Read the dataset’s README to understand the data format. 
\n", + "\n", "data_path = \"booksummaries.txt\"\n", "mydata = {} #titles-summaries dictionary object\n", "for line in open(data_path, encoding=\"utf-8\"):\n", " temp = line.split(\"\\t\")\n", - " mydata[temp[2]] = temp[6]\n", - "\n" + " mydata[temp[2]] = temp[6]" ] }, { @@ -90,8 +104,7 @@ "model = Doc2Vec(vector_size=50, alpha=0.025, min_count=10, dm =1, epochs=100)\n", "model.build_vocab(train_doc2vec)\n", "model.train(train_doc2vec, total_examples=model.corpus_count, epochs=model.epochs)\n", - "model.save(\"d2v.model\")\n", - "\n" + "model.save(\"d2v.model\")" ] }, { @@ -117,16 +130,9 @@ "Napoleon enacts changes to the governance structure of the farm, replacing meetings with a committee of pigs who will run the farm.\n", " \"\"\"\n", "new_vector = model.infer_vector(word_tokenize(sample))\n", - "sims = model.docvecs.most_similar([new_vector]) #gives 10 most similar titles\n", + "sims = model.docvecs.most_similar([new_vector])\n", "print(sims)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -145,9 +151,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.6" + "version": "3.7.9" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/Ch7/05_MachineTranslation.ipynb b/Ch7/05_MachineTranslation.ipynb index 18da537..1383602 100644 --- a/Ch7/05_MachineTranslation.ipynb +++ b/Ch7/05_MachineTranslation.ipynb @@ -9,28 +9,33 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Collecting requests\n", - " Using cached https://files.pythonhosted.org/packages/1a/70/1935c770cb3be6e3a8b78ced23d7e0f3b187f5cbfab4749523ed65d7c9b1/requests-2.23.0-py2.py3-none-any.whl\n", - "Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 (from requests)\n", - " Using cached 
https://files.pythonhosted.org/packages/e1/e5/df302e8017440f111c11cc41a6b432838672f5a70aa29227bf58149dc72f/urllib3-1.25.9-py2.py3-none-any.whl\n", - "Collecting chardet<4,>=3.0.2 (from requests)\n", - " Using cached https://files.pythonhosted.org/packages/bc/a9/01ffebfb562e4274b6487b4bb1ddec7ca55ec7510b22e4c51f14098443b8/chardet-3.0.4-py2.py3-none-any.whl\n", - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests) (2018.8.24)\n", - "Collecting idna<3,>=2.5 (from requests)\n", - " Using cached https://files.pythonhosted.org/packages/89/e3/afebe61c546d18fb1709a61bee788254b40e736cff7271c7de5de2dc4128/idna-2.9-py2.py3-none-any.whl\n", - "\u001b[31mjupyterlab-server 1.0.0 has requirement jsonschema>=3.0.1, but you'll have jsonschema 2.6.0 which is incompatible.\u001b[0m\n", - "\u001b[31mjupyterlab 2.1.0 has requirement jupyterlab_server>=1.1.0, but you'll have jupyterlab-server 1.0.0 which is incompatible.\u001b[0m\n", - "Installing collected packages: urllib3, chardet, idna, requests\n", - "Successfully installed chardet-3.0.4 idna-2.9 requests-2.23.0 urllib3-1.25.9\n", - "\u001b[33mYou are using pip version 10.0.1, however version 20.1 is available.\n", - "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" + "Requirement already satisfied: requests in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (2.25.1)" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in 
c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (4.0.0)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (1.25.9)\n", + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (2020.12.5)\n", + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests) (2.10)\n" ] } ], @@ -53,12 +58,14 @@ "metadata": {}, "outputs": [], "source": [ - "#You will need a subscription key - you can use trial version\n", + "# You will need a subscription key - you can use trial version\n", + "# The key is tied to your own Microsoft Translator subscription, so each user must supply their own\n", + "\n", "subscription_key = \"XXXX\"\n", "endpoint = \"https://api-nam.cognitive.microsofttranslator.com\"\n", "path = '/translate?api-version=3.0'\n", "params = '&to=de' #From English to German (de)\n", - "constructed_url = endpoint + path + params\n" + "constructed_url = endpoint + path + params" ] }, { @@ -80,28 +87,15 @@ "print(json.dumps(response, sort_keys=True, indent=4, separators=(',', ': ')))\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Google Cloud also has a translate service, which can be explored as an additional exercise." 
- ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "# Google Cloud also has a translate service, which can be explored as an additional exercise.\n", "#todo: get a trial subscription key and show output for this, and perhaps add google example code from: https://cloud.google.com/translate/docs" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -120,9 +114,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.6" + "version": "3.7.9" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }