diff --git a/_doc/notebooks/td2a/seance_5_intro_et_json.ipynb b/_doc/notebooks/td2a/seance_5_intro_et_json.ipynb index f3c727505..6c5224e51 100644 --- a/_doc/notebooks/td2a/seance_5_intro_et_json.ipynb +++ b/_doc/notebooks/td2a/seance_5_intro_et_json.ipynb @@ -144,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": { "collapsed": false }, @@ -155,7 +155,7 @@ "['twitter_for_network_100000.db']" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -167,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": { "collapsed": false }, @@ -177,15 +177,15 @@ "output_type": "stream", "text": [ "User defined method or cross-method\n", - "100 loops, best of 3: 11.7 ms per loop\n", - "100 loops, best of 3: 10.2 ms per loop\n", - "100 loops, best of 3: 7.11 ms per loop\n", - "100 loops, best of 3: 5.4 ms per loop\n", + "10 loops, best of 3: 26.4 ms per loop\n", + "10 loops, best of 3: 20.9 ms per loop\n", + "100 loops, best of 3: 13 ms per loop\n", + "100 loops, best of 3: 10.8 ms per loop\n", "Builtin function\n", - "1000 loops, best of 3: 951 µs per loop\n", + "100 loops, best of 3: 3.81 ms per loop\n", "Numpy function\n", - "10000 loops, best of 3: 73.3 µs per loop\n", - "10000 loops, best of 3: 69.7 µs per loop\n" + "10000 loops, best of 3: 84.4 µs per loop\n", + "10000 loops, best of 3: 82.2 µs per loop\n" ] } ], @@ -269,14 +269,15 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ - "import os, psutil, gc\n", - "import resource\n", + "import os, psutil, gc, sys\n", + "if not sys.platform.startswith(\"win\"):\n", + " import resource\n", "\n", "def memory_usage_psutil():\n", " gc.collect()\n", @@ -284,12 +285,13 @@ " mem = process.memory_info()[0] / float(2 ** 20)\n", "\n", " print( \"Memory used : %i MB\" % mem )\n", - " print( \"Max memory usage : %i MB\" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss//1024) )" + " if not sys.platform.startswith(\"win\"):\n", + " print( \"Max memory usage : %i MB\" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss//1024) )" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -298,8 +300,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Memory used : 54 MB\n", - "Max memory usage : 54 MB\n" + "Memory used : 108 MB\n" ] } ], @@ -309,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 8, "metadata": { "collapsed": true }, @@ -325,7 +326,7 @@ " print(\"ujson not available\")\n", " import json\n", "\n", - "conn_sqlite = sqlite3.connect(\"twitter_for_network_10000.db\")\n", + "conn_sqlite = sqlite3.connect(\"twitter_for_network_100000.db\")\n", "cursor_sqlite = conn_sqlite.cursor()" ] }, @@ -338,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -347,7 +348,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1225448091\n" + "108086205\n" ] } ], @@ -359,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -368,8 +369,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Memory used : 57 MB\n", - "Max memory usage : 57 MB\n" + "Memory used : 112 MB\n" ] } ], @@ -531,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 11, "metadata": { "collapsed": false, "scrolled": false @@ -542,112 +542,59 @@ "output_type": "stream", "text": [ "############### user raw json ###############\n", - "{\"utc_offset\": null, \"friends_count\": 164, \"entities\": {\"description\": {\"urls\": []}, \"url\": {\"urls\": [{\"expanded_url\": \"http://www.aeroportsdeparis.fr/\", \"display_url\": \"aeroportsdeparis.fr\", \"indices\": [0, 22], \"url\": \"http://t.co/dFybncvjp1\"}]}}, \"description\": \"Pr\\u00e9sident-Directeur G\\u00e9n\\u00e9ral du groupe A\\u00e9roports de Paris. Vos informations passagers \\u00e0 suivre sur @AeroportsParis, et institutionnelles sur @ADPcorporate.\", \"id\": 49191180, \"contributors_enabled\": false, \"geo_enabled\": false, \"name\": \"Augustin de Romanet\", \"favourites_count\": 16, \"verified\": false, \"protected\": false, \"created_at\": \"Sun Jun 21 01:50:47 +0000 2009\", \"statuses_count\": 130, \"lang\": \"en\", \"time_zone\": null, \"screen_name\": \"Romanet\", \"location\": \"Paris\", \"id_str\": \"49191180\", \"url\": \"http://t.co/dFybncvjp1\", \"followers_count\": 772, \"listed_count\": 23, \"has_extended_profile\": false}\n", + "{\"utc_offset\": 7200, \"friends_count\": 454, \"entities\": {\"description\": {\"urls\": []}, \"url\": {\"urls\": [{\"expanded_url\": \"http://www.havas.com\", \"display_url\": \"havas.com\", \"indices\": [0, 22], \"url\": \"http://t.co/8GcZtydjWh\"}]}}, \"description\": \"Havas Group CEO\", \"id\": 1103159180, \"contributors_enabled\": false, \"geo_enabled\": false, \"name\": \"Yannick Bollor\\u00e9\", \"favourites_count\": 873, \"verified\": true, \"protected\": false, \"created_at\": \"Sat Jan 19 08:23:33 +0000 2013\", \"statuses_count\": 654, \"lang\": \"en\", \"time_zone\": \"Ljubljana\", \"screen_name\": \"YannickBollore\", \"location\": \"\", \"id_str\": \"1103159180\", \"url\": \"http://t.co/8GcZtydjWh\", \"followers_count\": 7345, \"listed_count\": 118, \"has_extended_profile\": false}\n", "############### user as python dict ###############\n", "{'contributors_enabled': False,\n", - " 'created_at': 'Sun Jun 21 01:50:47 +0000 2009',\n", - " 'description': 'Président-Directeur Général du groupe Aéroports de Paris. '\n", - " 'Vos informations passagers à suivre sur @AeroportsParis, et '\n", - " 'institutionnelles sur @ADPcorporate.',\n", + " 'created_at': 'Sat Jan 19 08:23:33 +0000 2013',\n", + " 'description': 'Havas Group CEO',\n", " 'entities': {'description': {'urls': []},\n", - " 'url': {'urls': [{'display_url': 'aeroportsdeparis.fr',\n", - " 'expanded_url': 'http://www.aeroportsdeparis.fr/',\n", + " 'url': {'urls': [{'display_url': 'havas.com',\n", + " 'expanded_url': 'http://www.havas.com',\n", " 'indices': [0, 22],\n", - " 'url': 'http://t.co/dFybncvjp1'}]}},\n", - " 'favourites_count': 16,\n", - " 'followers_count': 772,\n", - " 'friends_count': 164,\n", + " 'url': 'http://t.co/8GcZtydjWh'}]}},\n", + " 'favourites_count': 873,\n", + " 'followers_count': 7345,\n", + " 'friends_count': 454,\n", " 'geo_enabled': False,\n", " 'has_extended_profile': False,\n", - " 'id': 49191180,\n", - " 'id_str': '49191180',\n", + " 'id': 1103159180,\n", + " 'id_str': '1103159180',\n", " 'lang': 'en',\n", - " 'listed_count': 23,\n", - " 'location': 'Paris',\n", - " 'name': 'Augustin de Romanet',\n", + " 'listed_count': 118,\n", + " 'location': '',\n", + " 'name': 'Yannick Bolloré',\n", " 'protected': False,\n", - " 'screen_name': 'Romanet',\n", - " 'statuses_count': 130,\n", - " 'time_zone': None,\n", - " 'url': 'http://t.co/dFybncvjp1',\n", - " 'utc_offset': None,\n", - " 'verified': False}\n", + " 'screen_name': 'YannickBollore',\n", + " 'statuses_count': 654,\n", + " 'time_zone': 'Ljubljana',\n", + " 'url': 'http://t.co/8GcZtydjWh',\n", + " 'utc_offset': 7200,\n", + " 'verified': True}\n", "############### status as python dict ###############\n", "{'contributors': None,\n", " 'coordinates': None,\n", - " 'created_at': 'Fri Jul 24 17:24:44 +0000 2015',\n", - " 'entities': {'hashtags': [{'indices': [61, 71], 'text': 'Chrétiens'},\n", - " {'indices': [87, 93], 'text': 'Daech'}],\n", - " 'media': [{'display_url': 'pic.twitter.com/JghmnBBb8q',\n", - " 'expanded_url': 'http://twitter.com/FChretienne/status/624595066967830528/photo/1',\n", - " 'id': 624580461596049408,\n", - " 'id_str': '624580461596049408',\n", - " 'indices': [117, 139],\n", - " 'media_url': 'http://pbs.twimg.com/media/CKr0nuMVAAAl_MM.jpg',\n", - " 'media_url_https': 'https://pbs.twimg.com/media/CKr0nuMVAAAl_MM.jpg',\n", - " 'sizes': {'large': {'h': 413,\n", - " 'resize': 'fit',\n", - " 'w': 620},\n", - " 'medium': {'h': 399,\n", - " 'resize': 'fit',\n", - " 'w': 600},\n", - " 'small': {'h': 226,\n", - " 'resize': 'fit',\n", - " 'w': 340},\n", - " 'thumb': {'h': 150,\n", - " 'resize': 'crop',\n", - " 'w': 150}},\n", - " 'source_status_id': 624595066967830528,\n", - " 'source_status_id_str': '624595066967830528',\n", - " 'source_user_id': 599222987,\n", - " 'source_user_id_str': '599222987',\n", - " 'type': 'photo',\n", - " 'url': 'http://t.co/JghmnBBb8q'}],\n", + " 'created_at': 'Wed Jul 22 17:14:47 +0000 2015',\n", + " 'entities': {'hashtags': [{'indices': [16, 28], 'text': 'Agriculture'}],\n", " 'symbols': [],\n", - " 'urls': [{'display_url': 'bit.ly/1gSn5RW',\n", - " 'expanded_url': 'http://bit.ly/1gSn5RW',\n", - " 'indices': [94, 116],\n", - " 'url': 'http://t.co/8mWRNCXmJo'}],\n", - " 'user_mentions': [{'id': 599222987,\n", - " 'id_str': '599222987',\n", - " 'indices': [3, 15],\n", - " 'name': 'Famille Chrétienne ن',\n", - " 'screen_name': 'FChretienne'},\n", + " 'urls': [{'display_url': 'blog-fillon.com/2015/07/plan-d…',\n", + " 'expanded_url': 'http://www.blog-fillon.com/2015/07/plan-de-soutien-du-gouvernement-a-l-agriculture-decevant.html',\n", + " 'indices': [139, 140],\n", + " 'url': 'http://t.co/wZ3HkhHvuM'}],\n", + " 'user_mentions': [{'id': 34598169,\n", + " 'id_str': '34598169',\n", + " 'indices': [3, 14],\n", + " 'name': 'Fix RICHARD',\n", + " 'screen_name': 'FixRichard'},\n", " {'id': 551669623,\n", " 'id_str': '551669623',\n", - " 'indices': [32, 47],\n", + " 'indices': [113, 128],\n", " 'name': 'François Fillon',\n", " 'screen_name': 'FrancoisFillon'}]},\n", - " 'extended_entities': {'media': [{'display_url': 'pic.twitter.com/JghmnBBb8q',\n", - " 'expanded_url': 'http://twitter.com/FChretienne/status/624595066967830528/photo/1',\n", - " 'id': 624580461596049408,\n", - " 'id_str': '624580461596049408',\n", - " 'indices': [117, 139],\n", - " 'media_url': 'http://pbs.twimg.com/media/CKr0nuMVAAAl_MM.jpg',\n", - " 'media_url_https': 'https://pbs.twimg.com/media/CKr0nuMVAAAl_MM.jpg',\n", - " 'sizes': {'large': {'h': 413,\n", - " 'resize': 'fit',\n", - " 'w': 620},\n", - " 'medium': {'h': 399,\n", - " 'resize': 'fit',\n", - " 'w': 600},\n", - " 'small': {'h': 226,\n", - " 'resize': 'fit',\n", - " 'w': 340},\n", - " 'thumb': {'h': 150,\n", - " 'resize': 'crop',\n", - " 'w': 150}},\n", - " 'source_status_id': 624595066967830528,\n", - " 'source_status_id_str': '624595066967830528',\n", - " 'source_user_id': 599222987,\n", - " 'source_user_id_str': '599222987',\n", - " 'type': 'photo',\n", - " 'url': 'http://t.co/JghmnBBb8q'}]},\n", " 'favorite_count': 0,\n", " 'favorited': False,\n", " 'geo': None,\n", - " 'id': 624631311471411200,\n", - " 'id_str': '624631311471411200',\n", + " 'id': 623904030251708416,\n", + " 'id_str': '623904030251708416',\n", " 'in_reply_to_screen_name': None,\n", " 'in_reply_to_status_id': None,\n", " 'in_reply_to_status_id_str': None,\n", @@ -657,74 +604,29 @@ " 'lang': 'fr',\n", " 'place': None,\n", " 'possibly_sensitive': False,\n", - " 'retweet_count': 51,\n", + " 'retweet_count': 23,\n", " 'retweeted': False,\n", " 'retweeted_status': {'contributors': None,\n", " 'coordinates': None,\n", - " 'created_at': 'Fri Jul 24 15:00:43 +0000 2015',\n", - " 'entities': {'hashtags': [{'indices': [44, 54],\n", - " 'text': 'Chrétiens'},\n", - " {'indices': [70, 76],\n", - " 'text': 'Daech'}],\n", - " 'media': [{'display_url': 'pic.twitter.com/JghmnBBb8q',\n", - " 'expanded_url': 'http://twitter.com/FChretienne/status/624595066967830528/photo/1',\n", - " 'id': 624580461596049408,\n", - " 'id_str': '624580461596049408',\n", - " 'indices': [100, 122],\n", - " 'media_url': 'http://pbs.twimg.com/media/CKr0nuMVAAAl_MM.jpg',\n", - " 'media_url_https': 'https://pbs.twimg.com/media/CKr0nuMVAAAl_MM.jpg',\n", - " 'sizes': {'large': {'h': 413,\n", - " 'resize': 'fit',\n", - " 'w': 620},\n", - " 'medium': {'h': 399,\n", - " 'resize': 'fit',\n", - " 'w': 600},\n", - " 'small': {'h': 226,\n", - " 'resize': 'fit',\n", - " 'w': 340},\n", - " 'thumb': {'h': 150,\n", - " 'resize': 'crop',\n", - " 'w': 150}},\n", - " 'type': 'photo',\n", - " 'url': 'http://t.co/JghmnBBb8q'}],\n", + " 'created_at': 'Wed Jul 22 17:09:33 +0000 2015',\n", + " 'entities': {'hashtags': [{'indices': [0, 12],\n", + " 'text': 'Agriculture'}],\n", " 'symbols': [],\n", - " 'urls': [{'display_url': 'bit.ly/1gSn5RW',\n", - " 'expanded_url': 'http://bit.ly/1gSn5RW',\n", - " 'indices': [77, 99],\n", - " 'url': 'http://t.co/8mWRNCXmJo'}],\n", + " 'urls': [{'display_url': 'blog-fillon.com/2015/07/plan-d…',\n", + " 'expanded_url': 'http://www.blog-fillon.com/2015/07/plan-de-soutien-du-gouvernement-a-l-agriculture-decevant.html',\n", + " 'indices': [113, 135],\n", + " 'url': 'http://t.co/wZ3HkhHvuM'}],\n", " 'user_mentions': [{'id': 551669623,\n", " 'id_str': '551669623',\n", - " 'indices': [15, 30],\n", + " 'indices': [97, 112],\n", " 'name': 'François '\n", " 'Fillon',\n", " 'screen_name': 'FrancoisFillon'}]},\n", - " 'extended_entities': {'media': [{'display_url': 'pic.twitter.com/JghmnBBb8q',\n", - " 'expanded_url': 'http://twitter.com/FChretienne/status/624595066967830528/photo/1',\n", - " 'id': 624580461596049408,\n", - " 'id_str': '624580461596049408',\n", - " 'indices': [100,\n", - " 122],\n", - " 'media_url': 'http://pbs.twimg.com/media/CKr0nuMVAAAl_MM.jpg',\n", - " 'media_url_https': 'https://pbs.twimg.com/media/CKr0nuMVAAAl_MM.jpg',\n", - " 'sizes': {'large': {'h': 413,\n", - " 'resize': 'fit',\n", - " 'w': 620},\n", - " 'medium': {'h': 399,\n", - " 'resize': 'fit',\n", - " 'w': 600},\n", - " 'small': {'h': 226,\n", - " 'resize': 'fit',\n", - " 'w': 340},\n", - " 'thumb': {'h': 150,\n", - " 'resize': 'crop',\n", - " 'w': 150}},\n", - " 'type': 'photo',\n", - " 'url': 'http://t.co/JghmnBBb8q'}]},\n", - " 'favorite_count': 20,\n", + " 'favorite_count': 4,\n", " 'favorited': False,\n", " 'geo': None,\n", - " 'id': 624595066967830528,\n", - " 'id_str': '624595066967830528',\n", + " 'id': 623902715584888832,\n", + " 'id_str': '623902715584888832',\n", " 'in_reply_to_screen_name': None,\n", " 'in_reply_to_status_id': None,\n", " 'in_reply_to_status_id_str': None,\n", @@ -734,70 +636,70 @@ " 'lang': 'fr',\n", " 'place': None,\n", " 'possibly_sensitive': False,\n", - " 'retweet_count': 51,\n", + " 'retweet_count': 23,\n", " 'retweeted': False,\n", - " 'source': 'TweetDeck',\n", - " 'text': 'Rencontre avec @FrancoisFillon au sujet des '\n", - " \"#Chrétiens d'Orient et de #Daech \"\n", - " 'http://t.co/8mWRNCXmJo http://t.co/JghmnBBb8q',\n", + " 'source': 'iOS',\n", + " 'text': '#Agriculture : \"Le plan du Gouvernement '\n", + " \"n'apporte aucune solution durable aux \"\n", + " 'filières en crise\" @FrancoisFillon '\n", + " 'http://t.co/wZ3HkhHvuM',\n", " 'truncated': False,\n", " 'user': {'contributors_enabled': False,\n", - " 'created_at': 'Mon Jun 04 13:01:40 +0000 '\n", - " '2012',\n", + " 'created_at': 'Thu Apr 23 12:27:59 +0000 '\n", + " '2009',\n", " 'default_profile': False,\n", " 'default_profile_image': False,\n", - " 'description': 'Compte officiel de Famille '\n", - " 'Chrétienne, hebdomadaire '\n", - " 'catholique pour toute la '\n", - " 'famille depuis 1978.',\n", + " 'description': '#CM #ComPol #SocialMedia '\n", + " '#Politique • #PDL2015 '\n", + " '#AvecRetailleau • #Angers '\n", + " '#Paris #Nantes',\n", " 'entities': {'description': {'urls': []},\n", - " 'url': {'urls': [{'display_url': 'famillechretienne.fr',\n", - " 'expanded_url': 'http://www.famillechretienne.fr',\n", + " 'url': {'urls': [{'display_url': 'about.me/fixrichard',\n", + " 'expanded_url': 'http://about.me/fixrichard',\n", " 'indices': [0,\n", " 22],\n", - " 'url': 'http://t.co/eQZ8AnaOqT'}]}},\n", - " 'favourites_count': 2,\n", + " 'url': 'http://t.co/apaFxTrJMQ'}]}},\n", + " 'favourites_count': 10728,\n", " 'follow_request_sent': False,\n", - " 'followers_count': 7242,\n", + " 'followers_count': 2488,\n", " 'following': False,\n", - " 'friends_count': 797,\n", - " 'geo_enabled': False,\n", + " 'friends_count': 865,\n", + " 'geo_enabled': True,\n", " 'has_extended_profile': False,\n", - " 'id': 599222987,\n", - " 'id_str': '599222987',\n", + " 'id': 34598169,\n", + " 'id_str': '34598169',\n", " 'is_translation_enabled': False,\n", " 'is_translator': False,\n", " 'lang': 'fr',\n", - " 'listed_count': 128,\n", - " 'location': 'France',\n", - " 'name': 'Famille Chrétienne ن',\n", + " 'listed_count': 178,\n", + " 'location': 'Angers, Pays de la Loire',\n", + " 'name': 'Fix RICHARD',\n", " 'notifications': False,\n", - " 'profile_background_color': 'E8EDF0',\n", - " 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/446609674867970048/9tFZIPaq.png',\n", - " 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/446609674867970048/9tFZIPaq.png',\n", - " 'profile_background_tile': False,\n", - " 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/599222987/1426667652',\n", - " 'profile_image_url': 'http://pbs.twimg.com/profile_images/527776419644841984/Jj9EQX_p_normal.jpeg',\n", - " 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/527776419644841984/Jj9EQX_p_normal.jpeg',\n", - " 'profile_link_color': '005FAC',\n", - " 'profile_sidebar_border_color': '000000',\n", - " 'profile_sidebar_fill_color': 'DDEEF6',\n", + " 'profile_background_color': 'C71E4E',\n", + " 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme14/bg.gif',\n", + " 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme14/bg.gif',\n", + " 'profile_background_tile': True,\n", + " 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/34598169/1436884606',\n", + " 'profile_image_url': 'http://pbs.twimg.com/profile_images/621396058682343424/IMzOsat1_normal.jpg',\n", + " 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/621396058682343424/IMzOsat1_normal.jpg',\n", + " 'profile_link_color': '0C4499',\n", + " 'profile_sidebar_border_color': 'FFFFFF',\n", + " 'profile_sidebar_fill_color': 'C0DFEC',\n", " 'profile_text_color': '333333',\n", " 'profile_use_background_image': True,\n", " 'protected': False,\n", - " 'screen_name': 'FChretienne',\n", - " 'statuses_count': 8423,\n", + " 'screen_name': 'FixRichard',\n", + " 'statuses_count': 20446,\n", " 'time_zone': 'Paris',\n", - " 'url': 'http://t.co/eQZ8AnaOqT',\n", + " 'url': 'http://t.co/apaFxTrJMQ',\n", " 'utc_offset': 7200,\n", " 'verified': False}},\n", " 'source': 'Twitter for iPhone',\n", - " 'text': 'RT @FChretienne: Rencontre avec @FrancoisFillon au sujet des '\n", - " \"#Chrétiens d'Orient et de #Daech http://t.co/8mWRNCXmJo \"\n", - " 'http://t.co/JghmnBBb8q',\n", + " 'text': 'RT @FixRichard: #Agriculture : \"Le plan du Gouvernement n\\'apporte '\n", + " 'aucune solution durable aux filières en crise\" @FrancoisFillon '\n", + " 'http://t.c…',\n", " 'truncated': False,\n", " 'user': {'contributors_enabled': False,\n", " 'created_at': 'Thu Apr 12 08:09:49 +0000 2012',\n", @@ -1041,7 +943,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -1052,194 +954,96 @@ "\n", "db_name = 'cours_ensae'\n", "conn_string = \"host='localhost' dbname='{0}' user='python' password='kyojin'\".format( db_name )\n", - "conn_psql = psycopg2.connect(conn_string)\n", - "cursor_psql = conn_psql.cursor()" + "try:\n", + " conn_psql = psycopg2.connect(conn_string)\n", + " cursor_psql = conn_psql.cursor()\n", + " postgre_ok = True\n", + "except psycopg2.OperationalError:\n", + " postgre_ok = False " ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 13, "metadata": { "collapsed": false }, - "outputs": [ - { - "data": { - "text/plain": [ - "90310" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "conn_psql.server_version" + "if postgre_ok:\n", + " conn_psql.server_version" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ - "conn_psql.rollback()" + "if postgre_ok:\n", + " conn_psql.rollback()" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 15, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The slowest run took 11.21 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "1000 loops, best of 3: 361 µs per loop\n" - ] - }, - { - "data": { - "text/plain": [ - "(10000,\n", - " 'AF',\n", - " [(40000,), (40001,), (40002,), (40003,)],\n", - " [(160000,),\n", - " (160001,),\n", - " (160002,),\n", - " (160003,),\n", - " (160004,),\n", - " (160005,),\n", - " (160006,),\n", - " (160007,),\n", - " (160008,),\n", - " (160009,),\n", - " (160010,),\n", - " (160011,),\n", - " (160012,),\n", - " (160013,),\n", - " (160014,),\n", - " (160015,)])" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "def get_data_sql(doc_id):\n", - " cursor_psql.execute(\"SELECT id, company FROM document WHERE id = %s\", (doc_id,))\n", - " res_1 = cursor_psql.fetchone()\n", - " cursor_psql.execute(\"SELECT id FROM ticket WHERE document_id = %s ORDER BY id\", (doc_id,))\n", - " res_2 = cursor_psql.fetchall()\n", - " tickets_id = [it[0] for it in res_2 ]\n", - " cursor_psql.execute(\"SELECT id FROM coupon WHERE ticket_id = ANY( %s ) ORDER BY id\", (tickets_id,))\n", - " res_3 = cursor_psql.fetchall()\n", - " return res_1 + (res_2,) + (res_3,)\n", + "if postgre_ok:\n", + " def get_data_sql(doc_id):\n", + " cursor_psql.execute(\"SELECT id, company FROM document WHERE id = %s\", (doc_id,))\n", + " res_1 = cursor_psql.fetchone()\n", + " cursor_psql.execute(\"SELECT id FROM ticket WHERE document_id = %s ORDER BY id\", (doc_id,))\n", + " res_2 = cursor_psql.fetchall()\n", + " tickets_id = [it[0] for it in res_2 ]\n", + " cursor_psql.execute(\"SELECT id FROM coupon WHERE ticket_id = ANY( %s ) ORDER BY id\", (tickets_id,))\n", + " res_3 = cursor_psql.fetchall()\n", + " return res_1 + (res_2,) + (res_3,)\n", "\n", - "%timeit get_data_sql(10000) \n", - "get_data_sql(10000)" + " %timeit get_data_sql(10000) \n", + " get_data_sql(10000)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 16, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1000 loops, best of 3: 308 µs per loop\n" - ] - }, - { - "data": { - "text/plain": [ - "[(10000, 'AF', 40000, 160000),\n", - " (10000, 'AF', 40000, 160001),\n", - " (10000, 'AF', 40000, 160002),\n", - " (10000, 'AF', 40000, 160003),\n", - " (10000, 'AF', 40001, 160004),\n", - " (10000, 'AF', 40001, 160005),\n", - " (10000, 'AF', 40001, 160006),\n", - " (10000, 'AF', 40001, 160007),\n", - " (10000, 'AF', 40002, 160008),\n", - " (10000, 'AF', 40002, 160009),\n", - " (10000, 'AF', 40002, 160010),\n", - " (10000, 'AF', 40002, 160011),\n", - " (10000, 'AF', 40003, 160012),\n", - " (10000, 'AF', 40003, 160013),\n", - " (10000, 'AF', 40003, 160014),\n", - " (10000, 'AF', 40003, 160015)]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "def get_data_sql_join(doc_id):\n", - " cursor_psql.execute(\"SELECT d.id, d.company, t.id, c.id FROM document as d \\\n", - " JOIN ticket as t on d.id = t.document_id \\\n", - " JOIN coupon as c on t.id = c.ticket_id \\\n", - " WHERE d.id = %s\", (doc_id,))\n", - " return cursor_psql.fetchall()\n", + "if postgre_ok:\n", + " def get_data_sql_join(doc_id):\n", + " cursor_psql.execute(\"SELECT d.id, d.company, t.id, c.id FROM document as d \\\n", + " JOIN ticket as t on d.id = t.document_id \\\n", + " JOIN coupon as c on t.id = c.ticket_id \\\n", + " WHERE d.id = %s\", (doc_id,))\n", + " return cursor_psql.fetchall()\n", "\n", - "%timeit get_data_sql_join(10000) \n", - "get_data_sql_join(10000)" + " %timeit get_data_sql_join(10000) \n", + " get_data_sql_join(10000)" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 17, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The slowest run took 5.01 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "10000 loops, best of 3: 152 µs per loop\n" - ] - }, - { - "data": { - "text/plain": [ - "(10000,\n", - " 'AF',\n", - " {'id': 10000,\n", - " 'tickets': {'40000': {'coupons': [160000, 160001, 160002, 160003],\n", - " 'id': 40000},\n", - " '40001': {'coupons': [160004, 160005, 160006, 160007], 'id': 40001},\n", - " '40002': {'coupons': [160008, 160009, 160010, 160011], 'id': 40002},\n", - " '40003': {'coupons': [160012, 160013, 160014, 160015], 'id': 40003}}})" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "def get_data_nosql(doc_id):\n", - " cursor_psql.execute(\"SELECT id, company, content FROM document_nosql WHERE id = %s\", (doc_id,))\n", - " return cursor_psql.fetchone()\n", + "if postgre_ok:\n", + " def get_data_nosql(doc_id):\n", + " cursor_psql.execute(\"SELECT id, company, content FROM document_nosql WHERE id = %s\", (doc_id,))\n", + " return cursor_psql.fetchone()\n", "\n", - "%timeit get_data_nosql(10000)\n", - "get_data_nosql(10000)" + " %timeit get_data_nosql(10000)\n", + " get_data_nosql(10000)" ] }, { @@ -1252,44 +1056,27 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 18, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'_id': ObjectId('5622750b1a73480a89cbbd9f'),\n", - " 'grades': [20, 18, 7, 12],\n", - " 'nom': 'Martin',\n", - " 'prenom': 'Nicolas'}\n", - "{'_id': ObjectId('5622750b1a73480a89cbbda0'),\n", - " 'grades': [11, 5, 7, 12],\n", - " 'nom': 'Dupont',\n", - " 'prenom': 'Jean'}\n", - "{'_id': ObjectId('5622750b1a73480a89cbbda1'),\n", - " 'grades': [10, 10, 10, 10],\n", - " 'nom': 'Martin',\n", - " 'prenom': 'Gilles'}\n" - ] - } - ], + "outputs": [], "source": [ - "import pymongo\n", + "mongo = False\n", + "if mongo:\n", + " import pymongo\n", "\n", - "mongo_client = pymongo.MongoClient( 'localhost', 27017 )\n", - "mongo_db = mongo_client.ensae_db\n", + " mongo_client = pymongo.MongoClient( 'localhost', 27017 )\n", + " mongo_db = mongo_client.ensae_db\n", "\n", - "mongo_db.table_for_ensae.delete_many( {} )\n", - "mongo_db.table_for_ensae.insert_one( {'nom' : 'Martin', 'prenom' : 'Nicolas', 'grades': [20,18,7,12]} )\n", - "mongo_db.table_for_ensae.insert_one( {'nom' : 'Dupont', 'prenom' : 'Jean', 'grades': [11,5,7,12]} )\n", - "mongo_db.table_for_ensae.insert_one( {'nom' : 'Martin', 'prenom' : 'Gilles', 'grades': [10,10,10,10]} )\n", + " mongo_db.table_for_ensae.delete_many( {} )\n", + " mongo_db.table_for_ensae.insert_one( {'nom' : 'Martin', 'prenom' : 'Nicolas', 'grades': [20,18,7,12]} )\n", + " mongo_db.table_for_ensae.insert_one( {'nom' : 'Dupont', 'prenom' : 'Jean', 'grades': [11,5,7,12]} )\n", + " mongo_db.table_for_ensae.insert_one( {'nom' : 'Martin', 'prenom' : 'Gilles', 'grades': [10,10,10,10]} )\n", "\n", - "user = mongo_db.table_for_ensae.find_one( {'nom' : 'Dupont'} )\n", - "user_list = mongo_db.table_for_ensae.find( {} )\n", - "_ = list(map( pprint.pprint, user_list ))" + " user = mongo_db.table_for_ensae.find_one( {'nom' : 'Dupont'} )\n", + " user_list = mongo_db.table_for_ensae.find( {} )\n", + " _ = list(map( pprint.pprint, user_list ))" ] }, { @@ -1301,38 +1088,18 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 19, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'list': [{'_id': ObjectId('5622750b1a73480a89cbbd9f'),\n", - " 'grades': [20, 18, 7, 12],\n", - " 'nom': 'Martin',\n", - " 'prenom': 'Nicolas'},\n", - " {'_id': ObjectId('5622750b1a73480a89cbbda1'),\n", - " 'grades': [10, 10, 10, 10],\n", - " 'nom': 'Martin',\n", - " 'prenom': 'Gilles'}],\n", - " 'nom': 'Martin'},\n", - " {'list': [{'_id': ObjectId('5622750b1a73480a89cbbda0'),\n", - " 'grades': [11, 5, 7, 12],\n", - " 'nom': 'Dupont',\n", - " 'prenom': 'Jean'}],\n", - " 'nom': 'Dupont'}]\n" - ] - } - ], + "outputs": [], "source": [ - "result = mongo_db.table_for_ensae.group(['nom'], \n", + "if mongo:\n", + " result = mongo_db.table_for_ensae.group(['nom'], \n", " None,\n", " {'list': []}, # initial\n", " 'function(obj, prev) {prev.list.push(obj)}')\n", - "pprint.pprint( result )" + " pprint.pprint( result )" ] }, { @@ -1364,7 +1131,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 20, "metadata": { "collapsed": false }, @@ -1373,7 +1140,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "11943812\n" + "4284281\n" ] } ], @@ -1404,7 +1171,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 21, "metadata": { "collapsed": false }, @@ -1414,12 +1181,12 @@ "output_type": "stream", "text": [ " id screen_name\n", - "0 49191180 Romanet\n", - "1 22691514 psayer\n", - "2 1103159180 YannickBollore\n", - "3 2865692548 yveslemasne\n", - "4 413129652 bernardpivot1\n", - "(3286027, 2)\n" + "0 1103159180 YannickBollore\n", + "1 2865692548 yveslemasne\n", + "2 24732180 harlemdesir\n", + "3 359979086 jpraffarin\n", + "4 273341346 gilles_schnepp\n", + "(100071, 2)\n" ] } ], diff --git a/_doc/notebooks/td2a/seance_5_prog_fonctionnelle.ipynb b/_doc/notebooks/td2a/seance_5_prog_fonctionnelle.ipynb index 3e5a31460..8821685cd 100644 --- a/_doc/notebooks/td2a/seance_5_prog_fonctionnelle.ipynb +++ b/_doc/notebooks/td2a/seance_5_prog_fonctionnelle.ipynb @@ -134,11 +134,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { - "collapsed": true + "collapsed": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['twitter_for_network_100000.db']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import pyensae\n", "pyensae.download_data(\"twitter_for_network_100000.db.zip\")" @@ -171,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "collapsed": false }, @@ -184,9 +195,9 @@ "[1, 2, 3, 4]\n", "[1, 2, 3, 4]\n", "[4, 3, 2, 1]\n", - "The slowest run took 29.46 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "100 loops, best of 3: 2.07 ms per loop\n", - "100 loops, best of 3: 2.82 ms per loop\n" + "The slowest run took 23.73 times longer than the fastest. This could mean that an intermediate result is being cached \n", + "100 loops, best of 3: 3.96 ms per loop\n", + "100 loops, best of 3: 4.81 ms per loop\n" ] } ], @@ -235,27 +246,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ - "import os, psutil, gc\n", - "import resource\n", - "\n", + "import os, psutil, gc, sys\n", + "if not sys.platform.startswith(\"win\"):\n", + " import resource\n", + " \n", "def memory_usage_psutil():\n", " gc.collect()\n", " process = psutil.Process(os.getpid())\n", " mem = process.memory_info()[0] / float(2 ** 20)\n", "\n", " print( \"Memory used : %i MB\" % mem )\n", - " print( \"Max memory usage : %i MB\" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss//1024) )" + " if not sys.platform.startswith(\"win\"):\n", + " print( \"Max memory usage : %i MB\" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss//1024) )" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": { "collapsed": false }, @@ -264,8 +277,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Memory used : 39 MB\n", - "Max memory usage : 58 MB\n" + "Memory used : 109 MB\n" ] } ], @@ -295,7 +307,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -304,7 +316,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "1000000 loops, best of 3: 830 ns per loop\n", + "The slowest run took 8.45 times longer than the fastest. This could mean that an intermediate result is being cached \n", + "100000 loops, best of 3: 1.77 µs per loop\n", "\n" ] } @@ -324,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -333,7 +346,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1 loops, best of 3: 404 ms per loop\n" + "1 loops, best of 3: 888 ms per loop\n" ] }, { @@ -342,7 +355,7 @@ "333333833333500000" ] }, - "execution_count": 5, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -361,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -370,8 +383,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Memory used : 39 MB\n", - "Max memory usage : 58 MB\n" + "Memory used : 109 MB\n" ] } ], @@ -389,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -398,7 +410,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "1 loops, best of 3: 368 ms per loop\n", + "1 loops, best of 3: 973 ms per loop\n", "\n" ] } @@ -411,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -421,7 +433,7 @@ "output_type": "stream", "text": [ "333333833333500000\n", - "100 loops, best of 3: 12.7 ms per loop\n" + "10 loops, best of 3: 72.4 ms per loop\n" ] } ], @@ -432,7 +444,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -441,8 +453,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Memory used : 86 MB\n", - "Max memory usage : 117 MB\n" + "Memory used : 149 MB\n" ] } ], @@ -459,7 +470,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "metadata": { "collapsed": false }, @@ -470,7 +481,7 @@ "0" ] }, - "execution_count": 10, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -488,7 +499,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "metadata": { "collapsed": true }, @@ -500,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "metadata": { "collapsed": false }, @@ -510,7 +521,7 @@ "output_type": "stream", "text": [ "333333833333500000\n", - "1 loops, best of 3: 403 ms per loop\n" + "1 loops, best of 3: 1.01 s per loop\n" ] } ], @@ -533,7 +544,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 17, "metadata": { "collapsed": false }, @@ -544,7 +555,7 @@ "['twitter_for_network_100000.db']" ] }, - "execution_count": 2, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -556,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": { "collapsed": false }, @@ -565,8 +576,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Memory used : 86 MB\n", - "Max memory usage : 117 MB\n" + "Memory used : 149 MB\n" ] } ], @@ -576,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": { "collapsed": true }, @@ -606,7 +616,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "metadata": { "collapsed": false }, @@ -615,9 +625,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "The slowest run took 4.18 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "100000 loops, best of 3: 8.24 µs per loop\n", - "100000 loops, best of 3: 13.1 µs per loop\n" + "The slowest run took 4.26 times longer than the fastest. This could mean that an intermediate result is being cached \n", + "10000 loops, best of 3: 16.9 µs per loop\n", + "10000 loops, best of 3: 28.6 µs per loop\n" ] } ], @@ -634,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "metadata": { "collapsed": false }, @@ -653,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "metadata": { "collapsed": false }, @@ -664,7 +674,7 @@ "100071" ] }, - "execution_count": 17, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -688,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "metadata": { "collapsed": false }, @@ -699,7 +709,7 @@ "108086205" ] }, - "execution_count": 18, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -717,7 +727,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 24, "metadata": { "collapsed": false }, @@ -726,8 +736,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "The slowest run took 6.81 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "1 loops, best of 3: 1.5 µs per loop\n" + "1 loops, best of 3: 5.99 µs per loop\n" ] } ], @@ -738,7 +747,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 25, "metadata": { "collapsed": false }, @@ -749,7 +758,7 @@ "108086205" ] }, - "execution_count": 20, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -764,7 +773,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 26, "metadata": { "collapsed": false }, @@ -775,7 +784,7 @@ "108086205" ] }, - "execution_count": 21, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -796,7 +805,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 27, "metadata": { "collapsed": false }, @@ -805,8 +814,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "1 loops, best of 3: 954 ms per loop\n", - "1 loops, best of 3: 1.15 s per loop\n" + "1 loops, best of 3: 2.52 s per loop\n", + "1 loops, best of 3: 2.75 s per loop\n" ] } ], @@ -830,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 28, "metadata": { "collapsed": false }, @@ -839,8 +848,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Memory used : 295 MB\n", - "Max memory usage : 521 MB\n" + "Memory used : 156 MB\n" ] } ], @@ -858,7 +866,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 29, "metadata": { "collapsed": false }, @@ -867,8 +875,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "1 loops, best of 3: 1.02 s per loop\n", - "1 loops, best of 3: 1.02 s per loop\n" + "1 loops, best of 3: 2.66 s per loop\n", + "1 loops, best of 3: 2.72 s per loop\n" ] } ], @@ -896,7 +904,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 30, "metadata": { "collapsed": false }, @@ -905,9 +913,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "1000 loops, best of 3: 2.54 µs per loop\n", - "The slowest run took 4.45 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "100000 loops, best of 3: 8.32 µs per loop\n" + "1000 loops, best of 3: 11.3 µs per loop\n", + "100000 loops, best of 3: 16.6 µs per loop\n" ] } ], @@ -956,7 +963,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 31, "metadata": { "collapsed": false }, @@ -989,7 +996,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 32, "metadata": { "collapsed": false }, @@ -1017,7 +1024,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 33, "metadata": { "collapsed": true }, @@ -1028,7 +1035,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 34, "metadata": { "collapsed": false }, @@ -1039,7 +1046,7 @@ "4284281" ] }, - "execution_count": 29, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1062,7 +1069,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 35, "metadata": { "collapsed": false }, @@ -1073,7 +1080,7 @@ "4284281" ] }, - "execution_count": 30, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1094,7 +1101,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 36, "metadata": { "collapsed": false }, @@ -1105,7 +1112,7 @@ "4284281" ] }, - "execution_count": 31, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1121,7 +1128,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 37, "metadata": { "collapsed": false }, @@ -1152,7 +1159,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 38, "metadata": { "collapsed": false }, @@ -1163,7 +1170,7 @@ "10000" ] }, - "execution_count": 33, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1182,7 +1189,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 39, "metadata": { "collapsed": false }, @@ -1191,12 +1198,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "The slowest run took 4533.33 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "1 loops, best of 3: 22.8 µs per loop\n", - "The slowest run took 41853.23 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "1 loops, best of 3: 2.51 µs per loop\n", - "The slowest run took 34816.54 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "1 loops, best of 3: 3.46 µs per loop\n", + "The slowest run took 7732.75 times longer than the fastest. This could mean that an intermediate result is being cached \n", + "1 loops, best of 3: 46.6 µs per loop\n", + "The slowest run took 46923.46 times longer than the fastest. This could mean that an intermediate result is being cached \n", + "1 loops, best of 3: 5.56 µs per loop\n", + "The slowest run took 36110.35 times longer than the fastest. This could mean that an intermediate result is being cached \n", + "1 loops, best of 3: 8.55 µs per loop\n", "{1: 1603,\n", " 2: 107,\n", " 3: 32,\n", @@ -1279,7 +1286,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 40, "metadata": { "collapsed": false }, @@ -1288,8 +1295,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Memory used : 257 MB\n", - "Max memory usage : 521 MB\n" + "Memory used : 118 MB\n" ] } ], @@ -1317,7 +1323,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 41, "metadata": { "collapsed": false }, @@ -1333,7 +1339,7 @@ " {'age': 2, 'animal': 'chien', 'npm': 'Max'}]}" ] }, - "execution_count": 36, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -1353,7 +1359,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 42, "metadata": { "collapsed": false }, @@ -1363,56 +1369,56 @@ "output_type": "stream", "text": [ "'' : 69975\n", - "'Abidjan-RCI' : 1\n", - "'Périgueux' : 1\n", - "'Champigny-sur-Marne' : 1\n", - "'Cannes en France' : 1\n", - "'Charleroi ' : 1\n", - "'San Francisco, California' : 2\n", - "'Paris, Chantilly - FRANCE' : 1\n", - "'région PACA FRANCE' : 1\n", - "'Cg ' : 1\n", - "'au large' : 1\n", - "'Департамент всемирной глупости' : 1\n", - "'nontron' : 1\n", - "'Amiens, Picardie' : 5\n", - "'real madrid' : 1\n", - "'Harel, Luxembourg' : 1\n", - "'Virginia' : 1\n", - "'Saint-François' : 1\n", - "'gabon' : 2\n", - "'Lormont' : 1\n", - "'reliée' : 1\n", - "'créon- cadillac' : 1\n", - "'Paris - Montmatre ' : 1\n", - "'Limoges/Paris' : 1\n", - "'GlobeTrottoir! Zion' : 1\n", - "'adraoui' : 1\n", - "'Haiti' : 27\n", - "'Cloudsdale' : 1\n", - "'domloup' : 1\n", - "'Elysée' : 1\n", - "'gijon' : 1\n", - "'faraway' : 1\n", - "'Meaux' : 4\n", - "'mecca' : 1\n", - "'06100 NICE' : 1\n", - "'Funding Tech Startups' : 1\n", - "'ABBEVILLE, France' : 1\n", - "'Ile de France' : 22\n", - "'Barquisimeto - Venezuela' : 1\n", - "'Paris,Milan ' : 1\n", - "'LOCATION DE VOITURE ' : 1\n", - "'rio + são paulo' : 1\n", - "'french riviera' : 1\n", - "'PARIS..with LOVE' : 1\n", - "'capbreton' : 1\n", - "'51490' : 1\n", - "'Toulouse France ' : 1\n", - "'Les Loges, Île-de-France' : 1\n", - "'VERSAILLES ' : 1\n", - "'Aulnay, Poitou-Charentes' : 1\n", - "'LOMME' : 1\n" + "'Communauté Valencienne, Espagne' : 1\n", + "'Coral Springs, Fl' : 1\n", + "'Abbiategrasso' : 1\n", + "'Getafe - Bordeaux - Vigo' : 1\n", + "'Piscop, France' : 1\n", + "'Roma - Oslo' : 1\n", + "'tarbes' : 2\n", + "'Samoa' : 1\n", + "'E♡' : 1\n", + "'Epernay sous gevrey' : 1\n", + "'Porto-Vecchio, Corse' : 2\n", + "'Planète Terre®' : 1\n", + "'donzere' : 1\n", + "'Laval, take it or leave it' : 1\n", + "'Albi - Bordeaux ' : 1\n", + "'Vannes ' : 1\n", + "'Paris - Strasbourg' : 2\n", + "'Itabashi-ku, Tokyo' : 1\n", + "'Paris, Bilbao, Dieppe' : 1\n", + "'hello' : 1\n", + "'Francfort-sur-le-Main, Hesse' : 1\n", + "'Issy Les Moulineaux' : 1\n", + "'montgenost' : 1\n", + "'France/Toulouse' : 1\n", + "'UAE DUBAI' : 1\n", + "'Paris ~ Somewhere' : 1\n", + "'Vezin le Coquet' : 1\n", + "'تازة المغرب' : 1\n", + "'Paris 16' : 1\n", + "'Senegal' : 29\n", + "'Paris XVème' : 1\n", + "'tunis' : 29\n", + "'st cyprien plage' : 1\n", + "'Dhaka,Bangladesh' : 1\n", + "'Saône-et-Loire (71)' : 1\n", + "'panama' : 1\n", + "'63720 chappes' : 1\n", + "'Poueyferré' : 1\n", + "'Yaoundé-Cameroon' : 1\n", + "'Maroc,Meknès' : 1\n", + "'Bucarest' : 2\n", + "'france strasbourg' : 1\n", + "'dans un monde loin du vôtre' : 1\n", + "'nantes/paris' : 1\n", + "'Elassona, Greece' : 1\n", + "'San Francisco Bay Area' : 3\n", + "'Vannes, Bretagne' : 1\n", + "'AIX / MARSEILLE / PACA' : 1\n", + "'Castellon,España' : 1\n", + "'Roque perez' : 1\n" ] } ], @@ -1441,7 +1447,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 43, "metadata": { "collapsed": false }, @@ -1476,7 +1482,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 44, "metadata": { "collapsed": false }, @@ -1487,7 +1493,7 @@ "4284281" ] }, - "execution_count": 41, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1512,7 +1518,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 45, "metadata": { "collapsed": false }, @@ -1521,8 +1527,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "The slowest run took 55529.77 times longer than the fastest. This could mean that an intermediate result is being cached \n", - "1 loops, best of 3: 2.27 µs per loop\n" + "The slowest run took 52894.00 times longer than the fastest. This could mean that an intermediate result is being cached \n", + "1 loops, best of 3: 5.56 µs per loop\n" ] }, { @@ -1554,7 +1560,7 @@ " 'paris ': 366667}" ] }, - "execution_count": 42, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } diff --git a/_doc/notebooks/td2a/td2a_cenonce_session_5_donnees_non_structurees_et_programmation_fonctionnelle.ipynb b/_doc/notebooks/td2a/td2a_cenonce_session_5_donnees_non_structurees_et_programmation_fonctionnelle.ipynb index b7906d1b1..760cd6c40 100644 --- a/_doc/notebooks/td2a/td2a_cenonce_session_5_donnees_non_structurees_et_programmation_fonctionnelle.ipynb +++ b/_doc/notebooks/td2a/td2a_cenonce_session_5_donnees_non_structurees_et_programmation_fonctionnelle.ipynb @@ -514,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -558,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -566,10 +566,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -592,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": { "collapsed": true }, @@ -614,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -622,10 +622,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 17, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -650,28 +650,27 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { - "ename": "NameError", - "evalue": "name 'comptez_unique' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mcursor_sqlite\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m \u001b[1;34m\"SELECT content FROM tw_users\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mcomptez_unique\u001b[0m\u001b[1;33m(\u001b[0m \u001b[0mpluck_loc\u001b[0m\u001b[1;33m(\u001b[0m \u001b[0mget_json_seq\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcursor_sqlite\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mNameError\u001b[0m: name 'comptez_unique' is not defined" - ] + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "import cytoolz as ct \n", "\n", "cursor_sqlite.execute( \"SELECT content FROM tw_users\")\n", - "comptez_unique( pluck_loc( get_json_seq(cursor_sqlite)))" + "# comptez_unique( pluck_loc( get_json_seq(cursor_sqlite)))" ] }, { @@ -685,7 +684,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": { "collapsed": true }, @@ -711,7 +710,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": { "collapsed": false }, @@ -719,10 +718,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -737,7 +736,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": { "collapsed": false }, @@ -745,10 +744,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -787,7 +786,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": { "collapsed": false }, @@ -827,7 +826,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": { "collapsed": false }, @@ -887,7 +886,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": { "collapsed": true }, @@ -905,7 +904,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 24, "metadata": { "collapsed": false }, @@ -938,7 +937,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 25, "metadata": { "collapsed": true }, @@ -959,7 +958,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 26, "metadata": { "collapsed": false }, @@ -991,7 +990,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 27, "metadata": { "collapsed": false }, @@ -1012,7 +1011,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 28, "metadata": { "collapsed": false }, @@ -1024,7 +1023,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 29, "metadata": { "collapsed": false }, @@ -1038,7 +1037,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 30, "metadata": { "collapsed": false }, diff --git a/_doc/notebooks/td2a/td2a_correction_session_5_donnees_non_structurees_et_programmation_fonctionnelle_corrige.ipynb b/_doc/notebooks/td2a/td2a_correction_session_5_donnees_non_structurees_et_programmation_fonctionnelle_corrige.ipynb index a29d38614..91cff2c96 100644 --- a/_doc/notebooks/td2a/td2a_correction_session_5_donnees_non_structurees_et_programmation_fonctionnelle_corrige.ipynb +++ b/_doc/notebooks/td2a/td2a_correction_session_5_donnees_non_structurees_et_programmation_fonctionnelle_corrige.ipynb @@ -2119,7 +2119,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 25, "metadata": { "collapsed": true }, @@ -2132,7 +2132,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 26, "metadata": { "collapsed": false }, @@ -2148,7 +2148,7 @@ " 'tw_users_htmldata']" ] }, - "execution_count": 30, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -2159,7 +2159,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 27, "metadata": { "collapsed": true }, @@ -2177,7 +2177,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 28, "metadata": { "collapsed": true }, @@ -2198,7 +2198,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 29, "metadata": { "collapsed": false }, @@ -2230,7 +2230,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 30, "metadata": { "collapsed": false }, @@ -2251,7 +2251,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 31, "metadata": { "collapsed": false }, @@ -2262,7 +2262,7 @@ "80536977" ] }, - "execution_count": 37, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -2275,7 +2275,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 32, "metadata": { "collapsed": false }, @@ -2284,18 +2284,18 @@ "data": { "text/plain": [ "[(b'', 299.8513534747518),\n", - " (b'Bourg la Reine', 7914.0),\n", - " (b'Amiens - France', 364.0),\n", - " (b'Huesca', 9.0),\n", - " (b'Germany / Deutschland', 32.0),\n", - " (b'drancy', 64.0),\n", - " (b'Alencon', 23.0),\n", - " (b\"abidjan cote d'ivoire\", 4.5),\n", - " (b'Singapore / France', 569.0),\n", - " (b'Undetermined \\t', 47.0)]" + " (b'hi', 8.0),\n", + " (b'Yvelines VAR ', 8799.0),\n", + " (b'venezia', 0.0),\n", + " (b'Dieue sur meuse', 6.0),\n", + " (b'ZAC Nicopolis', 63.0),\n", + " (b'Cannes ', 64.6),\n", + " (b'Knokke-Heist, Belgi\\xc3\\xab', 2.0),\n", + " (b'\\xc3\\x9cT: 50.648738,2.980248', 69.0),\n", + " (b'Guebwiller', 71.0)]" ] }, - "execution_count": 41, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -2324,7 +2324,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 33, "metadata": { "collapsed": false }, @@ -2342,7 +2342,7 @@ " (7, 1)]" ] }, - "execution_count": 43, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } diff --git a/src/ensae_teaching_cs/automation/notebook_test_helper.py b/src/ensae_teaching_cs/automation/notebook_test_helper.py index a19b4cdc2..936aea19e 100644 --- a/src/ensae_teaching_cs/automation/notebook_test_helper.py +++ b/src/ensae_teaching_cs/automation/notebook_test_helper.py @@ -50,9 +50,11 @@ def get_additional_paths(): import pyquickhelper import pyensae import pymmails + import pymyinstall addpath = [os.path.dirname(pyquickhelper.__file__), os.path.dirname(pyensae.__file__), os.path.dirname(pymmails.__file__), + os.path.dirname(pymyinstall.__file__), os.path.join(os.path.abspath(os.path.dirname(__file__)), ".."), ] addpath = [os.path.normpath(os.path.join(_, "..")) for _ in addpath]