In [23]:
# The tweets in our twitter collection have a field called "source". This field describes the application
# that was used to create the tweet. Following the examples for using the $group operator, your task is
# to modify the 'make_pipeline' function to identify the most used applications for creating tweets.
# As a check on your query, 'web' should be listed as the most frequently used application and
# 'ÜberTwitter' as the second most used. The number of tweets per application should be stored in a
# field named 'count' (see the assertion at the end of the script).

# Please modify only the 'make_pipeline' function so that it creates and returns an aggregation pipeline
# that can be passed to the MongoDB aggregate function. As in our examples in this lesson, the aggregation 
# pipeline should be a list of one or more dictionary objects. 
# Please review the lesson examples if you are unsure of the syntax.

# Your code will be run against a MongoDB instance that we have provided.
# If you want to run this code locally on your machine, you have to install MongoDB,
# then download the dataset and insert it into the database.
# For instructions on MongoDB setup and the datasets, please see the Course Materials.

# Please note that the dataset you are using here is a smaller version of the twitter dataset 
# used in examples in this lesson. 
# If you attempt some of the same queries that we looked at in the lesson examples,
# your results will be different.

In [1]:
from pymongo import MongoClient
# Create a client for the MongoDB server at localhost, port 27017.
client = MongoClient(host='localhost:27017')
# Final expression of the cell: shows the names of the databases on that server.
client.list_database_names()

['admin', 'config', 'local', 'udacity']

In [2]:
# Select the 'udacity' database (item access is equivalent to `client.udacity`).
db = client['udacity']

# Final expression of the cell: shows the collections stored in that database.
db.list_collection_names()

['twitter']

In [3]:
# Handle to the 'twitter' collection; the queries in the following cells go through it.
twitter = db['twitter']

In [4]:
# Display the collection object's repr to confirm which server, database and
# collection the handle points at.
twitter

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'udacity'), 'twitter')

In [5]:
# Example tweet: fetch a single document from the collection to inspect which
# fields a tweet carries (e.g. 'source', which the aggregation groups on).
twitter.find_one()

{'_id': ObjectId('63e3b543beb9905c522e8828'),
 'text': 'eu preciso de terminar de fazer a minha tabela, está muito foda **',
 'in_reply_to_status_id': None,
 'retweet_count': None,
 'contributors': None,
 'created_at': 'Thu Sep 02 18:11:23 +0000 2010',
 'geo': None,
 'source': 'web',
 'coordinates': None,
 'in_reply_to_screen_name': None,
 'truncated': False,
 'entities': {'user_mentions': [], 'urls': [], 'hashtags': []},
 'retweeted': False,
 'place': None,
 'user': {'friends_count': 73,
  'profile_sidebar_fill_color': '768575',
  'location': '',
  'verified': False,
  'follow_request_sent': None,
  'favourites_count': 1,
  'profile_sidebar_border_color': '1c9dbd',
  'profile_image_url': 'http://a2.twimg.com/profile_images/1036412454/OgAAADXK9q6kaxrvfwQTINH66RVLAH9YHb-veRTA4FaWb9KtbGGV_yKTGzmvzTfJidqAb5gK_mpspIE-MIvAASGH2CwAm1T1UIPQk0-HS8x_TV5kdnW30nch7ODk-1_normal.jpg',
  'geo_enabled': False,
  'created_at': 'Fri Jul 03 21:44:05 +0000 2009',
  'description': 'só os loucos sabem (:',

In [21]:
def make_pipeline(min_count=10):
    """Build the aggregation pipeline that ranks tweet sources by usage.

    Args:
        min_count: Smallest number of tweets a source needs in order to be
            kept in the result. Defaults to 10.

    Returns:
        A list of stage dictionaries that can be passed to
        ``collection.aggregate``. Each result document holds the source in
        ``_id`` and its number of tweets in ``count``; results are ordered
        by ``count``, largest first.
    """
    return [
        # One output document per distinct 'source' value, counting its tweets.
        {'$group': {'_id': '$source', 'count': {'$sum': 1}}},
        # Drop rarely used sources before sorting so that only the surviving
        # groups have to be ordered; the resulting documents are the same.
        {'$match': {'count': {'$gte': min_count}}},
        # Most frequently used source first.
        {'$sort': {'count': -1}},
    ]


pipeline = make_pipeline()

In [22]:
# Run the aggregation on the `twitter` collection and print every result
# document on its own line.
# Alternative that collects the results into a list instead of printing them:
#     [doc for doc in twitter.aggregate(pipeline)]
for doc in twitter.aggregate(pipeline):
    print(doc)

{'_id': 'web', 'count': 23136}
{'_id': '<a href="http://www.ubertwitter.com/bb/download.php" rel="nofollow">ÜberTwitter</a>', 'count': 3393}
{'_id': '<a href="http://www.tweetdeck.com" rel="nofollow">TweetDeck</a>', 'count': 3370}
{'_id': '<a href="http://blackberry.com/twitter" rel="nofollow">Twitter for BlackBerry®</a>', 'count': 2249}
{'_id': '<a href="http://twitter.com/" rel="nofollow">Twitter for iPhone</a>', 'count': 2009}
{'_id': '<a href="http://www.echofon.com/" rel="nofollow">Echofon</a>', 'count': 1774}
{'_id': '<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>', 'count': 1652}
{'_id': '<a href="http://mobile.twitter.com" rel="nofollow">mobile web</a>', 'count': 1374}
{'_id': '<a href="/devices" rel="nofollow">txt</a>', 'count': 1085}
{'_id': '<a href="http://www.hootsuite.com" rel="nofollow">HootSuite</a>', 'count': 706}
{'_id': '<a href="http://formspring.me" rel="nofollow">Formspring.me</a>', 'count': 494}
{'_id': '<a href="http://twidroid.com" rel="nofollo