# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [8]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [16]:
rssfeed = feedparser.parse(url)
rssfeed

{'feed': {'title': 'Radar',
  'title_detail': {'type': 'text/plain',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Radar'},
  'links': [{'rel': 'alternate',
    'type': 'text/html',
    'href': 'https://www.oreilly.com/radar'},
   {'rel': 'self',
    'type': 'application/rss+xml',
    'href': 'http://feeds.feedburner.com/oreilly/radar/atom'},
   {'rel': 'hub',
    'href': 'http://pubsubhubbub.appspot.com/',
    'type': 'text/html'}],
  'link': 'https://www.oreilly.com/radar',
  'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
  'subtitle_detail': {'type': 'text/html',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'},
  'updated': 'Tue, 05 Nov 2019 12:28:35 +0000',
  'updated_parsed': time.struct_time(tm_year=2019, tm_mon=11, tm_m

### 2. Obtain a list of components (keys) that are available for this feed.

In [48]:
rssfeed.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [47]:
rssfeed.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [36]:
print(rssfeed.feed.title)
print(rssfeed.feed.subtitle)
#print(rssfeed.feed.author)
print(rssfeed.feed.link)

Radar
Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [41]:
print(len(rssfeed.entries))

18


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [53]:
rssfeed.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [55]:
titles = [rssfeed.entries[i].title for i in range(len(rssfeed.entries))]
print(titles)

['It’s important to cultivate your organization’s collective genius', 'Four short links: 5 November 2019', 'Four short links: 4 November 2019', 'Quantum computing’s potential is still far off, but quantum supremacy shows we’re on the right track', 'Four short links: 1 November 2019', 'Highlights from TensorFlow World in Santa Clara, California 2019', 'Sticker recommendations and AI-driven innovations on the Hike messaging platform', '“Human error”: How can we help people build models that do what they expect', 'Personalization of Spotify Home and TensorFlow', 'TensorFlow.js: Bringing machine learning to JavaScript', 'TFX: An end-to-end ML platform for everyone', 'MLIR: Accelerating AI', 'TensorFlow Hub: The platform to share and discover pretrained models for TensorFlow', 'TensorFlow Lite: ML for mobile and IoT devices', 'Four short links: 31 October 2019', 'Accelerating ML at Twitter', 'The latest from TensorFlow', 'TensorFlow World 2019 opening keynote']


### 8. Calculate the percentage of "Four short links" entry titles.

In [67]:
percentage = ("{0:.0%}".format(len([i for i in titles if "Four short links" in i])/len(titles)))
print(percentage)

22%


### 9. Create a Pandas data frame from the feed's entries.

In [68]:
import pandas as pd

In [83]:
df = pd.DataFrame(rssfeed.entries)
df.head(10)

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],https://www.oreilly.com/radar/its-important-to...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/its-important-to...,False,https://www.oreilly.com/radar/?p=10231,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 05 Nov 2019 05:05:36 +0000","(2019, 11, 5, 5, 5, 36, 1, 309, 0)",0,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Future of the Firm', 'scheme': None...",It’s important to cultivate your organization’...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/its-important-to...
1,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=10644,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 05 Nov 2019 05:01:13 +0000","(2019, 11, 5, 5, 1, 13, 1, 309, 0)",0,&#8220;Nearly All&#8221; Counter-Strike Microt...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 5 November 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
2,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=10612,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 04 Nov 2019 05:01:01 +0000","(2019, 11, 4, 5, 1, 1, 0, 308, 0)",0,Beyond Bots and Trolls: Understanding Disinfor...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 4 November 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
3,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/quantum-computin...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/quantum-computin...,False,https://www.oreilly.com/radar/?p=10154,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 01 Nov 2019 04:05:34 +0000","(2019, 11, 1, 4, 5, 34, 4, 305, 0)",0,One of the most exciting topics we’ve been fol...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Innovation & Disruption', 'scheme':...",Quantum computing’s potential is still far off...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/quantum-computin...
4,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=10586,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 01 Nov 2019 04:01:53 +0000","(2019, 11, 1, 4, 1, 53, 4, 305, 0)",0,Vortimo &#8212; software that organizes inform...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 1 November 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
5,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/highlights-from-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,False,https://www.oreilly.com/radar/?p=10168,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 01 Nov 2019 00:30:39 +0000","(2019, 11, 1, 0, 30, 39, 4, 305, 0)",0,People from across the TensorFlow community ca...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Highlights from TensorFlow World in Santa Clar...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...
6,Ankur Narang,{'name': 'Ankur Narang'},[{'name': 'Ankur Narang'}],https://www.oreilly.com/radar/sticker-recommen...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/sticker-recommen...,False,https://www.oreilly.com/radar/?p=10426,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 01 Nov 2019 00:00:59 +0000","(2019, 11, 1, 0, 0, 59, 4, 305, 0)",0,This is a keynote from TensorFlow World in San...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Sticker recommendations and AI-driven innovati...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/sticker-recommen...
7,Anna Roth,{'name': 'Anna Roth'},[{'name': 'Anna Roth'}],https://www.oreilly.com/radar/human-error-how-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/human-error-how-...,False,https://www.oreilly.com/radar/?p=10414,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 01 Nov 2019 00:00:58 +0000","(2019, 11, 1, 0, 0, 58, 4, 305, 0)",0,This is a keynote from TensorFlow World in San...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",“Human error”: How can we help people build mo...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/human-error-how-...
8,Tony Jebara,{'name': 'Tony Jebara'},[{'name': 'Tony Jebara'}],https://www.oreilly.com/radar/personalization-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/personalization-...,False,https://www.oreilly.com/radar/?p=10400,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 01 Nov 2019 00:00:46 +0000","(2019, 11, 1, 0, 0, 46, 4, 305, 0)",0,This is a keynote from TensorFlow World in San...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Personalization of Spotify Home and TensorFlow,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/personalization-...
9,Sandeep Gupta and Joseph Paul Cohen,{'name': 'Sandeep Gupta and Joseph Paul Cohen'},[{'name': 'Sandeep Gupta and Joseph Paul Cohen'}],https://www.oreilly.com/radar/tensorflow-js-br...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/tensorflow-js-br...,False,https://www.oreilly.com/radar/?p=10436,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 01 Nov 2019 00:00:36 +0000","(2019, 11, 1, 0, 0, 36, 4, 305, 0)",0,This is a keynote from TensorFlow World in San...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",TensorFlow.js: Bringing machine learning to Ja...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/tensorflow-js-br...


### 10. Count the number of entries per author and sort them in descending order.

In [70]:
authors = df.groupby('author', as_index=False).agg({'title':'count'})
authors.columns = ['author', 'entries']
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
11,Nat Torkington,4
0,Ankur Narang,1
1,Anna Roth,1
2,Chris Lattner and Tatiana Shpeisman,1
3,Jared Duke and Sarah Sirajuddin,1
4,Jeff Dean,1
5,Jenn Webb,1
6,Konstantinos Katsiapis and Anusha Ramesh,1
7,Mac Slocum,1
8,Megan Kacholia,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [90]:
length = [len(i) for i in df['title']]
df['title_length'] = length 
df[['title', 'author','title_length']].sort_values('title_length', ascending=False)

Unnamed: 0,title,author,title_length
3,Quantum computing’s potential is still far off...,Mike Loukides,100
12,TensorFlow Hub: The platform to share and disc...,Mike Liang,83
6,Sticker recommendations and AI-driven innovati...,Ankur Narang,80
7,“Human error”: How can we help people build mo...,Anna Roth,75
0,It’s important to cultivate your organization’...,Jenn Webb,65
5,Highlights from TensorFlow World in Santa Clar...,Mac Slocum,64
9,TensorFlow.js: Bringing machine learning to Ja...,Sandeep Gupta and Joseph Paul Cohen,54
8,Personalization of Spotify Home and TensorFlow,Tony Jebara,46
13,TensorFlow Lite: ML for mobile and IoT devices,Jared Duke and Sarah Sirajuddin,46
10,TFX: An end-to-end ML platform for everyone,Konstantinos Katsiapis and Anusha Ramesh,43


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [92]:
ML = [i for i in titles if "machine learning" in i]
print(ML)

['TensorFlow.js: Bringing machine learning to JavaScript']
