# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [42]:
import feedparser
import pandas as pd

### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
# Download and parse the feed at `url`, then show the feed-level
# metadata held under the result's 'feed' component.
url = 'http://feeds.feedburner.com/oreilly/radar/atom'
feedburner = feedparser.parse(url)
print(feedburner.feed)

{'title': 'Radar', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Radar'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'https://www.oreilly.com/radar'}, {'rel': 'self', 'type': 'application/rss+xml', 'href': 'http://feeds.feedburner.com/oreilly/radar/atom'}, {'rel': 'hub', 'href': 'http://pubsubhubbub.appspot.com/', 'type': 'text/html'}], 'link': 'https://www.oreilly.com/radar', 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology', 'subtitle_detail': {'type': 'text/html', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'}, 'updated': 'Tue, 17 Nov 2020 13:01:17 +0000', 'updated_parsed': time.struct_time(tm_year=2020, tm_mon=11, tm_mday=17, tm_hour=13, tm_min=1, tm_sec=17, tm_wday=1, tm_yday=322, tm_isd

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
# List the top-level components (keys) of the parsed result.
feedburner.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
# The 'feed' component is itself a dictionary nested inside the parsed result,
# so listing its keys shows which feed-level fields are available.
feedburner['feed'].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [15]:
# Print the feed-level title, subtitle, author and link.
# The feed component of this RSS feed exposes no 'author' key, so the author
# is looked up with .get() and a placeholder is printed instead of raising.
print("feed title: " + feedburner.feed.title)
print('')
print("feed subtitle: " + feedburner.feed.subtitle)
print('')
print("feed author: " + feedburner.feed.get('author', 'not provided by this feed'))
print('')
print("feed link: " + feedburner.feed.link)

feed title: Radar

feed subtitle: Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology

feed link: https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [28]:
# Each element of feedburner.entries is one entry of the feed.
entry_count = len(feedburner.entries)
print(f'There are {entry_count} entries in this RSS feed')

There are 60 entries in this RSS feed


In [33]:
#to inspect a single entry
#feedburner.entries[0]

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [35]:
#We can see that the data structure within this seems to be a list where each entry is an element that contains 
#a dictionary with the information for each entry. We can access the individual entries via indexing 
#and then we can look at the keys available for the entry by calling the keys() method. 

In [36]:
# Index into the entries first (position 0), then list that entry's keys.
feedburner.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [38]:
# Collect the title of every entry by iterating over the entries directly.
titles = [entry.title for entry in feedburner.entries]
print(titles)

['On Exactitude in Technical Debt', 'Four short links: 17 Nov 2020', 'Four short links: 13 Nov 2020', 'Multi-Paradigm Languages', 'Four short links: 10 November 2020', 'Four short links: 6 Nov 2020', 'Four short links: 4 Nov 2020', 'Radar trends to watch: November 2020', 'Four short links: 30 Oct 2020', 'Four short links: 28 Oct 2020', 'Our Favorite Questions', 'Four short links: 21 Oct 2020', 'Four Short Links: 16 October 2020', 'Four short links: 14 Oct 2020', 'AI Product Management After Deployment', 'Four short links: 9 October 2020', 'AI and Creativity', 'Four short links: 6 October 2020', 'Four short links: 2 October 2020', 'Radar trends to watch: October 2020', 'Four short links: 29 Sep 2020', 'Four short links: 25 September 2020', 'Four short links: 18 Sep 2020', 'Four short links: 16 Sep 2020', 'How to Set AI Goals', 'Four short links: 11 Sep 2020', 'Four short links: 9 Sep 2020', 'Pair Programming with AI', 'Four short links: 4 September 2020', 'Four short links: 2 September 

### 8. Calculate the percentage of "Four short links" entry titles.

In [52]:
#number of titles that start with "Four short links"
four_list = [title for title in titles if title.startswith('Four short links')]
print(len(four_list))

39


In [61]:
# Share of "Four short links" titles among all titles, as a percentage.
matching_count = len(four_list)
total_count = len(titles)
percentage = matching_count / total_count * 100
print(f'The percentage of Four short links entry titles is {percentage}%')

The percentage of Four short links entry titles is 65.0%


### 9. Create a Pandas data frame from the feed's entries.

In [62]:
# Build a data frame from the feed's entries and preview the first 10 rows.
df = pd.DataFrame(feedburner.entries)
df.head(10)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,On Exactitude in Technical Debt,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/on-exactitude-in...,"Tue, 17 Nov 2020 12:23:15 +0000","(2020, 11, 17, 12, 23, 15, 1, 322, 0)",[{'name': 'Kevlin Henney'}],Kevlin Henney,{'name': 'Kevlin Henney'},"[{'term': 'Commentary', 'scheme': None, 'label...",https://www.oreilly.com/radar/?p=13412,False,If software is such stuff as dreams are made o...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/on-exactitude-in...,0,https://www.oreilly.com/radar/on-exactitude-in...
1,Four short links: 17 Nov 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 17 Nov 2020 12:21:43 +0000","(2020, 11, 17, 12, 21, 43, 1, 322, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13414,False,NDSS Symposium 2020 Papers &#8212; Large pile ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
2,Four short links: 13 Nov 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 13 Nov 2020 12:20:52 +0000","(2020, 11, 13, 12, 20, 52, 4, 318, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13409,False,Advanced System on a Chip Lecture Notes (2016)...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
3,Multi-Paradigm Languages,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/multi-paradigm-l...,"Tue, 10 Nov 2020 13:29:21 +0000","(2020, 11, 10, 13, 29, 21, 1, 315, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Software Engineering', 'scheme': No...",https://www.oreilly.com/radar/?p=13406,False,The programming world used to be split into fu...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/multi-paradigm-l...,0,https://www.oreilly.com/radar/multi-paradigm-l...
4,Four short links: 10 November 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 10 Nov 2020 12:13:23 +0000","(2020, 11, 10, 12, 13, 23, 1, 315, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13404,False,Hypothesis as Liability &#8212; Would the ment...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
5,Four short links: 6 Nov 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 06 Nov 2020 11:59:34 +0000","(2020, 11, 6, 11, 59, 34, 4, 311, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13399,False,Dealing with Security Holes in Chips &#8212; s...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
6,Four short links: 4 Nov 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 04 Nov 2020 11:46:23 +0000","(2020, 11, 4, 11, 46, 23, 2, 309, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13396,False,The AI Who Mistook a Bald Head for a Football ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
7,Radar trends to watch: November 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Mon, 02 Nov 2020 12:28:14 +0000","(2020, 11, 2, 12, 28, 14, 0, 307, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13388,False,Perhaps the most important event this month is...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...
8,Four short links: 30 Oct 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 30 Oct 2020 11:05:49 +0000","(2020, 10, 30, 11, 5, 49, 4, 304, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13385,False,"Mutation Testing &#8212; in this paper, we sem...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
9,Four short links: 28 Oct 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 28 Oct 2020 11:39:13 +0000","(2020, 10, 28, 11, 39, 13, 2, 302, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13382,False,"Phantom of the ADAS &#8212; In this paper, we ...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [63]:
# Group the entries by author, count the titles in each group, label that
# count 'entries', and display the authors with the most entries first.
authors = (
    df.groupby('author', as_index=False)
      .agg({'title': 'count'})
      .rename(columns={'title': 'entries'})
)
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
8,Nat Torkington,40
6,Mike Loukides,10
0,,1
1,Alex Castrounis,1
2,Justin Norman and Mike Loukides,1
3,"Justin Norman, Peter Skomoroch and Mike Loukides",1
4,Kevlin Henney,1
5,Matthew Rocklin and Hugo Bowne-Anderson,1
7,Mike Loukides and Steve Swoyer,1
9,Q Ethan McCallum and Mike Loukides,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [64]:
# Store the number of characters of each title in a new column, then show
# title, author and title length with the longest titles at the top.
df['title_length'] = df['title'].map(len)
title_overview = df[['title', 'author', 'title_length']]
title_overview.sort_values('title_length', ascending=False).head()

Unnamed: 0,title,author,title_length
36,Why Best-of-Breed is a Better Choice than All-...,Matthew Rocklin and Hugo Bowne-Anderson,79
14,AI Product Management After Deployment,Justin Norman and Mike Loukides,38
30,Radar trends to watch: September 2020,Mike Loukides,37
38,The Least Liked Programming Languages,Mike Loukides,37
7,Radar trends to watch: November 2020,Mike Loukides,36


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [75]:
# Titles of the entries whose summary mentions "machine learning".
#
# The mask is True for rows whose summary contains the phrase:
#   - case=False  -> "Machine Learning" is matched as well
#   - regex=False -> the phrase is treated as plain text, not as a pattern
#   - na=False    -> a missing summary counts as "no match" instead of NaN
ml_mask = df['summary'].str.contains('machine learning', case=False, regex=False, na=False)

# Keep only the matching rows' titles, as a plain Python list, and print it once.
titles_ml = df.loc[ml_mask, 'title'].tolist()
print(titles_ml)