# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
# Address of the O'Reilly Radar feed that the rest of the notebook parses.
url = "http://feeds.feedburner.com/oreilly/radar/atom"

In [3]:
# Download and parse the feed located at `url`.
feed = feedparser.parse(url)

# feedparser reports download/parse problems through the `bozo` flag (with the
# cause in `bozo_exception`) instead of raising, so surface it here; otherwise
# later cells would fail with a confusing error on an empty result.
# The check is a warning only: a flagged feed may still contain usable entries.
if feed.get('bozo'):
    print('Warning: feed could not be parsed cleanly:', feed.get('bozo_exception'))

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
# Show the top-level keys of the parsed result held in `feed`.
feed.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
# Keys of the feed-level metadata section (attribute access is equivalent to
# the ['feed'] lookup on the parsed result).
feed.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
# Feed-level metadata lives under the 'feed' key of the parsed result.
feed_meta = feed['feed']

print('Title:', feed_meta.title)
print('Subtitle:', feed_meta.subtitle)
# This feed exposes no feed-level 'author' key, so look it up defensively and
# fall back to the placeholder text instead of hard-coding it; if the feed
# ever provides an author it is printed automatically.
print('Author:', feed_meta.get('author', 'There is not a known author'))
print('Link:', feed_meta.link)

Title: Radar
Subtitle: Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
Author: There is not a known author
Link: https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [7]:
# Number of entries in the parsed feed.
len(feed.entries)

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [8]:
# Inspect the first entry to see which keys an individual entry carries.
feed.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [9]:
# Collect the title of every entry, in feed order.
[item.title for item in feed.entries]

['InfoTribes, Reality Brokers',
 'The End of Silicon Valley as We Know It?',
 'The Next Generation of AI',
 'Radar trends to watch: March 2021',
 'Product Management for AI',
 '5 things on our data and AI radar for 2021',
 '5 infrastructure and operations trends to watch in 2021',
 'The Wrong Question',
 'Radar trends to watch: February 2021',
 'Where Programming, Ops, AI, and the Cloud are Headed in 2021',
 'Seven Legal Questions for Data Scientists',
 'Patterns',
 'Radar trends to watch: January 2021',
 'Four short links: 14 Dec 2020',
 'Four short links: 8 Dec 2020',
 'O’Reilly’s top 20 live online training courses of 2020',
 'What is functional programming?',
 'Four short links: 4 Dec 2020',
 'Four short links: 1 Dec 2020',
 'Radar trends to watch: December 2020',
 'Four short links: 27 Nov 2020',
 'Four short links: 24 Nov 2020',
 'Four short links: 20 Nov 2020',
 'On Exactitude in Technical Debt',
 'Four short links: 17 Nov 2020',
 'Four short links: 13 Nov 2020',
 'Multi-Paradig

### 8. Calculate the percentage of entries whose title starts with "Four short links".

In [10]:
# Count the entries whose title begins with "Four short links"
# (case-insensitive).  Summing booleans counts the True values.
total_entries = len(feed.entries)
four_short_links = sum(
    entry.title.lower().startswith('four short links')
    for entry in feed.entries
)

# Guard against an empty feed so the division cannot raise ZeroDivisionError;
# an empty feed is reported as 0.0%.
percentage = (four_short_links / total_entries) * 100 if total_entries else 0.0
print(f'{round(percentage, 2)}%')

53.33%


### 9. Create a Pandas data frame from the feed's entries.

In [11]:
import pandas as pd

In [12]:
# Build one row per feed entry; the entry keys become the columns.
df = pd.DataFrame(feed.entries)

# Preview the first rows only rather than dumping the whole frame.
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,"InfoTribes, Reality Brokers","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/infotribes-reali...,"Tue, 23 Mar 2021 14:40:55 +0000","(2021, 3, 23, 14, 40, 55, 1, 82, 0)",[{'name': 'Hugo Bowne-Anderson'}],Hugo Bowne-Anderson,{'name': 'Hugo Bowne-Anderson'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13687,False,It seems harder than ever to agree with others...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/infotribes-reali...,0,https://www.oreilly.com/radar/infotribes-reali...
1,The End of Silicon Valley as We Know It?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-end-of-silic...,"Thu, 11 Mar 2021 17:22:01 +0000","(2021, 3, 11, 17, 22, 1, 3, 70, 0)",[{'name': 'Tim O’Reilly'}],Tim O’Reilly,{'name': 'Tim O’Reilly'},"[{'term': 'Radar Column', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13665,False,"High-profile entrepreneurs like Elon Musk, ven...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-end-of-silic...,0,https://www.oreilly.com/radar/the-end-of-silic...
2,The Next Generation of AI,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-next-generat...,"Tue, 09 Mar 2021 13:46:41 +0000","(2021, 3, 9, 13, 46, 41, 1, 68, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13676,False,Programs like AlphaZero and GPT-3 are massive ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-next-generat...,0,https://www.oreilly.com/radar/the-next-generat...
3,Radar trends to watch: March 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Mon, 01 Mar 2021 14:13:29 +0000","(2021, 3, 1, 14, 13, 29, 0, 60, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13672,False,"For a short month, a lot happened in February–...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...
4,Product Management for AI,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/product-manageme...,"Fri, 26 Feb 2021 19:40:39 +0000","(2021, 2, 26, 19, 40, 39, 4, 57, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13646,False,"A couple of years ago, Pete Skomoroch, Roger M...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/product-manageme...,0,https://www.oreilly.com/radar/product-manageme...


### 10. Count the number of entries per author and sort them in descending order.

In [13]:
# Entries per author, most prolific first.  The sort arguments are the
# value_counts defaults, spelled out to make the descending order explicit.
df['author'].value_counts(sort=True, ascending=False)

Nat Torkington                                    31
Mike Loukides                                     16
                                                   4
Patrick Hall and Ayoub Ouederni                    1
Kevlin Henney                                      1
Justin Norman and Mike Loukides                    1
Tim O’Reilly                                       1
Alex Castrounis                                    1
Hugo Bowne-Anderson                                1
Q Ethan McCallum and Mike Loukides                 1
Q Ethan McCallum, Chris Butler and Shane Glynn     1
Matthew Rocklin and Hugo Bowne-Anderson            1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [14]:
# Number of characters in each entry title.
title_length = df['title'].apply(len)

# DataFrame.insert raises ValueError when the column already exists, which
# made this cell fail on a second run.  Insert at position 2 the first time
# and simply overwrite the values on any later run.
if 'title_length' in df.columns:
    df['title_length'] = title_length
else:
    df.insert(2, 'title_length', title_length)

In [15]:
# Keep only the columns the exercise asks for.
columns_of_interest = ['title', 'author', 'title_length']
sub_df = df[columns_of_interest]

# Longest titles first; show just the top rows.
sub_df.sort_values('title_length', ascending=False).head()

Unnamed: 0,title,author,title_length
59,Why Best-of-Breed is a Better Choice than All-...,Matthew Rocklin and Hugo Bowne-Anderson,79
9,"Where Programming, Ops, AI, and the Cloud are ...",Mike Loukides,60
6,5 infrastructure and operations trends to watc...,,55
15,O’Reilly’s top 20 live online training courses...,,54
5,5 things on our data and AI radar for 2021,,42


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [16]:
# Boolean mask of entries whose summary mentions "machine learning",
# compared in lower case so the match is case-insensitive.
#   regex=False -> treat the phrase as literal text, not a regex pattern
#   na=False    -> a missing summary counts as "no match" instead of putting
#                  NaN into the mask, which would make the row selection fail
mentions_ml = df['summary'].str.lower().str.contains(
    'machine learning', regex=False, na=False
)

# Select the matching titles in one .loc step (no chained indexing).
df.loc[mentions_ml, 'title'].tolist()

['5 things on our data and AI radar for 2021',
 'Seven Legal Questions for Data Scientists',
 'Four short links: 21 August 2020']