# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [5]:
# Atom feed for O'Reilly Radar, served through FeedBurner.
url = "http://feeds.feedburner.com/oreilly/radar/atom"

In [6]:
# Parse the feed at `url`; the result exposes feed-level metadata under
# `feed` and the individual posts under `entries`.
oreilly = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [7]:
# Top-level keys of the parsed result.
oreilly.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [8]:
# Keys of the feed-level metadata (the `feed` component).
oreilly.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [10]:
# Print title, subtitle and link, each separated by a blank line.
# The feed-level metadata carries no author, so none is printed.
feed_info = oreilly.feed
print(feed_info.title, feed_info.subtitle, feed_info.link, sep='\n\n')

Radar

Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology

https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [11]:
# Number of entries contained in the parsed feed.
print(len(oreilly.entries))

15


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [16]:
# `entries` is indexed first; the keys shown are those of the first entry.
oreilly.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

### 7. Extract a list of entry titles.

In [20]:
# Collect the title of every entry. Iterating over the entries directly
# (rather than over a fixed range of 15) keeps the cell correct when the
# feed grows or shrinks.
titles = [entry.title for entry in oreilly.entries]
titles

['Healthy Data',
 'Formal Informal Languages',
 'Radar Trends to Watch: November 2022',
 'What We Learned Auditing Sophisticated AI for Bias',
 'The Collaborative Metaverse',
 'What Is Hyperautomation?',
 'Radar Trends to Watch: October 2022',
 'The Problem with Intelligence',
 'Radar Trends to Watch: September 2022',
 'Ad Networks and Content Marketing',
 'On Technique',
 'Scaling False Peaks',
 'The Metaverse Is Not a Place',
 'Radar Trends to Watch: August 2022',
 'SQL: The Universal Solvent for REST APIs']

### 8. Calculate the percentage of "Four short links" entry titles.

In [21]:
# Share of entries whose title contains "Four short links" (case-insensitive).
# That series stopped being published on December 14th, 2020, so a current
# feed is expected to give 0%.
four_short_links = [
    entry.title
    for entry in oreilly.entries
    if 'four short links' in entry.title.lower()
]
# Guard against an empty feed so the division cannot fail.
percentage = 100 * len(four_short_links) / len(oreilly.entries) if oreilly.entries else 0.0
print(f'{percentage:.1f}%')

### 9. Create a Pandas data frame from the feed's entries.

In [22]:
import pandas as pd

In [35]:
# One row per feed entry; the entry keys become the columns.
df = pd.DataFrame(oreilly.entries)
df

Unnamed: 0,author,author_detail,authors,comments,content,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/healthy-data/#re...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14784,https://www.oreilly.com/radar/healthy-data/,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 15 Nov 2022 15:18:53 +0000","(2022, 11, 15, 15, 18, 53, 1, 319, 0)",0,"This summer, we started asking about “technica...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Data', 'scheme': None, 'label': Non...",Healthy Data,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/healthy-data/feed/
1,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/formal-informal-...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14772,https://www.oreilly.com/radar/formal-informal-...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 08 Nov 2022 11:58:09 +0000","(2022, 11, 8, 11, 58, 9, 1, 312, 0)",0,We’ve all been impressed by the generative art...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Programming', 'scheme': None, 'labe...",Formal Informal Languages,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/formal-informal-...
2,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/radar-trends-to-...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14760,https://www.oreilly.com/radar/radar-trends-to-...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 01 Nov 2022 11:15:57 +0000","(2022, 11, 1, 11, 15, 57, 1, 305, 0)",0,Maintaining a separate category for AI is gett...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Radar Trends', 'scheme': None, 'lab...",Radar Trends to Watch: November 2022,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...
3,Patrick Hall,{'name': 'Patrick Hall'},[{'name': 'Patrick Hall'}],https://www.oreilly.com/radar/what-we-learned-...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14754,https://www.oreilly.com/radar/what-we-learned-...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 18 Oct 2022 11:14:23 +0000","(2022, 10, 18, 11, 14, 23, 1, 291, 0)",0,A recently passed law in New York City require...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",What We Learned Auditing Sophisticated AI for ...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/what-we-learned-...
4,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/the-collaborativ...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14750,https://www.oreilly.com/radar/the-collaborativ...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 12 Oct 2022 20:01:45 +0000","(2022, 10, 12, 20, 1, 45, 2, 285, 0)",0,We want to congratulate Dylan Field on his sta...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Metaverse', 'scheme': None, 'label'...",The Collaborative Metaverse,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/the-collaborativ...
5,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/what-is-hyperaut...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14733,https://www.oreilly.com/radar/what-is-hyperaut...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 11 Oct 2022 10:59:21 +0000","(2022, 10, 11, 10, 59, 21, 1, 284, 0)",0,Gartner has anointed “Hyperautomation” one of ...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",What Is Hyperautomation?,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/what-is-hyperaut...
6,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/radar-trends-to-...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14726,https://www.oreilly.com/radar/radar-trends-to-...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 04 Oct 2022 11:15:42 +0000","(2022, 10, 4, 11, 15, 42, 1, 277, 0)",0,September was a busy month. In addition to con...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Radar Trends', 'scheme': None, 'lab...",Radar Trends to Watch: October 2022,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...
7,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/the-problem-with...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14718,https://www.oreilly.com/radar/the-problem-with...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 13 Sep 2022 11:21:40 +0000","(2022, 9, 13, 11, 21, 40, 1, 256, 0)",0,Projects like OpenAI’s DALL-E and DeepMind’s G...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Artificial Intelligence', 'scheme':...",The Problem with Intelligence,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/the-problem-with...
8,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],https://www.oreilly.com/radar/radar-trends-to-...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14700,https://www.oreilly.com/radar/radar-trends-to-...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 06 Sep 2022 11:21:09 +0000","(2022, 9, 6, 11, 21, 9, 1, 249, 0)",0,It’s hardly news to talk about the AI developm...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Radar Trends', 'scheme': None, 'lab...",Radar Trends to Watch: September 2022,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...
9,Q McCallum,{'name': 'Q McCallum'},[{'name': 'Q McCallum'}],https://www.oreilly.com/radar/ad-networks-and-...,"[{'type': 'text/html', 'language': None, 'base...",False,https://www.oreilly.com/radar/?p=14688,https://www.oreilly.com/radar/ad-networks-and-...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 16 Aug 2022 11:21:21 +0000","(2022, 8, 16, 11, 21, 21, 1, 228, 0)",0,"In a recent Radar piece, I explored N-sided ma...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Operations', 'scheme': None, 'label...",Ad Networks and Content Marketing,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/ad-networks-and-...


### 10. Count the number of entries per author and sort them in descending order.

In [36]:
# Count entries per author, naming the count column 'entries'.
authors = (
    df.groupby('author', as_index=False)
    .agg({'title': 'count'})
    .rename(columns={'title': 'entries'})
)
# Most prolific authors first.
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
2,Mike Loukides,10
0,Jon Udell,1
1,Kevlin Henney,1
3,Patrick Hall,1
4,Q McCallum,1
5,Tim O’Reilly,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [43]:
# Character count of every entry title, stored as a new column on df.
df['title_length'] = df['title'].apply(len)

# Title, author and title length only, longest titles first.
df2 = (
    df[['title', 'author', 'title_length']]
    .sort_values(by='title_length', ascending=False)
)
df2

Unnamed: 0,title,author,title_length
3,What We Learned Auditing Sophisticated AI for ...,Patrick Hall,50
14,SQL: The Universal Solvent for REST APIs,Jon Udell,40
8,Radar Trends to Watch: September 2022,Mike Loukides,37
2,Radar Trends to Watch: November 2022,Mike Loukides,36
6,Radar Trends to Watch: October 2022,Mike Loukides,35
13,Radar Trends to Watch: August 2022,Mike Loukides,34
9,Ad Networks and Content Marketing,Q McCallum,33
7,The Problem with Intelligence,Mike Loukides,29
12,The Metaverse Is Not a Place,Tim O’Reilly,28
4,The Collaborative Metaverse,Mike Loukides,27


### 12. Create a list of entry titles whose summary includes the phrase "machine learning".

In [50]:
# Collect every entry's summary. Iterating over the entries directly avoids
# hard-coding the feed size, which changes as the feed is updated.
summaries = [entry.summary for entry in oreilly.entries]

# Keep the titles of entries whose summary contains the exact
# (case-sensitive) phrase 'machine learning'.
machine_learning = [
    entry.title
    for entry, summary in zip(oreilly.entries, summaries)
    if 'machine learning' in summary
]
machine_learning


['Radar Trends to Watch: October 2022']