# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
# Atom feed of O'Reilly Radar, served through FeedBurner.
url = "http://feeds.feedburner.com/oreilly/radar/atom"

In [3]:
# Fetch and parse the feed; the result is the dict-like object explored in the cells below.
# NOTE(review): the result exposes a 'bozo' key - presumably feedparser's
# malformed-feed flag; confirm it is unset before trusting the entries.
radar = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
# Top-level sections of the parsed result (feed metadata, entries, HTTP details, ...).
radar.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
# Fields available on the feed-level metadata object.
radar.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
# Repeats the listing from exercise 3; note that it contains no 'author' key.
radar.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

In [7]:
# The feed-level author is optional metadata, and the key listing above shows
# this feed has none. Look it up with a default instead of hard-coding the
# placeholder, so a feed that does carry an author prints it.
print('Title: {}\nSubtitle: {}\nAuthor: {}\nLink: {}'.format(
    radar.feed.title,
    radar.feed.subtitle,
    radar.feed.get('author', 'not available'),
    radar.feed.link,
))

Title: Radar
Subtitle: Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
Author: not available
Link: https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [8]:
# Each item in radar.entries is one entry of the feed.
entry_count = len(radar.entries)
print('Number of entries: {}'.format(entry_count))

Number of entries: 60


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [9]:
# Inspect a single entry (the first one) to see which fields an entry carries.
first_entry = radar.entries[0]
print('Keys of first entry: {}'.format(first_entry.keys()))

Keys of first entry: dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])


### 7. Extract a list of entry titles.

In [10]:
# Collect the title of every entry, in feed order.
titles = [entry.title for entry in radar.entries]

In [11]:
import pandas as pd
# Display-only: wrap the list in a Series for a numbered, aligned listing (the result is not stored).
pd.Series(titles)

0                              What to Do When AI Fails
1                         Four short links: 18 May 2020
2                         Four short links: 15 May 2020
3           Practical Skills for The AI Product Manager
4                         Four short links: 14 May 2020
5                         Four short links: 13 May 2020
6                         Four short links: 12 May 2020
7                            When models are everywhere
8                         Four short links: 11 May 2020
9                          Four short links: 8 May 2020
10                      Radar trends to watch: May 2020
11                         Four short links: 7 May 2020
12                         Four short links: 6 May 2020
13                         Four short links: 5 May 2020
14                                             On COBOL
15                         Four short links: 4 May 2020
16                         Four short links: 1 May 2020
17                      Four short links: 30 Apr

### 8. Calculate the percentage of "Four short links" entry titles.

In [12]:
# Keep only the titles belonging to the "Four short links" series (prefix match).
four_short = [title for title in titles if title.startswith('Four short links')]

In [13]:
# Share of "Four short links" titles among all titles, as a percentage rounded to 2 decimals.
four_short_pct = round(100 * len(four_short) / len(titles), 2)
print('Percentage of "Four short links" entry titles: {}%'.format(four_short_pct))

Percentage of "Four short links" entry titles: 70.0%


### 9. Create a Pandas data frame from the feed's entries.

In [14]:
import pandas as pd

In [15]:
# One row per feed entry; the entry keys become the columns.
df=pd.DataFrame(radar.entries)

In [16]:
# Preview the first five rows.
df.head()

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,Andrew Burt and Patrick Hall,{'name': 'Andrew Burt and Patrick Hall'},[{'name': 'Andrew Burt and Patrick Hall'}],https://www.oreilly.com/radar/what-to-do-when-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/what-to-do-when-...,False,https://www.oreilly.com/radar/?p=12798,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 18 May 2020 14:03:08 +0000","(2020, 5, 18, 14, 3, 8, 0, 139, 0)",0,"These are unprecedented times, at least by inf...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",What to Do When AI Fails,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/what-to-do-when-...
1,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=12794,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 18 May 2020 12:41:25 +0000","(2020, 5, 18, 12, 41, 25, 0, 139, 0)",0,The Web Assembly App Gap &#8212; This essay st...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 18 May 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
2,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=12789,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 15 May 2020 11:22:50 +0000","(2020, 5, 15, 11, 22, 50, 4, 136, 0)",0,Favourite Developer-Efficiency Tips &#8212; Be...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 15 May 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
3,"Justin Norman, Peter Skomoroch and Mike Loukides","{'name': 'Justin Norman, Peter Skomoroch and M...","[{'name': 'Justin Norman, Peter Skomoroch and ...",https://www.oreilly.com/radar/practical-skills...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/practical-skills...,False,https://www.oreilly.com/radar/?p=12786,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 14 May 2020 12:40:45 +0000","(2020, 5, 14, 12, 40, 45, 3, 135, 0)",0,"In our previous article, What You Need to Know...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Practical Skills for The AI Product Manager,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/practical-skills...
4,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=12783,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 14 May 2020 11:28:00 +0000","(2020, 5, 14, 11, 28, 0, 3, 135, 0)",0,Malware Toolkit Targetting Airgapped Networks ...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 14 May 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [17]:
# Number of entries per author, most prolific author first.
# Counting the 'title' column per group yields a one-column frame named 'title'.
(
    df.groupby('author')[['title']]
      .count()
      .sort_values('title', ascending=False)
)

Unnamed: 0_level_0,title
author,Unnamed: 1_level_1
Nat Torkington,42
Jenn Webb,4
Mike Loukides,4
Roger Magoulas and Steve Swoyer,2
Andrew Burt and Patrick Hall,1
Cynthia Owens,1
Daniel Wu and Mike Loukides,1
Hugo Bowne-Anderson,1
Hugo Bowne-Anderson and Mike Loukides,1
"Justin Norman, Peter Skomoroch and Mike Loukides",1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [18]:
# Character count of every title, computed with the vectorised string accessor
# rather than a per-row Python call; a missing title yields NaN instead of
# raising TypeError.
# The misspelt column name is kept because the following cell selects and
# sorts by it.
df['title_lenght'] = df['title'].str.len()

In [19]:
# Title, author and title length for every entry, longest title first.
(
    df.loc[:, ['title', 'author', 'title_lenght']]
      .sort_values(by='title_lenght', ascending=False)
)

Unnamed: 0,title,author,title_lenght
49,Great leaders inspire innovation and creativit...,Jenn Webb,76
23,How data privacy leader Apple found itself in ...,Daniel Wu and Mike Loukides,71
50,Strong leaders forge an intersection of knowle...,Jenn Webb,64
56,It’s an unprecedented crisis: 8 things to do r...,Cynthia Owens,54
43,What you need to know about product management...,Peter Skomoroch and Mike Loukides,53
47,An enterprise vision is your company’s North Star,Jenn Webb,49
48,Leaders need to mobilize change-ready workforces,Jenn Webb,48
44,The unreasonable importance of data preparation,Hugo Bowne-Anderson,47
46,3 ways to confront modern business challenges,Rita J. King,45
3,Practical Skills for The AI Product Manager,"Justin Norman, Peter Skomoroch and Mike Loukides",43


### 12. Create a list of entry titles whose summary includes the phrase "machine learning".

In [20]:
import re
# Titles of the entries whose summary mentions "machine learning" in any
# letter case. The summaries are lower-cased and tested column-wise for the
# literal phrase (regex=False), which avoids two row lookups per entry;
# entries without a summary are treated as non-matching (na=False) instead
# of raising.
ml_titles = df.loc[
    df['summary'].str.lower().str.contains('machine learning', regex=False, na=False),
    'title',
].tolist()
ml_titles

['Practical Skills for The AI Product Manager',
 'When models are everywhere',
 'Four short links: 8 May 2020',
 'What you need to know about product management for AI']