# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
# Feed URL to parse (O'Reilly Radar, served through FeedBurner).
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
# Fetch and parse the feed at `url`; the result is reused by the cells below.
parser = feedparser.parse(url)
# Bare last expression: display the whole parsed result.
parser

{'bozo': False,
 'entries': [{'title': 'Low-Code and the Democratization of Programming',
   'title_detail': {'type': 'text/plain',
    'language': None,
    'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
    'value': 'Low-Code and the Democratization of Programming'},
   'links': [{'rel': 'alternate',
     'type': 'text/html',
     'href': 'https://www.oreilly.com/radar/low-code-and-the-democratization-of-programming/'}],
   'link': 'https://www.oreilly.com/radar/low-code-and-the-democratization-of-programming/',
   'comments': 'https://www.oreilly.com/radar/low-code-and-the-democratization-of-programming/#respond',
   'published': 'Tue, 16 Nov 2021 12:36:18 +0000',
   'published_parsed': time.struct_time(tm_year=2021, tm_mon=11, tm_mday=16, tm_hour=12, tm_min=36, tm_sec=18, tm_wday=1, tm_yday=320, tm_isdst=0),
   'authors': [{}],
   'author': '',
   'tags': [{'term': 'Programming', 'scheme': None, 'label': None},
    {'term': 'Signals', 'scheme': None, 'label': None}],
   

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
# Top-level components of the parsed result (dict-style keys).
keys = parser.keys()
print (keys)

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
# Components available under the feed-level `feed` element.
feed_keys = parser.feed.keys()
print (feed_keys)

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])


### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
# The exercise asks for title, subtitle, author, and link. This feed's `feed`
# component exposes no 'author' key (see the key listing printed in
# exercise 3), so attribute access would raise; .get() falls back to a
# placeholder instead of failing.
title = parser.feed.title
subtitle = parser.feed.subtitle
author = parser.feed.get('author', 'N/A')
link = parser.feed.link
print (title, subtitle, author, link)

Radar Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [8]:
# Number of entries in the feed (attribute access, as in the other cells).
len(parser.entries)

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [10]:
# `entries` is a list, so index into it before asking for keys. The result is
# stored under its own name so the feed-level `feed_keys` from exercise 3 is
# not silently overwritten with entry-level keys.
entry_keys = parser.entries[0].keys()
print (entry_keys)

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])


### 7. Extract a list of entry titles.

In [12]:
# Collect the title of every entry, in feed order.
titles = [entry.title for entry in parser.entries]

titles

['Low-Code and the Democratization of Programming',
 'Remote Teams in ML/AI',
 'Radar trends to watch: November 2021',
 'The Sobering Truth About the Impact of Your Business Ideas',
 'MLOps and DevOps: Why Data Makes It Different',
 'The Quality of Auto-Generated Code',
 'Radar trends to watch: October 2021',
 'Ethical Social Media: Oxymoron or Attainable Goal?',
 '2021 Data/AI Salary Survey',
 'Radar trends to watch: September 2021',
 'Rebranding Data',
 'A Way Forward with Communal Computing',
 'Defending against ransomware is all about the basics',
 'Radar trends to watch: August 2021',
 'Communal Computing’s Many Problems',
 'Thinking About Glue',
 'Radar trends to watch: July 2021',
 'Hand Labeling Considered Harmful',
 'Two economies. Two sets of rules.',
 'Communal Computing',
 'Code as Infrastructure',
 'Radar trends to watch: June 2021',
 'AI Powered Misinformation and Manipulation at Scale #GPT-3',
 'DeepCheapFakes',
 'Radar trends to watch: May 2021',
 'Checking Jeff Bezos’s

### 8. Calculate the percentage of "Four short links" entry titles.

In [13]:

# Titles that contain the "Four short links:" marker.
four_shots = [t for t in titles if "Four short links:" in t]

num_titles = len(titles)
num_four = len(four_shots)

print(num_titles, num_four)

# Share of such titles among all entry titles, as a percentage.
print(num_four * 100 / num_titles)

60 12
20.0


### 9. Create a Pandas data frame from the feed's entries.

In [14]:
import pandas as pd

In [15]:
# Build a data frame from the feed's entries.
df = pd.DataFrame(parser.entries)
# Preview the first rows only rather than dumping the whole frame.
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,author_detail
0,Low-Code and the Democratization of Programming,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/low-code-and-the...,https://www.oreilly.com/radar/low-code-and-the...,"Tue, 16 Nov 2021 12:36:18 +0000","(2021, 11, 16, 12, 36, 18, 1, 320, 0)",[{}],,"[{'term': 'Programming', 'scheme': None, 'labe...",https://www.oreilly.com/radar/?p=14083,False,"In the past decade, the growth in low-code and...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/low-code-and-the...,0,
1,Remote Teams in ML/AI,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/remote-teams-in-...,https://www.oreilly.com/radar/remote-teams-in-...,"Tue, 09 Nov 2021 14:05:48 +0000","(2021, 11, 9, 14, 5, 48, 1, 313, 0)",[{'name': 'Q McCallum'}],Q McCallum,"[{'term': 'Building a data culture', 'scheme':...",https://www.oreilly.com/radar/?p=14075,False,I&#8217;m well-versed in the ups and downs of ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/remote-teams-in-...,0,{'name': 'Q McCallum'}
2,Radar trends to watch: November 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 02 Nov 2021 11:40:17 +0000","(2021, 11, 2, 11, 40, 17, 1, 306, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14066,False,While October’s news was dominated by Facebook...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,{'name': 'Mike Loukides'}
3,The Sobering Truth About the Impact of Your Bu...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/the-sobering-tru...,https://www.oreilly.com/radar/the-sobering-tru...,"Tue, 26 Oct 2021 13:07:58 +0000","(2021, 10, 26, 13, 7, 58, 1, 299, 0)","[{'name': 'Eric Colson, Daragh Sibley and Dave...","Eric Colson, Daragh Sibley and Dave Spiegel","[{'term': 'Business', 'scheme': None, 'label':...",https://www.oreilly.com/radar/?p=14041,False,The introduction of data science into the busi...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-sobering-tru...,0,"{'name': 'Eric Colson, Daragh Sibley and Dave ..."
4,MLOps and DevOps: Why Data Makes It Different,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/mlops-and-devops...,https://www.oreilly.com/radar/mlops-and-devops...,"Tue, 19 Oct 2021 14:17:38 +0000","(2021, 10, 19, 14, 17, 38, 1, 292, 0)",[{'name': 'Ville Tuulos and Hugo Bowne-Anderso...,Ville Tuulos and Hugo Bowne-Anderson,"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14018,False,Much has been written about struggles of deplo...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/mlops-and-devops...,0,{'name': 'Ville Tuulos and Hugo Bowne-Anderson'}


### 10. Count the number of entries per author and sort them in descending order.

In [16]:
# Count titles per author and show the most frequent authors first.
# Entries whose author is an empty string form their own group.
autores = (
    df.groupby('author', as_index=False)['title']
      .count()
      .rename(columns={'title': 'entries'})
)
autores.sort_values(by='entries', ascending=False)

Unnamed: 0,author,entries
6,Mike Loukides,27
8,Nat Torkington,12
0,,4
1,Chris Butler,3
13,Tim O’Reilly,3
11,Q McCallum,2
2,"Eric Colson, Daragh Sibley and Dave Spiegel",1
3,Hugo Bowne-Anderson,1
4,Kevlin Henney,1
5,Mike Barlow,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [17]:
# Character count of each entry title, stored as a new column.
df['title_length'] = df['title'].map(len)

# Longest titles first; preview the top rows.
(
    df[['title', 'author', 'title_length']]
    .sort_values(by='title_length', ascending=False)
    .head()
)

Unnamed: 0,title,author,title_length
38,"Where Programming, Ops, AI, and the Cloud are ...",Mike Loukides,60
3,The Sobering Truth About the Impact of Your Bu...,"Eric Colson, Daragh Sibley and Dave Spiegel",58
22,AI Powered Misinformation and Manipulation at ...,Nitesh Dhanjani,58
35,5 infrastructure and operations trends to watc...,,55
44,O’Reilly’s top 20 live online training courses...,,54


### 12. Create a list of entry titles whose summary includes the phrase "machine learning".

In [18]:
# Match the phrase literally. With the default regex matching, a trailing '.'
# in the pattern is a wildcard that demands one more character after the
# phrase, so a summary ending in "machine learning" would be missed.
# na=False treats entries without a summary as non-matching instead of
# producing NaN in the boolean mask. Matching stays case-sensitive.
ml_mask = df['summary'].str.contains('machine learning', regex=False, na=False)
df.loc[ml_mask, 'title'].tolist()

['MLOps and DevOps: Why Data Makes It Different',
 'Hand Labeling Considered Harmful',
 'Radar trends to watch: April 2021',
 'Seven Legal Questions for Data Scientists']