# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [69]:
feed = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [7]:
feed.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [24]:
feed['feed']

{'title': 'Radar',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Radar'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://www.oreilly.com/radar'},
  {'rel': 'self',
   'type': 'application/rss+xml',
   'href': 'http://feeds.feedburner.com/oreilly/radar/atom'},
  {'rel': 'hub',
   'href': 'http://pubsubhubbub.appspot.com/',
   'type': 'text/html'}],
 'link': 'https://www.oreilly.com/radar',
 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
 'subtitle_detail': {'type': 'text/html',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'},
 'updated': 'Fri, 13 Nov 2020 12:20:52 +0000',
 'updated_parsed': time.struct_time(tm_year=2020, tm_mon=11, tm_mday=13, tm_hour=12, tm_min=20,

### 4. Extract and print the feed title, subtitle, author, and link.

In [40]:
feed_title = feed['feed']['title']
feed_subtitle = feed['feed']['subtitle']
link = feed['feed']['link']

print('title:', feed_title, '\n', 'subtitle:', feed_subtitle , '\n', 'link:', link)

title: Radar 
 subtitle: Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology 
 link: https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [45]:
len(feed['entries'])

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [51]:
feed['entries'][0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [58]:
entry_titles = [feed['entries'][i].title for i in range(len(feed['entries']))]
print(entry_titles)

['Four short links: 13 Nov 2020', 'Multi-Paradigm Languages', 'Four short links: 10 November 2020', 'Four short links: 6 Nov 2020', 'Four short links: 4 Nov 2020', 'Radar trends to watch: November 2020', 'Four short links: 30 Oct 2020', 'Four short links: 28 Oct 2020', 'Our Favorite Questions', 'Four short links: 21 Oct 2020', 'Four Short Links: 16 October 2020', 'Four short links: 14 Oct 2020', 'AI Product Management After Deployment', 'Four short links: 9 October 2020', 'AI and Creativity', 'Four short links: 6 October 2020', 'Four short links: 2 October 2020', 'Radar trends to watch: October 2020', 'Four short links: 29 Sep 2020', 'Four short links: 25 September 2020', 'Four short links: 18 Sep 2020', 'Four short links: 16 Sep 2020', 'How to Set AI Goals', 'Four short links: 11 Sep 2020', 'Four short links: 9 Sep 2020', 'Pair Programming with AI', 'Four short links: 4 September 2020', 'Four short links: 2 September 2020', 'Radar trends to watch: September 2020', 'Four short links: 2

### 8. Calculate the percentage of "Four short links" entry titles.

In [65]:
s = 'Four short links:'
count = 0

for i in entry_titles:
    if s in i:
        count += 1
    
percentage = count / len(entry_titles)
print(percentage)

0.6666666666666666


### 9. Create a Pandas data frame from the feed's entries.

In [66]:
import pandas as pd

In [67]:
df = pd.DataFrame(feed.entries)
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Four short links: 13 Nov 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 13 Nov 2020 12:20:52 +0000","(2020, 11, 13, 12, 20, 52, 4, 318, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13409,False,Advanced System on a Chip Lecture Notes (2016)...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
1,Multi-Paradigm Languages,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/multi-paradigm-l...,"Tue, 10 Nov 2020 13:29:21 +0000","(2020, 11, 10, 13, 29, 21, 1, 315, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Software Engineering', 'scheme': No...",https://www.oreilly.com/radar/?p=13406,False,The programming world used to be split into fu...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/multi-paradigm-l...,0,https://www.oreilly.com/radar/multi-paradigm-l...
2,Four short links: 10 November 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 10 Nov 2020 12:13:23 +0000","(2020, 11, 10, 12, 13, 23, 1, 315, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13404,False,Hypothesis as Liability &#8212; Would the ment...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
3,Four short links: 6 Nov 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 06 Nov 2020 11:59:34 +0000","(2020, 11, 6, 11, 59, 34, 4, 311, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13399,False,Dealing with Security Holes in Chips &#8212; s...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
4,Four short links: 4 Nov 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 04 Nov 2020 11:46:23 +0000","(2020, 11, 4, 11, 46, 23, 2, 309, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13396,False,The AI Who Mistook a Bald Head for a Football ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [68]:
df['author'].value_counts()

Nat Torkington                                      41
Mike Loukides                                       10
Matthew Rocklin and Hugo Bowne-Anderson              1
Justin Norman, Peter Skomoroch and Mike Loukides     1
Q Ethan McCallum, Chris Butler and Shane Glynn       1
Sarah Gold                                           1
Q Ethan McCallum and Mike Loukides                   1
Alex Castrounis                                      1
Justin Norman and Mike Loukides                      1
Mike Loukides and Steve Swoyer                       1
                                                     1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [71]:
df['length'] = df['title'].apply(len)
df[['title', 'author', 'length']].sort_values('length', ascending = False)

Unnamed: 0,title,author,length
34,Why Best-of-Breed is a Better Choice than All-...,Matthew Rocklin and Hugo Bowne-Anderson,79
12,AI Product Management After Deployment,Justin Norman and Mike Loukides,38
28,Radar trends to watch: September 2020,Mike Loukides,37
36,The Least Liked Programming Languages,Mike Loukides,37
5,Radar trends to watch: November 2020,Mike Loukides,36
19,Four short links: 25 September 2020,Nat Torkington,35
17,Radar trends to watch: October 2020,Mike Loukides,35
2,Four short links: 10 November 2020,Nat Torkington,34
26,Four short links: 4 September 2020,Nat Torkington,34
27,Four short links: 2 September 2020,Nat Torkington,34


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [143]:
index = [df[df['summary'].str.contains(pat = 'Machine Learning') == True].index]
title_ml_art = []

for i in index:
    title_ml_art.append(list(df.iloc[i, 0]))

print(title_ml_art)


[['Four short links: 21 August 2020']]
