# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser
import requests as r

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

### 2. Obtain a list of components (keys) that are available for this feed.

In [3]:
rss = feedparser.parse(url)

In [4]:
rss.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
rss['feed'].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

In [6]:
rss['feed']

{'title': 'Radar',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Radar'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://www.oreilly.com/radar'},
  {'rel': 'self',
   'type': 'application/rss+xml',
   'href': 'http://feeds.feedburner.com/oreilly/radar/atom'},
  {'rel': 'hub',
   'href': 'http://pubsubhubbub.appspot.com/',
   'type': 'text/html'}],
 'link': 'https://www.oreilly.com/radar',
 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
 'subtitle_detail': {'type': 'text/html',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'},
 'updated': 'Fri, 08 Nov 2019 12:51:42 +0000',
 'updated_parsed': time.struct_time(tm_year=2019, tm_mon=11, tm_mday=8, tm_hour=12, tm_min=51, 

### 4. Extract and print the feed title, subtitle, author, and link.

In [7]:
print(f"Feed Title: {rss['feed']['title']}\nFeed Subtitle: {rss['feed']['subtitle']}\nFeed Link: {rss['feed']['link']}")

#no existe author dentro de feed

Feed Title: Radar
Feed Subtitle: Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
Feed Link: https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [8]:
len(rss['entries'])

18

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [9]:
rss['entries'][0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [10]:
entry_titles = [i['title'] for i in rss['entries']]

In [11]:
entry_titles

['Bitcoin and the disruption of monetary oppression',
 'Four short links: 8 November 2019',
 'Highlights from the O’Reilly Software Architecture Conference in Berlin 2019',
 'Highlights from the O’Reilly Velocity Conference in Berlin 2019',
 'From the trenches: Patrick Kua',
 '5 things Go taught me about open source?',
 'Building high-performing engineering teams, one pixel at a time',
 'How to deploy infrastructure in just 13.8 billion years',
 'Controlled chaos: The inevitable marriage of DevOps and security',
 'The ultimate guide to complicated systems',
 'Cognitive biases in the architect’s life',
 'The three-headed dog: Architecture, process, structure',
 'A world of deepfakes',
 'Radar trends to watch: November 2019',
 'Four short links: 7 November 2019',
 'Modern machine learning architectures: Data and hardware and platform, oh my',
 'The new norms of cloud native',
 'Observability: Understanding production through your customers’ eyes']

### 8. Calculate the percentage of "Four short links" entry titles.

In [12]:
percentage = str(round(len([i for i in entry_titles if "Four short links" in i])/len(entry_titles) * 100, 2)) + "%"

In [13]:
percentage

'11.11%'

### 9. Create a Pandas data frame from the feed's entries.

In [14]:
import pandas as pd

In [15]:
rss_df = pd.DataFrame(rss['entries'])

In [26]:
rss_df.head(3)

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],https://www.oreilly.com/radar/bitcoin-and-the-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/bitcoin-and-the-...,False,https://www.oreilly.com/radar/?p=9571,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 08 Nov 2019 05:10:27 +0000","(2019, 11, 8, 5, 10, 27, 4, 312, 0)",0,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Innovation & Disruption', 'scheme':...",Bitcoin and the disruption of monetary oppression,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/bitcoin-and-the-...
1,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=10775,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 08 Nov 2019 05:01:04 +0000","(2019, 11, 8, 5, 1, 4, 4, 312, 0)",0,Probabilistic Scripts for Automating Common-Se...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 8 November 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
2,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/highlights-from-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,False,https://www.oreilly.com/radar/?p=10569,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 07 Nov 2019 20:10:44 +0000","(2019, 11, 7, 20, 10, 44, 3, 311, 0)",0,Experts from across the software architecture ...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Next Architecture', 'scheme': None,...",Highlights from the O’Reilly Software Architec...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...


### 10. Count the number of entries per author and sort them in descending order.

In [25]:
rss_df['author'].value_counts(ascending=False)

Mac Slocum                      2
Nat Torkington                  2
Allen Holub                     1
Birgitta Boeckeler              1
Brian Sletten                   1
Jenn Webb                       1
Christine Yen                   1
Ingrid Burrington               1
Kelly Shortridge                1
Lena Reinhard                   1
Ben Lorica and Mike Loukides    1
Mike Loukides                   1
Cheryl Hung                     1
Dave Cheney                     1
Patrick Kua and Neal Ford       1
Jennifer Davis                  1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [54]:
rss_df = rss_df.assign(Title_lenght = rss_df['title'].apply(len))

In [58]:
rss_df.head(3)

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss,Title_lenght
0,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],https://www.oreilly.com/radar/bitcoin-and-the-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/bitcoin-and-the-...,False,https://www.oreilly.com/radar/?p=9571,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 08 Nov 2019 05:10:27 +0000","(2019, 11, 8, 5, 10, 27, 4, 312, 0)",0,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Innovation & Disruption', 'scheme':...",Bitcoin and the disruption of monetary oppression,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/bitcoin-and-the-...,49
1,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=10775,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 08 Nov 2019 05:01:04 +0000","(2019, 11, 8, 5, 1, 4, 4, 312, 0)",0,Probabilistic Scripts for Automating Common-Se...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 8 November 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,33
2,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/highlights-from-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,False,https://www.oreilly.com/radar/?p=10569,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 07 Nov 2019 20:10:44 +0000","(2019, 11, 7, 20, 10, 44, 3, 311, 0)",0,Experts from across the software architecture ...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Next Architecture', 'scheme': None,...",Highlights from the O’Reilly Software Architec...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,76


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [86]:
ml = rss_df[rss_df['summary'].str.contains("machine learning") | rss_df['summary'].str.contains("Machine learning")]

In [87]:
ml

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss,Title_lenght


In [94]:
test = rss_df[rss_df['summary'].str.contains("software") | rss_df['summary'].str.contains("Bitcoin")]

In [95]:
test

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss,Title_lenght
0,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],https://www.oreilly.com/radar/bitcoin-and-the-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/bitcoin-and-the-...,False,https://www.oreilly.com/radar/?p=9571,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 08 Nov 2019 05:10:27 +0000","(2019, 11, 8, 5, 10, 27, 4, 312, 0)",0,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Innovation & Disruption', 'scheme':...",Bitcoin and the disruption of monetary oppression,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/bitcoin-and-the-...,49
2,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/highlights-from-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,False,https://www.oreilly.com/radar/?p=10569,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 07 Nov 2019 20:10:44 +0000","(2019, 11, 7, 20, 10, 44, 3, 311, 0)",0,Experts from across the software architecture ...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Next Architecture', 'scheme': None,...",Highlights from the O’Reilly Software Architec...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,76
