# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [13]:
import feedparser
import re

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
lab = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
lab.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
lab.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

In [6]:
lab.feed

{'title': 'Radar',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Radar'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://www.oreilly.com/radar'},
  {'rel': 'self',
   'type': 'application/rss+xml',
   'href': 'http://feeds.feedburner.com/oreilly/radar/atom'},
  {'rel': 'hub',
   'href': 'http://pubsubhubbub.appspot.com/',
   'type': 'text/html'}],
 'link': 'https://www.oreilly.com/radar',
 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
 'subtitle_detail': {'type': 'text/html',
  'language': None,
  'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
  'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'},
 'updated': 'Mon, 11 Nov 2019 14:18:02 +0000',
 'updated_parsed': time.struct_time(tm_year=2019, tm_mon=11, tm_mday=11, tm_hour=14, tm_min=18,

### 4. Extract and print the feed title, subtitle, author, and link.

In [7]:
print(lab.feed.title)
print(lab.feed.subtitle)
#print(lab.feed.subtitle.author)
print(lab.feed.link)
#print(lab.feed.author)
# I think there is no author

Radar
Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [8]:
len(lab.entries)

18

In [9]:
lab.entries[0].title

'Four short links: 11 November 2019'

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [10]:
lab.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [11]:
lst=[lab.entries[i].title for i in range(len(lab.entries))]
lst

['Four short links: 11 November 2019',
 'Bitcoin and the disruption of monetary oppression',
 'Four short links: 8 November 2019',
 'Highlights from the O’Reilly Software Architecture Conference in Berlin 2019',
 'Highlights from the O’Reilly Velocity Conference in Berlin 2019',
 'From the trenches: Patrick Kua',
 '5 things Go taught me about open source?',
 'Building high-performing engineering teams, one pixel at a time',
 'How to deploy infrastructure in just 13.8 billion years',
 'Controlled chaos: The inevitable marriage of DevOps and security',
 'The ultimate guide to complicated systems',
 'Cognitive biases in the architect’s life',
 'The three-headed dog: Architecture, process, structure',
 'A world of deepfakes',
 'Radar trends to watch: November 2019',
 'Four short links: 7 November 2019',
 'Modern machine learning architectures: Data and hardware and platform, oh my',
 'The new norms of cloud native']

### 8. Calculate the percentage of "Four short links" entry titles.

In [28]:
fst=[re.findall('Four short links:\w*',i) for i in lst]
fst=[i for i in fst if i !=[]]

In [29]:
len(fst)/len(lst)*100

16.666666666666664

### 9. Create a Pandas data frame from the feed's entries.

In [30]:
import pandas as pd

In [31]:
df = pd.DataFrame(lab.entries)
df

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Four short links: 11 November 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 11 Nov 2019 05:01:56 +0000","(2019, 11, 11, 5, 1, 56, 0, 315, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=10811,False,WebAssembly.sh an open source terminal that us...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
1,Bitcoin and the disruption of monetary oppression,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/bitcoin-and-the-...,"Fri, 08 Nov 2019 05:10:27 +0000","(2019, 11, 8, 5, 10, 27, 4, 312, 0)",[{'name': 'Jenn Webb'}],Jenn Webb,{'name': 'Jenn Webb'},"[{'term': 'Innovation & Disruption', 'scheme':...",https://www.oreilly.com/radar/?p=9571,False,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/bitcoin-and-the-...,0,https://www.oreilly.com/radar/bitcoin-and-the-...
2,Four short links: 8 November 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 08 Nov 2019 05:01:04 +0000","(2019, 11, 8, 5, 1, 4, 4, 312, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=10775,False,Probabilistic Scripts for Automating Common-Se...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
3,Highlights from the O’Reilly Software Architec...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/highlights-from-...,"Thu, 07 Nov 2019 20:10:44 +0000","(2019, 11, 7, 20, 10, 44, 3, 311, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': 'Next Architecture', 'scheme': None,...",https://www.oreilly.com/radar/?p=10569,False,Experts from across the software architecture ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,0,https://www.oreilly.com/radar/highlights-from-...
4,Highlights from the O’Reilly Velocity Conferen...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/highlights-from-...,"Thu, 07 Nov 2019 20:09:07 +0000","(2019, 11, 7, 20, 9, 7, 3, 311, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': 'Next Architecture', 'scheme': None,...",https://www.oreilly.com/radar/?p=10577,False,People from across the cloud native and distri...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,0,https://www.oreilly.com/radar/highlights-from-...
5,From the trenches: Patrick Kua,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/from-the-trenche...,"Thu, 07 Nov 2019 20:00:48 +0000","(2019, 11, 7, 20, 0, 48, 3, 311, 0)",[{'name': 'Patrick Kua and Neal Ford'}],Patrick Kua and Neal Ford,{'name': 'Patrick Kua and Neal Ford'},"[{'term': 'Next Architecture', 'scheme': None,...",https://www.oreilly.com/radar/?p=10503,False,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/from-the-trenche...,0,https://www.oreilly.com/radar/from-the-trenche...
6,5 things Go taught me about open source?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/5-things-go-taug...,"Thu, 07 Nov 2019 20:00:38 +0000","(2019, 11, 7, 20, 0, 38, 3, 311, 0)",[{'name': 'Dave Cheney'}],Dave Cheney,{'name': 'Dave Cheney'},"[{'term': 'Next Architecture', 'scheme': None,...",https://www.oreilly.com/radar/?p=10549,False,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/5-things-go-taug...,0,https://www.oreilly.com/radar/5-things-go-taug...
7,"Building high-performing engineering teams, on...","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/building-high-pe...,"Thu, 07 Nov 2019 20:00:35 +0000","(2019, 11, 7, 20, 0, 35, 3, 311, 0)",[{'name': 'Lena Reinhard'}],Lena Reinhard,{'name': 'Lena Reinhard'},"[{'term': 'Next Architecture', 'scheme': None,...",https://www.oreilly.com/radar/?p=10556,False,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/building-high-pe...,0,https://www.oreilly.com/radar/building-high-pe...
8,How to deploy infrastructure in just 13.8 bill...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/how-to-deploy-in...,"Thu, 07 Nov 2019 20:00:34 +0000","(2019, 11, 7, 20, 0, 34, 3, 311, 0)",[{'name': 'Ingrid Burrington'}],Ingrid Burrington,{'name': 'Ingrid Burrington'},"[{'term': 'Next Architecture', 'scheme': None,...",https://www.oreilly.com/radar/?p=10536,False,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/how-to-deploy-in...,0,https://www.oreilly.com/radar/how-to-deploy-in...
9,Controlled chaos: The inevitable marriage of D...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/controlled-chaos...,"Thu, 07 Nov 2019 20:00:30 +0000","(2019, 11, 7, 20, 0, 30, 3, 311, 0)",[{'name': 'Kelly Shortridge'}],Kelly Shortridge,{'name': 'Kelly Shortridge'},"[{'term': 'Next Architecture', 'scheme': None,...",https://www.oreilly.com/radar/?p=10561,False,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/controlled-chaos...,0,https://www.oreilly.com/radar/controlled-chaos...


### 10. Count the number of entries per author and sort them in descending order.

In [32]:
df[["author","title"]].groupby('author').count().sort_values(by="title",ascending=False)

Unnamed: 0_level_0,title
author,Unnamed: 1_level_1
Nat Torkington,3
Mac Slocum,2
Allen Holub,1
Ben Lorica and Mike Loukides,1
Birgitta Boeckeler,1
Brian Sletten,1
Cheryl Hung,1
Dave Cheney,1
Ingrid Burrington,1
Jenn Webb,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [39]:
lenght = [len(df['title'][i]) for i in range(len(df['title']))]
df['lenght']=lenght
df[['author','title','lenght']].sort_values('lenght',ascending=False)

Unnamed: 0,author,title,lenght
16,Brian Sletten,Modern machine learning architectures: Data an...,76
3,Mac Slocum,Highlights from the O’Reilly Software Architec...,76
9,Kelly Shortridge,Controlled chaos: The inevitable marriage of D...,64
4,Mac Slocum,Highlights from the O’Reilly Velocity Conferen...,63
7,Lena Reinhard,"Building high-performing engineering teams, on...",63
8,Ingrid Burrington,How to deploy infrastructure in just 13.8 bill...,55
12,Allen Holub,"The three-headed dog: Architecture, process, s...",54
1,Jenn Webb,Bitcoin and the disruption of monetary oppression,49
10,Jennifer Davis,The ultimate guide to complicated systems,41
6,Dave Cheney,5 things Go taught me about open source?,40


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [47]:
summ=df['summary']
summ

0     WebAssembly.sh an open source terminal that us...
1     In this interview from O&#8217;Reilly Foo Camp...
2     Probabilistic Scripts for Automating Common-Se...
3     Experts from across the software architecture ...
4     People from across the cloud native and distri...
5     This is a keynote highlight from the O&#8217;R...
6     This is a keynote highlight from the O&#8217;R...
7     This is a keynote highlight from the O&#8217;R...
8     This is a keynote highlight from the O&#8217;R...
9     This is a keynote highlight from the O&#8217;R...
10    This is a keynote highlight from the O&#8217;R...
11    This is a keynote highlight from the O&#8217;R...
12    This is a keynote highlight from the O&#8217;R...
13    Deepfakes have been very much in the news for ...
14    5G trends 5G networks get so much commentary t...
15    DNS Wars &#8212; But perhaps the position make...
16    This is a keynote highlight from the O&#8217;R...
17    This is a keynote highlight from the O&#82

In [45]:
ml=[i for i in summ if re.findall('\w*[Mm]achine [Ll]earning\w*',i)]
ml

[]

In [48]:
ml2=[i for i in lst if re.findall('\w*[Mm]achine [Ll]earning\w*',i)]
ml2

['Modern machine learning architectures: Data and hardware and platform, oh my']