# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [4]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [5]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [8]:
rss_parser = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [13]:
print(rss_parser.keys())

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [14]:
print(rss_parser.feed.keys())

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])


In [22]:
print(rss_parser.feed.link)

https://www.oreilly.com/radar


### 4. Extract and print the feed title, subtitle, author, and link.

In [34]:
rss_list=[rss_parser.feed.title,rss_parser.feed.subtitle,rss_parser.feed.subtitle,rss_parser.entries[0].authors[0].name,rss_parser.feed.link]
print(rss_list)

['Radar', 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology', 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology', 'Nat Torkington', 'https://www.oreilly.com/radar']


### 5. Count the number of entries that are contained in this RSS feed.

In [41]:
print(len(rss_parser.entries))

60


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [46]:
print(rss_parser.entries[0].keys())

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])


### 7. Extract a list of entry titles.

In [49]:
titles = [rss_parser.entries[i].title for i in range(len(rss_parser.entries))]
print(titles)

['Four short links: 13 February 2020', 'The state of data quality in 2020', 'Four short links: 12 February 2020', 'Four short links: 11 February 2020', 'Four short links: 10 February 2020', 'Four short links: 7 February 2020', 'Radar trends to watch: February 2020', 'Four short links: 6 February 2020', 'Four short links: 5 February 2020', 'Four short links: 4 February 2020', 'AI meets operations', 'Four short links: 3 February 2020', 'Four short links: 31 January 2020', 'Four short links: 30 January 2020', 'Four short links: 29 January 2020', 'Four short links: 28 January 2020', 'Four short links: 27 January 2020', 'Four short links: 24 January 2020', 'Four short links: 23 January 2020', 'Four short links: 22 January 2020', 'Four short links: 21 January 2020', 'Four short links: 20 January 2020', 'Four short links: 17 January 2020', 'Four short links: 16 January 2020', 'Reinforcement learning for the real world', 'Four short links: 15 January 2020', 'Four short links: 14 January 2020',

### 8. Calculate the percentage of "Four short links" entry titles.

In [151]:
titles_fsl = [rss_parser.entries[i].title for i in range(len(rss_parser.entries)) if rss_parser.entries[i].title.find('Four short links')!=-1]
perc_fsl = len(titles_fsl)/len(titles)
print("{0:.2f}%".format(perc_fsl * 100))

78.33%


### 9. Create a Pandas data frame from the feed's entries.

In [62]:
import pandas as pd

In [89]:
columns = list(rss_parser.entries)
df = pd.DataFrame(rss_parser.entries)
df.head()

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=11952,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 13 Feb 2020 05:01:00 +0000","(2020, 2, 13, 5, 1, 0, 3, 44, 0)",0,Ofcom To Regulate UK Internet &#8212; The regu...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 13 February 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
1,Roger Magoulas and Steve Swoyer,{'name': 'Roger Magoulas and Steve Swoyer'},[{'name': 'Roger Magoulas and Steve Swoyer'}],https://www.oreilly.com/radar/the-state-of-dat...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-state-of-dat...,False,https://www.oreilly.com/radar/?p=11549,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 12 Feb 2020 06:00:00 +0000","(2020, 2, 12, 6, 0, 0, 2, 43, 0)",0,We suspected that data quality was a topic bri...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",The state of data quality in 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/the-state-of-dat...
2,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=11936,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 12 Feb 2020 05:01:00 +0000","(2020, 2, 12, 5, 1, 0, 2, 43, 0)",0,Drafting an Engineering Strategy (Mathias Meye...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 12 February 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
3,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=11773,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 11 Feb 2020 05:01:00 +0000","(2020, 2, 11, 5, 1, 0, 1, 42, 0)",0,The Fate of Empires &#8212; 1977 text summariz...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 11 February 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
4,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=11745,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 10 Feb 2020 05:01:00 +0000","(2020, 2, 10, 5, 1, 0, 0, 41, 0)",0,The Digital Dictators: How Technology Strength...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 10 February 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [92]:
df.groupby('author').size().reset_index(name='counts').sort_values(by='counts', ascending=False)

Unnamed: 0,author,counts
4,Nat Torkington,47
3,Mike Loukides,4
0,,2
2,Jenn Webb,2
1,Alison McCauley,1
5,Patrick Hall and Andrew Burt,1
6,Roger Magoulas,1
7,Roger Magoulas and Steve Swoyer,1
8,Zan McQuade and Amanda Quinn,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [105]:
df['title_length']=df.title.apply(lambda x:len(x))
df_out= df[['title','author','title_length']].sort_values(by='title_length', ascending=False)
df_out.head()

Unnamed: 0,title,author,title_length
54,5 industries that demonstrate how blockchains ...,Alison McCauley,63
57,Why you should care about debugging machine le...,Patrick Hall and Andrew Burt,59
27,Where programming languages are headed in 2020,Zan McQuade and Amanda Quinn,46
51,AI is computer science disguised as hard work,Jenn Webb,45
24,Reinforcement learning for the real world,Jenn Webb,41


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [149]:
df[df.summary.str.lower().str.contains('machine learning', regex=False)]
# df[df.summary.str.lower()find('machine learning')!=-1].title

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,...,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss,len,title_length
0,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=11952,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",...,"(2020, 2, 13, 5, 1, 0, 3, 44, 0)",0,Ofcom To Regulate UK Internet &#8212; The regu...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 13 February 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,34,34
7,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=11682,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",...,"(2020, 2, 6, 5, 1, 0, 3, 37, 0)",0,Assembler &#8212; Google&#8217;s Jigsaw group ...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 6 February 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,33,33
15,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=11581,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",...,"(2020, 1, 28, 5, 1, 0, 1, 28, 0)",0,TinyML Book &#8212; machine learning for embed...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 28 January 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,33,33
28,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=11495,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",...,"(2020, 1, 13, 5, 1, 0, 0, 13, 0)",0,Simulated Customer &#8212; The site will rando...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 13 January 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,33,33
57,Patrick Hall and Andrew Burt,{'name': 'Patrick Hall and Andrew Burt'},[{'name': 'Patrick Hall and Andrew Burt'}],https://www.oreilly.com/radar/why-you-should-c...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/why-you-should-c...,False,https://www.oreilly.com/radar/?p=11197,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",...,"(2019, 12, 12, 11, 0, 0, 3, 346, 0)",0,For all the excitement about machine learning ...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Why you should care about debugging machine le...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/why-you-should-c...,59,59
