# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [6]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [7]:
data = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [8]:
data.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [22]:
data.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [32]:
data.feed.title

'Radar'

In [33]:
data.feed.subtitle

'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'

In [37]:
data.feed.feedburner_emailserviceid

'oreilly/radar/atom'

In [38]:
data.feed.link

'https://www.oreilly.com/radar'

### 5. Count the number of entries that are contained in this RSS feed.

In [45]:
titles = [data.entries[i].title for i in range(len(data.entries))]
print(titles)

['Four short links: 2 October 2019', 'Four short links: 1 October 2019', 'TinyML: The challenges and opportunities of low-power ML applications', 'Four short links: 30 September 2019', 'Highlights from the Strata Data Conference in New York 2019', 'Four short links: 27 September 2019', 'Data Science Pioneers: Conquering the next frontier, a documentary investigating the future of data science', 'Postrevolutionary big data: Promoting the general welfare', 'Say what? The ethical challenges of designing for humanlike interaction', 'RL in real life: Bringing reinforcement learning to the enterprise', 'Strata Data Awards winners 2019', 'Staying safe in the AI era', 'Data sonification: Making music from the yield curve', 'Unlocking the value of your data', 'Four Short Links: 26 September 2019', 'The future of Google Cloud data processing', 'Interactive sports analytics', 'AI isn’t magic. It’s computer science.']


In [46]:
print("Number of entries: ",len(titles))

Number of entries:  18


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [44]:
data.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [47]:
print(titles)

['Four short links: 2 October 2019', 'Four short links: 1 October 2019', 'TinyML: The challenges and opportunities of low-power ML applications', 'Four short links: 30 September 2019', 'Highlights from the Strata Data Conference in New York 2019', 'Four short links: 27 September 2019', 'Data Science Pioneers: Conquering the next frontier, a documentary investigating the future of data science', 'Postrevolutionary big data: Promoting the general welfare', 'Say what? The ethical challenges of designing for humanlike interaction', 'RL in real life: Bringing reinforcement learning to the enterprise', 'Strata Data Awards winners 2019', 'Staying safe in the AI era', 'Data sonification: Making music from the yield curve', 'Unlocking the value of your data', 'Four Short Links: 26 September 2019', 'The future of Google Cloud data processing', 'Interactive sports analytics', 'AI isn’t magic. It’s computer science.']


### 8. Calculate the percentage of "Four short links" entry titles.

In [55]:
count=0
for title in titles:
    title=title.lower()
    if "four short links" in title:
        count+=1
print(count)

5


In [73]:
percentage=round(count/len(title)*100,2) #arredondando para 2 casas decimais

print("Percentage:",percentage,"%.")

Percentage: 13.16 %.


### 9. Create a Pandas data frame from the feed's entries.

In [74]:
import pandas as pd

In [76]:
df = pd.DataFrame(data.entries)
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Four short links: 2 October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 02 Oct 2019 04:01:09 +0000","(2019, 10, 2, 4, 1, 9, 2, 275, 0)",[{'email': 'jwebb@oreilly.com'}],jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9832,False,Data Fallacies to Avoid &#8212; nifty infograp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
1,Four short links: 1 October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 01 Oct 2019 04:05:11 +0000","(2019, 10, 1, 4, 5, 11, 1, 274, 0)",[{'email': 'jwebb@oreilly.com'}],jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9792,False,Just Enough Research &#8212; a book that comes...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
2,TinyML: The challenges and opportunities of lo...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/tinyml-the-chall...,"Tue, 01 Oct 2019 04:01:53 +0000","(2019, 10, 1, 4, 1, 53, 1, 274, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': '~home', 'scheme': None, 'label': No...",https://www.oreilly.com/radar/?p=9378,False,Pete Warden has an ambitious goal: he wants to...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/tinyml-the-chall...,0,https://www.oreilly.com/radar/tinyml-the-chall...
3,Four short links: 30 September 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 30 Sep 2019 04:01:40 +0000","(2019, 9, 30, 4, 1, 40, 0, 273, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9768,False,Stamos on CLOUD Act — cogent and informative s...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
4,Highlights from the Strata Data Conference in ...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/highlights-from-...,"Fri, 27 Sep 2019 12:00:52 +0000","(2019, 9, 27, 12, 0, 52, 4, 270, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=9487,False,People from across the data world came togethe...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,0,https://www.oreilly.com/radar/highlights-from-...


### 10. Count the number of entries per author and sort them in descending order.

In [77]:
authors = df.groupby('author', as_index=False).agg({'title':'count'})
authors.columns = ['author', 'entries']
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
0,Mac Slocum,16
1,jwebb@oreilly.com,2


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [82]:
[len(tit) for tit in df["title"]]

[32, 32, 69, 35, 59, 35, 107, 57, 71, 66, 31, 26, 52, 32, 35, 42, 28, 38]

In [86]:
df["length"]=[len(tit) for tit in df["title"]]

In [94]:
df2 = df[['author', 'title', 'length']]
df2.sort_values('length', ascending=False)

Unnamed: 0,author,title,length
6,Mac Slocum,Data Science Pioneers: Conquering the next fro...,107
8,Mac Slocum,Say what? The ethical challenges of designing ...,71
2,Mac Slocum,TinyML: The challenges and opportunities of lo...,69
9,Mac Slocum,RL in real life: Bringing reinforcement learni...,66
4,Mac Slocum,Highlights from the Strata Data Conference in ...,59
7,Mac Slocum,Postrevolutionary big data: Promoting the gene...,57
12,Mac Slocum,Data sonification: Making music from the yield...,52
15,Mac Slocum,The future of Google Cloud data processing,42
17,Mac Slocum,AI isn’t magic. It’s computer science.,38
5,Mac Slocum,Four short links: 27 September 2019,35


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [111]:
df["summary"]=[y.lower() for y in df["summary"]]

In [113]:
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,length
0,Four short links: 2 October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 02 Oct 2019 04:01:09 +0000","(2019, 10, 2, 4, 1, 9, 2, 275, 0)",[{'email': 'jwebb@oreilly.com'}],jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9832,False,data fallacies to avoid &#8212; nifty infograp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,32
1,Four short links: 1 October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 01 Oct 2019 04:05:11 +0000","(2019, 10, 1, 4, 5, 11, 1, 274, 0)",[{'email': 'jwebb@oreilly.com'}],jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9792,False,just enough research &#8212; a book that comes...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,32
2,TinyML: The challenges and opportunities of lo...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/tinyml-the-chall...,"Tue, 01 Oct 2019 04:01:53 +0000","(2019, 10, 1, 4, 1, 53, 1, 274, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': '~home', 'scheme': None, 'label': No...",https://www.oreilly.com/radar/?p=9378,False,pete warden has an ambitious goal: he wants to...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/tinyml-the-chall...,0,https://www.oreilly.com/radar/tinyml-the-chall...,69
3,Four short links: 30 September 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 30 Sep 2019 04:01:40 +0000","(2019, 9, 30, 4, 1, 40, 0, 273, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9768,False,stamos on cloud act — cogent and informative s...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,35
4,Highlights from the Strata Data Conference in ...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/highlights-from-...,"Fri, 27 Sep 2019 12:00:52 +0000","(2019, 9, 27, 12, 0, 52, 4, 270, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=9487,False,people from across the data world came togethe...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,0,https://www.oreilly.com/radar/highlights-from-...,59


In [125]:
df[df["summary"].str.contains("machine learning")]["title"].values

array(['TinyML: The challenges and opportunities of low-power ML applications'], dtype=object)

In [126]:
df["summary"].str.contains("machine learning")

0     False
1     False
2      True
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
Name: summary, dtype: bool

In [129]:
df[df["summary"].str.contains("machine learning")]

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,length
2,TinyML: The challenges and opportunities of lo...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/tinyml-the-chall...,"Tue, 01 Oct 2019 04:01:53 +0000","(2019, 10, 1, 4, 1, 53, 1, 274, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': '~home', 'scheme': None, 'label': No...",https://www.oreilly.com/radar/?p=9378,False,pete warden has an ambitious goal: he wants to...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/tinyml-the-chall...,0,https://www.oreilly.com/radar/tinyml-the-chall...,69


In [107]:
b=[y for y in a if "machine learning" in y]

In [108]:
b

['pete warden has an ambitious goal: he wants to build machine learning (ml) applications that can run on a microcontroller for a year using only a hearing aid battery for power. this goal means that the system&#8217;s power consumption has to be under a milliwatt, ideally a few tens of microwatts. this power level places [&#8230;]']