# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
feedburner = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
feedburner.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
feedburner['feed'].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
feedburner['feed'].title

'Radar'

In [7]:
feedburner['feed'].subtitle

'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'

In [8]:
#there is no author, i'm not sure if this is what you mean
feedburner['feed'].feedburner_feedburnerhostname

'https://feedburner.google.com'

In [9]:
feedburner['feed'].link

'https://www.oreilly.com/radar'

### 5. Count the number of entries that are contained in this RSS feed.

In [10]:
len(feedburner.entries)

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [11]:
feedburner.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [12]:
entry_titles = [e.title for e in feedburner.entries]

### 8. Calculate the percentage of "Four short links" entry titles.

In [13]:
len(list(filter(lambda x: 'four short links' in x.lower(), entry_titles))) / 60 * 100

53.333333333333336

### 9. Create a Pandas data frame from the feed's entries.

In [14]:
import pandas as pd

In [15]:
df = pd.DataFrame(feedburner.entries)
df.head(3)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,"InfoTribes, Reality Brokers","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/infotribes-reali...,"Tue, 23 Mar 2021 14:40:55 +0000","(2021, 3, 23, 14, 40, 55, 1, 82, 0)",[{'name': 'Hugo Bowne-Anderson'}],Hugo Bowne-Anderson,{'name': 'Hugo Bowne-Anderson'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13687,False,It seems harder than ever to agree with others...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/infotribes-reali...,0,https://www.oreilly.com/radar/infotribes-reali...
1,The End of Silicon Valley as We Know It?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-end-of-silic...,"Thu, 11 Mar 2021 17:22:01 +0000","(2021, 3, 11, 17, 22, 1, 3, 70, 0)",[{'name': 'Tim O’Reilly'}],Tim O’Reilly,{'name': 'Tim O’Reilly'},"[{'term': 'Radar Column', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13665,False,"High-profile entrepreneurs like Elon Musk, ven...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-end-of-silic...,0,https://www.oreilly.com/radar/the-end-of-silic...
2,The Next Generation of AI,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-next-generat...,"Tue, 09 Mar 2021 13:46:41 +0000","(2021, 3, 9, 13, 46, 41, 1, 68, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13676,False,Programs like AlphaZero and GPT-3 are massive ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-next-generat...,0,https://www.oreilly.com/radar/the-next-generat...


### 10. Count the number of entries per author and sort them in descending order.

In [16]:
df.groupby('author').count()['title'].sort_values(ascending=False)

author
Nat Torkington                                    31
Mike Loukides                                     16
                                                   4
Alex Castrounis                                    1
Hugo Bowne-Anderson                                1
Justin Norman and Mike Loukides                    1
Kevlin Henney                                      1
Matthew Rocklin and Hugo Bowne-Anderson            1
Patrick Hall and Ayoub Ouederni                    1
Q Ethan McCallum and Mike Loukides                 1
Q Ethan McCallum, Chris Butler and Shane Glynn     1
Tim O’Reilly                                       1
Name: title, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [17]:
df['title_length'] = df['title'].apply(lambda x: len(x))
df.head(3)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,title_length
0,"InfoTribes, Reality Brokers","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/infotribes-reali...,"Tue, 23 Mar 2021 14:40:55 +0000","(2021, 3, 23, 14, 40, 55, 1, 82, 0)",[{'name': 'Hugo Bowne-Anderson'}],Hugo Bowne-Anderson,{'name': 'Hugo Bowne-Anderson'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13687,False,It seems harder than ever to agree with others...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/infotribes-reali...,0,https://www.oreilly.com/radar/infotribes-reali...,27
1,The End of Silicon Valley as We Know It?,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-end-of-silic...,"Thu, 11 Mar 2021 17:22:01 +0000","(2021, 3, 11, 17, 22, 1, 3, 70, 0)",[{'name': 'Tim O’Reilly'}],Tim O’Reilly,{'name': 'Tim O’Reilly'},"[{'term': 'Radar Column', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13665,False,"High-profile entrepreneurs like Elon Musk, ven...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-end-of-silic...,0,https://www.oreilly.com/radar/the-end-of-silic...,40
2,The Next Generation of AI,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/the-next-generat...,"Tue, 09 Mar 2021 13:46:41 +0000","(2021, 3, 9, 13, 46, 41, 1, 68, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13676,False,Programs like AlphaZero and GPT-3 are massive ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-next-generat...,0,https://www.oreilly.com/radar/the-next-generat...,25


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [18]:
machine_learning_titles = list(filter(None, list((df.apply(lambda x: x['title'] if 'machine learning' in x['summary'].lower() else None, axis=1)).values)))

In [19]:
machine_learning_titles

['5 things on our data and AI radar for 2021',
 'Seven Legal Questions for Data Scientists',
 'Four short links: 21 August 2020']