# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
RSS = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
RSS.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
RSS_feed = RSS.feed
RSS_feed.keys()

dict_keys(['title', 'title_detail', 'id', 'guidislink', 'link', 'updated', 'updated_parsed', 'subtitle', 'subtitle_detail', 'links', 'authors', 'author_detail', 'author', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [7]:
title = RSS_feed.title
print(title)
subtitle = RSS_feed.subtitle
print(subtitle)
author = RSS_feed.author
print(author)
link = RSS_feed.link
print(link)

All - O'Reilly Media
All of our Ideas and Learning material from all of our topics.
O'Reilly Media
https://www.oreilly.com


### 5. Count the number of entries that are contained in this RSS feed.

In [11]:
len(RSS.entries)

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [12]:
RSS.entries[0].keys()

dict_keys(['title', 'title_detail', 'updated', 'updated_parsed', 'id', 'guidislink', 'link', 'content', 'summary', 'links', 'authors', 'author_detail', 'author', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [13]:
titles = [] 
for i in range(len(RSS.entries)):
    titles.append(RSS.entries[i].title)
titles

['Four short links: 17 July 2019',
 'Four short links: 16 July 2019',
 'Managing machine learning in the enterprise: Lessons from banking and health care',
 'Four short links: 15 July 2019',
 'Four short links: 12 July 2019',
 'Four short links: 11 July 2019',
 'Four short links: 10 July 2019',
 'Four short links: 9 July 2019',
 'The circle of fairness',
 "Highlights from the O'Reilly Artificial Intelligence Conference in Beijing 2019",
 'The future of hiring and the talent market with AI',
 'The future of machine learning is tiny',
 'Designing computer hardware for artificial intelligence',
 'Data orchestration for AI, big data, and cloud',
 'Top AI breakthroughs you need to know',
 'AI and systems at RISELab',
 'Toward learned algorithms, data structures, and systems',
 'AI and retail',
 'Four short links: 8 July 2019',
 'Four short links: 5 July 2019',
 'Four short links: 4 July 2019',
 'Tools for machine learning development',
 'New live online training courses',
 'Four short links

### 8. Calculate the percentage of "Four short links" entry titles.

In [21]:
count = 0
for title in titles:
    if title.startswith('Four short links'):
        count += 1
    else:
        continue
print(f'The percentage of four short link posts is: {(count/len(titles))*100}%')

The percentage of four short link posts is: 41.66666666666667%


### 9. Create a Pandas data frame from the feed's entries.

In [22]:
import pandas as pd

In [23]:
entries_df = pd.DataFrame(RSS.entries)
entries_df

Unnamed: 0,author,author_detail,authors,content,feedburner_origlink,guidislink,id,link,links,summary,title,title_detail,updated,updated_parsed
0,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-17:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Margaret Hamilton, WeChat Censorship, R...",Four short links: 17 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-17T08:00:00Z,"(2019, 7, 17, 8, 0, 0, 2, 198, 0)"
1,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-16:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Quantum TiqTaqToe, Social Media and Dep...",Four short links: 16 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-16T15:10:00Z,"(2019, 7, 16, 15, 10, 0, 1, 197, 0)"
2,"Ben Lorica, Harish Doddi, David Talby","{'name': 'Ben Lorica, Harish Doddi, David Talby'}","[{'name': 'Ben Lorica, Harish Doddi, David Tal...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/managing-machine...,True,"tag:www.oreilly.com,2019-07-15:/ideas/managing...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,Managing machine learning in the enterprise: L...,"{'type': 'text/plain', 'language': None, 'base...",2019-07-15T11:00:00Z,"(2019, 7, 15, 11, 0, 0, 0, 196, 0)"
3,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-15:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Climbing Robot, Programming and Program...",Four short links: 15 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-15T08:00:00Z,"(2019, 7, 15, 8, 0, 0, 0, 196, 0)"
4,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-12:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Hosting Hate, Releasing, Government Inn...",Four short links: 12 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-12T10:50:00Z,"(2019, 7, 12, 10, 50, 0, 4, 193, 0)"
5,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-11:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Museum Copyright, Twitter Apprenticeshi...",Four short links: 11 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-11T12:10:00Z,"(2019, 7, 11, 12, 10, 0, 3, 192, 0)"
6,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-10:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Optimizations and Security, 512 Byte Pa...",Four short links: 10 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-10T12:45:00Z,"(2019, 7, 10, 12, 45, 0, 2, 191, 0)"
7,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/four-short-links...,True,"tag:www.oreilly.com,2019-07-09:/ideas/four-sho...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,"<p><em>Future of Work, GRANDstack, Hilarious L...",Four short links: 9 July 2019,"{'type': 'text/plain', 'language': None, 'base...",2019-07-09T11:40:00Z,"(2019, 7, 9, 11, 40, 0, 1, 190, 0)"
8,Mike Loukides,{'name': 'Mike Loukides'},[{'name': 'Mike Loukides'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/the-circle-of-fa...,True,"tag:www.oreilly.com,2019-07-09:/ideas/the-circ...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,The circle of fairness,"{'type': 'text/plain', 'language': None, 'base...",2019-07-09T11:00:00Z,"(2019, 7, 9, 11, 0, 0, 1, 190, 0)"
9,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/highlights-from-...,True,"tag:www.oreilly.com,2019-07-08:/ideas/highligh...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,Highlights from the O'Reilly Artificial Intell...,"{'type': 'text/plain', 'language': None, 'base...",2019-07-08T15:51:00Z,"(2019, 7, 8, 15, 51, 0, 0, 189, 0)"


### 10. Count the number of entries per author and sort them in descending order.

In [25]:
authors = entries_df.groupby('author', as_index = False).agg({'title':'count'})#agg con diccionario de columna y método
authors = authors.sort_values('title', ascending=False)
authors

Unnamed: 0,author,title
22,Nat Torkington,25
3,Ben Lorica,4
13,Jenn Webb,3
4,"Ben Lorica, Harish Doddi, David Talby",2
0,Abigail Hing Wen,1
16,Maria Zhang,1
27,Tim Kraska,1
26,Rebecca Wirfs-Brock,1
25,"Rebecca Parsons, Neal Ford",1
24,Pete Warden,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [28]:
entries_11 = entries_df[['title', 'author']]
entries_11['title lenght'] = entries_11['title'].apply(lambda x: len(x))
entries_11

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,title,author,title lenght
0,Four short links: 17 July 2019,Nat Torkington,30
1,Four short links: 16 July 2019,Nat Torkington,30
2,Managing machine learning in the enterprise: L...,"Ben Lorica, Harish Doddi, David Talby",81
3,Four short links: 15 July 2019,Nat Torkington,30
4,Four short links: 12 July 2019,Nat Torkington,30
5,Four short links: 11 July 2019,Nat Torkington,30
6,Four short links: 10 July 2019,Nat Torkington,30
7,Four short links: 9 July 2019,Nat Torkington,29
8,The circle of fairness,Mike Loukides,22
9,Highlights from the O'Reilly Artificial Intell...,Jenn Webb,79


In [29]:
entries_11 = entries_11.sort_values('title lenght', ascending=False)
entries_11 = entries_11.reset_index()
entries_11

Unnamed: 0,index,title,author,title lenght
0,26,RISELab’s AutoPandas hints at automation tech ...,Ben Lorica,97
1,2,Managing machine learning in the enterprise: L...,"Ben Lorica, Harish Doddi, David Talby",81
2,47,Infrastructure first: Because solving complex ...,Everett Harper,81
3,9,Highlights from the O'Reilly Artificial Intell...,Jenn Webb,79
4,51,Highlights from the O'Reilly Software Architec...,Jenn Webb,78
5,36,Enabling end-to-end machine learning pipelines...,Ben Lorica,73
6,31,AI and machine learning will require retrainin...,Ben Lorica,72
7,58,Channel into the universe of eventually perfec...,Lena Hall,67
8,59,Scaling teams with technology (or is it the ot...,Chen Goldberg,62
9,43,Prioritizing technical debt as if time and mon...,Adam Tornhill,57


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [31]:
entries_ml = entries_df[entries_df['summary'].str.contains('machine learning')]
entries_ml

Unnamed: 0,author,author_detail,authors,content,feedburner_origlink,guidislink,id,link,links,summary,title,title_detail,updated,updated_parsed
2,"Ben Lorica, Harish Doddi, David Talby","{'name': 'Ben Lorica, Harish Doddi, David Talby'}","[{'name': 'Ben Lorica, Harish Doddi, David Tal...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/managing-machine...,True,"tag:www.oreilly.com,2019-07-15:/ideas/managing...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,Managing machine learning in the enterprise: L...,"{'type': 'text/plain', 'language': None, 'base...",2019-07-15T11:00:00Z,"(2019, 7, 15, 11, 0, 0, 0, 196, 0)"
9,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/highlights-from-...,True,"tag:www.oreilly.com,2019-07-08:/ideas/highligh...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,Highlights from the O'Reilly Artificial Intell...,"{'type': 'text/plain', 'language': None, 'base...",2019-07-08T15:51:00Z,"(2019, 7, 8, 15, 51, 0, 0, 189, 0)"
11,Pete Warden,{'name': 'Pete Warden'},[{'name': 'Pete Warden'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/the-future-of-ma...,True,"tag:www.oreilly.com,2019-07-08:/ideas/the-futu...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,The future of machine learning is tiny,"{'type': 'text/plain', 'language': None, 'base...",2019-07-08T15:50:00Z,"(2019, 7, 8, 15, 50, 0, 0, 189, 0)"
21,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/tools-for-machin...,True,"tag:www.oreilly.com,2019-07-03:/ideas/tools-fo...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,Tools for machine learning development,"{'type': 'text/plain', 'language': None, 'base...",2019-07-03T13:35:00Z,"(2019, 7, 3, 13, 35, 0, 2, 184, 0)"
22,,,,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/new-live-online-...,True,"tag:www.oreilly.com,2019-07-03:/ideas/new-live...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,New live online training courses,"{'type': 'text/plain', 'language': None, 'base...",2019-07-03T11:20:00Z,"(2019, 7, 3, 11, 20, 0, 2, 184, 0)"
26,Ben Lorica,{'name': 'Ben Lorica'},[{'name': 'Ben Lorica'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/riselabs-autopan...,True,"tag:www.oreilly.com,2019-07-01:/ideas/riselabs...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,RISELab’s AutoPandas hints at automation tech ...,"{'type': 'text/plain', 'language': None, 'base...",2019-07-01T11:00:00Z,"(2019, 7, 1, 11, 0, 0, 0, 182, 0)"
31,Ben Lorica,{'name': 'Ben Lorica'},[{'name': 'Ben Lorica'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/ai-and-machine-l...,True,"tag:www.oreilly.com,2019-06-26:/ideas/ai-and-m...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,AI and machine learning will require retrainin...,"{'type': 'text/plain', 'language': None, 'base...",2019-06-26T11:00:00Z,"(2019, 6, 26, 11, 0, 0, 2, 177, 0)"
36,Ben Lorica,{'name': 'Ben Lorica'},[{'name': 'Ben Lorica'}],"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/enabling-end-to-...,True,"tag:www.oreilly.com,2019-06-20:/ideas/enabling...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,Enabling end-to-end machine learning pipelines...,"{'type': 'text/plain', 'language': None, 'base...",2019-06-20T11:50:00Z,"(2019, 6, 20, 11, 50, 0, 3, 171, 0)"
38,"Ben Lorica, Harish Doddi, David Talby","{'name': 'Ben Lorica, Harish Doddi, David Talby'}","[{'name': 'Ben Lorica, Harish Doddi, David Tal...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/what-are-model-g...,True,"tag:www.oreilly.com,2019-06-19:/ideas/what-are...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,What are model governance and model operations?,"{'type': 'text/plain', 'language': None, 'base...",2019-06-19T11:00:00Z,"(2019, 6, 19, 11, 0, 0, 2, 170, 0)"
40,"Ihab Ilyas, Ben Lorica","{'name': 'Ihab Ilyas, Ben Lorica'}","[{'name': 'Ihab Ilyas, Ben Lorica'}]","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/ideas/the-quest-for-hi...,True,"tag:www.oreilly.com,2019-06-18:/ideas/the-ques...",http://feedproxy.google.com/~r/oreilly/radar/a...,[{'href': 'http://feedproxy.google.com/~r/orei...,<p><img src='https://d3ucjech6zwjp8.cloudfront...,The quest for high-quality data,"{'type': 'text/plain', 'language': None, 'base...",2019-06-18T11:00:00Z,"(2019, 6, 18, 11, 0, 0, 1, 169, 0)"
