# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [67]:
import feedparser
import re

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [6]:
feeds=feedparser.parse('http://feeds.feedburner.com/oreilly/radar/atom')

### 2. Obtain a list of components (keys) that are available for this feed.

In [7]:
feeds.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [8]:
feeds.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [16]:
print(feeds.feed.title, feeds.feed.subtitle)


Radar Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology


[{'title': 'Low-Code and the Democratization of Programming',
  'title_detail': {'type': 'text/plain',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Low-Code and the Democratization of Programming'},
  'links': [{'rel': 'alternate',
    'type': 'text/html',
    'href': 'https://www.oreilly.com/radar/low-code-and-the-democratization-of-programming/'}],
  'link': 'https://www.oreilly.com/radar/low-code-and-the-democratization-of-programming/',
  'comments': 'https://www.oreilly.com/radar/low-code-and-the-democratization-of-programming/#respond',
  'published': 'Tue, 16 Nov 2021 12:36:18 +0000',
  'published_parsed': time.struct_time(tm_year=2021, tm_mon=11, tm_mday=16, tm_hour=12, tm_min=36, tm_sec=18, tm_wday=1, tm_yday=320, tm_isdst=0),
  'authors': [{}],
  'author': '',
  'tags': [{'term': 'Programming', 'scheme': None, 'label': None},
   {'term': 'Signals', 'scheme': None, 'label': None}],
  'id': 'https://www.oreilly.com/radar/?p=1408

### 5. Count the number of entries that are contained in this RSS feed.

In [30]:
number_entries=len(feeds.entries)
print(number_entries)

60


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [21]:
feeds.entries[1].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

### 7. Extract a list of entry titles.

In [20]:
feeds.entries[1].title

'Remote Teams in ML/AI'

### 8. Calculate the percentage of "Four short links" entry titles.

In [31]:
(len([entrie.title for entrie in feeds.entries if entrie.title.startswith("Four")])/number_entries)*100

20.0

### 9. Create a Pandas data frame from the feed's entries.

In [22]:
import pandas as pd

In [25]:
df = pd.DataFrame(feeds.entries)
df.head(10)

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,author_detail
0,Low-Code and the Democratization of Programming,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/low-code-and-the...,https://www.oreilly.com/radar/low-code-and-the...,"Tue, 16 Nov 2021 12:36:18 +0000","(2021, 11, 16, 12, 36, 18, 1, 320, 0)",[{}],,"[{'term': 'Programming', 'scheme': None, 'labe...",https://www.oreilly.com/radar/?p=14083,False,"In the past decade, the growth in low-code and...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/low-code-and-the...,0,
1,Remote Teams in ML/AI,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/remote-teams-in-...,https://www.oreilly.com/radar/remote-teams-in-...,"Tue, 09 Nov 2021 14:05:48 +0000","(2021, 11, 9, 14, 5, 48, 1, 313, 0)",[{'name': 'Q McCallum'}],Q McCallum,"[{'term': 'Building a data culture', 'scheme':...",https://www.oreilly.com/radar/?p=14075,False,I&#8217;m well-versed in the ups and downs of ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/remote-teams-in-...,0,{'name': 'Q McCallum'}
2,Radar trends to watch: November 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 02 Nov 2021 11:40:17 +0000","(2021, 11, 2, 11, 40, 17, 1, 306, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14066,False,While October’s news was dominated by Facebook...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,{'name': 'Mike Loukides'}
3,The Sobering Truth About the Impact of Your Bu...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/the-sobering-tru...,https://www.oreilly.com/radar/the-sobering-tru...,"Tue, 26 Oct 2021 13:07:58 +0000","(2021, 10, 26, 13, 7, 58, 1, 299, 0)","[{'name': 'Eric Colson, Daragh Sibley and Dave...","Eric Colson, Daragh Sibley and Dave Spiegel","[{'term': 'Business', 'scheme': None, 'label':...",https://www.oreilly.com/radar/?p=14041,False,The introduction of data science into the busi...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-sobering-tru...,0,"{'name': 'Eric Colson, Daragh Sibley and Dave ..."
4,MLOps and DevOps: Why Data Makes It Different,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/mlops-and-devops...,https://www.oreilly.com/radar/mlops-and-devops...,"Tue, 19 Oct 2021 14:17:38 +0000","(2021, 10, 19, 14, 17, 38, 1, 292, 0)",[{'name': 'Ville Tuulos and Hugo Bowne-Anderso...,Ville Tuulos and Hugo Bowne-Anderson,"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14018,False,Much has been written about struggles of deplo...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/mlops-and-devops...,0,{'name': 'Ville Tuulos and Hugo Bowne-Anderson'}
5,The Quality of Auto-Generated Code,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/the-quality-of-a...,https://www.oreilly.com/radar/the-quality-of-a...,"Tue, 12 Oct 2021 13:45:10 +0000","(2021, 10, 12, 13, 45, 10, 1, 285, 0)",[{'name': 'Mike Loukides and Kevlin Henney'}],Mike Loukides and Kevlin Henney,"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14007,False,Kevlin Henney and I were riffing on some ideas...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-quality-of-a...,0,{'name': 'Mike Loukides and Kevlin Henney'}
6,Radar trends to watch: October 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 05 Oct 2021 11:42:52 +0000","(2021, 10, 5, 11, 42, 52, 1, 278, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14000,False,The unwilling star of this month’s trends is c...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,{'name': 'Mike Loukides'}
7,Ethical Social Media: Oxymoron or Attainable G...,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/ethical-social-m...,https://www.oreilly.com/radar/ethical-social-m...,"Tue, 21 Sep 2021 11:55:27 +0000","(2021, 9, 21, 11, 55, 27, 1, 264, 0)",[{'name': 'Mike Barlow'}],Mike Barlow,"[{'term': 'Social Media', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13981,False,Humans have wrestled with ethics for millennia...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/ethical-social-m...,0,{'name': 'Mike Barlow'}
8,2021 Data/AI Salary Survey,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/2021-data-ai-sal...,https://www.oreilly.com/radar/2021-data-ai-sal...,"Wed, 15 Sep 2021 11:32:26 +0000","(2021, 9, 15, 11, 32, 26, 2, 258, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13950,False,"In June 2021, we asked the recipients of our&#...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/2021-data-ai-sal...,0,{'name': 'Mike Loukides'}
9,Radar trends to watch: September 2021,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Wed, 01 Sep 2021 12:18:33 +0000","(2021, 9, 1, 12, 18, 33, 2, 244, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13943,False,Let’s start with a moment of silence for O’Rei...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,{'name': 'Mike Loukides'}


### 10. Count the number of entries per author and sort them in descending order.

In [40]:
df.author.value_counts()

Mike Loukides                                  27
Nat Torkington                                 12
                                                4
Chris Butler                                    3
Tim O’Reilly                                    3
Q McCallum                                      2
Hugo Bowne-Anderson                             1
Shayan Mohanty and Hugo Bowne-Anderson          1
Eric Colson, Daragh Sibley and Dave Spiegel     1
Mike Barlow                                     1
Ville Tuulos and Hugo Bowne-Anderson            1
Kevlin Henney                                   1
Nitesh Dhanjani                                 1
Mike Loukides and Kevlin Henney                 1
Patrick Hall and Ayoub Ouederni                 1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [62]:
df['length']= [len(titulo) for titulo in df.title]
df_return=df[['title', 'author', 'length']]
df_return=df_return.sort_values("length", ascending=False)

### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [113]:
list_ml=[]
for i in range(1,60):
    if re.search(r"machine learning", df.summary[i])!=None:
        list_ml.append(df.title[i])    
#no es la solución más elegante pero funciona :p