# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [30]:
import feedparser
import regex as re

### 1. Use feedparser to parse the following RSS feed URL.

In [4]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [5]:
feedburner = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [6]:
list(feedburner.keys())

['feed',
 'entries',
 'bozo',
 'headers',
 'etag',
 'updated',
 'updated_parsed',
 'href',
 'status',
 'encoding',
 'version',
 'namespaces']

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [7]:
list(feedburner['feed'].keys())

['title',
 'title_detail',
 'links',
 'link',
 'subtitle',
 'subtitle_detail',
 'updated',
 'updated_parsed',
 'language',
 'sy_updateperiod',
 'sy_updatefrequency',
 'generator_detail',
 'generator',
 'feedburner_info',
 'geo_lat',
 'geo_long',
 'feedburner_emailserviceid',
 'feedburner_feedburnerhostname']

### 4. Extract and print the feed title, subtitle, author, and link.

In [8]:
for i in ['title', 'subtitle', 'link']:
    print(f'{i}: {feedburner["feed"][i]}')

title: Radar
subtitle: Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
link: https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [13]:
len([i for i in feedburner.entries])


60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [23]:
feedburner.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [33]:
titles = [i["title"] for i in feedburner.entries

### 8. Calculate the percentage of "Four short links" entry titles.

In [51]:
longitud_total = len(titles)

longitud_short_links = len([i for i in titles if re.findall("four short links", i.lower())])

porcentaje = float((longitud_short_links / longitud_total))

porcentaje

0.5333333333333333

### 9. Create a Pandas data frame from the feed's entries.

In [52]:
import pandas as pd

In [95]:
entries_df = pd.DataFrame(feedburner.entries)

### 10. Count the number of entries per author and sort them in descending order.

In [79]:
count = entries_df[["title","author"]].groupby("author").count().sort_values("title", ascending=False)

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [89]:
entries_df["length_title"] = entries_df["title"].apply(lambda x: len(x))

entries_df_length = entries_df[["title", "author", "length_title"]].sort_values("length_title", ascending= False)

### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [105]:
machine_learning_list = [i for i in entries_df["summary"].apply(lambda x: x if re.findall("machine learning", x) else "").tolist() if i != ""]
