# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [2]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [4]:
oreilly = feedparser.parse(url)



### 2. Obtain a list of components (keys) that are available for this feed.

In [6]:
oreilly_components = list(oreilly.keys())
print(oreilly_components)

['bozo', 'entries', 'feed', 'headers', 'href', 'status', 'encoding', 'version', 'namespaces']


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [14]:
oreilly_feed_components = list(oreilly.feed.keys())
print(oreilly_feed_components)

['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator']
{'title': 'Radar', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Radar'}, 'links': [{'href': 'https://www.oreilly.com/radar/feed/', 'rel': 'self', 'type': 'application/rss+xml'}, {'rel': 'alternate', 'type': 'text/html', 'href': 'https://www.oreilly.com/radar'}], 'link': 'https://www.oreilly.com/radar', 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology', 'subtitle_detail': {'type': 'text/html', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'}, 'updated': 'Tue, 16 Aug 2022 15:37:30 +0000', 'updated_parsed': time.struct_time(tm_

### 4. Extract and print the feed title, subtitle, author, and link.

In [15]:
# WARNING: THERE'S NO 'AUTHOR' COMPONENT IN FEED

oreilly_feed_components_selected = (oreilly.feed.title, oreilly.feed.subtitle, oreilly.feed.link)
print(oreilly_feed_components_selected)

('Radar', 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology', 'https://www.oreilly.com/radar')


### 5. Count the number of entries that are contained in this RSS feed.

In [19]:
print(len(oreilly.entries))

15


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [20]:
entry_components = list(oreilly.entries[0].keys())
print(entry_components)

['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments']


### 7. Extract a list of entry titles.

In [21]:
entry_titles = [entry.title for entry in oreilly.entries]
print(entry_titles)

['Ad Networks and Content Marketing', 'On Technique', 'Scaling False Peaks', 'The Metaverse Is Not a Place', 'Radar Trends to Watch: August 2022', 'SQL: The Universal Solvent for REST APIs', 'Artificial Creativity?', 'Radar Trends to Watch: July 2022', '2022 Cloud Salary Survey', '“Sentience” is the Wrong Question', 'Closer to AGI?', 'Radar Trends to Watch: June 2022', 'Building a Better Middleman', 'Quantum Computing without the Hype', 'Radar trends to watch: May 2022']


### 8. Calculate the percentage of "Four short links" entry titles.

In [27]:
# WARNING, THERE ARE NO "FOUR SHORT LINKS" TITLES. CALCULATING PERCENTAGE OF "TRENDS TO WATCH" TITLES INSTEAD

counted_titles = [entry_title for entry_title in entry_titles if "Radar trends".lower() in entry_title.lower()]
percentage_titles = (len(counted_titles)/len(entry_titles))*100
print(percentage_titles)


['Radar Trends to Watch: August 2022', 'Radar Trends to Watch: July 2022', 'Radar Trends to Watch: June 2022', 'Radar trends to watch: May 2022']
26.666666666666668


### 9. Create a Pandas data frame from the feed's entries.

In [28]:
import pandas as pd

In [29]:
entries_df = pd.DataFrame(oreilly.entries)

entries_df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments
0,Ad Networks and Content Marketing,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/ad-networks-and-...,https://www.oreilly.com/radar/ad-networks-and-...,"Tue, 16 Aug 2022 11:21:21 +0000","(2022, 8, 16, 11, 21, 21, 1, 228, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'Operations', 'scheme': None, 'label...",https://www.oreilly.com/radar/?p=14688,False,"In a recent Radar piece, I explored N-sided ma...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/ad-networks-and-...,0
1,On Technique,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/on-technique/,https://www.oreilly.com/radar/on-technique/#re...,"Tue, 09 Aug 2022 11:12:22 +0000","(2022, 8, 9, 11, 12, 22, 1, 221, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14669,False,"In a previous article, I wrote about how model...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/on-technique/feed/,0
2,Scaling False Peaks,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/scaling-false-pe...,https://www.oreilly.com/radar/scaling-false-pe...,"Thu, 04 Aug 2022 11:12:44 +0000","(2022, 8, 4, 11, 12, 44, 3, 216, 0)",[{'name': 'Kevlin Henney'}],Kevlin Henney,{'name': 'Kevlin Henney'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=14661,False,Humans are notoriously poor at judging distanc...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/scaling-false-pe...,0
3,The Metaverse Is Not a Place,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/the-metaverse-is...,https://www.oreilly.com/radar/the-metaverse-is...,"Tue, 02 Aug 2022 18:38:46 +0000","(2022, 8, 2, 18, 38, 46, 1, 214, 0)",[{'name': 'Tim O’Reilly'}],Tim O’Reilly,{'name': 'Tim O’Reilly'},"[{'term': 'Metaverse', 'scheme': None, 'label'...",https://www.oreilly.com/radar/?p=14641,False,The metaphors we use to describe new technolog...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-metaverse-is...,0
4,Radar Trends to Watch: August 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 02 Aug 2022 11:18:24 +0000","(2022, 8, 2, 11, 18, 24, 1, 214, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14631,False,The large model train keeps rolling on. This m...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0


### 10. Count the number of entries per author and sort them in descending order.

In [33]:
print(entries_df["author"].value_counts(ascending = False))

Mike Loukides    10
Q McCallum        2
Kevlin Henney     1
Tim O’Reilly      1
Jon Udell         1
Name: author, dtype: int64


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [36]:
entries_df["length"] = entries_df["title"].apply(len)

entries_df[['title', 'author', 'length']].sort_values('length', ascending=False).head()



Unnamed: 0,title,author,length
5,SQL: The Universal Solvent for REST APIs,Jon Udell,40
4,Radar Trends to Watch: August 2022,Mike Loukides,34
13,Quantum Computing without the Hype,Mike Loukides,34
0,Ad Networks and Content Marketing,Q McCallum,33
9,“Sentience” is the Wrong Question,Mike Loukides,33


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [55]:
machine_learning_entries = entries_df[entries_df["summary"].str.contains("DALL-E")]

machine_learning_entries_list = machine_learning_entries.values.tolist()

print(machine_learning_entries_list)




