# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [70]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [71]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [72]:
feed = feedparser.parse(url)

In [73]:
print(feed)



### 2. Obtain a list of components (keys) that are available for this feed.

In [74]:
print(feed.keys())

dict_keys(['bozo', 'entries', 'feed', 'headers', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [75]:
print(feed.feed.keys())

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator'])


### 4. Extract and print the feed title, subtitle, author, and link.

In [76]:
print(feed.feed.title)
print(feed.feed.subtitle)
print(feed.feed.generator)
print(feed.feed.link)

Radar
Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
https://wordpress.org/?v=5.3.14
https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [77]:
print(len(feed.entries))

15


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [78]:
entry = feed.entries[2]
print(entry)

{'title': 'AI’s ‘SolarWinds Moment’ Will Occur; It’s Just a Matter of When', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'AI’s ‘SolarWinds Moment’ Will Occur; It’s Just a Matter of When'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'https://www.oreilly.com/radar/ais-solarwinds-moment-will-occur-its-just-a-matter-of-when/'}], 'link': 'https://www.oreilly.com/radar/ais-solarwinds-moment-will-occur-its-just-a-matter-of-when/', 'comments': 'https://www.oreilly.com/radar/ais-solarwinds-moment-will-occur-its-just-a-matter-of-when/#respond', 'published': 'Tue, 29 Nov 2022 12:36:46 +0000', 'published_parsed': time.struct_time(tm_year=2022, tm_mon=11, tm_mday=29, tm_hour=12, tm_min=36, tm_sec=46, tm_wday=1, tm_yday=333, tm_isdst=0), 'authors': [{'name': 'Mike Barlow'}], 'author': 'Mike Barlow', 'author_detail': {'name': 'Mike Barlow'}, 'tags': [{'term': 'AI & ML', 'scheme': None, 'label': None}, {'t

In [79]:
print(feed.entries[0].keys())

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])


### 7. Extract a list of entry titles.

In [80]:
entry_titles = [entry.title for entry in feed.entries]
entry_titles = [entry.replace('\n', ' ') for entry in entry_titles]
entry_titles = [entry.replace('\xa0', ' ') for entry in entry_titles]
print(entry_titles)

['What Does Copyright Say about Generative Models?', 'Radar Trends to Watch: December 2022', 'AI’s ‘SolarWinds Moment’ Will Occur; It’s Just a Matter of When', 'Technical Health Isn’t Optional', 'Healthy Data', 'Formal Informal Languages', 'Radar Trends to Watch: November 2022', 'What We Learned Auditing Sophisticated AI for Bias', 'The Collaborative Metaverse', 'What Is Hyperautomation?', 'Radar Trends to Watch: October 2022', 'The Problem with Intelligence', 'Radar Trends to Watch: September 2022', 'Ad Networks and Content Marketing', 'On Technique']


In [81]:
entry = feed.entries[0]
entry.content[0].value = entry.content[0].value.replace('\n', ' ')
entry.content[0].value = entry.content[0].value.replace('\xa0', ' ')
print(entry.content[0].value)

<p>The current generation of flashy AI applications, ranging from GitHub Copilot to Stable Diffusion, raise fundamental issues with copyright law. I am not an attorney, but these issues need to be addressed–at least within the culture that surrounds the use of these models, if not the legal system itself.</p>    <p>Copyright protects outputs of creative processes, not inputs. You can copyright a work you produced, whether that’s a computer program, a literary work, music, or an image. There is a concept of “fair use” that’s most applicable to text, but still applicable in other domains. The problem with fair use is that it is never precisely defined. The <a href="https://www.copyright.gov/help/faq/faq-fairuse.html#:~:text=Under%20the%20fair%20use%20doctrine,news%20reporting%2C%20and%20scholarly%20reports." rel="noreferrer noopener" target="_blank">US Copyright Office’s statement about fair use</a> is a model for vagueness:</p>    <blockquote class="wp-block-quote"><p>Under the fair use

### 8. Calculate the percentage of "Four short links" entry titles.

In [82]:
percentage = (entry_titles.count('Four short links')/len(entry_titles))*100
print(percentage)

0.0


### 9. Create a Pandas data frame from the feed's entries.

In [83]:
import pandas as pd

In [84]:
df = pd.DataFrame(feed.entries)
print(df.head())

                                               title  \
0   What Does Copyright Say about Generative Models?   
1               Radar Trends to Watch: December 2022   
2  AI’s ‘SolarWinds Moment’ Will Occur; It’s Just...   
3                    Technical Health Isn’t Optional   
4                                       Healthy Data   

                                        title_detail  \
0  {'type': 'text/plain', 'language': None, 'base...   
1  {'type': 'text/plain', 'language': None, 'base...   
2  {'type': 'text/plain', 'language': None, 'base...   
3  {'type': 'text/plain', 'language': None, 'base...   
4  {'type': 'text/plain', 'language': None, 'base...   

                                               links  \
0  [{'rel': 'alternate', 'type': 'text/html', 'hr...   
1  [{'rel': 'alternate', 'type': 'text/html', 'hr...   
2  [{'rel': 'alternate', 'type': 'text/html', 'hr...   
3  [{'rel': 'alternate', 'type': 'text/html', 'hr...   
4  [{'rel': 'alternate', 'type': 'text/html', 

### 10. Count the number of entries per author and sort them in descending order.

In [85]:
author_count = df.groupby('author').size().sort_values(ascending=False)
print(author_count)

author
Mike Loukides    12
Mike Barlow       1
Patrick Hall      1
Q McCallum        1
dtype: int64


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [86]:
df['title_length'] = df['title'].apply(len)
df[['title', 'author', 'title_length']].sort_values(by='title_length', ascending=False)
print(df[['title', 'author', 'title_length']].sort_values(by='title_length', ascending=False))

                                                title         author  \
2   AI’s ‘SolarWinds Moment’ Will Occur; It’s Just...    Mike Barlow   
7   What We Learned Auditing Sophisticated AI for ...   Patrick Hall   
0    What Does Copyright Say about Generative Models?  Mike Loukides   
12              Radar Trends to Watch: September 2022  Mike Loukides   
1                Radar Trends to Watch: December 2022  Mike Loukides   
6                Radar Trends to Watch: November 2022  Mike Loukides   
10                Radar Trends to Watch: October 2022  Mike Loukides   
13                  Ad Networks and Content Marketing     Q McCallum   
3                     Technical Health Isn’t Optional  Mike Loukides   
11                      The Problem with Intelligence  Mike Loukides   
8                         The Collaborative Metaverse  Mike Loukides   
5                           Formal Informal Languages  Mike Loukides   
9                            What Is Hyperautomation?  Mike Louk

### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [87]:
ml_titles = df[df['summary'].str.contains('machine learning')]['title']
print(ml_titles)

10    Radar Trends to Watch: October 2022
Name: title, dtype: object
