# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [5]:
radar = feedparser.parse(url)
radar

{'bozo': False,
 'entries': [{'title': 'Building a Better Middleman',
   'title_detail': {'type': 'text/plain',
    'language': None,
    'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
    'value': 'Building a Better Middleman'},
   'links': [{'rel': 'alternate',
     'type': 'text/html',
     'href': 'https://www.oreilly.com/radar/building-a-better-middleman-2/'}],
   'link': 'https://www.oreilly.com/radar/building-a-better-middleman-2/',
   'comments': 'https://www.oreilly.com/radar/building-a-better-middleman-2/#respond',
   'published': 'Tue, 17 May 2022 10:58:32 +0000',
   'published_parsed': time.struct_time(tm_year=2022, tm_mon=5, tm_mday=17, tm_hour=10, tm_min=58, tm_sec=32, tm_wday=1, tm_yday=137, tm_isdst=0),
   'authors': [{'name': 'Q McCallum'}],
   'author': 'Q McCallum',
   'author_detail': {'name': 'Q McCallum'},
   'tags': [{'term': 'Operations', 'scheme': None, 'label': None},
    {'term': 'Deep Dive', 'scheme': None, 'label': None}],
   'id': 'https://www.o

In [7]:
print(radar['feed'])

{'title': 'Radar', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Radar'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'https://www.oreilly.com/radar'}, {'rel': 'self', 'type': 'application/rss+xml', 'href': 'http://feeds.feedburner.com/oreilly/radar/atom'}, {'rel': 'hub', 'href': 'http://pubsubhubbub.appspot.com/', 'type': 'text/html'}], 'link': 'https://www.oreilly.com/radar', 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology', 'subtitle_detail': {'type': 'text/html', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'}, 'updated': 'Tue, 17 May 2022 21:39:49 +0000', 'updated_parsed': time.struct_time(tm_year=2022, tm_mon=5, tm_mday=17, tm_hour=21, tm_min=39, tm_sec=49, tm_wday=1, tm_yday=137, tm_isd

### 2. Obtain a list of components (keys) that are available for this feed.

In [8]:
radar.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [10]:
radar.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [16]:
print (radar.feed.title)
print ('')
print (radar.feed.subtitle)
print ('')
print (radar.feed.link)

Radar

Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology

https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [18]:
len(radar.entries)

15

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [21]:
radar.entries[1].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments'])

### 7. Extract a list of entry titles.

In [23]:
titulos = [radar.entries[i].title for i in range(len(radar.entries))]

### 8. Calculate the percentage of "Four short links" entry titles.

In [46]:
[radar.entries[i].title for i in range(len(radar.entries)) if radar.entries[i].title == "Four short links"]

[]

### 9. Create a Pandas data frame from the feed's entries.

In [24]:
import pandas as pd

In [25]:
df = pd.DataFrame(radar.entries)
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments
0,Building a Better Middleman,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/building-a-bette...,https://www.oreilly.com/radar/building-a-bette...,"Tue, 17 May 2022 10:58:32 +0000","(2022, 5, 17, 10, 58, 32, 1, 137, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'Operations', 'scheme': None, 'label...",https://www.oreilly.com/radar/?p=14497,False,"In the previous article, I explored the role o...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/building-a-bette...,0
1,Quantum Computing without the Hype,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/quantum-computin...,https://www.oreilly.com/radar/quantum-computin...,"Tue, 10 May 2022 11:45:05 +0000","(2022, 5, 10, 11, 45, 5, 1, 130, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Big Data Tools and Pipelines', 'sch...",https://www.oreilly.com/radar/?p=14492,False,"Several weeks ago, I had a great conversation ...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/quantum-computin...,0
2,Radar trends to watch: May 2022,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/radar-trends-to-...,https://www.oreilly.com/radar/radar-trends-to-...,"Tue, 03 May 2022 11:19:02 +0000","(2022, 5, 3, 11, 19, 2, 1, 123, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=14482,False,April was the month for large language models....,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0
3,Building a Better Middleman,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/building-a-bette...,https://www.oreilly.com/radar/building-a-bette...,"Tue, 19 Apr 2022 12:22:21 +0000","(2022, 4, 19, 12, 22, 21, 1, 109, 0)",[{'name': 'Q McCallum'}],Q McCallum,{'name': 'Q McCallum'},"[{'term': 'Operations', 'scheme': None, 'label...",https://www.oreilly.com/radar/?p=14442,False,What comes to mind when you hear the term &#82...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/building-a-bette...,0
4,The General Purpose Pendulum,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.oreilly.com/radar/the-general-purp...,https://www.oreilly.com/radar/the-general-purp...,"Tue, 12 Apr 2022 11:59:19 +0000","(2022, 4, 12, 11, 59, 19, 1, 102, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Hardware', 'scheme': None, 'label':...",https://www.oreilly.com/radar/?p=14436,False,"Pendulums do what they do: they swing one way,...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-general-purp...,0


### 10. Count the number of entries per author and sort them in descending order.

In [26]:
authors = df.groupby('author', as_index=False).agg({'title':'count'})
authors.columns = ['author', 'entries']
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
3,Mike Loukides,9
0,Chris Butler,2
4,Q McCallum,2
1,Christina Morillo,1
2,Jeffrey Carr,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [29]:
df['title_length'] = df['title'].apply(len)
df['title'].apply(len)

0     27
1     34
2     31
3     27
4     28
5     33
6     34
7     13
8     22
9     45
10    29
11    35
12    33
13    30
14    13
Name: title, dtype: int64

In [30]:
df[['title', 'author', 'title_length']].sort_values('title_length', ascending=False).head()

Unnamed: 0,title,author,title_length
9,Identity problems get bigger in the metaverse,Chris Butler,45
11,Epstein Barr and the Cause of Cause,Mike Loukides,35
1,Quantum Computing without the Hype,Mike Loukides,34
6,AI Adoption in the Enterprise 2022,Mike Loukides,34
5,Radar trends to watch: April 2022,Mike Loukides,33


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [37]:
resumenes = df.summary
len(df.summary)

15

In [38]:
for i in resumenes:
    if i.__contains__("machine learning"):
        print(i)    

In [41]:
for i in resumenes:
    if i.__contains__("December"):
        print(i)   

In December 2021 and January 2022, we asked recipients of our&#160;Data&#160;and&#160;AI Newsletters&#160;to participate in our annual survey on AI adoption. We were particularly interested in what, if anything, has changed since last year. Are companies farther along in AI adoption? Do they have working applications in production? Are they using tools like AutoML to generate [&#8230;]
