In [4]:
 
# importing the required modules 
import csv 
import requests 
import xml.etree.ElementTree as ET 
  
def loadRSS(): 
  
    # url of rss feed 
    url = 'http://www.hindustantimes.com/rss/topnews/rssfeed.xml'
  
    # creating HTTP response object from given url 
    resp = requests.get(url) 
  
    # saving the xml file 
    with open('topnewsfeed.xml', 'wb') as f: 
        f.write(resp.content) 
          
  
def parseXML(xmlfile): 
  
    # create element tree object 
    tree = ET.parse(xmlfile) 
  
    # get root element 
    root = tree.getroot() 
  
    # create empty list for news items 
    newsitems = [] 
  
    # iterate news items 
    for item in root.findall('./channel/item'): 
  
        # empty news dictionary 
        news = {} 
  
        # iterate child elements of item 
        for child in item: 
  
            # special checking for namespace object content:media 
            if child.tag == '{http://search.yahoo.com/mrss/}content': 
                news['media'] = child.attrib['url'] 
            else: 
                news[child.tag] = child.text.encode('utf8') 
  
        # append news dictionary to news items list 
        newsitems.append(news) 
      
    # return news items list 
    return newsitems 
  
  
def savetoCSV(newsitems, filename): 
  
    # specifying the fields for csv file 
    fields = ['guid', 'title', 'pubDate', 'description', 'link', 'media'] 
  
    # writing to csv file 
    with open(filename, 'w') as csvfile: 
  
        # creating a csv dict writer object 
        writer = csv.DictWriter(csvfile, fieldnames = fields) 
  
        # writing headers (field names) 
        writer.writeheader() 
  
        # writing data rows 
        writer.writerows(newsitems) 
  
      
def main(): 
    # load rss from web to update existing xml file 
    loadRSS() 
  
    # parse xml file 
    newsitems = parseXML('topnewsfeed.xml') 
    print(newsitems)
  
    # store news items in a csv file 
    savetoCSV(newsitems, 'topnews.csv') 
      
      
if __name__ == "__main__": 
  
    # calling main function 
    main() 


[{'title': b"Kareena Kapoor on breakup with Shahid Kapoor during Jab We Met: 'We went our separate...", 'description': b'Kareena Kapoor said it was Shahid Kapoor who asked her to consider working in Imtiaz Ali\xe2\x80\x99s Jab We Met.', 'link': b'https://www.hindustantimes.com/bollywood/kareena-kapoor-on-breakup-with-shahid-kapoor-during-jab-we-met-we-went-our-separate-ways-this-gem-came-out-of-it/story-QaG6kgC3hYsDC7pwaP9ntN.html', 'guid': b'https://www.hindustantimes.com/bollywood/kareena-kapoor-on-breakup-with-shahid-kapoor-during-jab-we-met-we-went-our-separate-ways-this-gem-came-out-of-it/story-QaG6kgC3hYsDC7pwaP9ntN.html', 'pubDate': b'Thu, 20 Feb 2020 12:59:32 GMT ', 'media': 'https://www.hindustantimes.com/rf/image_size_630x354/HT/p2/2020/02/20/Pictures/_df02304a-53c3-11ea-a3cd-8211a7b3c8e4.jpg'}, {'title': b"India responds to China's 'sabotage' barb over Amit Shah's Arunachal visit", 'description': b'China had registered its objection to Amit Shah\xe2\x80\x99s visit to Arunach

In [12]:
import xml.etree.ElementTree as ET
doc = ET.parse('topnewsfeed.xml')
root = doc.getroot() # <--- this is the new line
print(root)
print(root.keys())
print(root.items)
#['operation', 'type', '
#'sp'
root.get("title")
#'sp'

<Element 'rss' at 0x7f277f0b14a8>
['version']
<built-in method items of xml.etree.ElementTree.Element object at 0x7f277f0b14a8>


In [13]:
pip install xmltodict

Collecting xmltodict
  Downloading https://files.pythonhosted.org/packages/28/fd/30d5c1d3ac29ce229f6bdc40bbc20b28f716e8b363140c26eff19122d8a5/xmltodict-0.12.0-py2.py3-none-any.whl
Installing collected packages: xmltodict
Successfully installed xmltodict-0.12.0


In [3]:
import xmltodict
import pprint
import json

my_xml = """
    <audience>
      <id what="attribute">123</id>
      <name>Shubham</name>
    </audience>
"""

pp = pprint.PrettyPrinter(indent=4)
pp.pprint(json.dumps(xmltodict.parse(my_xml)))

('{"audience": {"id": {"@what": "attribute", "#text": "123"}, "name": '
 '"Shubham"}}')
