### Example file for retrieving data from the internet

In [5]:
import urllib.request

def main():
    """Fetch the Google home page and print its HTTP status code and raw body.

    The response object is used as a context manager so the underlying
    connection is closed as soon as the body has been read, even if
    reading raises.
    """
    with urllib.request.urlopen("http://www.google.com") as webUrl:
        print("result code: " + str(webUrl.getcode()))
        # read() returns the undecoded body as a bytes object
        data = webUrl.read()
    print(data)

In [6]:
# Call main() only when this code runs as the top-level module
# (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

result code: 200
b'<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="en"><head><meta content="Search the world\'s information, including webpages, images, videos and more. Google has many special features to help you find exactly what you\'re looking for." name="description"><meta content="noodp" name="robots"><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script nonce="UlEGoKX0AagjhoXDFMFWyg==">(function(){window.google={kEI:\'Lb5YYJzFKY20swX5q6C4CQ\',kEXPI:\'0,1302433,56976,954,5104,207,4804,2316,383,246,5,306,1048,4920,16,314,6385,1116131,1233,1196554,496,56,328928,51224,16114,28684,9188,8384,4859,1361,9291,3023,4744,12841,4020,978,13228,2974,911,4154,6430,1142,6290,7095,4517,2778,920,261,2015,10,2794,1593,1279,1042,1170,530,149,1103,840,517,1466,58,156,4100,1447,1,3,2063,606,2023,1777,520,1704,2565,328,1284,8789,3227,2845

### Example file for parsing and processing JSON

In [7]:
import urllib.request
import json

In [16]:
def printResults(data):
    """Print a summary of an earthquake GeoJSON feed.

    Args:
        data: JSON text (str or bytes). It must contain a "metadata" object
            with a "count" entry (and optionally "title") and a "features"
            list whose items each carry a "properties" object with a
            "place" entry and, usually, "mag" and "felt" entries.

    Output, in order: the feed title (if present), the event count, the
    place of every event, the events with magnitude of at least 4.0, and the
    events that at least one person reported feeling.

    A "mag" or "felt" value that is missing or null is treated as unknown:
    such events are left out of the magnitude filter, and an unknown
    magnitude is shown as "n/a" in the felt list instead of raising
    TypeError.
    """
    # Use the json module to load the string data into a dictionary
    theJSON = json.loads(data)
    metadata = theJSON["metadata"]

    # now we can access the contents of the JSON like any other Python object
    if "title" in metadata:
        print(metadata["title"])

    # output the number of events
    count = metadata["count"]
    print(str(count) + " events recorded")

    # for each event, print the place where it occurred
    features = theJSON["features"]
    for event in features:
        print(event["properties"]["place"])
    print("--------------\n")

    # print only the events that have a magnitude of at least 4
    for event in features:
        props = event["properties"]
        mag = props.get("mag")
        # a null magnitude cannot be compared with a number, so skip it
        if mag is not None and mag >= 4.0:
            print("%2.1f" % mag, props["place"])
    print("--------------\n")

    # print only the events where at least 1 person reported feeling something
    print("\n\nEvents that were felt:")
    for event in features:
        props = event["properties"]
        feltReports = props.get("felt")
        if feltReports is not None and feltReports > 0:
            mag = props.get("mag")
            magText = "%2.1f" % mag if mag is not None else "n/a"
            print(magText, props["place"],
                  " reported " + str(feltReports) + " times")


In [17]:
def main():
    """Download the USGS earthquake feed and print a summary of it.

    The HTTP status code is always printed. The body is read and handed to
    printResults() only when the status is 200; any other status that
    urlopen() returns without raising produces an error message instead.
    The response is used as a context manager so the connection is closed
    once the body has been read.
    """
    # define a variable to hold the source URL
    # In this case we'll use the free data feed from the USGS
    # This feed lists all earthquakes for the last day larger than Mag 2.5
    urlData = "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson"

    # Open the URL and read the data
    with urllib.request.urlopen(urlData) as webUrl:
        status = webUrl.getcode()
        print("result code: " + str(status))
        data = webUrl.read() if status == 200 else None

    if data is not None:
        printResults(data)
    else:
        print("Received error, cannot parse result")


In [18]:

# Call main() only when this code runs as the top-level module
# (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
  main()

result code: 200
USGS Magnitude 2.5+ Earthquakes, Past Day
28 events recorded
246 km N of Saga, China
179 km NE of Gisborne, New Zealand
33 km N of San Vicente, Chile
171 km WNW of Neiafu, Tonga
3 km NW of Petersville, Alaska
69 km N of Isangel, Vanuatu
40 km NE of Ambler, Alaska
Kermadec Islands region
102 km SSW of Uyuni, Bolivia
30 km NNE of Akhiok, Alaska
85 km SW of Ocós, Guatemala
88 km W of San Antonio de los Cobres, Argentina
52 km SSW of Whites City, New Mexico
87 km WSW of Abra Pampa, Argentina
21 km W of Volcano, Hawaii
2 km ENE of Soconusco, Mexico
92 km ESE of Puerto Gaitán, Colombia
55 km S of Whites City, New Mexico
11 km W of Hennessey, Oklahoma
128 km WSW of Ternate, Indonesia
39km NNE of Toms Place, CA
163 km SSE of Ust’-Kamchatsk Staryy, Russia
15km N of Morgan Hill, CA
1km ESE of Loma Linda, CA
58 km S of Whites City, New Mexico
11 km E of Verdikoússa, Greece
73 km NNW of Tobelo, Indonesia
Kermadec Islands region
--------------

4.5 246 km N of Saga, China
5.1 179 k

### Example file for parsing and processing HTML

In [20]:
from html.parser import HTMLParser

In [22]:
# module-level tally of <meta> start tags seen by MyHTMLParser
metacount = 0


class MyHTMLParser(HTMLParser):
  """HTMLParser subclass that prints every parsing event it receives.

  Each handler prints what was encountered followed by the parser's
  current (line, offset) position. Start tags named "meta" are also
  counted in the module-level ``metacount`` variable.
  """

  def _report_position(self):
    """Print the line number and offset returned by getpos()."""
    line_no, offset = self.getpos()
    print("\tAt line: ", line_no, " position ", offset)

  def handle_starttag(self, tag, attrs):
    """Report an opening tag and its attributes; count <meta> tags.

    attrs is iterated as (name, value) pairs.
    """
    global metacount
    if tag == "meta":
      metacount = metacount + 1

    print("Encountered a start tag:", tag)
    self._report_position()

    if len(attrs) > 0:
      print("\tAttributes:")
      for name, value in attrs:
        print("\t", name, "=", value)

  def handle_endtag(self, tag):
    """Report a closing tag."""
    print("Encountered an end tag:", tag)
    self._report_position()

  def handle_data(self, data):
    """Report text content, ignoring runs that are only whitespace."""
    if not data.isspace():
      print("Encountered some text data:", data)
      self._report_position()

  def handle_comment(self, data):
    """Report an HTML comment."""
    print("Encountered comment:", data)
    self._report_position()

def main():
  """Parse samplehtml.html with MyHTMLParser and print the <meta> tag count.

  The file is opened in the default text read mode inside a ``with`` block
  so the handle is closed as soon as its contents have been read.

  Raises:
    FileNotFoundError: if samplehtml.html is not in the working directory.
  """
  # instantiate the parser
  parser = MyHTMLParser()

  # open the sample HTML file and read the entire file
  with open("samplehtml.html") as f:
    contents = f.read()

  # feed the parser the HTML; the handlers print as they are called
  parser.feed(contents)

  print("%d meta tags encountered" % metacount)

# Call main() only when this code runs as the top-level module
# (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
  main()
  

FileNotFoundError: [Errno 2] No such file or directory: 'samplehtml.html'

### Example file for parsing and processing XML

In [23]:

import xml.dom.minidom

def _print_skills(doc):
  """Print how many <skill> elements doc contains, then each one's name."""
  skills = doc.getElementsByTagName("skill")
  print("%d skills:" % skills.length)
  for skill in skills:
    print(skill.getAttribute("name"))


def main(xml_path="samplexml.xml"):
  """Parse an XML file, list its <skill> tags, add one, and list them again.

  Args:
    xml_path: file name or file object to parse; defaults to
      "samplexml.xml" in the working directory.

  The root element is taken from ``documentElement`` rather than
  ``firstChild``, because the document's first child may be a comment or
  doctype node that has no ``tagName``. The new <skill> element is added
  to the in-memory document only; nothing is written back to disk.

  Raises:
    FileNotFoundError: if xml_path names a file that does not exist.
  """
  # use the parse() function to load and parse an XML file
  doc = xml.dom.minidom.parse(xml_path)
  root = doc.documentElement

  # print out the document node and the name of the root tag
  print(doc.nodeName)
  print(root.tagName)

  # get a list of XML tags from the document and print each one
  _print_skills(doc)

  # create a new XML tag and add it into the document
  newSkill = doc.createElement("skill")
  newSkill.setAttribute("name", "jQuery")
  root.appendChild(newSkill)

  _print_skills(doc)
        
# Call main() only when this code runs as the top-level module
# (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
  main()



FileNotFoundError: [Errno 2] No such file or directory: 'samplexml.xml'