# Fetching Internet Data

In [3]:
import urllib.request

def main():
    """Fetch the Google home page and print its status code and raw body.

    The response is opened in a ``with`` block so the underlying connection
    is released as soon as the body has been read, instead of staying open
    until the object is garbage-collected.
    """
    with urllib.request.urlopen("http://www.google.com") as weburl:
        # getcode() is the HTTP status of the response, e.g. 200
        print(f"result code : {weburl.getcode()}")

        # read() returns the undecoded response body as a bytes object
        data = weburl.read()

    # raw HTML of the page, still as bytes
    print(data)
    
if __name__ == "__main__":
    main()

result code : 200
b'<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="en-IN"><head><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script nonce="SoQ4-lPKrVQlmYeQciaOOg">(function(){window.google={kEI:\'ImD4Y8-vNOrp1sQPu8GQ2AQ\',kEXPI:\'0,1359409,6059,206,4804,2316,383,246,5,1129120,1197661,91,649,166,379924,16114,28684,22431,1361,12311,2824,14764,4998,13228,10731,26741,4819,1593,1279,2891,1103,3036,7615,606,29880,27304,3506,15756,3,346,230,1014,1,5445,148,11323,2652,4,1528,2304,27348,1714,13065,11442,6653,9358,7428,5821,2536,4094,7596,1,39047,2,3105,2,14022,2715,23024,5679,1021,31121,4568,6253,23424,1246,5841,14967,4333,8,7476,445,2,2,1,26632,8155,7381,2,1478,14490,872,6578,1252,11804,7,1922,5784,3997,21389,388,14375,6305,20198,15606,4531,14,82,4641,8691,6874,1622,1778,3900,1077,3787,938,3514,4226,2426,753,3344,550,3269,2,563,

# Working with JSON data

In [4]:
# Data source: USGS earthquake feeds
#   feed overview        : https://earthquake.usgs.gov/earthquakes/feed/
#   GeoJSON format notes : https://earthquake.usgs.gov/earthquakes/feed/v1.0/geojson.php

In [14]:
import urllib.request
import json

def printResults(data):
    """Parse a GeoJSON earthquake feed and print three summaries of it.

    Prints, in order: the feed title (when present) and event count, the
    place of every event, the place of every event with magnitude >= 4.0,
    and the place plus report count of every event that was felt by at
    least one person.

    Args:
        data: The JSON document as ``str`` or ``bytes``. It must contain a
            ``"metadata"`` object with a ``"count"`` entry and a
            ``"features"`` list whose items carry a ``"properties"`` object
            with a ``"place"`` entry.

    Raises:
        json.JSONDecodeError: If ``data`` is not valid JSON.
        KeyError: If one of the required keys listed above is missing.
    """
    # use the json module to load the data into a dictionary
    theJSON = json.loads(data)
    metadata = theJSON["metadata"]

    # now we can access the contents of the JSON like any other python object
    if "title" in metadata:
        print(f'The Title : {metadata["title"]}')

    # output the number of events
    count = metadata["count"]
    print(f'{count} event recorded')

    features = theJSON["features"]

    # for each event, print the place where it occurred
    for event in features:
        print(event["properties"]["place"])
    print("----"*10)

    # print only the events that have a magnitude of at least 4.0
    for event in features:
        magnitude = event["properties"].get("mag")
        # a missing or null magnitude cannot be compared with a float;
        # skip such events instead of raising TypeError
        if magnitude is not None and magnitude >= 4.0:
            print(event["properties"]["place"])
    print("----"*10)

    # print only the events where at least 1 person reported feeling something
    print("\nEvents that were felt : ")
    for event in features:
        feltReport = event["properties"].get("felt")
        # "felt" is null when nobody submitted a report
        if feltReport is not None and feltReport > 0:
            print(event["properties"]["place"], feltReport, "times.")
    
    
def main():
    """Download the USGS 2.5_day GeoJSON summary feed and print a report of it.

    The status code is always printed. When the server answers 200 the body
    is handed to ``printResults``; otherwise an error line is printed.

    ``urlopen`` raises ``HTTPError`` for non-2xx responses rather than
    returning them, so that exception is caught here; without it the error
    branch below could never run for a real server error. Connection-level
    failures (``URLError``) are left to propagate.
    """
    from urllib.error import HTTPError

    urlData = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson"

    # open the url and read the data; the with-block closes the connection
    try:
        with urllib.request.urlopen(urlData) as weburl:
            status = weburl.getcode()
            data = weburl.read() if status == 200 else None
    except HTTPError as err:
        # the server replied, but with an error status
        status = err.code
        data = None

    print(f"result code : {status}")

    if data is not None:
        printResults(data)
    else:
        print(f"Received an error from the server, cannot print results : {status}")
    

if __name__ == "__main__":
    main()

result code : 200
The Title : USGS Magnitude 2.5+ Earthquakes, Past Day
36 event recorded
52 km NNW of Sandy Ground Village, Anguilla
3 km SW of Indios, Puerto Rico
39 km ESE of Port Heiden, Alaska
64 km NE of Calama, Chile
Turkey-Syria border region
163 km WNW of Panguna, Papua New Guinea
18 km S of Weston, Colorado
23 km SSE of Pihuamo, Mexico
29 km ENE of Kandrian, Papua New Guinea
80 km SW of Atocha, Bolivia
235 km SSE of Old Harbor, Alaska
10 km SE of Otra Banda, Dominican Republic
50 km S of Guantánamo, Cuba
Pakistan
70 km NW of La Ligua, Chile
76 km WNW of La Ligua, Chile
9 km W of Freedom, Idaho
15 km W of Freedom, Idaho
11 km ENE of Pāhala, Hawaii
60 km SSW of Nueva Concepción, Guatemala
Rat Islands, Aleutian Islands, Alaska
Rat Islands, Aleutian Islands, Alaska
177 km N of Tobelo, Indonesia
9 km N of Miyako, Japan
59 km WSW of San Pedro de Atacama, Chile
186 km WSW of Adak, Alaska
central East Pacific Rise
19 km W of Susitna, Alaska
6 km NW of Kışlak, Turkey
39 km SE of Perry

# Parsing and processing HTML

In [18]:
from html.parser import HTMLParser

paragraphs = 0

class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that narrates what it encounters while parsing.

    Comments, start tags (with their attributes) and non-blank text are
    printed together with the position reported by ``getpos()``. Each
    ``<p>`` start tag also bumps the module-level ``paragraphs`` counter.
    """

    def _print_location(self):
        """Print the (line, offset) pair currently reported by getpos()."""
        line_no, offset = self.getpos()
        print(f"At line : {line_no}, Position : {offset}")

    def handle_comment(self, data):
        """Report an HTML comment and where it was found."""
        print(f"Encountered a comment : {data}")
        self._print_location()

    def handle_starttag(self, tag, attrs):
        """Report a start tag, count it if it is a <p>, and list its attributes."""
        global paragraphs

        print(f"Encountered a start tag : {tag}")
        self._print_location()

        if tag == "p":
            paragraphs += 1

        # nothing more to show for a tag without attributes
        if not attrs:
            return

        print("Attribut : ")
        for name, value in attrs:
            print(f"\t{name} = {value}")

    def handle_data(self, data):
        """Report a run of text, ignoring runs that are only whitespace."""
        if not data.isspace():
            print(f"Encountered text data : {data}")
            self._print_location()
    
def main():
    """Parse ``sampleHTML.html`` with MyHTMLParser and print the <p> tag count.

    The parser prints its findings while it runs; afterwards the number of
    paragraph start tags collected in the module-level ``paragraphs``
    counter is printed.
    """
    global paragraphs
    # start from zero so calling main() again does not add to an old total
    paragraphs = 0

    # instantiate the parser and feed it some HTML
    parser = MyHTMLParser()

    # the with-block closes the file even if reading or parsing fails;
    # the sample file declares charset UTF-8, so decode it as such rather
    # than relying on the platform default encoding
    with open("sampleHTML.html", encoding="utf-8") as f:
        contents = f.read()  # read the entire file
    parser.feed(contents)

    print(f"Paragraph tags : {paragraphs}")
        
if __name__ == "__main__":
    main()

Encountered a start tag : html
At line : 2, Position : 0
Attribut : 
	lang = en
Encountered a start tag : head
At line : 3, Position : 0
Encountered a start tag : meta
At line : 4, Position : 4
Attribut : 
	charset = UTF-8
Encountered a start tag : meta
At line : 5, Position : 4
Attribut : 
	http-equiv = X-UA-Compatible
	content = IE=edge
Encountered a start tag : meta
At line : 6, Position : 4
Attribut : 
	name = viewport
	content = width=device-width, initial-scale=1.0
Encountered a start tag : title
At line : 7, Position : 4
Encountered text data : Document
At line : 7, Position : 11
Encountered a start tag : body
At line : 9, Position : 0
Encountered a comment :  This is a comment 
At line : 10, Position : 4
Encountered a start tag : h1
At line : 11, Position : 4
Encountered text data : HTML Sample File
At line : 11, Position : 8
Encountered a start tag : p
At line : 12, Position : 4
Encountered text data : This is some text
At line : 12, Position : 7
Encountered a start tag : p
At

# Manipulating XML

In [None]:


def main():
    """Placeholder for the XML walkthrough; none of the steps are implemented yet.

    A function body made only of comments is not valid Python, so this
    docstring and the explicit ``pass`` keep the cell runnable until the
    steps below are filled in. Calling it currently does nothing.
    """

    # TODO: use the parse() function to load and parse an XML file

    # TODO: print out the document node and the name of the first child tag

    # TODO: get the list of XML tags from the document and print each one

    # TODO: create a new XML tag and add it into the document

    pass
    
    
if __name__ == "__main__":
    main()