## File IO

In [2]:
# Open (and truncate or create) example.txt for writing text.
f = open("example.txt", mode="w")

In [3]:
# write() returns the number of characters written (7: six characters plus the newline).
f.write("line 1\n")

7

In [4]:
# A second write appends after the first; again 7 characters are written.
f.write("line 2\n")

7

In [5]:
# Close the handle that was opened for writing.
f.close()

In [6]:
# Reopen the file for reading ("r" is also the default mode).
# NOTE: this handle is never closed explicitly; it is only released when
# `f` is rebound by a later cell.
f = open("example.txt", "r")

In [7]:
# read() with no argument returns the whole file as a single string.
f.read()

'line 1\nline 2\n'

In [8]:
# The with-statement closes the file when the block exits.
with open("example.txt", mode="r") as f:
    text = f.read()
print(text)

line 1
line 2



In [9]:
# The with-statement closed the file on exit, so .closed is True.
f.closed

True

## JSON Serialization

In [10]:
import json

In [12]:
# Sample record built only from types JSON can represent (str, int, list).
data = dict(city="Dresden", year=2024, lines=[1, 2, 3, 8])

In [14]:
# dumps() returns the JSON encoding of the object as a str.
json.dumps(data)

'{"city": "Dresden", "year": 2024, "lines": [1, 2, 3, 8]}'

In [15]:
# Variant 1: encode to a string first, then write that string to the file.
with open("data1.json", mode="w") as f:
    payload = json.dumps(data)
    f.write(payload)

In [17]:
# Variant 2: json.dump() encodes the object straight into the open file.
with open("data1.json", mode="w") as f:
    json.dump(data, fp=f)

In [19]:
# Read the file back; json.load() rebuilds the Python dict from the JSON text.
with open("data1.json", mode="r") as f:
    loaded = json.load(f)
print(loaded)

{'city': 'Dresden', 'year': 2024, 'lines': [1, 2, 3, 8]}


Not all Python types are supported by the JSON encoder; a `set`, for example, cannot be serialized and raises a `TypeError`.

In [21]:
# A set has no JSON counterpart, so this record cannot be serialized as-is.
data = {"city": "Dresden", "lines": {1, 2, 3, 4}}

In [23]:
# json.dumps() raises TypeError for the set inside `data`. Catch the error
# and print it so the failure is visible without aborting "Run All".
try:
    json.dumps(data)
except TypeError as exc:
    print(f"TypeError: {exc}")

## Working with XML

The standard library ships with basic XML support, for example the `xml.etree.ElementTree` module.

In [27]:
# Sample XML document with three <country> records, parsed by the
# ElementTree cells that follow.
# NOTE: this rebinds `data`, which held a dict in the JSON section.
data = """<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
"""

In [30]:
import xml.etree.ElementTree as ET

# Parse the XML string; `root` is the top-level <data> element.
root = ET.fromstring(data)

# Iterating an element yields its direct children (the <country> nodes).
for country_el in root:
    print(country_el.tag, country_el.attrib)

# Positional indexing: first <country>, then its second child (<year>).
root[0][1].text

country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}


'2008'

In [31]:
# Print each country's name attribute next to the text of its <rank> child.
for country in root.findall('country'):
    name, rank = country.get('name'), country.find('rank').text
    print(name, rank)

Liechtenstein 1
Singapore 4
Panama 68


In [35]:

# XPath: every <neighbor> that is a direct child of a <country> which is
# itself a direct child of the root element.
root.findall("./country/neighbor")

[<Element 'neighbor' at 0x762b07e38400>,
 <Element 'neighbor' at 0x762b07e383b0>,
 <Element 'neighbor' at 0x762b07e38bd0>,
 <Element 'neighbor' at 0x762b07e39300>,
 <Element 'neighbor' at 0x762b07e39350>]

In [32]:
# `.//year/..` selects the parent of each <year> element; the predicate
# then keeps only parents whose name attribute is 'Singapore'.
root.findall(".//year/..[@name='Singapore']")

[<Element 'country' at 0x762b07e38360>]

In [34]:
# `[2]` is a 1-based position test among same-tag siblings: it matches each
# <neighbor> that is the second <neighbor> of its parent. Singapore has only
# one neighbor, so it contributes no match.
root.findall(".//neighbor[2]")

[<Element 'neighbor' at 0x762b07e383b0>,
 <Element 'neighbor' at 0x762b07e39350>]

## CSV

In [36]:
import csv

# Write two rows to eggs.csv, separating fields with a space. Fields that
# contain the delimiter are wrapped in '|' (QUOTE_MINIMAL).
rows = [
    ['Spam'] * 5 + ['Baked Beans'],
    ['Spam', 'Lovely Spam', 'Wonderful Spam'],
]
with open('eggs.csv', 'w', newline='') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=' ',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    spamwriter.writerows(rows)

In [37]:
# Read eggs.csv back with the same dialect settings and print each row
# with its fields separated by ", ".
with open('eggs.csv', mode='r', newline='') as csvfile:
    spamreader = csv.reader(csvfile, quotechar='|', delimiter=' ')
    for row in spamreader:
        print(*row, sep=', ')

Spam, Spam, Spam, Spam, Spam, Baked Beans
Spam, Lovely Spam, Wonderful Spam


Reading each row directly into a dictionary with `csv.DictReader`.

In [39]:
import csv

# Print every record of names.csv, then the final record once more as a dict.
# The file is not created by any cell shown here, so it has to exist already,
# with a header row that names the first_name and last_name columns.
last_row = None  # stays None when the file holds no data rows
with open('names.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row['first_name'], row['last_name'])
        last_row = row

# Track the last record explicitly rather than reading the loop variable
# after the loop: `row` would be unbound (or stale from the eggs.csv cell)
# if the file had no data rows.
if last_row is not None:
    print(last_row)

anna anker
bert boden
cleo canto
dino dante
{'first_name': 'dino', 'last_name': 'dante'}


## HTML

In [41]:
# Sample HTML document used as input for the BeautifulSoup cells that follow.
html_doc = """<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""


In [43]:
from bs4 import BeautifulSoup

# Build the parse tree using the 'html.parser' backend.
soup = BeautifulSoup(html_doc, features='html.parser')

# prettify() renders the tree re-indented, one tag or text node per line.
pretty_html = soup.prettify()
print(pretty_html)

<html>
 <head>
  <title>
   The Dormouse's story
  </title>
 </head>
 <body>
  <p class="title">
   <b>
    The Dormouse's story
   </b>
  </p>
  <p class="story">
   Once upon a time there were three little sisters; and their names were
   <a class="sister" href="http://example.com/elsie" id="link1">
    Elsie
   </a>
   ,
   <a class="sister" href="http://example.com/lacie" id="link2">
    Lacie
   </a>
   and
   <a class="sister" href="http://example.com/tillie" id="link3">
    Tillie
   </a>
   ;
and they lived at the bottom of a well.
  </p>
  <p class="story">
   ...
  </p>
 </body>
</html>


In [44]:
# Attribute access by tag name returns the first matching tag, here <title>.
soup.title

<title>The Dormouse's story</title>

In [45]:
# .name is the tag's name as a string.
soup.title.name

'title'

In [46]:
# .string is the text contained in the tag.
soup.title.string

"The Dormouse's story"

In [47]:
# .parent moves one level up the tree: <title> sits inside <head>.
soup.title.parent.name

'head'

In [48]:
# Only the first <p> is returned, although the document contains three.
soup.p

<p class="title"><b>The Dormouse's story</b></p>

In [49]:
# Tag attributes are read with dict-style indexing; class comes back as a list.
soup.p['class']

['title']

In [50]:
# First <a> tag in the document.
soup.a

<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

In [51]:
# find_all() collects every matching tag: all three <a> links here.
soup.find_all('a')

[<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
 <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
 <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

In [52]:
# Keyword arguments filter on tag attributes; find() returns a single tag.
soup.find(id="link3")

<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>

## Multiple Files and Directories

* os.walk and glob.glob

In [53]:
import glob

In [54]:
# Wildcard match against the current directory; the result is a list of paths.
glob.glob("*.csv")

['eggs.csv', 'names.csv']

In [56]:
import os

In [57]:
# Calling os.walk() only creates a generator; it has to be iterated to
# produce any results.
os.walk(".")

<generator object _walk at 0x762b07b1ece0>

In [61]:
# Walk the tree below the current directory and print the path of every file.
# The loop variables deliberately avoid the names `root` and `f`: earlier
# cells bind those to the parsed XML root element and to a file handle, and
# rebinding them here would break those cells when they are re-run.
for dirpath, dirnames, filenames in os.walk("."):
    for filename in filenames:
        print(os.path.join(dirpath, filename))

./imperative.ipynb
./guess.ipynb
./memory.ipynb
./eggs.csv
./data1.json
./HelloPandas.ipynb
./IO.ipynb
./represent.ipynb
./overview.ipynb
./oo.ipynb
./firstlast.ipynb
./python_world_view.ipynb
./names.csv
./example.txt
./.ipynb_checkpoints/HelloPandas-checkpoint.ipynb
./.ipynb_checkpoints/overview-checkpoint.ipynb
./.ipynb_checkpoints/guess-checkpoint.ipynb
./.ipynb_checkpoints/memory-checkpoint.ipynb
./.ipynb_checkpoints/represent-checkpoint.ipynb
./.ipynb_checkpoints/python_world_view-checkpoint.ipynb
./.ipynb_checkpoints/names-checkpoint.csv
./.ipynb_checkpoints/firstlast-checkpoint.ipynb
./.ipynb_checkpoints/learning-python3-checkpoint.ipynb
./.ipynb_checkpoints/imperative-checkpoint.ipynb
./.ipynb_checkpoints/IO-checkpoint.ipynb
./.ipynb_checkpoints/oo-checkpoint.ipynb
./intro/04.ipynb
./intro/06.ipynb
./intro/01.ipynb
./intro/07.ipynb
./intro/05.ipynb
./intro/surface_ex.pdf
./intro/surface_ex.png
./intro/03.ipynb
./intro/02.ipynb
./intro/08.ipynb
./intro/.ipynb_checkpoints/03-che

## Reading data from the web

The standard library documentation recommends the third-party `requests` package for higher-level HTTP interaction.

In [62]:
import urllib.request

# Fetch the python.org front page and show its first 300 bytes; read()
# returns bytes, not str. The request goes over HTTPS, and the timeout
# (seconds) keeps a stalled connection from hanging the kernel.
with urllib.request.urlopen('https://www.python.org/', timeout=10) as f:
    print(f.read(300))

b'<!doctype html>\n<!--[if lt IE 7]>   <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9">   <![endif]-->\n<!--[if IE 7]>      <html class="no-js ie7 lt-ie8 lt-ie9">          <![endif]-->\n<!--[if IE 8]>      <html class="no-js ie8 lt-ie9">                 <![endif]-->\n<!--[if gt IE 8]><!--><html class="no-js"'


In [63]:
import requests

In [64]:
# requests never times out by default, so always pass an explicit timeout
# (seconds); otherwise an unresponsive server blocks the cell indefinitely.
r = requests.get('https://api.github.com/events', timeout=10)

In [65]:
# HTTP status code of the GET response (200 in the recorded run).
r.status_code

200

In [66]:
# Length of the response body as text.
len(r.text)

89188

In [72]:
# Decode the JSON body into Python objects (here: a list of event dicts).
events = r.json()

In [73]:
# Number of events in the decoded response.
len(events)

30

In [74]:
# Inspect the first event to see the structure of a single record.
events[0]

{'id': '37445370657',
 'type': 'CreateEvent',
 'actor': {'id': 81474942,
  'login': 'hohner2008',
  'display_login': 'hohner2008',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/hohner2008',
  'avatar_url': 'https://avatars.githubusercontent.com/u/81474942?'},
 'repo': {'id': 731077003,
  'name': 'hohner2008/openapi-generator',
  'url': 'https://api.github.com/repos/hohner2008/openapi-generator'},
 'payload': {'ref': 'snyk-fix-ae7a132f724a5a1a72ffc5fce28de0a0',
  'ref_type': 'branch',
  'master_branch': 'master',
  'description': 'OpenAPI Generator allows generation of API client libraries (SDK generation), server stubs, documentation and configuration automatically given an OpenAPI Spec (v2, v3)',
  'pusher_type': 'user'},
 'public': True,
 'created_at': '2024-04-14T23:10:49Z'}

In [78]:
# Send the dict as a form-encoded PUT body. NOTE: this rebinds `r`, replacing
# the GitHub response. requests never times out by default, so pass an
# explicit timeout (seconds).
r = requests.put('https://httpbin.org/put', data={'key': 'value'}, timeout=10)

In [79]:
# HTTP status code of the PUT response.
r.status_code

200

In [80]:
# Response headers sent back by the server.
r.headers

{'Date': 'Sun, 14 Apr 2024 23:19:36 GMT', 'Content-Type': 'application/json', 'Content-Length': '483', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'}

In [81]:
# URL the response was retrieved from.
r.url

'https://httpbin.org/put'

In [83]:
# httpbin echoes the request back as JSON: the submitted field shows up
# under 'form'.
r.json()

{'args': {},
 'data': '',
 'files': {},
 'form': {'key': 'value'},
 'headers': {'Accept': '*/*',
  'Accept-Encoding': 'gzip, deflate, br',
  'Content-Length': '9',
  'Content-Type': 'application/x-www-form-urlencoded',
  'Host': 'httpbin.org',
  'User-Agent': 'python-requests/2.31.0',
  'X-Amzn-Trace-Id': 'Root=1-661c6487-5431408a7eaecc02534bbf88'},
 'json': None,
 'origin': '46.114.203.250',
 'url': 'https://httpbin.org/put'}