# Espressioni regolari

Da [re](https://docs.python.org/3/library/re.html) e [Regular Expression HOWTO](https://docs.python.org/3.8/howto/regex.html) — per sperimentare [pythex](https://pythex.org).

In [29]:
import re

r = re.compile(r'(.*)@.*\.([^.]*pec[^.]*)(\..+)?\.(.*)$')
r.match('massimo@dominio.miapec.it').groups()

('massimo', 'miapec', None, 'it')

In [30]:
with open('emails.txt') as inf:
    for line in inf:
        m = r.match(line)
        if m:
            print(line.strip(), m.groups())

cm3lazio@pec.lapostapec.it" ('cm3lazio', 'lapostapec', None, 'it"')
demografici@pec.cassinadepecchi.gov.it" ('demografici', 'cassinadepecchi', '.gov', 'it"')
aire@comune.pec.como.it" ('aire', 'pec', '.como', 'it"')
ufficioprotocollo@comunefarainsabina.pecpa.it" ('ufficioprotocollo', 'pecpa', None, 'it"')
amministrazionegerocarne@pec.personalpec.eu" ('amministrazionegerocarne', 'personalpec', None, 'eu"')
pec@comunemeleti.casellapec.info" ('pec', 'casellapec', None, 'info"')
comunemisterbianco@pec.pec-pa.it" ('comunemisterbianco', 'pec-pa', None, 'it"')
uffprotocollo@comunemontopolidisabina.pecpa.it" ('uffprotocollo', 'pecpa', None, 'it"')
protocollo@palazzolo.viapec.it" ('protocollo', 'viapec', None, 'it"')
protocollo@pec.comune.pecetto.al.it" ('protocollo', 'pecetto', '.al', 'it"')
info@pec.comune.pecetto.to.it" ('info', 'pecetto', '.to', 'it"')
comune@comune.santaluciadelmela.pec.telecompost.it" ('comune', 'pec', '.telecompost', 'it"')
affarigenerali@comune.teramo.pecpa.it" ('affarig

In [51]:
r = re.compile(r'.*\.([^.]+)"$')
tld = []
with open('emails.txt') as inf:
    for line in inf:
        m = r.match(line)
        if m: tld.append(m.group(1))

tld[:10]

['net', 'it', 'it', 'it', 'it', 'it', 'it', 'it', 'it', 'it']

In [55]:
tld2num = dict()

for d in tld:
    try:
        tld2num[d] += 1
    except KeyError:
        tld2num[d] = 0
        
tld2num

{'net': 211,
 'it': 7468,
 'IT': 30,
 'eu': 39,
 'com': 16,
 'org': 17,
 'at': 0,
 'info': 7,
 'biz': 1,
 'NET': 0}

# Collezioni evolute

Da [collections](https://docs.python.org/3/library/collections.html).

In [56]:
from collections import defaultdict

tld2num = defaultdict(lambda: 0)

for d in tld:
    tld2num[d] += 1

tld2num

defaultdict(<function __main__.<lambda>()>,
            {'net': 212,
             'it': 7469,
             'IT': 31,
             'eu': 40,
             'com': 17,
             'org': 18,
             'at': 1,
             'info': 8,
             'biz': 2,
             'NET': 1})

In [64]:
sig2words = defaultdict(list)

with open('zingarelli.txt') as inf:
    for line in inf:
        word = line.strip()
        sig = ''.join(sorted(word))
        sig2words[sig].append(word)
        
for sig, words in sig2words.items():
    if len(words) > 20: print(words)

['ANTIMERO', 'ARTIMONE', 'ENORMITA', 'ENTRIAMO', 'MARONITE', 'MATERINO', 'MATRONEI', 'MERITANO', 'MINARETO', 'MINATORE', 'MINORATE', 'MONETARI', 'MONTERAI', 'NORMIATE', 'RIMENATO', 'RINOMATE', 'RITEMANO', 'TERNIAMO', 'TIMONARE', 'TIMONERA', 'TRAMENIO']
['APRIRESTE', 'ESTIRPARE', 'ESTIRPERA', 'PARERESTI', 'PRESTARIE', 'PRESTERAI', 'RAPERESTI', 'RAPIRESTE', 'RESPIRATE', 'RIPESTARE', 'RIPESTERA', 'RIPRETESA', 'RISAPRETE', 'RISPERATE', 'SPARIRETE', 'SPETRERAI', 'SPIETRARE', 'SPIETRERA', 'SPRETERAI', 'STERPERAI', 'STREPERAI']
['APRISTE', 'ARPISTE', 'ESPATRI', 'ESTIRPA', 'PARESTI', 'PIASTRE', 'PRATESI', 'PRESTAI', 'RAPISTE', 'RIPESTA', 'SERPITA', 'SPARITE', 'SPERATI', 'SPETRAI', 'SPIETRA', 'SPIRATE', 'SPRETAI', 'STERPAI', 'STIPARE', 'STIPERA', 'STRAPIE']
['CANTERI', 'CARENTI', 'CARTINE', 'CENTRAI', 'CERANTI', 'CERNITA', 'CERTANI', 'CETRINA', 'CIANTRE', 'CINTARE', 'CINTERA', 'CITERNA', 'CREANTI', 'CRENATI', 'CRETINA', 'INCERTA', 'NACRITE', 'NARTECI', 'NATRICE', 'NECTRIA', 'RECANTI', 'RECINTA'

In [66]:
from collections import Counter

tld2num = Counter(tld)
tld2num

Counter({'net': 212,
         'it': 7469,
         'IT': 31,
         'eu': 40,
         'com': 17,
         'org': 18,
         'at': 1,
         'info': 8,
         'biz': 2,
         'NET': 1})

In [76]:
from collections import namedtuple

User = namedtuple('User', 'first, last, email')

u = User('Mario', 'Rossi', 'mariorossi@gmail.com')
u.first

'Mario'

In [77]:
User._make(['Franco', 'Bruni', 'fra@bruni.it'])

User(first='Franco', last='Bruni', email='fra@bruni.it')

# Formati di dati comuni

Da [csv](https://docs.python.org/3/library/csv.html) e [json](https://docs.python.org/3/library/json.html).

In [83]:
from urllib.request import urlopen
import csv

Color = namedtuple('Color', 'short, long, hex, r, g, b')

with urlopen('https://raw.githubusercontent.com/codebrainz/color-names/master/output/colors.csv') as inf:
    colors = list(map(Color._make, csv.reader(inf.read().decode('utf-8').splitlines())))

colors[:10]    

[Color(short='air_force_blue_raf', long='Air Force Blue (Raf)', hex='#5d8aa8', r='93', g='138', b='168'),
 Color(short='air_force_blue_usaf', long='Air Force Blue (Usaf)', hex='#00308f', r='0', g='48', b='143'),
 Color(short='air_superiority_blue', long='Air Superiority Blue', hex='#72a0c1', r='114', g='160', b='193'),
 Color(short='alabama_crimson', long='Alabama Crimson', hex='#a32638', r='163', g='38', b='56'),
 Color(short='alice_blue', long='Alice Blue', hex='#f0f8ff', r='240', g='248', b='255'),
 Color(short='alizarin_crimson', long='Alizarin Crimson', hex='#e32636', r='227', g='38', b='54'),
 Color(short='alloy_orange', long='Alloy Orange', hex='#c46210', r='196', g='98', b='16'),
 Color(short='almond', long='Almond', hex='#efdecd', r='239', g='222', b='205'),
 Color(short='amaranth', long='Amaranth', hex='#e52b50', r='229', g='43', b='80'),
 Color(short='amber', long='Amber', hex='#ffbf00', r='255', g='191', b='0')]

In [100]:
from json import loads

with urlopen('https://raw.githubusercontent.com/codebrainz/color-names/master/output/colors.json') as inf:
    color_dict = loads(inf.read().decode('utf-8'))

JSONDecodeError: Expecting property name enclosed in double quotes: line 6 column 3 (char 115)

In [138]:
data = """{
  "air_force_blue_raf": {
    "name": "Air Force Blue (Raf)",
    "hex": "#5d8aa8",
    "rgb": [93, 138, 168],
  },
}"""

loads(data)

JSONDecodeError: Expecting property name enclosed in double quotes: line 6 column 3 (char 115)

In [139]:
import re

wrong = re.compile(r',\s*\}')
wrong.findall(data)

[',\n  }', ',\n}']

In [141]:
print(re.sub(wrong, '}', data))

{
  "air_force_blue_raf": {
    "name": "Air Force Blue (Raf)",
    "hex": "#5d8aa8",
    "rgb": [93, 138, 168]}}


In [142]:
loads(re.sub(s, '\n}', data))

{'air_force_blue_raf': {'name': 'Air Force Blue (Raf)',
  'hex': '#5d8aa8',
  'rgb': [93, 138, 168]}}

In [145]:
with urlopen('https://raw.githubusercontent.com/codebrainz/color-names/master/output/colors.json') as inf:
    color_dict = loads(re.sub(s, '}', inf.read().decode('utf-8')))
    
list(color_dict.items())[:10]

[('air_force_blue_raf',
  {'name': 'Air Force Blue (Raf)', 'hex': '#5d8aa8', 'rgb': [93, 138, 168]}),
 ('air_force_blue_usaf',
  {'name': 'Air Force Blue (Usaf)', 'hex': '#00308f', 'rgb': [0, 48, 143]}),
 ('air_superiority_blue',
  {'name': 'Air Superiority Blue', 'hex': '#72a0c1', 'rgb': [114, 160, 193]}),
 ('alabama_crimson',
  {'name': 'Alabama Crimson', 'hex': '#a32638', 'rgb': [163, 38, 56]}),
 ('alice_blue',
  {'name': 'Alice Blue', 'hex': '#f0f8ff', 'rgb': [240, 248, 255]}),
 ('alizarin_crimson',
  {'name': 'Alizarin Crimson', 'hex': '#e32636', 'rgb': [227, 38, 54]}),
 ('alloy_orange',
  {'name': 'Alloy Orange', 'hex': '#c46210', 'rgb': [196, 98, 16]}),
 ('almond', {'name': 'Almond', 'hex': '#efdecd', 'rgb': [239, 222, 205]}),
 ('amaranth', {'name': 'Amaranth', 'hex': '#e52b50', 'rgb': [229, 43, 80]}),
 ('amber', {'name': 'Amber', 'hex': '#ffbf00', 'rgb': [255, 191, 0]})]

# Persistenza dei dati

## Compressione

[gzip](https://docs.python.org/3/library/gzip.html) e [zipfile](https://docs.python.org/3/library/zipfile.html).

In [151]:
import gzip

with gzip.open('test.gz', 'w') as ouf:
    ouf.write('Questa è una prova'.encode('utf-8'))

In [152]:
! gzcat test.gz

Questa è una prova

In [176]:
from os import remove
from shutil import copyfileobj
from tempfile import NamedTemporaryFile
from urllib.request import urlopen

with urlopen('https://www.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip') as inf:
    with NamedTemporaryFile(delete = False) as tmp:
        copyfileobj(inf, tmp)

In [177]:
%%bash -s "$tmp.name"

unzip -l "$1"

Archive:  /var/folders/23/yqhr033n6rl4q23_2fry24j80000gn/T/tmpm8ubt2_8
  Length      Date    Time    Name
---------  ---------- -----   ----
   884736  11-29-2015 10:53   chinook.db
---------                     -------
   884736                     1 file


In [178]:
import zipfile

with zipfile.ZipFile(tmp.name) as inf:
    for entry in inf.infolist():
        if entry.filename.endswith('.db'):
            inf.extract(entry)

In [179]:
! ls *.db

chinook.db


In [182]:
try:
    remove(tmp.name)
except FileNotFoundError:
    pass

## DB

Da [sqlite3](https://docs.python.org/3/library/sqlite3.html); si veda l'[esempio](https://www.sqlitetutorial.net/sqlite-sample-database/) con [dati](https://www.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip) e [schema](https://www.sqlitetutorial.net/wp-content/uploads/2018/03/sqlite-sample-database-diagram-color.pdf).

In [253]:
import sqlite3

import sqlite3

conn = sqlite3.connect('chinook.db')
cur = conn.cursor()

In [254]:
for row in cur.execute('SELECT * FROM playlists WHERE Name LIKE ?', ['Classical%']):
    print(row)

(12, 'Classical')
(13, 'Classical 101 - Deep Cuts')
(14, 'Classical 101 - Next Steps')
(15, 'Classical 101 - The Basics')


In [260]:
cur.row_factory = sqlite3.Row

query = """
SELECT albums.Title as aTitle, playlists.Name as pName
    FROM playlists 
        JOIN playlist_track USING(PlaylistId) 
        JOIN tracks USING(TrackId) 
        JOIN albums USING(AlbumId) 
    WHERE playlists.Name LIKE 'Classical 101%'
    ORDER BY pName, aTitle
"""

prev = None
for row in cur.execute(query):
    pName = row["pName"]
    if pName != prev:
        if prev: print('\n')
        print(f'Playlist: {pName}\n')
        prev = pName
    print(f'\t{row["aTitle"]}')


Playlist: Classical 101 - Deep Cuts

	Bach: Orchestral Suites Nos. 1 - 4
	Bartok: Violin & Viola Concertos
	Beethoven: Symphony No. 6 'Pastoral' Etc.
	Charpentier: Divertissements, Airs & Concerts
	English Renaissance
	Great Recordings of the Century - Shubert: Schwanengesang, 4 Lieder
	Great Recordings of the Century: Paganini's 24 Caprices
	Górecki: Symphony No. 3
	J.S. Bach: Chaconne, Suite in E Minor, Partita in E Major & Prelude, Fugue and Allegro
	Koyaanisqatsi (Soundtrack from the Motion Picture)
	Liszt - 12 Études D'Execution Transcendante
	Locatelli: Concertos for Violin, Strings and Continuo, Vol. 3
	Mendelssohn: A Midsummer Night's Dream
	Monteverdi: L'Orfeo
	Mozart: Chamber Music
	Nielsen: The Six Symphonies
	Prokofiev: Symphony No.5 & Stravinksy: Le Sacre Du Printemps
	Purcell: Music for the Queen Mary
	Purcell: The Fairy Queen
	Respighi:Pines of Rome
	Schubert: The Late String Quartets & String Quintet (3 CD's)
	South American Getaway
	Szymanowski: Piano Works, Vol. 1
	Th

# Supporto allo sviluppo e debugging

Da [unittest](https://docs.python.org/3/library/unittest.html) e [timeit](https://docs.python.org/3/library/timeit.html).

Una libreria esterna: [tqdm](https://github.com/tqdm/tqdm).