# Hello Python

I put some examples of usage of various Python modules. Note that this is just something for me to quickly refresh my memory, you should just read the documentation for explanations and full coverage of module capabilities. 

# Input / Output

In [1]:
import sys

print("Don't talk to strangers")
print("Don't talk to strangers", end='\n\n')
print("Don't", "talk", "to", "strangers", sep=', ')
print("Don't talk to strangers", file=sys.stdout, flush=True)

Don't talk to strangers
Don't talk to strangers

Don't, talk, to, strangers
Don't talk to strangers


In [2]:
import math

print('{1} and {0} and {1}'.format('spam', 'eggs')) # use duplicate indices
print('This {food} is {adjective}. Also {1} + {0} = ?'.format( # pass kwargs and use their keys
    1, 3, food='spam', adjective='absolutely horrible'
))
print('{0:.3f}, {0:8.3f}, '.format(10 * math.pi)) # {key:width.precisionf}

print('%8.3f.' % (10 * math.pi))# old printf style

eggs and spam and eggs
This spam is absolutely horrible. Also 3 + 1 = ?
31.416,   31.416, 
  31.416.


In [3]:
a = 10
rest = [1, 2, 3]
print(f"Since 3.6 Python has format strings! Use values from the context like this a={2**a} rest={rest}")

Since 3.6 Python has format strings! Use values from the context like this a=1024 rest=[1, 2, 3]


In [4]:
with open('garden.txt') as opened:
    content = opened.read() # reads everything as one string
    chars = len(content)
    print(chars, content[2:10])

with open('garden.txt') as opened:
    lines, chars = 0, 0
    line = opened.readline()
    while line:
        lines += 1
        chars += len(line)
        line = opened.readline()
    print(chars, lines)

with open('garden.txt') as opened:
    lines, chars = 0, 0
    for line in opened:
        lines += 1
        chars += len(line)
    print(chars, lines)

with open('garden.txt') as opened: # this is the idiomatic way if you have no special requirements
    lines, chars = 0, 0
    for line in opened: # this is memory efficient since it is a generator
        lines += 1
        chars += len(line)
    print(chars, lines)
    
with open('garden.txt') as opened:
    content = opened.readlines() # reads everything split into lines
    lines = len(content)
    chars = sum(map(len, content))
    print(chars, lines)

440 om too m
440 16
440 16
440 16
440 16


# Regexes and Globs

In [5]:
import re

regex_string = r'[^abc]\d{2}' # r - raw string notation, no need to escape backslashes \\d
print(type(regex_string), regex_string) # still a string, not compiled

compiled_regex = re.compile(regex_string)
print(type(compiled_regex), compiled_regex)

print('match')
print(re.match(regex_string, 'a12 p54 b23'))
print(re.match(regex_string, 'p54 b23')) # matches prefix

print('fullmatch')
print(re.fullmatch(regex_string, 'a12 p54 b23'))
print(re.fullmatch(regex_string, 'p54 b23'))
print(re.fullmatch(regex_string, 'p54'))  # only matches full string

print('search')
print(re.search(regex_string, 'a12 C32 p54 b23 h99 y11')) # matches anywhere
print(re.search(regex_string, 'a12 C32 p54 b23 h99 y11', flags=re.I)) 
# many useful flags, passed to function or to compile
# match: group() returns string, start() end() and span() return positions of match

print('findall')
print(re.findall(regex_string, 'a12 c32 p54 b23 h99 y11'))

print('finditer')
print(re.finditer(regex_string, 'a12 c32 p54 b23 h99 y11'))
print('First: ', next(re.finditer(regex_string, 'a12 c32 p54 b23 h99 y11')))

print('split')
print(re.split(r'\s', 'a12 c32  p54 b23\th99 y11', maxsplit=5))

print('sub')
print(re.sub(r'\d', '_', 'a12 c32 p54 b23 h99 y11', count=9))

<class 'str'> [^abc]\d{2}
<class '_sre.SRE_Pattern'> re.compile('[^abc]\\d{2}')
match
None
<_sre.SRE_Match object; span=(0, 3), match='p54'>
fullmatch
None
None
<_sre.SRE_Match object; span=(0, 3), match='p54'>
search
<_sre.SRE_Match object; span=(4, 7), match='C32'>
<_sre.SRE_Match object; span=(8, 11), match='p54'>
findall
['p54', 'h99', 'y11']
finditer
<callable_iterator object at 0x7fdfc6763e48>
First:  <_sre.SRE_Match object; span=(8, 11), match='p54'>
split
['a12', 'c32', '', 'p54', 'b23', 'h99 y11']
sub
a__ c__ p__ b__ h_9 y11


In [6]:
# Useful regex concept:

print(re.match('0\d*0', '012034056')) # greedy * matches all it can '0120340'
print(re.match('0\d*?0', '012034056')) # lazy *? matches as few characters as possible '0120'

# Also those, but better refer to docs when there is a need
# \w - unicode word characted 
# \s - whitespace
# \d digit
# \b - beginning and end of a word

# \W \S - matches all that the lowercase version does not match

<_sre.SRE_Match object; span=(0, 7), match='0120340'>
<_sre.SRE_Match object; span=(0, 4), match='0120'>


In [7]:
import glob

print(glob.glob('*.txt'))

# by the way, python globs do not support recursive ** !! Use glob2 package 

['garden.txt']


# Exceptions

In [8]:
class CustomException(Exception): # must be a subclass of Exception
    pass

class NotAException:
    pass

# raise CustomException('everything will be fire') # CustomException: everything will be fire
# raise NotAException # TypeError: exceptions must derive from BaseException

In [9]:
try:
    pass
#     raise OSError('bad os')
#     raise TypeError('bad type')
#     raise ValueError('bad stuff')
except OSError as err: # bind exception to variable
    print("OS error: {0}".format(err))
    raise ValueError from err # rethrow, sets __cause__, will get different message (cause vs while handling)
except (RuntimeError, TypeError, NameError): # multiple
    print("Could not convert data to an integer.")
    raise ValueError from None # throws but clears traceback and avoids 
                               # 'During handling of _, another exception occurred'
except: # handle all other exceptions
    print("Unexpected error:", sys.exc_info()[0])
    raise # rethrows without adding anything to traceback
else:
    print('Everything went okay')
finally:
    print('Always excecute, cleanup')

Everything went okay
Always excecute, cleanup


In [10]:
def wonder_what_will_be_returned():
    try:
        raise ValueError
    except ValueError:
#         return 10 
        raise # does not matter if we return or rethrow here, finally overrides the result
    else:
        return 20
    finally:
        return 30 # <- this one

print(wonder_what_will_be_returned())

30


# Random

In [11]:
import random

random.seed(13) # global seed

print(random.randrange(9, 18, 3)) # 9, 12, 15 Not including 18, just like range
print(random.randint(10, 20)) # [10, 20] inclusive

chars = ['a', 'b', 'c', 'd', 'e']
print(random.choice(chars))
print(random.sample(chars, 3))
random.shuffle(chars)
print(chars)

print(random.random()) # uniform [0.0, 1.0)
print(random.uniform(-2.0, 3.0)) # uniform [a, b]
print(random.normalvariate(5.0, 1.0))
# other like triangular(low, high, mode), gammavariate(alpha, beta), weibullvariate(alpha, beta)

12
14
b
['b', 'e', 'a']
['d', 'c', 'a', 'e', 'b']
0.7446921713252124
-1.8501301023520411
2.970174481495891


# Datetime

In [12]:
import datetime as dt

# date, time, datetime (subclass of data hmm), timedelta, tzinfo, timezone
# date always naive, time/datetime naive or aware (pass tzinfo during creation)
# year must be in [0, 9999], month in [1, 12], day [1, month length], hour [0, 23], minute [0, 59] etc

print('date')
date1 = dt.date(year=2010, month=12, day=31)
print(date1)
date2 = dt.date(1970, 1, 1)
print(date2)

print(date1.year, date1.month, date1.day, sep='; ')
print(dt.date.today(), dt.date.fromtimestamp(1000), dt.date.min, dt.date.max, dt.date.resolution)

print('\ntime')
time1 = dt.time(hour=23, minute=59, second=59, microsecond=23412, tzinfo=None, fold=0)
print(time1)
time2 = dt.time(minute=59, second=59, tzinfo=dt.timezone.utc) # all optional
print(time2)
print(time1.hour, time1.minute, time1.second, time1.microsecond, time1.tzinfo, time1.fold)
print(dt.time, dt.time.min, dt.time.max)
print(time1.replace(minute=11, microsecond=0))

print('\ndatetime')
datetime1 = dt.datetime(year=2010, month=12, day=31, hour=23, minute=59, 
                        second=59, microsecond=999, tzinfo=None, fold=0) 
# fold matters when hour changes due to dst, 0 = the earlier 2am, 1 = the latter (after the clock rollback)
print(datetime1)
datetime2 = dt.datetime(year=2010, month=6, day=4, minute=30) # optional starting from hour
datetime3 = dt.datetime(year=2010, month=12, day=31, minute=30, tzinfo=dt.timezone.utc) # aware of tz when tz is not None

print(datetime1.year, datetime1.month, datetime1.day, datetime1.hour, datetime1.minute, sep='; ')
print(dt.datetime.today(), dt.datetime.now(dt.timezone.utc), dt.datetime.utcnow()) # today - naive local, utcnow - naive utc
print(datetime1.replace(month=7, hour=11))
print(datetime1.date(), datetime1.time(), sep=' and ')
print(dt.datetime.combine(date1, time1)) # keeps tzinfo from time

date
2010-12-31
1970-01-01
2010; 12; 31
2017-11-03 1970-01-01 0001-01-01 9999-12-31 1 day, 0:00:00

time
23:59:59.023412
00:59:59+00:00
23 59 59 23412 None 0
<class 'datetime.time'> 00:00:00 23:59:59.999999
23:11:59

datetime
2010-12-31 23:59:59.000999
2010; 12; 31; 23; 59
2017-11-03 02:00:08.619398 2017-11-03 01:00:08.619405+00:00 2017-11-03 01:00:08.619409
2010-07-31 11:59:59.000999
2010-12-31 and 23:59:59.000999
2010-12-31 23:59:59.023412


In [13]:
td0 = dt.timedelta(weeks=1, days=2, hours=3, minutes=4, seconds=5, milliseconds=6, microseconds=7)
print(td0)
td1 = dt.timedelta(days=12, minutes=64) # wraps
print(td1)
td2 = dt.timedelta(days=12, minutes=-64) # subtracts
print(td2)
print(td0 + td1, td0 / 2, -td2, sep='; ')

print(date1 - date2, type(date1 - date2))

print(datetime1 - datetime2, type(datetime1 - datetime2))
# print(datetime1 - datetime3) # can only perform operations when both arguments aware or both naive

9 days, 3:04:05.006007
12 days, 1:04:00
11 days, 22:56:00
21 days, 4:08:05.006007; 4 days, 13:32:02.503004; -12 days, 1:04:00
14974 days, 0:00:00 <class 'datetime.timedelta'>
210 days, 23:29:59.000999 <class 'datetime.timedelta'>


In [14]:
import pytz # additional package from pip!

zone1 = dt.timezone.utc
zone2 = pytz.timezone('US/Eastern')
zone3 = pytz.timezone('Europe/Amsterdam')
zone4 = pytz.timezone('Asia/Samarkand')

for zone in (zone1, zone2, zone3, zone4):
    print(zone.utcoffset(datetime1), zone.dst(datetime1), zone.tzname(datetime1), sep='; ')

0:00:00; None; UTC
-1 day, 19:00:00; 0:00:00; EST
1:00:00; 0:00:00; CET
5:00:00; 0:00:00; +05


In [15]:
print(date1.isoformat(), date1.ctime(), date1.strftime('%Y %m %d'), sep='; ')
print(datetime1.isoformat(), datetime1.ctime(), datetime1.strftime('%Y %m %d %H %M %s'), sep='; ')
print(time1.isoformat(), time1.strftime('%H %M %s'), sep='; ')

parsed_dt = dt.datetime.strptime('1993-10-07 11:22', '%Y-%m-%d %H:%M')
print(parsed_dt)

2010-12-31; Fri Dec 31 00:00:00 2010; 2010 12 31
2010-12-31T23:59:59.000999; Fri Dec 31 23:59:59 2010; 2010 12 31 23 59 1293836399
23:59:59.023412; 23 59 -2208907441
1993-10-07 11:22:00


# Walking directories

In [16]:
import os

print(os.listdir(path='.ipynb_checkpoints')) # list contents of a directory

with os.scandir(path='.ipynb_checkpoints') as entries: # returns an iterator of os.DirEntry
    print(*entries) 

for dirpath, dnames, fnames in os.walk(".ipynb_checkpoints"): # recursive walk
    if dirpath.startswith('.'):
        for fname in fnames:
            fpath = os.path.join(dirpath, fname) # os independent join
            print(fpath)
            
# there are also:
# os.mkdir os.makedirs os.remove os.removedir os.removedirs os.rename os.renames os.chmod os.chown os.chdir

['HelloFlask-checkpoint.ipynb', 'HelloSqlAlchemy-checkpoint.ipynb', 'HelloPython-checkpoint.ipynb', 'HelloClasses-checkpoint.ipynb', 'HelloVisualizations-checkpoint.ipynb', 'HelloDecorators-checkpoint.ipynb']
<DirEntry 'HelloFlask-checkpoint.ipynb'> <DirEntry 'HelloSqlAlchemy-checkpoint.ipynb'> <DirEntry 'HelloPython-checkpoint.ipynb'> <DirEntry 'HelloClasses-checkpoint.ipynb'> <DirEntry 'HelloVisualizations-checkpoint.ipynb'> <DirEntry 'HelloDecorators-checkpoint.ipynb'>
.ipynb_checkpoints/HelloFlask-checkpoint.ipynb
.ipynb_checkpoints/HelloSqlAlchemy-checkpoint.ipynb
.ipynb_checkpoints/HelloPython-checkpoint.ipynb
.ipynb_checkpoints/HelloClasses-checkpoint.ipynb
.ipynb_checkpoints/HelloVisualizations-checkpoint.ipynb
.ipynb_checkpoints/HelloDecorators-checkpoint.ipynb


In [17]:
statinfo = os.stat('HelloPython.ipynb')
print(statinfo)
print(statinfo.st_mode, statinfo.st_dev, statinfo.st_uid, statinfo.st_gid, statinfo.st_size, 
      statinfo.st_atime, statinfo.st_mtime, statinfo.st_ctime) # a lot, really

os.stat_result(st_mode=33188, st_ino=4589749, st_dev=2049, st_nlink=1, st_uid=1000, st_gid=1000, st_size=26797, st_atime=1509401183, st_mtime=1509670803, st_ctime=1509670803)
33188 2049 1000 1000 26797 1509401183.8266475 1509670803.1317866 1509670803.1317866


In [18]:
with os.scandir(path='.ipynb_checkpoints') as entries: # returns an iterator of os.DirEntry
    entry = next(entries)
    print(entry)
    print(entry.name, entry.path, entry.is_dir(), entry.is_file(), entry.is_symlink())
    print(entry.stat())

<DirEntry 'HelloFlask-checkpoint.ipynb'>
HelloFlask-checkpoint.ipynb .ipynb_checkpoints/HelloFlask-checkpoint.ipynb False True False
os.stat_result(st_mode=33188, st_ino=11668329, st_dev=2049, st_nlink=1, st_uid=1000, st_gid=1000, st_size=1159, st_atime=1509666881, st_mtime=1509666887, st_ctime=1509666887)


In [19]:
print(os.path.abspath('../yo'))
print(os.path.dirname('hi/hey/ho'))
print(os.path.exists('../yo'))
print(os.path.expanduser('~/.'))
print(os.path.join('hey', 'yo', 'ho.txt')) 
print(os.path.normpath('yo/bro/./hey/../../ho.txt')) 
print(os.path.realpath('yo/bro/./hey/../../ho.txt')) 
print(os.path.relpath('/where/are/you')) 
print(os.path.getatime('HelloPython.ipynb')) # or FileNotFoundError
print(os.path.getsize('HelloPython.ipynb')) 
print(os.path.isfile('HelloPython.ipynb')) 
print(os.path.isdir('HelloPython.ipynb')) 

/usr/delegated/michal/Workspace/PycharmProjects/yo
hi/hey
False
/home/michal/.
hey/yo/ho.txt
yo/ho.txt
/usr/delegated/michal/Workspace/PycharmProjects/hello-python/yo/ho.txt
../../../../../../where/are/you
1509401183.8266475
26797
True
False


# Compression

# Json

In [20]:
import json

doc = '["foo", {"bar":["baz", null, 1.0, 2], "moo": true}]'
doc_loaded = json.loads(doc)
print(type(doc_loaded), doc_loaded)
doc_dumped = json.dumps(doc_loaded, indent=2, sort_keys=True)
print(type(doc_dumped), doc_dumped)

<class 'list'> ['foo', {'bar': ['baz', None, 1.0, 2], 'moo': True}]
<class 'str'> [
  "foo",
  {
    "bar": [
      "baz",
      null,
      1.0,
      2
    ],
    "moo": true
  }
]


In [21]:
import io # by the way this is a Python module for in memory streams

with io.StringIO(doc) as opened:
    doc_loaded2 = json.load(opened)
print(type(doc_loaded2), doc_loaded2)

with io.StringIO() as opened:
    json.dump(doc_loaded2, opened, indent=4)
    doc_dumped2 = opened.getvalue()
print(type(doc_dumped2), doc_dumped2)

<class 'list'> ['foo', {'bar': ['baz', None, 1.0, 2], 'moo': True}]
<class 'str'> [
    "foo",
    {
        "bar": [
            "baz",
            null,
            1.0,
            2
        ],
        "moo": true
    }
]


# Concurrency

# Logging

# Pickles

# Internationalization

In [22]:
import gettext



In [23]:
import locale # not too exciting

locale.setlocale(locale.LC_ALL, '')

print(*(locale.nl_langinfo(option) for option in [locale.CODESET, locale.DAY_1, locale.MON_1, locale.THOUSEP]))

print(locale.getdefaultlocale())
print(locale.currency(100.555))
print(locale.str(1005.3352))

UTF-8 Sunday January ,
('en_US', 'UTF-8')
$100.56
1005.3352
