### Integer

In [None]:
import sys

sys.getsizeof(0)  # overhead for creating an int obj

24

In [None]:
sys.getsizeof(1)  # uses an extra 4 bytes to store the int

28

In [None]:
# How much longer do large operations take?

from timeit import timeit

small = timeit('10 * 2', number=10_000_000)
large = timeit('2**100 * 2', number=10_000_000)

In [None]:
small, large

(0.09522910000009688, 5.9420910999999705)

In [None]:
# Operations
13 % 4, 13 % -4, -13 % 4, -13 % -4

(1, -3, 3, -1)

In [None]:
13 // 4, 13 // -4, -13 // 4, -13 // -4

(3, -4, -4, 3)

In [None]:
# Floor-division identity: b == a * (b // a) + (b % a). The divisor (4) must be the
# same in both the // and the % term, which is what explains the sign of the % results.
13 == 4 * (13 // 4) + 13 % 4  # generally b = a * (b // a) + (b % a), which helps make sense of the % values

True

In [None]:
bin(10), oct(10), hex(255)

('0b1010', '0o12', '0xff')

In [None]:
int("B", 16)

11

In [None]:
int("B", 11)

ValueError: invalid literal for int() with base 11: 'B'

In [None]:
# Represent an integer in any base
def from_base10(n, b):
    """Convert a non-negative integer into its list of digits in base ``b``.

    Args:
        n: Non-negative integer to convert.
        b: Target base; must be at least 2.

    Returns:
        A list of integer digit values, most significant digit first.
        ``n == 0`` yields ``[0]``.

    Raises:
        ValueError: If ``b < 2`` or ``n < 0``.
    """
    if b < 2:
        raise ValueError('Base b must be >= 2')
    if n < 0:
        # 0 is a valid input (handled just below), so the bound is inclusive
        raise ValueError('Number n must be >= 0')
    if n == 0:
        return [0]
    digits = []
    while n > 0:
        n, m = divmod(n, b)
        # Collect least-significant digit first and reverse once at the end;
        # inserting at the front on every iteration would be quadratic.
        digits.append(m)
    digits.reverse()
    return digits

In [None]:
from_base10(10, 2)

[1, 0, 1, 0]

In [None]:
def encode(digits, digit_map):
    """Translate a sequence of digit values into a string using ``digit_map``.

    Args:
        digits: Iterable of integer digit values, most significant first.
        digit_map: Sequence of characters where ``digit_map[d]`` is the
            symbol for digit value ``d``.

    Returns:
        The concatenated symbols; an empty ``digits`` yields ``''``.

    Raises:
        ValueError: If a digit is negative or has no symbol in ``digit_map``.
    """
    digits = list(digits)  # allow one-shot iterables; we need two passes
    base = len(digit_map)
    for d in digits:
        if d < 0:
            # A negative index would silently wrap around to the end of digit_map
            raise ValueError('digits must be non-negative')
        if d >= base:
            raise ValueError('digit_map is not long enough to encode the digits')
    return ''.join(digit_map[d] for d in digits)

In [None]:
encode([15, 15], '0123456789ABCDEF')

'FF'

In [None]:
import string

def rebase_from10(num, base):
    """Render the integer ``num`` as a string in the given ``base`` (2..36).

    Digits above 9 are written with uppercase letters, and negative numbers
    get a leading '-'. Raises ValueError when ``base`` is outside 2..36.
    """
    if base < 2 or base > 36:
        raise ValueError('Invalid base: 2 <= base <= 36')
    alphabet = '0123456789' + string.ascii_uppercase
    negative = num < 0
    # The digit conversion only handles magnitudes; the sign is re-attached below
    magnitude = num * (-1 if negative else 1)
    text = encode(from_base10(magnitude, base), alphabet)
    return '-' + text if negative else text

In [None]:
rebased = rebase_from10(10, 2)
rebased, int(rebased, 2)

('1010', 10)

In [None]:
rebased = rebase_from10(-314, 2)
rebased, int(rebased, 2)

('-100111010', -314)

### Fractions

In [None]:
from fractions import Fraction

In [None]:
Fraction(1,2), Fraction(0.125), Fraction('0.125'), Fraction(22, 7)

(Fraction(1, 2), Fraction(1, 8), Fraction(1, 8), Fraction(22, 7))

In [None]:
x = Fraction(1,4)
x.numerator, x.denominator

(1, 4)

In [None]:
# Floats (even ones approximating irrational numbers such as pi) are stored as exact rational numbers, because a float only has a finite number of bits
import math
x = Fraction(math.pi)
x, float(x)

(Fraction(884279719003555, 281474976710656), 3.141592653589793)

In [None]:
a = 0.125
b = 0.3
a, Fraction(a), b, Fraction(b)  # 0.3 doesn't play nicely

(0.125, Fraction(1, 8), 0.3, Fraction(5404319552844595, 18014398509481984))

In [None]:
format(b, '0.5f'), format(b, '0.15f'), format(b, '0.25f')

('0.30000', '0.300000000000000', '0.2999999999999999888977698')

In [None]:
# Limit the denom to get an appx of a float
x = Fraction(0.3)
x.limit_denominator(10)

Fraction(3, 10)

In [None]:
Fraction(math.pi).limit_denominator(10)

Fraction(22, 7)

In [None]:
math.isclose(math.pi, Fraction(math.pi)), math.isclose(math.pi, Fraction(22,7))

(True, False)

### Floats
- Python (CPython) floats are implemented using the C double type which (usually) implements the IEEE 754 double-precision binary float, also called binary64
    - sign -> 1 bit (0 = positive, 1 = negative)
    - exponent -> 11 bits (range -1022 to 1023)
    - significand (the significant digits) -> 52 bits stored -> 15-17 significant (base-10) digits
- Floats use a fixed number of bytes - 8 bytes, 64 bits

In [None]:
float(10), float('10.5')

(10.0, 10.5)

In [None]:
float(Fraction('22/7'))

3.142857142857143

In [None]:
format(0.1, '.25f')  # blegh

'0.1000000000000000055511151'

In [None]:
a = 0.1 * 3
b = 0.3
a == b

False

In [None]:
format(a, '.25f'), format(b, '.25f')

('0.3000000000000000444089210', '0.2999999999999999888977698')

In [None]:
# Equality testing of floats - problems occur bc decimal #s are represented in binary in the backend
# - absolute tolerance when 2 nums are close to 0, otherwise use relative tolerance
from math import isclose
help(isclose)  # default value for abs_tol is 0, meaning you should specify when the 2 nums being compared are close to 0

Help on built-in function isclose in module math:

isclose(a, b, *, rel_tol=1e-09, abs_tol=0.0)
    Determine whether two floating point numbers are close in value.
    
      rel_tol
        maximum difference for being considered "close", relative to the
        magnitude of the input values
      abs_tol
        maximum difference for being considered "close", regardless of the
        magnitude of the input values
    
    Return True if a is close in value to b, and False otherwise.
    
    For the values to be considered close, the difference between them
    must be smaller than at least one of the tolerances.
    
    -inf, inf and NaN behave similarly to the IEEE 754 Standard.  That
    is, NaN is not close to anything, even itself.  inf and -inf are
    only close to themselves.



In [None]:
x = 1000.0000001
y = 1000.0000002
isclose(x,y)

True

In [None]:
x = 0.0000001
y = 0.0000002
isclose(x,y), isclose(x,y,abs_tol=1e-5)

(False, True)

In [None]:
isclose(0.01, 0.02, rel_tol=1e-5, abs_tol=1e-5)

False

### Rounding

In [None]:
a = round(1.9)
a, type(a)

(2, int)

In [None]:
a = round(1.9, 0)  # keeps the type
a, type(a)

(2.0, float)

In [None]:
round(1.888, 3)  # round to closest multiple of 1e-3

1.888

In [None]:
round(1.888, 2)

1.89

In [None]:
round(12345, 1), round(12345, 0), round(12345, -1), round(12345, -2), round(12345, -3), round(12345, -4), round(12345, -5)  # round to closest multiple of 1e-n

(12345, 12345, 12340, 12300, 12000, 10000, 0)

In [None]:
# Ties
round(1.25, 1), round(1.35, 1)  # might expect 1.3 and 1.4
# Python (and many other languages) implements Banker's rounding (round half to even) - on an exact tie it picks the neighbor whose least significant digit is EVEN, to reduce bias from rounding

(1.2, 1.4)

In [None]:
round(-1.25, 1), round(-1.35, 1)

(-1.2, -1.4)

In [None]:
(0.5 + 1.5 + 2.5) / 3, (1 + 2 + 3) / 3, (round(0.5) + round(1.5) + round(2.5)) / 3
# 1.33 is closer to 1.5 (actual avg) than 2 ("away from zero rounding" avg). When dealing with lots of transactions, these differences add up (think interest rates), so the less biased solution is preferred

(1.5, 2.0, 1.3333333333333333)

In [None]:
# Implement a custom "away from zero" rounding algo since it's not built in for floats
def _round(x):
    from math import copysign
    return int(x + 0.5 * copysign(1, x))

In [None]:
round(1.5), _round(1.5)

(2, 2)

In [None]:
round(2.5), _round(2.5)

(2, 3)

In [None]:
round(-1.5), _round(-1.5)

(-2, -2)

In [None]:
round(-2.5), _round(-2.5)

(-2, -3)

### Decimals
Who cares? Think of 2-6B transactions (approx. the daily # on the NYSE) of $100.01 each. The binary (float) representation of $100.01 is not exact, and the rounding error that accumulates when summing that many float values can exceed $1000 DAILY! This is a problem

In [None]:
import decimal
from decimal import Decimal

In [None]:
decimal.getcontext()

Context(prec=28, rounding=ROUND_HALF_EVEN, Emin=-999999, Emax=999999, capitals=1, clamp=0, flags=[], traps=[InvalidOperation, DivisionByZero, Overflow])

In [None]:
# Create new context and return it (context manager)
type(decimal.localcontext())

decimal.ContextManager

In [None]:
type(decimal.getcontext())

decimal.Context

In [None]:
with decimal.localcontext() as ctx:
    ctx.prec = 6
    ctx.rounding = decimal.ROUND_HALF_UP
    print(ctx)
    print(decimal.getcontext())  # local ctx = global ctx in with block

Context(prec=6, rounding=ROUND_HALF_UP, Emin=-999999, Emax=999999, capitals=1, clamp=0, flags=[], traps=[InvalidOperation, DivisionByZero, Overflow])
Context(prec=6, rounding=ROUND_HALF_UP, Emin=-999999, Emax=999999, capitals=1, clamp=0, flags=[], traps=[InvalidOperation, DivisionByZero, Overflow])


In [None]:
x = Decimal('1.25')
y = Decimal('1.35')

with decimal.localcontext() as ctx:
    ctx.prec = 6
    ctx.rounding = decimal.ROUND_HALF_UP
    print(round(x, 1), round(y,1))
print(round(x, 1), round(y,1))  # ROUND_HALF_EVEN in global context

1.3 1.4
1.2 1.4


#### Decimal Constructors and Contexts

In [None]:
# Using integers and strings
Decimal(10), Decimal(-10), Decimal('10.1'), Decimal('-3.1415')

(Decimal('10'), Decimal('-10'), Decimal('10.1'), Decimal('-3.1415'))

In [None]:
# Using tuples
t = (0, (3,1,4,1,5), -4)  # sign, 0 = positive, 1 = negative | val | exponent (1e<exp>)
Decimal(t)

Decimal('3.1415')

In [None]:
# Stay away from floats
format(0.1, '.25f'), Decimal(0.1)

('0.1000000000000000055511151',
 Decimal('0.1000000000000000055511151231257827021181583404541015625'))

In [None]:
Decimal(0.1) == Decimal('0.1')

False

In [None]:
# Adding in contexts
decimal.getcontext().prec = 2
a = Decimal('0.12345')
b = Decimal('0.12345')
a,b

(Decimal('0.12345'), Decimal('0.12345'))

In [None]:
0.12345 + 0.12345, a + b  # a and b are stored exactly (the context precision is not applied when they are constructed); precision and rounding are applied only to the result of the arithmetic operation

(0.2469, Decimal('0.25'))

In [None]:
# Restore the default global precision (28) instead of leaving a reduced precision
# behind: the global context persists across cells, and later Decimal computations
# in this notebook assume the default.
decimal.getcontext().prec = 28
a = Decimal('0.12345')
b = Decimal('0.12345')

print(a+b)
with decimal.localcontext() as ctx:
    ctx.prec = 2  # only applies inside this with block
    c = a + b
    print(f'c within local context: {c}')
print(f'c within global context: {c}')  # c was created in local context and retains its value regardless of future context

0.24690
c within local context: 0.25
c within global context: 0.25


### Decimal Math Operations

In [None]:
import decimal
from decimal import Decimal

In [None]:
# // and % - n = d * (n // d) + (n % d) still holds!
x = 10
y = 3
print(divmod(x,y))
print(x == y * (x // y) + (x % y))

(3, 1)
True


In [None]:
x = Decimal(10)
y = Decimal(3)
print(divmod(x,y))
print(x == y * (x // y) + (x % y))  # same results for +ve #s

(Decimal('3'), Decimal('1'))
True


In [None]:
x = -10
y = 3
print(divmod(x,y))
print(x == y * (x // y) + (x % y))

x = Decimal(-10)
y = Decimal(3)
print(divmod(x,y))
print(x == y * (x // y) + (x % y))
# eqn still holds even though Decimal // truncates toward zero while int // floors (rounds toward negative infinity), which is why the two differ for negative #s

(-4, 2)
True
(Decimal('-3'), Decimal('-1'))
True


In [None]:
# Other math ops
a = Decimal('1.5')
a.ln(), a.exp(), a.sqrt()

(Decimal('0.4054651081081643819780131155'),
 Decimal('4.481689070338064822602055460'),
 Decimal('1.224744871391589049098642037'))

In [None]:
import math
a = Decimal('0.1')
a.sqrt(), math.sqrt(a)  # Decimal's own methods yield different results than those from the math module. This is bc the math module converts the Decimal (0.1 here) to a float before performing the calculation

(Decimal('0.3162277660168379331998893544'), 0.31622776601683794)

In [None]:
x = 2
x_dec = Decimal('2')
root_float = math.sqrt(x)
root_mixed = math.sqrt(x_dec)
root_dec = x_dec.sqrt()

In [None]:
print(format(root_float, '1.27f'))  # converted to float
print(format(root_mixed, '1.27f'))  # converted to float
print(format(root_dec, '1.27f'))  # uses decimal precision

1.414213562373095145474621859
1.414213562373095145474621859
1.414213562373095048801688724


In [None]:
print(format(root_float * root_float, '1.27f'))
print(format(root_mixed * root_mixed, '1.27f'))
print(format(root_dec * root_dec, '1.27f'))  # decimal "squaring" of irrational sqrt(2) is much closer to 2 than float "squaring"

2.000000000000000444089209850
2.000000000000000444089209850
1.999999999999999999999999999


In [None]:
x = 0.01
x_dec = Decimal('0.01')
root_float = math.sqrt(x)
root_mixed = math.sqrt(x_dec)
root_dec = x_dec.sqrt()

print(format(root_float, '1.27f'))
print(format(root_mixed, '1.27f'))
print(format(root_dec, '1.27f'))  # decimal is precise, floats are not

0.100000000000000005551115123
0.100000000000000005551115123
0.100000000000000000000000000


In [None]:
print(format(root_float * root_float, '1.27f'))
print(format(root_mixed * root_mixed, '1.27f'))
print(format(root_dec * root_dec, '1.27f'))  # decimal squaring of 0.1 yields 0.01 exactly - more precise

0.010000000000000001942890293
0.010000000000000001942890293
0.010000000000000000000000000


### Drawbacks to `Decimal` vs `float`
1. Not as easy to code - construction via strings or tuples
2. Not all math funcs exist in decimal class
3. More memory overhead
4. Performance is slower than floats

If you're not worried about precision, then floats are a better choice

In [None]:
from decimal import Decimal
import sys

a = 3.1415
b = Decimal('3.1415')
sys.getsizeof(a), sys.getsizeof(b)  # ~4x more memory for decimals than floats

(24, 104)

In [None]:
from timeit import timeit

float_time = timeit('a = 3.1415', number=10_000_000)
dec_time = timeit('a = Decimal("3.1415")', globals=globals(), number=10_000_000)

In [None]:
'float time:', float_time, 'decimal time:', dec_time  # much slower to create Decimals than floats - almost 20x

('float time:', 0.20122139999989486, 'decimal time:', 3.8898443000000498)

In [None]:
a = 3.1415
b = Decimal('3.1415')

float_time = timeit('a + a', globals=globals(), number=10_000_000)
dec_time = timeit('b + b', globals=globals(), number=10_000_000)

In [None]:
'float time:', float_time, 'decimal time:', dec_time  # 2x slower for decimal addition

('float time:', 0.5785249999998996, 'decimal time:', 0.940302900000006)

In [None]:
import math

a = 3.1415
b = Decimal('3.1415')

float_time = timeit('math.sqrt(a)', globals=globals(), number=1_000_000)
dec_time = timeit('b.sqrt()', globals=globals(), number=1_000_000)

In [None]:
'float time:', float_time, 'decimal time:', dec_time  # over 10x slower

('float time:', 0.19302990000005593, 'decimal time:', 3.090389700000287)

### Booleans

In [None]:
issubclass(bool, int)  # bool inherits all properties from int and implements more

True

In [None]:
type(bool) == type(int)

True

In [None]:
(1 < 3) is True  # True and False are singleton objects - they retain the same memory address for the lifetime of the code

True

In [None]:
int(True), int(False)

(1, 0)

In [None]:
True + True + True

3

In [None]:
bool(0), bool(1)  # look at truthyness of values. All python objects have a truthyness value

(False, True)

In [None]:
bool(None), bool(False), bool(0), bool('')  # special cases where __bool__(self) returns False. If __bool__ is not found, __len__ is executed. Otherwise it will return True

(False, False, False, False)

In [None]:
my_list = []
if my_list:  # checks truthyness of my_list -> my_list.__bool__() is not implemented, so truthyness is determined by my_list.__len__(), which returns 0, evaluating to falsey
    print('my list is truthy')
else:
    print('my list is falsey')

my list is falsey


In [None]:
my_list.__bool__()

AttributeError: 'list' object has no attribute '__bool__'

In [None]:
my_list.__len__() == bool(my_list)

True

### Precedence and Short-Circuiting
not --> and --> or

In [None]:
True or True and False, True or (True and False), (True or True) and False

(True, True, False)

In [None]:
True or 0, False and 1  # no need to evaluate the remainder of the expression once truthyness is known from the first term

(True, False)

In [None]:
import string

name = 'Bob'
if name and name[0] in string.digits:
    raise ValueError('Name can not start with a digit')

In [None]:
name = '99Bobs'
if name and name[0] in string.digits:
    raise ValueError('Name can not start with a digit')

ValueError: Name can not start with a digit

In [None]:
name = ''  # name.__len__() == 0 => falsy
if name and name[0] in string.digits:
    raise ValueError('Name can not start with a digit')

In [None]:
type(2 or 5)

int

### Boolean Operators

In [None]:
# X or Y: if X is truthy -> return X, otherwise evaluate Y and return it
'a' or [1,2], '' or [1,2]

('a', [1, 2])

In [None]:
1 or 1/0  # short-circuit the div by zero error

1

In [None]:
0 or 1/0

ZeroDivisionError: division by zero

In [None]:
s1 = None  # string coming from DB
s2 = ''
s3 = 'abc'

s1 = s1 or 'n/a'
s2 = s2 or 'n/a'
s3 = s3 or 'n/a'

s1, s2, s3

('n/a', 'n/a', 'abc')

In [None]:
[] or [0], None or [0]  # great way to add default vals

([0], [0])

In [None]:
# X and Y: if X is falsy -> return X, otherwise evaluate and return Y
None and 100, [] and [0]

(None, [])

In [None]:
5 and 1/5, 0 and 1/0

(0.2, 0)

In [None]:
s1 = None  # string coming from DB
s2 = ''
s3 = 'abc'

s1 and s1[0], s2 and s2[0], s3 and s3[0]  # but we don't want None or ''

(None, '', 'a')

In [None]:
# So we use the or statement to set a default value
s1 and s1[0] or 'n/a', s2 and s2[0] or 'n/a', s3 and s3[0] or 'n/a'

In [None]:
# not
not(True), not(False)

(False, True)

### Chained Comparisons

In [None]:
from fractions import Fraction
from decimal import Decimal

1.5 == Fraction('1.5') == Decimal('1.5')

True

In [None]:
1 < 2 < 3 < 4

True