# Laboratory 04, Week 05

## 1. `RationalNumber` class

Write a class that represents a rational number. A number is rational if it can be expressed as the quotient of two integers (p and q, with q nonzero). Define the operators seen in the tests below.

Make sure that p and q are always relatively prime, i.e. the fraction is stored in lowest terms (you can use `math.gcd`).

In [52]:
import re
from math import gcd


class RationalNumberValueError(ValueError):
    """Raised when a RationalNumber receives an invalid numerator or denominator."""


class RationalNumber(object):
    """Rational number p/q stored in lowest terms with a positive denominator.

    Supports ``+``, ``*`` and ``/`` with other ``RationalNumber`` objects and
    with plain ints (on either side), equality with both, hashing (so that
    instances can be used as dictionary keys) and ``abs()``.
    """

    # Accepts "p", "p/q", optional minus signs and optional blanks around "/".
    rat_pat = re.compile(r'(?P<psign>-)?\s*(?P<p>\d+)(?P<den>\s*/\s*(?P<qsign>-)?\s*(?P<q>\d+))?')

    def __init__(self, p, q=1):
        """Build p/q, reduced, with the sign carried by the numerator.

        Args:
            p: integer numerator.
            q: integer denominator (defaults to 1), must not be zero.

        Raises:
            RationalNumberValueError: if p or q is not an integer or q is 0.
        """
        try:
            # The sign flip lives inside the try so that a non-numeric
            # argument is reported the same way as a non-integer one.
            if q < 0:
                p, q = -p, -q
            d = gcd(p, q)
        except TypeError:
            raise RationalNumberValueError("p and q must be integers")
        if q == 0:
            raise RationalNumberValueError("the denominator q must not be zero")
        self._p, self._q = p // d, q // d

    @property
    def p(self):
        """Numerator of the reduced fraction (carries the sign)."""
        return self._p

    @p.setter
    def p(self, newp):
        """Replace the numerator and re-reduce the fraction.

        Raises:
            RationalNumberValueError: if newp is not an int.
        """
        if type(newp) is not int:
            raise RationalNumberValueError(
                "p must be an int, got {0!r}".format(newp))
        d = gcd(newp, self._q)
        self._p = newp // d
        self._q = self._q // d

    @property
    def q(self):
        """Denominator of the reduced fraction (always positive)."""
        return self._q

    @q.setter
    def q(self, newq):
        """Replace the denominator, keeping it positive, and re-reduce.

        Raises:
            RationalNumberValueError: if newq is not an int or is zero.
        """
        if type(newq) is not int:
            raise RationalNumberValueError(
                "q must be an int, got {0!r}".format(newq))
        if newq == 0:
            raise RationalNumberValueError("the denominator q must not be zero")
        newp = self._p
        if newq < 0:
            # Move the sign to the numerator so that q stays positive.
            newp, newq = -newp, -newq
        d = gcd(newp, newq)
        self._p = newp // d
        self._q = newq // d

    @staticmethod
    def _coerce(other):
        """Return other as a RationalNumber, or None if it is unsupported."""
        if isinstance(other, RationalNumber):
            return other
        if isinstance(other, int):
            return RationalNumber(other, 1)
        return None

    def __add__(self, other):
        """Return self + other; other may be a RationalNumber or an int."""
        other = self._coerce(other)
        if other is None:
            # Lets Python try the reflected operation, then raise TypeError.
            return NotImplemented
        # The constructor reduces the result, no extra gcd is needed here.
        return RationalNumber(self._p * other._q + other._p * self._q,
                              self._q * other._q)

    __radd__ = __add__

    def __mul__(self, other):
        """Return self * other; other may be a RationalNumber or an int."""
        other = self._coerce(other)
        if other is None:
            return NotImplemented
        return RationalNumber(self._p * other._p, self._q * other._q)

    __rmul__ = __mul__

    def __truediv__(self, other):
        """Return self / other; other may be a RationalNumber or an int.

        Raises:
            ZeroDivisionError: if other equals zero.
        """
        other = self._coerce(other)
        if other is None:
            return NotImplemented
        if other._p == 0:
            raise ZeroDivisionError("division by zero")
        # A negative other._p yields a negative denominator here; the
        # constructor moves the sign back to the numerator.
        return RationalNumber(self._p * other._q, self._q * other._p)

    def __rtruediv__(self, other):
        """Return other / self for an int (or RationalNumber) left operand."""
        other = self._coerce(other)
        if other is None:
            return NotImplemented
        return other / self

    def __eq__(self, other):
        """Compare by value with a RationalNumber or an int.

        Unsupported types yield NotImplemented, so ``==`` evaluates to False
        instead of raising.
        """
        other = self._coerce(other)
        if other is None:
            return NotImplemented
        return (self._p == other._p) and (self._q == other._q)

    def __hash__(self):
        """Hash consistent with __eq__, including equality with ints."""
        if self._q == 1:
            # Equal objects must hash equally: RationalNumber(3) == 3.
            return hash(self._p)
        return hash((self._p, self._q))

    def __abs__(self):
        """Return the absolute value as a float."""
        return abs(self._p / self._q)

    def __repr__(self):
        """Return a constructor-like representation for debugging."""
        return "RationalNumber({0}, {1})".format(self._p, self._q)

    @staticmethod
    def from_str(s):
        """Parse strings such as "3", "-3/2", "3/-2" or "3 / -2".

        Surrounding whitespace is ignored; the whole remaining string has to
        match the expected format.

        Returns:
            The parsed RationalNumber, or None if s is not in that format.

        Raises:
            RationalNumberValueError: if the denominator is zero.
        """
        rat_mtc = RationalNumber.rat_pat.fullmatch(s.strip())
        if rat_mtc is None:
            return None
        p = int(rat_mtc.group('p'))
        if rat_mtc.group('psign') is not None:
            p = -p
        # Unmatched optional groups are reported as None, not left out.
        if rat_mtc.group('den') is None:
            q = 1
        else:
            q = int(rat_mtc.group('q'))
            if rat_mtc.group('qsign') is not None:
                q = -q
        return RationalNumber(p, q)
        
        
# 43/2 + 43/2 = 43, so the sum has to compare equal to the integer-valued rational.
r = RationalNumber(43, 2)
assert r + r == RationalNumber(43)  # q = 1 in this case

# Multiplying by a plain int must agree with adding the number to itself.
assert r * 2 == r + r

r1 = RationalNumber(3, 2)
r2 = RationalNumber(4, 3)

# Product and quotient, compared against an unreduced (12/6) and a reduced (9/8) value.
assert r1 * r2 == RationalNumber(12, 6)
assert r1 / r2 == RationalNumber(9, 8)

# Different representations of the same value must compare equal.
assert r1 == RationalNumber(6, 4)

### RationalNumber advanced exercises

Make the class usable as a dictionary key.

In [29]:
# r1 and r2 are both 3/1, so they must end up as one and the same dictionary key.
r1 = RationalNumber(3)
r2 = RationalNumber(3, 1)
r3 = RationalNumber(3, 2)

# Three entries are written, but only two distinct keys may remain.
d = {r1: 1, r2: 2, r3: 12}
assert(len(d) == 2)

`p` and `q` can only be integers. Raise a `RationalNumberValueError` if someone tries to set them to anything else.

In [30]:
# Assigning a non-integer numerator must be rejected with the dedicated error type.
try:
    r1.p = 3.4
except RationalNumberValueError:
    print("This should happen")
else:
    print("This shouldn't happen")
    
# Same check for the denominator. Catching the ValueError base class also
# works because RationalNumberValueError derives from ValueError.
try:
    r1.q = 3.4
except ValueError:
    print("This should happen")
else:
    print("This shouldn't happen")

This should happen
This should happen


Rational numbers may be negative. Make sure that `q` is never negative.

In [37]:
# A negative denominator must be normalised: the sign moves to the numerator.
r = RationalNumber(3, -2)
assert r.p == -3 and r.q == 2
# abs() is compared against a float here.
assert abs(r) == 1.5

Add a `from_str` factory method which parses the following formats:

In [53]:
# Reference value that every accepted spelling below has to parse to.
r = RationalNumber(-3, 2)

# The minus sign may sit on either part, and blanks around "/" are allowed.
assert RationalNumber.from_str("-3/2") == r
assert RationalNumber.from_str("3/-2") == r
assert RationalNumber.from_str("3 / -2") == r

## 2. Comprehension

Convert the following for loops into comprehensions:

In [58]:
# Comprehension version: every second integer from -5 up to 9, shifted down by two.
l = [value - 2 for value in range(-5, 10, 2)]
print(l)

# Reference loop that builds the same list step by step.
l = []
for i in range(-5, 10, 2):
    shifted = [i - 2]
    l.extend(shifted[:1])
    del shifted
print(l)

[-7, -5, -3, -1, 1, 3, 5, 7]
[-7, -5, -3, -1, 1, 3, 5, 7]


In [59]:
# Comprehension version: numbers below 100 whose last decimal digit is 4.
l = [number for number in range(100) if number % 10 == 4]
print(l)

# Reference loop, written with a guard clause.
l = []
for i in range(100):
    if i % 10 != 4:
        continue
    l.append(i)
print(l)

[4, 14, 24, 34, 44, 54, 64, 74, 84, 94]
[4, 14, 24, 34, 44, 54, 64, 74, 84, 94]


In [60]:
l1 = [12, 1, 0, 13, -3, -4, 0, 2]

# Comprehension version: keep the odd elements (in Python -3 % 2 is 1 as well).
l2 = [item for item in l1 if item % 2 == 1]
print(l2)

# Reference loop, written with a guard clause.
l2 = []
for e in l1:
    if e % 2 != 1:
        continue
    l2.append(e)
print(l2)

[1, 13, -3]
[1, 13, -3]


In [61]:
l1 = [12, 1, 0, 13, -3, -4, 0, 2]

# Comprehension version: the comparison already yields a bool, so no
# "True if ... else False" wrapper is needed.
l2 = [e % 2 == 1 for e in l1]
print(l2)

# Reference loop producing the same list of oddness flags.
l2 = []
for e in l1:
    l2.append(e % 2 == 1)
print(l2)

[False, True, False, True, True, False, False, False]
[False, True, False, True, True, False, False, False]


In [65]:
l1 = [3, 5, 7, 11, 13, 17, 19]
l2 = [2, 4, 6, 8, 10]

# Comprehension version: all pairwise products, l1 varying slowest.
products = [
    left * right
    for left in l1
    for right in l2
]
print(products)

# Reference nested loop producing the same sequence.
products = []
for x in l1:
    for y in l2:
        products += [x * y]
print(products)

[6, 12, 18, 24, 30, 10, 20, 30, 40, 50, 14, 28, 42, 56, 70, 22, 44, 66, 88, 110, 26, 52, 78, 104, 130, 34, 68, 102, 136, 170, 38, 76, 114, 152, 190]
[6, 12, 18, 24, 30, 10, 20, 30, 40, 50, 14, 28, 42, 56, 70, 22, 44, 66, 88, 110, 26, 52, 78, 104, 130, 34, 68, 102, 136, 170, 38, 76, 114, 152, 190]


In [74]:
l1 = [3, 5, 7, 11, 13, 17, 19]
l2 = [2, 4, 6, 8, 10]

# Comprehension version: products of those pairs whose sum is divisible by 3.
products = [
    left * right
    for left in l1
    for right in l2
    if (left + right) % 3 == 0
]
print(products)

# Reference nested loop, written with a guard clause.
products = []
for x in l1:
    for y in l2:
        if (x + y) % 3 != 0:
            continue
        products.append(x * y)
print(products)

[18, 20, 50, 14, 56, 44, 110, 26, 104, 68, 170, 38, 152]
[18, 20, 50, 14, 56, 44, 110, 26, 104, 68, 170, 38, 152]


In [79]:
fruits = ["apple", "plum", "pear", "avocado"]

# Comprehension version: one row per word, the n-th letter repeated n times.
mtx = [
    [letter * count for count, letter in enumerate(word, start=1)]
    for word in fruits
]
print(mtx)

# Reference nested loop producing the same rows.
mtx = []
for fruit in fruits:
    row = []
    for i, c in enumerate(fruit):
        row.append((i + 1) * c)
    mtx.append(row)
print(mtx)

[['a', 'pp', 'ppp', 'llll', 'eeeee'], ['p', 'll', 'uuu', 'mmmm'], ['p', 'ee', 'aaa', 'rrrr'], ['a', 'vv', 'ooo', 'cccc', 'aaaaa', 'dddddd', 'ooooooo']]
[['a', 'pp', 'ppp', 'llll', 'eeeee'], ['p', 'll', 'uuu', 'mmmm'], ['p', 'ee', 'aaa', 'rrrr'], ['a', 'vv', 'ooo', 'cccc', 'aaaaa', 'dddddd', 'ooooooo']]


In [83]:
from collections import Counter


text = "ababaacdsadb"

# Comprehension version: copy the Counter's (character, count) pairs into a plain dict.
char_freqs = {char: count for char, count in Counter(text).items()}
print(char_freqs)

# Reference loop: count by hand, starting every unseen character at zero.
char_freqs = {}
for c in text:
    char_freqs[c] = char_freqs.get(c, 0) + 1
print(char_freqs)

{'a': 5, 'b': 3, 'c': 1, 'd': 2, 's': 1}
{'a': 5, 'b': 3, 'c': 1, 'd': 2, 's': 1}


In [81]:
d1 = {"a": 1, "b": 3, "c": 2}
d2 = {"a": 2, "b": 1}

# Comprehension version: for every key of either dict keep the larger value,
# treating a missing key as 0.
d3 = {
    name: max(d1.get(name, 0), d2.get(name, 0))
    for name in set(d1) | set(d2)
}
print(d3)

# Reference loop producing the same mapping.
d3 = {}
for key in set(d1) | set(d2):
    max_val = max(d1.get(key, 0), d2.get(key, 0))
    d3.update({key: max_val})
print(d3)

{'c': 2, 'a': 2, 'b': 3}
{'c': 2, 'a': 2, 'b': 3}


## 3. Generators

The following piece of code downloads a small sample of the Hungarian Webcorpus. We will work on this in later exercises.

The corpus contains one word per line, and sentence boundaries are denoted by empty lines.

The file has 4 columns separated by TABs:
1. original word
2. lemma (stemmed word)
3. morphological analysis
4. morphological analysis candidates.

Take a look at the file before continuing.

## 3.1. Write a generator function that yields one sentence at a time as a list of tokens. Make sure to yield the very last sentence of the file as well.

In [117]:
import types


def read_sentences(filename):
    """Lazily yield the sentences of a one-token-per-line corpus file.

    Every line longer than one character contributes its first TAB-separated
    field as a token. Any shorter line (the bare newline of an empty line)
    closes the current sentence, which is then yielded as a list. Whatever has
    been collected when the file ends is yielded too, so the last sentence is
    not lost. Consecutive boundary lines, or a boundary line at the very end
    of the file, therefore produce an empty list.

    Args:
        filename: path of the UTF-8 encoded corpus file.

    Yields:
        list of str: the tokens of one sentence.
    """
    with open(filename, 'r', encoding="utf8") as corpus:
        tokens = []
        for line in corpus:
            if len(line) <= 1:
                # Boundary line: hand over the finished sentence, start a new one.
                yield tokens
                tokens = []
                continue
            first_field, _, _ = line.partition('\t')
            tokens.append(first_field)
        # Flush the trailing sentence, which has no boundary line after it.
        yield tokens
    
# Pull only the first sentence from a fresh generator.
# NOTE(review): `fn` is not defined in this cell; presumably it is the corpus
# path set by the download step mentioned above. Confirm before running.
sentence = next(read_sentences(fn))
assert(len(sentence) == 19)
assert isinstance(sentence, list)

# Calling the generator function must return a generator object.
sentences = read_sentences(fn)
assert isinstance(sentences, types.GeneratorType)

# Exhaust the generator to count the sentences; the expected number is
# specific to the corpus file being read.
sentences = list(sentences)
assert(len(sentences) == 90764)

## 3.2 Write a generator function that yields one sentence at a time but skips short sentences. The length limit should be a parameter of the generator which defaults to 5.

In [115]:
def read_long_sentences(filename, min_length=5):
    """Yield only those sentences of the file that have enough tokens.

    Args:
        filename: path of the corpus file, passed on to read_sentences.
        min_length: smallest number of tokens a sentence needs in order to be
            yielded (defaults to 5).

    Yields:
        list of str: each sentence with at least min_length tokens.
    """
    yield from (
        tokens
        for tokens in read_sentences(filename)
        if len(tokens) >= min_length
    )
    
# With the default limit (min_length=5) the function must still return a generator.
sentences = read_long_sentences(fn)
assert isinstance(sentences, types.GeneratorType)

# The expected counts below are specific to the corpus file being read.
sentences = list(sentences)
assert len(sentences) == 85163

# A stricter, explicitly passed limit leaves fewer sentences.
sentences = read_long_sentences(fn, 15)

sentences = list(sentences)
assert len(sentences) == 50059

85163


## 4. Context managers

Create a `Timer` context manager that measures the running time of the `with` block. The context manager takes an optional name argument and prints the block's name at the end too. 

In [None]:
class Timer(object):
    """Context manager that reports how long its ``with`` block ran.

    On leaving the block it prints "<name> ran for <F> seconds", where F is
    the elapsed time in seconds as a float. The measured value is also kept
    in the ``elapsed`` attribute.

    Args:
        name: label used in the printed report (defaults to "unnamed").
    """

    def __init__(self, name="unnamed"):
        self.name = name
        # Seconds spent inside the block; None until the block has finished.
        self.elapsed = None
        self._start = None

    def __enter__(self):
        """Start the clock and return the timer itself for ``as`` binding."""
        from time import perf_counter
        self._start = perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop the clock and print the report.

        An exception raised inside the block is reported as well, but it is
        not suppressed: the method returns None, so it keeps propagating.
        """
        from time import perf_counter
        self.elapsed = perf_counter() - self._start
        if exc_type is not None:
            print("{0} with value {1} caught\nTraceback: {2}".format(exc_type, exc_value, traceback))
        print("{0} ran for {1} seconds".format(self.name, self.elapsed))
        
        
# Expected output: "slow code ran for F seconds", where
# F is the total number of seconds the block took to finish (a float).
with Timer("slow code"):
    s = sum(range(100000))
    
# Expected output: "unnamed ran for F seconds".
with Timer():
    s = sum(range(100000))

## 5. Extra exercise, binary search tree

Create a binary search tree for integers. You should implement a `Tree` and a `Node` class.

Implement the following:
- iteration protocol for the tree. Traversal should be in-order (increasing order).
- sum(tree) - sum of all the elements
- min(tree), max(tree) - smallest, largest element
- len(tree) - number of nodes